Runners are removed from manager when alloacted

This removes an existing unlocked shared access to runner.running.
This also sets us up for better management of the runners.
This commit is contained in:
2023-03-08 00:13:40 -07:00
parent f190274bce
commit fe53a17160
5 changed files with 93 additions and 76 deletions
+5 -8
View File
@@ -22,13 +22,12 @@ func setupHTTPServer(
mux *http.ServeMux, mux *http.ServeMux,
conf config.PipelineConf, conf config.PipelineConf,
db database.Database, db database.Database,
registerCh chan runnermanager.RunnerRegistration, runnerManagerChans runnermanager.RunnerManagerChans,
getRunnerCh chan runnermanager.GetRunnerRequest,
) error { ) error {
webhook.CreateWebhookHandler(db, conf, mux) webhook.CreateWebhookHandler(db, conf, mux)
pipeline_api.CreateHandler(getRunnerCh, mux) pipeline_api.CreateHandler(runnerManagerChans.Allocation, runnerManagerChans.Release, mux)
err := admin_api.CreateHandler(db, mux) err := admin_api.CreateHandler(db, mux)
if err != nil { if err != nil {
@@ -41,7 +40,7 @@ func setupHTTPServer(
log.Errorf("Could not upgrade runner connection to websocket: %v", err) log.Errorf("Could not upgrade runner connection to websocket: %v", err)
return return
} }
go runnermanager.RegisterRunner(conn, registerCh) go runnermanager.RegisterRunner(conn, runnerManagerChans.Registration)
}) })
return nil return nil
} }
@@ -52,16 +51,14 @@ func Listen(
port int, port int,
conf config.PipelineConf, conf config.PipelineConf,
db database.Database, db database.Database,
registerCh chan runnermanager.RunnerRegistration, runnerManagerChans runnermanager.RunnerManagerChans,
getRunnerCh chan runnermanager.GetRunnerRequest,
) error { ) error {
err := setupHTTPServer( err := setupHTTPServer(
mux, mux,
conf, conf,
db, db,
registerCh, runnerManagerChans,
getRunnerCh,
) )
if err != nil { if err != nil {
return fmt.Errorf("Could not setup http endpoints: %w", err) return fmt.Errorf("Could not setup http endpoints: %w", err)
+2 -3
View File
@@ -40,7 +40,7 @@ func main() {
return return
} }
getRunnerCh, registerCh, err := runnermanager.StartRunnerManager(configData.Config.Runners, db) runnerManagerChans, err := runnermanager.StartRunnerManager(configData.Config.Runners, db)
if err != nil { if err != nil {
log.Errorf("Could not start runner: %v", err) log.Errorf("Could not start runner: %v", err)
return return
@@ -56,7 +56,6 @@ func main() {
configData.Config.Port, configData.Config.Port,
configData.Config.PipelineConf, configData.Config.PipelineConf,
db, db,
registerCh, runnerManagerChans,
getRunnerCh,
)) ))
} }
+14 -9
View File
@@ -4,7 +4,6 @@ import (
"context" "context"
"fmt" "fmt"
"net/http" "net/http"
"strings"
"sync" "sync"
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2" apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
@@ -19,7 +18,8 @@ import (
var log = logging.MustGetLogger("cursorius-server") var log = logging.MustGetLogger("cursorius-server")
type ApiServer struct { type ApiServer struct {
getRunnerCh chan runnermanager.GetRunnerRequest allocationCh chan runnermanager.RunnerAllocationRequest
releaseCh chan runnermanager.RunnerReleaseRequest
allocatedRunners map[uuid.UUID]*RunnerWrapper allocatedRunners map[uuid.UUID]*RunnerWrapper
allocatedRunnersMutex sync.RWMutex allocatedRunnersMutex sync.RWMutex
} }
@@ -38,10 +38,14 @@ func (r *RunnerWrapper) RunCommand(cmd string, args []string) (int64, string, st
return return_code, stdout, stderr, err return return_code, stdout, stderr, err
} }
func (r *RunnerWrapper) Release() { func (r *RunnerWrapper) Release(releaseCh chan runnermanager.RunnerReleaseRequest) {
r.mutex.Lock() r.mutex.Lock()
defer r.mutex.Unlock() defer r.mutex.Unlock()
r.runner.Release()
releaseCh <- runnermanager.RunnerReleaseRequest{
Runner: r.runner,
}
r.runner = nil
} }
func (s *ApiServer) GetRunnerFromMap(u uuid.UUID) (*RunnerWrapper, bool) { func (s *ApiServer) GetRunnerFromMap(u uuid.UUID) (*RunnerWrapper, bool) {
@@ -56,8 +60,8 @@ func (s *ApiServer) GetRunner(
req *connect.Request[apiv2.GetRunnerRequest], req *connect.Request[apiv2.GetRunnerRequest],
) (*connect.Response[apiv2.GetRunnerResponse], error) { ) (*connect.Response[apiv2.GetRunnerResponse], error) {
respChan := make(chan runnermanager.GetRunnerResponse) respChan := make(chan runnermanager.RunnerAllocationResponse)
s.getRunnerCh <- runnermanager.GetRunnerRequest{ s.allocationCh <- runnermanager.RunnerAllocationRequest{
Tags: req.Msg.Tags, Tags: req.Msg.Tags,
RespChan: respChan, RespChan: respChan,
} }
@@ -99,7 +103,7 @@ func (s *ApiServer) ReleaseRunner(
s.allocatedRunnersMutex.Lock() s.allocatedRunnersMutex.Lock()
runner := s.allocatedRunners[uuid] runner := s.allocatedRunners[uuid]
delete(s.allocatedRunners, uuid) delete(s.allocatedRunners, uuid)
runner.Release() runner.Release(s.releaseCh)
s.allocatedRunnersMutex.Unlock() s.allocatedRunnersMutex.Unlock()
res := connect.NewResponse(&apiv2.ReleaseRunnerResponse{}) res := connect.NewResponse(&apiv2.ReleaseRunnerResponse{})
@@ -138,9 +142,10 @@ func (s *ApiServer) RunCommand(
return res, nil return res, nil
} }
func CreateHandler(getRunnerCh chan runnermanager.GetRunnerRequest, mux *http.ServeMux) { func CreateHandler(allocationCh chan runnermanager.RunnerAllocationRequest, releaseCh chan runnermanager.RunnerReleaseRequest, mux *http.ServeMux) {
api_server := &ApiServer{ api_server := &ApiServer{
getRunnerCh: getRunnerCh, allocationCh: allocationCh,
releaseCh: releaseCh,
allocatedRunners: make(map[uuid.UUID]*RunnerWrapper), allocatedRunners: make(map[uuid.UUID]*RunnerWrapper),
} }
path, handler := apiv2connect.NewGetRunnerServiceHandler(api_server) path, handler := apiv2connect.NewGetRunnerServiceHandler(api_server)
+15 -5
View File
@@ -22,17 +22,27 @@ type Runner struct {
tags []string tags []string
conn *websocket.Conn conn *websocket.Conn
receiveChan chan []byte receiveChan chan []byte
running bool }
func (r *Runner) HasTags(requestedTags []string) bool {
tagIter:
for _, requestedTag := range requestedTags {
for _, posessedTag := range r.tags {
// if we find the tag, move on to search for the next one
if posessedTag == requestedTag {
continue tagIter
}
}
// if we don't find the tag
return false
}
return true
} }
func (r *Runner) Id() uuid.UUID { func (r *Runner) Id() uuid.UUID {
return r.id return r.id
} }
func (r *Runner) Release() {
r.running = false
}
func (r *Runner) RunCommand(cmd string, args []string) (returnCode int64, stdout string, stderr string, err error) { func (r *Runner) RunCommand(cmd string, args []string) (returnCode int64, stdout string, stderr string, err error) {
// Write RunCommand message to client // Write RunCommand message to client
+57 -51
View File
@@ -19,53 +19,57 @@ import (
var log = logging.MustGetLogger("cursorius-server") var log = logging.MustGetLogger("cursorius-server")
type RunnerRegistration struct { type RunnerManagerChans struct {
Secret string Allocation chan RunnerAllocationRequest
Id string Release chan RunnerReleaseRequest
Tags []string Registration chan RunnerRegistrationRequest
conn *websocket.Conn
} }
type runnerManager struct { type runnerManager struct {
getRunnerCh chan GetRunnerRequest chans RunnerManagerChans
registerCh chan RunnerRegistration
connectedRunners []Runner connectedRunners []Runner
numConnectedRunners uint64 numConnectedRunners uint64
configuredRunners map[string]config.Runner configuredRunners map[string]config.Runner
db database.Database db database.Database
} }
type GetRunnerRequest struct { type RunnerAllocationRequest struct {
Tags []string Tags []string
RespChan chan GetRunnerResponse RespChan chan RunnerAllocationResponse
} }
type GetRunnerResponse struct { type RunnerAllocationResponse struct {
Runner *Runner Runner *Runner
Err error Err error
} }
type RunnerReleaseRequest struct {
Runner *Runner
}
type RunnerRegistrationRequest struct {
Secret string
Id string
Tags []string
conn *websocket.Conn
}
type runnerJob struct { type runnerJob struct {
Id string Id string
URL string URL string
} }
func (r *runnerManager) processRequest(req GetRunnerRequest) { func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) {
tagsStr := util.FormatTags(req.Tags) tagsStr := util.FormatTags(req.Tags)
log.Infof("Got request for runner with tags \"%v\"", tagsStr) log.Infof("Got request for runner with tags \"%v\"", tagsStr)
log.Debugf("Finding runner with tags %v", runnerTagsStr.String()) log.Debugf("Finding runner with tags %v", tagsStr)
foundRunner := false foundRunner := false
runnersToRemove := []int{} runnersToRemove := []int{}
runnerIter: runnerIter:
for i, runner := range r.connectedRunners { for i, runner := range r.connectedRunners {
// don't allocate runner that is already occupied
if runner.running {
log.Debugf("Skipping runner %v, as runner is activly running another job", runner.id)
continue
}
// don't allocate runner with closed receiveChan (is defunct) // don't allocate runner with closed receiveChan (is defunct)
// there should never be messages to read on an inactive runner, // there should never be messages to read on an inactive runner,
// so we aren't losing any data here // so we aren't losing any data here
@@ -82,25 +86,20 @@ runnerIter:
default: default:
log.Debugf("Checking runner %v for requested tags", runner.id) log.Debugf("Checking runner %v for requested tags", runner.id)
tagIter: if !runner.HasTags(req.Tags) {
for _, requestedTag := range req.Tags {
for _, posessedTag := range runner.tags {
if requestedTag == posessedTag {
continue tagIter
}
}
continue runnerIter continue runnerIter
} }
r.connectedRunners[i].running = true runnersToRemove = append(runnersToRemove, i)
foundRunner = true foundRunner = true
req.RespChan <- GetRunnerResponse{ req.RespChan <- RunnerAllocationResponse{
Runner: &r.connectedRunners[i], Runner: &r.connectedRunners[i],
Err: nil, Err: nil,
} }
} }
} }
// remove allocated runner plus defunct runners
// since we iterate, all the indexes will be in accending order // since we iterate, all the indexes will be in accending order
for i, runnerInd := range runnersToRemove { for i, runnerInd := range runnersToRemove {
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1] r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
@@ -115,43 +114,42 @@ runnerIter:
if len(r.connectedRunners) == 0 { if len(r.connectedRunners) == 0 {
errorMsg = "no connected runners" errorMsg = "no connected runners"
} }
req.RespChan <- GetRunnerResponse{ req.RespChan <- RunnerAllocationResponse{
Runner: &Runner{}, Runner: &Runner{},
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg), Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
} }
} }
func (r *runnerManager) processRegistration(reg RunnerRegistration) { func (r *runnerManager) processRunnerRegistration(req RunnerRegistrationRequest) {
log.Debugf("New runner appeared with id: %v and secret: %v", reg.Id, reg.Secret) log.Debugf("New runner appeared with id: %v and secret: %v", req.Id, req.Secret)
// Get runner with give id from database // Get runner with give id from database
runnerId, err := uuid.Parse(reg.Id) runnerId, err := uuid.Parse(req.Id)
if err != nil { if err != nil {
log.Errorf("Disconnecting runner with id: %v, could not parse as UUID: %v", reg.Id, err) log.Errorf("Disconnecting runner with id: %v, could not parse as UUID: %v", req.Id, err)
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid") req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
return return
} }
dbRunner, err := r.db.GetRunnerById(runnerId) dbRunner, err := r.db.GetRunnerById(runnerId)
if err != nil { if err != nil {
log.Errorf("Disconnecting runner with id: %v, could not find runner in DB: %v", runnerId, err) log.Errorf("Disconnecting runner with id: %v, could not find runner in DB: %v", runnerId, err)
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid") req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
return return
} }
if reg.Secret != dbRunner.Token { if req.Secret != dbRunner.Token {
log.Errorf("Disconnecting runner with id: %v, invalid secret", runnerId) log.Errorf("Disconnecting runner with id: %v, invalid secret", runnerId)
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid") req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
return return
} }
log.Infof("Registering runner \"%v\" with tags %v", reg.Id, reg.Tags) log.Infof("Registering runner \"%v\" with tags %v", req.Id, req.Tags)
runner := Runner{ runner := Runner{
id: runnerId, id: runnerId,
tags: reg.Tags, tags: req.Tags,
conn: reg.conn, conn: req.conn,
receiveChan: make(chan []byte), receiveChan: make(chan []byte),
running: false,
} }
r.connectedRunners = append(r.connectedRunners, runner) r.connectedRunners = append(r.connectedRunners, runner)
// start goroutine to call Read function on websocket connection // start goroutine to call Read function on websocket connection
@@ -160,7 +158,7 @@ func (r *runnerManager) processRegistration(reg RunnerRegistration) {
defer log.Noticef("Deregistered runner with id: %v", runner.id) defer log.Noticef("Deregistered runner with id: %v", runner.id)
defer close(runner.receiveChan) defer close(runner.receiveChan)
for { for {
msgType, data, err := reg.conn.Read(context.Background()) msgType, data, err := req.conn.Read(context.Background())
if err != nil { if err != nil {
// TODO: this is still racy, since a runner could be allocated between the // TODO: this is still racy, since a runner could be allocated between the
// connection returning an err and the channel closing // connection returning an err and the channel closing
@@ -180,22 +178,30 @@ func (r *runnerManager) processRegistration(reg RunnerRegistration) {
}() }()
} }
func (r *runnerManager) processRunnerRelease(req RunnerReleaseRequest) {
r.connectedRunners = append(r.connectedRunners, *req.Runner)
}
func runRunnerManager(r runnerManager) { func runRunnerManager(r runnerManager) {
for { for {
select { select {
case request := <-r.getRunnerCh: case request := <-r.chans.Allocation:
r.processRequest(request) r.processRunnerAllocation(request)
case release := <-r.chans.Release:
case registration := <-r.registerCh: r.processRunnerRelease(release)
r.processRegistration(registration) case registration := <-r.chans.Registration:
r.processRunnerRegistration(registration)
} }
} }
} }
func StartRunnerManager(configuredRunners map[string]config.Runner, db database.Database) (chan GetRunnerRequest, chan RunnerRegistration, error) { func StartRunnerManager(configuredRunners map[string]config.Runner, db database.Database) (RunnerManagerChans, error) {
scheduler := runnerManager{ scheduler := runnerManager{
getRunnerCh: make(chan GetRunnerRequest), chans: RunnerManagerChans{
registerCh: make(chan RunnerRegistration), Allocation: make(chan RunnerAllocationRequest),
Release: make(chan RunnerReleaseRequest),
Registration: make(chan RunnerRegistrationRequest),
},
connectedRunners: make([]Runner, 0), connectedRunners: make([]Runner, 0),
configuredRunners: configuredRunners, configuredRunners: configuredRunners,
db: db, db: db,
@@ -203,14 +209,14 @@ func StartRunnerManager(configuredRunners map[string]config.Runner, db database.
go runRunnerManager(scheduler) go runRunnerManager(scheduler)
return scheduler.getRunnerCh, scheduler.registerCh, nil return scheduler.chans, nil
} }
func RegisterRunner(conn *websocket.Conn, registerCh chan RunnerRegistration) { func RegisterRunner(conn *websocket.Conn, registerCh chan RunnerRegistrationRequest) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
defer cancel() defer cancel()
var registration RunnerRegistration var registration RunnerRegistrationRequest
registration.conn = conn registration.conn = conn
typ, r, err := conn.Read(ctx) typ, r, err := conn.Read(ctx)