Runners are removed from manager when alloacted
This removes an existing unlocked shared access to runner.running. This also sets us up for better management of the runners.
This commit is contained in:
+5
-8
@@ -22,13 +22,12 @@ func setupHTTPServer(
|
|||||||
mux *http.ServeMux,
|
mux *http.ServeMux,
|
||||||
conf config.PipelineConf,
|
conf config.PipelineConf,
|
||||||
db database.Database,
|
db database.Database,
|
||||||
registerCh chan runnermanager.RunnerRegistration,
|
runnerManagerChans runnermanager.RunnerManagerChans,
|
||||||
getRunnerCh chan runnermanager.GetRunnerRequest,
|
|
||||||
) error {
|
) error {
|
||||||
|
|
||||||
webhook.CreateWebhookHandler(db, conf, mux)
|
webhook.CreateWebhookHandler(db, conf, mux)
|
||||||
|
|
||||||
pipeline_api.CreateHandler(getRunnerCh, mux)
|
pipeline_api.CreateHandler(runnerManagerChans.Allocation, runnerManagerChans.Release, mux)
|
||||||
|
|
||||||
err := admin_api.CreateHandler(db, mux)
|
err := admin_api.CreateHandler(db, mux)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -41,7 +40,7 @@ func setupHTTPServer(
|
|||||||
log.Errorf("Could not upgrade runner connection to websocket: %v", err)
|
log.Errorf("Could not upgrade runner connection to websocket: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
go runnermanager.RegisterRunner(conn, registerCh)
|
go runnermanager.RegisterRunner(conn, runnerManagerChans.Registration)
|
||||||
})
|
})
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -52,16 +51,14 @@ func Listen(
|
|||||||
port int,
|
port int,
|
||||||
conf config.PipelineConf,
|
conf config.PipelineConf,
|
||||||
db database.Database,
|
db database.Database,
|
||||||
registerCh chan runnermanager.RunnerRegistration,
|
runnerManagerChans runnermanager.RunnerManagerChans,
|
||||||
getRunnerCh chan runnermanager.GetRunnerRequest,
|
|
||||||
) error {
|
) error {
|
||||||
|
|
||||||
err := setupHTTPServer(
|
err := setupHTTPServer(
|
||||||
mux,
|
mux,
|
||||||
conf,
|
conf,
|
||||||
db,
|
db,
|
||||||
registerCh,
|
runnerManagerChans,
|
||||||
getRunnerCh,
|
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Could not setup http endpoints: %w", err)
|
return fmt.Errorf("Could not setup http endpoints: %w", err)
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
getRunnerCh, registerCh, err := runnermanager.StartRunnerManager(configData.Config.Runners, db)
|
runnerManagerChans, err := runnermanager.StartRunnerManager(configData.Config.Runners, db)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Could not start runner: %v", err)
|
log.Errorf("Could not start runner: %v", err)
|
||||||
return
|
return
|
||||||
@@ -56,7 +56,6 @@ func main() {
|
|||||||
configData.Config.Port,
|
configData.Config.Port,
|
||||||
configData.Config.PipelineConf,
|
configData.Config.PipelineConf,
|
||||||
db,
|
db,
|
||||||
registerCh,
|
runnerManagerChans,
|
||||||
getRunnerCh,
|
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
|
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
|
||||||
@@ -19,7 +18,8 @@ import (
|
|||||||
var log = logging.MustGetLogger("cursorius-server")
|
var log = logging.MustGetLogger("cursorius-server")
|
||||||
|
|
||||||
type ApiServer struct {
|
type ApiServer struct {
|
||||||
getRunnerCh chan runnermanager.GetRunnerRequest
|
allocationCh chan runnermanager.RunnerAllocationRequest
|
||||||
|
releaseCh chan runnermanager.RunnerReleaseRequest
|
||||||
allocatedRunners map[uuid.UUID]*RunnerWrapper
|
allocatedRunners map[uuid.UUID]*RunnerWrapper
|
||||||
allocatedRunnersMutex sync.RWMutex
|
allocatedRunnersMutex sync.RWMutex
|
||||||
}
|
}
|
||||||
@@ -38,10 +38,14 @@ func (r *RunnerWrapper) RunCommand(cmd string, args []string) (int64, string, st
|
|||||||
return return_code, stdout, stderr, err
|
return return_code, stdout, stderr, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerWrapper) Release() {
|
func (r *RunnerWrapper) Release(releaseCh chan runnermanager.RunnerReleaseRequest) {
|
||||||
r.mutex.Lock()
|
r.mutex.Lock()
|
||||||
defer r.mutex.Unlock()
|
defer r.mutex.Unlock()
|
||||||
r.runner.Release()
|
|
||||||
|
releaseCh <- runnermanager.RunnerReleaseRequest{
|
||||||
|
Runner: r.runner,
|
||||||
|
}
|
||||||
|
r.runner = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ApiServer) GetRunnerFromMap(u uuid.UUID) (*RunnerWrapper, bool) {
|
func (s *ApiServer) GetRunnerFromMap(u uuid.UUID) (*RunnerWrapper, bool) {
|
||||||
@@ -56,8 +60,8 @@ func (s *ApiServer) GetRunner(
|
|||||||
req *connect.Request[apiv2.GetRunnerRequest],
|
req *connect.Request[apiv2.GetRunnerRequest],
|
||||||
) (*connect.Response[apiv2.GetRunnerResponse], error) {
|
) (*connect.Response[apiv2.GetRunnerResponse], error) {
|
||||||
|
|
||||||
respChan := make(chan runnermanager.GetRunnerResponse)
|
respChan := make(chan runnermanager.RunnerAllocationResponse)
|
||||||
s.getRunnerCh <- runnermanager.GetRunnerRequest{
|
s.allocationCh <- runnermanager.RunnerAllocationRequest{
|
||||||
Tags: req.Msg.Tags,
|
Tags: req.Msg.Tags,
|
||||||
RespChan: respChan,
|
RespChan: respChan,
|
||||||
}
|
}
|
||||||
@@ -99,7 +103,7 @@ func (s *ApiServer) ReleaseRunner(
|
|||||||
s.allocatedRunnersMutex.Lock()
|
s.allocatedRunnersMutex.Lock()
|
||||||
runner := s.allocatedRunners[uuid]
|
runner := s.allocatedRunners[uuid]
|
||||||
delete(s.allocatedRunners, uuid)
|
delete(s.allocatedRunners, uuid)
|
||||||
runner.Release()
|
runner.Release(s.releaseCh)
|
||||||
s.allocatedRunnersMutex.Unlock()
|
s.allocatedRunnersMutex.Unlock()
|
||||||
|
|
||||||
res := connect.NewResponse(&apiv2.ReleaseRunnerResponse{})
|
res := connect.NewResponse(&apiv2.ReleaseRunnerResponse{})
|
||||||
@@ -138,9 +142,10 @@ func (s *ApiServer) RunCommand(
|
|||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func CreateHandler(getRunnerCh chan runnermanager.GetRunnerRequest, mux *http.ServeMux) {
|
func CreateHandler(allocationCh chan runnermanager.RunnerAllocationRequest, releaseCh chan runnermanager.RunnerReleaseRequest, mux *http.ServeMux) {
|
||||||
api_server := &ApiServer{
|
api_server := &ApiServer{
|
||||||
getRunnerCh: getRunnerCh,
|
allocationCh: allocationCh,
|
||||||
|
releaseCh: releaseCh,
|
||||||
allocatedRunners: make(map[uuid.UUID]*RunnerWrapper),
|
allocatedRunners: make(map[uuid.UUID]*RunnerWrapper),
|
||||||
}
|
}
|
||||||
path, handler := apiv2connect.NewGetRunnerServiceHandler(api_server)
|
path, handler := apiv2connect.NewGetRunnerServiceHandler(api_server)
|
||||||
|
|||||||
+15
-5
@@ -22,17 +22,27 @@ type Runner struct {
|
|||||||
tags []string
|
tags []string
|
||||||
conn *websocket.Conn
|
conn *websocket.Conn
|
||||||
receiveChan chan []byte
|
receiveChan chan []byte
|
||||||
running bool
|
}
|
||||||
|
|
||||||
|
func (r *Runner) HasTags(requestedTags []string) bool {
|
||||||
|
tagIter:
|
||||||
|
for _, requestedTag := range requestedTags {
|
||||||
|
for _, posessedTag := range r.tags {
|
||||||
|
// if we find the tag, move on to search for the next one
|
||||||
|
if posessedTag == requestedTag {
|
||||||
|
continue tagIter
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if we don't find the tag
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Runner) Id() uuid.UUID {
|
func (r *Runner) Id() uuid.UUID {
|
||||||
return r.id
|
return r.id
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Runner) Release() {
|
|
||||||
r.running = false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *Runner) RunCommand(cmd string, args []string) (returnCode int64, stdout string, stderr string, err error) {
|
func (r *Runner) RunCommand(cmd string, args []string) (returnCode int64, stdout string, stderr string, err error) {
|
||||||
|
|
||||||
// Write RunCommand message to client
|
// Write RunCommand message to client
|
||||||
|
|||||||
@@ -19,53 +19,57 @@ import (
|
|||||||
|
|
||||||
var log = logging.MustGetLogger("cursorius-server")
|
var log = logging.MustGetLogger("cursorius-server")
|
||||||
|
|
||||||
type RunnerRegistration struct {
|
type RunnerManagerChans struct {
|
||||||
Secret string
|
Allocation chan RunnerAllocationRequest
|
||||||
Id string
|
Release chan RunnerReleaseRequest
|
||||||
Tags []string
|
Registration chan RunnerRegistrationRequest
|
||||||
conn *websocket.Conn
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type runnerManager struct {
|
type runnerManager struct {
|
||||||
getRunnerCh chan GetRunnerRequest
|
chans RunnerManagerChans
|
||||||
registerCh chan RunnerRegistration
|
|
||||||
connectedRunners []Runner
|
connectedRunners []Runner
|
||||||
numConnectedRunners uint64
|
numConnectedRunners uint64
|
||||||
configuredRunners map[string]config.Runner
|
configuredRunners map[string]config.Runner
|
||||||
db database.Database
|
db database.Database
|
||||||
}
|
}
|
||||||
|
|
||||||
type GetRunnerRequest struct {
|
type RunnerAllocationRequest struct {
|
||||||
Tags []string
|
Tags []string
|
||||||
RespChan chan GetRunnerResponse
|
RespChan chan RunnerAllocationResponse
|
||||||
}
|
}
|
||||||
|
|
||||||
type GetRunnerResponse struct {
|
type RunnerAllocationResponse struct {
|
||||||
Runner *Runner
|
Runner *Runner
|
||||||
Err error
|
Err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type RunnerReleaseRequest struct {
|
||||||
|
Runner *Runner
|
||||||
|
}
|
||||||
|
|
||||||
|
type RunnerRegistrationRequest struct {
|
||||||
|
Secret string
|
||||||
|
Id string
|
||||||
|
Tags []string
|
||||||
|
conn *websocket.Conn
|
||||||
|
}
|
||||||
|
|
||||||
type runnerJob struct {
|
type runnerJob struct {
|
||||||
Id string
|
Id string
|
||||||
URL string
|
URL string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *runnerManager) processRequest(req GetRunnerRequest) {
|
func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) {
|
||||||
tagsStr := util.FormatTags(req.Tags)
|
tagsStr := util.FormatTags(req.Tags)
|
||||||
log.Infof("Got request for runner with tags \"%v\"", tagsStr)
|
log.Infof("Got request for runner with tags \"%v\"", tagsStr)
|
||||||
|
|
||||||
log.Debugf("Finding runner with tags %v", runnerTagsStr.String())
|
log.Debugf("Finding runner with tags %v", tagsStr)
|
||||||
|
|
||||||
foundRunner := false
|
foundRunner := false
|
||||||
|
|
||||||
runnersToRemove := []int{}
|
runnersToRemove := []int{}
|
||||||
runnerIter:
|
runnerIter:
|
||||||
for i, runner := range r.connectedRunners {
|
for i, runner := range r.connectedRunners {
|
||||||
// don't allocate runner that is already occupied
|
|
||||||
if runner.running {
|
|
||||||
log.Debugf("Skipping runner %v, as runner is activly running another job", runner.id)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// don't allocate runner with closed receiveChan (is defunct)
|
// don't allocate runner with closed receiveChan (is defunct)
|
||||||
// there should never be messages to read on an inactive runner,
|
// there should never be messages to read on an inactive runner,
|
||||||
// so we aren't losing any data here
|
// so we aren't losing any data here
|
||||||
@@ -82,25 +86,20 @@ runnerIter:
|
|||||||
default:
|
default:
|
||||||
log.Debugf("Checking runner %v for requested tags", runner.id)
|
log.Debugf("Checking runner %v for requested tags", runner.id)
|
||||||
|
|
||||||
tagIter:
|
if !runner.HasTags(req.Tags) {
|
||||||
for _, requestedTag := range req.Tags {
|
|
||||||
for _, posessedTag := range runner.tags {
|
|
||||||
if requestedTag == posessedTag {
|
|
||||||
continue tagIter
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue runnerIter
|
continue runnerIter
|
||||||
}
|
}
|
||||||
|
|
||||||
r.connectedRunners[i].running = true
|
runnersToRemove = append(runnersToRemove, i)
|
||||||
foundRunner = true
|
foundRunner = true
|
||||||
req.RespChan <- GetRunnerResponse{
|
req.RespChan <- RunnerAllocationResponse{
|
||||||
Runner: &r.connectedRunners[i],
|
Runner: &r.connectedRunners[i],
|
||||||
Err: nil,
|
Err: nil,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
// remove allocated runner plus defunct runners
|
||||||
// since we iterate, all the indexes will be in accending order
|
// since we iterate, all the indexes will be in accending order
|
||||||
for i, runnerInd := range runnersToRemove {
|
for i, runnerInd := range runnersToRemove {
|
||||||
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
|
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
|
||||||
@@ -115,43 +114,42 @@ runnerIter:
|
|||||||
if len(r.connectedRunners) == 0 {
|
if len(r.connectedRunners) == 0 {
|
||||||
errorMsg = "no connected runners"
|
errorMsg = "no connected runners"
|
||||||
}
|
}
|
||||||
req.RespChan <- GetRunnerResponse{
|
req.RespChan <- RunnerAllocationResponse{
|
||||||
Runner: &Runner{},
|
Runner: &Runner{},
|
||||||
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
|
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *runnerManager) processRegistration(reg RunnerRegistration) {
|
func (r *runnerManager) processRunnerRegistration(req RunnerRegistrationRequest) {
|
||||||
log.Debugf("New runner appeared with id: %v and secret: %v", reg.Id, reg.Secret)
|
log.Debugf("New runner appeared with id: %v and secret: %v", req.Id, req.Secret)
|
||||||
|
|
||||||
// Get runner with give id from database
|
// Get runner with give id from database
|
||||||
runnerId, err := uuid.Parse(reg.Id)
|
runnerId, err := uuid.Parse(req.Id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Disconnecting runner with id: %v, could not parse as UUID: %v", reg.Id, err)
|
log.Errorf("Disconnecting runner with id: %v, could not parse as UUID: %v", req.Id, err)
|
||||||
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
dbRunner, err := r.db.GetRunnerById(runnerId)
|
dbRunner, err := r.db.GetRunnerById(runnerId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Disconnecting runner with id: %v, could not find runner in DB: %v", runnerId, err)
|
log.Errorf("Disconnecting runner with id: %v, could not find runner in DB: %v", runnerId, err)
|
||||||
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if reg.Secret != dbRunner.Token {
|
if req.Secret != dbRunner.Token {
|
||||||
log.Errorf("Disconnecting runner with id: %v, invalid secret", runnerId)
|
log.Errorf("Disconnecting runner with id: %v, invalid secret", runnerId)
|
||||||
reg.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
req.conn.Close(websocket.StatusNormalClosure, "registration invalid")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infof("Registering runner \"%v\" with tags %v", reg.Id, reg.Tags)
|
log.Infof("Registering runner \"%v\" with tags %v", req.Id, req.Tags)
|
||||||
runner := Runner{
|
runner := Runner{
|
||||||
id: runnerId,
|
id: runnerId,
|
||||||
tags: reg.Tags,
|
tags: req.Tags,
|
||||||
conn: reg.conn,
|
conn: req.conn,
|
||||||
receiveChan: make(chan []byte),
|
receiveChan: make(chan []byte),
|
||||||
running: false,
|
|
||||||
}
|
}
|
||||||
r.connectedRunners = append(r.connectedRunners, runner)
|
r.connectedRunners = append(r.connectedRunners, runner)
|
||||||
// start goroutine to call Read function on websocket connection
|
// start goroutine to call Read function on websocket connection
|
||||||
@@ -160,7 +158,7 @@ func (r *runnerManager) processRegistration(reg RunnerRegistration) {
|
|||||||
defer log.Noticef("Deregistered runner with id: %v", runner.id)
|
defer log.Noticef("Deregistered runner with id: %v", runner.id)
|
||||||
defer close(runner.receiveChan)
|
defer close(runner.receiveChan)
|
||||||
for {
|
for {
|
||||||
msgType, data, err := reg.conn.Read(context.Background())
|
msgType, data, err := req.conn.Read(context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: this is still racy, since a runner could be allocated between the
|
// TODO: this is still racy, since a runner could be allocated between the
|
||||||
// connection returning an err and the channel closing
|
// connection returning an err and the channel closing
|
||||||
@@ -180,22 +178,30 @@ func (r *runnerManager) processRegistration(reg RunnerRegistration) {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *runnerManager) processRunnerRelease(req RunnerReleaseRequest) {
|
||||||
|
r.connectedRunners = append(r.connectedRunners, *req.Runner)
|
||||||
|
}
|
||||||
|
|
||||||
func runRunnerManager(r runnerManager) {
|
func runRunnerManager(r runnerManager) {
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case request := <-r.getRunnerCh:
|
case request := <-r.chans.Allocation:
|
||||||
r.processRequest(request)
|
r.processRunnerAllocation(request)
|
||||||
|
case release := <-r.chans.Release:
|
||||||
case registration := <-r.registerCh:
|
r.processRunnerRelease(release)
|
||||||
r.processRegistration(registration)
|
case registration := <-r.chans.Registration:
|
||||||
|
r.processRunnerRegistration(registration)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func StartRunnerManager(configuredRunners map[string]config.Runner, db database.Database) (chan GetRunnerRequest, chan RunnerRegistration, error) {
|
func StartRunnerManager(configuredRunners map[string]config.Runner, db database.Database) (RunnerManagerChans, error) {
|
||||||
scheduler := runnerManager{
|
scheduler := runnerManager{
|
||||||
getRunnerCh: make(chan GetRunnerRequest),
|
chans: RunnerManagerChans{
|
||||||
registerCh: make(chan RunnerRegistration),
|
Allocation: make(chan RunnerAllocationRequest),
|
||||||
|
Release: make(chan RunnerReleaseRequest),
|
||||||
|
Registration: make(chan RunnerRegistrationRequest),
|
||||||
|
},
|
||||||
connectedRunners: make([]Runner, 0),
|
connectedRunners: make([]Runner, 0),
|
||||||
configuredRunners: configuredRunners,
|
configuredRunners: configuredRunners,
|
||||||
db: db,
|
db: db,
|
||||||
@@ -203,14 +209,14 @@ func StartRunnerManager(configuredRunners map[string]config.Runner, db database.
|
|||||||
|
|
||||||
go runRunnerManager(scheduler)
|
go runRunnerManager(scheduler)
|
||||||
|
|
||||||
return scheduler.getRunnerCh, scheduler.registerCh, nil
|
return scheduler.chans, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func RegisterRunner(conn *websocket.Conn, registerCh chan RunnerRegistration) {
|
func RegisterRunner(conn *websocket.Conn, registerCh chan RunnerRegistrationRequest) {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
var registration RunnerRegistration
|
var registration RunnerRegistrationRequest
|
||||||
registration.conn = conn
|
registration.conn = conn
|
||||||
|
|
||||||
typ, r, err := conn.Read(ctx)
|
typ, r, err := conn.Read(ctx)
|
||||||
|
|||||||
Reference in New Issue
Block a user