diff --git a/go.mod b/go.mod index bc8539c..3442428 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module git.ohea.xyz/cursorius/server go 1.19 require ( - git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9 + git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4 git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 git.ohea.xyz/golang/config v0.0.0-20220915224621-b9debd233173 github.com/bufbuild/connect-go v1.4.1 @@ -17,7 +17,7 @@ require ( github.com/jhoonb/archivex v0.0.0-20201016144719-6a343cdae81d github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 golang.org/x/net v0.2.0 - google.golang.org/protobuf v1.28.1 + google.golang.org/protobuf v1.30.0 nhooyr.io/websocket v1.8.7 ) diff --git a/go.sum b/go.sum index f111696..90fc856 100644 --- a/go.sum +++ b/go.sum @@ -78,8 +78,8 @@ contrib.go.opencensus.io/exporter/stackdriver v0.13.5/go.mod h1:aXENhDJ1Y4lIg4EU contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE= contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9 h1:8p7Kw3B7dbi2zdgG+Me9ETRWrJzoNVjcase4YqXfGbs= -git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o= +git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4 h1:kKQQEg1nmWnqiNOqtUHteEuacyfy0NdxyDj6HPjbA2c= +git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o= git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 h1:G1XQEqhj1LZPQbH7avzvT7QL9Wfbb4CXMm0nLL39eDc= git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2/go.mod h1:F9y5Ck4Wchsaj5amSX2eDRUlQ/iYP1VNLFduvjNwmLc= git.ohea.xyz/cursorius/webhooks/v6 v6.0.2-0.20221224221147-a2bdbf1756ed h1:gsK15m4Npow74+R6OfZKwwAg1sl7QWQCRXOeE2QLUco= @@ -2129,8 +2129,9 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/pipeline_api/pipeline_api.go b/pipeline_api/pipeline_api.go index b2bf021..32a4727 100644 --- a/pipeline_api/pipeline_api.go +++ b/pipeline_api/pipeline_api.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "sync" + "time" apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2" "git.ohea.xyz/cursorius/pipeline-api/go/api/v2/apiv2connect" @@ -60,17 +61,52 @@ func (s *ApiServer) GetRunner( req *connect.Request[apiv2.GetRunnerRequest], ) (*connect.Response[apiv2.GetRunnerResponse], error) { + var response runnermanager.RunnerAllocationResponse + var timeoutCtx *context.Context + var retryInterval int64 = 0 + respChan := make(chan runnermanager.RunnerAllocationResponse) - s.allocationCh <- runnermanager.RunnerAllocationRequest{ - Tags: req.Msg.Tags, - RespChan: respChan, - } tagsStr := util.FormatTags(req.Msg.Tags) - response := <-respChan - if response.Err != nil { - log.Errorf("Could not get runner with tags \"%v\": %v", tagsStr, response.Err) + if req.Msg.Options != nil { + if req.Msg.Options.Timeout != 0 { + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(req.Msg.Options.Timeout)*time.Second) + timeoutCtx = &ctx + defer cancel() + } + + retryInterval = req.Msg.Options.RetryInterval + } + + for { + s.allocationCh <- runnermanager.RunnerAllocationRequest{ + Tags: req.Msg.Tags, + RespChan: respChan, + } + + response = <-respChan + if response.Err == nil { + break + } + + log.Infof("Could not get runner with tags \"%v\": %v", tagsStr, response.Err) + + // If no timeout is specified, skip after one attempt + if timeoutCtx == nil { + break + } + + // If timeout is expired, stop trying to allocate runner + if (*timeoutCtx).Err() != nil { + break + } + + log.Infof("Sleeping for %v seconds before retry...", retryInterval) + time.Sleep(time.Duration(retryInterval) * time.Second) + } + + if response.Runner == nil { return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("Could not get runner")) } diff --git a/runnermanager/runner.go b/runnermanager/runner.go index 80314bd..36de119 100644 --- a/runnermanager/runner.go +++ b/runnermanager/runner.go @@ -97,6 +97,8 @@ func (r *Runner) sendProtoStruct(p protoreflect.ProtoMessage) error { ctx := context.Background() + log.Debugf("r.conn: %p", r.conn) + if err := r.conn.Write(ctx, websocket.MessageBinary, protoOut); err != nil { return fmt.Errorf("Could not send proto to websocket: %w", err) } diff --git a/runnermanager/runnermanager.go b/runnermanager/runnermanager.go index a2092b2..e48754e 100644 --- a/runnermanager/runnermanager.go +++ b/runnermanager/runnermanager.go @@ -19,46 +19,6 @@ import ( var log = logging.MustGetLogger("cursorius-server") -type RunnerManagerChans struct { - Allocation chan RunnerAllocationRequest - Release chan RunnerReleaseRequest - Registration chan RunnerRegistrationRequest -} - -type runnerManager struct { - chans RunnerManagerChans - connectedRunners []Runner - numConnectedRunners uint64 - configuredRunners map[string]config.Runner - db database.Database -} - -type RunnerAllocationRequest struct { - Tags []string - RespChan chan RunnerAllocationResponse -} - -type RunnerAllocationResponse struct { - Runner *Runner - Err error -} - -type RunnerReleaseRequest struct { - Runner *Runner -} - -type RunnerRegistrationRequest struct { - Secret string - Id string - Tags []string - conn *websocket.Conn -} - -type runnerJob struct { - Id string - URL string -} - func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) { tagsStr := util.FormatTags(req.Tags) log.Infof("Got request for runner with tags \"%v\"", tagsStr) @@ -92,6 +52,7 @@ runnerIter: runnersToRemove = append(runnersToRemove, i) foundRunner = true + log.Debugf("Runner %v has requested tags, allocating", runner.id) req.RespChan <- RunnerAllocationResponse{ Runner: &r.connectedRunners[i], Err: nil, @@ -103,7 +64,7 @@ runnerIter: // since we iterate, all the indexes will be in accending order for i, runnerInd := range runnersToRemove { r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1] - r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-2] + r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-1] } if foundRunner { @@ -115,7 +76,7 @@ runnerIter: errorMsg = "no connected runners" } req.RespChan <- RunnerAllocationResponse{ - Runner: &Runner{}, + Runner: nil, Err: fmt.Errorf("Could not allocate runner: %v", errorMsg), } diff --git a/runnermanager/types.go b/runnermanager/types.go new file mode 100644 index 0000000..93a4a06 --- /dev/null +++ b/runnermanager/types.go @@ -0,0 +1,49 @@ +package runnermanager + +import ( + "nhooyr.io/websocket" + + "git.ohea.xyz/cursorius/server/config" + "git.ohea.xyz/cursorius/server/database" +) + +type RunnerManagerChans struct { + Allocation chan RunnerAllocationRequest + Release chan RunnerReleaseRequest + Registration chan RunnerRegistrationRequest +} + +type runnerManager struct { + chans RunnerManagerChans + connectedRunners []Runner + numConnectedRunners uint64 + configuredRunners map[string]config.Runner + db database.Database +} + +type RunnerAllocationRequest struct { + Tags []string + RespChan chan RunnerAllocationResponse + CancelChan chan string +} + +type RunnerAllocationResponse struct { + Runner *Runner + Err error +} + +type RunnerReleaseRequest struct { + Runner *Runner +} + +type RunnerRegistrationRequest struct { + Secret string + Id string + Tags []string + conn *websocket.Conn +} + +type runnerJob struct { + Id string + URL string +}