Add timeout and retry interval to GetRunner api
This commit is contained in:
@@ -3,7 +3,7 @@ module git.ohea.xyz/cursorius/server
|
|||||||
go 1.19
|
go 1.19
|
||||||
|
|
||||||
require (
|
require (
|
||||||
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9
|
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4
|
||||||
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2
|
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2
|
||||||
git.ohea.xyz/golang/config v0.0.0-20220915224621-b9debd233173
|
git.ohea.xyz/golang/config v0.0.0-20220915224621-b9debd233173
|
||||||
github.com/bufbuild/connect-go v1.4.1
|
github.com/bufbuild/connect-go v1.4.1
|
||||||
@@ -17,7 +17,7 @@ require (
|
|||||||
github.com/jhoonb/archivex v0.0.0-20201016144719-6a343cdae81d
|
github.com/jhoonb/archivex v0.0.0-20201016144719-6a343cdae81d
|
||||||
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7
|
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7
|
||||||
golang.org/x/net v0.2.0
|
golang.org/x/net v0.2.0
|
||||||
google.golang.org/protobuf v1.28.1
|
google.golang.org/protobuf v1.30.0
|
||||||
nhooyr.io/websocket v1.8.7
|
nhooyr.io/websocket v1.8.7
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -78,8 +78,8 @@ contrib.go.opencensus.io/exporter/stackdriver v0.13.5/go.mod h1:aXENhDJ1Y4lIg4EU
|
|||||||
contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE=
|
contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE=
|
||||||
contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA=
|
contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA=
|
||||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||||
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9 h1:8p7Kw3B7dbi2zdgG+Me9ETRWrJzoNVjcase4YqXfGbs=
|
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4 h1:kKQQEg1nmWnqiNOqtUHteEuacyfy0NdxyDj6HPjbA2c=
|
||||||
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o=
|
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o=
|
||||||
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 h1:G1XQEqhj1LZPQbH7avzvT7QL9Wfbb4CXMm0nLL39eDc=
|
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 h1:G1XQEqhj1LZPQbH7avzvT7QL9Wfbb4CXMm0nLL39eDc=
|
||||||
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2/go.mod h1:F9y5Ck4Wchsaj5amSX2eDRUlQ/iYP1VNLFduvjNwmLc=
|
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2/go.mod h1:F9y5Ck4Wchsaj5amSX2eDRUlQ/iYP1VNLFduvjNwmLc=
|
||||||
git.ohea.xyz/cursorius/webhooks/v6 v6.0.2-0.20221224221147-a2bdbf1756ed h1:gsK15m4Npow74+R6OfZKwwAg1sl7QWQCRXOeE2QLUco=
|
git.ohea.xyz/cursorius/webhooks/v6 v6.0.2-0.20221224221147-a2bdbf1756ed h1:gsK15m4Npow74+R6OfZKwwAg1sl7QWQCRXOeE2QLUco=
|
||||||
@@ -2129,8 +2129,9 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
|
|||||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||||
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||||
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||||
google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
|
|
||||||
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||||
|
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
|
||||||
|
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||||
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk=
|
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
|
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
|
||||||
"git.ohea.xyz/cursorius/pipeline-api/go/api/v2/apiv2connect"
|
"git.ohea.xyz/cursorius/pipeline-api/go/api/v2/apiv2connect"
|
||||||
@@ -60,17 +61,52 @@ func (s *ApiServer) GetRunner(
|
|||||||
req *connect.Request[apiv2.GetRunnerRequest],
|
req *connect.Request[apiv2.GetRunnerRequest],
|
||||||
) (*connect.Response[apiv2.GetRunnerResponse], error) {
|
) (*connect.Response[apiv2.GetRunnerResponse], error) {
|
||||||
|
|
||||||
|
var response runnermanager.RunnerAllocationResponse
|
||||||
|
var timeoutCtx *context.Context
|
||||||
|
var retryInterval int64 = 0
|
||||||
|
|
||||||
respChan := make(chan runnermanager.RunnerAllocationResponse)
|
respChan := make(chan runnermanager.RunnerAllocationResponse)
|
||||||
|
|
||||||
|
tagsStr := util.FormatTags(req.Msg.Tags)
|
||||||
|
|
||||||
|
if req.Msg.Options != nil {
|
||||||
|
if req.Msg.Options.Timeout != 0 {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(req.Msg.Options.Timeout)*time.Second)
|
||||||
|
timeoutCtx = &ctx
|
||||||
|
defer cancel()
|
||||||
|
}
|
||||||
|
|
||||||
|
retryInterval = req.Msg.Options.RetryInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
s.allocationCh <- runnermanager.RunnerAllocationRequest{
|
s.allocationCh <- runnermanager.RunnerAllocationRequest{
|
||||||
Tags: req.Msg.Tags,
|
Tags: req.Msg.Tags,
|
||||||
RespChan: respChan,
|
RespChan: respChan,
|
||||||
}
|
}
|
||||||
|
|
||||||
tagsStr := util.FormatTags(req.Msg.Tags)
|
response = <-respChan
|
||||||
|
if response.Err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
response := <-respChan
|
log.Infof("Could not get runner with tags \"%v\": %v", tagsStr, response.Err)
|
||||||
if response.Err != nil {
|
|
||||||
log.Errorf("Could not get runner with tags \"%v\": %v", tagsStr, response.Err)
|
// If no timeout is specified, skip after one attempt
|
||||||
|
if timeoutCtx == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// If timeout is expired, stop trying to allocate runner
|
||||||
|
if (*timeoutCtx).Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Sleeping for %v seconds before retry...", retryInterval)
|
||||||
|
time.Sleep(time.Duration(retryInterval) * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
if response.Runner == nil {
|
||||||
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("Could not get runner"))
|
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("Could not get runner"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -97,6 +97,8 @@ func (r *Runner) sendProtoStruct(p protoreflect.ProtoMessage) error {
|
|||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
|
log.Debugf("r.conn: %p", r.conn)
|
||||||
|
|
||||||
if err := r.conn.Write(ctx, websocket.MessageBinary, protoOut); err != nil {
|
if err := r.conn.Write(ctx, websocket.MessageBinary, protoOut); err != nil {
|
||||||
return fmt.Errorf("Could not send proto to websocket: %w", err)
|
return fmt.Errorf("Could not send proto to websocket: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,46 +19,6 @@ import (
|
|||||||
|
|
||||||
var log = logging.MustGetLogger("cursorius-server")
|
var log = logging.MustGetLogger("cursorius-server")
|
||||||
|
|
||||||
type RunnerManagerChans struct {
|
|
||||||
Allocation chan RunnerAllocationRequest
|
|
||||||
Release chan RunnerReleaseRequest
|
|
||||||
Registration chan RunnerRegistrationRequest
|
|
||||||
}
|
|
||||||
|
|
||||||
type runnerManager struct {
|
|
||||||
chans RunnerManagerChans
|
|
||||||
connectedRunners []Runner
|
|
||||||
numConnectedRunners uint64
|
|
||||||
configuredRunners map[string]config.Runner
|
|
||||||
db database.Database
|
|
||||||
}
|
|
||||||
|
|
||||||
type RunnerAllocationRequest struct {
|
|
||||||
Tags []string
|
|
||||||
RespChan chan RunnerAllocationResponse
|
|
||||||
}
|
|
||||||
|
|
||||||
type RunnerAllocationResponse struct {
|
|
||||||
Runner *Runner
|
|
||||||
Err error
|
|
||||||
}
|
|
||||||
|
|
||||||
type RunnerReleaseRequest struct {
|
|
||||||
Runner *Runner
|
|
||||||
}
|
|
||||||
|
|
||||||
type RunnerRegistrationRequest struct {
|
|
||||||
Secret string
|
|
||||||
Id string
|
|
||||||
Tags []string
|
|
||||||
conn *websocket.Conn
|
|
||||||
}
|
|
||||||
|
|
||||||
type runnerJob struct {
|
|
||||||
Id string
|
|
||||||
URL string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) {
|
func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) {
|
||||||
tagsStr := util.FormatTags(req.Tags)
|
tagsStr := util.FormatTags(req.Tags)
|
||||||
log.Infof("Got request for runner with tags \"%v\"", tagsStr)
|
log.Infof("Got request for runner with tags \"%v\"", tagsStr)
|
||||||
@@ -92,6 +52,7 @@ runnerIter:
|
|||||||
|
|
||||||
runnersToRemove = append(runnersToRemove, i)
|
runnersToRemove = append(runnersToRemove, i)
|
||||||
foundRunner = true
|
foundRunner = true
|
||||||
|
log.Debugf("Runner %v has requested tags, allocating", runner.id)
|
||||||
req.RespChan <- RunnerAllocationResponse{
|
req.RespChan <- RunnerAllocationResponse{
|
||||||
Runner: &r.connectedRunners[i],
|
Runner: &r.connectedRunners[i],
|
||||||
Err: nil,
|
Err: nil,
|
||||||
@@ -103,7 +64,7 @@ runnerIter:
|
|||||||
// since we iterate, all the indexes will be in accending order
|
// since we iterate, all the indexes will be in accending order
|
||||||
for i, runnerInd := range runnersToRemove {
|
for i, runnerInd := range runnersToRemove {
|
||||||
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
|
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
|
||||||
r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-2]
|
r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-1]
|
||||||
}
|
}
|
||||||
|
|
||||||
if foundRunner {
|
if foundRunner {
|
||||||
@@ -115,7 +76,7 @@ runnerIter:
|
|||||||
errorMsg = "no connected runners"
|
errorMsg = "no connected runners"
|
||||||
}
|
}
|
||||||
req.RespChan <- RunnerAllocationResponse{
|
req.RespChan <- RunnerAllocationResponse{
|
||||||
Runner: &Runner{},
|
Runner: nil,
|
||||||
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
|
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
package runnermanager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"nhooyr.io/websocket"
|
||||||
|
|
||||||
|
"git.ohea.xyz/cursorius/server/config"
|
||||||
|
"git.ohea.xyz/cursorius/server/database"
|
||||||
|
)
|
||||||
|
|
||||||
|
type RunnerManagerChans struct {
|
||||||
|
Allocation chan RunnerAllocationRequest
|
||||||
|
Release chan RunnerReleaseRequest
|
||||||
|
Registration chan RunnerRegistrationRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
type runnerManager struct {
|
||||||
|
chans RunnerManagerChans
|
||||||
|
connectedRunners []Runner
|
||||||
|
numConnectedRunners uint64
|
||||||
|
configuredRunners map[string]config.Runner
|
||||||
|
db database.Database
|
||||||
|
}
|
||||||
|
|
||||||
|
type RunnerAllocationRequest struct {
|
||||||
|
Tags []string
|
||||||
|
RespChan chan RunnerAllocationResponse
|
||||||
|
CancelChan chan string
|
||||||
|
}
|
||||||
|
|
||||||
|
type RunnerAllocationResponse struct {
|
||||||
|
Runner *Runner
|
||||||
|
Err error
|
||||||
|
}
|
||||||
|
|
||||||
|
type RunnerReleaseRequest struct {
|
||||||
|
Runner *Runner
|
||||||
|
}
|
||||||
|
|
||||||
|
type RunnerRegistrationRequest struct {
|
||||||
|
Secret string
|
||||||
|
Id string
|
||||||
|
Tags []string
|
||||||
|
conn *websocket.Conn
|
||||||
|
}
|
||||||
|
|
||||||
|
type runnerJob struct {
|
||||||
|
Id string
|
||||||
|
URL string
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user