Add timeout and retry interval to GetRunner api

This commit is contained in:
2023-04-05 18:38:06 -06:00
parent a8e9a68f0e
commit ed7df18f83
6 changed files with 103 additions and 54 deletions
+2 -2
View File
@@ -3,7 +3,7 @@ module git.ohea.xyz/cursorius/server
go 1.19 go 1.19
require ( require (
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9 git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2
git.ohea.xyz/golang/config v0.0.0-20220915224621-b9debd233173 git.ohea.xyz/golang/config v0.0.0-20220915224621-b9debd233173
github.com/bufbuild/connect-go v1.4.1 github.com/bufbuild/connect-go v1.4.1
@@ -17,7 +17,7 @@ require (
github.com/jhoonb/archivex v0.0.0-20201016144719-6a343cdae81d github.com/jhoonb/archivex v0.0.0-20201016144719-6a343cdae81d
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 github.com/op/go-logging v0.0.0-20160315200505-970db520ece7
golang.org/x/net v0.2.0 golang.org/x/net v0.2.0
google.golang.org/protobuf v1.28.1 google.golang.org/protobuf v1.30.0
nhooyr.io/websocket v1.8.7 nhooyr.io/websocket v1.8.7
) )
+4 -3
View File
@@ -78,8 +78,8 @@ contrib.go.opencensus.io/exporter/stackdriver v0.13.5/go.mod h1:aXENhDJ1Y4lIg4EU
contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE= contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE=
contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA= contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9 h1:8p7Kw3B7dbi2zdgG+Me9ETRWrJzoNVjcase4YqXfGbs= git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4 h1:kKQQEg1nmWnqiNOqtUHteEuacyfy0NdxyDj6HPjbA2c=
git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230109075652-ead0aeff2eb9/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o= git.ohea.xyz/cursorius/pipeline-api/go/api/v2 v2.0.0-20230405234139-34d8875b72f4/go.mod h1:D7GGcFIH421mo6KuRaXXXmlXPwWeEsemTZG/BOZA/4o=
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 h1:G1XQEqhj1LZPQbH7avzvT7QL9Wfbb4CXMm0nLL39eDc= git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2 h1:G1XQEqhj1LZPQbH7avzvT7QL9Wfbb4CXMm0nLL39eDc=
git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2/go.mod h1:F9y5Ck4Wchsaj5amSX2eDRUlQ/iYP1VNLFduvjNwmLc= git.ohea.xyz/cursorius/runner-api/go/api/v2 v2.0.0-20230109074922-e20285fe6cf2/go.mod h1:F9y5Ck4Wchsaj5amSX2eDRUlQ/iYP1VNLFduvjNwmLc=
git.ohea.xyz/cursorius/webhooks/v6 v6.0.2-0.20221224221147-a2bdbf1756ed h1:gsK15m4Npow74+R6OfZKwwAg1sl7QWQCRXOeE2QLUco= git.ohea.xyz/cursorius/webhooks/v6 v6.0.2-0.20221224221147-a2bdbf1756ed h1:gsK15m4Npow74+R6OfZKwwAg1sl7QWQCRXOeE2QLUco=
@@ -2129,8 +2129,9 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+43 -7
View File
@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"sync" "sync"
"time"
apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2" apiv2 "git.ohea.xyz/cursorius/pipeline-api/go/api/v2"
"git.ohea.xyz/cursorius/pipeline-api/go/api/v2/apiv2connect" "git.ohea.xyz/cursorius/pipeline-api/go/api/v2/apiv2connect"
@@ -60,17 +61,52 @@ func (s *ApiServer) GetRunner(
req *connect.Request[apiv2.GetRunnerRequest], req *connect.Request[apiv2.GetRunnerRequest],
) (*connect.Response[apiv2.GetRunnerResponse], error) { ) (*connect.Response[apiv2.GetRunnerResponse], error) {
var response runnermanager.RunnerAllocationResponse
var timeoutCtx *context.Context
var retryInterval int64 = 0
respChan := make(chan runnermanager.RunnerAllocationResponse) respChan := make(chan runnermanager.RunnerAllocationResponse)
s.allocationCh <- runnermanager.RunnerAllocationRequest{
Tags: req.Msg.Tags,
RespChan: respChan,
}
tagsStr := util.FormatTags(req.Msg.Tags) tagsStr := util.FormatTags(req.Msg.Tags)
response := <-respChan if req.Msg.Options != nil {
if response.Err != nil { if req.Msg.Options.Timeout != 0 {
log.Errorf("Could not get runner with tags \"%v\": %v", tagsStr, response.Err) ctx, cancel := context.WithTimeout(context.Background(), time.Duration(req.Msg.Options.Timeout)*time.Second)
timeoutCtx = &ctx
defer cancel()
}
retryInterval = req.Msg.Options.RetryInterval
}
for {
s.allocationCh <- runnermanager.RunnerAllocationRequest{
Tags: req.Msg.Tags,
RespChan: respChan,
}
response = <-respChan
if response.Err == nil {
break
}
log.Infof("Could not get runner with tags \"%v\": %v", tagsStr, response.Err)
// If no timeout is specified, skip after one attempt
if timeoutCtx == nil {
break
}
// If timeout is expired, stop trying to allocate runner
if (*timeoutCtx).Err() != nil {
break
}
log.Infof("Sleeping for %v seconds before retry...", retryInterval)
time.Sleep(time.Duration(retryInterval) * time.Second)
}
if response.Runner == nil {
return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("Could not get runner")) return nil, connect.NewError(connect.CodeNotFound, fmt.Errorf("Could not get runner"))
} }
+2
View File
@@ -97,6 +97,8 @@ func (r *Runner) sendProtoStruct(p protoreflect.ProtoMessage) error {
ctx := context.Background() ctx := context.Background()
log.Debugf("r.conn: %p", r.conn)
if err := r.conn.Write(ctx, websocket.MessageBinary, protoOut); err != nil { if err := r.conn.Write(ctx, websocket.MessageBinary, protoOut); err != nil {
return fmt.Errorf("Could not send proto to websocket: %w", err) return fmt.Errorf("Could not send proto to websocket: %w", err)
} }
+3 -42
View File
@@ -19,46 +19,6 @@ import (
var log = logging.MustGetLogger("cursorius-server") var log = logging.MustGetLogger("cursorius-server")
type RunnerManagerChans struct {
Allocation chan RunnerAllocationRequest
Release chan RunnerReleaseRequest
Registration chan RunnerRegistrationRequest
}
type runnerManager struct {
chans RunnerManagerChans
connectedRunners []Runner
numConnectedRunners uint64
configuredRunners map[string]config.Runner
db database.Database
}
type RunnerAllocationRequest struct {
Tags []string
RespChan chan RunnerAllocationResponse
}
type RunnerAllocationResponse struct {
Runner *Runner
Err error
}
type RunnerReleaseRequest struct {
Runner *Runner
}
type RunnerRegistrationRequest struct {
Secret string
Id string
Tags []string
conn *websocket.Conn
}
type runnerJob struct {
Id string
URL string
}
func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) { func (r *runnerManager) processRunnerAllocation(req RunnerAllocationRequest) {
tagsStr := util.FormatTags(req.Tags) tagsStr := util.FormatTags(req.Tags)
log.Infof("Got request for runner with tags \"%v\"", tagsStr) log.Infof("Got request for runner with tags \"%v\"", tagsStr)
@@ -92,6 +52,7 @@ runnerIter:
runnersToRemove = append(runnersToRemove, i) runnersToRemove = append(runnersToRemove, i)
foundRunner = true foundRunner = true
log.Debugf("Runner %v has requested tags, allocating", runner.id)
req.RespChan <- RunnerAllocationResponse{ req.RespChan <- RunnerAllocationResponse{
Runner: &r.connectedRunners[i], Runner: &r.connectedRunners[i],
Err: nil, Err: nil,
@@ -103,7 +64,7 @@ runnerIter:
// since we iterate, all the indexes will be in accending order // since we iterate, all the indexes will be in accending order
for i, runnerInd := range runnersToRemove { for i, runnerInd := range runnersToRemove {
r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1] r.connectedRunners[runnerInd-i] = r.connectedRunners[len(r.connectedRunners)-1]
r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-2] r.connectedRunners = r.connectedRunners[0 : len(r.connectedRunners)-1]
} }
if foundRunner { if foundRunner {
@@ -115,7 +76,7 @@ runnerIter:
errorMsg = "no connected runners" errorMsg = "no connected runners"
} }
req.RespChan <- RunnerAllocationResponse{ req.RespChan <- RunnerAllocationResponse{
Runner: &Runner{}, Runner: nil,
Err: fmt.Errorf("Could not allocate runner: %v", errorMsg), Err: fmt.Errorf("Could not allocate runner: %v", errorMsg),
} }
+49
View File
@@ -0,0 +1,49 @@
package runnermanager
import (
"nhooyr.io/websocket"
"git.ohea.xyz/cursorius/server/config"
"git.ohea.xyz/cursorius/server/database"
)
type RunnerManagerChans struct {
Allocation chan RunnerAllocationRequest
Release chan RunnerReleaseRequest
Registration chan RunnerRegistrationRequest
}
type runnerManager struct {
chans RunnerManagerChans
connectedRunners []Runner
numConnectedRunners uint64
configuredRunners map[string]config.Runner
db database.Database
}
type RunnerAllocationRequest struct {
Tags []string
RespChan chan RunnerAllocationResponse
CancelChan chan string
}
type RunnerAllocationResponse struct {
Runner *Runner
Err error
}
type RunnerReleaseRequest struct {
Runner *Runner
}
type RunnerRegistrationRequest struct {
Secret string
Id string
Tags []string
conn *websocket.Conn
}
type runnerJob struct {
Id string
URL string
}