diff --git a/aggregator.go b/aggregator.go
index 9f8da70ad..318d7b2b8 100644
--- a/aggregator.go
+++ b/aggregator.go
@@ -81,6 +81,11 @@ func newAggregator(params aggregatorParams) *aggregator {
 	}
 }
 
+func (a *aggregator) resetState() {
+	a.done = make(chan struct{})
+	a.sema = make(chan struct{}, maxConcurrentAggregationChecks)
+}
+
 func (a *aggregator) shutdown() {
 	if a.ga == nil {
 		return
diff --git a/heartbeat.go b/heartbeat.go
index f42644572..c1ac62b7d 100644
--- a/heartbeat.go
+++ b/heartbeat.go
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+
 	"github.com/hibiken/asynq/internal/base"
 	"github.com/hibiken/asynq/internal/log"
 	"github.com/hibiken/asynq/internal/timeutil"
diff --git a/internal/base/base.go b/internal/base/base.go
index 505e1ba2a..fbca74f13 100644
--- a/internal/base/base.go
+++ b/internal/base/base.go
@@ -14,12 +14,13 @@ import (
 	"sync"
 	"time"
 
-	"github.com/hibiken/asynq/internal/errors"
-	pb "github.com/hibiken/asynq/internal/proto"
-	"github.com/hibiken/asynq/internal/timeutil"
 	"github.com/redis/go-redis/v9"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/timestamppb"
+
+	"github.com/hibiken/asynq/internal/errors"
+	pb "github.com/hibiken/asynq/internal/proto"
+	"github.com/hibiken/asynq/internal/timeutil"
 )
 
 // Version of asynq library and CLI.
@@ -722,4 +723,5 @@ type Broker interface {
 	PublishCancelation(id string) error
 
 	WriteResult(qname, id string, data []byte) (n int, err error)
+	SetQueueConcurrency(qname string, concurrency int)
 }
diff --git a/internal/rdb/rdb.go b/internal/rdb/rdb.go
index 22df5060e..9ee3d0384 100644
--- a/internal/rdb/rdb.go
+++ b/internal/rdb/rdb.go
@@ -13,11 +13,12 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+	"github.com/redis/go-redis/v9"
+	"github.com/spf13/cast"
+
 	"github.com/hibiken/asynq/internal/base"
 	"github.com/hibiken/asynq/internal/errors"
 	"github.com/hibiken/asynq/internal/timeutil"
-	"github.com/redis/go-redis/v9"
-	"github.com/spf13/cast"
 )
 
 const statsTTL = 90 * 24 * time.Hour // 90 days
@@ -25,19 +26,34 @@ const statsTTL = 90 * 24 * time.Hour // 90 days
 // LeaseDuration is the duration used to initially create a lease and to extend it thereafter.
 const LeaseDuration = 30 * time.Second
 
+type Option func(r *RDB)
+
+func WithQueueConcurrency(queueConcurrency map[string]int) Option {
+	return func(r *RDB) {
+		for qname, concurrency := range queueConcurrency {
+			r.queueConcurrency.Store(qname, concurrency)
+		}
+	}
+}
+
 // RDB is a client interface to query and mutate task queues.
 type RDB struct {
-	client          redis.UniversalClient
-	clock           timeutil.Clock
-	queuesPublished sync.Map
+	client           redis.UniversalClient
+	clock            timeutil.Clock
+	queuesPublished  sync.Map
+	queueConcurrency sync.Map
 }
 
 // NewRDB returns a new instance of RDB.
-func NewRDB(client redis.UniversalClient) *RDB {
-	return &RDB{
+func NewRDB(client redis.UniversalClient, opts ...Option) *RDB {
+	r := &RDB{
 		client: client,
 		clock:  timeutil.NewRealClock(),
 	}
+	for _, opt := range opts {
+		opt(r)
+	}
+	return r
 }
 
 // Close closes the connection with redis server.
@@ -217,6 +233,7 @@ func (r *RDB) EnqueueUnique(ctx context.Context, msg *base.TaskMessage, ttl time
 // --
 // ARGV[1] -> initial lease expiration Unix time
 // ARGV[2] -> task key prefix
+// ARGV[3] -> queue concurrency
 //
 // Output:
 // Returns nil if no processable task is found in the given queue.
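Reviewer note, not part of the change set: a minimal usage sketch of the `Option`/`WithQueueConcurrency` plumbing added above. The package name, queue names, cap values, and Redis address below are illustrative assumptions, and `internal/rdb` can only be imported from inside the asynq module.

```go
// Illustrative only; not code from this diff.
package rdbexample

import (
	"github.com/redis/go-redis/v9"

	"github.com/hibiken/asynq/internal/rdb"
)

// newCappedRDB builds the internal broker with per-queue caps.
// Queues omitted from the map (or given a value <= 0) keep unlimited
// concurrency; see the math.MaxInt fallback in Dequeue below.
func newCappedRDB() *rdb.RDB {
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
	return rdb.NewRDB(client, rdb.WithQueueConcurrency(map[string]int{
		"critical": 10,
		"default":  5,
	}))
}
```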
@@ -225,15 +242,20 @@ func (r *RDB) EnqueueUnique(ctx context.Context, msg *base.TaskMessage, ttl time
 // Note: dequeueCmd checks whether a queue is paused first, before
 // calling RPOPLPUSH to pop a task from the queue.
 var dequeueCmd = redis.NewScript(`
-if redis.call("EXISTS", KEYS[2]) == 0 then
-	local id = redis.call("RPOPLPUSH", KEYS[1], KEYS[3])
-	if id then
-		local key = ARGV[2] .. id
-		redis.call("HSET", key, "state", "active")
-		redis.call("HDEL", key, "pending_since")
-		redis.call("ZADD", KEYS[4], ARGV[1], id)
-		return redis.call("HGET", key, "msg")
-	end
+if redis.call("EXISTS", KEYS[2]) > 0 then
+	return nil
+end
+local count = redis.call("ZCARD", KEYS[4])
+if (count >= tonumber(ARGV[3])) then
+	return nil
+end
+local id = redis.call("RPOPLPUSH", KEYS[1], KEYS[3])
+if id then
+	local key = ARGV[2] .. id
+	redis.call("HSET", key, "state", "active")
+	redis.call("HDEL", key, "pending_since")
+	redis.call("ZADD", KEYS[4], ARGV[1], id)
+	return redis.call("HGET", key, "msg")
 end
 return nil`)
 
@@ -251,9 +273,14 @@ func (r *RDB) Dequeue(qnames ...string) (msg *base.TaskMessage, leaseExpirationT
 			base.LeaseKey(qname),
 		}
 		leaseExpirationTime = r.clock.Now().Add(LeaseDuration)
+		queueConcurrency, ok := r.queueConcurrency.Load(qname)
+		if !ok || queueConcurrency.(int) <= 0 {
+			queueConcurrency = math.MaxInt
+		}
 		argv := []interface{}{
 			leaseExpirationTime.Unix(),
 			base.TaskKeyPrefix(qname),
+			queueConcurrency,
 		}
 		res, err := dequeueCmd.Run(context.Background(), r.client, keys, argv...).Result()
 		if err == redis.Nil {
@@ -1556,3 +1583,7 @@ func (r *RDB) WriteResult(qname, taskID string, data []byte) (int, error) {
 	}
 	return len(data), nil
 }
+
+func (r *RDB) SetQueueConcurrency(qname string, concurrency int) {
+	r.queueConcurrency.Store(qname, concurrency)
+}
diff --git a/internal/rdb/rdb_test.go b/internal/rdb/rdb_test.go
index 5249a29a6..6c4c4cfcf 100644
--- a/internal/rdb/rdb_test.go
+++ b/internal/rdb/rdb_test.go
@@ -18,11 +18,12 @@ import (
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"github.com/google/uuid"
+	"github.com/redis/go-redis/v9"
+
 	"github.com/hibiken/asynq/internal/base"
 	"github.com/hibiken/asynq/internal/errors"
 	h "github.com/hibiken/asynq/internal/testutil"
 	"github.com/hibiken/asynq/internal/timeutil"
-	"github.com/redis/go-redis/v9"
 )
 
 // variables used for package testing.
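Reviewer note, not part of the diff: a hedged sketch of how the capped Dequeue looks from a caller's side. The helper and package name are invented for illustration; it relies only on APIs that appear above (`SetQueueConcurrency`, `Dequeue`, `errors.ErrNoProcessableTask`) and compiles only inside the asynq module.

```go
// Hypothetical helper, for illustration only.
package rdbexample

import (
	"github.com/hibiken/asynq/internal/base"
	"github.com/hibiken/asynq/internal/errors"
	"github.com/hibiken/asynq/internal/rdb"
)

// drainDefault dequeues from "default" until no task is processable. With the
// cap set to 2 and no leases released in between, at most two messages come
// back; ErrNoProcessableTask covers both "queue is empty" and "queue is at its
// concurrency cap".
func drainDefault(r *rdb.RDB) ([]*base.TaskMessage, error) {
	r.SetQueueConcurrency("default", 2)
	var msgs []*base.TaskMessage
	for {
		msg, _, err := r.Dequeue("default")
		if errors.Is(err, errors.ErrNoProcessableTask) {
			return msgs, nil
		}
		if err != nil {
			return msgs, err
		}
		msgs = append(msgs, msg)
	}
}
```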
@@ -384,6 +385,7 @@ func TestDequeue(t *testing.T) {
 		wantPending map[string][]*base.TaskMessage
 		wantActive  map[string][]*base.TaskMessage
 		wantLease   map[string][]base.Z
+		queueConcurrency map[string]int
 	}{
 		{
 			pending: map[string][]*base.TaskMessage{
@@ -494,6 +496,92 @@ func TestDequeue(t *testing.T) {
 	}
 }
 
+func TestDequeueWithQueueConcurrency(t *testing.T) {
+	r := setup(t)
+	defer r.Close()
+	now := time.Now()
+	r.SetClock(timeutil.NewSimulatedClock(now))
+	const taskNum = 3
+	msgs := make([]*base.TaskMessage, 0, taskNum)
+	for i := 0; i < taskNum; i++ {
+		msg := &base.TaskMessage{
+			ID:       uuid.NewString(),
+			Type:     "send_email",
+			Payload:  h.JSON(map[string]interface{}{"subject": "hello!"}),
+			Queue:    "default",
+			Timeout:  1800,
+			Deadline: 0,
+		}
+		msgs = append(msgs, msg)
+	}
+
+	tests := []struct {
+		name             string
+		pending          map[string][]*base.TaskMessage
+		qnames           []string // list of queues to query
+		queueConcurrency map[string]int
+		wantMsgs         []*base.TaskMessage
+	}{
+		{
+			name: "without queue concurrency control",
+			pending: map[string][]*base.TaskMessage{
+				"default": msgs,
+			},
+			qnames:   []string{"default"},
+			wantMsgs: msgs,
+		},
+		{
+			name: "with queue concurrency control",
+			pending: map[string][]*base.TaskMessage{
+				"default": msgs,
+			},
+			qnames:           []string{"default"},
+			queueConcurrency: map[string]int{"default": 2},
+			wantMsgs:         msgs[:2],
+		},
+		{
+			name: "with queue concurrency zero",
+			pending: map[string][]*base.TaskMessage{
+				"default": msgs,
+			},
+			qnames:           []string{"default"},
+			queueConcurrency: map[string]int{"default": 0},
+			wantMsgs:         msgs,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			h.FlushDB(t, r.client) // clean up db before each test case
+			h.SeedAllPendingQueues(t, r.client, tc.pending)
+			r.queueConcurrency.Range(func(key, value interface{}) bool {
+				r.queueConcurrency.Delete(key)
+				return true
+			})
+			for queue, n := range tc.queueConcurrency {
+				r.queueConcurrency.Store(queue, n)
+			}
+
+			gotMsgs := make([]*base.TaskMessage, 0, len(msgs))
+			for i := 0; i < len(msgs); i++ {
+				msg, _, err := r.Dequeue(tc.qnames...)
+				if errors.Is(err, errors.ErrNoProcessableTask) {
+					break
+				}
+				if err != nil {
+					t.Errorf("(*RDB).Dequeue(%v) returned error %v", tc.qnames, err)
+					continue
+				}
+				gotMsgs = append(gotMsgs, msg)
+			}
+			if diff := cmp.Diff(tc.wantMsgs, gotMsgs, h.SortZSetEntryOpt); diff != "" {
+				t.Errorf("(*RDB).Dequeue(%v) returned message %v; want %v",
+					tc.qnames, gotMsgs, tc.wantMsgs)
+			}
+		})
+	}
+}
+
 func TestDequeueError(t *testing.T) {
 	r := setup(t)
 	defer r.Close()
diff --git a/internal/testbroker/testbroker.go b/internal/testbroker/testbroker.go
index ffab6fe80..510bf4665 100644
--- a/internal/testbroker/testbroker.go
+++ b/internal/testbroker/testbroker.go
@@ -11,8 +11,9 @@ import (
 	"sync"
 	"time"
 
-	"github.com/hibiken/asynq/internal/base"
 	"github.com/redis/go-redis/v9"
+
+	"github.com/hibiken/asynq/internal/base"
 )
 
 var errRedisDown = errors.New("testutil: redis is down")
@@ -297,3 +298,7 @@ func (tb *TestBroker) ReclaimStaleAggregationSets(qname string) error {
 	}
 	return tb.real.ReclaimStaleAggregationSets(qname)
 }
+
+func (tb *TestBroker) SetQueueConcurrency(qname string, concurrency int) {
+	tb.real.SetQueueConcurrency(qname, concurrency)
+}
diff --git a/processor.go b/processor.go
index fa810d67d..fbd7b058b 100644
--- a/processor.go
+++ b/processor.go
@@ -16,12 +16,13 @@ import (
 	"sync"
 	"time"
 
+	"golang.org/x/time/rate"
+
 	"github.com/hibiken/asynq/internal/base"
 	asynqcontext "github.com/hibiken/asynq/internal/context"
 	"github.com/hibiken/asynq/internal/errors"
 	"github.com/hibiken/asynq/internal/log"
 	"github.com/hibiken/asynq/internal/timeutil"
-	"golang.org/x/time/rate"
 )
 
 type processor struct {
@@ -57,7 +58,7 @@ type processor struct {
 	// channel to communicate back to the long running "processor" goroutine.
 	// once is used to send value to the channel only once.
 	done chan struct{}
-	once sync.Once
+	once *sync.Once
 
 	// quit channel is closed when the shutdown of the "processor" goroutine starts.
 	quit chan struct{}
@@ -112,6 +113,7 @@ func newProcessor(params processorParams) *processor {
 		errLogLimiter: rate.NewLimiter(rate.Every(3*time.Second), 1),
 		sema:          make(chan struct{}, params.concurrency),
 		done:          make(chan struct{}),
+		once:          &sync.Once{},
 		quit:          make(chan struct{}),
 		abort:         make(chan struct{}),
 		errHandler:    params.errHandler,
@@ -139,7 +141,9 @@ func (p *processor) stop() {
 func (p *processor) shutdown() {
 	p.stop()
 
-	time.AfterFunc(p.shutdownTimeout, func() { close(p.abort) })
+	go func(abort chan struct{}) {
+		time.AfterFunc(p.shutdownTimeout, func() { close(abort) })
+	}(p.abort)
 
 	p.logger.Info("Waiting for all workers to finish...")
 	// block until all workers have released the token
@@ -149,6 +153,14 @@ func (p *processor) shutdown() {
 	p.logger.Info("All workers have finished")
 }
 
+func (p *processor) resetState() {
+	p.sema = make(chan struct{}, cap(p.sema))
+	p.done = make(chan struct{})
+	p.quit = make(chan struct{})
+	p.abort = make(chan struct{})
+	p.once = &sync.Once{}
+}
+
 func (p *processor) start(wg *sync.WaitGroup) {
 	wg.Add(1)
 	go func() {
diff --git a/server.go b/server.go
index 0cc4f3874..db1efa2ba 100644
--- a/server.go
+++ b/server.go
@@ -15,10 +15,11 @@ import (
 	"sync"
 	"time"
 
+	"github.com/redis/go-redis/v9"
+
 	"github.com/hibiken/asynq/internal/base"
 	"github.com/hibiken/asynq/internal/log"
 	"github.com/hibiken/asynq/internal/rdb"
-	"github.com/redis/go-redis/v9"
 )
 
 // Server is responsible for task processing and task lifecycle management.
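Reviewer note, not part of the diff: the processor changes above (the pointer `sync.Once`, the channels recreated in `resetState`, and capturing `p.abort` before the delayed close) exist so a component can be stopped and started again within one Server, as `Server.AddQueue` does below. A minimal, hypothetical sketch of that reset-and-restart pattern:

```go
// Hypothetical stand-in for processor/aggregator; not code from this diff.
package main

import "sync"

type component struct {
	once *sync.Once    // a pointer so reset can install a fresh Once
	done chan struct{} // closed exactly once per run via once.Do
}

func newComponent() *component {
	return &component{once: &sync.Once{}, done: make(chan struct{})}
}

func (c *component) stop() {
	c.once.Do(func() { close(c.done) })
}

// resetState recreates everything stop() consumed so the component
// can be started again after a shutdown.
func (c *component) resetState() {
	c.done = make(chan struct{})
	c.once = &sync.Once{}
}

func main() {
	c := newComponent()
	c.stop()
	c.resetState() // ready for another start/stop cycle
	c.stop()
}
```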
@@ -43,6 +44,10 @@ type Server struct {
 
 	state *serverState
 
+	mu             sync.RWMutex
+	queues         map[string]int
+	strictPriority bool
+
 	// wait group to wait for all goroutines to finish.
 	wg        sync.WaitGroup
 	forwarder *forwarder
@@ -253,6 +258,11 @@ type Config struct {
 	// If unset or zero, default batch size of 100 is used.
 	// Make sure to not put a big number as the batch size to prevent a long-running script.
 	JanitorBatchSize int
+
+	// Maximum number of concurrently active tasks per queue, across all servers.
+	//
+	// If zero or not set, NewServer does not limit the concurrency of the queue.
+	QueueConcurrency map[string]int
 }
 
 // GroupAggregator aggregates a group of tasks into one before the tasks are passed to the Handler.
@@ -475,7 +485,9 @@ func NewServerFromRedisClient(c redis.UniversalClient, cfg Config) *Server {
 		}
 	}
 	if len(queues) == 0 {
-		queues = defaultQueueConfig
+		for qname, p := range defaultQueueConfig {
+			queues[qname] = p
+		}
 	}
 	var qnames []string
 	for q := range queues {
@@ -504,7 +516,7 @@
 	}
 	logger.SetLevel(toInternalLogLevel(loglevel))
 
-	rdb := rdb.NewRDB(c)
+	rdb := rdb.NewRDB(c, rdb.WithQueueConcurrency(cfg.QueueConcurrency))
 	starting := make(chan *workerInfo)
 	finished := make(chan *base.TaskMessage)
 	syncCh := make(chan *syncRequest)
@@ -604,6 +616,8 @@
 		groupAggregator: cfg.GroupAggregator,
 	})
 	return &Server{
+		queues:           queues,
+		strictPriority:   cfg.StrictPriority,
 		logger:           logger,
 		broker:           rdb,
 		sharedConnection: true,
@@ -786,3 +800,79 @@
 
 	return srv.broker.Ping()
 }
+
+func (srv *Server) AddQueue(qname string, priority, concurrency int) {
+	srv.mu.Lock()
+	defer srv.mu.Unlock()
+
+	if _, ok := srv.queues[qname]; ok {
+		srv.logger.Warnf("queue %s already exists, skipping", qname)
+		return
+	}
+
+	srv.state.mu.Lock()
+	state := srv.state.value
+	if state == srvStateNew || state == srvStateClosed {
+		srv.queues[qname] = priority
+		srv.state.mu.Unlock()
+		return
+	}
+	srv.state.mu.Unlock()
+
+	srv.logger.Info("restart server...")
+	srv.forwarder.shutdown()
+	srv.processor.shutdown()
+	srv.recoverer.shutdown()
+	srv.syncer.shutdown()
+	srv.subscriber.shutdown()
+	srv.janitor.shutdown()
+	srv.aggregator.shutdown()
+	srv.healthchecker.shutdown()
+	srv.heartbeater.shutdown()
+	srv.wg.Wait()
+
+	srv.queues[qname] = priority
+
+	qnames := make([]string, 0, len(srv.queues))
+	for q := range srv.queues {
+		qnames = append(qnames, q)
+	}
+	srv.broker.SetQueueConcurrency(qname, concurrency)
+	srv.heartbeater.queues = srv.queues
+	srv.recoverer.queues = qnames
+	srv.forwarder.queues = qnames
+	srv.processor.resetState()
+	queues := normalizeQueues(srv.queues)
+	orderedQueues := []string(nil)
+	if srv.strictPriority {
+		orderedQueues = sortByPriority(queues)
+	}
+	srv.processor.queueConfig = srv.queues
+	srv.processor.orderedQueues = orderedQueues
+	srv.janitor.queues = qnames
+	srv.aggregator.resetState()
+	srv.aggregator.queues = qnames
+
+	srv.heartbeater.start(&srv.wg)
+	srv.healthchecker.start(&srv.wg)
+	srv.subscriber.start(&srv.wg)
+	srv.syncer.start(&srv.wg)
+	srv.recoverer.start(&srv.wg)
+	srv.forwarder.start(&srv.wg)
+	srv.processor.start(&srv.wg)
+	srv.janitor.start(&srv.wg)
+	srv.aggregator.start(&srv.wg)
+
+	srv.logger.Info("server restarted")
+}
+
+func (srv *Server) HasQueue(qname string) bool {
+	srv.mu.RLock()
+	defer srv.mu.RUnlock()
+	_, ok := srv.queues[qname]
+	return ok
+}
+
+func (srv *Server) SetQueueConcurrency(queue string, concurrency int) {
+	srv.broker.SetQueueConcurrency(queue, concurrency)
+}
diff --git a/server_test.go b/server_test.go
index 967f5196f..437e408d4 100644
--- a/server_test.go
+++ b/server_test.go
@@ -11,6 +11,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/hibiken/asynq/internal/base"
 	"github.com/hibiken/asynq/internal/rdb"
 	"github.com/hibiken/asynq/internal/testbroker"
 	"github.com/hibiken/asynq/internal/testutil"
@@ -80,6 +81,198 @@ func TestServerFromRedisClient(t *testing.T) {
 	}
 }
 
+func TestServerWithQueueConcurrency(t *testing.T) {
+	// https://github.com/go-redis/redis/issues/1029
+	ignoreOpt := goleak.IgnoreTopFunction("github.com/redis/go-redis/v9/internal/pool.(*ConnPool).reaper")
+	defer goleak.VerifyNone(t, ignoreOpt)
+
+	redisConnOpt := getRedisConnOpt(t)
+	r, ok := redisConnOpt.MakeRedisClient().(redis.UniversalClient)
+	if !ok {
+		t.Fatalf("asynq: unsupported RedisConnOpt type %T", r)
+	}
+
+	const taskNum = 8
+	const serverNum = 2
+	tests := []struct {
+		name             string
+		concurrency      int
+		queueConcurrency int
+		wantActiveNum    int
+	}{
+		{
+			name:             "based on client concurrency control",
+			concurrency:      2,
+			queueConcurrency: 6,
+			wantActiveNum:    2 * serverNum,
+		},
+		{
+			name:             "no queue concurrency control",
+			concurrency:      2,
+			queueConcurrency: 0,
+			wantActiveNum:    2 * serverNum,
+		},
+		{
+			name:             "based on queue concurrency control",
+			concurrency:      6,
+			queueConcurrency: 2,
+			wantActiveNum:    2,
+		},
+	}
+
+	// no-op handler
+	handle := func(ctx context.Context, task *Task) error {
+		time.Sleep(time.Second * 2)
+		return nil
+	}
+
+	var servers [serverNum]*Server
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			var err error
+			testutil.FlushDB(t, r)
+			c := NewClient(redisConnOpt)
+			defer c.Close()
+			for i := 0; i < taskNum; i++ {
+				_, err = c.Enqueue(NewTask("send_email",
+					testutil.JSON(map[string]interface{}{"recipient_id": i + 123})))
+				if err != nil {
+					t.Fatalf("could not enqueue a task: %v", err)
+				}
+			}
+
+			for i := 0; i < serverNum; i++ {
+				srv := NewServer(redisConnOpt, Config{
+					Concurrency:      tc.concurrency,
+					LogLevel:         testLogLevel,
+					QueueConcurrency: map[string]int{base.DefaultQueueName: tc.queueConcurrency},
+				})
+				err = srv.Start(HandlerFunc(handle))
+				if err != nil {
+					t.Fatal(err)
+				}
+				servers[i] = srv
+			}
+			defer func() {
+				for _, srv := range servers {
+					srv.Shutdown()
+				}
+			}()
+
+			time.Sleep(time.Second)
+			inspector := NewInspector(redisConnOpt)
+			tasks, err := inspector.ListActiveTasks(base.DefaultQueueName)
+			if err != nil {
+				t.Fatalf("could not list active tasks: %v", err)
+			}
+			if len(tasks) != tc.wantActiveNum {
+				t.Errorf("default queue has %d active tasks, want %d", len(tasks), tc.wantActiveNum)
+			}
+		})
+	}
+}
+
+func TestServerWithDynamicQueue(t *testing.T) {
+	// https://github.com/go-redis/redis/issues/1029
+	ignoreOpt := goleak.IgnoreTopFunction("github.com/redis/go-redis/v9/internal/pool.(*ConnPool).reaper")
+	defer goleak.VerifyNone(t, ignoreOpt)
+
+	redisConnOpt := getRedisConnOpt(t)
+	r, ok := redisConnOpt.MakeRedisClient().(redis.UniversalClient)
+	if !ok {
+		t.Fatalf("asynq: unsupported RedisConnOpt type %T", r)
+	}
+
+	const taskNum = 8
+	const serverNum = 2
+	tests := []struct {
+		name             string
+		concurrency      int
+		queueConcurrency int
+		wantActiveNum    int
+	}{
+		{
+			name:             "based on client concurrency control",
+			concurrency:      2,
+			queueConcurrency: 6,
+			wantActiveNum:    2 * serverNum,
+		},
+		{
+			name:             "no queue concurrency control",
+			concurrency:      2,
+			queueConcurrency: 0,
+			wantActiveNum:    2 * serverNum,
+		},
+		{
+			name:             "based on queue concurrency control",
+			concurrency:      6,
+			queueConcurrency: 2,
+			wantActiveNum:    2 * serverNum,
+		},
+	}
+
+	// no-op handler
+	handle := func(ctx context.Context, task *Task) error {
+		time.Sleep(time.Second * 2)
+		return nil
+	}
+
+	var DynamicQueueNameFmt = "dynamic:%d:%d"
+	var servers [serverNum]*Server
+	for tcn, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			var err error
+			testutil.FlushDB(t, r)
+			c := NewClient(redisConnOpt)
+			defer c.Close()
+			for i := 0; i < taskNum; i++ {
+				_, err = c.Enqueue(NewTask("send_email",
+					testutil.JSON(map[string]interface{}{"recipient_id": i + 123})),
+					Queue(fmt.Sprintf(DynamicQueueNameFmt, tcn, i%2)))
+				if err != nil {
+					t.Fatalf("could not enqueue a task: %v", err)
+				}
+			}
+
+			for i := 0; i < serverNum; i++ {
+				srv := NewServer(redisConnOpt, Config{
+					Concurrency:      tc.concurrency,
+					LogLevel:         testLogLevel,
+					QueueConcurrency: map[string]int{base.DefaultQueueName: tc.queueConcurrency},
+				})
+				err = srv.Start(HandlerFunc(handle))
+				if err != nil {
+					t.Fatal(err)
+				}
+				srv.AddQueue(fmt.Sprintf(DynamicQueueNameFmt, tcn, i), 1, tc.queueConcurrency)
+				servers[i] = srv
+			}
+			defer func() {
+				for _, srv := range servers {
+					srv.Shutdown()
+				}
+			}()
+
+			time.Sleep(time.Second)
+			inspector := NewInspector(redisConnOpt)
+
+			var tasks []*TaskInfo
+
+			for i := range servers {
+				qtasks, err := inspector.ListActiveTasks(fmt.Sprintf(DynamicQueueNameFmt, tcn, i))
+				if err != nil {
+					t.Fatalf("could not list active tasks: %v", err)
+				}
+				tasks = append(tasks, qtasks...)
+			}
+
+			if len(tasks) != tc.wantActiveNum {
+				t.Errorf("dynamic queue has %d active tasks, want %d", len(tasks), tc.wantActiveNum)
+			}
+		})
+	}
+}
+
 func TestServerRun(t *testing.T) {
 	// https://github.com/go-redis/redis/issues/1029
 	ignoreOpt := goleak.IgnoreTopFunction("github.com/redis/go-redis/v9/internal/pool.(*ConnPool).reaper")
diff --git a/subscriber.go b/subscriber.go
index 8fc4eac9b..d4d0d0f76 100644
--- a/subscriber.go
+++ b/subscriber.go
@@ -80,6 +80,9 @@ func (s *subscriber) start(wg *sync.WaitGroup) {
 				s.logger.Debug("Subscriber done")
 				return
 			case msg := <-cancelCh:
+				if msg == nil {
+					return
+				}
 				cancel, ok := s.cancelations.Get(msg.Payload)
 				if ok {
 					cancel()
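Reviewer note, not part of the change set: taken together, the public surface introduced by this diff (Config.QueueConcurrency, Server.AddQueue, Server.HasQueue, Server.SetQueueConcurrency) could be exercised roughly as below. This is a hedged sketch, not an example from the repository; the handler, Redis address, queue names, and numbers are placeholders.

```go
package main

import (
	"context"
	"log"

	"github.com/hibiken/asynq"
)

func main() {
	srv := asynq.NewServer(asynq.RedisClientOpt{Addr: "localhost:6379"}, asynq.Config{
		Concurrency: 10,
		// New in this change: at most 4 tasks of the "default" queue may be
		// active at once; the cap is checked against the queue's shared lease
		// set, so it holds across all servers working on the queue.
		QueueConcurrency: map[string]int{"default": 4},
	})

	h := asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error {
		return nil // placeholder handler
	})
	if err := srv.Start(h); err != nil {
		log.Fatal(err)
	}

	// New in this change: register an additional queue at runtime. AddQueue
	// briefly restarts the server's internal components to pick it up.
	if !srv.HasQueue("reports") {
		srv.AddQueue("reports", 1 /* priority */, 2 /* concurrency cap */)
	}

	// New in this change: adjust a queue's cap on the broker at runtime.
	srv.SetQueueConcurrency("default", 8)

	srv.Shutdown()
}
```

Since AddQueue tears down and restarts the server's internal components, it is best invoked right after Start (or before it), not on a hot path.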