mirror of
https://github.com/go-gitea/gitea.git
synced 2026-06-28 16:46:17 +02:00
## Summary

Fixes two related bugs that cause jobs in large workflows (50+ parallel jobs) to never get a runner assigned even though runners are free.

### Bug 1 — Concurrent runner race

When N runners all poll `FetchTask` with a stale `tasksVersion` simultaneously, they all query the same waiting job list sorted by `(updated, id)` and all pick **job #1**. Only one wins the `UPDATE WHERE task_id=0` optimistic lock; the rest return empty-handed but still receive `latestVersion` in the response. They then consider themselves "up to date" and skip `PickTask` on every subsequent poll, leaving jobs #2–50 permanently unassigned.

**Fix:** `CreateTaskForRunner` now iterates through all matching waiting jobs. When the optimistic lock fails on job #1, it immediately tries job #2, then #3, etc., each in its own independent transaction so a failed attempt rolls back cleanly before the next candidate is tried. `PickTask` no longer wraps this call in an outer `db.WithTx` (which caused `halfCommitter` entanglement that prevented per-attempt rollbacks).

### Bug 2 — Idle runner doesn't re-check after finishing a task

`tasks_version` only bumps when a job transitions **to** waiting (new workflow triggered, blocked→unblocked). After a runner finishes its current task it polls `FetchTask` with `tasksVersion == latestVersion`, so the server skips `PickTask` entirely — the remaining 45 waiting jobs are invisible to the now-idle runner.

**Fix:** Also call `IncreaseTaskVersion` in `UpdateRunJob` when a (non-reusable-caller) job transitions to a **done** state. Idle runners then see a version mismatch on their next poll and attempt `PickTask`, picking up the remaining jobs.
374 lines
10 KiB
Go
374 lines
10 KiB
Go
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
|
|
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
|
"gitea.dev/models/db"
|
|
"gitea.dev/models/unittest"
|
|
"gitea.dev/modules/actions/jobparser"
|
|
"gitea.dev/modules/timeutil"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
)
|
|
|
|
func TestMakeTaskStepDisplayName(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
jobStep *jobparser.Step
|
|
expected string
|
|
}{
|
|
{
|
|
name: "explicit name",
|
|
jobStep: &jobparser.Step{
|
|
Name: "Test Step",
|
|
},
|
|
expected: "Test Step",
|
|
},
|
|
{
|
|
name: "uses step",
|
|
jobStep: &jobparser.Step{
|
|
Uses: "actions/checkout@v4",
|
|
},
|
|
expected: "Run actions/checkout@v4",
|
|
},
|
|
{
|
|
name: "single-line run",
|
|
jobStep: &jobparser.Step{
|
|
Run: "echo hello",
|
|
},
|
|
expected: "Run echo hello",
|
|
},
|
|
{
|
|
name: "multi-line run block scalar",
|
|
jobStep: &jobparser.Step{
|
|
Run: "\n echo hello \r\n echo world \n ",
|
|
},
|
|
expected: "Run echo hello",
|
|
},
|
|
{
|
|
name: "fallback to id",
|
|
jobStep: &jobparser.Step{
|
|
ID: "step-id",
|
|
},
|
|
expected: "Run step-id",
|
|
},
|
|
{
|
|
name: "very long name truncated",
|
|
jobStep: &jobparser.Step{
|
|
Name: strings.Repeat("a", 300),
|
|
},
|
|
expected: strings.Repeat("a", 252) + "…",
|
|
},
|
|
{
|
|
name: "very long run truncated",
|
|
jobStep: &jobparser.Step{
|
|
Run: strings.Repeat("a", 300),
|
|
},
|
|
expected: "Run " + strings.Repeat("a", 248) + "…",
|
|
},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
result := makeTaskStepDisplayName(tt.jobStep, 255)
|
|
assert.Equal(t, tt.expected, result)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestTaskCancellingFinalizesToCancelled(t *testing.T) {
|
|
newRunningTask := func(t *testing.T) (*ActionTask, *ActionRunJob) {
|
|
t.Helper()
|
|
|
|
run := &ActionRun{
|
|
Title: "cancelling-test-run",
|
|
RepoID: 1,
|
|
OwnerID: 2,
|
|
WorkflowID: "test.yaml",
|
|
Index: 999,
|
|
TriggerUserID: 2,
|
|
Ref: "refs/heads/master",
|
|
CommitSHA: "c2d72f548424103f01ee1dc02889c1e2bff816b0",
|
|
Event: "push",
|
|
TriggerEvent: "push",
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), run))
|
|
|
|
job := &ActionRunJob{
|
|
RunID: run.ID,
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
Name: "cancelling-finalization-job",
|
|
Attempt: 1,
|
|
JobID: "cancelling-finalization-job",
|
|
Status: StatusRunning,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), job))
|
|
|
|
runner := &ActionRunner{
|
|
UUID: "runner-cancelling-supported",
|
|
Name: "runner-cancelling-supported",
|
|
HasCancellingSupport: true,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), runner))
|
|
|
|
task := &ActionTask{
|
|
JobID: job.ID,
|
|
Attempt: 1,
|
|
RunnerID: runner.ID,
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), task))
|
|
|
|
job.TaskID = task.ID
|
|
_, err := UpdateRunJob(t.Context(), job, nil, "task_id")
|
|
require.NoError(t, err)
|
|
|
|
return task, job
|
|
}
|
|
|
|
testResult := func(t *testing.T, result runnerv1.Result) {
|
|
t.Helper()
|
|
require.NoError(t, unittest.PrepareTestDatabase())
|
|
|
|
task, job := newRunningTask(t)
|
|
require.NoError(t, StopTask(t.Context(), task.ID, StatusCancelling))
|
|
|
|
taskAfterStop := unittest.AssertExistsAndLoadBean(t, &ActionTask{ID: task.ID})
|
|
assert.Equal(t, StatusCancelling, taskAfterStop.Status)
|
|
|
|
updatedTask, err := UpdateTaskByState(t.Context(), task.RunnerID, &runnerv1.TaskState{
|
|
Id: task.ID,
|
|
Result: result,
|
|
StoppedAt: timestamppb.Now(),
|
|
})
|
|
require.NoError(t, err)
|
|
assert.Equal(t, StatusCancelled, updatedTask.Status)
|
|
|
|
taskAfterUpdate := unittest.AssertExistsAndLoadBean(t, &ActionTask{ID: task.ID})
|
|
assert.Equal(t, StatusCancelled, taskAfterUpdate.Status)
|
|
|
|
jobAfterUpdate := unittest.AssertExistsAndLoadBean(t, &ActionRunJob{ID: job.ID})
|
|
assert.Equal(t, StatusCancelled, jobAfterUpdate.Status)
|
|
}
|
|
|
|
t.Run("runner reports success", func(t *testing.T) {
|
|
testResult(t, runnerv1.Result_RESULT_SUCCESS)
|
|
})
|
|
|
|
t.Run("runner reports failure", func(t *testing.T) {
|
|
testResult(t, runnerv1.Result_RESULT_FAILURE)
|
|
})
|
|
}
|
|
|
|
func TestStopTaskCancellingFallsBackForLegacyRunner(t *testing.T) {
|
|
require.NoError(t, unittest.PrepareTestDatabase())
|
|
|
|
run := &ActionRun{
|
|
Title: "cancelling-test-run",
|
|
RepoID: 1,
|
|
OwnerID: 2,
|
|
WorkflowID: "test.yaml",
|
|
Index: 999,
|
|
TriggerUserID: 2,
|
|
Ref: "refs/heads/master",
|
|
CommitSHA: "c2d72f548424103f01ee1dc02889c1e2bff816b0",
|
|
Event: "push",
|
|
TriggerEvent: "push",
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), run))
|
|
|
|
job := &ActionRunJob{
|
|
RunID: run.ID,
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
Name: "legacy-cancelling-job",
|
|
Attempt: 1,
|
|
JobID: "legacy-cancelling-job",
|
|
Status: StatusRunning,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), job))
|
|
|
|
runner := &ActionRunner{
|
|
UUID: "runner-legacy-no-cancelling",
|
|
Name: "runner-legacy-no-cancelling",
|
|
HasCancellingSupport: false,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), runner))
|
|
|
|
task := &ActionTask{
|
|
JobID: job.ID,
|
|
Attempt: 1,
|
|
RunnerID: runner.ID,
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), task))
|
|
|
|
job.TaskID = task.ID
|
|
_, err := UpdateRunJob(t.Context(), job, nil, "task_id")
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, StopTask(t.Context(), task.ID, StatusCancelling))
|
|
|
|
taskAfterStop := unittest.AssertExistsAndLoadBean(t, &ActionTask{ID: task.ID})
|
|
assert.Equal(t, StatusCancelled, taskAfterStop.Status)
|
|
assert.NotZero(t, taskAfterStop.Stopped)
|
|
|
|
jobAfterStop := unittest.AssertExistsAndLoadBean(t, &ActionRunJob{ID: job.ID})
|
|
assert.Equal(t, StatusCancelled, jobAfterStop.Status)
|
|
assert.NotZero(t, jobAfterStop.Stopped)
|
|
}
|
|
|
|
func TestStopTaskCancellingFallsBackForMissingRunner(t *testing.T) {
|
|
require.NoError(t, unittest.PrepareTestDatabase())
|
|
|
|
run := &ActionRun{
|
|
Title: "cancelling-test-run",
|
|
RepoID: 1,
|
|
OwnerID: 2,
|
|
WorkflowID: "test.yaml",
|
|
Index: 999,
|
|
TriggerUserID: 2,
|
|
Ref: "refs/heads/master",
|
|
CommitSHA: "c2d72f548424103f01ee1dc02889c1e2bff816b0",
|
|
Event: "push",
|
|
TriggerEvent: "push",
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), run))
|
|
|
|
job := &ActionRunJob{
|
|
RunID: run.ID,
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
Name: "missing-runner-cancelling-job",
|
|
Attempt: 1,
|
|
JobID: "missing-runner-cancelling-job",
|
|
Status: StatusRunning,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), job))
|
|
|
|
runner := &ActionRunner{
|
|
UUID: "runner-cleaned-up-before-cancel",
|
|
Name: "runner-cleaned-up-before-cancel",
|
|
HasCancellingSupport: true,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), runner))
|
|
|
|
task := &ActionTask{
|
|
JobID: job.ID,
|
|
Attempt: 1,
|
|
RunnerID: runner.ID,
|
|
Status: StatusRunning,
|
|
Started: timeutil.TimeStampNow(),
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), task))
|
|
|
|
job.TaskID = task.ID
|
|
_, err := UpdateRunJob(t.Context(), job, nil, "task_id")
|
|
require.NoError(t, err)
|
|
|
|
_, err = db.DeleteByID[ActionRunner](t.Context(), runner.ID)
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, StopTask(t.Context(), task.ID, StatusCancelling))
|
|
|
|
taskAfterStop := unittest.AssertExistsAndLoadBean(t, &ActionTask{ID: task.ID})
|
|
assert.Equal(t, StatusCancelled, taskAfterStop.Status)
|
|
assert.NotZero(t, taskAfterStop.Stopped)
|
|
|
|
jobAfterStop := unittest.AssertExistsAndLoadBean(t, &ActionRunJob{ID: job.ID})
|
|
assert.Equal(t, StatusCancelled, jobAfterStop.Status)
|
|
assert.NotZero(t, jobAfterStop.Stopped)
|
|
}
|
|
|
|
// TestReleaseTaskForRunner verifies that releasing a freshly-claimed task returns
|
|
// its job to the waiting queue and deletes the task and its steps, so a failure
|
|
// while assembling the runner response cannot strand the job in running state.
|
|
func TestReleaseTaskForRunner(t *testing.T) {
|
|
require.NoError(t, unittest.PrepareTestDatabase())
|
|
|
|
run := &ActionRun{
|
|
Title: "release-task-test-run",
|
|
RepoID: 1,
|
|
OwnerID: 2,
|
|
WorkflowID: "test.yaml",
|
|
Index: 9902,
|
|
TriggerUserID: 2,
|
|
Ref: "refs/heads/main",
|
|
CommitSHA: "c2d72f548424103f01ee1dc02889c1e2bff816b0",
|
|
Event: "push",
|
|
TriggerEvent: "push",
|
|
Status: StatusWaiting,
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), run))
|
|
|
|
job := &ActionRunJob{
|
|
RunID: run.ID,
|
|
RepoID: run.RepoID,
|
|
OwnerID: run.OwnerID,
|
|
CommitSHA: run.CommitSHA,
|
|
Name: "release-job",
|
|
Attempt: 1,
|
|
JobID: "release-job",
|
|
Status: StatusWaiting,
|
|
RunsOn: []string{"ubuntu-latest"},
|
|
WorkflowPayload: []byte("on: push\njobs:\n release-job:\n runs-on: ubuntu-latest\n steps:\n - run: echo hi\n"),
|
|
}
|
|
require.NoError(t, db.Insert(t.Context(), job))
|
|
|
|
runner := &ActionRunner{
|
|
UUID: "release-runner-uuid",
|
|
Name: "release-runner",
|
|
AgentLabels: []string{"ubuntu-latest"},
|
|
}
|
|
runner.GenerateAndFillToken()
|
|
require.NoError(t, db.Insert(t.Context(), runner))
|
|
|
|
task, ok, err := CreateTaskForRunner(t.Context(), runner)
|
|
require.NoError(t, err)
|
|
require.True(t, ok)
|
|
require.NotNil(t, task)
|
|
|
|
claimed := unittest.AssertExistsAndLoadBean(t, &ActionRunJob{ID: job.ID})
|
|
require.Equal(t, StatusRunning, claimed.Status)
|
|
require.Equal(t, task.ID, claimed.TaskID)
|
|
|
|
require.NoError(t, ReleaseTaskForRunner(t.Context(), task))
|
|
|
|
// Job is back in the waiting queue with no task assigned.
|
|
released := unittest.AssertExistsAndLoadBean(t, &ActionRunJob{ID: job.ID})
|
|
assert.Equal(t, StatusWaiting, released.Status)
|
|
assert.Zero(t, released.TaskID)
|
|
assert.Zero(t, released.Started)
|
|
|
|
// The task and its steps are gone.
|
|
unittest.AssertNotExistsBean(t, &ActionTask{ID: task.ID})
|
|
unittest.AssertNotExistsBean(t, &ActionTaskStep{TaskID: task.ID})
|
|
}
|