mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-15 23:40:58 +02:00
fix(actions): deadlock between PrepareRunAndInsert and UpdateTaskByState (#37692)
Fix #36234 ## Bug Logs show `PrepareRunAndInsert: InsertRun: Error 1213: Deadlock found`, which `handleWorkflows` silently swallows via `log.Error + continue`, so the triggered run is dropped. ## Root cause The path `UpdateRun -> UpdateRepoRunsNumbers` runs the following SQL inside every status-changing transaction: ```sql UPDATE repository SET num_action_runs = (SELECT count(*) FROM action_run WHERE repo_id = N), num_closed_action_runs = (SELECT count(*) FROM action_run WHERE repo_id = N AND status IN (...)) WHERE id = N; ``` On any DB that treats subqueries inside an UPDATE as locking reads, this statement takes locks in two steps: 1. The outer UPDATE acquires an X lock on `repository[id=N]` 2. The embedded SELECT subqueries are evaluated as locking reads, taking S locks on every `action_run` row matching `repo_id = N` Two such concurrent transactions form a cycle via `repository[N]`: | Tx | Holds | Wants | Blocked by | |---|---|---|---| | A: `PrepareRunAndInsert` (push trigger) | X on inserted `action_run` row R_A; X on `repository[N]` (outer UPDATE already through step 1) | S on `action_run` rows for repo N (subquery, step 2) | B's X lock on R_B | | B: `UpdateTaskByState` (runner callback) | X on `action_run` row R_B (from `UpdateRun`) | X on `repository[N]` (outer UPDATE, step 1) | A's X lock on `repository[N]` | | **Cycle** | A waits for R_B; B waits for `repository[N]` | | deadlock error -> `handleWorkflows` swallows -> run lost | PostgreSQL's MVCC reads do not take these locks and SQLite serializes writers, so the symptom only surfaces on MySQL/MSSQL. ## Fix Split `UpdateRepoRunsNumbers` into small SQLs to avoid locking reads and move it out of DB transactions. --------- Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Nicolas <bircni@icloud.com>
This commit is contained in:
parent
f9b7b65371
commit
cf0f25b798
@ -7,7 +7,6 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -224,30 +223,34 @@ func (run *ActionRun) IsSchedule() bool {
|
||||
}
|
||||
|
||||
// UpdateRepoRunsNumbers updates the number of runs and closed runs of a repository.
|
||||
func UpdateRepoRunsNumbers(ctx context.Context, repo *repo_model.Repository) error {
|
||||
_, err := db.GetEngine(ctx).ID(repo.ID).
|
||||
NoAutoTime().
|
||||
Cols("num_action_runs", "num_closed_action_runs").
|
||||
SetExpr("num_action_runs",
|
||||
builder.Select("count(*)").From("action_run").
|
||||
Where(builder.Eq{"repo_id": repo.ID}),
|
||||
).
|
||||
SetExpr("num_closed_action_runs",
|
||||
builder.Select("count(*)").From("action_run").
|
||||
Where(builder.Eq{
|
||||
"repo_id": repo.ID,
|
||||
}.And(
|
||||
builder.In("status",
|
||||
StatusSuccess,
|
||||
StatusFailure,
|
||||
StatusCancelled,
|
||||
StatusSkipped,
|
||||
),
|
||||
),
|
||||
),
|
||||
).
|
||||
Update(repo)
|
||||
return err
|
||||
// Callers MUST invoke this from outside any transaction that has X-locked action_run rows for the same repo, otherwise, transaction deadlock
|
||||
func UpdateRepoRunsNumbers(ctx context.Context, repoID int64) {
|
||||
if db.InTransaction(ctx) {
|
||||
setting.PanicInDevOrTesting("UpdateRepoRunsNumbers must not be called inside a transaction")
|
||||
}
|
||||
|
||||
e := db.GetEngine(ctx)
|
||||
|
||||
numActionRuns, err := e.Where("repo_id = ?", repoID).Count(new(ActionRun))
|
||||
if err != nil {
|
||||
log.Error("UpdateRepoRunsNumbers count num_action_runs for repo %d: %v", repoID, err)
|
||||
return
|
||||
}
|
||||
|
||||
numClosedActionRuns, err := e.Where("repo_id = ?", repoID).
|
||||
In("status", StatusSuccess, StatusFailure, StatusCancelled, StatusSkipped).
|
||||
Count(new(ActionRun))
|
||||
if err != nil {
|
||||
log.Error("UpdateRepoRunsNumbers count num_closed_action_runs for repo %d: %v", repoID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := e.ID(repoID).Cols("num_action_runs", "num_closed_action_runs").NoAutoTime().Update(&repo_model.Repository{
|
||||
NumActionRuns: int(numActionRuns),
|
||||
NumClosedActionRuns: int(numClosedActionRuns),
|
||||
}); err != nil {
|
||||
log.Error("UpdateRepoRunsNumbers update repo %d: %v", repoID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// CancelPreviousJobs cancels all previous jobs of the same repository, reference, workflow, and event.
|
||||
@ -415,18 +418,6 @@ func UpdateRun(ctx context.Context, run *ActionRun, cols ...string) error {
|
||||
// It's impossible that the run is not found, since Gitea never deletes runs.
|
||||
}
|
||||
|
||||
if run.Status != 0 || slices.Contains(cols, "status") {
|
||||
if run.RepoID == 0 {
|
||||
setting.PanicInDevOrTesting("RepoID should not be 0")
|
||||
}
|
||||
if err = run.LoadRepo(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := UpdateRepoRunsNumbers(ctx, run.Repo); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@ -29,8 +29,7 @@ func TestUpdateRepoRunsNumbers(t *testing.T) {
|
||||
assert.Equal(t, 2, repo.NumClosedActionRuns)
|
||||
|
||||
// now update will correct them, only num_actionr_runs and num_closed_action_runs should be updated
|
||||
err = UpdateRepoRunsNumbers(t.Context(), repo)
|
||||
assert.NoError(t, err)
|
||||
UpdateRepoRunsNumbers(t.Context(), repo.ID)
|
||||
repo = unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 4})
|
||||
assert.Equal(t, 4, repo.NumActionRuns)
|
||||
assert.Equal(t, 3, repo.NumClosedActionRuns)
|
||||
|
||||
@ -250,6 +250,8 @@ func DeleteRun(ctx context.Context, run *actions_model.ActionRun) error {
|
||||
return err
|
||||
}
|
||||
|
||||
actions_model.UpdateRepoRunsNumbers(ctx, repoID)
|
||||
|
||||
// Delete files on storage
|
||||
for _, tas := range tasks {
|
||||
removeTaskLog(ctx, tas)
|
||||
|
||||
@ -78,7 +78,11 @@ func NotifyWorkflowRunStatusUpdate(ctx context.Context, run *actions_model.Actio
|
||||
}
|
||||
triggerUser = attempt.TriggerUser
|
||||
}
|
||||
|
||||
notify_service.WorkflowRunStatusUpdate(ctx, run.Repo, triggerUser, run)
|
||||
|
||||
// Recomputes the repository's num_action_runs / num_closed_action_runs counters since the run's status changed
|
||||
actions_model.UpdateRepoRunsNumbers(ctx, run.RepoID)
|
||||
}
|
||||
|
||||
// NotifyWorkflowJobsStatusUpdate notifies status updates for jobs without task.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user