From ea35af1b68d57522c7686618bd61d3216d91589f Mon Sep 17 00:00:00 2001 From: bircni Date: Sun, 7 Jun 2026 17:30:18 +0200 Subject: [PATCH] fix: bound CODEOWNERS regex match time (#38011) User-supplied CODEOWNERS patterns were compiled without a match timeout, so a crafted pattern (e.g. (a+)+) against a crafted file path could backtrack for tens of seconds inside the PR creation transaction and exhaust the database connection pool. Set MatchTimeout on each compiled rule; the caller already treats match errors as non-matches. --------- Signed-off-by: wxiaoguang Co-authored-by: wxiaoguang --- models/issues/pull.go | 8 ++++++++ models/issues/pull_test.go | 19 +++++++++++++++++++ services/issue/pull.go | 14 ++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/models/issues/pull.go b/models/issues/pull.go index 7dbcef0d3f..2b93b926b0 100644 --- a/models/issues/pull.go +++ b/models/issues/pull.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "strings" + "time" "gitea.dev/models/db" git_model "gitea.dev/models/git" @@ -860,6 +861,11 @@ func GetCodeOwnersFromContent(ctx context.Context, data string) ([]*CodeOwnerRul return rules, warnings } +// codeOwnerMatchTimeout bounds a single pattern match so a crafted pattern +// cannot stall via catastrophic backtracking. See also the aggregate budget +// enforced by the caller across the whole rules×files match loop. +const codeOwnerMatchTimeout = 150 * time.Millisecond + type CodeOwnerRule struct { Rule *regexp2.Regexp // it supports negative lookahead, does better for end users Negative bool @@ -888,6 +894,8 @@ func ParseCodeOwnersLine(ctx context.Context, tokens []string) (*CodeOwnerRule, warnings = append(warnings, fmt.Sprintf("incorrect codeowner regexp: %s", err)) return nil, warnings } + // Bound matching time so user-supplied patterns cannot stall PR creation via catastrophic backtracking. + rule.Rule.MatchTimeout = codeOwnerMatchTimeout for _, user := range tokens[1:] { user = strings.TrimPrefix(user, "@") diff --git a/models/issues/pull_test.go b/models/issues/pull_test.go index 166fdd8482..bd7499fa80 100644 --- a/models/issues/pull_test.go +++ b/models/issues/pull_test.go @@ -4,7 +4,9 @@ package issues_test import ( + "strings" "testing" + "time" "gitea.dev/models/db" issues_model "gitea.dev/models/issues" @@ -39,6 +41,7 @@ func TestPullRequest(t *testing.T) { t.Run("DeleteOrphanedObjects", testDeleteOrphanedObjects) t.Run("ParseCodeOwnersLine", testParseCodeOwnersLine) t.Run("CodeOwnerAbsolutePathPatterns", testCodeOwnerAbsolutePathPatterns) + t.Run("CodeOwnerPatternMatchTimeout", testCodeOwnerPatternMatchTimeout) t.Run("GetApprovers", testGetApprovers) t.Run("GetPullRequestByMergedCommit", testGetPullRequestByMergedCommit) t.Run("Migrate_InsertPullRequests", testMigrateInsertPullRequests) @@ -376,6 +379,22 @@ func testCodeOwnerAbsolutePathPatterns(t *testing.T) { } } +// testCodeOwnerPatternMatchTimeout ensures user-supplied CODEOWNERS patterns +// cannot stall pull request processing through catastrophic regex backtracking: +// each compiled rule must enforce a bounded match time. +func testCodeOwnerPatternMatchTimeout(t *testing.T) { + rules, _ := issues_model.GetCodeOwnersFromContent(t.Context(), "(a+)+ @user5\n") + require.Len(t, rules, 1) + + maliciousInput := strings.Repeat("a", 30) + "X" + start := time.Now() + _, err := rules[0].Rule.MatchString(maliciousInput) + elapsed := time.Since(start) + + require.Error(t, err, "expected MatchTimeout error on pathological input") + assert.Less(t, elapsed, time.Second, "match timeout did not bound regex evaluation; took %s", elapsed) +} + func testGetApprovers(t *testing.T) { pr := unittest.AssertExistsAndLoadBean(t, &issues_model.PullRequest{ID: 5}) // Official reviews are already deduplicated. Allow unofficial reviews diff --git a/services/issue/pull.go b/services/issue/pull.go index 055c1023b4..260ac5ceae 100644 --- a/services/issue/pull.go +++ b/services/issue/pull.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "slices" + "time" issues_model "gitea.dev/models/issues" org_model "gitea.dev/models/organization" @@ -26,6 +27,10 @@ type ReviewRequestNotifier struct { var codeOwnerFiles = []string{"CODEOWNERS", "docs/CODEOWNERS", ".gitea/CODEOWNERS"} +// codeOwnerMatchBudget caps the total wall-clock time spent evaluating all +// CODEOWNERS rules against all changed files for a single PR. +const codeOwnerMatchBudget = 2 * time.Second + func IsCodeOwnerFile(f string) bool { return slices.Contains(codeOwnerFiles, f) } @@ -93,8 +98,17 @@ func PullRequestCodeOwnersReview(ctx context.Context, pr *issues_model.PullReque uniqUsers := make(map[int64]*user_model.User) uniqTeams := make(map[string]*org_model.Team) + // Bound the total time spent matching rules×files. The per-rule MatchTimeout + // only caps a single match; without an aggregate budget a crafted CODEOWNERS + // plus a PR touching many files could still exhaust CPU inside this loop. + matchDeadline := time.Now().Add(codeOwnerMatchBudget) +ruleLoop: for _, rule := range rules { for _, f := range changedFiles { + if time.Now().After(matchDeadline) { + log.Warn("CODEOWNERS matching for PR %s#%d exceeded its time budget; some rules were not evaluated", pr.BaseRepo.FullName(), pr.ID) + break ruleLoop + } shouldMatch := !rule.Negative matched, _ := rule.Rule.MatchString(f) // err only happens when timeouts, any error can be considered as not matched if matched == shouldMatch {