mirror of
https://github.com/go-gitea/gitea.git
synced 2026-06-07 21:54:16 +02:00
fix: bound CODEOWNERS regex match time (#38011)
User-supplied CODEOWNERS patterns were compiled without a match timeout, so a crafted pattern (e.g. (a+)+) against a crafted file path could backtrack for tens of seconds inside the PR creation transaction and exhaust the database connection pool. Set MatchTimeout on each compiled rule; the caller already treats match errors as non-matches. --------- Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
parent
e2fbfc8730
commit
ea35af1b68
@ -10,6 +10,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
git_model "gitea.dev/models/git"
|
||||
@ -860,6 +861,11 @@ func GetCodeOwnersFromContent(ctx context.Context, data string) ([]*CodeOwnerRul
|
||||
return rules, warnings
|
||||
}
|
||||
|
||||
// codeOwnerMatchTimeout bounds a single pattern match so a crafted pattern
|
||||
// cannot stall via catastrophic backtracking. See also the aggregate budget
|
||||
// enforced by the caller across the whole rules×files match loop.
|
||||
const codeOwnerMatchTimeout = 150 * time.Millisecond
|
||||
|
||||
type CodeOwnerRule struct {
|
||||
Rule *regexp2.Regexp // it supports negative lookahead, does better for end users
|
||||
Negative bool
|
||||
@ -888,6 +894,8 @@ func ParseCodeOwnersLine(ctx context.Context, tokens []string) (*CodeOwnerRule,
|
||||
warnings = append(warnings, fmt.Sprintf("incorrect codeowner regexp: %s", err))
|
||||
return nil, warnings
|
||||
}
|
||||
// Bound matching time so user-supplied patterns cannot stall PR creation via catastrophic backtracking.
|
||||
rule.Rule.MatchTimeout = codeOwnerMatchTimeout
|
||||
|
||||
for _, user := range tokens[1:] {
|
||||
user = strings.TrimPrefix(user, "@")
|
||||
|
||||
@ -4,7 +4,9 @@
|
||||
package issues_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dev/models/db"
|
||||
issues_model "gitea.dev/models/issues"
|
||||
@ -39,6 +41,7 @@ func TestPullRequest(t *testing.T) {
|
||||
t.Run("DeleteOrphanedObjects", testDeleteOrphanedObjects)
|
||||
t.Run("ParseCodeOwnersLine", testParseCodeOwnersLine)
|
||||
t.Run("CodeOwnerAbsolutePathPatterns", testCodeOwnerAbsolutePathPatterns)
|
||||
t.Run("CodeOwnerPatternMatchTimeout", testCodeOwnerPatternMatchTimeout)
|
||||
t.Run("GetApprovers", testGetApprovers)
|
||||
t.Run("GetPullRequestByMergedCommit", testGetPullRequestByMergedCommit)
|
||||
t.Run("Migrate_InsertPullRequests", testMigrateInsertPullRequests)
|
||||
@ -376,6 +379,22 @@ func testCodeOwnerAbsolutePathPatterns(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// testCodeOwnerPatternMatchTimeout ensures user-supplied CODEOWNERS patterns
|
||||
// cannot stall pull request processing through catastrophic regex backtracking:
|
||||
// each compiled rule must enforce a bounded match time.
|
||||
func testCodeOwnerPatternMatchTimeout(t *testing.T) {
|
||||
rules, _ := issues_model.GetCodeOwnersFromContent(t.Context(), "(a+)+ @user5\n")
|
||||
require.Len(t, rules, 1)
|
||||
|
||||
maliciousInput := strings.Repeat("a", 30) + "X"
|
||||
start := time.Now()
|
||||
_, err := rules[0].Rule.MatchString(maliciousInput)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Error(t, err, "expected MatchTimeout error on pathological input")
|
||||
assert.Less(t, elapsed, time.Second, "match timeout did not bound regex evaluation; took %s", elapsed)
|
||||
}
|
||||
|
||||
func testGetApprovers(t *testing.T) {
|
||||
pr := unittest.AssertExistsAndLoadBean(t, &issues_model.PullRequest{ID: 5})
|
||||
// Official reviews are already deduplicated. Allow unofficial reviews
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
issues_model "gitea.dev/models/issues"
|
||||
org_model "gitea.dev/models/organization"
|
||||
@ -26,6 +27,10 @@ type ReviewRequestNotifier struct {
|
||||
|
||||
var codeOwnerFiles = []string{"CODEOWNERS", "docs/CODEOWNERS", ".gitea/CODEOWNERS"}
|
||||
|
||||
// codeOwnerMatchBudget caps the total wall-clock time spent evaluating all
|
||||
// CODEOWNERS rules against all changed files for a single PR.
|
||||
const codeOwnerMatchBudget = 2 * time.Second
|
||||
|
||||
func IsCodeOwnerFile(f string) bool {
|
||||
return slices.Contains(codeOwnerFiles, f)
|
||||
}
|
||||
@ -93,8 +98,17 @@ func PullRequestCodeOwnersReview(ctx context.Context, pr *issues_model.PullReque
|
||||
|
||||
uniqUsers := make(map[int64]*user_model.User)
|
||||
uniqTeams := make(map[string]*org_model.Team)
|
||||
// Bound the total time spent matching rules×files. The per-rule MatchTimeout
|
||||
// only caps a single match; without an aggregate budget a crafted CODEOWNERS
|
||||
// plus a PR touching many files could still exhaust CPU inside this loop.
|
||||
matchDeadline := time.Now().Add(codeOwnerMatchBudget)
|
||||
ruleLoop:
|
||||
for _, rule := range rules {
|
||||
for _, f := range changedFiles {
|
||||
if time.Now().After(matchDeadline) {
|
||||
log.Warn("CODEOWNERS matching for PR %s#%d exceeded its time budget; some rules were not evaluated", pr.BaseRepo.FullName(), pr.ID)
|
||||
break ruleLoop
|
||||
}
|
||||
shouldMatch := !rule.Negative
|
||||
matched, _ := rule.Rule.MatchString(f) // err only happens when timeouts, any error can be considered as not matched
|
||||
if matched == shouldMatch {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user