diff --git a/modules/markup/html.go b/modules/markup/html.go index 0fe37ae305..0b4d68caa7 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -62,11 +62,11 @@ var globalVars = sync.OnceValue(func() *globalVarsType { // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax v.shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) - // anyHashPattern splits url containing SHA into parts - v.anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})((\.\w+)*)(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`) + // anyHashPattern finds candidate commit/archive URLs; anyHashPatternExtract validates shape via ParseGiteaSiteURL. + v.anyHashPattern = regexp.MustCompile(`https?://\S+/(?:commit|archive)/([0-9a-f]{7,64})((?:\.\w+)*)(/[-+~%./\w]+)?(?:\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`) - // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" - v.comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) + // comparePattern finds candidate compare URLs; comparePatternExtract validates shape via ParseGiteaSiteURL. + v.comparePattern = regexp.MustCompile(`https?://\S+/compare/([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) // fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..." v.fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`) diff --git a/modules/markup/html_commit.go b/modules/markup/html_commit.go index a2c5160674..9683058a97 100644 --- a/modules/markup/html_commit.go +++ b/modules/markup/html_commit.go @@ -4,6 +4,7 @@ package markup import ( + "context" "fmt" "slices" "strings" @@ -17,14 +18,13 @@ import ( ) type anyHashPatternResult struct { - PosStart int - PosEnd int - FullURL string - CommitID string - CommitExt string - SubPath string - QueryParams string - QueryHash string + PosStart int + PosEnd int + FullURL string + CommitID string + CommitExt string + SubPath string + QueryHash string } func createCodeLink(href, content, class string) *html.Node { @@ -53,7 +53,31 @@ func createCodeLink(href, content, class string) *html.Node { return a } -func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) { +// stripTrailingSentencePeriod trims a trailing '.' that is likely sentence punctuation rather than part of the URL. +// It also clamps capture-group indices in m in place so they don't point past the trimmed URL. +func stripTrailingSentencePeriod(fullURL string, posEnd int, m []int) (string, int) { + if !strings.HasSuffix(fullURL, ".") { + return fullURL, posEnd + } + posEnd-- + fullURL = fullURL[:len(fullURL)-1] + for i := range m { + m[i] = min(m[i], posEnd) + } + return fullURL, posEnd +} + +// isRepoCommitRoutePath reports whether path (a Gitea repo subpath) identifies a commit by hash. +// It accepts `/commit/...`, `/archive/...`, or `//commit/...` (Gitea's RefTypeCommit route shape). +func isRepoCommitRoutePath(path string) bool { + if strings.HasPrefix(path, "/commit/") || strings.HasPrefix(path, "/archive/") { + return true + } + _, rest, ok := strings.Cut(strings.TrimPrefix(path, "/"), "/") + return ok && strings.HasPrefix(rest, "commit/") +} + +func anyHashPatternExtract(ctx context.Context, s string) (ret anyHashPatternResult, ok bool) { m := globalVars().anyHashPattern.FindStringSubmatchIndex(s) if m == nil { return ret, false @@ -65,28 +89,22 @@ func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) { pos += 2 ret.FullURL = s[ret.PosStart:ret.PosEnd] - if strings.HasSuffix(ret.FullURL, ".") { - // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence. - ret.PosEnd-- - ret.FullURL = ret.FullURL[:len(ret.FullURL)-1] - for i := range m { - m[i] = min(m[i], ret.PosEnd) - } + ret.FullURL, ret.PosEnd = stripTrailingSentencePeriod(ret.FullURL, ret.PosEnd, m) + + // reject URLs outside this Gitea instance or not shaped as a repo commit-route path + parsed := httplib.ParseGiteaSiteURL(ctx, ret.FullURL) + if parsed == nil || !isRepoCommitRoutePath(parsed.RepoSubPath) { + return ret, false } ret.CommitID = s[m[pos]:m[pos+1]] pos += 2 ret.CommitExt = s[m[pos]:m[pos+1]] - pos += 4 - - if m[pos] > 0 { - ret.SubPath = s[m[pos]:m[pos+1]] - } pos += 2 if m[pos] > 0 { - ret.QueryParams = s[m[pos]:m[pos+1]] + ret.SubPath = s[m[pos]:m[pos+1]] } pos += 2 @@ -107,7 +125,7 @@ func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { node = node.NextSibling continue } - ret, ok := anyHashPatternExtract(node.Data) + ret, ok := anyHashPatternExtract(ctx, node.Data) if !ok { node = node.NextSibling continue @@ -122,16 +140,46 @@ func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { if ret.QueryHash != "" { text += " (" + ret.QueryHash + ")" } - // only turn commit links to the current instance into hash link - if !httplib.IsCurrentGiteaSiteURL(ctx, ret.FullURL) { - node = node.NextSibling - continue - } replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit")) node = node.NextSibling.NextSibling } } +type comparePatternResult struct { + PosStart int + PosEnd int + FullURL string + Hash1 string + Dots string + Hash2 string + Fragment string +} + +func comparePatternExtract(ctx context.Context, s string) (ret comparePatternResult, ok bool) { + m := globalVars().comparePattern.FindStringSubmatchIndex(s) + if m == nil || slices.Contains(m[:8], -1) { // full match + hash1 + dots + hash2 all required + return ret, false + } + + ret.PosStart, ret.PosEnd = m[0], m[1] + ret.FullURL = s[ret.PosStart:ret.PosEnd] + ret.FullURL, ret.PosEnd = stripTrailingSentencePeriod(ret.FullURL, ret.PosEnd, m) + + // reject URLs outside this Gitea instance or not shaped as /{owner}/{repo}/compare/... + parsed := httplib.ParseGiteaSiteURL(ctx, ret.FullURL) + if parsed == nil || !strings.HasPrefix(parsed.RepoSubPath, "/compare/") { + return ret, false + } + + ret.Hash1 = s[m[2]:m[3]] + ret.Dots = s[m[4]:m[5]] + ret.Hash2 = s[m[6]:m[7]] + if m[9] > 0 { + ret.Fragment = s[m[8]:m[9]][1:] + } + return ret, true +} + func comparePatternProcessor(ctx *RenderContext, node *html.Node) { if ctx.RenderOptions.Metas == nil { return @@ -142,48 +190,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) { node = node.NextSibling continue } - m := globalVars().comparePattern.FindStringSubmatchIndex(node.Data) - if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match + ret, ok := comparePatternExtract(ctx, node.Data) + if !ok { node = node.NextSibling continue } - - urlFull := node.Data[m[0]:m[1]] - text1 := base.ShortSha(node.Data[m[2]:m[3]]) - textDots := base.ShortSha(node.Data[m[4]:m[5]]) - text2 := base.ShortSha(node.Data[m[6]:m[7]]) - - hash := "" - if m[9] > 0 { - hash = node.Data[m[8]:m[9]][1:] + text := base.ShortSha(ret.Hash1) + ret.Dots + base.ShortSha(ret.Hash2) + if ret.Fragment != "" { + text += " (" + ret.Fragment + ")" } - - start := m[0] - end := m[1] - - // If url ends in '.', it's very likely that it is not part of the - // actual url but used to finish a sentence. - if strings.HasSuffix(urlFull, ".") { - end-- - urlFull = urlFull[:len(urlFull)-1] - if hash != "" { - hash = hash[:len(hash)-1] - } else if text2 != "" { - text2 = text2[:len(text2)-1] - } - } - - // only turn compare links to the current instance into hash link - if !httplib.IsCurrentGiteaSiteURL(ctx, urlFull) { - node = node.NextSibling - continue - } - - text := text1 + textDots + text2 - if hash != "" { - text += " (" + hash + ")" - } - replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) + replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "compare")) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go index 3a91797e25..4083777cdc 100644 --- a/modules/markup/html_internal_test.go +++ b/modules/markup/html_internal_test.go @@ -355,64 +355,197 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) { } func TestRegExp_anySHA1Pattern(t *testing.T) { + defer testModule.MockVariableValue(&setting.AppURL, TestAppURL)() + defer testModule.MockVariableValue(&setting.AppSubURL, "")() + testCases := map[string]anyHashPatternResult{ - "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { - CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - SubPath: "/test/unit/event.js", - QueryHash: "L2703", - }, - "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { - CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - SubPath: "/test/unit/event.js", - }, - "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { + "http://localhost:3000/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { CommitID: "0705be475092aede1eddae01319ec931fb9c65fc", }, - "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { - CommitID: "0705be475092aede1eddae01319ec931fb9c65fc", - SubPath: "/src", - }, - "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { + "http://localhost:3000/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae", QueryHash: "diff-2", }, - "non-url": {}, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": { + "http://localhost:3000/jquery/jquery/commit/0705be475092aede1eddae01": { + CommitID: "0705be475092aede1eddae01", + }, + "http://localhost:3000/jquery/jquery/commit/0705be4": { + CommitID: "0705be4", + }, + "http://localhost:3000/org/repo/commit/abc1234/file.go": { + CommitID: "abc1234", + SubPath: "/file.go", + }, + "http://localhost:3000/org/repo/commit/abc1234#L5-L10": { + CommitID: "abc1234", + QueryHash: "L5-L10", + }, + "http://localhost:3000/org/repo/commit/abc1234?w=1": { + CommitID: "abc1234", + }, + // .patch/.diff are Gitea routes for the commit's raw diff + "http://localhost:3000/org/repo/commit/abc1234.patch": { + CommitID: "abc1234", + CommitExt: ".patch", + }, + "http://localhost:3000/org/repo/commit/abc1234d.diff": { + CommitID: "abc1234d", + CommitExt: ".diff", + }, + // /archive/{hash}.tar.gz is a Gitea route for downloading a commit archive + "http://localhost:3000/org/repo/archive/0123456789012345678901234567890123456789.tar.gz": { + CommitID: "0123456789012345678901234567890123456789", + CommitExt: ".tar.gz", + }, + "http://localhost:3000/org/repo/commit/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": { CommitID: "1234567812345678123456781234567812345678123456781234567812345678", QueryHash: "L1-L2", }, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": { - CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + "http://localhost:3000/org/repo/commit/1234567812345678123456781234567812345678.": { + CommitID: "1234567812345678123456781234567812345678", }, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": { - CommitID: "1234567812345678123456781234567812345678123456781234567812345678", - SubPath: "/sub", - }, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": { - CommitID: "1234567812345678123456781234567812345678123456781234567812345678", - }, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": { - CommitID: "1234567812345678123456781234567812345678123456781234567812345678", - }, - "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": { - CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + "http://localhost:3000/org/repo/commit/abc1234#hash.": { + CommitID: "abc1234", QueryHash: "hash", }, + // Gitea routes that reference a commit by hash (RefTypeCommit) + "http://localhost:3000/org/repo/src/commit/abc1234/README.md": { + CommitID: "abc1234", + SubPath: "/README.md", + }, + "http://localhost:3000/org/repo/src/commit/abc1234/README.md#L5-L10": { + CommitID: "abc1234", + SubPath: "/README.md", + QueryHash: "L5-L10", + }, + "http://localhost:3000/org/repo/raw/commit/abc1234/README.md": { + CommitID: "abc1234", + SubPath: "/README.md", + }, + "http://localhost:3000/org/repo/render/commit/abc1234/README.md": { + CommitID: "abc1234", + SubPath: "/README.md", + }, + "http://localhost:3000/org/repo/blame/commit/abc1234/README.md": { + CommitID: "abc1234", + SubPath: "/README.md", + }, + "http://localhost:3000/org/repo/commits/commit/abc1234": { + CommitID: "abc1234", + }, + + // cross-site URLs are rejected + "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {}, + // directory named `commit` deep in a file path + "http://localhost:3000/org/repo/src/main/sub-dir/commit/20260304.txt": {}, + // file-view URLs by branch name are not hash-referencing + "http://localhost:3000/foo/bar/src/main/20260304.txt": {}, + // GitHub-style /blob/ and /tree/ URLs redirect to /src/... and are never hash-anchored commit URLs directly + "http://localhost:3000/foo/bar/blob/main/abcdef1/file": {}, + "http://localhost:3000/foo/bar/tree/0705be475092aede1eddae01/src": {}, + "non-url": {}, } for k, v := range testCases { - ret, ok := anyHashPatternExtract(k) + ret, ok := anyHashPatternExtract(t.Context(), k) if v.CommitID == "" { - assert.False(t, ok) + assert.False(t, ok, "expected no match for %q", k) } else { assert.Equal(t, strings.TrimSuffix(k, "."), ret.FullURL) assert.Equal(t, v.CommitID, ret.CommitID) + assert.Equal(t, v.CommitExt, ret.CommitExt) assert.Equal(t, v.SubPath, ret.SubPath) assert.Equal(t, v.QueryHash, ret.QueryHash) } } } +func TestRegExp_anySHA1Pattern_AppSubURL(t *testing.T) { + // multi-segment AppSubURL deployments are supported: ParseGiteaSiteURL strips the prefix. + defer testModule.MockVariableValue(&setting.AppURL, "http://localhost:3000/a/b/c/")() + defer testModule.MockVariableValue(&setting.AppSubURL, "/a/b/c")() + + ret, ok := anyHashPatternExtract(t.Context(), "http://localhost:3000/a/b/c/org/repo/commit/abc1234") + assert.True(t, ok) + assert.Equal(t, "abc1234", ret.CommitID) + + _, ok = anyHashPatternExtract(t.Context(), "http://localhost:3000/org/repo/commit/abc1234") + assert.False(t, ok, "URL outside AppSubURL must be rejected") +} + +func TestRegExp_comparePattern(t *testing.T) { + defer testModule.MockVariableValue(&setting.AppURL, TestAppURL)() + defer testModule.MockVariableValue(&setting.AppSubURL, "")() + + hash1 := "0705be475092aede1eddae01319ec931fb9c65fc" + hash2 := "d8a994ef243349f321568f9e36d5c3f444b99cae" + + testCases := map[string]comparePatternResult{ + "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2: { + Hash1: hash1, Dots: "...", Hash2: hash2, + }, + // two-dot form + "http://localhost:3000/org/repo/compare/" + hash1 + ".." + hash2: { + Hash1: hash1, Dots: "..", Hash2: hash2, + }, + // short hashes + "http://localhost:3000/org/repo/compare/0705be4...d8a994e": { + Hash1: "0705be4", Dots: "...", Hash2: "d8a994e", + }, + // fragment + "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + "#diff-2": { + Hash1: hash1, Dots: "...", Hash2: hash2, Fragment: "diff-2", + }, + // trailing sentence period after hash2 is stripped + "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + ".": { + Hash1: hash1, Dots: "...", Hash2: hash2, + }, + // trailing sentence period after fragment is stripped + "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + "#diff-2.": { + Hash1: hash1, Dots: "...", Hash2: hash2, Fragment: "diff-2", + }, + + // false positives that the old regex accepted (directory/file named with hash-range shape) + "http://localhost:3000/org/repo/src/" + hash1 + "..." + hash2: {}, + "http://localhost:3000/org/repo/releases/" + hash1 + "..." + hash2: {}, + "http://localhost:3000/org/repo/src/branch/main/sub-dir/compare/" + hash1 + "..." + hash2: {}, + // missing second hash (compare requires both) + "http://localhost:3000/org/repo/compare/" + hash1 + "...": {}, + // cross-site + "https://github.com/jquery/jquery/compare/" + hash1 + "..." + hash2: {}, + "non-url": {}, + } + + for k, v := range testCases { + ret, ok := comparePatternExtract(t.Context(), k) + if v.Hash1 == "" { + assert.False(t, ok, "expected no match for %q", k) + } else { + assert.Equal(t, strings.TrimSuffix(k, "."), ret.FullURL) + assert.Equal(t, v.Hash1, ret.Hash1) + assert.Equal(t, v.Dots, ret.Dots) + assert.Equal(t, v.Hash2, ret.Hash2) + assert.Equal(t, v.Fragment, ret.Fragment) + } + } +} + +func TestRegExp_comparePattern_AppSubURL(t *testing.T) { + defer testModule.MockVariableValue(&setting.AppURL, "http://localhost:3000/a/b/c/")() + defer testModule.MockVariableValue(&setting.AppSubURL, "/a/b/c")() + + hash1 := "0705be475092aede1eddae01319ec931fb9c65fc" + hash2 := "d8a994ef243349f321568f9e36d5c3f444b99cae" + + ret, ok := comparePatternExtract(t.Context(), "http://localhost:3000/a/b/c/org/repo/compare/"+hash1+"..."+hash2) + assert.True(t, ok) + assert.Equal(t, hash1, ret.Hash1) + assert.Equal(t, hash2, ret.Hash2) + + _, ok = comparePatternExtract(t.Context(), "http://localhost:3000/org/repo/compare/"+hash1+"..."+hash2) + assert.False(t, ok, "URL outside AppSubURL must be rejected") +} + func TestRegExp_shortLinkPattern(t *testing.T) { trueTestCases := []string{ "[[stuff]]", diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 4fa9466d19..3e8f4f7c65 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -36,7 +36,6 @@ func TestRender_Commits(t *testing.T) { repo := markup.TestAppURL + testRepoOwnerName + "/" + testRepoName + "/" commit := repo + "commit/" + sha commitPath := "/user13/repo11/commit/" + sha - tree := repo + "tree/" + sha + "/src" file := repo + "commit/" + sha + "/example.txt" fileWithExtra := file + ":" @@ -49,7 +48,6 @@ func TestRender_Commits(t *testing.T) { test(sha[:7], `

65f1bf2

`) test(sha[:39], `

65f1bf27bc

`) test(commit, `

65f1bf27bc

`) - test(tree, `

65f1bf27bc/src

`) test(file, `

65f1bf27bc/example.txt

`) test(fileWithExtra, `

65f1bf27bc/example.txt:

`) @@ -113,6 +111,16 @@ func TestRender_CrossReferences(t *testing.T) { test( inputURL, `

0123456789.patch

`) + + inputURL = setting.AppURL + "owner/repo/commit/01234567890123456789012345" + test( + inputURL, + `

0123456789

`) + + inputURL = setting.AppURL + "owner/repo/commit/0123456" + test( + inputURL, + `

0123456

`) } func TestRender_links(t *testing.T) {