diff --git a/modules/markup/html.go b/modules/markup/html.go
index 0fe37ae305..0b4d68caa7 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -62,11 +62,11 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
v.shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
- // anyHashPattern splits url containing SHA into parts
- v.anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})((\.\w+)*)(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
+ // anyHashPattern finds candidate commit/archive URLs; anyHashPatternExtract validates shape via ParseGiteaSiteURL.
+ v.anyHashPattern = regexp.MustCompile(`https?://\S+/(?:commit|archive)/([0-9a-f]{7,64})((?:\.\w+)*)(/[-+~%./\w]+)?(?:\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
- // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
- v.comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
+ // comparePattern finds candidate compare URLs; comparePatternExtract validates shape via ParseGiteaSiteURL.
+ v.comparePattern = regexp.MustCompile(`https?://\S+/compare/([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..."
v.fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`)
diff --git a/modules/markup/html_commit.go b/modules/markup/html_commit.go
index a2c5160674..9683058a97 100644
--- a/modules/markup/html_commit.go
+++ b/modules/markup/html_commit.go
@@ -4,6 +4,7 @@
package markup
import (
+ "context"
"fmt"
"slices"
"strings"
@@ -17,14 +18,13 @@ import (
)
type anyHashPatternResult struct {
- PosStart int
- PosEnd int
- FullURL string
- CommitID string
- CommitExt string
- SubPath string
- QueryParams string
- QueryHash string
+ PosStart int
+ PosEnd int
+ FullURL string
+ CommitID string
+ CommitExt string
+ SubPath string
+ QueryHash string
}
func createCodeLink(href, content, class string) *html.Node {
@@ -53,7 +53,31 @@ func createCodeLink(href, content, class string) *html.Node {
return a
}
-func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
+// stripTrailingSentencePeriod trims a trailing '.' that is likely sentence punctuation rather than part of the URL.
+// It also clamps capture-group indices in m in place so they don't point past the trimmed URL.
+func stripTrailingSentencePeriod(fullURL string, posEnd int, m []int) (string, int) {
+ if !strings.HasSuffix(fullURL, ".") {
+ return fullURL, posEnd
+ }
+ posEnd--
+ fullURL = fullURL[:len(fullURL)-1]
+ for i := range m {
+ m[i] = min(m[i], posEnd)
+ }
+ return fullURL, posEnd
+}
+
+// isRepoCommitRoutePath reports whether path (a Gitea repo subpath) identifies a commit by hash.
+// It accepts `/commit/...`, `/archive/...`, or `//commit/...` (Gitea's RefTypeCommit route shape).
+func isRepoCommitRoutePath(path string) bool {
+ if strings.HasPrefix(path, "/commit/") || strings.HasPrefix(path, "/archive/") {
+ return true
+ }
+ _, rest, ok := strings.Cut(strings.TrimPrefix(path, "/"), "/")
+ return ok && strings.HasPrefix(rest, "commit/")
+}
+
+func anyHashPatternExtract(ctx context.Context, s string) (ret anyHashPatternResult, ok bool) {
m := globalVars().anyHashPattern.FindStringSubmatchIndex(s)
if m == nil {
return ret, false
@@ -65,28 +89,22 @@ func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
pos += 2
ret.FullURL = s[ret.PosStart:ret.PosEnd]
- if strings.HasSuffix(ret.FullURL, ".") {
- // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
- ret.PosEnd--
- ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
- for i := range m {
- m[i] = min(m[i], ret.PosEnd)
- }
+ ret.FullURL, ret.PosEnd = stripTrailingSentencePeriod(ret.FullURL, ret.PosEnd, m)
+
+ // reject URLs outside this Gitea instance or not shaped as a repo commit-route path
+ parsed := httplib.ParseGiteaSiteURL(ctx, ret.FullURL)
+ if parsed == nil || !isRepoCommitRoutePath(parsed.RepoSubPath) {
+ return ret, false
}
ret.CommitID = s[m[pos]:m[pos+1]]
pos += 2
ret.CommitExt = s[m[pos]:m[pos+1]]
- pos += 4
-
- if m[pos] > 0 {
- ret.SubPath = s[m[pos]:m[pos+1]]
- }
pos += 2
if m[pos] > 0 {
- ret.QueryParams = s[m[pos]:m[pos+1]]
+ ret.SubPath = s[m[pos]:m[pos+1]]
}
pos += 2
@@ -107,7 +125,7 @@ func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
node = node.NextSibling
continue
}
- ret, ok := anyHashPatternExtract(node.Data)
+ ret, ok := anyHashPatternExtract(ctx, node.Data)
if !ok {
node = node.NextSibling
continue
@@ -122,16 +140,46 @@ func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
if ret.QueryHash != "" {
text += " (" + ret.QueryHash + ")"
}
- // only turn commit links to the current instance into hash link
- if !httplib.IsCurrentGiteaSiteURL(ctx, ret.FullURL) {
- node = node.NextSibling
- continue
- }
replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
node = node.NextSibling.NextSibling
}
}
+type comparePatternResult struct {
+ PosStart int
+ PosEnd int
+ FullURL string
+ Hash1 string
+ Dots string
+ Hash2 string
+ Fragment string
+}
+
+func comparePatternExtract(ctx context.Context, s string) (ret comparePatternResult, ok bool) {
+ m := globalVars().comparePattern.FindStringSubmatchIndex(s)
+ if m == nil || slices.Contains(m[:8], -1) { // full match + hash1 + dots + hash2 all required
+ return ret, false
+ }
+
+ ret.PosStart, ret.PosEnd = m[0], m[1]
+ ret.FullURL = s[ret.PosStart:ret.PosEnd]
+ ret.FullURL, ret.PosEnd = stripTrailingSentencePeriod(ret.FullURL, ret.PosEnd, m)
+
+ // reject URLs outside this Gitea instance or not shaped as /{owner}/{repo}/compare/...
+ parsed := httplib.ParseGiteaSiteURL(ctx, ret.FullURL)
+ if parsed == nil || !strings.HasPrefix(parsed.RepoSubPath, "/compare/") {
+ return ret, false
+ }
+
+ ret.Hash1 = s[m[2]:m[3]]
+ ret.Dots = s[m[4]:m[5]]
+ ret.Hash2 = s[m[6]:m[7]]
+ if m[9] > 0 {
+ ret.Fragment = s[m[8]:m[9]][1:]
+ }
+ return ret, true
+}
+
func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.RenderOptions.Metas == nil {
return
@@ -142,48 +190,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
node = node.NextSibling
continue
}
- m := globalVars().comparePattern.FindStringSubmatchIndex(node.Data)
- if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
+ ret, ok := comparePatternExtract(ctx, node.Data)
+ if !ok {
node = node.NextSibling
continue
}
-
- urlFull := node.Data[m[0]:m[1]]
- text1 := base.ShortSha(node.Data[m[2]:m[3]])
- textDots := base.ShortSha(node.Data[m[4]:m[5]])
- text2 := base.ShortSha(node.Data[m[6]:m[7]])
-
- hash := ""
- if m[9] > 0 {
- hash = node.Data[m[8]:m[9]][1:]
+ text := base.ShortSha(ret.Hash1) + ret.Dots + base.ShortSha(ret.Hash2)
+ if ret.Fragment != "" {
+ text += " (" + ret.Fragment + ")"
}
-
- start := m[0]
- end := m[1]
-
- // If url ends in '.', it's very likely that it is not part of the
- // actual url but used to finish a sentence.
- if strings.HasSuffix(urlFull, ".") {
- end--
- urlFull = urlFull[:len(urlFull)-1]
- if hash != "" {
- hash = hash[:len(hash)-1]
- } else if text2 != "" {
- text2 = text2[:len(text2)-1]
- }
- }
-
- // only turn compare links to the current instance into hash link
- if !httplib.IsCurrentGiteaSiteURL(ctx, urlFull) {
- node = node.NextSibling
- continue
- }
-
- text := text1 + textDots + text2
- if hash != "" {
- text += " (" + hash + ")"
- }
- replaceContent(node, start, end, createCodeLink(urlFull, text, "compare"))
+ replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "compare"))
node = node.NextSibling.NextSibling
}
}
diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go
index 3a91797e25..4083777cdc 100644
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -355,64 +355,197 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
}
func TestRegExp_anySHA1Pattern(t *testing.T) {
+ defer testModule.MockVariableValue(&setting.AppURL, TestAppURL)()
+ defer testModule.MockVariableValue(&setting.AppSubURL, "")()
+
testCases := map[string]anyHashPatternResult{
- "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
- CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- SubPath: "/test/unit/event.js",
- QueryHash: "L2703",
- },
- "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
- CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- SubPath: "/test/unit/event.js",
- },
- "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
+ "http://localhost:3000/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
},
- "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
- CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
- SubPath: "/src",
- },
- "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
+ "http://localhost:3000/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae",
QueryHash: "diff-2",
},
- "non-url": {},
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
+ "http://localhost:3000/jquery/jquery/commit/0705be475092aede1eddae01": {
+ CommitID: "0705be475092aede1eddae01",
+ },
+ "http://localhost:3000/jquery/jquery/commit/0705be4": {
+ CommitID: "0705be4",
+ },
+ "http://localhost:3000/org/repo/commit/abc1234/file.go": {
+ CommitID: "abc1234",
+ SubPath: "/file.go",
+ },
+ "http://localhost:3000/org/repo/commit/abc1234#L5-L10": {
+ CommitID: "abc1234",
+ QueryHash: "L5-L10",
+ },
+ "http://localhost:3000/org/repo/commit/abc1234?w=1": {
+ CommitID: "abc1234",
+ },
+ // .patch/.diff are Gitea routes for the commit's raw diff
+ "http://localhost:3000/org/repo/commit/abc1234.patch": {
+ CommitID: "abc1234",
+ CommitExt: ".patch",
+ },
+ "http://localhost:3000/org/repo/commit/abc1234d.diff": {
+ CommitID: "abc1234d",
+ CommitExt: ".diff",
+ },
+ // /archive/{hash}.tar.gz is a Gitea route for downloading a commit archive
+ "http://localhost:3000/org/repo/archive/0123456789012345678901234567890123456789.tar.gz": {
+ CommitID: "0123456789012345678901234567890123456789",
+ CommitExt: ".tar.gz",
+ },
+ "http://localhost:3000/org/repo/commit/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
QueryHash: "L1-L2",
},
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
- CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ "http://localhost:3000/org/repo/commit/1234567812345678123456781234567812345678.": {
+ CommitID: "1234567812345678123456781234567812345678",
},
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
- CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
- SubPath: "/sub",
- },
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
- CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
- },
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
- CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
- },
- "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
- CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ "http://localhost:3000/org/repo/commit/abc1234#hash.": {
+ CommitID: "abc1234",
QueryHash: "hash",
},
+ // Gitea routes that reference a commit by hash (RefTypeCommit)
+ "http://localhost:3000/org/repo/src/commit/abc1234/README.md": {
+ CommitID: "abc1234",
+ SubPath: "/README.md",
+ },
+ "http://localhost:3000/org/repo/src/commit/abc1234/README.md#L5-L10": {
+ CommitID: "abc1234",
+ SubPath: "/README.md",
+ QueryHash: "L5-L10",
+ },
+ "http://localhost:3000/org/repo/raw/commit/abc1234/README.md": {
+ CommitID: "abc1234",
+ SubPath: "/README.md",
+ },
+ "http://localhost:3000/org/repo/render/commit/abc1234/README.md": {
+ CommitID: "abc1234",
+ SubPath: "/README.md",
+ },
+ "http://localhost:3000/org/repo/blame/commit/abc1234/README.md": {
+ CommitID: "abc1234",
+ SubPath: "/README.md",
+ },
+ "http://localhost:3000/org/repo/commits/commit/abc1234": {
+ CommitID: "abc1234",
+ },
+
+ // cross-site URLs are rejected
+ "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {},
+ // directory named `commit` deep in a file path
+ "http://localhost:3000/org/repo/src/main/sub-dir/commit/20260304.txt": {},
+ // file-view URLs by branch name are not hash-referencing
+ "http://localhost:3000/foo/bar/src/main/20260304.txt": {},
+ // GitHub-style /blob/ and /tree/ URLs redirect to /src/... and are never hash-anchored commit URLs directly
+ "http://localhost:3000/foo/bar/blob/main/abcdef1/file": {},
+ "http://localhost:3000/foo/bar/tree/0705be475092aede1eddae01/src": {},
+ "non-url": {},
}
for k, v := range testCases {
- ret, ok := anyHashPatternExtract(k)
+ ret, ok := anyHashPatternExtract(t.Context(), k)
if v.CommitID == "" {
- assert.False(t, ok)
+ assert.False(t, ok, "expected no match for %q", k)
} else {
assert.Equal(t, strings.TrimSuffix(k, "."), ret.FullURL)
assert.Equal(t, v.CommitID, ret.CommitID)
+ assert.Equal(t, v.CommitExt, ret.CommitExt)
assert.Equal(t, v.SubPath, ret.SubPath)
assert.Equal(t, v.QueryHash, ret.QueryHash)
}
}
}
+func TestRegExp_anySHA1Pattern_AppSubURL(t *testing.T) {
+ // multi-segment AppSubURL deployments are supported: ParseGiteaSiteURL strips the prefix.
+ defer testModule.MockVariableValue(&setting.AppURL, "http://localhost:3000/a/b/c/")()
+ defer testModule.MockVariableValue(&setting.AppSubURL, "/a/b/c")()
+
+ ret, ok := anyHashPatternExtract(t.Context(), "http://localhost:3000/a/b/c/org/repo/commit/abc1234")
+ assert.True(t, ok)
+ assert.Equal(t, "abc1234", ret.CommitID)
+
+ _, ok = anyHashPatternExtract(t.Context(), "http://localhost:3000/org/repo/commit/abc1234")
+ assert.False(t, ok, "URL outside AppSubURL must be rejected")
+}
+
+func TestRegExp_comparePattern(t *testing.T) {
+ defer testModule.MockVariableValue(&setting.AppURL, TestAppURL)()
+ defer testModule.MockVariableValue(&setting.AppSubURL, "")()
+
+ hash1 := "0705be475092aede1eddae01319ec931fb9c65fc"
+ hash2 := "d8a994ef243349f321568f9e36d5c3f444b99cae"
+
+ testCases := map[string]comparePatternResult{
+ "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2: {
+ Hash1: hash1, Dots: "...", Hash2: hash2,
+ },
+ // two-dot form
+ "http://localhost:3000/org/repo/compare/" + hash1 + ".." + hash2: {
+ Hash1: hash1, Dots: "..", Hash2: hash2,
+ },
+ // short hashes
+ "http://localhost:3000/org/repo/compare/0705be4...d8a994e": {
+ Hash1: "0705be4", Dots: "...", Hash2: "d8a994e",
+ },
+ // fragment
+ "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + "#diff-2": {
+ Hash1: hash1, Dots: "...", Hash2: hash2, Fragment: "diff-2",
+ },
+ // trailing sentence period after hash2 is stripped
+ "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + ".": {
+ Hash1: hash1, Dots: "...", Hash2: hash2,
+ },
+ // trailing sentence period after fragment is stripped
+ "http://localhost:3000/org/repo/compare/" + hash1 + "..." + hash2 + "#diff-2.": {
+ Hash1: hash1, Dots: "...", Hash2: hash2, Fragment: "diff-2",
+ },
+
+ // false positives that the old regex accepted (directory/file named with hash-range shape)
+ "http://localhost:3000/org/repo/src/" + hash1 + "..." + hash2: {},
+ "http://localhost:3000/org/repo/releases/" + hash1 + "..." + hash2: {},
+ "http://localhost:3000/org/repo/src/branch/main/sub-dir/compare/" + hash1 + "..." + hash2: {},
+ // missing second hash (compare requires both)
+ "http://localhost:3000/org/repo/compare/" + hash1 + "...": {},
+ // cross-site
+ "https://github.com/jquery/jquery/compare/" + hash1 + "..." + hash2: {},
+ "non-url": {},
+ }
+
+ for k, v := range testCases {
+ ret, ok := comparePatternExtract(t.Context(), k)
+ if v.Hash1 == "" {
+ assert.False(t, ok, "expected no match for %q", k)
+ } else {
+ assert.Equal(t, strings.TrimSuffix(k, "."), ret.FullURL)
+ assert.Equal(t, v.Hash1, ret.Hash1)
+ assert.Equal(t, v.Dots, ret.Dots)
+ assert.Equal(t, v.Hash2, ret.Hash2)
+ assert.Equal(t, v.Fragment, ret.Fragment)
+ }
+ }
+}
+
+func TestRegExp_comparePattern_AppSubURL(t *testing.T) {
+ defer testModule.MockVariableValue(&setting.AppURL, "http://localhost:3000/a/b/c/")()
+ defer testModule.MockVariableValue(&setting.AppSubURL, "/a/b/c")()
+
+ hash1 := "0705be475092aede1eddae01319ec931fb9c65fc"
+ hash2 := "d8a994ef243349f321568f9e36d5c3f444b99cae"
+
+ ret, ok := comparePatternExtract(t.Context(), "http://localhost:3000/a/b/c/org/repo/compare/"+hash1+"..."+hash2)
+ assert.True(t, ok)
+ assert.Equal(t, hash1, ret.Hash1)
+ assert.Equal(t, hash2, ret.Hash2)
+
+ _, ok = comparePatternExtract(t.Context(), "http://localhost:3000/org/repo/compare/"+hash1+"..."+hash2)
+ assert.False(t, ok, "URL outside AppSubURL must be rejected")
+}
+
func TestRegExp_shortLinkPattern(t *testing.T) {
trueTestCases := []string{
"[[stuff]]",
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index 4fa9466d19..3e8f4f7c65 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -36,7 +36,6 @@ func TestRender_Commits(t *testing.T) {
repo := markup.TestAppURL + testRepoOwnerName + "/" + testRepoName + "/"
commit := repo + "commit/" + sha
commitPath := "/user13/repo11/commit/" + sha
- tree := repo + "tree/" + sha + "/src"
file := repo + "commit/" + sha + "/example.txt"
fileWithExtra := file + ":"
@@ -49,7 +48,6 @@ func TestRender_Commits(t *testing.T) {
test(sha[:7], `65f1bf2
`)
test(sha[:39], `65f1bf27bc
`)
test(commit, `65f1bf27bc
`)
- test(tree, `65f1bf27bc/src
`)
test(file, `65f1bf27bc/example.txt
`)
test(fileWithExtra, `65f1bf27bc/example.txt:
`)
@@ -113,6 +111,16 @@ func TestRender_CrossReferences(t *testing.T) {
test(
inputURL,
`0123456789.patch
`)
+
+ inputURL = setting.AppURL + "owner/repo/commit/01234567890123456789012345"
+ test(
+ inputURL,
+ `0123456789
`)
+
+ inputURL = setting.AppURL + "owner/repo/commit/0123456"
+ test(
+ inputURL,
+ `0123456
`)
}
func TestRender_links(t *testing.T) {