From af6be75adb99ff42215a945927c016aa5e40dab2 Mon Sep 17 00:00:00 2001
From: Dejan Kitic <137049545+dek5troza@users.noreply.github.com>
Date: Sun, 20 Apr 2025 12:18:14 +0100
Subject: [PATCH] Valid email address should only start with alphanumeric
(#28174)
This fixes issue #27847, where the regular expression allowed an email
address to start with special symbols. A valid email address should start
with an alphanumeric character, and only then will it be rendered as an email link.
Added test cases from the bug report to validate that such input is no
longer rendered as an email address.
---------
Co-authored-by: wxiaoguang
---
modules/markup/html.go | 3 ++-
modules/markup/html_email.go | 14 +++++++++++++-
modules/markup/html_test.go | 36 ++++++++++++++++++++++++++++--------
3 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 0e074cbcfa..7c3bd93699 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -71,7 +71,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
- v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
+ // At the moment, we use a stricter rule for rendering purposes: only allow the "name" part to start after a word boundary
+ v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`)
// emojiShortCodeRegex find emoji by alias like :smile:
v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
diff --git a/modules/markup/html_email.go b/modules/markup/html_email.go
index cbfae8b829..cf18e99d98 100644
--- a/modules/markup/html_email.go
+++ b/modules/markup/html_email.go
@@ -3,7 +3,11 @@
package markup
-import "golang.org/x/net/html"
+import (
+ "strings"
+
+ "golang.org/x/net/html"
+)
// emailAddressProcessor replaces raw email addresses with a mailto: link.
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
@@ -14,6 +18,14 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
return
}
+ var nextByte byte
+ if len(node.Data) > m[3] {
+ nextByte = node.Data[m[3]]
+ }
+ if strings.IndexByte(":/", nextByte) != -1 {
+ // for cases: "git@gitea.com:owner/repo.git", "https://git@gitea.com/owner/repo.git"
+ return
+ }
mail := node.Data[m[2]:m[3]]
replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/))
node = node.NextSibling.NextSibling
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index aab9fddd91..58f71bdd7b 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -225,10 +225,10 @@ func TestRender_email(t *testing.T) {
test := func(input, expected string) {
res, err := markup.RenderString(markup.NewTestRenderContext().WithRelativePath("a.md"), input)
assert.NoError(t, err)
- assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res))
+ assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res), "input: %s", input)
}
- // Text that should be turned into email link
+ // Text that should be turned into email link
test(
"info@gitea.com",
`info@gitea.com
`)
@@ -260,28 +260,48 @@ func TestRender_email(t *testing.T) {
j.doe@example.com?
j.doe@example.com!
`)
+ // match GitHub behavior
+ test("email@domain@domain.com", `email@domain@domain.com
`)
+
+ // match GitHub behavior
+ test(`"info@gitea.com"`, `"info@gitea.com"
`)
+
// Test that should *not* be turned into email links
- test(
- "\"info@gitea.com\"",
- `"info@gitea.com"
`)
test(
"/home/gitea/mailstore/info@gitea/com",
`/home/gitea/mailstore/info@gitea/com
`)
test(
"git@try.gitea.io:go-gitea/gitea.git",
`git@try.gitea.io:go-gitea/gitea.git
`)
+ test(
+ "https://foo:bar@gitea.io",
+ `https://foo:bar@gitea.io
`)
test(
"gitea@3",
`gitea@3
`)
test(
"gitea@gmail.c",
`gitea@gmail.c
`)
- test(
- "email@domain@domain.com",
- `email@domain@domain.com
`)
test(
"email@domain..com",
`email@domain..com
`)
+
+ cases := []struct {
+ input, expected string
+ }{
+ // match GitHub behavior
+ {"?a@d.zz", `?a@d.zz
`},
+ {"*a@d.zz", `*a@d.zz
`},
+ {"~a@d.zz", `~a@d.zz
`},
+
+ // the following cases don't match GitHub behavior, but they are valid email addresses ...
+ // maybe we should reduce the candidate characters for the "name" part in the future
+ {"a*a@d.zz", `a*a@d.zz
`},
+ {"a~a@d.zz", `a~a@d.zz
`},
+ }
+ for _, c := range cases {
+ test(c.input, c.expected)
+ }
}
func TestRender_emoji(t *testing.T) {