Merge branch 'main' into improve-workflow-run

2026-05-06 19:18:35 +02:00 · 2026-04-03 12:10:13 +08:00 · 2026-04-03 12:10:13 +08:00 · 429fd7088e
commit 429fd7088e
parent 379fd28120 6eed75af24
8 changed files with 173 additions and 97 deletions
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@ -5,13 +5,9 @@
 package highlight

 import (
-	"bufio"
 	"bytes"
-	"fmt"
-	gohtml "html"
 	"html/template"
-	"io"
-	"strings"
+	"slices"
 	"sync"

 	"code.gitea.io/gitea/modules/log"
@ -23,12 +19,14 @@ import (
 	"github.com/alecthomas/chroma/v2/styles"
 )

-// don't index files larger than this many bytes for performance purposes
+// don't highlight files larger than this many bytes for performance purposes
 const sizeLimit = 1024 * 1024

 type globalVarsType struct {
 	highlightMapping map[string]string
 	githubStyles     *chroma.Style
+	escapeFull       []template.HTML
+	escCtrlCharsMap  []template.HTML
 }

 var (
@ -44,10 +42,69 @@ func globalVars() *globalVarsType {
 		globalVarsPtr = &globalVarsType{}
 		globalVarsPtr.githubStyles = styles.Get("github")
 		globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
+		globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256)
+		// ASCII Table 0x00 - 0x1F
+		controlCharNames := []string{
+			"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+			"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
+			"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+			"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
+		}
+		// Uncomment the line below if you'd like to debug the layout without creating a special file; Space (0x20) will then also be escaped.
+		// Don't worry: even if you forget to comment it out again and push it to the git repo, the CI tests will catch it and fail.
+		// controlCharNames = append(controlCharNames, "SP")
+		for i, s := range controlCharNames {
+			globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`<span class="broken-code-point" data-escaped="` + s + `"><span class="char">` + string(byte(i)) + `</span></span>`)
+		}
+		globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`<span class="broken-code-point" data-escaped="DEL"><span class="char">` + string(byte(0x7f)) + `</span></span>`)
+		globalVarsPtr.escCtrlCharsMap['\t'] = ""
+		globalVarsPtr.escCtrlCharsMap['\n'] = ""
+		globalVarsPtr.escCtrlCharsMap['\r'] = ""
+
+		globalVarsPtr.escapeFull = slices.Clone(globalVarsPtr.escCtrlCharsMap)
+		// exactly the same as Golang's html.EscapeString
+		globalVarsPtr.escapeFull['&'] = "&amp;"
+		globalVarsPtr.escapeFull['\''] = "&#39;"
+		globalVarsPtr.escapeFull['<'] = "&lt;"
+		globalVarsPtr.escapeFull['>'] = "&gt;"
+		globalVarsPtr.escapeFull['"'] = "&#34;"
 	}
 	return globalVarsPtr
 }

+// escapeByMap returns code as template.HTML, replacing every byte c whose
+// escapeMap[c] entry is non-empty with that entry; all other bytes are kept as-is.
+// escapeMap is indexed directly by byte value (the maps built in globalVars have 256 entries).
+func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML {
+	// find the first byte that needs escaping, so the common "nothing to escape" case can skip building a new buffer
+	firstEscapePos := -1
+	for i, c := range code {
+		if escapeMap[c] != "" {
+			firstEscapePos = i
+			break
+		}
+	}
+	if firstEscapePos == -1 {
+		// nothing to escape: return the input bytes converted by util.UnsafeBytesToString (presumably without copying)
+		return template.HTML(util.UnsafeBytesToString(code))
+	}
+
+	// start with the untouched prefix; the capacity is only an initial guess, append grows the buffer when needed
+	buf := make([]byte, firstEscapePos, len(code)*2)
+	copy(buf[:firstEscapePos], code[:firstEscapePos])
+	for i := firstEscapePos; i < len(code); i++ {
+		c := code[i]
+		if esc := escapeMap[c]; esc != "" {
+			buf = append(buf, esc...)
+		} else {
+			buf = append(buf, c)
+		}
+	}
+	return template.HTML(util.UnsafeBytesToString(buf))
+}
+
+// escapeFullString escapes code with the escapeFull map from globalVars:
+// the HTML special chars (& ' < > ") and the control chars (tab, LF and CR are kept as-is).
+func escapeFullString(code string) template.HTML {
+	return escapeByMap(util.UnsafeStringToBytes(code), globalVars().escapeFull)
+}
+
+// escapeControlChars escapes code with the escCtrlCharsMap from globalVars:
+// only the control chars are wrapped (tab, LF and CR are kept as-is), HTML special chars are left untouched.
+// RenderFullFile applies it to lines of already-highlighted HTML.
+func escapeControlChars(code []byte) template.HTML {
+	return escapeByMap(code, globalVars().escCtrlCharsMap)
+}
+
 // UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
 // It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
 // The '\n' is necessary for copying code from web UI to preserve original code lines
@ -90,7 +147,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
 	}

 	if len(code) > sizeLimit {
-		return template.HTML(template.HTMLEscapeString(code)), nil, ""
+		return escapeFullString(code), nil, ""
 	}

 	lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
@ -104,86 +161,66 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
 		html.PreventSurroundingPre(true),
 	)

-	htmlbuf := bytes.Buffer{}
-	htmlw := bufio.NewWriter(&htmlbuf)
-
 	iterator, err := lexer.Tokenise(nil, code)
 	if err != nil {
 		log.Error("Can't tokenize code: %v", err)
-		return template.HTML(template.HTMLEscapeString(code))
-	}
-	// style not used for live site but need to pass something
-	err = formatter.Format(htmlw, globalVars().githubStyles, iterator)
-	if err != nil {
-		log.Error("Can't format code: %v", err)
-		return template.HTML(template.HTMLEscapeString(code))
+		return escapeFullString(code)
 	}

-	_ = htmlw.Flush()
-	// Chroma will add newlines for certain lexers in order to highlight them properly
-	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
-	return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
+	htmlBuf := &bytes.Buffer{}
+	// style not used for live site but need to pass something
+	err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
+	if err != nil {
+		log.Error("Can't format code: %v", err)
+		return escapeFullString(code)
+	}
+
+	// At the moment, we do not escape control chars here (unlike RenderFullFile, which does escape them).
+	// The reason: it is very rare for a text file to contain control chars.
+	// This function is mostly used for highlighting diff and blame, and it is not clear whether escaping there would have side effects.
+	// If users report problems with this, the various edge cases can be reconsidered.
+	return template.HTML(htmlBuf.String())
 }

 // RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
-func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string, error) {
-	if len(code) > sizeLimit {
-		return RenderPlainText(code), "", nil
+func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string) {
+	if language == LanguagePlaintext || len(code) > sizeLimit {
+		return renderPlainText(code), formatLexerName(LanguagePlaintext)
 	}
-
-	formatter := html.New(html.WithClasses(true),
-		html.WithLineNumbers(false),
-		html.PreventSurroundingPre(true),
-	)
-
 	lexer := detectChromaLexerWithAnalyze(fileName, language, code)
 	lexerName := formatLexerName(lexer.Config().Name)
-
-	iterator, err := lexer.Tokenise(nil, string(code))
-	if err != nil {
-		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
+	rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
+	unsafeLines := UnsafeSplitHighlightedLines(rendered)
+	lines := make([]template.HTML, 0, len(unsafeLines))
+	for _, lineBytes := range unsafeLines {
+		line := escapeControlChars(lineBytes)
+		lines = append(lines, line)
 	}
-
-	tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
-	htmlBuf := &bytes.Buffer{}
-
-	lines := make([]template.HTML, 0, len(tokensLines))
-	for _, tokens := range tokensLines {
-		iterator = chroma.Literator(tokens...)
-		err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
-		if err != nil {
-			return nil, "", fmt.Errorf("can't format code: %w", err)
-		}
-		lines = append(lines, template.HTML(htmlBuf.String()))
-		htmlBuf.Reset()
-	}
-
-	return lines, lexerName, nil
+	return lines, lexerName
 }

-// RenderPlainText returns non-highlighted HTML for code
-func RenderPlainText(code []byte) []template.HTML {
-	r := bufio.NewReader(bytes.NewReader(code))
-	m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
-	for {
-		content, err := r.ReadString('\n')
-		if err != nil && err != io.EOF {
-			log.Error("failed to read string from buffer: %v", err)
-			break
+// renderPlainText returns non-highlighted HTML for code
+func renderPlainText(code []byte) []template.HTML {
+	lines := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
+	pos := 0
+	for pos < len(code) {
+		var content []byte
+		nextPos := bytes.IndexByte(code[pos:], '\n')
+		if nextPos == -1 {
+			content = code[pos:]
+			pos = len(code)
+		} else {
+			content = code[pos : pos+nextPos+1]
+			pos += nextPos + 1
 		}
-		if content == "" && err == io.EOF {
-			break
-		}
-		s := template.HTML(gohtml.EscapeString(content))
-		m = append(m, s)
+		lines = append(lines, escapeFullString(util.UnsafeBytesToString(content)))
 	}
-	return m
+	return lines
 }

 func formatLexerName(name string) string {
-	if name == "fallback" {
+	if name == LanguagePlaintext || name == chromaLexerFallback {
 		return "Plaintext"
 	}
-
 	return util.ToTitleCaseNoLower(name)
 }
--- a/modules/highlight/highlight_test.go
+++ b/modules/highlight/highlight_test.go
@ -118,8 +118,7 @@ c=2

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			out, lexerName, err := RenderFullFile(tt.name, "", []byte(tt.code))
-			assert.NoError(t, err)
+			out, lexerName := RenderFullFile(tt.name, "", []byte(tt.code))
 			assert.Equal(t, tt.want, out)
 			assert.Equal(t, tt.lexerName, lexerName)
 		})
@ -182,7 +181,7 @@ c=2`),

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			out := RenderPlainText([]byte(tt.code))
+			out := renderPlainText([]byte(tt.code))
 			assert.Equal(t, tt.want, out)
 		})
 	}
@ -205,3 +204,14 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
 	assert.Equal(t, "<span>a</span>\n", string(ret[0]))
 	assert.Equal(t, "<span>b\n</span>", string(ret[1]))
 }
+
+// TestEscape checks escapeControlChars and escapeFullString directly,
+// and the control-char escaping in the output of RenderFullFile and renderPlainText.
+func TestEscape(t *testing.T) {
+	assert.Equal(t, template.HTML("\t\r\n<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>")))
+	assert.Equal(t, template.HTML("<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&amp;&#39;&#34;&lt;&gt;\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n"))
+
+	out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>"))
+	assert.Equal(t, template.HTML(`<span class="c1"># <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;</span>`), out[0])
+
+	out = renderPlainText([]byte("# \x7f<>"))
+	assert.Equal(t, template.HTML(`# <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;`), out[0])
+}
--- a/modules/highlight/lexerdetect.go
+++ b/modules/highlight/lexerdetect.go
@ -16,7 +16,11 @@ import (
 	"github.com/go-enry/go-enry/v2"
 )

-const mapKeyLowerPrefix = "lower/"
+const (
+	mapKeyLowerPrefix   = "lower/"
+	LanguagePlaintext   = "plaintext"
+	chromaLexerFallback = "fallback"
+)

 // chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
 // Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)
--- a/routers/web/repo/view_file.go
+++ b/routers/web/repo/view_file.go
@ -119,12 +119,8 @@ func handleFileViewRenderSource(ctx *context.Context, attrs *attribute.Attribute
 	}

 	language := attrs.GetLanguage().Value()
-	fileContent, lexerName, err := highlight.RenderFullFile(filename, language, buf)
+	fileContent, lexerName := highlight.RenderFullFile(filename, language, buf)
 	ctx.Data["LexerName"] = lexerName
-	if err != nil {
-		log.Error("highlight.RenderFullFile failed, fallback to plain text: %v", err)
-		fileContent = highlight.RenderPlainText(buf)
-	}
 	status := &charset.EscapeStatus{}
 	statuses := make([]*charset.EscapeStatus, len(fileContent))
 	for i, line := range fileContent {
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@ -1140,7 +1140,7 @@ func TestHighlightCodeLines(t *testing.T) {
 		ret := highlightCodeLinesForDiffFile(diffFile, true, []byte("a\nb\n"))
 		assert.Equal(t, map[int]template.HTML{
 			0: `<span class="n">a</span>` + nl,
-			1: `<span class="n">b</span>`,
+			1: `<span class="n">b</span>` + nl,
 		}, ret)
 	})
 }
--- a/web_src/css/index.css
+++ b/web_src/css/index.css
@ -33,6 +33,7 @@
@import "./modules/flexcontainer.css";
@import "./modules/codeeditor.css";
@import "./modules/chroma.css";
+@import "./modules/charescape.css";

@import "./shared/flex-list.css";
@import "./shared/milestone.css";
--- a/web_src/css/modules/charescape.css
+++ b/web_src/css/modules/charescape.css
@ -0,0 +1,48 @@
+/*
+Show the escaped and hide the real char:
+  <span class="broken-code-point" data-escaped="DEL"><span class="char">{real-char}</span></span>
+Only show the real-char:
+  <span class="broken-code-point">{real-char}</span>
+*/
+.broken-code-point:not([data-escaped]),
+.broken-code-point[data-escaped]::before {
+  border-radius: 4px;
+  padding: 0 2px;
+  color: var(--color-body);
+  background: var(--color-text-light-1);
+}
+
+.broken-code-point[data-escaped]::before {
+  visibility: visible;
+  content: attr(data-escaped);
+}
+.broken-code-point[data-escaped] .char {
+  /* make it copyable by selecting the text (AI suggestion, no other solution) */
+  position: absolute;
+  opacity: 0;
+  pointer-events: none;
+}
+
+/*
+Show the escaped and hide the real-char:
+  <span class="unicode-escaped">
+    <span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span>
+  </span>
+Hide the escaped and show the real-char:
+  <span>
+    <span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span>
+  </span>
+*/
+.unicode-escaped .escaped-code-point[data-escaped]::before {
+  visibility: visible;
+  content: attr(data-escaped);
+  color: var(--color-red);
+}
+
+.unicode-escaped .escaped-code-point .char {
+  display: none;
+}
+
+.unicode-escaped .ambiguous-code-point {
+  border: 1px var(--color-yellow) solid;
+}
--- a/web_src/css/repo.css
+++ b/web_src/css/repo.css
@ -8,26 +8,6 @@
  min-width: 40% !important;
 }

-.repository .unicode-escaped .escaped-code-point[data-escaped]::before {
-  visibility: visible;
-  content: attr(data-escaped);
-  font-family: var(--fonts-monospace);
-  color: var(--color-red);
-}
-
-.repository .unicode-escaped .escaped-code-point .char {
-  display: none;
-}
-
-.repository .broken-code-point {
-  font-family: var(--fonts-monospace);
-  color: var(--color-blue);
-}
-
-.repository .unicode-escaped .ambiguous-code-point {
-  border: 1px var(--color-yellow) solid;
-}
-
 .issue-content {
  display: flex;
  align-items: flex-start;