0
0
mirror of https://github.com/go-gitea/gitea.git synced 2026-04-03 12:41:10 +02:00

Merge branch 'main' into improve-workflow-run

This commit is contained in:
Giteabot 2026-04-03 12:10:13 +08:00 committed by GitHub
commit 429fd7088e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 173 additions and 97 deletions

View File

@ -5,13 +5,9 @@
package highlight
import (
"bufio"
"bytes"
"fmt"
gohtml "html"
"html/template"
"io"
"strings"
"slices"
"sync"
"code.gitea.io/gitea/modules/log"
@ -23,12 +19,14 @@ import (
"github.com/alecthomas/chroma/v2/styles"
)
// don't index files larger than this many bytes for performance purposes
// don't highlight files larger than this many bytes for performance purposes
const sizeLimit = 1024 * 1024
type globalVarsType struct {
highlightMapping map[string]string
githubStyles *chroma.Style
escapeFull []template.HTML
escCtrlCharsMap []template.HTML
}
var (
@ -44,10 +42,69 @@ func globalVars() *globalVarsType {
globalVarsPtr = &globalVarsType{}
globalVarsPtr.githubStyles = styles.Get("github")
globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256)
// ASCII Table 0x00 - 0x1F
controlCharNames := []string{
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
}
// Uncomment this line if you'd debug the layout without creating a special file, then Space (0x20) will also be escaped.
// Don't worry, even if you forget to comment it out and push it to git repo, the CI tests will catch it and fail.
// controlCharNames = append(controlCharNames, "SP")
for i, s := range controlCharNames {
globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`<span class="broken-code-point" data-escaped="` + s + `"><span class="char">` + string(byte(i)) + `</span></span>`)
}
globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`<span class="broken-code-point" data-escaped="DEL"><span class="char">` + string(byte(0x7f)) + `</span></span>`)
globalVarsPtr.escCtrlCharsMap['\t'] = ""
globalVarsPtr.escCtrlCharsMap['\n'] = ""
globalVarsPtr.escCtrlCharsMap['\r'] = ""
globalVarsPtr.escapeFull = slices.Clone(globalVarsPtr.escCtrlCharsMap)
// exactly the same as Golang's html.EscapeString
globalVarsPtr.escapeFull['&'] = "&amp;"
globalVarsPtr.escapeFull['\''] = "&#39;"
globalVarsPtr.escapeFull['<'] = "&lt;"
globalVarsPtr.escapeFull['>'] = "&gt;"
globalVarsPtr.escapeFull['"'] = "&#34;"
}
return globalVarsPtr
}
func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML {
firstEscapePos := -1
for i, c := range code {
if escapeMap[c] != "" {
firstEscapePos = i
break
}
}
if firstEscapePos == -1 {
return template.HTML(util.UnsafeBytesToString(code))
}
buf := make([]byte, firstEscapePos, len(code)*2)
copy(buf[:firstEscapePos], code[:firstEscapePos])
for i := firstEscapePos; i < len(code); i++ {
c := code[i]
if esc := escapeMap[c]; esc != "" {
buf = append(buf, esc...)
} else {
buf = append(buf, c)
}
}
return template.HTML(util.UnsafeBytesToString(buf))
}
func escapeFullString(code string) template.HTML {
return escapeByMap(util.UnsafeStringToBytes(code), globalVars().escapeFull)
}
func escapeControlChars(code []byte) template.HTML {
return escapeByMap(code, globalVars().escCtrlCharsMap)
}
// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
// The '\n' is necessary for copying code from web UI to preserve original code lines
@ -90,7 +147,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
}
if len(code) > sizeLimit {
return template.HTML(template.HTMLEscapeString(code)), nil, ""
return escapeFullString(code), nil, ""
}
lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
@ -104,86 +161,66 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
html.PreventSurroundingPre(true),
)
htmlbuf := bytes.Buffer{}
htmlw := bufio.NewWriter(&htmlbuf)
iterator, err := lexer.Tokenise(nil, code)
if err != nil {
log.Error("Can't tokenize code: %v", err)
return template.HTML(template.HTMLEscapeString(code))
}
// style not used for live site but need to pass something
err = formatter.Format(htmlw, globalVars().githubStyles, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return template.HTML(template.HTMLEscapeString(code))
return escapeFullString(code)
}
_ = htmlw.Flush()
// Chroma will add newlines for certain lexers in order to highlight them properly
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
htmlBuf := &bytes.Buffer{}
// style not used for live site but need to pass something
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return escapeFullString(code)
}
// At the moment, we do not escape control chars here (unlike RenderFullFile which escapes control chars).
// The reason is: it is a very rare case that a text file contains control chars.
// This function is usually used by highlight diff and blame, not quite sure whether there will be side effects.
// If there would be new user feedback about this, we can re-consider about various edge cases.
return template.HTML(htmlBuf.String())
}
// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string, error) {
if len(code) > sizeLimit {
return RenderPlainText(code), "", nil
func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string) {
if language == LanguagePlaintext || len(code) > sizeLimit {
return renderPlainText(code), formatLexerName(LanguagePlaintext)
}
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
)
lexer := detectChromaLexerWithAnalyze(fileName, language, code)
lexerName := formatLexerName(lexer.Config().Name)
iterator, err := lexer.Tokenise(nil, string(code))
if err != nil {
return nil, "", fmt.Errorf("can't tokenize code: %w", err)
rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
unsafeLines := UnsafeSplitHighlightedLines(rendered)
lines := make([]template.HTML, 0, len(unsafeLines))
for _, lineBytes := range unsafeLines {
line := escapeControlChars(lineBytes)
lines = append(lines, line)
}
tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
htmlBuf := &bytes.Buffer{}
lines := make([]template.HTML, 0, len(tokensLines))
for _, tokens := range tokensLines {
iterator = chroma.Literator(tokens...)
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
if err != nil {
return nil, "", fmt.Errorf("can't format code: %w", err)
}
lines = append(lines, template.HTML(htmlBuf.String()))
htmlBuf.Reset()
}
return lines, lexerName, nil
return lines, lexerName
}
// RenderPlainText returns non-highlighted HTML for code
func RenderPlainText(code []byte) []template.HTML {
r := bufio.NewReader(bytes.NewReader(code))
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
for {
content, err := r.ReadString('\n')
if err != nil && err != io.EOF {
log.Error("failed to read string from buffer: %v", err)
break
// renderPlainText returns non-highlighted HTML for code
func renderPlainText(code []byte) []template.HTML {
lines := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
pos := 0
for pos < len(code) {
var content []byte
nextPos := bytes.IndexByte(code[pos:], '\n')
if nextPos == -1 {
content = code[pos:]
pos = len(code)
} else {
content = code[pos : pos+nextPos+1]
pos += nextPos + 1
}
if content == "" && err == io.EOF {
break
}
s := template.HTML(gohtml.EscapeString(content))
m = append(m, s)
lines = append(lines, escapeFullString(util.UnsafeBytesToString(content)))
}
return m
return lines
}
func formatLexerName(name string) string {
if name == "fallback" {
if name == LanguagePlaintext || name == chromaLexerFallback {
return "Plaintext"
}
return util.ToTitleCaseNoLower(name)
}

View File

@ -118,8 +118,7 @@ c=2
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
out, lexerName, err := RenderFullFile(tt.name, "", []byte(tt.code))
assert.NoError(t, err)
out, lexerName := RenderFullFile(tt.name, "", []byte(tt.code))
assert.Equal(t, tt.want, out)
assert.Equal(t, tt.lexerName, lexerName)
})
@ -182,7 +181,7 @@ c=2`),
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
out := RenderPlainText([]byte(tt.code))
out := renderPlainText([]byte(tt.code))
assert.Equal(t, tt.want, out)
})
}
@ -205,3 +204,14 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
}
func TestEscape(t *testing.T) {
assert.Equal(t, template.HTML("\t\r\n<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>")))
assert.Equal(t, template.HTML("<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&amp;&#39;&#34;&lt;&gt;\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n"))
out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>"))
assert.Equal(t, template.HTML(`<span class="c1"># <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;</span>`), out[0])
out = renderPlainText([]byte("# \x7f<>"))
assert.Equal(t, template.HTML(`# <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;`), out[0])
}

View File

@ -16,7 +16,11 @@ import (
"github.com/go-enry/go-enry/v2"
)
const mapKeyLowerPrefix = "lower/"
const (
mapKeyLowerPrefix = "lower/"
LanguagePlaintext = "plaintext"
chromaLexerFallback = "fallback"
)
// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)

View File

@ -119,12 +119,8 @@ func handleFileViewRenderSource(ctx *context.Context, attrs *attribute.Attribute
}
language := attrs.GetLanguage().Value()
fileContent, lexerName, err := highlight.RenderFullFile(filename, language, buf)
fileContent, lexerName := highlight.RenderFullFile(filename, language, buf)
ctx.Data["LexerName"] = lexerName
if err != nil {
log.Error("highlight.RenderFullFile failed, fallback to plain text: %v", err)
fileContent = highlight.RenderPlainText(buf)
}
status := &charset.EscapeStatus{}
statuses := make([]*charset.EscapeStatus, len(fileContent))
for i, line := range fileContent {

View File

@ -1140,7 +1140,7 @@ func TestHighlightCodeLines(t *testing.T) {
ret := highlightCodeLinesForDiffFile(diffFile, true, []byte("a\nb\n"))
assert.Equal(t, map[int]template.HTML{
0: `<span class="n">a</span>` + nl,
1: `<span class="n">b</span>`,
1: `<span class="n">b</span>` + nl,
}, ret)
})
}

View File

@ -33,6 +33,7 @@
@import "./modules/flexcontainer.css";
@import "./modules/codeeditor.css";
@import "./modules/chroma.css";
@import "./modules/charescape.css";
@import "./shared/flex-list.css";
@import "./shared/milestone.css";

View File

@ -0,0 +1,48 @@
/*
Show the escaped and hide the real char:
<span class="broken-code-point" data-escaped="DEL"><span class="char">{real-char}</span></span>
Only show the real-char:
<span class="broken-code-point">{real-char}</span>
*/
.broken-code-point:not([data-escaped]),
.broken-code-point[data-escaped]::before {
border-radius: 4px;
padding: 0 2px;
color: var(--color-body);
background: var(--color-text-light-1);
}
.broken-code-point[data-escaped]::before {
visibility: visible;
content: attr(data-escaped);
}
.broken-code-point[data-escaped] .char {
/* make it copyable by selecting the text (AI suggestion, no other solution) */
position: absolute;
opacity: 0;
pointer-events: none;
}
/*
Show the escaped and hide the real-char:
<span class="unicode-escaped">
<span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span>
</span>
Hide the escaped and show the real-char:
<span>
<span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span>
</span>
*/
.unicode-escaped .escaped-code-point[data-escaped]::before {
visibility: visible;
content: attr(data-escaped);
color: var(--color-red);
}
.unicode-escaped .escaped-code-point .char {
display: none;
}
.unicode-escaped .ambiguous-code-point {
border: 1px var(--color-yellow) solid;
}

View File

@ -8,26 +8,6 @@
min-width: 40% !important;
}
.repository .unicode-escaped .escaped-code-point[data-escaped]::before {
visibility: visible;
content: attr(data-escaped);
font-family: var(--fonts-monospace);
color: var(--color-red);
}
.repository .unicode-escaped .escaped-code-point .char {
display: none;
}
.repository .broken-code-point {
font-family: var(--fonts-monospace);
color: var(--color-blue);
}
.repository .unicode-escaped .ambiguous-code-point {
border: 1px var(--color-yellow) solid;
}
.issue-content {
display: flex;
align-items: flex-start;