From 1e96a85f1b67f967d727126a9534469e26b6ec2a Mon Sep 17 00:00:00 2001 From: silverwind Date: Thu, 2 Apr 2026 21:27:42 +0200 Subject: [PATCH 1/6] Use Unicode Control Pictures for control character display Render ASCII control characters (0x00-0x1F, 0x7F) as Unicode Control Pictures (U+2400-U+2421) instead of text abbreviations like [DEL] or [U+001E]. This applies to both the file view and diff view paths. Also style control char badges in red without background, matching the style of other escaped code points. Co-Authored-By: Claude (Opus 4.6) --- modules/charset/escape_stream.go | 10 +++++++++- modules/charset/escape_test.go | 2 +- modules/highlight/highlight.go | 18 +++++------------- modules/highlight/highlight_test.go | 8 ++++---- web_src/css/modules/charescape.css | 10 +++++----- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 22e7f14f39..98a25e9e55 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -199,12 +199,20 @@ func (e *escapeStreamer) invisibleRune(r rune) error { e.escaped.Escaped = true e.escaped.HasInvisible = true + // Use Unicode Control Pictures for ASCII control chars + escaped := fmt.Sprintf("[U+%04X]", r) + if r >= 0 && r <= 0x1f { + escaped = string(0x2400 + r) + } else if r == 0x7f { + escaped = string(rune(0x2421)) + } + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ Key: "class", Val: "escaped-code-point", }, html.Attribute{ Key: "data-escaped", - Val: fmt.Sprintf("[U+%04X]", r), + Val: escaped, }); err != nil { return err } diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index 9d796a0c18..d79a4df6c6 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -151,7 +151,7 @@ func TestEscapeControlReader(t *testing.T) { for _, test := range escapeControlTests { test.name += " (+Control)" test.text = addPrefix("\u001E", test.text) - test.result = addPrefix(``+"\u001e"+``, test.result) + test.result = addPrefix(``+"\u001e"+``, test.result) test.status.Escaped = true test.status.HasInvisible = true tests = append(tests, test) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index addc372f85..35d041527a 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -43,20 +43,12 @@ func globalVars() *globalVarsType { globalVarsPtr.githubStyles = styles.Get("github") globalVarsPtr.highlightMapping = setting.GetHighlightMapping() globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256) - // ASCII Table 0x00 - 0x1F - controlCharNames := []string{ - "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", - "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI", - "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", - "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", + // ASCII control characters 0x00-0x1F map to Unicode Control Pictures U+2400-U+241F + for i := range 0x20 { + globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`` + string(byte(i)) + ``) } - // Uncomment this line if you'd debug the layout without creating a special file, then Space (0x20) will also be escaped. - // Don't worry, even if you forget to comment it out and push it to git repo, the CI tests will catch it and fail. - // controlCharNames = append(controlCharNames, "SP") - for i, s := range controlCharNames { - globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`` + string(byte(i)) + ``) - } - globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`` + string(byte(0x7f)) + ``) + // DEL (0x7F) maps to U+2421 + globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`` + string(byte(0x7f)) + ``) globalVarsPtr.escCtrlCharsMap['\t'] = "" globalVarsPtr.escCtrlCharsMap['\n'] = "" globalVarsPtr.escCtrlCharsMap['\r'] = "" diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index cad22ba9bb..2bd298e780 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -206,12 +206,12 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) { } func TestEscape(t *testing.T) { - assert.Equal(t, template.HTML("\t\r\n\x00\x1f&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>"))) - assert.Equal(t, template.HTML("\x00\x1f&'"<>\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n")) + assert.Equal(t, template.HTML("\t\r\n\x00\x1f&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>"))) + assert.Equal(t, template.HTML("\x00\x1f&'"<>\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n")) out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>")) - assert.Equal(t, template.HTML(`# `+string(byte(0x7f))+`<>`), out[0]) + assert.Equal(t, template.HTML(`# `+string(byte(0x7f))+`<>`), out[0]) out = renderPlainText([]byte("# \x7f<>")) - assert.Equal(t, template.HTML(`# `+string(byte(0x7f))+`<>`), out[0]) + assert.Equal(t, template.HTML(`# `+string(byte(0x7f))+`<>`), out[0]) } diff --git a/web_src/css/modules/charescape.css b/web_src/css/modules/charescape.css index 0c9cbb55b5..0bbd0dd573 100644 --- a/web_src/css/modules/charescape.css +++ b/web_src/css/modules/charescape.css @@ -1,11 +1,10 @@ /* Show the escaped and hide the real char: - {real-char} + {real-char} Only show the real-char: {real-char} */ -.broken-code-point:not([data-escaped]), -.broken-code-point[data-escaped]::before { +.broken-code-point:not([data-escaped]) { border-radius: 4px; padding: 0 2px; color: var(--color-body); @@ -15,6 +14,7 @@ Only show the real-char: .broken-code-point[data-escaped]::before { visibility: visible; content: attr(data-escaped); + color: var(--color-red); } .broken-code-point[data-escaped] .char { /* make it copyable by selecting the text (AI suggestion, no other solution) */ @@ -26,11 +26,11 @@ Only show the real-char: /* Show the escaped and hide the real-char: - {real-char} + {real-char} Hide the escaped and show the real-char: - {real-char} + {real-char} */ .unicode-escaped .escaped-code-point[data-escaped]::before { From 846cedbfc1a68d704f1a2cdea07cb493920c9390 Mon Sep 17 00:00:00 2001 From: silverwind Date: Thu, 2 Apr 2026 21:39:38 +0200 Subject: [PATCH 2/6] Align escape warning button with code lines Add .lines-escape to shared .lines-num/.lines-code rule so it gets the same font-size, line-height, and vertical-align. Add horizontal padding to the warning emoji pseudo-element. Co-Authored-By: Claude (Opus 4.6) --- web_src/css/base.css | 1 + web_src/css/review.css | 1 + 2 files changed, 2 insertions(+) diff --git a/web_src/css/base.css b/web_src/css/base.css index bb16b9fe21..b004c5724f 100644 --- a/web_src/css/base.css +++ b/web_src/css/base.css @@ -686,6 +686,7 @@ overflow-menu .ui.label { } .lines-num, +.lines-escape, .lines-code { font-size: 12px; font-family: var(--fonts-monospace); diff --git a/web_src/css/review.css b/web_src/css/review.css index 9e320346d8..16103f8d8f 100644 --- a/web_src/css/review.css +++ b/web_src/css/review.css @@ -19,6 +19,7 @@ visibility: visible; content: "⚠️"; font-family: var(--fonts-emoji); + padding: 0 2px; color: var(--color-red); } From faaefd0fa8454b2c7f2a2caf5dcdef48592055cc Mon Sep 17 00:00:00 2001 From: silverwind Date: Thu, 2 Apr 2026 21:53:14 +0200 Subject: [PATCH 3/6] Replace escape warning emoji with octicon-alert-fill SVG mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a CSS mask with octicon-alert-fill instead of the ⚠️ emoji for the toggle-escape-button. This gives consistent rendering across platforms and better vertical alignment with code lines. Co-Authored-By: Claude (Opus 4.6) --- web_src/css/base.css | 1 + web_src/css/review.css | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/web_src/css/base.css b/web_src/css/base.css index b004c5724f..a8d9dea2a2 100644 --- a/web_src/css/base.css +++ b/web_src/css/base.css @@ -17,6 +17,7 @@ /* images */ --checkbox-mask-checked: url('data:image/svg+xml;utf8,'); --checkbox-mask-indeterminate: url('data:image/svg+xml;utf8,'); + --octicon-alert-fill: url('data:image/svg+xml;utf8,'); --octicon-chevron-right: url('data:image/svg+xml;utf8,'); --octicon-x: url('data:image/svg+xml;utf8,'); --select-arrows: url('data:image/svg+xml;utf8,'); diff --git a/web_src/css/review.css b/web_src/css/review.css index 16103f8d8f..8fb5e1e29a 100644 --- a/web_src/css/review.css +++ b/web_src/css/review.css @@ -15,12 +15,23 @@ transform: scale(1.1); } +.lines-escape .toggle-escape-button { + padding: 2px; + transform: translateY(-1px); +} + .lines-escape .toggle-escape-button::before { visibility: visible; - content: "⚠️"; - font-family: var(--fonts-emoji); - padding: 0 2px; - color: var(--color-red); + content: ""; + display: inline-block; + width: 14px; + height: 14px; + vertical-align: middle; + background-color: var(--color-yellow); + mask-image: var(--octicon-alert-fill); + -webkit-mask-image: var(--octicon-alert-fill); + mask-size: contain; + -webkit-mask-size: contain; } .repository .diff-file-box .code-diff td.lines-escape { From ec904822065916f47bff0769402a05369c8731fd Mon Sep 17 00:00:00 2001 From: silverwind Date: Thu, 2 Apr 2026 21:55:38 +0200 Subject: [PATCH 4/6] Fix broken-code-point positioning and comment Add position: relative to .broken-code-point[data-escaped] to establish a containing block for the absolutely positioned .char child. Anchor the child with left: 0. Replace AI-referencing comment with description of actual purpose. Co-Authored-By: Claude (Opus 4.6) --- web_src/css/modules/charescape.css | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/web_src/css/modules/charescape.css b/web_src/css/modules/charescape.css index 0bbd0dd573..b72d7bdf45 100644 --- a/web_src/css/modules/charescape.css +++ b/web_src/css/modules/charescape.css @@ -11,14 +11,20 @@ Only show the real-char: background: var(--color-text-light-1); } +.broken-code-point[data-escaped] { + position: relative; +} + .broken-code-point[data-escaped]::before { visibility: visible; content: attr(data-escaped); color: var(--color-red); } + .broken-code-point[data-escaped] .char { - /* make it copyable by selecting the text (AI suggestion, no other solution) */ + /* keep the original character selectable/copyable while showing the escaped label via ::before */ position: absolute; + left: 0; opacity: 0; pointer-events: none; } From f80593bb23fad75c9642bcd0096c4881cf75ad5d Mon Sep 17 00:00:00 2001 From: silverwind Date: Fri, 3 Apr 2026 10:19:55 +0200 Subject: [PATCH 5/6] Extract shared ControlCharPicture helper and improve badge styling Extract charset.ControlCharPicture() shared between highlight and charset escape paths. Add controlCharHTML helper to deduplicate HTML template. Style control char badges with body color on gray background matching the original styling. Co-Authored-By: Claude (Opus 4.6) --- modules/charset/escape_stream.go | 24 ++++++++++++++++++------ modules/highlight/highlight.go | 13 +++++++++---- web_src/css/modules/charescape.css | 5 ++++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 98a25e9e55..5aa15a6fa9 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -18,6 +18,19 @@ import ( // VScode defaultWordRegexp var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`) +// ControlCharPicture returns the Unicode Control Picture for ASCII control +// characters (0x00-0x1F → U+2400-U+241F, 0x7F → U+2421). For other runes it +// returns 0, false. +func ControlCharPicture(r rune) (rune, bool) { + if r >= 0 && r <= 0x1f { + return 0x2400 + r, true + } + if r == 0x7f { + return 0x2421, true + } + return 0, false +} + func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer { allowedM := make(map[rune]bool, len(allowed)) for _, v := range allowed { @@ -199,12 +212,11 @@ func (e *escapeStreamer) invisibleRune(r rune) error { e.escaped.Escaped = true e.escaped.HasInvisible = true - // Use Unicode Control Pictures for ASCII control chars - escaped := fmt.Sprintf("[U+%04X]", r) - if r >= 0 && r <= 0x1f { - escaped = string(0x2400 + r) - } else if r == 0x7f { - escaped = string(rune(0x2421)) + var escaped string + if pic, ok := ControlCharPicture(r); ok { + escaped = string(pic) + } else { + escaped = fmt.Sprintf("[U+%04X]", r) } if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 35d041527a..ec56a28851 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -10,6 +10,7 @@ import ( "slices" "sync" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" @@ -43,12 +44,12 @@ func globalVars() *globalVarsType { globalVarsPtr.githubStyles = styles.Get("github") globalVarsPtr.highlightMapping = setting.GetHighlightMapping() globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256) - // ASCII control characters 0x00-0x1F map to Unicode Control Pictures U+2400-U+241F for i := range 0x20 { - globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`` + string(byte(i)) + ``) + pic, _ := charset.ControlCharPicture(rune(i)) + globalVarsPtr.escCtrlCharsMap[i] = controlCharHTML(pic, byte(i)) } - // DEL (0x7F) maps to U+2421 - globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`` + string(byte(0x7f)) + ``) + pic, _ := charset.ControlCharPicture(0x7f) + globalVarsPtr.escCtrlCharsMap[0x7f] = controlCharHTML(pic, 0x7f) globalVarsPtr.escCtrlCharsMap['\t'] = "" globalVarsPtr.escCtrlCharsMap['\n'] = "" globalVarsPtr.escCtrlCharsMap['\r'] = "" @@ -64,6 +65,10 @@ func globalVars() *globalVarsType { return globalVarsPtr } +func controlCharHTML(pic rune, char byte) template.HTML { + return template.HTML(`` + string(char) + ``) +} + func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML { firstEscapePos := -1 for i, c := range code { diff --git a/web_src/css/modules/charescape.css b/web_src/css/modules/charescape.css index b72d7bdf45..6f3dc99028 100644 --- a/web_src/css/modules/charescape.css +++ b/web_src/css/modules/charescape.css @@ -18,7 +18,10 @@ Only show the real-char: .broken-code-point[data-escaped]::before { visibility: visible; content: attr(data-escaped); - color: var(--color-red); + border-radius: 2px; + padding: 0 1px; + color: var(--color-body); + background: var(--color-text-light-1); } .broken-code-point[data-escaped] .char { From 5edaf14f385d7495fed289f7fb6e3b0cfe88f99e Mon Sep 17 00:00:00 2001 From: silverwind Date: Fri, 3 Apr 2026 10:25:16 +0200 Subject: [PATCH 6/6] Escape control chars in RenderCodeByLexer for diff/blame views Move escapeControlChars from per-line in RenderFullFile to RenderCodeByLexer so control characters are always visible in both file view and diff/blame views via broken-code-point class. Co-Authored-By: Claude (Opus 4.6) --- modules/highlight/highlight.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index ec56a28851..2d98805a0a 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -172,11 +172,7 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML { return escapeFullString(code) } - // At the moment, we do not escape control chars here (unlike RenderFullFile which escapes control chars). - // The reason is: it is a very rare case that a text file contains control chars. - // This function is usually used by highlight diff and blame, not quite sure whether there will be side effects. - // If there would be new user feedback about this, we can re-consider about various edge cases. - return template.HTML(htmlBuf.String()) + return escapeControlChars(htmlBuf.Bytes()) } // RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name @@ -188,10 +184,9 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st lexerName := formatLexerName(lexer.Config().Name) rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code)) unsafeLines := UnsafeSplitHighlightedLines(rendered) - lines := make([]template.HTML, 0, len(unsafeLines)) - for _, lineBytes := range unsafeLines { - line := escapeControlChars(lineBytes) - lines = append(lines, line) + lines := make([]template.HTML, len(unsafeLines)) + for idx, lineBytes := range unsafeLines { + lines[idx] = template.HTML(lineBytes) } return lines, lexerName }