mirror of
https://github.com/go-gitea/gitea.git
synced 2026-04-23 07:21:39 +02:00
Extract charset.ControlCharPicture() shared between highlight and charset escape paths. Add controlCharHTML helper to deduplicate HTML template. Style control char badges with body color on gray background matching the original styling. Co-Authored-By: Claude (Opus 4.6) <noreply@anthropic.com>
312 lines
7.8 KiB
Go
312 lines
7.8 KiB
Go
// Copyright 2022 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package charset
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
|
|
"code.gitea.io/gitea/modules/translation"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// defaultWordRegexp locates the next "word" in a piece of text. The pattern
// is the VS Code default word regexp. It matches either
//   - a number-like token: optional '-', optional digits, a '.', at least one
//     digit, then any trailing word characters; or
//   - a run of one or more characters that are none of the listed ASCII
//     punctuation characters, ASCII whitespace, or bytes 0x00-0x1F.
var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`)
|
|
|
|
// ControlCharPicture maps an ASCII control character to its counterpart in
// the Unicode Control Pictures block: 0x00-0x1F become U+2400-U+241F and
// 0x7F becomes U+2421. The boolean result reports whether r was one of those
// control characters; when it is false the returned rune is 0.
func ControlCharPicture(r rune) (rune, bool) {
	switch {
	case r == 0x7f:
		return 0x2421, true
	case r < 0 || r > 0x1f:
		// Everything outside the C0 range (DEL was handled above).
		return 0, false
	default:
		return 0x2400 + r, true
	}
}
|
|
|
|
func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer {
|
|
allowedM := make(map[rune]bool, len(allowed))
|
|
for _, v := range allowed {
|
|
allowedM[v] = true
|
|
}
|
|
return &escapeStreamer{
|
|
escaped: &EscapeStatus{},
|
|
PassthroughHTMLStreamer: *NewPassthroughStreamer(next),
|
|
locale: locale,
|
|
ambiguousTables: AmbiguousTablesForLocale(locale),
|
|
allowed: allowedM,
|
|
}
|
|
}
|
|
|
|
type escapeStreamer struct {
|
|
PassthroughHTMLStreamer
|
|
escaped *EscapeStatus
|
|
locale translation.Locale
|
|
ambiguousTables []*AmbiguousTable
|
|
allowed map[rune]bool
|
|
}
|
|
|
|
func (e *escapeStreamer) EscapeStatus() *EscapeStatus {
|
|
return e.escaped
|
|
}
|
|
|
|
// Text tells the next streamer there is a text
|
|
func (e *escapeStreamer) Text(data string) error {
|
|
sb := &strings.Builder{}
|
|
var until int
|
|
var next int
|
|
pos := 0
|
|
if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) {
|
|
_, _ = sb.WriteString(data[:len(UTF8BOM)])
|
|
pos = len(UTF8BOM)
|
|
}
|
|
dataBytes := []byte(data)
|
|
for pos < len(data) {
|
|
nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:])
|
|
if nextIdxs == nil {
|
|
until = len(data)
|
|
next = until
|
|
} else {
|
|
until = min(nextIdxs[0]+pos, len(data))
|
|
next = min(nextIdxs[1]+pos, len(data))
|
|
}
|
|
|
|
// from pos until we know that the runes are not \r\t\n or even ' '
|
|
n := next - until
|
|
runes := make([]rune, 0, n)
|
|
positions := make([]int, 0, n+1)
|
|
|
|
for pos < until {
|
|
r, sz := utf8.DecodeRune(dataBytes[pos:])
|
|
positions = positions[:0]
|
|
positions = append(positions, pos, pos+sz)
|
|
types, confusables, _ := e.runeTypes(r)
|
|
if err := e.handleRunes(dataBytes, []rune{r}, positions, types, confusables, sb); err != nil {
|
|
return err
|
|
}
|
|
pos += sz
|
|
}
|
|
|
|
for i := pos; i < next; {
|
|
r, sz := utf8.DecodeRune(dataBytes[i:])
|
|
runes = append(runes, r)
|
|
positions = append(positions, i)
|
|
i += sz
|
|
}
|
|
positions = append(positions, next)
|
|
types, confusables, runeCounts := e.runeTypes(runes...)
|
|
if runeCounts.needsEscape() {
|
|
if err := e.handleRunes(dataBytes, runes, positions, types, confusables, sb); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
_, _ = sb.Write(dataBytes[pos:next])
|
|
}
|
|
pos = next
|
|
}
|
|
if sb.Len() > 0 {
|
|
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *escapeStreamer) handleRunes(data []byte, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error {
|
|
for i, r := range runes {
|
|
switch types[i] {
|
|
case brokenRuneType:
|
|
if sb.Len() > 0 {
|
|
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
|
|
return err
|
|
}
|
|
sb.Reset()
|
|
}
|
|
end := positions[i+1]
|
|
start := positions[i]
|
|
if err := e.brokenRune(data[start:end]); err != nil {
|
|
return err
|
|
}
|
|
case ambiguousRuneType:
|
|
if sb.Len() > 0 {
|
|
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
|
|
return err
|
|
}
|
|
sb.Reset()
|
|
}
|
|
if err := e.ambiguousRune(r, confusables[0]); err != nil {
|
|
return err
|
|
}
|
|
confusables = confusables[1:]
|
|
case invisibleRuneType:
|
|
if sb.Len() > 0 {
|
|
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
|
|
return err
|
|
}
|
|
sb.Reset()
|
|
}
|
|
if err := e.invisibleRune(r); err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
_, _ = sb.WriteRune(r)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *escapeStreamer) brokenRune(bs []byte) error {
|
|
e.escaped.Escaped = true
|
|
e.escaped.HasBadRunes = true
|
|
|
|
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
|
|
Key: "class",
|
|
Val: "broken-code-point",
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil {
|
|
return err
|
|
}
|
|
|
|
return e.PassthroughHTMLStreamer.EndTag("span")
|
|
}
|
|
|
|
func (e *escapeStreamer) ambiguousRune(r, c rune) error {
|
|
e.escaped.Escaped = true
|
|
e.escaped.HasAmbiguous = true
|
|
|
|
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
|
|
Key: "class",
|
|
Val: "ambiguous-code-point",
|
|
}, html.Attribute{
|
|
Key: "data-tooltip-content",
|
|
Val: e.locale.TrString("repo.ambiguous_character", r, c),
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
|
|
Key: "class",
|
|
Val: "char",
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
|
|
return err
|
|
}
|
|
|
|
return e.PassthroughHTMLStreamer.EndTag("span")
|
|
}
|
|
|
|
func (e *escapeStreamer) invisibleRune(r rune) error {
|
|
e.escaped.Escaped = true
|
|
e.escaped.HasInvisible = true
|
|
|
|
var escaped string
|
|
if pic, ok := ControlCharPicture(r); ok {
|
|
escaped = string(pic)
|
|
} else {
|
|
escaped = fmt.Sprintf("[U+%04X]", r)
|
|
}
|
|
|
|
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
|
|
Key: "class",
|
|
Val: "escaped-code-point",
|
|
}, html.Attribute{
|
|
Key: "data-escaped",
|
|
Val: escaped,
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
|
|
Key: "class",
|
|
Val: "char",
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
|
|
return err
|
|
}
|
|
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
|
|
return err
|
|
}
|
|
|
|
return e.PassthroughHTMLStreamer.EndTag("span")
|
|
}
|
|
|
|
// runeCountType tallies how many runes of each category were found in a
// group of runes.
type runeCountType struct {
	// numBasicRunes counts space, tab, newline and printable ASCII runes.
	numBasicRunes int
	// numNonConfusingNonBasicRunes counts runes outside printable ASCII that
	// were not classified as broken, invisible or ambiguous.
	numNonConfusingNonBasicRunes int
	// numAmbiguousRunes counts runes found in the ambiguous tables.
	numAmbiguousRunes int
	// numInvisibleRunes counts invisible and control runes.
	numInvisibleRunes int
	// numBrokenRunes counts runes that decoded to utf8.RuneError.
	numBrokenRunes int
}

// needsEscape reports whether a group of runes with these counts has to be
// escaped rune by rune. Any broken rune forces escaping. A group with no
// basic runes but at least one non-confusing non-basic rune is never escaped.
// Otherwise the group is escaped when it contains an ambiguous or invisible
// rune.
func (counts runeCountType) needsEscape() bool {
	switch {
	case counts.numBrokenRunes > 0:
		return true
	case counts.numBasicRunes == 0 && counts.numNonConfusingNonBasicRunes > 0:
		return false
	default:
		return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0
	}
}
|
|
|
|
// runeType is the category assigned to a single rune during classification.
type runeType int

const (
	// basicASCIIRuneType is the zero value, so it is what an unset entry
	// holds. It is never assigned explicitly but is kept because it documents
	// the default.
	basicASCIIRuneType runeType = iota
	// brokenRuneType marks a rune that decoded to utf8.RuneError.
	brokenRuneType
	// nonBasicASCIIRuneType marks a rune outside printable ASCII that is
	// neither invisible nor ambiguous.
	nonBasicASCIIRuneType
	// ambiguousRuneType marks a rune found in the ambiguous tables.
	ambiguousRuneType
	// invisibleRuneType marks an invisible or control rune.
	invisibleRuneType
)
|
|
|
|
func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) {
|
|
types = make([]runeType, len(runes))
|
|
for i, r := range runes {
|
|
var confusable rune
|
|
switch {
|
|
case r == utf8.RuneError:
|
|
types[i] = brokenRuneType
|
|
runeCounts.numBrokenRunes++
|
|
case r == ' ' || r == '\t' || r == '\n':
|
|
runeCounts.numBasicRunes++
|
|
case e.allowed[r]:
|
|
if r > 0x7e || r < 0x20 {
|
|
types[i] = nonBasicASCIIRuneType
|
|
runeCounts.numNonConfusingNonBasicRunes++
|
|
} else {
|
|
runeCounts.numBasicRunes++
|
|
}
|
|
case unicode.Is(InvisibleRanges, r):
|
|
types[i] = invisibleRuneType
|
|
runeCounts.numInvisibleRunes++
|
|
case unicode.IsControl(r):
|
|
types[i] = invisibleRuneType
|
|
runeCounts.numInvisibleRunes++
|
|
case isAmbiguous(r, &confusable, e.ambiguousTables...):
|
|
confusables = append(confusables, confusable)
|
|
types[i] = ambiguousRuneType
|
|
runeCounts.numAmbiguousRunes++
|
|
case r > 0x7e || r < 0x20:
|
|
types[i] = nonBasicASCIIRuneType
|
|
runeCounts.numNonConfusingNonBasicRunes++
|
|
default:
|
|
runeCounts.numBasicRunes++
|
|
}
|
|
}
|
|
return types, confusables, runeCounts
|
|
}
|