mirror of
https://github.com/go-gitea/gitea.git
synced 2026-02-21 09:04:53 +01:00
Fine tune diff highlighting (#36592)
This commit is contained in:
parent
47b387921a
commit
2876800cb2
@ -12,11 +12,15 @@ import (
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/git/gitcmd"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
)
|
||||
|
||||
var catFileBatchDebugWaitClose atomic.Int64
|
||||
|
||||
type catFileBatchCommunicator struct {
|
||||
cancel context.CancelFunc
|
||||
reqWriter io.Writer
|
||||
@ -36,7 +40,14 @@ func newCatFileBatch(ctx context.Context, repoPath string, cmdCatFile *gitcmd.Co
|
||||
ctx, ctxCancel := context.WithCancelCause(ctx)
|
||||
|
||||
// We often want to feed the commits in order into cat-file --batch, followed by their trees and subtrees as necessary.
|
||||
stdinWriter, stdoutReader, pipeClose := cmdCatFile.MakeStdinStdoutPipe()
|
||||
stdinWriter, stdoutReader, stdPipeClose := cmdCatFile.MakeStdinStdoutPipe()
|
||||
pipeClose := func() {
|
||||
if delay := catFileBatchDebugWaitClose.Load(); delay > 0 {
|
||||
time.Sleep(time.Duration(delay)) // for testing purpose only
|
||||
}
|
||||
stdPipeClose()
|
||||
}
|
||||
|
||||
ret = &catFileBatchCommunicator{
|
||||
debugGitCmd: cmdCatFile,
|
||||
cancel: func() { ctxCancel(nil) },
|
||||
|
||||
@ -5,9 +5,11 @@ package git
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
|
||||
@ -37,6 +39,45 @@ func testCatFileBatch(t *testing.T) {
|
||||
require.Error(t, err)
|
||||
})
|
||||
|
||||
simulateQueryTerminated := func(pipeCloseDelay, pipeReadDelay time.Duration) (errRead error) {
|
||||
catFileBatchDebugWaitClose.Store(int64(pipeCloseDelay))
|
||||
defer catFileBatchDebugWaitClose.Store(0)
|
||||
batch, err := NewBatch(t.Context(), filepath.Join(testReposDir, "repo1_bare"))
|
||||
require.NoError(t, err)
|
||||
defer batch.Close()
|
||||
_, _ = batch.QueryInfo("e2129701f1a4d54dc44f03c93bca0a2aec7c5449")
|
||||
var c *catFileBatchCommunicator
|
||||
switch b := batch.(type) {
|
||||
case *catFileBatchLegacy:
|
||||
c = b.batchCheck
|
||||
_, _ = c.reqWriter.Write([]byte("in-complete-line-"))
|
||||
case *catFileBatchCommand:
|
||||
c = b.batch
|
||||
_, _ = c.reqWriter.Write([]byte("info"))
|
||||
default:
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Go(func() {
|
||||
time.Sleep(pipeReadDelay)
|
||||
var n int
|
||||
n, errRead = c.respReader.Read(make([]byte, 100))
|
||||
assert.Zero(t, n)
|
||||
})
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
c.debugGitCmd.DebugKill()
|
||||
wg.Wait()
|
||||
return errRead
|
||||
}
|
||||
|
||||
t.Run("QueryTerminated", func(t *testing.T) {
|
||||
err := simulateQueryTerminated(0, 20*time.Millisecond)
|
||||
assert.ErrorIs(t, err, os.ErrClosed) // pipes are closed faster
|
||||
err = simulateQueryTerminated(40*time.Millisecond, 20*time.Millisecond)
|
||||
assert.ErrorIs(t, err, io.EOF) // reader is faster
|
||||
})
|
||||
|
||||
batch, err := NewBatch(t.Context(), filepath.Join(testReposDir, "repo1_bare"))
|
||||
require.NoError(t, err)
|
||||
defer batch.Close()
|
||||
@ -60,30 +101,4 @@ func testCatFileBatch(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "file1\n", string(content))
|
||||
})
|
||||
|
||||
t.Run("QueryTerminated", func(t *testing.T) {
|
||||
var c *catFileBatchCommunicator
|
||||
switch b := batch.(type) {
|
||||
case *catFileBatchLegacy:
|
||||
c = b.batchCheck
|
||||
_, _ = c.reqWriter.Write([]byte("in-complete-line-"))
|
||||
case *catFileBatchCommand:
|
||||
c = b.batch
|
||||
_, _ = c.reqWriter.Write([]byte("info"))
|
||||
default:
|
||||
t.FailNow()
|
||||
return
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Go(func() {
|
||||
buf := make([]byte, 100)
|
||||
_, _ = c.respReader.Read(buf)
|
||||
n, errRead := c.respReader.Read(buf)
|
||||
assert.Zero(t, n)
|
||||
assert.ErrorIs(t, errRead, io.EOF) // the pipe is closed due to command being killed
|
||||
})
|
||||
c.debugGitCmd.DebugKill()
|
||||
wg.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
@ -9,6 +9,14 @@ import (
|
||||
)
|
||||
|
||||
type PipeBufferReader interface {
|
||||
// Read should be used in the same goroutine as command's Wait
|
||||
// When Reader in one goroutine, command's Wait in another goroutine, then the command exits, the pipe will be closed:
|
||||
// * If the Reader goroutine reads faster, it will read all remaining data and then get io.EOF
|
||||
// * But this io.EOF doesn't mean the Reader has gotten complete data, the data might still be corrupted
|
||||
// * If the Reader goroutine reads slower, it will get os.ErrClosed because the os.Pipe is closed ahead when the command exits
|
||||
//
|
||||
// When using 2 goroutines, no clear solution to distinguish these two cases or make Reader knows whether the data is complete
|
||||
// It should avoid using Reader in a different goroutine than the command if the Read error needs to be handled.
|
||||
Read(p []byte) (n int, err error)
|
||||
Bytes() []byte
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@ import (
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
git_model "code.gitea.io/gitea/models/git"
|
||||
@ -123,8 +124,14 @@ type DiffHTMLOperation struct {
|
||||
// BlobExcerptChunkSize represent max lines of excerpt
|
||||
const BlobExcerptChunkSize = 20
|
||||
|
||||
// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff"
|
||||
const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024
|
||||
// Chroma seems extremely slow when highlighting large files, it might take dozens or hundreds of milliseconds.
|
||||
// When fully highlighting a diff with a lot of large files, it would take many seconds or even dozens of seconds.
|
||||
// So, don't highlight the entire file if it's too large, or highlighting takes too long.
|
||||
// When there is no full-file highlighting, the legacy "line-by-line" highlighting is still applied as the fallback.
|
||||
const (
|
||||
MaxFullFileHighlightSizeLimit = 256 * 1024
|
||||
MaxFullFileHighlightTimeLimit = 2 * time.Second
|
||||
)
|
||||
|
||||
// GetType returns the type of DiffLine.
|
||||
func (d *DiffLine) GetType() int {
|
||||
@ -564,7 +571,7 @@ func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string
|
||||
if err != nil {
|
||||
return 0, nil
|
||||
}
|
||||
w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1}
|
||||
w := &limitByteWriter{limit: MaxFullFileHighlightSizeLimit + 1}
|
||||
lineCount, err = blob.GetBlobLineCount(w)
|
||||
if err != nil {
|
||||
return 0, nil
|
||||
@ -1317,6 +1324,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
|
||||
return nil, err
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage, attribute.Diff})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -1356,7 +1365,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
|
||||
diffFile.Sections = append(diffFile.Sections, tailSection)
|
||||
}
|
||||
|
||||
shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
|
||||
shouldFullFileHighlight := attrDiff.Value() == "" // only do highlight if no custom diff command
|
||||
shouldFullFileHighlight = shouldFullFileHighlight && time.Since(startTime) < MaxFullFileHighlightTimeLimit
|
||||
if shouldFullFileHighlight {
|
||||
if limitedContent.LeftContent != nil {
|
||||
diffFile.highlightedLeftLines.value = highlightCodeLinesForDiffFile(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
|
||||
@ -1380,7 +1390,7 @@ func highlightCodeLinesForDiffFile(diffFile *DiffFile, isLeft bool, rawContent [
|
||||
}
|
||||
|
||||
func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool, rawContent []byte) map[int]template.HTML {
|
||||
if setting.Git.DisableDiffHighlight || len(rawContent) > MaxDiffHighlightEntireFileSize {
|
||||
if setting.Git.DisableDiffHighlight || len(rawContent) > MaxFullFileHighlightSizeLimit {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@ -298,15 +298,21 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s
|
||||
return res.String()
|
||||
}
|
||||
|
||||
func (hcd *highlightCodeDiff) extractNextPlaceholder(buf []byte, lastIdx int) (idx int, placeholder rune, runeLen int, token string) {
|
||||
for idx = lastIdx; idx < len(buf); {
|
||||
placeholder, runeLen = utf8.DecodeRune(buf[idx:])
|
||||
if token = hcd.placeholderTokenMap[placeholder]; token != "" {
|
||||
break
|
||||
// recoverOneRune tries to recover one rune
|
||||
// * if the rune is a placeholder, it will be recovered to the corresponding content
|
||||
// * otherwise it will be returned as is
|
||||
func (hcd *highlightCodeDiff) recoverOneRune(buf []byte) (r rune, runeLen int, isSingleTag bool, recovered string) {
|
||||
r, runeLen = utf8.DecodeRune(buf)
|
||||
token := hcd.placeholderTokenMap[r]
|
||||
if token == "" {
|
||||
return r, runeLen, false, "" // rune itself, not a placeholder
|
||||
} else if token[0] == '<' {
|
||||
if token[1] == '<' {
|
||||
return 0, runeLen, false, token[1 : len(token)-1] // full tag `<<span>content</span>>`, recover to `<span>content</span>`
|
||||
}
|
||||
idx += runeLen
|
||||
return r, runeLen, true, token // single tag
|
||||
}
|
||||
return idx, placeholder, runeLen, token
|
||||
return 0, runeLen, false, token // HTML entity
|
||||
}
|
||||
|
||||
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
|
||||
@ -315,49 +321,65 @@ func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
|
||||
var diffCodeOpenTag string
|
||||
diffCodeCloseTag := hcd.placeholderTokenMap[hcd.diffCodeClose]
|
||||
strBytes := util.UnsafeStringToBytes(str)
|
||||
|
||||
// this loop is slightly longer than expected, for performance consideration
|
||||
for idx := 0; idx < len(strBytes); {
|
||||
newIdx, placeholder, lastRuneLen, token := hcd.extractNextPlaceholder(strBytes, idx)
|
||||
if newIdx != idx {
|
||||
// take a look at the next rune
|
||||
r, runeLen, isSingleTag, recovered := hcd.recoverOneRune(strBytes[idx:])
|
||||
idx += runeLen
|
||||
|
||||
// loop section 1: if it isn't a single tag, then try to find the following runes until the next single tag, and recover them together
|
||||
if !isSingleTag {
|
||||
if diffCodeOpenTag != "" {
|
||||
// start the "added/removed diff tag" if the current token is in the diff part
|
||||
sb.WriteString(diffCodeOpenTag)
|
||||
sb.Write(strBytes[idx:newIdx])
|
||||
sb.WriteString(diffCodeCloseTag)
|
||||
} else {
|
||||
sb.Write(strBytes[idx:newIdx])
|
||||
}
|
||||
} // else: no text content before, the current token is a placeholder
|
||||
if token == "" {
|
||||
break // reaches the string end, no more placeholder
|
||||
}
|
||||
idx = newIdx + lastRuneLen
|
||||
|
||||
// for HTML entity
|
||||
if token[0] == '&' {
|
||||
sb.WriteString(token)
|
||||
continue
|
||||
}
|
||||
|
||||
// for various HTML tags
|
||||
var recovered string
|
||||
if token[1] == '<' { // full tag `<<span>content</span>>`, recover to `<span>content</span>`
|
||||
recovered = token[1 : len(token)-1]
|
||||
if diffCodeOpenTag != "" {
|
||||
recovered = diffCodeOpenTag + recovered + diffCodeCloseTag
|
||||
} // else: just use the recovered content
|
||||
} else if token[1] != '/' { // opening tag
|
||||
if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
|
||||
diffCodeOpenTag = token
|
||||
if recovered != "" {
|
||||
sb.WriteString(recovered)
|
||||
} else {
|
||||
sb.WriteRune(r)
|
||||
}
|
||||
// inner loop to recover following runes until the next single tag
|
||||
for idx < len(strBytes) {
|
||||
r, runeLen, isSingleTag, recovered = hcd.recoverOneRune(strBytes[idx:])
|
||||
idx += runeLen
|
||||
if isSingleTag {
|
||||
break
|
||||
}
|
||||
if recovered != "" {
|
||||
sb.WriteString(recovered)
|
||||
} else {
|
||||
sb.WriteRune(r)
|
||||
}
|
||||
}
|
||||
if diffCodeOpenTag != "" {
|
||||
// end the "added/removed diff tag" if the current token is in the diff part
|
||||
sb.WriteString(diffCodeCloseTag)
|
||||
}
|
||||
}
|
||||
|
||||
if !isSingleTag {
|
||||
break // the inner loop has already consumed all remaining runes, no more single tag found
|
||||
}
|
||||
|
||||
// loop section 2: for opening/closing HTML tags
|
||||
placeholder := r
|
||||
if recovered[1] != '/' { // opening tag
|
||||
if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
|
||||
diffCodeOpenTag = recovered
|
||||
recovered = ""
|
||||
} else {
|
||||
recovered = token
|
||||
tagStack = append(tagStack, recovered)
|
||||
}
|
||||
} else { // closing tag
|
||||
if placeholder == hcd.diffCodeClose {
|
||||
diffCodeOpenTag = "" // the highlighted diff is closed, no more diff
|
||||
recovered = ""
|
||||
} else if len(tagStack) != 0 {
|
||||
recovered = token
|
||||
tagStack = tagStack[:len(tagStack)-1]
|
||||
} // else: if no opening tag in stack yet, skip the closing tag
|
||||
} else {
|
||||
recovered = ""
|
||||
}
|
||||
}
|
||||
sb.WriteString(recovered)
|
||||
}
|
||||
|
||||
@ -77,15 +77,13 @@ func TestDiffWithHighlight(t *testing.T) {
|
||||
|
||||
t.Run("ComplexDiff1", func(t *testing.T) {
|
||||
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
|
||||
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`)
|
||||
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
|
||||
hcd := newHighlightCodeDiff()
|
||||
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
|
||||
assert.Equal(t, strings.ReplaceAll(`
|
||||
<span class="added-code"><span class="nx">bot</span></span>
|
||||
<span class="added-code"><span class="p">.</span></span>
|
||||
<span class="added-code"><span class="nx">bot</span></span><span class="o"><span class="added-code">&</span></span>
|
||||
<span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span>
|
||||
<span class="added-code"><span class="nx">bot</span></span>
|
||||
<span class="added-code"><span class="p">.</span></span>
|
||||
<span class="added-code"><span class="nx">bot</span></span><span class="o"><span class="added-code">&</span></span>
|
||||
<span class="nx">yyy</span>`, "\n", ""), string(out))
|
||||
})
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user