mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-04 12:53:43 +01:00 
			
		
		
		
	Reduce calls to git cat-file -s (#14682)
* Reduce calls to git cat-file -s There are multiple places where there are repeated calls to git cat-file -s due to the blobs not being created with their size. Through judicious use of git ls-tree -l and slight adjustments to the indexer code we can avoid a lot of these calls. * simplify by always expecting the long format * Also always set the sized field and tell the indexer the update is sized
This commit is contained in:
		
							parent
							
								
									7ba158183a
								
							
						
					
					
						commit
						ae7e6cd474
					
				@ -10,12 +10,13 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"github.com/go-git/go-git/v5/plumbing/filemode"
 | 
			
		||||
	"github.com/go-git/go-git/v5/plumbing/object"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ParseTreeEntries parses the output of a `git ls-tree` command.
 | 
			
		||||
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
 | 
			
		||||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 | 
			
		||||
	return parseTreeEntries(data, nil)
 | 
			
		||||
}
 | 
			
		||||
@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 | 
			
		||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
			
		||||
	entries := make([]*TreeEntry, 0, 10)
 | 
			
		||||
	for pos := 0; pos < len(data); {
 | 
			
		||||
		// expect line to be of the form "<mode> <type> <sha>\t<filename>"
 | 
			
		||||
		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
 | 
			
		||||
		entry := new(TreeEntry)
 | 
			
		||||
		entry.gogitTreeEntry = &object.TreeEntry{}
 | 
			
		||||
		entry.ptree = ptree
 | 
			
		||||
@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
			
		||||
		entry.gogitTreeEntry.Hash = id
 | 
			
		||||
		pos += 41 // skip over sha and trailing space
 | 
			
		||||
 | 
			
		||||
		end := pos + bytes.IndexByte(data[pos:], '\n')
 | 
			
		||||
		end := pos + bytes.IndexByte(data[pos:], '\t')
 | 
			
		||||
		if end < pos {
 | 
			
		||||
			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
 | 
			
		||||
		}
 | 
			
		||||
		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
 | 
			
		||||
		entry.sized = true
 | 
			
		||||
 | 
			
		||||
		pos = end + 1
 | 
			
		||||
 | 
			
		||||
		end = pos + bytes.IndexByte(data[pos:], '\n')
 | 
			
		||||
		if end < pos {
 | 
			
		||||
			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) {
 | 
			
		||||
			Expected: []*TreeEntry{},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
 | 
			
		||||
			Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c    1022\texample/file2.txt\n",
 | 
			
		||||
			Expected: []*TreeEntry{
 | 
			
		||||
				{
 | 
			
		||||
					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
 | 
			
		||||
@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) {
 | 
			
		||||
						Name: "example/file2.txt",
 | 
			
		||||
						Mode: filemode.Regular,
 | 
			
		||||
					},
 | 
			
		||||
					size:  1022,
 | 
			
		||||
					sized: true,
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" +
 | 
			
		||||
				"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
 | 
			
		||||
			Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c  234131\t\"example/\\n.txt\"\n" +
 | 
			
		||||
				"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8       -\texample\n",
 | 
			
		||||
			Expected: []*TreeEntry{
 | 
			
		||||
				{
 | 
			
		||||
					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
 | 
			
		||||
@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) {
 | 
			
		||||
						Name: "example/\n.txt",
 | 
			
		||||
						Mode: filemode.Symlink,
 | 
			
		||||
					},
 | 
			
		||||
					size:  234131,
 | 
			
		||||
					sized: true,
 | 
			
		||||
				},
 | 
			
		||||
				{
 | 
			
		||||
					ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
 | 
			
		||||
					ID:    MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
 | 
			
		||||
					sized: true,
 | 
			
		||||
					gogitTreeEntry: &object.TreeEntry{
 | 
			
		||||
						Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
 | 
			
		||||
						Name: "example",
 | 
			
		||||
 | 
			
		||||
@ -10,9 +10,10 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"strings"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ParseTreeEntries parses the output of a `git ls-tree` command.
 | 
			
		||||
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
 | 
			
		||||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 | 
			
		||||
	return parseTreeEntries(data, nil)
 | 
			
		||||
}
 | 
			
		||||
@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 | 
			
		||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
			
		||||
	entries := make([]*TreeEntry, 0, 10)
 | 
			
		||||
	for pos := 0; pos < len(data); {
 | 
			
		||||
		// expect line to be of the form "<mode> <type> <sha>\t<filename>"
 | 
			
		||||
		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
 | 
			
		||||
		entry := new(TreeEntry)
 | 
			
		||||
		entry.ptree = ptree
 | 
			
		||||
		if pos+6 > len(data) {
 | 
			
		||||
@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 | 
			
		||||
		entry.ID = id
 | 
			
		||||
		pos += 41 // skip over sha and trailing space
 | 
			
		||||
 | 
			
		||||
		end := pos + bytes.IndexByte(data[pos:], '\n')
 | 
			
		||||
		end := pos + bytes.IndexByte(data[pos:], '\t')
 | 
			
		||||
		if end < pos {
 | 
			
		||||
			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
 | 
			
		||||
		}
 | 
			
		||||
		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
 | 
			
		||||
		entry.sized = true
 | 
			
		||||
 | 
			
		||||
		pos = end + 1
 | 
			
		||||
 | 
			
		||||
		end = pos + bytes.IndexByte(data[pos:], '\n')
 | 
			
		||||
		if end < pos {
 | 
			
		||||
			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										70
									
								
								modules/git/parse_nogogit_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								modules/git/parse_nogogit_test.go
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,70 @@
 | 
			
		||||
// Copyright 2021 The Gitea Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a MIT-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !gogit
 | 
			
		||||
 | 
			
		||||
package git
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	"github.com/stretchr/testify/assert"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func TestParseTreeEntries(t *testing.T) {
 | 
			
		||||
 | 
			
		||||
	testCases := []struct {
 | 
			
		||||
		Input    string
 | 
			
		||||
		Expected []*TreeEntry
 | 
			
		||||
	}{
 | 
			
		||||
		{
 | 
			
		||||
			Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af    8218	README.md
 | 
			
		||||
100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35    4681	README_ZH.md
 | 
			
		||||
100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8     429	SECURITY.md
 | 
			
		||||
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d       -	assets
 | 
			
		||||
`,
 | 
			
		||||
			Expected: []*TreeEntry{
 | 
			
		||||
				{
 | 
			
		||||
					ID:        MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
 | 
			
		||||
					name:      "README.md",
 | 
			
		||||
					entryMode: EntryModeBlob,
 | 
			
		||||
					size:      8218,
 | 
			
		||||
					sized:     true,
 | 
			
		||||
				},
 | 
			
		||||
				{
 | 
			
		||||
					ID:        MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"),
 | 
			
		||||
					name:      "README_ZH.md",
 | 
			
		||||
					entryMode: EntryModeBlob,
 | 
			
		||||
					size:      4681,
 | 
			
		||||
					sized:     true,
 | 
			
		||||
				},
 | 
			
		||||
				{
 | 
			
		||||
					ID:        MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"),
 | 
			
		||||
					name:      "SECURITY.md",
 | 
			
		||||
					entryMode: EntryModeBlob,
 | 
			
		||||
					size:      429,
 | 
			
		||||
					sized:     true,
 | 
			
		||||
				},
 | 
			
		||||
				{
 | 
			
		||||
					ID:        MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
 | 
			
		||||
					name:      "assets",
 | 
			
		||||
					entryMode: EntryModeTree,
 | 
			
		||||
					sized:     true,
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
	for _, testCase := range testCases {
 | 
			
		||||
		entries, err := ParseTreeEntries([]byte(testCase.Input))
 | 
			
		||||
		assert.NoError(t, err)
 | 
			
		||||
		assert.EqualValues(t, len(testCase.Expected), len(entries))
 | 
			
		||||
		for i, entry := range entries {
 | 
			
		||||
			assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
 | 
			
		||||
			assert.EqualValues(t, testCase.Expected[i].name, entry.name)
 | 
			
		||||
			assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
 | 
			
		||||
			assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
 | 
			
		||||
			assert.EqualValues(t, testCase.Expected[i].size, entry.size)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob {
 | 
			
		||||
		ID:       te.ID,
 | 
			
		||||
		repoPath: te.ptree.repo.Path,
 | 
			
		||||
		name:     te.Name(),
 | 
			
		||||
		size:     te.size,
 | 
			
		||||
		gotSize:  te.sized,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) {
 | 
			
		||||
		return t.entries, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path)
 | 
			
		||||
	stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
 | 
			
		||||
			return nil, ErrNotExist{
 | 
			
		||||
@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
 | 
			
		||||
	if t.entriesRecursiveParsed {
 | 
			
		||||
		return t.entriesRecursive, nil
 | 
			
		||||
	}
 | 
			
		||||
	stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
 | 
			
		||||
	stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
 | 
			
		||||
		RunInDir(repo.RepoPath())
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	size := update.Size
 | 
			
		||||
 | 
			
		||||
	if !update.Sized {
 | 
			
		||||
		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
 | 
			
		||||
			RunInDir(repo.RepoPath())
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
 | 
			
		||||
			return fmt.Errorf("Misformatted git cat-file output: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
 | 
			
		||||
		return fmt.Errorf("Misformatted git cat-file output: %v", err)
 | 
			
		||||
	} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
 | 
			
		||||
 | 
			
		||||
	if size > setting.Indexer.MaxIndexerFileSize {
 | 
			
		||||
		return b.addDelete(update.Filename, repo, batch)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
 | 
			
		||||
		return nil, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
 | 
			
		||||
		RunInDir(repo.RepoPath())
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	size := update.Size
 | 
			
		||||
 | 
			
		||||
	if !update.Sized {
 | 
			
		||||
		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
 | 
			
		||||
			RunInDir(repo.RepoPath())
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
		if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
 | 
			
		||||
			return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
 | 
			
		||||
	} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
 | 
			
		||||
 | 
			
		||||
	if size > setting.Indexer.MaxIndexerFileSize {
 | 
			
		||||
		return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -17,6 +17,8 @@ import (
 | 
			
		||||
type fileUpdate struct {
 | 
			
		||||
	Filename string
 | 
			
		||||
	BlobSha  string
 | 
			
		||||
	Size     int64
 | 
			
		||||
	Sized    bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// repoChanges changes (file additions/updates/removals) to a repo
 | 
			
		||||
@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
 | 
			
		||||
			updates[idxCount] = fileUpdate{
 | 
			
		||||
				Filename: entry.Name(),
 | 
			
		||||
				BlobSha:  entry.ID.String(),
 | 
			
		||||
				Size:     entry.Size(),
 | 
			
		||||
				Sized:    true,
 | 
			
		||||
			}
 | 
			
		||||
			idxCount++
 | 
			
		||||
		}
 | 
			
		||||
@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
 | 
			
		||||
// genesisChanges get changes to add repo to the indexer for the first time
 | 
			
		||||
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
 | 
			
		||||
	var changes repoChanges
 | 
			
		||||
	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
 | 
			
		||||
	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
 | 
			
		||||
		RunInDirBytes(repo.RepoPath())
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
 | 
			
		||||
	cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
 | 
			
		||||
	cmd.AddArguments(updatedFilenames...)
 | 
			
		||||
	lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
 | 
			
		||||
	if err != nil {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user