From 0db554fa634737af59613768b2e01bfe3e239e68 Mon Sep 17 00:00:00 2001
From: wxiaoguang <wxiaoguang@gmail.com>
Date: Tue, 2 Apr 2024 04:23:17 +0800
Subject: [PATCH] Refactor commit signature parser (#30228)

To make it more flexible and support SSH signature.

The existing tests are not changed, there are also tests covering
`parseTagRef` which also calls `parsePayloadSignature` now. Add some new
tests to `Test_parseTagData`
---
 modules/git/commit.go               |   6 +-
 modules/git/commit_convert_gogit.go |   4 +-
 modules/git/commit_reader.go        |   2 +-
 modules/git/repo_tag.go             |  10 ++-
 modules/git/repo_tag_test.go        |   2 +-
 modules/git/tag.go                  | 104 +++++++++++++++------------
 modules/git/tag_test.go             | 106 +++++++++++++++++-----------
 7 files changed, 135 insertions(+), 99 deletions(-)

diff --git a/modules/git/commit.go b/modules/git/commit.go
index ef2676762c..5f442b0e1a 100644
--- a/modules/git/commit.go
+++ b/modules/git/commit.go
@@ -26,14 +26,14 @@ type Commit struct {
 	Author        *Signature
 	Committer     *Signature
 	CommitMessage string
-	Signature     *CommitGPGSignature
+	Signature     *CommitSignature
 
 	Parents        []ObjectID // ID strings
 	submoduleCache *ObjectCache
 }
 
-// CommitGPGSignature represents a git commit signature part.
-type CommitGPGSignature struct {
+// CommitSignature represents a git commit signature part.
+type CommitSignature struct {
 	Signature string
 	Payload   string // TODO check if can be reconstruct from the rest of commit information to not have duplicate data
 }
diff --git a/modules/git/commit_convert_gogit.go b/modules/git/commit_convert_gogit.go
index 33ef2f4487..d7b945ed6b 100644
--- a/modules/git/commit_convert_gogit.go
+++ b/modules/git/commit_convert_gogit.go
@@ -13,7 +13,7 @@ import (
 	"github.com/go-git/go-git/v5/plumbing/object"
 )
 
-func convertPGPSignature(c *object.Commit) *CommitGPGSignature {
+func convertPGPSignature(c *object.Commit) *CommitSignature {
 	if c.PGPSignature == "" {
 		return nil
 	}
@@ -57,7 +57,7 @@ func convertPGPSignature(c *object.Commit) *CommitGPGSignature {
 		return nil
 	}
 
-	return &CommitGPGSignature{
+	return &CommitSignature{
 		Signature: c.PGPSignature,
 		Payload:   w.String(),
 	}
diff --git a/modules/git/commit_reader.go b/modules/git/commit_reader.go
index 56c41dc473..f1f4a0e588 100644
--- a/modules/git/commit_reader.go
+++ b/modules/git/commit_reader.go
@@ -99,7 +99,7 @@ readLoop:
 		}
 	}
 	commit.CommitMessage = messageSB.String()
-	commit.Signature = &CommitGPGSignature{
+	commit.Signature = &CommitSignature{
 		Signature: signatureSB.String(),
 		Payload:   payloadSB.String(),
 	}
diff --git a/modules/git/repo_tag.go b/modules/git/repo_tag.go
index e8c5ce6fb8..2026a4c9f5 100644
--- a/modules/git/repo_tag.go
+++ b/modules/git/repo_tag.go
@@ -185,17 +185,15 @@ func parseTagRef(ref map[string]string) (tag *Tag, err error) {
 
 	tag.Tagger = parseSignatureFromCommitLine(ref["creator"])
 	tag.Message = ref["contents"]
-	// strip PGP signature if present in contents field
-	pgpStart := strings.Index(tag.Message, beginpgp)
-	if pgpStart >= 0 {
-		tag.Message = tag.Message[0:pgpStart]
-	}
+
+	// strip any signature if present in contents field
+	_, tag.Message, _ = parsePayloadSignature(util.UnsafeStringToBytes(tag.Message), 0)
 
 	// annotated tag with GPG signature
 	if tag.Type == "tag" && ref["contents:signature"] != "" {
 		payload := fmt.Sprintf("object %s\ntype commit\ntag %s\ntagger %s\n\n%s\n",
 			tag.Object, tag.Name, ref["creator"], strings.TrimSpace(tag.Message))
-		tag.Signature = &CommitGPGSignature{
+		tag.Signature = &CommitSignature{
 			Signature: ref["contents:signature"],
 			Payload:   payload,
 		}
diff --git a/modules/git/repo_tag_test.go b/modules/git/repo_tag_test.go
index 785c3442a7..0117cb902d 100644
--- a/modules/git/repo_tag_test.go
+++ b/modules/git/repo_tag_test.go
@@ -315,7 +315,7 @@ qbHDASXl
 				Type:    "tag",
 				Tagger:  parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
 				Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md",
-				Signature: &CommitGPGSignature{
+				Signature: &CommitSignature{
 					Signature: `-----BEGIN PGP SIGNATURE-----
 
 aBCGzBAABCgAdFiEEyWRwv/q1Q6IjSv+D4IPOwzt33PoFAmI8jbIACgkQ4IPOwzt3
diff --git a/modules/git/tag.go b/modules/git/tag.go
index 94e5cd7c63..f7666aa89b 100644
--- a/modules/git/tag.go
+++ b/modules/git/tag.go
@@ -6,16 +6,10 @@ package git
 import (
 	"bytes"
 	"sort"
-	"strings"
 
 	"code.gitea.io/gitea/modules/util"
 )
 
-const (
-	beginpgp = "\n-----BEGIN PGP SIGNATURE-----\n"
-	endpgp   = "\n-----END PGP SIGNATURE-----"
-)
-
 // Tag represents a Git tag.
 type Tag struct {
 	Name      string
@@ -24,7 +18,7 @@ type Tag struct {
 	Type      string
 	Tagger    *Signature
 	Message   string
-	Signature *CommitGPGSignature
+	Signature *CommitSignature
 }
 
 // Commit return the commit of the tag reference
@@ -32,6 +26,36 @@ func (tag *Tag) Commit(gitRepo *Repository) (*Commit, error) {
 	return gitRepo.getCommit(tag.Object)
 }
 
+func parsePayloadSignature(data []byte, messageStart int) (payload, msg, sign string) {
+	pos := messageStart
+	signStart, signEnd := -1, -1
+	for {
+		eol := bytes.IndexByte(data[pos:], '\n')
+		if eol < 0 {
+			break
+		}
+		line := data[pos : pos+eol]
+		signType, hasPrefix := bytes.CutPrefix(line, []byte("-----BEGIN "))
+		signType, hasSuffix := bytes.CutSuffix(signType, []byte(" SIGNATURE-----"))
+		if hasPrefix && hasSuffix {
+			signEndBytes := append([]byte("\n-----END "), signType...)
+			signEndBytes = append(signEndBytes, []byte(" SIGNATURE-----")...)
+			signEnd = bytes.Index(data[pos:], signEndBytes)
+			if signEnd != -1 {
+				signStart = pos
+				signEnd = pos + signEnd + len(signEndBytes)
+			}
+		}
+		pos += eol + 1
+	}
+
+	if signStart != -1 && signEnd != -1 {
+		msgEnd := max(messageStart, signStart-1)
+		return string(data[:msgEnd]), string(data[messageStart:msgEnd]), string(data[signStart:signEnd])
+	}
+	return string(data), string(data[messageStart:]), ""
+}
+
 // Parse commit information from the (uncompressed) raw
 // data from the commit object.
 // \n\n separate headers from message
@@ -40,47 +64,37 @@ func parseTagData(objectFormat ObjectFormat, data []byte) (*Tag, error) {
 	tag.ID = objectFormat.EmptyObjectID()
 	tag.Object = objectFormat.EmptyObjectID()
 	tag.Tagger = &Signature{}
-	// we now have the contents of the commit object. Let's investigate...
-	nextline := 0
-l:
+
+	pos := 0
 	for {
-		eol := bytes.IndexByte(data[nextline:], '\n')
-		switch {
-		case eol > 0:
-			line := data[nextline : nextline+eol]
-			spacepos := bytes.IndexByte(line, ' ')
-			reftype := line[:spacepos]
-			switch string(reftype) {
-			case "object":
-				id, err := NewIDFromString(string(line[spacepos+1:]))
-				if err != nil {
-					return nil, err
-				}
-				tag.Object = id
-			case "type":
-				// A commit can have one or more parents
-				tag.Type = string(line[spacepos+1:])
-			case "tagger":
-				tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(line[spacepos+1:]))
-			}
-			nextline += eol + 1
-		case eol == 0:
-			tag.Message = string(data[nextline+1:])
-			break l
-		default:
-			break l
+		eol := bytes.IndexByte(data[pos:], '\n')
+		if eol == -1 {
+			break // shouldn't happen, but could just tolerate it
 		}
+		if eol == 0 {
+			pos++
+			break // end of headers
+		}
+		line := data[pos : pos+eol]
+		key, val, _ := bytes.Cut(line, []byte(" "))
+		switch string(key) {
+		case "object":
+			id, err := NewIDFromString(string(val))
+			if err != nil {
+				return nil, err
+			}
+			tag.Object = id
+		case "type":
+			tag.Type = string(val) // A commit can have one or more parents
+		case "tagger":
+			tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(val))
+		}
+		pos += eol + 1
 	}
-	idx := strings.LastIndex(tag.Message, beginpgp)
-	if idx > 0 {
-		endSigIdx := strings.Index(tag.Message[idx:], endpgp)
-		if endSigIdx > 0 {
-			tag.Signature = &CommitGPGSignature{
-				Signature: tag.Message[idx+1 : idx+endSigIdx+len(endpgp)],
-				Payload:   string(data[:bytes.LastIndex(data, []byte(beginpgp))+1]),
-			}
-			tag.Message = tag.Message[:idx+1]
-		}
+	payload, msg, sign := parsePayloadSignature(data, pos)
+	tag.Message = msg
+	if len(sign) > 0 {
+		tag.Signature = &CommitSignature{Signature: sign, Payload: payload}
 	}
 	return tag, nil
 }
diff --git a/modules/git/tag_test.go b/modules/git/tag_test.go
index f980b0c560..ba02c28946 100644
--- a/modules/git/tag_test.go
+++ b/modules/git/tag_test.go
@@ -12,24 +12,28 @@ import (
 
 func Test_parseTagData(t *testing.T) {
 	testData := []struct {
-		data []byte
-		tag  Tag
+		data     string
+		expected Tag
 	}{
-		{data: []byte(`object 3b114ab800c6432ad42387ccf6bc8d4388a2885a
+		{
+			data: `object 3b114ab800c6432ad42387ccf6bc8d4388a2885a
 type commit
 tag 1.22.0
 tagger Lucas Michot <lucas@semalead.com> 1484491741 +0100
 
-`), tag: Tag{
-			Name:      "",
-			ID:        Sha1ObjectFormat.EmptyObjectID(),
-			Object:    &Sha1Hash{0x3b, 0x11, 0x4a, 0xb8, 0x0, 0xc6, 0x43, 0x2a, 0xd4, 0x23, 0x87, 0xcc, 0xf6, 0xbc, 0x8d, 0x43, 0x88, 0xa2, 0x88, 0x5a},
-			Type:      "commit",
-			Tagger:    &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484491741, 0)},
-			Message:   "",
-			Signature: nil,
-		}},
-		{data: []byte(`object 7cdf42c0b1cc763ab7e4c33c47a24e27c66bfccc
+`,
+			expected: Tag{
+				Name:      "",
+				ID:        Sha1ObjectFormat.EmptyObjectID(),
+				Object:    MustIDFromString("3b114ab800c6432ad42387ccf6bc8d4388a2885a"),
+				Type:      "commit",
+				Tagger:    &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484491741, 0).In(time.FixedZone("", 3600))},
+				Message:   "",
+				Signature: nil,
+			},
+		},
+		{
+			data: `object 7cdf42c0b1cc763ab7e4c33c47a24e27c66bfccc
 type commit
 tag 1.22.1
 tagger Lucas Michot <lucas@semalead.com> 1484553735 +0100
@@ -37,37 +41,57 @@ tagger Lucas Michot <lucas@semalead.com> 1484553735 +0100
 test message
 o
 
-ono`), tag: Tag{
-			Name:      "",
-			ID:        Sha1ObjectFormat.EmptyObjectID(),
-			Object:    &Sha1Hash{0x7c, 0xdf, 0x42, 0xc0, 0xb1, 0xcc, 0x76, 0x3a, 0xb7, 0xe4, 0xc3, 0x3c, 0x47, 0xa2, 0x4e, 0x27, 0xc6, 0x6b, 0xfc, 0xcc},
-			Type:      "commit",
-			Tagger:    &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484553735, 0)},
-			Message:   "test message\no\n\nono",
-			Signature: nil,
-		}},
+ono`,
+			expected: Tag{
+				Name:      "",
+				ID:        Sha1ObjectFormat.EmptyObjectID(),
+				Object:    MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfccc"),
+				Type:      "commit",
+				Tagger:    &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484553735, 0).In(time.FixedZone("", 3600))},
+				Message:   "test message\no\n\nono",
+				Signature: nil,
+			},
+		},
+		{
+			data: `object 7cdf42c0b1cc763ab7e4c33c47a24e27c66bfaaa
+type commit
+tag v0
+tagger dummy user <dummy-email@example.com> 1484491741 +0100
+
+dummy message
+-----BEGIN SSH SIGNATURE-----
+dummy signature
+-----END SSH SIGNATURE-----
+`,
+			expected: Tag{
+				Name:    "",
+				ID:      Sha1ObjectFormat.EmptyObjectID(),
+				Object:  MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfaaa"),
+				Type:    "commit",
+				Tagger:  &Signature{Name: "dummy user", Email: "dummy-email@example.com", When: time.Unix(1484491741, 0).In(time.FixedZone("", 3600))},
+				Message: "dummy message",
+				Signature: &CommitSignature{
+					Signature: `-----BEGIN SSH SIGNATURE-----
+dummy signature
+-----END SSH SIGNATURE-----`,
+					Payload: `object 7cdf42c0b1cc763ab7e4c33c47a24e27c66bfaaa
+type commit
+tag v0
+tagger dummy user <dummy-email@example.com> 1484491741 +0100
+
+dummy message`,
+				},
+			},
+		},
 	}
 
 	for _, test := range testData {
-		tag, err := parseTagData(Sha1ObjectFormat, test.data)
+		tag, err := parseTagData(Sha1ObjectFormat, []byte(test.data))
 		assert.NoError(t, err)
-		assert.EqualValues(t, test.tag.ID, tag.ID)
-		assert.EqualValues(t, test.tag.Object, tag.Object)
-		assert.EqualValues(t, test.tag.Name, tag.Name)
-		assert.EqualValues(t, test.tag.Message, tag.Message)
-		assert.EqualValues(t, test.tag.Type, tag.Type)
-		if test.tag.Signature != nil && assert.NotNil(t, tag.Signature) {
-			assert.EqualValues(t, test.tag.Signature.Signature, tag.Signature.Signature)
-			assert.EqualValues(t, test.tag.Signature.Payload, tag.Signature.Payload)
-		} else {
-			assert.Nil(t, tag.Signature)
-		}
-		if test.tag.Tagger != nil && assert.NotNil(t, tag.Tagger) {
-			assert.EqualValues(t, test.tag.Tagger.Name, tag.Tagger.Name)
-			assert.EqualValues(t, test.tag.Tagger.Email, tag.Tagger.Email)
-			assert.EqualValues(t, test.tag.Tagger.When.Unix(), tag.Tagger.When.Unix())
-		} else {
-			assert.Nil(t, tag.Tagger)
-		}
+		assert.Equal(t, test.expected, *tag)
 	}
+
+	tag, err := parseTagData(Sha1ObjectFormat, []byte("type commit\n\nfoo\n-----BEGIN SSH SIGNATURE-----\ncorrupted..."))
+	assert.NoError(t, err)
+	assert.Equal(t, "foo\n-----BEGIN SSH SIGNATURE-----\ncorrupted...", tag.Message)
 }