From 703be6bf307ed19ce8dc8cd311d24aeb6e5b9861 Mon Sep 17 00:00:00 2001
From: Lunny Xiao <xiaolunwen@gmail.com>
Date: Mon, 25 Nov 2024 11:35:49 -0800
Subject: [PATCH] Add github compatible tarball download API endpoints (#32572)

Fix #29654
Fix #32481
---
 routers/api/v1/api.go                         |  2 +
 routers/api/v1/repo/download.go               | 53 +++++++++++++++++++
 routers/api/v1/repo/file.go                   | 15 ++++--
 routers/web/repo/repo.go                      | 14 ++++-
 services/repository/archiver/archiver.go      | 34 +++++++-----
 services/repository/archiver/archiver_test.go | 25 ++++-----
 tests/integration/api_repo_archive_test.go    | 40 ++++++++++++++
 7 files changed, 152 insertions(+), 31 deletions(-)
 create mode 100644 routers/api/v1/repo/download.go

diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 23f466873b..0079e8dc87 100644
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -1377,6 +1377,8 @@ func Routes() *web.Router {
 					m.Post("", bind(api.UpdateRepoAvatarOption{}), repo.UpdateAvatar)
 					m.Delete("", repo.DeleteAvatar)
 				}, reqAdmin(), reqToken())
+
+				m.Get("/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), repo.DownloadArchive)
 			}, repoAssignment(), checkTokenPublicOnly())
 		}, tokenRequiresScopes(auth_model.AccessTokenScopeCategoryRepository))
 
diff --git a/routers/api/v1/repo/download.go b/routers/api/v1/repo/download.go
new file mode 100644
index 0000000000..3620c1465f
--- /dev/null
+++ b/routers/api/v1/repo/download.go
@@ -0,0 +1,67 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repo
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+
+	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/gitrepo"
+	"code.gitea.io/gitea/services/context"
+	archiver_service "code.gitea.io/gitea/services/repository/archiver"
+)
+
+// DownloadArchive serves a repository archive for the
+// "/{tarball|zipball|bundle}/*" API routes. The archive format comes from the
+// "ball_type" path parameter and the ref to archive from the wildcard segment.
+func DownloadArchive(ctx *context.APIContext) {
+	var tp git.ArchiveType
+	switch ballType := ctx.PathParam("ball_type"); ballType {
+	case "tarball":
+		tp = git.TARGZ
+	case "zipball":
+		tp = git.ZIP
+	case "bundle":
+		tp = git.BUNDLE
+	default:
+		ctx.Error(http.StatusBadRequest, "", fmt.Sprintf("Unknown archive type: %s", ballType))
+		return
+	}
+
+	// Open the git repository on demand when the context does not carry one.
+	if ctx.Repo.GitRepo == nil {
+		gitRepo, err := gitrepo.OpenRepository(ctx, ctx.Repo.Repository)
+		if err != nil {
+			ctx.Error(http.StatusInternalServerError, "OpenRepository", err)
+			return
+		}
+		ctx.Repo.GitRepo = gitRepo
+		defer gitRepo.Close()
+	}
+
+	r, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*"), tp)
+	if err != nil {
+		// A bad format or an unresolvable ref is the caller's mistake, so
+		// answer with a 4xx instead of a generic 500.
+		switch {
+		case errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}):
+			ctx.Error(http.StatusBadRequest, "unknown archive format", err)
+		case errors.Is(err, archiver_service.RepoRefNotFoundError{}):
+			ctx.Error(http.StatusNotFound, "unrecognized reference", err)
+		default:
+			ctx.ServerError("NewRequest", err)
+		}
+		return
+	}
+
+	archive, err := r.Await(ctx)
+	if err != nil {
+		ctx.ServerError("archive.Await", err)
+		return
+	}
+
+	download(ctx, r.GetArchiveName(), archive)
+}
diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go
index 05650cc9be..959a4b952a 100644
--- a/routers/api/v1/repo/file.go
+++ b/routers/api/v1/repo/file.go
@@ -301,7 +301,13 @@ func GetArchive(ctx *context.APIContext) {
 
 func archiveDownload(ctx *context.APIContext) {
 	uri := ctx.PathParam("*")
-	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+	ext, tp, err := archiver_service.ParseFileName(uri)
+	if err != nil {
+		ctx.Error(http.StatusBadRequest, "ParseFileName", err)
+		return
+	}
+
+	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp)
 	if err != nil {
 		if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
 			ctx.Error(http.StatusBadRequest, "unknown archive format", err)
@@ -327,9 +333,12 @@ func download(ctx *context.APIContext, archiveName string, archiver *repo_model.
 
 	// Add nix format link header so tarballs lock correctly:
 	// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
-	ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.tar.gz?rev=%s>; rel="immutable"`,
+	ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
 		ctx.Repo.Repository.APIURL(),
-		archiver.CommitID, archiver.CommitID))
+		archiver.CommitID,
+		archiver.Type.String(),
+		archiver.CommitID,
+	))
 
 	rPath := archiver.RelativePath()
 	if setting.RepoArchive.Storage.ServeDirect() {
diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go
index b62fd21585..f5e59b0357 100644
--- a/routers/web/repo/repo.go
+++ b/routers/web/repo/repo.go
@@ -464,7 +464,13 @@ func RedirectDownload(ctx *context.Context) {
 // Download an archive of a repository
 func Download(ctx *context.Context) {
 	uri := ctx.PathParam("*")
-	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+	ext, tp, err := archiver_service.ParseFileName(uri)
+	if err != nil {
+		// ParseFileName only fails on an unsupported extension: a client error, not a 500.
+		ctx.Error(http.StatusBadRequest, err.Error())
+		return
+	}
+	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp)
 	if err != nil {
 		if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
 			ctx.Error(http.StatusBadRequest, err.Error())
@@ -523,7 +528,12 @@ func download(ctx *context.Context, archiveName string, archiver *repo_model.Rep
 // kind of drop it on the floor if this is the case.
 func InitiateDownload(ctx *context.Context) {
 	uri := ctx.PathParam("*")
-	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+	ext, tp, err := archiver_service.ParseFileName(uri)
+	if err != nil {
+		ctx.ServerError("ParseFileName", err)
+		return
+	}
+	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp)
 	if err != nil {
 		ctx.ServerError("archiver_service.NewRequest", err)
 		return
diff --git a/services/repository/archiver/archiver.go b/services/repository/archiver/archiver.go
index c33369d047..e1addbed33 100644
--- a/services/repository/archiver/archiver.go
+++ b/services/repository/archiver/archiver.go
@@ -67,30 +67,41 @@ func (e RepoRefNotFoundError) Is(err error) bool {
 	return ok
 }
 
-// NewRequest creates an archival request, based on the URI.  The
-// resulting ArchiveRequest is suitable for being passed to Await()
-// if it's determined that the request still needs to be satisfied.
-func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
-	r := &ArchiveRequest{
-		RepoID: repoID,
-	}
-
-	var ext string
+// ParseFileName determines the archive type from the suffix of uri.
+// It returns the matched extension (".zip", ".tar.gz" or ".bundle") and the
+// corresponding git.ArchiveType, or an ErrUnknownArchiveFormat carrying uri
+// when none of those suffixes matches.
+func ParseFileName(uri string) (ext string, tp git.ArchiveType, err error) {
 	switch {
 	case strings.HasSuffix(uri, ".zip"):
 		ext = ".zip"
-		r.Type = git.ZIP
+		tp = git.ZIP
 	case strings.HasSuffix(uri, ".tar.gz"):
 		ext = ".tar.gz"
-		r.Type = git.TARGZ
+		tp = git.TARGZ
 	case strings.HasSuffix(uri, ".bundle"):
 		ext = ".bundle"
-		r.Type = git.BUNDLE
+		tp = git.BUNDLE
 	default:
-		return nil, ErrUnknownArchiveFormat{RequestFormat: uri}
+		return "", 0, ErrUnknownArchiveFormat{RequestFormat: uri}
+	}
+	return ext, tp, nil
+}
+
+// NewRequest creates an archival request for the given ref name and archive
+// type.  The resulting ArchiveRequest is suitable for being passed to Await()
+// if it's determined that the request still needs to be satisfied.
+func NewRequest(repoID int64, repo *git.Repository, refName string, fileType git.ArchiveType) (*ArchiveRequest, error) {
+	// NOTE(review): assumes every valid git.ArchiveType lies between git.ZIP and git.BUNDLE — confirm.
+	if fileType < git.ZIP || fileType > git.BUNDLE {
+		return nil, ErrUnknownArchiveFormat{RequestFormat: fileType.String()}
 	}
 
-	r.refName = strings.TrimSuffix(uri, ext)
+	r := &ArchiveRequest{
+		RepoID:  repoID,
+		refName: refName,
+		Type:    fileType,
+	}
 
 	// Get corresponding commit.
 	commitID, err := repo.ConvertToGitID(r.refName)
diff --git a/services/repository/archiver/archiver_test.go b/services/repository/archiver/archiver_test.go
index b3f3ed7bf3..2ab18edf49 100644
--- a/services/repository/archiver/archiver_test.go
+++ b/services/repository/archiver/archiver_test.go
@@ -10,6 +10,7 @@ import (
 
 	"code.gitea.io/gitea/models/db"
 	"code.gitea.io/gitea/models/unittest"
+	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/services/contexttest"
 
 	_ "code.gitea.io/gitea/models/actions"
@@ -31,47 +32,47 @@ func TestArchive_Basic(t *testing.T) {
 	contexttest.LoadGitRepo(t, ctx)
 	defer ctx.Repo.GitRepo.Close()
 
-	bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+	bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP)
 	assert.NoError(t, err)
 	assert.NotNil(t, bogusReq)
 	assert.EqualValues(t, firstCommit+".zip", bogusReq.GetArchiveName())
 
 	// Check a series of bogus requests.
 	// Step 1, valid commit with a bad extension.
-	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".dilbert")
+	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, 100)
 	assert.Error(t, err)
 	assert.Nil(t, bogusReq)
 
 	// Step 2, missing commit.
-	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip")
+	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff", git.ZIP)
 	assert.Error(t, err)
 	assert.Nil(t, bogusReq)
 
 	// Step 3, doesn't look like branch/tag/commit.
-	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip")
+	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db", git.ZIP)
 	assert.Error(t, err)
 	assert.Nil(t, bogusReq)
 
-	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master.zip")
+	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master", git.ZIP)
 	assert.NoError(t, err)
 	assert.NotNil(t, bogusReq)
 	assert.EqualValues(t, "master.zip", bogusReq.GetArchiveName())
 
-	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive.zip")
+	bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive", git.ZIP)
 	assert.NoError(t, err)
 	assert.NotNil(t, bogusReq)
 	assert.EqualValues(t, "test-archive.zip", bogusReq.GetArchiveName())
 
 	// Now two valid requests, firstCommit with valid extensions.
-	zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+	zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP)
 	assert.NoError(t, err)
 	assert.NotNil(t, zipReq)
 
-	tgzReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".tar.gz")
+	tgzReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.TARGZ)
 	assert.NoError(t, err)
 	assert.NotNil(t, tgzReq)
 
-	secondReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".zip")
+	secondReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit, git.ZIP)
 	assert.NoError(t, err)
 	assert.NotNil(t, secondReq)
 
@@ -91,7 +92,7 @@ func TestArchive_Basic(t *testing.T) {
 	// Sleep two seconds to make sure the queue doesn't change.
 	time.Sleep(2 * time.Second)
 
-	zipReq2, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+	zipReq2, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP)
 	assert.NoError(t, err)
 	// This zipReq should match what's sitting in the queue, as we haven't
 	// let it release yet.  From the consumer's point of view, this looks like
@@ -106,12 +107,12 @@ func TestArchive_Basic(t *testing.T) {
 	// Now we'll submit a request and TimedWaitForCompletion twice, before and
 	// after we release it.  We should trigger both the timeout and non-timeout
 	// cases.
-	timedReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".tar.gz")
+	timedReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit, git.TARGZ)
 	assert.NoError(t, err)
 	assert.NotNil(t, timedReq)
 	doArchive(db.DefaultContext, timedReq)
 
-	zipReq2, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
+	zipReq2, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP)
 	assert.NoError(t, err)
 	// Now, we're guaranteed to have released the original zipReq from the queue.
 	// Ensure that we don't get handed back the released entry somehow, but they
diff --git a/tests/integration/api_repo_archive_test.go b/tests/integration/api_repo_archive_test.go
index eecb84d5d1..8589199da3 100644
--- a/tests/integration/api_repo_archive_test.go
+++ b/tests/integration/api_repo_archive_test.go
@@ -59,3 +59,48 @@ func TestAPIDownloadArchive(t *testing.T) {
 	link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/archive/master", user2.Name, repo.Name))
 	MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusBadRequest)
 }
+
+// TestAPIDownloadArchive2 exercises the zipball, tarball and bundle API
+// endpoints and the "immutable" Link header returned with the tarball.
+func TestAPIDownloadArchive2(t *testing.T) {
+	defer tests.PrepareTestEnv(t)()
+
+	repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
+	user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
+	session := loginUser(t, user2.LowerName)
+	token := getTokenForLoggedInUser(t, session, auth_model.AccessTokenScopeReadRepository)
+
+	link, _ := url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/zipball/master", user2.Name, repo.Name))
+	resp := MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK)
+	bs, err := io.ReadAll(resp.Body)
+	assert.NoError(t, err)
+	assert.Len(t, bs, 320)
+
+	link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/tarball/master", user2.Name, repo.Name))
+	resp = MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK)
+	bs, err = io.ReadAll(resp.Body)
+	assert.NoError(t, err)
+	assert.Len(t, bs, 266)
+
+	// Must return a link to a commit ID as the "immutable" archive link
+	linkHeaderRe := regexp.MustCompile(`^<(https?://.*/api/v1/repos/user2/repo1/archive/[a-f0-9]+\.tar\.gz.*)>; rel="immutable"$`)
+	m := linkHeaderRe.FindStringSubmatch(resp.Header().Get("Link"))
+	// FindStringSubmatch returns nil on no match; fail cleanly instead of panicking on m[1].
+	if !assert.Len(t, m, 2) {
+		return
+	}
+	resp = MakeRequest(t, NewRequest(t, "GET", m[1]).AddTokenAuth(token), http.StatusOK)
+	bs2, err := io.ReadAll(resp.Body)
+	assert.NoError(t, err)
+	// The locked URL should give the same bytes as the non-locked one
+	assert.EqualValues(t, bs, bs2)
+
+	link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/bundle/master", user2.Name, repo.Name))
+	resp = MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK)
+	bs, err = io.ReadAll(resp.Body)
+	assert.NoError(t, err)
+	assert.Len(t, bs, 382)
+
+	link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/archive/master", user2.Name, repo.Name))
+	MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusBadRequest)
+}