mirror of
https://github.com/go-gitea/gitea.git
synced 2026-01-25 08:30:57 +01:00
Added cleanup method for files in Maven snapshot versions
This commit is contained in:
parent
2859b0602a
commit
7604caf844
@ -2676,6 +2676,9 @@ LEVEL = Info
|
||||
;LIMIT_SIZE_HELM = -1
|
||||
;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
|
||||
;LIMIT_SIZE_MAVEN = -1
|
||||
;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive.
|
||||
;; When enabled, the "Cleanup expired packages/data" task also removes the files of older builds from every Maven snapshot version.
|
||||
;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
|
||||
;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
|
||||
;LIMIT_SIZE_NPM = -1
|
||||
;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
|
||||
|
||||
@ -5,6 +5,9 @@ package packages
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -21,6 +24,8 @@ func init() {
|
||||
}
|
||||
|
||||
var (
|
||||
// ErrMetadataFile indicated a metadata file
|
||||
ErrMetadataFile = errors.New("metadata file")
|
||||
// ErrDuplicatePackageFile indicates a duplicated package file error
|
||||
ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists")
|
||||
// ErrPackageFileNotExist indicates a package file not exist error
|
||||
@ -231,6 +236,80 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
|
||||
return db.Exist[PackageFile](ctx, opts.toConds())
|
||||
}
|
||||
|
||||
// GetFilesBelowBuildNumber retrieves all files for a Maven snapshot version where the build number is <= maxBuildNumber.
|
||||
// Returns two slices: one for filtered files and one for skipped files.
|
||||
func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) {
|
||||
if maxBuildNumber <= 0 {
|
||||
return nil, nil, errors.New("maxBuildNumber must be a positive integer")
|
||||
}
|
||||
|
||||
files, err := GetFilesByVersionID(ctx, versionID)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to retrieve files: %w", err)
|
||||
}
|
||||
|
||||
// Sort classifiers by length (longest first) once per call
|
||||
sort.SliceStable(classifiers, func(i, j int) bool {
|
||||
return len(classifiers[i]) > len(classifiers[j])
|
||||
})
|
||||
|
||||
var filteredFiles, skippedFiles []*PackageFile
|
||||
for _, file := range files {
|
||||
buildNumber, err := extractBuildNumberFromFileName(file.Name, classifiers...)
|
||||
if err != nil {
|
||||
if !errors.Is(err, ErrMetadataFile) {
|
||||
skippedFiles = append(skippedFiles, file)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if buildNumber <= maxBuildNumber {
|
||||
filteredFiles = append(filteredFiles, file)
|
||||
}
|
||||
}
|
||||
|
||||
return filteredFiles, skippedFiles, nil
|
||||
}
|
||||
|
||||
// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name.
|
||||
// Expected formats:
|
||||
//
|
||||
// "artifact-1.0.0-20250311.083409-9.tgz" returns 9
|
||||
// "artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10
|
||||
func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) {
|
||||
if strings.Contains(filename, "maven-metadata.xml") {
|
||||
return 0, ErrMetadataFile
|
||||
}
|
||||
|
||||
dotIdx := strings.LastIndex(filename, ".")
|
||||
if dotIdx == -1 {
|
||||
return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename)
|
||||
}
|
||||
base := filename[:dotIdx]
|
||||
|
||||
// Remove classifier suffix if present.
|
||||
for _, classifier := range classifiers {
|
||||
suffix := "-" + classifier
|
||||
if strings.HasSuffix(base, suffix) {
|
||||
base = base[:len(base)-len(suffix)]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// The build number should be the token after the last dash.
|
||||
lastDash := strings.LastIndex(base, "-")
|
||||
if lastDash == -1 {
|
||||
return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename)
|
||||
}
|
||||
buildNumberStr := base[lastDash+1:]
|
||||
buildNumber, err := strconv.Atoi(buildNumberStr)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %v", buildNumberStr, filename, err)
|
||||
}
|
||||
|
||||
return buildNumber, nil
|
||||
}
|
||||
|
||||
// CalculateFileSize sums up all blob sizes matching the search options.
|
||||
// It does NOT respect the deduplication of blobs.
|
||||
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) {
|
||||
|
||||
@ -129,11 +129,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType
|
||||
|
||||
// GetVersionsByPackageType gets all versions of a specific type
|
||||
func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) {
|
||||
pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{
|
||||
OwnerID: ownerID,
|
||||
opts := &PackageSearchOptions{
|
||||
Type: packageType,
|
||||
IsInternal: optional.Some(false),
|
||||
})
|
||||
}
|
||||
|
||||
if ownerID != 0 {
|
||||
opts.OwnerID = ownerID
|
||||
}
|
||||
|
||||
pvs, _, err := SearchVersions(ctx, opts)
|
||||
return pvs, err
|
||||
}
|
||||
|
||||
|
||||
@ -5,7 +5,9 @@ package maven
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
"code.gitea.io/gitea/modules/validation"
|
||||
@ -31,6 +33,12 @@ type Dependency struct {
|
||||
Version string `json:"version,omitempty"`
|
||||
}
|
||||
|
||||
// SnapshotMetadata is the parsed summary of a snapshot version's metadata:
// its build number and the classifiers listed for it.
type SnapshotMetadata struct {
	// BuildNumber is the build number read from the snapshot metadata.
	BuildNumber int `json:"build_number,omitempty"`
	// Classifiers holds the non-empty classifiers found in the snapshot metadata.
	Classifiers []string `json:"classifiers,omitempty"`
}
|
||||
|
||||
type pomStruct struct {
|
||||
XMLName xml.Name `xml:"project"`
|
||||
|
||||
@ -61,6 +69,26 @@ type pomStruct struct {
|
||||
} `xml:"dependencies>dependency"`
|
||||
}
|
||||
|
||||
// snapshotMetadataStruct is the XML decoding target for a <metadata> document.
// All values are kept as strings exactly as they appear in the XML.
type snapshotMetadataStruct struct {
	XMLName    xml.Name `xml:"metadata"`
	GroupID    string   `xml:"groupId"`
	ArtifactID string   `xml:"artifactId"`
	Version    string   `xml:"version"`

	// Versioning maps the <versioning> element.
	Versioning struct {
		LastUpdated string `xml:"lastUpdated"`

		// Snapshot maps <snapshot>, which carries the timestamp and build number.
		Snapshot struct {
			Timestamp   string `xml:"timestamp"`
			BuildNumber string `xml:"buildNumber"`
		} `xml:"snapshot"`

		// SnapshotVersions maps every <snapshotVersions>/<snapshotVersion> entry.
		SnapshotVersions []struct {
			Extension  string `xml:"extension"`
			Classifier string `xml:"classifier"`
			Value      string `xml:"value"`
			Updated    string `xml:"updated"`
		} `xml:"snapshotVersions>snapshotVersion"`
	} `xml:"versioning"`
}
|
||||
|
||||
// ParsePackageMetaData parses the metadata of a pom file
|
||||
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
|
||||
var pom pomStruct
|
||||
@ -109,3 +137,31 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
|
||||
Dependencies: dependencies,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ParseSnapshotVersionMetadata parses the Maven Snapshot Version metadata to extract the build number and list of available classifiers.
|
||||
func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) {
|
||||
var metadata snapshotMetadataStruct
|
||||
|
||||
dec := xml.NewDecoder(r)
|
||||
dec.CharsetReader = charset.NewReaderLabel
|
||||
if err := dec.Decode(&metadata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber)
|
||||
if err != nil {
|
||||
return nil, errors.New("invalid or missing build number in snapshot metadata")
|
||||
}
|
||||
|
||||
var classifiers []string
|
||||
for _, snapshotVersion := range metadata.Versioning.SnapshotVersions {
|
||||
if snapshotVersion.Classifier != "" {
|
||||
classifiers = append(classifiers, snapshotVersion.Classifier)
|
||||
}
|
||||
}
|
||||
|
||||
return &SnapshotMetadata{
|
||||
BuildNumber: buildNumber,
|
||||
Classifiers: classifiers,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -41,10 +41,12 @@ var (
|
||||
LimitSizeSwift int64
|
||||
LimitSizeVagrant int64
|
||||
|
||||
DefaultRPMSignEnabled bool
|
||||
DefaultRPMSignEnabled bool
|
||||
RetainMavenSnapshotBuilds int
|
||||
}{
|
||||
Enabled: true,
|
||||
LimitTotalOwnerCount: -1,
|
||||
Enabled: true,
|
||||
LimitTotalOwnerCount: -1,
|
||||
RetainMavenSnapshotBuilds: -1,
|
||||
}
|
||||
)
|
||||
|
||||
@ -88,6 +90,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
|
||||
Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT")
|
||||
Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
|
||||
Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
|
||||
Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// Copyright 2022 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package container
|
||||
package cleanup
|
||||
|
||||
import (
|
||||
"context"
|
||||
@ -20,6 +20,7 @@ import (
|
||||
cargo_service "code.gitea.io/gitea/services/packages/cargo"
|
||||
container_service "code.gitea.io/gitea/services/packages/container"
|
||||
debian_service "code.gitea.io/gitea/services/packages/debian"
|
||||
maven_service "code.gitea.io/gitea/services/packages/maven"
|
||||
rpm_service "code.gitea.io/gitea/services/packages/rpm"
|
||||
)
|
||||
|
||||
@ -171,6 +172,10 @@ func CleanupExpiredData(ctx context.Context, olderThan time.Duration) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := maven_service.CleanupSnapshotVersions(ctx); err != nil {
|
||||
log.Error("Error during maven snapshot versions cleanup: %v", err)
|
||||
}
|
||||
|
||||
ps, err := packages_model.FindUnreferencedPackages(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
138
services/packages/maven/cleanup.go
Normal file
138
services/packages/maven/cleanup.go
Normal file
@ -0,0 +1,138 @@
|
||||
package maven
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/models/packages"
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/packages/maven"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
packages_service "code.gitea.io/gitea/services/packages"
|
||||
)
|
||||
|
||||
// CleanupSnapshotVersions removes outdated files for SNAPHOT versions for all Maven packages.
|
||||
func CleanupSnapshotVersions(ctx context.Context) error {
|
||||
retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
|
||||
log.Debug("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds)
|
||||
|
||||
if retainBuilds < 1 {
|
||||
log.Warn("Maven Cleanup: skipped as value for retainBuilds less than 1: %d. Minimum 1 build should be retained", retainBuilds)
|
||||
return nil
|
||||
}
|
||||
|
||||
versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
|
||||
if err != nil {
|
||||
return fmt.Errorf("maven Cleanup: failed to retrieve Maven package versions: %w", err)
|
||||
}
|
||||
|
||||
var errs []error
|
||||
var metadataErrors []error
|
||||
|
||||
for _, version := range versions {
|
||||
if !isSnapshotVersion(version.Version) {
|
||||
continue
|
||||
}
|
||||
|
||||
var artifactId, groupId string
|
||||
if version.MetadataJSON != "" {
|
||||
var metadata map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(version.MetadataJSON), &metadata); err != nil {
|
||||
log.Warn("Maven Cleanup: error during cleanup: failed to unmarshal metadataJSON for package version ID: %d: %w", version.ID, err)
|
||||
} else {
|
||||
artifactId, _ = metadata["artifact_id"].(string)
|
||||
groupId, _ = metadata["group_id"].(string)
|
||||
log.Debug("Maven Cleanup: processing package version with ID: %s, Group ID: %s, Artifact ID: %s, Version: %s", version.ID, groupId, artifactId, version.Version)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
|
||||
formattedErr := fmt.Errorf("version '%s' (ID: %d, Group ID: %s, Artifact ID: %s): %w",
|
||||
version.Version, version.ID, groupId, artifactId, err)
|
||||
|
||||
if errors.Is(err, packages.ErrMetadataFile) {
|
||||
metadataErrors = append(metadataErrors, formattedErr)
|
||||
} else {
|
||||
errs = append(errs, formattedErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, err := range metadataErrors {
|
||||
log.Warn("Maven Cleanup: error during cleanup: %v", err)
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
for _, err := range errs {
|
||||
log.Error("Maven Cleanup: error during cleanup: %v", err)
|
||||
}
|
||||
return fmt.Errorf("maven Cleanup: completed with errors: %v", errs)
|
||||
}
|
||||
|
||||
log.Debug("Completed Maven Cleanup")
|
||||
return nil
|
||||
}
|
||||
|
||||
// isSnapshotVersion reports whether version ends with the "-SNAPSHOT" suffix.
// The comparison is case-sensitive.
func isSnapshotVersion(version string) bool {
	_, hasSuffix := strings.CutSuffix(version, "-SNAPSHOT")
	return hasSuffix
}
|
||||
|
||||
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
|
||||
log.Debug("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds)
|
||||
|
||||
metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to retrieve maven-metadata.xml: %w", packages.ErrMetadataFile, err)
|
||||
}
|
||||
|
||||
maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to extract max build number from maven-metadata.xml: %w", packages.ErrMetadataFile, err)
|
||||
}
|
||||
|
||||
thresholdBuildNumber := maxBuildNumber - retainBuilds
|
||||
if thresholdBuildNumber <= 0 {
|
||||
log.Debug("Maven Cleanup: no files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
|
||||
return nil
|
||||
}
|
||||
|
||||
filesToRemove, _, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
|
||||
}
|
||||
|
||||
for _, file := range filesToRemove {
|
||||
log.Debug("Maven Cleanup: removing file '%s' below threshold %d", file.Name, thresholdBuildNumber)
|
||||
if err := packages_service.DeletePackageFile(ctx, file); err != nil {
|
||||
return fmt.Errorf("cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) {
|
||||
pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID)
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("failed to get package blob: %w", err)
|
||||
}
|
||||
|
||||
content, _, _, err := packages_service.OpenBlobForDownload(ctx, metadataFile, pb, "", nil, true)
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("failed to get package file stream: %w", err)
|
||||
}
|
||||
defer content.Close()
|
||||
|
||||
snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content)
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
|
||||
}
|
||||
|
||||
buildNumber := snapshotMetadata.BuildNumber
|
||||
classifiers := snapshotMetadata.Classifiers
|
||||
|
||||
return buildNumber, classifiers, nil
|
||||
}
|
||||
@ -598,8 +598,8 @@ func OpenBlobStream(pb *packages_model.PackageBlob) (io.ReadSeekCloser, error) {
|
||||
}
|
||||
|
||||
// OpenBlobForDownload returns the content of the specific package blob and increases the download counter.
|
||||
// If the storage supports direct serving and it's enabled, only the direct serving url is returned.
|
||||
func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, method string, serveDirectReqParams url.Values) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) {
|
||||
// If the storage supports direct serving and it's enabled, only the direct serving url is returned; otherwise, forceInternalServe should be set to true.
|
||||
func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, method string, serveDirectReqParams url.Values, forceInternalServe ...bool) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) {
|
||||
key := packages_module.BlobHash256Key(pb.HashSHA256)
|
||||
|
||||
cs := packages_module.NewContentStore()
|
||||
@ -608,7 +608,9 @@ func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb
|
||||
var u *url.URL
|
||||
var err error
|
||||
|
||||
if cs.ShouldServeDirect() {
|
||||
internalServe := len(forceInternalServe) > 0 && forceInternalServe[0]
|
||||
|
||||
if !internalServe && cs.ShouldServeDirect() {
|
||||
u, err = cs.GetServeDirectURL(key, pf.Name, method, serveDirectReqParams)
|
||||
if err != nil && !errors.Is(err, storage.ErrURLNotSupported) {
|
||||
log.Error("Error getting serve direct url (fallback to local reader): %v", err)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user