From 2048363f9ed6de485a81afa980ed90bf916bb3b8 Mon Sep 17 00:00:00 2001
From: Giteabot <teabot@gitea.io>
Date: Wed, 31 Jan 2024 12:23:26 +0800
Subject: [PATCH] Don't remove all mirror repository's releases when mirroring
 (#28817) (#28939)

Backport #28817 by @lunny

Fix #22066

# Purpose

This PR fixes a bug where a mirror repository's releases were deleted
whenever its tags were synced.

# The problem

In the previous implementation from #19125, all release records of a
mirror repository were deleted from the database before each sync.
Ref:
https://github.com/go-gitea/gitea/pull/19125/files#diff-2aa04998a791c30e5a02b49a97c07fcd93d50e8b31640ce2ddb1afeebf605d02R481

# The Pros

This PR introduces a new method which loads all releases from the
database and all tags from the git data into memory, then detects which
tags need to be inserted, updated, or deleted. Only tag releases
(IsTag=true) which are not present in the git data are deleted, and only
tags whose sha1 changed are updated. So it will not delete any real
releases, including drafts.

# The Cons

The drawback is that memory usage will be higher than before if there
are many tags in the repository. This PR defines a special release
struct to reduce the columns loaded from the database into memory.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
---
 models/repo/release.go          |  8 +++-
 modules/repository/repo.go      | 78 ++++++++++++++++++++++++++++++---
 modules/repository/repo_test.go | 76 ++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 7 deletions(-)
 create mode 100644 modules/repository/repo_test.go

diff --git a/models/repo/release.go b/models/repo/release.go
index 223d3f2501..067de8a313 100644
--- a/models/repo/release.go
+++ b/models/repo/release.go
@@ -230,12 +230,18 @@ type FindReleasesOptions struct {
 	IsPreRelease  util.OptionalBool
 	IsDraft       util.OptionalBool
 	TagNames      []string
+	RepoID        int64
 	HasSha1       util.OptionalBool // useful to find draft releases which are created with existing tags
 }
 
 func (opts *FindReleasesOptions) toConds(repoID int64) builder.Cond {
+	opts.RepoID = repoID
+	return opts.ToConds()
+}
+
+func (opts *FindReleasesOptions) ToConds() builder.Cond {
 	cond := builder.NewCond()
-	cond = cond.And(builder.Eq{"repo_id": repoID})
+	cond = cond.And(builder.Eq{"repo_id": opts.RepoID})
 
 	if !opts.IncludeDrafts {
 		cond = cond.And(builder.Eq{"is_draft": false})
diff --git a/modules/repository/repo.go b/modules/repository/repo.go
index 974449112f..d06d75fa9c 100644
--- a/modules/repository/repo.go
+++ b/modules/repository/repo.go
@@ -491,6 +491,18 @@ func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Re
 	return nil
 }
 
+// shortRelease is a trimmed-down stand-in for repo_model.Release that maps to the same table but loads fewer columns to reduce memory usage
+type shortRelease struct {
+	ID      int64
+	TagName string
+	Sha1    string
+	IsTag   bool
+}
+
+func (shortRelease) TableName() string {
+	return "release"
+}
+
 // pullMirrorReleaseSync is a pull-mirror specific tag<->release table
 // synchronization which overwrites all Releases from the repository tags. This
 // can be relied on since a pull-mirror is always identical to its
@@ -504,16 +516,22 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
 		return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
 	}
 	err = db.WithTx(ctx, func(ctx context.Context) error {
-		//
-		// clear out existing releases
-		//
-		if _, err := db.DeleteByBean(ctx, &repo_model.Release{RepoID: repo.ID}); err != nil {
-			return fmt.Errorf("unable to clear releases for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
+		dbReleases := make([]*shortRelease, 0, len(tags))
+		err := db.Find(ctx, &repo_model.FindReleasesOptions{
+			ListOptions:   db.ListOptions{ListAll: true},
+			RepoID:        repo.ID,
+			IncludeDrafts: true,
+			IncludeTags:   true,
+		}, &dbReleases)
+		if err != nil {
+			return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
 		}
+
+		inserts, deletes, updates := calcSync(tags, dbReleases)
 		//
 		// make release set identical to upstream tags
 		//
-		for _, tag := range tags {
+		for _, tag := range inserts {
 			release := repo_model.Release{
 				RepoID:       repo.ID,
 				TagName:      tag.Name,
@@ -530,6 +548,25 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
 				return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err)
 			}
 		}
+
+		// only delete tags releases
+		if len(deletes) > 0 {
+			if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID).
+				In("id", deletes).
+				Delete(&repo_model.Release{}); err != nil {
+				return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err)
+			}
+		}
+
+		for _, tag := range updates {
+			if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)).
+				Cols("sha1").
+				Update(&repo_model.Release{
+					Sha1: tag.Object.String(),
+				}); err != nil {
+				return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err)
+			}
+		}
 		return nil
 	})
 	if err != nil {
@@ -539,3 +576,32 @@ func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, git
 	log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags)
 	return nil
 }
+
+func calcSync(destTags []*git.Tag, dbTags []*shortRelease) ([]*git.Tag, []int64, []*git.Tag) {
+	destTagMap := make(map[string]*git.Tag)
+	for _, tag := range destTags {
+		destTagMap[tag.Name] = tag
+	}
+	dbTagMap := make(map[string]*shortRelease)
+	for _, rel := range dbTags {
+		dbTagMap[rel.TagName] = rel
+	}
+
+	inserted := make([]*git.Tag, 0, 10)
+	updated := make([]*git.Tag, 0, 10)
+	for _, tag := range destTags {
+		rel := dbTagMap[tag.Name]
+		if rel == nil {
+			inserted = append(inserted, tag)
+		} else if rel.Sha1 != tag.Object.String() {
+			updated = append(updated, tag)
+		}
+	}
+	deleted := make([]int64, 0, 10)
+	for _, tag := range dbTags {
+		if destTagMap[tag.TagName] == nil && tag.IsTag {
+			deleted = append(deleted, tag.ID)
+		}
+	}
+	return inserted, deleted, updated
+}
diff --git a/modules/repository/repo_test.go b/modules/repository/repo_test.go
new file mode 100644
index 0000000000..68980f92f9
--- /dev/null
+++ b/modules/repository/repo_test.go
@@ -0,0 +1,76 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repository
+
+import (
+	"testing"
+
+	"code.gitea.io/gitea/modules/git"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func Test_calcSync(t *testing.T) {
+	gitTags := []*git.Tag{
+		/*{
+			Name: "v0.1.0-beta", //deleted tag
+			Object: git.MustIDFromString(""),
+		},
+		{
+			Name: "v0.1.1-beta", //deleted tag but release should not be deleted because it's a release
+			Object: git.MustIDFromString(""),
+		},
+		*/
+		{
+			Name:   "v1.0.0", // keep as before
+			Object: git.MustIDFromString("1006e6e13c73ad3d9e2d5682ad266b5016523485"),
+		},
+		{
+			Name:   "v1.1.0", // retagged with new commit id
+			Object: git.MustIDFromString("bbdb7df30248e7d4a26a909c8d2598a152e13868"),
+		},
+		{
+			Name:   "v1.2.0", // new tag
+			Object: git.MustIDFromString("a5147145e2f24d89fd6d2a87826384cc1d253267"),
+		},
+	}
+
+	dbReleases := []*shortRelease{
+		{
+			ID:      1,
+			TagName: "v0.1.0-beta",
+			Sha1:    "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3",
+			IsTag:   true,
+		},
+		{
+			ID:      2,
+			TagName: "v0.1.1-beta",
+			Sha1:    "244758d7da8dd1d9e0727e8cb7704ed4ba9a17c3",
+			IsTag:   false,
+		},
+		{
+			ID:      3,
+			TagName: "v1.0.0",
+			Sha1:    "1006e6e13c73ad3d9e2d5682ad266b5016523485",
+		},
+		{
+			ID:      4,
+			TagName: "v1.1.0",
+			Sha1:    "53ab18dcecf4152b58328d1f47429510eb414d50",
+		},
+	}
+
+	inserts, deletes, updates := calcSync(gitTags, dbReleases)
+	if assert.EqualValues(t, 1, len(inserts), "inserts") {
+		assert.EqualValues(t, *gitTags[2], *inserts[0], "inserts equal")
+	}
+
+	if assert.EqualValues(t, 1, len(deletes), "deletes") {
+		assert.EqualValues(t, 1, deletes[0], "deletes equal")
+	}
+
+	if assert.EqualValues(t, 1, len(updates), "updates") {
+		assert.EqualValues(t, *gitTags[1], *updates[0], "updates equal")
+	}
+}