From 12237939cba72317edf272249301d7ebf0020e4f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 25 Mar 2025 11:41:29 -0700 Subject: [PATCH 1/5] Some improvements --- routers/web/repo/contributors.go | 3 +- services/repository/contributors_graph.go | 96 ++++++++++++----------- 2 files changed, 51 insertions(+), 48 deletions(-) diff --git a/routers/web/repo/contributors.go b/routers/web/repo/contributors.go index 93dec1f350..26ab0a806c 100644 --- a/routers/web/repo/contributors.go +++ b/routers/web/repo/contributors.go @@ -4,6 +4,7 @@ package repo import ( + std_ctx "context" "errors" "net/http" @@ -27,7 +28,7 @@ func Contributors(ctx *context.Context) { // ContributorsData renders JSON of contributors along with their weekly commit statistics func ContributorsData(ctx *context.Context) { if contributorStats, err := contributors_service.GetContributorStats(ctx, ctx.Cache, ctx.Repo.Repository, ctx.Repo.Repository.DefaultBranch); err != nil { - if errors.Is(err, contributors_service.ErrAwaitGeneration) { + if errors.Is(err, contributors_service.ErrAwaitGeneration) || errors.Is(err, std_ctx.DeadlineExceeded) { ctx.Status(http.StatusAccepted) return } diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index a4ae505313..f91d18fc92 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -11,7 +11,6 @@ import ( "os" "strconv" "strings" - "sync" "time" "code.gitea.io/gitea/models/avatars" @@ -20,7 +19,7 @@ import ( "code.gitea.io/gitea/modules/cache" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" - "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/globallock" "code.gitea.io/gitea/modules/log" api "code.gitea.io/gitea/modules/structs" ) @@ -32,8 +31,7 @@ const ( var ( ErrAwaitGeneration = errors.New("generation took longer than ") - awaitGenerationTime = time.Second * 5 - generateLock = sync.Map{} + awaitGenerationTime = time.Second * 60 ) type WeekData struct { @@ -81,41 +79,44 @@ func findLastSundayBeforeDate(dateStr string) (string, error) { func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) { // as GetContributorStats is resource intensive we cache the result cacheKey := fmt.Sprintf(contributorStatsCacheKey, repo.FullName(), revision) - if !cache.IsExist(cacheKey) { - genReady := make(chan struct{}) - - // dont start multiple async generations - _, run := generateLock.Load(cacheKey) - if run { - return nil, ErrAwaitGeneration + if cache.IsExist(cacheKey) { + // TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout) + var res map[string]*ContributorData + if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil { + return nil, fmt.Errorf("cached error: %w", cacheErr.ToError()) } + return res, nil + } - generateLock.Store(cacheKey, struct{}{}) - // run generation async - go generateContributorStats(genReady, cache, cacheKey, repo, revision) + // dont start multiple async generations + releaser, err := globallock.Lock(ctx, cacheKey) + if err != nil { + return nil, err + } + defer releaser() - select { - case <-time.After(awaitGenerationTime): - return nil, ErrAwaitGeneration - case <-genReady: - // we got generation ready before timeout - break + // set a timeout for the generation + ctx, cancel := context.WithTimeout(ctx, awaitGenerationTime) + defer cancel() + + // run generation async + res, err := generateContributorStats(ctx, cache, cacheKey, repo, revision) + if err != nil { + switch { + case errors.Is(err, context.DeadlineExceeded): + _ = cache.PutJSON(cacheKey, fmt.Errorf("generateContributorStats: %w", ErrAwaitGeneration), contributorStatsCacheTimeout) + default: + _ = cache.PutJSON(cacheKey, fmt.Errorf("generateContributorStats: %w", err), contributorStatsCacheTimeout) } + return nil, err } - // TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout) - var res map[string]*ContributorData - if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil { - return nil, fmt.Errorf("cached error: %w", cacheErr.ToError()) - } + + _ = cache.PutJSON(cacheKey, res, contributorStatsCacheTimeout) return res, nil } // getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision -func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int */) ([]*ExtendedCommitStats, error) { - baseCommit, err := repo.GetCommit(revision) - if err != nil { - return nil, err - } +func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *git.Commit, revision string /*, limit int */) ([]*ExtendedCommitStats, error) { stdoutReader, stdoutWriter, err := os.Pipe() if err != nil { return nil, err @@ -131,8 +132,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int var extendedCommitStats []*ExtendedCommitStats stderr := new(strings.Builder) - err = gitCmd.Run(repo.Ctx, &git.RunOpts{ - Dir: repo.Path, + err = gitCmd.Run(ctx, &git.RunOpts{ + Dir: repoPath, Stdout: stdoutWriter, Stderr: stderr, PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { @@ -140,6 +141,12 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int scanner := bufio.NewScanner(stdoutReader) for scanner.Scan() { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + line := strings.TrimSpace(scanner.Text()) if line != "---" { continue @@ -199,27 +206,26 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int return extendedCommitStats, nil } -func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) { - ctx := graceful.GetManager().HammerContext() - +func generateContributorStats(ctx context.Context, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) { gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo) if err != nil { - _ = cache.PutJSON(cacheKey, fmt.Errorf("OpenRepository: %w", err), contributorStatsCacheTimeout) - return + return nil, err } defer closer.Close() if len(revision) == 0 { revision = repo.DefaultBranch } - extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision) + baseCommit, err := gitRepo.GetCommit(revision) if err != nil { - _ = cache.PutJSON(cacheKey, fmt.Errorf("ExtendedCommitStats: %w", err), contributorStatsCacheTimeout) - return + return nil, err + } + extendedCommitStats, err := getExtendedCommitStats(ctx, repo.RepoPath(), baseCommit, revision) + if err != nil { + return nil, err } if len(extendedCommitStats) == 0 { - _ = cache.PutJSON(cacheKey, fmt.Errorf("no commit stats returned for revision '%s'", revision), contributorStatsCacheTimeout) - return + return nil, fmt.Errorf("no commit stats returned for revision '%s'", revision) } layout := time.DateOnly @@ -300,9 +306,5 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca total.TotalCommits++ } - _ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout) - generateLock.Delete(cacheKey) - if genDone != nil { - genDone <- struct{}{} - } + return contributorsCommitStats, nil } From 262b562391a5f5441cb7a2da2d328d7288a332ac Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 25 Mar 2025 12:05:56 -0700 Subject: [PATCH 2/5] some improvements --- routers/web/repo/code_frequency.go | 7 +------ routers/web/repo/contributors.go | 6 ------ services/repository/contributors_graph.go | 22 +++++++++------------- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/routers/web/repo/code_frequency.go b/routers/web/repo/code_frequency.go index 2b2dd5744a..9e5c1dc1e3 100644 --- a/routers/web/repo/code_frequency.go +++ b/routers/web/repo/code_frequency.go @@ -4,7 +4,6 @@ package repo import ( - "errors" "net/http" "code.gitea.io/gitea/modules/templates" @@ -30,11 +29,7 @@ func CodeFrequency(ctx *context.Context) { // CodeFrequencyData returns JSON of code frequency data func CodeFrequencyData(ctx *context.Context) { if contributorStats, err := contributors_service.GetContributorStats(ctx, ctx.Cache, ctx.Repo.Repository, ctx.Repo.Repository.DefaultBranch); err != nil { - if errors.Is(err, contributors_service.ErrAwaitGeneration) { - ctx.Status(http.StatusAccepted) - return - } - ctx.ServerError("GetContributorStats", err) + ctx.ServerError("GetCodeFrequencyData", err) } else { ctx.JSON(http.StatusOK, contributorStats["total"].Weeks) } diff --git a/routers/web/repo/contributors.go b/routers/web/repo/contributors.go index 26ab0a806c..c7785ce5a6 100644 --- a/routers/web/repo/contributors.go +++ b/routers/web/repo/contributors.go @@ -4,8 +4,6 @@ package repo import ( - std_ctx "context" - "errors" "net/http" "code.gitea.io/gitea/modules/templates" @@ -28,10 +26,6 @@ func Contributors(ctx *context.Context) { // ContributorsData renders JSON of contributors along with their weekly commit statistics func ContributorsData(ctx *context.Context) { if contributorStats, err := contributors_service.GetContributorStats(ctx, ctx.Cache, ctx.Repo.Repository, ctx.Repo.Repository.DefaultBranch); err != nil { - if errors.Is(err, contributors_service.ErrAwaitGeneration) || errors.Is(err, std_ctx.DeadlineExceeded) { - ctx.Status(http.StatusAccepted) - return - } ctx.ServerError("GetContributorStats", err) } else { ctx.JSON(http.StatusOK, contributorStats) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index f91d18fc92..b9abc7612b 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -29,10 +29,7 @@ const ( contributorStatsCacheTimeout int64 = 60 * 10 ) -var ( - ErrAwaitGeneration = errors.New("generation took longer than ") - awaitGenerationTime = time.Second * 60 -) +var ErrAwaitGeneration = errors.New("generation took longer than ") type WeekData struct { Week int64 `json:"week"` // Starting day of the week as Unix timestamp @@ -95,19 +92,18 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep } defer releaser() - // set a timeout for the generation - ctx, cancel := context.WithTimeout(ctx, awaitGenerationTime) - defer cancel() + // check if generation is already completed by other request + if cache.IsExist(cacheKey) { + var res map[string]*ContributorData + if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil { + return nil, fmt.Errorf("cached error: %w", cacheErr.ToError()) + } + return res, nil + } // run generation async res, err := generateContributorStats(ctx, cache, cacheKey, repo, revision) if err != nil { - switch { - case errors.Is(err, context.DeadlineExceeded): - _ = cache.PutJSON(cacheKey, fmt.Errorf("generateContributorStats: %w", ErrAwaitGeneration), contributorStatsCacheTimeout) - default: - _ = cache.PutJSON(cacheKey, fmt.Errorf("generateContributorStats: %w", err), contributorStatsCacheTimeout) - } return nil, err } From 450a27bc29b361bd0db53c36b9589d6156eb4e7c Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 25 Mar 2025 12:32:29 -0700 Subject: [PATCH 3/5] Fix test --- services/repository/contributors_graph.go | 8 ++++---- .../repository/contributors_graph_test.go | 20 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index b9abc7612b..6488f10470 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -102,7 +102,7 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep } // run generation async - res, err := generateContributorStats(ctx, cache, cacheKey, repo, revision) + res, err := generateContributorStats(ctx, repo, revision) if err != nil { return nil, err } @@ -112,7 +112,7 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep } // getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision -func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *git.Commit, revision string /*, limit int */) ([]*ExtendedCommitStats, error) { +func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *git.Commit) ([]*ExtendedCommitStats, error) { stdoutReader, stdoutWriter, err := os.Pipe() if err != nil { return nil, err @@ -202,7 +202,7 @@ func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *gi return extendedCommitStats, nil } -func generateContributorStats(ctx context.Context, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) { +func generateContributorStats(ctx context.Context, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) { gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo) if err != nil { return nil, err @@ -216,7 +216,7 @@ func generateContributorStats(ctx context.Context, cache cache.StringCache, cach if err != nil { return nil, err } - extendedCommitStats, err := getExtendedCommitStats(ctx, repo.RepoPath(), baseCommit, revision) + extendedCommitStats, err := getExtendedCommitStats(ctx, repo.RepoPath(), baseCommit) if err != nil { return nil, err } diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index 7d32b1c931..92fbb87f58 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -10,8 +10,6 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/cache" - "code.gitea.io/gitea/modules/setting" "github.com/stretchr/testify/assert" ) @@ -21,18 +19,14 @@ func TestRepository_ContributorsGraph(t *testing.T) { repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 2}) assert.NoError(t, repo.LoadOwner(db.DefaultContext)) - mockCache, err := cache.NewStringCache(setting.Cache{}) + + data, err := generateContributorStats(t.Context(), repo, "404ref") + assert.ErrorContains(t, err, "object does not exist") + assert.Nil(t, data) + + data, err = generateContributorStats(t.Context(), repo, "master") assert.NoError(t, err) - - generateContributorStats(nil, mockCache, "key", repo, "404ref") - var data map[string]*ContributorData - _, getErr := mockCache.GetJSON("key", &data) - assert.NotNil(t, getErr) - assert.ErrorContains(t, getErr.ToError(), "object does not exist") - - generateContributorStats(nil, mockCache, "key2", repo, "master") - exist, _ := mockCache.GetJSON("key2", &data) - assert.True(t, exist) + assert.NotNil(t, data) var keys []string for k := range data { keys = append(keys, k) From 9d28a281a29e1151548f911c0f0bc3291de133a9 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 25 Mar 2025 12:37:43 -0700 Subject: [PATCH 4/5] Some comments --- services/repository/contributors_graph.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index 6488f10470..9112b592a8 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -6,7 +6,6 @@ package repository import ( "bufio" "context" - "errors" "fmt" "os" "strconv" @@ -29,8 +28,6 @@ const ( contributorStatsCacheTimeout int64 = 60 * 10 ) -var ErrAwaitGeneration = errors.New("generation took longer than ") - type WeekData struct { Week int64 `json:"week"` // Starting day of the week as Unix timestamp Additions int `json:"additions"` // Number of additions in that week @@ -85,14 +82,14 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep return res, nil } - // dont start multiple async generations + // dont start multiple generations for the same repository and same revision releaser, err := globallock.Lock(ctx, cacheKey) if err != nil { return nil, err } defer releaser() - // check if generation is already completed by other request + // check if generation is already completed by other request when we were waiting for lock if cache.IsExist(cacheKey) { var res map[string]*ContributorData if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil { @@ -101,7 +98,6 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep return res, nil } - // run generation async res, err := generateContributorStats(ctx, repo, revision) if err != nil { return nil, err @@ -196,7 +192,7 @@ func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *gi }, }) if err != nil { - return nil, fmt.Errorf("Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr) + return nil, fmt.Errorf("failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr) } return extendedCommitStats, nil From 1dbcf898880f1f43c8c8efff5ddca7305bb891a5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 2 Apr 2025 10:10:17 -0700 Subject: [PATCH 5/5] fix --- services/repository/contributors_graph.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index 9112b592a8..f8fbc3d6f1 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -20,6 +20,7 @@ import ( "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/globallock" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" ) @@ -98,10 +99,14 @@ func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *rep return res, nil } + ctx, cancel := context.WithTimeout(ctx, time.Duration(setting.Git.Timeout.Default)*time.Second) + defer cancel() + res, err := generateContributorStats(ctx, repo, revision) if err != nil { return nil, err } + cancel() _ = cache.PutJSON(cacheKey, res, contributorStatsCacheTimeout) return res, nil @@ -230,12 +235,17 @@ func generateContributorStats(ctx context.Context, repo *repo_model.Repository, } total := contributorsCommitStats["total"] + emailUserCache := make(map[string]*user_model.User) for _, v := range extendedCommitStats { userEmail := v.Author.Email if len(userEmail) == 0 { continue } - u, _ := user_model.GetUserByEmail(ctx, userEmail) + u, ok := emailUserCache[userEmail] + if !ok { + u, _ = user_model.GetUserByEmail(ctx, userEmail) + emailUserCache[userEmail] = u + } if u != nil { // update userEmail with user's primary email address so // that different mail addresses will linked to same account