2020-12-17 09:00:47 -05:00
|
|
|
// Copyright 2015 The Gogs Authors. All rights reserved.
|
|
|
|
// Copyright 2017 The Gitea Authors. All rights reserved.
|
2022-11-27 13:20:29 -05:00
|
|
|
// SPDX-License-Identifier: MIT
|
2020-12-17 09:00:47 -05:00
|
|
|
|
2021-08-24 12:47:09 -04:00
|
|
|
//go:build !gogit
|
2020-12-17 09:00:47 -05:00
|
|
|
|
|
|
|
package git
|
|
|
|
|
|
|
|
import (
|
2021-05-09 21:27:03 -04:00
|
|
|
"bufio"
|
|
|
|
"context"
|
2020-12-17 09:00:47 -05:00
|
|
|
"errors"
|
|
|
|
"path/filepath"
|
2021-06-25 12:54:08 -04:00
|
|
|
|
|
|
|
"code.gitea.io/gitea/modules/log"
|
2020-12-17 09:00:47 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// Repository represents a Git repository.
|
|
|
|
type Repository struct {
|
|
|
|
Path string
|
|
|
|
|
|
|
|
tagCache *ObjectCache
|
|
|
|
|
|
|
|
gpgSettings *GPGSettings
|
2021-05-09 21:27:03 -04:00
|
|
|
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
batchInUse bool
|
2021-05-09 21:27:03 -04:00
|
|
|
batchCancel context.CancelFunc
|
|
|
|
batchReader *bufio.Reader
|
|
|
|
batchWriter WriteCloserError
|
|
|
|
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
checkInUse bool
|
2021-05-09 21:27:03 -04:00
|
|
|
checkCancel context.CancelFunc
|
|
|
|
checkReader *bufio.Reader
|
|
|
|
checkWriter WriteCloserError
|
2021-11-30 15:06:32 -05:00
|
|
|
|
2022-07-25 11:39:42 -04:00
|
|
|
Ctx context.Context
|
|
|
|
LastCommitCache *LastCommitCache
|
2020-12-17 09:00:47 -05:00
|
|
|
}
|
|
|
|
|
2022-03-29 15:13:41 -04:00
|
|
|
// openRepositoryWithDefaultContext opens the repository at the given path with DefaultContext.
|
|
|
|
func openRepositoryWithDefaultContext(repoPath string) (*Repository, error) {
|
|
|
|
return OpenRepository(DefaultContext, repoPath)
|
2021-11-30 15:06:32 -05:00
|
|
|
}
|
|
|
|
|
2022-03-29 15:13:41 -04:00
|
|
|
// OpenRepository opens the repository at the given path with the provided context.
|
|
|
|
func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) {
|
2020-12-17 09:00:47 -05:00
|
|
|
repoPath, err := filepath.Abs(repoPath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
} else if !isDir(repoPath) {
|
|
|
|
return nil, errors.New("no such file or directory")
|
|
|
|
}
|
2021-05-09 21:27:03 -04:00
|
|
|
|
2021-12-16 14:01:14 -05:00
|
|
|
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
|
|
|
|
if err := EnsureValidGitRepository(ctx, repoPath); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-05-09 21:27:03 -04:00
|
|
|
repo := &Repository{
|
2020-12-17 09:00:47 -05:00
|
|
|
Path: repoPath,
|
|
|
|
tagCache: newObjectCache(),
|
2021-11-30 15:06:32 -05:00
|
|
|
Ctx: ctx,
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
|
|
|
|
2021-11-30 15:06:32 -05:00
|
|
|
repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(ctx, repoPath)
|
2023-04-19 09:40:42 -04:00
|
|
|
repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(ctx, repoPath)
|
2021-05-09 21:27:03 -04:00
|
|
|
|
|
|
|
return repo, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// CatFileBatch obtains a CatFileBatch for this repository
|
2021-11-30 15:06:32 -05:00
|
|
|
func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
if repo.batchCancel == nil || repo.batchInUse {
|
2021-06-25 12:54:08 -04:00
|
|
|
log.Debug("Opening temporary cat file batch for: %s", repo.Path)
|
2021-11-30 15:06:32 -05:00
|
|
|
return CatFileBatch(ctx, repo.Path)
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
repo.batchInUse = true
|
|
|
|
return repo.batchWriter, repo.batchReader, func() {
|
|
|
|
repo.batchInUse = false
|
|
|
|
}
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// CatFileBatchCheck obtains a CatFileBatchCheck for this repository
|
2021-11-30 15:06:32 -05:00
|
|
|
func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
if repo.checkCancel == nil || repo.checkInUse {
|
|
|
|
log.Debug("Opening temporary cat file batch-check for: %s", repo.Path)
|
2021-11-30 15:06:32 -05:00
|
|
|
return CatFileBatchCheck(ctx, repo.Path)
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
repo.checkInUse = true
|
|
|
|
return repo.checkWriter, repo.checkReader, func() {
|
|
|
|
repo.checkInUse = false
|
|
|
|
}
|
2020-12-17 09:00:47 -05:00
|
|
|
}
|
|
|
|
|
2022-01-19 18:26:57 -05:00
|
|
|
func (repo *Repository) Close() (err error) {
|
2021-05-09 21:27:03 -04:00
|
|
|
if repo == nil {
|
2023-07-09 07:58:06 -04:00
|
|
|
return nil
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
|
|
|
if repo.batchCancel != nil {
|
|
|
|
repo.batchCancel()
|
|
|
|
repo.batchReader = nil
|
|
|
|
repo.batchWriter = nil
|
|
|
|
repo.batchCancel = nil
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
repo.batchInUse = false
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
|
|
|
if repo.checkCancel != nil {
|
|
|
|
repo.checkCancel()
|
|
|
|
repo.checkCancel = nil
|
|
|
|
repo.checkReader = nil
|
|
|
|
repo.checkWriter = nil
|
Prevent double use of `git cat-file` session. (#29298) (#29301)
Backport #29298
Fixes the reason why #29101 is hard to replicate.
Related #29297
Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)
commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)
entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)
b := entry.Blob()
// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()
// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```
The problem is the check in `CatFileBatch`:
https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.
2024-02-21 22:20:20 -05:00
|
|
|
repo.checkInUse = false
|
2021-05-09 21:27:03 -04:00
|
|
|
}
|
2022-07-25 11:39:42 -04:00
|
|
|
repo.LastCommitCache = nil
|
|
|
|
repo.tagCache = nil
|
2022-06-20 06:02:49 -04:00
|
|
|
return err
|
2020-12-17 09:00:47 -05:00
|
|
|
}
|