2016-11-03 18:16:01 -04:00
|
|
|
// Copyright 2015 The Gogs Authors. All rights reserved.
|
2019-06-07 16:29:29 -04:00
|
|
|
// Copyright 2019 The Gitea Authors. All rights reserved.
|
2022-11-27 13:20:29 -05:00
|
|
|
// SPDX-License-Identifier: MIT
|
2016-11-03 18:16:01 -04:00
|
|
|
|
|
|
|
package git
|
|
|
|
|
|
|
|
import (
|
2021-12-23 03:32:29 -05:00
|
|
|
"bufio"
|
2020-05-26 01:58:07 -04:00
|
|
|
"bytes"
|
2022-01-19 18:26:57 -05:00
|
|
|
"context"
|
2021-12-23 03:32:29 -05:00
|
|
|
"errors"
|
2016-11-03 18:16:01 -04:00
|
|
|
"fmt"
|
2018-01-07 08:10:20 -05:00
|
|
|
"io"
|
2021-12-23 03:32:29 -05:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2020-05-26 01:58:07 -04:00
|
|
|
"regexp"
|
2016-11-03 18:16:01 -04:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
2019-06-12 15:41:28 -04:00
|
|
|
|
|
|
|
logger "code.gitea.io/gitea/modules/log"
|
2016-11-03 18:16:01 -04:00
|
|
|
)
|
|
|
|
|
2019-06-07 16:29:29 -04:00
|
|
|
// CompareInfo represents needed information for comparing references.
|
|
|
|
type CompareInfo struct {
|
2021-02-16 10:39:45 -05:00
|
|
|
MergeBase string
|
|
|
|
BaseCommitID string
|
|
|
|
HeadCommitID string
|
2021-08-09 14:08:51 -04:00
|
|
|
Commits []*Commit
|
2021-02-16 10:39:45 -05:00
|
|
|
NumFiles int
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
|
|
|
|
2019-06-11 19:32:08 -04:00
|
|
|
// GetMergeBase checks and returns merge base of two branches and the reference used as base.
|
2021-12-19 23:41:31 -05:00
|
|
|
func (repo *Repository) GetMergeBase(tmpRemote, base, head string) (string, string, error) {
|
2019-06-07 16:29:29 -04:00
|
|
|
if tmpRemote == "" {
|
|
|
|
tmpRemote = "origin"
|
|
|
|
}
|
|
|
|
|
|
|
|
if tmpRemote != "origin" {
|
2021-12-02 02:28:08 -05:00
|
|
|
tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
|
2019-06-07 16:29:29 -04:00
|
|
|
// Fetch commit into a temporary branch in order to be able to handle commits and tags
|
2022-10-23 10:44:45 -04:00
|
|
|
_, _, err := NewCommand(repo.Ctx, "fetch", "--no-tags").AddDynamicArguments(tmpRemote).AddDashesAndList(base + ":" + tmpBaseName).RunStdString(&RunOpts{Dir: repo.Path})
|
2019-06-07 16:29:29 -04:00
|
|
|
if err == nil {
|
|
|
|
base = tmpBaseName
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-23 10:44:45 -04:00
|
|
|
stdout, _, err := NewCommand(repo.Ctx, "merge-base").AddDashesAndList(base, head).RunStdString(&RunOpts{Dir: repo.Path})
|
2019-06-11 19:32:08 -04:00
|
|
|
return strings.TrimSpace(stdout), base, err
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
|
|
|
|
2019-06-07 16:29:29 -04:00
|
|
|
// GetCompareInfo generates and returns compare information between base and head branches of repositories.
|
2021-10-15 12:05:33 -04:00
|
|
|
func (repo *Repository) GetCompareInfo(basePath, baseBranch, headBranch string, directComparison, fileOnly bool) (_ *CompareInfo, err error) {
|
2019-06-07 16:29:29 -04:00
|
|
|
var (
|
|
|
|
remoteBranch string
|
|
|
|
tmpRemote string
|
|
|
|
)
|
2016-11-03 18:16:01 -04:00
|
|
|
|
|
|
|
// We don't need a temporary remote for same repository.
|
|
|
|
if repo.Path != basePath {
|
|
|
|
// Add a temporary remote
|
2019-06-07 16:29:29 -04:00
|
|
|
tmpRemote = strconv.FormatInt(time.Now().UnixNano(), 10)
|
2019-08-13 04:30:44 -04:00
|
|
|
if err = repo.AddRemote(tmpRemote, basePath, false); err != nil {
|
2022-10-24 15:29:17 -04:00
|
|
|
return nil, fmt.Errorf("AddRemote: %w", err)
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
2019-06-12 15:41:28 -04:00
|
|
|
defer func() {
|
|
|
|
if err := repo.RemoveRemote(tmpRemote); err != nil {
|
|
|
|
logger.Error("GetPullRequestInfo: RemoveRemote: %v", err)
|
|
|
|
}
|
|
|
|
}()
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
|
|
|
|
2019-06-07 16:29:29 -04:00
|
|
|
compareInfo := new(CompareInfo)
|
2021-02-16 10:39:45 -05:00
|
|
|
|
2022-01-19 18:26:57 -05:00
|
|
|
compareInfo.HeadCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, headBranch)
|
2021-02-16 10:39:45 -05:00
|
|
|
if err != nil {
|
|
|
|
compareInfo.HeadCommitID = headBranch
|
|
|
|
}
|
|
|
|
|
2019-06-11 19:32:08 -04:00
|
|
|
compareInfo.MergeBase, remoteBranch, err = repo.GetMergeBase(tmpRemote, baseBranch, headBranch)
|
2019-04-09 16:45:58 -04:00
|
|
|
if err == nil {
|
2022-01-19 18:26:57 -05:00
|
|
|
compareInfo.BaseCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
|
2021-02-16 10:39:45 -05:00
|
|
|
if err != nil {
|
|
|
|
compareInfo.BaseCommitID = remoteBranch
|
|
|
|
}
|
2021-09-27 08:19:34 -04:00
|
|
|
separator := "..."
|
|
|
|
baseCommitID := compareInfo.MergeBase
|
|
|
|
if directComparison {
|
|
|
|
separator = ".."
|
|
|
|
baseCommitID = compareInfo.BaseCommitID
|
|
|
|
}
|
|
|
|
|
2020-07-29 13:53:04 -04:00
|
|
|
// We have a common base - therefore we know that ... should work
|
2021-10-15 12:05:33 -04:00
|
|
|
if !fileOnly {
|
2022-03-31 22:55:30 -04:00
|
|
|
var logs []byte
|
2022-10-23 10:44:45 -04:00
|
|
|
logs, _, err = NewCommand(repo.Ctx, "log").AddDynamicArguments(baseCommitID + separator + headBranch).AddArguments(prettyLogFormat).RunStdBytes(&RunOpts{Dir: repo.Path})
|
2021-10-15 12:05:33 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
compareInfo.Commits, err = repo.parsePrettyFormatLogToList(logs)
|
|
|
|
if err != nil {
|
2022-10-24 15:29:17 -04:00
|
|
|
return nil, fmt.Errorf("parsePrettyFormatLogToList: %w", err)
|
2021-10-15 12:05:33 -04:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
compareInfo.Commits = []*Commit{}
|
2019-04-09 16:45:58 -04:00
|
|
|
}
|
|
|
|
} else {
|
2021-08-09 14:08:51 -04:00
|
|
|
compareInfo.Commits = []*Commit{}
|
2022-01-19 18:26:57 -05:00
|
|
|
compareInfo.MergeBase, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
|
2019-04-09 16:45:58 -04:00
|
|
|
if err != nil {
|
2019-06-07 16:29:29 -04:00
|
|
|
compareInfo.MergeBase = remoteBranch
|
2019-04-09 16:45:58 -04:00
|
|
|
}
|
2021-02-16 10:39:45 -05:00
|
|
|
compareInfo.BaseCommitID = compareInfo.MergeBase
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Count number of changed files.
|
2020-05-26 01:58:07 -04:00
|
|
|
// This probably should be removed as we need to use shortstat elsewhere
|
|
|
|
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
|
2021-09-27 08:19:34 -04:00
|
|
|
compareInfo.NumFiles, err = repo.GetDiffNumChangedFiles(remoteBranch, headBranch, directComparison)
|
2016-11-03 18:16:01 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-06-07 16:29:29 -04:00
|
|
|
return compareInfo, nil
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
|
|
|
|
2020-05-26 01:58:07 -04:00
|
|
|
// lineCountWriter is a writer that keeps no data: it only maintains a running
// count of the NUL bytes written to it.
type lineCountWriter struct {
	numLines int
}

// Write adds the number of NUL ('\000') bytes found in p to the running total.
// It always reports that all of p was consumed and never returns an error.
func (l *lineCountWriter) Write(p []byte) (n int, err error) {
	l.numLines += bytes.Count(p, []byte{0})
	return len(p), nil
}
|
|
|
|
|
|
|
|
// GetDiffNumChangedFiles counts the number of changed files
|
|
|
|
// This is substantially quicker than shortstat but...
|
2021-09-27 08:19:34 -04:00
|
|
|
func (repo *Repository) GetDiffNumChangedFiles(base, head string, directComparison bool) (int, error) {
|
2020-05-26 01:58:07 -04:00
|
|
|
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
|
|
|
|
w := &lineCountWriter{}
|
|
|
|
stderr := new(bytes.Buffer)
|
|
|
|
|
2021-09-27 08:19:34 -04:00
|
|
|
separator := "..."
|
|
|
|
if directComparison {
|
|
|
|
separator = ".."
|
|
|
|
}
|
|
|
|
|
2022-10-23 10:44:45 -04:00
|
|
|
if err := NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base + separator + head).
|
2022-03-31 22:55:30 -04:00
|
|
|
Run(&RunOpts{
|
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
|
|
|
Stderr: stderr,
|
2022-02-11 07:47:22 -05:00
|
|
|
}); err != nil {
|
2020-07-29 13:53:04 -04:00
|
|
|
if strings.Contains(stderr.String(), "no merge base") {
|
|
|
|
// git >= 2.28 now returns an error if base and head have become unrelated.
|
|
|
|
// previously it would return the results of git diff -z --name-only base head so let's try that...
|
|
|
|
w = &lineCountWriter{}
|
|
|
|
stderr.Reset()
|
2022-10-23 10:44:45 -04:00
|
|
|
if err = NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base, head).Run(&RunOpts{
|
2022-03-31 22:55:30 -04:00
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
|
|
|
Stderr: stderr,
|
2022-02-11 07:47:22 -05:00
|
|
|
}); err == nil {
|
2020-07-29 13:53:04 -04:00
|
|
|
return w.numLines, nil
|
|
|
|
}
|
|
|
|
}
|
2022-10-24 15:29:17 -04:00
|
|
|
return 0, fmt.Errorf("%w: Stderr: %s", err, stderr)
|
2020-05-26 01:58:07 -04:00
|
|
|
}
|
|
|
|
return w.numLines, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetDiffShortStat counts number of changed files, number of additions and deletions
|
|
|
|
func (repo *Repository) GetDiffShortStat(base, head string) (numFiles, totalAdditions, totalDeletions int, err error) {
|
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
|
|
|
numFiles, totalAdditions, totalDeletions, err = GetDiffShortStat(repo.Ctx, repo.Path, nil, base+"..."+head)
|
2020-07-29 13:53:04 -04:00
|
|
|
if err != nil && strings.Contains(err.Error(), "no merge base") {
|
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
|
|
|
return GetDiffShortStat(repo.Ctx, repo.Path, nil, base, head)
|
2020-07-29 13:53:04 -04:00
|
|
|
}
|
2022-06-20 06:02:49 -04:00
|
|
|
return numFiles, totalAdditions, totalDeletions, err
|
2020-05-26 01:58:07 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// GetDiffShortStat counts number of changed files, number of additions and deletions
|
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
|
|
|
func GetDiffShortStat(ctx context.Context, repoPath string, trustedArgs TrustedCmdArgs, dynamicArgs ...string) (numFiles, totalAdditions, totalDeletions int, err error) {
|
2020-05-26 01:58:07 -04:00
|
|
|
// Now if we call:
|
|
|
|
// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
|
|
|
|
// we get:
|
|
|
|
// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
|
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
|
|
|
cmd := NewCommand(ctx, "diff", "--shortstat").AddArguments(trustedArgs...).AddDynamicArguments(dynamicArgs...)
|
|
|
|
stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repoPath})
|
2020-05-26 01:58:07 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, 0, 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return parseDiffStat(stdout)
|
|
|
|
}
|
|
|
|
|
|
|
|
// shortStatFormat matches the summary line printed by `git diff --shortstat`.
// The file count is always captured; the insertion and deletion counts are
// captured only when git prints them.
var shortStatFormat = regexp.MustCompile(
	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)

// patchCommits captures the word following "From " at the start of a line.
var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)

// parseDiffStat extracts the number of changed files, insertions and deletions
// from the output of `git diff --shortstat`.
//
// Empty output, or output consisting of a single newline, means nothing
// changed and yields all zeros. A count that git left out of the summary is
// reported as zero. An error is returned when the output does not match the
// expected format or when a number cannot be converted.
func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
	switch stdout {
	case "", "\n":
		return 0, 0, 0, nil
	}

	match := shortStatFormat.FindStringSubmatch(stdout)
	if len(match) != 4 {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, match)
	}

	if numFiles, err = strconv.Atoi(match[1]); err != nil {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err)
	}

	// The optional groups are empty when git omitted that part of the summary.
	if raw := match[2]; raw != "" {
		if totalAdditions, err = strconv.Atoi(raw); err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err)
		}
	}

	if raw := match[3]; raw != "" {
		if totalDeletions, err = strconv.Atoi(raw); err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err)
		}
	}

	return numFiles, totalAdditions, totalDeletions, nil
}
|
|
|
|
|
2019-12-13 17:21:06 -05:00
|
|
|
// GetDiffOrPatch generates either diff or formatted patch data between given revisions
|
2021-09-27 17:09:49 -04:00
|
|
|
func (repo *Repository) GetDiffOrPatch(base, head string, w io.Writer, patch, binary bool) error {
|
|
|
|
if patch {
|
2019-12-13 17:21:06 -05:00
|
|
|
return repo.GetPatch(base, head, w)
|
|
|
|
}
|
2021-09-27 17:09:49 -04:00
|
|
|
if binary {
|
|
|
|
return repo.GetDiffBinary(base, head, w)
|
|
|
|
}
|
2019-12-13 17:21:06 -05:00
|
|
|
return repo.GetDiff(base, head, w)
|
2016-11-03 18:16:01 -04:00
|
|
|
}
|
2018-01-07 08:10:20 -05:00
|
|
|
|
2021-09-27 17:09:49 -04:00
|
|
|
// GetDiff generates and returns patch data between given revisions, optimized for human readability
|
2019-12-13 17:21:06 -05:00
|
|
|
func (repo *Repository) GetDiff(base, head string, w io.Writer) error {
|
2022-10-23 10:44:45 -04:00
|
|
|
return NewCommand(repo.Ctx, "diff", "-p").AddDynamicArguments(base, head).Run(&RunOpts{
|
2022-03-31 22:55:30 -04:00
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
2022-02-11 07:47:22 -05:00
|
|
|
})
|
2021-09-27 17:09:49 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
|
|
|
|
func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error {
|
2022-10-23 10:44:45 -04:00
|
|
|
return NewCommand(repo.Ctx, "diff", "-p", "--binary", "--histogram").AddDynamicArguments(base, head).Run(&RunOpts{
|
2022-03-31 22:55:30 -04:00
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
2022-02-11 07:47:22 -05:00
|
|
|
})
|
2019-12-13 17:21:06 -05:00
|
|
|
}
|
2018-01-07 08:10:20 -05:00
|
|
|
|
2021-09-27 17:09:49 -04:00
|
|
|
// GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
|
2019-12-13 17:21:06 -05:00
|
|
|
func (repo *Repository) GetPatch(base, head string, w io.Writer) error {
|
2020-07-29 13:53:04 -04:00
|
|
|
stderr := new(bytes.Buffer)
|
2022-10-23 10:44:45 -04:00
|
|
|
err := NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base + "..." + head).
|
2022-03-31 22:55:30 -04:00
|
|
|
Run(&RunOpts{
|
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
|
|
|
Stderr: stderr,
|
2022-02-11 07:47:22 -05:00
|
|
|
})
|
2020-07-29 13:53:04 -04:00
|
|
|
if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
|
2022-10-23 10:44:45 -04:00
|
|
|
return NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base, head).
|
2022-03-31 22:55:30 -04:00
|
|
|
Run(&RunOpts{
|
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
2022-02-11 07:47:22 -05:00
|
|
|
})
|
2020-07-29 13:53:04 -04:00
|
|
|
}
|
|
|
|
return err
|
2018-01-07 08:10:20 -05:00
|
|
|
}
|
2020-01-08 20:47:45 -05:00
|
|
|
|
2022-05-07 14:28:10 -04:00
|
|
|
// GetFilesChangedBetween returns a list of all files that have been changed between the given commits
|
|
|
|
func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) {
|
2023-03-03 17:28:38 -05:00
|
|
|
stdout, _, err := NewCommand(repo.Ctx, "diff", "--name-only", "-z").AddDynamicArguments(base + ".." + head).RunStdString(&RunOpts{Dir: repo.Path})
|
2022-05-07 14:28:10 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-03-03 17:28:38 -05:00
|
|
|
split := strings.Split(stdout, "\000")
|
|
|
|
|
|
|
|
// Because Git will always emit filenames with a terminal NUL ignore the last entry in the split - which will always be empty.
|
|
|
|
if len(split) > 0 {
|
|
|
|
split = split[:len(split)-1]
|
|
|
|
}
|
|
|
|
|
|
|
|
return split, err
|
2022-05-07 14:28:10 -04:00
|
|
|
}
|
|
|
|
|
2020-01-08 20:47:45 -05:00
|
|
|
// GetDiffFromMergeBase generates and return patch data from merge base to head
|
|
|
|
func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) error {
|
2020-07-29 13:53:04 -04:00
|
|
|
stderr := new(bytes.Buffer)
|
2022-10-23 10:44:45 -04:00
|
|
|
err := NewCommand(repo.Ctx, "diff", "-p", "--binary").AddDynamicArguments(base + "..." + head).
|
2022-03-31 22:55:30 -04:00
|
|
|
Run(&RunOpts{
|
|
|
|
Dir: repo.Path,
|
|
|
|
Stdout: w,
|
|
|
|
Stderr: stderr,
|
2022-02-11 07:47:22 -05:00
|
|
|
})
|
2020-07-29 13:53:04 -04:00
|
|
|
if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
|
2021-09-27 17:09:49 -04:00
|
|
|
return repo.GetDiffBinary(base, head, w)
|
2020-07-29 13:53:04 -04:00
|
|
|
}
|
|
|
|
return err
|
2020-01-08 20:47:45 -05:00
|
|
|
}
|
2021-12-23 03:32:29 -05:00
|
|
|
|
|
|
|
// ReadPatchCommit will check if a diff patch exists and return stats
|
|
|
|
func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
|
|
|
|
// Migrated repositories download patches to "pulls" location
|
|
|
|
patchFile := fmt.Sprintf("pulls/%d.patch", prID)
|
|
|
|
loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
defer loadPatch.Close()
|
|
|
|
// Read only the first line of the patch - usually it contains the first commit made in patch
|
|
|
|
scanner := bufio.NewScanner(loadPatch)
|
|
|
|
scanner.Scan()
|
|
|
|
// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
|
|
|
|
commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
|
|
|
|
if len(commitSHAGroups) != 0 {
|
|
|
|
commitSHA = commitSHAGroups[1]
|
|
|
|
} else {
|
|
|
|
return "", errors.New("patch file doesn't contain valid commit ID")
|
|
|
|
}
|
|
|
|
return commitSHA, nil
|
|
|
|
}
|