2019-02-12 08:07:31 -05:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2019-02-12 08:07:31 -05:00
2021-11-24 02:56:24 -05:00
package files
2019-02-12 08:07:31 -05:00
import (
"bytes"
"context"
"fmt"
"io"
"os"
2019-04-17 12:06:35 -04:00
"regexp"
2019-02-12 08:07:31 -05:00
"strings"
"time"
"code.gitea.io/gitea/models"
2021-12-09 20:27:50 -05:00
repo_model "code.gitea.io/gitea/models/repo"
2021-11-24 04:49:20 -05:00
user_model "code.gitea.io/gitea/models/user"
2019-04-17 12:06:35 -04:00
"code.gitea.io/gitea/modules/git"
2019-05-11 11:29:17 -04:00
"code.gitea.io/gitea/modules/log"
2022-05-08 12:46:32 -04:00
repo_module "code.gitea.io/gitea/modules/repository"
2019-02-12 08:07:31 -05:00
"code.gitea.io/gitea/modules/setting"
2021-12-10 03:14:24 -05:00
asymkey_service "code.gitea.io/gitea/services/asymkey"
2019-09-05 22:20:09 -04:00
"code.gitea.io/gitea/services/gitdiff"
2019-02-12 08:07:31 -05:00
)
2019-04-17 12:06:35 -04:00
// TemporaryUploadRepository is a type to wrap our upload repositories as a shallow clone
2019-02-12 08:07:31 -05:00
type TemporaryUploadRepository struct {
2022-01-19 18:26:57 -05:00
ctx context . Context
2021-12-09 20:27:50 -05:00
repo * repo_model . Repository
2019-04-17 12:06:35 -04:00
gitRepo * git . Repository
2019-02-12 08:07:31 -05:00
basePath string
}
// NewTemporaryUploadRepository creates a new temporary upload repository
2022-01-19 18:26:57 -05:00
func NewTemporaryUploadRepository ( ctx context . Context , repo * repo_model . Repository ) ( * TemporaryUploadRepository , error ) {
2022-05-08 12:46:32 -04:00
basePath , err := repo_module . CreateTemporaryPath ( "upload" )
2019-05-11 11:29:17 -04:00
if err != nil {
return nil , err
2019-02-12 08:07:31 -05:00
}
2022-01-19 18:26:57 -05:00
t := & TemporaryUploadRepository { ctx : ctx , repo : repo , basePath : basePath }
2019-02-12 08:07:31 -05:00
return t , nil
}
// Close the repository cleaning up all files
func ( t * TemporaryUploadRepository ) Close ( ) {
2019-11-13 02:01:19 -05:00
defer t . gitRepo . Close ( )
2022-05-08 12:46:32 -04:00
if err := repo_module . RemoveTemporaryPath ( t . basePath ) ; err != nil {
2019-05-11 11:29:17 -04:00
log . Error ( "Failed to remove temporary path %s: %v" , t . basePath , err )
2019-02-12 08:07:31 -05:00
}
}
// Clone the base repository to our path and set branch as the HEAD
2024-01-16 10:06:51 -05:00
func ( t * TemporaryUploadRepository ) Clone ( branch string , bare bool ) error {
cmd := git . NewCommand ( t . ctx , "clone" , "-s" , "-b" ) . AddDynamicArguments ( branch , t . repo . RepoPath ( ) , t . basePath )
if bare {
cmd . AddArguments ( "--bare" )
}
if _ , _ , err := cmd . RunStdString ( nil ) ; err != nil {
2019-11-11 06:46:28 -05:00
stderr := err . Error ( )
2019-04-17 12:06:35 -04:00
if matched , _ := regexp . MatchString ( ".*Remote branch .* not found in upstream origin.*" , stderr ) ; matched {
2019-04-19 08:17:27 -04:00
return git . ErrBranchNotExist {
2019-04-17 12:06:35 -04:00
Name : branch ,
}
} else if matched , _ := regexp . MatchString ( ".* repository .* does not exist.*" , stderr ) ; matched {
2021-12-09 20:27:50 -05:00
return repo_model . ErrRepoNotExist {
2019-04-17 12:06:35 -04:00
ID : t . repo . ID ,
UID : t . repo . OwnerID ,
OwnerName : t . repo . OwnerName ,
Name : t . repo . Name ,
}
}
2023-10-23 22:54:59 -04:00
return fmt . Errorf ( "Clone: %w %s" , err , stderr )
2019-04-17 12:06:35 -04:00
}
2022-03-29 15:13:41 -04:00
gitRepo , err := git . OpenRepository ( t . ctx , t . basePath )
2019-04-17 12:06:35 -04:00
if err != nil {
return err
2019-02-12 08:07:31 -05:00
}
2019-04-17 12:06:35 -04:00
t . gitRepo = gitRepo
2019-02-12 08:07:31 -05:00
return nil
}
2022-03-28 15:48:41 -04:00
// Init the repository
2023-12-17 06:56:08 -05:00
func ( t * TemporaryUploadRepository ) Init ( objectFormatName string ) error {
if err := git . InitRepository ( t . ctx , t . basePath , false , objectFormatName ) ; err != nil {
2022-03-28 15:48:41 -04:00
return err
}
2022-03-29 15:13:41 -04:00
gitRepo , err := git . OpenRepository ( t . ctx , t . basePath )
2022-03-28 15:48:41 -04:00
if err != nil {
return err
}
t . gitRepo = gitRepo
return nil
}
2019-02-12 08:07:31 -05:00
// SetDefaultIndex sets the git index to our HEAD
func ( t * TemporaryUploadRepository ) SetDefaultIndex ( ) error {
2022-03-31 22:55:30 -04:00
if _ , _ , err := git . NewCommand ( t . ctx , "read-tree" , "HEAD" ) . RunStdString ( & git . RunOpts { Dir : t . basePath } ) ; err != nil {
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "SetDefaultIndex: %w" , err )
2019-02-12 08:07:31 -05:00
}
return nil
}
2024-01-16 10:06:51 -05:00
// RefreshIndex runs `git update-index --refresh` in the temporary repository,
// which re-checks the index entries against stat() information. A failure is
// returned wrapped with a "RefreshIndex:" prefix.
func (t *TemporaryUploadRepository) RefreshIndex() error {
	runOpts := &git.RunOpts{Dir: t.basePath}
	_, _, err := git.NewCommand(t.ctx, "update-index", "--refresh").RunStdString(runOpts)
	if err != nil {
		return fmt.Errorf("RefreshIndex: %w", err)
	}
	return nil
}
2019-02-12 08:07:31 -05:00
// LsFiles checks if the given filename arguments are in the index
func ( t * TemporaryUploadRepository ) LsFiles ( filenames ... string ) ( [ ] string , error ) {
stdOut := new ( bytes . Buffer )
stdErr := new ( bytes . Buffer )
2022-10-23 10:44:45 -04:00
if err := git . NewCommand ( t . ctx , "ls-files" , "-z" ) . AddDashesAndList ( filenames ... ) .
2022-03-31 22:55:30 -04:00
Run ( & git . RunOpts {
Dir : t . basePath ,
Stdout : stdOut ,
Stderr : stdErr ,
2022-02-11 07:47:22 -05:00
} ) ; err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to run git ls-files for temporary repo: %s (%s) Error: %v\nstdout: %s\nstderr: %s" , t . repo . FullName ( ) , t . basePath , err , stdOut . String ( ) , stdErr . String ( ) )
2022-10-24 15:29:17 -04:00
err = fmt . Errorf ( "Unable to run git ls-files for temporary repo of: %s Error: %w\nstdout: %s\nstderr: %s" , t . repo . FullName ( ) , err , stdOut . String ( ) , stdErr . String ( ) )
2019-02-12 08:07:31 -05:00
return nil , err
}
2023-08-29 11:47:26 -04:00
fileList := make ( [ ] string , 0 , len ( filenames ) )
2019-02-12 08:07:31 -05:00
for _ , line := range bytes . Split ( stdOut . Bytes ( ) , [ ] byte { '\000' } ) {
2023-08-29 11:47:26 -04:00
fileList = append ( fileList , string ( line ) )
2019-02-12 08:07:31 -05:00
}
2023-08-29 11:47:26 -04:00
return fileList , nil
2019-02-12 08:07:31 -05:00
}
// RemoveFilesFromIndex removes the given files from the index
func ( t * TemporaryUploadRepository ) RemoveFilesFromIndex ( filenames ... string ) error {
2024-04-21 20:13:44 -04:00
objFmt , err := t . gitRepo . GetObjectFormat ( )
if err != nil {
return fmt . Errorf ( "unable to get object format for temporary repo: %q, error: %w" , t . repo . FullName ( ) , err )
}
2019-02-12 08:07:31 -05:00
stdOut := new ( bytes . Buffer )
stdErr := new ( bytes . Buffer )
stdIn := new ( bytes . Buffer )
for _ , file := range filenames {
if file != "" {
2024-04-21 20:13:44 -04:00
// man git-update-index: input syntax (1): mode SP sha1 TAB path
// mode=0 means "remove from index", then hash part "does not matter as long as it is well formatted."
_ , _ = fmt . Fprintf ( stdIn , "0 %s\t%s\x00" , objFmt . EmptyObjectID ( ) , file )
2019-02-12 08:07:31 -05:00
}
}
2022-02-11 07:47:22 -05:00
if err := git . NewCommand ( t . ctx , "update-index" , "--remove" , "-z" , "--index-info" ) .
2022-03-31 22:55:30 -04:00
Run ( & git . RunOpts {
Dir : t . basePath ,
Stdin : stdIn ,
Stdout : stdOut ,
Stderr : stdErr ,
2022-02-11 07:47:22 -05:00
} ) ; err != nil {
2024-04-21 20:13:44 -04:00
return fmt . Errorf ( "unable to update-index for temporary repo: %q, error: %w\nstdout: %s\nstderr: %s" , t . repo . FullName ( ) , err , stdOut . String ( ) , stdErr . String ( ) )
2019-02-12 08:07:31 -05:00
}
2019-11-11 06:46:28 -05:00
return nil
2019-02-12 08:07:31 -05:00
}
// HashObject writes the provided content to the object db and returns its hash
func ( t * TemporaryUploadRepository ) HashObject ( content io . Reader ) ( string , error ) {
2019-11-11 06:46:28 -05:00
stdOut := new ( bytes . Buffer )
stdErr := new ( bytes . Buffer )
2019-02-12 08:07:31 -05:00
2022-02-11 07:47:22 -05:00
if err := git . NewCommand ( t . ctx , "hash-object" , "-w" , "--stdin" ) .
2022-03-31 22:55:30 -04:00
Run ( & git . RunOpts {
Dir : t . basePath ,
Stdin : content ,
Stdout : stdOut ,
Stderr : stdErr ,
2022-02-11 07:47:22 -05:00
} ) ; err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to hash-object to temporary repo: %s (%s) Error: %v\nstdout: %s\nstderr: %s" , t . repo . FullName ( ) , t . basePath , err , stdOut . String ( ) , stdErr . String ( ) )
2022-10-24 15:29:17 -04:00
return "" , fmt . Errorf ( "Unable to hash-object to temporary repo: %s Error: %w\nstdout: %s\nstderr: %s" , t . repo . FullName ( ) , err , stdOut . String ( ) , stdErr . String ( ) )
2019-02-12 08:07:31 -05:00
}
2019-11-11 06:46:28 -05:00
return strings . TrimSpace ( stdOut . String ( ) ) , nil
2019-02-12 08:07:31 -05:00
}
// AddObjectToIndex adds the provided object hash to the index with the provided mode and path
func ( t * TemporaryUploadRepository ) AddObjectToIndex ( mode , objectHash , objectPath string ) error {
2022-10-23 10:44:45 -04:00
if _ , _ , err := git . NewCommand ( t . ctx , "update-index" , "--add" , "--replace" , "--cacheinfo" ) . AddDynamicArguments ( mode , objectHash , objectPath ) . RunStdString ( & git . RunOpts { Dir : t . basePath } ) ; err != nil {
2019-11-11 06:46:28 -05:00
stderr := err . Error ( )
2019-04-17 12:06:35 -04:00
if matched , _ := regexp . MatchString ( ".*Invalid path '.*" , stderr ) ; matched {
return models . ErrFilePathInvalid {
Message : objectPath ,
Path : objectPath ,
}
}
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to add object to index: %s %s %s in temporary repo %s(%s) Error: %v" , mode , objectHash , objectPath , t . repo . FullName ( ) , t . basePath , err )
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "Unable to add object to index at %s in temporary repo %s Error: %w" , objectPath , t . repo . FullName ( ) , err )
2019-02-12 08:07:31 -05:00
}
return nil
}
// WriteTree writes the current index as a tree to the object db and returns its hash
func ( t * TemporaryUploadRepository ) WriteTree ( ) ( string , error ) {
2022-03-31 22:55:30 -04:00
stdout , _ , err := git . NewCommand ( t . ctx , "write-tree" ) . RunStdString ( & git . RunOpts { Dir : t . basePath } )
2019-02-12 08:07:31 -05:00
if err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to write tree in temporary repo: %s(%s): Error: %v" , t . repo . FullName ( ) , t . basePath , err )
2022-10-24 15:29:17 -04:00
return "" , fmt . Errorf ( "Unable to write-tree in temporary repo for: %s Error: %w" , t . repo . FullName ( ) , err )
2019-02-12 08:07:31 -05:00
}
2019-11-11 06:46:28 -05:00
return strings . TrimSpace ( stdout ) , nil
2019-04-17 12:06:35 -04:00
}
2019-02-12 08:07:31 -05:00
2019-04-17 12:06:35 -04:00
// GetLastCommit returns the commit ID that HEAD resolves to in the temporary
// repository. It is shorthand for GetLastCommitByRef("HEAD").
func (t *TemporaryUploadRepository) GetLastCommit() (string, error) {
	const headRef = "HEAD"
	return t.GetLastCommitByRef(headRef)
}
// GetLastCommitByRef gets the last commit ID SHA of the repo by ref
func ( t * TemporaryUploadRepository ) GetLastCommitByRef ( ref string ) ( string , error ) {
if ref == "" {
ref = "HEAD"
}
2022-10-23 10:44:45 -04:00
stdout , _ , err := git . NewCommand ( t . ctx , "rev-parse" ) . AddDynamicArguments ( ref ) . RunStdString ( & git . RunOpts { Dir : t . basePath } )
2019-04-17 12:06:35 -04:00
if err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to get last ref for %s in temporary repo: %s(%s): Error: %v" , ref , t . repo . FullName ( ) , t . basePath , err )
2022-10-24 15:29:17 -04:00
return "" , fmt . Errorf ( "Unable to rev-parse %s in temporary repo for: %s Error: %w" , ref , t . repo . FullName ( ) , err )
2019-04-17 12:06:35 -04:00
}
2019-11-11 06:46:28 -05:00
return strings . TrimSpace ( stdout ) , nil
2019-02-12 08:07:31 -05:00
}
// CommitTree creates a commit from a given tree for the user with provided message
2022-03-28 15:48:41 -04:00
func ( t * TemporaryUploadRepository ) CommitTree ( parent string , author , committer * user_model . User , treeHash , message string , signoff bool ) ( string , error ) {
return t . CommitTreeWithDate ( parent , author , committer , treeHash , message , signoff , time . Now ( ) , time . Now ( ) )
2019-12-23 21:33:52 -05:00
}
// CommitTreeWithDate creates a commit from a given tree for the user with provided message
2022-03-28 15:48:41 -04:00
func ( t * TemporaryUploadRepository ) CommitTreeWithDate ( parent string , author , committer * user_model . User , treeHash , message string , signoff bool , authorDate , committerDate time . Time ) ( string , error ) {
2019-04-17 12:06:35 -04:00
authorSig := author . NewGitSig ( )
committerSig := committer . NewGitSig ( )
2019-02-12 08:07:31 -05:00
// Because this may call hooks we should pass in the environment
env := append ( os . Environ ( ) ,
2019-04-17 12:06:35 -04:00
"GIT_AUTHOR_NAME=" + authorSig . Name ,
"GIT_AUTHOR_EMAIL=" + authorSig . Email ,
2019-12-23 21:33:52 -05:00
"GIT_AUTHOR_DATE=" + authorDate . Format ( time . RFC3339 ) ,
"GIT_COMMITTER_DATE=" + committerDate . Format ( time . RFC3339 ) ,
2019-02-12 08:07:31 -05:00
)
2019-10-16 09:42:42 -04:00
2019-10-11 20:13:27 -04:00
messageBytes := new ( bytes . Buffer )
_ , _ = messageBytes . WriteString ( message )
_ , _ = messageBytes . WriteString ( "\n" )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdCommitTree := git . NewCommand ( t . ctx , "commit-tree" ) . AddDynamicArguments ( treeHash )
2022-03-28 15:48:41 -04:00
if parent != "" {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdCommitTree . AddOptionValues ( "-p" , parent )
2022-03-28 15:48:41 -04:00
}
2019-10-16 09:42:42 -04:00
2022-06-16 11:47:44 -04:00
var sign bool
var keyID string
var signer * git . Signature
if parent != "" {
sign , keyID , signer , _ = asymkey_service . SignCRUDAction ( t . ctx , t . repo . RepoPath ( ) , author , t . basePath , parent )
} else {
sign , keyID , signer , _ = asymkey_service . SignInitialCommit ( t . ctx , t . repo . RepoPath ( ) , author )
}
if sign {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdCommitTree . AddOptionFormat ( "-S%s" , keyID )
2022-06-16 11:47:44 -04:00
if t . repo . GetTrustModel ( ) == repo_model . CommitterTrustModel || t . repo . GetTrustModel ( ) == repo_model . CollaboratorCommitterTrustModel {
if committerSig . Name != authorSig . Name || committerSig . Email != authorSig . Email {
// Add trailers
_ , _ = messageBytes . WriteString ( "\n" )
_ , _ = messageBytes . WriteString ( "Co-authored-by: " )
_ , _ = messageBytes . WriteString ( committerSig . String ( ) )
_ , _ = messageBytes . WriteString ( "\n" )
_ , _ = messageBytes . WriteString ( "Co-committed-by: " )
_ , _ = messageBytes . WriteString ( committerSig . String ( ) )
_ , _ = messageBytes . WriteString ( "\n" )
2020-09-19 12:44:55 -04:00
}
2022-06-16 11:47:44 -04:00
committerSig = signer
2019-10-16 09:42:42 -04:00
}
2022-06-16 11:47:44 -04:00
} else {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdCommitTree . AddArguments ( "--no-gpg-sign" )
2019-10-11 20:13:27 -04:00
}
2021-01-29 03:57:45 -05:00
if signoff {
// Signed-off-by
_ , _ = messageBytes . WriteString ( "\n" )
_ , _ = messageBytes . WriteString ( "Signed-off-by: " )
_ , _ = messageBytes . WriteString ( committerSig . String ( ) )
}
2020-09-19 12:44:55 -04:00
env = append ( env ,
"GIT_COMMITTER_NAME=" + committerSig . Name ,
"GIT_COMMITTER_EMAIL=" + committerSig . Email ,
)
2019-11-11 06:46:28 -05:00
stdout := new ( bytes . Buffer )
stderr := new ( bytes . Buffer )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
if err := cmdCommitTree .
2022-03-31 22:55:30 -04:00
Run ( & git . RunOpts {
Env : env ,
Dir : t . basePath ,
Stdin : messageBytes ,
Stdout : stdout ,
Stderr : stderr ,
2022-02-11 07:47:22 -05:00
} ) ; err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to commit-tree in temporary repo: %s (%s) Error: %v\nStdout: %s\nStderr: %s" ,
t . repo . FullName ( ) , t . basePath , err , stdout , stderr )
2022-10-24 15:29:17 -04:00
return "" , fmt . Errorf ( "Unable to commit-tree in temporary repo: %s Error: %w\nStdout: %s\nStderr: %s" ,
2019-11-11 06:46:28 -05:00
t . repo . FullName ( ) , err , stdout , stderr )
2019-02-12 08:07:31 -05:00
}
2019-11-11 06:46:28 -05:00
return strings . TrimSpace ( stdout . String ( ) ) , nil
2019-02-12 08:07:31 -05:00
}
// Push the provided commitHash to the repository branch by the provided user
2021-12-19 23:41:31 -05:00
func ( t * TemporaryUploadRepository ) Push ( doer * user_model . User , commitHash , branch string ) error {
2019-02-12 08:07:31 -05:00
// Because calls hooks we need to pass in the environment
2022-05-08 12:46:32 -04:00
env := repo_module . PushingEnvironment ( doer , t . repo )
2022-01-19 18:26:57 -05:00
if err := git . Push ( t . ctx , t . basePath , git . PushOptions {
2020-03-28 00:13:18 -04:00
Remote : t . repo . RepoPath ( ) ,
2021-12-02 02:28:08 -05:00
Branch : strings . TrimSpace ( commitHash ) + ":" + git . BranchPrefix + strings . TrimSpace ( branch ) ,
2020-03-28 00:13:18 -04:00
Env : env ,
} ) ; err != nil {
if git . IsErrPushOutOfDate ( err ) {
return err
} else if git . IsErrPushRejected ( err ) {
rejectErr := err . ( * git . ErrPushRejected )
log . Info ( "Unable to push back to repo from temporary repo due to rejection: %s (%s)\nStdout: %s\nStderr: %s\nError: %v" ,
t . repo . FullName ( ) , t . basePath , rejectErr . StdOut , rejectErr . StdErr , rejectErr . Err )
2020-02-22 08:08:48 -05:00
return err
}
2020-03-28 00:13:18 -04:00
log . Error ( "Unable to push back to repo from temporary repo: %s (%s)\nError: %v" ,
t . repo . FullName ( ) , t . basePath , err )
2019-11-11 06:46:28 -05:00
return fmt . Errorf ( "Unable to push back to repo from temporary repo: %s (%s) Error: %v" ,
t . repo . FullName ( ) , t . basePath , err )
2019-02-12 08:07:31 -05:00
}
return nil
}
// DiffIndex returns a Diff of the current index to the head
2019-11-11 06:46:28 -05:00
func ( t * TemporaryUploadRepository ) DiffIndex ( ) ( * gitdiff . Diff , error ) {
stdoutReader , stdoutWriter , err := os . Pipe ( )
2019-02-12 08:07:31 -05:00
if err != nil {
2019-11-11 06:46:28 -05:00
log . Error ( "Unable to open stdout pipe: %v" , err )
2022-10-24 15:29:17 -04:00
return nil , fmt . Errorf ( "Unable to open stdout pipe: %w" , err )
2019-02-12 08:07:31 -05:00
}
2019-11-11 06:46:28 -05:00
defer func ( ) {
_ = stdoutReader . Close ( )
_ = stdoutWriter . Close ( )
} ( )
stderr := new ( bytes . Buffer )
var diff * gitdiff . Diff
var finalErr error
2022-02-06 14:01:47 -05:00
if err := git . NewCommand ( t . ctx , "diff-index" , "--src-prefix=\\a/" , "--dst-prefix=\\b/" , "--cached" , "-p" , "HEAD" ) .
2022-03-31 22:55:30 -04:00
Run ( & git . RunOpts {
2022-02-11 07:47:22 -05:00
Timeout : 30 * time . Second ,
Dir : t . basePath ,
Stdout : stdoutWriter ,
Stderr : stderr ,
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
_ = stdoutWriter . Close ( )
2024-11-01 23:29:37 -04:00
defer cancel ( )
2023-10-03 06:30:41 -04:00
diff , finalErr = gitdiff . ParsePatch ( t . ctx , setting . Git . MaxGitDiffLines , setting . Git . MaxGitDiffLineCharacters , setting . Git . MaxGitDiffFiles , stdoutReader , "" )
2022-02-11 07:47:22 -05:00
if finalErr != nil {
log . Error ( "ParsePatch: %v" , finalErr )
cancel ( )
}
_ = stdoutReader . Close ( )
return finalErr
} ,
2019-11-11 06:46:28 -05:00
} ) ; err != nil {
if finalErr != nil {
log . Error ( "Unable to ParsePatch in temporary repo %s (%s). Error: %v" , t . repo . FullName ( ) , t . basePath , finalErr )
return nil , finalErr
}
2024-11-01 23:29:37 -04:00
// If the process exited early, don't error
if err != context . Canceled {
log . Error ( "Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s" ,
t . repo . FullName ( ) , t . basePath , err , stderr )
return nil , fmt . Errorf ( "Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s" ,
t . repo . FullName ( ) , err , stderr )
}
2019-02-12 08:07:31 -05:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
diff . NumFiles , diff . TotalAddition , diff . TotalDeletion , err = git . GetDiffShortStat ( t . ctx , t . basePath , git . TrustedCmdArgs { "--cached" } , "HEAD" )
2020-05-26 01:58:07 -04:00
if err != nil {
return nil , err
}
2019-02-12 08:07:31 -05:00
return diff , nil
}
2019-04-17 12:06:35 -04:00
// GetBranchCommit looks up the commit object at the tip of the given branch
// in the underlying git repository. It returns an error if the repository has
// not been cloned yet (gitRepo is nil).
func (t *TemporaryUploadRepository) GetBranchCommit(branch string) (*git.Commit, error) {
	if gitRepo := t.gitRepo; gitRepo != nil {
		return gitRepo.GetBranchCommit(branch)
	}
	return nil, fmt.Errorf("repository has not been cloned")
}
// GetCommit looks up the commit object with the given commit ID in the
// underlying git repository. It returns an error if the repository has not
// been cloned yet (gitRepo is nil).
func (t *TemporaryUploadRepository) GetCommit(commitID string) (*git.Commit, error) {
	if gitRepo := t.gitRepo; gitRepo != nil {
		return gitRepo.GetCommit(commitID)
	}
	return nil, fmt.Errorf("repository has not been cloned")
}