// Copyright 2021 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package git import ( "bufio" "bytes" "context" "errors" "io" "path" "sort" "strings" "code.gitea.io/gitea/modules/container" "github.com/djherbis/buffer" "github.com/djherbis/nio/v3" ) // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) { // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024)) // Lets also create a context so that we can absolutely ensure that the command should die when we're done ctx, ctxCancel := context.WithCancel(ctx) cancel := func() { ctxCancel() _ = stdoutReader.Close() _ = stdoutWriter.Close() } cmd := NewCommand(ctx) cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head) var files []string if len(paths) < 70 { if treepath != "" { files = append(files, treepath) for _, pth := range paths { if pth != "" { files = append(files, path.Join(treepath, pth)) } } } else { for _, pth := range paths { if pth != "" { files = append(files, pth) } } } } else if treepath != "" { files = append(files, treepath) } // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg" for i, file := range files { files[i] = ":(literal)" + file } cmd.AddDashesAndList(files...) go func() { stderr := strings.Builder{} err := cmd.Run(&RunOpts{ Dir: repository, Stdout: stdoutWriter, Stderr: &stderr, }) if err != nil { _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) return } _ = stdoutWriter.Close() }() // For simplicities sake we'll us a buffered reader to read from the cat-file --batch bufReader := bufio.NewReaderSize(stdoutReader, 32*1024) return bufReader, cancel } // LogNameStatusRepoParser parses a git log raw output from LogRawRepo type LogNameStatusRepoParser struct { treepath string paths []string next []byte buffull bool rd *bufio.Reader cancel func() } // NewLogNameStatusRepoParser returns a new parser for a git log raw output func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser { rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...) return &LogNameStatusRepoParser{ treepath: treepath, paths: paths, rd: rd, cancel: cancel, } } // LogNameStatusCommitData represents a commit artefact from git log raw type LogNameStatusCommitData struct { CommitID string ParentIDs []string Paths []bool } // Next returns the next LogStatusCommitData func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) { var err error if g.next == nil || len(g.next) == 0 { g.buffull = false g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err == io.EOF { return nil, nil } else { return nil, err } } } ret := LogNameStatusCommitData{} if bytes.Equal(g.next, []byte("commit\000")) { g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err == io.EOF { return nil, nil } else { return nil, err } } } // Our "line" must look like: <commitid> SP (<parent> SP) * NUL commitIDs := string(g.next) if g.buffull { more, err := g.rd.ReadString('\x00') if err != nil { return nil, err } commitIDs += more } commitIDs = commitIDs[:len(commitIDs)-1] splitIDs := strings.Split(commitIDs, " ") ret.CommitID = splitIDs[0] if len(splitIDs) > 1 { ret.ParentIDs = splitIDs[1:] } // now read the next "line" g.buffull = false g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err != io.EOF { return nil, err } } if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') { return &ret, nil } // Ok we have some changes. // This line will look like: NL <fname> NUL // // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too. if g.next[0] == '\n' { g.next = g.next[1:] } else { g.buffull = false g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err != io.EOF { return nil, err } } if len(g.next) == 0 { return &ret, nil } if g.next[0] == '\x00' { g.buffull = false g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err != io.EOF { return nil, err } } } } fnameBuf := make([]byte, 4096) diffloop: for { if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { return &ret, nil } g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err == io.EOF { return &ret, nil } else { return nil, err } } copy(fnameBuf, g.next) if len(fnameBuf) < len(g.next) { fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...) } else { fnameBuf = fnameBuf[:len(g.next)] } if err != nil { if err != bufio.ErrBufferFull { return nil, err } more, err := g.rd.ReadBytes('\x00') if err != nil { return nil, err } fnameBuf = append(fnameBuf, more...) } // read the next line g.buffull = false g.next, err = g.rd.ReadSlice('\x00') if err != nil { if err == bufio.ErrBufferFull { g.buffull = true } else if err != io.EOF { return nil, err } } if treepath != "" { if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { fnameBuf = fnameBuf[:cap(fnameBuf)] continue diffloop } } fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] if len(fnameBuf) > maxpathlen { fnameBuf = fnameBuf[:cap(fnameBuf)] continue diffloop } if len(fnameBuf) > 0 { if len(treepath) > 0 { if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { fnameBuf = fnameBuf[:cap(fnameBuf)] continue diffloop } fnameBuf = fnameBuf[1:] } else if bytes.IndexByte(fnameBuf, '/') >= 0 { fnameBuf = fnameBuf[:cap(fnameBuf)] continue diffloop } } idx, ok := paths2ids[string(fnameBuf)] if !ok { fnameBuf = fnameBuf[:cap(fnameBuf)] continue diffloop } if ret.Paths == nil { ret.Paths = changed } changed[idx] = true } } // Close closes the parser func (g *LogNameStatusRepoParser) Close() { g.cancel() } // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) { headRef := head.ID.String() tree, err := head.SubTree(treepath) if err != nil { return nil, err } entries, err := tree.ListEntries() if err != nil { return nil, err } if len(paths) == 0 { paths = make([]string, 0, len(entries)+1) paths = append(paths, "") for _, entry := range entries { paths = append(paths, entry.Name()) } } else { sort.Strings(paths) if paths[0] != "" { paths = append([]string{""}, paths...) } // remove duplicates for i := len(paths) - 1; i > 0; i-- { if paths[i] == paths[i-1] { paths = append(paths[:i-1], paths[i:]...) } } } path2idx := map[string]int{} maxpathlen := len(treepath) for i := range paths { path2idx[paths[i]] = i pthlen := len(paths[i]) + len(treepath) + 1 if pthlen > maxpathlen { maxpathlen = pthlen } } g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...) // don't use defer g.Close() here as g may change its value - instead wrap in a func defer func() { g.Close() }() results := make([]string, len(paths)) remaining := len(paths) nextRestart := (len(paths) * 3) / 4 if nextRestart > 70 { nextRestart = 70 } lastEmptyParent := head.ID.String() commitSinceLastEmptyParent := uint64(0) commitSinceNextRestart := uint64(0) parentRemaining := make(container.Set[string]) changed := make([]bool, len(paths)) heaploop: for { select { case <-ctx.Done(): if ctx.Err() == context.DeadlineExceeded { break heaploop } g.Close() return nil, ctx.Err() default: } current, err := g.Next(treepath, path2idx, changed, maxpathlen) if err != nil { if errors.Is(err, context.DeadlineExceeded) { break heaploop } g.Close() return nil, err } if current == nil { break heaploop } parentRemaining.Remove(current.CommitID) for i, found := range current.Paths { if !found { continue } changed[i] = false if results[i] == "" { results[i] = current.CommitID if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil { return nil, err } delete(path2idx, paths[i]) remaining-- if results[0] == "" { results[0] = current.CommitID if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil { return nil, err } delete(path2idx, "") remaining-- } } } if remaining <= 0 { break heaploop } commitSinceLastEmptyParent++ if len(parentRemaining) == 0 { lastEmptyParent = current.CommitID commitSinceLastEmptyParent = 0 } if remaining <= nextRestart { commitSinceNextRestart++ if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent { g.Close() remainingPaths := make([]string, 0, len(paths)) for i, pth := range paths { if results[i] == "" { remainingPaths = append(remainingPaths, pth) } } g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...) parentRemaining = make(container.Set[string]) nextRestart = (remaining * 3) / 4 continue heaploop } } parentRemaining.AddMultiple(current.ParentIDs...) } g.Close() resultsMap := map[string]string{} for i, pth := range paths { resultsMap[pth] = results[i] } return resultsMap, nil }