// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"io"
	"path"
	"sort"
	"strings"

	"code.gitea.io/gitea/modules/container"

	"github.com/djherbis/buffer"
	"github.com/djherbis/nio/v3"
)

// LogNameStatusRepo starts `git log --name-status` for head in the given repository and returns
// a buffered reader over the command's stdout together with a cancel function.
//
// The log is restricted to treepath (when non-empty). If fewer than 70 paths are given, every
// non-empty path is additionally passed as a pathspec, joined onto treepath when treepath is set.
// All pathspecs are passed with the :(literal) magic.
//
// The cancel function cancels the command's context and closes both ends of the pipe; callers
// should invoke it once they are done with the reader. If the command fails, the error (combined
// with the captured stderr) is delivered through the pipe by closing its write end with that error.
func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
	// The command's stdout is streamed to the caller through an in-memory pipe with a 32KiB buffer.
	pipeReader, pipeWriter := nio.Pipe(buffer.New(32 * 1024))

	// Derive a cancellable context so that the command can be made to stop when the caller is done.
	ctx, stop := context.WithCancel(ctx)

	cmd := NewCommand(ctx)
	cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)

	// Collect the pathspecs: treepath first (if any), then the individual paths unless there are
	// 70 or more of them, in which case only treepath is used.
	var pathspecs []string
	if treepath != "" {
		pathspecs = append(pathspecs, treepath)
	}
	if len(paths) < 70 {
		for _, pth := range paths {
			if pth == "" {
				continue
			}
			if treepath != "" {
				pth = path.Join(treepath, pth)
			}
			pathspecs = append(pathspecs, pth)
		}
	}
	// The :(literal) pathspec magic handles edge cases with files named like ":file.txt" or "*.jpg".
	for idx := range pathspecs {
		pathspecs[idx] = ":(literal)" + pathspecs[idx]
	}
	cmd.AddDashesAndList(pathspecs...)

	go func() {
		var stderr strings.Builder
		runErr := cmd.Run(&RunOpts{
			Dir:    repository,
			Stdout: pipeWriter,
			Stderr: &stderr,
		})
		if runErr == nil {
			_ = pipeWriter.Close()
			return
		}
		// Hand the failure to the reading side instead of a plain EOF.
		_ = pipeWriter.CloseWithError(ConcatenateError(runErr, stderr.String()))
	}()

	// For simplicity's sake the git log output is consumed through a buffered reader.
	return bufio.NewReaderSize(pipeReader, 32*1024), func() {
		stop()
		_ = pipeReader.Close()
		_ = pipeWriter.Close()
	}
}

// LogNameStatusRepoParser parses the git log --name-status output produced by LogNameStatusRepo.
// Instances are created with NewLogNameStatusRepoParser and must be released with Close.
type LogNameStatusRepoParser struct {
	treepath string        // treepath given to the constructor
	paths    []string      // paths given to the constructor
	next     []byte        // most recent NUL-terminated slice read from rd that has not been consumed yet
	buffull  bool          // set when the read that filled next returned bufio.ErrBufferFull
	rd       *bufio.Reader // reader over the git log output
	cancel   func()        // cancel function returned by LogNameStatusRepo; invoked by Close
}

// NewLogNameStatusRepoParser starts a git log --name-status command via LogNameStatusRepo for
// head in repository and returns a parser reading from its output. The arguments are forwarded
// unchanged; treepath and paths are also recorded on the parser. Call Close on the result to
// stop the underlying command.
func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
	parser := &LogNameStatusRepoParser{
		treepath: treepath,
		paths:    paths,
	}
	parser.rd, parser.cancel = LogNameStatusRepo(ctx, repository, head, treepath, paths...)
	return parser
}

// LogNameStatusCommitData represents one commit record parsed from git log --name-status output
// by LogNameStatusRepoParser.Next.
type LogNameStatusCommitData struct {
	CommitID  string   // first space-separated field of the commit line
	ParentIDs []string // remaining space-separated fields of the commit line
	Paths     []bool   // nil if no tracked path matched; otherwise the `changed` slice passed to Next
}

// Next reads the next commit record from the log stream and reports which of the tracked
// paths it touched.
//
// Parameters:
//   - treepath: prefix every reported file name must start with; names without it are ignored.
//     Note that this parameter is used rather than the treepath stored on the parser.
//   - paths2ids: maps a path relative to treepath ("" for treepath itself) to its index in changed.
//   - changed: for every matching path, changed[idx] is set to true. The same slice is stored in
//     the result's Paths field on the first match, so it is shared with the caller.
//   - maxpathlen: names whose length after stripping treepath exceeds this value are skipped.
//
// Only names at most one level below treepath are looked up in paths2ids; anything deeper is skipped.
//
// It returns (nil, nil) when the stream ends before another commit record can be read, and a
// non-nil error for any read failure other than io.EOF.
func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
	var err error
	// Nothing left over from a previous call: fetch the first NUL-terminated slice.
	if g.next == nil || len(g.next) == 0 {
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err == io.EOF {
				return nil, nil
			} else {
				return nil, err
			}
		}
	}

	ret := LogNameStatusCommitData{}
	// Skip the "commit NUL" marker that introduces each record (see the --format used by LogNameStatusRepo).
	if bytes.Equal(g.next, []byte("commit\000")) {
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err == io.EOF {
				return nil, nil
			} else {
				return nil, err
			}
		}
	}

	// Our "line" must look like: <commitid> SP (<parent> SP) * NUL
	commitIDs := string(g.next)
	if g.buffull {
		// The slice was cut short by a full buffer: read the remainder up to and including the NUL.
		more, err := g.rd.ReadString('\x00')
		if err != nil {
			return nil, err
		}
		commitIDs += more
	}
	// Drop the trailing NUL before splitting into IDs.
	commitIDs = commitIDs[:len(commitIDs)-1]
	splitIDs := strings.Split(commitIDs, " ")
	ret.CommitID = splitIDs[0]
	if len(splitIDs) > 1 {
		ret.ParentIDs = splitIDs[1:]
	}

	// now read the next "line"
	g.buffull = false
	g.next, err = g.rd.ReadSlice('\x00')
	if err != nil {
		if err == bufio.ErrBufferFull {
			g.buffull = true
		} else if err != io.EOF {
			return nil, err
		}
	}

	// Either the stream ended or the slice does not start with NL/NUL: this commit carries no
	// change list. g.next is left in place so the following call can continue from it.
	if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
		return &ret, nil
	}

	// Ok we have some changes.
	// The first slice of the change list is prefixed with NL.
	//
	// Subsequent slices will not have the NL - so drop it here - g.buffull must also be false at this point too.
	if g.next[0] == '\n' {
		g.next = g.next[1:]
	} else {
		// The slice started with NUL instead of NL: discard it and read the following slice.
		// NOTE(review): presumably this extra NUL shows up for some commits (e.g. merges with -c) - confirm.
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err != io.EOF {
				return nil, err
			}
		}
		if len(g.next) == 0 {
			return &ret, nil
		}
		// A further leading NUL is skipped the same way.
		if g.next[0] == '\x00' {
			g.buffull = false
			g.next, err = g.rd.ReadSlice('\x00')
			if err != nil {
				if err == bufio.ErrBufferFull {
					g.buffull = true
				} else if err != io.EOF {
					return nil, err
				}
			}
		}
	}

	// Scratch buffer for file names; slices returned by ReadSlice are only valid until the next
	// read, so each name is copied here before reading on.
	fnameBuf := make([]byte, 4096)

diffloop:
	for {
		// Stop at the end of the stream or when the next record's "commit NUL" marker is reached;
		// the marker stays in g.next for the following call.
		if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
			return &ret, nil
		}
		// NOTE(review): with --name-status -z each entry appears to be <status> NUL <path> NUL, so
		// g.next held the status field and this read yields the path - confirm.
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err == io.EOF {
				return &ret, nil
			} else {
				return nil, err
			}
		}
		// Copy the name into fnameBuf, growing or truncating it to the length just read.
		copy(fnameBuf, g.next)
		if len(fnameBuf) < len(g.next) {
			fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
		} else {
			fnameBuf = fnameBuf[:len(g.next)]
		}
		// At this point err is either nil or bufio.ErrBufferFull; in the latter case the name was
		// longer than the reader's buffer and its remainder still has to be read.
		if err != nil {
			if err != bufio.ErrBufferFull {
				return nil, err
			}
			more, err := g.rd.ReadBytes('\x00')
			if err != nil {
				return nil, err
			}
			fnameBuf = append(fnameBuf, more...)
		}

		// read the next line
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err != io.EOF {
				return nil, err
			}
		}

		// Every skip below re-extends fnameBuf to its capacity so it can be reused for the next name.
		if treepath != "" {
			// Ignore names outside of treepath.
			if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
				fnameBuf = fnameBuf[:cap(fnameBuf)]
				continue diffloop
			}
		}
		// Strip the treepath prefix and the trailing NUL.
		fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
		if len(fnameBuf) > maxpathlen {
			fnameBuf = fnameBuf[:cap(fnameBuf)]
			continue diffloop
		}
		if len(fnameBuf) > 0 {
			if len(treepath) > 0 {
				// Below a treepath the remainder must be "/<name>" with no further separator,
				// i.e. a direct child of treepath.
				if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
					fnameBuf = fnameBuf[:cap(fnameBuf)]
					continue diffloop
				}
				fnameBuf = fnameBuf[1:]
			} else if bytes.IndexByte(fnameBuf, '/') >= 0 {
				// Without a treepath only names without any separator are considered.
				fnameBuf = fnameBuf[:cap(fnameBuf)]
				continue diffloop
			}
		}

		// Only paths the caller is tracking are recorded.
		idx, ok := paths2ids[string(fnameBuf)]
		if !ok {
			fnameBuf = fnameBuf[:cap(fnameBuf)]
			continue diffloop
		}
		// Expose the caller's changed slice on the result once there is at least one match.
		if ret.Paths == nil {
			ret.Paths = changed
		}
		changed[idx] = true
	}
}

// Close closes the parser by invoking the cancel function it received from LogNameStatusRepo
// when it was constructed.
func (g *LogNameStatusRepoParser) Close() {
	g.cancel()
}

// WalkGitLog walks the git log --name-status output starting at head and records, for treepath
// and for each requested path directly beneath it, the first commit emitted by the log that
// touches it.
//
// paths are relative to treepath; the empty path "" stands for treepath itself and is always
// looked up. When no paths are given, every entry of the tree at treepath is looked up. The
// paths argument is never modified: sorting and de-duplication happen on a private copy, so a
// caller passing `slice...` keeps its slice intact.
//
// The returned map is keyed by path and holds the commit ID found, or "" when no commit was
// found before the log ended or the context's deadline was exceeded. Every commit found is also
// stored in repo.LastCommitCache under head's ID.
//
// An error is returned if the subtree or its entries cannot be read, if the context is done for
// a reason other than its deadline, if parsing the log fails, or if writing to the cache fails.
func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
	headRef := head.ID.String()

	tree, err := head.SubTree(treepath)
	if err != nil {
		return nil, err
	}

	entries, err := tree.ListEntries()
	if err != nil {
		return nil, err
	}

	if len(paths) == 0 {
		paths = make([]string, 0, len(entries)+1)
		paths = append(paths, "")
		for _, entry := range entries {
			paths = append(paths, entry.Name())
		}
	} else {
		// Sort a private copy: a variadic argument passed as `slice...` shares its backing array
		// with the caller, so sorting or compacting it in place would alter the caller's data.
		sorted := make([]string, len(paths))
		copy(sorted, paths)
		sort.Strings(sorted)

		// Build the final list: "" first, then each distinct path in sorted order. Because the
		// input is sorted, duplicates (including any "" entries) are adjacent to their twin.
		paths = make([]string, 0, len(sorted)+1)
		paths = append(paths, "")
		for _, pth := range sorted {
			if pth != paths[len(paths)-1] {
				paths = append(paths, pth)
			}
		}
	}

	path2idx := map[string]int{}
	maxpathlen := len(treepath)

	for i := range paths {
		path2idx[paths[i]] = i
		pthlen := len(paths[i]) + len(treepath) + 1
		if pthlen > maxpathlen {
			maxpathlen = pthlen
		}
	}

	g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
	// don't use defer g.Close() here as g may change its value - instead wrap in a func
	defer func() {
		g.Close()
	}()

	// results[i] is the commit ID found for paths[i], "" while still unknown.
	results := make([]string, len(paths))
	remaining := len(paths)
	// Once no more than nextRestart paths remain, restarting the log with a narrower set of
	// pathspecs is considered (see below).
	nextRestart := (len(paths) * 3) / 4
	if nextRestart > 70 {
		nextRestart = 70
	}
	// lastEmptyParent is the most recent commit at which no previously announced parent was
	// still outstanding; a restarted log begins from it.
	lastEmptyParent := head.ID.String()
	commitSinceLastEmptyParent := uint64(0)
	commitSinceNextRestart := uint64(0)
	// parentRemaining holds parent IDs announced by processed commits that have not been seen yet.
	parentRemaining := make(container.Set[string])

	// changed is shared with the parser: Next flags the indexes touched by the current commit.
	changed := make([]bool, len(paths))

heaploop:
	for {
		select {
		case <-ctx.Done():
			// Running out of time is not an error: return whatever has been found so far.
			if errors.Is(ctx.Err(), context.DeadlineExceeded) {
				break heaploop
			}
			g.Close()
			return nil, ctx.Err()
		default:
		}
		current, err := g.Next(treepath, path2idx, changed, maxpathlen)
		if err != nil {
			if errors.Is(err, context.DeadlineExceeded) {
				break heaploop
			}
			g.Close()
			return nil, err
		}
		if current == nil {
			// End of the log.
			break heaploop
		}
		parentRemaining.Remove(current.CommitID)
		for i, found := range current.Paths {
			if !found {
				continue
			}
			// Reset the flag so the shared slice is clean for the next commit.
			changed[i] = false
			if results[i] == "" {
				results[i] = current.CommitID
				if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
					return nil, err
				}
				delete(path2idx, paths[i])
				remaining--
				// The first commit that touches any tracked path is also recorded for treepath itself.
				if results[0] == "" {
					results[0] = current.CommitID
					if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
						return nil, err
					}
					delete(path2idx, "")
					remaining--
				}
			}
		}

		if remaining <= 0 {
			break heaploop
		}
		commitSinceLastEmptyParent++
		if len(parentRemaining) == 0 {
			lastEmptyParent = current.CommitID
			commitSinceLastEmptyParent = 0
		}
		if remaining <= nextRestart {
			commitSinceNextRestart++
			if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
				// Restart the log from lastEmptyParent, limited to the paths still unresolved.
				g.Close()
				remainingPaths := make([]string, 0, len(paths))
				for i, pth := range paths {
					if results[i] == "" {
						remainingPaths = append(remainingPaths, pth)
					}
				}
				g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
				parentRemaining = make(container.Set[string])
				nextRestart = (remaining * 3) / 4
				continue heaploop
			}
		}
		parentRemaining.AddMultiple(current.ParentIDs...)
	}
	g.Close()

	resultsMap := map[string]string{}
	for i, pth := range paths {
		resultsMap[pth] = results[i]
	}

	return resultsMap, nil
}