1
0
mirror of https://github.com/go-gitea/gitea.git synced 2025-02-02 15:09:33 -05:00

Support repo code search without setting up an indexer ()

By using git's ability, end users (especially small instance users) do
not need to enable the indexer, they could also benefit from the code
searching feature.

Fix 


![image](https://github.com/go-gitea/gitea/assets/2114189/11b7e458-88a4-480d-b4d7-72ee59406dd1)


![image](https://github.com/go-gitea/gitea/assets/2114189/0fe777d5-c95c-4288-a818-0427680805b6)

---------

Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
wxiaoguang 2024-03-25 00:05:00 +08:00 committed by GitHub
parent 90a4f9a49e
commit 4734d43e14
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 253 additions and 65 deletions
docs/content
modules
options/locale
routers/web/repo
templates

@ -17,6 +17,12 @@ menu:
# Repository indexer
## Builtin repository code search without indexer
Users could do repository-level code search without setting up a repository indexer.
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
Better code search support could be achieved by setting up the repository indexer.
## Setting up the repository indexer
Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):

@ -87,6 +87,9 @@ _Symbols used in table:_
| Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ |
- Gitea has builtin repository-level code search
- Better code search support could be achieved by [using a repository indexer](administration/repo-indexer.md)
## Issue Tracker
| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |

@ -367,7 +367,6 @@ type RunStdError interface {
error
Unwrap() error
Stderr() string
IsExitCode(code int) bool
}
type runStdError struct {
@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
return r.stderr
}
func (r *runStdError) IsExitCode(code int) bool {
func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError
if errors.As(r.err, &exitError) {
if errors.As(err, &exitError) {
return exitError.ExitCode() == code
}
return false

@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error {
func configSet(key, value string) error {
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
if err != nil && !err.IsExitCode(1) {
if err != nil && !IsErrorExitCode(err, 1) {
return fmt.Errorf("failed to get git config %s, err: %w", key, err)
}
@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, set new config
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, add new config
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error {
}
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist
return nil
}

112
modules/git/grep.go Normal file

@ -0,0 +1,112 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"code.gitea.io/gitea/modules/util"
)
type GrepResult struct {
Filename string
LineNumbers []int
LineCodes []string
}
type GrepOptions struct {
RefName string
ContextLineNumber int
IsFuzzy bool
}
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
/*
The output is like this ( "^@" means \x00):
HEAD:.air.toml
6^@bin = "gitea"
HEAD:.changelog.yml
2^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.IsFuzzy {
words := strings.Fields(search)
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
}
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: &stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer stdoutReader.Close()
isInBlock := false
scanner := bufio.NewScanner(stdoutReader)
var res *GrepResult
for scanner.Scan() {
line := scanner.Text()
if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true
res = &GrepResult{Filename: filename}
results = append(results, res)
}
continue
}
if line == "" {
if len(results) >= 50 {
cancel()
break
}
isInBlock = false
continue
}
if line == "--" {
continue
}
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
res.LineCodes = append(res.LineCodes, lineCode)
}
}
return scanner.Err()
},
})
// git grep exits with 1 if no results are found
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
return nil, nil
}
if err != nil && !errors.Is(err, context.Canceled) {
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
}
return results, nil
}

41
modules/git/grep_test.go Normal file

@ -0,0 +1,41 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"context"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGrepSearch(t *testing.T) {
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
assert.NoError(t, err)
defer repo.Close()
res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
}, res)
res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err)
assert.Len(t, res, 0)
res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
assert.Error(t, err)
assert.Len(t, res, 0)
}

@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil
}
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(filename, "", code)
highlightedLines := strings.Split(string(hl), "\n")
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
for i := 0; i < len(lines); i++ {
lines[i].Num = lineNums[i]
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}
return lines
}
func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
var formattedLinesBuffer bytes.Buffer
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lines := make([]ResultLine, 0, len(contentLines))
lineNums := make([]int, 0, len(contentLines))
index := startIndex
for i, line := range contentLines {
var err error
@ -91,29 +105,16 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
line[closeActiveIndex:],
)
} else {
err = writeStrings(&formattedLinesBuffer,
line,
)
err = writeStrings(&formattedLinesBuffer, line)
}
if err != nil {
return nil, err
}
lines = append(lines, ResultLine{Num: startLineNum + i})
lineNums = append(lineNums, startLineNum+i)
index += len(line)
}
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
highlightedLines := strings.Split(string(hl), "\n")
// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
lines = lines[:min(len(highlightedLines), len(lines))]
highlightedLines = highlightedLines[:len(lines)]
for i := 0; i < len(lines); i++ {
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}
return &Result{
RepoID: result.RepoID,
Filename: result.Filename,
@ -121,7 +122,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
Lines: lines,
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
}, nil
}

@ -172,6 +172,7 @@ org_kind = Search orgs...
team_kind = Search teams...
code_kind = Search code...
code_search_unavailable = Code search is currently not available. Please contact the site administrator.
code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if site administrator enables Repository Indexer.
package_kind = Search packages...
project_kind = Search projects...
branch_kind = Search branches...

@ -5,9 +5,11 @@ package repo
import (
"net/http"
"strings"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search"
// Search render repository search page
func Search(ctx *context.Context) {
if !setting.Indexer.RepoIndexerEnabled {
ctx.Redirect(ctx.Repo.RepoLink)
return
}
language := ctx.FormTrim("l")
keyword := ctx.FormTrim("q")
@ -42,26 +39,54 @@ func Search(ctx *context.Context) {
page = 1
}
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
var total int
var searchResults []*code_indexer.Result
var searchResultLanguages []*code_indexer.SearchResultLanguages
if setting.Indexer.RepoIndexerEnabled {
var err error
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
} else {
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
if err != nil {
ctx.ServerError("GrepSearch", err)
return
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
total = len(res)
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
res = res[pageStart:pageEnd]
for _, r := range res {
searchResults = append(searchResults, &code_indexer.Result{
RepoID: ctx.Repo.Repository.ID,
Filename: r.Filename,
CommitID: ctx.Repo.CommitID,
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
})
}
}
ctx.Data["CodeIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
ctx.Data["Repo"] = ctx.Repo.Repository
ctx.Data["SearchResults"] = searchResults
ctx.Data["SearchResultLanguages"] = searchResultLanguages

@ -5,27 +5,18 @@
{{template "base/alert" .}}
{{template "repo/code/recently_pushed_new_branches" .}}
{{if and (not .HideRepoInfo) (not .IsBlame)}}
<div class="ui repo-description gt-word-break">
<div id="repo-desc" class="tw-text-16">
<div class="repo-description">
<div id="repo-desc" class="gt-word-break tw-text-16">
{{$description := .Repository.DescriptionHTML $.Context}}
{{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}}
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
</div>
{{if .RepoSearchEnabled}}
<div class="ui repo-search">
<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get">
<div class="field">
<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "search.code_search_unavailable"}}"{{end}}>
<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
{{if .CodeIndexerUnavailable}}
<i class="icon">{{svg "octicon-alert"}}</i>
{{end}}
{{template "shared/search/button" dict "Disabled" .CodeIndexerUnavailable}}
</div>
</div>
</form>
<form class="ignore-dirty" action="{{.RepoLink}}/search" method="get">
<div class="ui small action input">
<input name="q" value="{{.Keyword}}" placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
{{template "shared/search/button"}}
</div>
{{end}}
</form>
</div>
<div class="tw-flex tw-items-center tw-flex-wrap tw-gap-1" id="repo-topics">
{{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}}

@ -7,9 +7,16 @@
<div class="ui error message">
<p>{{ctx.Locale.Tr "search.code_search_unavailable"}}</p>
</div>
{{else if .SearchResults}}
{{template "shared/search/code/results" .}}
{{else if .Keyword}}
<div>{{ctx.Locale.Tr "search.no_results"}}</div>
{{else}}
{{if not .CodeIndexerEnabled}}
<div class="ui message">
<p>{{ctx.Locale.Tr "search.code_search_by_git_grep"}}</p>
</div>
{{end}}
{{if .SearchResults}}
{{template "shared/search/code/results" .}}
{{else if .Keyword}}
<div>{{ctx.Locale.Tr "search.no_results"}}</div>
{{end}}
{{end}}
</div>

@ -1,3 +1,4 @@
{{if or .result.Language (not .result.UpdatedUnix.IsZero)}}
<div class="ui bottom attached table segment tw-flex tw-items-center tw-justify-between">
<div class="tw-flex tw-items-center gt-ml-4">
{{if .result.Language}}
@ -10,3 +11,4 @@
{{end}}
</div>
</div>
{{end}}