Fix bug on elastic search (#12811)
* Fix bug on elastic search
* Add more comments for elastic search result startIndex and endIndex
* refactor indexPos
* refactor indexPos
* Fix bug
parent ae528d8321
commit 8ce10fb6e1
@@ -90,6 +90,7 @@ const (
 			},
 			"content": {
 				"type": "text",
+				"term_vector": "with_positions_offsets",
 				"index": true
 			},
 			"commit_id": {
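The added "term_vector": "with_positions_offsets" setting makes Elasticsearch store, for every term of the content field, its position and character offsets; the fast vector highlighter ("fvh") requested further down in this commit only works on fields indexed this way. Below is a minimal, hedged sketch of how such a mapping might be applied with the olivere/elastic client that this indexer already uses; the package name, index name, trimmed mapping literal and function name are illustrative, not taken from the commit, and the v7 import path is an assumption.

package codeindex_example

import (
	"context"

	"github.com/olivere/elastic/v7"
)

// exampleMapping mirrors only the "content" part of the mapping touched above;
// everything else is omitted for brevity.
const exampleMapping = `{
	"mappings": {
		"properties": {
			"content": {
				"type": "text",
				"term_vector": "with_positions_offsets",
				"index": true
			}
		}
	}
}`

// createExampleIndex creates a throw-away index with that mapping so the "fvh"
// highlighter can be used against its "content" field.
func createExampleIndex(ctx context.Context, client *elastic.Client) error {
	_, err := client.CreateIndex("gitea_codes_example").BodyString(exampleMapping).Do(ctx)
	return err
}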
@@ -251,6 +252,22 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
 	return err
 }
 
+// indexPos finds the word positions of start and the following end in content.
+// It returns the beginning position of the first start and the ending position
+// of the first end that follows that start string.
+// If either position is not found, it returns -1, -1.
+func indexPos(content, start, end string) (int, int) {
+	startIdx := strings.Index(content, start)
+	if startIdx < 0 {
+		return -1, -1
+	}
+	endIdx := strings.Index(content[startIdx+len(start):], end)
+	if endIdx < 0 {
+		return -1, -1
+	}
+	return startIdx, startIdx + len(start) + endIdx + len(end)
+}
+
 func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
 	hits := make([]*SearchResult, 0, pageSize)
 	for _, hit := range searchResult.Hits.Hits {
@@ -260,18 +277,12 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
 		var startIndex, endIndex int = -1, -1
 		c, ok := hit.Highlight["content"]
 		if ok && len(c) > 0 {
-			var subStr = make([]rune, 0, len(kw))
-			startIndex = strings.IndexFunc(c[0], func(r rune) bool {
-				if len(subStr) >= len(kw) {
-					subStr = subStr[1:]
-				}
-				subStr = append(subStr, r)
-				return strings.EqualFold(kw, string(subStr))
-			})
-			if startIndex > -1 {
-				endIndex = startIndex + len(kw)
-			} else {
-				panic(fmt.Sprintf("1===%#v", hit.Highlight))
-			}
+			// FIXME: Since the highlighted content wraps the keywords in <em> and </em>,
+			// we now have to find those positions. But how do we avoid HTML content that
+			// itself contains <em> and </em> tags? Has Elasticsearch already handled that?
+			startIndex, endIndex = indexPos(c[0], "<em>", "</em>")
+			if startIndex == -1 {
+				panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
+			}
 		} else {
 			panic(fmt.Sprintf("2===%#v", hit.Highlight))
 		}
@@ -293,7 +304,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
 			UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
 			Language:    language,
 			StartIndex:  startIndex,
-			EndIndex:    endIndex,
+			EndIndex:    endIndex - 9, // remove the length of <em></em>, since Content holds the original (untagged) data
 			Color:       enry.GetColor(language),
 		})
 	}
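The -9 works because indexPos measures offsets in the highlighted string returned by Elasticsearch, which wraps the keyword in <em> and </em> (4 + 5 = 9 extra bytes), while SearchResult.Content keeps the original, untagged text: nothing before the first <em> is altered, so startIndex is already correct, and only endIndex has to shed the 9 tag bytes. A hedged, test-style sketch (not part of the commit) that assumes the indexPos function added above and the existing test file's imports; the test name is made up.

func TestIndexPosHighlightOffset(t *testing.T) {
	highlighted := "some <em>keyword</em> here" // what the highlighter hands back in hit.Highlight["content"]
	original := "some keyword here"             // what is stored in SearchResult.Content
	start, end := indexPos(highlighted, "<em>", "</em>")
	assert.EqualValues(t, 5, start) // unchanged: nothing before the first <em> was altered
	assert.EqualValues(t, 21, end)  // measured in the tagged string
	assert.EqualValues(t, "keyword", original[start:end-9]) // len("<em>")+len("</em>") == 4+5 == 9
}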
@@ -347,7 +358,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
 		Index(b.indexerAliasName).
 		Aggregation("language", aggregation).
 		Query(query).
-		Highlight(elastic.NewHighlight().Field("content")).
+		Highlight(
+			elastic.NewHighlight().
+				Field("content").
+				NumOfFragments(0). // return all highlighted content instead of fragments
+				HighlighterType("fvh"),
+		).
 		Sort("repo_id", true).
 		From(start).Size(pageSize).
 		Do(context.Background())
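Two details of the new highlight settings are easy to miss: NumOfFragments(0) asks Elasticsearch to highlight the whole content field as one string rather than cutting it into short fragments, which is what lets indexPos treat the result as offsets into the full content, and HighlighterType("fvh") selects the fast vector highlighter, which relies on the "term_vector": "with_positions_offsets" mapping added at the top of this commit. The sketch below is not in the commit; it simply prints the JSON such a builder produces, assuming the olivere/elastic v7 client and its exported Highlight.Source() helper.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/olivere/elastic/v7"
)

func main() {
	hl := elastic.NewHighlight().
		Field("content").
		NumOfFragments(0).
		HighlighterType("fvh")

	src, err := hl.Source() // the body placed under "highlight" in the search request
	if err != nil {
		panic(err)
	}
	out, _ := json.MarshalIndent(src, "", "  ")
	fmt.Println(string(out)) // roughly: {"fields":{"content":{}},"number_of_fragments":0,"type":"fvh"}
}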
@@ -373,7 +389,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
 	searchResult, err := b.client.Search().
 		Index(b.indexerAliasName).
 		Query(query).
-		Highlight(elastic.NewHighlight().Field("content")).
+		Highlight(
+			elastic.NewHighlight().
+				Field("content").
+				NumOfFragments(0). // return all highlighted content instead of fragments
+				HighlighterType("fvh"),
+		).
 		Sort("repo_id", true).
 		From(start).Size(pageSize).
 		Do(context.Background())
@@ -34,3 +34,9 @@ func TestESIndexAndSearch(t *testing.T) {
 
 	testIndexer("elastic_search", t, indexer)
 }
+
+func TestIndexPos(t *testing.T) {
+	startIdx, endIdx := indexPos("test index start and end", "start", "end")
+	assert.EqualValues(t, 11, startIdx)
+	assert.EqualValues(t, 24, endIdx)
+}