0
0
mirror of https://github.com/go-gitea/gitea.git synced 2025-03-21 15:32:02 +01:00

Determine fuzziness of bleve indexer by keyword length

Also, bleve previously performed an exact match when a fuzzy search was requested, and the other way around. This commit fixes that bug as well.
This commit is contained in:
6543 2024-03-23 16:45:13 +01:00 committed by GitHub
parent 1cdc6c3a4e
commit b9c57fb78e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 29 additions and 37 deletions
modules/indexer
code/bleve
internal/bleve
issues/bleve
tests/integration

@ -39,6 +39,8 @@ import (
const (
// unicodeNormalizeName is the identifier string "unicodeNormalize".
// NOTE(review): presumably the name under which addUnicodeNormalizeTokenFilter registers its token filter — confirm.
unicodeNormalizeName = "unicodeNormalize"
// maxBatchSize is a batch size limit of 16.
// NOTE(review): its use is not visible in this hunk — presumably caps index batch writes; confirm.
maxBatchSize = 16
// fuzzyDenominator is the divisor used to derive the fuzziness (Levenshtein distance)
// of a fuzzy keyword search: fuzziness = len(keyword) / fuzzyDenominator.
// The division is an integer division on the keyword's byte length, so keywords
// shorter than fuzzyDenominator bytes get a fuzziness of 0.
fuzzyDenominator = 4
)
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
keywordQuery query.Query
)
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
}
if len(opts.RepoIDs) > 0 {

@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
}
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
return q
}
// PrefixQuery generates a match prefix query for the given prefix and field
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
q := bleve.NewPrefixQuery(matchPrefix)
q.FieldVal = field
q.Fuzziness = fuzziness
return q
}

@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
const maxBatchSize = 16
const (
// maxBatchSize is a batch size limit of 16.
// NOTE(review): its use is not visible in this hunk — presumably caps index batch writes; confirm.
maxBatchSize = 16
// fuzzyDenominator is the divisor used to derive the fuzziness (Levenshtein distance)
// of a fuzzy keyword search: fuzziness = len(keyword) / fuzzyDenominator.
// The division is an integer division on the keyword's byte length, so keywords
// shorter than fuzzyDenominator bytes get a fuzziness of 0.
fuzzyDenominator = 4
)
// IndexerData an update to the issue indexer
type IndexerData internal.IndexerData
@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query
if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
}...))
} else {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.PrefixQuery(options.Keyword, "title"),
inner_bleve.PrefixQuery(options.Keyword, "content"),
inner_bleve.PrefixQuery(options.Keyword, "comments"),
}...))
fuzziness = len(options.Keyword) / fuzzyDenominator
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
}
if len(options.RepoIDs) > 0 || options.AllPublic {

@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) {
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
assert.NoError(t, err)
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
code_indexer.UpdateRepoIndexer(repo)
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
@ -42,12 +42,14 @@ func TestSearchRepo(t *testing.T) {
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
assert.NoError(t, err)
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
code_indexer.UpdateRepoIndexer(repo)
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
}
func testSearch(t *testing.T, url string, expected []string) {
@ -57,7 +59,3 @@ func testSearch(t *testing.T, url string, expected []string) {
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
assert.EqualValues(t, expected, filenames)
}
// executeIndexer invokes the indexer operation op on repo.
// The t parameter is accepted but not used by the body.
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
op(repo)
}