Improving search query (#288)

* Reduce the number of bucket results on each ChangeBucket event
* Warmup after reindexing, fixes #250
* Using multiple sort keys, fixes #218
* Index also unfree packages, fixes #226, fixes #237
* Add wildcard query, fixes #279
This commit is contained in:
Rok Garbas 2021-02-07 19:10:13 +01:00 committed by GitHub
parent 72434f54a5
commit cfb5e540b9
Failed to generate hash of commit
7 changed files with 117 additions and 72 deletions

View file

@ -72,9 +72,9 @@ jobs:
path: ./eval-cache
key: eval-cache-${{ env.EVAL_ID }}
- name: Installing NixFlakes
- name: Installing nixFlakes (and jq)
run: |
nix-env -iA nixpkgs.nixFlakes
nix-env -iA nixpkgs.nixFlakes nixpkgs.jq
echo 'experimental-features = nix-command flakes' | sudo tee -a /etc/nix/nix.conf
nix --version
cat /etc/nix/nix.conf
@ -89,3 +89,10 @@ jobs:
cp ./eval-cache/builds.json ./eval-${{ env.EVAL_ID }}.json
./result/bin/import-channel --es-url ${{ secrets.ELASTICSEARCH_URL }} --channel ${{ matrix.channel }} -vvv
if: github.repository == 'NixOS/nixos-search'
- name: Warmup ${{ matrix.channel }} channel
run: |
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
if: github.repository == 'NixOS/nixos-search'

View file

@ -16,7 +16,6 @@ jobs:
- uses: cachix/cachix-action@v8
with:
name: nixos-search
signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}'
- name: Install unstable channel
run: |

View file

@ -1 +1 @@
18
19

View file

@ -1,6 +1,9 @@
{
# Ensures no aliases are in the results.
allowAliases = false;
allowAliases = true;
# Also list unfree packages
allowUnfree = true;
# Enable recursion into attribute sets that nix-env normally doesn't look into
# so that we can get a more complete picture of the available packages for the

View file

@ -282,6 +282,8 @@ makeRequest options channel query from size _ sort =
"option_name"
[]
[]
[]
"option_name"
[ ( "option_name", 6.0 )
, ( "option_name_query", 3.0 )
, ( "option_description", 1.0 )

View file

@ -632,12 +632,14 @@ makeRequest options channel query from size maybeBuckets sort =
sort
"package"
"package_attr_name"
[ "package_pversion" ]
[ "package_attr_set"
, "package_license_set"
, "package_maintainers_set"
, "package_platforms"
]
filterByBuckets
"package_attr_name"
[ ( "package_attr_name", 9.0 )
, ( "package_pname", 6.0 )
, ( "package_attr_name_query", 4.0 )

View file

@ -486,25 +486,43 @@ toAggregations bucketsFields =
toSortQuery :
Sort
-> String
-> List String
-> ( String, Json.Encode.Value )
toSortQuery sort field =
toSortQuery sort field fields =
( "sort"
, case sort of
AlphabeticallyAsc ->
Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "asc" )
]
[ List.append
[ ( field, Json.Encode.string "asc" )
]
(List.map
(\x -> ( x, Json.Encode.string "asc" ))
fields
)
]
AlphabeticallyDesc ->
Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "desc" )
]
[ List.append
[ ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
]
Relevance ->
Json.Encode.list Json.Encode.string
[ "_score"
Json.Encode.list Json.Encode.object
[ List.append
[ ( "_score", Json.Encode.string "desc" )
, ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
]
)
@ -970,9 +988,10 @@ filterByType type_ =
searchFields :
String
-> String
-> List ( String, Float )
-> List (List ( String, Json.Encode.Value ))
searchFields query fields =
searchFields query mainField fields =
let
queryVariations q =
case ( List.head q, List.tail q ) of
@ -1002,22 +1021,39 @@ searchFields query fields =
|> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ])
|> List.concat
in
List.map
(\queryWords ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "cross_fields" )
, ( "query", Json.Encode.string <| String.join " " queryWords )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "operator", Json.Encode.string "and" )
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
, ( "fields", Json.Encode.list Json.Encode.string allFields )
]
)
]
List.append
(List.map
(\queryWords ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "cross_fields" )
, ( "query", Json.Encode.string <| String.join " " queryWords )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "operator", Json.Encode.string "and" )
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
, ( "fields", Json.Encode.list Json.Encode.string allFields )
]
)
]
)
(queryVariations (String.words (String.toLower query)))
)
(List.map
(\queryWord ->
[ ( "wildcard"
, Json.Encode.object
[ ( mainField
, Json.Encode.object
[ ( "value", Json.Encode.string ("*" ++ queryWord ++ "*") )
]
)
]
)
]
)
(String.words (String.toLower query))
)
(queryVariations (String.words (String.toLower query)))
makeRequestBody :
@ -1028,10 +1064,12 @@ makeRequestBody :
-> String
-> String
-> List String
-> List String
-> List ( String, Json.Encode.Value )
-> String
-> List ( String, Float )
-> Http.Body
makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBuckets fields =
makeRequestBody query from sizeRaw sort type_ sortField otherSortFields bucketsFields filterByBuckets mainField fields =
let
-- you can not request more then 10000 results otherwise it will return 404
size =
@ -1043,49 +1081,43 @@ makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBu
in
Http.jsonBody
(Json.Encode.object
(List.append
[ ( "from"
, Json.Encode.int from
)
, ( "size"
, Json.Encode.int size
)
, toSortQuery sort sortField
, toAggregations bucketsFields
, ( "query"
, Json.Encode.object
[ ( "bool"
, Json.Encode.object
[ ( "filter"
, Json.Encode.list Json.Encode.object
[ filterByType type_ ]
)
, ( "must"
, Json.Encode.list Json.Encode.object
[ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
(searchFields query fields)
)
]
)
]
]
)
]
)
]
)
]
(if List.isEmpty filterByBuckets then
[]
else
[ ( "post_filter", Json.Encode.object filterByBuckets ) ]
)
)
[ ( "from"
, Json.Encode.int from
)
, ( "size"
, Json.Encode.int size
)
, toSortQuery sort sortField otherSortFields
, toAggregations bucketsFields
, ( "query"
, Json.Encode.object
[ ( "bool"
, Json.Encode.object
[ ( "filter"
, Json.Encode.list Json.Encode.object
[ filterByType type_
, filterByBuckets
]
)
, ( "must"
, Json.Encode.list Json.Encode.object
[ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
(searchFields query mainField fields)
)
]
)
]
]
)
]
)
]
)
]
)