Improving search query (#288)

* Reduce the number of bucket results on each ChangeBucket event
* Warmup after reindexing, fixes #250
* Using multiple sort keys, fixes #218
* Also index unfree packages, fixes #226, fixes #237
* Add wildcard query, fixes #279
This commit is contained in:
Rok Garbas 2021-02-07 19:10:13 +01:00 committed by GitHub
parent 72434f54a5
commit cfb5e540b9
Failed to generate hash of commit
7 changed files with 117 additions and 72 deletions

View file

@ -72,9 +72,9 @@ jobs:
path: ./eval-cache path: ./eval-cache
key: eval-cache-${{ env.EVAL_ID }} key: eval-cache-${{ env.EVAL_ID }}
- name: Installing NixFlakes - name: Installing nixFlakes (and jq)
run: | run: |
nix-env -iA nixpkgs.nixFlakes nix-env -iA nixpkgs.nixFlakes nixpkgs.jq
echo 'experimental-features = nix-command flakes' | sudo tee -a /etc/nix/nix.conf echo 'experimental-features = nix-command flakes' | sudo tee -a /etc/nix/nix.conf
nix --version nix --version
cat /etc/nix/nix.conf cat /etc/nix/nix.conf
@ -89,3 +89,10 @@ jobs:
cp ./eval-cache/builds.json ./eval-${{ env.EVAL_ID }}.json cp ./eval-cache/builds.json ./eval-${{ env.EVAL_ID }}.json
./result/bin/import-channel --es-url ${{ secrets.ELASTICSEARCH_URL }} --channel ${{ matrix.channel }} -vvv ./result/bin/import-channel --es-url ${{ secrets.ELASTICSEARCH_URL }} --channel ${{ matrix.channel }} -vvv
if: github.repository == 'NixOS/nixos-search' if: github.repository == 'NixOS/nixos-search'
- name: Warmup ${{ matrix.channel }} channel
run: |
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
if: github.repository == 'NixOS/nixos-search'

View file

@ -16,7 +16,6 @@ jobs:
- uses: cachix/cachix-action@v8 - uses: cachix/cachix-action@v8
with: with:
name: nixos-search name: nixos-search
signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}'
- name: Install unstable channel - name: Install unstable channel
run: | run: |

View file

@ -1 +1 @@
18 19

View file

@ -1,6 +1,9 @@
{ {
# Ensures no aliases are in the results. # Ensures no aliases are in the results.
allowAliases = false; allowAliases = true;
# Also list unfree packages
allowUnfree = true;
# Enable recursion into attribute sets that nix-env normally doesn't look into # Enable recursion into attribute sets that nix-env normally doesn't look into
# so that we can get a more complete picture of the available packages for the # so that we can get a more complete picture of the available packages for the

View file

@ -282,6 +282,8 @@ makeRequest options channel query from size _ sort =
"option_name" "option_name"
[] []
[] []
[]
"option_name"
[ ( "option_name", 6.0 ) [ ( "option_name", 6.0 )
, ( "option_name_query", 3.0 ) , ( "option_name_query", 3.0 )
, ( "option_description", 1.0 ) , ( "option_description", 1.0 )

View file

@ -632,12 +632,14 @@ makeRequest options channel query from size maybeBuckets sort =
sort sort
"package" "package"
"package_attr_name" "package_attr_name"
[ "package_pversion" ]
[ "package_attr_set" [ "package_attr_set"
, "package_license_set" , "package_license_set"
, "package_maintainers_set" , "package_maintainers_set"
, "package_platforms" , "package_platforms"
] ]
filterByBuckets filterByBuckets
"package_attr_name"
[ ( "package_attr_name", 9.0 ) [ ( "package_attr_name", 9.0 )
, ( "package_pname", 6.0 ) , ( "package_pname", 6.0 )
, ( "package_attr_name_query", 4.0 ) , ( "package_attr_name_query", 4.0 )

View file

@ -486,25 +486,43 @@ toAggregations bucketsFields =
toSortQuery : toSortQuery :
Sort Sort
-> String -> String
-> List String
-> ( String, Json.Encode.Value ) -> ( String, Json.Encode.Value )
toSortQuery sort field = toSortQuery sort field fields =
( "sort" ( "sort"
, case sort of , case sort of
AlphabeticallyAsc -> AlphabeticallyAsc ->
Json.Encode.list Json.Encode.object Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "asc" ) [ List.append
] [ ( field, Json.Encode.string "asc" )
]
(List.map
(\x -> ( x, Json.Encode.string "asc" ))
fields
)
] ]
AlphabeticallyDesc -> AlphabeticallyDesc ->
Json.Encode.list Json.Encode.object Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "desc" ) [ List.append
] [ ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
] ]
Relevance -> Relevance ->
Json.Encode.list Json.Encode.string Json.Encode.list Json.Encode.object
[ "_score" [ List.append
[ ( "_score", Json.Encode.string "desc" )
, ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
] ]
) )
@ -970,9 +988,10 @@ filterByType type_ =
searchFields : searchFields :
String String
-> String
-> List ( String, Float ) -> List ( String, Float )
-> List (List ( String, Json.Encode.Value )) -> List (List ( String, Json.Encode.Value ))
searchFields query fields = searchFields query mainField fields =
let let
queryVariations q = queryVariations q =
case ( List.head q, List.tail q ) of case ( List.head q, List.tail q ) of
@ -1002,22 +1021,39 @@ searchFields query fields =
|> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ]) |> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ])
|> List.concat |> List.concat
in in
List.map List.append
(\queryWords -> (List.map
[ ( "multi_match" (\queryWords ->
, Json.Encode.object [ ( "multi_match"
[ ( "type", Json.Encode.string "cross_fields" ) , Json.Encode.object
, ( "query", Json.Encode.string <| String.join " " queryWords ) [ ( "type", Json.Encode.string "cross_fields" )
, ( "analyzer", Json.Encode.string "whitespace" ) , ( "query", Json.Encode.string <| String.join " " queryWords )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False ) , ( "analyzer", Json.Encode.string "whitespace" )
, ( "operator", Json.Encode.string "and" ) , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords ) , ( "operator", Json.Encode.string "and" )
, ( "fields", Json.Encode.list Json.Encode.string allFields ) , ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
] , ( "fields", Json.Encode.list Json.Encode.string allFields )
) ]
] )
]
)
(queryVariations (String.words (String.toLower query)))
)
(List.map
(\queryWord ->
[ ( "wildcard"
, Json.Encode.object
[ ( mainField
, Json.Encode.object
[ ( "value", Json.Encode.string ("*" ++ queryWord ++ "*") )
]
)
]
)
]
)
(String.words (String.toLower query))
) )
(queryVariations (String.words (String.toLower query)))
makeRequestBody : makeRequestBody :
@ -1028,10 +1064,12 @@ makeRequestBody :
-> String -> String
-> String -> String
-> List String -> List String
-> List String
-> List ( String, Json.Encode.Value ) -> List ( String, Json.Encode.Value )
-> String
-> List ( String, Float ) -> List ( String, Float )
-> Http.Body -> Http.Body
makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBuckets fields = makeRequestBody query from sizeRaw sort type_ sortField otherSortFields bucketsFields filterByBuckets mainField fields =
let let
-- you can not request more then 10000 results otherwise it will return 404 -- you can not request more then 10000 results otherwise it will return 404
size = size =
@ -1043,49 +1081,43 @@ makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBu
in in
Http.jsonBody Http.jsonBody
(Json.Encode.object (Json.Encode.object
(List.append [ ( "from"
[ ( "from" , Json.Encode.int from
, Json.Encode.int from )
) , ( "size"
, ( "size" , Json.Encode.int size
, Json.Encode.int size )
) , toSortQuery sort sortField otherSortFields
, toSortQuery sort sortField , toAggregations bucketsFields
, toAggregations bucketsFields , ( "query"
, ( "query" , Json.Encode.object
, Json.Encode.object [ ( "bool"
[ ( "bool" , Json.Encode.object
, Json.Encode.object [ ( "filter"
[ ( "filter" , Json.Encode.list Json.Encode.object
, Json.Encode.list Json.Encode.object [ filterByType type_
[ filterByType type_ ] , filterByBuckets
) ]
, ( "must" )
, Json.Encode.list Json.Encode.object , ( "must"
[ [ ( "dis_max" , Json.Encode.list Json.Encode.object
, Json.Encode.object [ [ ( "dis_max"
[ ( "tie_breaker", Json.Encode.float 0.7 ) , Json.Encode.object
, ( "queries" [ ( "tie_breaker", Json.Encode.float 0.7 )
, Json.Encode.list Json.Encode.object , ( "queries"
(searchFields query fields) , Json.Encode.list Json.Encode.object
) (searchFields query mainField fields)
] )
) ]
] )
] ]
) ]
] )
) ]
] )
) ]
] )
(if List.isEmpty filterByBuckets then ]
[]
else
[ ( "post_filter", Json.Encode.object filterByBuckets ) ]
)
)
) )