From cfb5e540b91903968d66de6e415335300334eb29 Mon Sep 17 00:00:00 2001 From: Rok Garbas Date: Sun, 7 Feb 2021 19:10:13 +0100 Subject: [PATCH] Improving search query (#288) * Reduce the number of bucket results on each ChangeBucket event * Warmup after reindexing, fixes #250 * Using multiple sort keys, fixes #218 * Index also unfree packages, fixes #226, fixes #237 * Add wildcard query, fixes #279 --- .github/workflows/cron.yml | 11 +- .github/workflows/pulls.yml | 1 - VERSION | 2 +- .../import_scripts/packages-config.nix | 5 +- src/Page/Options.elm | 2 + src/Page/Packages.elm | 2 + src/Search.elm | 166 +++++++++++------- 7 files changed, 117 insertions(+), 72 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 78ff029..da6f575 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -72,9 +72,9 @@ jobs: path: ./eval-cache key: eval-cache-${{ env.EVAL_ID }} - - name: Installing NixFlakes + - name: Installing nixFlakes (and jq) run: | - nix-env -iA nixpkgs.nixFlakes + nix-env -iA nixpkgs.nixFlakes nixpkgs.jq echo 'experimental-features = nix-command flakes' | sudo tee -a /etc/nix/nix.conf nix --version cat /etc/nix/nix.conf @@ -89,3 +89,10 @@ jobs: cp ./eval-cache/builds.json ./eval-${{ env.EVAL_ID }}.json ./result/bin/import-channel --es-url ${{ secrets.ELASTICSEARCH_URL }} --channel ${{ matrix.channel }} -vvv if: github.repository == 'NixOS/nixos-search' + + - name: Warmup ${{ matrix.channel }} channel + run: | + curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took' + curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took' + curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took' + if: github.repository == 'NixOS/nixos-search' diff --git a/.github/workflows/pulls.yml b/.github/workflows/pulls.yml index 05faae6..0b7ae40 100644 --- a/.github/workflows/pulls.yml +++ 
b/.github/workflows/pulls.yml @@ -16,7 +16,6 @@ jobs: - uses: cachix/cachix-action@v8 with: name: nixos-search - signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}' - name: Install unstable channel run: | diff --git a/VERSION b/VERSION index 3c03207..d6b2404 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18 +19 diff --git a/import-scripts/import_scripts/packages-config.nix b/import-scripts/import_scripts/packages-config.nix index 565d4a7..813aecc 100644 --- a/import-scripts/import_scripts/packages-config.nix +++ b/import-scripts/import_scripts/packages-config.nix @@ -1,6 +1,9 @@ { # Ensures no aliases are in the results. - allowAliases = false; + allowAliases = true; + + # Also list unfree packages + allowUnfree = true; # Enable recursion into attribute sets that nix-env normally doesn't look into # so that we can get a more complete picture of the available packages for the diff --git a/src/Page/Options.elm b/src/Page/Options.elm index 5654d63..f0fe724 100644 --- a/src/Page/Options.elm +++ b/src/Page/Options.elm @@ -282,6 +282,8 @@ makeRequest options channel query from size _ sort = "option_name" [] [] + [] + "option_name" [ ( "option_name", 6.0 ) , ( "option_name_query", 3.0 ) , ( "option_description", 1.0 ) diff --git a/src/Page/Packages.elm b/src/Page/Packages.elm index 0487b67..9482db7 100644 --- a/src/Page/Packages.elm +++ b/src/Page/Packages.elm @@ -632,12 +632,14 @@ makeRequest options channel query from size maybeBuckets sort = sort "package" "package_attr_name" + [ "package_pversion" ] [ "package_attr_set" , "package_license_set" , "package_maintainers_set" , "package_platforms" ] filterByBuckets + "package_attr_name" [ ( "package_attr_name", 9.0 ) , ( "package_pname", 6.0 ) , ( "package_attr_name_query", 4.0 ) diff --git a/src/Search.elm b/src/Search.elm index 4f8731c..5397035 100644 --- a/src/Search.elm +++ b/src/Search.elm @@ -486,25 +486,43 @@ toAggregations bucketsFields = toSortQuery : Sort -> String + -> List String -> ( String, Json.Encode.Value ) 
-toSortQuery sort field = +toSortQuery sort field fields = ( "sort" , case sort of AlphabeticallyAsc -> Json.Encode.list Json.Encode.object - [ [ ( field, Json.Encode.string "asc" ) - ] + [ List.append + [ ( field, Json.Encode.string "asc" ) + ] + (List.map + (\x -> ( x, Json.Encode.string "asc" )) + fields + ) ] AlphabeticallyDesc -> Json.Encode.list Json.Encode.object - [ [ ( field, Json.Encode.string "desc" ) - ] + [ List.append + [ ( field, Json.Encode.string "desc" ) + ] + (List.map + (\x -> ( x, Json.Encode.string "desc" )) + fields + ) ] Relevance -> - Json.Encode.list Json.Encode.string - [ "_score" + Json.Encode.list Json.Encode.object + [ List.append + [ ( "_score", Json.Encode.string "desc" ) + , ( field, Json.Encode.string "desc" ) + ] + (List.map + (\x -> ( x, Json.Encode.string "desc" )) + fields + ) ] ) @@ -970,9 +988,10 @@ filterByType type_ = searchFields : String + -> String -> List ( String, Float ) -> List (List ( String, Json.Encode.Value )) -searchFields query fields = +searchFields query mainField fields = let queryVariations q = case ( List.head q, List.tail q ) of @@ -1002,22 +1021,39 @@ searchFields query fields = |> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ]) |> List.concat in - List.map - (\queryWords -> - [ ( "multi_match" - , Json.Encode.object - [ ( "type", Json.Encode.string "cross_fields" ) - , ( "query", Json.Encode.string <| String.join " " queryWords ) - , ( "analyzer", Json.Encode.string "whitespace" ) - , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False ) - , ( "operator", Json.Encode.string "and" ) - , ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords ) - , ( "fields", Json.Encode.list Json.Encode.string allFields ) - ] - ) - ] + List.append + (List.map + (\queryWords -> + [ ( "multi_match" + , Json.Encode.object + [ ( "type", Json.Encode.string "cross_fields" ) + , ( "query", Json.Encode.string <| String.join 
" " queryWords ) + , ( "analyzer", Json.Encode.string "whitespace" ) + , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False ) + , ( "operator", Json.Encode.string "and" ) + , ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords ) + , ( "fields", Json.Encode.list Json.Encode.string allFields ) + ] + ) + ] + ) + (queryVariations (String.words (String.toLower query))) + ) + (List.map + (\queryWord -> + [ ( "wildcard" + , Json.Encode.object + [ ( mainField + , Json.Encode.object + [ ( "value", Json.Encode.string ("*" ++ queryWord ++ "*") ) + ] + ) + ] + ) + ] + ) + (String.words (String.toLower query)) ) - (queryVariations (String.words (String.toLower query))) makeRequestBody : @@ -1028,10 +1064,12 @@ makeRequestBody : -> String -> String -> List String + -> List String -> List ( String, Json.Encode.Value ) + -> String -> List ( String, Float ) -> Http.Body -makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBuckets fields = +makeRequestBody query from sizeRaw sort type_ sortField otherSortFields bucketsFields filterByBuckets mainField fields = let -- you can not request more then 10000 results otherwise it will return 404 size = @@ -1043,49 +1081,43 @@ makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBu in Http.jsonBody (Json.Encode.object - (List.append - [ ( "from" - , Json.Encode.int from - ) - , ( "size" - , Json.Encode.int size - ) - , toSortQuery sort sortField - , toAggregations bucketsFields - , ( "query" - , Json.Encode.object - [ ( "bool" - , Json.Encode.object - [ ( "filter" - , Json.Encode.list Json.Encode.object - [ filterByType type_ ] - ) - , ( "must" - , Json.Encode.list Json.Encode.object - [ [ ( "dis_max" - , Json.Encode.object - [ ( "tie_breaker", Json.Encode.float 0.7 ) - , ( "queries" - , Json.Encode.list Json.Encode.object - (searchFields query fields) - ) - ] - ) - ] - ] - ) - ] - ) - ] - ) - ] - (if List.isEmpty filterByBuckets then - [] - - else - [ 
( "post_filter", Json.Encode.object filterByBuckets ) ] - ) - ) + [ ( "from" + , Json.Encode.int from + ) + , ( "size" + , Json.Encode.int size + ) + , toSortQuery sort sortField otherSortFields + , toAggregations bucketsFields + , ( "query" + , Json.Encode.object + [ ( "bool" + , Json.Encode.object + [ ( "filter" + , Json.Encode.list Json.Encode.object + [ filterByType type_ + , filterByBuckets + ] + ) + , ( "must" + , Json.Encode.list Json.Encode.object + [ [ ( "dis_max" + , Json.Encode.object + [ ( "tie_breaker", Json.Encode.float 0.7 ) + , ( "queries" + , Json.Encode.list Json.Encode.object + (searchFields query mainField fields) + ) + ] + ) + ] + ] + ) + ] + ) + ] + ) + ] )