From b4a653519c20571c82cdf8da55420eaf101ac7cc Mon Sep 17 00:00:00 2001 From: Rok Garbas Date: Sun, 30 Aug 2020 01:03:09 +0200 Subject: [PATCH] rework ranking queries (#168) --- src/Page/Options.elm | 125 ++---------------------------------- src/Page/Packages.elm | 146 +++++------------------------------------- src/Search.elm | 130 ++++++++++++++----------------------- 3 files changed, 70 insertions(+), 331 deletions(-) diff --git a/src/Page/Options.elm b/src/Page/Options.elm index 296f46d..fd2ae4f 100644 --- a/src/Page/Options.elm +++ b/src/Page/Options.elm @@ -279,132 +279,19 @@ makeRequest : -> Int -> Search.Sort -> Cmd Msg -makeRequest options channel queryRaw from size sort = - let - query = - queryRaw - |> String.trim - - delimiters = - Maybe.withDefault Regex.never (Regex.fromString "[. ]") - - should_match boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "match" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "analyzer", Json.Encode.string "whitespace" ) - , ( "fuzziness", Json.Encode.string "1" ) - , ( "_name" - , Json.Encode.string <| - "should_match_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "option_name", 1 ) - , ( "option_name_query", 1 ) - , ( "option_description", 1 ) - ] - - should_match_bool_prefix boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "match_bool_prefix" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "analyzer", Json.Encode.string "whitespace" ) - , ( "fuzziness", Json.Encode.string "1" ) - , ( "_name" - , Json.Encode.string <| - "should_match_bool_prefix_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "option_name", 1 ) - , ( "option_name_query", 1 ) - ] - - should_terms boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "terms" - , Json.Encode.object - [ ( field - , Json.Encode.list Json.Encode.string (Regex.split delimiters query) - ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "_name" - , Json.Encode.string <| - "should_terms_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - [ ( "option_name", 1 ) - , ( "option_name_query", 1 ) - ] - - should_term boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "term" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "value", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "_name" - , Json.Encode.string <| - "should_term_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "option_name", 1 ) - , ( "option_name_query", 1 ) - ] - - should_queries = - [] - |> List.append (should_term 10000) - |> List.append (should_terms 1000) - |> List.append (should_match_bool_prefix 100) - |> List.append (should_match 10) - in +makeRequest options channel query from size sort = Search.makeRequest - (Search.makeRequestBody query + (Search.makeRequestBody + (String.trim query) from size sort "option" "option_name" - [ "option_name_query" - , "option_description" + [ "option_name^2" + , "option_name_query^2" + , "option_description^1" ] - should_queries ) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) decodeResultItemSource diff --git a/src/Page/Packages.elm b/src/Page/Packages.elm index 637755f..d4afd10 100644 --- a/src/Page/Packages.elm +++ b/src/Page/Packages.elm @@ -206,8 +206,15 @@ viewResultItem channel show item = -- DEBUG: |> List.append -- DEBUG: [ tr [] -- DEBUG: [ td [ colspan 4 ] - -- DEBUG: [ p [] [ text <| "score: " ++ String.fromFloat item.score ] - -- DEBUG: , p [] + -- DEBUG: [ div [] + -- DEBUG: [ text <| + -- DEBUG: "score: " + -- DEBUG: ++ (item.score + -- DEBUG: |> Maybe.map String.fromFloat + -- DEBUG: |> Maybe.withDefault "No score" + -- DEBUG: ) + -- DEBUG: ] + -- DEBUG: , div [] -- DEBUG: [ text <| -- DEBUG: "matched queries: " -- DEBUG: , ul [] @@ -398,140 +405,21 @@ makeRequest : -> Int -> Search.Sort -> Cmd Msg -makeRequest options channel queryRaw from size sort = - let - query = - queryRaw - |> String.trim - - delimiters = - Maybe.withDefault Regex.never (Regex.fromString "[. ]") - - should_match boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "match" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "analyzer", Json.Encode.string "whitespace" ) - , ( "fuzziness", Json.Encode.string "1" ) - , ( "_name" - , Json.Encode.string <| - "should_match_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "package_attr_name", 5 ) - , ( "package_attr_name_query", 3 ) - , ( "package_pname", 4 ) - , ( "package_description", 2 ) - , ( "package_longDescription", 1 ) - ] - - should_match_bool_prefix boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "match_bool_prefix" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "analyzer", Json.Encode.string "whitespace" ) - , ( "fuzziness", Json.Encode.string "1" ) - , ( "_name" - , Json.Encode.string <| - "should_match_bool_prefix_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "package_attr_name", 2 ) - , ( "package_attr_name_query", 1 ) - , ( "package_pname", 3 ) - ] - - should_terms boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "terms" - , Json.Encode.object - [ ( field - , Json.Encode.list Json.Encode.string (Regex.split delimiters query) - ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "_name" - , Json.Encode.string <| - "should_terms_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - [ ( "package_attr_name", 3 ) - , ( "package_attr_name_query", 2 ) - , ( "package_pname", 4 ) - , ( "package_attr_set", 1 ) - ] - - should_term boost_base = - List.indexedMap - (\i ( field, boost ) -> - [ ( "term" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "value", Json.Encode.string query ) - , ( "boost", Json.Encode.float <| boost_base * boost ) - , ( "_name" - , Json.Encode.string <| - "should_term_" - ++ String.fromInt (i + 1) - ) - ] - ) - ] - ) - ] - ) - [ ( "package_attr_name", 2 ) - , ( "package_attr_name_query", 1 ) - , ( "package_pname", 3 ) - ] - - should_queries = - [] - |> List.append (should_term 10000) - |> List.append (should_terms 1000) - |> List.append (should_match_bool_prefix 100) - |> List.append (should_match 10) - in +makeRequest options channel query from size sort = Search.makeRequest - (Search.makeRequestBody query + (Search.makeRequestBody + (String.trim query) from size sort "package" "package_attr_name" - [ "package_attr_name_query" - , "package_pname" - , "package_description" - , "package_longDescription" + [ "package_attr_name^2" + , "package_attr_name_query^2" + , "package_pname^2" + , "package_description^1" + , "package_longDescription^1" ] - should_queries ) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) decodeResultItemSource diff --git a/src/Search.elm b/src/Search.elm index 1155764..cbe257c 100644 --- a/src/Search.elm +++ b/src/Search.elm @@ -134,7 +134,7 @@ init channel query show from size sort model = defaultSize = model |> Maybe.map (\x -> x.size) - |> Maybe.withDefault 15 + |> Maybe.withDefault 30 in ( { channel = Maybe.withDefault defaultChannel channel , query = query @@ -721,82 +721,48 @@ type alias Options = filter_by_type : String - -> ( String, Json.Encode.Value ) + -> List ( String, Json.Encode.Value ) filter_by_type type_ = - ( "term" - , Json.Encode.object - [ ( "type" - , Json.Encode.object - [ ( "value", Json.Encode.string type_ ) - , ( "_name", Json.Encode.string <| "filter_" ++ type_ ++ "s" ) + [ ( "term" + , Json.Encode.object + [ ( "type" + , Json.Encode.object + [ ( "value", Json.Encode.string type_ ) + , ( "_name", Json.Encode.string <| "filter_" ++ type_ ++ "s" ) + ] + ) + ] + ) + ] + + +search_fields : + String + -> List String + -> List (List ( String, Json.Encode.Value )) +search_fields query fields = + query + |> String.words + |> List.reverse + |> List.indexedMap + (\queryIndex queryWord -> + [ ( "multi_match" + , Json.Encode.object + [ ( "type", Json.Encode.string "most_fields" ) + , ( "query", Json.Encode.string queryWord ) + , ( "fuzziness", Json.Encode.int <| String.length queryWord // 5 ) + , ( "operator", Json.Encode.string "or" ) + , ( "_name" + , Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt) + ) + , ( "fields" + , Json.Encode.list Json.Encode.string + (List.map (\field -> field ++ "." ++ (queryIndex + 1 |> String.fromInt)) fields) + ) + ] + ) ] - ) - ] - ) - - -filter_by_query : - List String - -> String - -> ( String, Json.Encode.Value ) -filter_by_query fields queryRaw = - let - query = - queryRaw - |> String.trim - in - ( "bool" - , Json.Encode.object - [ ( "should" - , Json.Encode.list Json.Encode.object - (query - |> String.words - |> List.indexedMap - (\i query_word -> - [ ( "bool" - , Json.Encode.object - [ ( "should" - , Json.Encode.list Json.Encode.object - (List.concatMap - (\field -> - [ [ ( "match" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query_word ) - , ( "fuzziness", Json.Encode.string "1" ) - , ( "_name", Json.Encode.string <| "filter_queries_" ++ String.fromInt i ++ "_should_match" ) - ] - ) - ] - ) - ] - , [ ( "match_bool_prefix" - , Json.Encode.object - [ ( field - , Json.Encode.object - [ ( "query", Json.Encode.string query_word ) - , ( "_name" - , Json.Encode.string <| "filter_queries_" ++ String.fromInt (i + 1) ++ "_should_prefix" - ) - ] - ) - ] - ) - ] - ] - ) - fields - ) - ) - ] - ) - ] - ) - ) - ) - ] - ) + ) makeRequestBody : @@ -807,9 +773,8 @@ makeRequestBody : -> String -> String -> List String - -> List (List ( String, Json.Encode.Value )) -> Http.Body -makeRequestBody query from sizeRaw sort type_ sort_field query_fields should_queries = +makeRequestBody query from sizeRaw sort type_ sortField fields = let -- you can not request more then 10000 results otherwise it will return 404 size = @@ -827,19 +792,18 @@ makeRequestBody query from sizeRaw sort type_ sort_field query_fields should_que , ( "size" , Json.Encode.int size ) - , toSortQuery sort sort_field + , toSortQuery sort sortField , ( "query" , Json.Encode.object [ ( "bool" , Json.Encode.object [ ( "filter" , Json.Encode.list Json.Encode.object - [ [ filter_by_type type_ ] - , [ filter_by_query query_fields query ] - ] + [ filter_by_type type_ ] ) - , ( "should" - , Json.Encode.list Json.Encode.object should_queries + , ( "must" + , Json.Encode.list Json.Encode.object + (search_fields query fields) ) ] )