rework ranking queries (#168)

This commit is contained in:
Rok Garbas 2020-08-30 01:03:09 +02:00 committed by GitHub
parent 652381936b
commit b4a653519c
Failed to generate hash of commit
3 changed files with 70 additions and 331 deletions

View file

@ -279,132 +279,19 @@ makeRequest :
-> Int -> Int
-> Search.Sort -> Search.Sort
-> Cmd Msg -> Cmd Msg
makeRequest options channel queryRaw from size sort = makeRequest options channel query from size sort =
let
query =
queryRaw
|> String.trim
delimiters =
Maybe.withDefault Regex.never (Regex.fromString "[. ]")
should_match boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "match"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "fuzziness", Json.Encode.string "1" )
, ( "_name"
, Json.Encode.string <|
"should_match_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "option_name", 1 )
, ( "option_name_query", 1 )
, ( "option_description", 1 )
]
should_match_bool_prefix boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "match_bool_prefix"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "fuzziness", Json.Encode.string "1" )
, ( "_name"
, Json.Encode.string <|
"should_match_bool_prefix_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "option_name", 1 )
, ( "option_name_query", 1 )
]
should_terms boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "terms"
, Json.Encode.object
[ ( field
, Json.Encode.list Json.Encode.string (Regex.split delimiters query)
)
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "_name"
, Json.Encode.string <|
"should_terms_"
++ String.fromInt (i + 1)
)
]
)
]
)
[ ( "option_name", 1 )
, ( "option_name_query", 1 )
]
should_term boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "term"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "value", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "_name"
, Json.Encode.string <|
"should_term_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "option_name", 1 )
, ( "option_name_query", 1 )
]
should_queries =
[]
|> List.append (should_term 10000)
|> List.append (should_terms 1000)
|> List.append (should_match_bool_prefix 100)
|> List.append (should_match 10)
in
Search.makeRequest Search.makeRequest
(Search.makeRequestBody query (Search.makeRequestBody
(String.trim query)
from from
size size
sort sort
"option" "option"
"option_name" "option_name"
[ "option_name_query" [ "option_name^2"
, "option_description" , "option_name_query^2"
, "option_description^1"
] ]
should_queries
) )
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
decodeResultItemSource decodeResultItemSource

View file

@ -206,8 +206,15 @@ viewResultItem channel show item =
-- DEBUG: |> List.append -- DEBUG: |> List.append
-- DEBUG: [ tr [] -- DEBUG: [ tr []
-- DEBUG: [ td [ colspan 4 ] -- DEBUG: [ td [ colspan 4 ]
-- DEBUG: [ p [] [ text <| "score: " ++ String.fromFloat item.score ] -- DEBUG: [ div []
-- DEBUG: , p [] -- DEBUG: [ text <|
-- DEBUG: "score: "
-- DEBUG: ++ (item.score
-- DEBUG: |> Maybe.map String.fromFloat
-- DEBUG: |> Maybe.withDefault "No score"
-- DEBUG: )
-- DEBUG: ]
-- DEBUG: , div []
-- DEBUG: [ text <| -- DEBUG: [ text <|
-- DEBUG: "matched queries: " -- DEBUG: "matched queries: "
-- DEBUG: , ul [] -- DEBUG: , ul []
@ -398,140 +405,21 @@ makeRequest :
-> Int -> Int
-> Search.Sort -> Search.Sort
-> Cmd Msg -> Cmd Msg
makeRequest options channel queryRaw from size sort = makeRequest options channel query from size sort =
let
query =
queryRaw
|> String.trim
delimiters =
Maybe.withDefault Regex.never (Regex.fromString "[. ]")
should_match boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "match"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "fuzziness", Json.Encode.string "1" )
, ( "_name"
, Json.Encode.string <|
"should_match_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "package_attr_name", 5 )
, ( "package_attr_name_query", 3 )
, ( "package_pname", 4 )
, ( "package_description", 2 )
, ( "package_longDescription", 1 )
]
should_match_bool_prefix boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "match_bool_prefix"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "fuzziness", Json.Encode.string "1" )
, ( "_name"
, Json.Encode.string <|
"should_match_bool_prefix_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "package_attr_name", 2 )
, ( "package_attr_name_query", 1 )
, ( "package_pname", 3 )
]
should_terms boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "terms"
, Json.Encode.object
[ ( field
, Json.Encode.list Json.Encode.string (Regex.split delimiters query)
)
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "_name"
, Json.Encode.string <|
"should_terms_"
++ String.fromInt (i + 1)
)
]
)
]
)
[ ( "package_attr_name", 3 )
, ( "package_attr_name_query", 2 )
, ( "package_pname", 4 )
, ( "package_attr_set", 1 )
]
should_term boost_base =
List.indexedMap
(\i ( field, boost ) ->
[ ( "term"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "value", Json.Encode.string query )
, ( "boost", Json.Encode.float <| boost_base * boost )
, ( "_name"
, Json.Encode.string <|
"should_term_"
++ String.fromInt (i + 1)
)
]
)
]
)
]
)
[ ( "package_attr_name", 2 )
, ( "package_attr_name_query", 1 )
, ( "package_pname", 3 )
]
should_queries =
[]
|> List.append (should_term 10000)
|> List.append (should_terms 1000)
|> List.append (should_match_bool_prefix 100)
|> List.append (should_match 10)
in
Search.makeRequest Search.makeRequest
(Search.makeRequestBody query (Search.makeRequestBody
(String.trim query)
from from
size size
sort sort
"package" "package"
"package_attr_name" "package_attr_name"
[ "package_attr_name_query" [ "package_attr_name^2"
, "package_pname" , "package_attr_name_query^2"
, "package_description" , "package_pname^2"
, "package_longDescription" , "package_description^1"
, "package_longDescription^1"
] ]
should_queries
) )
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
decodeResultItemSource decodeResultItemSource

View file

@ -134,7 +134,7 @@ init channel query show from size sort model =
defaultSize = defaultSize =
model model
|> Maybe.map (\x -> x.size) |> Maybe.map (\x -> x.size)
|> Maybe.withDefault 15 |> Maybe.withDefault 30
in in
( { channel = Maybe.withDefault defaultChannel channel ( { channel = Maybe.withDefault defaultChannel channel
, query = query , query = query
@ -721,82 +721,48 @@ type alias Options =
filter_by_type : filter_by_type :
String String
-> ( String, Json.Encode.Value ) -> List ( String, Json.Encode.Value )
filter_by_type type_ = filter_by_type type_ =
( "term" [ ( "term"
, Json.Encode.object , Json.Encode.object
[ ( "type" [ ( "type"
, Json.Encode.object , Json.Encode.object
[ ( "value", Json.Encode.string type_ ) [ ( "value", Json.Encode.string type_ )
, ( "_name", Json.Encode.string <| "filter_" ++ type_ ++ "s" ) , ( "_name", Json.Encode.string <| "filter_" ++ type_ ++ "s" )
]
)
]
)
]
search_fields :
String
-> List String
-> List (List ( String, Json.Encode.Value ))
search_fields query fields =
query
|> String.words
|> List.reverse
|> List.indexedMap
(\queryIndex queryWord ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "most_fields" )
, ( "query", Json.Encode.string queryWord )
, ( "fuzziness", Json.Encode.int <| String.length queryWord // 5 )
, ( "operator", Json.Encode.string "or" )
, ( "_name"
, Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt)
)
, ( "fields"
, Json.Encode.list Json.Encode.string
(List.map (\field -> field ++ "." ++ (queryIndex + 1 |> String.fromInt)) fields)
)
]
)
] ]
) )
]
)
filter_by_query :
List String
-> String
-> ( String, Json.Encode.Value )
filter_by_query fields queryRaw =
let
query =
queryRaw
|> String.trim
in
( "bool"
, Json.Encode.object
[ ( "should"
, Json.Encode.list Json.Encode.object
(query
|> String.words
|> List.indexedMap
(\i query_word ->
[ ( "bool"
, Json.Encode.object
[ ( "should"
, Json.Encode.list Json.Encode.object
(List.concatMap
(\field ->
[ [ ( "match"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query_word )
, ( "fuzziness", Json.Encode.string "1" )
, ( "_name", Json.Encode.string <| "filter_queries_" ++ String.fromInt i ++ "_should_match" )
]
)
]
)
]
, [ ( "match_bool_prefix"
, Json.Encode.object
[ ( field
, Json.Encode.object
[ ( "query", Json.Encode.string query_word )
, ( "_name"
, Json.Encode.string <| "filter_queries_" ++ String.fromInt (i + 1) ++ "_should_prefix"
)
]
)
]
)
]
]
)
fields
)
)
]
)
]
)
)
)
]
)
makeRequestBody : makeRequestBody :
@ -807,9 +773,8 @@ makeRequestBody :
-> String -> String
-> String -> String
-> List String -> List String
-> List (List ( String, Json.Encode.Value ))
-> Http.Body -> Http.Body
makeRequestBody query from sizeRaw sort type_ sort_field query_fields should_queries = makeRequestBody query from sizeRaw sort type_ sortField fields =
let let
-- you cannot request more than 10000 results, otherwise it will return 404 -- you cannot request more than 10000 results, otherwise it will return 404
size = size =
@ -827,19 +792,18 @@ makeRequestBody query from sizeRaw sort type_ sort_field query_fields should_que
, ( "size" , ( "size"
, Json.Encode.int size , Json.Encode.int size
) )
, toSortQuery sort sort_field , toSortQuery sort sortField
, ( "query" , ( "query"
, Json.Encode.object , Json.Encode.object
[ ( "bool" [ ( "bool"
, Json.Encode.object , Json.Encode.object
[ ( "filter" [ ( "filter"
, Json.Encode.list Json.Encode.object , Json.Encode.list Json.Encode.object
[ [ filter_by_type type_ ] [ filter_by_type type_ ]
, [ filter_by_query query_fields query ]
]
) )
, ( "should" , ( "must"
, Json.Encode.list Json.Encode.object should_queries , Json.Encode.list Json.Encode.object
(search_fields query fields)
) )
] ]
) )