Reworked search query (#197)
- still use multi_match but now with cross_fields type - add edge n-gram index field since cross_fields works only with match and this way we can have prefix support - suffix matching is still supported as before by reversing the words in data and query - for query we now create variations of all multi_match queries: from 2 words you get 4 queries, from 3 words you get 8 queries, and so on.
This commit is contained in:
parent
5a65c6f94b
commit
86ad9d036d
|
@ -34,7 +34,22 @@ ANALYSIS = {
|
|||
"normalizer": {
|
||||
"lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]}
|
||||
},
|
||||
"tokenizer": {
|
||||
"edge": {
|
||||
"type": "edge_ngram",
|
||||
"min_gram": 2,
|
||||
"max_gram": 50,
|
||||
"token_chars": [
|
||||
"letter",
|
||||
"digit",
|
||||
# Either we use them or we would need to strip them before that.
|
||||
"punctuation",
|
||||
"symbol",
|
||||
],
|
||||
},
|
||||
},
|
||||
"analyzer": {
|
||||
"edge": {"tokenizer": "edge"},
|
||||
"lowercase": {
|
||||
"type": "custom",
|
||||
"tokenizer": "keyword",
|
||||
|
@ -65,22 +80,67 @@ MAPPING = {
|
|||
"drv_path": {"type": "keyword"},
|
||||
},
|
||||
},
|
||||
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_attr_name": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_name_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_name_query": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_name_query_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_set": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_set_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_pname": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_pname_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"package_pversion": {"type": "keyword"},
|
||||
"package_description": {"type": "text", "analyzer": "english"},
|
||||
"package_description_reverse": {"type": "text", "analyzer": "english"},
|
||||
"package_longDescription": {"type": "text", "analyzer": "english"},
|
||||
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
|
||||
"package_description": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_description_reverse": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_longDescription": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_longDescription_reverse": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"package_license": {
|
||||
"type": "nested",
|
||||
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
|
||||
|
@ -98,12 +158,36 @@ MAPPING = {
|
|||
"package_homepage": {"type": "keyword"},
|
||||
"package_system": {"type": "keyword"},
|
||||
# Options fields
|
||||
"option_name": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||
"option_description": {"type": "text", "analyzer": "english"},
|
||||
"option_description_reverse": {"type": "text", "analyzer": "english"},
|
||||
"option_name": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_name_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_name_query": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_name_query_reverse": {
|
||||
"type": "keyword",
|
||||
"normalizer": "lowercase",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_description": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_description_reverse": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||
},
|
||||
"option_type": {"type": "keyword"},
|
||||
"option_default": {"type": "text"},
|
||||
"option_example": {"type": "text"},
|
||||
|
|
|
@ -288,8 +288,8 @@ makeRequest options channel query from size sort =
|
|||
sort
|
||||
"option"
|
||||
"option_name"
|
||||
[ ( "option_name", 2.2 )
|
||||
, ( "option_name_query", 2.0 )
|
||||
[ ( "option_name", 6.0 )
|
||||
, ( "option_name_query", 3.0 )
|
||||
, ( "option_description", 1.0 )
|
||||
]
|
||||
)
|
||||
|
|
|
@ -414,10 +414,10 @@ makeRequest options channel query from size sort =
|
|||
sort
|
||||
"package"
|
||||
"package_attr_name"
|
||||
[ ( "package_attr_name", 2.4 )
|
||||
, ( "package_pname", 2.2 )
|
||||
, ( "package_attr_name_query", 2.0 )
|
||||
, ( "package_description", 1.2 )
|
||||
[ ( "package_attr_name", 9.0 )
|
||||
, ( "package_pname", 6.0 )
|
||||
, ( "package_attr_name_query", 4.0 )
|
||||
, ( "package_description", 1.3 )
|
||||
, ( "package_longDescription", 1.0 )
|
||||
]
|
||||
)
|
||||
|
|
127
src/Search.elm
127
src/Search.elm
|
@ -62,6 +62,7 @@ import Http
|
|||
import Json.Decode
|
||||
import Json.Encode
|
||||
import RemoteData
|
||||
import Set
|
||||
import Task
|
||||
import Url.Builder
|
||||
|
||||
|
@ -753,38 +754,56 @@ filter_by_type type_ =
|
|||
]
|
||||
|
||||
|
||||
search_fields :
|
||||
Float
|
||||
-> List String
|
||||
searchFields :
|
||||
String
|
||||
-> List ( String, Float )
|
||||
-> List (List ( String, Json.Encode.Value ))
|
||||
search_fields baseScore queryWords fields =
|
||||
queryWords
|
||||
|> List.reverse
|
||||
|> List.indexedMap
|
||||
(\queryIndex queryWord ->
|
||||
[ ( "multi_match"
|
||||
, Json.Encode.object
|
||||
[ ( "type", Json.Encode.string "bool_prefix" )
|
||||
, ( "query", Json.Encode.string queryWord )
|
||||
, ( "analyzer", Json.Encode.string "lowercase" )
|
||||
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
|
||||
, ( "prefix_length", Json.Encode.int 3 )
|
||||
, ( "operator", Json.Encode.string "or" )
|
||||
, ( "_name"
|
||||
, Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt)
|
||||
)
|
||||
, ( "fields"
|
||||
, Json.Encode.list Json.Encode.string
|
||||
(List.map
|
||||
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
|
||||
fields
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
searchFields query fields =
|
||||
let
|
||||
queryVariations q =
|
||||
case ( List.head q, List.tail q ) of
|
||||
( Just h, Just t ) ->
|
||||
let
|
||||
tail : List (List String)
|
||||
tail =
|
||||
queryVariations t
|
||||
in
|
||||
List.append
|
||||
(List.map (\x -> List.append [ h ] x) tail)
|
||||
(List.map (\x -> List.append [ String.reverse h ] x) tail)
|
||||
|> Set.fromList
|
||||
|> Set.toList
|
||||
|
||||
( Just h, Nothing ) ->
|
||||
[ [ h ], [ String.reverse h ] ]
|
||||
|
||||
( _, _ ) ->
|
||||
[ [], [] ]
|
||||
|
||||
reverseFields =
|
||||
List.map (\( field, score ) -> ( field ++ "_reverse", score * 0.8 )) fields
|
||||
|
||||
allFields =
|
||||
List.append fields reverseFields
|
||||
|> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ])
|
||||
|> List.concat
|
||||
in
|
||||
List.map
|
||||
(\queryWords ->
|
||||
[ ( "multi_match"
|
||||
, Json.Encode.object
|
||||
[ ( "type", Json.Encode.string "cross_fields" )
|
||||
, ( "query", Json.Encode.string <| String.join " " queryWords )
|
||||
, ( "analyzer", Json.Encode.string "whitespace" )
|
||||
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
|
||||
, ( "operator", Json.Encode.string "and" )
|
||||
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
|
||||
, ( "fields", Json.Encode.list Json.Encode.string allFields )
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
(queryVariations (String.words query))
|
||||
|
||||
|
||||
makeRequestBody :
|
||||
|
@ -830,31 +849,29 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
|
|||
[ ( "tie_breaker", Json.Encode.float 0.7 )
|
||||
, ( "queries"
|
||||
, Json.Encode.list Json.Encode.object
|
||||
[ [ ( "bool"
|
||||
, Json.Encode.object
|
||||
[ ( "must"
|
||||
, Json.Encode.list Json.Encode.object <|
|
||||
search_fields
|
||||
1.0
|
||||
(String.words query)
|
||||
fields
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
, [ ( "bool"
|
||||
, Json.Encode.object
|
||||
[ ( "must"
|
||||
, Json.Encode.list Json.Encode.object <|
|
||||
search_fields
|
||||
0.8
|
||||
(String.words query |> List.map String.reverse)
|
||||
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
]
|
||||
(searchFields query fields)
|
||||
-- [ [ ( "bool"
|
||||
-- , Json.Encode.object
|
||||
-- [ ( "must"
|
||||
-- , Json.Encode.list Json.Encode.object <|
|
||||
-- searchFields query fields
|
||||
-- )
|
||||
-- ]
|
||||
-- )
|
||||
-- ]
|
||||
-- ]
|
||||
-- , [ ( "bool"
|
||||
-- , Json.Encode.object
|
||||
-- [ ( "must"
|
||||
-- , Json.Encode.list Json.Encode.object <|
|
||||
-- searchFields
|
||||
-- 0.8
|
||||
-- (String.words query |> List.map String.reverse)
|
||||
-- )
|
||||
-- ]
|
||||
-- )
|
||||
-- ]
|
||||
--]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue