Reworked search query (#197)
- still use multi_match, but now with the cross_fields type - add an edge n-gram index field, since cross_fields works only with match, and this way we can have prefix support - suffix matching is still supported as before by reversing the words in both the data and the query - for the query we now create variations of all multi_match queries: from 2 words you get 4 queries, from 3 words you get 8 queries, and so on.
This commit is contained in:
parent
5a65c6f94b
commit
86ad9d036d
|
@ -34,7 +34,22 @@ ANALYSIS = {
|
||||||
"normalizer": {
|
"normalizer": {
|
||||||
"lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]}
|
"lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]}
|
||||||
},
|
},
|
||||||
|
"tokenizer": {
|
||||||
|
"edge": {
|
||||||
|
"type": "edge_ngram",
|
||||||
|
"min_gram": 2,
|
||||||
|
"max_gram": 50,
|
||||||
|
"token_chars": [
|
||||||
|
"letter",
|
||||||
|
"digit",
|
||||||
|
# Either we use them or we would need to strip them before that.
|
||||||
|
"punctuation",
|
||||||
|
"symbol",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
"analyzer": {
|
"analyzer": {
|
||||||
|
"edge": {"tokenizer": "edge"},
|
||||||
"lowercase": {
|
"lowercase": {
|
||||||
"type": "custom",
|
"type": "custom",
|
||||||
"tokenizer": "keyword",
|
"tokenizer": "keyword",
|
||||||
|
@ -65,22 +80,67 @@ MAPPING = {
|
||||||
"drv_path": {"type": "keyword"},
|
"drv_path": {"type": "keyword"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
|
"package_attr_name": {
|
||||||
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
"type": "keyword",
|
||||||
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_attr_name_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_attr_name_query": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
"package_attr_name_query_reverse": {
|
"package_attr_name_query_reverse": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"normalizer": "lowercase",
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_attr_set": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_attr_set_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_pname": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_pname_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
},
|
},
|
||||||
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
|
|
||||||
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
|
||||||
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
|
|
||||||
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
|
||||||
"package_pversion": {"type": "keyword"},
|
"package_pversion": {"type": "keyword"},
|
||||||
"package_description": {"type": "text", "analyzer": "english"},
|
"package_description": {
|
||||||
"package_description_reverse": {"type": "text", "analyzer": "english"},
|
"type": "text",
|
||||||
"package_longDescription": {"type": "text", "analyzer": "english"},
|
"analyzer": "english",
|
||||||
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_description_reverse": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_longDescription": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"package_longDescription_reverse": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
"package_license": {
|
"package_license": {
|
||||||
"type": "nested",
|
"type": "nested",
|
||||||
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
|
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
|
||||||
|
@ -98,12 +158,36 @@ MAPPING = {
|
||||||
"package_homepage": {"type": "keyword"},
|
"package_homepage": {"type": "keyword"},
|
||||||
"package_system": {"type": "keyword"},
|
"package_system": {"type": "keyword"},
|
||||||
# Options fields
|
# Options fields
|
||||||
"option_name": {"type": "keyword", "normalizer": "lowercase"},
|
"option_name": {
|
||||||
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
"type": "keyword",
|
||||||
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
"normalizer": "lowercase",
|
||||||
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
"option_description": {"type": "text", "analyzer": "english"},
|
},
|
||||||
"option_description_reverse": {"type": "text", "analyzer": "english"},
|
"option_name_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"option_name_query": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"option_name_query_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"option_description": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
|
"option_description_reverse": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english",
|
||||||
|
"fields": {"edge": {"type": "text", "analyzer": "edge"}},
|
||||||
|
},
|
||||||
"option_type": {"type": "keyword"},
|
"option_type": {"type": "keyword"},
|
||||||
"option_default": {"type": "text"},
|
"option_default": {"type": "text"},
|
||||||
"option_example": {"type": "text"},
|
"option_example": {"type": "text"},
|
||||||
|
|
|
@ -288,8 +288,8 @@ makeRequest options channel query from size sort =
|
||||||
sort
|
sort
|
||||||
"option"
|
"option"
|
||||||
"option_name"
|
"option_name"
|
||||||
[ ( "option_name", 2.2 )
|
[ ( "option_name", 6.0 )
|
||||||
, ( "option_name_query", 2.0 )
|
, ( "option_name_query", 3.0 )
|
||||||
, ( "option_description", 1.0 )
|
, ( "option_description", 1.0 )
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
@ -414,10 +414,10 @@ makeRequest options channel query from size sort =
|
||||||
sort
|
sort
|
||||||
"package"
|
"package"
|
||||||
"package_attr_name"
|
"package_attr_name"
|
||||||
[ ( "package_attr_name", 2.4 )
|
[ ( "package_attr_name", 9.0 )
|
||||||
, ( "package_pname", 2.2 )
|
, ( "package_pname", 6.0 )
|
||||||
, ( "package_attr_name_query", 2.0 )
|
, ( "package_attr_name_query", 4.0 )
|
||||||
, ( "package_description", 1.2 )
|
, ( "package_description", 1.3 )
|
||||||
, ( "package_longDescription", 1.0 )
|
, ( "package_longDescription", 1.0 )
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
127
src/Search.elm
127
src/Search.elm
|
@ -62,6 +62,7 @@ import Http
|
||||||
import Json.Decode
|
import Json.Decode
|
||||||
import Json.Encode
|
import Json.Encode
|
||||||
import RemoteData
|
import RemoteData
|
||||||
|
import Set
|
||||||
import Task
|
import Task
|
||||||
import Url.Builder
|
import Url.Builder
|
||||||
|
|
||||||
|
@ -753,38 +754,56 @@ filter_by_type type_ =
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
search_fields :
|
searchFields :
|
||||||
Float
|
String
|
||||||
-> List String
|
|
||||||
-> List ( String, Float )
|
-> List ( String, Float )
|
||||||
-> List (List ( String, Json.Encode.Value ))
|
-> List (List ( String, Json.Encode.Value ))
|
||||||
search_fields baseScore queryWords fields =
|
searchFields query fields =
|
||||||
queryWords
|
let
|
||||||
|> List.reverse
|
queryVariations q =
|
||||||
|> List.indexedMap
|
case ( List.head q, List.tail q ) of
|
||||||
(\queryIndex queryWord ->
|
( Just h, Just t ) ->
|
||||||
[ ( "multi_match"
|
let
|
||||||
, Json.Encode.object
|
tail : List (List String)
|
||||||
[ ( "type", Json.Encode.string "bool_prefix" )
|
tail =
|
||||||
, ( "query", Json.Encode.string queryWord )
|
queryVariations t
|
||||||
, ( "analyzer", Json.Encode.string "lowercase" )
|
in
|
||||||
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
|
List.append
|
||||||
, ( "prefix_length", Json.Encode.int 3 )
|
(List.map (\x -> List.append [ h ] x) tail)
|
||||||
, ( "operator", Json.Encode.string "or" )
|
(List.map (\x -> List.append [ String.reverse h ] x) tail)
|
||||||
, ( "_name"
|
|> Set.fromList
|
||||||
, Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt)
|
|> Set.toList
|
||||||
)
|
|
||||||
, ( "fields"
|
( Just h, Nothing ) ->
|
||||||
, Json.Encode.list Json.Encode.string
|
[ [ h ], [ String.reverse h ] ]
|
||||||
(List.map
|
|
||||||
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
|
( _, _ ) ->
|
||||||
fields
|
[ [], [] ]
|
||||||
)
|
|
||||||
)
|
reverseFields =
|
||||||
]
|
List.map (\( field, score ) -> ( field ++ "_reverse", score * 0.8 )) fields
|
||||||
)
|
|
||||||
]
|
allFields =
|
||||||
)
|
List.append fields reverseFields
|
||||||
|
|> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ])
|
||||||
|
|> List.concat
|
||||||
|
in
|
||||||
|
List.map
|
||||||
|
(\queryWords ->
|
||||||
|
[ ( "multi_match"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "type", Json.Encode.string "cross_fields" )
|
||||||
|
, ( "query", Json.Encode.string <| String.join " " queryWords )
|
||||||
|
, ( "analyzer", Json.Encode.string "whitespace" )
|
||||||
|
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
|
||||||
|
, ( "operator", Json.Encode.string "and" )
|
||||||
|
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
|
||||||
|
, ( "fields", Json.Encode.list Json.Encode.string allFields )
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
(queryVariations (String.words query))
|
||||||
|
|
||||||
|
|
||||||
makeRequestBody :
|
makeRequestBody :
|
||||||
|
@ -830,31 +849,29 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
|
||||||
[ ( "tie_breaker", Json.Encode.float 0.7 )
|
[ ( "tie_breaker", Json.Encode.float 0.7 )
|
||||||
, ( "queries"
|
, ( "queries"
|
||||||
, Json.Encode.list Json.Encode.object
|
, Json.Encode.list Json.Encode.object
|
||||||
[ [ ( "bool"
|
(searchFields query fields)
|
||||||
, Json.Encode.object
|
-- [ [ ( "bool"
|
||||||
[ ( "must"
|
-- , Json.Encode.object
|
||||||
, Json.Encode.list Json.Encode.object <|
|
-- [ ( "must"
|
||||||
search_fields
|
-- , Json.Encode.list Json.Encode.object <|
|
||||||
1.0
|
-- searchFields query fields
|
||||||
(String.words query)
|
-- )
|
||||||
fields
|
-- ]
|
||||||
)
|
-- )
|
||||||
]
|
-- ]
|
||||||
)
|
-- ]
|
||||||
]
|
-- , [ ( "bool"
|
||||||
, [ ( "bool"
|
-- , Json.Encode.object
|
||||||
, Json.Encode.object
|
-- [ ( "must"
|
||||||
[ ( "must"
|
-- , Json.Encode.list Json.Encode.object <|
|
||||||
, Json.Encode.list Json.Encode.object <|
|
-- searchFields
|
||||||
search_fields
|
-- 0.8
|
||||||
0.8
|
-- (String.words query |> List.map String.reverse)
|
||||||
(String.words query |> List.map String.reverse)
|
-- )
|
||||||
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
|
-- ]
|
||||||
)
|
-- )
|
||||||
]
|
-- ]
|
||||||
)
|
--]
|
||||||
]
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue