diff --git a/VERSION b/VERSION index 48082f7..b1bd38b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -12 +13 diff --git a/import-scripts/import_scripts/channel.py b/import-scripts/import_scripts/channel.py index b76c82a..2c2621d 100644 --- a/import-scripts/import_scripts/channel.py +++ b/import-scripts/import_scripts/channel.py @@ -34,7 +34,22 @@ ANALYSIS = { "normalizer": { "lowercase": {"type": "custom", "char_filter": [], "filter": ["lowercase"]} }, + "tokenizer": { + "edge": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 50, + "token_chars": [ + "letter", + "digit", + # Either we use them or we would need to strip them before that. + "punctuation", + "symbol", + ], + }, + }, "analyzer": { + "edge": {"tokenizer": "edge"}, "lowercase": { "type": "custom", "tokenizer": "keyword", @@ -65,22 +80,67 @@ MAPPING = { "drv_path": {"type": "keyword"}, }, }, - "package_attr_name": {"type": "keyword", "normalizer": "lowercase"}, - "package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"}, - "package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"}, + "package_attr_name": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_attr_name_reverse": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_attr_name_query": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, "package_attr_name_query_reverse": { "type": "keyword", "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_attr_set": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_attr_set_reverse": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_pname": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_pname_reverse": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, }, - "package_attr_set": {"type": "keyword", "normalizer": "lowercase"}, - "package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"}, - "package_pname": {"type": "keyword", "normalizer": "lowercase"}, - "package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"}, "package_pversion": {"type": "keyword"}, - "package_description": {"type": "text", "analyzer": "english"}, - "package_description_reverse": {"type": "text", "analyzer": "english"}, - "package_longDescription": {"type": "text", "analyzer": "english"}, - "package_longDescription_reverse": {"type": "text", "analyzer": "english"}, + "package_description": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_description_reverse": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_longDescription": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "package_longDescription_reverse": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, "package_license": { "type": "nested", "properties": {"fullName": {"type": "text"}, "url": {"type": "text"}}, @@ -98,12 +158,36 @@ MAPPING = { "package_homepage": {"type": "keyword"}, "package_system": {"type": "keyword"}, # Options fields - "option_name": {"type": "keyword", "normalizer": "lowercase"}, - "option_name_reverse": {"type": "keyword", "normalizer": "lowercase"}, - "option_name_query": {"type": "keyword", "normalizer": "lowercase"}, - "option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"}, - "option_description": {"type": "text", "analyzer": "english"}, - "option_description_reverse": {"type": "text", "analyzer": "english"}, + "option_name": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "option_name_reverse": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "option_name_query": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "option_name_query_reverse": { + "type": "keyword", + "normalizer": "lowercase", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "option_description": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, + "option_description_reverse": { + "type": "text", + "analyzer": "english", + "fields": {"edge": {"type": "text", "analyzer": "edge"}}, + }, "option_type": {"type": "keyword"}, "option_default": {"type": "text"}, "option_example": {"type": "text"}, diff --git a/src/Page/Options.elm b/src/Page/Options.elm index 970bef4..7af7e03 100644 --- a/src/Page/Options.elm +++ b/src/Page/Options.elm @@ -288,8 +288,8 @@ makeRequest options channel query from size sort = sort "option" "option_name" - [ ( "option_name", 2.2 ) - , ( "option_name_query", 2.0 ) + [ ( "option_name", 6.0 ) + , ( "option_name_query", 3.0 ) , ( "option_description", 1.0 ) ] ) diff --git a/src/Page/Packages.elm b/src/Page/Packages.elm index a913bb6..2db6edc 100644 --- a/src/Page/Packages.elm +++ b/src/Page/Packages.elm @@ -414,10 +414,10 @@ makeRequest options channel query from size sort = sort "package" "package_attr_name" - [ ( "package_attr_name", 2.4 ) - , ( "package_pname", 2.2 ) - , ( "package_attr_name_query", 2.0 ) - , ( "package_description", 1.2 ) + [ ( "package_attr_name", 9.0 ) + , ( "package_pname", 6.0 ) + , ( "package_attr_name_query", 4.0 ) + , ( "package_description", 1.3 ) , ( "package_longDescription", 1.0 ) ] ) diff --git a/src/Search.elm b/src/Search.elm index 4b62b50..924bf62 100644 --- a/src/Search.elm +++ b/src/Search.elm @@ -62,6 +62,7 @@ import Http import Json.Decode import Json.Encode import RemoteData +import Set import Task import Url.Builder @@ -753,38 +754,56 @@ filter_by_type type_ = ] -search_fields : - Float - -> List String +searchFields : + String -> List ( String, Float ) -> List (List ( String, Json.Encode.Value )) -search_fields baseScore queryWords fields = - queryWords - |> List.reverse - |> List.indexedMap - (\queryIndex queryWord -> - [ ( "multi_match" - , Json.Encode.object - [ ( "type", Json.Encode.string "bool_prefix" ) - , ( "query", Json.Encode.string queryWord ) - , ( "analyzer", Json.Encode.string "lowercase" ) - , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False ) - , ( "prefix_length", Json.Encode.int 3 ) - , ( "operator", Json.Encode.string "or" ) - , ( "_name" - , Json.Encode.string <| "multi_match_" ++ queryWord ++ "_" ++ (queryIndex + 1 |> String.fromInt) - ) - , ( "fields" - , Json.Encode.list Json.Encode.string - (List.map - (\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat)) - fields - ) - ) - ] - ) - ] - ) +searchFields query fields = + let + queryVariations q = + case ( List.head q, List.tail q ) of + ( Just h, Just t ) -> + let + tail : List (List String) + tail = + queryVariations t + in + List.append + (List.map (\x -> List.append [ h ] x) tail) + (List.map (\x -> List.append [ String.reverse h ] x) tail) + |> Set.fromList + |> Set.toList + + ( Just h, Nothing ) -> + [ [ h ], [ String.reverse h ] ] + + ( _, _ ) -> + [ [], [] ] + + reverseFields = + List.map (\( field, score ) -> ( field ++ "_reverse", score * 0.8 )) fields + + allFields = + List.append fields reverseFields + |> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ]) + |> List.concat + in + List.map + (\queryWords -> + [ ( "multi_match" + , Json.Encode.object + [ ( "type", Json.Encode.string "cross_fields" ) + , ( "query", Json.Encode.string <| String.join " " queryWords ) + , ( "analyzer", Json.Encode.string "whitespace" ) + , ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False ) + , ( "operator", Json.Encode.string "and" ) + , ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords ) + , ( "fields", Json.Encode.list Json.Encode.string allFields ) + ] + ) + ] + ) + (queryVariations (String.words query)) makeRequestBody : @@ -830,31 +849,29 @@ makeRequestBody query from sizeRaw sort type_ sortField fields = [ ( "tie_breaker", Json.Encode.float 0.7 ) , ( "queries" , Json.Encode.list Json.Encode.object - [ [ ( "bool" - , Json.Encode.object - [ ( "must" - , Json.Encode.list Json.Encode.object <| - search_fields - 1.0 - (String.words query) - fields - ) - ] - ) - ] - , [ ( "bool" - , Json.Encode.object - [ ( "must" - , Json.Encode.list Json.Encode.object <| - search_fields - 0.8 - (String.words query |> List.map String.reverse) - (List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields) - ) - ] - ) - ] - ] + (searchFields query fields) + -- [ [ ( "bool" + -- , Json.Encode.object + -- [ ( "must" + -- , Json.Encode.list Json.Encode.object <| + -- searchFields query fields + -- ) + -- ] + -- ) + -- ] + -- ] + -- , [ ( "bool" + -- , Json.Encode.object + -- [ ( "must" + -- , Json.Encode.list Json.Encode.object <| + -- searchFields + -- 0.8 + -- (String.words query |> List.map String.reverse) + -- ) + -- ] + -- ) + -- ] + --] ) ] )