From c12f4dfb6c9fa17ef1244dc3f2029a68e05e0dca Mon Sep 17 00:00:00 2001
From: Rok Garbas
Date: Tue, 1 Sep 2020 17:20:35 +0200
Subject: [PATCH] add suffix search (or reverse prefix) (#176)

also fixes #170
---
Review notes (annotation only; this free-text area sits between the "---"
separator and the diffstat and is not part of the change itself).

Formatting
* Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. The +/- markers and hunk line counts were checked against
  the hunk headers, but the leading whitespace of every line is a
  reconstruction -- TODO confirm against the repository before applying.

What the patch does
* MAPPING (channel.py) gains a "<field>_reverse" entry next to each of
  package_attr_name, package_attr_name_query, package_attr_set,
  package_pname, package_description, package_longDescription, option_name,
  option_name_query and option_description, using the same type and
  normalizer/analyzer as the field it mirrors.
* string_reverse(text) returns text[::-1].
* field_reverse(field):
    - str without " "  -> the reversed string;
    - str containing " " -> split on " ", each piece reversed, pieces kept in
      their original order and re-joined with " "
      ("example two" -> "elpmaxe owt");
    - list / tuple -> element-wise field_reverse, container type preserved;
    - None -> returned unchanged;
    - anything else -> NotImplementedError.
* get_packages / get_options compute each source value once, then emit both
  the value and field_reverse(value) in the yielded dict.
* Elm: the field lists change from "name^boost" strings to ( name, score )
  tuples. search_fields now takes a baseScore and the already-split query
  words and builds each "fields" entry as
      name ++ "^" ++ baseScore * (score + 0.1 * (queryIndex + 1))
  where queryIndex is the position after List.reverse of the words.
* makeRequestBody wraps "must" in a single dis_max (tie_breaker 0.7) over two
  bool/must clauses: the original words on the original fields with
  baseScore 1.0, and the character-reversed words on the "_reverse" fields
  with baseScore 0.8.

Points to confirm (NOTE(review))
* "# def field_reverse_str(field):" is added as a commented-out line with no
  body; looks like a leftover stub.
* run(): evaluation_packages_builds is now an empty dict() and the
  get_evaluation_builds(...) call survives only as a trailing comment. This
  looks like a debugging shortcut -- confirm it is intentional.
* get_packages wraps parse_query(...) in list(...) before field_reverse, but
  get_options passes the parse_query(...) result directly. field_reverse only
  accepts str/list/tuple/None, so this relies on parse_query's return type
  (not visible in this patch) -- verify.
* field_reverse splits on the literal " " only; other whitespace (e.g.
  newlines) stays inside a piece and is reversed together with it.
* test_field_reverse covers str, list and tuple inputs; the None and
  NotImplementedError branches have no test in this patch.
* VERSION goes 11 -> 12; presumably this feeds options.mappingSchemaVersion
  used in the "latest-<version>-<channel>" index name -- verify.

 VERSION                                  |  2 +-
 import-scripts/import_scripts/channel.py | 73 ++++++++++++++++++++++--
 import-scripts/tests/test_channel.py     | 15 +++++
 src/Page/Options.elm                     |  6 +-
 src/Page/Packages.elm                    | 10 ++--
 src/Search.elm                           | 51 ++++++++++++++---
 6 files changed, 135 insertions(+), 22 deletions(-)

diff --git a/VERSION b/VERSION
index b4de394..48082f7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11
+12
diff --git a/import-scripts/import_scripts/channel.py b/import-scripts/import_scripts/channel.py
index 805de6e..b76c82a 100644
--- a/import-scripts/import_scripts/channel.py
+++ b/import-scripts/import_scripts/channel.py
@@ -66,12 +66,21 @@ MAPPING = {
             },
         },
         "package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
+        "package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
         "package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
+        "package_attr_name_query_reverse": {
+            "type": "keyword",
+            "normalizer": "lowercase",
+        },
         "package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
+        "package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
         "package_pname": {"type": "keyword", "normalizer": "lowercase"},
+        "package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
         "package_pversion": {"type": "keyword"},
         "package_description": {"type": "text", "analyzer": "english"},
+        "package_description_reverse": {"type": "text", "analyzer": "english"},
         "package_longDescription": {"type": "text", "analyzer": "english"},
+        "package_longDescription_reverse": {"type": "text", "analyzer": "english"},
         "package_license": {
             "type": "nested",
             "properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
@@ -90,8 +99,11 @@ MAPPING = {
         "package_system": {"type": "keyword"},
         # Options fields
         "option_name": {"type": "keyword", "normalizer": "lowercase"},
+        "option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
         "option_name_query": {"type": "keyword", "normalizer": "lowercase"},
+        "option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
         "option_description": {"type": "text", "analyzer": "english"},
+        "option_description_reverse": {"type": "text", "analyzer": "english"},
         "option_type": {"type": "keyword"},
         "option_default": {"type": "text"},
         "option_example": {"type": "text"},
@@ -100,6 +112,37 @@ MAPPING = {
 }
 
 
+# def field_reverse_str(field):
+
+
+def string_reverse(text):
+    return text[::-1]
+
+
+def field_reverse(field):
+
+    if isinstance(field, str):
+
+        if " " in field:
+            field = " ".join(map(field_reverse, field.split(" ")))
+        else:
+            field = string_reverse(field)
+
+    elif isinstance(field, list):
+        field = list(map(field_reverse, field))
+
+    elif isinstance(field, tuple):
+        field = tuple(map(field_reverse, field))
+
+    elif field is None:
+        pass
+
+    else:
+        raise NotImplementedError(f"Don't know how to reverse {field}")
+
+    return field
+
+
 def parse_query(text):
     """Tokenize package attr_name
 
@@ -334,16 +377,27 @@ def get_packages(evaluation, evaluation_builds):
                         }
                     )
 
+            package_attr_name_query = list(parse_query(attr_name))
+            package_pname = remove_attr_set(data["pname"])
+            package_description = data["meta"].get("description")
+            package_longDescription = data["meta"].get("longDescription", "")
+
             yield dict(
                 type="package",
                 package_hydra=hydra,
                 package_attr_name=attr_name,
-                package_attr_name_query=list(parse_query(attr_name)),
+                package_attr_name_reverse=field_reverse(attr_name),
+                package_attr_name_query=package_attr_name_query,
+                package_attr_name_query_reverse=field_reverse(package_attr_name_query),
                 package_attr_set=attr_set,
-                package_pname=remove_attr_set(data["pname"]),
+                package_attr_set_reverse=field_reverse(attr_set),
+                package_pname=package_pname,
+                package_pname_reverse=field_reverse(package_pname),
                 package_pversion=data["version"],
-                package_description=data["meta"].get("description"),
-                package_longDescription=data["meta"].get("longDescription", ""),
+                package_description=package_description,
+                package_description_reverse=field_reverse(package_description),
+                package_longDescription=package_longDescription,
+                package_longDescription_reverse=field_reverse(package_longDescription),
                 package_license=licenses,
                 package_maintainers=maintainers,
                 package_platforms=[i for i in platforms if i],
@@ -400,11 +454,16 @@ def get_options(evaluation):
                         xml_description, "html", format="docbook",
                     )
 
+            option_name_query = parse_query(name)
+
             yield dict(
                 type="option",
                 option_name=name,
-                option_name_query=parse_query(name),
+                option_name_reverse=field_reverse(name),
+                option_name_query=option_name_query,
+                option_name_query_reverse=field_reverse(option_name_query),
                 option_description=description,
+                option_description_reverse=field_reverse(description),
                 option_type=option.get("type"),
                 option_default=default,
                 option_example=example,
@@ -507,7 +566,9 @@ def run(es_url, channel, force, verbose):
 
     evaluation_packages = get_last_evaluation(CHANNELS[channel])
     evaluation_options = get_last_evaluation(CHANNELS[channel])
-    evaluation_packages_builds = get_evaluation_builds(evaluation_packages["id"])
+    evaluation_packages_builds = (
+        dict()
+    )  # get_evaluation_builds(evaluation_packages["id"])
 
     es = elasticsearch.Elasticsearch([es_url])
 
diff --git a/import-scripts/tests/test_channel.py b/import-scripts/tests/test_channel.py
index 6cf4568..cba00a4 100644
--- a/import-scripts/tests/test_channel.py
+++ b/import-scripts/tests/test_channel.py
@@ -66,3 +66,18 @@ def test_parse_query(text, expected):
     import import_scripts.channel
 
     assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected)
+
+
+@pytest.mark.parametrize(
+    "field,expected",
+    [
+        ("example", "elpmaxe"),
+        ("example two", "elpmaxe owt"),
+        (["example", "three"], ["elpmaxe", "eerht"]),
+        (("example", "three"), ("elpmaxe", "eerht")),
+    ],
+)
+def test_field_reverse(field, expected):
+    import import_scripts.channel
+
+    assert import_scripts.channel.field_reverse(field) == expected
diff --git a/src/Page/Options.elm b/src/Page/Options.elm
index 7693251..970bef4 100644
--- a/src/Page/Options.elm
+++ b/src/Page/Options.elm
@@ -288,9 +288,9 @@ makeRequest options channel query from size sort =
             sort
             "option"
             "option_name"
-            [ "option_name^2.2"
-            , "option_name_query^2.1"
-            , "option_description^1."
+            [ ( "option_name", 2.2 )
+            , ( "option_name_query", 2.0 )
+            , ( "option_description", 1.0 )
             ]
         )
         ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
diff --git a/src/Page/Packages.elm b/src/Page/Packages.elm
index e3a8826..dbae19f 100644
--- a/src/Page/Packages.elm
+++ b/src/Page/Packages.elm
@@ -414,11 +414,11 @@ makeRequest options channel query from size sort =
             sort
             "package"
             "package_attr_name"
-            [ "package_attr_name^2.3"
-            , "package_pname^2.2"
-            , "package_attr_name_query^2.1"
-            , "package_description^1."
-            , "package_longDescription^1."
+            [ ( "package_attr_name", 2.4 )
+            , ( "package_pname", 2.2 )
+            , ( "package_attr_name_query", 2.0 )
+            , ( "package_description", 1.2 )
+            , ( "package_longDescription", 1.0 )
             ]
         )
         ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
diff --git a/src/Search.elm b/src/Search.elm
index 14487ff..9ebd8f8 100644
--- a/src/Search.elm
+++ b/src/Search.elm
@@ -737,12 +737,12 @@ filter_by_type type_ =
 
 
 search_fields :
-    String
+    Float
     -> List String
+    -> List ( String, Float )
     -> List (List ( String, Json.Encode.Value ))
-search_fields query fields =
-    query
-        |> String.words
+search_fields baseScore queryWords fields =
+    queryWords
         |> List.reverse
         |> List.indexedMap
             (\queryIndex queryWord ->
@@ -758,7 +758,10 @@ search_fields query fields =
                           )
                         , ( "fields"
                           , Json.Encode.list Json.Encode.string
-                                (List.map (\field -> field ++ (queryIndex + 1 |> String.fromInt)) fields)
+                                (List.map
+                                    (\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
+                                    fields
+                                )
                           )
                         ]
                   )
@@ -773,7 +776,7 @@ makeRequestBody :
     -> Sort
     -> String
     -> String
-    -> List String
+    -> List ( String, Float )
     -> Http.Body
 makeRequestBody query from sizeRaw sort type_ sortField fields =
     let
@@ -804,7 +807,41 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
                               )
                             , ( "must"
                               , Json.Encode.list Json.Encode.object
-                                    (search_fields query fields)
+                                    [ [ ( "dis_max"
+                                        , Json.Encode.object
+                                            [ ( "tie_breaker", Json.Encode.float 0.7 )
+                                            , ( "queries"
+                                              , Json.Encode.list Json.Encode.object
+                                                    [ [ ( "bool"
+                                                        , Json.Encode.object
+                                                            [ ( "must"
+                                                              , Json.Encode.list Json.Encode.object <|
+                                                                    search_fields
+                                                                        1.0
+                                                                        (String.words query)
+                                                                        fields
+                                                              )
+                                                            ]
+                                                        )
+                                                      ]
+                                                    , [ ( "bool"
+                                                        , Json.Encode.object
+                                                            [ ( "must"
+                                                              , Json.Encode.list Json.Encode.object <|
+                                                                    search_fields
+                                                                        0.8
+                                                                        (String.words query |> List.map String.reverse)
+                                                                        (List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
+                                                              )
+                                                            ]
+                                                        )
+                                                      ]
+                                                    ]
+                                              )
+                                            ]
+                                        )
+                                      ]
+                                    ]
                               )
                             ]
                       )