add suffix search (or reverse prefix) (#176)

also fixes #170
This commit is contained in:
Rok Garbas 2020-09-01 17:20:35 +02:00 committed by GitHub
parent 2ffaefeba0
commit c12f4dfb6c
Failed to generate hash of commit
6 changed files with 135 additions and 22 deletions

View file

@ -1 +1 @@
11 12

View file

@ -66,12 +66,21 @@ MAPPING = {
}, },
}, },
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"}, "package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"}, "package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_query_reverse": {
"type": "keyword",
"normalizer": "lowercase",
},
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"}, "package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pname": {"type": "keyword", "normalizer": "lowercase"}, "package_pname": {"type": "keyword", "normalizer": "lowercase"},
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pversion": {"type": "keyword"}, "package_pversion": {"type": "keyword"},
"package_description": {"type": "text", "analyzer": "english"}, "package_description": {"type": "text", "analyzer": "english"},
"package_description_reverse": {"type": "text", "analyzer": "english"},
"package_longDescription": {"type": "text", "analyzer": "english"}, "package_longDescription": {"type": "text", "analyzer": "english"},
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
"package_license": { "package_license": {
"type": "nested", "type": "nested",
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}}, "properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
@ -90,8 +99,11 @@ MAPPING = {
"package_system": {"type": "keyword"}, "package_system": {"type": "keyword"},
# Options fields # Options fields
"option_name": {"type": "keyword", "normalizer": "lowercase"}, "option_name": {"type": "keyword", "normalizer": "lowercase"},
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query": {"type": "keyword", "normalizer": "lowercase"}, "option_name_query": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_description": {"type": "text", "analyzer": "english"}, "option_description": {"type": "text", "analyzer": "english"},
"option_description_reverse": {"type": "text", "analyzer": "english"},
"option_type": {"type": "keyword"}, "option_type": {"type": "keyword"},
"option_default": {"type": "text"}, "option_default": {"type": "text"},
"option_example": {"type": "text"}, "option_example": {"type": "text"},
@ -100,6 +112,37 @@ MAPPING = {
} }
# Helpers that build the values stored in the "*_reverse" fields.
def string_reverse(text):
    """Return ``text`` with its characters in reverse order."""
    return text[::-1]


def field_reverse(field):
    """Reverse every word of a field value while keeping the words in place.

    The result is what gets stored in the ``*_reverse`` fields, so that a
    prefix match against the reversed value acts as a suffix match against
    the original one.

    Args:
        field: A string, a list or tuple of reversible values (handled
            recursively), or ``None``.

    Returns:
        A value of the same shape as ``field``. In strings each
        whitespace-separated word is reversed individually, and word order
        and the whitespace between words are preserved. ``None`` is
        returned unchanged.

    Raises:
        NotImplementedError: If ``field`` is of any other type.
    """
    # Imported locally so the helper does not depend on the module's import
    # block already providing ``re``.
    import re

    if field is None:
        return None
    if isinstance(field, str):
        # Split on *any* whitespace run, not just " ". Otherwise a multi-line
        # value would be reversed as a whole, flipping the word order across
        # newlines and tabs. The capturing group keeps the separators in the
        # result so they can be re-emitted untouched.
        return "".join(
            part if part.isspace() else string_reverse(part)
            for part in re.split(r"(\s+)", field)
        )
    if isinstance(field, list):
        return [field_reverse(item) for item in field]
    if isinstance(field, tuple):
        return tuple(field_reverse(item) for item in field)
    raise NotImplementedError(f"Don't know how to reverse {field}")
def parse_query(text): def parse_query(text):
"""Tokenize package attr_name """Tokenize package attr_name
@ -334,16 +377,27 @@ def get_packages(evaluation, evaluation_builds):
} }
) )
package_attr_name_query = list(parse_query(attr_name))
package_pname = remove_attr_set(data["pname"])
package_description = data["meta"].get("description")
package_longDescription = data["meta"].get("longDescription", "")
yield dict( yield dict(
type="package", type="package",
package_hydra=hydra, package_hydra=hydra,
package_attr_name=attr_name, package_attr_name=attr_name,
package_attr_name_query=list(parse_query(attr_name)), package_attr_name_reverse=field_reverse(attr_name),
package_attr_name_query=package_attr_name_query,
package_attr_name_query_reverse=field_reverse(package_attr_name_query),
package_attr_set=attr_set, package_attr_set=attr_set,
package_pname=remove_attr_set(data["pname"]), package_attr_set_reverse=field_reverse(attr_set),
package_pname=package_pname,
package_pname_reverse=field_reverse(package_pname),
package_pversion=data["version"], package_pversion=data["version"],
package_description=data["meta"].get("description"), package_description=package_description,
package_longDescription=data["meta"].get("longDescription", ""), package_description_reverse=field_reverse(package_description),
package_longDescription=package_longDescription,
package_longDescription_reverse=field_reverse(package_longDescription),
package_license=licenses, package_license=licenses,
package_maintainers=maintainers, package_maintainers=maintainers,
package_platforms=[i for i in platforms if i], package_platforms=[i for i in platforms if i],
@ -400,11 +454,16 @@ def get_options(evaluation):
xml_description, "html", format="docbook", xml_description, "html", format="docbook",
) )
option_name_query = parse_query(name)
yield dict( yield dict(
type="option", type="option",
option_name=name, option_name=name,
option_name_query=parse_query(name), option_name_reverse=field_reverse(name),
option_name_query=option_name_query,
option_name_query_reverse=field_reverse(option_name_query),
option_description=description, option_description=description,
option_description_reverse=field_reverse(description),
option_type=option.get("type"), option_type=option.get("type"),
option_default=default, option_default=default,
option_example=example, option_example=example,
@ -507,7 +566,9 @@ def run(es_url, channel, force, verbose):
evaluation_packages = get_last_evaluation(CHANNELS[channel]) evaluation_packages = get_last_evaluation(CHANNELS[channel])
evaluation_options = get_last_evaluation(CHANNELS[channel]) evaluation_options = get_last_evaluation(CHANNELS[channel])
evaluation_packages_builds = get_evaluation_builds(evaluation_packages["id"]) evaluation_packages_builds = (
dict()
) # get_evaluation_builds(evaluation_packages["id"])
es = elasticsearch.Elasticsearch([es_url]) es = elasticsearch.Elasticsearch([es_url])

View file

@ -66,3 +66,18 @@ def test_parse_query(text, expected):
import import_scripts.channel import import_scripts.channel
assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected) assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected)
@pytest.mark.parametrize(
    ("field", "expected"),
    [
        ("example", "elpmaxe"),
        ("example two", "elpmaxe owt"),
        (["example", "three"], ["elpmaxe", "eerht"]),
        (("example", "three"), ("elpmaxe", "eerht")),
    ],
)
def test_field_reverse(field, expected):
    """field_reverse reverses each word and keeps the container type."""
    # Imported inside the test, like the other tests in this module.
    from import_scripts.channel import field_reverse

    actual = field_reverse(field)
    assert actual == expected

View file

@ -288,9 +288,9 @@ makeRequest options channel query from size sort =
sort sort
"option" "option"
"option_name" "option_name"
[ "option_name^2.2" [ ( "option_name", 2.2 )
, "option_name_query^2.1" , ( "option_name_query", 2.0 )
, "option_description^1." , ( "option_description", 1.0 )
] ]
) )
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)

View file

@ -414,11 +414,11 @@ makeRequest options channel query from size sort =
sort sort
"package" "package"
"package_attr_name" "package_attr_name"
[ "package_attr_name^2.3" [ ( "package_attr_name", 2.4 )
, "package_pname^2.2" , ( "package_pname", 2.2 )
, "package_attr_name_query^2.1" , ( "package_attr_name_query", 2.0 )
, "package_description^1." , ( "package_description", 1.2 )
, "package_longDescription^1." , ( "package_longDescription", 1.0 )
] ]
) )
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel) ("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)

View file

@ -737,12 +737,12 @@ filter_by_type type_ =
search_fields : search_fields :
String Float
-> List String -> List String
-> List ( String, Float )
-> List (List ( String, Json.Encode.Value )) -> List (List ( String, Json.Encode.Value ))
search_fields query fields = search_fields baseScore queryWords fields =
query queryWords
|> String.words
|> List.reverse |> List.reverse
|> List.indexedMap |> List.indexedMap
(\queryIndex queryWord -> (\queryIndex queryWord ->
@ -758,7 +758,10 @@ search_fields query fields =
) )
, ( "fields" , ( "fields"
, Json.Encode.list Json.Encode.string , Json.Encode.list Json.Encode.string
(List.map (\field -> field ++ (queryIndex + 1 |> String.fromInt)) fields) (List.map
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
fields
)
) )
] ]
) )
@ -773,7 +776,7 @@ makeRequestBody :
-> Sort -> Sort
-> String -> String
-> String -> String
-> List String -> List ( String, Float )
-> Http.Body -> Http.Body
makeRequestBody query from sizeRaw sort type_ sortField fields = makeRequestBody query from sizeRaw sort type_ sortField fields =
let let
@ -804,7 +807,41 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
) )
, ( "must" , ( "must"
, Json.Encode.list Json.Encode.object , Json.Encode.list Json.Encode.object
(search_fields query fields) [ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
[ [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
1.0
(String.words query)
fields
)
]
)
]
, [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
0.8
(String.words query |> List.map String.reverse)
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
)
]
)
]
]
)
]
)
]
]
) )
] ]
) )