Add suffix search (prefix search over reversed fields) (#176)

Also fixes #170.
This commit is contained in:
Rok Garbas 2020-09-01 17:20:35 +02:00 committed by GitHub
parent 2ffaefeba0
commit c12f4dfb6c
Failed to generate hash of commit
6 changed files with 135 additions and 22 deletions

View file

@ -1 +1 @@
11
12

View file

@ -66,12 +66,21 @@ MAPPING = {
},
},
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_name_query_reverse": {
"type": "keyword",
"normalizer": "lowercase",
},
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
"package_pversion": {"type": "keyword"},
"package_description": {"type": "text", "analyzer": "english"},
"package_description_reverse": {"type": "text", "analyzer": "english"},
"package_longDescription": {"type": "text", "analyzer": "english"},
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
"package_license": {
"type": "nested",
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
@ -90,8 +99,11 @@ MAPPING = {
"package_system": {"type": "keyword"},
# Options fields
"option_name": {"type": "keyword", "normalizer": "lowercase"},
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
"option_description": {"type": "text", "analyzer": "english"},
"option_description_reverse": {"type": "text", "analyzer": "english"},
"option_type": {"type": "keyword"},
"option_default": {"type": "text"},
"option_example": {"type": "text"},
@ -100,6 +112,37 @@ MAPPING = {
}
# def field_reverse_str(field):
def string_reverse(text):
    """Return ``text`` with its characters in reverse order."""
    return text[::-1]


def field_reverse(field):
    """Reverse the characters of every word in ``field``.

    Strings are split on single spaces and each piece is reversed in
    place, so word order is kept ("example two" -> "elpmaxe owt").
    Lists and tuples are processed element by element (recursively) and
    come back as a list or tuple respectively. ``None`` is returned
    unchanged.

    Raises:
        NotImplementedError: if ``field`` is of any other type.
    """
    if field is None:
        return None

    if isinstance(field, str):
        # Splitting a string that has no space yields a single piece, so
        # this one expression covers both single- and multi-word values.
        return " ".join(string_reverse(word) for word in field.split(" "))

    if isinstance(field, list):
        return [field_reverse(item) for item in field]

    if isinstance(field, tuple):
        return tuple(field_reverse(item) for item in field)

    raise NotImplementedError(f"Don't know how to reverse {field}")
def parse_query(text):
"""Tokenize package attr_name
@ -334,16 +377,27 @@ def get_packages(evaluation, evaluation_builds):
}
)
package_attr_name_query = list(parse_query(attr_name))
package_pname = remove_attr_set(data["pname"])
package_description = data["meta"].get("description")
package_longDescription = data["meta"].get("longDescription", "")
yield dict(
type="package",
package_hydra=hydra,
package_attr_name=attr_name,
package_attr_name_query=list(parse_query(attr_name)),
package_attr_name_reverse=field_reverse(attr_name),
package_attr_name_query=package_attr_name_query,
package_attr_name_query_reverse=field_reverse(package_attr_name_query),
package_attr_set=attr_set,
package_pname=remove_attr_set(data["pname"]),
package_attr_set_reverse=field_reverse(attr_set),
package_pname=package_pname,
package_pname_reverse=field_reverse(package_pname),
package_pversion=data["version"],
package_description=data["meta"].get("description"),
package_longDescription=data["meta"].get("longDescription", ""),
package_description=package_description,
package_description_reverse=field_reverse(package_description),
package_longDescription=package_longDescription,
package_longDescription_reverse=field_reverse(package_longDescription),
package_license=licenses,
package_maintainers=maintainers,
package_platforms=[i for i in platforms if i],
@ -400,11 +454,16 @@ def get_options(evaluation):
xml_description, "html", format="docbook",
)
option_name_query = parse_query(name)
yield dict(
type="option",
option_name=name,
option_name_query=parse_query(name),
option_name_reverse=field_reverse(name),
option_name_query=option_name_query,
option_name_query_reverse=field_reverse(option_name_query),
option_description=description,
option_description_reverse=field_reverse(description),
option_type=option.get("type"),
option_default=default,
option_example=example,
@ -507,7 +566,9 @@ def run(es_url, channel, force, verbose):
evaluation_packages = get_last_evaluation(CHANNELS[channel])
evaluation_options = get_last_evaluation(CHANNELS[channel])
evaluation_packages_builds = get_evaluation_builds(evaluation_packages["id"])
evaluation_packages_builds = (
dict()
) # get_evaluation_builds(evaluation_packages["id"])
es = elasticsearch.Elasticsearch([es_url])

View file

@ -66,3 +66,18 @@ def test_parse_query(text, expected):
import import_scripts.channel
assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected)
@pytest.mark.parametrize(
    "field,expected",
    [
        # Single word: its characters are reversed.
        ("example", "elpmaxe"),
        # Several words: each word is reversed, word order is kept.
        ("example two", "elpmaxe owt"),
        # Containers: every element is reversed, container type is kept.
        (["example", "three"], ["elpmaxe", "eerht"]),
        (("example", "three"), ("elpmaxe", "eerht")),
    ],
)
def test_field_reverse(field, expected):
    """Check field_reverse on a plain string, a phrase, a list and a tuple."""
    from import_scripts import channel

    reversed_field = channel.field_reverse(field)
    assert reversed_field == expected

View file

@ -288,9 +288,9 @@ makeRequest options channel query from size sort =
sort
"option"
"option_name"
[ "option_name^2.2"
, "option_name_query^2.1"
, "option_description^1."
[ ( "option_name", 2.2 )
, ( "option_name_query", 2.0 )
, ( "option_description", 1.0 )
]
)
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)

View file

@ -414,11 +414,11 @@ makeRequest options channel query from size sort =
sort
"package"
"package_attr_name"
[ "package_attr_name^2.3"
, "package_pname^2.2"
, "package_attr_name_query^2.1"
, "package_description^1."
, "package_longDescription^1."
[ ( "package_attr_name", 2.4 )
, ( "package_pname", 2.2 )
, ( "package_attr_name_query", 2.0 )
, ( "package_description", 1.2 )
, ( "package_longDescription", 1.0 )
]
)
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)

View file

@ -737,12 +737,12 @@ filter_by_type type_ =
search_fields :
String
Float
-> List String
-> List ( String, Float )
-> List (List ( String, Json.Encode.Value ))
search_fields query fields =
query
|> String.words
search_fields baseScore queryWords fields =
queryWords
|> List.reverse
|> List.indexedMap
(\queryIndex queryWord ->
@ -758,7 +758,10 @@ search_fields query fields =
)
, ( "fields"
, Json.Encode.list Json.Encode.string
(List.map (\field -> field ++ (queryIndex + 1 |> String.fromInt)) fields)
(List.map
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
fields
)
)
]
)
@ -773,7 +776,7 @@ makeRequestBody :
-> Sort
-> String
-> String
-> List String
-> List ( String, Float )
-> Http.Body
makeRequestBody query from sizeRaw sort type_ sortField fields =
let
@ -804,7 +807,41 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
)
, ( "must"
, Json.Encode.list Json.Encode.object
(search_fields query fields)
[ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
[ [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
1.0
(String.words query)
fields
)
]
)
]
, [ ( "bool"
, Json.Encode.object
[ ( "must"
, Json.Encode.list Json.Encode.object <|
search_fields
0.8
(String.words query |> List.map String.reverse)
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
)
]
)
]
]
)
]
)
]
]
)
]
)