parent
2ffaefeba0
commit
c12f4dfb6c
|
@ -66,12 +66,21 @@ MAPPING = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
|
"package_attr_name": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"package_attr_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
"package_attr_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"package_attr_name_query_reverse": {
|
||||||
|
"type": "keyword",
|
||||||
|
"normalizer": "lowercase",
|
||||||
|
},
|
||||||
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
|
"package_attr_set": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"package_attr_set_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
|
"package_pname": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"package_pname_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
"package_pversion": {"type": "keyword"},
|
"package_pversion": {"type": "keyword"},
|
||||||
"package_description": {"type": "text", "analyzer": "english"},
|
"package_description": {"type": "text", "analyzer": "english"},
|
||||||
|
"package_description_reverse": {"type": "text", "analyzer": "english"},
|
||||||
"package_longDescription": {"type": "text", "analyzer": "english"},
|
"package_longDescription": {"type": "text", "analyzer": "english"},
|
||||||
|
"package_longDescription_reverse": {"type": "text", "analyzer": "english"},
|
||||||
"package_license": {
|
"package_license": {
|
||||||
"type": "nested",
|
"type": "nested",
|
||||||
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
|
"properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
|
||||||
|
@ -90,8 +99,11 @@ MAPPING = {
|
||||||
"package_system": {"type": "keyword"},
|
"package_system": {"type": "keyword"},
|
||||||
# Options fields
|
# Options fields
|
||||||
"option_name": {"type": "keyword", "normalizer": "lowercase"},
|
"option_name": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"option_name_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
"option_name_query": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
|
"option_name_query_reverse": {"type": "keyword", "normalizer": "lowercase"},
|
||||||
"option_description": {"type": "text", "analyzer": "english"},
|
"option_description": {"type": "text", "analyzer": "english"},
|
||||||
|
"option_description_reverse": {"type": "text", "analyzer": "english"},
|
||||||
"option_type": {"type": "keyword"},
|
"option_type": {"type": "keyword"},
|
||||||
"option_default": {"type": "text"},
|
"option_default": {"type": "text"},
|
||||||
"option_example": {"type": "text"},
|
"option_example": {"type": "text"},
|
||||||
|
@ -100,6 +112,37 @@ MAPPING = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# def field_reverse_str(field):
|
||||||
|
|
||||||
|
|
||||||
|
def string_reverse(text):
    """Return ``text`` with its elements in reverse order.

    The reversal is done with a negative-step slice, so the result has the
    same type as the sliceable value passed in (``str`` in, ``str`` out).
    """
    backwards = slice(None, None, -1)
    return text[backwards]
|
||||||
|
|
||||||
|
|
||||||
|
def field_reverse(field):
    """Reverse every word of ``field`` while keeping the words in order.

    Used to build the ``*_reverse`` values that are indexed next to the
    regular fields.

    * ``str``: each run of non-whitespace characters is reversed in place;
      whitespace runs (spaces, tabs, newlines, ...) are kept verbatim, so
      ``"example two"`` becomes ``"elpmaxe owt"`` and multi-line text keeps
      its word order as well.
    * ``list`` / ``tuple``: every element is reversed recursively and a new
      ``list`` / ``tuple`` is returned.
    * ``None``: returned unchanged.

    Raises:
        NotImplementedError: if ``field`` is of any other type.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if field is None:
        return None

    if isinstance(field, str):
        # With a single capturing group, re.split() alternates
        # [text, separator, text, ...]: odd indices are the whitespace runs
        # and must be emitted untouched, even indices are words to reverse.
        pieces = re.split(r"(\s+)", field)
        return "".join(
            piece if index % 2 else piece[::-1]
            for index, piece in enumerate(pieces)
        )

    if isinstance(field, list):
        return [field_reverse(item) for item in field]

    if isinstance(field, tuple):
        return tuple(field_reverse(item) for item in field)

    raise NotImplementedError(f"Don't know how to reverse {field}")
|
||||||
|
|
||||||
|
|
||||||
def parse_query(text):
|
def parse_query(text):
|
||||||
"""Tokenize package attr_name
|
"""Tokenize package attr_name
|
||||||
|
|
||||||
|
@ -334,16 +377,27 @@ def get_packages(evaluation, evaluation_builds):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
package_attr_name_query = list(parse_query(attr_name))
|
||||||
|
package_pname = remove_attr_set(data["pname"])
|
||||||
|
package_description = data["meta"].get("description")
|
||||||
|
package_longDescription = data["meta"].get("longDescription", "")
|
||||||
|
|
||||||
yield dict(
|
yield dict(
|
||||||
type="package",
|
type="package",
|
||||||
package_hydra=hydra,
|
package_hydra=hydra,
|
||||||
package_attr_name=attr_name,
|
package_attr_name=attr_name,
|
||||||
package_attr_name_query=list(parse_query(attr_name)),
|
package_attr_name_reverse=field_reverse(attr_name),
|
||||||
|
package_attr_name_query=package_attr_name_query,
|
||||||
|
package_attr_name_query_reverse=field_reverse(package_attr_name_query),
|
||||||
package_attr_set=attr_set,
|
package_attr_set=attr_set,
|
||||||
package_pname=remove_attr_set(data["pname"]),
|
package_attr_set_reverse=field_reverse(attr_set),
|
||||||
|
package_pname=package_pname,
|
||||||
|
package_pname_reverse=field_reverse(package_pname),
|
||||||
package_pversion=data["version"],
|
package_pversion=data["version"],
|
||||||
package_description=data["meta"].get("description"),
|
package_description=package_description,
|
||||||
package_longDescription=data["meta"].get("longDescription", ""),
|
package_description_reverse=field_reverse(package_description),
|
||||||
|
package_longDescription=package_longDescription,
|
||||||
|
package_longDescription_reverse=field_reverse(package_longDescription),
|
||||||
package_license=licenses,
|
package_license=licenses,
|
||||||
package_maintainers=maintainers,
|
package_maintainers=maintainers,
|
||||||
package_platforms=[i for i in platforms if i],
|
package_platforms=[i for i in platforms if i],
|
||||||
|
@ -400,11 +454,16 @@ def get_options(evaluation):
|
||||||
xml_description, "html", format="docbook",
|
xml_description, "html", format="docbook",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
option_name_query = parse_query(name)
|
||||||
|
|
||||||
yield dict(
|
yield dict(
|
||||||
type="option",
|
type="option",
|
||||||
option_name=name,
|
option_name=name,
|
||||||
option_name_query=parse_query(name),
|
option_name_reverse=field_reverse(name),
|
||||||
|
option_name_query=option_name_query,
|
||||||
|
option_name_query_reverse=field_reverse(option_name_query),
|
||||||
option_description=description,
|
option_description=description,
|
||||||
|
option_description_reverse=field_reverse(description),
|
||||||
option_type=option.get("type"),
|
option_type=option.get("type"),
|
||||||
option_default=default,
|
option_default=default,
|
||||||
option_example=example,
|
option_example=example,
|
||||||
|
@ -507,7 +566,9 @@ def run(es_url, channel, force, verbose):
|
||||||
|
|
||||||
evaluation_packages = get_last_evaluation(CHANNELS[channel])
|
evaluation_packages = get_last_evaluation(CHANNELS[channel])
|
||||||
evaluation_options = get_last_evaluation(CHANNELS[channel])
|
evaluation_options = get_last_evaluation(CHANNELS[channel])
|
||||||
evaluation_packages_builds = get_evaluation_builds(evaluation_packages["id"])
|
evaluation_packages_builds = (
|
||||||
|
dict()
|
||||||
|
) # get_evaluation_builds(evaluation_packages["id"])
|
||||||
|
|
||||||
es = elasticsearch.Elasticsearch([es_url])
|
es = elasticsearch.Elasticsearch([es_url])
|
||||||
|
|
||||||
|
|
|
@ -66,3 +66,18 @@ def test_parse_query(text, expected):
|
||||||
import import_scripts.channel
|
import import_scripts.channel
|
||||||
|
|
||||||
assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected)
|
assert sorted(import_scripts.channel.parse_query(text)) == sorted(expected)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "field,expected",
    [
        ("example", "elpmaxe"),
        ("example two", "elpmaxe owt"),
        (["example", "three"], ["elpmaxe", "eerht"]),
        (("example", "three"), ("elpmaxe", "eerht")),
        # Edge cases: empty string, None passthrough, None nested in a list.
        ("", ""),
        (None, None),
        (["example two", None], ["elpmaxe owt", None]),
    ],
)
def test_field_reverse(field, expected):
    """field_reverse reverses each word of strings, lists and tuples.

    ``None`` is expected to pass through unchanged, both at the top level
    and as an element of a list.
    """
    # Imported inside the test, as in the other tests of this module.
    import import_scripts.channel

    assert import_scripts.channel.field_reverse(field) == expected
|
||||||
|
|
|
@ -288,9 +288,9 @@ makeRequest options channel query from size sort =
|
||||||
sort
|
sort
|
||||||
"option"
|
"option"
|
||||||
"option_name"
|
"option_name"
|
||||||
[ "option_name^2.2"
|
[ ( "option_name", 2.2 )
|
||||||
, "option_name_query^2.1"
|
, ( "option_name_query", 2.0 )
|
||||||
, "option_description^1."
|
, ( "option_description", 1.0 )
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
|
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
|
||||||
|
|
|
@ -414,11 +414,11 @@ makeRequest options channel query from size sort =
|
||||||
sort
|
sort
|
||||||
"package"
|
"package"
|
||||||
"package_attr_name"
|
"package_attr_name"
|
||||||
[ "package_attr_name^2.3"
|
[ ( "package_attr_name", 2.4 )
|
||||||
, "package_pname^2.2"
|
, ( "package_pname", 2.2 )
|
||||||
, "package_attr_name_query^2.1"
|
, ( "package_attr_name_query", 2.0 )
|
||||||
, "package_description^1."
|
, ( "package_description", 1.2 )
|
||||||
, "package_longDescription^1."
|
, ( "package_longDescription", 1.0 )
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
|
("latest-" ++ String.fromInt options.mappingSchemaVersion ++ "-" ++ channel)
|
||||||
|
|
|
@ -737,12 +737,12 @@ filter_by_type type_ =
|
||||||
|
|
||||||
|
|
||||||
search_fields :
|
search_fields :
|
||||||
String
|
Float
|
||||||
-> List String
|
-> List String
|
||||||
|
-> List ( String, Float )
|
||||||
-> List (List ( String, Json.Encode.Value ))
|
-> List (List ( String, Json.Encode.Value ))
|
||||||
search_fields query fields =
|
search_fields baseScore queryWords fields =
|
||||||
query
|
queryWords
|
||||||
|> String.words
|
|
||||||
|> List.reverse
|
|> List.reverse
|
||||||
|> List.indexedMap
|
|> List.indexedMap
|
||||||
(\queryIndex queryWord ->
|
(\queryIndex queryWord ->
|
||||||
|
@ -758,7 +758,10 @@ search_fields query fields =
|
||||||
)
|
)
|
||||||
, ( "fields"
|
, ( "fields"
|
||||||
, Json.Encode.list Json.Encode.string
|
, Json.Encode.list Json.Encode.string
|
||||||
(List.map (\field -> field ++ (queryIndex + 1 |> String.fromInt)) fields)
|
(List.map
|
||||||
|
(\( field, score ) -> field ++ "^" ++ (baseScore * (score + (0.1 * (queryIndex + 1 |> toFloat))) |> String.fromFloat))
|
||||||
|
fields
|
||||||
|
)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -773,7 +776,7 @@ makeRequestBody :
|
||||||
-> Sort
|
-> Sort
|
||||||
-> String
|
-> String
|
||||||
-> String
|
-> String
|
||||||
-> List String
|
-> List ( String, Float )
|
||||||
-> Http.Body
|
-> Http.Body
|
||||||
makeRequestBody query from sizeRaw sort type_ sortField fields =
|
makeRequestBody query from sizeRaw sort type_ sortField fields =
|
||||||
let
|
let
|
||||||
|
@ -804,7 +807,41 @@ makeRequestBody query from sizeRaw sort type_ sortField fields =
|
||||||
)
|
)
|
||||||
, ( "must"
|
, ( "must"
|
||||||
, Json.Encode.list Json.Encode.object
|
, Json.Encode.list Json.Encode.object
|
||||||
(search_fields query fields)
|
[ [ ( "dis_max"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "tie_breaker", Json.Encode.float 0.7 )
|
||||||
|
, ( "queries"
|
||||||
|
, Json.Encode.list Json.Encode.object
|
||||||
|
[ [ ( "bool"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "must"
|
||||||
|
, Json.Encode.list Json.Encode.object <|
|
||||||
|
search_fields
|
||||||
|
1.0
|
||||||
|
(String.words query)
|
||||||
|
fields
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
, [ ( "bool"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "must"
|
||||||
|
, Json.Encode.list Json.Encode.object <|
|
||||||
|
search_fields
|
||||||
|
0.8
|
||||||
|
(String.words query |> List.map String.reverse)
|
||||||
|
(List.map (\( field, score ) -> ( field ++ "_reverse", score )) fields)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
]
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue