Use a custom attrname analyzer (#35)
* Use unsigned boto S3 requests. Without this change you need S3 credentials, even though the bucket is public.
* Use a custom attrname analyzer.
* Adapt the query to the new schema. Use pname/pversion so as not to clash with Elasticsearch's parsing of version.
This commit is contained in:
parent
3816a7033a
commit
042cb40a8e
|
@ -11,15 +11,54 @@ import os.path
|
|||
import shlex
|
||||
import subprocess
|
||||
import tqdm
|
||||
import botocore.client
|
||||
import botocore
|
||||
|
||||
|
||||
|
||||
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
ANALYSIS = {
|
||||
'analyzer': {
|
||||
'nixAttrName': {
|
||||
'type': 'custom',
|
||||
'tokenizer': 'nix_attrname',
|
||||
'filter': ['lowercase', 'nix_stopwords'],
|
||||
},
|
||||
},
|
||||
'tokenizer': {
|
||||
'nix_attrname': {
|
||||
'type': 'pattern',
|
||||
# Split on attrname separators like _, .
|
||||
'pattern': "|".join([
|
||||
'[_.-]', # Common separators like underscores, dots and dashes
|
||||
'\\d+?Packages', # python37Packages -> python
|
||||
# Camelcase tokenizer adapted from
|
||||
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
||||
"".join([
|
||||
'(?<=[\\p{L}&&[^\\p{Lu}]])' # lower case
|
||||
'(?=\\p{Lu})', # followed by upper case
|
||||
'|',
|
||||
'(?<=\\p{Lu})' # or upper case
|
||||
'(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])', # followed by lower case
|
||||
])
|
||||
])
|
||||
},
|
||||
},
|
||||
'filter': {
|
||||
'nix_stopwords': {
|
||||
'type': 'stop',
|
||||
'ignore_case': True,
|
||||
'stopwords': ['packages', 'package', 'options', 'option'],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_last_evaluation(channel):
|
||||
project, project_version = channel.split("-", 1)
|
||||
|
||||
s3 = boto3.client("s3")
|
||||
s3 = boto3.client("s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED))
|
||||
s3_result = s3.list_objects(
|
||||
Bucket="nix-releases",
|
||||
Prefix=f"{project}/{project_version}/",
|
||||
|
@ -64,9 +103,9 @@ def get_packages(evaluation):
|
|||
licenses = data["meta"].get("license")
|
||||
if licenses:
|
||||
if type(licenses) == str:
|
||||
licenses = [dict(fullName=licenses)]
|
||||
licenses = [dict(fullName=licenses)]
|
||||
elif type(licenses) == dict:
|
||||
licenses = [licenses]
|
||||
licenses = [licenses]
|
||||
licenses = [
|
||||
type(license) == str
|
||||
and dict(fullName=license, url=None)
|
||||
|
@ -97,8 +136,8 @@ def get_packages(evaluation):
|
|||
yield dict(
|
||||
id=attr_name,
|
||||
attr_name=attr_name,
|
||||
name=data["pname"],
|
||||
version=data["version"],
|
||||
pname=data["pname"],
|
||||
pversion=data["version"],
|
||||
description=data["meta"].get("description"),
|
||||
longDescription=data["meta"].get("longDescription", ""),
|
||||
license=licenses,
|
||||
|
@ -151,12 +190,20 @@ def recreate_index(es, channel):
|
|||
es.indices.create(
|
||||
index=f"{channel}-packages",
|
||||
body=dict(
|
||||
settings=dict(number_of_shards=1),
|
||||
settings=dict(number_of_shards=1, analysis=ANALYSIS),
|
||||
mappings=dict(
|
||||
properties=dict(
|
||||
attr_name=dict(type="keyword"),
|
||||
name=dict(type="keyword"),
|
||||
version=dict(type="text"),
|
||||
attr_name=dict(
|
||||
type="text",
|
||||
analyzer="nixAttrName",
|
||||
fields={
|
||||
"raw": {
|
||||
"type": "keyword",
|
||||
}
|
||||
},
|
||||
),
|
||||
pname=dict(type="keyword"),
|
||||
pversion=dict(type="text"),
|
||||
description=dict(type="text"),
|
||||
longDescription=dict(type="text"),
|
||||
license=dict(
|
||||
|
@ -186,7 +233,7 @@ def recreate_index(es, channel):
|
|||
es.indices.create(
|
||||
index=f"{channel}-options",
|
||||
body=dict(
|
||||
settings=dict(number_of_shards=1),
|
||||
settings=dict(number_of_shards=1, analysis=ANALYSIS),
|
||||
mappings=dict(
|
||||
properties=dict(
|
||||
option_name=dict(type="keyword"),
|
||||
|
|
|
@ -442,53 +442,46 @@ makeRequestBody :
|
|||
-> Int
|
||||
-> Http.Body
|
||||
makeRequestBody field query from size =
|
||||
let
|
||||
stringIn name value =
|
||||
[ ( name, Json.Encode.string value ) ]
|
||||
|
||||
objectIn name object =
|
||||
[ ( name, Json.Encode.object object ) ]
|
||||
in
|
||||
-- Prefix Query
|
||||
-- {
|
||||
-- ""
|
||||
-- "query": {
|
||||
-- "prefix": {
|
||||
-- "user": {
|
||||
-- "value": ""
|
||||
-- }
|
||||
-- "multi_match" : {
|
||||
-- "query": "python37Packages.requests",
|
||||
-- "fields": [
|
||||
-- "attr_name.raw",
|
||||
-- "attr_name",
|
||||
-- "pname",
|
||||
-- "pversion",
|
||||
-- "description",
|
||||
-- "longDescription"
|
||||
-- ]
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
--query
|
||||
-- |> stringIn "value"
|
||||
-- |> objectIn field
|
||||
-- |> objectIn "prefix"
|
||||
-- |> objectIn "query"
|
||||
-- |> Json.Encode.object
|
||||
-- |> Http.jsonBody
|
||||
--
|
||||
-- Wildcard Query
|
||||
-- {
|
||||
-- "query": {
|
||||
-- "wildcard": {
|
||||
-- "<field>": {
|
||||
-- "value": "*<value>*",
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
-- }
|
||||
("*" ++ query ++ "*")
|
||||
|> stringIn "value"
|
||||
|> objectIn field
|
||||
|> objectIn "wildcard"
|
||||
|> objectIn "query"
|
||||
|> List.append
|
||||
Http.jsonBody
|
||||
(Json.Encode.object
|
||||
[ ( "from", Json.Encode.int from )
|
||||
, ( "size", Json.Encode.int size )
|
||||
, ( "query"
|
||||
, Json.Encode.object
|
||||
[ ( "multi_match"
|
||||
, Json.Encode.object
|
||||
[ ( "query", Json.Encode.string query )
|
||||
, ( "fields"
|
||||
, Json.Encode.list Json.Encode.string
|
||||
[ "attr_name.raw"
|
||||
, "attr_name"
|
||||
, "pname"
|
||||
, "pversion"
|
||||
, "description"
|
||||
, "longDescription"
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
|> Json.Encode.object
|
||||
|> Http.jsonBody
|
||||
)
|
||||
|
||||
|
||||
makeRequest :
|
||||
|
|
|
@ -53,8 +53,8 @@ type alias Model =
|
|||
|
||||
type alias ResultItemSource =
|
||||
{ attr_name : String
|
||||
, name : String
|
||||
, version : String
|
||||
, pname : String
|
||||
, pversion : String
|
||||
, description : Maybe String
|
||||
, longDescription : Maybe String
|
||||
, licenses : List ResultPackageLicense
|
||||
|
@ -163,8 +163,8 @@ viewResultItem showDetailsFor item =
|
|||
in
|
||||
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
|
||||
[ td [] [ text item.source.attr_name ]
|
||||
, td [] [ text item.source.name ]
|
||||
, td [] [ text item.source.version ]
|
||||
, td [] [ text item.source.pname ]
|
||||
, td [] [ text item.source.pversion ]
|
||||
, td [] [ text <| Maybe.withDefault "" item.source.description ]
|
||||
]
|
||||
:: packageDetails
|
||||
|
@ -304,8 +304,8 @@ decodeResultItemSource : Json.Decode.Decoder ResultItemSource
|
|||
decodeResultItemSource =
|
||||
Json.Decode.succeed ResultItemSource
|
||||
|> Json.Decode.Pipeline.required "attr_name" Json.Decode.string
|
||||
|> Json.Decode.Pipeline.required "name" Json.Decode.string
|
||||
|> Json.Decode.Pipeline.required "version" Json.Decode.string
|
||||
|> Json.Decode.Pipeline.required "pname" Json.Decode.string
|
||||
|> Json.Decode.Pipeline.required "pversion" Json.Decode.string
|
||||
|> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string)
|
||||
|> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string)
|
||||
|> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense)
|
||||
|
|
Loading…
Reference in a new issue