Use a custom attrname analyzer (#35)
* Use unsigned boto S3 requests. Without this change you need S3 credentials, even though the bucket is public. * Use a custom attrname analyzer. * Adapt the query to the new schema: use pname/pversion so the fields do not clash with Elasticsearch's parsing of version.
This commit is contained in:
parent
3816a7033a
commit
042cb40a8e
|
@ -11,15 +11,54 @@ import os.path
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
import tqdm
|
import tqdm
|
||||||
|
import botocore.client
|
||||||
|
import botocore
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
ANALYSIS = {
|
||||||
|
'analyzer': {
|
||||||
|
'nixAttrName': {
|
||||||
|
'type': 'custom',
|
||||||
|
'tokenizer': 'nix_attrname',
|
||||||
|
'filter': ['lowercase', 'nix_stopwords'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'tokenizer': {
|
||||||
|
'nix_attrname': {
|
||||||
|
'type': 'pattern',
|
||||||
|
# Split on attrname separators like _, .
|
||||||
|
'pattern': "|".join([
|
||||||
|
'[_.-]', # Common separators like underscores, dots and dashes
|
||||||
|
'\\d+?Packages', # python37Packages -> python
|
||||||
|
# Camelcase tokenizer adapted from
|
||||||
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
||||||
|
"".join([
|
||||||
|
'(?<=[\\p{L}&&[^\\p{Lu}]])' # lower case
|
||||||
|
'(?=\\p{Lu})', # followed by upper case
|
||||||
|
'|',
|
||||||
|
'(?<=\\p{Lu})' # or upper case
|
||||||
|
'(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])', # followed by lower case
|
||||||
|
])
|
||||||
|
])
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'filter': {
|
||||||
|
'nix_stopwords': {
|
||||||
|
'type': 'stop',
|
||||||
|
'ignore_case': True,
|
||||||
|
'stopwords': ['packages', 'package', 'options', 'option'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_last_evaluation(channel):
|
def get_last_evaluation(channel):
|
||||||
project, project_version = channel.split("-", 1)
|
project, project_version = channel.split("-", 1)
|
||||||
|
s3 = boto3.client("s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED))
|
||||||
s3 = boto3.client("s3")
|
|
||||||
s3_result = s3.list_objects(
|
s3_result = s3.list_objects(
|
||||||
Bucket="nix-releases",
|
Bucket="nix-releases",
|
||||||
Prefix=f"{project}/{project_version}/",
|
Prefix=f"{project}/{project_version}/",
|
||||||
|
@ -97,8 +136,8 @@ def get_packages(evaluation):
|
||||||
yield dict(
|
yield dict(
|
||||||
id=attr_name,
|
id=attr_name,
|
||||||
attr_name=attr_name,
|
attr_name=attr_name,
|
||||||
name=data["pname"],
|
pname=data["pname"],
|
||||||
version=data["version"],
|
pversion=data["version"],
|
||||||
description=data["meta"].get("description"),
|
description=data["meta"].get("description"),
|
||||||
longDescription=data["meta"].get("longDescription", ""),
|
longDescription=data["meta"].get("longDescription", ""),
|
||||||
license=licenses,
|
license=licenses,
|
||||||
|
@ -151,12 +190,20 @@ def recreate_index(es, channel):
|
||||||
es.indices.create(
|
es.indices.create(
|
||||||
index=f"{channel}-packages",
|
index=f"{channel}-packages",
|
||||||
body=dict(
|
body=dict(
|
||||||
settings=dict(number_of_shards=1),
|
settings=dict(number_of_shards=1, analysis=ANALYSIS),
|
||||||
mappings=dict(
|
mappings=dict(
|
||||||
properties=dict(
|
properties=dict(
|
||||||
attr_name=dict(type="keyword"),
|
attr_name=dict(
|
||||||
name=dict(type="keyword"),
|
type="text",
|
||||||
version=dict(type="text"),
|
analyzer="nixAttrName",
|
||||||
|
fields={
|
||||||
|
"raw": {
|
||||||
|
"type": "keyword",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
),
|
||||||
|
pname=dict(type="keyword"),
|
||||||
|
pversion=dict(type="text"),
|
||||||
description=dict(type="text"),
|
description=dict(type="text"),
|
||||||
longDescription=dict(type="text"),
|
longDescription=dict(type="text"),
|
||||||
license=dict(
|
license=dict(
|
||||||
|
@ -186,7 +233,7 @@ def recreate_index(es, channel):
|
||||||
es.indices.create(
|
es.indices.create(
|
||||||
index=f"{channel}-options",
|
index=f"{channel}-options",
|
||||||
body=dict(
|
body=dict(
|
||||||
settings=dict(number_of_shards=1),
|
settings=dict(number_of_shards=1, analysis=ANALYSIS),
|
||||||
mappings=dict(
|
mappings=dict(
|
||||||
properties=dict(
|
properties=dict(
|
||||||
option_name=dict(type="keyword"),
|
option_name=dict(type="keyword"),
|
||||||
|
|
|
@ -442,53 +442,46 @@ makeRequestBody :
|
||||||
-> Int
|
-> Int
|
||||||
-> Http.Body
|
-> Http.Body
|
||||||
makeRequestBody field query from size =
|
makeRequestBody field query from size =
|
||||||
let
|
|
||||||
stringIn name value =
|
|
||||||
[ ( name, Json.Encode.string value ) ]
|
|
||||||
|
|
||||||
objectIn name object =
|
|
||||||
[ ( name, Json.Encode.object object ) ]
|
|
||||||
in
|
|
||||||
-- Prefix Query
|
-- Prefix Query
|
||||||
-- {
|
-- {
|
||||||
-- ""
|
|
||||||
-- "query": {
|
-- "query": {
|
||||||
-- "prefix": {
|
-- "multi_match" : {
|
||||||
-- "user": {
|
-- "query": "python37Packages.requests",
|
||||||
-- "value": ""
|
-- "fields": [
|
||||||
|
-- "attr_name.raw",
|
||||||
|
-- "attr_name",
|
||||||
|
-- "pname",
|
||||||
|
-- "pversion",
|
||||||
|
-- "description",
|
||||||
|
-- "longDescription"
|
||||||
|
-- ]
|
||||||
-- }
|
-- }
|
||||||
-- }
|
-- }
|
||||||
-- }
|
Http.jsonBody
|
||||||
-- }
|
(Json.Encode.object
|
||||||
--query
|
|
||||||
-- |> stringIn "value"
|
|
||||||
-- |> objectIn field
|
|
||||||
-- |> objectIn "prefix"
|
|
||||||
-- |> objectIn "query"
|
|
||||||
-- |> Json.Encode.object
|
|
||||||
-- |> Http.jsonBody
|
|
||||||
--
|
|
||||||
-- Wildcard Query
|
|
||||||
-- {
|
|
||||||
-- "query": {
|
|
||||||
-- "wildcard": {
|
|
||||||
-- "<field>": {
|
|
||||||
-- "value": "*<value>*",
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
-- }
|
|
||||||
("*" ++ query ++ "*")
|
|
||||||
|> stringIn "value"
|
|
||||||
|> objectIn field
|
|
||||||
|> objectIn "wildcard"
|
|
||||||
|> objectIn "query"
|
|
||||||
|> List.append
|
|
||||||
[ ( "from", Json.Encode.int from )
|
[ ( "from", Json.Encode.int from )
|
||||||
, ( "size", Json.Encode.int size )
|
, ( "size", Json.Encode.int size )
|
||||||
|
, ( "query"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "multi_match"
|
||||||
|
, Json.Encode.object
|
||||||
|
[ ( "query", Json.Encode.string query )
|
||||||
|
, ( "fields"
|
||||||
|
, Json.Encode.list Json.Encode.string
|
||||||
|
[ "attr_name.raw"
|
||||||
|
, "attr_name"
|
||||||
|
, "pname"
|
||||||
|
, "pversion"
|
||||||
|
, "description"
|
||||||
|
, "longDescription"
|
||||||
]
|
]
|
||||||
|> Json.Encode.object
|
)
|
||||||
|> Http.jsonBody
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
makeRequest :
|
makeRequest :
|
||||||
|
|
|
@ -53,8 +53,8 @@ type alias Model =
|
||||||
|
|
||||||
type alias ResultItemSource =
|
type alias ResultItemSource =
|
||||||
{ attr_name : String
|
{ attr_name : String
|
||||||
, name : String
|
, pname : String
|
||||||
, version : String
|
, pversion : String
|
||||||
, description : Maybe String
|
, description : Maybe String
|
||||||
, longDescription : Maybe String
|
, longDescription : Maybe String
|
||||||
, licenses : List ResultPackageLicense
|
, licenses : List ResultPackageLicense
|
||||||
|
@ -163,8 +163,8 @@ viewResultItem showDetailsFor item =
|
||||||
in
|
in
|
||||||
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
|
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
|
||||||
[ td [] [ text item.source.attr_name ]
|
[ td [] [ text item.source.attr_name ]
|
||||||
, td [] [ text item.source.name ]
|
, td [] [ text item.source.pname ]
|
||||||
, td [] [ text item.source.version ]
|
, td [] [ text item.source.pversion ]
|
||||||
, td [] [ text <| Maybe.withDefault "" item.source.description ]
|
, td [] [ text <| Maybe.withDefault "" item.source.description ]
|
||||||
]
|
]
|
||||||
:: packageDetails
|
:: packageDetails
|
||||||
|
@ -304,8 +304,8 @@ decodeResultItemSource : Json.Decode.Decoder ResultItemSource
|
||||||
decodeResultItemSource =
|
decodeResultItemSource =
|
||||||
Json.Decode.succeed ResultItemSource
|
Json.Decode.succeed ResultItemSource
|
||||||
|> Json.Decode.Pipeline.required "attr_name" Json.Decode.string
|
|> Json.Decode.Pipeline.required "attr_name" Json.Decode.string
|
||||||
|> Json.Decode.Pipeline.required "name" Json.Decode.string
|
|> Json.Decode.Pipeline.required "pname" Json.Decode.string
|
||||||
|> Json.Decode.Pipeline.required "version" Json.Decode.string
|
|> Json.Decode.Pipeline.required "pversion" Json.Decode.string
|
||||||
|> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string)
|
|> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string)
|
||||||
|> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string)
|
|> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string)
|
||||||
|> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense)
|
|> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense)
|
||||||
|
|
Loading…
Reference in a new issue