Use a custom attrname analyzer (#35)

* Use unsigned boto S3 requests

Without this change you need S3 credentials, even though the bucket is public.

* Use custom attrname analyzer

* Adapt query to new schema

Use pname/pversion so the field names do not clash with Elasticsearch's parsing of version.
This commit is contained in:
adisbladis 2020-05-19 12:54:48 +02:00 committed by GitHub
parent 3816a7033a
commit 042cb40a8e
Failed to generate hash of commit
3 changed files with 96 additions and 56 deletions

View file

@ -11,15 +11,54 @@ import os.path
import shlex import shlex
import subprocess import subprocess
import tqdm import tqdm
import botocore.client
import botocore
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# Elasticsearch `analysis` settings shared by the index definitions in this
# file (supplied as the `analysis` entry of each index's `settings`).
#
# It declares three cooperating pieces:
#   * analyzer  `nixAttrName`   - custom analyzer: `nix_attrname` tokenizer,
#                                 then the `lowercase` and `nix_stopwords` filters.
#   * tokenizer `nix_attrname`  - pattern tokenizer that splits attribute names.
#   * filter    `nix_stopwords` - case-insensitive stop filter for generic words.
ANALYSIS = dict(
    analyzer=dict(
        nixAttrName=dict(
            type="custom",
            tokenizer="nix_attrname",
            filter=["lowercase", "nix_stopwords"],
        ),
    ),
    tokenizer=dict(
        nix_attrname=dict(
            type="pattern",
            # The alternatives below are OR-ed together into one split regex.
            pattern="|".join(
                (
                    # Common separators: underscores, dots and dashes.
                    r"[_.-]",
                    # Digits followed by "Packages": python37Packages -> python
                    r"\d+?Packages",
                    # Camelcase boundaries, adapted from
                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
                    # Either a non-upper-case letter followed by an upper-case
                    # letter, or an upper-case letter followed by an upper-case
                    # letter that is itself followed by a non-upper-case letter.
                    r"(?<=[\p{L}&&[^\p{Lu}]])(?=\p{Lu})"
                    r"|"
                    r"(?<=\p{Lu})(?=\p{Lu}[\p{L}&&[^\p{Lu}]])",
                )
            ),
        ),
    ),
    filter=dict(
        nix_stopwords=dict(
            type="stop",
            ignore_case=True,
            # Generic words dropped from the token stream.
            stopwords=["packages", "package", "options", "option"],
        ),
    ),
)
def get_last_evaluation(channel): def get_last_evaluation(channel):
project, project_version = channel.split("-", 1) project, project_version = channel.split("-", 1)
s3 = boto3.client("s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED))
s3 = boto3.client("s3")
s3_result = s3.list_objects( s3_result = s3.list_objects(
Bucket="nix-releases", Bucket="nix-releases",
Prefix=f"{project}/{project_version}/", Prefix=f"{project}/{project_version}/",
@ -97,8 +136,8 @@ def get_packages(evaluation):
yield dict( yield dict(
id=attr_name, id=attr_name,
attr_name=attr_name, attr_name=attr_name,
name=data["pname"], pname=data["pname"],
version=data["version"], pversion=data["version"],
description=data["meta"].get("description"), description=data["meta"].get("description"),
longDescription=data["meta"].get("longDescription", ""), longDescription=data["meta"].get("longDescription", ""),
license=licenses, license=licenses,
@ -151,12 +190,20 @@ def recreate_index(es, channel):
es.indices.create( es.indices.create(
index=f"{channel}-packages", index=f"{channel}-packages",
body=dict( body=dict(
settings=dict(number_of_shards=1), settings=dict(number_of_shards=1, analysis=ANALYSIS),
mappings=dict( mappings=dict(
properties=dict( properties=dict(
attr_name=dict(type="keyword"), attr_name=dict(
name=dict(type="keyword"), type="text",
version=dict(type="text"), analyzer="nixAttrName",
fields={
"raw": {
"type": "keyword",
}
},
),
pname=dict(type="keyword"),
pversion=dict(type="text"),
description=dict(type="text"), description=dict(type="text"),
longDescription=dict(type="text"), longDescription=dict(type="text"),
license=dict( license=dict(
@ -186,7 +233,7 @@ def recreate_index(es, channel):
es.indices.create( es.indices.create(
index=f"{channel}-options", index=f"{channel}-options",
body=dict( body=dict(
settings=dict(number_of_shards=1), settings=dict(number_of_shards=1, analysis=ANALYSIS),
mappings=dict( mappings=dict(
properties=dict( properties=dict(
option_name=dict(type="keyword"), option_name=dict(type="keyword"),

View file

@ -442,53 +442,46 @@ makeRequestBody :
-> Int -> Int
-> Http.Body -> Http.Body
makeRequestBody field query from size = makeRequestBody field query from size =
let
stringIn name value =
[ ( name, Json.Encode.string value ) ]
objectIn name object =
[ ( name, Json.Encode.object object ) ]
in
-- Prefix Query -- Prefix Query
-- { -- {
-- ""
-- "query": { -- "query": {
-- "prefix": { -- "multi_match" : {
-- "user": { -- "query": "python37Packages.requests",
-- "value": "" -- "fields": [
-- } -- "attr_name.raw",
-- "attr_name",
-- "pname",
-- "pversion",
-- "description",
-- "longDescription"
-- ]
-- } -- }
-- } -- }
-- } Http.jsonBody
--query (Json.Encode.object
-- |> stringIn "value"
-- |> objectIn field
-- |> objectIn "prefix"
-- |> objectIn "query"
-- |> Json.Encode.object
-- |> Http.jsonBody
--
-- Wildcard Query
-- {
-- "query": {
-- "wildcard": {
-- "<field>": {
-- "value": "*<value>*",
-- }
-- }
-- }
-- }
("*" ++ query ++ "*")
|> stringIn "value"
|> objectIn field
|> objectIn "wildcard"
|> objectIn "query"
|> List.append
[ ( "from", Json.Encode.int from ) [ ( "from", Json.Encode.int from )
, ( "size", Json.Encode.int size ) , ( "size", Json.Encode.int size )
, ( "query"
, Json.Encode.object
[ ( "multi_match"
, Json.Encode.object
[ ( "query", Json.Encode.string query )
, ( "fields"
, Json.Encode.list Json.Encode.string
[ "attr_name.raw"
, "attr_name"
, "pname"
, "pversion"
, "description"
, "longDescription"
]
)
]
)
]
)
] ]
|> Json.Encode.object )
|> Http.jsonBody
makeRequest : makeRequest :

View file

@ -53,8 +53,8 @@ type alias Model =
type alias ResultItemSource = type alias ResultItemSource =
{ attr_name : String { attr_name : String
, name : String , pname : String
, version : String , pversion : String
, description : Maybe String , description : Maybe String
, longDescription : Maybe String , longDescription : Maybe String
, licenses : List ResultPackageLicense , licenses : List ResultPackageLicense
@ -163,8 +163,8 @@ viewResultItem showDetailsFor item =
in in
tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ] tr [ onClick (SearchMsg (ElasticSearch.ShowDetails item.id)) ]
[ td [] [ text item.source.attr_name ] [ td [] [ text item.source.attr_name ]
, td [] [ text item.source.name ] , td [] [ text item.source.pname ]
, td [] [ text item.source.version ] , td [] [ text item.source.pversion ]
, td [] [ text <| Maybe.withDefault "" item.source.description ] , td [] [ text <| Maybe.withDefault "" item.source.description ]
] ]
:: packageDetails :: packageDetails
@ -304,8 +304,8 @@ decodeResultItemSource : Json.Decode.Decoder ResultItemSource
decodeResultItemSource = decodeResultItemSource =
Json.Decode.succeed ResultItemSource Json.Decode.succeed ResultItemSource
|> Json.Decode.Pipeline.required "attr_name" Json.Decode.string |> Json.Decode.Pipeline.required "attr_name" Json.Decode.string
|> Json.Decode.Pipeline.required "name" Json.Decode.string |> Json.Decode.Pipeline.required "pname" Json.Decode.string
|> Json.Decode.Pipeline.required "version" Json.Decode.string |> Json.Decode.Pipeline.required "pversion" Json.Decode.string
|> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string) |> Json.Decode.Pipeline.required "description" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string) |> Json.Decode.Pipeline.required "longDescription" (Json.Decode.nullable Json.Decode.string)
|> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense) |> Json.Decode.Pipeline.required "license" (Json.Decode.list decodeResultPackageLicense)