Format and lint import script (#44)
* shorten name for import script
* format import script with black and lint with flake8
This commit is contained in:
parent
2868805c2d
commit
4629bc39cc
|
@ -1,6 +1,14 @@
|
||||||
#! /usr/bin/env nix-shell
|
#! /usr/bin/env nix-shell
|
||||||
#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm
|
#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.click-log python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm
|
||||||
|
|
||||||
|
# develop:
|
||||||
|
# $ nix-shell -p python3Packages.black python3Packages.mypy python3Packages.flake8
|
||||||
|
#
|
||||||
|
# format:
|
||||||
|
# $ nix-shell -p python3Packages.black --command "black import-channel"
|
||||||
|
#
|
||||||
|
# lint:
|
||||||
|
# $ nix-shell -p python3Packages.flake8 --command "flake8 --ignore E501,E265 import-channel"
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import click
|
import click
|
||||||
|
@ -15,42 +23,45 @@ import botocore.client
|
||||||
import botocore
|
import botocore
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
ANALYSIS = {
|
ANALYSIS = {
|
||||||
'analyzer': {
|
"analyzer": {
|
||||||
'nixAttrName': {
|
"nixAttrName": {
|
||||||
'type': 'custom',
|
"type": "custom",
|
||||||
'tokenizer': 'nix_attrname',
|
"tokenizer": "nix_attrname",
|
||||||
'filter': ['lowercase', 'nix_stopwords'],
|
"filter": ["lowercase", "nix_stopwords"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'tokenizer': {
|
"tokenizer": {
|
||||||
'nix_attrname': {
|
"nix_attrname": {
|
||||||
'type': 'pattern',
|
"type": "pattern",
|
||||||
# Split on attrname separators like _, .
|
# Split on attrname separators like _, .
|
||||||
'pattern': "|".join([
|
"pattern": "|".join(
|
||||||
'[_.-]', # Common separators like underscores, dots and dashes
|
[
|
||||||
'\\d+?Packages', # python37Packages -> python
|
"[_.-]", # Common separators like underscores, dots and dashes
|
||||||
|
"\\d+?Packages", # python37Packages -> python
|
||||||
# Camelcase tokenizer adapted from
|
# Camelcase tokenizer adapted from
|
||||||
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
|
||||||
"".join([
|
"".join(
|
||||||
'(?<=[\\p{L}&&[^\\p{Lu}]])' # lower case
|
[
|
||||||
'(?=\\p{Lu})', # followed by upper case
|
"(?<=[\\p{L}&&[^\\p{Lu}]])" # lower case
|
||||||
'|',
|
"(?=\\p{Lu})", # followed by upper case
|
||||||
'(?<=\\p{Lu})' # or upper case
|
"|",
|
||||||
'(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])', # followed by lower case
|
"(?<=\\p{Lu})" # or upper case
|
||||||
])
|
"(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", # followed by lower case
|
||||||
])
|
]
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'filter': {
|
"filter": {
|
||||||
'nix_stopwords': {
|
"nix_stopwords": {
|
||||||
'type': 'stop',
|
"type": "stop",
|
||||||
'ignore_case': True,
|
"ignore_case": True,
|
||||||
'stopwords': ['packages', 'package', 'options', 'option'],
|
"stopwords": ["packages", "package", "options", "option"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -58,11 +69,11 @@ ANALYSIS = {
|
||||||
|
|
||||||
def get_last_evaluation(channel):
|
def get_last_evaluation(channel):
|
||||||
project, project_version = channel.split("-", 1)
|
project, project_version = channel.split("-", 1)
|
||||||
s3 = boto3.client("s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED))
|
s3 = boto3.client(
|
||||||
|
"s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED)
|
||||||
|
)
|
||||||
s3_result = s3.list_objects(
|
s3_result = s3.list_objects(
|
||||||
Bucket="nix-releases",
|
Bucket="nix-releases", Prefix=f"{project}/{project_version}/", Delimiter="/",
|
||||||
Prefix=f"{project}/{project_version}/",
|
|
||||||
Delimiter="/",
|
|
||||||
)
|
)
|
||||||
evaluations = []
|
evaluations = []
|
||||||
for item in s3_result.get("CommonPrefixes"):
|
for item in s3_result.get("CommonPrefixes"):
|
||||||
|
@ -73,14 +84,18 @@ def get_last_evaluation(channel):
|
||||||
if evaluation.startswith("beta"):
|
if evaluation.startswith("beta"):
|
||||||
evaluation = evaluation[len("beta") :]
|
evaluation = evaluation[len("beta") :]
|
||||||
try:
|
try:
|
||||||
revisions_since_start, git_revision = evaluation.lstrip(".").rstrip("/").split(".")
|
revisions_since_start, git_revision = (
|
||||||
except:
|
evaluation.lstrip(".").rstrip("/").split(".")
|
||||||
|
)
|
||||||
|
except Exception as e: # noqa
|
||||||
continue
|
continue
|
||||||
evaluations.append(dict(
|
evaluations.append(
|
||||||
|
dict(
|
||||||
revisions_since_start=int(revisions_since_start),
|
revisions_since_start=int(revisions_since_start),
|
||||||
git_revision=git_revision,
|
git_revision=git_revision,
|
||||||
prefix=prefix,
|
prefix=prefix,
|
||||||
))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])
|
evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])
|
||||||
return evaluations[-1]
|
return evaluations[-1]
|
||||||
|
@ -88,7 +103,9 @@ def get_last_evaluation(channel):
|
||||||
|
|
||||||
def get_packages(evaluation):
|
def get_packages(evaluation):
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
shlex.split(f"nix-env -f '<nixpkgs>' -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz --arg config 'import {CURRENT_DIR}/packages-config.nix' -qa --json"),
|
shlex.split(
|
||||||
|
f"nix-env -f '<nixpkgs>' -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz --arg config 'import {CURRENT_DIR}/packages-config.nix' -qa --json"
|
||||||
|
),
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
check=True,
|
check=True,
|
||||||
)
|
)
|
||||||
|
@ -109,10 +126,7 @@ def get_packages(evaluation):
|
||||||
licenses = [
|
licenses = [
|
||||||
type(license) == str
|
type(license) == str
|
||||||
and dict(fullName=license, url=None)
|
and dict(fullName=license, url=None)
|
||||||
or dict(
|
or dict(fullName=license.get("fullName"), url=license.get("url"),)
|
||||||
fullName=license.get("fullName"),
|
|
||||||
url=license.get("url"),
|
|
||||||
)
|
|
||||||
for license in licenses
|
for license in licenses
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
|
@ -128,16 +142,16 @@ def get_packages(evaluation):
|
||||||
for maintainer in data["meta"].get("maintainers", [])
|
for maintainer in data["meta"].get("maintainers", [])
|
||||||
]
|
]
|
||||||
platforms = [
|
platforms = [
|
||||||
type(platform) == str
|
type(platform) == str and platform or None
|
||||||
and platform
|
|
||||||
or None
|
|
||||||
for platform in data["meta"].get("platforms", [])
|
for platform in data["meta"].get("platforms", [])
|
||||||
]
|
]
|
||||||
|
|
||||||
attr_set = None
|
attr_set = None
|
||||||
if "." in attr_name:
|
if "." in attr_name:
|
||||||
attr_set = attr_name.split(".")[0]
|
attr_set = attr_name.split(".")[0]
|
||||||
if not attr_set.endswith("Packages") and not attr_set.endswith("Plugins"):
|
if not attr_set.endswith("Packages") and not attr_set.endswith(
|
||||||
|
"Plugins"
|
||||||
|
):
|
||||||
attr_set = None
|
attr_set = None
|
||||||
|
|
||||||
doc = dict(
|
doc = dict(
|
||||||
|
@ -161,7 +175,9 @@ def get_packages(evaluation):
|
||||||
|
|
||||||
def get_options(evaluation):
|
def get_options(evaluation):
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
shlex.split(f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz"),
|
shlex.split(
|
||||||
|
f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz"
|
||||||
|
),
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
check=True,
|
check=True,
|
||||||
)
|
)
|
||||||
|
@ -176,9 +192,11 @@ def get_options(evaluation):
|
||||||
def gen():
|
def gen():
|
||||||
for name, option in options:
|
for name, option in options:
|
||||||
example = option.get("example")
|
example = option.get("example")
|
||||||
if example and \
|
if (
|
||||||
type(example) == dict and \
|
example
|
||||||
example.get("_type") == "literalExample":
|
and type(example) == dict
|
||||||
|
and example.get("_type") == "literalExample"
|
||||||
|
):
|
||||||
example = str(example["text"])
|
example = str(example["text"])
|
||||||
yield dict(
|
yield dict(
|
||||||
id=name,
|
id=name,
|
||||||
|
@ -205,11 +223,7 @@ def recreate_index(es, channel):
|
||||||
attr_name=dict(
|
attr_name=dict(
|
||||||
type="text",
|
type="text",
|
||||||
analyzer="nixAttrName",
|
analyzer="nixAttrName",
|
||||||
fields={
|
fields={"raw": {"type": "keyword"}},
|
||||||
"raw": {
|
|
||||||
"type": "keyword",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
),
|
),
|
||||||
attr_set=dict(type="keyword"),
|
attr_set=dict(type="keyword"),
|
||||||
pname=dict(type="keyword"),
|
pname=dict(type="keyword"),
|
||||||
|
@ -219,8 +233,7 @@ def recreate_index(es, channel):
|
||||||
license=dict(
|
license=dict(
|
||||||
type="nested",
|
type="nested",
|
||||||
properties=dict(
|
properties=dict(
|
||||||
fullName=dict(type="text"),
|
fullName=dict(type="text"), url=dict(type="text"),
|
||||||
url=dict(type="text"),
|
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
maintainers=dict(
|
maintainers=dict(
|
||||||
|
@ -268,30 +281,26 @@ def main(es_url, channel):
|
||||||
|
|
||||||
# write packages
|
# write packages
|
||||||
number_of_packages, gen_packages = get_packages(evaluation)
|
number_of_packages, gen_packages = get_packages(evaluation)
|
||||||
packages = list(gen_packages())
|
|
||||||
if number_of_packages:
|
if number_of_packages:
|
||||||
click.echo("Indexing packages...")
|
click.echo("Indexing packages...")
|
||||||
progress = tqdm.tqdm(unit="packages", total=number_of_packages)
|
progress = tqdm.tqdm(unit="packages", total=number_of_packages)
|
||||||
successes = 0
|
successes = 0
|
||||||
for ok, action in elasticsearch.helpers.streaming_bulk(
|
for ok, action in elasticsearch.helpers.streaming_bulk(
|
||||||
client=es,
|
client=es, index=f"{channel}-packages", actions=gen_packages()
|
||||||
index=f"{channel}-packages",
|
):
|
||||||
actions=gen_packages()):
|
|
||||||
progress.update(1)
|
progress.update(1)
|
||||||
successes += ok
|
successes += ok
|
||||||
print("Indexed %d/%d packages" % (successes, number_of_packages))
|
print("Indexed %d/%d packages" % (successes, number_of_packages))
|
||||||
|
|
||||||
# write options
|
# write options
|
||||||
number_of_options, gen_options = get_options(evaluation)
|
number_of_options, gen_options = get_options(evaluation)
|
||||||
options = list(gen_options())
|
|
||||||
if number_of_options:
|
if number_of_options:
|
||||||
click.echo("Indexing options...")
|
click.echo("Indexing options...")
|
||||||
progress = tqdm.tqdm(unit="options", total=number_of_options)
|
progress = tqdm.tqdm(unit="options", total=number_of_options)
|
||||||
successes = 0
|
successes = 0
|
||||||
for ok, action in elasticsearch.helpers.streaming_bulk(
|
for ok, action in elasticsearch.helpers.streaming_bulk(
|
||||||
client=es,
|
client=es, index=f"{channel}-options", actions=gen_options()
|
||||||
index=f"{channel}-options",
|
):
|
||||||
actions=gen_options()):
|
|
||||||
progress.update(1)
|
progress.update(1)
|
||||||
successes += ok
|
successes += ok
|
||||||
print("Indexed %d/%d options" % (successes, number_of_options))
|
print("Indexed %d/%d options" % (successes, number_of_options))
|
Loading…
Reference in a new issue