2020-03-28 01:34:38 +00:00
|
|
|
#! /usr/bin/env nix-shell
|
2020-05-22 10:43:57 +00:00
|
|
|
#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.click-log python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm
|
2020-03-28 01:34:38 +00:00
|
|
|
|
2020-05-22 10:43:57 +00:00
|
|
|
# develop:
|
|
|
|
# $ nix-shell -p python3Packages.black python3Packages.mypy python3Packages.flake8
|
|
|
|
#
|
|
|
|
# format:
|
|
|
|
# $ nix-shell -p python3Packages.black --command "black import-channel"
|
|
|
|
#
|
|
|
|
# lint:
|
|
|
|
# $ nix-shell -p python3Packages.flake8 --command "flake8 --ignore E501,E265 import-channel"
|
2020-03-28 01:34:38 +00:00
|
|
|
|
|
|
|
import boto3
|
|
|
|
import click
|
2020-05-22 11:03:31 +00:00
|
|
|
import logging
|
|
|
|
import click_log
|
2020-03-28 01:34:38 +00:00
|
|
|
import elasticsearch
|
|
|
|
import elasticsearch.helpers
|
|
|
|
import json
|
|
|
|
import os.path
|
|
|
|
import shlex
|
|
|
|
import subprocess
|
|
|
|
import tqdm
|
2020-05-19 10:54:48 +00:00
|
|
|
import botocore.client
|
|
|
|
import botocore
|
|
|
|
|
2020-05-22 11:03:31 +00:00
|
|
|
# Module-level logger, wired into click's verbosity handling below.
logger = logging.getLogger("import-channel")
click_log.basic_config(logger)

# Directory containing this script; used to locate packages-config.nix.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Part of every evaluation index name; bump to force re-creation of indexes
# when the schema/mappings change.
INDEX_SCHEMA_VERSION = 1
|
2020-05-19 10:54:48 +00:00
|
|
|
# Elasticsearch analysis settings shared by the packages and options indexes.
# Defines how attribute names are tokenized for search.
ANALYSIS = {
    "analyzer": {
        "nixAttrName": {
            "type": "custom",
            "tokenizer": "nix_attrname",
            "filter": ["lowercase", "nix_stopwords"],
        },
    },
    "tokenizer": {
        "nix_attrname": {
            "type": "pattern",
            # Split on attrname separators like _, .
            "pattern": "|".join(
                [
                    "[_.-]",  # Common separators like underscores, dots and dashes
                    "\\d+?Packages",  # python37Packages -> python
                    # Camelcase tokenizer adapted from
                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
                    "".join(
                        [
                            "(?<=[\\p{L}&&[^\\p{Lu}]])"  # lower case
                            "(?=\\p{Lu})",  # followed by upper case
                            "|",
                            "(?<=\\p{Lu})"  # or upper case
                            "(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])",  # followed by lower case
                        ]
                    ),
                ]
            ),
        },
    },
    "filter": {
        "nix_stopwords": {
            "type": "stop",
            "ignore_case": True,
            "stopwords": ["packages", "package", "options", "option"],
        },
    },
}
|
2020-05-22 12:50:44 +00:00
|
|
|
# Elasticsearch mapping for package documents (one per nixpkgs attribute).
PACKAGES_MAPPING = {
    "properties": {
        "attr_name": {
            "type": "text",
            "analyzer": "nixAttrName",
            "fields": {"raw": {"type": "keyword"}},
        },
        "attr_set": {"type": "keyword"},
        "pname": {"type": "keyword"},
        "pversion": {"type": "keyword"},
        "description": {"type": "text"},
        "longDescription": {"type": "text"},
        "license": {
            "type": "nested",
            "properties": {
                "fullName": {"type": "text"},
                "url": {"type": "text"},
            },
        },
        "maintainers": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "email": {"type": "text"},
                "github": {"type": "text"},
            },
        },
        "platforms": {"type": "keyword"},
        "position": {"type": "text"},
        "homepage": {"type": "keyword"},
    },
}
|
|
|
|
# Elasticsearch mapping for NixOS option documents.
OPTIONS_MAPPING = {
    "properties": {
        "option_name": {"type": "keyword"},
        "description": {"type": "text"},
        "type": {"type": "keyword"},
        "default": {"type": "text"},
        "example": {"type": "text"},
        "source": {"type": "keyword"},
    },
}
|
2020-05-19 10:54:48 +00:00
|
|
|
|
|
|
|
|
2020-03-28 01:34:38 +00:00
|
|
|
def get_last_evaluation(channel):
    """Return the most recent evaluation of a channel from the nix-releases bucket.

    `channel` is e.g. "nixos-20.03".  The result is a dict with keys
    "revisions_since_start" (int), "git_revision" (str) and "prefix" (str).

    Raises RuntimeError when no evaluation can be found for the channel.
    """
    logger.debug(f"Retrieving last evaluation for {channel} channel")

    project, project_version = channel.split("-", 1)
    logger.debug(f"get_last_evaluation: project='{project}'")
    logger.debug(f"get_last_evaluation: project_version='{project_version}'")

    bucket = "nix-releases"
    prefix = f"{project}/{project_version}/"
    logger.debug(
        f"get_last_evaluation: list all evaluation in '{bucket}' bucket under '{prefix}' prefix"
    )

    # The bucket is public: use unsigned requests so no AWS credentials are needed.
    s3 = boto3.client(
        "s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED)
    )
    s3_result = s3.list_objects(Bucket=bucket, Prefix=prefix, Delimiter="/")
    evaluations = []
    # "CommonPrefixes" is absent when nothing matched the prefix; default to [].
    for item in s3_result.get("CommonPrefixes") or []:
        if not item:
            continue
        logger.debug(f"get_last_evaluation: evaluation in raw {item}")
        prefix = item.get("Prefix")
        evaluation = prefix[len(f"{project}/{project_version}/{channel}") :]
        if evaluation.startswith("beta"):
            evaluation = evaluation[len("beta") :]
        try:
            revisions_since_start, git_revision = (
                evaluation.lstrip(".").rstrip("/").split(".")
            )
        except ValueError:
            # Prefix does not look like "<revisions>.<git-revision>/": skip it.
            continue
        evaluation = {
            "revisions_since_start": int(revisions_since_start),
            "git_revision": git_revision,
            "prefix": prefix,
        }
        logger.debug(f"get_last_evaluation: evaluation {evaluation}")
        evaluations.append(evaluation)

    logger.debug(
        f"get_last_evaluation: {len(evaluations)} evaluations found for {channel} channel"
    )
    if not evaluations:
        # Fail with a clear message instead of an IndexError below.
        raise RuntimeError(f"no evaluations found for {channel} channel")
    evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])

    logger.debug(f"get_last_evaluation: last evaluation is: {evaluations[-1]}")
    return evaluations[-1]
|
|
|
|
|
|
|
|
|
|
|
|
def get_packages(evaluation):
    """Fetch the package set of an evaluation with nix-env.

    Returns (number_of_packages, gen) where gen() yields one document dict
    per package, ready for Elasticsearch indexing.
    """
    logger.debug(
        f"get_packages: Retrieving list of packages for '{evaluation['git_revision']}' revision"
    )
    result = subprocess.run(
        shlex.split(
            f"nix-env -f '<nixpkgs>' -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz --arg config 'import {CURRENT_DIR}/packages-config.nix' -qa --json"
        ),
        stdout=subprocess.PIPE,
        check=True,
    )
    packages = json.loads(result.stdout).items()
    packages = list(packages)

    def gen():
        for attr_name, data in packages:
            position = data["meta"].get("position")
            if position and position.startswith("/nix/store"):
                # Strip the "/nix/store/<hash>-<name>/" prefix (11 + 32 + 1
                # characters) to get a path relative to the nixpkgs checkout.
                position = position[44:]

            licenses = data["meta"].get("license")
            if licenses:
                # meta.license can be a string, a single attrset or a list;
                # normalize to a list of {fullName, url} dicts.
                if isinstance(licenses, str):
                    licenses = [dict(fullName=licenses)]
                elif isinstance(licenses, dict):
                    licenses = [licenses]
                licenses = [
                    dict(fullName=license, url=None)
                    if isinstance(license, str)
                    else dict(fullName=license.get("fullName"), url=license.get("url"))
                    for license in licenses
                ]
            else:
                licenses = []

            # meta.maintainers entries can be strings or attrsets.
            maintainers = [
                dict(name=maintainer, email=None, github=None)
                if isinstance(maintainer, str)
                else dict(
                    name=maintainer.get("name"),
                    email=maintainer.get("email"),
                    github=maintainer.get("github"),
                )
                for maintainer in data["meta"].get("maintainers", [])
            ]

            # Non-string platform entries are dropped by the filter below.
            platforms = [
                platform if isinstance(platform, str) else None
                for platform in data["meta"].get("platforms", [])
            ]

            # Group packages by their top-level attribute set, but only for
            # the big ones (pythonPackages, vimPlugins, ...).
            attr_set = None
            if "." in attr_name:
                attr_set = attr_name.split(".")[0]
                if not attr_set.endswith("Packages") and not attr_set.endswith(
                    "Plugins"
                ):
                    attr_set = None

            doc = dict(
                id=attr_name,
                attr_name=attr_name,
                attr_set=attr_set,
                pname=data["pname"],
                pversion=data["version"],
                description=data["meta"].get("description"),
                longDescription=data["meta"].get("longDescription", ""),
                license=licenses,
                maintainers=maintainers,
                platforms=[i for i in platforms if i],
                position=position,
                homepage=data["meta"].get("homepage"),
            )
            yield doc

    logger.debug(f"get_packages: Found {len(packages)} packages")
    return len(packages), gen
|
|
|
|
|
|
|
|
|
|
|
|
def get_options(evaluation):
    """Build the NixOS options of an evaluation and parse the resulting JSON.

    Returns (number_of_options, gen) where gen() yields one document dict per
    option.  Yields nothing when the built options.json file is missing.
    """
    result = subprocess.run(
        shlex.split(
            f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz"
        ),
        stdout=subprocess.PIPE,
        check=True,
    )
    options = []
    # nix-build prints the resulting store path on stdout.
    options_file = result.stdout.strip().decode()
    options_file = f"{options_file}/share/doc/nixos/options.json"
    if os.path.exists(options_file):
        with open(options_file) as f:
            options = json.load(f).items()
    options = list(options)

    def gen():
        for name, option in options:
            example = option.get("example")
            # literalExample values wrap the rendered text in an attrset.
            if (
                example
                and isinstance(example, dict)
                and example.get("_type") == "literalExample"
            ):
                example = str(example["text"])
            yield dict(
                id=name,
                option_name=name,
                description=option.get("description"),
                type=option.get("type"),
                default=str(option.get("default")),
                example=str(example),
                source=option.get("declarations", [None])[0],
            )

    return len(options), gen
|
|
|
|
|
2020-04-10 08:13:50 +00:00
|
|
|
|
2020-05-22 12:58:38 +00:00
|
|
|
def ensure_index(es, index, mapping):
    """Create the Elasticsearch index if it does not exist yet.

    Returns True when the index was created, False when it already existed.
    """
    if es.indices.exists(index):
        logger.debug(f"ensure_index: index '{index}' already exists")
        return False

    body = {
        "settings": {"number_of_shards": 1, "analysis": ANALYSIS},
        "mappings": mapping,
    }
    es.indices.create(index=index, body=body)
    logger.debug(f"ensure_index: index '{index}' was created")
    return True
|
2020-05-22 12:50:44 +00:00
|
|
|
|
|
|
|
|
2020-05-22 12:58:38 +00:00
|
|
|
def ensure_index_name(type_, channel, evaluation):
    """Return the (alias_name, index_name) pair for a document type and channel."""
    alias = f"latest-{channel}-{type_}"
    index = (
        f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}"
        f"-{evaluation['revisions_since_start']}"
        f"-{evaluation['git_revision']}-{type_}"
    )
    return alias, index
|
2020-05-22 12:50:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
def update_alias(es, name, index):
    """Point the alias `name` at `index`, making it the one searches hit."""
    es.indices.put_alias(name=name, index=index)
    logger.debug(f"'{name}' alias now points to '{index}' index")
|
2020-03-28 01:34:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
@click.command()
@click.option("-u", "--es-url", help="Elasticsearch connection url")
@click.option("-c", "--channel", help="NixOS channel name")
@click.option("-v", "--verbose", count=True)
def main(es_url, channel, verbose):
    """Import the latest evaluation of a NixOS channel into Elasticsearch."""
    # -v -> WARNING, -vv (or more) -> DEBUG; default stays CRITICAL.
    logging_level = "CRITICAL"
    if verbose == 1:
        logging_level = "WARNING"
    elif verbose >= 2:
        logging_level = "DEBUG"

    logger.setLevel(getattr(logging, logging_level))
    logger.debug(f"Verbosity is {verbose}")
    logger.debug(f"Logging set to {logging_level}")

    evaluation = get_last_evaluation(channel)
    es = elasticsearch.Elasticsearch([es_url])

    # ensure indexes exist
    options_alias, options_index = ensure_index_name("options", channel, evaluation)
    packages_alias, packages_index = ensure_index_name("packages", channel, evaluation)
    packages_index_created = ensure_index(es, packages_index, PACKAGES_MAPPING)
    options_index_created = ensure_index(es, options_index, OPTIONS_MAPPING)

    # write packages (skipped if the evaluation was already indexed)
    if packages_index_created:
        number_of_packages, gen_packages = get_packages(evaluation)
        if number_of_packages:
            click.echo("Indexing packages...")
            progress = tqdm.tqdm(unit="packages", total=number_of_packages)
            successes = 0
            for ok, action in elasticsearch.helpers.streaming_bulk(
                client=es, index=packages_index, actions=gen_packages()
            ):
                progress.update(1)
                successes += ok
            click.echo("Indexed %d/%d packages" % (successes, number_of_packages))

    # write options (skipped if the evaluation was already indexed)
    if options_index_created:
        number_of_options, gen_options = get_options(evaluation)
        if number_of_options:
            click.echo("Indexing options...")
            progress = tqdm.tqdm(unit="options", total=number_of_options)
            successes = 0
            for ok, action in elasticsearch.helpers.streaming_bulk(
                client=es, index=options_index, actions=gen_options()
            ):
                progress.update(1)
                successes += ok
            # was print(); use click.echo for consistency with the rest of the CLI
            click.echo("Indexed %d/%d options" % (successes, number_of_options))

    # update aliases last, so "latest-*" only moves after indexing succeeded
    if packages_index_created:
        update_alias(es, packages_alias, packages_index)
    if options_index_created:
        update_alias(es, options_alias, options_index)


if __name__ == "__main__":
    main()
|
2020-05-22 11:03:31 +00:00
|
|
|
|
|
|
|
# vi:ft=python
|