#! /usr/bin/env nix-shell
#! nix-shell -i python3 -p python3 python3Packages.requests python3Packages.click python3Packages.click-log python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm python3Packages.pypandoc

# develop:
# $ nix-shell -p python3Packages.black python3Packages.mypy python3Packages.flake8
#
# format:
# $ nix-shell -p python3Packages.black --command "black import-channel"
#
# lint:
# $ nix-shell -p python3Packages.flake8 --command "flake8 --ignore E501,E265 import-channel"
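#
# run (illustrative invocation; the options are defined on main() at the
# bottom of this file, point --es-url at your own Elasticsearch instance):
# $ ./import-channel --es-url http://localhost:9200 --channel unstable -vv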

import boto3
import botocore
import botocore.client
import click
import click_log
import elasticsearch
import elasticsearch.helpers
import json
import logging
import os.path
import pypandoc
import re
import requests
import shlex
import subprocess
import sys
import tqdm
import xml.etree.ElementTree

logger = logging.getLogger("import-channel")
click_log.basic_config(logger)


S3_BUCKET = "nix-releases"
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
INDEX_SCHEMA_VERSION = 6
CHANNELS = {
    "unstable": {
        "packages": "nixpkgs/nixpkgs-20.09pre",
        "options": "nixos/unstable/nixos-20.09pre",
    },
    "19.09": {
        "packages": "nixpkgs/nixpkgs-19.09pre",
        "options": "nixos/19.09/nixos-19.09.",
    },
    "20.03": {
        "packages": "nixpkgs/nixpkgs-20.03pre",
        "options": "nixos/20.03/nixos-20.03.",
    },
}
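# Each entry above is an S3 key prefix under S3_BUCKET; get_last_evaluation()
# below lists "<prefix><revisions-since-start>.<git-revision>/" folders and
# picks the newest one.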
ANALYSIS = {
    "normalizer": {
        "lowercase": {
            "type": "custom",
            "char_filter": [],
            "filter": ["lowercase"],
        }
    },
    "analyzer": {
        "lowercase": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["lowercase"],
        },
        "nixOptionName": {
            "type": "custom",
            "tokenizer": "nix_option_name",
            "filter": ["lowercase"],
        },
        "nixOptionNameGranular": {
            "type": "custom",
            "tokenizer": "nix_option_name_granular",
            "filter": ["lowercase"],
        },
    },
    "tokenizer": {
        "nix_package_query": {
            "type": "pattern",
            "pattern": "|".join(
                [
                    "[ ]",
                ]
            ),
        },
        "nix_package_attr_name": {
            "type": "pattern",
            # Split on attrname separators like _, .
            "pattern": "|".join(
                [
                    "[_.-]",  # Common separators like underscores, dots and dashes
                    "\\d+?Packages",  # python37Packages -> python
                    "\\d+?Plugins",  # vimPlugins -> vim
                    "\\d+?Extensions",  # php74Extensions -> php
                    "\\d+?Interpreters",  # perlInterpreters -> perl
                    # Camelcase tokenizer adapted from
                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
                    "".join(
                        [
                            "(?<=[\\p{L}&&[^\\p{Lu}]])"  # lower case
                            "(?=\\p{Lu})",  # followed by upper case
                            "|",
                            "(?<=\\p{Lu})"  # or upper case
                            "(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])",  # followed by lower case
                        ]
                    ),
                ]
            ),
        },
        "nix_option_name": {
            "type": "pattern",
            "pattern": "[.]",
        },
        # Lower priority (virtualHost -> [virtual, host])
        "nix_option_name_granular": {
            "type": "pattern",
            # Split on attrname separators like _, .
            "pattern": "|".join(
                [
                    "[_.-]",  # Common separators like underscores, dots and dashes
                    # Camelcase tokenizer adapted from
                    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html
                    "".join(
                        [
                            "(?<=[\\p{L}&&[^\\p{Lu}]])"  # lower case
                            "(?=\\p{Lu})",  # followed by upper case
                            "|",
                            "(?<=\\p{Lu})"  # or upper case
                            "(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])",  # followed by lower case
                        ]
                    ),
                ]
            ),
        },
    },
    "filter": {
        "nix_stopwords": {
            "type": "stop",
            "ignore_case": True,
            "stopwords": [
                "packages",
                "package",
                "options",
                "option",
                "plugins",
                "plugin",
                "extensions",
                "extension",
                "interpreters",
                "interpreter",
            ],
        },
    },
}
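# Illustrative behaviour of the custom tokenizers above (assumed, not verified
# against a live Elasticsearch):
#   nix_package_attr_name:    "python37Packages.requests" -> ["python", "requests"]
#   nix_option_name:          "services.nginx.enable" -> ["services", "nginx", "enable"]
#   nix_option_name_granular: "virtualHost" -> ["virtual", "host"] (after lowercasing)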
MAPPING = {
    "properties": {
        "type": {"type": "keyword"},
        # Package fields
        "package_hydra_build": {
            "type": "nested",
            "properties": {
                "build_id": {"type": "keyword"},
                "build_status": {"type": "keyword"},
                "platform": {"type": "keyword"},
                "project": {"type": "keyword"},
                "jobset": {"type": "keyword"},
                "job": {"type": "keyword"},
                "path": {
                    "type": "nested",
                    "properties": {
                        "output": {"type": "keyword"},
                        "path": {"type": "keyword"},
                    },
                },
                "drv_path": {"type": "keyword"},
            },
        },
        "package_attr_name": {
            "type": "keyword",
            "normalizer": "lowercase",
        },
        "package_attr_name_query": {
            "type": "keyword",
            "normalizer": "lowercase",
        },
        "package_attr_set": {
            "type": "keyword",
            "normalizer": "lowercase",
        },
        "package_pname": {
            "type": "keyword",
            "normalizer": "lowercase",
        },
        "package_pversion": {"type": "keyword"},
        "package_description": {"type": "text"},
        "package_longDescription": {"type": "text"},
        "package_license": {
            "type": "nested",
            "properties": {"fullName": {"type": "text"}, "url": {"type": "text"}},
        },
        "package_maintainers": {
            "type": "nested",
            "properties": {
                "name": {"type": "text"},
                "email": {"type": "text"},
                "github": {"type": "text"},
            },
        },
        "package_platforms": {"type": "keyword"},
        "package_position": {"type": "text"},
        "package_homepage": {"type": "keyword"},
        "package_system": {"type": "keyword"},
        # Options fields
        "option_name": {
            "type": "text",
            "analyzer": "nixOptionName",
            "fielddata": True,
            "fields": {
                "raw": {"type": "keyword"},
                "granular": {
                    "type": "text",
                    "analyzer": "nixOptionNameGranular",
                },
            },
        },
        "option_description": {"type": "text"},
        "option_type": {"type": "keyword"},
        "option_default": {"type": "text"},
        "option_example": {"type": "text"},
        "option_source": {"type": "keyword"},
    },
}


def split_query(text):
    """Tokenize package attr_name

    Example:

        python37Packages.test1_name-test2

        = index: 0
            - python37Packages.test1_name-test2
            - python37Packages.test1_name
            - python37Packages.test1
            - python37
            - python
        = index: 1
            - test1_name-test2
            - test1_name
            - test1
        = index: 2
            - name-test2
            - name
        = index: 3
            - test2
    """
    tokens = []
    # Match non-greedy chunks that end at a lower->upper or upper->UpperLower
    # camelCase boundary, at a separator ([._-]), or at the end of the string.
    regex = re.compile(
        r".+?(?:(?<=[a-z])(?=[1-9A-Z])|(?<=[1-9A-Z])(?=[A-Z][a-z])|[._-]|$)"
    )
    parts = [m.group(0) for m in regex.finditer(text)]
    for index in range(len(parts)):
        prev_parts = ""
        for part in parts[index:]:
            tokens.append((prev_parts + part).rstrip("_.-"))
            prev_parts += part
    return tokens


def get_last_evaluation(prefix):
    logger.debug(f"Retrieving last evaluation for {prefix} prefix.")

    s3 = boto3.client(
        "s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED)
    )
    s3_result = s3.list_objects(Bucket=S3_BUCKET, Prefix=prefix, Delimiter="/")
    evaluations = []
    for item in s3_result.get("CommonPrefixes"):
        if not item:
            continue
        logger.debug(f"get_last_evaluation: evaluation in raw {item}")
        try:
            revisions_since_start, git_revision = (
                item["Prefix"][len(prefix):].rstrip("/").split(".")
            )
        except ValueError:
            # Skip folders that do not follow the <revisions>.<git-revision> naming.
            logger.warning(
                f"get_last_evaluation: ignoring malformed prefix {item['Prefix']}"
            )
            continue
        evaluation = {
            "revisions_since_start": int(revisions_since_start),
            "git_revision": git_revision,
            "prefix": item["Prefix"].rstrip("/"),
        }
        logger.debug(f"get_last_evaluation: evaluation {evaluation}")
        evaluations.append(evaluation)

    logger.debug(
        f"get_last_evaluation: {len(evaluations)} evaluations found for {prefix} prefix"
    )
    evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])
    evaluation = evaluations[-1]

    result = s3.get_object(Bucket=S3_BUCKET, Key=f"{evaluation['prefix']}/src-url")
    evaluation["id"] = result.get("Body").read().decode()[
        len("https://hydra.nixos.org/eval/"):
    ]

    logger.debug(f"get_last_evaluation: last evaluation is: {evaluation}")

    return evaluation
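# Note on get_last_evaluation() above: the evaluation "id" is the Hydra
# evaluation number, recovered by stripping "https://hydra.nixos.org/eval/"
# from the src-url file stored next to each release.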


def get_evaluation_builds(evaluation_id):
    logger.debug(
        f"get_evaluation_builds: Retrieving list of builds for {evaluation_id} evaluation id"
    )
    filename = f"eval-{evaluation_id}.json"
    if not os.path.exists(filename):
        url = f"https://hydra.nixos.org/eval/{evaluation_id}/builds"
        logger.debug(f"get_evaluation_builds: Fetching builds from {url} url.")
        headers = {
            "Content-Type": "application/json"
        }
        r = requests.get(url, headers=headers, stream=True)
        with tqdm.tqdm.wrapattr(
            open(filename, "wb"),
            "write",
            miniters=1,
            total=int(r.headers.get("content-length", 0)),
            desc=filename,
        ) as f:
            for chunk in r.iter_content(chunk_size=4096):
                f.write(chunk)

    with open(filename) as f:
        builds = json.load(f)

    result = {}
    for build in builds:
        result.setdefault(build["nixname"], {})
        result[build["nixname"]][build["system"]] = build

    return result
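# Shape of the mapping returned by get_evaluation_builds() (illustrative):
#   {"firefox-77.0": {"x86_64-linux": <build>, "aarch64-linux": <build>}}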


def get_maintainer(maintainer):
    maintainers = []

    if type(maintainer) == str:
        maintainers.append(dict(
            name=maintainer,
            email=None,
            github=None,
        ))

    elif type(maintainer) == dict:
        maintainers.append(dict(
            name=maintainer.get("name"),
            email=maintainer.get("email"),
            github=maintainer.get("github"),
        ))

    elif type(maintainer) == list:
        for item in maintainer:
            maintainers += get_maintainer(item)

    else:
        logger.error(f"maintainer cannot be recognized from: {maintainer}")
        sys.exit(1)

    return maintainers
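# get_maintainer() above normalizes every shape found in nixpkgs
# meta.maintainers, e.g. (illustrative): "alice",
# {"name": "Alice", "github": "alice"}, or a nested list of either.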


def remove_attr_set(name):
    # for some package sets the prefix is also included in pname
    sets = [
        # Packages
        "emscripten",
        "lua",
        "php",
        "pure",
        "python",
        "lisp",
        "perl",
        "ruby",
        # Plugins
        "elasticsearch",
        "graylog",
        "tmuxplugin",
        "vimplugin",
    ]
    # TODO: is this correct?
    if any([name.startswith(i) for i in sets]):
        name = "-".join(name.split("-")[1:])

    # node does things a bit differently
    elif name.startswith("node_"):
        name = name[len("node_"):]

    return name
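# Examples (illustrative): remove_attr_set("python3.7-requests") -> "requests",
# remove_attr_set("node_lodash") -> "lodash".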


def get_packages(evaluation, evaluation_builds):
    logger.debug(
        f"get_packages: Retrieving list of packages for '{evaluation['git_revision']}' revision"
    )
    result = subprocess.run(
        shlex.split(
            f"nix-env -f '<nixpkgs>' -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz --arg config 'import {CURRENT_DIR}/packages-config.nix' -qa --json"
        ),
        stdout=subprocess.PIPE,
        check=True,
    )
    packages = json.loads(result.stdout).items()
    packages = list(packages)

    def gen():
        for attr_name, data in packages:
            position = data["meta"].get("position")
            if position and position.startswith("/nix/store"):
                # strip the "/nix/store/<hash>-" prefix (11 + 32 + 1 characters)
                position = position[44:]

            licenses = data["meta"].get("license")
            if licenses:
                if type(licenses) == str:
                    licenses = [dict(fullName=licenses)]
                elif type(licenses) == dict:
                    licenses = [licenses]
                licenses = [
                    dict(fullName=license, url=None)
                    if type(license) == str
                    else dict(fullName=license.get("fullName"), url=license.get("url"))
                    for license in licenses
                ]
            else:
                licenses = []

            maintainers = get_maintainer(data["meta"].get("maintainers", []))

            platforms = [
                platform if type(platform) == str else None
                for platform in data["meta"].get("platforms", [])
            ]

            attr_set = None
            if "." in attr_name:
                attr_set = attr_name.split(".")[0]
                if (
                    not attr_set.endswith("Packages")
                    and not attr_set.endswith("Plugins")
                    and not attr_set.endswith("Extensions")
                ):
                    attr_set = None

            hydra = None
            if data["name"] in evaluation_builds:
                hydra = []
                for platform, build in evaluation_builds[data["name"]].items():
                    hydra.append(
                        {
                            "build_id": build["id"],
                            "build_status": build["buildstatus"],
                            "platform": build["system"],
                            "project": build["project"],
                            "jobset": build["jobset"],
                            "job": build["job"],
                            "path": [
                                {"output": output, "path": item["path"]}
                                for output, item in build["buildoutputs"].items()
                            ],
                            "drv_path": build["drvpath"],
                        }
                    )

            yield dict(
                type="package",
                # renamed from package_hydra so the field matches the
                # "package_hydra_build" mapping declared in MAPPING above
                package_hydra_build=hydra,
                package_attr_name=attr_name,
                package_attr_name_query=list(split_query(attr_name)),
                package_attr_set=attr_set,
                package_pname=remove_attr_set(data["pname"]),
                package_pversion=data["version"],
                package_description=data["meta"].get("description"),
                package_longDescription=data["meta"].get("longDescription", ""),
                package_license=licenses,
                package_maintainers=maintainers,
                package_platforms=[i for i in platforms if i],
                package_position=position,
                package_homepage=data["meta"].get("homepage"),
                package_system=data["system"],
            )

    logger.debug(f"get_packages: Found {len(packages)} packages")
    return len(packages), gen
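# Note on get_packages() above (and get_options() below): a (count, generator)
# pair is returned instead of a plain list so write() can draw an accurate tqdm
# progress bar while elasticsearch.helpers.streaming_bulk consumes items lazily.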


def get_options(evaluation):
    result = subprocess.run(
        shlex.split(
            f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz"
        ),
        stdout=subprocess.PIPE,
        check=True,
    )
    options = []
    options_file = result.stdout.strip().decode()
    options_file = f"{options_file}/share/doc/nixos/options.json"
    if os.path.exists(options_file):
        with open(options_file) as f:
            options = json.load(f).items()
            options = list(options)

    def gen():
        for name, option in options:
            default = option.get("default")
            if default is not None:
                default = json.dumps(default)

            example = option.get("example")
            if example is not None:
                if type(example) == dict and example.get("_type") == "literalExample":
                    example = json.dumps(example["text"])
                else:
                    example = json.dumps(example)

            description = option.get("description")
            if description is not None:
                xml_description = (
                    f"<xml xmlns:xlink=\"http://www.w3.org/1999/xlink\">"
                    f"<para>{description}</para>"
                    f"</xml>"
                )
                # we first check if there are some xml elements before using pypandoc
                # since pypandoc calls are quite slow
                root = xml.etree.ElementTree.fromstring(xml_description)
                if len(list(root.find("para"))) > 0:
                    description = pypandoc.convert_text(
                        xml_description,
                        "html",
                        format="docbook",
                    )

            yield dict(
                type="option",
                option_name=name,
                option_description=description,
                option_type=option.get("type"),
                option_default=default,
                option_example=example,
                option_source=option.get("declarations", [None])[0],
            )

    return len(options), gen
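# Note on get_options() above: option defaults and examples are serialized
# with json.dumps so the index stores plain strings; e.g. (illustrative) a
# default of True becomes "true", and a literalExample stores its "text" field.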


def ensure_index(es, index, mapping, force=False):
    if es.indices.exists(index):
        logger.debug(f"ensure_index: index '{index}' already exists")
        if not force:
            return False

        logger.debug(f"ensure_index: Deleting index '{index}'")
        es.indices.delete(index)

    es.indices.create(
        index=index,
        body={
            "settings": {"number_of_shards": 1, "analysis": ANALYSIS},
            "mappings": mapping,
        },
    )
    logger.debug(f"ensure_index: index '{index}' was created")

    return True


def create_index_name(channel, evaluation_packages, evaluation_options):
    evaluation_name = "-".join(
        [
            evaluation_packages["id"],
            str(evaluation_packages["revisions_since_start"]),
            evaluation_packages["git_revision"],
            evaluation_options["id"],
            str(evaluation_options["revisions_since_start"]),
            evaluation_options["git_revision"],
        ]
    )
    return (
        f"latest-{INDEX_SCHEMA_VERSION}-{channel}",
        f"evaluation-{INDEX_SCHEMA_VERSION}-{channel}-{evaluation_name}",
    )
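# create_index_name() returns an (alias, index) pair, e.g. (illustrative):
#   ("latest-6-unstable", "evaluation-6-unstable-<packages eval>-<options eval>")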


def update_alias(es, name, index):
    if es.indices.exists_alias(name=name):
        indexes = set(es.indices.get_alias(name=name).keys())

        # indexes to remove from the alias
        actions = [
            {"remove": {"index": item, "alias": name}}
            for item in indexes.difference(set([index]))
        ]

        # add the index if it is not yet part of the alias
        if index not in indexes:
            actions.append({"add": {"index": index, "alias": name}})

        if actions:
            es.indices.update_aliases({"actions": actions})
    else:
        es.indices.put_alias(index=index, name=name)

    indexes = ", ".join(es.indices.get_alias(name=name).keys())
    logger.debug(f"'{name}' alias now points to '{indexes}' index")
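# Note on update_alias() above: all add/remove actions go through a single
# update_aliases call, which Elasticsearch applies atomically, so searches via
# the alias never hit a moment where it points at no index.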


def write(unit, es, index_name, number_of_items, item_generator):
    if number_of_items:
        click.echo(f"Indexing {unit}...")
        progress = tqdm.tqdm(unit=unit, total=number_of_items)
        successes = 0
        for ok, action in elasticsearch.helpers.streaming_bulk(
            client=es, index=index_name, actions=item_generator()
        ):
            progress.update(1)
            successes += ok
        click.echo(f"Indexed {successes}/{number_of_items} {unit}")


@click.command()
@click.option("-u", "--es-url", help="Elasticsearch connection url.")
@click.option("-c", "--channel", type=click.Choice(CHANNELS.keys()), help="Channel.")
@click.option("-f", "--force", is_flag=True, help="Force channel recreation.")
@click.option("-v", "--verbose", count=True)
def main(es_url, channel, force, verbose):
    logging_level = "CRITICAL"
    if verbose == 1:
        logging_level = "WARNING"
    elif verbose >= 2:
        logging_level = "DEBUG"

    logger.setLevel(getattr(logging, logging_level))
    logger.debug(f"Verbosity is {verbose}")
    logger.debug(f"Logging set to {logging_level}")

    evaluation_packages = get_last_evaluation(CHANNELS[channel]["packages"])
    evaluation_options = get_last_evaluation(CHANNELS[channel]["options"])
    evaluation_packages_builds = get_evaluation_builds(evaluation_packages["id"])

    es = elasticsearch.Elasticsearch([es_url])

    alias_name, index_name = create_index_name(
        channel, evaluation_packages, evaluation_options
    )
    index_created = ensure_index(es, index_name, MAPPING, force)

    if index_created:
        write(
            "packages",
            es,
            index_name,
            *get_packages(evaluation_packages, evaluation_packages_builds),
        )
        write("options", es, index_name, *get_options(evaluation_options))

    update_alias(es, alias_name, index_name)


if __name__ == "__main__":
    main()
# vi:ft=python