commit 5bccd8c0b2fbf8b9cbf164e75320c82f6ac4d69a Author: Rok Garbas Date: Sat Mar 28 02:34:38 2020 +0100 initial version of import script diff --git a/scripts/import-channels-into-elasticsearch b/scripts/import-channels-into-elasticsearch new file mode 100755 index 0000000..23a31bf --- /dev/null +++ b/scripts/import-channels-into-elasticsearch @@ -0,0 +1,226 @@ +#! /usr/bin/env nix-shell +#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm + + +import boto3 +import click +import elasticsearch +import elasticsearch.helpers +import json +import os.path +import shlex +import subprocess +import tqdm + + +CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def get_last_evaluation(channel): + project, project_version = channel.split("-", 1) + + s3 = boto3.client("s3") + s3_result = s3.list_objects( + Bucket="nix-releases", + Prefix=f"{project}/{project_version}/", + Delimiter="/", + ) + evaluations = [] + for item in s3_result.get("CommonPrefixes"): + if not item : + continue + prefix = item.get("Prefix") + evaluation = prefix[len(f"{project}/{project_version}/{channel}"):] + if evaluation.startswith("beta"): + evaluation = evaluation[len("beta"):] + revisions_since_start, git_revision = evaluation.lstrip(".").rstrip("/").split(".") + evaluations.append(dict( + revisions_since_start=int(revisions_since_start), + git_revision=git_revision, + prefix=prefix, + )) + + evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"]) + return evaluations[-1] + + +def get_packages(evaluation): + result = subprocess.run( + shlex.split(f"nix-env -f '' -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz --arg config 'import {CURRENT_DIR}/packages-config.nix' -qa --json"), + stdout=subprocess.PIPE, + check=True, + ) + packages = json.loads(result.stdout).items() + + def gen(): + for attr_name, data in packages: + position = data["meta"].get("position") + if position and position.startswith("/nix/store"): + position = position[44:] + licenses = data["meta"].get("license") + if licenses: + if type(licenses) == str: + licenses = [dict(fullName=licenses)] + elif type(licenses) == dict: + licenses = [licenses] + licenses = [ + type(license) == str + and dict(fullName=license, url=None) + or dict( + fullName=license.get("fullName"), + url=license.get("url"), + ) + for license in licenses + ] + maintainers = [ + type(maintainer) == str + and dict(name=maintainer, email=None, github=None) + or dict( + name=maintainer.get("name"), + email=maintainer.get("email"), + github=maintainer.get("github"), + ) + for maintainer in data["meta"].get("maintainers", []) + ] + yield dict( + attr_name=attr_name, + name=data["pname"], + version=data["version"], + description=data["meta"].get("description"), + longDescription=data["meta"].get("longDescription", ""), + license=licenses, + maintainers=maintainers, + position=position, + homepage=data["meta"].get("homepage"), + ) + + return len(packages), gen + + +def get_options(evaluation): + result = subprocess.run( + shlex.split(f"nix-build --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/{evaluation['git_revision']}.tar.gz"), + stdout=subprocess.PIPE, + check=True, + ) + options = [] + options_file = result.stdout.strip().decode() + options_file = f"{options_file}/share/doc/nixos/options.json" + if os.path.exists(options_file): + with open(options_file) as f: + options = json.load(f).items() + + def gen(): + for name, option in options: + example = option.get("example") + if example and \ + type(example) == dict and \ + example.get("_type") == "literalExample": + example = str(example["text"]) + yield dict( + option_name=name, + description=option.get("description"), + type=option.get("type"), + default=str(option.get("default")), + example=str(example), + source=option.get("declarations", [None])[0], + ) + + return len(options), gen + +def recreate_index(es, channel): + if es.indices.exists(f"{channel}-packages"): + es.indices.delete(index=f"{channel}-packages") + es.indices.create( + index=f"{channel}-packages", + body=dict( + settings=dict(number_of_shards=1), + mappings=dict( + properties=dict( + attr_name=dict(type="text"), + name=dict(type="text"), + version=dict(type="text"), + description=dict(type="text"), + longDescription=dict(type="text"), + license=dict( + type="nested", + properties=dict( + fullName=dict(type="text"), + url=dict(type="text"), + ), + ), + maintainers=dict( + type="nested", + properties=dict( + name=dict(type="text"), + email=dict(type="text"), + github=dict(type="text"), + ), + ), + position=dict(type="text"), + homepage=dict(type="text"), + ), + ), + ), + ) + if es.indices.exists(f"{channel}-options"): + es.indices.delete(index=f"{channel}-options") + es.indices.create( + index=f"{channel}-options", + body=dict( + settings=dict(number_of_shards=1), + mappings=dict( + properties=dict( + option_name=dict(type="text"), + description=dict(type="text"), + type=dict(type="keyword"), + default=dict(type="text"), + example=dict(type="text"), + source=dict(type="keyword"), + ), + ), + ), + ) + + +@click.command() +@click.option("--es-url", help="Elasticsearch connection url") +@click.option("--channel") +def main(es_url, channel): + evaluation = get_last_evaluation(channel) + es = elasticsearch.Elasticsearch([es_url]) + recreate_index(es, channel) + + # write packages + number_of_packages, gen_packages = get_packages(evaluation) + packages = list(gen_packages()) + if number_of_packages: + click.echo("Indexing packages...") + progress = tqdm.tqdm(unit="packages", total=number_of_packages) + successes = 0 + for ok, action in elasticsearch.helpers.streaming_bulk( + client=es, + index=f"{channel}-packages", + actions=gen_packages()): + progress.update(1) + successes += ok + print("Indexed %d/%d packages" % (successes, number_of_packages)) + + # write options + number_of_options, gen_options = get_options(evaluation) + options = list(gen_options()) + if number_of_options: + click.echo("Indexing options...") + progress = tqdm.tqdm(unit="options", total=number_of_options) + successes = 0 + for ok, action in elasticsearch.helpers.streaming_bulk( + client=es, + index=f"{channel}-options", + actions=gen_options()): + progress.update(1) + successes += ok + print("Indexed %d/%d options" % (successes, number_of_options)) + + +if __name__ == "__main__": + main() diff --git a/scripts/packages-config.nix b/scripts/packages-config.nix new file mode 100644 index 0000000..38b95ca --- /dev/null +++ b/scripts/packages-config.nix @@ -0,0 +1,12 @@ +{ + # Ensures no aliases are in the results. + allowAliases = false; + + # Enable recursion into attribute sets that nix-env normally doesn't look into + # so that we can get a more complete picture of the available packages for the + # purposes of the index. + packageOverrides = super: { + haskellPackages = super.recurseIntoAttrs super.haskellPackages; + rPackages = super.recurseIntoAttrs super.rPackages; + }; +}