#! /usr/bin/env nix-shell
#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm
"""Import the packages and options of a Nix channel into Elasticsearch.

The script looks up the most recent evaluation of a channel in the
``nix-releases`` S3 bucket, asks ``nix-env`` / ``nix-build`` for the package
set and the options of that revision, and bulk-indexes both into two freshly
created indices named ``<channel>-packages`` and ``<channel>-options``.
"""

import json
import os.path
import shlex
import subprocess

import boto3
import click
import elasticsearch
import elasticsearch.helpers
import tqdm


CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Number of leading characters dropped from a "/nix/store/..." position.
# Presumably len("/nix/store/") + a 32 character store hash + "-" == 44;
# NOTE(review): confirm against the positions nix-env actually reports.
NIX_STORE_PREFIX_LEN = 44

# Field mappings of the "<channel>-packages" index.
PACKAGES_PROPERTIES = dict(
    attr_name=dict(type="keyword"),
    name=dict(type="text"),
    version=dict(type="text"),
    description=dict(type="text"),
    longDescription=dict(type="text"),
    license=dict(
        type="nested",
        properties=dict(
            fullName=dict(type="text"),
            url=dict(type="text"),
        ),
    ),
    maintainers=dict(
        type="nested",
        properties=dict(
            name=dict(type="text"),
            email=dict(type="text"),
            github=dict(type="text"),
        ),
    ),
    platforms=dict(type="keyword"),
    position=dict(type="text"),
    homepage=dict(type="text"),
)

# Field mappings of the "<channel>-options" index.
OPTIONS_PROPERTIES = dict(
    option_name=dict(type="keyword"),
    description=dict(type="text"),
    type=dict(type="keyword"),
    default=dict(type="text"),
    example=dict(type="text"),
    source=dict(type="keyword"),
)

# Index name suffix -> mappings; iteration order is the (re)creation order.
INDEX_PROPERTIES = dict(
    packages=PACKAGES_PROPERTIES,
    options=OPTIONS_PROPERTIES,
)


def _nixpkgs_url(evaluation):
    """Return the nixpkgs-channels tarball URL for the evaluation's revision."""
    return (
        "https://github.com/NixOS/nixpkgs-channels/archive/"
        f"{evaluation['git_revision']}.tar.gz"
    )


def get_last_evaluation(channel):
    """Return the newest evaluation of ``channel`` found in the S3 bucket.

    ``channel`` must have the form ``<project>-<version>``; it is split on
    the first ``-``. Every common prefix listed under
    ``<project>/<version>/`` in the ``nix-releases`` bucket is parsed as
    ``...<channel>[beta].<revisions_since_start>.<git_revision>/``; entries
    that do not match that shape are skipped.

    Returns:
        A dict with the keys ``revisions_since_start`` (int),
        ``git_revision`` (str) and ``prefix`` (the S3 prefix, str) of the
        entry with the highest ``revisions_since_start``.

    Raises:
        ValueError: if ``channel`` contains no ``-``.
        IndexError: if no evaluation could be parsed for the channel.
    """
    project, project_version = channel.split("-", 1)

    s3 = boto3.client("s3")
    s3_result = s3.list_objects(
        Bucket="nix-releases",
        Prefix=f"{project}/{project_version}/",
        Delimiter="/",
    )

    strip_length = len(f"{project}/{project_version}/{channel}")
    evaluations = []
    # "CommonPrefixes" may be missing from the response, so fall back to an
    # empty list instead of iterating over None.
    for item in s3_result.get("CommonPrefixes") or []:
        prefix = item.get("Prefix") if item else None
        if not prefix:
            continue
        evaluation = prefix[strip_length:]
        if evaluation.startswith("beta"):
            evaluation = evaluation[len("beta"):]
        try:
            revisions_since_start, git_revision = (
                evaluation.lstrip(".").rstrip("/").split(".")
            )
            revisions_since_start = int(revisions_since_start)
        except ValueError:
            # Wrong number of dot separated parts or a non-numeric counter:
            # this prefix is not an evaluation of the channel.
            continue
        evaluations.append(dict(
            revisions_since_start=revisions_since_start,
            git_revision=git_revision,
            prefix=prefix,
        ))

    if not evaluations:
        raise IndexError(f"no evaluations found for channel {channel!r}")

    # sorted() is stable, so among equal counters the last listed one wins.
    evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])
    return evaluations[-1]


def _normalize_licenses(licenses):
    """Return ``meta.license`` as a list of ``{fullName, url}`` dicts."""
    if not licenses:
        return []
    if isinstance(licenses, (str, dict)):
        # A single license is given either as its name or as one record.
        licenses = [licenses]
    return [
        dict(fullName=item, url=None)
        if isinstance(item, str)
        else dict(fullName=item.get("fullName"), url=item.get("url"))
        for item in licenses
    ]


def _normalize_maintainers(maintainers):
    """Return ``meta.maintainers`` as a list of ``{name, email, github}`` dicts."""
    return [
        dict(name=item, email=None, github=None)
        if isinstance(item, str)
        else dict(
            name=item.get("name"),
            email=item.get("email"),
            github=item.get("github"),
        )
        for item in maintainers
    ]


def get_packages(evaluation):
    """Query ``nix-env`` for all packages of the evaluation's revision.

    Args:
        evaluation: dict as returned by ``get_last_evaluation``; only
            ``git_revision`` is read.

    Returns:
        A tuple ``(count, gen)`` where ``count`` is the number of packages
        and ``gen`` is a zero-argument function returning a fresh generator
        of Elasticsearch documents (one dict per package).

    Raises:
        subprocess.CalledProcessError: if ``nix-env`` exits non-zero.
    """
    # The command is passed as an argument list so that quotes or spaces in
    # CURRENT_DIR or in the revision cannot change how it is tokenized.
    # NOTE(review): the "-f" argument is an empty string in the original
    # command; it looks like "<nixpkgs>" may have been intended - confirm.
    result = subprocess.run(
        [
            "nix-env",
            "-f", "",
            "-I", f"nixpkgs={_nixpkgs_url(evaluation)}",
            "--arg", "config", f"import {CURRENT_DIR}/packages-config.nix",
            "-qa",
            "--json",
        ],
        stdout=subprocess.PIPE,
        check=True,
    )
    packages = list(json.loads(result.stdout).items())

    def gen():
        for attr_name, data in packages:
            meta = data["meta"]

            position = meta.get("position")
            if position and position.startswith("/nix/store"):
                position = position[NIX_STORE_PREFIX_LEN:]

            yield dict(
                id=attr_name,
                attr_name=attr_name,
                name=data["pname"],
                version=data["version"],
                description=meta.get("description"),
                longDescription=meta.get("longDescription", ""),
                license=_normalize_licenses(meta.get("license")),
                maintainers=_normalize_maintainers(meta.get("maintainers", [])),
                # Only non-empty string entries are kept.
                platforms=[
                    entry
                    for entry in meta.get("platforms", [])
                    if isinstance(entry, str) and entry
                ],
                position=position,
                homepage=meta.get("homepage"),
            )

    return len(packages), gen


def get_options(evaluation):
    """Build the ``options`` attribute with ``nix-build`` and load its JSON.

    Args:
        evaluation: dict as returned by ``get_last_evaluation``; only
            ``git_revision`` is read.

    Returns:
        A tuple ``(count, gen)`` where ``count`` is the number of options
        and ``gen`` is a zero-argument function returning a fresh generator
        of Elasticsearch documents (one dict per option). ``count`` is 0
        when the build output contains no ``options.json``.

    Raises:
        subprocess.CalledProcessError: if ``nix-build`` exits non-zero.
    """
    # NOTE(review): no expression path is passed to nix-build, so what gets
    # built presumably depends on the current working directory - confirm.
    result = subprocess.run(
        [
            "nix-build",
            "--no-out-link",
            "-A", "options",
            "-I", f"nixpkgs={_nixpkgs_url(evaluation)}",
        ],
        stdout=subprocess.PIPE,
        check=True,
    )

    options = []
    options_file = result.stdout.strip().decode()
    options_file = f"{options_file}/share/doc/nixos/options.json"
    if os.path.exists(options_file):
        with open(options_file, encoding="utf-8") as f:
            options = list(json.load(f).items())

    def gen():
        for name, option in options:
            example = option.get("example")
            if (
                example
                and isinstance(example, dict)
                and example.get("_type") == "literalExample"
            ):
                example = str(example["text"])

            # A missing *or empty* declarations list yields source=None
            # instead of raising IndexError.
            declarations = option.get("declarations") or [None]

            yield dict(
                id=name,
                option_name=name,
                description=option.get("description"),
                type=option.get("type"),
                # str() is applied unconditionally, so an absent default or
                # example is indexed as the string "None".
                default=str(option.get("default")),
                example=str(example),
                source=declarations[0],
            )

    return len(options), gen


def recreate_index(es, channel):
    """Drop and recreate the ``<channel>-packages`` and ``<channel>-options`` indices.

    Args:
        es: an ``elasticsearch.Elasticsearch`` client.
        channel: channel name used as the index name prefix.
    """
    for suffix, properties in INDEX_PROPERTIES.items():
        index_name = f"{channel}-{suffix}"
        if es.indices.exists(index=index_name):
            es.indices.delete(index=index_name)
        es.indices.create(
            index=index_name,
            body=dict(
                settings=dict(number_of_shards=1),
                mappings=dict(properties=properties),
            ),
        )


def _index_documents(es, index, unit, total, make_actions):
    """Bulk-index the documents from ``make_actions()`` into ``index``.

    Does nothing when ``total`` is 0. ``unit`` ("packages" / "options") is
    used for the progress bar and the status messages.
    """
    if not total:
        return

    click.echo(f"Indexing {unit}...")
    successes = 0
    # The context manager closes the progress bar even if indexing fails.
    with tqdm.tqdm(unit=unit, total=total) as progress:
        for ok, _ in elasticsearch.helpers.streaming_bulk(
            client=es,
            index=index,
            actions=make_actions(),
        ):
            progress.update(1)
            successes += ok
    click.echo(f"Indexed {successes}/{total} {unit}")


@click.command()
@click.option("--es-url", help="Elasticsearch connection url")
@click.option(
    "--channel",
    required=True,
    help="Channel to import, in the form <project>-<version>",
)
def main(es_url, channel):
    """Recreate the channel's indices and fill them with packages and options."""
    evaluation = get_last_evaluation(channel)
    es = elasticsearch.Elasticsearch([es_url])
    recreate_index(es, channel)

    # write packages
    number_of_packages, gen_packages = get_packages(evaluation)
    _index_documents(
        es, f"{channel}-packages", "packages", number_of_packages, gen_packages
    )

    # write options
    number_of_options, gen_options = get_options(evaluation)
    _index_documents(
        es, f"{channel}-options", "options", number_of_options, gen_options
    )


if __name__ == "__main__":
    main()