initial version of import script
This commit is contained in:
commit
5bccd8c0b2
226
scripts/import-channels-into-elasticsearch
Executable file
226
scripts/import-channels-into-elasticsearch
Executable file
|
@ -0,0 +1,226 @@
|
|||
#! /usr/bin/env nix-shell
|
||||
#! nix-shell -i python3 -p python3 python3Packages.click python3Packages.elasticsearch python3Packages.boto3 python3Packages.tqdm
|
||||
|
||||
|
||||
import boto3
|
||||
import click
|
||||
import elasticsearch
|
||||
import elasticsearch.helpers
|
||||
import json
|
||||
import os.path
|
||||
import shlex
|
||||
import subprocess
|
||||
import tqdm
|
||||
|
||||
|
||||
# Absolute path of the directory that contains this script. It is used to
# locate the packages-config.nix file that sits next to the script.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def get_last_evaluation(channel):
    """Return the newest evaluation of ``channel`` found in the S3 bucket.

    ``channel`` is split on its first ``-`` into a project and a project
    version (``"nixos-19.09"`` -> ``"nixos"`` / ``"19.09"``).  The common
    prefixes ("directories") directly below ``<project>/<project_version>/``
    in the ``nix-releases`` bucket are each parsed as one evaluation.

    Returns:
        A dict with the keys ``revisions_since_start`` (int),
        ``git_revision`` (str) and ``prefix`` (the S3 key prefix) for the
        evaluation with the highest ``revisions_since_start``.

    Raises:
        ValueError: if ``channel`` contains no ``-`` or a key prefix does not
            reduce to exactly ``<revisions_since_start>.<git_revision>``.
        IndexError: if no evaluation exists for the channel.
    """
    project, project_version = channel.split("-", 1)
    base_prefix = f"{project}/{project_version}/"
    # Every key prefix is cut after as many characters as
    # "<project>/<project_version>/<channel>" has; what remains is expected
    # to look like "[beta][.]<revisions_since_start>.<git_revision>/".
    skip = len(f"{base_prefix}{channel}")

    s3 = boto3.client("s3")
    # Paginate: a single list_objects response is capped, so without this
    # the newest evaluation could be missing from the result.
    pages = s3.get_paginator("list_objects").paginate(
        Bucket="nix-releases",
        Prefix=base_prefix,
        Delimiter="/",
    )

    evaluations = []
    for page in pages:
        # "CommonPrefixes" is absent from the response when nothing matched.
        for item in page.get("CommonPrefixes") or []:
            if not item:
                continue
            prefix = item.get("Prefix")
            evaluation = prefix[skip:]
            if evaluation.startswith("beta"):
                evaluation = evaluation[len("beta"):]
            revisions_since_start, git_revision = (
                evaluation.lstrip(".").rstrip("/").split(".")
            )
            evaluations.append(dict(
                revisions_since_start=int(revisions_since_start),
                git_revision=git_revision,
                prefix=prefix,
            ))

    if not evaluations:
        raise IndexError(f"no evaluations found for channel {channel!r}")

    # Stable sort + last element: on ties the evaluation listed last wins.
    evaluations = sorted(evaluations, key=lambda i: i["revisions_since_start"])
    return evaluations[-1]
|
||||
|
||||
|
||||
def _normalize_licenses(licenses):
    """Turn ``meta.license`` into a list of ``{fullName, url}`` dicts.

    A single string or dict is wrapped in a list first.  Falsy values
    (missing license, empty list, ...) are returned unchanged.
    """
    if not licenses:
        return licenses
    if isinstance(licenses, (str, dict)):
        licenses = [licenses]
    return [
        dict(fullName=entry, url=None)
        if isinstance(entry, str)
        else dict(fullName=entry.get("fullName"), url=entry.get("url"))
        for entry in licenses
    ]


def _normalize_maintainers(maintainers):
    """Turn ``meta.maintainers`` into a list of ``{name, email, github}`` dicts."""
    return [
        dict(name=entry, email=None, github=None)
        if isinstance(entry, str)
        else dict(
            name=entry.get("name"),
            email=entry.get("email"),
            github=entry.get("github"),
        )
        for entry in maintainers
    ]


def get_packages(evaluation):
    """Query all packages of ``evaluation`` with ``nix-env -qa --json``.

    ``evaluation`` must provide ``evaluation["git_revision"]``; nixpkgs is
    fetched from the nixpkgs-channels archive of that revision and evaluated
    with the ``packages-config.nix`` located in ``CURRENT_DIR``.

    Returns:
        A ``(count, gen)`` tuple: the number of packages reported by nix-env
        and a zero-argument callable that returns a fresh generator of
        package documents (one dict per package).

    Raises:
        subprocess.CalledProcessError: if nix-env exits with a non-zero status.
    """
    nixpkgs_url = (
        "https://github.com/NixOS/nixpkgs-channels/archive/"
        f"{evaluation['git_revision']}.tar.gz"
    )
    # Pass an argv list instead of re-tokenising a formatted string, so a
    # CURRENT_DIR containing whitespace or quotes cannot change the arguments.
    result = subprocess.run(
        [
            "nix-env",
            "-f", "<nixpkgs>",
            "-I", f"nixpkgs={nixpkgs_url}",
            "--arg", "config", f"import {CURRENT_DIR}/packages-config.nix",
            "-qa",
            "--json",
        ],
        stdout=subprocess.PIPE,
        check=True,
    )
    packages = json.loads(result.stdout)

    def gen():
        for attr_name, data in packages.items():
            meta = data["meta"]
            position = meta.get("position")
            if position and position.startswith("/nix/store"):
                # Drops the first 44 characters -- presumably
                # "/nix/store/<32-char hash>-"; TODO confirm.
                position = position[44:]
            yield dict(
                attr_name=attr_name,
                name=data["pname"],
                version=data["version"],
                description=meta.get("description"),
                longDescription=meta.get("longDescription", ""),
                license=_normalize_licenses(meta.get("license")),
                maintainers=_normalize_maintainers(meta.get("maintainers", [])),
                position=position,
                homepage=meta.get("homepage"),
            )

    return len(packages), gen
|
||||
|
||||
|
||||
def get_options(evaluation):
    """Build the NixOS options listing of ``evaluation`` and expose it.

    Runs ``nix-build <nixpkgs/nixos/release.nix> -A options`` against the
    nixpkgs-channels archive of ``evaluation["git_revision"]`` and reads
    ``share/doc/nixos/options.json`` from the printed output path.  If that
    file does not exist, the result is empty.

    Returns:
        A ``(count, gen)`` tuple: the number of options and a zero-argument
        callable that returns a fresh generator of option documents.

    Raises:
        subprocess.CalledProcessError: if nix-build exits with a non-zero status.
    """
    nixpkgs_url = (
        "https://github.com/NixOS/nixpkgs-channels/archive/"
        f"{evaluation['git_revision']}.tar.gz"
    )
    # argv list instead of shlex.split() on a formatted string, so the
    # revision can never be split into extra arguments.
    result = subprocess.run(
        [
            "nix-build",
            "<nixpkgs/nixos/release.nix>",
            "--no-out-link",
            "-A", "options",
            "-I", f"nixpkgs={nixpkgs_url}",
        ],
        stdout=subprocess.PIPE,
        check=True,
    )

    options = {}
    out_path = result.stdout.strip().decode()
    options_file = f"{out_path}/share/doc/nixos/options.json"
    if os.path.exists(options_file):
        # Explicit encoding: do not depend on the locale of the nix-shell.
        with open(options_file, encoding="utf-8") as f:
            options = json.load(f)

    def gen():
        for name, option in options.items():
            example = option.get("example")
            if (
                example
                and isinstance(example, dict)
                and example.get("_type") == "literalExample"
            ):
                example = str(example["text"])
            # A missing *or empty* "declarations" list yields source=None
            # instead of raising IndexError.
            declarations = option.get("declarations") or [None]
            yield dict(
                option_name=name,
                description=option.get("description"),
                type=option.get("type"),
                # NOTE(review): str() turns a missing default/example into
                # the literal string "None"; kept as-is to preserve the
                # indexed data.
                default=str(option.get("default")),
                example=str(example),
                source=declarations[0],
            )

    return len(options), gen
|
||||
|
||||
def recreate_index(es, channel):
    """Drop and recreate the ``<channel>-packages`` and ``<channel>-options`` indices.

    For each of the two indices (packages first, then options): delete it if
    it already exists, then create it with one shard and an explicit mapping.

    ``es`` is the Elasticsearch client; only ``es.indices.exists``,
    ``es.indices.delete`` and ``es.indices.create`` are used.  All of them are
    called with keyword arguments only.
    """
    package_properties = dict(
        attr_name=dict(type="text"),
        name=dict(type="text"),
        version=dict(type="text"),
        description=dict(type="text"),
        longDescription=dict(type="text"),
        license=dict(
            type="nested",
            properties=dict(
                fullName=dict(type="text"),
                url=dict(type="text"),
            ),
        ),
        maintainers=dict(
            type="nested",
            properties=dict(
                name=dict(type="text"),
                email=dict(type="text"),
                github=dict(type="text"),
            ),
        ),
        position=dict(type="text"),
        homepage=dict(type="text"),
    )
    option_properties = dict(
        option_name=dict(type="text"),
        description=dict(type="text"),
        type=dict(type="keyword"),
        default=dict(type="text"),
        example=dict(type="text"),
        source=dict(type="keyword"),
    )

    schemas = (
        ("packages", package_properties),
        ("options", option_properties),
    )
    for suffix, properties in schemas:
        index = f"{channel}-{suffix}"
        if es.indices.exists(index=index):
            es.indices.delete(index=index)
        es.indices.create(
            index=index,
            body=dict(
                settings=dict(number_of_shards=1),
                mappings=dict(properties=properties),
            ),
        )
|
||||
|
||||
|
||||
def _bulk_index(es, index, unit, total, actions):
    """Stream ``actions`` into ``index`` with a progress bar and report the result."""
    click.echo(f"Indexing {unit}...")
    successes = 0
    # The context manager closes the progress bar even if indexing fails.
    with tqdm.tqdm(unit=unit, total=total) as progress:
        for ok, _ in elasticsearch.helpers.streaming_bulk(
            client=es,
            index=index,
            actions=actions,
        ):
            progress.update(1)
            successes += ok
    click.echo(f"Indexed {successes}/{total} {unit}")


@click.command()
@click.option("--es-url", help="Elasticsearch connection url")
@click.option(
    "--channel",
    required=True,
    help="Channel to import, in the form <project>-<version>",
)
def main(es_url, channel):
    """Import the packages and options of a channel into Elasticsearch.

    Looks up the latest evaluation of ``channel``, recreates the
    ``<channel>-packages`` and ``<channel>-options`` indices and fills them
    from that evaluation.  An index whose source is empty is left empty.
    """
    evaluation = get_last_evaluation(channel)
    es = elasticsearch.Elasticsearch([es_url])
    # NOTE(review): the indices are dropped before any data has been
    # fetched, so a failing nix-env/nix-build leaves them empty.
    recreate_index(es, channel)

    # write packages
    number_of_packages, gen_packages = get_packages(evaluation)
    if number_of_packages:
        _bulk_index(
            es,
            f"{channel}-packages",
            "packages",
            number_of_packages,
            gen_packages(),
        )

    # write options
    number_of_options, gen_options = get_options(evaluation)
    if number_of_options:
        _bulk_index(
            es,
            f"{channel}-options",
            "options",
            number_of_options,
            gen_options(),
        )
|
||||
|
||||
|
||||
# Invoke the click command only when the file is executed as a script.
if __name__ == "__main__":
    main()
|
12
scripts/packages-config.nix
Normal file
12
scripts/packages-config.nix
Normal file
|
@ -0,0 +1,12 @@
|
|||
# nixpkgs configuration used when listing packages for the search index.
# The import script passes this file to nix-env as
# `--arg config 'import <dir>/packages-config.nix'`.
{
  # Ensures no aliases are in the results.
  allowAliases = false;

  # Enable recursion into attribute sets that nix-env normally doesn't look into
  # so that we can get a more complete picture of the available packages for the
  # purposes of the index.
  packageOverrides = super: {
    haskellPackages = super.recurseIntoAttrs super.haskellPackages;
    rPackages = super.recurseIntoAttrs super.rPackages;
  };
}
|
Loading…
Reference in a new issue