channel-diff script (#206)

This commit is contained in:
Rok Garbas 2020-10-07 08:58:17 +02:00 committed by GitHub
parent 33c2c8a837
commit a23bcf1497
Failed to generate hash of commit
4 changed files with 482 additions and 375 deletions

View file

@ -25,6 +25,7 @@ mkPoetryApplication {
''; '';
postInstall = '' postInstall = ''
wrapProgram $out/bin/import-channel --set INDEX_SCHEMA_VERSION "${version}" wrapProgram $out/bin/import-channel --set INDEX_SCHEMA_VERSION "${version}"
wrapProgram $out/bin/channel-diff --set INDEX_SCHEMA_VERSION "${version}"
''; '';
shellHook = '' shellHook = ''
cd import-scripts/ cd import-scripts/

View file

@ -3,6 +3,7 @@ import botocore # type: ignore
import botocore.client # type: ignore import botocore.client # type: ignore
import click import click
import click_log # type: ignore import click_log # type: ignore
import dictdiffer # type: ignore
import elasticsearch # type: ignore import elasticsearch # type: ignore
import elasticsearch.helpers # type: ignore import elasticsearch.helpers # type: ignore
import json import json
@ -25,6 +26,7 @@ click_log.basic_config(logger)
S3_BUCKET = "nix-releases" S3_BUCKET = "nix-releases"
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
INDEX_SCHEMA_VERSION = os.environ.get("INDEX_SCHEMA_VERSION", 0) INDEX_SCHEMA_VERSION = os.environ.get("INDEX_SCHEMA_VERSION", 0)
DIFF_OUTPUT = ["json", "stats"]
CHANNELS = { CHANNELS = {
"unstable": "nixos/unstable/nixos-21.03pre", "unstable": "nixos/unstable/nixos-21.03pre",
"19.09": "nixos/19.09/nixos-19.09.", "19.09": "nixos/19.09/nixos-19.09.",
@ -389,7 +391,7 @@ def remove_attr_set(name):
return name return name
def get_packages(evaluation, evaluation_builds): def get_packages_raw(evaluation):
logger.debug( logger.debug(
f"get_packages: Retriving list of packages for '{evaluation['git_revision']}' revision" f"get_packages: Retriving list of packages for '{evaluation['git_revision']}' revision"
) )
@ -401,15 +403,14 @@ def get_packages(evaluation, evaluation_builds):
check=True, check=True,
) )
packages = json.loads(result.stdout).items() packages = json.loads(result.stdout).items()
packages = list(packages) return list(packages)
def get_packages(evaluation, evaluation_builds):
packages = list(get_packages_raw(evaluation))
def gen(): def gen():
for attr_name, data in packages: for attr_name, data in packages:
position = data["meta"].get("position")
if position and position.startswith("/nix/store"):
position = position[44:]
licenses = data["meta"].get("license") licenses = data["meta"].get("license")
if licenses: if licenses:
if type(licenses) == str: if type(licenses) == str:
@ -462,6 +463,10 @@ def get_packages(evaluation, evaluation_builds):
} }
) )
position = data["meta"].get("position")
if position and position.startswith("/nix/store"):
position = position[44:]
package_attr_name_query = list(parse_query(attr_name)) package_attr_name_query = list(parse_query(attr_name))
package_pname = remove_attr_set(data["pname"]) package_pname = remove_attr_set(data["pname"])
package_description = data["meta"].get("description") package_description = data["meta"].get("description")
@ -495,7 +500,10 @@ def get_packages(evaluation, evaluation_builds):
return len(packages), gen return len(packages), gen
def get_options(evaluation): def get_options_raw(evaluation):
logger.debug(
f"get_packages: Retriving list of options for '{evaluation['git_revision']}' revision"
)
result = subprocess.run( result = subprocess.run(
shlex.split( shlex.split(
f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs/archive/{evaluation['git_revision']}.tar.gz" f"nix-build <nixpkgs/nixos/release.nix> --no-out-link -A options -I nixpkgs=https://github.com/NixOS/nixpkgs/archive/{evaluation['git_revision']}.tar.gz"
@ -509,7 +517,11 @@ def get_options(evaluation):
if os.path.exists(options_file): if os.path.exists(options_file):
with open(options_file) as f: with open(options_file) as f:
options = json.load(f).items() options = json.load(f).items()
options = list(options) return list(options)
def get_options(evaluation):
options = get_options_raw(evaluation)
def gen(): def gen():
for name, option in options: for name, option in options:
@ -632,13 +644,7 @@ def write(unit, es, index_name, number_of_items, item_generator):
click.echo(f"Indexed {successes}/{number_of_items} {unit}") click.echo(f"Indexed {successes}/{number_of_items} {unit}")
@click.command() def setup_logging(verbose):
@click.option("-u", "--es-url", help="Elasticsearch connection url.")
@click.option("-c", "--channel", type=click.Choice(CHANNELS.keys()), help="Channel.")
@click.option("-f", "--force", is_flag=True, help="Force channel recreation.")
@click.option("-v", "--verbose", count=True)
def run(es_url, channel, force, verbose):
logging_level = "CRITICAL" logging_level = "CRITICAL"
if verbose == 1: if verbose == 1:
logging_level = "WARNING" logging_level = "WARNING"
@ -649,6 +655,15 @@ def run(es_url, channel, force, verbose):
logger.debug(f"Verbosity is {verbose}") logger.debug(f"Verbosity is {verbose}")
logger.debug(f"Logging set to {logging_level}") logger.debug(f"Logging set to {logging_level}")
@click.command()
@click.option("-u", "--es-url", help="Elasticsearch connection url.")
@click.option("-c", "--channel", type=click.Choice(CHANNELS.keys()), help="Channel.")
@click.option("-f", "--force", is_flag=True, help="Force channel recreation.")
@click.option("-v", "--verbose", count=True)
def run_import(es_url, channel, force, verbose):
setup_logging(verbose)
evaluation_packages = get_last_evaluation(CHANNELS[channel]) evaluation_packages = get_last_evaluation(CHANNELS[channel])
evaluation_options = get_last_evaluation(CHANNELS[channel]) evaluation_options = get_last_evaluation(CHANNELS[channel])
evaluation_packages_builds = ( evaluation_packages_builds = (
@ -674,5 +689,93 @@ def run(es_url, channel, force, verbose):
update_alias(es, alias_name, index_name) update_alias(es, alias_name, index_name)
def prepare_items(key, total, func):
    """Index the items produced by ``func`` by one of their fields.

    ``func`` is called without arguments and must return an iterable of
    subscriptable items. Each item is stored under ``item[key]``; when
    several items share the same value for ``key`` the last one wins.

    ``total`` is accepted but not used by this function.
    """
    logger.info(f"Preparing items ({key})...")
    indexed = {}
    for entry in func():
        indexed[entry[key]] = entry
    return indexed
def get_packages_diff(evaluation):
    """Yield ``(attr_name, comparable, raw)`` for every package of an evaluation.

    ``comparable`` is a small dict holding only the attribute name and the
    package's ``version`` entry (``None`` when absent); ``raw`` is the
    package data exactly as returned by ``get_packages_raw``.
    """
    for attr_name, raw_package in get_packages_raw(evaluation):
        comparable = {
            "attr_name": attr_name,
            "version": raw_package.get("version"),
        }
        yield attr_name, comparable, raw_package
def get_options_diff(evaluation):
    """Yield ``(name, comparable, raw)`` for every option of an evaluation.

    ``comparable`` is a small dict holding only the option name and its
    ``type`` and ``default`` entries (``None`` when absent); ``raw`` is the
    option data exactly as returned by ``get_options_raw``.
    """
    for name, raw_option in get_options_raw(evaluation):
        comparable = {
            "name": name,
            "type": raw_option.get("type"),
            "default": raw_option.get("default"),
        }
        yield name, comparable, raw_option
def create_diff(type_, items_from, items_to):
    """Compute which items were added, removed or updated between two mappings.

    Both ``items_from`` and ``items_to`` map a key to a sequence whose first
    element is the dict used for comparison (``run_diff`` passes
    ``(comparable, raw)`` tuples).

    Returns a dict with three lists:

    * ``added``   -- values of ``items_to`` whose key is missing in ``items_from``
    * ``removed`` -- values of ``items_from`` whose key is missing in ``items_to``
    * ``updated`` -- ``(changes, value_from, value_to)`` for keys present in
      both mappings whose comparison dicts differ, where ``changes`` is the
      list produced by ``dictdiffer.diff``

    ``type_`` is only used in the debug log message.
    """
    logger.debug(f"Starting to diff {type_}...")

    added = [item for key, item in items_to.items() if key not in items_from]
    removed = [item for key, item in items_from.items() if key not in items_to]

    # Walk ``items_from`` in insertion order instead of iterating a set
    # intersection: set iteration order for string keys changes between
    # interpreter runs, which made the ``updated`` list (and the JSON output
    # built from it) non-reproducible.
    updated = []
    for key, item_from in items_from.items():
        if key not in items_to:
            continue
        item_to = items_to[key]
        if item_from[0] != item_to[0]:
            changes = list(dictdiffer.diff(item_from[0], item_to[0]))
            updated.append((changes, item_from, item_to))

    return dict(added=added, removed=removed, updated=updated)
@click.command()
@click.option("-v", "--verbose", count=True)
@click.option("-o", "--output", default="stats", type=click.Choice(DIFF_OUTPUT))
@click.argument("channel_from", type=click.Choice(CHANNELS.keys()))
@click.argument("channel_to", type=click.Choice(CHANNELS.keys()))
def run_diff(channel_from, channel_to, output, verbose):
    # Command-line entry point: diff the packages and options of the latest
    # evaluations of two channels and print either summary counts ("stats")
    # or the complete diff as JSON ("json").
    #
    # Documented with comments rather than a docstring on purpose: click
    # would use a docstring as the command's --help text.
    setup_logging(verbose)

    # TODO: channel_from and channel_to should not be the same
    evaluation_from = get_last_evaluation(CHANNELS[channel_from])
    evaluation_to = get_last_evaluation(CHANNELS[channel_to])

    def collect(entries):
        # Map each key to its (comparable, raw) pair.
        return {key: (item, item_raw) for key, item, item_raw in entries}

    packages_from = collect(get_packages_diff(evaluation_from))
    packages_to = collect(get_packages_diff(evaluation_to))
    options_from = collect(get_options_diff(evaluation_from))
    options_to = collect(get_options_diff(evaluation_to))

    packages_diff = create_diff("packages", packages_from, packages_to)
    options_diff = create_diff("options", options_from, options_to)

    if output == "json":
        click.echo(json.dumps(dict(packages=packages_diff, options=options_diff,)))
        return

    if output != "stats":
        click.echo(f"ERROR: unknown output {output}")
        return

    # Same report layout for both sections; only heading and data differ.
    sections = (
        ("Packages:", packages_from, packages_to, packages_diff),
        ("Options:", options_from, options_to, options_diff),
    )
    for heading, found_from, found_to, diff in sections:
        click.echo(heading)
        click.echo(f" All in {channel_from}: {len(found_from)}")
        click.echo(f" All in {channel_to}: {len(found_to)}")
        click.echo(f" Added: {len(diff['added'])}")
        click.echo(f" Removed: {len(diff['removed'])}")
        click.echo(f" Updated: {len(diff['updated'])}")
if __name__ == "__main__": if __name__ == "__main__":
run() run_diff()

File diff suppressed because one or more lines are too long

View file

@ -8,7 +8,8 @@ include = [
] ]
[tool.poetry.scripts] [tool.poetry.scripts]
import-channel = 'import_scripts.channel:run' import-channel = 'import_scripts.channel:run_import'
channel-diff = 'import_scripts.channel:run_diff'
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.8" python = "^3.8"
@ -19,6 +20,7 @@ elasticsearch = "^7.8.0"
boto3 = "^1.14.5" boto3 = "^1.14.5"
tqdm = "^4.46.1" tqdm = "^4.46.1"
pypandoc = "^1.5" pypandoc = "^1.5"
dictdiffer = "^0.8.1"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
ipdb = "^0.13.2" ipdb = "^0.13.2"