From 6406f34ebeeb4ca52668dd4cf831afd55628fe75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sun, 10 Sep 2023 08:11:56 +0000 Subject: [PATCH] first commit; very broken --- README.md | 4 + buildbot_nix/buildbot_nix.py | 529 ++++++++++++++++++++++++++++++++ buildbot_nix/github_projects.py | 98 ++++++ buildbot_nix/irc_notify.py | 150 +++++++++ buildbot_nix/master.py | 238 ++++++++++++++ buildbot_nix/worker.py | 58 ++++ nix/master.nix | 98 ++++++ nix/worker.nix | 52 ++++ pyproject.toml | 18 ++ 9 files changed, 1245 insertions(+) create mode 100644 README.md create mode 100644 buildbot_nix/buildbot_nix.py create mode 100644 buildbot_nix/github_projects.py create mode 100644 buildbot_nix/irc_notify.py create mode 100644 buildbot_nix/master.py create mode 100644 buildbot_nix/worker.py create mode 100644 nix/master.nix create mode 100644 nix/worker.nix create mode 100644 pyproject.toml diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d8b9a2 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# Buildbot-nix + +A nixos module to make buildbot a proper Nix-CI. + diff --git a/buildbot_nix/buildbot_nix.py b/buildbot_nix/buildbot_nix.py new file mode 100644 index 0000000..3ed6955 --- /dev/null +++ b/buildbot_nix/buildbot_nix.py @@ -0,0 +1,529 @@ +#!/usr/bin/env python3 + +import json +import multiprocessing +import os +import uuid +from collections import defaultdict +from pathlib import Path +from typing import Any, Generator, List + +from buildbot.plugins import steps, util +from buildbot.process import buildstep, logobserver, remotecommand +from buildbot.process.log import Log +from buildbot.process.properties import Properties +from buildbot.process.results import ALL_RESULTS, statusToString +from buildbot.steps.trigger import Trigger +from github_projects import GithubProject +from twisted.internet import defer + + +class BuildTrigger(Trigger): + """ + Dynamic trigger that creates a build for every attribute. + """ + + def __init__( + self, scheduler: str, jobs: list[dict[str, Any]], **kwargs: Any + ) -> None: + if "name" not in kwargs: + kwargs["name"] = "trigger" + self.jobs = jobs + self.config = None + Trigger.__init__( + self, + waitForFinish=True, + schedulerNames=[scheduler], + haltOnFailure=True, + flunkOnFailure=True, + sourceStamps=[], + alwaysUseLatest=False, + updateSourceStamp=False, + **kwargs, + ) + + def createTriggerProperties(self, props: Any) -> Any: + return props + + def getSchedulersAndProperties(self) -> list[tuple[str, Properties]]: + build_props = self.build.getProperties() + repo_name = build_props.getProperty( + "github.base.repo.full_name", + build_props.getProperty("github.repository.full_name"), + ) + project_id = repo_name.replace("/", "-") + source = f"nix-eval-{project_id}" + + sch = self.schedulerNames[0] + triggered_schedulers = [] + for job in self.jobs: + attr = job.get("attr", "eval-error") + name = attr + if repo_name is not None: + name = f"github:{repo_name}#checks.{name}" + else: + name = f"checks.{name}" + drv_path = job.get("drvPath") + error = job.get("error") + system = job.get("system") + out_path = job.get("outputs", {}).get("out") + + build_props.setProperty(f"{attr}-out_path", out_path, source) + build_props.setProperty(f"{attr}-drv_path", drv_path, source) + + props = Properties() + props.setProperty("virtual_builder_name", name, source) + props.setProperty("status_name", f"nix-build .#checks.{attr}", source) + props.setProperty("virtual_builder_tags", "", source) + props.setProperty("attr", attr, source) + props.setProperty("system", system, source) + props.setProperty("drv_path", drv_path, source) + props.setProperty("out_path", out_path, source) + # we use this to identify builds when running a retry + props.setProperty("build_uuid", str(uuid.uuid4()), source) + props.setProperty("error", error, source) + triggered_schedulers.append((sch, props)) + return triggered_schedulers + + def getCurrentSummary(self) -> dict[str, str]: + """ + The original build trigger will the generic builder name `nix-build` in this case, which is not helpful + """ + if not self.triggeredNames: + return {"step": "running"} + summary = [] + if self._result_list: + for status in ALL_RESULTS: + count = self._result_list.count(status) + if count: + summary.append( + f"{self._result_list.count(status)} {statusToString(status, count)}" + ) + return {"step": f"({', '.join(summary)})"} + + +class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): + """ + Parses the output of `nix-eval-jobs` and triggers a `nix-build` build for + every attribute. + """ + + def __init__(self, supported_systems: list[str], **kwargs: Any) -> None: + kwargs = self.setupShellMixin(kwargs) + super().__init__(**kwargs) + self.observer = logobserver.BufferLogObserver() + self.addLogObserver("stdio", self.observer) + self.supported_systems = supported_systems + + @defer.inlineCallbacks + def run(self) -> Generator[Any, object, Any]: + # run nix-instanstiate to generate the dict of stages + cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() + yield self.runCommand(cmd) + + # if the command passes extract the list of stages + result = cmd.results() + if result == util.SUCCESS: + # create a ShellCommand for each stage and add them to the build + jobs = [] + + for line in self.observer.getStdout().split("\n"): + if line != "": + try: + job = json.loads(line) + except json.JSONDecodeError as e: + raise Exception(f"Failed to parse line: {line}") from e + jobs.append(job) + build_props = self.build.getProperties() + repo_name = build_props.getProperty( + "github.base.repo.full_name", + build_props.getProperty("github.repository.full_name"), + ) + project_id = repo_name.replace("/", "-") + scheduler = f"{project_id}-nix-build" + filtered_jobs = [] + for job in jobs: + system = job.get("system") + if not system: # report eval errors + filtered_jobs.append(job) + elif system in self.supported_systems: + filtered_jobs.append(job) + + self.build.addStepsAfterCurrentStep( + [BuildTrigger(scheduler=scheduler, name="build flake", jobs=jobs)] + ) + + return result + + +# FIXME this leaks memory... but probably not enough that we care +class RetryCounter: + def __init__(self, retries: int) -> None: + self.builds: dict[uuid.UUID, int] = defaultdict(lambda: retries) + + def retry_build(self, id: uuid.UUID) -> int: + retries = self.builds[id] + if retries > 1: + self.builds[id] = retries - 1 + return retries + else: + return 0 + + +# For now we limit this to two. Often this allows us to make the error log +# shorter because we won't see the logs for all previous succeeded builds +RETRY_COUNTER = RetryCounter(retries=2) + + +class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep): + """ + Builds a nix derivation if evaluation was successful, + otherwise this shows the evaluation error. + """ + + def __init__(self, **kwargs: Any) -> None: + kwargs = self.setupShellMixin(kwargs) + super().__init__(**kwargs) + self.observer = logobserver.BufferLogObserver() + self.addLogObserver("stdio", self.observer) + + @defer.inlineCallbacks + def run(self) -> Generator[Any, object, Any]: + error = self.getProperty("error") + if error is not None: + attr = self.getProperty("attr") + # show eval error + self.build.results = util.FAILURE + log: Log = yield self.addLog("nix_error") + log.addStderr(f"{attr} failed to evaluate:\n{error}") + return util.FAILURE + path = self.getProperty("out_path") + + # FIXME: actually we should check if it exists in the remote machine + if os.path.exists(path): + # build already succeeded + return util.SKIPPED + + # run `nix build` + cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() + yield self.runCommand(cmd) + + res = cmd.results() + if res == util.FAILURE: + retries = RETRY_COUNTER.retry_build(self.getProperty("build_uuid")) + if retries > 0: + return util.RETRY + return res + + +class UpdateBuildOutput(steps.BuildStep): + """ + Updates store paths in a public www directory. + This is useful to prefetch updates without having to evaluate + on the target machine. + """ + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + + def run(self) -> Generator[Any, object, Any]: + props = self.build.getProperties() + if props.getProperty("branch") != props.getProperty( + "github.repository.default_branch" + ): + return util.SKIPPED + attr = os.path.basename(props.getProperty("attr")) + out_path = props.getProperty("out_path") + # XXX don't hardcode this + p = Path("/var/www/buildbot/nix-outputs/") + os.makedirs(p, exist_ok=True) + (p / attr).write_text(out_path) + return util.SUCCESS + + +def nix_update_flake_config( + project: GithubProject, + worker_names: list[str], + github_token_secret: str, + github_bot_user: str, +) -> util.BuilderConfig: + """ + Updates the flake an opens a PR for it. + """ + factory = util.BuildFactory() + url_with_secret = util.Interpolate( + f"https://git:%(secret:{github_token_secret})s@github.com/{project.name}" + ) + factory.addStep( + steps.Git( + repourl=url_with_secret, + alwaysUseLatest=True, + method="clean", + submodules=True, + haltOnFailure=True, + ) + ) + factory.addStep( + steps.ShellCommand( + name="Update flakes", + env=dict( + GIT_AUTHOR_NAME=github_bot_user, + GIT_AUTHOR_EMAIL=f"{github_bot_user}@users.noreply.github.com", + GIT_COMMITTER_NAME=github_bot_user, + GIT_COMMITTER_EMAIL=f"{github_bot_user}@users.noreply.github.com", + ), + command=[ + "nix", + "flake", + "update", + "--commit-lock-file", + "--commit-lockfile-summary", + "flake.lock: Update", + ], + haltOnFailure=True, + ) + ) + factory.addStep( + steps.ShellCommand( + name="Force-Push to update_flake_lock branch", + command=[ + "git", + "push", + "--force", + "origin", + "HEAD:refs/heads/update_flake_lock", + ], + haltOnFailure=True, + ) + ) + factory.addStep( + steps.SetPropertyFromCommand( + env=dict(GITHUB_TOKEN=util.Secret(github_token_secret)), + command=[ + "gh", + "pr", + "view", + "--json", + "state", + "--template", + "{{.state}}", + "update_flake_lock", + ], + decodeRC={0: "SUCCESS", 1: "SUCCESS"}, + property="has_pr", + ) + ) + factory.addStep( + steps.ShellCommand( + name="Create pull-request", + env=dict(GITHUB_TOKEN=util.Secret(github_token_secret)), + command=[ + "gh", + "pr", + "create", + "--repo", + project.name, + "--title", + "flake.lock: Update", + "--body", + "Automatic buildbot update", + "--head", + "refs/heads/update_flake_lock", + "--base", + project.default_branch, + ], + doStepIf=util.Interpolate("has_pr") != "OPEN", + ) + ) + return util.BuilderConfig( + name=f"{project.name}/update-flake", + project=project.name, + workernames=worker_names, + factory=factory, + ) + + +def nix_eval_config( + project: GithubProject, + worker_names: list[str], + github_token_secret: str, + supported_systems: list[str], + automerge_users: List[str] = [], + max_memory_size: int = 4096, +) -> util.BuilderConfig: + """ + Uses nix-eval-jobs to evaluate hydraJobs from flake.nix in parallel. + For each evaluated attribute a new build pipeline is started. + If all builds succeed and the build was for a PR opened by the flake update bot, + this PR is merged. + """ + factory = util.BuildFactory() + # check out the source + url_with_secret = util.Interpolate( + f"https://git:%(secret:{github_token_secret})s@github.com/%(prop:project)s" + ) + factory.addStep( + steps.Git( + repourl=url_with_secret, + method="clean", + submodules=True, + haltOnFailure=True, + ) + ) + + factory.addStep( + NixEvalCommand( + env={}, + name="evaluate flake", + supported_systems=supported_systems, + command=[ + "nix", + "run", + "--option", + "accept-flake-config", + "true", + "github:nix-community/nix-eval-jobs", + "--", + "--workers", + multiprocessing.cpu_count(), + "--max-memory-size", + str(max_memory_size), + "--option", + "accept-flake-config", + "true", + "--gc-roots-dir", + # FIXME: don't hardcode this + "/var/lib/buildbot-worker/gcroot", + "--force-recurse", + "--flake", + ".#checks", + ], + haltOnFailure=True, + ) + ) + if len(automerge_users) > 0: + + def check_auto_merge(step: steps.BuildStep) -> bool: + print("Checking if we should merge") + props = step.build.getProperties() + if props.getProperty("event") != "pull_request": + print("Not a pull request") + return False + if props.getProperty( + "github.repository.default_branch" + ) != props.getProperty("branch"): + print("Not on default branch") + return False + if not any( + owner in automerge_users for owner in props.getProperty("owners") + ): + print( + f"PR opened by {step.getProperty('owner')} not in {automerge_users}" + ) + return False + return True + + factory.addStep( + steps.ShellCommand( + name="Merge pull-request", + env=dict(GITHUB_TOKEN=util.Secret(github_token_secret)), + command=[ + "gh", + "pr", + "merge", + "--repo", + util.Property("project"), + "--rebase", + util.Property("pullrequesturl"), + ], + doStepIf=check_auto_merge, + ) + ) + + return util.BuilderConfig( + name=f"{project.name}/nix-eval", + workernames=worker_names, + project=project.name, + factory=factory, + properties=dict(status_name="nix-eval"), + ) + + +def nix_build_config( + project: GithubProject, + worker_names: list[str], + has_cachix_auth_token: bool = False, + has_cachix_signing_key: bool = False, +) -> util.BuilderConfig: + """ + Builds one nix flake attribute. + """ + factory = util.BuildFactory() + factory.addStep( + NixBuildCommand( + env={}, + name="Build flake attr", + command=[ + "nix", + "build", + "-L", + "--option", + "keep-going", + "true", + "--accept-flake-config", + "--out-link", + util.Interpolate("result-%(prop:attr)s"), + util.Interpolate("%(prop:drv_path)s^*"), + ], + haltOnFailure=True, + ) + ) + if has_cachix_auth_token or has_cachix_signing_key: + if has_cachix_signing_key: + env = dict(CACHIX_SIGNING_KEY=util.Secret("cachix-signing-key")) + else: + env = dict(CACHIX_AUTH_TOKEN=util.Secret("cachix-auth-token")) + factory.addStep( + steps.ShellCommand( + name="Upload cachix", + env=env, + command=[ + "cachix", + "push", + util.Secret("cachix-name"), + util.Interpolate("result-%(prop:attr)s"), + ], + ) + ) + factory.addStep( + steps.ShellCommand( + name="Register gcroot", + command=[ + "nix-store", + "--add-root", + # FIXME: cleanup old build attributes + util.Interpolate( + "/nix/var/nix/profiles/per-user/buildbot-worker/result-%(prop:attr)s" + ), + "-r", + util.Property("out_path"), + ], + doStepIf=util.Interpolate("branch") + == util.Interpolate("github.repository.default_branch"), + ) + ) + factory.addStep( + steps.ShellCommand( + name="Delete temporary gcroots", + command=["rm", "-f", util.Interpolate("result-%(prop:attr)s")], + ) + ) + factory.addStep(UpdateBuildOutput(name="Update build output")) + return util.BuilderConfig( + name=f"{project.name}/nix-build", + project=project.name, + workernames=worker_names, + collapseRequests=False, + env={}, + factory=factory, + ) diff --git a/buildbot_nix/github_projects.py b/buildbot_nix/github_projects.py new file mode 100644 index 0000000..7e229c1 --- /dev/null +++ b/buildbot_nix/github_projects.py @@ -0,0 +1,98 @@ +import http.client +import json +import urllib.request +from pathlib import Path +from typing import Any + +from twisted.python import log + + +class HttpResponse: + def __init__(self, raw: http.client.HTTPResponse) -> None: + self.raw = raw + + def json(self) -> Any: + return json.load(self.raw) + + def headers(self) -> http.client.HTTPMessage: + return self.raw.headers + + +def http_request( + url: str, + method: str = "GET", + headers: dict[str, str] = {}, + data: dict[str, Any] | None = None, +) -> HttpResponse: + body = None + if data: + body = json.dumps(data).encode("ascii") + headers = headers.copy() + headers["User-Agent"] = "buildbot-nix" + req = urllib.request.Request(url, headers=headers, method=method, data=body) + resp = urllib.request.urlopen(req) + return HttpResponse(resp) + + +def paginated_github_request(url: str, token: str) -> list[dict[str, Any]]: + next_url: str | None = url + repos = [] + while next_url: + try: + res = http_request( + next_url, + headers={"Authorization": f"token {token}"}, + ) + except OSError as e: + raise Exception(f"failed to fetch {next_url}: {e}") from e + next_url = None + link = res.headers()["Link"] + if link is not None: + links = link.split(", ") + for link in links: # pagination + link_parts = link.split(";") + if link_parts[1].strip() == 'rel="next"': + next_url = link_parts[0][1:-1] + repos += res.json() + return repos + + +class GithubProject: + def __init__(self, repo: dict[str, Any]) -> None: + self.repo = repo + + @property + def name(self) -> str: + return self.repo["full_name"] + + @property + def url(self) -> str: + return self.repo["html_url"] + + @property + def id(self) -> str: + n = self.repo["full_name"] + return n.replace("/", "-") + + @property + def default_branch(self) -> str: + return self.repo["default_branch"] + + @property + def topics(self) -> list[str]: + return self.repo["topics"] + + +def load_projects(github_token: str, repo_cache_file: Path) -> list[GithubProject]: + if repo_cache_file.exists(): + log.msg("fetching github repositories from cache") + repos: list[dict[str, Any]] = json.loads(repo_cache_file.read_text()) + else: + log.msg("fetching github repositories from api") + repos = paginated_github_request( + "https://api.github.com/user/repos?per_page=100", + github_token, + ) + repo_cache_file.write_text(json.dumps(repos, indent=2)) + + return [GithubProject(repo) for repo in repos] diff --git a/buildbot_nix/irc_notify.py b/buildbot_nix/irc_notify.py new file mode 100644 index 0000000..8c70826 --- /dev/null +++ b/buildbot_nix/irc_notify.py @@ -0,0 +1,150 @@ +import base64 +import re +import socket +import ssl +import threading +from typing import Any, Generator, Optional +from urllib.parse import urlparse + +from buildbot.reporters.base import ReporterBase +from buildbot.reporters.generators.build import BuildStatusGenerator +from buildbot.reporters.message import MessageFormatter +from twisted.internet import defer + +DEBUG = False + + +def _irc_send( + server: str, + nick: str, + channel: str, + sasl_password: Optional[str] = None, + server_password: Optional[str] = None, + tls: bool = True, + port: int = 6697, + messages: list[str] = [], +) -> None: + if not messages: + return + + # don't give a shit about legacy ip + sock = socket.socket(family=socket.AF_INET6) + if tls: + sock = ssl.wrap_socket( + sock, cert_reqs=ssl.CERT_NONE, ssl_version=ssl.PROTOCOL_TLSv1_2 + ) + + def _send(command: str) -> int: + if DEBUG: + print(command) + return sock.send((f"{command}\r\n").encode()) + + def _pong(ping: str) -> None: + if ping.startswith("PING"): + sock.send(ping.replace("PING", "PONG").encode("ascii")) + + recv_file = sock.makefile(mode="r") + + print(f"connect {server}:{port}") + sock.connect((server, port)) + if server_password: + _send(f"PASS {server_password}") + _send(f"USER {nick} 0 * :{nick}") + _send(f"NICK {nick}") + for line in recv_file.readline(): + if re.match(r"^:[^ ]* (MODE|221|376|422) ", line): + break + else: + _pong(line) + + if sasl_password: + _send("CAP REQ :sasl") + _send("AUTHENTICATE PLAIN") + auth = base64.encodebytes(f"{nick}\0{nick}\0{sasl_password}".encode("ascii")) + _send(f"AUTHENTICATE {auth.decode('ascii')}") + _send("CAP END") + _send(f"JOIN :{channel}") + + for m in messages: + _send(f"PRIVMSG {channel} :{m}") + + _send("INFO") + for line in recv_file: + if DEBUG: + print(line, end="") + # Assume INFO reply means we are done + if "End of /INFO" in line: + break + else: + _pong(line) + + sock.send(b"QUIT") + print("disconnect") + sock.close() + + +def irc_send( + url: str, notifications: list[str], password: Optional[str] = None +) -> None: + parsed = urlparse(f"{url}") + username = parsed.username or "prometheus" + server = parsed.hostname or "chat.freenode.net" + if parsed.fragment != "": + channel = f"#{parsed.fragment}" + else: + channel = "#krebs-announce" + port = parsed.port or 6697 + if not password: + password = parsed.password + if len(notifications) == 0: + return + # put this in a thread to not block buildbot + t = threading.Thread( + target=_irc_send, + kwargs=dict( + server=server, + nick=username, + sasl_password=password, + channel=channel, + port=port, + messages=notifications, + tls=parsed.scheme == "irc+tls", + ), + ) + t.start() + + +subject_template = """\ +{{ '☠' if result_names[results] == 'failure' else '☺' if result_names[results] == 'success' else '☝' }} \ +{{ build['properties'].get('project', ['whole buildset'])[0] if is_buildset else buildername }} \ +- \ +{{ build['state_string'] }} \ +{{ '(%s)' % (build['properties']['branch'][0] if (build['properties']['branch'] and build['properties']['branch'][0]) else build['properties'].get('got_revision', ['(unknown revision)'])[0]) }} \ +({{ build_url }}) +""" # # noqa pylint: disable=line-too-long + + +class NotifyFailedBuilds(ReporterBase): + def _generators(self) -> list[BuildStatusGenerator]: + formatter = MessageFormatter(template_type="plain", subject=subject_template) + return [BuildStatusGenerator(message_formatter=formatter)] + + def checkConfig(self, url: str) -> None: + super().checkConfig(generators=self._generators()) + + @defer.inlineCallbacks + def reconfigService(self, url: str) -> Generator[Any, object, Any]: + self.url = url + yield super().reconfigService(generators=self._generators()) + + def sendMessage(self, reports: list) -> None: + msgs = [] + for r in reports: + build = r["builds"][0] + buildername = build["builder"]["name"] + # We don't want to report individual failures here to not spam the channel. + if buildername != "nix-eval": + continue + if build["state_string"] != "build successful": + msgs.append(r["subject"]) + irc_send(self.url, notifications=msgs) diff --git a/buildbot_nix/master.py b/buildbot_nix/master.py new file mode 100644 index 0000000..ecd6b20 --- /dev/null +++ b/buildbot_nix/master.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +from datetime import timedelta +from pathlib import Path +from typing import Any + +from buildbot.plugins import reporters, schedulers, secrets, util, worker +from buildbot.process.project import Project +from buildbot.process.properties import Interpolate + +# allow to import modules +sys.path.append(str(Path(__file__).parent)) + +from buildbot_nix import ( # noqa: E402 + nix_build_config, + nix_eval_config, + nix_update_flake_config, +) +from github_projects import GithubProject, load_projects # noqa: E402 +from irc_notify import NotifyFailedBuilds # noqa: E402 + + +def read_secret_file(secret_name: str) -> str: + directory = os.environ.get("CREDENTIALS_DIRECTORY") + if directory is None: + print("directory not set", file=sys.stderr) + sys.exit(1) + return Path(directory).joinpath(secret_name).read_text() + + +GITHUB_OAUTH_ID = os.environ.get("GITHUB_OAUTH_ID") +GITHUB_OAUTH_SECRET = read_secret_file("github-oauth-secret") +GITHUB_TOKEN_SECRET_NAME = "github-token" +GITHUB_TOKEN = read_secret_file(GITHUB_TOKEN_SECRET_NAME) +GITHUB_WEBHOOK_SECRET = read_secret_file("github-webhook-secret") +# Shape of this file: +# [ { "name": "", "pass": "", "cores": "" } ] +BUILDBOT_NIX_WORKERS = read_secret_file("buildbot-nix-workers") +REPO_FOR_FLAKE_UPDATE = os.environ["REPO_FOR_FLAKE_UPDATE"] +BUILDBOT_URL = os.environ["BUILDBOT_URL"] +BUILDBOT_GITHUB_USER = os.environ["BUILDBOT_GITHUB_USER"] +NIX_SUPPORTED_SYSTEMS = os.environ["NIX_SUPPORTED_SYSTEMS"].split(" ") +NIX_EVAL_MAX_MEMORY_SIZE = int(os.environ.get("NIX_EVAL_MAX_MEMORY_SIZE", "4096")) + + +def config_for_project( + config: dict[str, Any], + project: GithubProject, + credentials: str, + worker_names: list[str], +) -> Project: + config["projects"].append(Project(project.name)) + config["schedulers"].extend( + [ + schedulers.SingleBranchScheduler( + name=f"default-branch-{project.id}", + change_filter=util.ChangeFilter( + repository=project.url, + filter_fn=lambda c: c.branch + == c.properties.getProperty("github.repository.default_branch"), + ), + builderNames=[f"{project.name}/nix-eval"], + ), + # this is compatible with bors or github's merge queue + schedulers.SingleBranchScheduler( + name=f"merge-queue-{project.id}", + change_filter=util.ChangeFilter( + repository=project.url, + branch_re="(gh-readonly-queue/.*|staging|trying)", + ), + builderNames=[f"{project.name}/nix-eval"], + ), + # build all pull requests + schedulers.SingleBranchScheduler( + name=f"prs-{project.id}", + change_filter=util.ChangeFilter( + repository=project.url, category="pull" + ), + builderNames=[f"{project.name}/nix-eval"], + ), + # this is triggered from `nix-eval` + schedulers.Triggerable( + name=f"{project.id}-nix-build", + builderNames=[f"{project.name}/nix-build"], + ), + # allow to manually trigger a nix-build + schedulers.ForceScheduler( + name=f"{project.id}-force", builderNames=[f"{project.name}/nix-eval"] + ), + # allow to manually update flakes + schedulers.ForceScheduler( + name=f"{project.id}-update-flake", + builderNames=[f"{project.name}/update-flake"], + buttonName="Update flakes", + ), + # updates flakes once a weeek + schedulers.NightlyTriggerable( + name=f"{project.id}-update-flake-weekly", + builderNames=[f"{project.name}/update-flake"], + hour=3, + minute=0, + dayOfWeek=6, + ), + ] + ) + has_cachix_auth_token = os.path.isfile( + os.path.join(credentials, "cachix-auth-token") + ) + has_cachix_signing_key = os.path.isfile( + os.path.join(credentials, "cachix-signing-key") + ) + config["builders"].extend( + [ + # Since all workers run on the same machine, we only assign one of them to do the evaluation. + # This should prevent exessive memory usage. + nix_eval_config( + project, + [worker_names[0]], + github_token_secret=GITHUB_TOKEN_SECRET_NAME, + supported_systems=NIX_SUPPORTED_SYSTEMS, + automerge_users=[BUILDBOT_GITHUB_USER], + max_memory_size=NIX_EVAL_MAX_MEMORY_SIZE, + ), + nix_build_config( + project, + worker_names, + has_cachix_auth_token, + has_cachix_signing_key, + ), + nix_update_flake_config( + project, + worker_names, + github_token_secret=GITHUB_TOKEN_SECRET_NAME, + github_bot_user=BUILDBOT_GITHUB_USER, + ), + ] + ) + + +PROJECT_CACHE_FILE = Path("github-project-cache.json") + + +def build_config() -> dict[str, Any]: + projects = load_projects(GITHUB_TOKEN, PROJECT_CACHE_FILE) + projects = [p for p in projects if "build-with-buildbot" in p.topics] + import pprint + + pprint.pprint(projects) + c: dict[str, Any] = {} + c["buildbotNetUsageData"] = None + # configure a janitor which will delete all logs older than one month, and will run on sundays at noon + c["configurators"] = [ + util.JanitorConfigurator(logHorizon=timedelta(weeks=4), hour=12, dayOfWeek=6) + ] + credentials = os.environ.get("CREDENTIALS_DIRECTORY", ".") + c["schedulers"] = [ + schedulers.SingleBranchScheduler( + name="nixpkgs", + change_filter=util.ChangeFilter( + repository_re=r"https://github\.com/.*/nixpkgs", + filter_fn=lambda c: c.branch + == c.properties.getProperty("github.repository.default_branch"), + ), + treeStableTimer=20, + builderNames=["Mic92/dotfiles/update-flake"], + ), + ] + c["builders"] = [] + c["projects"] = [] + c["workers"] = [] + + worker_config = json.loads(BUILDBOT_NIX_WORKERS) + worker_names = [] + for item in worker_config: + cores = item.get("cores", 0) + for i in range(cores): + worker_name = f"{item['name']}-{i}" + c["workers"].append(worker.Worker(worker_name, item["pass"])) + worker_names.append(worker_name) + + for project in projects: + config_for_project(c, project, credentials, worker_names) + + c["services"] = [ + reporters.GitHubStatusPush( + token=GITHUB_TOKEN, + # Since we dynamically create build steps, + # we use `virtual_builder_name` in the webinterface + # so that we distinguish what has beeing build + context=Interpolate("buildbot/%(prop:status_name)s"), + ), + # Notify on irc + NotifyFailedBuilds("irc://buildbot|mic92@irc.r:6667/#xxx"), + ] + + systemd_secrets = secrets.SecretInAFile(dirname=credentials) + c["secretsProviders"] = [systemd_secrets] + + github_admins = os.environ.get("GITHUB_ADMINS", "").split(",") + + c["www"] = { + "avatar_methods": [util.AvatarGitHub()], + "port": int(os.environ.get("PORT", "1810")), + "auth": util.GitHubAuth(GITHUB_OAUTH_ID, GITHUB_OAUTH_SECRET), + "authz": util.Authz( + roleMatchers=[ + util.RolesFromUsername(roles=["admin"], usernames=github_admins) + ], + allowRules=[ + util.AnyEndpointMatcher(role="admin", defaultDeny=False), + util.AnyControlEndpointMatcher(role="admins"), + ], + ), + "plugins": dict( + base_react={}, waterfall_view={}, console_view={}, grid_view={} + ), + "change_hook_dialects": dict( + github={ + "secret": GITHUB_WEBHOOK_SECRET, + "strict": True, + "token": GITHUB_TOKEN, + "github_property_whitelist": "*", + } + ), + } + + c["db"] = {"db_url": os.environ.get("DB_URL", "sqlite:///state.sqlite")} + + c["protocols"] = {"pb": {"port": "tcp:9989:interface=\\:\\:"}} + c["buildbotURL"] = BUILDBOT_URL + + return c + + +BuildmasterConfig = build_config() diff --git a/buildbot_nix/worker.py b/buildbot_nix/worker.py new file mode 100644 index 0000000..1bb8d5b --- /dev/null +++ b/buildbot_nix/worker.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import multiprocessing +import os +import socket +from pathlib import Path + +from buildbot_worker.bot import Worker +from twisted.application import service + + +def require_env(key: str) -> str: + val = os.environ.get(key) + assert val is not None, "val is not set" + return val + + +PASSWD = Path(require_env("WORKER_PASSWORD_FILE")).read_text().strip("\r\n") +BUILDBOT_DIR = require_env("BUILDBOT_DIR") +MASTER_URL = require_env("MASTER_URL") + + +def setup_worker(application: service.Application, id: int) -> None: + basedir = f"{BUILDBOT_DIR}-{id}" + os.makedirs(basedir, mode=0o700, exist_ok=True) + + hostname = socket.gethostname() + workername = f"{hostname}-{id}" + keepalive = 600 + umask = None + maxdelay = 300 + numcpus = None + allow_shutdown = None + + s = Worker( + None, + None, + workername, + PASSWD, + basedir, + keepalive, + connection_string=MASTER_URL, + umask=umask, + maxdelay=maxdelay, + numcpus=numcpus, + allow_shutdown=allow_shutdown, + ) + # defaults to 4096, bump to 10MB for nix-eval-jobs + s.bot.max_line_length = 10485760 + s.setServiceParent(application) + + +# note: this line is matched against to check that this is a worker +# directory; do not edit it. +application = service.Application("buildbot-worker") + +for i in range(multiprocessing.cpu_count()): + setup_worker(application, i) diff --git a/nix/master.nix b/nix/master.nix new file mode 100644 index 0000000..7e83c50 --- /dev/null +++ b/nix/master.nix @@ -0,0 +1,98 @@ +{ config +, pkgs +, ... +}: +let + # TODO: make this an option + # https://github.com/organizations/numtide/settings/applications + # Application name: BuildBot + # Homepage URL: https://buildbot.numtide.com + # Authorization callback URL: https://buildbot.numtide.com/auth/login + # oauth_token: 2516248ec6289e4d9818122cce0cbde39e4b788d + buildbotDomain = "buildbot.thalheim.io"; + githubOauthId = "d1b24258af1abc157934"; +in +{ + services.buildbot-master = { + enable = true; + masterCfg = "${./.}/master.py"; + dbUrl = "postgresql://@/buildbot"; + pythonPackages = ps: [ + ps.requests + ps.treq + ps.psycopg2 + (ps.toPythonModule pkgs.buildbot-worker) + pkgs.buildbot-plugins.www + pkgs.buildbot-plugins.www-react + ]; + }; + + systemd.services.buildbot-master = { + environment = { + PORT = "1810"; + DB_URL = config.services.buildbot-master.dbUrl; + # Github app used for the login button + GITHUB_OAUTH_ID = githubOauthId; + + # XXX replace this with renovate + REPO_FOR_FLAKE_UPDATE = "Mic92/dotfiles/main"; + + BUILDBOT_URL = "https://${buildbotDomain}/"; + BUILDBOT_GITHUB_USER = "mic92-buildbot"; + # comma seperated list of users that are allowed to login to buildbot and do stuff + GITHUB_ADMINS = "Mic92"; + NIX_SUPPORTED_SYSTEMS = builtins.toString [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ]; + NIX_EVAL_MAX_MEMORY_SIZE = "2048"; + }; + serviceConfig = { + # in master.py we read secrets from $CREDENTIALS_DIRECTORY + LoadCredential = [ + "github-token:${config.sops.secrets.buildbot-github-token.path}" + "github-webhook-secret:${config.sops.secrets.buildbot-github-webhook-secret.path}" + "github-oauth-secret:${config.sops.secrets.buildbot-github-oauth-secret.path}" + "buildbot-nix-workers:${config.sops.secrets.buildbot-nix-workers.path}" + ]; + }; + }; + sops.secrets = { + buildbot-github-token = { }; + buildbot-github-webhook-secret = { }; + buildbot-github-oauth-secret = { }; + buildbot-nix-workers = { }; + }; + + services.postgresql = { + ensureDatabases = [ "buildbot" ]; + ensureUsers = [ + { + name = "buildbot"; + ensurePermissions."DATABASE buildbot" = "ALL PRIVILEGES"; + } + ]; + }; + + services.nginx.virtualHosts.${buildbotDomain} = { + forceSSL = true; + useACMEHost = "thalheim.io"; + locations."/".proxyPass = "http://127.0.0.1:1810/"; + locations."/sse" = { + proxyPass = "http://127.0.0.1:1810/sse/"; + # proxy buffering will prevent sse to work + extraConfig = "proxy_buffering off;"; + }; + locations."/ws" = { + proxyPass = "http://127.0.0.1:1810/ws"; + proxyWebsockets = true; + # raise the proxy timeout for the websocket + extraConfig = "proxy_read_timeout 6000s;"; + }; + + # In this directory we store the lastest build store paths for nix attributes + locations."/nix-outputs".root = "/var/www/buildbot/"; + }; + + # Allow buildbot-master to write to this directory + systemd.tmpfiles.rules = [ + "d /var/www/buildbot/nix-outputs 0755 buildbot buildbot - -" + ]; +} diff --git a/nix/worker.nix b/nix/worker.nix new file mode 100644 index 0000000..6a763f9 --- /dev/null +++ b/nix/worker.nix @@ -0,0 +1,52 @@ +{ config +, pkgs +, ... +}: +let + package = pkgs.buildbot-worker; + python = package.pythonModule; + home = "/var/lib/buildbot-worker"; + buildbotDir = "${home}/worker"; +in +{ + nix.settings.allowed-users = [ "buildbot-worker" ]; + users.users.buildbot-worker = { + description = "Buildbot Worker User."; + isSystemUser = true; + createHome = true; + home = "/var/lib/buildbot-worker"; + group = "buildbot-worker"; + useDefaultShell = true; + }; + users.groups.buildbot-worker = { }; + + systemd.services.buildbot-worker = { + reloadIfChanged = true; + description = "Buildbot Worker."; + after = [ "network.target" "buildbot-master.service" ]; + wantedBy = [ "multi-user.target" ]; + path = [ + pkgs.cachix + pkgs.git + pkgs.openssh + pkgs.gh + pkgs.nix + ]; + environment.PYTHONPATH = "${python.withPackages (_: [package])}/${python.sitePackages}"; + environment.MASTER_URL = ''tcp:host=localhost:port=9989''; + environment.BUILDBOT_DIR = buildbotDir; + environment.WORKER_PASSWORD_FILE = config.sops.secrets.buildbot-nix-worker-password.path; + + serviceConfig = { + Type = "simple"; + User = "buildbot-worker"; + Group = "buildbot-worker"; + WorkingDirectory = home; + + # Restart buildbot with a delay. This time way we can use buildbot to deploy itself. + ExecReload = "+${pkgs.systemd}/bin/systemd-run --on-active=60 ${pkgs.systemd}/bin/systemctl restart buildbot-worker"; + ExecStart = "${python.pkgs.twisted}/bin/twistd --nodaemon --pidfile= --logfile - --python ${./worker.py}"; + }; + }; + sops.secrets.buildbot-nix-worker-password.owner = "buildbot-worker"; +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8545e68 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.mypy] +python_version = "3.10" +warn_redundant_casts = true +disallow_untyped_calls = true +disallow_untyped_defs = true +no_implicit_optional = true + +[[tool.mypy.overrides]] +module = "buildbot.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "buildbot_worker.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "twisted.*" +ignore_missing_imports = true