#!/bin/sh

# some git commands print to stdout, which would contaminate our JSON output
clean_git(){
    git "$@" >&2
}

# Remove all remote branches, remove tags not reachable from HEAD, do a full
# repack and then garbage collect unreferenced objects.
make_deterministic_repo(){
    local repo="$1"

    # run in sub-shell to not touch current working directory
    (
    cd "$repo"
    # Remove files that contain timestamps or otherwise have non-deterministic
    # properties.
    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
        .git/refs/remotes/origin/HEAD .git/config

    # Remove all remote branches.
    git branch -r | while read -r branch; do
        clean_git branch -rD "$branch"
    done

    # Remove tags not reachable from HEAD. If we're exactly on a tag, don't
    # delete it.
    maybe_tag=$(git tag --points-at HEAD)
    git tag --contains HEAD | while read -r tag; do
        if [ "$tag" != "$maybe_tag" ]; then
            clean_git tag -d "$tag"
        fi
    done

    # Do a full repack. Must run single-threaded, or else we lose determinism.
    clean_git config pack.threads 1
    clean_git repack -A -d -f
    rm -f .git/config

    # Garbage collect unreferenced objects.
    # Note: --keep-largest-pack prevents non-deterministic ordering of packs
    #   listed in .git/objects/info/packs by only using a single pack
    clean_git gc --prune=all --keep-largest-pack
    )
}