#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path

# Running total of commits attributed to "[bot]" accounts; add_log()
# increments it instead of crediting those commits to a contributor.
bot_commits = 0

# This file lives one directory below the root of the zulip checkout.
# Make that root the working directory, since find_path() resolves the
# other repositories relative to the current working directory.
_this_file = os.path.abspath(__file__)
ZULIP_PATH = os.path.dirname(os.path.dirname(_this_file))
os.chdir(ZULIP_PATH)


def add_log(committer_dict: dict[str, int], input: list[str]) -> None:
    """Merge `git shortlog -s` output lines into committer_dict.

    Each line of input is expected to look like "<count>\\t<name>".
    The count is added to committer_dict[name], except for names ending
    in "[bot]" (dependabot[bot] and other GitHub bots), whose commits
    are added to the module-level bot_commits counter instead.
    """
    global bot_commits

    for line in input:
        fields = line.split("\t")
        name = fields[1]
        # int() tolerates the leading padding git puts before the count.
        count = int(fields[0])

        if not name.endswith("[bot]"):
            committer_dict[name] += count
        else:
            bot_commits += count


def retrieve_log(repo: str, revisions: str) -> list[str]:
    """Return the lines printed by `git shortlog -s <revisions>` in repo.

    repo is a checkout directory name resolved with find_path();
    revisions is passed to git unchanged. Raises
    subprocess.CalledProcessError if git exits with a non-zero status.
    """
    shortlog_output = subprocess.check_output(
        ["git", "shortlog", "-s", revisions],
        cwd=find_path(repo),
        text=True,
    )
    return shortlog_output.splitlines()


def find_path(repository: str) -> str:
    """Return the path of repository inside the parent of the current
    working directory, i.e. as a sibling of the directory we run from.
    """
    checkouts_dir = Path.cwd().parent
    return str(checkouts_dir.joinpath(repository))


def process_repo(
    *,
    out_dict: dict[str, int],
    repo_short: str,
    repo_full: str,
    lower_version: str,
    upper_version: str,
) -> None:
    """Add one repository's per-committer commit counts to out_dict.

    Prints a one-line summary of how many commits the revision range
    contains, then merges the `git shortlog -s` totals for that range
    into out_dict via add_log().

    out_dict: running committer name -> commit count totals, updated
        in place.
    repo_short: directory name of the checkout, resolved with find_path().
    repo_full: repository name shown in the printed summary.
    lower_version: revision excluded from the range, along with its
        ancestors. An empty string means "everything reachable from
        upper_version".
    upper_version: revision at the upper end of the range. An empty
        string means the repository has nothing to count.

    Raises subprocess.CalledProcessError if a git command fails.
    """
    if not upper_version:
        # find_last_commit_before_time() yields "" when the branch has no
        # commit before the requested time (for example, a repository
        # created after the window being measured). An empty revision
        # would make git fail or silently mean HEAD, so report zero
        # commits instead of aborting the whole run.
        print(f"0 commits from {repo_full}: (no commits before upper bound)")
        return

    if not lower_version:
        revisions = upper_version
        revisions_display = f"(start)..{upper_version[0:12]}"
    else:
        revisions = f"{lower_version}..{upper_version}"
        # Only the first 12 characters are shown, which keeps full commit
        # hashes readable in the summary line.
        revisions_display = f"{lower_version[0:12]}..{upper_version[0:12]}"
    # With --pretty=oneline git prints one line per commit, so the number
    # of output lines is the number of commits in the range.
    commit_count = len(
        subprocess.check_output(
            ["git", "log", "--pretty=oneline", revisions],
            cwd=find_path(repo_short),
            text=True,
        ).splitlines()
    )
    repo_log = retrieve_log(repo_short, revisions)
    print(f"{commit_count} commits from {repo_full}: {revisions_display}")
    add_log(out_dict, repo_log)


def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the newest commit on branch in the target repository as of
    the specified time.

    The result is whatever `git rev-list -1 --before=<time> <branch>`
    prints, with surrounding whitespace stripped; callers treat an
    empty string as "no such commit".
    """
    rev_list_command = ["git", "rev-list", "-1", f"--before={time}", branch, "--"]
    newest_commit = subprocess.check_output(
        rev_list_command,
        cwd=find_path(repository),
        text=True,
    )
    return newest_commit.strip()


# Command-line interface: up to two positional zulip/zulip versions and
# a flag controlling the sort order of the final report.
_DESCRIPTION = """\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).

The attribution algorithm used by this tool attributes all changes for
a Zulip project between:

* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.

This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.

Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).

Expects that all Zulip repositories are in the current working
directory, which does not need to be the directory this is run from.

# Changes between two major releases.
total-contributions 4.0 5.0

# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
    """

parser = argparse.ArgumentParser(
    prog="python3 total-contributions",
    description=_DESCRIPTION,
    # The raw formatter keeps the hand-wrapped paragraphs and usage
    # examples of the description exactly as written.
    formatter_class=argparse.RawTextHelpFormatter,
)

# TODO: Ideally, we'd replace "1.3.0" with "First commit", to
# simplify including contributions before the 1.3.0 release.
_DEFAULT_VERSIONS = ["1.3.0", "main"]
parser.add_argument(
    "version",
    nargs="*",
    default=_DEFAULT_VERSIONS,
    metavar="version",
    help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
)

parser.add_argument(
    "-a",
    "--ascending",
    help="Sort contributors based on number of commits(ascending order)",
    action="store_true",
)

args = parser.parse_args()

if len(args.version) > 2:
    parser.error("Expects 0 to 2 version number(s)")

# A single version means "from that version up to the main branch".
lower_zulip_version = args.version[0]
if len(args.version) == 1:
    upper_zulip_version = "main"
else:
    upper_zulip_version = args.version[1]

subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))

# Extract git version and time. It's important that we use the commit
# date (%ci), not the author date (%ai), since while those are often
# near identical for release commits, if we pass a branch like `main`,
# it's possible the latest commit on the branch might have a months
# old author date if the last pull request merged was started at that
# time.
#
# Only the first whitespace-separated field of the output is kept,
# i.e. the date without the time of day or UTC offset. These commands
# run in the current working directory.
try:
    lower_time = subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", lower_zulip_version],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]
    upper_time = subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", upper_zulip_version],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]
except subprocess.CalledProcessError:
    # An unknown tag or branch is a failure: report it on stderr and
    # exit with a non-zero status so callers and scripts can detect it.
    sys.exit("Specified version(s) don't exist")

print(
    f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
)

# Committer name -> total commit count, accumulated across every repository.
out_dict: dict[str, int] = defaultdict(int)

# zulip/zulip was already fetched before the version timestamps were
# resolved, so it is not fetched a second time here. Its range is the
# pair of versions given on the command line.
process_repo(
    out_dict=out_dict,
    repo_short="zulip",
    repo_full="zulip/zulip",
    lower_version=lower_zulip_version,
    upper_version=upper_zulip_version,
)

# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
for full_repository, branch in [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-flutter", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]:
    repository = os.path.basename(full_repository)
    repository_path = find_path(repository)

    if os.path.exists(repository_path):
        # Update the checkout for the project in question.
        subprocess.check_call(
            ["git", "pull", "--rebase", "-q"],
            cwd=repository_path,
        )
    else:
        # No checkout yet: clone it next to the other repositories.
        subprocess.check_call(
            ["git", "clone", f"git@github.com:{full_repository}.git"],
            cwd=os.path.dirname(repository_path),
        )

    # NOTE(review): `-a` is git fetch's short form of `--append`, not
    # `--all`; confirm which one was intended.
    subprocess.check_call(["git", "fetch", "-a"], cwd=repository_path)

    # The range for this repository is bounded by the last commit on its
    # branch before each of the two zulip/zulip timestamps.
    lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
    upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)
    process_repo(
        out_dict=out_dict,
        repo_short=repository,
        repo_full=full_repository,
        lower_version=lower_repo_version,
        upper_version=upper_repo_version,
    )

# Print one "<count>\t<name>" line per contributor, most commits first
# unless --ascending was given, followed by the summary lines.
ranked_contributors = sorted(
    out_dict.items(),
    key=lambda entry: entry[1],
    reverse=not args.ascending,
)
for committer_name, commit_count in ranked_contributors:
    print(f"{commit_count}\t{committer_name}")

grand_total = sum(out_dict.values())
contributor_count = len(out_dict)

print(f"Excluded {bot_commits} commits authored by bots.")
print(
    f"{grand_total} total commits by {contributor_count} contributors "
    f"between {lower_zulip_version} and {upper_zulip_version}."
)
