#!/usr/bin/env python3

import argparse
import html
import json
import sys
from typing import NamedTuple, Optional, Sequence


class CLIArgs(NamedTuple):
    """Immutable bundle of the command-line options this script accepts."""

    # True when --unescape-html was given on the command line.
    unescape_html: bool
    # Path of the JSON file; the script reads it and then overwrites it.
    filename: str
    # Value of --indent-level; forwarded as the `indent` argument of json.dump.
    indent_level: int


def parse_args(argv: Optional[Sequence[str]] = None) -> CLIArgs:
    """Parse command-line arguments into a ``CLIArgs`` tuple.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave exactly as before.

    Returns:
        The ``--unescape-html`` flag, the positional filename and the
        ``--indent-level`` value (2 when not given).

    Raises:
        SystemExit: Raised by argparse for invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(
        prog="unescape-contents",
        description=(
            "Unescape Unicode and, optionally, HTML entities in a JSON file. "
            "Input file must be a JSON dictionary. "
            "Output will always be unescaped UTF-8."
        ),
    )

    parser.add_argument(
        "filename",
        type=str,
        help="JSON file to unescape; it is rewritten in place",
    )
    # argparse derives dest="unescape_html" / "indent_level" from the option
    # strings, and optional flags are never required, so neither is spelled out.
    parser.add_argument(
        "--unescape-html",
        action="store_true",
        help="If the key of a dictionary field does not contain HTML escapes, unescape any HTML escapes found in the value",
    )
    parser.add_argument(
        "--indent-level",
        type=int,
        default=2,
        help="Number of spaces used to indent the output JSON (default: %(default)s)",
    )
    parsed = parser.parse_args(argv)

    return CLIArgs(
        unescape_html=parsed.unescape_html,
        filename=parsed.filename,
        indent_level=parsed.indent_level,
    )


def unescape_html_values(data: dict[str, object], source_name: str) -> dict[str, object]:
    """Return a copy of *data* with HTML entities unescaped in string values.

    A value is only unescaped when its key contains no HTML escapes itself;
    otherwise a notice is printed to stderr and the value is kept verbatim.
    Non-string values (numbers, booleans, null, lists, nested objects) are
    copied through unchanged instead of being handed to ``html.unescape``,
    which only accepts strings. Key order is preserved.

    Args:
        data: Top-level JSON object as loaded by ``json.load``.
        source_name: Name used to prefix the stderr notices (the file name).

    Returns:
        A new dictionary; *data* itself is not modified.
    """
    result: dict[str, object] = {}
    for key, value in data.items():
        if key != html.unescape(key):
            print(
                f'{source_name}: key "{key}" contains HTML, not escaping HTML in value',
                file=sys.stderr,
            )
            result[key] = value
        elif isinstance(value, str):
            result[key] = html.unescape(value)
        else:
            # html.unescape() raises TypeError on non-strings; leave as-is.
            result[key] = value
    return result


def main() -> None:
    """Rewrite the JSON file named on the command line in unescaped UTF-8."""
    args = parse_args()
    print(f"unescaping file {args.filename}", file=sys.stderr)

    # Decode explicitly as UTF-8 rather than with the locale's default
    # encoding, so the result does not depend on the OS or LANG setting.
    with open(args.filename, encoding="utf-8") as source:
        json_data = json.load(source)

    if args.unescape_html:
        if not isinstance(json_data, dict):
            sys.exit(
                f"{args.filename}: top-level JSON value must be a dictionary to unescape HTML"
            )
        json_data = unescape_html_values(json_data, args.filename)

    # ensure_ascii=False emits real Unicode characters instead of \uXXXX
    # escape sequences, which is what provides the JSON-level unescape.
    # Serialize *before* opening the destination: mode="w" truncates the
    # file, so a serialization failure must not happen after that point.
    serialized = json.dumps(json_data, ensure_ascii=False, indent=args.indent_level)

    # Explicit UTF-8 keeps the "output is always UTF-8" promise on every
    # platform instead of relying on the locale encoding.
    with open(args.filename, mode="w", encoding="utf-8") as dest:
        dest.write(serialized)
        dest.write("\n")


if __name__ == "__main__":
    main()
