#!/usr/bin/python3
# coding: utf-8

"""check for missing things in the wiki"""

# Copyright (C) 2024 Antoine Beaupré <anarcat@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division, absolute_import
from __future__ import print_function, unicode_literals

import argparse
from glob import glob
import logging
import os.path
import re
import sys
from typing import Iterator


def parse_args():
    """Parse the command line and return the resulting namespace.

    The returned namespace carries ``log_level`` ("warning", "info" or
    "debug"), ``directory``, ``exclude`` (always a list of ints) and
    ``dryrun``.

    Exits through ``parser.error()`` (usage message, status 2) when a
    ``--exclude`` value is not an integer.
    """
    parser = argparse.ArgumentParser(description=__doc__, epilog="")
    # --verbose and --debug share one destination: whichever is given
    # decides the log level, and "warning" applies when neither is.
    parser.add_argument(
        "--verbose",
        "-v",
        dest="log_level",
        action="store_const",
        const="info",
        default="warning",
    )
    parser.add_argument(
        "--debug",
        "-d",
        dest="log_level",
        action="store_const",
        const="debug",
        default="warning",
    )
    parser.add_argument(
        "--directory", "-t", default=".", help="base directory, default: %(default)s"
    )
    parser.add_argument(
        "--exclude",
        "-e",
        default=[49],
        nargs="+",
        help="exclude the given RFC numbers, default: %(default)s",
    )
    parser.add_argument("--dryrun", "-n", action="store_true", help="do nothing")
    args = parser.parse_args()
    # Values given on the command line arrive as strings while the default
    # is already a list of ints; normalize both cases to ints here.
    try:
        args.exclude = [int(value) for value in args.exclude]
    except ValueError as exc:
        # parser.error() accepts a single, already formatted message; it
        # does not do logging-style lazy "%s" interpolation.
        parser.error("could not parse --exclude value: %s" % exc)
    return args


# Regular expression fragment for a policy page name: the literal
# "tpa-rfc-" prefix, the RFC number (captured as group 1), a dash, then
# any run of characters other than ")". The ")" exclusion lets the same
# fragment be embedded in the "(policy/...)" link pattern used on the
# table of contents as well as searched for in file paths.
RFC_FILENAME_RE = r"tpa-rfc-(\d+)-[^)]*"


def find_policy_numbers_toc(path: str) -> set[int]:
    """Return the RFC numbers linked from the table of contents at *path*.

    The file is read in full and scanned for ``(policy/tpa-rfc-N-...)``
    link targets; each ``N`` found is returned as an int.
    """
    link_pattern = r"\(policy/" + RFC_FILENAME_RE + r"\)"
    with open(path) as toc_file:
        contents = toc_file.read()
    return {int(hit.group(1)) for hit in re.finditer(link_pattern, contents)}


def find_policy_numbers_filenames(directory: str) -> set[int]:
    """Return the RFC numbers found in ``policy/tpa-rfc-*.md`` file names.

    *directory* is the base directory containing the ``policy/``
    subdirectory. Every path returned by the glob is expected to match
    RFC_FILENAME_RE; a path that does not trips an assertion.
    """

    def rfc_number(candidate):
        # The glob is looser than the regex, hence the sanity check.
        hit = re.search(RFC_FILENAME_RE, candidate)
        assert hit, "oops, %s doesn't match pattern %s, regex doesn't match glob?" % (
            candidate,
            RFC_FILENAME_RE,
        )
        return int(hit.group(1))

    wildcard = os.path.join(directory, "policy/tpa-rfc-*.md")
    return {rfc_number(candidate) for candidate in glob(wildcard)}


def find_gaps(number_list: list[int]) -> Iterator[int]:
    """Yield, in ascending order, the integers missing from *number_list*.

    Only holes between the smallest and largest value are reported. The
    input does not need to be sorted; fewer than two values yields nothing.
    """
    ordered = sorted(number_list)
    # Walk neighbouring pairs; the range is empty when they are adjacent
    # (or equal), so nothing is yielded for them.
    for lower, upper in zip(ordered, ordered[1:]):
        yield from range(lower + 1, upper)


def main() -> None:
    """Cross-check policy.md against the policy/ directory.

    Collects the RFC numbers linked from ``policy.md`` and those present
    as ``policy/tpa-rfc-*.md`` files under the chosen directory, then
    reports:

    - a difference in the number of entries,
    - numbers present on one side but not the other,
    - gaps in either numbering, ignoring the ``--exclude`` numbers.

    Exits with status 1 when at least one check failed.
    """
    errors = 0
    args = parse_args()
    logging.basicConfig(format="%(message)s", level=args.log_level.upper())

    policy_numbers_toc = find_policy_numbers_toc(
        os.path.join(args.directory, "policy.md")
    )
    logging.debug(
        "found %d policy items in policy.md: %s",
        len(policy_numbers_toc),
        policy_numbers_toc,
    )
    policy_numbers_filenames = find_policy_numbers_filenames(args.directory)
    logging.debug(
        "found %d policy files in policy/: %s",
        len(policy_numbers_filenames),
        policy_numbers_filenames,
    )

    if len(policy_numbers_filenames) != len(policy_numbers_toc):
        logging.error(
            "number of policy items mismatch between policy.md (%d) and policy/ directory (%d)",
            len(policy_numbers_toc),
            len(policy_numbers_filenames),
        )
        errors += 1
        logging.debug("policy_numbers_toc: %r", policy_numbers_toc)
        logging.debug("policy_numbers_filenames: %r", policy_numbers_filenames)

    # Compare the contents regardless of the counts: two sets of the same
    # size can still hold different numbers, which a length check alone
    # would never notice.
    missing_filenames = sorted(
        x for x in policy_numbers_toc if x not in policy_numbers_filenames
    )
    missing_toc = sorted(
        x for x in policy_numbers_filenames if x not in policy_numbers_toc
    )
    if missing_filenames:
        errors += 1
        logging.info("missing filenames: %s", missing_filenames)
    if missing_toc:
        errors += 1
        logging.info("missing from policy.md: %s", missing_toc)

    filenames_gaps = [
        x for x in find_gaps(policy_numbers_filenames) if x not in args.exclude
    ]
    if filenames_gaps:
        errors += 1
        logging.info("gaps found in policy numbers filenames: %s", filenames_gaps)
    toc_gaps = [x for x in find_gaps(policy_numbers_toc) if x not in args.exclude]
    if toc_gaps:
        errors += 1
        logging.info("gaps found in policy numbers TOC: %s", toc_gaps)

    if policy_numbers_filenames:
        last_number = max(policy_numbers_filenames)
        logging.info(
            "last RFC number is %s, next one would logically be %s",
            last_number,
            last_number + 1,
        )
    else:
        # max() raises ValueError on an empty collection, so say what
        # happened instead of crashing with a traceback.
        logging.warning(
            "no policy files found in %s", os.path.join(args.directory, "policy")
        )

    if errors:
        logging.error("check failed: %d errors found", errors)
        sys.exit(1)


if __name__ == "__main__":
    main()
