import argparse
import os
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path

# --- Configuration ---
# Values interpolated into the generated header text.
COPYRIGHT_HOLDER = "s0up and the autobrr contributors"
LICENSE = "GPL-2.0-or-later"
START_YEAR = 2025
CURRENT_YEAR = datetime.now().year

# A single year while the clock still reads START_YEAR (or earlier),
# otherwise a "start-current" range.
if CURRENT_YEAR > START_YEAR:
    COPYRIGHT_YEAR = f"{START_YEAR}-{CURRENT_YEAR}"
else:
    COPYRIGHT_YEAR = str(START_YEAR)

# Directory names that are pruned while walking a directory tree.
EXCLUDED_DIRS = {
    ".git",
    "node_modules",
    "dist",
    "build",
    "vendor",
    "documentation",
}

# Matches an SPDX tag whose single license id is the last thing on the line.
SPDX_LINE_RE = re.compile(r"SPDX-License-Identifier:\s*([A-Za-z0-9.+-]+)\s*$")

# The two notices shared by every header flavour; only the comment syntax differs.
_COPYRIGHT_NOTICE = f"Copyright (c) {COPYRIGHT_YEAR}, {COPYRIGHT_HOLDER}."
_SPDX_NOTICE = f"SPDX-License-Identifier: {LICENSE}"

# Header for TypeScript/TSX files (4 lines, one block comment).
TS_HEADER_LINES = ["/*", f" * {_COPYRIGHT_NOTICE}", f" * {_SPDX_NOTICE}", " */"]

# Header for Go files (2 lines, line comments).
GO_HEADER_LINES = [f"// {notice}" for notice in (_COPYRIGHT_NOTICE, _SPDX_NOTICE)]


@dataclass
class Summary:
    """Counters and file lists accumulated over one run, printed by ``main``."""

    # Files whose content differs from the desired result (always added + updated).
    changed: int = 0
    # Files that already match, or that were left alone because of --only-missing.
    unchanged: int = 0
    # Changed files that had no header of this project's license before.
    added: int = 0
    # Changed files whose existing header of this project's license was replaced.
    updated: int = 0
    # Files skipped because they look machine-generated.
    skipped_generated: int = 0
    # Files skipped because their preamble carries a different license.
    skipped_foreign_license: int = 0
    # Read or write failures.
    errors: int = 0
    # (file path, detected license or description) for every foreign-license skip.
    foreign_license_files: list[tuple[str, str]] = field(default_factory=list)
    # Paths of every file skipped as generated.
    generated_files: list[str] = field(default_factory=list)


def _detect_newline(raw: bytes) -> str:
    crlf = raw.count(b"\r\n")
    lf = raw.count(b"\n")
    if crlf > 0 and crlf >= (lf - crlf):
        return "\r\n"
    return "\n"


def _read_text_file(file_path: str) -> tuple[list[str], str, bool]:
    """Read a UTF-8 source file and split it into lines.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(lines, newline, ends_with_newline)`` tuple: the lines without
        their terminators, the dominant terminator as reported by
        ``_detect_newline``, and whether the raw bytes end with ``\\n``.
        Leading byte-order marks are removed from the text and are not part
        of the returned lines.

    Raises:
        OSError: The file cannot be read.
        UnicodeDecodeError: The file is not valid UTF-8.
    """
    raw = Path(file_path).read_bytes()
    newline = _detect_newline(raw)
    ends_with_newline = raw.endswith(b"\n")
    text = raw.decode("utf-8")
    if text.startswith("\ufeff"):
        text = text.lstrip("\ufeff")

    # Split on real line terminators only. str.splitlines() would also break
    # on \v, \f, \x1c-\x1e, \x85, U+2028 and U+2029; those can legitimately
    # occur inside string/template literals and would be turned into newlines
    # when the file is written back.
    lines = re.split(r"\r\n?|\n", text)
    # A trailing terminator (or empty input) leaves one empty tail element;
    # drop it so the result has the same shape splitlines() would give.
    if lines[-1] == "":
        lines.pop()
    return lines, newline, ends_with_newline


def _write_text_file(file_path: str, lines: list[str], newline: str, ends_with_newline: bool) -> None:
    text = newline.join(lines)
    if ends_with_newline:
        text += newline
    Path(file_path).write_bytes(text.encode("utf-8"))


def _is_generated(file_path: str, lines: list[str]) -> bool:
    p = Path(file_path)
    if p.name.endswith((".gen.ts", ".gen.tsx")):
        return True
    head = "\n".join(lines[:50]).lower()
    return any(
        marker in head
        for marker in (
            "automatically generated",
            "auto-generated",
            "autogenerated",
            "do not edit",
            "will be overwritten",
        )
    )


def _extract_spdx_license(block: str) -> str | None:
    """Return the license id from the first SPDX tag line in a comment block.

    Args:
        block: Text of one comment block, lines separated by newlines.

    Returns:
        The identifier captured by ``SPDX_LINE_RE`` on the first line that
        matches, or ``None`` when no line matches.
    """
    for line in block.splitlines():
        # Peel comment punctuation from BOTH ends. SPDX_LINE_RE is anchored at
        # end-of-line, so a closing "*/" left in place would hide the tag in a
        # one-line comment such as "/* SPDX-License-Identifier: MIT */".
        cleaned = line.strip().strip("/*").strip()
        match = SPDX_LINE_RE.search(cleaned)
        if match:
            return match.group(1)
    return None


def _looks_like_foreign_license(block_lower: str) -> bool:
    return any(
        phrase in block_lower
        for phrase in (
            "mit license",
            "apache license",
            "mozilla public license",
            "bsd license",
            "isc license",
            "lgpl",
            "lesser general public license",
        )
    )


def _scan_preamble_comment_blocks(lines: list[str]) -> list[tuple[int, int, str]]:
    i = 0
    while i < len(lines) and lines[i].strip() == "":
        i += 1

    blocks: list[tuple[int, int, str]] = []
    while i < len(lines):
        line = lines[i].lstrip()
        if line == "":
            i += 1
            continue

        if line.startswith("//"):
            start = i
            while i < len(lines) and lines[i].lstrip().startswith("//"):
                i += 1
            end = i - 1
            blocks.append((start, end, "\n".join(lines[start : end + 1])))
            continue

        if line.startswith("/*"):
            start = i
            end = i
            while end < len(lines):
                if "*/" in lines[end]:
                    break
                end += 1
            if end >= len(lines):
                break
            blocks.append((start, end, "\n".join(lines[start : end + 1])))
            i = end + 1
            continue

        break

    return blocks


def _strip_leading_blank_lines(lines: list[str]) -> list[str]:
    i = 0
    while i < len(lines) and lines[i].strip() == "":
        i += 1
    return lines[i:]


def _build_header_lines(file_path: str) -> list[str] | None:
    if file_path.endswith((".ts", ".tsx")):
        return TS_HEADER_LINES
    if file_path.endswith(".go"):
        return GO_HEADER_LINES
    return None


def _remove_existing_license_header_or_skip(
    file_path: str,
    lines: list[str],
    *,
    summary: Summary,
) -> tuple[list[str], bool] | None:
    """Strip this project's header from *lines*, or flag a foreign license.

    The first preamble comment block that carries an SPDX tag decides the
    outcome. Only when no block has a tag are the blocks checked for
    well-known license phrases.

    Returns:
        ``(remaining_lines, True)`` when a block tagged with ``LICENSE`` was
        removed, ``(lines, False)`` when no license information was found, or
        ``None`` when the file carries another license. In the ``None`` case
        the skip is recorded on *summary*.
    """
    blocks = _scan_preamble_comment_blocks(lines)

    def _skip_as_foreign(description: str) -> None:
        # Record the skip; the caller must leave the file untouched.
        summary.skipped_foreign_license += 1
        summary.foreign_license_files.append((file_path, description))
        return None

    for first, last, text in blocks:
        license_id = _extract_spdx_license(text)
        if license_id is None:
            continue
        if license_id == LICENSE:
            return lines[:first] + lines[last + 1 :], True
        return _skip_as_foreign(license_id)

    # No SPDX tag anywhere in the preamble: fall back to the phrase heuristic.
    if any(_looks_like_foreign_license(text.lower()) for _, _, text in blocks):
        return _skip_as_foreign("unknown (non-SPDX license text)")

    return lines, False


def process_file(file_path: str, *, dry_run: bool, only_missing: bool, summary: Summary) -> None:
    """Add or refresh the license header of one file and record the outcome.

    Files with an unsupported extension are ignored without being counted.
    Generated files and files carrying another license are skipped and
    recorded on *summary*.

    Args:
        file_path: File to process.
        dry_run: When true, count the change but do not write the file.
        only_missing: When true, leave files that already carry this
            project's SPDX header untouched, even if the header is outdated.
        summary: Run-wide counters, updated in place.
    """
    header_lines = _build_header_lines(file_path)
    if header_lines is None:
        return

    # Per-file boundary: any failure is reported and counted so that one bad
    # file does not abort the whole run.
    try:
        lines, newline, ends_with_newline = _read_text_file(file_path)
    except Exception as e:
        summary.errors += 1
        print(f"ERROR: {file_path}: {e}")
        return

    if _is_generated(file_path, lines):
        summary.skipped_generated += 1
        summary.generated_files.append(file_path)
        return

    # A "#!" interpreter line only works as the very first line, so keep it
    # there and treat everything after it as the body that receives the header.
    shebang = lines[:1] if lines and lines[0].startswith("#!") else []
    body = lines[len(shebang) :]

    removed = _remove_existing_license_header_or_skip(file_path, body, summary=summary)
    if removed is None:
        return
    remaining, removed_ours = removed

    # removed_ours is true exactly when an SPDX header is already present
    # (a foreign one would have returned None above).
    if only_missing and removed_ours:
        summary.unchanged += 1
        return

    new_lines = [*shebang, *header_lines, "", *_strip_leading_blank_lines(remaining)]
    if new_lines == lines:
        summary.unchanged += 1
        return

    if not dry_run:
        try:
            _write_text_file(file_path, new_lines, newline, ends_with_newline)
        except Exception as e:
            # A file that could not be written was not changed; count it as
            # an error only.
            summary.errors += 1
            print(f"ERROR: failed to write {file_path}: {e}")
            return

    summary.changed += 1
    if removed_ours:
        summary.updated += 1
    else:
        summary.added += 1


def main() -> None:
    """Command-line entry point: process the given paths and print a summary.

    Raises:
        SystemExit: With status 1 when any error was recorded, or when
            ``--check`` is given and at least one file needs a change.
    """
    parser = argparse.ArgumentParser(description="Add/update license headers in Go and TS/TSX files.")
    parser.add_argument(
        "paths",
        nargs="*",
        help="Files or directories to process (defaults to current directory).",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report changes but do not write files.",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Exit non-zero if changes are needed (implies --dry-run).",
    )
    parser.add_argument(
        "--only-missing",
        action="store_true",
        help="Only add headers when no SPDX header is present.",
    )
    args = parser.parse_args()

    dry_run = args.dry_run or args.check
    only_missing = args.only_missing
    roots = args.paths or ["."]

    summary = Summary()

    def iter_targets(paths: list[str]):
        """Yield every supported file in *paths*, pruning excluded directories."""
        for p in paths:
            path = Path(p)
            if path.is_file():
                # Explicitly named files are honoured even inside excluded dirs.
                if _build_header_lines(str(path)) is not None:
                    yield str(path)
                continue
            if not path.is_dir():
                # A mistyped or vanished path must not pass silently.
                summary.errors += 1
                print(f"ERROR: {p}: not a file or directory")
                continue
            for root, dirs, files in os.walk(path):
                # In-place assignment prunes the walk; sorting keeps the
                # processing (and error output) order deterministic.
                dirs[:] = sorted(d for d in dirs if d not in EXCLUDED_DIRS)
                for name in sorted(files):
                    candidate = str(Path(root) / name)
                    if _build_header_lines(candidate) is not None:
                        yield candidate

    for target in iter_targets(roots):
        process_file(target, dry_run=dry_run, only_missing=only_missing, summary=summary)

    print("\n=== Summary ===")
    print(f"Changed: {summary.changed} (added: {summary.added}, updated: {summary.updated})")
    print(f"Unchanged: {summary.unchanged}")
    print(f"Skipped generated: {summary.skipped_generated}")
    print(f"Skipped other license: {summary.skipped_foreign_license}")
    print(f"Errors: {summary.errors}")

    if summary.foreign_license_files:
        print("\nFiles skipped due to other license headers:")
        for file_path, spdx in sorted(summary.foreign_license_files):
            print(f"- {file_path} ({spdx})")

    if summary.generated_files:
        print("\nGenerated files skipped:")
        for file_path in sorted(summary.generated_files):
            print(f"- {file_path}")

    # Errors mean the result is incomplete, so never report success for them;
    # --check additionally fails when any file would be changed.
    if summary.errors > 0 or (args.check and summary.changed > 0):
        raise SystemExit(1)


if __name__ == "__main__":
    main()