#!/usr/bin/env python
# Copyright 2026 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unified runner for check/fix scripts.

Usage:
    python utils/checkers.py copies,modular_conversion,doc_toc
    python utils/checkers.py copies,modular_conversion,doc_toc --fix
    python utils/checkers.py copies,doc_toc --keep-going
    python utils/checkers.py all
    python utils/checkers.py all --fix
"""

import argparse
import hashlib
import itertools
import os
import shutil
import subprocess
import sys
import threading
from collections import deque
from pathlib import Path


UTILS_DIR = Path(__file__).parent
REPO_ROOT = UTILS_DIR.parent

# Each checker maps to (label, script_path, extra_check_args, extra_fix_args).
# When fix_args is None, the checker has no fix mode.
# Custom checkers use None instead of the tuple.
CHECKERS = {
    "copies": ("Copied code consistency", "check_copies.py", [], ["--fix_and_overwrite"]),
    "modular_conversion": ("Modular file conversions", "check_modular_conversion.py", [], ["--fix_and_overwrite"]),
    "doc_toc": ("Documentation table of contents", "check_doc_toc.py", [], ["--fix_and_overwrite"]),
    "docstrings": ("Docstring formatting", "check_docstrings.py", [], ["--fix_and_overwrite"]),
    "dummies": ("Dummy objects", "check_dummies.py", [], ["--fix_and_overwrite"]),
    "pipeline_typing": ("Pipeline type hints", "check_pipeline_typing.py", [], ["--fix_and_overwrite"]),
    "doctest_list": ("Doctest list", "check_doctest_list.py", [], ["--fix_and_overwrite"]),
    "repo": ("Repository structure", "check_repo.py", [], None),
    "inits": ("Init files", "check_inits.py", [], None),
    "config_docstrings": ("Config docstrings", "check_config_docstrings.py", [], None),
    "config_attributes": ("Config attributes", "check_config_attributes.py", [], None),
    "init_isort": ("Import ordering", "custom_init_isort.py", ["--check_only"], []),
    "auto_mappings": ("Auto mappings", "sort_auto_mappings.py", ["--check_only"], []),
    "update_metadata": ("Model metadata", "update_metadata.py", ["--check-only"], []),
    "add_dates": ("Model dates", "add_dates.py", ["--check-only"], []),
    "types": (
        "Type annotations",
        "check_types.py",
        [
            "src/transformers/_typing.py",
            "src/transformers/utils",
            "src/transformers/generation",
            "src/transformers/quantizers",
        ],
        None,
    ),
    "modeling_structure": ("Modeling file structure", "check_modeling_structure.py", [], None),
    "deps_table": ("Dependency versions table", None, None, None),
    "imports": ("Public imports", None, None, None),
    "ruff_check": ("Ruff linting", None, None, None),
    "ruff_format": ("Ruff formatting", None, None, None),
}


def _file_md5(path):
    return hashlib.md5(path.read_bytes()).hexdigest()


# ANSI helpers
ORANGE = "\033[38;5;214m"
GREEN = "\033[32m"
RED = "\033[31m"
RESET = "\033[0m"
SPINNER_CHARS = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"


class SlidingWindow:
    """Displays a spinning title + sliding window of the last N output lines in a TTY."""

    def __init__(self, label, max_lines=10):
        self.label = label
        self.max_lines = max_lines
        self.lines = deque(maxlen=max_lines)
        self.displayed = 0  # number of output lines currently on screen
        self.term_width = shutil.get_terminal_size().columns
        self._spinner = itertools.cycle(SPINNER_CHARS)
        self._stop = threading.Event()
        self._lock = threading.Lock()
        # Print initial title line (will be overwritten by spinner)
        print(f"{ORANGE}{next(self._spinner)} {label}{RESET}")
        self._title_on_screen = True
        self._thread = threading.Thread(target=self._spin, daemon=True)
        self._thread.start()

    def _spin(self):
        while not self._stop.is_set():
            self._stop.wait(0.08)
            if self._stop.is_set():
                break
            with self._lock:
                self._redraw()

    def _redraw(self):
        """Clear output lines + title, redraw everything."""
        # Move up over output lines + title line
        for _ in range(self.displayed + (1 if self._title_on_screen else 0)):
            sys.stdout.write("\033[A\033[2K")
        self.displayed = 0
        # Redraw title with next spinner frame
        print(f"{ORANGE}{next(self._spinner)} {self.label}{RESET}")
        self._title_on_screen = True
        # Redraw output lines
        for line in self.lines:
            print(line)
        self.displayed = len(self.lines)
        sys.stdout.flush()

    def add_line(self, line):
        with self._lock:
            self.lines.append(line.rstrip()[: self.term_width])
            self._redraw()

    def finish(self, success):
        """Stop spinner and print final status title."""
        self._stop.set()
        self._thread.join()
        with self._lock:
            # Clear output lines + title
            for _ in range(self.displayed + (1 if self._title_on_screen else 0)):
                sys.stdout.write("\033[A\033[2K")
            self._title_on_screen = False
            self.displayed = 0
            # Print final title with status
            if success:
                print(f"{GREEN}✓ {self.label}{RESET}")
            else:
                print(f"{RED}✗ {self.label}{RESET}")
            # Reprint output lines
            for line in self.lines:
                print(line)
            sys.stdout.flush()


def _run_cmd(cmd, line_callback=None):
    """Run a command, capturing output. Returns (returncode, output)."""
    if line_callback is None:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return result.returncode, result.stdout.decode("utf-8", errors="replace")

    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env)
    output_lines = []
    for raw_line in proc.stdout:
        line = raw_line.decode("utf-8", errors="replace")
        output_lines.append(line)
        line_callback(line)
    proc.wait()
    return proc.returncode, "".join(output_lines)


def run_deps_table_checker(fix=False, line_callback=None):
    """Check or fix the dependency versions table."""
    deps_table = REPO_ROOT / "src" / "transformers" / "dependency_versions_table.py"
    setup_py = REPO_ROOT / "setup.py"
    cmd = [sys.executable, str(setup_py), "deps_table_update"]

    if fix:
        return _run_cmd(cmd, line_callback=line_callback)

    before = _file_md5(deps_table)
    rc, output = _run_cmd(cmd, line_callback=line_callback)
    if rc != 0:
        return rc, output
    after = _file_md5(deps_table)
    if before != after:
        msg = (
            "Error: the version dependency table is outdated.\n"
            "Please run 'make fix-repo' and commit the changes. This requires Python 3.10.\n"
        )
        return 1, output + msg
    return 0, output


def run_imports_checker(fix=False, line_callback=None):
    """Check that all public imports work."""
    rc, output = _run_cmd([sys.executable, "-c", "from transformers import *"], line_callback=line_callback)
    if rc != 0:
        return rc, output + "Import failed, this means you introduced unprotected imports!\n"
    return 0, output


RUFF_TARGETS = ["examples", "tests", "src", "utils", "scripts", "benchmark", "benchmark_v2", "setup.py", "conftest.py"]


def run_ruff_check(fix=False, line_callback=None):
    """Run ruff linting."""
    cmd = ["ruff", "check", *RUFF_TARGETS]
    if fix:
        cmd += ["--fix", "--exclude", ""]
    return _run_cmd(cmd, line_callback=line_callback)


def run_ruff_format(fix=False, line_callback=None):
    """Run ruff formatting."""
    cmd = ["ruff", "format", *RUFF_TARGETS]
    if not fix:
        cmd += ["--check"]
    else:
        cmd += ["--exclude", ""]
    return _run_cmd(cmd, line_callback=line_callback)


CUSTOM_RUNNERS = {
    "deps_table": run_deps_table_checker,
    "imports": run_imports_checker,
    "ruff_check": run_ruff_check,
    "ruff_format": run_ruff_format,
}


def get_checker_command(name, fix=False):
    """Return a shell-friendly command string for a checker."""
    if name == "deps_table":
        return "python setup.py deps_table_update"
    if name == "imports":
        return 'python -c "from transformers import *"'
    if name == "ruff_check":
        cmd = ["ruff", "check", *RUFF_TARGETS]
        if fix:
            cmd += ["--fix", "--exclude", ""]
        return " ".join(cmd)
    if name == "ruff_format":
        cmd = ["ruff", "format", *RUFF_TARGETS]
        if not fix:
            cmd += ["--check"]
        else:
            cmd += ["--exclude", ""]
        return " ".join(cmd)

    _, script, check_args, fix_args = CHECKERS[name]
    if fix and fix_args is None:
        return None
    args = fix_args if fix else check_args
    return " ".join(["python", f"utils/{script}"] + args)


def run_checker(name, fix=False, line_callback=None):
    if name in CUSTOM_RUNNERS:
        return CUSTOM_RUNNERS[name](fix=fix, line_callback=line_callback)

    _, script, check_args, fix_args = CHECKERS[name]
    script_path = UTILS_DIR / script

    if fix and fix_args is None:
        return 0, "skipped (no fix mode)"

    cmd = [sys.executable, str(script_path)]
    cmd += fix_args if fix else check_args

    return _run_cmd(cmd, line_callback=line_callback)


def main():
    parser = argparse.ArgumentParser(description="Run check/fix scripts.")
    parser.add_argument(
        "checkers",
        nargs="+",
        help='Comma-separated checker names, or "all". Use --list to see available checkers.',
    )
    parser.add_argument("--fix", action="store_true", help="Run in fix mode instead of check mode.")
    parser.add_argument(
        "--keep-going", action="store_true", help="Run all checkers even if some fail (report failures at the end)."
    )
    parser.add_argument("--list", action="store_true", help="List available checkers and exit.")

    args = parser.parse_args()

    if args.list:
        for name, entry in sorted(CHECKERS.items()):
            label, script, _, fix_args = entry
            fixable = "fixable" if fix_args is not None else "check-only"
            script_display = script or "custom"
            print(f"  {name:25s} {label:35s} ({script_display}, {fixable})")
        return

    # Join all positional args (shell line continuations may split them) and parse checker names
    raw = " ".join(args.checkers)
    if raw.strip() == "all":
        names = list(CHECKERS.keys())
    else:
        names = [n.strip() for n in raw.split(",") if n.strip()]

    unknown = [n for n in names if n not in CHECKERS]
    if unknown:
        print(f"Unknown checkers: {', '.join(unknown)}")
        print(f"Available: {', '.join(sorted(CHECKERS.keys()))}")
        sys.exit(1)

    is_ci = os.environ.get("GITHUB_ACTIONS") == "true" or os.environ.get("CIRCLECI") == "true"
    is_tty = sys.stdout.isatty() and not is_ci

    failures = []
    for name in names:
        label = CHECKERS[name][0]
        cmd_str = get_checker_command(name, fix=args.fix)

        if is_tty:
            window = SlidingWindow(label, max_lines=10)
            if cmd_str:
                window.add_line(f"$ {cmd_str}")
            rc, output = run_checker(name, fix=args.fix, line_callback=window.add_line)
            window.finish(success=(rc == 0))
            print()
            if rc != 0:
                failures.append(name)
                if not args.keep_going:
                    sys.exit(1)
        else:
            print(f"{label}")
            if cmd_str:
                print(f"$ {cmd_str}")
            rc, output = run_checker(name, fix=args.fix)
            tail = output.splitlines()[-10:]
            if tail:
                print("\n".join(tail))
            status = "OK" if rc == 0 else "FAILED"
            print(status)
            print()
            if rc != 0:
                failures.append(name)
                if not args.keep_going:
                    sys.exit(1)

    if failures:
        print(f"\n{len(failures)} failed: {', '.join(failures)}")
        sys.exit(1)

    print(f"\nAll {len(names)} checks passed.")


if __name__ == "__main__":
    main()