# Reusable CI workflow (invoked through `workflow_call`).
#
# The `changes` job decides which lanes run, either by forcing everything on
# (`force_all_checks`) or by inspecting changed paths. Every other job is gated
# on its outputs and/or on a caller-supplied input. The final `check` job
# aggregates all results.
#
# Conventions used in this file:
#   * `${{ ... }}` expressions are never interpolated directly into `run:`
#     scripts; they are passed through `env:` and read as shell variables.
#   * Third-party actions are pinned to a commit SHA with the tag in a trailing
#     comment.
#
# NOTE(review): `astral-sh/setup-uv@v7` is referenced by a mutable tag in
# several jobs below, unlike every other third-party action here. Consider
# pinning it to a commit SHA as well.

on:
  workflow_call:
    inputs:
      push_coverage:
        type: boolean
        description: "If true, the coverage results are pushed to codecov.io."
        default: true
      run_lint:
        type: boolean
        description: "If true, run the full lint job in this workflow."
        default: true
      force_all_checks:
        type: boolean
        description: "If true, run all test/example/package lanes regardless of changed-path filters."
        default: false
      run_package_compat:
        type: boolean
        description: "If true, run the cross-version package compatibility lanes."
        default: false
      run_windows:
        type: boolean
        description: "If true, run the Windows smoke test lane on GitHub-hosted runners."
        default: false
      run_macos:
        type: boolean
        description: "If true, run the macOS smoke test lane on GitHub-hosted runners."
        default: false
      use_tach:
        type: boolean
        description: "If true, enable Tach-aware test skipping in the cheap CI lanes."
        default: false
      tach_base_ref:
        type: string
        description: "Base git ref or commit used for Tach impact analysis."
        default: ""
      python_versions:
        type: string
        description: 'JSON array of Python versions to use for multi-version jobs, e.g. ["3.10", "3.12"].'
        default: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
    secrets:
      CODECOV_TOKEN:
        required: false

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  HF_HUB_DOWNLOAD_TIMEOUT: "90"
  HF_HUB_ETAG_TIMEOUT: "90"
  UV_FROZEN: "1"
  # Regexes over example file names; both are combined into the exclude
  # pattern of the `run-examples-light` job.
  EXAMPLES_HEAVY: '^(batch_convert|chart_extraction|granite_vision_table_structure|minimal_asr_pipeline|minimal_vlm_pipeline)\.py$'
  EXAMPLES_UNSUPPORTED_IN_CI: '^(compare_vlm_models|custom_convert|demo_layout_vlm|develop_picture_enrichment|export_multimodal|gpu_standard_pipeline|gpu_vlm_pipeline|granitedocling_repetition_stopping|minimal|mlx_whisper_example|offline_convert|pictures_description|pictures_description_api|post_process_ocr_with_vlm|rapidocr_with_custom_models|run_with_formats_html_rendered|run_with_formats_html_rendered_mp|suryaocr_with_custom_models|vlm_pipeline_api_model)\.py$|xbrl_conversion\.ipynb$'

jobs:
  # Computes the gating flags consumed by the other jobs. Exactly one of the
  # two paths runs: the `force` step (when `force_all_checks` is true) or the
  # checkout + `filter` + `ml-matrix` steps (otherwise). Each output prefers
  # the forced value and falls back to the detected one.
  changes:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    permissions:
      contents: read
      pull-requests: read
    outputs:
      run_tests: ${{ steps.force.outputs.run_tests || steps.filter.outputs.run_tests }}
      ml_suites: ${{ steps.force.outputs.ml_suites || steps.ml-matrix.outputs.ml_suites || '[]' }}
      run_examples_all: ${{ steps.force.outputs.run_examples_all || steps.filter.outputs.run_examples_all }}
      changed_example_scripts: ${{ steps.force.outputs.changed_example_scripts || steps.filter.outputs.changed_example_scripts }}
      changed_example_scripts_files: ${{ steps.force.outputs.changed_example_scripts_files || steps.filter.outputs.changed_example_scripts_files || '[]' }}
      run_package: ${{ steps.force.outputs.run_package || steps.filter.outputs.run_package }}
      run_tach: ${{ steps.force.outputs.run_tach || steps.filter.outputs.run_tach }}
    steps:
      - name: Force all lanes
        if: ${{ inputs.force_all_checks }}
        id: force
        run: |
          echo "run_tests=true" >> "$GITHUB_OUTPUT"
          echo 'ml_suites=["ocr","pdf-model","vlm","asr"]' >> "$GITHUB_OUTPUT"
          echo "run_examples_all=true" >> "$GITHUB_OUTPUT"
          echo "changed_example_scripts=true" >> "$GITHUB_OUTPUT"
          echo "changed_example_scripts_files=[]" >> "$GITHUB_OUTPUT"
          echo "run_package=true" >> "$GITHUB_OUTPUT"
          echo "run_tach=true" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        if: ${{ !inputs.force_all_checks }}
        with:
          fetch-depth: 0

      - name: Detect changed paths
        if: ${{ !inputs.force_all_checks }}
        id: filter
        uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
        with:
          list-files: json
          filters: |
            # Default catch-all for regular tests. The core pytest command
            # ignores modules marked with ML pytest markers; newly added tests
            # still run here unless intentionally marked for an ML lane.
            run_tests:
              - ".github/workflows/**"
              - ".github/scripts/**"
              - ".github/actions/**"
              - "pyproject.toml"
              - "uv.lock"
              - "Dockerfile"
              - "docling/**"
              - "tests/**"
            run_examples_all:
              - ".github/workflows/**"
              - ".github/scripts/**"
              - ".github/actions/**"
              - "pyproject.toml"
              - "uv.lock"
              - "Dockerfile"
              - "docling/**"
            changed_example_scripts:
              - added|modified: "docs/examples/*.py"
            run_package:
              - ".github/workflows/**"
              - ".github/scripts/**"
              - ".github/actions/**"
              - "pyproject.toml"
              - "uv.lock"
              - "README.md"
              - "LICENSE"
              - "docling/**"
            run_tach:
              - ".github/workflows/**"
              - ".github/scripts/**"
              - ".github/actions/**"
              - ".pre-commit-config.yaml"
              - "pyproject.toml"
              - "scripts/check_tach_module_coverage.py"
              - "tach.toml"
              - "uv.lock"
              - "docling/**"

      # NOTE(review): the job output reads `steps.ml-matrix.outputs.ml_suites`,
      # so the script presumably writes `ml_suites` to $GITHUB_OUTPUT itself;
      # confirm against pytest_marker_selection.py.
      - name: Build ML suite matrix
        if: ${{ !inputs.force_all_checks }}
        id: ml-matrix
        env:
          # ML lanes share the regular test trigger; run_tach adds Tach and
          # pre-commit config changes that can affect test selection.
          RUN_ML: ${{ steps.filter.outputs.run_tests == 'true' || steps.filter.outputs.run_tach == 'true' }}
        run: |
          python3 .github/scripts/pytest_marker_selection.py matrix \
            --run-all-ml "$RUN_ML"

  # Style checks via prek over all files; gated only on the `run_lint` input.
  lint:
    if: ${{ inputs.run_lint }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: ./.github/actions/setup-ubuntu-ci
        with:
          python_version: ${{ matrix.python-version }}
          uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
      # The hash of the full interpreter version string is part of the cache
      # key below, so a different Python build gets a separate prek cache.
      - name: Set prek cache key
        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
      - name: Cache prek environments
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: ~/.cache/prek
          key: prek|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml', '.github/dprint.json') }}
          restore-keys: |
            prek|${{ env.PY }}|
      - name: Check style
        run: |
          echo "--- Running prek style checks ---"
          uv run prek run --all-files

  # Tach module coverage and boundary checks; runs when `run_tach` is true.
  tach:
    needs: changes
    if: ${{ needs.changes.outputs.run_tach == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: ./.github/actions/setup-ubuntu-ci
        with:
          python_version: "3.10"
          uv_sync_args: --frozen --only-group dev --no-install-project
      - name: Check module coverage
        run: python3 scripts/check_tach_module_coverage.py
      - name: Check module boundaries
        run: uv run --no-sync tach check

  # Core pytest suite across the requested Python versions. Extra pytest
  # arguments come from `pytest_marker_selection.py core-ignore-args`.
  run-tests-core:
    needs: changes
    if: ${{ needs.changes.outputs.run_tests == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 45
    strategy:
      fail-fast: false
      matrix:
        python-version: ${{ fromJSON(inputs.python_versions) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
      - uses: ./.github/actions/setup-ubuntu-ci
        with:
          python_version: ${{ matrix.python-version }}
          uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
          install_system_deps: "true"
          cache_models: "true"
      - name: Run core test suite
        env:
          TACH_BASE_REF: ${{ inputs.tach_base_ref }}
          USE_TACH: ${{ inputs.use_tach }}
        run: |
          echo "--- Running core tests ---"
          mapfile -t ML_IGNORE_ARGS < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py core-ignore-args)
          # Optional Tach arguments are kept in an array so they expand
          # without relying on unquoted word splitting. The all-zero SHA is
          # treated as "no base ref".
          TACH_ARGS=()
          if [ "$USE_TACH" = "true" ] && [ -n "$TACH_BASE_REF" ] && [ "$TACH_BASE_REF" != "0000000000000000000000000000000000000000" ]; then
            TACH_ARGS=(--tach-base "$TACH_BASE_REF")
          fi
          echo "Running core test suite"
          uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test "${TACH_ARGS[@]}" "${ML_IGNORE_ARGS[@]}"
      - name: Upload coverage to Codecov
        if: inputs.push_coverage
        uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: run-tests-core

  # ML pytest suites: one job per (Python version, suite) pair, where the
  # suite list is the `ml_suites` JSON array produced by `changes`.
  run-tests-ml:
    needs: changes
    if: ${{ needs.changes.outputs.ml_suites != '[]' }}
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        python-version: ${{ fromJSON(inputs.python_versions) }}
        suite: ${{ fromJSON(needs.changes.outputs.ml_suites) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
      - uses: ./.github/actions/setup-ubuntu-ci
        with:
          python_version: ${{ matrix.python-version }}
          uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
          install_system_deps: "true"
          cache_models: "true"
      - name: Pre-download OCR models
        if: ${{ matrix.suite == 'ocr' }}
        run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
      - name: Run ML test suite
        env:
          TACH_BASE_REF: ${{ inputs.tach_base_ref }}
          USE_TACH: ${{ inputs.use_tach }}
          SUITE: ${{ matrix.suite }}
        run: |
          echo "--- Running ML suite: $SUITE ---"
          SUITE_MARKER=$(uv run --no-sync python .github/scripts/pytest_marker_selection.py suite-marker "$SUITE")
          mapfile -t SUITE_ARGS < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py suite-args "$SUITE")
          # An empty selection is treated as an error rather than a pass.
          if [ ${#SUITE_ARGS[@]} -eq 0 ]; then
            echo "No tests are marked for $SUITE" >&2
            exit 1
          fi
          # Optional Tach arguments are kept in an array so they expand
          # without relying on unquoted word splitting. The all-zero SHA is
          # treated as "no base ref".
          TACH_ARGS=()
          if [ "$USE_TACH" = "true" ] && [ -n "$TACH_BASE_REF" ] && [ "$TACH_BASE_REF" != "0000000000000000000000000000000000000000" ]; then
            TACH_ARGS=(--tach-base "$TACH_BASE_REF")
          fi
          uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test "${TACH_ARGS[@]}" -m "$SUITE_MARKER" "${SUITE_ARGS[@]}"
      - name: Upload coverage to Codecov
        if: inputs.push_coverage
        uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: run-tests-ml-${{ matrix.suite }}

  # Runs example scripts from docs/examples, excluding those matched by
  # EXAMPLES_HEAVY or EXAMPLES_UNSUPPORTED_IN_CI. Passes `--run-all` when
  # `run_examples_all` is true, and always passes the changed-files JSON.
  run-examples-light:
    needs: changes
    if: >-
      ${{ needs.changes.outputs.run_examples_all == 'true'
      || needs.changes.outputs.changed_example_scripts == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: ./.github/actions/setup-ubuntu-ci
        with:
          python_version: ${{ matrix.python-version }}
          uv_sync_args: --frozen --group examples --all-extras --all-packages --no-group docs --no-group dev
          install_system_deps: "true"
          cache_models: "true"
      - name: Free up disk space
        run: |
          df -h
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo apt-get clean
          df -h
      - name: Run selected light examples
        env:
          RUN_ALL_EXAMPLES: ${{ needs.changes.outputs.run_examples_all }}
          CHANGED_EXAMPLE_FILES: ${{ needs.changes.outputs.changed_example_scripts_files }}
        run: |
          args=(
            --examples-dir docs/examples
            --changed-files-json "$CHANGED_EXAMPLE_FILES"
            --exclude-pattern "${EXAMPLES_HEAVY}|${EXAMPLES_UNSUPPORTED_IN_CI}"
          )
          if [ "$RUN_ALL_EXAMPLES" = "true" ]; then
            args+=(--run-all)
          fi
          uv run --no-sync python .github/scripts/run_selected_examples.py "${args[@]}"

  # Windows smoke lane: import/CLI check plus tests selected by the
  # `cross_platform` pytest marker. Gated only on the `run_windows` input.
  run-tests-windows:
    needs: changes
    if: ${{ inputs.run_windows }}
    runs-on: windows-latest
    timeout-minutes: 30
    env:
      PYTHONUTF8: "1"
      UV_PYTHON_PREFERENCE: only-system
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      - name: Install dependencies
        shell: bash
        env:
          MATRIX_PYTHON_VERSION: ${{ matrix.python-version }}
        run: |
          uv python pin "$MATRIX_PYTHON_VERSION"
          uv sync --frozen --group dev --all-packages --no-group docs --no-group examples
      - name: Check import and CLI wiring
        shell: bash
        run: |
          uv run python -c "import docling; from docling.document_converter import DocumentConverter; DocumentConverter(); print('Docling import smoke passed')"
          uv run docling --help
      - name: Run cross-platform smoke tests
        shell: bash
        run: |
          # Read one argument per line with a portable loop (kept identical
          # to the macOS lane). A trailing CR is stripped because Python on
          # Windows may emit CRLF line endings, which would otherwise end up
          # inside the pytest arguments.
          cross_platform_args=()
          while IFS= read -r arg || [ -n "$arg" ]; do
            arg=${arg%$'\r'}
            cross_platform_args+=("$arg")
          done < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py marker-args cross_platform)
          if [ "${#cross_platform_args[@]}" -eq 0 ]; then
            echo "No tests are marked with pytest.mark.cross_platform" >&2
            exit 1
          fi
          uv run pytest -p no:tach -m cross_platform "${cross_platform_args[@]}" --durations=10

  # macOS smoke lane: same steps as the Windows lane. Gated only on the
  # `run_macos` input.
  run-tests-macos:
    needs: changes
    if: ${{ inputs.run_macos }}
    runs-on: macos-latest
    timeout-minutes: 30
    env:
      UV_PYTHON_PREFERENCE: only-system
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      - name: Install dependencies
        shell: bash
        env:
          MATRIX_PYTHON_VERSION: ${{ matrix.python-version }}
        run: |
          uv python pin "$MATRIX_PYTHON_VERSION"
          uv sync --frozen --group dev --all-packages --no-group docs --no-group examples
      - name: Check import and CLI wiring
        shell: bash
        run: |
          uv run python -c "import docling; from docling.document_converter import DocumentConverter; DocumentConverter(); print('Docling import smoke passed')"
          uv run docling --help
      - name: Run cross-platform smoke tests
        shell: bash
        run: |
          # Apple's stock bash is 3.2, which has no `mapfile` builtin
          # (bash >= 4), so lines are read with a portable loop instead.
          # The CR strip is a no-op here; it keeps this step identical to
          # the Windows lane.
          cross_platform_args=()
          while IFS= read -r arg || [ -n "$arg" ]; do
            arg=${arg%$'\r'}
            cross_platform_args+=("$arg")
          done < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py marker-args cross_platform)
          if [ "${#cross_platform_args[@]}" -eq 0 ]; then
            echo "No tests are marked with pytest.mark.cross_platform" >&2
            exit 1
          fi
          uv run pytest -p no:tach -m cross_platform "${cross_platform_args[@]}" --durations=10

  # Editable `uv pip install` of the project with the `all` extra into a
  # fresh venv per selected Python version, followed by an import check.
  test-pip-install-no-lock:
    needs: changes
    if: ${{ inputs.run_package_compat && needs.changes.outputs.run_package == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      # Space-separated list derived from the JSON `python_versions` input.
      SELECTED_PYTHON_VERSIONS: ${{ join(fromJSON(inputs.python_versions), ' ') }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Test pip install across selected Python versions
        run: |
          # $SELECTED_PYTHON_VERSIONS is intentionally unquoted: it is a
          # space-separated list that must be word-split.
          for py_version in $SELECTED_PYTHON_VERSIONS; do
            echo "=========================================="
            echo "Testing Python $py_version"
            echo "=========================================="

            # Create virtual environment with uv
            uv venv "/tmp/venv-${py_version}" --python="${py_version}"
            source "/tmp/venv-${py_version}/bin/activate"

            # Install docling-slim with pip (no lock file).
            # `.[all]` is quoted so the shell cannot treat it as a glob.
            uv pip install --torch-backend=cpu -e ".[all]"

            # Run basic import test
            python -c "import docling; print('Import successful for Python ${py_version}')"

            # Cleanup
            deactivate
            rm -rf "/tmp/venv-${py_version}"

            echo "Python $py_version: PASSED"
            echo ""
          done

  # Same install check as above, but with the interpreter's C headers removed
  # first. A source build of numpy is used as a probe that compilation fails
  # before the project itself is installed.
  test-pip-install-no-dev-headers:
    needs: changes
    if: ${{ inputs.run_package_compat && needs.changes.outputs.run_package == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      # Space-separated list derived from the JSON `python_versions` input.
      SELECTED_PYTHON_VERSIONS: ${{ join(fromJSON(inputs.python_versions), ' ') }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Test pip install without dev headers across selected Python versions
        run: |
          # $SELECTED_PYTHON_VERSIONS is intentionally unquoted: it is a
          # space-separated list that must be word-split.
          for py_version in $SELECTED_PYTHON_VERSIONS; do
            echo "=========================================="
            echo "Testing Python $py_version (no dev headers)"
            echo "=========================================="

            # Create virtual environment with uv
            uv venv "/tmp/venv-nodev-${py_version}" --python="${py_version}"
            source "/tmp/venv-nodev-${py_version}/bin/activate"

            # Find and remove Python.h from the Python installation
            echo "Removing Python development headers from Python installation..."
            python_include_dir=$(python -c "import sysconfig; print(sysconfig.get_path('include'))")
            echo "Python include directory: $python_include_dir"

            if [ -f "$python_include_dir/Python.h" ]; then
              echo "Found Python.h, removing it and other headers..."
              # Use sudo if the directory is system-owned
              if [ -w "$python_include_dir" ]; then
                rm -rf "$python_include_dir"/*
              else
                sudo rm -rf "$python_include_dir"/*
              fi
              echo "✓ Headers removed"
            else
              echo "Warning: Python.h not found at expected location"
            fi

            # Verify that compilation fails without dev headers
            # Try to install numpy from source (sdist) - this should fail
            echo "Verifying compilation fails without dev headers..."
            set +e  # Temporarily allow command to fail
            uv pip install --no-binary=:all: numpy==1.26.4 > "/tmp/numpy-install-${py_version}.log" 2>&1
            numpy_exit_code=$?
            set -e  # Re-enable exit on error

            if [ $numpy_exit_code -eq 0 ]; then
              echo "ERROR: numpy installation from source succeeded, but it should have failed without dev headers!"
              cat "/tmp/numpy-install-${py_version}.log"
              exit 1
            else
              echo "✓ Compilation correctly failed without dev headers (expected behavior)"
              # Check that the error mentions missing Python.h or similar
              if grep -qi "Python.h\|fatal error.*\.h" "/tmp/numpy-install-${py_version}.log"; then
                echo "✓ Error message confirms missing development headers"
              else
                echo "Warning: Error message doesn't explicitly mention missing headers, but compilation failed as expected"
              fi
            fi

            # Install docling-slim with pip (no lock file, no compilation)
            echo "grpcio>=1.71.0" > override-grpcio.txt
            uv pip install --torch-backend=cpu -e ".[all]" --overrides override-grpcio.txt

            # Run basic import test
            python -c "import docling; print('Import successful for Python ${py_version} without dev headers')"

            # Cleanup
            deactivate
            rm -rf "/tmp/venv-nodev-${py_version}"

            echo "Python $py_version (no dev headers): PASSED"
            echo ""
          done

  # Builds the distribution packages, lists the contents of each wheel and
  # uploads `dist/` as the `python-package-distributions` artifact.
  build-package:
    needs: changes
    if: ${{ needs.changes.outputs.run_package == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Install dependencies
        run: uv sync --all-extras
      - name: Build packages
        run: bash .github/scripts/build-packages.sh
      - name: Check content of wheels
        run: |
          for whl in dist/*/*.whl; do
            echo "=== $whl ==="
            unzip -l "$whl"
          done
      - name: Store the distribution packages
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: python-package-distributions
          path: dist/

  # Installs the wheels produced by `build-package` (downloaded as an
  # artifact; the repository is not checked out here) and runs the CLI.
  test-package:
    needs:
      - changes
      - build-package
    if: ${{ needs.changes.outputs.run_package == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true
          enable-cache: false
      - name: Install package
        run: |
          uv pip install --find-links dist/docling-slim/ dist/docling/docling-*.whl
      - name: Run docling
        run: uv run docling --help

  # Aggregation gate: always runs, and hands the `needs` context (as JSON)
  # plus the list of jobs that may legitimately be skipped to
  # check_needs_results.py. `changes` is not in the allowed-skips list.
  check:
    if: ${{ always() }}
    needs:
      - changes
      - lint
      - tach
      - run-tests-core
      - run-tests-ml
      - run-examples-light
      - run-tests-windows
      - run-tests-macos
      - test-pip-install-no-lock
      - test-pip-install-no-dev-headers
      - build-package
      - test-package
    runs-on: ubuntu-latest
    timeout-minutes: 5
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: All Green
        env:
          NEEDS_JSON: ${{ toJSON(needs) }}
          ALLOWED_SKIPS: >-
            lint
            tach
            run-tests-core
            run-tests-ml
            run-examples-light
            run-tests-windows
            run-tests-macos
            test-pip-install-no-lock
            test-pip-install-no-dev-headers
            build-package
            test-package
        run: >-
          python3 .github/scripts/check_needs_results.py
          --needs-json "$NEEDS_JSON"
          --allowed-skips "$ALLOWED_SKIPS"