#!/usr/bin/env python
"""
Run all tests for the PHENIX AI Agent.

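Each test module uses cctbx-style fail-fast behavior: its first assertion
failure raises an exception rather than collecting all failures, which makes
it easy to identify and debug issues. This runner catches that exception,
records the suite as failed, and continues with the next suite; pass
--verbose to print the full traceback instead of a one-line error.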

Test Suites (standalone - no PHENIX required):
  1. API Schema - Request/response validation
  2. Best Files Tracker - File tracking, scoring, and model stage detection
  3. Transport - Encoding/decoding round-trips
  4. State Serialization - State packaging/unpackaging
  5. Command Builder - Unified command generation
  6. File Categorization - File classification logic
  7. Session Summary - Agent session summary generation (cycle counting)
  8. Advice Preprocessing - README discovery, advice processing, change detection
  9. Directive Extractor - LLM-based directive extraction from user advice
  10. Directive Validator - Pre-validation of user requests against capabilities
  11. Session Directives - Session directive storage and retrieval
  12. YAML Tools - YAML configuration validation and inspection
  13. Session Tools - Session management utilities
  14. Docs Tools - Documentation generation
  15. Error Analyzer - Error detection and recovery strategies
  16. Decision Flow - Decision flow logic testing
  17. Phaser Multimodel - Phaser multi-model handling
  18. Event System - Event logging and tracking
  19. Metric Patterns - Metric extraction patterns
  20. Pattern Manager - Pattern management
  21. Program Registration - Program registry tests
  22. Summary Display - Summary formatting
  23. New Programs - YAML config for new programs (polder, map_sharpening, autobuild_denmod)
  24. History Analysis - History analysis, anomalous workflow support (v110)
  25. File Utils - Shared file classification utilities (v110)

Test Suites (require PHENIX environment):
  26. Workflow State - State detection, transitions, done flags, stepwise mode
  27. YAML Config - YAML configuration validation
  28. Sanity Checker - Sanity check logic
  29. Metrics Analyzer - Metric extraction and trends
  30. Dry Run - Dry run manager functionality
  31. Integration - End-to-end workflow tests
  32. Directives Integration - End-to-end directive system tests

  33. Langchain Tools - Legacy module tests (core, analysis, RAG, validation, prompts, memory)
  34. Hardcoded Cleanup - Conformance guards for YAML-driven architecture (v112.10)
  35. v112.13 Fixes - Companion files, intermediate filtering, file categorisation
  36. Audit Fix Regressions - Categories I/J/E/G/H: max_refine_cycles landing, zombie
      state detection, _is_failed_result false-positives, xtriage resolution regex,
      real_space_refine map_cc extract strategy (v112 systematic audit)

Key Tests for Recent Fixes (v110-v112):
  - tst_best_files_tracker: Model scoring, autobuild_output same score as refined
  - tst_workflow_state: Stepwise mode (automation_path), predict_and_build blocking
  - tst_file_categorization: predict_and_build output file categorization
  - tst_session_summary: STOP cycle exclusion from counts
  - tst_history_analysis: Anomalous workflow, analysis/metrics key handling
  - tst_file_utils: Shared MTZ classification (consolidation fix)
  - tst_audit_fixes: Systematic audit regressions (v112 Categories I/J/E/G/H)

Usage:
    python tests/run_all_tests.py
    python tests/run_all_tests.py --verbose
    python tests/run_all_tests.py --quick  # Skip slow integration tests

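Note: Tests requiring PHENIX will be skipped with a warning if libtbx is not available.
A suite whose test module cannot be imported at all is reported with a warning
and counted as failed in the summary.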
"""

from __future__ import absolute_import, division, print_function

import os
import sys
import time
import argparse
import traceback

# Put the parent of this file's directory at the front of sys.path so that
# the ``tests.tst_*`` imports performed in main() resolve against that
# directory, independent of the current working directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


def run_test_module(module_name, run_func, verbose=False):
    """
    Run one test module's entry point and report whether it passed.

    Prints a banner, calls ``run_func()`` with no arguments, and prints a
    PASSED or FAILED line with the wall-clock time. Any ``Exception`` raised
    by ``run_func`` is treated as a failure and is not re-raised; exceptions
    that do not derive from ``Exception`` (e.g. ``KeyboardInterrupt``,
    ``SystemExit``) propagate to the caller.

    Args:
        module_name: Name of the test module (used only in printed output)
        run_func: Zero-argument callable that runs the module's tests
        verbose: If True, print the full traceback on failure (via
            ``traceback.print_exc``, i.e. to stderr); otherwise print a
            one-line ``Error:`` summary

    Returns:
        tuple: (success, elapsed_time) where success is a bool and
        elapsed_time is the duration of the ``run_func`` call in seconds
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Running: {module_name}")
    print(banner)

    start_time = time.time()

    try:
        run_func()
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"\n❌ {module_name} FAILED ({elapsed:.2f}s)")
        if verbose:
            traceback.print_exc()
        else:
            # A bare ``assert`` produces an exception with an empty message,
            # so always lead with the exception type; otherwise the line
            # would read "Error: " with no information at all.
            message = str(e)
            summary = type(e).__name__
            if message:
                summary = f"{summary}: {message}"
            print(f"   Error: {summary}")
        return False, elapsed

    elapsed = time.time() - start_time
    print(f"\n✅ {module_name} PASSED ({elapsed:.2f}s)")
    return True, elapsed


# Ordered registry of every suite the runner knows about. Each entry is
# (module name inside the ``tests`` package, label used in the summary,
# quick-skip note). A non-None quick-skip note marks the suite as slow: with
# --quick it is not run and the note is printed in the "Skipping ..." line.
# The order here is the execution order and the order of the summary table.
_TEST_SUITES = (
    ("tst_api_schema", "API Schema", None),
    ("tst_best_files_tracker", "Best Files Tracker", None),
    ("tst_workflow_state", "Workflow State", None),
    ("tst_yaml_config", "YAML Config", None),
    ("tst_sanity_checker", "Sanity Checker", None),
    ("tst_metrics_analyzer", "Metrics Analyzer", None),
    ("tst_dry_run", "Dry Run", None),
    ("tst_integration", "Integration", "integration tests"),
    ("tst_transport", "Transport", None),
    ("tst_state_serialization", "State Serialization", None),
    ("tst_command_builder", "Command Builder", None),
    ("tst_file_categorization", "File Categorization", None),
    ("tst_session_summary", "Session Summary", None),
    ("tst_advice_preprocessing", "Advice Preprocessing", None),
    ("tst_directive_extractor", "Directive Extractor", None),
    ("tst_directive_validator", "Directive Validator", None),
    ("tst_session_directives", "Session Directives", None),
    ("tst_yaml_tools", "YAML Tools", None),
    ("tst_session_tools", "Session Tools", None),
    ("tst_docs_tools", "Docs Tools", None),
    ("tst_metric_patterns", "Metric Patterns", None),
    ("tst_pattern_manager", "Pattern Manager", None),
    ("tst_event_system", "Event System", None),
    ("tst_program_registration", "Program Registration", None),
    ("tst_summary_display", "Summary Display", None),
    ("tst_directives_integration", "Directives Integration",
     "directives integration tests"),
    ("tst_error_analyzer", "Error Analyzer", None),
    ("tst_decision_flow", "Decision Flow", None),
    ("tst_phaser_multimodel", "Phaser Multimodel", None),
    ("tst_new_programs", "New Programs", None),
    ("tst_history_analysis", "History Analysis", None),
    ("tst_file_utils", "File Utils", None),
    ("tst_langchain_tools", "Langchain Tools", None),
    ("tst_hardcoded_cleanup", "Hardcoded Cleanup", None),
    ("tst_v112_13_fixes", "v112.13 Fixes", None),
    ("tst_audit_fixes", "Audit Fix Regressions", None),
)


def _load_run_all_tests(module_name):
    """
    Import ``tests.<module_name>`` and return its ``run_all_tests`` callable.

    Raises:
        ImportError: if the module cannot be imported, or if it does not
            define ``run_all_tests`` (mirroring what
            ``from tests.<module_name> import run_all_tests`` would raise).
    """
    import importlib

    module = importlib.import_module("tests." + module_name)
    try:
        return module.run_all_tests
    except AttributeError:
        raise ImportError(
            "cannot import name 'run_all_tests' from 'tests.%s'"
            % module_name) from None


def main():
    """
    Run every registered test suite and exit with an overall status.

    Command-line options (parsed from ``sys.argv``):
        --verbose / -v: show tracebacks for failing suites
        --quick / -q: skip the suites marked as slow in ``_TEST_SUITES``

    For each suite, in registry order, the suite's ``run_all_tests`` function
    is imported from ``tests.<module>`` and handed to ``run_test_module``.
    A suite whose import raises ImportError is reported with a warning and
    recorded as failed with an elapsed time of 0. After all suites have run,
    a summary table is printed.

    This function does not return: it calls ``sys.exit(1)`` if any suite
    failed and ``sys.exit(0)`` otherwise.
    """
    parser = argparse.ArgumentParser(description="Run all AI Agent tests")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Show verbose output including tracebacks")
    parser.add_argument("--quick", "-q", action="store_true",
                        help="Skip slow integration tests")
    args = parser.parse_args()

    print("="*60)
    print("PHENIX AI AGENT - TEST SUITE")
    print("="*60)

    # Each entry is (summary label, success flag, elapsed seconds).
    results = []
    total_start = time.time()

    for module_name, label, quick_skip_note in _TEST_SUITES:
        # Slow suites are skipped entirely in --quick mode and do not appear
        # in the summary.
        if args.quick and quick_skip_note is not None:
            print(f"\n⏭️  Skipping {quick_skip_note} (--quick mode)")
            continue

        # Only the import is guarded here; run_test_module does its own
        # exception handling for failures raised while the tests run.
        try:
            run_func = _load_run_all_tests(module_name)
        except ImportError as e:
            print(f"⚠️  Could not import {module_name}: {e}")
            results.append((label, False, 0))
            continue

        success, elapsed = run_test_module(module_name, run_func, args.verbose)
        results.append((label, success, elapsed))

    # --- Summary ---
    total_elapsed = time.time() - total_start

    print("\n")
    print("="*60)
    print("TEST SUMMARY")
    print("="*60)

    for name, success, elapsed in results:
        status = "✅ PASSED" if success else "❌ FAILED"
        print(f"  {name:25s} {status} ({elapsed:.2f}s)")

    passed = sum(1 for _name, success, _elapsed in results if success)
    failed = len(results) - passed

    print("-"*60)
    print(f"  Total: {passed} passed, {failed} failed ({total_elapsed:.2f}s)")
    print("="*60)

    if failed > 0:
        print("\n❌ SOME TESTS FAILED")
        sys.exit(1)
    else:
        print("\n✅ ALL TESTS PASSED")
        sys.exit(0)


# Run the suites only when this file is executed as a script. main() ends the
# process itself via sys.exit(): status 1 if any suite failed, otherwise 0.
if __name__ == "__main__":
    main()
