#!/usr/bin/env python3
"""
Load All Aurora Data with Custom Base-Name Mapping

This script replicates the data loading from aurora.ipynb using the new
Aurora framework primitives with custom base-name mappings for each dataset.

Performs comprehensive data quality checking and reports all discrepancies.
"""

import sys
sys.path.insert(0, 'src')

import json
import numpy as np
from pathlib import Path
from typing import Dict, Any, List

from aurora import (
    PickleResultsLoader,
    JSONResultsLoader,
    DataQualityChecker,
    BaseNameRegistry,
    create_cutoff_month_filter,
    create_hyperparameter_filter,
    combine_collections,
    expand_results_with_ncms,  # NEW: NCM expansion
)

print(
    "=" * 80,
    "LOADING ALL AURORA DATA - COMPREHENSIVE DATA QUALITY CHECK",
    "=" * 80,
    sep="\n",
)

# ============================================================================
# CONFIGURATION
# ============================================================================
# Input locations inside the data-for-export tree (used for paper reproduction)
results_dir = Path("data-for-export") / "deep_drebin_svc"
other_results_v2_dir = Path("data-for-export") / "others_v2"
# Optional: NAC results are not needed for the main paper table
nac_results_dir = Path("data-for-export") / "nac"

# Registry used to track the base-name mappings registered below
registry = BaseNameRegistry()

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def make_np_arrays(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Turn every list-valued field of each result dictionary into a numpy array

    The dictionaries are modified in place; the returned list holds the very
    same dictionary objects, in the same order as the input.

    Args:
        results: List of result dictionaries

    Returns:
        New list containing the (mutated) input dictionaries
    """
    converted = []
    for record in results:
        # Collect the keys first, then overwrite the matching values
        list_keys = [name for name, val in record.items() if isinstance(val, list)]
        for name in list_keys:
            record[name] = np.array(record[name])
        converted.append(record)
    return converted


def create_collection_from_dicts(
    dicts: List[Dict[str, Any]],
    loader: JSONResultsLoader,
    source_name: str
) -> 'ResultsCollection':
    """
    Create a ResultsCollection from already-loaded dicts

    This manually replicates what loader.load() does but skips file reading:
    the loader's filters and field transforms are applied to each dict, the
    dict is converted with the loader's ``_convert_result`` method, and the
    resulting collection is validated when ``loader.auto_validate`` is set.

    Note that field transforms are written back into the input dicts, so the
    caller's dictionaries are modified in place.

    Args:
        dicts: List of result dictionaries (already processed)
        loader: JSONResultsLoader with mappers and filters configured
        source_name: Name for the collection metadata

    Returns:
        ResultsCollection

    Raises:
        ValueError: If a dict cannot be converted; the original exception is
            attached as ``__cause__``.
    """
    from aurora import ExperimentMetadata, ResultsCollection
    from datetime import datetime

    # Create metadata describing this in-memory collection
    metadata = ExperimentMetadata(
        experiment_name=source_name,
        source_file="(in-memory expansion)",
        load_timestamp=datetime.now().isoformat(),
        description=f"Created from {len(dicts)} expanded results",
    )

    # Convert dicts to ExperimentResult objects using the loader's internals
    results = []
    for i, raw_result in enumerate(dicts):
        # Drop the dict as soon as any filter rejects it (callers in this
        # script pass loaders with no filters because they pre-filter)
        if not all(filter_fn(raw_result) for filter_fn in loader.filters):
            continue

        # Apply field transforms to the fields that are present
        for field, transform_fn in loader.field_transforms.items():
            if field in raw_result:
                raw_result[field] = transform_fn(raw_result[field])

        try:
            # Use the loader's _convert_result method
            result = loader._convert_result(raw_result)
        except Exception as e:
            # Re-raise with the index and keys for debugging, keeping the
            # original exception chained as the cause
            raise ValueError(
                f"Failed to convert result {i}: {e}\n"
                f"Result keys: {list(raw_result.keys())}"
            ) from e
        results.append(result)

    collection = ResultsCollection(metadata=metadata, results=results)

    # Validate if loader has auto_validate enabled
    if loader.auto_validate:
        return loader._validate_if_enabled(collection)

    return collection


# ============================================================================
# CUSTOM BASE-NAME MAPPERS FOR EACH EXPERIMENT TYPE
# ============================================================================

def deep_drebin_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Base-name mapper for Deep Drebin experiments

    Results whose "Trainer-Mode" is "CE" or "DeepDrebin" share one base name;
    any other trainer mode is returned unchanged ("Unknown" when the key is
    missing).
    """
    mode = result_dict.get("Trainer-Mode", "Unknown")
    return "DeepDrebin (cold) - MSP" if mode in ("CE", "DeepDrebin") else mode


def svc_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Mapper for SVC experiments

    Every SVC result is grouped under the single base name "SVC - Margin";
    the content of ``result_dict`` (including its "Trainer-Mode") does not
    influence the result. The parameter is kept so the function matches the
    signature of the other base-name mappers.
    """
    return "SVC - Margin"


def hcc_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Mapper for HCC experiments

    HCC results are expected to carry an already descriptive trainer-mode
    (e.g., "HCC (warm) - Pseudo-Loss"), which separates warm vs cold and
    Pseudo-Loss vs MSP. The trainer-mode is therefore used as the base name
    as-is, for HCC and non-HCC values alike.

    Returns:
        The "Trainer-Mode" value, or "Unknown" when the key is missing
    """
    return result_dict.get("Trainer-Mode", "Unknown")


def cade_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Mapper for CADE experiments

    CADE results are expected to carry an already descriptive trainer-mode
    (e.g., "CADE (cold) - OOD"), which separates cold vs warm and OOD vs MSP.
    The trainer-mode is therefore used as the base name as-is, for CADE and
    non-CADE values alike.

    Returns:
        The "Trainer-Mode" value, or "Unknown" when the key is missing
    """
    return result_dict.get("Trainer-Mode", "Unknown")


def nac_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Base-name mapper for NAC experiments

    Every trainer mode that mentions "NAC" is collapsed into one shared base
    name (variants could later be told apart by hyperparameters if needed);
    all other trainer modes pass through untouched.
    """
    mode = result_dict.get("Trainer-Mode", "Unknown")

    # Guard clause: anything that is not a NAC variant keeps its own name
    if "NAC" not in mode:
        return mode

    return "DeepDrebin (cold) - NAC"


def transcendent_ice_mapper(result_dict: Dict[str, Any]) -> str:
    """
    Mapper for Transcendent-ICE experiments

    The trainer-mode is used as the base name as-is; "Trans.-CCE" modes are
    expected to be descriptive already (cred vs cred+conf), and any other
    value is passed through unchanged as well.

    Returns:
        The "Trainer-Mode" value, or "Unknown" when the key is missing
    """
    return result_dict.get("Trainer-Mode", "Unknown")


# ============================================================================
# LOAD DATA - DEEP DREBIN
# ============================================================================
print("", "=" * 80, "LOADING DEEP DREBIN (Pickle)", "=" * 80, sep="\n")

# Filters handed to the pickle-based loaders
common_filters = [
    create_cutoff_month_filter(),
    create_hyperparameter_filter("Num-Epochs", [10], exclude=True),
]

loader_dd = PickleResultsLoader(
    base_name_mapper=deep_drebin_mapper,
    filters=common_filters,
    auto_validate=False,  # quality checks are run separately later on
)

deep_drebin_file = results_dir / "parallel_ce_no_aug_v2.pkl"
if not deep_drebin_file.exists():
    print("❌ Deep Drebin file not found")
    deep_drebin = None
else:
    deep_drebin = loader_dd.load(deep_drebin_file, experiment_name="DeepDrebin")
    print(f"✅ Loaded {len(deep_drebin)} Deep Drebin results")
    print(f"   Datasets: {sorted(deep_drebin.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(deep_drebin.get_unique_values('base_name')['base_name'])}")

    # Register the mappings of a sample (the first 10 results) only
    for sample in deep_drebin.results[:10]:
        epochs = sample.hyperparameters.get('Num-Epochs', '?')
        registry.register(f"{sample.trainer_mode} (epochs={epochs})", sample.base_name)

# ============================================================================
# LOAD DATA - SVC
# ============================================================================
print("", "=" * 80, "LOADING SVC BASELINES (Pickle)", "=" * 80, sep="\n")

loader_svc = PickleResultsLoader(
    base_name_mapper=svc_mapper,
    filters=common_filters,
    auto_validate=False,
)

svc_file = results_dir / "parallel_svc_v2.pkl"
if not svc_file.exists():
    print("❌ SVC file not found")
    svc = None
else:
    svc = loader_svc.load(svc_file, experiment_name="SVC")

    # Overwrite trainer-mode and base name on every result (as in aurora.ipynb)
    for entry in svc.results:
        entry.trainer_mode = "Drebin (cold) - Margin"
        entry.base_name = "SVC - Margin"

    print(f"✅ Loaded {len(svc)} SVC results")
    print(f"   Datasets: {sorted(svc.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(svc.get_unique_values('base_name')['base_name'])}")

    # Record the single SVC mapping in the registry
    registry.register("SVC", "SVC - Margin")

# ============================================================================
# LOAD DATA - HCC (JSON) WITH NCM EXPANSION
# ============================================================================
print("\n" + "=" * 80)
print("LOADING HCC EXPERIMENTS (JSON) WITH NCM EXPANSION")
print("=" * 80)

hcc_files = [
    "hcc_mlp_warm-androzoo.json",
    "hcc_mlp_warm-apigraph.json",
    "hcc_mlp_warm-transcendent.json",
    "hcc_mlp_warm-androzoo-subsampling.json",
    "hcc_mlp_warm-apigraph-subsampling.json",
    "hcc_mlp_warm-transcendent-subsampling.json",
]

# Load all HCC results as raw dicts
all_hcc_results = []
for filename in hcc_files:
    file_path = other_results_v2_dir / filename
    if file_path.exists():
        # Read JSON explicitly as UTF-8 instead of the platform default encoding
        with open(file_path, "r", encoding="utf-8") as f:
            results = json.load(f)
        all_hcc_results.extend(results)
        print(f"  ✅ Loaded {len(results)} raw results from {filename}")
    else:
        print(f"  ❌ Not found: {filename}")

if all_hcc_results:
    print(f"\n📊 Preprocessing {len(all_hcc_results)} HCC results...")

    # Convert lists to numpy arrays
    all_hcc_results = make_np_arrays(all_hcc_results)

    # Apply cutoff filter
    cutoff_filter = create_cutoff_month_filter()
    all_hcc_results = [r for r in all_hcc_results if cutoff_filter(r)]
    print(f"   After cutoff filter: {len(all_hcc_results)} results")

    for item in all_hcc_results:
        # Fix sampler-mode typo (as in aurora.ipynb)
        if item.get("Sampler-Mode") == "subsampled_first_year_subsample_months":
            item["Sampler-Mode"] = "subsample_first_year_subsample_months"

        # Rename Seed → Random-Seed
        if "Seed" in item:
            item["Random-Seed"] = item.pop("Seed")

    # ✨ AUTOMATIC NCM EXPANSION ✨
    # Expands HCC results into Pseudo-Loss and MSP variants
    print(f"\n✨ Expanding with NCM system...")
    hcc_results_expanded = expand_results_with_ncms(
        all_hcc_results,
        trainer_mode_key="Trainer-Mode",
        uncertainty_key="Uncertainties (Month Ahead)",
        clean_temp_fields=True
    )

    print(f"   Before NCM expansion: {len(all_hcc_results)} results")
    print(f"   After NCM expansion: {len(hcc_results_expanded)} results")
    # The cutoff filter may have removed every result; avoid dividing by zero
    if all_hcc_results:
        print(f"   Expansion ratio: {len(hcc_results_expanded) / len(all_hcc_results):.1f}x")
    else:
        print("   Expansion ratio: n/a (no results left after cutoff filter)")

    # Check what NCM variants were created (single pass; results without a
    # "Trainer-Mode" key are counted under "Unknown")
    mode_counts = {}
    for record in hcc_results_expanded:
        mode = record.get("Trainer-Mode", "Unknown")
        mode_counts[mode] = mode_counts.get(mode, 0) + 1
    trainer_modes = set(mode_counts)
    print(f"\n   NCM variants created:")
    for mode in sorted(trainer_modes):
        print(f"     - {mode}: {mode_counts[mode]} results")

    # Now create collection from expanded results
    loader_hcc = JSONResultsLoader(
        base_name_mapper=hcc_mapper,
        filters=[],  # Already filtered
        auto_validate=False,
        rename_seed_field=False  # Already renamed
    )

    hcc_combined = create_collection_from_dicts(
        hcc_results_expanded,
        loader_hcc,
        "HCC-with-NCMs"
    )

    print(f"\n✅ Total HCC results: {len(hcc_combined)}")
    print(f"   Datasets: {sorted(hcc_combined.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(hcc_combined.get_unique_values('base_name')['base_name'])}")

    # Track mappings
    for base_name in hcc_combined.get_unique_values('base_name')['base_name']:
        registry.register(f"HCC - {base_name}", base_name)
else:
    print("❌ No HCC files loaded")
    hcc_combined = None

# ============================================================================
# LOAD DATA - CADE (JSON) WITH NCM EXPANSION
# ============================================================================
print("\n" + "=" * 80)
print("LOADING CADE EXPERIMENTS (JSON) WITH NCM EXPANSION")
print("=" * 80)

cade_files = [
    "cade_mlp_cold-androzoo.json",
    "cade_mlp_cold-apigraph.json",
    "cade_mlp_cold-transcendent.json",
    "cade_mlp_warm-androzoo.json",
    "cade_mlp_warm-apigraph.json",
    "cade_mlp_warm-transcendent.json",
]

# Load all CADE results as raw dicts
all_cade_results = []
for filename in cade_files:
    file_path = other_results_v2_dir / filename
    if file_path.exists():
        # Read JSON explicitly as UTF-8 instead of the platform default encoding
        with open(file_path, "r", encoding="utf-8") as f:
            results = json.load(f)
        all_cade_results.extend(results)
        print(f"  ✅ Loaded {len(results)} raw results from {filename}")
    else:
        print(f"  ❌ Not found: {filename}")

if all_cade_results:
    print(f"\n📊 Preprocessing {len(all_cade_results)} CADE results...")

    # Convert lists to numpy arrays
    all_cade_results = make_np_arrays(all_cade_results)

    # Apply cutoff filter
    cutoff_filter = create_cutoff_month_filter()
    all_cade_results = [r for r in all_cade_results if cutoff_filter(r)]
    print(f"   After cutoff filter: {len(all_cade_results)} results")

    # ✨ AUTOMATIC NCM EXPANSION ✨
    # Expands CADE results into OOD and MSP variants
    print(f"\n✨ Expanding with NCM system...")
    cade_results_expanded = expand_results_with_ncms(
        all_cade_results,
        trainer_mode_key="Trainer-Mode",
        uncertainty_key="Uncertainties (Month Ahead)",
        clean_temp_fields=True
    )

    print(f"   Before NCM expansion: {len(all_cade_results)} results")
    print(f"   After NCM expansion: {len(cade_results_expanded)} results")
    # The cutoff filter may have removed every result; avoid dividing by zero
    if all_cade_results:
        print(f"   Expansion ratio: {len(cade_results_expanded) / len(all_cade_results):.1f}x")
    else:
        print("   Expansion ratio: n/a (no results left after cutoff filter)")

    # Check what NCM variants were created (single pass; results without a
    # "Trainer-Mode" key are counted under "Unknown")
    mode_counts = {}
    for record in cade_results_expanded:
        mode = record.get("Trainer-Mode", "Unknown")
        mode_counts[mode] = mode_counts.get(mode, 0) + 1
    trainer_modes = set(mode_counts)
    print(f"\n   NCM variants created:")
    for mode in sorted(trainer_modes):
        print(f"     - {mode}: {mode_counts[mode]} results")

    # Now create collection from expanded results
    # NOTE(review): the collection is built via create_collection_from_dicts
    # rather than loader_cade.load(); confirm that rename_seed_field=True
    # still takes effect on that path.
    loader_cade = JSONResultsLoader(
        base_name_mapper=cade_mapper,
        filters=[],  # Already filtered
        auto_validate=False,
        rename_seed_field=True  # CADE still needs seed rename
    )

    cade_combined = create_collection_from_dicts(
        cade_results_expanded,
        loader_cade,
        "CADE-with-NCMs"
    )

    print(f"\n✅ Total CADE results: {len(cade_combined)}")
    print(f"   Datasets: {sorted(cade_combined.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(cade_combined.get_unique_values('base_name')['base_name'])}")

    # Track mappings
    for base_name in cade_combined.get_unique_values('base_name')['base_name']:
        registry.register(f"CADE - {base_name}", base_name)
else:
    print("❌ No CADE files loaded")
    cade_combined = None

# ============================================================================
# LOAD DATA - NAC (JSON)
# ============================================================================
print("", "=" * 80, "LOADING NAC EXPERIMENTS (JSON)", "=" * 80, sep="\n")

loader_nac = JSONResultsLoader(
    base_name_mapper=nac_mapper,
    filters=[create_cutoff_month_filter()],
    auto_validate=False,
    rename_seed_field=True,
)

nac_files = [
    "deepdrebin_nac.json",
    "deepdrebin_nac_subsampling.json",
    "deepdrebin_nac_subsampling_fullcov.json",
]

# Load each NAC file that is present; missing files are reported and skipped
nac_collections = []
for filename in nac_files:
    file_path = nac_results_dir / filename
    if not file_path.exists():
        print(f"  ❌ Not found: {filename}")
        continue
    collection = loader_nac.load(file_path, experiment_name=f"NAC_{filename}")
    nac_collections.append(collection)
    print(f"  ✅ Loaded {len(collection)} results from {filename}")

if not nac_collections:
    print("❌ No NAC files loaded")
    nac_combined = None
else:
    nac_combined = combine_collections(*nac_collections)
    print(f"\n✅ Total NAC results: {len(nac_combined)}")
    print(f"   Datasets: {sorted(nac_combined.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(nac_combined.get_unique_values('base_name')['base_name'])}")

    # Record one registry entry covering all NAC variants
    registry.register("NAC (all variants)", "DeepDrebin (cold) - NAC")

# ============================================================================
# LOAD DATA - TRANSCENDENT-ICE (JSON)
# ============================================================================
print("", "=" * 80, "LOADING TRANSCENDENT-ICE EXPERIMENTS (JSON)", "=" * 80, sep="\n")

loader_trans = JSONResultsLoader(
    base_name_mapper=transcendent_ice_mapper,
    filters=[create_cutoff_month_filter()],
    auto_validate=False,
    rename_seed_field=True,
)

trans_files = [
    "transcendent-ice_cred_svm-apigraph.json",
    "transcendent-ice_cred+conf_svm-apigraph.json",
]

trans_collections = []
for filename in trans_files:
    file_path = other_results_v2_dir / filename
    if not file_path.exists():
        print(f"  ❌ Not found: {filename}")
        continue

    collection = loader_trans.load(file_path, experiment_name=f"Trans_{filename}")

    # Overwrite sampler-mode and trainer-mode (as in aurora.ipynb); the
    # trainer-mode variant is chosen from the file name
    for entry in collection.results:
        entry.sampler_mode = "full_first_year_subsample_months"
        entry.trainer_mode = (
            "Trans.-CCE[LinearSVC] - cred+conf"
            if "cred+conf" in filename
            else "Trans.-CCE[LinearSVC] - cred"
        )
        entry.base_name = entry.trainer_mode

    trans_collections.append(collection)
    print(f"  ✅ Loaded {len(collection)} results from {filename}")

if not trans_collections:
    print("❌ No Transcendent-ICE files loaded")
    trans_combined = None
else:
    trans_combined = combine_collections(*trans_collections)
    print(f"\n✅ Total Transcendent-ICE results: {len(trans_combined)}")
    print(f"   Datasets: {sorted(trans_combined.get_unique_values('dataset')['dataset'])}")
    print(f"   Base names: {sorted(trans_combined.get_unique_values('base_name')['base_name'])}")

    # Register every base name found in the combined collection
    for base_name in trans_combined.get_unique_values('base_name')['base_name']:
        registry.register(f"Trans-ICE - {base_name}", base_name)

# ============================================================================
# COMBINE ALL RESULTS
# ============================================================================
print("", "=" * 80, "COMBINING ALL RESULTS", "=" * 80, sep="\n")

# Keep only the collections that were actually loaded, in a fixed order
all_collections = []
collection_names = []
for label, loaded in (
    ("DeepDrebin", deep_drebin),
    ("SVC", svc),
    ("HCC", hcc_combined),
    ("CADE", cade_combined),
    ("NAC", nac_combined),
    ("Transcendent-ICE", trans_combined),
):
    if loaded is not None:
        all_collections.append(loaded)
        collection_names.append(label)

# Nothing to analyse: stop the script with a non-zero exit status
if not all_collections:
    print("❌ No collections loaded!")
    sys.exit(1)

all_results = combine_collections(*all_collections)

print(f"✅ Combined {len(collection_names)} collections: {', '.join(collection_names)}")
print(f"\nTotal results: {len(all_results)}")
print(f"Datasets: {sorted(all_results.get_unique_values('dataset')['dataset'])}")
print(f"Budgets: {sorted(all_results.get_unique_values('monthly_label_budget')['monthly_label_budget'])}")
print(f"Base names ({len(all_results.get_unique_values('base_name')['base_name'])}): ")
for base_name in sorted(all_results.get_unique_values('base_name')['base_name']):
    print(f"  - {base_name}")

# ============================================================================
# BASE-NAME REGISTRY SUMMARY
# ============================================================================
print("", "=" * 80, "BASE-NAME MAPPING REGISTRY", "=" * 80, sep="\n")

registry.print_summary(max_per_base=3)

# ============================================================================
# COMPREHENSIVE DATA QUALITY CHECKING
# ============================================================================
print("", "=" * 80, "COMPREHENSIVE DATA QUALITY CHECKS", "=" * 80, sep="\n")

# Announce the checks before the checker is built
print("\nInitializing checker with:")
for check_line in (
    "  ✓ Array length consistency (predictions/labels/uncertainties)",
    "  ✓ Same months per base-name",
    "  ✓ Same results per month",
    "  ✓ Duplicate seed detection",
    "  ✓ Monthly progression validation",
    "  ✓ Base-name consistency validation",
):
    print(check_line)

checker = DataQualityChecker(
    strict_uniqueness=False,  # Allow multi-seed
    expected_seeds=None,  # Will report actual counts
    check_monthly_progression=True,
    check_base_name_consistency=True,
)

print("\n🔍 Running checks on all combined results...")
report = checker.check_quality(all_results)

# Print the report without info-level entries, capped at 30 issues
report.print_report(show_info=False, max_issues=30)

# ============================================================================
# DETAILED ISSUE BREAKDOWN BY CATEGORY
# ============================================================================
print("", "=" * 80, "ISSUE BREAKDOWN BY CATEGORY", "=" * 80, sep="\n")

# Bucket the report's issues by their category
from collections import defaultdict

issues_by_category = defaultdict(list)
for issue in report.issues:
    issues_by_category[issue.category].append(issue)

for category, issues in sorted(issues_by_category.items(), key=lambda kv: kv[0]):
    category_errors = [entry for entry in issues if entry.severity == 'error']
    error_count = len(category_errors)
    warning_count = sum(1 for entry in issues if entry.severity == 'warning')

    print(f"\n📊 {category.upper()}")
    print(f"   Errors: {error_count}, Warnings: {warning_count}")

    if not category_errors:
        continue

    # Show at most three errors, each with at most three context entries
    print("   Sample errors (showing up to 3):")
    for position, issue in enumerate(category_errors[:3], start=1):
        print(f"\n   {position}. {issue.message}")
        for key, value in list(issue.context.items())[:3]:
            print(f"      {key}: {value}")

# ============================================================================
# CHECK EACH COLLECTION INDIVIDUALLY
# ============================================================================
print("", "=" * 80, "INDIVIDUAL COLLECTION QUALITY CHECKS", "=" * 80, sep="\n")

individual_reports = {}

# Run the quality checker on each loaded collection separately
named_collections = (
    ("DeepDrebin", deep_drebin),
    ("SVC", svc),
    ("HCC", hcc_combined),
    ("CADE", cade_combined),
    ("NAC", nac_combined),
    ("Transcendent-ICE", trans_combined),
)
for name, collection in named_collections:
    # Collections that failed to load are skipped entirely
    if collection is None:
        continue

    print(f"\n--- {name} ---")
    individual_report = checker.check_quality(collection)
    individual_reports[name] = individual_report
    errors = individual_report.get_errors()

    print(f"Results: {len(collection)}")
    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(individual_report.get_warnings())}")

    # Show only the first reported error, if any
    if errors:
        print(f"Top error: {errors[0].message}")

# ============================================================================
# SUMMARY & RECOMMENDATIONS
# ============================================================================
print("", "=" * 80, "SUMMARY & RECOMMENDATIONS", "=" * 80, sep="\n")

all_errors = report.get_errors()
total_errors = len(all_errors)
total_warnings = len(report.get_warnings())

print("\n📊 OVERALL STATISTICS:")
print(f"   Total results loaded: {len(all_results)}")
print(f"   Total base names: {len(all_results.get_unique_values('base_name')['base_name'])}")
print(f"   Data quality errors: {total_errors}")
print(f"   Data quality warnings: {total_warnings}")

if total_errors == 0:
    print("\n✅ NO CRITICAL ERRORS FOUND")
else:
    print("\n⚠️  CRITICAL ISSUES FOUND:")
    print(f"   {total_errors} errors detected that may affect analysis")
    print("\n   Common issues:")

    # Tally errors by the first 50 characters of their message
    error_types = defaultdict(int)
    for error in all_errors:
        error_types[error.message[:50]] += 1

    ranked = sorted(error_types.items(), key=lambda pair: pair[1], reverse=True)
    for error_msg, count in ranked[:5]:
        print(f"   - {error_msg}... ({count} occurrences)")

    print("\n   📝 RECOMMENDATIONS:")
    # Each recommendation is printed only if some error message contains its trigger text
    recommendations = (
        (
            "Duplicate random seeds",
            (
                "   1. De-duplicate results with same seeds",
                "      (Each seed should appear only once per configuration/month)",
            ),
        ),
        (
            "Different prediction array lengths",
            (
                "   2. Investigate array length inconsistencies",
                "      (All seeds for same month should test on same data!)",
            ),
        ),
        (
            "number of months",
            ("   3. Check for missing months in some configurations",),
        ),
    )
    for trigger, advice_lines in recommendations:
        if any(trigger in e.message for e in all_errors):
            for advice in advice_lines:
                print(advice)

if total_warnings > 0:
    print(f"\n⚠️  {total_warnings} warnings - review recommended but not critical")

print("", "=" * 80, "✅ DATA QUALITY CHECK COMPLETE", "=" * 80, sep="\n")

print("\nNext steps:")
print("  1. Review detailed issues above")
print("  2. Address critical errors before analysis")
print("  3. Use cleaned data with AuroraAnalyzer for metrics")
