"""
Integration tests - written FIRST before implementation.

TDD Phase 4: These tests verify the complete workflow from configuration to analysis.
All tests should FAIL until implementation is complete.
"""
import pytest
import tempfile
import pickle
import json
import yaml
import numpy as np
from pathlib import Path

# Guarded imports: each group is imported inside its own try/except so this
# module can still be collected when part of the aurora API is missing.
# On ImportError the matching *_AVAILABLE flag is set to False (the skipif
# markers in this file read these flags) and every name of the group is bound
# to None so that later references do not raise NameError.
try:
    # Existing exports (must still work)
    from aurora import (
        PickleResultsLoader,
        ResultsCollection,
        AuroraAnalyzer,
    )
    EXISTING_IMPORTS_AVAILABLE = True
except ImportError:
    # Legacy API could not be imported: record that and bind placeholders.
    EXISTING_IMPORTS_AVAILABLE = False
    PickleResultsLoader = None
    ResultsCollection = None
    AuroraAnalyzer = None

try:
    # New exports (Phase 1-4)
    # All six names come from a single import statement, so one missing name
    # marks the whole group as unavailable.
    from aurora import (
        FileDiscovery,
        Filter,
        FilterChain,
        Pipeline,
        ConfigLoader,
        ConfiguredLoader,
    )
    NEW_IMPORTS_AVAILABLE = True
except ImportError:
    # New components could not be imported: record that and bind placeholders.
    NEW_IMPORTS_AVAILABLE = False
    FileDiscovery = None
    Filter = None
    FilterChain = None
    Pipeline = None
    ConfigLoader = None
    ConfiguredLoader = None

try:
    # The convenience entry point is tracked separately from the classes above.
    from aurora import load_with_config
    CONVENIENCE_FUNCTION_AVAILABLE = True
except ImportError:
    CONVENIENCE_FUNCTION_AVAILABLE = False
    load_with_config = None


class TestBackwardCompatibility:
    """
    CRITICAL: the pre-existing API has to keep working without modification.

    The tests below touch only the legacy exports and check that loading,
    collection handling and analyzer construction still succeed.
    """

    @staticmethod
    def _record(trainer, month, predictions, labels, ahead, past):
        """Assemble one raw record in the full schema PickleResultsLoader needs."""
        return {
            "Trainer-Mode": trainer,
            "Test-Month": month,
            "Dataset": "test_dataset",
            "Monthly-Label-Budget": 100,
            "Sampler-Mode": "subsample",
            "Predictions": predictions,
            "Labels": labels,
            "Uncertainties (Month Ahead)": ahead,
            "Uncertainties (Past Month)": past,
            "Random-Seed": 0,
        }

    @pytest.fixture
    def test_data_file(self):
        """Yield the path to a temporary pickle file containing two records."""
        records = [
            self._record(
                "CE", 10,
                [1, 0, 1, 0], [1, 0, 1, 1],
                [0.2, 0.8, 0.3, 0.7], [0.1, 0.9, 0.2, 0.6],
            ),
            self._record(
                "HCC", 11,
                [0, 1, 0, 1], [0, 1, 1, 1],
                [0.6, 0.4, 0.5, 0.3], [0.5, 0.5, 0.4, 0.4],
            ),
        ]
        # The file only lives as long as the temporary directory, so the path
        # is yielded from inside the context manager.
        with tempfile.TemporaryDirectory() as workdir:
            pickle_path = Path(workdir) / "test_data.pkl"
            with pickle_path.open("wb") as handle:
                pickle.dump(records, handle)
            yield pickle_path

    @pytest.mark.skipif(not EXISTING_IMPORTS_AVAILABLE, reason="Existing Aurora imports not available")
    def test_pickle_loader_no_args(self, test_data_file):
        """A loader built with no arguments loads both records."""
        loaded = PickleResultsLoader().load(test_data_file)

        assert len(loaded) == 2

    @pytest.mark.skipif(not EXISTING_IMPORTS_AVAILABLE, reason="Existing Aurora imports not available")
    def test_pickle_loader_with_base_name_mapper(self, test_data_file):
        """A loader built with base_name_mapper applies it to every record."""
        def trainer_mode_mapper(record):
            return record.get("Trainer-Mode", "Unknown")

        mapped_loader = PickleResultsLoader(base_name_mapper=trainer_mode_mapper)
        collection = mapped_loader.load(test_data_file)

        assert len(collection) == 2
        # base_name is read as an attribute of each loaded result
        for result in collection:
            assert result.base_name is not None
        # The mapper returns the record's Trainer-Mode, in file order
        assert collection[0].base_name == "CE"
        assert collection[1].base_name == "HCC"

    @pytest.mark.skipif(not EXISTING_IMPORTS_AVAILABLE, reason="Existing Aurora imports not available")
    def test_results_collection_unchanged(self, test_data_file):
        """The loader still hands back an iterable ResultsCollection."""
        collection = PickleResultsLoader().load(test_data_file)

        # The return value is the collection type itself, not a plain list
        assert isinstance(collection, ResultsCollection)
        assert len(collection) == 2
        # Iteration yields items exposing a ``predictions`` attribute
        for result in collection:
            assert hasattr(result, 'predictions')

    @pytest.mark.skipif(not EXISTING_IMPORTS_AVAILABLE, reason="Existing Aurora imports not available")
    def test_analyzer_unchanged(self, test_data_file):
        """AuroraAnalyzer can still be constructed from a loaded collection."""
        collection = PickleResultsLoader().load(test_data_file)

        # Construction from the collection must not raise
        analyzer = AuroraAnalyzer(collection)
        assert analyzer is not None


class TestNewImports:
    """Smoke tests: every new public name can be imported from ``aurora``."""

    @pytest.mark.skipif(not NEW_IMPORTS_AVAILABLE, reason="New components not yet implemented")
    def test_import_discovery(self):
        """``FileDiscovery`` can be imported from the package."""
        from aurora import FileDiscovery as discovery_cls
        assert discovery_cls is not None

    @pytest.mark.skipif(not NEW_IMPORTS_AVAILABLE, reason="New components not yet implemented")
    def test_import_filters(self):
        """``Filter`` and ``FilterChain`` can be imported from the package."""
        from aurora import Filter as filter_cls, FilterChain as chain_cls
        assert filter_cls is not None
        assert chain_cls is not None

    @pytest.mark.skipif(not NEW_IMPORTS_AVAILABLE, reason="New components not yet implemented")
    def test_import_pipeline(self):
        """``Pipeline`` can be imported from the package."""
        from aurora import Pipeline as pipeline_cls
        assert pipeline_cls is not None

    @pytest.mark.skipif(not NEW_IMPORTS_AVAILABLE, reason="New components not yet implemented")
    def test_import_config(self):
        """``ConfigLoader`` and ``ConfiguredLoader`` can be imported from the package."""
        from aurora import ConfigLoader as config_loader_cls, ConfiguredLoader as configured_loader_cls
        assert config_loader_cls is not None
        assert configured_loader_cls is not None

    @pytest.mark.skipif(not CONVENIENCE_FUNCTION_AVAILABLE, reason="Convenience function not yet implemented")
    def test_import_convenience_function(self):
        """``load_with_config`` can be imported and is callable."""
        from aurora import load_with_config as convenience_fn
        assert callable(convenience_fn)


@pytest.mark.skipif(not (NEW_IMPORTS_AVAILABLE and CONVENIENCE_FUNCTION_AVAILABLE),
                    reason="New components not yet implemented")
class TestEndToEndWorkflow:
    """End-to-end workflow tests.

    Each test builds a temporary directory of pickled experiment records plus
    a YAML configuration, runs the config-driven loading API over it and
    checks the renaming, filtering and array conversion the configuration
    requests.
    """

    @pytest.fixture
    def experiment_data_dir(self):
        """Yield a temporary directory holding six pickled result files.

        One file is written per (seed, trainer) pair for seeds 0-2 and
        trainers "CE"/"HCC"; every file contains one record per month in
        10..24. The directory is removed when the fixture is torn down.
        """
        # A fixed-seed generator keeps the payload identical across runs, so
        # a failure can be reproduced with exactly the same data.
        rng = np.random.default_rng(0)

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)

            # Create multiple experiment result files
            for seed in [0, 1, 2]:
                for trainer in ["CE", "HCC"]:
                    data = [
                        {
                            "Trainer-Mode": trainer,
                            "Test-Month": month,
                            "Monthly-Label-Budget": 100,
                            "Sampler-Mode": "subsample",
                            "Random-Seed": seed,
                            "Predictions": rng.integers(0, 2, 100).tolist(),
                            "Labels": rng.integers(0, 2, 100).tolist(),
                            "Uncertainties": rng.random(100).tolist(),
                        }
                        for month in range(10, 25)
                    ]

                    filename = f"results_{trainer}_seed{seed}.pkl"
                    with open(tmpdir / filename, "wb") as f:
                        pickle.dump(data, f)

            yield tmpdir

    @pytest.fixture
    def experiment_config(self, experiment_data_dir):
        """Write the YAML configuration into the data directory and yield its path.

        The configuration renames the raw field names, keeps only records
        with month <= 22 and random_seed < 3, lists the fields to convert to
        arrays and declares ``base_name`` as computed from ``trainer_mode``.
        """
        config = {
            "schema_mapping": {
                "Test-Month": "month",
                "Trainer-Mode": "trainer_mode",
                "Monthly-Label-Budget": "monthly_label_budget",
                "Random-Seed": "random_seed",
                "Sampler-Mode": "sampler_mode"
            },
            "filters": [
                {"field": "month", "op": "<=", "value": 22},
                {"field": "random_seed", "op": "<", "value": 3}
            ],
            "array_fields": ["Predictions", "Labels", "Uncertainties"],
            "computed_fields": {
                "base_name": "trainer_mode"
            }
        }

        config_path = experiment_data_dir / "config.yaml"
        with open(config_path, "w") as f:
            yaml.dump(config, f)

        yield config_path

    def test_full_workflow_with_convenience_function(
        self, experiment_data_dir, experiment_config
    ):
        """Complete workflow using load_with_config"""
        results = load_with_config(experiment_config, experiment_data_dir)

        # Guard against a vacuous pass: every all(...) below is trivially
        # True for an empty result, which would hide a loader that found or
        # kept nothing.
        assert len(results) > 0

        # Should have filtered by month
        assert all(r["month"] <= 22 for r in results)

        # Should have filtered by seed
        assert all(r["random_seed"] < 3 for r in results)

        # Should have converted arrays
        assert all(isinstance(r["Predictions"], np.ndarray) for r in results)

        # Should have renamed fields
        assert all("trainer_mode" in r for r in results)
        assert all("Trainer-Mode" not in r for r in results)

    def test_full_workflow_with_configured_loader(
        self, experiment_data_dir, experiment_config
    ):
        """Complete workflow using ConfiguredLoader"""
        loader = ConfiguredLoader.from_config(experiment_config)
        results = loader.load(experiment_data_dir, pattern="*.pkl")

        # Verify transformations applied
        assert len(results) > 0
        assert all("month" in r for r in results)

    def test_workflow_to_results_collection(
        self, experiment_data_dir, experiment_config
    ):
        """Verify load_with_config returns usable dict list"""
        # NOTE(review): nothing below uses the legacy exports, so this skip
        # looks unnecessary - confirm before removing it.
        if not EXISTING_IMPORTS_AVAILABLE:
            pytest.skip("Existing imports not available")

        # load_with_config returns List[Dict], not ResultsCollection
        # This is the intended use case for the new API
        results = load_with_config(experiment_config, experiment_data_dir)

        # Verify results are processed dicts
        assert isinstance(results, list)
        assert len(results) > 0
        assert isinstance(results[0], dict)

        # For ResultsCollection usage, use PickleResultsLoader directly
        # The two APIs serve different purposes:
        # - load_with_config: Dict-based processing with config
        # - PickleResultsLoader: ExperimentResult-based analysis


@pytest.mark.skipif(not (NEW_IMPORTS_AVAILABLE and EXISTING_IMPORTS_AVAILABLE),
                    reason="Components not yet implemented")
class TestMixedAPIUsage:
    """Tests that combine the legacy loader/analyzer with the new dict-based tools."""

    @pytest.fixture
    def test_data_file(self):
        """Yield the path to a temporary pickle with two full-schema records."""
        # Full schema required by PickleResultsLoader, plus an extra "value" key
        early_record = {
            "Trainer-Mode": "CE",
            "Test-Month": 10,
            "Dataset": "test_dataset",
            "Monthly-Label-Budget": 100,
            "Sampler-Mode": "subsample",
            "Predictions": [1, 0, 1, 0],
            "Labels": [1, 0, 1, 1],
            "Uncertainties (Month Ahead)": [0.2, 0.8, 0.3, 0.7],
            "Uncertainties (Past Month)": [0.1, 0.9, 0.2, 0.6],
            "Random-Seed": 0,
            "value": 1,
        }
        late_record = {
            "Trainer-Mode": "HCC",
            "Test-Month": 20,
            "Dataset": "test_dataset",
            "Monthly-Label-Budget": 100,
            "Sampler-Mode": "subsample",
            "Predictions": [0, 1, 0, 1],
            "Labels": [0, 1, 1, 1],
            "Uncertainties (Month Ahead)": [0.6, 0.4, 0.5, 0.3],
            "Uncertainties (Past Month)": [0.5, 0.5, 0.4, 0.4],
            "Random-Seed": 0,
            "value": 2,
        }
        with tempfile.TemporaryDirectory() as workdir:
            pickle_path = Path(workdir) / "test_data.pkl"
            with pickle_path.open("wb") as handle:
                pickle.dump([early_record, late_record], handle)
            yield pickle_path

    def test_pipeline_with_existing_loader(self, test_data_file):
        """Records loaded by PickleResultsLoader can be fed through a Pipeline."""
        # Legacy side: load through the existing loader
        legacy_results = PickleResultsLoader().load(test_data_file)

        # Bridge: the Pipeline consumes plain dicts, so convert first
        # (the recommended pattern for mixed API usage)
        records = legacy_results.to_dict_list()

        # New side: rename the month field, then keep months up to 15
        renaming_stage = Pipeline().rename_fields({"Test-Month": "month"})
        pipeline = renaming_stage.filter(Filter("month", "<=", 15))
        processed = pipeline.apply(records)

        assert len(processed) == 1
        assert processed[0]["month"] == 10

    def test_filter_with_existing_loader(self, test_data_file):
        """Records loaded by PickleResultsLoader can be tested with a Filter."""
        legacy_results = PickleResultsLoader().load(test_data_file)

        # Bridge: the Filter is applied to plain dicts
        records = legacy_results.to_dict_list()

        # Keep only the records the filter accepts
        month_filter = Filter("Test-Month", "<", 15)
        kept = []
        for record in records:
            if month_filter.apply(record):
                kept.append(record)

        assert len(kept) == 1

    def test_new_loader_with_existing_analyzer(self, test_data_file):
        """Existing loader feeds AuroraAnalyzer; the same data also runs through a Pipeline."""
        # First half: the legacy API on its own. AuroraAnalyzer receives a
        # collection object, whereas Pipeline operates on dicts.
        collection = PickleResultsLoader().load(test_data_file)

        # Filtering here uses the collection's own filter method
        # (not Pipeline, which works on dicts)
        early_only = collection.filter(lambda r: r.test_month < 15)

        # The filtered collection goes straight into the analyzer
        analyzer = AuroraAnalyzer(early_only)
        assert analyzer is not None

        # Second half: a dict-based workflow built from an in-memory config
        rename_config = ConfigLoader.from_dict({
            "schema_mapping": {"Test-Month": "month"}
        })
        pipeline = rename_config.to_pipeline()

        # Convert the unfiltered collection to dicts and run the pipeline
        records = collection.to_dict_list()
        processed = pipeline.apply(records)

        # Both records remain, and the month field carries its new name
        assert len(processed) == 2
        assert "month" in processed[0]  # Field was renamed


class TestErrorHandling:
    """Integration-level checks that bad inputs surface as exceptions."""

    @pytest.mark.skipif(not CONVENIENCE_FUNCTION_AVAILABLE, reason="Convenience function not yet implemented")
    def test_missing_config_file(self):
        """A config path that does not exist raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_with_config("/nonexistent/config.yaml", "/data")

    @pytest.mark.skipif(not CONVENIENCE_FUNCTION_AVAILABLE, reason="Convenience function not yet implemented")
    def test_missing_data_directory(self):
        """A data directory that does not exist raises FileNotFoundError."""
        with tempfile.TemporaryDirectory() as workdir:
            # The config file itself exists (an empty mapping), so only the
            # data directory is missing.
            empty_config = Path(workdir) / "config.yaml"
            with empty_config.open('w') as handle:
                yaml.dump({}, handle)

            with pytest.raises(FileNotFoundError):
                load_with_config(empty_config, "/nonexistent/data/")

    @pytest.mark.skipif(not NEW_IMPORTS_AVAILABLE, reason="ConfigLoader not yet implemented")
    def test_invalid_config_syntax(self):
        """A config file with broken YAML makes ConfigLoader.load raise."""
        with tempfile.TemporaryDirectory() as workdir:
            broken_config = Path(workdir) / "invalid.yaml"
            broken_config.write_bytes(b"invalid: yaml: [syntax")

            # Any exception type is accepted here (YAML parse error expected)
            with pytest.raises(Exception):
                ConfigLoader.load(broken_config)


@pytest.mark.skipif(not (NEW_IMPORTS_AVAILABLE and CONVENIENCE_FUNCTION_AVAILABLE),
                    reason="Components not yet implemented")
class TestPerformance:
    """Integration tests that run the loader over a larger generated dataset."""

    @pytest.fixture
    def large_data_dir(self):
        """Yield a temporary directory with 10 pickle files of 1000 records each."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)

            for file_index in range(10):
                records = []
                for j in range(1000):
                    records.append({
                        "month": j % 30,
                        "value": j,
                        "data": list(range(100)),
                    })
                target = root / f"data_{file_index}.pkl"
                with target.open("wb") as handle:
                    pickle.dump(records, handle)

            yield root

    def test_load_large_dataset(self, large_data_dir):
        """The configured loader filters and converts the larger dataset."""
        filter_config = ConfigLoader.from_dict({
            "filters": [{"field": "month", "op": "<", "value": 15}],
            "array_fields": ["data"]
        })

        configured = ConfiguredLoader.from_config_object(filter_config)
        results = configured.load(large_data_dir, pattern="*.pkl")

        # The month filter should drop roughly half of the 10000 records
        kept_count = len(results)
        assert kept_count > 0
        assert kept_count < 10000
        # Every remaining record should carry "data" as a numpy array
        for record in results:
            assert isinstance(record["data"], np.ndarray)
