"""
Tests for FileDiscovery - written FIRST before implementation.

TDD Phase 1: These tests define the expected behavior of FileDiscovery.
Until FileDiscovery can be imported, every test in this module is SKIPPED
(the import guard below sets a module-level ``pytestmark``), not failed.
"""
import pytest
import tempfile
import json
import pickle
from pathlib import Path

# Import guard for the not-yet-written implementation.
# If the import raises ImportError, FileDiscovery is bound to None so this
# module still imports, and the module-level ``pytestmark`` tells pytest to
# skip every test in this file rather than error during collection.
# NOTE: any ImportError raised while importing aurora.discovery (for example a
# missing dependency inside it) takes the same branch and also causes a skip.
try:
    from aurora.discovery import FileDiscovery
except ImportError:
    FileDiscovery = None
    pytestmark = pytest.mark.skip(reason="FileDiscovery not yet implemented")


class TestFileDiscoveryBasic:
    """Pattern, extension and recursion behaviour of ``FileDiscovery.discover``.

    Every test that takes ``temp_dir_with_files`` runs against this tree::

        <root>/data1.pkl
        <root>/data2.pkl
        <root>/data3.json
        <root>/readme.txt
        <root>/subdir/nested.pkl
        <root>/subdir/nested.json
    """

    @pytest.fixture
    def temp_dir_with_files(self):
        """Yield a fresh temporary directory populated with the tree above.

        The directory and its contents are removed once the test finishes.
        """
        with tempfile.TemporaryDirectory() as raw_dir:
            root = Path(raw_dir)

            # Four regular files directly under the root.
            (root / "data1.pkl").write_bytes(pickle.dumps({"a": 1}))
            (root / "data2.pkl").write_bytes(pickle.dumps({"a": 2}))
            (root / "data3.json").write_text(json.dumps({"b": 1}))
            (root / "readme.txt").write_text("readme")

            # One nested level, used by the recursive test.
            nested_dir = root / "subdir"
            nested_dir.mkdir()
            (nested_dir / "nested.pkl").write_bytes(pickle.dumps({"c": 1}))
            (nested_dir / "nested.json").write_text(json.dumps({"d": 1}))

            yield root

    def test_discover_all_files(self, temp_dir_with_files):
        """A bare ``*`` pattern returns the files directly under the root."""
        found = FileDiscovery.discover(temp_dir_with_files, pattern="*")
        # The expected 4 covers only the root-level files: neither the
        # "subdir" entry itself nor the files inside it are counted.
        assert len(found) == 4

    def test_discover_by_extension_pattern(self, temp_dir_with_files):
        """``*.pkl`` returns only the root-level pickle files."""
        found = FileDiscovery.discover(temp_dir_with_files, pattern="*.pkl")
        assert len(found) == 2
        assert all(path.suffix == ".pkl" for path in found)

    def test_discover_recursive(self, temp_dir_with_files):
        """``recursive=True`` also descends into subdirectories."""
        found = FileDiscovery.discover(
            temp_dir_with_files,
            pattern="*.pkl",
            recursive=True,
        )
        # data1.pkl and data2.pkl in the root, plus subdir/nested.pkl.
        assert len(found) == 3

    def test_discover_with_extensions_filter(self, temp_dir_with_files):
        """``extensions`` narrows a wildcard match to the listed suffixes."""
        found = FileDiscovery.discover(
            temp_dir_with_files,
            pattern="*",
            extensions=[".pkl", ".json"],
        )
        # Two .pkl files and one .json file; readme.txt is filtered out.
        assert len(found) == 3

    def test_discover_nonexistent_directory(self, temp_dir_with_files):
        """A directory that does not exist raises ``FileNotFoundError``."""
        # Build the missing path inside the freshly created temp dir so the
        # test does not depend on what happens to exist on the host
        # filesystem (a fixed absolute path such as "/nonexistent/path"
        # offers no such guarantee).
        missing_dir = temp_dir_with_files / "does_not_exist"
        with pytest.raises(FileNotFoundError):
            FileDiscovery.discover(missing_dir, pattern="*")

    def test_discover_empty_result(self, temp_dir_with_files):
        """A pattern that matches nothing yields an empty list."""
        found = FileDiscovery.discover(temp_dir_with_files, pattern="*.xyz")
        assert found == []

    def test_discover_sorted_output(self, temp_dir_with_files):
        """Results come back already in sorted order."""
        found = FileDiscovery.discover(temp_dir_with_files, pattern="*.pkl")
        assert found == sorted(found)


class TestFileDiscoveryByContent:
    """Content-aware discovery via ``FileDiscovery.discover_by_content``."""

    @pytest.fixture
    def temp_dir_with_data(self):
        """Yield a temp directory with two complete data files and one incomplete one.

        ``valid1.json`` and ``valid2.pkl`` each hold a one-element list whose
        record has "Predictions", "Labels" and "Uncertainties";
        ``invalid1.json`` holds a record without "Uncertainties".
        """
        complete_json_payload = [
            {"Predictions": [1, 0], "Labels": [1, 1], "Uncertainties": [0.1, 0.2]}
        ]
        incomplete_json_payload = [
            {"Predictions": [1, 0], "Labels": [1, 1]}  # deliberately lacks Uncertainties
        ]
        complete_pickle_payload = [
            {"Predictions": [0], "Labels": [0], "Uncertainties": [0.5]}
        ]

        with tempfile.TemporaryDirectory() as raw_dir:
            root = Path(raw_dir)

            (root / "valid1.json").write_text(json.dumps(complete_json_payload))
            (root / "invalid1.json").write_text(json.dumps(incomplete_json_payload))
            (root / "valid2.pkl").write_bytes(pickle.dumps(complete_pickle_payload))

            yield root

    def test_discover_by_required_fields(self, temp_dir_with_data):
        """Only files whose records carry every required field are returned."""
        wanted_fields = ["Predictions", "Labels", "Uncertainties"]
        matches = FileDiscovery.discover_by_content(
            temp_dir_with_data,
            required_fields=wanted_fields,
        )
        matched_names = {path.name for path in matches}

        assert len(matches) == 2
        assert {"valid1.json", "valid2.pkl"} <= matched_names
        assert "invalid1.json" not in matched_names


class TestFileDiscoveryGrouping:
    """Grouping of discovered files via ``FileDiscovery.group_by_pattern``."""

    @pytest.fixture
    def mixed_files(self):
        """Yield a temp directory whose file names follow three naming schemes."""
        json_names = (
            "hcc_warm_androzoo.json",
            "hcc_warm_apigraph.json",
            "cade_cold_androzoo.json",
            "cade_cold_apigraph.json",
        )
        with tempfile.TemporaryDirectory() as raw_dir:
            root = Path(raw_dir)

            for json_name in json_names:
                (root / json_name).write_text("{}")
            # Written empty; the test below only inspects group sizes.
            (root / "deep_drebin.pkl").write_bytes(b"")

            yield root

    def test_group_by_pattern(self, mixed_files):
        """Each file lands in the group whose regex matches its name."""
        group_patterns = {
            "hcc": r"hcc_.*",
            "cade": r"cade_.*",
            "drebin": r"deep_drebin.*",
        }
        expected_sizes = {"hcc": 2, "cade": 2, "drebin": 1}

        discovered = FileDiscovery.discover(mixed_files, pattern="*")
        grouped = FileDiscovery.group_by_pattern(discovered, group_patterns)

        for label, size in expected_sizes.items():
            assert len(grouped[label]) == size
