"""
Configuration system for Aurora framework.

This module provides a declarative configuration system that allows users
to define data loading and processing workflows in YAML/JSON files.

The framework provides mechanisms; users provide configuration.

Example:
    >>> config = ConfigLoader.load("my_config.yaml")
    >>> pipeline = config.to_pipeline()
    >>> results = pipeline.apply(raw_data)

    # Or use ConfiguredLoader for the full workflow:
    >>> loader = ConfiguredLoader.from_config("my_config.yaml")
    >>> results = loader.load("data/")
"""

from __future__ import annotations

import json
import pickle
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Literal, Optional, Union

import yaml

from .discovery import FileDiscovery
from .filters import Filter, FilterChain
from .pipeline import Pipeline


class ConfigValidationError(Exception):
    """Signal that a configuration failed validation.

    Attributes:
        errors: The individual validation messages, exactly as passed in.
    """

    def __init__(self, errors: List[str]):
        # The exception text is a fixed prefix followed by every message,
        # separated by "; ".
        summary = "; ".join(errors)
        super().__init__(f"Configuration validation failed: {summary}")
        self.errors = errors


class ConfigValidator:
    """
    Validate configuration dictionaries against the expected schema.

    The validator checks that:
    - the configuration itself is a dict
    - each known top-level section has the right container type
    - ``schema_mapping`` keys and values are strings
    - every filter is a dict with ``field``, ``op`` and ``value``, and its
      operator is one of ``VALID_OPERATORS``
    - every transform is a dict with a ``type`` from ``VALID_TRANSFORMS``

    Problems are reported as a list of messages rather than raised, so a
    caller can show all of them at once.

    Example:
        >>> validator = ConfigValidator()
        >>> errors = validator.validate(config_dict)
        >>> if errors:
        ...     raise ConfigValidationError(errors)
    """

    # Valid operators for filters
    VALID_OPERATORS = [
        "==", "!=", "<", "<=", ">", ">=",
        "in", "not_in", "matches", "contains"
    ]

    # Valid transform types
    VALID_TRANSFORMS = [
        "rename_fields",
        "filter",
        "convert_arrays",
        "add_computed_field",
        "remove_fields",
        "transform_field",
    ]

    def validate(self, config: Dict[str, Any]) -> List[str]:
        """
        Validate configuration dictionary.

        Args:
            config: Configuration dictionary to validate

        Returns:
            List of validation error messages (empty if valid). A ``config``
            that is not a dict yields the single message
            ``"config must be a dict"``.

        Example:
            >>> validator = ConfigValidator()
            >>> errors = validator.validate({"filters": [{"invalid": "config"}]})
            >>> print(errors)  # doctest: +NORMALIZE_WHITESPACE
            ["filters[0]: missing required field 'field'",
             "filters[0]: missing required field 'op'",
             "filters[0]: missing required field 'value'"]
        """
        # A YAML/JSON document may legitimately parse to a list, scalar or
        # None; report that as a validation error instead of failing on the
        # membership tests below.
        if not isinstance(config, dict):
            return ["config must be a dict"]

        errors: List[str] = []

        # schema_mapping: dict of str -> str
        if "schema_mapping" in config:
            mapping = config["schema_mapping"]
            if not isinstance(mapping, dict):
                errors.append("schema_mapping must be a dict")
            else:
                for key, value in mapping.items():
                    if not (isinstance(key, str) and isinstance(value, str)):
                        errors.append(
                            f"schema_mapping keys and values must be strings, got {key}: {value}"
                        )

        # filters / transforms: lists whose items are validated one by one
        item_validators = (
            ("filters", self.validate_filter),
            ("transforms", self.validate_transform),
        )
        for section, validate_item in item_validators:
            if section not in config:
                continue
            items = config[section]
            if not isinstance(items, list):
                errors.append(f"{section} must be a list")
                continue
            for index, item in enumerate(items):
                errors.extend(validate_item(item, f"{section}[{index}]"))

        # Remaining sections: only the container type is checked
        container_checks = (
            ("array_fields", list, "list"),
            ("computed_fields", dict, "dict"),
            ("loader_options", dict, "dict"),
        )
        for section, expected_type, type_label in container_checks:
            if section in config and not isinstance(config[section], expected_type):
                errors.append(f"{section} must be a {type_label}")

        return errors

    def validate_filter(self, filter_config: Dict[str, Any], path: str) -> List[str]:
        """
        Validate a single filter configuration.

        Args:
            filter_config: Filter configuration dict
            path: Path for error messages (e.g., "filters[0]")

        Returns:
            List of validation errors: one per missing required key
            (``field``, ``op``, ``value``), plus one if ``op`` is present but
            not in ``VALID_OPERATORS``. A non-dict input yields a single
            "must be a dict" error.
        """
        if not isinstance(filter_config, dict):
            return [f"{path}: must be a dict"]

        errors = [
            f"{path}: missing required field '{required}'"
            for required in ("field", "op", "value")
            if required not in filter_config
        ]

        # The operator is only checked when it was supplied; its absence has
        # already been reported above.
        if "op" in filter_config:
            op = filter_config["op"]
            if op not in self.VALID_OPERATORS:
                valid_ops = ", ".join(self.VALID_OPERATORS)
                errors.append(f"{path}.op must be one of [{valid_ops}], got '{op}'")

        return errors

    def validate_transform(self, transform_config: Dict[str, Any], path: str) -> List[str]:
        """
        Validate a single transform configuration.

        Only the presence and value of ``type`` are checked; type-specific
        keys (e.g. ``mapping``, ``fields``) are not inspected here.

        Args:
            transform_config: Transform configuration dict
            path: Path for error messages (e.g., "transforms[0]")

        Returns:
            List of validation errors
        """
        if not isinstance(transform_config, dict):
            return [f"{path}: must be a dict"]

        if "type" not in transform_config:
            return [f"{path}: missing required field 'type'"]

        transform_type = transform_config["type"]
        if transform_type not in self.VALID_TRANSFORMS:
            valid_types = ", ".join(self.VALID_TRANSFORMS)
            return [f"{path}.type must be one of [{valid_types}], got '{transform_type}'"]

        return []


@dataclass
class Config:
    """
    Validated configuration.

    Attributes:
        schema_mapping: Dict mapping original field names to new names
        filters: List of filter configurations
        transforms: List of transform configurations
        array_fields: List of fields to convert to arrays via
            ``Pipeline.convert_arrays``
        computed_fields: Dict mapping a new field name to the name of the
            source field whose value it copies
        loader_options: Options for data loading
        raw: The original raw configuration dict
    """
    schema_mapping: Dict[str, str] = field(default_factory=dict)
    filters: List[Dict[str, Any]] = field(default_factory=list)
    transforms: List[Dict[str, Any]] = field(default_factory=list)
    array_fields: List[str] = field(default_factory=list)
    computed_fields: Dict[str, str] = field(default_factory=dict)
    loader_options: Dict[str, Any] = field(default_factory=dict)
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_pipeline(self) -> Pipeline:
        """
        Convert configuration to executable Pipeline.

        Steps are added in a fixed order: ``schema_mapping`` renames, the
        AND-combined ``filters``, each entry of ``transforms`` in list order,
        ``array_fields`` conversion, and finally ``computed_fields``.

        Returns:
            Pipeline instance with all configured steps

        Example:
            >>> config = ConfigLoader.load("config.yaml")
            >>> pipeline = config.to_pipeline()
            >>> results = pipeline.apply(raw_data)
        """
        pipeline = Pipeline()

        # Renames come first so later steps can refer to the new names.
        if self.schema_mapping:
            pipeline = pipeline.rename_fields(self.schema_mapping)

        filter_chain = self.to_filter_chain()
        if filter_chain is not None:
            pipeline = pipeline.filter(filter_chain)

        for transform in self.transforms:
            pipeline = self._add_transform_to_pipeline(pipeline, transform)

        if self.array_fields:
            pipeline = pipeline.convert_arrays(fields=self.array_fields)

        for field_name, source_field in self.computed_fields.items():
            # Simple field copy ("new_field: source_field"). The source name
            # is bound as a default argument so each lambda keeps its own
            # value instead of the loop's last one.
            pipeline = pipeline.add_computed_field(
                field_name,
                lambda r, src=source_field: r.get(src)
            )

        return pipeline

    def _add_transform_to_pipeline(
        self,
        pipeline: Pipeline,
        transform: Dict[str, Any],
    ) -> Pipeline:
        """
        Add a single transform to pipeline.

        Args:
            pipeline: Pipeline to extend
            transform: Transform configuration; its ``type`` key selects the
                pipeline step

        Returns:
            The pipeline returned by the matching step. ``transform_field``
            and unrecognised types add nothing and return ``pipeline``
            unchanged.
        """
        transform_type = transform.get("type")

        if transform_type == "rename_fields":
            return pipeline.rename_fields(transform.get("mapping", {}))

        if transform_type == "filter":
            return pipeline.filter(Filter.from_dict(transform.get("filter", {})))

        if transform_type == "convert_arrays":
            return pipeline.convert_arrays(
                fields=transform.get("fields"),
                exclude=transform.get("exclude", []),
            )

        if transform_type == "add_computed_field":
            # "expression" is treated as the name of a source field to copy,
            # not as an expression to evaluate.
            expression = transform.get("expression", "")
            return pipeline.add_computed_field(
                transform.get("field"),
                lambda r, src=expression: r.get(src)
            )

        if transform_type == "remove_fields":
            return pipeline.remove_fields(transform.get("fields", []))

        # "transform_field" is accepted by the validator but is a no-op here:
        # it would need a callable, which a YAML/JSON config cannot carry.
        return pipeline

    def to_filter_chain(self) -> Optional[FilterChain]:
        """
        Convert filter configuration to FilterChain.

        Returns:
            FilterChain combining all filters with AND logic if any filters
            are defined, None otherwise

        Example:
            >>> config = ConfigLoader.load("config.yaml")
            >>> chain = config.to_filter_chain()
        """
        if not self.filters:
            return None

        return FilterChain(
            [Filter.from_dict(filter_config) for filter_config in self.filters],
            logic="AND",
        )

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get configuration value by key.

        Plain keys are looked up among the declared configuration fields
        first and then in the raw dict. Methods and other non-field
        attributes are never returned. Dotted keys are resolved through
        nested dicts in the raw configuration; if that fails and the first
        segment names a declared field, the remaining segments are resolved
        inside that field's value.

        Args:
            key: Configuration key (supports dot notation)
            default: Default value if key not found

        Returns:
            Configuration value or default

        Example:
            >>> config.get("schema_mapping")
            >>> config.get("loader_options.recursive", False)
        """
        from dataclasses import fields as dataclass_fields

        declared = {spec.name for spec in dataclass_fields(self)}
        missing = object()  # sentinel: None is a legitimate stored value

        def walk(container: Any, parts: List[str]) -> Any:
            """Follow ``parts`` through nested dicts, or return ``missing``."""
            current = container
            for part in parts:
                if not (isinstance(current, dict) and part in current):
                    return missing
                current = current[part]
            return current

        if "." in key:
            head, *rest = key.split(".")
            found = walk(self.raw, [head, *rest])
            if found is missing and head in declared:
                # Covers Config objects built directly, whose ``raw`` does
                # not mirror the field values.
                found = walk(getattr(self, head), rest)
            return default if found is missing else found

        if key in declared:
            return getattr(self, key)

        # Fallback to raw dict for keys outside the declared schema
        return self.raw.get(key, default)

    def validate(self) -> None:
        """
        Validate the configuration.

        Validation runs on ``raw`` only; values set directly on the other
        attributes are not inspected.

        Raises:
            ConfigValidationError: If configuration is invalid
        """
        errors = ConfigValidator().validate(self.raw)
        if errors:
            raise ConfigValidationError(errors)


class ConfigLoader:
    """
    Load configuration from YAML or JSON files.

    Supports:
    - YAML (.yaml, .yml)
    - JSON (.json)
    - Python dict (for programmatic use)

    Example:
        >>> config = ConfigLoader.load("my_config.yaml")
        >>> config = ConfigLoader.load(Path("config.json"))
        >>> config = ConfigLoader.from_dict({"filters": [...]})
    """

    @staticmethod
    def load(path: Union[str, Path]) -> Config:
        """
        Load configuration from file.

        The format is chosen from the file extension (case-insensitive) and
        the file is read as UTF-8. An empty document yields an empty
        configuration.

        Args:
            path: Path to YAML or JSON configuration file

        Returns:
            Parsed Config object

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If the file extension is not .yaml, .yml or .json
            ConfigValidationError: If config doesn't match schema

            Errors raised by the YAML/JSON parser for malformed content, and
            ``UnicodeDecodeError`` for non-UTF-8 files, propagate unchanged.

        Example:
            >>> config = ConfigLoader.load("config.yaml")
        """
        path = Path(path)

        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        # Decide the format before touching the file so an unsupported
        # extension is reported without reading anything.
        suffix = path.suffix.lower()
        if suffix in (".yaml", ".yml"):
            content_format = "yaml"
        elif suffix == ".json":
            content_format = "json"
        else:
            raise ValueError(f"Unsupported config format: {suffix}")

        # Explicit UTF-8: the platform default encoding is not reliable for
        # YAML/JSON files.
        content = path.read_text(encoding="utf-8")
        return ConfigLoader.from_string(content, format=content_format)

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> Config:
        """
        Create configuration from dictionary.

        The returned Config references the containers in ``data`` directly
        (no copy is made), and keeps ``data`` itself as ``raw``.

        Args:
            data: Configuration as dictionary

        Returns:
            Parsed Config object

        Raises:
            ConfigValidationError: If ``data`` is not a dict or doesn't match
                the schema

        Example:
            >>> config = ConfigLoader.from_dict({
            ...     "schema_mapping": {"Old-Name": "new_name"},
            ...     "filters": [{"field": "x", "op": "==", "value": 1}]
            ... })
        """
        # A parsed document can be a list or scalar; reject it here with a
        # validation error rather than failing later on ``data.get``.
        if not isinstance(data, dict):
            raise ConfigValidationError(["config must be a dict"])

        errors = ConfigValidator().validate(data)
        if errors:
            raise ConfigValidationError(errors)

        return Config(
            schema_mapping=data.get("schema_mapping", {}),
            filters=data.get("filters", []),
            transforms=data.get("transforms", []),
            array_fields=data.get("array_fields", []),
            computed_fields=data.get("computed_fields", {}),
            loader_options=data.get("loader_options", {}),
            raw=data,
        )

    @staticmethod
    def from_string(content: str, format: str = "yaml") -> Config:
        """
        Create configuration from string content.

        Args:
            content: Configuration as string
            format: Either "yaml" or "json" (case-insensitive)

        Returns:
            Parsed Config object

        Raises:
            ValueError: If ``format`` is neither "yaml" nor "json"
            ConfigValidationError: If config doesn't match schema

        Example:
            >>> yaml_str = '''
            ... schema_mapping:
            ...   a: b
            ... '''
            >>> config = ConfigLoader.from_string(yaml_str, format="yaml")
        """
        normalized = format.lower()
        if normalized == "yaml":
            data = yaml.safe_load(content)
        elif normalized == "json":
            data = json.loads(content)
        else:
            raise ValueError(f"Unsupported format: {format}")

        # An empty YAML document (or JSON ``null``) parses to None; treat it
        # as an empty configuration.
        if data is None:
            data = {}

        return ConfigLoader.from_dict(data)


class ConfiguredLoader:
    """
    Data loader configured from YAML/JSON.

    Combines FileDiscovery and a Pipeline built from the configuration into
    a single configured loading workflow. Filters are applied as part of the
    pipeline.

    Example:
        >>> loader = ConfiguredLoader.from_config("config.yaml")
        >>> results = loader.load("data/")
    """

    @classmethod
    def from_config(cls, config_path: Union[str, Path]) -> ConfiguredLoader:
        """
        Create loader from configuration file.

        Args:
            config_path: Path to YAML/JSON configuration

        Returns:
            Configured loader instance

        Example:
            >>> loader = ConfiguredLoader.from_config("my_experiment.yaml")
        """
        return cls.from_config_object(ConfigLoader.load(config_path))

    @classmethod
    def from_config_object(cls, config: Config) -> ConfiguredLoader:
        """
        Create loader from Config object.

        Args:
            config: Parsed Config object

        Returns:
            Configured loader instance
        """
        return cls(config, config.to_pipeline(), config.to_filter_chain())

    def __init__(
        self,
        config: Config,
        pipeline: Pipeline,
        filter_chain: Optional[FilterChain] = None,
    ):
        """
        Initialize configured loader.

        Args:
            config: Parsed configuration
            pipeline: Pipeline for transformations
            filter_chain: Optional filter chain. It is stored on the
                instance but not consulted by any method of this class.
        """
        self._config = config
        self._pipeline = pipeline
        self._filter_chain = filter_chain

    def load(
        self,
        directory: Union[str, Path],
        pattern: str = "*.pkl",
        recursive: bool = False,
    ) -> List[Dict]:
        """
        Load and process data from directory.

        Records from every discovered file are concatenated in discovery
        order and run through the pipeline in a single call.

        Args:
            directory: Directory to load from
            pattern: File pattern to match. Used only when the configuration
                has no ``loader_options.file_pattern``; the configured value
                takes precedence.
            recursive: Search subdirectories. Used only when the
                configuration has no ``loader_options.recursive``.

        Returns:
            List of processed records

        Raises:
            FileNotFoundError: If ``directory`` does not exist
            ValueError: If a discovered file is not a supported type or does
                not contain a list or dict

        Example:
            >>> results = loader.load("data/experiments/", pattern="*.json")
        """
        directory = Path(directory)

        if not directory.exists():
            raise FileNotFoundError(f"Directory not found: {directory}")

        # Configuration wins over the arguments, which act as fallbacks.
        options = self._config.loader_options
        pattern = options.get("file_pattern", pattern)
        recursive = options.get("recursive", recursive)

        files = FileDiscovery.discover(directory, pattern=pattern, recursive=recursive)

        all_records: List[Dict] = []
        for filepath in files:
            all_records.extend(self._load_file_records(filepath))

        return self._pipeline.apply(all_records)

    def load_file(self, path: Union[str, Path]) -> List[Dict]:
        """
        Load and process a single file.

        Args:
            path: Path to file

        Returns:
            List of processed records from file
        """
        return self._pipeline.apply(self._load_file_records(Path(path)))

    def _load_file_records(self, path: Path) -> List[Dict]:
        """
        Load records from a single file.

        The file type is chosen from the extension, case-insensitively:
        ``.pkl`` is unpickled and ``.json`` is parsed as UTF-8 JSON.

        Args:
            path: File to read

        Returns:
            The file's content if it is a list, or a one-element list
            wrapping it if it is a dict

        Raises:
            ValueError: If the extension is unsupported or the content is
                neither a list nor a dict
        """
        # Lowercase so ".PKL" / ".JSON" are handled like their lowercase
        # forms, matching how ConfigLoader treats config file extensions.
        suffix = path.suffix.lower()

        if suffix == ".pkl":
            with open(path, "rb") as f:
                # SECURITY: unpickling can execute arbitrary code. Only load
                # pickle files from trusted sources.
                data = pickle.load(f)
        elif suffix == ".json":
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        else:
            raise ValueError(f"Unsupported file type: {path.suffix}")

        # Ensure we have a list of records
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            return [data]
        raise ValueError(f"Expected list or dict, got {type(data)}")


def load_with_config(
    config_path: Union[str, Path],
    data_directory: Union[str, Path],
    pattern: str = "*.pkl",
    recursive: bool = False,
) -> List[Dict]:
    """
    Load and process a data directory according to a configuration file.

    One-call shortcut: builds a ``ConfiguredLoader`` from ``config_path`` and
    immediately runs its ``load`` on ``data_directory``.

    Args:
        config_path: Path to configuration file (YAML or JSON)
        data_directory: Directory containing data files
        pattern: Glob pattern for file discovery (default: "*.pkl");
            forwarded to ``ConfiguredLoader.load``
        recursive: Search subdirectories (default: False); forwarded to
            ``ConfiguredLoader.load``

    Returns:
        List of processed record dictionaries

    Example:
        >>> results = load_with_config("experiment.yaml", "data/results/")
        >>> for r in results:
        ...     print(f"Month {r['month']}: F1={r.get('f1', 'N/A')}")

    Config file example::

        schema_mapping:
          Test-Month: month
          Trainer-Mode: trainer_mode

        filters:
          - field: month
            op: "<="
            value: 22

        array_fields:
          - Predictions
          - Labels
    """
    return ConfiguredLoader.from_config(config_path).load(
        data_directory,
        pattern=pattern,
        recursive=recursive,
    )
