# vault-dash/app/services/backtesting/ui_service.py

from __future__ import annotations

import logging
from copy import copy
from dataclasses import dataclass
from datetime import date
from math import isclose
from typing import Any

from app.backtesting.engine import SyntheticBacktestEngine
from app.domain.backtesting_math import materialize_backtest_portfolio_state
from app.models.backtest import (
    BacktestRunResult,
    BacktestScenario,
    ProviderRef,
    TemplateRef,
)
from app.services.backtesting.databento_source import DatabentoHistoricalPriceSource, DatabentoSourceConfig
from app.services.backtesting.fixture_source import (
    FixtureBoundSyntheticHistoricalProvider,
    build_backtest_ui_fixture_source,
)
from app.services.backtesting.historical_provider import (
    DailyClosePoint,
    SyntheticHistoricalProvider,
    YFinanceHistoricalPriceSource,
)
from app.services.backtesting.input_normalization import normalize_historical_scenario_inputs
from app.services.backtesting.service import BacktestService
from app.services.strategy_templates import StrategyTemplateService

# Upper-cased symbols accepted by BacktestPageService.validate_preview_inputs;
# any other symbol is rejected there with a ValueError.
SUPPORTED_BACKTEST_PAGE_SYMBOLS = ("GLD", "GC", "XAU")


def _validate_initial_collateral(underlying_units: float, entry_spot: float, loan_amount: float) -> None:
    initial_collateral_value = underlying_units * entry_spot
    if loan_amount >= initial_collateral_value:
        raise ValueError(
            "Historical scenario starts undercollateralized: "
            f"loan ${loan_amount:,.0f} exceeds initial collateral ${initial_collateral_value:,.0f} "
            f"at entry spot ${entry_spot:,.2f}. Reduce loan amount or increase underlying units."
        )


@dataclass(frozen=True)
class BacktestPageRunResult:
    """Immutable result bundle returned by ``BacktestPageService.run_read_only_scenario``.

    Carries the scenario that was run, the engine result, and metadata about
    where the price history came from.
    """

    # Scenario definition that was executed.
    scenario: BacktestScenario
    # Engine output for the scenario.
    run_result: BacktestRunResult
    # Entry spot derived from the first historical close of the window.
    entry_spot: float
    # Data source key the price history was loaded from; defaults to "synthetic".
    data_source: str = "synthetic"
    # Estimated data cost in USD; run_read_only_scenario only fills this for "databento".
    data_cost_usd: float = 0.0
    # Cache status text; run_read_only_scenario leaves this at its empty default.
    cache_status: str = ""


@dataclass(frozen=True)
class DataSourceInfo:
    """Immutable descriptor of a data source and the optional features it offers."""

    # Provider identifier string (e.g. "databento", "yfinance", "synthetic_v1").
    provider_id: str
    # Pricing mode label (e.g. "historical", "free", "synthetic_bs_mid").
    pricing_mode: str
    # Human-readable name of the source.
    display_name: str
    # Whether the source is flagged as able to provide a cost estimate.
    supports_cost_estimate: bool
    # Whether the source is flagged as cache-backed.
    supports_cache: bool


class BacktestPageService:
    """Service for the backtest page UI.

    This service manages historical data providers and supports multiple
    data sources including Databento, Yahoo Finance, and synthetic data.

    The wrapped ``BacktestService`` is copied and rebound to a fixture-backed
    synthetic provider; the Databento and Yahoo Finance price sources are
    created lazily on first use.
    """

    # Static descriptors keyed by the data_source strings used by this class
    # ("databento", "yfinance", "synthetic"). Note the "synthetic" key maps to
    # provider_id "synthetic_v1".
    # NOTE(review): no method visible in this part of the file reads this
    # mapping — confirm it is consumed by callers elsewhere.
    DATA_SOURCE_INFO: dict[str, DataSourceInfo] = {
        "databento": DataSourceInfo(
            provider_id="databento",
            pricing_mode="historical",
            display_name="Databento",
            supports_cost_estimate=True,
            supports_cache=True,
        ),
        "yfinance": DataSourceInfo(
            provider_id="yfinance",
            pricing_mode="free",
            display_name="Yahoo Finance",
            supports_cost_estimate=False,
            supports_cache=False,
        ),
        "synthetic": DataSourceInfo(
            provider_id="synthetic_v1",
            pricing_mode="synthetic_bs_mid",
            display_name="Synthetic",
            supports_cost_estimate=False,
            supports_cache=False,
        ),
    }

    def __init__(
        self,
        backtest_service: BacktestService | None = None,
        template_service: StrategyTemplateService | None = None,
        databento_config: DatabentoSourceConfig | None = None,
    ) -> None:
        base_service = backtest_service or BacktestService(
            template_service=template_service,
            provider=None,
        )
        self.template_service = template_service or base_service.template_service
        self.databento_config = databento_config
        # Use the injected provider if available, otherwise create a new one
        base_provider = base_service.provider
        if base_provider is None:
            base_provider = SyntheticHistoricalProvider()
        fixture_provider = FixtureBoundSyntheticHistoricalProvider(
            base_provider=base_provider,  # type: ignore[arg-type]
            source=build_backtest_ui_fixture_source(),
        )
        self.backtest_service = copy(base_service)
        self.backtest_service.provider = fixture_provider
        self.backtest_service.template_service = self.template_service
        self.backtest_service.engine = SyntheticBacktestEngine(fixture_provider)

        # Cache for Databento provider instances
        self._databento_provider: DatabentoHistoricalPriceSource | None = None
        self._yfinance_provider: YFinanceHistoricalPriceSource | None = None

    def _get_databento_provider(self) -> DatabentoHistoricalPriceSource:
        """Get or create the Databento provider instance."""
        if self._databento_provider is None:
            self._databento_provider = DatabentoHistoricalPriceSource(config=self.databento_config)
        return self._databento_provider

    def _get_yfinance_provider(self) -> YFinanceHistoricalPriceSource:
        """Get or create the YFinance provider instance."""
        if self._yfinance_provider is None:
            self._yfinance_provider = YFinanceHistoricalPriceSource()
        return self._yfinance_provider

    def get_historical_prices(
        self, symbol: str, start_date: date, end_date: date, data_source: str
    ) -> list[DailyClosePoint]:
        """Load historical prices from the specified data source.

        Args:
            symbol: Trading symbol (GLD, GC, XAU)
            start_date: Start date
            end_date: End date
            data_source: One of "databento", "yfinance", "synthetic"

        Returns:
            List of daily close points sorted by date
        """
        if data_source == "databento":
            return self._get_databento_provider().load_daily_closes(symbol, start_date, end_date)
        elif data_source == "yfinance":
            return self._get_yfinance_provider().load_daily_closes(symbol, start_date, end_date)
        else:
            # Use synthetic fixture data
            return self.backtest_service.provider.load_history(symbol, start_date, end_date)

    def get_cost_estimate(self, symbol: str, start_date: date, end_date: date, data_source: str = "databento") -> float:
        """Get estimated cost for the data request.

        Args:
            symbol: Trading symbol
            start_date: Start date
            end_date: End date
            data_source: Data source (only "databento" supports this)

        Returns:
            Estimated cost in USD
        """
        if data_source != "databento":
            return 0.0

        try:
            provider = self._get_databento_provider()
            return provider.get_cost_estimate(symbol, start_date, end_date)
        except Exception:
            return 0.0

    def get_cache_stats(
        self, symbol: str, start_date: date, end_date: date, data_source: str = "databento"
    ) -> dict[str, Any]:
        """Get cache statistics for the data request.

        Args:
            symbol: Trading symbol
            start_date: Start date
            end_date: End date
            data_source: Data source (only "databento" supports this)

        Returns:
            Dict with cache statistics
        """
        if data_source != "databento":
            return {"status": "not_applicable", "entries": []}

        try:
            provider = self._get_databento_provider()
            return provider.get_cache_stats()
        except Exception:
            return {"status": "error", "entries": []}

    def get_available_date_range(self, symbol: str, data_source: str = "databento") -> tuple[date | None, date | None]:
        """Get the available date range for a symbol from the data source.

        Args:
            symbol: Trading symbol
            data_source: Data source (only "databento" supports this)

        Returns:
            Tuple of (start_date, end_date) or (None, None) if unavailable
        """
        if data_source != "databento":
            return None, None

        try:
            provider = self._get_databento_provider()
            return provider.get_available_range(symbol)
        except Exception:
            return None, None

    def template_options(self, symbol: str = "GLD") -> list[dict[str, str | int]]:
        return [
            {
                "label": template.display_name,
                "slug": template.slug,
                "version": template.version,
                "description": template.description,
            }
            for template in self.template_service.list_active_templates(symbol)
        ]

    def derive_entry_spot(self, symbol: str, start_date: date, end_date: date, data_source: str) -> float:
        history = self.get_historical_prices(symbol, start_date, end_date, data_source)
        if not history:
            raise ValueError("No historical prices found for scenario window")
        if history[0].date != start_date:
            raise ValueError(
                "Scenario start date must match the first available historical close for entry-at-start backtests"
            )
        return history[0].close

    def validate_preview_inputs(
        self,
        *,
        symbol: str,
        start_date: date,
        end_date: date,
        template_slug: str,
        underlying_units: float,
        loan_amount: float,
        margin_call_ltv: float,
        entry_spot: float | None = None,
        data_source: str,
    ) -> float:
        normalized_symbol = symbol.strip().upper()
        if not normalized_symbol:
            raise ValueError("Symbol is required")
        if normalized_symbol not in SUPPORTED_BACKTEST_PAGE_SYMBOLS:
            raise ValueError(f"Backtests support symbols: {', '.join(SUPPORTED_BACKTEST_PAGE_SYMBOLS)}")
        if start_date > end_date:
            raise ValueError("Start date must be on or before end date")
        normalized_inputs = normalize_historical_scenario_inputs(
            underlying_units=underlying_units,
            loan_amount=loan_amount,
            margin_call_ltv=margin_call_ltv,
        )
        if not template_slug:
            raise ValueError("Template selection is required")

        self.template_service.get_template(template_slug)
        derived_entry_spot = self.derive_entry_spot(normalized_symbol, start_date, end_date, data_source)
        if entry_spot is not None and not isclose(
            entry_spot,
            derived_entry_spot,
            rel_tol=BacktestService.ENTRY_SPOT_REL_TOLERANCE,
            abs_tol=BacktestService.ENTRY_SPOT_ABS_TOLERANCE,
        ):
            raise ValueError(
                f"Supplied entry spot ${entry_spot:,.2f} does not match derived historical entry spot ${derived_entry_spot:,.2f}"
            )
        _validate_initial_collateral(
            normalized_inputs.underlying_units,
            derived_entry_spot,
            normalized_inputs.loan_amount,
        )
        return derived_entry_spot

    def run_read_only_scenario(
        self,
        *,
        symbol: str,
        start_date: date,
        end_date: date,
        template_slug: str,
        underlying_units: float,
        loan_amount: float,
        margin_call_ltv: float,
        data_source: str = "synthetic",
    ) -> BacktestPageRunResult:
        normalized_symbol = symbol.strip().upper()
        entry_spot = self.validate_preview_inputs(
            symbol=normalized_symbol,
            start_date=start_date,
            end_date=end_date,
            template_slug=template_slug,
            underlying_units=underlying_units,
            loan_amount=loan_amount,
            margin_call_ltv=margin_call_ltv,
            data_source=data_source,
        )
        normalized_inputs = normalize_historical_scenario_inputs(
            underlying_units=underlying_units,
            loan_amount=loan_amount,
            margin_call_ltv=margin_call_ltv,
        )
        template = self.template_service.get_template(template_slug)
        initial_portfolio = materialize_backtest_portfolio_state(
            symbol=normalized_symbol,
            underlying_units=normalized_inputs.underlying_units,
            entry_spot=entry_spot,
            loan_amount=normalized_inputs.loan_amount,
            margin_call_ltv=normalized_inputs.margin_call_ltv,
        )

        # Fetch historical prices using the specified data source
        history = self.get_historical_prices(normalized_symbol, start_date, end_date, data_source)
        if not history:
            raise ValueError("No historical prices found for scenario window")
        if history[0].date != start_date:
            raise ValueError(
                "Scenario start date must match the first available historical close for entry-at-start backtests"
            )

        # Use the fixture provider's ID for the scenario (for pricing mode)
        # The actual price data comes from the specified data_source
        provider_id = self.backtest_service.provider.provider_id
        pricing_mode = self.backtest_service.provider.pricing_mode

        scenario = BacktestScenario(
            scenario_id=(
                f"{normalized_symbol.lower()}-{start_date.isoformat()}-{end_date.isoformat()}-{template.slug}"
            ),
            display_name=f"{normalized_symbol} backtest {start_date.isoformat()} → {end_date.isoformat()}",
            symbol=normalized_symbol,
            start_date=start_date,
            end_date=end_date,
            initial_portfolio=initial_portfolio,
            template_refs=(TemplateRef(slug=template.slug, version=template.version),),
            provider_ref=ProviderRef(
                provider_id=provider_id,
                pricing_mode=pricing_mode,
            ),
        )

        # Get cost estimate for Databento
        data_cost_usd = 0.0
        if data_source == "databento":
            data_cost_usd = self.get_cost_estimate(normalized_symbol, start_date, end_date, data_source)

        # Run the backtest engine directly with pre-fetched history
        # This bypasses the fixture provider in BacktestService.run_scenario
        template_result = self.backtest_service.engine.run_template(scenario, template, history)
        run_result = BacktestRunResult(scenario_id=scenario.scenario_id, template_results=(template_result,))

        return BacktestPageRunResult(
            scenario=scenario,
            run_result=run_result,
            entry_spot=entry_spot,
            data_source=data_source,
            data_cost_usd=data_cost_usd,
        )