Files
vault-dash/app/services/backtesting/ui_service.py
Bu5hm4nn a2a816cc79 fix(backtest): use fixture provider ID for backtest scenario
The backtest engine uses a fixture provider (synthetic_v1) regardless of
the data_source used for price fetching. We must use the fixture provider's
ID for the scenario, not the data source's ID.

This fixes 'Unsupported provider/pricing combination' error when running
backtests with data_source='databento'.
2026-04-01 09:42:23 +02:00

363 lines
14 KiB
Python

from __future__ import annotations

import logging
from copy import copy
from dataclasses import dataclass
from datetime import date
from math import isclose
from typing import Any

from app.backtesting.engine import SyntheticBacktestEngine
from app.domain.backtesting_math import materialize_backtest_portfolio_state
from app.models.backtest import (
    BacktestRunResult,
    BacktestScenario,
    ProviderRef,
    TemplateRef,
)
from app.services.backtesting.databento_source import DatabentoHistoricalPriceSource, DatabentoSourceConfig
from app.services.backtesting.fixture_source import (
    FixtureBoundSyntheticHistoricalProvider,
    build_backtest_ui_fixture_source,
)
from app.services.backtesting.historical_provider import (
    DailyClosePoint,
    SyntheticHistoricalProvider,
    YFinanceHistoricalPriceSource,
)
from app.services.backtesting.input_normalization import normalize_historical_scenario_inputs
from app.services.backtesting.service import BacktestService
from app.services.strategy_templates import StrategyTemplateService
# Symbols the backtest page accepts. validate_preview_inputs upper-cases the
# requested symbol and rejects anything that is not listed here.
SUPPORTED_BACKTEST_PAGE_SYMBOLS = ("GLD", "GC", "XAU")
def _validate_initial_collateral(underlying_units: float, entry_spot: float, loan_amount: float) -> None:
    """Ensure the loan is strictly smaller than the collateral value at entry.

    Args:
        underlying_units: Number of underlying units pledged as collateral.
        entry_spot: Price per unit at scenario start.
        loan_amount: Outstanding loan at scenario start.

    Raises:
        ValueError: If ``loan_amount`` is greater than or equal to
            ``underlying_units * entry_spot``.
    """
    collateral_at_entry = underlying_units * entry_spot
    starts_undercollateralized = loan_amount >= collateral_at_entry
    if not starts_undercollateralized:
        return

    message = (
        "Historical scenario starts undercollateralized: "
        f"loan ${loan_amount:,.0f} exceeds initial collateral ${collateral_at_entry:,.0f} "
        f"at entry spot ${entry_spot:,.2f}. Reduce loan amount or increase underlying units."
    )
    raise ValueError(message)
@dataclass(frozen=True)
class BacktestPageRunResult:
    """Immutable bundle handed back to the backtest page after a run.

    Attributes:
        scenario: The scenario that was executed.
        run_result: The result object produced for that scenario.
        entry_spot: Entry spot the initial portfolio was built from.
        data_source: Key of the data source the request named
            (defaults to ``"synthetic"``).
        data_cost_usd: Estimated data cost in USD (defaults to ``0.0``).
        cache_status: Cache status text (defaults to the empty string).
    """

    scenario: BacktestScenario
    run_result: BacktestRunResult
    entry_spot: float
    data_source: str = "synthetic"
    data_cost_usd: float = 0.0
    cache_status: str = ""
@dataclass(frozen=True)
class DataSourceInfo:
    """Immutable descriptor for one selectable data source.

    Attributes:
        provider_id: Identifier string of the provider.
        pricing_mode: Pricing-mode string associated with the provider.
        display_name: Human-readable name of the source.
        supports_cost_estimate: Flag recording whether the source offers
            cost estimates.
        supports_cache: Flag recording whether the source offers caching.
    """

    provider_id: str
    pricing_mode: str
    display_name: str
    supports_cost_estimate: bool
    supports_cache: bool
class BacktestPageService:
    """Backend service behind the backtest page UI.

    Holds the historical data providers used by the page and exposes three
    data sources: Databento, Yahoo Finance, and synthetic data.
    """

    # Static descriptors for every data source, keyed by the ``data_source``
    # string the methods of this service accept.
    DATA_SOURCE_INFO: dict[str, DataSourceInfo] = {
        "databento": DataSourceInfo(
            provider_id="databento",
            pricing_mode="historical",
            display_name="Databento",
            supports_cost_estimate=True,
            supports_cache=True,
        ),
        "yfinance": DataSourceInfo(
            provider_id="yfinance",
            pricing_mode="free",
            display_name="Yahoo Finance",
            supports_cost_estimate=False,
            supports_cache=False,
        ),
        "synthetic": DataSourceInfo(
            provider_id="synthetic_v1",
            pricing_mode="synthetic_bs_mid",
            display_name="Synthetic",
            supports_cost_estimate=False,
            supports_cache=False,
        ),
    }
def __init__(
    self,
    backtest_service: BacktestService | None = None,
    template_service: StrategyTemplateService | None = None,
    databento_config: DatabentoSourceConfig | None = None,
) -> None:
    """Wire the page service around a fixture-bound synthetic provider.

    Args:
        backtest_service: Optional service to base this one on; a new
            ``BacktestService`` is built when omitted.
        template_service: Optional template service; falls back to the one
            on the base backtest service.
        databento_config: Optional configuration passed to the Databento
            price source when it is first created.
    """
    if backtest_service:
        seed_service = backtest_service
    else:
        seed_service = BacktestService(
            template_service=template_service,
            provider=None,
        )

    if template_service:
        self.template_service = template_service
    else:
        self.template_service = seed_service.template_service
    self.databento_config = databento_config

    # Reuse the seed service's provider when it has one; otherwise start
    # from a fresh synthetic provider.
    upstream_provider = seed_service.provider
    if upstream_provider is None:
        upstream_provider = SyntheticHistoricalProvider()
    bound_provider = FixtureBoundSyntheticHistoricalProvider(
        base_provider=upstream_provider,  # type: ignore[arg-type]
        source=build_backtest_ui_fixture_source(),
    )

    # Rewire a shallow copy so the attributes of the service object that was
    # passed in are not reassigned.
    page_service = self.backtest_service = copy(seed_service)
    page_service.provider = bound_provider
    page_service.template_service = self.template_service
    page_service.engine = SyntheticBacktestEngine(bound_provider)

    # Price sources are created on first use by the _get_*_provider helpers.
    self._databento_provider: DatabentoHistoricalPriceSource | None = None
    self._yfinance_provider: YFinanceHistoricalPriceSource | None = None
def _get_databento_provider(self) -> DatabentoHistoricalPriceSource:
    """Return the Databento price source, building it on first use."""
    existing = self._databento_provider
    if existing is not None:
        return existing
    # First request: build from the stored config and remember the instance.
    self._databento_provider = DatabentoHistoricalPriceSource(config=self.databento_config)
    return self._databento_provider
def _get_yfinance_provider(self) -> YFinanceHistoricalPriceSource:
    """Return the Yahoo Finance price source, building it on first use."""
    existing = self._yfinance_provider
    if existing is not None:
        return existing
    # First request: create the source and remember the instance.
    self._yfinance_provider = YFinanceHistoricalPriceSource()
    return self._yfinance_provider
def get_historical_prices(
    self, symbol: str, start_date: date, end_date: date, data_source: str
) -> list[DailyClosePoint]:
    """Fetch daily closes for a symbol from the chosen data source.

    Args:
        symbol: Trading symbol (GLD, GC, XAU).
        start_date: Start of the requested window.
        end_date: End of the requested window.
        data_source: ``"databento"`` or ``"yfinance"``; any other value is
            served from the fixture-backed provider of the backtest service.

    Returns:
        Whatever the selected source returns for the window.
    """
    if data_source == "databento":
        databento = self._get_databento_provider()
        return databento.load_daily_closes(symbol, start_date, end_date)

    if data_source == "yfinance":
        yfinance = self._get_yfinance_provider()
        return yfinance.load_daily_closes(symbol, start_date, end_date)

    # Everything else is answered from the synthetic fixture data.
    fixture_provider = self.backtest_service.provider
    return fixture_provider.load_history(symbol, start_date, end_date)
def get_cost_estimate(self, symbol: str, start_date: date, end_date: date, data_source: str = "databento") -> float:
    """Return the estimated USD cost of a historical data request.

    This is a best-effort lookup: if the Databento provider cannot be
    created or the estimate fails, the failure is logged and ``0.0`` is
    returned so the caller keeps working.

    Args:
        symbol: Trading symbol.
        start_date: Start of the requested window.
        end_date: End of the requested window.
        data_source: Data source key; only ``"databento"`` is priced.

    Returns:
        Estimated cost in USD, or ``0.0`` for any other source or on failure.
    """
    if data_source != "databento":
        return 0.0
    try:
        # Provider creation stays inside the try: it may fail as well.
        provider = self._get_databento_provider()
        return provider.get_cost_estimate(symbol, start_date, end_date)
    except Exception:
        # Keep the 0.0 fallback, but leave a trace: a silent 0.0 cannot be
        # told apart from a request that is genuinely free.
        logging.getLogger(__name__).warning(
            "Databento cost estimate failed for %s (%s to %s); reporting 0.0",
            symbol,
            start_date,
            end_date,
            exc_info=True,
        )
        return 0.0
def get_cache_stats(
    self, symbol: str, start_date: date, end_date: date, data_source: str = "databento"
) -> dict[str, Any]:
    """Return cache statistics reported by the Databento provider.

    This is a best-effort lookup: failures are logged and reported through
    an ``"error"`` status payload instead of being raised.

    Args:
        symbol: Trading symbol (accepted but not forwarded to the provider).
        start_date: Start date (accepted but not forwarded to the provider).
        end_date: End date (accepted but not forwarded to the provider).
        data_source: Data source key; only ``"databento"`` has a cache.

    Returns:
        The provider's statistics dict, ``{"status": "not_applicable",
        "entries": []}`` for any other source, or ``{"status": "error",
        "entries": []}`` when the lookup fails.
    """
    if data_source != "databento":
        return {"status": "not_applicable", "entries": []}
    try:
        # Provider creation stays inside the try: it may fail as well.
        provider = self._get_databento_provider()
        return provider.get_cache_stats()
    except Exception:
        # Keep the error payload, but record why the lookup failed.
        logging.getLogger(__name__).warning(
            "Databento cache statistics lookup failed; reporting error status",
            exc_info=True,
        )
        return {"status": "error", "entries": []}
def get_available_date_range(self, symbol: str, data_source: str = "databento") -> tuple[date | None, date | None]:
    """Return the date range the data source reports for a symbol.

    This is a best-effort lookup: failures are logged and reported as
    ``(None, None)`` instead of being raised.

    Args:
        symbol: Trading symbol.
        data_source: Data source key; only ``"databento"`` is queried.

    Returns:
        The provider's range for ``symbol``, or ``(None, None)`` for any
        other source or when the lookup fails.
    """
    if data_source != "databento":
        return None, None
    try:
        # Provider creation stays inside the try: it may fail as well.
        provider = self._get_databento_provider()
        return provider.get_available_range(symbol)
    except Exception:
        # Keep the (None, None) fallback, but record why the lookup failed.
        logging.getLogger(__name__).warning(
            "Databento available-range lookup failed for %s; reporting no range",
            symbol,
            exc_info=True,
        )
        return None, None
def template_options(self, symbol: str = "GLD") -> list[dict[str, str | int]]:
    """List the active strategy templates for a symbol as plain option dicts.

    Args:
        symbol: Symbol whose active templates are requested.

    Returns:
        One dict per template with the keys ``label``, ``slug``,
        ``version`` and ``description``, in the order the template service
        returns them.
    """
    active_templates = self.template_service.list_active_templates(symbol)
    return [
        dict(
            label=entry.display_name,
            slug=entry.slug,
            version=entry.version,
            description=entry.description,
        )
        for entry in active_templates
    ]
def derive_entry_spot(self, symbol: str, start_date: date, end_date: date, data_source: str) -> float:
    """Return the close on ``start_date``, used as the scenario entry spot.

    Args:
        symbol: Trading symbol.
        start_date: Scenario start; must be the date of the first close.
        end_date: Scenario end.
        data_source: Data source key passed to ``get_historical_prices``.

    Returns:
        The ``close`` of the first point in the loaded history.

    Raises:
        ValueError: If no prices are returned, or the first returned point
            is not dated ``start_date``.
    """
    closes = self.get_historical_prices(symbol, start_date, end_date, data_source)
    if not closes:
        raise ValueError("No historical prices found for scenario window")

    first_point = closes[0]
    if first_point.date != start_date:
        raise ValueError(
            "Scenario start date must match the first available historical close for entry-at-start backtests"
        )
    return first_point.close
def validate_preview_inputs(
    self,
    *,
    symbol: str,
    start_date: date,
    end_date: date,
    template_slug: str,
    underlying_units: float,
    loan_amount: float,
    margin_call_ltv: float,
    entry_spot: float | None = None,
    data_source: str,
) -> float:
    """Validate the page inputs and return the historical entry spot.

    Checks run in this order: symbol, date ordering, numeric normalization,
    template selection, entry-spot agreement, initial collateral.

    Args:
        symbol: Requested symbol; surrounding whitespace and case are ignored.
        start_date: Scenario start date.
        end_date: Scenario end date.
        template_slug: Slug of the selected strategy template.
        underlying_units: Units of the underlying held as collateral.
        loan_amount: Loan amount at scenario start.
        margin_call_ltv: Margin-call loan-to-value threshold.
        entry_spot: Optional caller-supplied entry spot to cross-check.
        data_source: Data source key used to load the historical closes.

    Returns:
        The entry spot derived from the historical prices.

    Raises:
        ValueError: If the symbol is blank or unsupported, the dates are
            reversed, no template is selected, the supplied entry spot is
            not close to the derived one, or the loan is not below the
            initial collateral value. Helpers called here may raise as well.
    """
    ticker = symbol.strip().upper()
    if not ticker:
        raise ValueError("Symbol is required")
    if ticker not in SUPPORTED_BACKTEST_PAGE_SYMBOLS:
        raise ValueError(f"Backtests support symbols: {', '.join(SUPPORTED_BACKTEST_PAGE_SYMBOLS)}")
    if start_date > end_date:
        raise ValueError("Start date must be on or before end date")

    cleaned = normalize_historical_scenario_inputs(
        underlying_units=underlying_units,
        loan_amount=loan_amount,
        margin_call_ltv=margin_call_ltv,
    )

    if not template_slug:
        raise ValueError("Template selection is required")
    # Result is discarded; presumably this raises for an unknown slug -- confirm.
    self.template_service.get_template(template_slug)

    historical_spot = self.derive_entry_spot(ticker, start_date, end_date, data_source)
    if entry_spot is not None:
        spots_agree = isclose(
            entry_spot,
            historical_spot,
            rel_tol=BacktestService.ENTRY_SPOT_REL_TOLERANCE,
            abs_tol=BacktestService.ENTRY_SPOT_ABS_TOLERANCE,
        )
        if not spots_agree:
            raise ValueError(
                f"Supplied entry spot ${entry_spot:,.2f} does not match derived historical entry spot ${historical_spot:,.2f}"
            )

    _validate_initial_collateral(
        cleaned.underlying_units,
        historical_spot,
        cleaned.loan_amount,
    )
    return historical_spot
def run_read_only_scenario(
    self,
    *,
    symbol: str,
    start_date: date,
    end_date: date,
    template_slug: str,
    underlying_units: float,
    loan_amount: float,
    margin_call_ltv: float,
    data_source: str = "synthetic",
) -> BacktestPageRunResult:
    """Validate the inputs, build a scenario and run it through the backtest service.

    Args:
        symbol: Requested symbol; surrounding whitespace and case are ignored.
        start_date: Scenario start date.
        end_date: Scenario end date.
        template_slug: Slug of the strategy template to apply.
        underlying_units: Units of the underlying held as collateral.
        loan_amount: Loan amount at scenario start.
        margin_call_ltv: Margin-call loan-to-value threshold.
        data_source: Source used to derive the entry spot and, for
            ``"databento"``, the cost estimate. The scenario itself always
            references the provider held by the backtest service.

    Returns:
        A ``BacktestPageRunResult`` with the scenario, its run result, the
        derived entry spot, the data source and the estimated data cost.

    Raises:
        ValueError: Propagated from ``validate_preview_inputs`` when the
            inputs are rejected.
    """
    normalized_symbol = symbol.strip().upper()
    entry_spot = self.validate_preview_inputs(
        symbol=normalized_symbol,
        start_date=start_date,
        end_date=end_date,
        template_slug=template_slug,
        underlying_units=underlying_units,
        loan_amount=loan_amount,
        margin_call_ltv=margin_call_ltv,
        data_source=data_source,
    )
    normalized_inputs = normalize_historical_scenario_inputs(
        underlying_units=underlying_units,
        loan_amount=loan_amount,
        margin_call_ltv=margin_call_ltv,
    )
    template = self.template_service.get_template(template_slug)
    initial_portfolio = materialize_backtest_portfolio_state(
        symbol=normalized_symbol,
        underlying_units=normalized_inputs.underlying_units,
        entry_spot=entry_spot,
        loan_amount=normalized_inputs.loan_amount,
        margin_call_ltv=normalized_inputs.margin_call_ltv,
    )

    # CRITICAL: the engine runs on the fixture provider held by the backtest
    # service, regardless of the data_source used for price fetching, so the
    # scenario must carry that provider's ID and pricing mode. data_source
    # only affects where the entry spot (and cost estimate) come from.
    engine_provider = self.backtest_service.provider
    provider_id = engine_provider.provider_id
    pricing_mode = engine_provider.pricing_mode

    start_iso = start_date.isoformat()
    end_iso = end_date.isoformat()
    scenario = BacktestScenario(
        scenario_id=f"{normalized_symbol.lower()}-{start_iso}-{end_iso}-{template.slug}",
        # The two dates need a separator; without one they run together as
        # "2024-01-022024-01-31".
        display_name=f"{normalized_symbol} backtest {start_iso}→{end_iso}",
        symbol=normalized_symbol,
        start_date=start_date,
        end_date=end_date,
        initial_portfolio=initial_portfolio,
        template_refs=(TemplateRef(slug=template.slug, version=template.version),),
        provider_ref=ProviderRef(
            provider_id=provider_id,
            pricing_mode=pricing_mode,
        ),
    )

    # Only Databento requests carry a cost estimate.
    data_cost_usd = 0.0
    if data_source == "databento":
        data_cost_usd = self.get_cost_estimate(normalized_symbol, start_date, end_date, data_source)

    return BacktestPageRunResult(
        scenario=scenario,
        run_result=self.backtest_service.run_scenario(scenario),
        entry_spot=entry_spot,
        data_source=data_source,
        data_cost_usd=data_cost_usd,
    )