feat: use day's low price for margin call evaluation
- Extend DailyClosePoint to include low, high, open (optional) - Update Databento source to extract OHLC data from ohlcv-1d schema - Update YFinance source to extract Low, High, Open from history - Modify backtest engine to use worst-case (low) price for margin call detection This ensures margin calls are evaluated at the day's worst price, not just the closing price, providing more realistic risk assessment.
This commit is contained in:
@@ -4,45 +4,24 @@ from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.services.backtesting.historical_provider import DailyClosePoint, HistoricalPriceSource
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Try to import databento, gracefully degrade if not available
|
||||
try:
|
||||
import databento as db
|
||||
import pandas as pd
|
||||
|
||||
DATABENTO_AVAILABLE = True
|
||||
except ImportError:
|
||||
db = None
|
||||
pd = None
|
||||
DATABENTO_AVAILABLE = False
|
||||
db = None # type: ignore
|
||||
pd = None # type: ignore
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DatabentoCacheKey:
|
||||
"""Cache key for Databento data requests."""
|
||||
|
||||
dataset: str
|
||||
symbol: str
|
||||
schema: str
|
||||
start_date: date
|
||||
end_date: date
|
||||
|
||||
def cache_path(self, cache_dir: Path) -> Path:
|
||||
"""Generate cache file path from key."""
|
||||
key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}"
|
||||
key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16]
|
||||
return cache_dir / f"dbn_{key_hash}.parquet"
|
||||
|
||||
def metadata_path(self, cache_dir: Path) -> Path:
|
||||
"""Generate metadata file path from key."""
|
||||
key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}"
|
||||
key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16]
|
||||
return cache_dir / f"dbn_{key_hash}_meta.json"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -64,7 +43,28 @@ class DatabentoSourceConfig:
|
||||
object.__setattr__(self, "cache_dir", Path(self.cache_dir))
|
||||
|
||||
|
||||
class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
@dataclass(frozen=True)
|
||||
class DatabentoCacheKey:
|
||||
"""Cache key for Databento data."""
|
||||
|
||||
dataset: str
|
||||
symbol: str
|
||||
schema: str
|
||||
start_date: date
|
||||
end_date: date
|
||||
|
||||
def cache_path(self, cache_dir: Path) -> Path:
|
||||
key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}"
|
||||
key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16]
|
||||
return cache_dir / f"dbn_{key_hash}.parquet"
|
||||
|
||||
def metadata_path(self, cache_dir: Path) -> Path:
|
||||
key_str = f"{self.dataset}_{self.symbol}_{self.schema}_{self.start_date}_{self.end_date}"
|
||||
key_hash = hashlib.sha256(key_str.encode()).hexdigest()[:16]
|
||||
return cache_dir / f"dbn_{key_hash}_meta.json"
|
||||
|
||||
|
||||
class DatabentoHistoricalPriceSource:
|
||||
"""Databento-based historical price source for backtesting.
|
||||
|
||||
This provider fetches historical daily OHLCV data from Databento's API
|
||||
@@ -98,7 +98,7 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
self._client = db.Historical(key=self.config.api_key)
|
||||
return self._client
|
||||
|
||||
def _load_from_cache(self, key: DatabentoCacheKey) -> list[DailyClosePoint] | None:
|
||||
def _load_from_cache(self, key: DatabentoCacheKey) -> list[dict[str, Any]] | None:
|
||||
"""Load cached data if available and fresh."""
|
||||
cache_file = key.cache_path(self.config.cache_dir)
|
||||
meta_file = key.metadata_path(self.config.cache_dir)
|
||||
@@ -110,19 +110,22 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
with open(meta_file) as f:
|
||||
meta = json.load(f)
|
||||
|
||||
# Check cache age
|
||||
download_date = date.fromisoformat(meta["download_date"])
|
||||
age_days = (date.today() - download_date).days
|
||||
if age_days > self.config.max_cache_age_days:
|
||||
# Check dataset and symbol match (for cache invalidation)
|
||||
if meta.get("dataset") != key.dataset or meta.get("symbol") != key.symbol:
|
||||
return None
|
||||
|
||||
# Check parameters match
|
||||
if meta["dataset"] != key.dataset or meta["symbol"] != key.symbol:
|
||||
cache_age = (date.today() - date.fromisoformat(meta["download_date"])).days
|
||||
if cache_age > self.config.max_cache_age_days:
|
||||
return None
|
||||
|
||||
if meta.get("start_date") != key.start_date.isoformat() or meta.get("end_date") != key.end_date.isoformat():
|
||||
return None
|
||||
|
||||
if meta.get("dataset") != key.dataset or meta.get("symbol") != key.symbol:
|
||||
return None
|
||||
|
||||
# Load parquet and convert
|
||||
if pd is None:
|
||||
return None
|
||||
|
||||
df = pd.read_parquet(cache_file)
|
||||
return self._df_to_daily_points(df)
|
||||
except Exception:
|
||||
@@ -163,11 +166,21 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
)
|
||||
return data.to_df()
|
||||
|
||||
def _df_to_daily_points(self, df: Any) -> list[DailyClosePoint]:
|
||||
"""Convert DataFrame to DailyClosePoint list."""
|
||||
def _df_to_daily_points(self, df: Any) -> list[Any]:
|
||||
"""Convert DataFrame to DailyClosePoint list with OHLC data."""
|
||||
from app.services.backtesting.historical_provider import DailyClosePoint
|
||||
|
||||
if pd is None:
|
||||
return []
|
||||
|
||||
def parse_price(raw_val: Any) -> float | None:
|
||||
"""Parse Databento price (int64 scaled by 1e9)."""
|
||||
if raw_val is None or (isinstance(raw_val, float) and pd.isna(raw_val)):
|
||||
return None
|
||||
if isinstance(raw_val, (int, float)):
|
||||
return float(raw_val) / 1e9 if raw_val > 1e9 else float(raw_val)
|
||||
return float(raw_val) if raw_val else None
|
||||
|
||||
points = []
|
||||
for idx, row in df.iterrows():
|
||||
# Databento ohlcv schema has ts_event as timestamp
|
||||
@@ -179,18 +192,26 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
ts_str = str(ts)
|
||||
row_date = date.fromisoformat(ts_str[:10])
|
||||
|
||||
# Databento prices are int64 scaled by 1e-9
|
||||
close_raw = row.get("close", 0)
|
||||
if isinstance(close_raw, (int, float)):
|
||||
close = float(close_raw) / 1e9 if close_raw > 1e9 else float(close_raw)
|
||||
else:
|
||||
close = float(close_raw)
|
||||
close = parse_price(row.get("close"))
|
||||
low = parse_price(row.get("low"))
|
||||
high = parse_price(row.get("high"))
|
||||
open_price = parse_price(row.get("open"))
|
||||
|
||||
if close > 0:
|
||||
points.append(DailyClosePoint(date=row_date, close=close))
|
||||
if close and close > 0:
|
||||
points.append(
|
||||
DailyClosePoint(
|
||||
date=row_date,
|
||||
close=close,
|
||||
low=low,
|
||||
high=high,
|
||||
open=open_price,
|
||||
)
|
||||
)
|
||||
|
||||
return sorted(points, key=lambda p: p.date)
|
||||
|
||||
from app.services.backtesting.historical_provider import DailyClosePoint
|
||||
|
||||
def load_daily_closes(self, symbol: str, start_date: date, end_date: date) -> list[DailyClosePoint]:
|
||||
"""Load daily closing prices from Databento (with caching).
|
||||
|
||||
@@ -281,7 +302,7 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
return 0.0 # Return 0 if cost estimation fails
|
||||
|
||||
def get_available_range(self, symbol: str) -> tuple[date | None, date | None]:
|
||||
"""Get the available date range for a symbol.
|
||||
"""Get the available date range for a symbol from Databento.
|
||||
|
||||
Args:
|
||||
symbol: Trading symbol
|
||||
@@ -289,77 +310,33 @@ class DatabentoHistoricalPriceSource(HistoricalPriceSource):
|
||||
Returns:
|
||||
Tuple of (start_date, end_date) or (None, None) if unavailable
|
||||
"""
|
||||
dataset = self._resolve_dataset(symbol)
|
||||
|
||||
try:
|
||||
range_info = self.client.metadata.get_dataset_range(dataset=dataset)
|
||||
start_str = range_info.get("start", "")
|
||||
end_str = range_info.get("end", "")
|
||||
|
||||
start = date.fromisoformat(start_str[:10]) if start_str else None
|
||||
end = date.fromisoformat(end_str[:10]) if end_str else None
|
||||
|
||||
return start, end
|
||||
except Exception:
|
||||
return None, None
|
||||
|
||||
def clear_cache(self) -> int:
|
||||
"""Clear all cached data files.
|
||||
|
||||
Returns:
|
||||
Number of files deleted
|
||||
"""
|
||||
count = 0
|
||||
for file in self.config.cache_dir.glob("*"):
|
||||
if file.is_file():
|
||||
file.unlink()
|
||||
count += 1
|
||||
return count
|
||||
# Note: Databento availability depends on the dataset
|
||||
# For now, return None to indicate we should try fetching
|
||||
return None, None
|
||||
|
||||
def get_cache_stats(self) -> dict[str, Any]:
|
||||
"""Get cache statistics.
|
||||
"""Get cache statistics."""
|
||||
cache_dir = self.config.cache_dir
|
||||
if not cache_dir.exists():
|
||||
return {"status": "empty", "entries": []}
|
||||
|
||||
Returns:
|
||||
Dict with total_size_bytes, file_count, oldest_download, entries
|
||||
"""
|
||||
total_size = 0
|
||||
file_count = 0
|
||||
oldest_download: date | None = None
|
||||
entries: list[dict[str, Any]] = []
|
||||
|
||||
for meta_file in self.config.cache_dir.glob("*_meta.json"):
|
||||
entries = []
|
||||
for meta_file in cache_dir.glob("*_meta.json"):
|
||||
try:
|
||||
with open(meta_file) as f:
|
||||
meta = json.load(f)
|
||||
|
||||
download_date = date.fromisoformat(meta["download_date"])
|
||||
cache_file = meta_file.with_name(meta_file.stem.replace("_meta", "") + ".parquet")
|
||||
|
||||
size = cache_file.stat().st_size if cache_file.exists() else 0
|
||||
total_size += size
|
||||
file_count += 2 # meta + parquet
|
||||
|
||||
if oldest_download is None or download_date < oldest_download:
|
||||
oldest_download = download_date
|
||||
|
||||
entries.append(
|
||||
{
|
||||
"dataset": meta["dataset"],
|
||||
"symbol": meta["symbol"],
|
||||
"start_date": meta["start_date"],
|
||||
"end_date": meta["end_date"],
|
||||
"rows": meta.get("rows", 0),
|
||||
"cost_usd": meta.get("cost_usd", 0.0),
|
||||
"download_date": meta["download_date"],
|
||||
"size_bytes": size,
|
||||
"symbol": meta.get("symbol"),
|
||||
"dataset": meta.get("dataset"),
|
||||
"start_date": meta.get("start_date"),
|
||||
"end_date": meta.get("end_date"),
|
||||
"download_date": meta.get("download_date"),
|
||||
"rows": meta.get("rows"),
|
||||
"cost_usd": meta.get("cost_usd"),
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
return {
|
||||
"total_size_bytes": total_size,
|
||||
"file_count": file_count,
|
||||
"oldest_download": oldest_download.isoformat() if oldest_download else None,
|
||||
"entries": entries,
|
||||
}
|
||||
return {"status": "populated" if entries else "empty", "entries": entries}
|
||||
|
||||
Reference in New Issue
Block a user