feat(DATA-DB-001): add Databento historical price source for backtesting
- Add DatabentoHistoricalPriceSource implementing HistoricalPriceSource protocol
- Smart caching with Parquet storage and metadata tracking
- Auto symbol-to-dataset resolution (GLD→XNAS.BASIC, GC=F→GLBX.MDP3)
- Cache management with age threshold invalidation
- Cost estimation via metadata.get_cost()
- Add databento>=0.30.0 to requirements.txt
- Add DATABENTO_API_KEY to .env.example
- Full test coverage with 16 tests
This commit is contained in:
310
tests/test_databento_source.py
Normal file
310
tests/test_databento_source.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""Tests for Databento historical price source."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
import json
import tempfile
from datetime import date, timedelta
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from app.services.backtesting.databento_source import (
    DatabentoCacheKey,
    DatabentoHistoricalPriceSource,
    DatabentoSourceConfig,
)
|
||||
|
||||
|
||||
@pytest.fixture
def temp_cache_dir():
    """Yield a throwaway directory for cache files, removed on teardown."""
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_databento_client():
    """Provide a stand-in object for the Databento client API."""
    return MagicMock()
|
||||
|
||||
|
||||
@pytest.fixture
def sample_ohlcv_df():
    """Build a four-day OHLCV frame with int64 nano-scaled close prices."""
    import pandas as pd

    # Closes correspond to 185.0, 186.5, 184.0 and 187.0 after descaling.
    frame = pd.DataFrame(
        {
            "ts_event": ["2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"],
            "close": [185000000000, 186500000000, 184000000000, 187000000000],
        }
    )
    return frame
|
||||
|
||||
|
||||
class TestDatabentoCacheKey:
    """Tests for DatabentoCacheKey path generation."""

    @staticmethod
    def _make_key() -> DatabentoCacheKey:
        """Build the canonical GLD / January-2024 daily-bar cache key.

        Extracted because both tests previously duplicated the same literal.
        """
        return DatabentoCacheKey(
            dataset="XNAS.BASIC",
            symbol="GLD",
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )

    def test_cache_path_generation(self, temp_cache_dir: Path) -> None:
        """Cache path is deterministic for the same parameters."""
        key = self._make_key()

        path1 = key.cache_path(temp_cache_dir)
        path2 = key.cache_path(temp_cache_dir)

        assert path1 == path2
        assert path1.suffix == ".parquet"
        assert path1.name.startswith("dbn_")

    def test_metadata_path_generation(self, temp_cache_dir: Path) -> None:
        """Metadata path mirrors the cache path with a ``_meta``/``.json`` suffix."""
        key = self._make_key()

        cache_path = key.cache_path(temp_cache_dir)
        meta_path = key.metadata_path(temp_cache_dir)

        assert meta_path.stem == cache_path.stem + "_meta"
        assert meta_path.suffix == ".json"
|
||||
|
||||
|
||||
class TestDatabentoSourceConfig:
    """Tests for DatabentoSourceConfig defaults and overrides."""

    def test_default_config(self) -> None:
        """Default config uses XNAS.BASIC and daily bars."""
        cfg = DatabentoSourceConfig()

        assert cfg.api_key is None
        assert cfg.dataset == "XNAS.BASIC"
        assert cfg.schema == "ohlcv-1d"
        assert cfg.max_cache_age_days == 30

    def test_custom_config(self) -> None:
        """Every keyword override is reflected on the built config."""
        overrides = {
            "api_key": "test-key",
            "dataset": "GLBX.MDP3",
            "schema": "ohlcv-1h",
            "max_cache_age_days": 7,
        }

        cfg = DatabentoSourceConfig(**overrides)

        for attr, expected in overrides.items():
            assert getattr(cfg, attr) == expected
|
||||
|
||||
|
||||
class TestDatabentoHistoricalPriceSource:
    """Tests for DatabentoHistoricalPriceSource.

    Uses ``__new__`` to build sources without running ``__init__`` so no
    API key or client construction is needed for unit-level tests.
    """

    @staticmethod
    def _make_source(**config_kwargs) -> DatabentoHistoricalPriceSource:
        """Create a source bypassing __init__, with the given config overrides.

        Extracted because this two-line boilerplate was repeated in every test.
        """
        source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource)
        source.config = DatabentoSourceConfig(**config_kwargs)
        return source

    @staticmethod
    def _make_key(dataset: str = "XNAS.BASIC", symbol: str = "GLD") -> DatabentoCacheKey:
        """Create a January-2024 daily-bar cache key (previously duplicated 4x)."""
        return DatabentoCacheKey(
            dataset=dataset,
            symbol=symbol,
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )

    def test_resolve_dataset_gld(self) -> None:
        """GLD (any case) and GLDM resolve to XNAS.BASIC."""
        source = self._make_source()

        assert source._resolve_dataset("GLD") == "XNAS.BASIC"
        assert source._resolve_dataset("gld") == "XNAS.BASIC"
        assert source._resolve_dataset("GLDM") == "XNAS.BASIC"

    def test_resolve_dataset_gc_f(self) -> None:
        """GC=F and bare GC resolve to GLBX.MDP3."""
        source = self._make_source()

        assert source._resolve_dataset("GC=F") == "GLBX.MDP3"
        assert source._resolve_dataset("GC") == "GLBX.MDP3"

    def test_resolve_dataset_xau(self) -> None:
        """XAU resolves to XNAS.BASIC (GLD proxy)."""
        source = self._make_source()

        assert source._resolve_dataset("XAU") == "XNAS.BASIC"

    def test_resolve_symbol_xau(self) -> None:
        """XAU resolves to the GLD proxy symbol."""
        source = self._make_source()

        assert source._resolve_symbol("XAU") == "GLD"

    def test_resolve_symbol_gc_f(self) -> None:
        """GC=F resolves to the GC parent symbol."""
        source = self._make_source()

        assert source._resolve_symbol("GC=F") == "GC"

    def test_df_to_daily_points_converts_prices(self) -> None:
        """DataFrame prices are converted from the int64 scaled format."""
        import pandas as pd

        source = self._make_source()
        df = pd.DataFrame(
            [
                {"ts_event": "2024-01-02", "close": 185000000000},  # 185.0
                {"ts_event": "2024-01-03", "close": 186500000000},  # 186.5
            ]
        )

        points = source._df_to_daily_points(df)

        assert len(points) == 2
        assert points[0].date == date(2024, 1, 2)
        assert points[0].close == 185.0
        assert points[1].close == 186.5

    def test_load_from_cache_returns_none_if_missing(self, temp_cache_dir: Path) -> None:
        """Returns None if cache files don't exist."""
        source = self._make_source(cache_dir=temp_cache_dir)

        assert source._load_from_cache(self._make_key()) is None

    def test_load_from_cache_returns_data_if_fresh(
        self, temp_cache_dir: Path, sample_ohlcv_df
    ) -> None:
        """Returns cached data if within the age threshold."""
        source = self._make_source(cache_dir=temp_cache_dir)
        key = self._make_key()

        source._save_to_cache(key, sample_ohlcv_df)
        result = source._load_from_cache(key)

        assert result is not None
        assert len(result) == 4
        assert result[0].close == 185.0

    def test_load_from_cache_returns_none_if_stale(
        self, temp_cache_dir: Path, sample_ohlcv_df
    ) -> None:
        """Returns None once the cache exceeds the age threshold."""
        source = self._make_source(
            cache_dir=temp_cache_dir,
            max_cache_age_days=0,  # any download older than today is stale
        )
        key = self._make_key()
        source._save_to_cache(key, sample_ohlcv_df)

        # Manually age the cache by rewriting download_date to yesterday.
        meta_file = key.metadata_path(temp_cache_dir)
        meta = json.loads(meta_file.read_text())
        meta["download_date"] = (date.today() - timedelta(days=1)).isoformat()
        meta_file.write_text(json.dumps(meta))

        # Load from cache (should fail due to age).
        assert source._load_from_cache(key) is None

    @patch("app.services.backtesting.databento_source.DATABENTO_AVAILABLE", False)
    def test_raises_if_databento_not_installed(self) -> None:
        """Raises RuntimeError if the databento package is not installed."""
        with pytest.raises(RuntimeError, match="databento package required"):
            DatabentoHistoricalPriceSource()

    def test_clear_cache(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """clear_cache removes every parquet/metadata pair and reports the count."""
        source = self._make_source(cache_dir=temp_cache_dir)

        # Populate two distinct cache entries.
        source._save_to_cache(self._make_key(), sample_ohlcv_df)
        source._save_to_cache(
            self._make_key(dataset="GLBX.MDP3", symbol="GC"), sample_ohlcv_df
        )

        assert source.clear_cache() == 4  # 2 parquet + 2 json
|
||||
|
||||
|
||||
class TestDatabentoHistoricalPriceSourceIntegration:
    """Integration tests (require the databento package)."""

    @pytest.mark.skipif(
        # BUG FIX: the previous condition, `not DatabentoHistoricalPriceSource.__module__`,
        # was always False (`__module__` is a non-empty string), so the skip guard
        # was dead and the test ran even without databento installed. Check real
        # package availability instead.
        importlib.util.find_spec("databento") is None,
        reason="databento not installed",
    )
    def test_get_cache_stats(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """Cache stats report file count, total size, and per-entry metadata."""
        source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource)
        source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir)

        key = DatabentoCacheKey(
            dataset="XNAS.BASIC",
            symbol="GLD",
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )
        source._save_to_cache(key, sample_ohlcv_df)

        stats = source.get_cache_stats()

        assert stats["file_count"] == 2  # one parquet + one metadata json
        assert stats["total_size_bytes"] > 0
        assert len(stats["entries"]) == 1
        assert stats["entries"][0]["symbol"] == "GLD"
|
||||
Reference in New Issue
Block a user