CONV-001:
- Add GLD_LAUNCH_DATE constant (November 18, 2004)
- Validate reference_date in gld_ounces_per_share()
- Raise ValueError for dates before GLD launch
- Update docstring with valid date range
- Add comprehensive test coverage for edge cases

DATA-DB-003:
- Create scripts/cache_cli.py with three commands:
  - vault-dash cache stats: Show cache statistics
  - vault-dash cache list: List cached entries
  - vault-dash cache clear: Clear all cache files
- Add Makefile targets: cache-stats, cache-list, cache-clear
- Integrate with DatabentoHistoricalPriceSource methods
309 lines · 10 KiB · Python
"""Tests for Databento historical price source."""
|
|
|
|
from __future__ import annotations

import importlib.util
import json
import tempfile
from datetime import date, timedelta
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from app.services.backtesting.databento_source import (
    DatabentoCacheKey,
    DatabentoHistoricalPriceSource,
    DatabentoSourceConfig,
)
|
|
|
|
|
|
@pytest.fixture
def temp_cache_dir():
    """Yield a throwaway directory to act as the cache root.

    The directory and everything written into it are removed when the
    test that requested the fixture finishes.
    """
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
|
|
|
|
|
|
@pytest.fixture
def mock_databento_client():
    """Provide a stand-in Databento client (a bare MagicMock)."""
    return MagicMock()
|
|
|
|
|
|
@pytest.fixture
def sample_ohlcv_df():
    """Build a small OHLCV frame with scaled-int64 close prices.

    Close values are stored scaled by 1e9 (e.g. 185000000000 == 185.0),
    the fixed-point format the source under test converts from.
    """
    import pandas as pd

    closes = {
        "2024-01-02": 185000000000,  # 185.0
        "2024-01-03": 186500000000,  # 186.5
        "2024-01-04": 184000000000,  # 184.0
        "2024-01-05": 187000000000,  # 187.0
    }
    return pd.DataFrame(
        [{"ts_event": ts, "close": px} for ts, px in closes.items()]
    )
|
|
|
|
|
|
class TestDatabentoCacheKey:
    """Tests for DatabentoCacheKey."""

    @staticmethod
    def _gld_january_key():
        """Build the GLD daily-bar key for January 2024 shared by these tests."""
        return DatabentoCacheKey(
            dataset="XNAS.BASIC",
            symbol="GLD",
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )

    def test_cache_path_generation(self, temp_cache_dir: Path) -> None:
        """Cache path is deterministic for same parameters."""
        key = self._gld_january_key()

        first = key.cache_path(temp_cache_dir)
        second = key.cache_path(temp_cache_dir)

        # Same key must always map to the same parquet file name.
        assert first == second
        assert first.suffix == ".parquet"
        assert first.name.startswith("dbn_")

    def test_metadata_path_generation(self, temp_cache_dir: Path) -> None:
        """Metadata path matches cache path."""
        key = self._gld_january_key()

        data_path = key.cache_path(temp_cache_dir)
        sidecar = key.metadata_path(temp_cache_dir)

        # Metadata is a JSON sidecar named after the parquet file.
        assert sidecar.stem == data_path.stem + "_meta"
        assert sidecar.suffix == ".json"
|
|
|
|
|
|
class TestDatabentoSourceConfig:
    """Tests for DatabentoSourceConfig."""

    def test_default_config(self) -> None:
        """Default config uses XNAS.BASIC and daily bars."""
        defaults = DatabentoSourceConfig()

        assert defaults.dataset == "XNAS.BASIC"
        assert defaults.schema == "ohlcv-1d"
        assert defaults.max_cache_age_days == 30
        # No API key by default; callers must supply one for live fetches.
        assert defaults.api_key is None

    def test_custom_config(self) -> None:
        """Custom config overrides defaults."""
        overridden = DatabentoSourceConfig(
            api_key="test-key",
            dataset="GLBX.MDP3",
            schema="ohlcv-1h",
            max_cache_age_days=7,
        )

        assert overridden.api_key == "test-key"
        assert overridden.dataset == "GLBX.MDP3"
        assert overridden.schema == "ohlcv-1h"
        assert overridden.max_cache_age_days == 7
|
|
|
|
|
|
class TestDatabentoHistoricalPriceSource:
    """Tests for DatabentoHistoricalPriceSource."""

    @staticmethod
    def _bare_source(config: DatabentoSourceConfig):
        """Build a source without running __init__ (no client / API key needed)."""
        src = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource)
        src.config = config
        return src

    @staticmethod
    def _january_key(dataset: str = "XNAS.BASIC", symbol: str = "GLD"):
        """Daily-bar cache key for January 2024."""
        return DatabentoCacheKey(
            dataset=dataset,
            symbol=symbol,
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )

    def test_resolve_dataset_gld(self) -> None:
        """GLD resolves to XNAS.BASIC."""
        src = self._bare_source(DatabentoSourceConfig())

        # Resolution is case-insensitive and covers the mini-share GLDM too.
        for ticker in ("GLD", "gld", "GLDM"):
            assert src._resolve_dataset(ticker) == "XNAS.BASIC"

    def test_resolve_dataset_gc_f(self) -> None:
        """GC=F resolves to GLBX.MDP3."""
        src = self._bare_source(DatabentoSourceConfig())

        for ticker in ("GC=F", "GC"):
            assert src._resolve_dataset(ticker) == "GLBX.MDP3"

    def test_resolve_dataset_xau(self) -> None:
        """XAU resolves to XNAS.BASIC (GLD proxy)."""
        src = self._bare_source(DatabentoSourceConfig())

        assert src._resolve_dataset("XAU") == "XNAS.BASIC"

    def test_resolve_symbol_xau(self) -> None:
        """XAU resolves to GLD symbol."""
        src = self._bare_source(DatabentoSourceConfig())

        assert src._resolve_symbol("XAU") == "GLD"

    def test_resolve_symbol_gc_f(self) -> None:
        """GC=F resolves to GC parent symbol."""
        src = self._bare_source(DatabentoSourceConfig())

        assert src._resolve_symbol("GC=F") == "GC"

    def test_df_to_daily_points_converts_prices(self) -> None:
        """DataFrame prices are converted from int64 scaled format."""
        import pandas as pd

        src = self._bare_source(DatabentoSourceConfig())
        frame = pd.DataFrame(
            [
                {"ts_event": "2024-01-02", "close": 185000000000},  # 185.0
                {"ts_event": "2024-01-03", "close": 186500000000},  # 186.5
            ]
        )

        points = src._df_to_daily_points(frame)

        assert len(points) == 2
        assert points[0].date == date(2024, 1, 2)
        assert points[0].close == 185.0
        assert points[1].close == 186.5

    def test_load_from_cache_returns_none_if_missing(self, temp_cache_dir: Path) -> None:
        """Returns None if cache files don't exist."""
        src = self._bare_source(DatabentoSourceConfig(cache_dir=temp_cache_dir))

        assert src._load_from_cache(self._january_key()) is None

    def test_load_from_cache_returns_data_if_fresh(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """Returns cached data if within age threshold."""
        src = self._bare_source(DatabentoSourceConfig(cache_dir=temp_cache_dir))
        key = self._january_key()

        src._save_to_cache(key, sample_ohlcv_df)
        cached = src._load_from_cache(key)

        assert cached is not None
        assert len(cached) == 4
        assert cached[0].close == 185.0

    def test_load_from_cache_returns_none_if_stale(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """Returns None if cache exceeds age threshold."""
        src = self._bare_source(
            DatabentoSourceConfig(
                cache_dir=temp_cache_dir,
                max_cache_age_days=0,  # any age at all counts as stale
            )
        )
        key = self._january_key()
        src._save_to_cache(key, sample_ohlcv_df)

        # Backdate the sidecar metadata so the entry looks one day old.
        meta_file = key.metadata_path(temp_cache_dir)
        meta = json.loads(meta_file.read_text())
        meta["download_date"] = (date.today() - timedelta(days=1)).isoformat()
        meta_file.write_text(json.dumps(meta))

        # The aged entry must be rejected.
        assert src._load_from_cache(key) is None

    @patch("app.services.backtesting.databento_source.DATABENTO_AVAILABLE", False)
    def test_raises_if_databento_not_installed(self) -> None:
        """Raises error if databento package not installed."""
        with pytest.raises(RuntimeError, match="databento package required"):
            DatabentoHistoricalPriceSource()

    def test_clear_cache(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """Clears all cache files."""
        src = self._bare_source(DatabentoSourceConfig(cache_dir=temp_cache_dir))

        src._save_to_cache(self._january_key(), sample_ohlcv_df)
        src._save_to_cache(
            self._january_key(dataset="GLBX.MDP3", symbol="GC"), sample_ohlcv_df
        )

        # Two entries, each a parquet file plus a JSON sidecar.
        assert src.clear_cache() == 4
|
|
|
|
|
|
class TestDatabentoHistoricalPriceSourceIntegration:
    """Integration tests (require databento package)."""

    # BUG FIX: the previous guard was `not DatabentoHistoricalPriceSource.__module__`,
    # which is always False (`__module__` is a non-empty string), so the test was
    # never skipped even when databento was missing.  Probe for the package itself.
    @pytest.mark.skipif(
        importlib.util.find_spec("databento") is None,
        reason="databento not installed",
    )
    def test_get_cache_stats(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
        """Cache stats report file count, total size, and per-entry details."""
        # Bypass __init__ so no API key / live client is needed.
        source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource)
        source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir)

        key = DatabentoCacheKey(
            dataset="XNAS.BASIC",
            symbol="GLD",
            schema="ohlcv-1d",
            start_date=date(2024, 1, 1),
            end_date=date(2024, 1, 31),
        )
        source._save_to_cache(key, sample_ohlcv_df)

        stats = source.get_cache_stats()

        # One cached entry == one parquet file + one JSON metadata sidecar.
        assert stats["file_count"] == 2
        assert stats["total_size_bytes"] > 0
        assert len(stats["entries"]) == 1
        assert stats["entries"][0]["symbol"] == "GLD"
|