"""Tests for Databento historical price source.""" from __future__ import annotations import json import tempfile from datetime import date, timedelta from pathlib import Path from unittest.mock import MagicMock, patch import pytest from app.services.backtesting.databento_source import ( DatabentoCacheKey, DatabentoHistoricalPriceSource, DatabentoSourceConfig, ) @pytest.fixture def temp_cache_dir(): """Create a temporary cache directory.""" with tempfile.TemporaryDirectory() as tmpdir: yield Path(tmpdir) @pytest.fixture def mock_databento_client(): """Create a mock Databento client.""" mock_client = MagicMock() return mock_client @pytest.fixture def sample_ohlcv_df(): """Create sample OHLCV DataFrame.""" import pandas as pd data = [ {"ts_event": "2024-01-02", "close": 185000000000}, # 185.0 {"ts_event": "2024-01-03", "close": 186500000000}, # 186.5 {"ts_event": "2024-01-04", "close": 184000000000}, # 184.0 {"ts_event": "2024-01-05", "close": 187000000000}, # 187.0 ] return pd.DataFrame(data) class TestDatabentoCacheKey: """Tests for DatabentoCacheKey.""" def test_cache_path_generation(self, temp_cache_dir: Path) -> None: """Cache path is deterministic for same parameters.""" key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) path1 = key.cache_path(temp_cache_dir) path2 = key.cache_path(temp_cache_dir) assert path1 == path2 assert path1.suffix == ".parquet" assert path1.name.startswith("dbn_") def test_metadata_path_generation(self, temp_cache_dir: Path) -> None: """Metadata path matches cache path.""" key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) cache_path = key.cache_path(temp_cache_dir) meta_path = key.metadata_path(temp_cache_dir) assert meta_path.stem == cache_path.stem + "_meta" assert meta_path.suffix == ".json" class TestDatabentoSourceConfig: """Tests for DatabentoSourceConfig.""" def test_default_config(self) -> None: """Default config uses XNAS.BASIC and daily bars.""" config = DatabentoSourceConfig() assert config.dataset == "XNAS.BASIC" assert config.schema == "ohlcv-1d" assert config.max_cache_age_days == 30 assert config.api_key is None def test_custom_config(self) -> None: """Custom config overrides defaults.""" config = DatabentoSourceConfig( api_key="test-key", dataset="GLBX.MDP3", schema="ohlcv-1h", max_cache_age_days=7, ) assert config.api_key == "test-key" assert config.dataset == "GLBX.MDP3" assert config.schema == "ohlcv-1h" assert config.max_cache_age_days == 7 class TestDatabentoHistoricalPriceSource: """Tests for DatabentoHistoricalPriceSource.""" def test_resolve_dataset_gld(self) -> None: """GLD resolves to XNAS.BASIC.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() assert source._resolve_dataset("GLD") == "XNAS.BASIC" assert source._resolve_dataset("gld") == "XNAS.BASIC" assert source._resolve_dataset("GLDM") == "XNAS.BASIC" def test_resolve_dataset_gc_f(self) -> None: """GC=F resolves to GLBX.MDP3.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() assert source._resolve_dataset("GC=F") == "GLBX.MDP3" assert source._resolve_dataset("GC") == "GLBX.MDP3" def test_resolve_dataset_xau(self) -> None: """XAU resolves to XNAS.BASIC (GLD proxy).""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() assert source._resolve_dataset("XAU") == "XNAS.BASIC" def test_resolve_symbol_xau(self) -> None: """XAU resolves to GLD symbol.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() assert source._resolve_symbol("XAU") == "GLD" def test_resolve_symbol_gc_f(self) -> None: """GC=F resolves to GC parent symbol.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() assert source._resolve_symbol("GC=F") == "GC" def test_df_to_daily_points_converts_prices(self) -> None: """DataFrame prices are converted from int64 scaled format.""" import pandas as pd source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig() df = pd.DataFrame( [ {"ts_event": "2024-01-02", "close": 185000000000}, # 185.0 {"ts_event": "2024-01-03", "close": 186500000000}, # 186.5 ] ) points = source._df_to_daily_points(df) assert len(points) == 2 assert points[0].date == date(2024, 1, 2) assert points[0].close == 185.0 assert points[1].close == 186.5 def test_load_from_cache_returns_none_if_missing(self, temp_cache_dir: Path) -> None: """Returns None if cache files don't exist.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir) key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) result = source._load_from_cache(key) assert result is None def test_load_from_cache_returns_data_if_fresh(self, temp_cache_dir: Path, sample_ohlcv_df) -> None: """Returns cached data if within age threshold.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir) key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) # Save to cache source._save_to_cache(key, sample_ohlcv_df) # Load from cache result = source._load_from_cache(key) assert result is not None assert len(result) == 4 assert result[0].close == 185.0 def test_load_from_cache_returns_none_if_stale( self, temp_cache_dir: Path, sample_ohlcv_df ) -> None: """Returns None if cache exceeds age threshold.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig( cache_dir=temp_cache_dir, max_cache_age_days=0, # Always stale ) key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) # Save to cache source._save_to_cache(key, sample_ohlcv_df) # Manually age the cache by setting download_date to yesterday meta_file = key.metadata_path(temp_cache_dir) with open(meta_file) as f: meta = json.load(f) meta["download_date"] = (date.today() - timedelta(days=1)).isoformat() with open(meta_file, "w") as f: json.dump(meta, f) # Load from cache (should fail due to age) result = source._load_from_cache(key) assert result is None @patch("app.services.backtesting.databento_source.DATABENTO_AVAILABLE", False) def test_raises_if_databento_not_installed(self) -> None: """Raises error if databento package not installed.""" with pytest.raises(RuntimeError, match="databento package required"): DatabentoHistoricalPriceSource() def test_clear_cache(self, temp_cache_dir: Path, sample_ohlcv_df) -> None: """Clears all cache files.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir) # Create some cache files key1 = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) key2 = DatabentoCacheKey( dataset="GLBX.MDP3", symbol="GC", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) source._save_to_cache(key1, sample_ohlcv_df) source._save_to_cache(key2, sample_ohlcv_df) count = source.clear_cache() assert count == 4 # 2 parquet + 2 json class TestDatabentoHistoricalPriceSourceIntegration: """Integration tests (require databento package).""" @pytest.mark.skipif( not DatabentoHistoricalPriceSource.__module__, reason="databento not installed", ) def test_get_cache_stats(self, temp_cache_dir: Path, sample_ohlcv_df) -> None: """Returns cache statistics.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source.config = DatabentoSourceConfig(cache_dir=temp_cache_dir) key = DatabentoCacheKey( dataset="XNAS.BASIC", symbol="GLD", schema="ohlcv-1d", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31), ) source._save_to_cache(key, sample_ohlcv_df) stats = source.get_cache_stats() assert stats["file_count"] == 2 assert stats["total_size_bytes"] > 0 assert len(stats["entries"]) == 1 assert stats["entries"][0]["symbol"] == "GLD"