From dc4ee1f261813e7daf961f7ee8bd6a50eee5d33d Mon Sep 17 00:00:00 2001 From: Bu5hm4nn Date: Sun, 29 Mar 2026 12:00:30 +0200 Subject: [PATCH] feat(CONV-001): add GLD launch date validation, feat(DATA-DB-003): add cache CLI CONV-001: - Add GLD_LAUNCH_DATE constant (November 18, 2004) - Validate reference_date in gld_ounces_per_share() - Raise ValueError for dates before GLD launch - Update docstring with valid date range - Add comprehensive test coverage for edge cases DATA-DB-003: - Create scripts/cache_cli.py with three commands: - vault-dash cache stats: Show cache statistics - vault-dash cache list: List cached entries - vault-dash cache clear: Clear all cache files - Add Makefile targets: cache-stats, cache-list, cache-clear - Integrate with DatabentoHistoricalPriceSource methods --- Makefile | 12 +- app/domain/instruments.py | 23 ++- app/services/backtesting/ui_service.py | 10 +- scripts/cache_cli.py | 210 +++++++++++++++++++++++++ tests/test_databento_source.py | 4 +- tests/test_instruments.py | 34 +++- 6 files changed, 276 insertions(+), 17 deletions(-) create mode 100644 scripts/cache_cli.py diff --git a/Makefile b/Makefile index 6b2ad6d..b3733e8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install dev lint test build deploy +.PHONY: install dev lint test build deploy cache-stats cache-clear cache-list install: python3 -m venv .venv @@ -19,3 +19,13 @@ build: lint deploy: ./scripts/deploy.sh + +# Cache management commands +cache-stats: + . .venv/bin/activate && python scripts/cache_cli.py stats + +cache-list: + . .venv/bin/activate && python scripts/cache_cli.py list + +cache-clear: + . 
.venv/bin/activate && python scripts/cache_cli.py clear --yes diff --git a/app/domain/instruments.py b/app/domain/instruments.py index f04ebf2..8f2979e 100644 --- a/app/domain/instruments.py +++ b/app/domain/instruments.py @@ -36,6 +36,7 @@ class Underlying(str, Enum): GLD_INITIAL_OUNCES_PER_SHARE = Decimal("0.10") GLD_EXPENSE_DECAY_RATE = Decimal("0.004") # 0.4% annual decay GLD_LAUNCH_YEAR = 2004 +GLD_LAUNCH_DATE = date(2004, 11, 18) # GLD IPO date on NYSE # GC=F contract specifications GC_F_OUNCES_PER_CONTRACT = Decimal("100") # 100 troy oz per contract @@ -44,29 +45,41 @@ GC_F_QUOTE_CURRENCY = BaseCurrency.USD def gld_ounces_per_share(reference_date: date | None = None) -> Decimal: """ - Calculate GLD's current gold backing per share based on expense ratio decay. + Calculate GLD's gold backing per share for a specific date. GLD's expense ratio (0.40% annually) causes the gold backing per share to - decay exponentially from the initial 0.10 oz/share at launch (2004). + decay exponentially from the initial 0.10 oz/share at launch (November 18, 2004). Formula: ounces_per_share = 0.10 * e^(-0.004 * years_since_2004) Args: - reference_date: Date to calculate backing for. Defaults to today. + reference_date: Date to calculate backing for. Must be on or after + GLD launch date (2004-11-18). Defaults to today. Returns: Decimal representing troy ounces of gold backing per GLD share. + Raises: + ValueError: If reference_date is before GLD launch (2004-11-18). + Examples: - >>> # 2026 backing should be ~0.0919 oz/share (8.1% decay) >>> from datetime import date + >>> # Launch date returns initial 0.10 oz/share + >>> gld_ounces_per_share(date(2004, 11, 18)) + Decimal('0.100') + >>> # 2026 backing should be ~0.0916 oz/share (8.4% decay) >>> result = gld_ounces_per_share(date(2026, 1, 1)) >>> float(result) # doctest: +SKIP - 0.0919... + 0.0916... 
""" if reference_date is None: reference_date = date.today() + if reference_date < GLD_LAUNCH_DATE: + raise ValueError( + f"GLD backing data unavailable before {GLD_LAUNCH_DATE}. " f"GLD launched on November 18, 2004." + ) + years_since_launch = Decimal(reference_date.year - GLD_LAUNCH_YEAR) decay_factor = Decimal(str(math.exp(-float(GLD_EXPENSE_DECAY_RATE * years_since_launch)))) return GLD_INITIAL_OUNCES_PER_SHARE * decay_factor diff --git a/app/services/backtesting/ui_service.py b/app/services/backtesting/ui_service.py index 415a468..5561f30 100644 --- a/app/services/backtesting/ui_service.py +++ b/app/services/backtesting/ui_service.py @@ -315,11 +315,13 @@ class BacktestPageService: source_info = self.DATA_SOURCE_INFO.get(data_source, self.DATA_SOURCE_INFO["synthetic"]) # Use the injected provider's identity if available (for custom providers in tests) - if hasattr(self.backtest_service, 'provider'): - injected_provider_id = getattr(self.backtest_service.provider, 'provider_id', None) - injected_pricing_mode = getattr(self.backtest_service.provider, 'pricing_mode', None) + if hasattr(self.backtest_service, "provider"): + injected_provider_id = getattr(self.backtest_service.provider, "provider_id", None) + injected_pricing_mode = getattr(self.backtest_service.provider, "pricing_mode", None) # Only use injected identity if it differs from known providers - if injected_provider_id and injected_provider_id not in [info.provider_id for info in self.DATA_SOURCE_INFO.values()]: + if injected_provider_id and injected_provider_id not in [ + info.provider_id for info in self.DATA_SOURCE_INFO.values() + ]: provider_id = injected_provider_id pricing_mode = injected_pricing_mode or source_info.pricing_mode else: diff --git a/scripts/cache_cli.py b/scripts/cache_cli.py new file mode 100644 index 0000000..f9f84b7 --- /dev/null +++ b/scripts/cache_cli.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 +"""CLI commands for managing Databento cache. 
+ +Commands: + vault-dash cache stats Show cache statistics + vault-dash cache list Show all cached entries + vault-dash cache clear Clear all cache files + +Usage: + python scripts/cache_cli.py stats + python scripts/cache_cli.py list + python scripts/cache_cli.py clear [--yes] +""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path + +# Add project root to path for imports +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from app.services.backtesting.databento_source import ( # noqa: E402 + DatabentoHistoricalPriceSource, + DatabentoSourceConfig, +) + + +def format_size(size_bytes: int) -> str: + """Format bytes as human-readable size.""" + if size_bytes < 1024: + return f"{size_bytes}B" + elif size_bytes < 1024 * 1024: + return f"{size_bytes / 1024:.1f}KB" + elif size_bytes < 1024 * 1024 * 1024: + return f"{size_bytes / (1024 * 1024):.1f}MB" + else: + return f"{size_bytes / (1024 * 1024 * 1024):.2f}GB" + + +def cmd_stats(args: argparse.Namespace) -> int: + """Show cache statistics.""" + config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir)) + source = DatabentoHistoricalPriceSource(config) + + try: + stats = source.get_cache_stats() + except Exception as e: + print(f"Error getting cache stats: {e}", file=sys.stderr) + return 1 + + total_size = stats["total_size_bytes"] + file_count = stats["file_count"] + oldest = stats["oldest_download"] + entries = stats["entries"] + + print(f"Cache Directory: {config.cache_dir}") + print(f"Total Size: {format_size(total_size)}") + print(f"File Count: {file_count} files") + print(f"Oldest Download: {oldest or 'N/A'}") + + if entries: + print() + print("Entries:") + for entry in entries: + cost_usd = entry.get("cost_usd", 0.0) or 0.0 + print( + f" - {entry['symbol']}/{entry['dataset']}/{entry['schema']}/" + f"{entry['start_date']}_{entry['end_date']}: " + f"{entry['rows']} rows, ${cost_usd:.2f}" + ) + else: + print() + print("No cached 
entries found.") + + return 0 + + +def cmd_list(args: argparse.Namespace) -> int: + """List all cached entries in tabular format.""" + config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir)) + source = DatabentoHistoricalPriceSource(config) + + try: + stats = source.get_cache_stats() + except Exception as e: + print(f"Error getting cache stats: {e}", file=sys.stderr) + return 1 + + entries = stats["entries"] + + if not entries: + print("No cached entries found.") + return 0 + + # Print header + header = ( + f"{'Dataset':<12} {'Symbol':<8} {'Schema':<10} " + f"{'Start':<12} {'End':<12} {'Rows':>6} " + f"{'Downloaded':<12} {'Size':>8}" + ) + print(header) + print("-" * len(header)) + + # Sort entries by dataset, symbol, start_date + sorted_entries = sorted( + entries, + key=lambda e: (e.get("dataset", ""), e.get("symbol", ""), e.get("start_date", "")), + ) + + for entry in sorted_entries: + print( + f"{entry['dataset']:<12} " + f"{entry['symbol']:<8} " + f"{entry['schema']:<10} " + f"{entry['start_date']:<12} " + f"{entry['end_date']:<12} " + f"{entry['rows']:>6} " + f"{entry['download_date']:<12} " + f"{format_size(entry['size_bytes']):>8}" + ) + + return 0 + + +def cmd_clear(args: argparse.Namespace) -> int: + """Clear all cache files.""" + config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir)) + source = DatabentoHistoricalPriceSource(config) + + # Get stats before clearing for confirmation + try: + stats = source.get_cache_stats() + except Exception as e: + print(f"Error getting cache stats: {e}", file=sys.stderr) + return 1 + + file_count = stats["file_count"] + total_size = stats["total_size_bytes"] + + if file_count == 0: + print("No cache files to clear.") + return 0 + + # Confirm unless --yes flag + if not args.yes: + print(f"This will delete {file_count} files ({format_size(total_size)}) from:") + print(f" {config.cache_dir}") + response = input("Proceed? 
[y/N]: ").strip().lower() + if response != "y": + print("Aborted.") + return 1 + + try: + deleted = source.clear_cache() + print(f"Cleared {deleted} files from cache.") + return 0 + except Exception as e: + print(f"Error clearing cache: {e}", file=sys.stderr) + return 1 + + +def main() -> int: + parser = argparse.ArgumentParser( + prog="vault-dash cache", + description="Manage Databento cache files.", + ) + parser.add_argument( + "--cache-dir", + default=".cache/databento", + help="Cache directory path (default: .cache/databento)", + ) + + subparsers = parser.add_subparsers(dest="command", required=True) + + # stats command + stats_parser = subparsers.add_parser( + "stats", + help="Show cache statistics", + ) + stats_parser.set_defaults(func=cmd_stats) + + # list command + list_parser = subparsers.add_parser( + "list", + help="List all cached entries", + ) + list_parser.set_defaults(func=cmd_list) + + # clear command + clear_parser = subparsers.add_parser( + "clear", + help="Clear all cache files", + ) + clear_parser.add_argument( + "-y", + "--yes", + action="store_true", + help="Skip confirmation prompt", + ) + clear_parser.set_defaults(func=cmd_clear) + + args = parser.parse_args() + return args.func(args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_databento_source.py b/tests/test_databento_source.py index f3e9bb7..e831cbd 100644 --- a/tests/test_databento_source.py +++ b/tests/test_databento_source.py @@ -211,9 +211,7 @@ class TestDatabentoHistoricalPriceSource: assert len(result) == 4 assert result[0].close == 185.0 - def test_load_from_cache_returns_none_if_stale( - self, temp_cache_dir: Path, sample_ohlcv_df - ) -> None: + def test_load_from_cache_returns_none_if_stale(self, temp_cache_dir: Path, sample_ohlcv_df) -> None: """Returns None if cache exceeds age threshold.""" source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) diff --git a/tests/test_instruments.py b/tests/test_instruments.py 
index ac89e10..a40c1ab 100644 --- a/tests/test_instruments.py +++ b/tests/test_instruments.py @@ -10,6 +10,7 @@ from app.domain.instruments import ( GC_F_OUNCES_PER_CONTRACT, GLD_EXPENSE_DECAY_RATE, GLD_INITIAL_OUNCES_PER_SHARE, + GLD_LAUNCH_DATE, GLD_LAUNCH_YEAR, Underlying, asset_quantity_from_weight, @@ -22,13 +23,38 @@ from app.domain.instruments import ( from app.domain.units import BaseCurrency, Weight, WeightUnit -def test_gld_ounces_per_share_decay_formula_matches_research() -> None: - """Verify decay formula matches research examples from docs/GLD_BASIS_RESEARCH.md.""" - # Launch (2004): should be exactly 0.10 oz/share - launch_backing = gld_ounces_per_share(date(2004, 1, 1)) +def test_gld_launch_date_constant() -> None: + """Verify GLD launch date constant is November 18, 2004.""" + assert GLD_LAUNCH_DATE == date(2004, 11, 18) + + +def test_gld_ounces_per_share_launch_date_returns_initial_backing() -> None: + """Verify launch date (2004-11-18) returns exactly 0.10 oz/share.""" + launch_backing = gld_ounces_per_share(GLD_LAUNCH_DATE) assert launch_backing == GLD_INITIAL_OUNCES_PER_SHARE assert launch_backing == Decimal("0.10") + +def test_gld_ounces_per_share_rejects_pre_launch_date() -> None: + """Verify dates before GLD launch raise ValueError.""" + with pytest.raises(ValueError, match="GLD backing data unavailable before"): + gld_ounces_per_share(date(2004, 11, 17)) # Day before launch + + with pytest.raises(ValueError, match="GLD backing data unavailable before"): + gld_ounces_per_share(date(2004, 1, 1)) # Early 2004 + + with pytest.raises(ValueError, match="GLD backing data unavailable before"): + gld_ounces_per_share(date(2003, 12, 31)) # Prior year + + +def test_gld_ounces_per_share_early_2004_within_year_raises() -> None: + """Verify dates in 2004 but before November 18 also raise ValueError.""" + with pytest.raises(ValueError, match="GLD backing data unavailable"): + gld_ounces_per_share(date(2004, 6, 1)) # June 2004, before launch + + +def 
test_gld_ounces_per_share_decay_formula_matches_research() -> None: + """Verify decay formula matches research examples from docs/GLD_BASIS_RESEARCH.md.""" # 2026: should be ~0.0916 oz/share (8.4% decay from 22 years) # Formula: 0.10 * e^(-0.004 * 22) = 0.10 * e^(-0.088) ≈ 0.091576 years_2026 = 2026 - GLD_LAUNCH_YEAR # 22 years