feat(CONV-001): add GLD launch date validation, feat(DATA-DB-003): add cache CLI

CONV-001:
- Add GLD_LAUNCH_DATE constant (November 18, 2004)
- Validate reference_date in gld_ounces_per_share()
- Raise ValueError for dates before GLD launch
- Update docstring with valid date range
- Add comprehensive test coverage for edge cases

DATA-DB-003:
- Create scripts/cache_cli.py with three commands:
  - vault-dash cache stats: Show cache statistics
  - vault-dash cache list: List cached entries
  - vault-dash cache clear: Clear all cache files
- Add Makefile targets: cache-stats, cache-list, cache-clear
- Integrate with DatabentoHistoricalPriceSource methods
This commit is contained in:
Bu5hm4nn
2026-03-29 12:00:30 +02:00
parent ace6d67482
commit dc4ee1f261
6 changed files with 276 additions and 17 deletions

View File

@@ -1,4 +1,4 @@
.PHONY: install dev lint test build deploy .PHONY: install dev lint test build deploy cache-stats cache-clear cache-list
install: install:
python3 -m venv .venv python3 -m venv .venv
@@ -19,3 +19,13 @@ build: lint
deploy: deploy:
./scripts/deploy.sh ./scripts/deploy.sh
# Cache management commands
cache-stats:
. .venv/bin/activate && python scripts/cache_cli.py stats
cache-list:
. .venv/bin/activate && python scripts/cache_cli.py list
cache-clear:
. .venv/bin/activate && python scripts/cache_cli.py clear --yes

View File

@@ -36,6 +36,7 @@ class Underlying(str, Enum):
GLD_INITIAL_OUNCES_PER_SHARE = Decimal("0.10") GLD_INITIAL_OUNCES_PER_SHARE = Decimal("0.10")
GLD_EXPENSE_DECAY_RATE = Decimal("0.004") # 0.4% annual decay GLD_EXPENSE_DECAY_RATE = Decimal("0.004") # 0.4% annual decay
GLD_LAUNCH_YEAR = 2004 GLD_LAUNCH_YEAR = 2004
GLD_LAUNCH_DATE = date(2004, 11, 18) # GLD IPO date on NYSE
# GC=F contract specifications # GC=F contract specifications
GC_F_OUNCES_PER_CONTRACT = Decimal("100") # 100 troy oz per contract GC_F_OUNCES_PER_CONTRACT = Decimal("100") # 100 troy oz per contract
@@ -44,29 +45,41 @@ GC_F_QUOTE_CURRENCY = BaseCurrency.USD
def gld_ounces_per_share(reference_date: date | None = None) -> Decimal: def gld_ounces_per_share(reference_date: date | None = None) -> Decimal:
""" """
Calculate GLD's current gold backing per share based on expense ratio decay. Calculate GLD's gold backing per share for a specific date.
GLD's expense ratio (0.40% annually) causes the gold backing per share to GLD's expense ratio (0.40% annually) causes the gold backing per share to
decay exponentially from the initial 0.10 oz/share at launch (2004). decay exponentially from the initial 0.10 oz/share at launch (November 18, 2004).
Formula: ounces_per_share = 0.10 * e^(-0.004 * years_since_2004) Formula: ounces_per_share = 0.10 * e^(-0.004 * years_since_2004)
Args: Args:
reference_date: Date to calculate backing for. Defaults to today. reference_date: Date to calculate backing for. Must be on or after
GLD launch date (2004-11-18). Defaults to today.
Returns: Returns:
Decimal representing troy ounces of gold backing per GLD share. Decimal representing troy ounces of gold backing per GLD share.
Raises:
ValueError: If reference_date is before GLD launch (2004-11-18).
Examples: Examples:
>>> # 2026 backing should be ~0.0919 oz/share (8.1% decay)
>>> from datetime import date >>> from datetime import date
>>> # Launch date returns initial 0.10 oz/share
>>> gld_ounces_per_share(date(2004, 11, 18))
Decimal('0.10')
>>> # 2026 backing should be ~0.0916 oz/share (8.4% decay)
>>> result = gld_ounces_per_share(date(2026, 1, 1)) >>> result = gld_ounces_per_share(date(2026, 1, 1))
>>> float(result) # doctest: +SKIP >>> float(result) # doctest: +SKIP
0.0919... 0.0916...
""" """
if reference_date is None: if reference_date is None:
reference_date = date.today() reference_date = date.today()
if reference_date < GLD_LAUNCH_DATE:
raise ValueError(
f"GLD backing data unavailable before {GLD_LAUNCH_DATE}. " f"GLD launched on November 18, 2004."
)
years_since_launch = Decimal(reference_date.year - GLD_LAUNCH_YEAR) years_since_launch = Decimal(reference_date.year - GLD_LAUNCH_YEAR)
decay_factor = Decimal(str(math.exp(-float(GLD_EXPENSE_DECAY_RATE * years_since_launch)))) decay_factor = Decimal(str(math.exp(-float(GLD_EXPENSE_DECAY_RATE * years_since_launch))))
return GLD_INITIAL_OUNCES_PER_SHARE * decay_factor return GLD_INITIAL_OUNCES_PER_SHARE * decay_factor

View File

@@ -315,11 +315,13 @@ class BacktestPageService:
source_info = self.DATA_SOURCE_INFO.get(data_source, self.DATA_SOURCE_INFO["synthetic"]) source_info = self.DATA_SOURCE_INFO.get(data_source, self.DATA_SOURCE_INFO["synthetic"])
# Use the injected provider's identity if available (for custom providers in tests) # Use the injected provider's identity if available (for custom providers in tests)
if hasattr(self.backtest_service, 'provider'): if hasattr(self.backtest_service, "provider"):
injected_provider_id = getattr(self.backtest_service.provider, 'provider_id', None) injected_provider_id = getattr(self.backtest_service.provider, "provider_id", None)
injected_pricing_mode = getattr(self.backtest_service.provider, 'pricing_mode', None) injected_pricing_mode = getattr(self.backtest_service.provider, "pricing_mode", None)
# Only use injected identity if it differs from known providers # Only use injected identity if it differs from known providers
if injected_provider_id and injected_provider_id not in [info.provider_id for info in self.DATA_SOURCE_INFO.values()]: if injected_provider_id and injected_provider_id not in [
info.provider_id for info in self.DATA_SOURCE_INFO.values()
]:
provider_id = injected_provider_id provider_id = injected_provider_id
pricing_mode = injected_pricing_mode or source_info.pricing_mode pricing_mode = injected_pricing_mode or source_info.pricing_mode
else: else:

210
scripts/cache_cli.py Normal file
View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""CLI commands for managing Databento cache.
Commands:
vault-dash cache stats Show cache statistics
vault-dash cache list Show all cached entries
vault-dash cache clear Clear all cache files
Usage:
python scripts/cache_cli.py stats
python scripts/cache_cli.py list
python scripts/cache_cli.py clear [--yes]
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
# Add project root to path for imports
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from app.services.backtesting.databento_source import ( # noqa: E402
DatabentoHistoricalPriceSource,
DatabentoSourceConfig,
)
def format_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string (B, KB, MB, or GB)."""
    # Binary (1024-based) unit boundaries.
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024
    if size_bytes >= gib:
        return f"{size_bytes / gib:.2f}GB"
    if size_bytes >= mib:
        return f"{size_bytes / mib:.1f}MB"
    if size_bytes >= kib:
        return f"{size_bytes / kib:.1f}KB"
    return f"{size_bytes}B"
def cmd_stats(args: argparse.Namespace) -> int:
    """Print aggregate cache statistics plus one summary line per entry.

    Returns 0 on success, 1 if the cache stats could not be read.
    """
    cfg = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    src = DatabentoHistoricalPriceSource(cfg)
    try:
        info = src.get_cache_stats()
    except Exception as exc:
        print(f"Error getting cache stats: {exc}", file=sys.stderr)
        return 1
    print(f"Cache Directory: {cfg.cache_dir}")
    print(f"Total Size: {format_size(info['total_size_bytes'])}")
    print(f"File Count: {info['file_count']} files")
    print(f"Oldest Download: {info['oldest_download'] or 'N/A'}")
    cached = info["entries"]
    # Both branches begin with a blank separator line.
    print()
    if cached:
        print("Entries:")
        for item in cached:
            # cost_usd may be absent or None; treat either as $0.00.
            cost = item.get("cost_usd", 0.0) or 0.0
            print(
                f"  - {item['symbol']}/{item['dataset']}/{item['schema']}/"
                f"{item['start_date']}_{item['end_date']}: "
                f"{item['rows']} rows, ${cost:.2f}"
            )
    else:
        print("No cached entries found.")
    return 0
def cmd_list(args: argparse.Namespace) -> int:
    """Print all cached entries as a fixed-width table.

    Returns 0 on success (including an empty cache), 1 on stats failure.
    """
    cfg = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    src = DatabentoHistoricalPriceSource(cfg)
    try:
        info = src.get_cache_stats()
    except Exception as exc:
        print(f"Error getting cache stats: {exc}", file=sys.stderr)
        return 1
    cached = info["entries"]
    if not cached:
        print("No cached entries found.")
        return 0
    header = (
        f"{'Dataset':<12} {'Symbol':<8} {'Schema':<10} "
        f"{'Start':<12} {'End':<12} {'Rows':>6} "
        f"{'Downloaded':<12} {'Size':>8}"
    )
    print(header)
    print("-" * len(header))

    def order(item: dict) -> tuple:
        # Stable ordering: dataset, then symbol, then start date.
        return (item.get("dataset", ""), item.get("symbol", ""), item.get("start_date", ""))

    for item in sorted(cached, key=order):
        cells = [
            f"{item['dataset']:<12}",
            f"{item['symbol']:<8}",
            f"{item['schema']:<10}",
            f"{item['start_date']:<12}",
            f"{item['end_date']:<12}",
            f"{item['rows']:>6}",
            f"{item['download_date']:<12}",
            f"{format_size(item['size_bytes']):>8}",
        ]
        print(" ".join(cells))
    return 0
def cmd_clear(args: argparse.Namespace) -> int:
    """Delete every cache file, with an interactive confirmation.

    The prompt is skipped when --yes was given. Returns 0 on success or
    empty cache, 1 on user abort or any failure.
    """
    cfg = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    src = DatabentoHistoricalPriceSource(cfg)
    # Read stats first so the confirmation can show what will be removed.
    try:
        info = src.get_cache_stats()
    except Exception as exc:
        print(f"Error getting cache stats: {exc}", file=sys.stderr)
        return 1
    n_files = info["file_count"]
    if not n_files:
        print("No cache files to clear.")
        return 0
    if not args.yes:
        print(f"This will delete {n_files} files ({format_size(info['total_size_bytes'])}) from:")
        print(f"  {cfg.cache_dir}")
        answer = input("Proceed? [y/N]: ").strip().lower()
        if answer != "y":
            print("Aborted.")
            return 1
    try:
        removed = src.clear_cache()
    except Exception as exc:
        print(f"Error clearing cache: {exc}", file=sys.stderr)
        return 1
    print(f"Cleared {removed} files from cache.")
    return 0
def main() -> int:
    """Parse CLI arguments and dispatch to the selected subcommand.

    Returns the subcommand's exit status (0 success, nonzero failure).
    """
    parser = argparse.ArgumentParser(
        prog="vault-dash cache",
        description="Manage Databento cache files.",
    )
    parser.add_argument(
        "--cache-dir",
        default=".cache/databento",
        help="Cache directory path (default: .cache/databento)",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # (name, help text, handler, takes a --yes flag)
    registry = (
        ("stats", "Show cache statistics", cmd_stats, False),
        ("list", "List all cached entries", cmd_list, False),
        ("clear", "Clear all cache files", cmd_clear, True),
    )
    for name, help_text, handler, confirmable in registry:
        cmd_parser = sub.add_parser(name, help=help_text)
        if confirmable:
            cmd_parser.add_argument(
                "-y",
                "--yes",
                action="store_true",
                help="Skip confirmation prompt",
            )
        cmd_parser.set_defaults(func=handler)

    ns = parser.parse_args()
    return ns.func(ns)
if __name__ == "__main__":
    # Propagate the command's status code as the process exit code.
    sys.exit(main())

View File

@@ -211,9 +211,7 @@ class TestDatabentoHistoricalPriceSource:
assert len(result) == 4 assert len(result) == 4
assert result[0].close == 185.0 assert result[0].close == 185.0
def test_load_from_cache_returns_none_if_stale( def test_load_from_cache_returns_none_if_stale(self, temp_cache_dir: Path, sample_ohlcv_df) -> None:
self, temp_cache_dir: Path, sample_ohlcv_df
) -> None:
"""Returns None if cache exceeds age threshold.""" """Returns None if cache exceeds age threshold."""
source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource) source = DatabentoHistoricalPriceSource.__new__(DatabentoHistoricalPriceSource)

View File

@@ -10,6 +10,7 @@ from app.domain.instruments import (
GC_F_OUNCES_PER_CONTRACT, GC_F_OUNCES_PER_CONTRACT,
GLD_EXPENSE_DECAY_RATE, GLD_EXPENSE_DECAY_RATE,
GLD_INITIAL_OUNCES_PER_SHARE, GLD_INITIAL_OUNCES_PER_SHARE,
GLD_LAUNCH_DATE,
GLD_LAUNCH_YEAR, GLD_LAUNCH_YEAR,
Underlying, Underlying,
asset_quantity_from_weight, asset_quantity_from_weight,
@@ -22,13 +23,38 @@ from app.domain.instruments import (
from app.domain.units import BaseCurrency, Weight, WeightUnit from app.domain.units import BaseCurrency, Weight, WeightUnit
def test_gld_ounces_per_share_decay_formula_matches_research() -> None: def test_gld_launch_date_constant() -> None:
"""Verify decay formula matches research examples from docs/GLD_BASIS_RESEARCH.md.""" """Verify GLD launch date constant is November 18, 2004."""
# Launch (2004): should be exactly 0.10 oz/share assert GLD_LAUNCH_DATE == date(2004, 11, 18)
launch_backing = gld_ounces_per_share(date(2004, 1, 1))
def test_gld_ounces_per_share_launch_date_returns_initial_backing() -> None:
"""Verify launch date (2004-11-18) returns exactly 0.10 oz/share."""
launch_backing = gld_ounces_per_share(GLD_LAUNCH_DATE)
assert launch_backing == GLD_INITIAL_OUNCES_PER_SHARE assert launch_backing == GLD_INITIAL_OUNCES_PER_SHARE
assert launch_backing == Decimal("0.10") assert launch_backing == Decimal("0.10")
def test_gld_ounces_per_share_rejects_pre_launch_date() -> None:
"""Verify dates before GLD launch raise ValueError."""
with pytest.raises(ValueError, match="GLD backing data unavailable before"):
gld_ounces_per_share(date(2004, 11, 17)) # Day before launch
with pytest.raises(ValueError, match="GLD backing data unavailable before"):
gld_ounces_per_share(date(2004, 1, 1)) # Early 2004
with pytest.raises(ValueError, match="GLD backing data unavailable before"):
gld_ounces_per_share(date(2003, 12, 31)) # Prior year
def test_gld_ounces_per_share_early_2004_within_year_raises() -> None:
"""Verify dates in 2004 but before November 18 also raise ValueError."""
with pytest.raises(ValueError, match="GLD backing data unavailable"):
gld_ounces_per_share(date(2004, 6, 1)) # June 2004, before launch
def test_gld_ounces_per_share_decay_formula_matches_research() -> None:
"""Verify decay formula matches research examples from docs/GLD_BASIS_RESEARCH.md."""
# 2026: should be ~0.0916 oz/share (8.4% decay from 22 years) # 2026: should be ~0.0916 oz/share (8.4% decay from 22 years)
# Formula: 0.10 * e^(-0.004 * 22) = 0.10 * e^(-0.088) ≈ 0.091576 # Formula: 0.10 * e^(-0.004 * 22) = 0.10 * e^(-0.088) ≈ 0.091576
years_2026 = 2026 - GLD_LAUNCH_YEAR # 22 years years_2026 = 2026 - GLD_LAUNCH_YEAR # 22 years