feat(CONV-001): add GLD launch date validation, feat(DATA-DB-003): add cache CLI

CONV-001:
- Add GLD_LAUNCH_DATE constant (November 18, 2004)
- Validate reference_date in gld_ounces_per_share()
- Raise ValueError for dates before GLD launch
- Update docstring with valid date range
- Add comprehensive test coverage for edge cases

DATA-DB-003:
- Create scripts/cache_cli.py with three commands:
  - vault-dash cache stats: Show cache statistics
  - vault-dash cache list: List cached entries
  - vault-dash cache clear: Clear all cache files
- Add Makefile targets: cache-stats, cache-list, cache-clear
- Integrate with DatabentoHistoricalPriceSource methods
This commit is contained in:
Bu5hm4nn
2026-03-29 12:00:30 +02:00
parent ace6d67482
commit dc4ee1f261
6 changed files with 276 additions and 17 deletions

210
scripts/cache_cli.py Normal file
View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""CLI commands for managing Databento cache.
Commands:
vault-dash cache stats Show cache statistics
vault-dash cache list Show all cached entries
vault-dash cache clear Clear all cache files
Usage:
python scripts/cache_cli.py stats
python scripts/cache_cli.py list
python scripts/cache_cli.py clear [--yes]
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
# Make the project root importable so this script can be run directly
# (e.g. `python scripts/cache_cli.py stats`) without installing the package.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Project import must come after the sys.path tweak above, hence the noqa
# suppressing the "import not at top of file" lint (E402).
from app.services.backtesting.databento_source import ( # noqa: E402
    DatabentoHistoricalPriceSource,
    DatabentoSourceConfig,
)
def format_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string (B, KB, MB, or GB).

    Uses binary units (1 KB = 1024 bytes); GB values get two decimal
    places, KB/MB one, and raw bytes none.
    """
    kb = 1024
    mb = kb * 1024
    gb = mb * 1024
    if size_bytes >= gb:
        return f"{size_bytes / gb:.2f}GB"
    if size_bytes >= mb:
        return f"{size_bytes / mb:.1f}MB"
    if size_bytes >= kb:
        return f"{size_bytes / kb:.1f}KB"
    return f"{size_bytes}B"
def cmd_stats(args: argparse.Namespace) -> int:
    """Print summary statistics for the Databento cache.

    Shows the cache directory, total size, file count, oldest download,
    and a per-entry breakdown with row counts and cost.

    Returns 0 on success, 1 if the cache stats could not be read.
    """
    config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    source = DatabentoHistoricalPriceSource(config)
    try:
        stats = source.get_cache_stats()
    except Exception as e:
        print(f"Error getting cache stats: {e}", file=sys.stderr)
        return 1

    print(f"Cache Directory: {config.cache_dir}")
    print(f"Total Size: {format_size(stats['total_size_bytes'])}")
    print(f"File Count: {stats['file_count']} files")
    print(f"Oldest Download: {stats['oldest_download'] or 'N/A'}")
    print()

    entries = stats["entries"]
    if not entries:
        print("No cached entries found.")
        return 0

    print("Entries:")
    for entry in entries:
        # cost_usd may be missing or explicitly None; treat both as $0.00.
        cost_usd = entry.get("cost_usd", 0.0) or 0.0
        line = (
            f" - {entry['symbol']}/{entry['dataset']}/{entry['schema']}/"
            f"{entry['start_date']}_{entry['end_date']}: "
            f"{entry['rows']} rows, ${cost_usd:.2f}"
        )
        print(line)
    return 0
def cmd_list(args: argparse.Namespace) -> int:
    """Print every cached entry as a fixed-width table.

    Entries are sorted by dataset, then symbol, then start date, so the
    output is stable across runs.

    Returns 0 on success, 1 if the cache stats could not be read.
    """
    config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    source = DatabentoHistoricalPriceSource(config)
    try:
        stats = source.get_cache_stats()
    except Exception as e:
        print(f"Error getting cache stats: {e}", file=sys.stderr)
        return 1

    entries = stats["entries"]
    if not entries:
        print("No cached entries found.")
        return 0

    header = (
        f"{'Dataset':<12} {'Symbol':<8} {'Schema':<10} "
        f"{'Start':<12} {'End':<12} {'Rows':>6} "
        f"{'Downloaded':<12} {'Size':>8}"
    )
    print(header)
    print("-" * len(header))

    def sort_key(e: dict) -> tuple:
        # Missing keys sort first as empty strings rather than raising.
        return (e.get("dataset", ""), e.get("symbol", ""), e.get("start_date", ""))

    for entry in sorted(entries, key=sort_key):
        row = " ".join(
            (
                f"{entry['dataset']:<12}",
                f"{entry['symbol']:<8}",
                f"{entry['schema']:<10}",
                f"{entry['start_date']:<12}",
                f"{entry['end_date']:<12}",
                f"{entry['rows']:>6}",
                f"{entry['download_date']:<12}",
                f"{format_size(entry['size_bytes']):>8}",
            )
        )
        print(row)
    return 0
def cmd_clear(args: argparse.Namespace) -> int:
    """Delete every file in the cache, prompting first unless --yes.

    Shows the file count and total size before asking for confirmation;
    any answer other than "y" aborts without deleting.

    Returns 0 on success or when the cache is already empty, 1 on abort
    or error.
    """
    config = DatabentoSourceConfig(cache_dir=Path(args.cache_dir))
    source = DatabentoHistoricalPriceSource(config)

    # Read stats up front so the confirmation prompt can report what
    # would be deleted.
    try:
        stats = source.get_cache_stats()
    except Exception as e:
        print(f"Error getting cache stats: {e}", file=sys.stderr)
        return 1

    file_count = stats["file_count"]
    if not file_count:
        print("No cache files to clear.")
        return 0

    if not args.yes:
        size_text = format_size(stats["total_size_bytes"])
        print(f"This will delete {file_count} files ({size_text}) from:")
        print(f" {config.cache_dir}")
        answer = input("Proceed? [y/N]: ").strip().lower()
        if answer != "y":
            print("Aborted.")
            return 1

    try:
        deleted = source.clear_cache()
    except Exception as e:
        print(f"Error clearing cache: {e}", file=sys.stderr)
        return 1
    print(f"Cleared {deleted} files from cache.")
    return 0
def main() -> int:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Registers three subcommands (stats, list, clear) that share a common
    --cache-dir option; returns the handler's exit code.
    """
    parser = argparse.ArgumentParser(
        prog="vault-dash cache",
        description="Manage Databento cache files.",
    )
    parser.add_argument(
        "--cache-dir",
        default=".cache/databento",
        help="Cache directory path (default: .cache/databento)",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Register each subcommand from a dispatch table; only `clear` takes
    # extra flags, added after registration.
    commands = (
        ("stats", "Show cache statistics", cmd_stats),
        ("list", "List all cached entries", cmd_list),
        ("clear", "Clear all cache files", cmd_clear),
    )
    for name, help_text, handler in commands:
        sub = subparsers.add_parser(name, help=help_text)
        sub.set_defaults(func=handler)
        if name == "clear":
            sub.add_argument(
                "-y",
                "--yes",
                action="store_true",
                help="Skip confirmation prompt",
            )

    args = parser.parse_args()
    return args.func(args)


if __name__ == "__main__":
    raise SystemExit(main())