feat(CORE-001D2A): tighten quote provider cache normalization

This commit is contained in:
Bu5hm4nn
2026-03-25 17:10:11 +01:00
parent dbcc6a1ea0
commit 442a0cd702
5 changed files with 403 additions and 8 deletions

View File

@@ -53,7 +53,11 @@ class DataService:
cache_key = f"quote:{normalized_symbol}"
cached = await self.cache.get_json(cache_key)
if cached and isinstance(cached, dict):
try:
normalized_cached = self._normalize_quote_payload(cached, normalized_symbol)
except ValueError:
normalized_cached = None
if normalized_cached is not None:
if normalized_cached != cached:
await self.cache.set_json(cache_key, normalized_cached)
return normalized_cached
@@ -365,11 +369,43 @@ class DataService:
@staticmethod
def _normalize_quote_payload(payload: dict[str, Any], symbol: str) -> dict[str, Any]:
normalized = dict(payload)
"""Normalize provider/cache quote payload to explicit contract.
This is the named boundary adapter between external float-heavy provider
payloads and internal quote handling. It ensures:
- symbol is always present and uppercased
- GLD quotes have explicit quote_unit='share' metadata
- Non-GLD symbols pass through without auto-assigned units
Fail-closed: missing/invalid fields are preserved for upstream handling
rather than silently defaulted. Type conversion is not performed here.
Args:
payload: Raw quote dict from cache or provider (float-heavy)
symbol: Expected symbol (used as fallback if missing from payload)
Returns:
Normalized quote dict with explicit symbol and GLD quote_unit
"""
normalized: dict[str, Any] = dict(payload)
normalized_symbol = symbol.upper()
normalized["symbol"] = str(normalized.get("symbol", normalized_symbol)).upper()
# Ensure symbol is always present and normalized.
# Missing symbol is repaired from the requested key; explicit mismatches are rejected.
raw_symbol = normalized.get("symbol", normalized_symbol)
normalized_payload_symbol = str(raw_symbol).upper() if raw_symbol is not None else normalized_symbol
if raw_symbol is not None and normalized_payload_symbol != normalized_symbol:
raise ValueError(
f"Quote payload symbol mismatch: expected {normalized_symbol}, got {normalized_payload_symbol}"
)
normalized["symbol"] = normalized_payload_symbol
# Add explicit quote_unit for GLD (CORE-002A/B compatibility)
# Repair missing or empty unit metadata, but preserve explicit non-empty values
if normalized["symbol"] == "GLD" and not normalized.get("quote_unit"):
normalized["quote_unit"] = "share"
return normalized
@staticmethod

View File

@@ -14,6 +14,7 @@ priority_queue:
- CORE-001D
- CORE-002C
- BT-003B
- BT-003B
- CORE-002
- PORT-003
- BT-002
@@ -25,12 +26,12 @@ priority_queue:
- OPS-001
- BT-003
recently_completed:
- CORE-001D2A
- CORE-002B
- CORE-002A
- CORE-001D1
- SEC-001
- SEC-001A
- CORE-001A
states:
backlog:
- DATA-002A
@@ -64,6 +65,7 @@ states:
- CORE-001A
- CORE-001B
- CORE-001C
- CORE-001D2A
- CORE-002A
- CORE-002B
blocked: []

View File

@@ -15,6 +15,7 @@ acceptance_criteria:
- Remaining raw-float domain hotspots are identified or removed.
technical_notes:
- `CORE-001D1` is complete: portfolio/workspace persistence now uses an explicit unit-aware schema with strict validation and atomic saves.
- Remaining focus is `CORE-001D2` provider/cache normalization and `CORE-001D3` service entrypoint tightening.
- `CORE-001D2A` is complete: DataService quote/provider cache normalization is now a named boundary adapter with explicit symbol mismatch rejection and GLD quote-unit repair.
- Remaining focus is the rest of `CORE-001D2` provider/cache normalization plus `CORE-001D3` service entrypoint tightening.
- Pre-launch policy: unit-aware schema changes may be breaking until persistence is considered live; old flat payloads may fail loudly instead of being migrated.
- See `docs/CORE-001D_BOUNDARY_CLEANUP_PLAN.md` for the current hotspot inventory and proposed sub-slices.

View File

@@ -0,0 +1,21 @@
id: CORE-001D2A
title: Provider and Cache Quote Normalization Boundary
status: done
priority: P1
effort: S
depends_on:
- CORE-001D1
- CORE-002A
tags:
- core
- decimal
- cache
- provider
summary: DataService quote payload normalization is now an explicit provider/cache boundary with tested repair and rejection rules.
completed_notes:
- Added a named quote normalization adapter in `app/services/data_service.py` for cache/provider payloads.
- GLD quotes repair missing or empty `quote_unit` metadata to preserve CORE-002 behavior.
- Explicit symbol mismatches are now rejected instead of being silently rewritten.
- Mismatched cached quotes are discarded and refreshed from the provider path.
- Added focused boundary tests in `tests/test_data_service_normalization.py`.
- Validated with focused pytest coverage and `make build` on `main`.

View File

@@ -0,0 +1,335 @@
"""Tests for DataService quote payload normalization and cache serialization boundaries."""
from __future__ import annotations
from datetime import datetime, timezone
import pytest
from app.services.cache import CacheService
from app.services.data_service import DataService
class _CacheStub(CacheService):
"""In-memory cache stub for unit testing."""
def __init__(self, initial: dict[str, object] | None = None) -> None:
self._store = dict(initial or {})
self.write_count = 0
async def get_json(self, key: str): # type: ignore[override]
return self._store.get(key)
async def set_json(self, key: str, value): # type: ignore[override]
self._store[key] = value
self.write_count += 1
return True
class TestQuotePayloadNormalization:
"""Test quote payload normalization edge cases and fail-closed behavior."""
@pytest.mark.asyncio
async def test_normalize_quote_adds_missing_symbol(self) -> None:
"""Normalization should add symbol when missing from payload."""
payload = {"price": 100.0, "change": 1.0}
result = DataService._normalize_quote_payload(payload, "AAPL")
assert result["symbol"] == "AAPL"
assert result["price"] == 100.0
@pytest.mark.asyncio
async def test_normalize_quote_uppercases_symbol(self) -> None:
"""Normalization should uppercase symbol for consistency."""
payload = {"symbol": "gld", "price": 100.0}
result = DataService._normalize_quote_payload(payload, "gld")
assert result["symbol"] == "GLD"
@pytest.mark.asyncio
async def test_normalize_quote_rejects_explicit_symbol_mismatch(self) -> None:
"""Explicit payload/key mismatches should be rejected, not silently rewritten."""
payload = {"symbol": "SLV", "price": 100.0}
with pytest.raises(ValueError, match="Quote payload symbol mismatch"):
DataService._normalize_quote_payload(payload, "GLD")
@pytest.mark.asyncio
async def test_normalize_quote_preserves_existing_quote_unit(self) -> None:
"""Normalization should not overwrite existing quote_unit."""
payload = {"symbol": "AAPL", "price": 100.0, "quote_unit": "share"}
result = DataService._normalize_quote_payload(payload, "AAPL")
assert result["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_normalize_quote_repairs_empty_gld_quote_unit(self) -> None:
"""Legacy or malformed empty GLD quote units should be repaired."""
payload = {"symbol": "GLD", "price": 200.0, "quote_unit": ""}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_normalize_quote_adds_gld_share_unit(self) -> None:
"""GLD quotes should get quote_unit=share when missing."""
payload = {"symbol": "GLD", "price": 200.0}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_normalize_quote_does_not_add_unit_for_unknown_symbols(self) -> None:
"""Non-GLD symbols should not get auto-assigned quote_unit."""
payload = {"symbol": "BTC-USD", "price": 50000.0}
result = DataService._normalize_quote_payload(payload, "BTC-USD")
assert "quote_unit" not in result
@pytest.mark.asyncio
async def test_normalize_quote_handles_missing_price_gracefully(self) -> None:
"""Missing price should be preserved as-is for fail-closed handling."""
payload = {"symbol": "GLD"} # type: ignore[dict-item]
result = DataService._normalize_quote_payload(payload, "GLD")
assert "price" not in result
assert result["symbol"] == "GLD"
assert result["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_normalize_quote_handles_none_values(self) -> None:
"""None values in payload should be preserved for upstream handling."""
payload = {"symbol": "GLD", "price": None, "change": None}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["symbol"] == "GLD"
assert result["price"] is None
assert result["change"] is None
@pytest.mark.asyncio
async def test_normalize_quote_preserves_extra_fields(self) -> None:
"""Extra fields should pass through unchanged."""
payload = {
"symbol": "GLD",
"price": 200.0,
"custom_field": "custom_value",
"nested": {"key": "value"},
}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["custom_field"] == "custom_value"
assert result["nested"] == {"key": "value"}
@pytest.mark.asyncio
async def test_normalize_quote_handles_string_price(self) -> None:
"""String prices should pass through (normalization is not type conversion)."""
payload = {"symbol": "GLD", "price": "200.0"}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["price"] == "200.0"
assert result["quote_unit"] == "share"
class TestLegacyCachedQuoteHandling:
"""Test handling of legacy cached quotes without quote_unit metadata."""
@pytest.mark.asyncio
async def test_get_quote_upgrades_legacy_gld_quote(self) -> None:
"""Legacy GLD quotes without quote_unit should be upgraded in cache."""
cache = _CacheStub(
{
"quote:GLD": {
"symbol": "GLD",
"price": 404.19,
"change": 0.0,
"change_percent": 0.0,
"updated_at": "2026-03-25T00:00:00+00:00",
"source": "cache",
}
}
)
service = DataService(cache=cache)
quote = await service.get_quote("GLD")
assert quote["quote_unit"] == "share"
assert cache._store["quote:GLD"]["quote_unit"] == "share"
# Should have written back the normalized version
assert cache.write_count >= 1
@pytest.mark.asyncio
async def test_get_quote_preserves_already_normalized_gld_quote(self) -> None:
"""Already-normalized GLD quotes should not trigger cache rewrite."""
cache = _CacheStub(
{
"quote:GLD": {
"symbol": "GLD",
"price": 404.19,
"quote_unit": "share",
"change": 0.0,
"change_percent": 0.0,
"updated_at": "2026-03-25T00:00:00+00:00",
"source": "cache",
}
}
)
service = DataService(cache=cache)
quote = await service.get_quote("GLD")
assert quote["quote_unit"] == "share"
# Should not have rewritten the cache
assert cache.write_count == 0
@pytest.mark.asyncio
async def test_get_quote_preserves_missing_unit_for_non_gld(self) -> None:
"""Non-GLD symbols should not get auto-assigned quote_unit."""
cache = _CacheStub(
{
"quote:BTC-USD": {
"symbol": "BTC-USD",
"price": 70000.0,
"updated_at": "2026-03-25T00:00:00+00:00",
"source": "cache",
}
}
)
service = DataService(cache=cache, default_symbol="BTC-USD")
quote = await service.get_quote("BTC-USD")
assert "quote_unit" not in quote
assert cache.write_count == 0
@pytest.mark.asyncio
async def test_get_quote_handles_malformed_cached_quote(self) -> None:
"""Malformed cached quotes (missing critical fields) should pass through."""
cache = _CacheStub(
{
"quote:GLD": {
"symbol": "GLD",
# missing price, change, etc.
}
}
)
service = DataService(cache=cache)
quote = await service.get_quote("GLD")
# Should still normalize what's present
assert quote["symbol"] == "GLD"
assert quote["quote_unit"] == "share"
# Missing fields remain missing for fail-closed handling upstream
@pytest.mark.asyncio
async def test_get_quote_ignores_mismatched_cached_symbol_and_fetches_fresh(self) -> None:
"""Symbol-mismatched cached quotes should be discarded, not rewritten."""
cache = _CacheStub({"quote:GLD": {"symbol": "SLV", "price": 28.5, "source": "cache"}})
service = DataService(cache=cache)
async def _fake_fetch(symbol: str) -> dict[str, object]:
return {"symbol": symbol, "price": 404.19, "source": "provider"}
service._fetch_quote = _fake_fetch # type: ignore[method-assign]
quote = await service.get_quote("GLD")
assert quote["symbol"] == "GLD"
assert quote["source"] == "provider"
assert quote["quote_unit"] == "share"
class TestCacheSerializationBoundaries:
"""Test cache serialization behavior and JSON expectations."""
@pytest.mark.asyncio
async def test_cache_roundtrip_preserves_quote_structure(self) -> None:
"""Cache should preserve quote structure through JSON roundtrip."""
cache = _CacheStub()
# Simulate a fresh quote fetch
quote = {
"symbol": "GLD",
"price": 200.0,
"quote_unit": "share",
"change": 1.5,
"change_percent": 0.75,
"updated_at": datetime.now(timezone.utc).isoformat(),
"source": "yfinance",
}
# Store and retrieve
await cache.set_json("quote:GLD", quote)
retrieved = await cache.get_json("quote:GLD")
assert retrieved is not None
assert retrieved["symbol"] == "GLD"
assert retrieved["price"] == 200.0
assert retrieved["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_cache_handles_datetime_in_payload(self) -> None:
"""Cache serialization should handle datetime objects in payloads.
Note: _CacheStub stores raw Python objects. The real Redis cache
serializes via json.dumps with a datetime default handler.
This test documents the expected behavior for the real cache.
"""
cache = _CacheStub()
quote_with_dt = {
"symbol": "GLD",
"price": 200.0,
"quote_unit": "share",
"updated_at": datetime.now(timezone.utc),
}
# In real usage, DataService always passes ISO strings to cache
# (see _fetch_quote and other quote producers)
# The stub just passes through the object
await cache.set_json("quote:GLD", quote_with_dt)
retrieved = await cache.get_json("quote:GLD")
assert retrieved is not None
assert retrieved["symbol"] == "GLD"
# Stub preserves the datetime object; real cache would serialize to ISO string
# What matters is that retrieval works without error
assert "updated_at" in retrieved
@pytest.mark.asyncio
async def test_cache_returns_none_for_missing_key(self) -> None:
"""Cache should return None for missing keys."""
cache = _CacheStub()
result = await cache.get_json("quote:NONEXISTENT")
assert result is None
@pytest.mark.asyncio
async def test_cache_handles_empty_dict(self) -> None:
"""Cache should handle empty dict payloads."""
cache = _CacheStub()
await cache.set_json("quote:EMPTY", {})
result = await cache.get_json("quote:EMPTY")
assert result == {}
class TestQuotePayloadValidation:
"""Test validation behavior for quote payloads at cache boundary."""
@pytest.mark.asyncio
async def test_get_quote_validates_cached_dict_type(self) -> None:
"""Non-dict cached values should be ignored."""
cache = _CacheStub({"quote:GLD": "not-a-dict"}) # type: ignore[dict-item]
service = DataService(cache=cache)
# Should not use the invalid cached value
# (will fall through to fetch, which will use fallback)
quote = await service.get_quote("GLD")
assert quote["symbol"] == "GLD"
assert quote["quote_unit"] == "share"
@pytest.mark.asyncio
async def test_get_quote_handles_list_cached_value(self) -> None:
"""List cached values should be ignored for quote lookups."""
cache = _CacheStub({"quote:GLD": [1, 2, 3]}) # type: ignore[dict-item]
service = DataService(cache=cache)
quote = await service.get_quote("GLD")
assert quote["symbol"] == "GLD"
@pytest.mark.asyncio
async def test_normalize_handles_empty_payload(self) -> None:
"""Empty payload should be handled gracefully."""
payload = {}
result = DataService._normalize_quote_payload(payload, "GLD")
assert result["symbol"] == "GLD"
assert result["quote_unit"] == "share"