diff --git a/app/services/data_service.py b/app/services/data_service.py index ffc5ab8..8cd964d 100644 --- a/app/services/data_service.py +++ b/app/services/data_service.py @@ -53,10 +53,14 @@ class DataService: cache_key = f"quote:{normalized_symbol}" cached = await self.cache.get_json(cache_key) if cached and isinstance(cached, dict): - normalized_cached = self._normalize_quote_payload(cached, normalized_symbol) - if normalized_cached != cached: - await self.cache.set_json(cache_key, normalized_cached) - return normalized_cached + try: + normalized_cached = self._normalize_quote_payload(cached, normalized_symbol) + except ValueError: + normalized_cached = None + if normalized_cached is not None: + if normalized_cached != cached: + await self.cache.set_json(cache_key, normalized_cached) + return normalized_cached quote = self._normalize_quote_payload(await self._fetch_quote(normalized_symbol), normalized_symbol) await self.cache.set_json(cache_key, quote) @@ -365,11 +369,43 @@ class DataService: @staticmethod def _normalize_quote_payload(payload: dict[str, Any], symbol: str) -> dict[str, Any]: - normalized = dict(payload) + """Normalize provider/cache quote payload to explicit contract. + + This is the named boundary adapter between external float-heavy provider + payloads and internal quote handling. It ensures: + + - symbol is always present and uppercased + - GLD quotes have explicit quote_unit='share' metadata + - Non-GLD symbols pass through without auto-assigned units + + Fail-closed: missing/invalid fields are preserved for upstream handling + rather than silently defaulted. Type conversion is not performed here. + + Args: + payload: Raw quote dict from cache or provider (float-heavy) + symbol: Expected symbol (used as fallback if missing from payload) + + Returns: + Normalized quote dict with explicit symbol and GLD quote_unit + """ + normalized: dict[str, Any] = dict(payload) normalized_symbol = symbol.upper() - normalized["symbol"] = str(normalized.get("symbol", normalized_symbol)).upper() + + # Ensure symbol is always present and normalized. + # Missing symbol is repaired from the requested key; explicit mismatches are rejected. + raw_symbol = normalized.get("symbol", normalized_symbol) + normalized_payload_symbol = str(raw_symbol).upper() if raw_symbol is not None else normalized_symbol + if raw_symbol is not None and normalized_payload_symbol != normalized_symbol: + raise ValueError( + f"Quote payload symbol mismatch: expected {normalized_symbol}, got {normalized_payload_symbol}" + ) + normalized["symbol"] = normalized_payload_symbol + + # Add explicit quote_unit for GLD (CORE-002A/B compatibility) + # Repair missing or empty unit metadata, but preserve explicit non-empty values if normalized["symbol"] == "GLD" and not normalized.get("quote_unit"): normalized["quote_unit"] = "share" + return normalized @staticmethod diff --git a/docs/roadmap/ROADMAP.yaml b/docs/roadmap/ROADMAP.yaml index ebb4cd0..3240125 100644 --- a/docs/roadmap/ROADMAP.yaml +++ b/docs/roadmap/ROADMAP.yaml @@ -14,6 +14,7 @@ priority_queue: - CORE-001D - CORE-002C - BT-003B + - BT-003B - CORE-002 - PORT-003 - BT-002 @@ -25,12 +26,12 @@ priority_queue: - OPS-001 - BT-003 recently_completed: + - CORE-001D2A - CORE-002B - CORE-002A - CORE-001D1 - SEC-001 - SEC-001A - - CORE-001A states: backlog: - DATA-002A @@ -64,6 +65,7 @@ states: - CORE-001A - CORE-001B - CORE-001C + - CORE-001D2A - CORE-002A - CORE-002B blocked: [] diff --git a/docs/roadmap/backlog/CORE-001D-decimal-boundary-cleanup.yaml b/docs/roadmap/backlog/CORE-001D-decimal-boundary-cleanup.yaml index 190890e..a0855e2 100644 --- a/docs/roadmap/backlog/CORE-001D-decimal-boundary-cleanup.yaml +++ b/docs/roadmap/backlog/CORE-001D-decimal-boundary-cleanup.yaml @@ -15,6 +15,7 @@ acceptance_criteria: - Remaining raw-float domain hotspots are identified or removed. technical_notes: - `CORE-001D1` is complete: portfolio/workspace persistence now uses an explicit unit-aware schema with strict validation and atomic saves. - - Remaining focus is `CORE-001D2` provider/cache normalization and `CORE-001D3` service entrypoint tightening. + - `CORE-001D2A` is complete: DataService quote/provider cache normalization is now a named boundary adapter with explicit symbol mismatch rejection and GLD quote-unit repair. + - Remaining focus is the rest of `CORE-001D2` provider/cache normalization plus `CORE-001D3` service entrypoint tightening. - Pre-launch policy: unit-aware schema changes may be breaking until persistence is considered live; old flat payloads may fail loudly instead of being migrated. - See `docs/CORE-001D_BOUNDARY_CLEANUP_PLAN.md` for the current hotspot inventory and proposed sub-slices. diff --git a/docs/roadmap/done/CORE-001D2A-provider-cache-normalization.yaml b/docs/roadmap/done/CORE-001D2A-provider-cache-normalization.yaml new file mode 100644 index 0000000..af749ff --- /dev/null +++ b/docs/roadmap/done/CORE-001D2A-provider-cache-normalization.yaml @@ -0,0 +1,21 @@ +id: CORE-001D2A +title: Provider and Cache Quote Normalization Boundary +status: done +priority: P1 +effort: S +depends_on: + - CORE-001D1 + - CORE-002A +tags: + - core + - decimal + - cache + - provider +summary: DataService quote payload normalization is now an explicit provider/cache boundary with tested repair and rejection rules. +completed_notes: + - Added a named quote normalization adapter in `app/services/data_service.py` for cache/provider payloads. + - GLD quotes repair missing or empty `quote_unit` metadata to preserve CORE-002 behavior. + - Explicit symbol mismatches are now rejected instead of being silently rewritten. + - Mismatched cached quotes are discarded and refreshed from the provider path. + - Added focused boundary tests in `tests/test_data_service_normalization.py`. + - Validated with focused pytest coverage and `make build` on `main`. diff --git a/tests/test_data_service_normalization.py b/tests/test_data_service_normalization.py new file mode 100644 index 0000000..4608d34 --- /dev/null +++ b/tests/test_data_service_normalization.py @@ -0,0 +1,335 @@ +"""Tests for DataService quote payload normalization and cache serialization boundaries.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from app.services.cache import CacheService +from app.services.data_service import DataService + + +class _CacheStub(CacheService): + """In-memory cache stub for unit testing.""" + + def __init__(self, initial: dict[str, object] | None = None) -> None: + self._store = dict(initial or {}) + self.write_count = 0 + + async def get_json(self, key: str): # type: ignore[override] + return self._store.get(key) + + async def set_json(self, key: str, value): # type: ignore[override] + self._store[key] = value + self.write_count += 1 + return True + + +class TestQuotePayloadNormalization: + """Test quote payload normalization edge cases and fail-closed behavior.""" + + @pytest.mark.asyncio + async def test_normalize_quote_adds_missing_symbol(self) -> None: + """Normalization should add symbol when missing from payload.""" + payload = {"price": 100.0, "change": 1.0} + result = DataService._normalize_quote_payload(payload, "AAPL") + assert result["symbol"] == "AAPL" + assert result["price"] == 100.0 + + @pytest.mark.asyncio + async def test_normalize_quote_uppercases_symbol(self) -> None: + """Normalization should uppercase symbol for consistency.""" + payload = {"symbol": "gld", "price": 100.0} + result = DataService._normalize_quote_payload(payload, "gld") + assert result["symbol"] == "GLD" + + @pytest.mark.asyncio + async def test_normalize_quote_rejects_explicit_symbol_mismatch(self) -> None: + """Explicit payload/key mismatches should be rejected, not silently rewritten.""" + payload = {"symbol": "SLV", "price": 100.0} + with pytest.raises(ValueError, match="Quote payload symbol mismatch"): + DataService._normalize_quote_payload(payload, "GLD") + + @pytest.mark.asyncio + async def test_normalize_quote_preserves_existing_quote_unit(self) -> None: + """Normalization should not overwrite existing quote_unit.""" + payload = {"symbol": "AAPL", "price": 100.0, "quote_unit": "share"} + result = DataService._normalize_quote_payload(payload, "AAPL") + assert result["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_normalize_quote_repairs_empty_gld_quote_unit(self) -> None: + """Legacy or malformed empty GLD quote units should be repaired.""" + payload = {"symbol": "GLD", "price": 200.0, "quote_unit": ""} + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_normalize_quote_adds_gld_share_unit(self) -> None: + """GLD quotes should get quote_unit=share when missing.""" + payload = {"symbol": "GLD", "price": 200.0} + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_normalize_quote_does_not_add_unit_for_unknown_symbols(self) -> None: + """Non-GLD symbols should not get auto-assigned quote_unit.""" + payload = {"symbol": "BTC-USD", "price": 50000.0} + result = DataService._normalize_quote_payload(payload, "BTC-USD") + assert "quote_unit" not in result + + @pytest.mark.asyncio + async def test_normalize_quote_handles_missing_price_gracefully(self) -> None: + """Missing price should be preserved as-is for fail-closed handling.""" + payload = {"symbol": "GLD"} # type: ignore[dict-item] + result = DataService._normalize_quote_payload(payload, "GLD") + assert "price" not in result + assert result["symbol"] == "GLD" + assert result["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_normalize_quote_handles_none_values(self) -> None: + """None values in payload should be preserved for upstream handling.""" + payload = {"symbol": "GLD", "price": None, "change": None} + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["symbol"] == "GLD" + assert result["price"] is None + assert result["change"] is None + + @pytest.mark.asyncio + async def test_normalize_quote_preserves_extra_fields(self) -> None: + """Extra fields should pass through unchanged.""" + payload = { + "symbol": "GLD", + "price": 200.0, + "custom_field": "custom_value", + "nested": {"key": "value"}, + } + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["custom_field"] == "custom_value" + assert result["nested"] == {"key": "value"} + + @pytest.mark.asyncio + async def test_normalize_quote_handles_string_price(self) -> None: + """String prices should pass through (normalization is not type conversion).""" + payload = {"symbol": "GLD", "price": "200.0"} + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["price"] == "200.0" + assert result["quote_unit"] == "share" + + +class TestLegacyCachedQuoteHandling: + """Test handling of legacy cached quotes without quote_unit metadata.""" + + @pytest.mark.asyncio + async def test_get_quote_upgrades_legacy_gld_quote(self) -> None: + """Legacy GLD quotes without quote_unit should be upgraded in cache.""" + cache = _CacheStub( + { + "quote:GLD": { + "symbol": "GLD", + "price": 404.19, + "change": 0.0, + "change_percent": 0.0, + "updated_at": "2026-03-25T00:00:00+00:00", + "source": "cache", + } + } + ) + service = DataService(cache=cache) + + quote = await service.get_quote("GLD") + + assert quote["quote_unit"] == "share" + assert cache._store["quote:GLD"]["quote_unit"] == "share" + # Should have written back the normalized version + assert cache.write_count >= 1 + + @pytest.mark.asyncio + async def test_get_quote_preserves_already_normalized_gld_quote(self) -> None: + """Already-normalized GLD quotes should not trigger cache rewrite.""" + cache = _CacheStub( + { + "quote:GLD": { + "symbol": "GLD", + "price": 404.19, + "quote_unit": "share", + "change": 0.0, + "change_percent": 0.0, + "updated_at": "2026-03-25T00:00:00+00:00", + "source": "cache", + } + } + ) + service = DataService(cache=cache) + + quote = await service.get_quote("GLD") + + assert quote["quote_unit"] == "share" + # Should not have rewritten the cache + assert cache.write_count == 0 + + @pytest.mark.asyncio + async def test_get_quote_preserves_missing_unit_for_non_gld(self) -> None: + """Non-GLD symbols should not get auto-assigned quote_unit.""" + cache = _CacheStub( + { + "quote:BTC-USD": { + "symbol": "BTC-USD", + "price": 70000.0, + "updated_at": "2026-03-25T00:00:00+00:00", + "source": "cache", + } + } + ) + service = DataService(cache=cache, default_symbol="BTC-USD") + + quote = await service.get_quote("BTC-USD") + + assert "quote_unit" not in quote + assert cache.write_count == 0 + + @pytest.mark.asyncio + async def test_get_quote_handles_malformed_cached_quote(self) -> None: + """Malformed cached quotes (missing critical fields) should pass through.""" + cache = _CacheStub( + { + "quote:GLD": { + "symbol": "GLD", + # missing price, change, etc. + } + } + ) + service = DataService(cache=cache) + + quote = await service.get_quote("GLD") + + # Should still normalize what's present + assert quote["symbol"] == "GLD" + assert quote["quote_unit"] == "share" + # Missing fields remain missing for fail-closed handling upstream + + @pytest.mark.asyncio + async def test_get_quote_ignores_mismatched_cached_symbol_and_fetches_fresh(self) -> None: + """Symbol-mismatched cached quotes should be discarded, not rewritten.""" + cache = _CacheStub({"quote:GLD": {"symbol": "SLV", "price": 28.5, "source": "cache"}}) + service = DataService(cache=cache) + + async def _fake_fetch(symbol: str) -> dict[str, object]: + return {"symbol": symbol, "price": 404.19, "source": "provider"} + + service._fetch_quote = _fake_fetch # type: ignore[method-assign] + + quote = await service.get_quote("GLD") + + assert quote["symbol"] == "GLD" + assert quote["source"] == "provider" + assert quote["quote_unit"] == "share" + + +class TestCacheSerializationBoundaries: + """Test cache serialization behavior and JSON expectations.""" + + @pytest.mark.asyncio + async def test_cache_roundtrip_preserves_quote_structure(self) -> None: + """Cache should preserve quote structure through JSON roundtrip.""" + cache = _CacheStub() + + # Simulate a fresh quote fetch + quote = { + "symbol": "GLD", + "price": 200.0, + "quote_unit": "share", + "change": 1.5, + "change_percent": 0.75, + "updated_at": datetime.now(timezone.utc).isoformat(), + "source": "yfinance", + } + + # Store and retrieve + await cache.set_json("quote:GLD", quote) + retrieved = await cache.get_json("quote:GLD") + + assert retrieved is not None + assert retrieved["symbol"] == "GLD" + assert retrieved["price"] == 200.0 + assert retrieved["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_cache_handles_datetime_in_payload(self) -> None: + """Cache serialization should handle datetime objects in payloads. + + Note: _CacheStub stores raw Python objects. The real Redis cache + serializes via json.dumps with a datetime default handler. + This test documents the expected behavior for the real cache. + """ + cache = _CacheStub() + + quote_with_dt = { + "symbol": "GLD", + "price": 200.0, + "quote_unit": "share", + "updated_at": datetime.now(timezone.utc), + } + + # In real usage, DataService always passes ISO strings to cache + # (see _fetch_quote and other quote producers) + # The stub just passes through the object + await cache.set_json("quote:GLD", quote_with_dt) + retrieved = await cache.get_json("quote:GLD") + + assert retrieved is not None + assert retrieved["symbol"] == "GLD" + # Stub preserves the datetime object; real cache would serialize to ISO string + # What matters is that retrieval works without error + assert "updated_at" in retrieved + + @pytest.mark.asyncio + async def test_cache_returns_none_for_missing_key(self) -> None: + """Cache should return None for missing keys.""" + cache = _CacheStub() + + result = await cache.get_json("quote:NONEXISTENT") + assert result is None + + @pytest.mark.asyncio + async def test_cache_handles_empty_dict(self) -> None: + """Cache should handle empty dict payloads.""" + cache = _CacheStub() + await cache.set_json("quote:EMPTY", {}) + result = await cache.get_json("quote:EMPTY") + assert result == {} + + +class TestQuotePayloadValidation: + """Test validation behavior for quote payloads at cache boundary.""" + + @pytest.mark.asyncio + async def test_get_quote_validates_cached_dict_type(self) -> None: + """Non-dict cached values should be ignored.""" + cache = _CacheStub({"quote:GLD": "not-a-dict"}) # type: ignore[dict-item] + service = DataService(cache=cache) + + # Should not use the invalid cached value + # (will fall through to fetch, which will use fallback) + quote = await service.get_quote("GLD") + assert quote["symbol"] == "GLD" + assert quote["quote_unit"] == "share" + + @pytest.mark.asyncio + async def test_get_quote_handles_list_cached_value(self) -> None: + """List cached values should be ignored for quote lookups.""" + cache = _CacheStub({"quote:GLD": [1, 2, 3]}) # type: ignore[dict-item] + service = DataService(cache=cache) + + quote = await service.get_quote("GLD") + assert quote["symbol"] == "GLD" + + @pytest.mark.asyncio + async def test_normalize_handles_empty_payload(self) -> None: + """Empty payload should be handled gracefully.""" + payload = {} + result = DataService._normalize_quote_payload(payload, "GLD") + assert result["symbol"] == "GLD" + assert result["quote_unit"] == "share"