feat(CORE-001D2A): tighten quote provider cache normalization
This commit is contained in:
@@ -53,10 +53,14 @@ class DataService:
|
||||
cache_key = f"quote:{normalized_symbol}"
|
||||
cached = await self.cache.get_json(cache_key)
|
||||
if cached and isinstance(cached, dict):
|
||||
normalized_cached = self._normalize_quote_payload(cached, normalized_symbol)
|
||||
if normalized_cached != cached:
|
||||
await self.cache.set_json(cache_key, normalized_cached)
|
||||
return normalized_cached
|
||||
try:
|
||||
normalized_cached = self._normalize_quote_payload(cached, normalized_symbol)
|
||||
except ValueError:
|
||||
normalized_cached = None
|
||||
if normalized_cached is not None:
|
||||
if normalized_cached != cached:
|
||||
await self.cache.set_json(cache_key, normalized_cached)
|
||||
return normalized_cached
|
||||
|
||||
quote = self._normalize_quote_payload(await self._fetch_quote(normalized_symbol), normalized_symbol)
|
||||
await self.cache.set_json(cache_key, quote)
|
||||
@@ -365,11 +369,43 @@ class DataService:
|
||||
|
||||
@staticmethod
|
||||
def _normalize_quote_payload(payload: dict[str, Any], symbol: str) -> dict[str, Any]:
|
||||
normalized = dict(payload)
|
||||
"""Normalize provider/cache quote payload to explicit contract.
|
||||
|
||||
This is the named boundary adapter between external float-heavy provider
|
||||
payloads and internal quote handling. It ensures:
|
||||
|
||||
- symbol is always present and uppercased
|
||||
- GLD quotes have explicit quote_unit='share' metadata
|
||||
- Non-GLD symbols pass through without auto-assigned units
|
||||
|
||||
Fail-closed: missing/invalid fields are preserved for upstream handling
|
||||
rather than silently defaulted. Type conversion is not performed here.
|
||||
|
||||
Args:
|
||||
payload: Raw quote dict from cache or provider (float-heavy)
|
||||
symbol: Expected symbol (used as fallback if missing from payload)
|
||||
|
||||
Returns:
|
||||
Normalized quote dict with explicit symbol and GLD quote_unit
|
||||
"""
|
||||
normalized: dict[str, Any] = dict(payload)
|
||||
normalized_symbol = symbol.upper()
|
||||
normalized["symbol"] = str(normalized.get("symbol", normalized_symbol)).upper()
|
||||
|
||||
# Ensure symbol is always present and normalized.
|
||||
# Missing symbol is repaired from the requested key; explicit mismatches are rejected.
|
||||
raw_symbol = normalized.get("symbol", normalized_symbol)
|
||||
normalized_payload_symbol = str(raw_symbol).upper() if raw_symbol is not None else normalized_symbol
|
||||
if raw_symbol is not None and normalized_payload_symbol != normalized_symbol:
|
||||
raise ValueError(
|
||||
f"Quote payload symbol mismatch: expected {normalized_symbol}, got {normalized_payload_symbol}"
|
||||
)
|
||||
normalized["symbol"] = normalized_payload_symbol
|
||||
|
||||
# Add explicit quote_unit for GLD (CORE-002A/B compatibility)
|
||||
# Repair missing or empty unit metadata, but preserve explicit non-empty values
|
||||
if normalized["symbol"] == "GLD" and not normalized.get("quote_unit"):
|
||||
normalized["quote_unit"] = "share"
|
||||
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -14,6 +14,7 @@ priority_queue:
|
||||
- CORE-001D
|
||||
- CORE-002C
|
||||
- BT-003B
|
||||
- BT-003B
|
||||
- CORE-002
|
||||
- PORT-003
|
||||
- BT-002
|
||||
@@ -25,12 +26,12 @@ priority_queue:
|
||||
- OPS-001
|
||||
- BT-003
|
||||
recently_completed:
|
||||
- CORE-001D2A
|
||||
- CORE-002B
|
||||
- CORE-002A
|
||||
- CORE-001D1
|
||||
- SEC-001
|
||||
- SEC-001A
|
||||
- CORE-001A
|
||||
states:
|
||||
backlog:
|
||||
- DATA-002A
|
||||
@@ -64,6 +65,7 @@ states:
|
||||
- CORE-001A
|
||||
- CORE-001B
|
||||
- CORE-001C
|
||||
- CORE-001D2A
|
||||
- CORE-002A
|
||||
- CORE-002B
|
||||
blocked: []
|
||||
|
||||
@@ -15,6 +15,7 @@ acceptance_criteria:
|
||||
- Remaining raw-float domain hotspots are identified or removed.
|
||||
technical_notes:
|
||||
- `CORE-001D1` is complete: portfolio/workspace persistence now uses an explicit unit-aware schema with strict validation and atomic saves.
|
||||
- Remaining focus is `CORE-001D2` provider/cache normalization and `CORE-001D3` service entrypoint tightening.
|
||||
- `CORE-001D2A` is complete: DataService quote/provider cache normalization is now a named boundary adapter with explicit symbol mismatch rejection and GLD quote-unit repair.
|
||||
- Remaining focus is the rest of `CORE-001D2` provider/cache normalization plus `CORE-001D3` service entrypoint tightening.
|
||||
- Pre-launch policy: unit-aware schema changes may be breaking until persistence is considered live; old flat payloads may fail loudly instead of being migrated.
|
||||
- See `docs/CORE-001D_BOUNDARY_CLEANUP_PLAN.md` for the current hotspot inventory and proposed sub-slices.
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
id: CORE-001D2A
|
||||
title: Provider and Cache Quote Normalization Boundary
|
||||
status: done
|
||||
priority: P1
|
||||
effort: S
|
||||
depends_on:
|
||||
- CORE-001D1
|
||||
- CORE-002A
|
||||
tags:
|
||||
- core
|
||||
- decimal
|
||||
- cache
|
||||
- provider
|
||||
summary: DataService quote payload normalization is now an explicit provider/cache boundary with tested repair and rejection rules.
|
||||
completed_notes:
|
||||
- Added a named quote normalization adapter in `app/services/data_service.py` for cache/provider payloads.
|
||||
- GLD quotes repair missing or empty `quote_unit` metadata to preserve CORE-002 behavior.
|
||||
- Explicit symbol mismatches are now rejected instead of being silently rewritten.
|
||||
- Mismatched cached quotes are discarded and refreshed from the provider path.
|
||||
- Added focused boundary tests in `tests/test_data_service_normalization.py`.
|
||||
- Validated with focused pytest coverage and `make build` on `main`.
|
||||
335
tests/test_data_service_normalization.py
Normal file
335
tests/test_data_service_normalization.py
Normal file
@@ -0,0 +1,335 @@
|
||||
"""Tests for DataService quote payload normalization and cache serialization boundaries."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from app.services.cache import CacheService
|
||||
from app.services.data_service import DataService
|
||||
|
||||
|
||||
class _CacheStub(CacheService):
|
||||
"""In-memory cache stub for unit testing."""
|
||||
|
||||
def __init__(self, initial: dict[str, object] | None = None) -> None:
|
||||
self._store = dict(initial or {})
|
||||
self.write_count = 0
|
||||
|
||||
async def get_json(self, key: str): # type: ignore[override]
|
||||
return self._store.get(key)
|
||||
|
||||
async def set_json(self, key: str, value): # type: ignore[override]
|
||||
self._store[key] = value
|
||||
self.write_count += 1
|
||||
return True
|
||||
|
||||
|
||||
class TestQuotePayloadNormalization:
|
||||
"""Test quote payload normalization edge cases and fail-closed behavior."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_adds_missing_symbol(self) -> None:
|
||||
"""Normalization should add symbol when missing from payload."""
|
||||
payload = {"price": 100.0, "change": 1.0}
|
||||
result = DataService._normalize_quote_payload(payload, "AAPL")
|
||||
assert result["symbol"] == "AAPL"
|
||||
assert result["price"] == 100.0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_uppercases_symbol(self) -> None:
|
||||
"""Normalization should uppercase symbol for consistency."""
|
||||
payload = {"symbol": "gld", "price": 100.0}
|
||||
result = DataService._normalize_quote_payload(payload, "gld")
|
||||
assert result["symbol"] == "GLD"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_rejects_explicit_symbol_mismatch(self) -> None:
|
||||
"""Explicit payload/key mismatches should be rejected, not silently rewritten."""
|
||||
payload = {"symbol": "SLV", "price": 100.0}
|
||||
with pytest.raises(ValueError, match="Quote payload symbol mismatch"):
|
||||
DataService._normalize_quote_payload(payload, "GLD")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_preserves_existing_quote_unit(self) -> None:
|
||||
"""Normalization should not overwrite existing quote_unit."""
|
||||
payload = {"symbol": "AAPL", "price": 100.0, "quote_unit": "share"}
|
||||
result = DataService._normalize_quote_payload(payload, "AAPL")
|
||||
assert result["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_repairs_empty_gld_quote_unit(self) -> None:
|
||||
"""Legacy or malformed empty GLD quote units should be repaired."""
|
||||
payload = {"symbol": "GLD", "price": 200.0, "quote_unit": ""}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_adds_gld_share_unit(self) -> None:
|
||||
"""GLD quotes should get quote_unit=share when missing."""
|
||||
payload = {"symbol": "GLD", "price": 200.0}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_does_not_add_unit_for_unknown_symbols(self) -> None:
|
||||
"""Non-GLD symbols should not get auto-assigned quote_unit."""
|
||||
payload = {"symbol": "BTC-USD", "price": 50000.0}
|
||||
result = DataService._normalize_quote_payload(payload, "BTC-USD")
|
||||
assert "quote_unit" not in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_handles_missing_price_gracefully(self) -> None:
|
||||
"""Missing price should be preserved as-is for fail-closed handling."""
|
||||
payload = {"symbol": "GLD"} # type: ignore[dict-item]
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert "price" not in result
|
||||
assert result["symbol"] == "GLD"
|
||||
assert result["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_handles_none_values(self) -> None:
|
||||
"""None values in payload should be preserved for upstream handling."""
|
||||
payload = {"symbol": "GLD", "price": None, "change": None}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["symbol"] == "GLD"
|
||||
assert result["price"] is None
|
||||
assert result["change"] is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_preserves_extra_fields(self) -> None:
|
||||
"""Extra fields should pass through unchanged."""
|
||||
payload = {
|
||||
"symbol": "GLD",
|
||||
"price": 200.0,
|
||||
"custom_field": "custom_value",
|
||||
"nested": {"key": "value"},
|
||||
}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["custom_field"] == "custom_value"
|
||||
assert result["nested"] == {"key": "value"}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_quote_handles_string_price(self) -> None:
|
||||
"""String prices should pass through (normalization is not type conversion)."""
|
||||
payload = {"symbol": "GLD", "price": "200.0"}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["price"] == "200.0"
|
||||
assert result["quote_unit"] == "share"
|
||||
|
||||
|
||||
class TestLegacyCachedQuoteHandling:
|
||||
"""Test handling of legacy cached quotes without quote_unit metadata."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_upgrades_legacy_gld_quote(self) -> None:
|
||||
"""Legacy GLD quotes without quote_unit should be upgraded in cache."""
|
||||
cache = _CacheStub(
|
||||
{
|
||||
"quote:GLD": {
|
||||
"symbol": "GLD",
|
||||
"price": 404.19,
|
||||
"change": 0.0,
|
||||
"change_percent": 0.0,
|
||||
"updated_at": "2026-03-25T00:00:00+00:00",
|
||||
"source": "cache",
|
||||
}
|
||||
}
|
||||
)
|
||||
service = DataService(cache=cache)
|
||||
|
||||
quote = await service.get_quote("GLD")
|
||||
|
||||
assert quote["quote_unit"] == "share"
|
||||
assert cache._store["quote:GLD"]["quote_unit"] == "share"
|
||||
# Should have written back the normalized version
|
||||
assert cache.write_count >= 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_preserves_already_normalized_gld_quote(self) -> None:
|
||||
"""Already-normalized GLD quotes should not trigger cache rewrite."""
|
||||
cache = _CacheStub(
|
||||
{
|
||||
"quote:GLD": {
|
||||
"symbol": "GLD",
|
||||
"price": 404.19,
|
||||
"quote_unit": "share",
|
||||
"change": 0.0,
|
||||
"change_percent": 0.0,
|
||||
"updated_at": "2026-03-25T00:00:00+00:00",
|
||||
"source": "cache",
|
||||
}
|
||||
}
|
||||
)
|
||||
service = DataService(cache=cache)
|
||||
|
||||
quote = await service.get_quote("GLD")
|
||||
|
||||
assert quote["quote_unit"] == "share"
|
||||
# Should not have rewritten the cache
|
||||
assert cache.write_count == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_preserves_missing_unit_for_non_gld(self) -> None:
|
||||
"""Non-GLD symbols should not get auto-assigned quote_unit."""
|
||||
cache = _CacheStub(
|
||||
{
|
||||
"quote:BTC-USD": {
|
||||
"symbol": "BTC-USD",
|
||||
"price": 70000.0,
|
||||
"updated_at": "2026-03-25T00:00:00+00:00",
|
||||
"source": "cache",
|
||||
}
|
||||
}
|
||||
)
|
||||
service = DataService(cache=cache, default_symbol="BTC-USD")
|
||||
|
||||
quote = await service.get_quote("BTC-USD")
|
||||
|
||||
assert "quote_unit" not in quote
|
||||
assert cache.write_count == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_handles_malformed_cached_quote(self) -> None:
|
||||
"""Malformed cached quotes (missing critical fields) should pass through."""
|
||||
cache = _CacheStub(
|
||||
{
|
||||
"quote:GLD": {
|
||||
"symbol": "GLD",
|
||||
# missing price, change, etc.
|
||||
}
|
||||
}
|
||||
)
|
||||
service = DataService(cache=cache)
|
||||
|
||||
quote = await service.get_quote("GLD")
|
||||
|
||||
# Should still normalize what's present
|
||||
assert quote["symbol"] == "GLD"
|
||||
assert quote["quote_unit"] == "share"
|
||||
# Missing fields remain missing for fail-closed handling upstream
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_ignores_mismatched_cached_symbol_and_fetches_fresh(self) -> None:
|
||||
"""Symbol-mismatched cached quotes should be discarded, not rewritten."""
|
||||
cache = _CacheStub({"quote:GLD": {"symbol": "SLV", "price": 28.5, "source": "cache"}})
|
||||
service = DataService(cache=cache)
|
||||
|
||||
async def _fake_fetch(symbol: str) -> dict[str, object]:
|
||||
return {"symbol": symbol, "price": 404.19, "source": "provider"}
|
||||
|
||||
service._fetch_quote = _fake_fetch # type: ignore[method-assign]
|
||||
|
||||
quote = await service.get_quote("GLD")
|
||||
|
||||
assert quote["symbol"] == "GLD"
|
||||
assert quote["source"] == "provider"
|
||||
assert quote["quote_unit"] == "share"
|
||||
|
||||
|
||||
class TestCacheSerializationBoundaries:
|
||||
"""Test cache serialization behavior and JSON expectations."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_roundtrip_preserves_quote_structure(self) -> None:
|
||||
"""Cache should preserve quote structure through JSON roundtrip."""
|
||||
cache = _CacheStub()
|
||||
|
||||
# Simulate a fresh quote fetch
|
||||
quote = {
|
||||
"symbol": "GLD",
|
||||
"price": 200.0,
|
||||
"quote_unit": "share",
|
||||
"change": 1.5,
|
||||
"change_percent": 0.75,
|
||||
"updated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "yfinance",
|
||||
}
|
||||
|
||||
# Store and retrieve
|
||||
await cache.set_json("quote:GLD", quote)
|
||||
retrieved = await cache.get_json("quote:GLD")
|
||||
|
||||
assert retrieved is not None
|
||||
assert retrieved["symbol"] == "GLD"
|
||||
assert retrieved["price"] == 200.0
|
||||
assert retrieved["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_handles_datetime_in_payload(self) -> None:
|
||||
"""Cache serialization should handle datetime objects in payloads.
|
||||
|
||||
Note: _CacheStub stores raw Python objects. The real Redis cache
|
||||
serializes via json.dumps with a datetime default handler.
|
||||
This test documents the expected behavior for the real cache.
|
||||
"""
|
||||
cache = _CacheStub()
|
||||
|
||||
quote_with_dt = {
|
||||
"symbol": "GLD",
|
||||
"price": 200.0,
|
||||
"quote_unit": "share",
|
||||
"updated_at": datetime.now(timezone.utc),
|
||||
}
|
||||
|
||||
# In real usage, DataService always passes ISO strings to cache
|
||||
# (see _fetch_quote and other quote producers)
|
||||
# The stub just passes through the object
|
||||
await cache.set_json("quote:GLD", quote_with_dt)
|
||||
retrieved = await cache.get_json("quote:GLD")
|
||||
|
||||
assert retrieved is not None
|
||||
assert retrieved["symbol"] == "GLD"
|
||||
# Stub preserves the datetime object; real cache would serialize to ISO string
|
||||
# What matters is that retrieval works without error
|
||||
assert "updated_at" in retrieved
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_returns_none_for_missing_key(self) -> None:
|
||||
"""Cache should return None for missing keys."""
|
||||
cache = _CacheStub()
|
||||
|
||||
result = await cache.get_json("quote:NONEXISTENT")
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_handles_empty_dict(self) -> None:
|
||||
"""Cache should handle empty dict payloads."""
|
||||
cache = _CacheStub()
|
||||
await cache.set_json("quote:EMPTY", {})
|
||||
result = await cache.get_json("quote:EMPTY")
|
||||
assert result == {}
|
||||
|
||||
|
||||
class TestQuotePayloadValidation:
|
||||
"""Test validation behavior for quote payloads at cache boundary."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_validates_cached_dict_type(self) -> None:
|
||||
"""Non-dict cached values should be ignored."""
|
||||
cache = _CacheStub({"quote:GLD": "not-a-dict"}) # type: ignore[dict-item]
|
||||
service = DataService(cache=cache)
|
||||
|
||||
# Should not use the invalid cached value
|
||||
# (will fall through to fetch, which will use fallback)
|
||||
quote = await service.get_quote("GLD")
|
||||
assert quote["symbol"] == "GLD"
|
||||
assert quote["quote_unit"] == "share"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_quote_handles_list_cached_value(self) -> None:
|
||||
"""List cached values should be ignored for quote lookups."""
|
||||
cache = _CacheStub({"quote:GLD": [1, 2, 3]}) # type: ignore[dict-item]
|
||||
service = DataService(cache=cache)
|
||||
|
||||
quote = await service.get_quote("GLD")
|
||||
assert quote["symbol"] == "GLD"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_normalize_handles_empty_payload(self) -> None:
|
||||
"""Empty payload should be handled gracefully."""
|
||||
payload = {}
|
||||
result = DataService._normalize_quote_payload(payload, "GLD")
|
||||
assert result["symbol"] == "GLD"
|
||||
assert result["quote_unit"] == "share"
|
||||
Reference in New Issue
Block a user