Implement caching strategy for analysis results and enhance hash-aware data fetching
This commit is contained in:
+96
-51
@@ -1,4 +1,21 @@
|
||||
"""Service layer for cache-aware fetching of FINN ads and Eiendom.no units."""
|
||||
"""Service layer for cache-aware fetching of FINN ads and Eiendom.no units.
|
||||
|
||||
Hash-aware fetch pattern
|
||||
------------------------
|
||||
Every ``get_or_fetch_*`` function follows the same contract:
|
||||
|
||||
1. TTL check -- if cached row is fresh enough, return it directly.
|
||||
2. Remote fetch -- if TTL expired (or force_refresh), fetch from network.
|
||||
3. Hash check -- compare incoming payload hash to stored hash.
|
||||
If equal the remote data has not changed; skip the DB write so that
|
||||
the analysis_cache entry for this finnkode remains valid.
|
||||
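4. Write + invalidate -- if hash differs, persist the new row. For FINN
|
||||
ads, also delete any cached analysis (it will be recomputed on next call
|
||||
to ``analyze_ad``). Eiendom.no units and comps only log the change; per
|
||||
the code comments, the deps_hash check in ``get_analysis()`` covers them.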
|
||||
|
||||
This means analysis results survive TTL resets as long as the remote
|
||||
data has not actually changed.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
@@ -10,11 +27,12 @@ from .cache import (
|
||||
get_finn_ad,
|
||||
get_similar_units as get_cached_similar_units,
|
||||
init_db,
|
||||
invalidate_analysis,
|
||||
save_eiendom_unit,
|
||||
save_finn_ad,
|
||||
save_similar_units,
|
||||
)
|
||||
from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH
|
||||
from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH, FINN_CACHE_TTL_AD_HOURS
|
||||
from .eiendom_no import (
|
||||
build_unit_vector,
|
||||
decode_unit_vector,
|
||||
@@ -29,12 +47,25 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
    """Get FinnAd from cache or fetch fresh. Never returns None.

    On a TTL expiry or force_refresh the ad is re-fetched from FINN.
    If the remote payload hash matches the stored hash the DB row is
    NOT updated, so analysis_cache entries for this finnkode stay valid.
    If the hash differs the row is updated and any cached analysis is
    invalidated.

    Args:
        finnkode: FINN listing id used as the cache key.
        force_refresh: When true, skip the cache lookup and always fetch
            from the remote.

    Returns:
        The cached ad when it is still within the configured TTL,
        otherwise the freshly fetched ad.
    """
    conn = init_db(FINN_CACHE_PATH)

    # Serve from cache unless the caller explicitly asked for fresh data.
    if not force_refresh:
        cached = get_finn_ad(conn, finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
        if cached is not None:
            return cached

    # Cache miss or force_refresh: fetch from remote.
    ad = await fetch_ad_details(finnkode)

    # save_finn_ad returns a pair; its second item is used as the "changed"
    # flag. Only a changed row invalidates the analysis for this finnkode.
    _, changed = save_finn_ad(conn, ad)
    if changed:
        logger.debug("finn_ad %s updated -- invalidating analysis cache", finnkode)
        invalidate_analysis(conn, finnkode)
    return ad
|
||||
|
||||
|
||||
@@ -67,10 +98,13 @@ async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
|
||||
|
||||
ad.eiendom_unit_code = unit.unit_code
|
||||
conn = init_db(FINN_CACHE_PATH)
|
||||
save_finn_ad(conn, ad) # persist backfill; do NOT cache `unit` here --
|
||||
# the resolver returns a partial record (code +
|
||||
# address + coords). The full unit comes from
|
||||
# get_or_fetch_eiendom_unit -> get_unit().
|
||||
|
||||
# Persist the backfilled unit_code. If the hash changes (new field),
|
||||
# invalidate the analysis cache so it is recomputed with the enriched ad.
|
||||
_, changed = save_finn_ad(conn, ad)
|
||||
if changed:
|
||||
invalidate_analysis(conn, ad.finnkode)
|
||||
|
||||
logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, unit.unit_code)
|
||||
return ad.eiendom_unit_code
|
||||
|
||||
@@ -78,13 +112,31 @@ async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
|
||||
async def get_or_fetch_eiendom_unit(
    unit_code: str, force_refresh: bool = False
) -> EiendomUnit | None:
    """Get EiendomUnit from cache or fetch fresh.

    Hash-aware: if the remote payload is identical to what is stored,
    the DB row is not updated (analysis_cache stays valid).

    Args:
        unit_code: Eiendom.no unit code used as the cache key.
        force_refresh: When true, skip the cache lookup and always fetch
            from the remote.

    Returns:
        The cached unit when it is still fresh, otherwise the freshly
        fetched unit, or None when the remote lookup yields nothing.
    """
    conn = init_db(FINN_CACHE_PATH)

    # Serve from cache unless the caller explicitly asked for fresh data.
    # NOTE(review): uses the configured Eiendom.no TTL (the same constant the
    # similar-units lookup uses) instead of a hard-coded 24 -- confirm the
    # configured value is the intended freshness window for units.
    if not force_refresh:
        cached = get_cached_eiendom_unit(
            conn, unit_code, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
        )
        if cached is not None:
            return cached

    # Cache miss or force_refresh: fetch from remote.
    unit = await get_unit(unit_code)
    if unit is None:
        # Nothing to persist; the caller receives None.
        return None

    _, changed = save_eiendom_unit(conn, unit)
    if changed:
        logger.debug(
            "eiendom_unit %s updated -- analysis caches for linked finnkodes may be stale",
            unit_code,
        )
        # We don't have a direct finnkode -> unit_code reverse map in the
        # DB yet, so we cannot invalidate analysis here. The deps_hash
        # mismatch in get_analysis() handles this automatically.
    return unit
|
||||
|
||||
|
||||
@@ -93,43 +145,39 @@ async def get_or_fetch_similar_units(
|
||||
) -> list[SimilarUnit]:
|
||||
"""Get similar units (comps) from cache or fetch fresh.
|
||||
|
||||
Fetches the unit first to get the unit_vector, then checks cache for similar
|
||||
units by (unit_code, listing_status). On cache miss, fetches fresh from
|
||||
Eiendom.no and saves to cache.
|
||||
Hash-aware: identical remote payloads do not trigger a DB write,
|
||||
so the analysis_cache entry for any finnkode that uses these comps
|
||||
remains valid.
|
||||
"""
|
||||
conn = init_db(FINN_CACHE_PATH)
|
||||
|
||||
# First, ensure we have the unit to build its vector
|
||||
# Ensure we have the unit to build its vector.
|
||||
unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
|
||||
if unit is None:
|
||||
return []
|
||||
|
||||
# Check cache for similar units (unless force_refresh)
|
||||
if not force_refresh:
|
||||
cached_similar = get_cached_similar_units(
|
||||
conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
|
||||
)
|
||||
if cached_similar:
|
||||
logger.debug(
|
||||
"Using cached similar units for %s (status=%s)",
|
||||
unit_code,
|
||||
listing_status,
|
||||
"Using cached similar units for %s (status=%s)", unit_code, listing_status
|
||||
)
|
||||
return cached_similar
|
||||
|
||||
# Cache miss or force_refresh: fetch fresh
|
||||
# Cache miss or force_refresh: fetch from remote.
|
||||
vector = build_unit_vector(unit)
|
||||
similar = await get_similar_units(vector, listing_status=listing_status)
|
||||
|
||||
# Save to cache
|
||||
if similar:
|
||||
save_similar_units(conn, unit_code, listing_status, similar)
|
||||
logger.debug(
|
||||
"Cached %d similar units for %s (status=%s)",
|
||||
len(similar),
|
||||
unit_code,
|
||||
listing_status,
|
||||
)
|
||||
_, changed = save_similar_units(conn, unit_code, listing_status, similar)
|
||||
if changed:
|
||||
logger.debug(
|
||||
"similar_units %s/%s updated -- analysis caches may be stale",
|
||||
unit_code,
|
||||
listing_status,
|
||||
)
|
||||
|
||||
return similar
|
||||
|
||||
@@ -170,10 +218,8 @@ async def analyze_search(
|
||||
) -> dict[str, Any]:
|
||||
"""Analyze a FINN search URL and return a ranked shortlist.
|
||||
|
||||
NOTE: enrichment for search results lives in analysis.py. If that path
|
||||
also reports `eiendom_enriched: 0`, it has the same root cause -- each
|
||||
card's eiendom_unit_code must be resolved via ensure_eiendom_unit_code
|
||||
(or search_unit_from_finn_url) before the enrichment gate.
|
||||
Individual ad analyses are served from analysis_cache when the
|
||||
underlying data has not changed.
|
||||
"""
|
||||
return await run_analysis_search(
|
||||
search_url,
|
||||
@@ -198,15 +244,15 @@ async def analyze_ad(
|
||||
unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
|
||||
|
||||
result: dict[str, Any] = {
|
||||
"ad": ad.model_dump(),
|
||||
"ad": ad.model_dump(mode="json"),
|
||||
}
|
||||
if unit_code:
|
||||
unit = await get_or_fetch_eiendom_unit(unit_code)
|
||||
if unit:
|
||||
result["eiendom_unit"] = unit.model_dump()
|
||||
result["eiendom_unit"] = unit.model_dump(mode="json")
|
||||
if include_similar_units:
|
||||
similar = await get_or_fetch_similar_units(unit_code)
|
||||
result["similar_units"] = [s.model_dump() for s in similar]
|
||||
result["similar_units"] = [s.model_dump(mode="json") for s in similar]
|
||||
return result
|
||||
|
||||
|
||||
@@ -220,14 +266,14 @@ async def analyze_ad_against_comps(
|
||||
unit_code = await ensure_eiendom_unit_code(ad)
|
||||
|
||||
result: dict[str, Any] = {
|
||||
"ad": ad.model_dump(),
|
||||
"ad": ad.model_dump(mode="json"),
|
||||
}
|
||||
if unit_code:
|
||||
unit = await get_or_fetch_eiendom_unit(unit_code)
|
||||
if unit:
|
||||
result["eiendom_unit"] = unit.model_dump()
|
||||
result["eiendom_unit"] = unit.model_dump(mode="json")
|
||||
comps = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
|
||||
result["comparable_units"] = [c.model_dump() for c in comps]
|
||||
result["comparable_units"] = [c.model_dump(mode="json") for c in comps]
|
||||
return result
|
||||
|
||||
|
||||
@@ -235,7 +281,6 @@ async def find_similar_to_liked(
|
||||
finnkode: str, *, mode: str = "recommendations", listing_status: str = "FOR_SALE"
|
||||
) -> dict[str, Any]:
|
||||
"""Find properties similar to a listing the user has liked."""
|
||||
# Requires that feedback.verdict = "liked" exists for this finnkode
|
||||
ad = await get_or_fetch_ad(finnkode)
|
||||
|
||||
unit_code = await ensure_eiendom_unit_code(ad)
|
||||
@@ -252,8 +297,8 @@ async def find_similar_to_liked(
|
||||
|
||||
similar = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
|
||||
return {
|
||||
"base_ad": ad.model_dump(),
|
||||
"similar_listings": [s.model_dump() for s in similar],
|
||||
"base_ad": ad.model_dump(mode="json"),
|
||||
"similar_listings": [s.model_dump(mode="json") for s in similar],
|
||||
"mode": mode,
|
||||
}
|
||||
|
||||
@@ -269,16 +314,16 @@ async def compare_ads(
|
||||
# Resolve before model_dump() -- see analyze_ad.
|
||||
unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
|
||||
|
||||
ad_data = ad.model_dump()
|
||||
ad_data = ad.model_dump(mode="json")
|
||||
if unit_code:
|
||||
unit = await get_or_fetch_eiendom_unit(unit_code)
|
||||
if unit:
|
||||
ad_data["eiendom_unit"] = unit.model_dump()
|
||||
ad_data["eiendom_unit"] = unit.model_dump(mode="json")
|
||||
if include_comps:
|
||||
comps = await get_or_fetch_similar_units(
|
||||
unit_code, listing_status="RECENTLY_SOLD"
|
||||
)
|
||||
ad_data["comps"] = [c.model_dump() for c in comps]
|
||||
ad_data["comps"] = [c.model_dump(mode="json") for c in comps]
|
||||
|
||||
ads.append(ad_data)
|
||||
|
||||
@@ -318,4 +363,4 @@ def get_shortlist(run_id: int | None = None, limit: int = 10) -> dict[str, Any]:
|
||||
def get_new_ads_since_last_run(search_url: str) -> dict[str, Any]:
    """Detect new/removed/changed listings vs the previous run.

    Placeholder implementation: run-to-run diffing is not implemented yet,
    so the three listing lists are always empty and the search URL is
    echoed back unchanged.

    Args:
        search_url: The FINN search URL the comparison is requested for.

    Returns:
        A dict with the keys ``new_ads``, ``removed_ads``, ``changed_ads``
        (each a fresh empty list) and ``search_url``.
    """
    # TODO: implement via search_runs table in cache.py
    return {"new_ads": [], "removed_ads": [], "changed_ads": [], "search_url": search_url}
|
||||
Reference in New Issue
Block a user