366 lines
13 KiB
Python
366 lines
13 KiB
Python
"""Service layer for cache-aware fetching of FINN ads and Eiendom.no units.

Hash-aware fetch pattern
------------------------
Every ``get_or_fetch_*`` function follows the same contract:

1. TTL check -- if the cached row is fresh enough, return it directly.
2. Remote fetch -- if the TTL has expired (or ``force_refresh`` is set),
   fetch from the network.
3. Hash check -- compare the incoming payload hash to the stored hash.
   If they are equal the remote data has not changed; skip the DB write so
   that the analysis_cache entry for this finnkode remains valid.
4. Write + invalidate -- if the hash differs, persist the new row and
   delete any cached analysis (it will be recomputed on the next call to
   ``analyze_ad``). For Eiendom.no units and similar units there is no
   finnkode reverse map yet, so those paths only log the change and rely
   on the deps_hash mismatch check in ``get_analysis()``.

This means analysis results survive TTL resets as long as the remote
data has not actually changed.
"""
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from .ad import fetch_ad_details
|
|
from .analysis import analyze_search as run_analysis_search
|
|
from .cache import (
|
|
get_eiendom_unit as get_cached_eiendom_unit,
|
|
get_finn_ad,
|
|
get_similar_units as get_cached_similar_units,
|
|
init_db,
|
|
invalidate_analysis,
|
|
save_eiendom_unit,
|
|
save_finn_ad,
|
|
save_similar_units,
|
|
)
|
|
from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH, FINN_CACHE_TTL_AD_HOURS
|
|
from .eiendom_no import (
|
|
build_unit_vector,
|
|
decode_unit_vector,
|
|
get_similar_units,
|
|
get_unit,
|
|
search_unit_from_finn_url,
|
|
)
|
|
from .feedback import save_feedback as save_feedback_impl
|
|
from .models import EiendomUnit, FinnAd, SimilarUnit
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
    """Return the FinnAd for ``finnkode``, preferring a fresh cache row.

    Unless ``force_refresh`` is set, the cache is consulted first with a TTL
    of ``FINN_CACHE_TTL_AD_HOURS``; a hit is returned as-is. Otherwise the ad
    is fetched from FINN and handed to ``save_finn_ad``. When that call
    reports a change, the cached analysis for this finnkode is invalidated;
    when it reports no change, the analysis cache is left untouched.

    Args:
        finnkode: FINN listing identifier.
        force_refresh: Skip the cache lookup and always fetch remotely.

    Returns:
        The cached ad on a cache hit, otherwise the freshly fetched ad.
    """
    db = init_db(FINN_CACHE_PATH)

    if not force_refresh:
        cached_ad = get_finn_ad(db, finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
        if cached_ad is not None:
            return cached_ad

    # No usable cache row (or the caller forced a refresh): go to the network.
    fresh_ad = await fetch_ad_details(finnkode)
    _saved, payload_changed = save_finn_ad(db, fresh_ad)
    if not payload_changed:
        return fresh_ad

    logger.debug("finn_ad %s updated -- invalidating analysis cache", finnkode)
    invalidate_analysis(db, finnkode)
    return fresh_ad
|
|
|
|
|
|
async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
    """Make sure ``ad.eiendom_unit_code`` is set, resolving it if necessary.

    If the ad already carries a truthy ``eiendom_unit_code`` it is returned
    immediately. Otherwise the code is resolved from ``ad.url`` through
    ``search_unit_from_finn_url``. On success the ad is mutated in place,
    re-saved to the cache, and -- if the save reports a change -- the cached
    analysis for the ad's finnkode is invalidated.

    Callers that serialise the ad with ``model_dump()`` must call this first;
    otherwise the dump still contains ``eiendom_unit_code: None``.

    Args:
        ad: The FINN ad to backfill. Mutated in place on success.

    Returns:
        The unit code, or ``None`` when no Eiendom.no unit (or no unit code)
        could be resolved for the listing.
    """
    existing_code = ad.eiendom_unit_code
    if existing_code:
        return existing_code

    resolved = await search_unit_from_finn_url(ad.url)
    resolved_code = None if resolved is None else resolved.unit_code
    if not resolved_code:
        logger.info("No Eiendom.no unit resolved for finnkode %s", ad.finnkode)
        return None

    ad.eiendom_unit_code = resolved_code

    # Persist the backfill; a changed row means any cached analysis was
    # computed without the unit code and has to be dropped.
    db = init_db(FINN_CACHE_PATH)
    _saved, row_changed = save_finn_ad(db, ad)
    if row_changed:
        invalidate_analysis(db, ad.finnkode)

    logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, resolved_code)
    return ad.eiendom_unit_code
|
|
|
|
|
|
async def get_or_fetch_eiendom_unit(
    unit_code: str, force_refresh: bool = False
) -> EiendomUnit | None:
    """Get an EiendomUnit from cache or fetch it fresh.

    Unless ``force_refresh`` is set, the cache is consulted first using the
    configured ``EIENDOM_NO_CACHE_TTL_HOURS`` (the same TTL the similar-units
    cache uses) instead of a hard-coded value. On a miss the unit is fetched
    via ``get_unit`` and passed to ``save_eiendom_unit``; a reported change
    is only logged, see the comment below.

    Args:
        unit_code: Eiendom.no unit identifier.
        force_refresh: Skip the cache lookup and always fetch remotely.

    Returns:
        The cached or freshly fetched unit, or ``None`` when the remote
        fetch returns nothing.
    """
    conn = init_db(FINN_CACHE_PATH)

    if not force_refresh:
        cached_unit = get_cached_eiendom_unit(
            conn, unit_code, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
        )
        if cached_unit is not None:
            return cached_unit

    unit = await get_unit(unit_code)
    if unit is None:
        return None

    _, changed = save_eiendom_unit(conn, unit)
    if changed:
        logger.debug(
            "eiendom_unit %s updated -- analysis caches for linked finnkodes may be stale",
            unit_code,
        )
        # We don't have a direct finnkode → unit_code reverse map in the
        # DB yet, so we cannot invalidate analysis here. The deps_hash
        # mismatch in get_analysis() handles this automatically.
    return unit
|
|
|
|
|
|
async def get_or_fetch_similar_units(
    unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False
) -> list[SimilarUnit]:
    """Return similar units (comps) for ``unit_code``, cached when possible.

    The base unit is loaded first (honouring ``force_refresh``) because its
    vector is needed for the remote query; if it cannot be loaded an empty
    list is returned. Unless ``force_refresh`` is set, a non-empty cached
    result within ``EIENDOM_NO_CACHE_TTL_HOURS`` is returned directly.
    Otherwise comps are fetched remotely and, when non-empty, handed to
    ``save_similar_units``; a reported change is only logged.

    Args:
        unit_code: Eiendom.no unit identifier of the base unit.
        listing_status: Listing status filter passed to cache and remote.
        force_refresh: Skip cache lookups and always fetch remotely.

    Returns:
        The cached comps, or whatever the remote fetch returned.
    """
    db = init_db(FINN_CACHE_PATH)

    # The base unit is required to build the query vector.
    base_unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
    if base_unit is None:
        return []

    cached_comps = (
        None
        if force_refresh
        else get_cached_similar_units(
            db, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
        )
    )
    if cached_comps:
        logger.debug(
            "Using cached similar units for %s (status=%s)", unit_code, listing_status
        )
        return cached_comps

    # Empty/absent cache entry or forced refresh: query the remote service.
    query_vector = build_unit_vector(base_unit)
    fetched = await get_similar_units(query_vector, listing_status=listing_status)
    if not fetched:
        # Empty results are returned but never written to the cache.
        return fetched

    _saved, comps_changed = save_similar_units(db, unit_code, listing_status, fetched)
    if comps_changed:
        logger.debug(
            "similar_units %s/%s updated -- analysis caches may be stale",
            unit_code,
            listing_status,
        )
    return fetched
|
|
|
|
|
|
async def get_unit_images(unit_code: str, force_refresh: bool = False) -> dict[str, Any]:
    """Return the image list and a few descriptive fields for a unit.

    Args:
        unit_code: Eiendom.no unit identifier.
        force_refresh: Forwarded to ``get_or_fetch_eiendom_unit``.

    Returns:
        A dict with ``unit_code``, ``address``, ``unit_images`` (an empty
        list when the unit has none), ``property_type``, ``rooms`` and
        ``usable_area``.

    Raises:
        ValueError: If the unit cannot be loaded.
    """
    eiendom_unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
    if eiendom_unit is None:
        raise ValueError(f"Could not fetch Eiendom.no unit {unit_code}")

    summary: dict[str, Any] = {}
    summary["unit_code"] = eiendom_unit.unit_code
    summary["address"] = eiendom_unit.address
    # Normalise a missing/empty image collection to an empty list.
    summary["unit_images"] = eiendom_unit.unit_images or []
    summary["property_type"] = eiendom_unit.property_type
    summary["rooms"] = eiendom_unit.rooms
    summary["usable_area"] = eiendom_unit.usable_area
    return summary
|
|
|
|
|
|
async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> EiendomUnit | None:
    """Look up the Eiendom.no unit for a FINN listing URL.

    Thin pass-through to ``search_unit_from_finn_url``; its result is
    returned unchanged.
    """
    resolved_unit = await search_unit_from_finn_url(finn_url)
    return resolved_unit
|
|
|
|
|
|
# ============================================================================
|
|
# Orchestration functions -- delegate to analysis.py
|
|
# ============================================================================
|
|
|
|
|
|
async def analyze_search(
    search_url: str,
    *,
    max_pages: int = 3,
    detail_limit: int = 20,
    include_details: bool = True,
    include_eiendom_no: bool = True,
) -> dict[str, Any]:
    """Run the search analysis for a FINN search URL.

    Delegates to ``analysis.analyze_search``. Individual ad analyses are
    served from analysis_cache when the underlying data has not changed.

    Args:
        search_url: FINN search URL to analyse.
        max_pages: Forwarded as ``max_pages``.
        detail_limit: Forwarded as ``detail_limit``.
        include_details: Forwarded under the delegate's name ``fetch_details``.
        include_eiendom_no: Forwarded as ``include_eiendom_no``.

    Returns:
        Whatever the delegate returns (a ranked shortlist dict).
    """
    delegate_options = {
        "max_pages": max_pages,
        # The analysis module names this flag differently.
        "fetch_details": include_details,
        "detail_limit": detail_limit,
        "include_eiendom_no": include_eiendom_no,
    }
    return await run_analysis_search(search_url, **delegate_options)
|
|
|
|
|
|
async def analyze_ad(
    finnkode: str,
    *,
    include_eiendom_no: bool = True,
    include_similar_units: bool = False,
) -> dict[str, Any]:
    """Fetch one FINN ad and optionally enrich it with Eiendom.no data.

    Args:
        finnkode: FINN listing identifier.
        include_eiendom_no: Resolve the Eiendom.no unit code and attach the
            unit under ``eiendom_unit`` when it can be loaded.
        include_similar_units: Also attach ``similar_units``; only takes
            effect when a unit code was resolved.

    Returns:
        A dict with ``ad`` and, depending on the flags and what could be
        resolved, ``eiendom_unit`` and ``similar_units``.
    """
    ad = await get_or_fetch_ad(finnkode)

    # The unit code must be backfilled BEFORE model_dump(); otherwise the
    # serialised ad would still carry eiendom_unit_code=None.
    unit_code = None
    if include_eiendom_no:
        unit_code = await ensure_eiendom_unit_code(ad)

    payload: dict[str, Any] = {"ad": ad.model_dump(mode="json")}
    if not unit_code:
        return payload

    eiendom_unit = await get_or_fetch_eiendom_unit(unit_code)
    if eiendom_unit:
        payload["eiendom_unit"] = eiendom_unit.model_dump(mode="json")

    # NOTE(review): similar units are requested whenever a unit code was
    # resolved, independent of whether the unit itself loaded -- confirm
    # this nesting matches the intended behaviour.
    if include_similar_units:
        similar_units = await get_or_fetch_similar_units(unit_code)
        payload["similar_units"] = [
            candidate.model_dump(mode="json") for candidate in similar_units
        ]
    return payload
|
|
|
|
|
|
async def analyze_ad_against_comps(
    finnkode: str, listing_status: str = "RECENTLY_SOLD"
) -> dict[str, Any]:
    """Bundle one listing with its Eiendom.no unit and comparable units.

    Args:
        finnkode: FINN listing identifier.
        listing_status: Listing status filter used when fetching comps.

    Returns:
        A dict with ``ad`` and, when a unit code could be resolved,
        ``comparable_units`` plus ``eiendom_unit`` if the unit loaded.
    """
    ad = await get_or_fetch_ad(finnkode)

    # Backfill the unit code before model_dump() so the serialised ad does
    # not carry a stale eiendom_unit_code=None.
    unit_code = await ensure_eiendom_unit_code(ad)

    payload: dict[str, Any] = {"ad": ad.model_dump(mode="json")}
    if not unit_code:
        return payload

    eiendom_unit = await get_or_fetch_eiendom_unit(unit_code)
    if eiendom_unit:
        payload["eiendom_unit"] = eiendom_unit.model_dump(mode="json")

    # NOTE(review): comps are requested whenever a unit code was resolved,
    # independent of whether the unit itself loaded -- confirm this nesting
    # matches the intended behaviour.
    comparable = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
    payload["comparable_units"] = [comp.model_dump(mode="json") for comp in comparable]
    return payload
|
|
|
|
|
|
async def find_similar_to_liked(
    finnkode: str, *, mode: str = "recommendations", listing_status: str = "FOR_SALE"
) -> dict[str, Any]:
    """Return listings similar to the given (liked) FINN listing.

    Args:
        finnkode: FINN listing identifier of the base listing.
        mode: Echoed back unchanged in the result under ``mode``.
        listing_status: Listing status filter used when fetching similar units.

    Returns:
        A dict with ``base_ad``, ``similar_listings`` and ``mode``.

    Raises:
        ValueError: If the listing cannot be mapped to an Eiendom.no unit
            code, or the unit for that code cannot be loaded.
    """
    base_ad = await get_or_fetch_ad(finnkode)

    unit_code = await ensure_eiendom_unit_code(base_ad)
    if not unit_code:
        raise ValueError(
            f"Finnkode {finnkode} could not be resolved to an Eiendom.no unit; "
            "cannot find similar properties"
        )

    # TODO: verify feedback verdict = "liked" exists
    # The unit is loaded only to confirm that enrichment is possible.
    base_unit = await get_or_fetch_eiendom_unit(unit_code)
    if not base_unit:
        raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data")

    matches = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)

    response: dict[str, Any] = {}
    response["base_ad"] = base_ad.model_dump(mode="json")
    response["similar_listings"] = [match.model_dump(mode="json") for match in matches]
    response["mode"] = mode
    return response
|
|
|
|
|
|
async def compare_ads(
    finnkoder: list[str], *, include_eiendom_no: bool = True, include_comps: bool = True
) -> dict[str, Any]:
    """Collect several FINN listings, optionally enriched, for comparison.

    Listings are processed one after another in the order given.

    Args:
        finnkoder: FINN listing identifiers to compare.
        include_eiendom_no: Resolve each ad's Eiendom.no unit code and attach
            the unit under ``eiendom_unit`` when it can be loaded.
        include_comps: Attach ``RECENTLY_SOLD`` comps under ``comps``; only
            takes effect for ads whose unit code was resolved.

    Returns:
        ``{"listings": [...]}`` with one serialised ad dict per finnkode.
    """
    listings = []
    for finnkode in finnkoder:
        ad = await get_or_fetch_ad(finnkode)

        # Backfill the unit code before model_dump() so the serialised ad
        # does not carry a stale eiendom_unit_code=None.
        unit_code = None
        if include_eiendom_no:
            unit_code = await ensure_eiendom_unit_code(ad)

        entry = ad.model_dump(mode="json")
        if unit_code:
            eiendom_unit = await get_or_fetch_eiendom_unit(unit_code)
            if eiendom_unit:
                entry["eiendom_unit"] = eiendom_unit.model_dump(mode="json")

            # NOTE(review): comps are requested whenever a unit code was
            # resolved, independent of whether the unit itself loaded --
            # confirm this nesting matches the intended behaviour.
            if include_comps:
                sold_comps = await get_or_fetch_similar_units(
                    unit_code, listing_status="RECENTLY_SOLD"
                )
                entry["comps"] = [comp.model_dump(mode="json") for comp in sold_comps]

        listings.append(entry)

    return {"listings": listings}
|
|
|
|
|
|
# ============================================================================
|
|
# Helper functions
|
|
# ============================================================================
|
|
|
|
|
|
async def build_unit_vector_for_unit_code(unit_code: str) -> dict[str, Any]:
    """Load the unit for ``unit_code`` and encode it as a unit vector.

    Returns:
        A dict with the requested ``unit_code`` and the ``unit_vector``
        produced by ``build_unit_vector``.

    Raises:
        ValueError: If the unit cannot be loaded.
    """
    eiendom_unit = await get_or_fetch_eiendom_unit(unit_code)
    if eiendom_unit is None:
        raise ValueError(f"Could not fetch Eiendom.no unit {unit_code}")

    encoded: dict[str, Any] = {"unit_code": unit_code}
    encoded["unit_vector"] = build_unit_vector(eiendom_unit)
    return encoded
|
|
|
|
|
|
def decode_unit_vector_to_dict(unit_vector: str) -> dict[str, Any]:
    """Decode a unit_vector string into a dict.

    Thin pass-through to ``decode_unit_vector``; its result is returned
    unchanged.
    """
    decoded = decode_unit_vector(unit_vector)
    return decoded
|
|
|
|
|
|
def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict[str, Any]:
    """Record the user's verdict (and optional notes) for a listing.

    Thin pass-through to ``feedback.save_feedback``; its result is returned
    unchanged.
    """
    stored = save_feedback_impl(finnkode, verdict, notes)
    return stored
|
|
|
|
|
|
def get_shortlist(run_id: int | None = None, limit: int = 10) -> dict[str, Any]:
|
|
"""Fetch stored shortlist from a search run."""
|
|
# TODO: implement via search_runs table in cache.py
|
|
return {"shortlist": [], "run_id": run_id, "limit": limit}
|
|
|
|
|
|
def get_new_ads_since_last_run(search_url: str) -> dict[str, Any]:
    """Report new, removed and changed listings versus the previous run.

    Currently a placeholder: all three lists are always empty and
    ``search_url`` is echoed back unchanged.
    """
    # TODO: implement via search_runs table in cache.py
    diff: dict[str, Any] = {
        bucket: [] for bucket in ("new_ads", "removed_ads", "changed_ads")
    }
    diff["search_url"] = search_url
    return diff