scoring and analysis

This commit is contained in:
Ole
2026-05-23 07:43:30 +00:00
parent c9383788de
commit d3f4bfa838
7 changed files with 1113 additions and 305 deletions
+74 -18
View File
@@ -36,6 +36,43 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
return ad
async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
    """Make sure ``ad.eiendom_unit_code`` is set, resolving it if necessary.

    ``fetch_ad_details`` leaves ``eiendom_unit_code`` empty; the only way to
    map a FINN listing to an Eiendom.no unit is the resolver
    ``search_unit_from_finn_url``. Because every enrichment path is gated on
    this field, skipping the resolve step means the gate is always falsy and
    enrichment quietly does nothing.

    When the code is already present it is returned untouched. Otherwise the
    resolver is called once with ``ad.url``; on success the ad is mutated in
    place and written back to the FINN cache so later cache hits avoid the
    network round trip.

    IMPORTANT: call this BEFORE serialising the ad with ``model_dump()``.
    Dumping first produces a dict with a stale ``eiendom_unit_code: None``
    even when enrichment later succeeds.

    Args:
        ad: The FINN ad to backfill. Mutated in place on a successful resolve.

    Returns:
        The unit code, or ``None`` when the listing cannot be resolved
        (e.g. new-build project ads, off-market addresses).
    """
    known_code = ad.eiendom_unit_code
    if known_code:
        # Already backfilled (or loaded from cache) -- nothing to resolve.
        return known_code

    resolved = await search_unit_from_finn_url(ad.url)
    if resolved is not None and resolved.unit_code:
        ad.eiendom_unit_code = resolved.unit_code
        # Persist only the backfilled ad. Do NOT cache `resolved` here: the
        # resolver returns a partial record (code + address + coords). The
        # full unit comes from get_or_fetch_eiendom_unit -> get_unit().
        save_finn_ad(init_db(FINN_CACHE_PATH), ad)
        logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, resolved.unit_code)
        return ad.eiendom_unit_code

    logger.info("No Eiendom.no unit resolved for finnkode %s", ad.finnkode)
    return None
async def get_or_fetch_eiendom_unit(
unit_code: str, force_refresh: bool = False
) -> EiendomUnit | None:
@@ -84,7 +121,7 @@ async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> EiendomUnit | Non
# ============================================================================
# Orchestration functions delegate to analysis.py
# Orchestration functions -- delegate to analysis.py
# ============================================================================
@@ -96,7 +133,13 @@ async def analyze_search(
include_details: bool = True,
include_eiendom_no: bool = True,
) -> dict[str, Any]:
"""Analyze a FINN search URL and return a ranked shortlist."""
"""Analyze a FINN search URL and return a ranked shortlist.
NOTE: enrichment for search results lives in analysis.py. If that path
also reports `eiendom_enriched: 0`, it has the same root cause -- each
card's eiendom_unit_code must be resolved via ensure_eiendom_unit_code
(or search_unit_from_finn_url) before the enrichment gate.
"""
return await run_analysis_search(
search_url,
max_pages=max_pages,
@@ -114,15 +157,20 @@ async def analyze_ad(
) -> dict[str, Any]:
"""Fetch and enrich a single FINN ad with analysis."""
ad = await get_or_fetch_ad(finnkode)
# Resolve BEFORE model_dump() so the serialised ad carries the backfilled
# eiendom_unit_code instead of a stale None.
unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
result: dict[str, Any] = {
"ad": ad.model_dump(),
}
if include_eiendom_no and ad.eiendom_unit_code:
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
if unit_code:
unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
result["eiendom_unit"] = unit.model_dump()
if include_similar_units:
similar = await get_or_fetch_similar_units(ad.eiendom_unit_code)
similar = await get_or_fetch_similar_units(unit_code)
result["similar_units"] = [s.model_dump() for s in similar]
return result
@@ -132,16 +180,18 @@ async def analyze_ad_against_comps(
) -> dict[str, Any]:
"""Evaluate one listing against recent comparable sales."""
ad = await get_or_fetch_ad(finnkode)
# Resolve before model_dump() -- see analyze_ad.
unit_code = await ensure_eiendom_unit_code(ad)
result: dict[str, Any] = {
"ad": ad.model_dump(),
}
if ad.eiendom_unit_code:
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
if unit_code:
unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
result["eiendom_unit"] = unit.model_dump()
comps = await get_or_fetch_similar_units(
ad.eiendom_unit_code, listing_status=listing_status
)
comps = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
result["comparable_units"] = [c.model_dump() for c in comps]
return result
@@ -152,17 +202,20 @@ async def find_similar_to_liked(
"""Find properties similar to a listing the user has liked."""
# Requires that feedback.verdict = "liked" exists for this finnkode
ad = await get_or_fetch_ad(finnkode)
if not ad.eiendom_unit_code:
unit_code = await ensure_eiendom_unit_code(ad)
if not unit_code:
raise ValueError(
f"Finnkode {finnkode} has no Eiendom.no unit_code; cannot find similar properties"
f"Finnkode {finnkode} could not be resolved to an Eiendom.no unit; "
"cannot find similar properties"
)
# TODO: verify feedback verdict = "liked" exists
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
unit = await get_or_fetch_eiendom_unit(unit_code)
if not unit:
raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data")
similar = await get_or_fetch_similar_units(ad.eiendom_unit_code, listing_status=listing_status)
similar = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
return {
"base_ad": ad.model_dump(),
"similar_listings": [s.model_dump() for s in similar],
@@ -177,15 +230,18 @@ async def compare_ads(
ads = []
for finnkode in finnkoder:
ad = await get_or_fetch_ad(finnkode)
ad_data = ad.model_dump()
if include_eiendom_no and ad.eiendom_unit_code:
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
# Resolve before model_dump() -- see analyze_ad.
unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
ad_data = ad.model_dump()
if unit_code:
unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
ad_data["eiendom_unit"] = unit.model_dump()
if include_comps:
comps = await get_or_fetch_similar_units(
ad.eiendom_unit_code, listing_status="RECENTLY_SOLD"
unit_code, listing_status="RECENTLY_SOLD"
)
ad_data["comps"] = [c.model_dump() for c in comps]