scoring and analysis

This commit is contained in:
Ole
2026-05-23 07:43:30 +00:00
parent c9383788de
commit d3f4bfa838
7 changed files with 1113 additions and 305 deletions
+57 -18
View File
@@ -19,6 +19,12 @@ def _normalize_description(text: str | None) -> str:
return text.lower() if text else ""
def _is_resale_listing(url: str) -> bool:
"""True for ordinary resale ads. Project / new-build ads use different URL
paths that fetch_ad_details cannot resolve (it builds a /homes/ URL)."""
return "/realestate/homes/" in url
def _build_ad_summary(
ad: FinnAd,
enriched: EiendomUnit | None,
@@ -95,12 +101,17 @@ async def analyze_ad(
if enriched is not None:
cache.save_eiendom_unit(conn, enriched)
if enriched and enriched.unit_vector:
similar_units = cache.get_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD")
if not similar_units:
similar_units = await eiendom_no.get_similar_units(enriched.unit_vector)
if similar_units:
cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units)
if enriched:
# EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the
# field comes back None. Reading enriched.unit_vector directly leaves
# this block dead and similar_units permanently empty. Build the vector
# from the unit fields instead (fall back to the field if a future
# endpoint ever populates it).
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
if vector:
# No dedicated cache table for similar units (per PRD) -- fetch
# fresh each call, consistent with service.get_or_fetch_similar_units.
similar_units = await eiendom_no.get_similar_units(vector)
scores = scoring.score_ad(finn_ad, enriched, similar_units)
categories = scoring.classify_ad(scores)
@@ -120,6 +131,26 @@ async def analyze_ad(
return result
async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
    """Turn a single search card into an analysis result.

    The FINN ad is taken from the cache when a sufficiently fresh entry
    exists, otherwise its details are fetched. When ``include_eiendom_no`` is
    set, the Eiendom.no unit code is looked up from the card URL; a failure
    there is logged and the analysis continues without a unit code.

    Exceptions from the cache lookup, the detail fetch or ``analyze_ad``
    propagate -- the caller is responsible for catching and skipping.
    """
    ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
    if ad is None:
        ad = await ad_module.fetch_ad_details(card.finnkode, client=client)

    # Stays None unless the Eiendom.no lookup both succeeds and finds a unit.
    resolved_code = None
    if include_eiendom_no:
        try:
            hit = await eiendom_no.search_unit_from_finn_url(card.url)
            if hit:
                resolved_code = hit.unit_code
        except Exception as exc:
            # A failed unit resolution is non-fatal -- proceed without enrichment.
            logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)

    return await analyze_ad(ad, unit_code=resolved_code)
async def analyze_search(
search_url: str,
max_pages: int = FINN_MAX_SEARCH_PAGES,
@@ -139,21 +170,28 @@ async def analyze_search(
)
results = []
enriched_count = 0
skipped_count = 0
if fetch_details:
for card in cards[:detail_limit]:
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
if finn_ad is None:
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
unit_code = None
if include_eiendom_no:
try:
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
unit_code = matched_unit.unit_code if matched_unit else None
except Exception as exc:
logger.warning("Eiendom.no unit search failed: %s", exc)
unit_code = None
result = await analyze_ad(finn_ad, unit_code=unit_code)
# Project / new-build ads are not resale listings and fetch_ad_details
# cannot resolve them -- skip up front rather than 404 mid-run.
if not _is_resale_listing(card.url):
logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
skipped_count += 1
continue
# One bad card (stale finnkode, removed ad, transient network error)
# must not abort the whole search -- isolate each card.
try:
result = await _analyze_card(
card, conn, include_eiendom_no=include_eiendom_no, client=client
)
except Exception as exc:
logger.warning("Skipping card %s: %s", card.finnkode, exc)
skipped_count += 1
continue
if result.get("eiendom_unit"):
enriched_count += 1
results.append(result)
@@ -166,6 +204,7 @@ async def analyze_search(
"summary": {
"total_listings": len(cards),
"analyzed_listings": len(results),
"skipped_listings": skipped_count,
"eiendom_enriched": enriched_count,
},
}