scoring and analysis
This commit is contained in:
+57
-18
@@ -19,6 +19,12 @@ def _normalize_description(text: str | None) -> str:
|
||||
return text.lower() if text else ""
|
||||
|
||||
|
||||
def _is_resale_listing(url: str) -> bool:
|
||||
"""True for ordinary resale ads. Project / new-build ads use different URL
|
||||
paths that fetch_ad_details cannot resolve (it builds a /homes/ URL)."""
|
||||
return "/realestate/homes/" in url
|
||||
|
||||
|
||||
def _build_ad_summary(
|
||||
ad: FinnAd,
|
||||
enriched: EiendomUnit | None,
|
||||
@@ -95,12 +101,17 @@ async def analyze_ad(
|
||||
if enriched is not None:
|
||||
cache.save_eiendom_unit(conn, enriched)
|
||||
|
||||
if enriched and enriched.unit_vector:
|
||||
similar_units = cache.get_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD")
|
||||
if not similar_units:
|
||||
similar_units = await eiendom_no.get_similar_units(enriched.unit_vector)
|
||||
if similar_units:
|
||||
cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units)
|
||||
if enriched:
|
||||
# EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the
|
||||
# field comes back None. Reading enriched.unit_vector directly leaves
|
||||
# this block dead and similar_units permanently empty. Build the vector
|
||||
# from the unit fields instead (fall back to the field if a future
|
||||
# endpoint ever populates it).
|
||||
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
|
||||
if vector:
|
||||
# No dedicated cache table for similar units (per PRD) -- fetch
|
||||
# fresh each call, consistent with service.get_or_fetch_similar_units.
|
||||
similar_units = await eiendom_no.get_similar_units(vector)
|
||||
|
||||
scores = scoring.score_ad(finn_ad, enriched, similar_units)
|
||||
categories = scoring.classify_ad(scores)
|
||||
@@ -120,6 +131,26 @@ async def analyze_ad(
|
||||
return result
|
||||
|
||||
|
||||
async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
    """Analyze one search card: load its ad details, optionally resolve an
    Eiendom.no unit code, and hand both to analyze_ad.

    The ad is taken from the cache when a fresh entry exists; otherwise it is
    fetched via ad_module.fetch_ad_details. Errors from that fetch (and from
    analyze_ad) propagate -- the caller is responsible for catching them and
    skipping the card. A failed Eiendom.no unit lookup is only logged.
    """
    cached_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
    finn_ad = (
        cached_ad
        if cached_ad is not None
        else await ad_module.fetch_ad_details(card.finnkode, client=client)
    )

    resolved_code = None
    if include_eiendom_no:
        try:
            hit = await eiendom_no.search_unit_from_finn_url(card.url)
            if hit:
                resolved_code = hit.unit_code
        except Exception as exc:
            # Unit resolution is best-effort: continue without enrichment.
            logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)

    return await analyze_ad(finn_ad, unit_code=resolved_code)
|
||||
|
||||
|
||||
async def analyze_search(
|
||||
search_url: str,
|
||||
max_pages: int = FINN_MAX_SEARCH_PAGES,
|
||||
@@ -139,21 +170,28 @@ async def analyze_search(
|
||||
)
|
||||
results = []
|
||||
enriched_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
if fetch_details:
|
||||
for card in cards[:detail_limit]:
|
||||
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
|
||||
if finn_ad is None:
|
||||
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
|
||||
unit_code = None
|
||||
if include_eiendom_no:
|
||||
try:
|
||||
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
|
||||
unit_code = matched_unit.unit_code if matched_unit else None
|
||||
except Exception as exc:
|
||||
logger.warning("Eiendom.no unit search failed: %s", exc)
|
||||
unit_code = None
|
||||
result = await analyze_ad(finn_ad, unit_code=unit_code)
|
||||
# Project / new-build ads are not resale listings and fetch_ad_details
|
||||
# cannot resolve them -- skip up front rather than 404 mid-run.
|
||||
if not _is_resale_listing(card.url):
|
||||
logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# One bad card (stale finnkode, removed ad, transient network error)
|
||||
# must not abort the whole search -- isolate each card.
|
||||
try:
|
||||
result = await _analyze_card(
|
||||
card, conn, include_eiendom_no=include_eiendom_no, client=client
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Skipping card %s: %s", card.finnkode, exc)
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
if result.get("eiendom_unit"):
|
||||
enriched_count += 1
|
||||
results.append(result)
|
||||
@@ -166,6 +204,7 @@ async def analyze_search(
|
||||
"summary": {
|
||||
"total_listings": len(cards),
|
||||
"analyzed_listings": len(results),
|
||||
"skipped_listings": skipped_count,
|
||||
"eiendom_enriched": enriched_count,
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user