46fd22c277
- Updated Dockerfile to include FINN_CACHE_PATH and create data directory. - Modified docker-compose.prod.yml to expose port 8010 and adjust resource limits. - Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping. - Added health check endpoint in http_server.py for container orchestration. - Improved caching logic in analysis.py and service.py for similar units. - Refined scoring.py with updated scoring model and constants for better accuracy. Co-authored-by: Copilot <copilot@github.com>
217 lines
8.1 KiB
Python
217 lines
8.1 KiB
Python
"""Orchestration for FINN search + Eiendom.no enrichment + scoring."""
|
|
|
|
import logging
|
|
|
|
from . import ad as ad_module
|
|
from . import cache, eiendom_no, scoring, search
|
|
from .config import (
|
|
EIENDOM_NO_CACHE_TTL_HOURS,
|
|
FINN_CACHE_PATH,
|
|
FINN_CACHE_TTL_AD_HOURS,
|
|
FINN_DETAIL_LIMIT,
|
|
FINN_MAX_SEARCH_PAGES,
|
|
)
|
|
from .models import EiendomUnit, FinnAd, SimilarUnit
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _normalize_description(text: str | None) -> str:
|
|
return text.lower() if text else ""
|
|
|
|
|
|
def _is_resale_listing(url: str) -> bool:
|
|
"""True for ordinary resale ads. Project / new-build ads use different URL
|
|
paths that fetch_ad_details cannot resolve (it builds a /homes/ URL)."""
|
|
return "/realestate/homes/" in url
|
|
|
|
|
|
def _build_ad_summary(
    ad: FinnAd,
    enriched: EiendomUnit | None,
    similar_units: list[SimilarUnit],
    scores: dict,
    categories: list[str],
) -> dict:
    """Build the human-readable summary for one analysed listing.

    Args:
        ad: The FINN listing. Its total price, common costs, balcony/terrace
            flags and listing description are read.
        enriched: The matching Eiendom.no unit, or ``None`` when enrichment is
            unavailable.
        similar_units: Comparable units; only whether the list is empty is
            checked here.
        scores: Score mapping; only the ``"risk"`` entry is read.
        categories: Accepted to keep the call signature stable; it does not
            influence the summary text.

    Returns:
        A dict with ``why_interesting``, ``risks`` and ``next_steps`` (lists of
        strings) and ``shortlist_reason`` (the first three reasons joined by
        ``", "``, or a generic fallback when there are none).
    """
    description = _normalize_description(ad.listing_description)
    reasons = []
    risks = []
    # The default steps already point the reader at Eiendom.no, so no extra
    # "verify on Eiendom.no" step is ever needed.
    next_steps = [
        "Open the FINN listing and condition report.",
        "Review the Eiendom.no estimate and comparable sales.",
        "Ask the broker about renovation status and approvals.",
    ]

    # --- Price versus the Eiendom.no estimate ---
    estimate = enriched.estimated_selling_price if enriched else None
    if estimate and ad.total_price:
        upper = enriched.estimated_selling_price_upper
        if ad.total_price < estimate:
            reasons.append("Listing price is below Eiendom.no estimate.")
        elif upper is None:
            # No upper bound to compare against; comparing with None would
            # raise TypeError, so report only what is known.
            reasons.append("Listing price is at or above the Eiendom.no estimate.")
        elif ad.total_price <= upper:
            reasons.append("Price sits within the local estimate range.")
        else:
            reasons.append("Listing price is above the estimate range.")
    else:
        reasons.append("Eiendom.no enrichment is unavailable or incomplete.")

    # --- Keyword / feature based positives (Norwegian listing vocabulary) ---
    if "utsikt" in description or ad.has_balcony or ad.has_terrace:
        reasons.append("Outdoor space or view potential is positive.")
    if "hybel" in description or "leie" in description:
        reasons.append("Potential hybel/rental opportunity is mentioned.")
    if "potensial" in description or "renover" in description:
        reasons.append("Renovation or improvement potential is highlighted.")

    # --- Risks ---
    if scores.get("risk", 0.0) < 0:
        risks.append("Risk flags are detected in description or metadata.")
    if ad.common_costs and ad.common_costs > 5000:
        risks.append("Common costs are relatively high and should be reviewed.")
    if enriched and enriched.sale_status and enriched.sale_status.upper() != "FOR_SALE":
        risks.append("Eiendom.no sale status does not indicate an active sale.")
    if not enriched:
        risks.append("Missing Eiendom.no data increases uncertainty.")

    # --- Follow-up depending on comparable sales ---
    if similar_units:
        next_steps.append("Review the comparable units and average sqm prices.")
    else:
        next_steps.append("Comparable sales are unavailable; treat valuation with caution.")

    return {
        "why_interesting": reasons,
        "risks": risks,
        "next_steps": next_steps,
        "shortlist_reason": ", ".join(reasons[:3])
        if reasons
        else "Review details and seller disclosures.",
    }
|
|
|
|
|
|
async def analyze_ad(
    finn_ad: FinnAd,
    unit_code: str | None = None,
    *,
    conn=None,
) -> dict:
    """Enrich a FinnAd and compute its score summary.

    Args:
        finn_ad: The listing to analyse.
        unit_code: Eiendom.no unit code for the listing. When falsy, no
            enrichment or comparable lookup is attempted.
        conn: Optional cache handle previously returned by ``cache.init_db``.
            When omitted, one is opened from ``FINN_CACHE_PATH``. Passing the
            caller's handle avoids re-initialising the cache for every ad.

    Returns:
        A dict with the ad's ``finnkode``, ``title`` and ``address``, the
        ``score`` mapping, ``categories``, the textual ``summary``, the dumped
        ``eiendom_unit`` (or ``None``) and the dumped ``similar_units``.
    """
    if conn is None:
        # NOTE(review): the handle opened here is never explicitly released;
        # confirm whether cache.init_db expects callers to close it.
        conn = cache.init_db(FINN_CACHE_PATH)

    enriched: EiendomUnit | None = None
    similar_units: list[SimilarUnit] = []

    if unit_code:
        enriched = cache.get_eiendom_unit(conn, unit_code)
        if enriched is None:
            enriched = await eiendom_no.enrich_ad_with_eiendom_no(finn_ad, unit_code)
            if enriched is not None:
                cache.save_eiendom_unit(conn, enriched)

    if enriched:
        # Check cache for similar units first. The cache uses (unit_code,
        # listing_status) as the key, so we must look it up by unit_code.
        # ``or []`` keeps the value a list even if the lookup yields None.
        similar_units = cache.get_similar_units(
            conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
        ) or []

        if not similar_units:
            # Cache miss: build the vector and fetch fresh from Eiendom.no
            # (unit_vector field from get_unit is None; build locally)
            vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
            if vector:
                similar_units = (await eiendom_no.get_similar_units(vector)) or []
                # Only persist non-empty results.
                if similar_units:
                    cache.save_similar_units(
                        conn, enriched.unit_code, "RECENTLY_SOLD", similar_units
                    )

    scores = scoring.score_ad(finn_ad, enriched, similar_units)
    categories = scoring.classify_ad(scores)
    summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)

    result = {
        "finnkode": finn_ad.finnkode,
        "title": finn_ad.title,
        "address": finn_ad.address,
        "score": scores,
        "categories": categories,
        "summary": summary,
        "eiendom_unit": enriched.model_dump() if enriched else None,
        "similar_units": [unit.model_dump() for unit in similar_units],
    }
    cache.save_finn_ad(conn, finn_ad)
    return result


async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
    """Fetch details + enrich a single search card.

    The ad is taken from the cache when a fresh entry exists (TTL
    ``FINN_CACHE_TTL_AD_HOURS``) and fetched otherwise. When
    ``include_eiendom_no`` is true the Eiendom.no unit is resolved from the
    card URL; a failure there is logged and the card is analysed without
    enrichment.

    Raises on unrecoverable errors (e.g. the detail fetch failing); the caller
    is responsible for catching and skipping.
    """
    finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
    if finn_ad is None:
        finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)

    unit_code = None
    if include_eiendom_no:
        try:
            matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
            unit_code = matched_unit.unit_code if matched_unit else None
        except Exception as exc:
            # A failed unit resolution is non-fatal -- proceed without
            # enrichment (unit_code is still None at this point).
            logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)

    # Reuse the caller's cache handle instead of opening a new one per card.
    return await analyze_ad(finn_ad, unit_code=unit_code, conn=conn)
|
|
|
|
|
|
async def analyze_search(
    search_url: str,
    max_pages: int = FINN_MAX_SEARCH_PAGES,
    fetch_details: bool = True,
    detail_limit: int = FINN_DETAIL_LIMIT,
    include_eiendom_no: bool = True,
    client=None,
    use_cache: bool = True,
) -> dict:
    """Analyze a FINN search URL and enrich matching listings.

    Args:
        search_url: The FINN search URL to fetch result cards from.
        max_pages: Forwarded to ``search.fetch_search_pages``.
        fetch_details: When false, cards are returned without per-ad analysis.
        detail_limit: Only the first ``detail_limit`` cards are considered for
            analysis (skipped cards count against this limit).
        include_eiendom_no: Whether to resolve and enrich via Eiendom.no.
        client: Optional client object forwarded to the search and ad fetchers.
        use_cache: Forwarded to ``search.fetch_search_pages``.

    Returns:
        A dict with ``search_url``, the dumped ``search_cards``, the per-ad
        ``analysis`` sorted by descending total score, and a ``summary`` of
        total / analyzed / skipped / enriched counts.
    """
    conn = cache.init_db(FINN_CACHE_PATH)
    cards = await search.fetch_search_pages(
        search_url,
        max_pages=max_pages,
        client=client,
        use_cache=use_cache,
    )
    results = []
    enriched_count = 0
    skipped_count = 0

    if fetch_details:
        for card in cards[:detail_limit]:
            # Project / new-build ads are not resale listings and fetch_ad_details
            # cannot resolve them -- skip up front rather than 404 mid-run.
            if not _is_resale_listing(card.url):
                logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
                skipped_count += 1
                continue

            # One bad card (stale finnkode, removed ad, transient network error)
            # must not abort the whole search -- isolate each card.
            try:
                result = await _analyze_card(
                    card, conn, include_eiendom_no=include_eiendom_no, client=client
                )
            except Exception as exc:
                logger.warning("Skipping card %s: %s", card.finnkode, exc)
                skipped_count += 1
                continue

            if result.get("eiendom_unit"):
                enriched_count += 1
            results.append(result)

    # Highest total score first. A missing or None total sorts as 0.0 so one
    # incomplete score cannot raise TypeError during comparison.
    results.sort(key=lambda item: item["score"].get("total") or 0.0, reverse=True)

    return {
        "search_url": search_url,
        "search_cards": [card.model_dump() for card in cards],
        "analysis": results,
        "summary": {
            "total_listings": len(cards),
            "analyzed_listings": len(results),
            "skipped_listings": skipped_count,
            "eiendom_enriched": enriched_count,
        },
    }
|