Files
finn-mcp/finn_eiendom/analysis.py
T
ole 46fd22c277 Enhance Docker and Compose configurations; add health check endpoint and caching improvements
- Updated Dockerfile to include FINN_CACHE_PATH and create data directory.
- Modified docker-compose.prod.yml to expose port 8010 and adjust resource limits.
- Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping.
- Added health check endpoint in http_server.py for container orchestration.
- Improved caching logic in analysis.py and service.py for similar units.
- Refined scoring.py with updated scoring model and constants for better accuracy.

Co-authored-by: Copilot <copilot@github.com>
2026-05-26 12:10:00 +00:00

217 lines
8.1 KiB
Python

"""Orchestration for FINN search + Eiendom.no enrichment + scoring."""
import logging
from . import ad as ad_module
from . import cache, eiendom_no, scoring, search
from .config import (
EIENDOM_NO_CACHE_TTL_HOURS,
FINN_CACHE_PATH,
FINN_CACHE_TTL_AD_HOURS,
FINN_DETAIL_LIMIT,
FINN_MAX_SEARCH_PAGES,
)
from .models import EiendomUnit, FinnAd, SimilarUnit
logger = logging.getLogger(__name__)
def _normalize_description(text: str | None) -> str:
return text.lower() if text else ""
def _is_resale_listing(url: str) -> bool:
"""True for ordinary resale ads. Project / new-build ads use different URL
paths that fetch_ad_details cannot resolve (it builds a /homes/ URL)."""
return "/realestate/homes/" in url
def _build_ad_summary(
    ad: FinnAd,
    enriched: EiendomUnit | None,
    similar_units: list[SimilarUnit],
    scores: dict,
    categories: list[str],
) -> dict:
    """Build the human-readable summary that accompanies an ad's scores.

    Args:
        ad: The FINN listing being summarised.
        enriched: Matching Eiendom.no unit, or ``None`` when there is no
            enrichment.
        similar_units: Comparable units; only checked for emptiness here.
        scores: Score mapping; only its ``"risk"`` entry is read here.
        categories: Accepted for interface compatibility; not used in the
            summary.

    Returns:
        A dict with ``why_interesting``, ``risks`` and ``next_steps`` (lists
        of strings) plus ``shortlist_reason`` (the first three reasons joined
        with ", ").
    """
    description = _normalize_description(ad.listing_description)
    reasons: list[str] = []
    risks: list[str] = []
    next_steps = [
        "Open the FINN listing and condition report.",
        "Review the Eiendom.no estimate and comparable sales.",
        "Ask the broker about renovation status and approvals.",
    ]

    # --- Price versus the Eiendom.no estimate -------------------------------
    estimate = enriched.estimated_selling_price if enriched else None
    if estimate and ad.total_price:
        # Only the point estimate is guarded above. Comparing a number with a
        # missing upper bound would raise TypeError, so fall back to the point
        # estimate as the top of the range.
        # NOTE(review): assumes estimated_selling_price_upper may be None --
        # confirm against the EiendomUnit model.
        upper = enriched.estimated_selling_price_upper
        if upper is None:
            upper = estimate
        if ad.total_price < estimate:
            reasons.append("Listing price is below Eiendom.no estimate.")
        elif ad.total_price <= upper:
            reasons.append("Price sits within the local estimate range.")
        else:
            reasons.append("Listing price is above the estimate range.")
    else:
        reasons.append("Eiendom.no enrichment is unavailable or incomplete.")

    # --- Keyword / feature driven reasons -----------------------------------
    if "utsikt" in description or ad.has_balcony or ad.has_terrace:
        reasons.append("Outdoor space or view potential is positive.")
    if "hybel" in description or "leie" in description:
        reasons.append("Potential hybel/rental opportunity is mentioned.")
    if "potensial" in description or "renover" in description:
        reasons.append("Renovation or improvement potential is highlighted.")

    # --- Risks ----------------------------------------------------------------
    if scores.get("risk", 0.0) < 0:
        risks.append("Risk flags are detected in description or metadata.")
    if ad.common_costs and ad.common_costs > 5000:
        risks.append("Common costs are relatively high and should be reviewed.")
    if enriched and enriched.sale_status and enriched.sale_status.upper() != "FOR_SALE":
        risks.append("Eiendom.no sale status does not indicate an active sale.")
    if not enriched:
        risks.append("Missing Eiendom.no data increases uncertainty.")

    # --- Next steps -----------------------------------------------------------
    # The default steps above already mention Eiendom.no, so this guard only
    # fires if those defaults are edited to drop that mention.
    if not any("Eiendom.no" in step for step in next_steps):
        next_steps.append("Verify the property on Eiendom.no and reconcile any mismatches.")
    if similar_units:
        next_steps.append("Review the comparable units and average sqm prices.")
    else:
        next_steps.append("Comparable sales are unavailable; treat valuation with caution.")

    return {
        "why_interesting": reasons,
        "risks": risks,
        "next_steps": next_steps,
        "shortlist_reason": ", ".join(reasons[:3])
        if reasons
        else "Review details and seller disclosures.",
    }
async def _resolve_eiendom_unit(
    conn, finn_ad: FinnAd, unit_code: str | None
) -> EiendomUnit | None:
    """Return the Eiendom.no unit for *unit_code*, cached copy first.

    On a cache miss the unit is looked up via eiendom_no and, when found,
    written back to the cache. Returns ``None`` when no unit code is given
    or the lookup yields nothing.
    """
    if not unit_code:
        return None
    unit = cache.get_eiendom_unit(conn, unit_code)
    if unit is not None:
        return unit
    unit = await eiendom_no.enrich_ad_with_eiendom_no(finn_ad, unit_code)
    if unit is not None:
        cache.save_eiendom_unit(conn, unit)
    return unit


async def _resolve_similar_units(conn, enriched: EiendomUnit) -> list[SimilarUnit]:
    """Return recently sold comparables for *enriched*, cached copy first."""
    listing_status = "RECENTLY_SOLD"
    # The cache is consulted by (unit_code, listing_status), so the lookup
    # goes through the unit code rather than the vector.
    cached = cache.get_similar_units(
        conn, enriched.unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
    )
    if cached:
        return cached
    # Cache miss: use the unit's own vector when it has one, otherwise build
    # it locally before asking Eiendom.no for fresh comparables.
    vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
    if not vector:
        return cached
    fetched = await eiendom_no.get_similar_units(vector)
    if fetched:
        cache.save_similar_units(conn, enriched.unit_code, listing_status, fetched)
    return fetched


async def analyze_ad(
    finn_ad: FinnAd,
    unit_code: str | None = None,
) -> dict:
    """Enrich a FinnAd with Eiendom.no data and compute its score summary.

    Args:
        finn_ad: The FINN listing to analyze.
        unit_code: Eiendom.no unit code; when falsy, no enrichment is
            attempted.

    Returns:
        A dict with ``finnkode``, ``title``, ``address``, ``score``,
        ``categories``, ``summary``, ``eiendom_unit`` (dumped model or
        ``None``) and ``similar_units`` (list of dumped models).
    """
    conn = cache.init_db(FINN_CACHE_PATH)

    enriched = await _resolve_eiendom_unit(conn, finn_ad, unit_code)
    if enriched:
        similar_units = await _resolve_similar_units(conn, enriched)
    else:
        similar_units = []

    scores = scoring.score_ad(finn_ad, enriched, similar_units)
    categories = scoring.classify_ad(scores)
    summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)

    eiendom_payload = enriched.model_dump() if enriched else None
    similar_payload = [unit.model_dump() for unit in similar_units]
    analysis = {
        "finnkode": finn_ad.finnkode,
        "title": finn_ad.title,
        "address": finn_ad.address,
        "score": scores,
        "categories": categories,
        "summary": summary,
        "eiendom_unit": eiendom_payload,
        "similar_units": similar_payload,
    }
    # The ad is written to the cache only once the analysis has been built.
    cache.save_finn_ad(conn, finn_ad)
    return analysis
async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
    """Analyze one search card: load ad details, resolve a unit, then score.

    Errors from the detail fetch or from analyze_ad propagate; the caller is
    responsible for catching them and skipping the card. Only the Eiendom.no
    unit search is best-effort.
    """
    ad_details = cache.get_finn_ad(
        conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS
    )
    if ad_details is None:
        ad_details = await ad_module.fetch_ad_details(card.finnkode, client=client)

    resolved_code = None
    if include_eiendom_no:
        try:
            hit = await eiendom_no.search_unit_from_finn_url(card.url)
            if hit:
                resolved_code = hit.unit_code
        except Exception as exc:
            # A failed unit resolution is non-fatal -- resolved_code stays
            # None and the ad is analyzed without enrichment.
            logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)

    return await analyze_ad(ad_details, unit_code=resolved_code)
async def analyze_search(
    search_url: str,
    max_pages: int = FINN_MAX_SEARCH_PAGES,
    fetch_details: bool = True,
    detail_limit: int = FINN_DETAIL_LIMIT,
    include_eiendom_no: bool = True,
    client=None,
    use_cache: bool = True,
) -> dict:
    """Analyze a FINN search URL and enrich the matching listings.

    Args:
        search_url: FINN search URL to crawl.
        max_pages: Forwarded to search.fetch_search_pages.
        fetch_details: When false, no card is analyzed and ``analysis`` is
            empty.
        detail_limit: Only the first ``detail_limit`` cards are considered
            for analysis.
        include_eiendom_no: Whether to attempt Eiendom.no unit resolution for
            each card.
        client: Passed through to the search and ad-detail fetchers.
        use_cache: Forwarded to search.fetch_search_pages only.

    Returns:
        A dict with ``search_url``, ``search_cards`` (all dumped cards),
        ``analysis`` (per-ad results sorted by total score, highest first)
        and ``summary`` counters.
    """
    conn = cache.init_db(FINN_CACHE_PATH)
    cards = await search.fetch_search_pages(
        search_url,
        max_pages=max_pages,
        client=client,
        use_cache=use_cache,
    )

    analyses = []
    enriched_total = 0
    skipped_total = 0

    candidates = cards[:detail_limit] if fetch_details else []
    for card in candidates:
        if _is_resale_listing(card.url):
            # One bad card (stale finnkode, removed ad, transient network
            # error) must not abort the whole search -- isolate each card.
            try:
                analysis = await _analyze_card(
                    card, conn, include_eiendom_no=include_eiendom_no, client=client
                )
            except Exception as exc:
                logger.warning("Skipping card %s: %s", card.finnkode, exc)
                skipped_total += 1
            else:
                if analysis.get("eiendom_unit"):
                    enriched_total += 1
                analyses.append(analysis)
        else:
            # Project / new-build ads are not resale listings and
            # fetch_ad_details cannot resolve them -- skip up front rather
            # than 404 mid-run.
            logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
            skipped_total += 1

    def _total_score(item: dict):
        return item["score"].get("total", 0.0)

    analyses.sort(key=_total_score, reverse=True)

    counters = {
        "total_listings": len(cards),
        "analyzed_listings": len(analyses),
        "skipped_listings": skipped_total,
        "eiendom_enriched": enriched_total,
    }
    return {
        "search_url": search_url,
        "search_cards": [card.model_dump() for card in cards],
        "analysis": analyses,
        "summary": counters,
    }