Refactor and enhance various components of the FINN real estate analysis tool

- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to create the database directory if it doesn't exist, and improved its SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
This commit is contained in:
Ole
2026-05-29 15:17:11 +00:00
parent 55d93894ac
commit eb95b98111
10 changed files with 295 additions and 343 deletions
+125 -247
View File
@@ -11,30 +11,14 @@ from mcp.server.transport_security import TransportSecuritySettings
from mcp.server.fastmcp import Context, FastMCP
from mcp.types import ImageContent, TextContent
from .eiendom_no import (
build_unit_vector,
decode_unit_vector,
get_similar_units,
get_unit,
search_unit_from_finn_url,
)
from .formatting import (
render_ad,
render_comparison,
render_diff,
render_shortlist,
render_similar_units,
render_unit_images,
)
from .service import (
analyze_ad,
analyze_ad_against_comps,
analyze_search,
compare_ads,
find_similar_to_liked,
get_new_ads_since_last_run,
get_or_fetch_ad,
get_or_fetch_eiendom_unit,
get_shortlist,
get_unit_images,
save_feedback,
@@ -48,6 +32,55 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
def _slim_comp(c: dict) -> dict:
"""Drop internal IDs, coords, redundant status fields from a comparable unit."""
return {
"unit_code": c.get("unit_code"),
"address": c.get("address"),
"usable_area": c.get("usable_area"),
"rooms": c.get("rooms"),
"floor": c.get("floor"),
"construction_year": c.get("construction_year"),
"listing_price": c.get("listing_price"),
"selling_price": c.get("selling_price"),
"shared_debt": c.get("shared_debt"),
"sqm_price": c.get("sqm_price"),
"common_costs": c.get("common_costs"),
"days_on_market": c.get("days_on_market"),
"finalized_at": (c.get("finalized_at") or "")[:10],
}
def _slim_comps(comps: list[dict], keep: int = 15) -> list[dict]:
    """Return slimmed copies of the ``keep`` most recently finalized comps.

    Comps are ordered newest-first by their ``finalized_at`` value, because
    older comparables lose relevance fast. A missing or falsy ``finalized_at``
    is treated as ``""`` for ordering. The input list is not modified.
    """
    newest_first = list(comps)
    newest_first.sort(key=lambda comp: comp.get("finalized_at") or "", reverse=True)
    return [_slim_comp(comp) for comp in newest_first[:keep]]
def _avg_comp_sqm(comps: list[dict]) -> int | None:
sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
return round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
def _slim_eiendom(eu: dict, comps: list[dict]) -> dict:
    """Build the compact Eiendom.no view of a unit.

    Only the fields below are copied from ``eu``; unit_images, unit_vector,
    lat/lng and timestamps are dropped. Some fields are renamed on the way
    out (``estimated_selling_price`` -> ``estimated_price``,
    ``market_placement_score`` -> ``market_placement``), and the lower/upper
    price estimates are folded into a two-element ``estimated_range`` list.
    ``comps`` contributes the average comp sqm price and the comp count.
    """
    lower_estimate = eu.get("estimated_selling_price_lower")
    upper_estimate = eu.get("estimated_selling_price_upper")

    slim: dict = {
        "unit_code": eu.get("unit_code"),
        "usable_area": eu.get("usable_area"),
        "estimated_price": eu.get("estimated_selling_price"),
        "estimated_range": [lower_estimate, upper_estimate],
    }
    slim.update(
        listing_sqm_price=eu.get("listing_sqm_price"),
        market_placement=eu.get("market_placement_score"),
        sale_status=eu.get("sale_status"),
        days_on_market=eu.get("days_on_market"),
        avg_comp_sqm_price=_avg_comp_sqm(comps),
        comp_count=len(comps),
    )
    return slim
def _slim_listing(rank: int, item: dict) -> dict:
"""Collapse one full analyze_ad result into a compact listing card.
@@ -57,57 +90,9 @@ def _slim_listing(rank: int, item: dict) -> dict:
"""
eu = item.get("eiendom_unit") or {}
comps = item.get("similar_units") or []
sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
# Slim comps: drop internal IDs, coords, redundant status fields.
# Sort by recency, keep 15 most recent — older comps lose relevance fast.
def _slim_comp(c: dict) -> dict:
return {
"unit_code": c.get("unit_code"),
"address": c.get("address"),
"usable_area": c.get("usable_area"),
"rooms": c.get("rooms"),
"floor": c.get("floor"),
"construction_year": c.get("construction_year"),
"listing_price": c.get("listing_price"),
"selling_price": c.get("selling_price"),
"shared_debt": c.get("shared_debt"),
"sqm_price": c.get("sqm_price"),
"common_costs": c.get("common_costs"),
"days_on_market": c.get("days_on_market"),
"finalized_at": (c.get("finalized_at") or "")[:10],
}
sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True)
slim_comps = [_slim_comp(c) for c in sorted_comps[:15]]
score = item.get("score") or {}
summary = item.get("summary") or {}
price_history = item.get("price_history") or []
cache_age = item.get("cache_age")
# Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal.
# Drop nothing from scores.
slim_score = {k: v for k, v in score.items()}
eiendom: dict | None = None
if eu:
eiendom = {
"unit_code": eu.get("unit_code"),
"usable_area": eu.get("usable_area"),
"estimated_price": eu.get("estimated_selling_price"),
"estimated_range": [
eu.get("estimated_selling_price_lower"),
eu.get("estimated_selling_price_upper"),
],
"listing_sqm_price": eu.get("listing_sqm_price"),
"market_placement": eu.get("market_placement_score"),
"sale_status": eu.get("sale_status"),
"days_on_market": eu.get("days_on_market"),
"avg_comp_sqm_price": avg_comp_sqm,
"comp_count": len(comps),
}
return {
"rank": rank,
@@ -134,17 +119,59 @@ def _slim_listing(rank: int, item: dict) -> dict:
"has_parking": item.get("has_parking"),
"has_garage": item.get("has_garage"),
"eiendom_unit_code": item.get("eiendom_unit_code"),
"score": slim_score,
"score": dict(score),
"categories": item.get("categories"),
"why_interesting": summary.get("why_interesting"),
"risks": summary.get("risks"),
"cache_age": cache_age,
"cache_age": item.get("cache_age"),
"price_history": price_history[:5], # Last 5 price records
"eiendom": eiendom,
"similar_units": slim_comps,
"eiendom": _slim_eiendom(eu, comps) if eu else None,
"similar_units": _slim_comps(comps),
}
def _slim_analyze_ad(result: dict) -> dict:
    """Shape a single-ad ``analyze_ad`` result for MCP output.

    ``result`` is read as a mapping with the keys ``ad``, ``eiendom_unit`` and
    ``similar_units``; each may be missing or falsy. The ad's fields are
    flattened to the top level (including ``listing_description``), and a slim
    Eiendom.no view plus slim comps are attached. Unit images, unit vectors,
    lat/lng and internal timestamps are not carried over.

    ``eiendom`` is ``None`` when there is no Eiendom.no unit.
    """
    listing = result.get("ad") or {}
    unit = result.get("eiendom_unit") or {}
    comparables = result.get("similar_units") or []

    # Ad fields copied verbatim, in output order.
    ad_fields = (
        "finnkode",
        "url",
        "title",
        "address",
        "district",
        "listing_description",
        "property_type",
        "ownership_type",
        "floor",
        "area_m2",
        "rooms",
        "bedrooms",
        "total_price",
        "asking_price",
        "shared_debt",
        "common_costs",
        "construction_year",
        "energy_rating",
        "has_balcony",
        "has_terrace",
        "has_elevator",
        "has_parking",
        "has_garage",
        "eiendom_unit_code",
    )
    out: dict[str, Any] = {field: listing.get(field) for field in ad_fields}
    out["eiendom"] = _slim_eiendom(unit, comparables) if unit else None
    out["similar_units"] = _slim_comps(comparables)
    return out
def _build_slim_search_result(full: dict) -> dict:
"""Convert full analyze_search output to a compact MCP-safe response.
@@ -152,8 +179,7 @@ def _build_slim_search_result(full: dict) -> dict:
listings. Target: <200KB for 30 analyzed ads.
"""
listings = [
_slim_listing(rank + 1, item)
for rank, item in enumerate(full.get("analysis") or [])
_slim_listing(rank + 1, item) for rank, item in enumerate(full.get("analysis") or [])
]
return {
"search_url": full.get("search_url"),
@@ -208,65 +234,6 @@ async def finn_analyze_search(
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
    description=(
        "Fetch full detail for a FINN listing by finnkode."
        " Checks cache first; use force_refresh=True to bypass."
    )
)
async def finn_get_ad(finnkode: str, force_refresh: bool = False) -> str:
    """Return the FINN ad for ``finnkode`` as a JSON string.

    Any exception is logged and reported as a JSON object of the form
    ``{"error": true, "message": ...}`` instead of being raised.
    """
    try:
        listing = await get_or_fetch_ad(finnkode, force_refresh=force_refresh)
        serialized = listing.model_dump_json()
        return serialized
    except Exception as exc:
        logger.error(f"Error fetching ad {finnkode}: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(
    description="Resolve an Eiendom.no unit_code from a FINN listing URL. "
    "Returns unit_code, address, lat, lng or an error if not found."
)
async def finn_resolve_eiendom_unit(finn_url: str) -> str:
    """Look up the Eiendom.no unit behind a FINN URL and return it as JSON.

    On success the JSON holds ``unit_code``, ``address``, ``lat`` and ``lng``.
    When no unit is resolved, or any exception occurs, the JSON is an
    ``{"error": true, "message": ...}`` object instead.
    """
    try:
        resolved = await search_unit_from_finn_url(finn_url)
        if resolved is not None:
            payload = {
                "unit_code": resolved.unit_code,
                "address": resolved.address,
                "lat": resolved.lat,
                "lng": resolved.lng,
            }
        else:
            payload = {
                "error": True,
                "message": "Eiendom.no unit could not be resolved from FINN URL",
            }
        return json.dumps(payload)
    except Exception as exc:
        logger.error(f"Error resolving unit from {finn_url}: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(
    description="Fetch full Eiendom.no unit data by unit_code. Checks SQLite cache (24h TTL)."
)
async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) -> str:
    """Return the Eiendom.no unit for ``unit_code`` as a JSON string.

    A missing unit, or any exception, yields an
    ``{"error": true, "message": ...}`` JSON object rather than raising.
    """
    try:
        found = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
        if found is not None:
            return found.model_dump_json()
        return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
    except Exception as exc:
        logger.error(f"Error fetching unit {unit_code}: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(
description=(
"Fetch and analyze unit images for visual assessment of a property. "
@@ -305,6 +272,7 @@ async def finn_analyze_unit_images(
# within the 1MB MCP tool result limit across multiple images.
from PIL import Image
import io
img = Image.open(io.BytesIO(resp.content))
img.thumbnail((1024, 1024), Image.LANCZOS)
if img.mode in ("RGBA", "P"):
@@ -326,50 +294,6 @@ async def finn_analyze_unit_images(
return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))]
@mcp.tool(
    description="Fetch comparable recently-sold or for-sale units from Eiendom.no using a "
    "base64-encoded unit vector. Returns list of similar units with sale prices."
)
async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENTLY_SOLD") -> str:
    """Return the units similar to ``unit_vector`` as a JSON array string.

    Each unit is dumped via ``model_dump()``; values JSON cannot encode
    natively are stringified. Any exception is logged and reported as an
    ``{"error": true, "message": ...}`` JSON object.
    """
    try:
        matches = await get_similar_units(unit_vector, listing_status)
        dumped = [match.model_dump() for match in matches]
        return json.dumps(dumped, default=str)
    except Exception as exc:
        logger.error(f"Error fetching similar units: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(
    description="Build a base64-encoded unit vector for a given Eiendom.no unit_code. "
    "The vector is used as input to finn_get_similar_units."
)
async def finn_build_unit_vector(unit_code: str) -> str:
    """Return ``{"unit_code", "unit_vector"}`` for an Eiendom.no unit as JSON.

    A missing unit, or any exception, yields an
    ``{"error": true, "message": ...}`` JSON object rather than raising.
    """
    try:
        found = await get_unit(unit_code)
        if found is not None:
            payload = {"unit_code": found.unit_code, "unit_vector": build_unit_vector(found)}
        else:
            payload = {"error": True, "message": "Eiendom.no unit not found"}
        return json.dumps(payload)
    except Exception as exc:
        logger.error(f"Error building unit vector for {unit_code}: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(
    description="Decode a base64 unit vector into human-readable JSON (lat, lon, property type, "
    "floor, rooms, construction year, area, price)."
)
def finn_decode_unit_vector(unit_vector: str) -> str:
    """Decode ``unit_vector`` and return the decoded value as a JSON string.

    Any exception (from decoding or serialising) is logged and reported as an
    ``{"error": true, "message": ...}`` JSON object.
    """
    try:
        return json.dumps(decode_unit_vector(unit_vector))
    except Exception as exc:
        logger.error(f"Error decoding unit vector: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
# ============================================================================
# Additional analysis and enrichment tools
# ============================================================================
@@ -377,79 +301,33 @@ def finn_decode_unit_vector(unit_vector: str) -> str:
@mcp.tool(
description=(
"Fetch and enrich a single FINN ad with optional Eiendom.no data and comparable units."
"Deep-dive one or more FINN listings. Accepts a single finnkode or a list "
"(batched in one call). Always enriches with Eiendom.no data and comparable "
"sold units. Returns listing_description plus slim eiendom/comps; excludes "
"image URLs and internal vectors (use finn_analyze_unit_images for visuals)."
)
)
async def finn_analyze_ad(
finnkode: str,
include_eiendom_no: bool = True,
include_similar_units: bool = False,
) -> str:
"""Analyze and enrich a single FINN ad."""
try:
result = await analyze_ad(
finnkode,
include_eiendom_no=include_eiendom_no,
include_similar_units=include_similar_units,
)
return json.dumps(result, default=str)
except Exception as e:
logger.error(f"Error analyzing ad {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)})
async def finn_analyze_ad(finnkode: str | list[str]) -> str:
"""Analyze and enrich one or more FINN ads. Batch input returns a list."""
finnkoder = [finnkode] if isinstance(finnkode, str) else list(finnkode)
async def _one(fk: str) -> dict:
try:
result = await analyze_ad(
fk,
include_eiendom_no=True,
include_similar_units=True,
)
return _slim_analyze_ad(result)
except Exception as e: # noqa: BLE001 — per-item isolation, batch must not abort
logger.error(f"Error analyzing ad {fk}: {e}")
return {"finnkode": fk, "error": True, "message": str(e)}
@mcp.tool(
    description=(
        "Evaluate one FINN listing against comparable recently-sold properties from Eiendom.no."
    )
)
async def finn_analyze_ad_against_comps(
    finnkode: str, listing_status: str = "RECENTLY_SOLD"
) -> str:
    """Return the comps analysis for one FINN listing as a JSON string.

    Values JSON cannot encode natively are stringified. Any exception is
    logged and reported as an ``{"error": true, "message": ...}`` JSON object.
    """
    try:
        analysis = await analyze_ad_against_comps(finnkode, listing_status=listing_status)
        encoded = json.dumps(analysis, default=str)
        return encoded
    except Exception as exc:
        logger.error(f"Error analyzing ad {finnkode} against comps: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
results = await asyncio.gather(*[_one(fk) for fk in finnkoder])
@mcp.tool(
    description=(
        "Find properties similar to a listing the user has liked. "
        "Requires that the user has marked the listing with verdict='liked'."
    )
)
async def finn_find_similar_to_liked_ad(
    finnkode: str, mode: str = "recommendations", listing_status: str = "FOR_SALE"
) -> str:
    """Return properties similar to a liked listing, rendered as JSON.

    Any exception is logged and reported as an
    ``{"error": true, "message": ...}`` JSON object instead of being raised.
    """
    try:
        similar = await find_similar_to_liked(
            finnkode,
            mode=mode,
            listing_status=listing_status,
        )
        rendered = render_similar_units(similar, "json")
        return rendered
    except Exception as exc:
        logger.error(f"Error finding similar to {finnkode}: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
@mcp.tool(description="Compare multiple FINN listings side by side with optional enrichment.")
async def finn_compare_ads(
    finnkoder: list[str],
    include_eiendom_no: bool = True,
    include_comps: bool = True,
) -> str:
    """Return a side-by-side comparison of the given listings, rendered as JSON.

    The two ``include_*`` flags are forwarded unchanged to ``compare_ads``.
    Any exception is logged and reported as an
    ``{"error": true, "message": ...}`` JSON object instead of being raised.
    """
    enrichment = {
        "include_eiendom_no": include_eiendom_no,
        "include_comps": include_comps,
    }
    try:
        comparison = await compare_ads(finnkoder, **enrichment)
        return render_comparison(comparison, "json")
    except Exception as exc:
        logger.error(f"Error comparing ads: {exc}")
        return json.dumps({"error": True, "message": str(exc)})
# Single string input → single object; list input → list (preserves order).
payload: Any = results[0] if isinstance(finnkode, str) else results
return json.dumps(payload, default=str)
@mcp.tool(
@@ -467,13 +345,13 @@ async def finn_save_feedback(finnkode: str, verdict: str, notes: str | None = No
@mcp.tool(
description="Fetch the stored shortlist from a previous search run. "
"Returns the ranked listings with all enrichment data."
description="Fetch the shortlist of listings you have given a verdict "
"(liked, disliked, maybe, visited). Enriched with cached score and price data."
)
def finn_get_shortlist(run_id: int | None = None, limit: int = 10) -> str:
"""Get stored shortlist."""
def finn_get_shortlist(verdict: str = "liked", limit: int = 10) -> str:
"""Get stored shortlist filtered by verdict."""
try:
result = get_shortlist(run_id, limit)
result = get_shortlist(verdict, limit)
return render_shortlist(result, "json")
except Exception as e:
logger.error(f"Error fetching shortlist: {e}")
@@ -502,4 +380,4 @@ def main() -> None:
if __name__ == "__main__":
main()
main()