Refactor and enhance various components of the FINN real estate analysis tool

- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
2026-05-29 15:17:11 +00:00
parent 55d93894ac
commit eb95b98111
10 changed files with 295 additions and 343 deletions
@@ -11,30 +11,14 @@ from mcp.server.transport_security import TransportSecuritySettings
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.types import ImageContent, TextContent

-from .eiendom_no import (
-    build_unit_vector,
-    decode_unit_vector,
-    get_similar_units,
-    get_unit,
-    search_unit_from_finn_url,
-)
 from .formatting import (
-    render_ad,
-    render_comparison,
    render_diff,
    render_shortlist,
-    render_similar_units,
-    render_unit_images,
 )
 from .service import (
    analyze_ad,
-    analyze_ad_against_comps,
    analyze_search,
-    compare_ads,
-    find_similar_to_liked,
    get_new_ads_since_last_run,
-    get_or_fetch_ad,
-    get_or_fetch_eiendom_unit,
    get_shortlist,
    get_unit_images,
    save_feedback,
@@ -48,6 +32,55 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------


+def _slim_comp(c: dict) -> dict:
+    """Drop internal IDs, coords, redundant status fields from a comparable unit."""
+    return {
+        "unit_code": c.get("unit_code"),
+        "address": c.get("address"),
+        "usable_area": c.get("usable_area"),
+        "rooms": c.get("rooms"),
+        "floor": c.get("floor"),
+        "construction_year": c.get("construction_year"),
+        "listing_price": c.get("listing_price"),
+        "selling_price": c.get("selling_price"),
+        "shared_debt": c.get("shared_debt"),
+        "sqm_price": c.get("sqm_price"),
+        "common_costs": c.get("common_costs"),
+        "days_on_market": c.get("days_on_market"),
+        "finalized_at": (c.get("finalized_at") or "")[:10],
+    }
+
+
+def _slim_comps(comps: list[dict], keep: int = 15) -> list[dict]:
+    """Sort comps by recency, keep the N most recent — older comps lose relevance fast."""
+    sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True)
+    return [_slim_comp(c) for c in sorted_comps[:keep]]
+
+
+def _avg_comp_sqm(comps: list[dict]) -> int | None:
+    sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
+    return round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
+
+
+def _slim_eiendom(eu: dict, comps: list[dict]) -> dict:
+    """Compact Eiendom.no unit view. Drops unit_images, unit_vector, lat/lng, timestamps."""
+    return {
+        "unit_code": eu.get("unit_code"),
+        "usable_area": eu.get("usable_area"),
+        "estimated_price": eu.get("estimated_selling_price"),
+        "estimated_range": [
+            eu.get("estimated_selling_price_lower"),
+            eu.get("estimated_selling_price_upper"),
+        ],
+        "listing_sqm_price": eu.get("listing_sqm_price"),
+        "market_placement": eu.get("market_placement_score"),
+        "sale_status": eu.get("sale_status"),
+        "days_on_market": eu.get("days_on_market"),
+        "avg_comp_sqm_price": _avg_comp_sqm(comps),
+        "comp_count": len(comps),
+    }
+
+
 def _slim_listing(rank: int, item: dict) -> dict:
    """Collapse one full analyze_ad result into a compact listing card.

@@ -57,57 +90,9 @@ def _slim_listing(rank: int, item: dict) -> dict:
    """
    eu = item.get("eiendom_unit") or {}
    comps = item.get("similar_units") or []
-    sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
-    avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
-
-    # Slim comps: drop internal IDs, coords, redundant status fields.
-    # Sort by recency, keep 15 most recent — older comps lose relevance fast.
-    def _slim_comp(c: dict) -> dict:
-        return {
-            "unit_code": c.get("unit_code"),
-            "address": c.get("address"),
-            "usable_area": c.get("usable_area"),
-            "rooms": c.get("rooms"),
-            "floor": c.get("floor"),
-            "construction_year": c.get("construction_year"),
-            "listing_price": c.get("listing_price"),
-            "selling_price": c.get("selling_price"),
-            "shared_debt": c.get("shared_debt"),
-            "sqm_price": c.get("sqm_price"),
-            "common_costs": c.get("common_costs"),
-            "days_on_market": c.get("days_on_market"),
-            "finalized_at": (c.get("finalized_at") or "")[:10],
-        }
-
-    sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True)
-    slim_comps = [_slim_comp(c) for c in sorted_comps[:15]]
-
    score = item.get("score") or {}
    summary = item.get("summary") or {}
    price_history = item.get("price_history") or []
-    cache_age = item.get("cache_age")
-
-    # Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal.
-    # Drop nothing from scores.
-    slim_score = {k: v for k, v in score.items()}
-
-    eiendom: dict | None = None
-    if eu:
-        eiendom = {
-            "unit_code": eu.get("unit_code"),
-            "usable_area": eu.get("usable_area"),
-            "estimated_price": eu.get("estimated_selling_price"),
-            "estimated_range": [
-                eu.get("estimated_selling_price_lower"),
-                eu.get("estimated_selling_price_upper"),
-            ],
-            "listing_sqm_price": eu.get("listing_sqm_price"),
-            "market_placement": eu.get("market_placement_score"),
-            "sale_status": eu.get("sale_status"),
-            "days_on_market": eu.get("days_on_market"),
-            "avg_comp_sqm_price": avg_comp_sqm,
-            "comp_count": len(comps),
-        }

    return {
        "rank": rank,
@@ -134,17 +119,59 @@ def _slim_listing(rank: int, item: dict) -> dict:
        "has_parking": item.get("has_parking"),
        "has_garage": item.get("has_garage"),
        "eiendom_unit_code": item.get("eiendom_unit_code"),
-        "score": slim_score,
+        "score": dict(score),
        "categories": item.get("categories"),
        "why_interesting": summary.get("why_interesting"),
        "risks": summary.get("risks"),
-        "cache_age": cache_age,
+        "cache_age": item.get("cache_age"),
        "price_history": price_history[:5],  # Last 5 price records
-        "eiendom": eiendom,
-        "similar_units": slim_comps,
+        "eiendom": _slim_eiendom(eu, comps) if eu else None,
+        "similar_units": _slim_comps(comps),
    }


+def _slim_analyze_ad(result: dict) -> dict:
+    """Shape the single-ad analyze_ad result for MCP output.
+
+    The service returns {ad: FinnAd, eiendom_unit: EiendomUnit, similar_units: [...]}.
+    Flatten the ad fields up, keep listing_description, attach slim eiendom + comps,
+    and strip unit_images / unit_vector / lat / lng / internal timestamps.
+    """
+    ad = result.get("ad") or {}
+    eu = result.get("eiendom_unit") or {}
+    comps = result.get("similar_units") or []
+
+    out: dict[str, Any] = {
+        "finnkode": ad.get("finnkode"),
+        "url": ad.get("url"),
+        "title": ad.get("title"),
+        "address": ad.get("address"),
+        "district": ad.get("district"),
+        "listing_description": ad.get("listing_description"),
+        "property_type": ad.get("property_type"),
+        "ownership_type": ad.get("ownership_type"),
+        "floor": ad.get("floor"),
+        "area_m2": ad.get("area_m2"),
+        "rooms": ad.get("rooms"),
+        "bedrooms": ad.get("bedrooms"),
+        "total_price": ad.get("total_price"),
+        "asking_price": ad.get("asking_price"),
+        "shared_debt": ad.get("shared_debt"),
+        "common_costs": ad.get("common_costs"),
+        "construction_year": ad.get("construction_year"),
+        "energy_rating": ad.get("energy_rating"),
+        "has_balcony": ad.get("has_balcony"),
+        "has_terrace": ad.get("has_terrace"),
+        "has_elevator": ad.get("has_elevator"),
+        "has_parking": ad.get("has_parking"),
+        "has_garage": ad.get("has_garage"),
+        "eiendom_unit_code": ad.get("eiendom_unit_code"),
+        "eiendom": _slim_eiendom(eu, comps) if eu else None,
+        "similar_units": _slim_comps(comps),
+    }
+    return out
+
+
 def _build_slim_search_result(full: dict) -> dict:
    """Convert full analyze_search output to a compact MCP-safe response.

@@ -152,8 +179,7 @@ def _build_slim_search_result(full: dict) -> dict:
    listings. Target: <200KB for 30 analyzed ads.
    """
    listings = [
-        _slim_listing(rank + 1, item)
-        for rank, item in enumerate(full.get("analysis") or [])
+        _slim_listing(rank + 1, item) for rank, item in enumerate(full.get("analysis") or [])
    ]
    return {
        "search_url": full.get("search_url"),
@@ -208,65 +234,6 @@ async def finn_analyze_search(
        return json.dumps({"error": True, "message": str(e)})


-@mcp.tool(
-    description=(
-        "Fetch full detail for a FINN listing by finnkode."
-        " Checks cache first; use force_refresh=True to bypass."
-    )
-)
-async def finn_get_ad(finnkode: str, force_refresh: bool = False) -> str:
-    """Fetch FINN ad details by finnkode."""
-    try:
-        ad = await get_or_fetch_ad(finnkode, force_refresh=force_refresh)
-        return ad.model_dump_json()
-    except Exception as e:
-        logger.error(f"Error fetching ad {finnkode}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
-@mcp.tool(
-    description="Resolve an Eiendom.no unit_code from a FINN listing URL. "
-    "Returns unit_code, address, lat, lng or an error if not found."
-)
-async def finn_resolve_eiendom_unit(finn_url: str) -> str:
-    """Resolve Eiendom.no unit from FINN URL."""
-    try:
-        unit = await search_unit_from_finn_url(finn_url)
-        if unit is None:
-            return json.dumps(
-                {
-                    "error": True,
-                    "message": "Eiendom.no unit could not be resolved from FINN URL",
-                }
-            )
-        return json.dumps(
-            {
-                "unit_code": unit.unit_code,
-                "address": unit.address,
-                "lat": unit.lat,
-                "lng": unit.lng,
-            }
-        )
-    except Exception as e:
-        logger.error(f"Error resolving unit from {finn_url}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
-@mcp.tool(
-    description="Fetch full Eiendom.no unit data by unit_code. Checks SQLite cache (24h TTL)."
-)
-async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) -> str:
-    """Fetch Eiendom.no unit details by unit_code."""
-    try:
-        unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
-        if unit is None:
-            return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
-        return unit.model_dump_json()
-    except Exception as e:
-        logger.error(f"Error fetching unit {unit_code}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
@mcp.tool(
    description=(
        "Fetch and analyze unit images for visual assessment of a property. "
@@ -305,6 +272,7 @@ async def finn_analyze_unit_images(
                # within the 1MB MCP tool result limit across multiple images.
                from PIL import Image
                import io
+
                img = Image.open(io.BytesIO(resp.content))
                img.thumbnail((1024, 1024), Image.LANCZOS)
                if img.mode in ("RGBA", "P"):
@@ -326,50 +294,6 @@ async def finn_analyze_unit_images(
        return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))]


-@mcp.tool(
-    description="Fetch comparable recently-sold or for-sale units from Eiendom.no using a "
-    "base64-encoded unit vector. Returns list of similar units with sale prices."
-)
-async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENTLY_SOLD") -> str:
-    """Fetch similar units from Eiendom.no."""
-    try:
-        units = await get_similar_units(unit_vector, listing_status)
-        return json.dumps([unit.model_dump() for unit in units], default=str)
-    except Exception as e:
-        logger.error(f"Error fetching similar units: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
-@mcp.tool(
-    description="Build a base64-encoded unit vector for a given Eiendom.no unit_code. "
-    "The vector is used as input to finn_get_similar_units."
-)
-async def finn_build_unit_vector(unit_code: str) -> str:
-    """Build unit vector for Eiendom.no unit."""
-    try:
-        unit = await get_unit(unit_code)
-        if unit is None:
-            return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
-        return json.dumps({"unit_code": unit.unit_code, "unit_vector": build_unit_vector(unit)})
-    except Exception as e:
-        logger.error(f"Error building unit vector for {unit_code}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
-@mcp.tool(
-    description="Decode a base64 unit vector into human-readable JSON (lat, lon, property type, "
-    "floor, rooms, construction year, area, price)."
-)
-def finn_decode_unit_vector(unit_vector: str) -> str:
-    """Decode unit vector to readable format."""
-    try:
-        result = decode_unit_vector(unit_vector)
-        return json.dumps(result)
-    except Exception as e:
-        logger.error(f"Error decoding unit vector: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
 # ============================================================================
 # Additional analysis and enrichment tools
 # ============================================================================
@@ -377,79 +301,33 @@ def finn_decode_unit_vector(unit_vector: str) -> str:

@mcp.tool(
    description=(
-        "Fetch and enrich a single FINN ad with optional Eiendom.no data and comparable units."
+        "Deep-dive one or more FINN listings. Accepts a single finnkode or a list "
+        "(batched in one call). Always enriches with Eiendom.no data and comparable "
+        "sold units. Returns listing_description plus slim eiendom/comps; excludes "
+        "image URLs and internal vectors (use finn_analyze_unit_images for visuals)."
    )
 )
-async def finn_analyze_ad(
-    finnkode: str,
-    include_eiendom_no: bool = True,
-    include_similar_units: bool = False,
-) -> str:
-    """Analyze and enrich a single FINN ad."""
-    try:
-        result = await analyze_ad(
-            finnkode,
-            include_eiendom_no=include_eiendom_no,
-            include_similar_units=include_similar_units,
-        )
-        return json.dumps(result, default=str)
-    except Exception as e:
-        logger.error(f"Error analyzing ad {finnkode}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
+async def finn_analyze_ad(finnkode: str | list[str]) -> str:
+    """Analyze and enrich one or more FINN ads. Batch input returns a list."""
+    finnkoder = [finnkode] if isinstance(finnkode, str) else list(finnkode)

+    async def _one(fk: str) -> dict:
+        try:
+            result = await analyze_ad(
+                fk,
+                include_eiendom_no=True,
+                include_similar_units=True,
+            )
+            return _slim_analyze_ad(result)
+        except Exception as e:  # noqa: BLE001 — per-item isolation, batch must not abort
+            logger.error(f"Error analyzing ad {fk}: {e}")
+            return {"finnkode": fk, "error": True, "message": str(e)}

-@mcp.tool(
-    description=(
-        "Evaluate one FINN listing against comparable recently-sold properties from Eiendom.no."
-    )
-)
-async def finn_analyze_ad_against_comps(
-    finnkode: str, listing_status: str = "RECENTLY_SOLD"
-) -> str:
-    """Analyze ad against comparable sales."""
-    try:
-        result = await analyze_ad_against_comps(finnkode, listing_status=listing_status)
-        return json.dumps(result, default=str)
-    except Exception as e:
-        logger.error(f"Error analyzing ad {finnkode} against comps: {e}")
-        return json.dumps({"error": True, "message": str(e)})
+    results = await asyncio.gather(*[_one(fk) for fk in finnkoder])

-
-@mcp.tool(
-    description=(
-        "Find properties similar to a listing the user has liked. "
-        "Requires that the user has marked the listing with verdict='liked'."
-    )
-)
-async def finn_find_similar_to_liked_ad(
-    finnkode: str, mode: str = "recommendations", listing_status: str = "FOR_SALE"
-) -> str:
-    """Find properties similar to a liked ad."""
-    try:
-        result = await find_similar_to_liked(finnkode, mode=mode, listing_status=listing_status)
-        return render_similar_units(result, "json")
-    except Exception as e:
-        logger.error(f"Error finding similar to {finnkode}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
-
-
-@mcp.tool(description="Compare multiple FINN listings side by side with optional enrichment.")
-async def finn_compare_ads(
-    finnkoder: list[str],
-    include_eiendom_no: bool = True,
-    include_comps: bool = True,
-) -> str:
-    """Compare multiple ads."""
-    try:
-        result = await compare_ads(
-            finnkoder,
-            include_eiendom_no=include_eiendom_no,
-            include_comps=include_comps,
-        )
-        return render_comparison(result, "json")
-    except Exception as e:
-        logger.error(f"Error comparing ads: {e}")
-        return json.dumps({"error": True, "message": str(e)})
+    # Single string input → single object; list input → list (preserves order).
+    payload: Any = results[0] if isinstance(finnkode, str) else results
+    return json.dumps(payload, default=str)


@mcp.tool(
@@ -467,13 +345,13 @@ async def finn_save_feedback(finnkode: str, verdict: str, notes: str | None = No


@mcp.tool(
-    description="Fetch the stored shortlist from a previous search run. "
-    "Returns the ranked listings with all enrichment data."
+    description="Fetch the shortlist of listings you have given a verdict "
+    "(liked, disliked, maybe, visited). Enriched with cached score and price data."
 )
-def finn_get_shortlist(run_id: int | None = None, limit: int = 10) -> str:
-    """Get stored shortlist."""
+def finn_get_shortlist(verdict: str = "liked", limit: int = 10) -> str:
+    """Get stored shortlist filtered by verdict."""
    try:
-        result = get_shortlist(run_id, limit)
+        result = get_shortlist(verdict, limit)
        return render_shortlist(result, "json")
    except Exception as e:
        logger.error(f"Error fetching shortlist: {e}")
@@ -502,4 +380,4 @@ def main() -> None:


 if __name__ == "__main__":
-    main()
+    main()