Enhance analysis functionality with parallel fetching and response shaping; add image processing for unit images

2026-05-26 20:50:58 +00:00
parent 2933b8c1ea
commit 5b772b2ae5
4 changed files with 300 additions and 49 deletions
@@ -1,11 +1,15 @@
 """FastMCP stdio server for FINN real estate analysis and Eiendom.no enrichment."""

+import base64
 import json
 import logging
 from typing import Any
 import os
+import asyncio
+import httpx
 from mcp.server.transport_security import TransportSecuritySettings
-from mcp.server.fastmcp import FastMCP
+from mcp.server.fastmcp import Context, FastMCP
+from mcp.types import ImageContent, TextContent

 from .eiendom_no import (
    build_unit_vector,
@@ -39,6 +43,120 @@ from .service import (
 logger = logging.getLogger(__name__)


+# ---------------------------------------------------------------------------
+# Response shaping
+# ---------------------------------------------------------------------------
+
+
+def _slim_listing(rank: int, item: dict, max_comps: int = 15) -> dict:
+    """Collapse one full analyze_ad result into a compact listing card.
+
+    Only the fields named in the returned dict are copied, so everything
+    else on ``item`` (listing_description, unit_images, unit_vector,
+    timestamps, ...) is dropped.
+
+    Keeps: the whole ``score`` mapping (shallow copy) and the ``max_comps``
+    most recently finalized similar units, each cut down to a fixed set of
+    fields with ``finalized_at`` truncated to its first 10 characters (the
+    date part, assuming ISO-8601 values).
+    Derives: ``avg_comp_sqm_price`` (rounded mean over ALL comps that have a
+    truthy ``sqm_price``) and ``comp_count`` (comps before trimming). Both
+    live under ``eiendom``, which is ``None`` when ``eiendom_unit`` is empty.
+
+    Args:
+        rank: Position of the listing, stored unchanged as ``rank``.
+        item: One analysis result; missing keys come out as ``None``.
+        max_comps: Upper bound on ``similar_units`` entries. Negative values
+            are treated as zero.
+
+    Returns:
+        A new dict; ``item`` is not modified.
+    """
+    eu = item.get("eiendom_unit") or {}
+    comps = item.get("similar_units") or []
+    sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
+    avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
+
+    def _finalized(c: dict) -> str:
+        # str() leaves ISO strings untouched and keeps the sort and the
+        # slice below from raising if a date/datetime object shows up.
+        return str(c.get("finalized_at") or "")
+
+    # Slim comps: drop internal IDs, coords, redundant status fields.
+    def _slim_comp(c: dict) -> dict:
+        return {
+            "unit_code": c.get("unit_code"),
+            "address": c.get("address"),
+            "usable_area": c.get("usable_area"),
+            "rooms": c.get("rooms"),
+            "floor": c.get("floor"),
+            "construction_year": c.get("construction_year"),
+            "listing_price": c.get("listing_price"),
+            "selling_price": c.get("selling_price"),
+            "shared_debt": c.get("shared_debt"),
+            "sqm_price": c.get("sqm_price"),
+            "common_costs": c.get("common_costs"),
+            "days_on_market": c.get("days_on_market"),
+            "finalized_at": _finalized(c)[:10],
+        }
+
+    # Sort by recency and keep only the newest — older comps lose relevance fast.
+    # Clamp the limit so a negative value cannot slice from the end.
+    sorted_comps = sorted(comps, key=_finalized, reverse=True)
+    slim_comps = [_slim_comp(c) for c in sorted_comps[: max(max_comps, 0)]]
+
+    score = item.get("score") or {}
+    summary = item.get("summary") or {}
+
+    # Keep the full score breakdown — nothing is dropped from scores.
+    slim_score = dict(score)
+
+    eiendom: dict | None = None
+    if eu:
+        eiendom = {
+            "unit_code": eu.get("unit_code"),
+            "usable_area": eu.get("usable_area"),
+            "estimated_price": eu.get("estimated_selling_price"),
+            "estimated_range": [
+                eu.get("estimated_selling_price_lower"),
+                eu.get("estimated_selling_price_upper"),
+            ],
+            "listing_sqm_price": eu.get("listing_sqm_price"),
+            "market_placement": eu.get("market_placement_score"),
+            "sale_status": eu.get("sale_status"),
+            "days_on_market": eu.get("days_on_market"),
+            "avg_comp_sqm_price": avg_comp_sqm,
+            # Counts every comp, not just the ones kept in similar_units.
+            "comp_count": len(comps),
+        }
+
+    return {
+        "rank": rank,
+        "finnkode": item.get("finnkode"),
+        "url": item.get("url"),
+        "title": item.get("title"),
+        "address": item.get("address"),
+        "district": item.get("district"),
+        "property_type": item.get("property_type"),
+        "ownership_type": item.get("ownership_type"),
+        "floor": item.get("floor"),
+        "area_m2": item.get("area_m2"),
+        "bedrooms": item.get("bedrooms"),
+        "rooms": item.get("rooms"),
+        "total_price": item.get("total_price"),
+        "asking_price": item.get("asking_price"),
+        "shared_debt": item.get("shared_debt"),
+        "common_costs": item.get("common_costs"),
+        "construction_year": item.get("construction_year"),
+        "has_balcony": item.get("has_balcony"),
+        "has_terrace": item.get("has_terrace"),
+        "has_elevator": item.get("has_elevator"),
+        "has_parking": item.get("has_parking"),
+        "has_garage": item.get("has_garage"),
+        "eiendom_unit_code": item.get("eiendom_unit_code"),
+        "score": slim_score,
+        "categories": item.get("categories"),
+        "why_interesting": summary.get("why_interesting"),
+        "risks": summary.get("risks"),
+        "eiendom": eiendom,
+        "similar_units": slim_comps,
+    }
+
+
+def _build_slim_search_result(full: dict) -> dict:
+    """Reduce a full analyze_search payload to a compact MCP-safe response.
+
+    Only search_url, summary and one slim card per analyzed ad are returned;
+    search_cards and every other top-level key are left out.
+    Target: <200KB for 30 analyzed ads.
+    """
+    analyzed_ads = full.get("analysis") or []
+    # Ranks are 1-based and follow the order of the analysis list.
+    cards = [_slim_listing(position, ad) for position, ad in enumerate(analyzed_ads, start=1)]
+
+    slim = {key: full.get(key) for key in ("search_url", "summary")}
+    slim["listings"] = cards
+    return slim
+
+
 def _build_transport_security() -> TransportSecuritySettings:
    allowed = os.getenv("MCP_ALLOWED_HOSTS", "")
    if allowed:
@@ -57,10 +175,13 @@ mcp = FastMCP("finn_eiendom_mcp", transport_security=_build_transport_security()
    description=(
        "Analyze a FINN.no real estate search URL. Scrapes listing cards,"
        " fetches details, enriches with Eiendom.no data, scores, and ranks."
+        " Fetches all ads in parallel (phase 1) then scores from cache (phase 2)."
+        " Progress updates are emitted during phase 1."
    )
 )
 async def finn_analyze_search(
    search_url: str,
+    ctx: Context,
    max_pages: int = 3,
    detail_limit: int = 20,
    include_details: bool = True,
@@ -74,8 +195,9 @@ async def finn_analyze_search(
            include_details=include_details,
            detail_limit=detail_limit,
            include_eiendom_no=include_eiendom_no,
+            ctx=ctx,
        )
-        return json.dumps(result, default=str)
+        return json.dumps(_build_slim_search_result(result), default=str)
    except Exception as e:
        logger.error(f"Error analyzing search: {e}")
        return json.dumps({"error": True, "message": str(e)})
@@ -143,17 +265,60 @@ async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) ->
@mcp.tool(
    description=(
        "Fetch and analyze unit images for visual assessment of a property. "
-        "Returns property photos with metadata for evaluating views, condition, and layout."
+        "Downloads photos and returns them as visual image content so Claude can "
+        "directly assess views, condition, layout, kitchen/bathroom quality, and atmosphere."
    )
 )
-async def finn_analyze_unit_images(unit_code: str, force_refresh: bool = False) -> str:
-    """Fetch and return unit images for visual analysis."""
+async def finn_analyze_unit_images(
+    unit_code: str,
+    force_refresh: bool = False,
+    max_images: int = 8,
+) -> list:
+    """Fetch unit images and return as vision-compatible image content blocks.
+
+    Args:
+        unit_code: Unit identifier handed to ``get_unit_images``.
+        force_refresh: Passed through to ``get_unit_images``.
+        max_images: Maximum number of images to download; values below
+            zero are treated as zero.
+
+    Returns:
+        A list that starts with one ``TextContent`` header and continues
+        with one JPEG ``ImageContent`` per image that could be downloaded
+        and re-encoded. Images that fail are logged and left out. If the
+        lookup itself fails, the list holds a single ``TextContent`` whose
+        text is a JSON error object.
+    """
    try:
        result = await get_unit_images(unit_code, force_refresh=force_refresh)
-        return render_unit_images(result, "markdown")
+        all_urls = result.get("unit_images") or []
+        # Clamp so a negative limit cannot slice from the end of the list.
+        urls = all_urls[: max(max_images, 0)]
+
+        header = (
+            # "or" also covers an address key that is present but None.
+            f"{result.get('address') or unit_code} | "
+            f"{result.get('rooms')} rom | "
+            f"{result.get('usable_area')}m² | "
+            f"{len(all_urls)} bilder totalt, viser {len(urls)}"
+        )
+        content: list = [TextContent(type="text", text=header)]
+
+        def _encode(raw: bytes) -> str:
+            # Resize to max 1024px on longest side before encoding.
+            # Raw real estate photos are 2-4MB — must compress to stay
+            # within the 1MB MCP tool result limit across multiple images.
+            from PIL import Image
+            import io
+            img = Image.open(io.BytesIO(raw))
+            img.thumbnail((1024, 1024), Image.LANCZOS)
+            # JPEG has no alpha or palette support: normalise every mode
+            # other than RGB and greyscale, not only RGBA and P.
+            if img.mode not in ("RGB", "L"):
+                img = img.convert("RGB")
+            buf = io.BytesIO()
+            img.save(buf, format="JPEG", quality=75, optimize=True)
+            return base64.b64encode(buf.getvalue()).decode()
+
+        async def _fetch(client: httpx.AsyncClient, url: str) -> ImageContent | None:
+            try:
+                resp = await client.get(url)
+                if resp.status_code != 200:
+                    logger.warning("Image %s returned HTTP %s", url, resp.status_code)
+                    return None
+                # Decoding and resizing is CPU-bound; keep it off the event loop.
+                b64 = await asyncio.to_thread(_encode, resp.content)
+                return ImageContent(type="image", data=b64, mimeType="image/jpeg")
+            except Exception as exc:
+                # Best effort: one broken image must not fail the whole call.
+                logger.warning("Failed to fetch/resize image %s: %s", url, exc)
+                return None
+
+        # One shared client so all downloads reuse a single connection pool.
+        async with httpx.AsyncClient(timeout=15) as client:
+            fetched = await asyncio.gather(*[_fetch(client, u) for u in urls])
+        content.extend(img for img in fetched if img is not None)
+        return content
+
    except Exception as e:
        logger.error(f"Error fetching unit images for {unit_code}: {e}")
-        return json.dumps({"error": True, "message": str(e)})
+        return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))]


@mcp.tool(
@@ -332,4 +497,4 @@ def main() -> None:


 if __name__ == "__main__":
-    main()
+    main()