Enhance analysis functionality with parallel fetching and response shaping; add image processing for unit images

This commit is contained in:
Ole
2026-05-26 20:50:58 +00:00
parent 2933b8c1ea
commit 5b772b2ae5
4 changed files with 300 additions and 49 deletions
+173 -8
View File
@@ -1,11 +1,15 @@
"""FastMCP stdio server for FINN real estate analysis and Eiendom.no enrichment."""
import base64
import json
import logging
from typing import Any
import os
import asyncio
import httpx
from mcp.server.transport_security import TransportSecuritySettings
from mcp.server.fastmcp import FastMCP
from mcp.server.fastmcp import Context, FastMCP
from mcp.types import ImageContent, TextContent
from .eiendom_no import (
build_unit_vector,
@@ -39,6 +43,120 @@ from .service import (
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Response shaping
# ---------------------------------------------------------------------------
def _slim_listing(rank: int, item: dict, max_comps: int = 15) -> dict:
    """Collapse one full analyze_ad result into a compact listing card.

    Only the fields explicitly copied below survive; everything else on
    ``item`` (e.g. listing_description, unit_images, unit_vector) is dropped.
    The ``score`` mapping is kept in full. ``similar_units`` is reduced to a
    fixed set of fields and trimmed to the most recently finalized entries.

    Args:
        rank: Position of the listing in the result list, stored as ``rank``.
        item: One analysis result dict. Missing or null keys become ``None``.
        max_comps: Maximum number of comparable units to keep. Values below
            zero are treated as zero.

    Returns:
        A dict with the listing's core fields, a shallow copy of ``score``,
        the ``why_interesting`` / ``risks`` entries from ``summary``, an
        ``eiendom`` sub-dict (``None`` when no ``eiendom_unit`` is present)
        and the slimmed ``similar_units`` list.
    """
    comp_keys = (
        "unit_code",
        "address",
        "usable_area",
        "rooms",
        "floor",
        "construction_year",
        "listing_price",
        "selling_price",
        "shared_debt",
        "sqm_price",
        "common_costs",
        "days_on_market",
    )
    listing_keys = (
        "finnkode",
        "url",
        "title",
        "address",
        "district",
        "property_type",
        "ownership_type",
        "floor",
        "area_m2",
        "bedrooms",
        "rooms",
        "total_price",
        "asking_price",
        "shared_debt",
        "common_costs",
        "construction_year",
        "has_balcony",
        "has_terrace",
        "has_elevator",
        "has_parking",
        "has_garage",
        "eiendom_unit_code",
    )

    unit = item.get("eiendom_unit") or {}
    comps = item.get("similar_units") or []
    score = item.get("score") or {}
    summary = item.get("summary") or {}

    # The average is taken over ALL comps with a truthy sqm_price, not only
    # the ones that survive the max_comps cut.
    sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
    avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None

    def _finalized(comp: dict) -> str:
        # Normalize to text so date/datetime values sort and slice like the
        # ISO strings; a missing value becomes "" and therefore sorts last.
        return str(comp.get("finalized_at") or "")

    def _slim_comp(comp: dict) -> dict:
        # Drop internal IDs, coords and redundant status fields.
        slim = {key: comp.get(key) for key in comp_keys}
        slim["finalized_at"] = _finalized(comp)[:10]  # date part only
        return slim

    # Most recent first; older comps lose relevance fast.
    newest_first = sorted(comps, key=_finalized, reverse=True)
    slim_comps = [_slim_comp(c) for c in newest_first[: max(0, max_comps)]]

    eiendom: dict | None = None
    if unit:
        eiendom = {
            "unit_code": unit.get("unit_code"),
            "usable_area": unit.get("usable_area"),
            "estimated_price": unit.get("estimated_selling_price"),
            "estimated_range": [
                unit.get("estimated_selling_price_lower"),
                unit.get("estimated_selling_price_upper"),
            ],
            "listing_sqm_price": unit.get("listing_sqm_price"),
            "market_placement": unit.get("market_placement_score"),
            "sale_status": unit.get("sale_status"),
            "days_on_market": unit.get("days_on_market"),
            "avg_comp_sqm_price": avg_comp_sqm,
            # Count of all comps received, not of the trimmed list.
            "comp_count": len(comps),
        }

    card: dict = {"rank": rank}
    card.update((key, item.get(key)) for key in listing_keys)
    card.update(
        {
            # Full score breakdown is kept on purpose: it is small and all signal.
            "score": dict(score),
            "categories": item.get("categories"),
            "why_interesting": summary.get("why_interesting"),
            "risks": summary.get("risks"),
            "eiendom": eiendom,
            "similar_units": slim_comps,
        }
    )
    return card
def _build_slim_search_result(full: dict) -> dict:
    """Shrink a full analyze_search result into a compact response payload.

    Each entry under ``analysis`` is passed through ``_slim_listing`` with a
    1-based rank. Only ``search_url``, ``summary`` and the resulting
    ``listings`` are carried over; every other top-level key of ``full``
    (such as search_cards) is left out. The stated size goal is under
    200KB for 30 analyzed ads.
    """
    analyzed = full.get("analysis") or []

    cards = []
    for position, entry in enumerate(analyzed, start=1):
        cards.append(_slim_listing(position, entry))

    compact = {
        "search_url": full.get("search_url"),
        "summary": full.get("summary"),
        "listings": cards,
    }
    return compact
def _build_transport_security() -> TransportSecuritySettings:
allowed = os.getenv("MCP_ALLOWED_HOSTS", "")
if allowed:
@@ -57,10 +175,13 @@ mcp = FastMCP("finn_eiendom_mcp", transport_security=_build_transport_security()
description=(
"Analyze a FINN.no real estate search URL. Scrapes listing cards,"
" fetches details, enriches with Eiendom.no data, scores, and ranks."
" Fetches all ads in parallel (phase 1) then scores from cache (phase 2)."
" Progress updates are emitted during phase 1."
)
)
async def finn_analyze_search(
search_url: str,
ctx: Context,
max_pages: int = 3,
detail_limit: int = 20,
include_details: bool = True,
@@ -74,8 +195,9 @@ async def finn_analyze_search(
include_details=include_details,
detail_limit=detail_limit,
include_eiendom_no=include_eiendom_no,
ctx=ctx,
)
return json.dumps(result, default=str)
return json.dumps(_build_slim_search_result(result), default=str)
except Exception as e:
logger.error(f"Error analyzing search: {e}")
return json.dumps({"error": True, "message": str(e)})
@@ -143,17 +265,60 @@ async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) ->
def _encode_image_as_jpeg(raw: bytes, max_side: int = 1024, quality: int = 75) -> str:
    """Downscale raw image bytes and return them as base64-encoded JPEG text.

    The image is shrunk in place so its longest side is at most ``max_side``
    pixels (aspect ratio preserved, never upscaled), converted to a mode that
    JPEG can store, and re-encoded at the given ``quality``.

    Raises whatever Pillow raises for unreadable or unsupported image data;
    the caller is expected to handle that.
    """
    # Imported lazily, as before, so Pillow is only required when this tool runs.
    import io

    from PIL import Image

    img = Image.open(io.BytesIO(raw))
    img.thumbnail((max_side, max_side), Image.LANCZOS)
    # JPEG has no alpha or palette support. Converting everything that is not
    # already RGB / greyscale also covers LA, PA, CMYK, I;16 etc., which the
    # previous ("RGBA", "P") check let through to fail inside save().
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=quality, optimize=True)
    return base64.b64encode(buf.getvalue()).decode()


@mcp.tool(
    description=(
        "Fetch and analyze unit images for visual assessment of a property. "
        "Downloads photos and returns them as visual image content so Claude can "
        "directly assess views, condition, layout, kitchen/bathroom quality, and atmosphere."
    )
)
async def finn_analyze_unit_images(
    unit_code: str,
    force_refresh: bool = False,
    max_images: int = 8,
) -> list:
    """Fetch unit images and return them as vision-compatible content blocks.

    Args:
        unit_code: Identifier passed to ``get_unit_images``.
        force_refresh: Forwarded to ``get_unit_images``.
        max_images: Upper bound on how many image URLs are downloaded.
            Values below zero are treated as zero.

    Returns:
        A list starting with one ``TextContent`` header, followed by one
        ``ImageContent`` (JPEG, base64) per image that could be downloaded
        and re-encoded. Images that fail are logged and skipped. If the
        whole operation fails, a single ``TextContent`` holding a JSON error
        object is returned instead of raising.
    """
    try:
        result = await get_unit_images(unit_code, force_refresh=force_refresh)
        all_urls = result.get("unit_images") or []
        urls = all_urls[: max(0, max_images)]

        header = (
            # `or` instead of a .get() default: a present-but-null address
            # should also fall back to the unit code.
            f"{result.get('address') or unit_code} | "
            f"{result.get('rooms')} rom | "
            f"{result.get('usable_area')}m² | "
            f"{len(all_urls)} bilder totalt, viser {len(urls)}"
        )
        content: list = [TextContent(type="text", text=header)]

        async def _fetch(client: httpx.AsyncClient, url: str) -> ImageContent | None:
            """Download one image and re-encode it; return None on any failure."""
            try:
                resp = await client.get(url)
                if resp.status_code != 200:
                    logger.warning("Skipping image %s: HTTP %s", url, resp.status_code)
                    return None
                # Raw photos are large (the original author's note cites
                # 2-4MB each against a 1MB tool result limit), so they are
                # shrunk before encoding. The Pillow work is blocking, so it
                # runs in a worker thread to keep the downloads overlapping.
                b64 = await asyncio.to_thread(_encode_image_as_jpeg, resp.content)
                return ImageContent(type="image", data=b64, mimeType="image/jpeg")
            except Exception as exc:
                # Best effort: one bad image must not fail the whole tool call.
                logger.warning("Failed to fetch/resize image %s: %s", url, exc)
                return None

        if urls:
            # One shared client reuses connections across all downloads.
            async with httpx.AsyncClient(timeout=15) as client:
                fetched = await asyncio.gather(*(_fetch(client, u) for u in urls))
            content.extend(img for img in fetched if img is not None)
        return content
    except Exception as e:
        logger.error("Error fetching unit images for %s: %s", unit_code, e)
        return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))]
@mcp.tool(
@@ -332,4 +497,4 @@ def main() -> None:
if __name__ == "__main__":
main()
main()