From eb95b98111169c8ac7e38c149951740355bbcc0a Mon Sep 17 00:00:00 2001 From: Ole Date: Fri, 29 May 2026 15:17:11 +0000 Subject: [PATCH] Refactor and enhance various components of the FINN real estate analysis tool - Updated docker-compose files to use local data volumes for development. - Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations. - Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting. - Modified cli.py to improve logging and statistics reporting for finn_ads. - Updated config.py to streamline environment variable handling. - Initialized the database eagerly in http_server.py to prevent runtime errors. - Refactored mcp_server.py to slim down data structures and improve response formatting for API calls. - Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned. - Updated recompute_analysis_cache.py for better SQL query formatting. --- docker-compose.prod.yml | 3 + docker-compose.yml | 11 +- finn_eiendom/analysis.py | 28 +-- finn_eiendom/cache.py | 137 ++++++---- finn_eiendom/cli.py | 6 +- finn_eiendom/config.py | 12 +- finn_eiendom/http_server.py | 5 + finn_eiendom/mcp_server.py | 372 ++++++++++------------------ finn_eiendom/service.py | 62 ++++- scripts/recompute_analysis_cache.py | 2 +- 10 files changed, 295 insertions(+), 343 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index c686921..f5086f5 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -8,6 +8,9 @@ services: # Production image should be pre-built and tagged image: finn-mcp:latest + # TODO: Remove on actual production deployment + volumes: + - ./data:/app/data # Environment overrides for production environment: PYTHONUNBUFFERED: 1 diff --git a/docker-compose.yml b/docker-compose.yml index bc21a46..401db02 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,9 +53,10 @@ services: max-size: "10m" max-file: "3" volumes: - - finn-cache:/app/data + - ./data:/app/data + # - finn-cache:/app/data -volumes: - finn-cache: - # For development, you can override with: - # docker-compose -f docker-compose.yml -f docker-compose.override.yml up + # volumes: + # finn-cache: + # For development, you can override with: + # docker-compose -f docker-compose.yml -f docker-compose.override.yml up diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py index 71af933..5a86f07 100644 --- a/finn_eiendom/analysis.py +++ b/finn_eiendom/analysis.py @@ -132,9 +132,7 @@ def _compute_deps_hash( """ ad_hash = get_finn_ad_hash(conn, finnkode) unit_hash = get_eiendom_unit_hash(conn, unit_code) if unit_code else None - comps_hash = ( - get_similar_units_hash(conn, unit_code, listing_status) if unit_code else None - ) + comps_hash = get_similar_units_hash(conn, unit_code, listing_status) if unit_code else None return combine_hashes(ad_hash, unit_hash, comps_hash) @@ -225,7 +223,7 @@ async def analyze_ad( from datetime import datetime, UTC, timedelta price_history = get_price_history(conn, finn_ad.finnkode, limit=20) - + # Compute cache age: how long since we last fetched this ad cursor = conn.cursor() cursor.execute( @@ -239,11 +237,11 @@ async def analyze_ad( last_verified = db_row["last_verified_at"] if last_verified: last_verified_at = datetime.fromisoformat(last_verified) - structural_age_days = (datetime.now(UTC) - fetched_at).days - price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600 + structural_age_mins = (datetime.now(UTC) - fetched_at).total_seconds() / 60 + price_age_mins = (datetime.now(UTC) - last_verified_at).total_seconds() / 60 cache_age = { - "structural_days": structural_age_days, - "price_hours": round(price_age_hours, 1), + "structural_minutes": round(structural_age_mins, 1), + "price_minutes": round(price_age_mins, 1), } result = { @@ -282,6 +280,7 @@ async def analyze_ad( # Round-trip through JSON to guarantee all values are serialisable # (catches any datetime that survives model_dump, e.g. from scoring). import json as _json + result = _json.loads(_json.dumps(result, default=str)) save_analysis(conn, finn_ad.finnkode, deps_hash, result) @@ -301,7 +300,9 @@ async def _fetch_card_to_db( treats None as a skip without aborting the whole batch. """ try: - finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24) + finn_ad = cache.get_finn_ad( + conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24 + ) if finn_ad is None: finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client) save_finn_ad(conn, finn_ad) @@ -364,9 +365,7 @@ async def analyze_search( skipped_count = len(cards[:detail_limit]) - len(resale_cards) if ctx is not None: - await ctx.info( - f"Found {len(cards)} listings, {len(resale_cards)} resale ads to fetch." - ) + await ctx.info(f"Found {len(cards)} listings, {len(resale_cards)} resale ads to fetch.") # ------------------------------------------------------------------ # Phase 1: parallel fetch to DB @@ -424,8 +423,7 @@ async def analyze_search( if ctx is not None: await ctx.info( - f"Done. {len(results)} analyzed, {enriched_count} enriched, " - f"{skipped_count} skipped." + f"Done. {len(results)} analyzed, {enriched_count} enriched, {skipped_count} skipped." ) # Record this search run in the database @@ -442,4 +440,4 @@ async def analyze_search( "skipped_listings": skipped_count, "eiendom_enriched": enriched_count, }, - } \ No newline at end of file + } diff --git a/finn_eiendom/cache.py b/finn_eiendom/cache.py index ef80845..adbfe1d 100644 --- a/finn_eiendom/cache.py +++ b/finn_eiendom/cache.py @@ -30,6 +30,8 @@ import sqlite3 from datetime import UTC, datetime, timedelta from typing import Any +from pathlib import Path + from .config import FINN_CACHE_PATH from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit @@ -70,7 +72,10 @@ def get_connection(path: str | None = None) -> sqlite3.Connection: def init_db(path: str | None = None) -> sqlite3.Connection: - conn = get_connection(path) + # Ensure parent directory exists — sqlite3.connect() won't create it. + db_path = Path(path or FINN_CACHE_PATH) + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = get_connection(str(db_path)) cursor = conn.cursor() cursor.execute( @@ -163,7 +168,9 @@ def init_db(path: str | None = None) -> sqlite3.Connection: ) """ ) - cursor.execute("CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)") + cursor.execute( + "CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)" + ) cursor.execute( """ @@ -175,20 +182,24 @@ def init_db(path: str | None = None) -> sqlite3.Connection: ) """ ) - cursor.execute("CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)") + cursor.execute( + "CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)" + ) # Create indexes for efficient staleness queries cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)") + cursor.execute( + "CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)" + ) + cursor.execute( + "CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)" + ) conn.commit() return conn -def _add_column_if_missing( - cursor: sqlite3.Cursor, table: str, column: str, col_type: str -) -> None: +def _add_column_if_missing(cursor: sqlite3.Cursor, table: str, column: str, col_type: str) -> None: """ALTER TABLE … ADD COLUMN is idempotent via this guard.""" cursor.execute(f"PRAGMA table_info({table})") existing = {row["name"] for row in cursor.fetchall()} @@ -300,27 +311,38 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]: payload = ad.model_dump(mode="json") new_hash = compute_content_hash(payload) fetched_at = ( - ad.detail_fetched_at.isoformat() - if ad.detail_fetched_at - else datetime.now(UTC).isoformat() + ad.detail_fetched_at.isoformat() if ad.detail_fetched_at else datetime.now(UTC).isoformat() ) # Update last_verified_at to now when saving (indicates we just checked the data) last_verified_at = datetime.now(UTC).isoformat() # Check existing hash before writing. - cursor.execute( - "SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,) - ) + cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,)) row = cursor.fetchone() if row and row["content_hash"] == new_hash: - logger.debug("finn_ad %s unchanged (hash match)", ad.finnkode) + # Data unchanged: skip the full rewrite (preserves analysis_cache), + # but still record that we verified it just now so the price-freshness + # timer (last_verified_at) advances and cache_age.price_hours resets. + cursor.execute( + "UPDATE finn_ads SET last_verified_at = ? WHERE finnkode = ?", + (last_verified_at, ad.finnkode), + ) + conn.commit() + logger.debug("finn_ad %s unchanged (hash match, verified bumped)", ad.finnkode) return new_hash, False cursor.execute( "INSERT OR REPLACE INTO finn_ads" " (finnkode, url, payload, content_hash, fetched_at, last_verified_at)" " VALUES (?, ?, ?, ?, ?, ?)", - (ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at, last_verified_at), + ( + ad.finnkode, + ad.url, + json.dumps(payload, default=_json_default), + new_hash, + fetched_at, + last_verified_at, + ), ) conn.commit() logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8]) @@ -331,9 +353,7 @@ def get_finn_ad( conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None ) -> FinnAd | None: cursor = conn.cursor() - cursor.execute( - "SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,) - ) + cursor.execute("SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,)) row = cursor.fetchone() if not row: return None @@ -345,9 +365,7 @@ def get_finn_ad( def get_finn_ad_hash(conn: sqlite3.Connection, finnkode: str) -> str | None: """Return the stored content_hash for *finnkode*, or None if not cached.""" cursor = conn.cursor() - cursor.execute( - "SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,) - ) + cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,)) row = cursor.fetchone() return row["content_hash"] if row else None @@ -366,9 +384,7 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str, payload = unit.model_dump(mode="json") new_hash = compute_content_hash(payload) - cursor.execute( - "SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,) - ) + cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,)) row = cursor.fetchone() if row and row["content_hash"] == new_hash: logger.debug("eiendom_unit %s unchanged (hash match)", unit.unit_code) @@ -378,7 +394,12 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str, "INSERT OR REPLACE INTO eiendom_units" " (unit_code, payload, content_hash, fetched_at)" " VALUES (?, ?, ?, ?)", - (unit.unit_code, json.dumps(payload, default=_json_default), new_hash, unit.fetched_at.isoformat()), + ( + unit.unit_code, + json.dumps(payload, default=_json_default), + new_hash, + unit.fetched_at.isoformat(), + ), ) conn.commit() logger.debug("eiendom_unit %s saved (hash=%s)", unit.unit_code, new_hash[:8]) @@ -405,9 +426,7 @@ def get_eiendom_unit( def get_eiendom_unit_hash(conn: sqlite3.Connection, unit_code: str) -> str | None: """Return the stored content_hash for *unit_code*, or None if not cached.""" cursor = conn.cursor() - cursor.execute( - "SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,) - ) + cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,)) row = cursor.fetchone() return row["content_hash"] if row else None @@ -439,9 +458,7 @@ def save_similar_units( ) row = cursor.fetchone() if row and row["content_hash"] == new_hash: - logger.debug( - "similar_units %s/%s unchanged (hash match)", unit_code, listing_status - ) + logger.debug("similar_units %s/%s unchanged (hash match)", unit_code, listing_status) return new_hash, False cursor.execute( @@ -457,9 +474,7 @@ def save_similar_units( ), ) conn.commit() - logger.debug( - "similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8] - ) + logger.debug("similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8]) return new_hash, True @@ -504,9 +519,7 @@ def get_similar_units_hash( # --------------------------------------------------------------------------- -def get_analysis( - conn: sqlite3.Connection, finnkode: str, deps_hash: str -) -> dict[str, Any] | None: +def get_analysis(conn: sqlite3.Connection, finnkode: str, deps_hash: str) -> dict[str, Any] | None: """Return cached analysis for *finnkode* if deps_hash still matches. ``deps_hash`` encodes the combined hashes of the ad, eiendom unit, and @@ -533,6 +546,24 @@ def get_analysis( return json.loads(row["payload"]) +def get_latest_analysis(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None: + """Return the most recent cached analysis for *finnkode*, ignoring deps_hash. + + Unlike :func:`get_analysis`, this does not validate freshness -- it returns + whatever was last computed. Used by the shortlist where showing slightly + stale enrichment is preferable to recomputing on every read. + """ + cursor = conn.cursor() + cursor.execute( + "SELECT payload FROM analysis_cache WHERE finnkode = ?", + (finnkode,), + ) + row = cursor.fetchone() + if not row: + return None + return json.loads(row["payload"]) + + def _json_default(obj: Any) -> Any: """Fallback serialiser for json.dumps. Converts datetime/date → ISO string; anything else → repr string. @@ -556,7 +587,12 @@ def save_analysis( "INSERT OR REPLACE INTO analysis_cache" " (finnkode, deps_hash, payload, computed_at)" " VALUES (?, ?, ?, ?)", - (finnkode, deps_hash, json.dumps(result, default=_json_default), datetime.now(UTC).isoformat()), + ( + finnkode, + deps_hash, + json.dumps(result, default=_json_default), + datetime.now(UTC).isoformat(), + ), ) conn.commit() logger.debug("analysis_cache saved for %s (deps_hash=%s)", finnkode, deps_hash[:8]) @@ -564,9 +600,7 @@ def save_analysis( def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None: """Remove any cached analysis for *finnkode* (call after raw data changes).""" - conn.cursor().execute( - "DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,) - ) + conn.cursor().execute("DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,)) conn.commit() @@ -653,10 +687,14 @@ def save_price_history( (finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()), ) conn.commit() - logger.debug("price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price) + logger.debug( + "price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price + ) -def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) -> list[dict[str, Any]]: +def get_price_history( + conn: sqlite3.Connection, finnkode: str, limit: int = 100 +) -> list[dict[str, Any]]: """Retrieve price history for a listing.""" cursor = conn.cursor() cursor.execute( @@ -680,15 +718,12 @@ def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) # --------------------------------------------------------------------------- -def save_search_run( - conn: sqlite3.Connection, search_url: str, finnkodes: list[str] -) -> None: +def save_search_run(conn: sqlite3.Connection, search_url: str, finnkodes: list[str]) -> None: """Record a search run with the finnkodes found.""" cursor = conn.cursor() finnkodes_json = json.dumps(finnkodes) cursor.execute( - "INSERT INTO search_runs (search_url, finnkodes, created_at)" - " VALUES (?, ?, ?)", + "INSERT INTO search_runs (search_url, finnkodes, created_at) VALUES (?, ?, ?)", (search_url, finnkodes_json, datetime.now(UTC).isoformat()), ) conn.commit() @@ -730,6 +765,4 @@ def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]: def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool: if ttl_hours is None: return True - return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta( - hours=ttl_hours - ) \ No newline at end of file + return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours) diff --git a/finn_eiendom/cli.py b/finn_eiendom/cli.py index eac7390..9ce281e 100644 --- a/finn_eiendom/cli.py +++ b/finn_eiendom/cli.py @@ -356,14 +356,16 @@ def stats() -> None: # Special checks for finn_ads cursor.execute( - 'SELECT COUNT(*) FROM finn_ads ' + "SELECT COUNT(*) FROM finn_ads " 'WHERE json_extract(payload, "$.eiendom_unit_code") IS NOT NULL ' 'AND json_extract(payload, "$.eiendom_unit_code") != "null"' ) ads_with_unit_code = cursor.fetchone()[0] if "finn_ads" in stats and stats["finn_ads"]["total_rows"] > 0: stats["finn_ads"]["with_eiendom_unit_code"] = ads_with_unit_code - stats["finn_ads"]["pct_with_unit_code"] = round(100 * ads_with_unit_code / stats["finn_ads"]["total_rows"], 1) + stats["finn_ads"]["pct_with_unit_code"] = round( + 100 * ads_with_unit_code / stats["finn_ads"]["total_rows"], 1 + ) # Get fetched_at date ranges for table in ["finn_ads", "eiendom_units", "similar_units"]: diff --git a/finn_eiendom/config.py b/finn_eiendom/config.py index 9fe15c0..3eb36d5 100644 --- a/finn_eiendom/config.py +++ b/finn_eiendom/config.py @@ -14,9 +14,7 @@ FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0 # Cache TTLs (refactor v2) # Structural data (address, area, year, etc.) changes rarely; long TTL -FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int( - os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30") -) +FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int(os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30")) # Price/status changes frequently; short TTL for lightweight verification FINN_CACHE_TTL_AD_PRICE_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_PRICE_HOURS", "6")) # Search pages/cards also TTL-based (content changes with added/removed listings) @@ -27,12 +25,8 @@ EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true" EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1") EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1")) # Structural data (lat, lng, property_type) has long TTL; estimates have shorter TTL -EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int( - os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30") -) -EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int( - os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7") -) +EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int(os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30")) +EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int(os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7")) EIENDOM_NO_SIMILAR_UNITS_ENABLED = ( os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true" ) diff --git a/finn_eiendom/http_server.py b/finn_eiendom/http_server.py index b7cb1c5..970285f 100644 --- a/finn_eiendom/http_server.py +++ b/finn_eiendom/http_server.py @@ -4,8 +4,13 @@ from starlette.responses import JSONResponse from starlette.requests import Request from starlette.middleware.cors import CORSMiddleware from mcp.server.transport_security import TransportSecuritySettings +from finn_eiendom.cache import init_db from finn_eiendom.mcp_server import mcp +# Initialise the database (and create the data/ directory) eagerly at +# startup so the first tool call never fails on a missing directory. +init_db() + mcp.transport_security = TransportSecuritySettings(enable_dns_rebinding_protection=False) app = mcp.sse_app() diff --git a/finn_eiendom/mcp_server.py b/finn_eiendom/mcp_server.py index 57ab260..1532ad6 100644 --- a/finn_eiendom/mcp_server.py +++ b/finn_eiendom/mcp_server.py @@ -11,30 +11,14 @@ from mcp.server.transport_security import TransportSecuritySettings from mcp.server.fastmcp import Context, FastMCP from mcp.types import ImageContent, TextContent -from .eiendom_no import ( - build_unit_vector, - decode_unit_vector, - get_similar_units, - get_unit, - search_unit_from_finn_url, -) from .formatting import ( - render_ad, - render_comparison, render_diff, render_shortlist, - render_similar_units, - render_unit_images, ) from .service import ( analyze_ad, - analyze_ad_against_comps, analyze_search, - compare_ads, - find_similar_to_liked, get_new_ads_since_last_run, - get_or_fetch_ad, - get_or_fetch_eiendom_unit, get_shortlist, get_unit_images, save_feedback, @@ -48,6 +32,55 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- +def _slim_comp(c: dict) -> dict: + """Drop internal IDs, coords, redundant status fields from a comparable unit.""" + return { + "unit_code": c.get("unit_code"), + "address": c.get("address"), + "usable_area": c.get("usable_area"), + "rooms": c.get("rooms"), + "floor": c.get("floor"), + "construction_year": c.get("construction_year"), + "listing_price": c.get("listing_price"), + "selling_price": c.get("selling_price"), + "shared_debt": c.get("shared_debt"), + "sqm_price": c.get("sqm_price"), + "common_costs": c.get("common_costs"), + "days_on_market": c.get("days_on_market"), + "finalized_at": (c.get("finalized_at") or "")[:10], + } + + +def _slim_comps(comps: list[dict], keep: int = 15) -> list[dict]: + """Sort comps by recency, keep the N most recent — older comps lose relevance fast.""" + sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True) + return [_slim_comp(c) for c in sorted_comps[:keep]] + + +def _avg_comp_sqm(comps: list[dict]) -> int | None: + sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")] + return round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None + + +def _slim_eiendom(eu: dict, comps: list[dict]) -> dict: + """Compact Eiendom.no unit view. Drops unit_images, unit_vector, lat/lng, timestamps.""" + return { + "unit_code": eu.get("unit_code"), + "usable_area": eu.get("usable_area"), + "estimated_price": eu.get("estimated_selling_price"), + "estimated_range": [ + eu.get("estimated_selling_price_lower"), + eu.get("estimated_selling_price_upper"), + ], + "listing_sqm_price": eu.get("listing_sqm_price"), + "market_placement": eu.get("market_placement_score"), + "sale_status": eu.get("sale_status"), + "days_on_market": eu.get("days_on_market"), + "avg_comp_sqm_price": _avg_comp_sqm(comps), + "comp_count": len(comps), + } + + def _slim_listing(rank: int, item: dict) -> dict: """Collapse one full analyze_ad result into a compact listing card. @@ -57,57 +90,9 @@ def _slim_listing(rank: int, item: dict) -> dict: """ eu = item.get("eiendom_unit") or {} comps = item.get("similar_units") or [] - sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")] - avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None - - # Slim comps: drop internal IDs, coords, redundant status fields. - # Sort by recency, keep 15 most recent — older comps lose relevance fast. - def _slim_comp(c: dict) -> dict: - return { - "unit_code": c.get("unit_code"), - "address": c.get("address"), - "usable_area": c.get("usable_area"), - "rooms": c.get("rooms"), - "floor": c.get("floor"), - "construction_year": c.get("construction_year"), - "listing_price": c.get("listing_price"), - "selling_price": c.get("selling_price"), - "shared_debt": c.get("shared_debt"), - "sqm_price": c.get("sqm_price"), - "common_costs": c.get("common_costs"), - "days_on_market": c.get("days_on_market"), - "finalized_at": (c.get("finalized_at") or "")[:10], - } - - sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True) - slim_comps = [_slim_comp(c) for c in sorted_comps[:15]] - score = item.get("score") or {} summary = item.get("summary") or {} price_history = item.get("price_history") or [] - cache_age = item.get("cache_age") - - # Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal. - # Drop nothing from scores. - slim_score = {k: v for k, v in score.items()} - - eiendom: dict | None = None - if eu: - eiendom = { - "unit_code": eu.get("unit_code"), - "usable_area": eu.get("usable_area"), - "estimated_price": eu.get("estimated_selling_price"), - "estimated_range": [ - eu.get("estimated_selling_price_lower"), - eu.get("estimated_selling_price_upper"), - ], - "listing_sqm_price": eu.get("listing_sqm_price"), - "market_placement": eu.get("market_placement_score"), - "sale_status": eu.get("sale_status"), - "days_on_market": eu.get("days_on_market"), - "avg_comp_sqm_price": avg_comp_sqm, - "comp_count": len(comps), - } return { "rank": rank, @@ -134,17 +119,59 @@ def _slim_listing(rank: int, item: dict) -> dict: "has_parking": item.get("has_parking"), "has_garage": item.get("has_garage"), "eiendom_unit_code": item.get("eiendom_unit_code"), - "score": slim_score, + "score": dict(score), "categories": item.get("categories"), "why_interesting": summary.get("why_interesting"), "risks": summary.get("risks"), - "cache_age": cache_age, + "cache_age": item.get("cache_age"), "price_history": price_history[:5], # Last 5 price records - "eiendom": eiendom, - "similar_units": slim_comps, + "eiendom": _slim_eiendom(eu, comps) if eu else None, + "similar_units": _slim_comps(comps), } +def _slim_analyze_ad(result: dict) -> dict: + """Shape the single-ad analyze_ad result for MCP output. + + The service returns {ad: FinnAd, eiendom_unit: EiendomUnit, similar_units: [...]}. + Flatten the ad fields up, keep listing_description, attach slim eiendom + comps, + and strip unit_images / unit_vector / lat / lng / internal timestamps. + """ + ad = result.get("ad") or {} + eu = result.get("eiendom_unit") or {} + comps = result.get("similar_units") or [] + + out: dict[str, Any] = { + "finnkode": ad.get("finnkode"), + "url": ad.get("url"), + "title": ad.get("title"), + "address": ad.get("address"), + "district": ad.get("district"), + "listing_description": ad.get("listing_description"), + "property_type": ad.get("property_type"), + "ownership_type": ad.get("ownership_type"), + "floor": ad.get("floor"), + "area_m2": ad.get("area_m2"), + "rooms": ad.get("rooms"), + "bedrooms": ad.get("bedrooms"), + "total_price": ad.get("total_price"), + "asking_price": ad.get("asking_price"), + "shared_debt": ad.get("shared_debt"), + "common_costs": ad.get("common_costs"), + "construction_year": ad.get("construction_year"), + "energy_rating": ad.get("energy_rating"), + "has_balcony": ad.get("has_balcony"), + "has_terrace": ad.get("has_terrace"), + "has_elevator": ad.get("has_elevator"), + "has_parking": ad.get("has_parking"), + "has_garage": ad.get("has_garage"), + "eiendom_unit_code": ad.get("eiendom_unit_code"), + "eiendom": _slim_eiendom(eu, comps) if eu else None, + "similar_units": _slim_comps(comps), + } + return out + + def _build_slim_search_result(full: dict) -> dict: """Convert full analyze_search output to a compact MCP-safe response. @@ -152,8 +179,7 @@ def _build_slim_search_result(full: dict) -> dict: listings. Target: <200KB for 30 analyzed ads. """ listings = [ - _slim_listing(rank + 1, item) - for rank, item in enumerate(full.get("analysis") or []) + _slim_listing(rank + 1, item) for rank, item in enumerate(full.get("analysis") or []) ] return { "search_url": full.get("search_url"), @@ -208,65 +234,6 @@ async def finn_analyze_search( return json.dumps({"error": True, "message": str(e)}) -@mcp.tool( - description=( - "Fetch full detail for a FINN listing by finnkode." - " Checks cache first; use force_refresh=True to bypass." - ) -) -async def finn_get_ad(finnkode: str, force_refresh: bool = False) -> str: - """Fetch FINN ad details by finnkode.""" - try: - ad = await get_or_fetch_ad(finnkode, force_refresh=force_refresh) - return ad.model_dump_json() - except Exception as e: - logger.error(f"Error fetching ad {finnkode}: {e}") - return json.dumps({"error": True, "message": str(e)}) - - -@mcp.tool( - description="Resolve an Eiendom.no unit_code from a FINN listing URL. " - "Returns unit_code, address, lat, lng or an error if not found." -) -async def finn_resolve_eiendom_unit(finn_url: str) -> str: - """Resolve Eiendom.no unit from FINN URL.""" - try: - unit = await search_unit_from_finn_url(finn_url) - if unit is None: - return json.dumps( - { - "error": True, - "message": "Eiendom.no unit could not be resolved from FINN URL", - } - ) - return json.dumps( - { - "unit_code": unit.unit_code, - "address": unit.address, - "lat": unit.lat, - "lng": unit.lng, - } - ) - except Exception as e: - logger.error(f"Error resolving unit from {finn_url}: {e}") - return json.dumps({"error": True, "message": str(e)}) - - -@mcp.tool( - description="Fetch full Eiendom.no unit data by unit_code. Checks SQLite cache (24h TTL)." -) -async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) -> str: - """Fetch Eiendom.no unit details by unit_code.""" - try: - unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh) - if unit is None: - return json.dumps({"error": True, "message": "Eiendom.no unit not found"}) - return unit.model_dump_json() - except Exception as e: - logger.error(f"Error fetching unit {unit_code}: {e}") - return json.dumps({"error": True, "message": str(e)}) - - @mcp.tool( description=( "Fetch and analyze unit images for visual assessment of a property. " @@ -305,6 +272,7 @@ async def finn_analyze_unit_images( # within the 1MB MCP tool result limit across multiple images. from PIL import Image import io + img = Image.open(io.BytesIO(resp.content)) img.thumbnail((1024, 1024), Image.LANCZOS) if img.mode in ("RGBA", "P"): @@ -326,50 +294,6 @@ async def finn_analyze_unit_images( return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))] -@mcp.tool( - description="Fetch comparable recently-sold or for-sale units from Eiendom.no using a " - "base64-encoded unit vector. Returns list of similar units with sale prices." -) -async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENTLY_SOLD") -> str: - """Fetch similar units from Eiendom.no.""" - try: - units = await get_similar_units(unit_vector, listing_status) - return json.dumps([unit.model_dump() for unit in units], default=str) - except Exception as e: - logger.error(f"Error fetching similar units: {e}") - return json.dumps({"error": True, "message": str(e)}) - - -@mcp.tool( - description="Build a base64-encoded unit vector for a given Eiendom.no unit_code. " - "The vector is used as input to finn_get_similar_units." -) -async def finn_build_unit_vector(unit_code: str) -> str: - """Build unit vector for Eiendom.no unit.""" - try: - unit = await get_unit(unit_code) - if unit is None: - return json.dumps({"error": True, "message": "Eiendom.no unit not found"}) - return json.dumps({"unit_code": unit.unit_code, "unit_vector": build_unit_vector(unit)}) - except Exception as e: - logger.error(f"Error building unit vector for {unit_code}: {e}") - return json.dumps({"error": True, "message": str(e)}) - - -@mcp.tool( - description="Decode a base64 unit vector into human-readable JSON (lat, lon, property type, " - "floor, rooms, construction year, area, price)." -) -def finn_decode_unit_vector(unit_vector: str) -> str: - """Decode unit vector to readable format.""" - try: - result = decode_unit_vector(unit_vector) - return json.dumps(result) - except Exception as e: - logger.error(f"Error decoding unit vector: {e}") - return json.dumps({"error": True, "message": str(e)}) - - # ============================================================================ # Additional analysis and enrichment tools # ============================================================================ @@ -377,79 +301,33 @@ def finn_decode_unit_vector(unit_vector: str) -> str: @mcp.tool( description=( - "Fetch and enrich a single FINN ad with optional Eiendom.no data and comparable units." + "Deep-dive one or more FINN listings. Accepts a single finnkode or a list " + "(batched in one call). Always enriches with Eiendom.no data and comparable " + "sold units. Returns listing_description plus slim eiendom/comps; excludes " + "image URLs and internal vectors (use finn_analyze_unit_images for visuals)." ) ) -async def finn_analyze_ad( - finnkode: str, - include_eiendom_no: bool = True, - include_similar_units: bool = False, -) -> str: - """Analyze and enrich a single FINN ad.""" - try: - result = await analyze_ad( - finnkode, - include_eiendom_no=include_eiendom_no, - include_similar_units=include_similar_units, - ) - return json.dumps(result, default=str) - except Exception as e: - logger.error(f"Error analyzing ad {finnkode}: {e}") - return json.dumps({"error": True, "message": str(e)}) +async def finn_analyze_ad(finnkode: str | list[str]) -> str: + """Analyze and enrich one or more FINN ads. Batch input returns a list.""" + finnkoder = [finnkode] if isinstance(finnkode, str) else list(finnkode) + async def _one(fk: str) -> dict: + try: + result = await analyze_ad( + fk, + include_eiendom_no=True, + include_similar_units=True, + ) + return _slim_analyze_ad(result) + except Exception as e: # noqa: BLE001 — per-item isolation, batch must not abort + logger.error(f"Error analyzing ad {fk}: {e}") + return {"finnkode": fk, "error": True, "message": str(e)} -@mcp.tool( - description=( - "Evaluate one FINN listing against comparable recently-sold properties from Eiendom.no." - ) -) -async def finn_analyze_ad_against_comps( - finnkode: str, listing_status: str = "RECENTLY_SOLD" -) -> str: - """Analyze ad against comparable sales.""" - try: - result = await analyze_ad_against_comps(finnkode, listing_status=listing_status) - return json.dumps(result, default=str) - except Exception as e: - logger.error(f"Error analyzing ad {finnkode} against comps: {e}") - return json.dumps({"error": True, "message": str(e)}) + results = await asyncio.gather(*[_one(fk) for fk in finnkoder]) - -@mcp.tool( - description=( - "Find properties similar to a listing the user has liked. " - "Requires that the user has marked the listing with verdict='liked'." - ) -) -async def finn_find_similar_to_liked_ad( - finnkode: str, mode: str = "recommendations", listing_status: str = "FOR_SALE" -) -> str: - """Find properties similar to a liked ad.""" - try: - result = await find_similar_to_liked(finnkode, mode=mode, listing_status=listing_status) - return render_similar_units(result, "json") - except Exception as e: - logger.error(f"Error finding similar to {finnkode}: {e}") - return json.dumps({"error": True, "message": str(e)}) - - -@mcp.tool(description="Compare multiple FINN listings side by side with optional enrichment.") -async def finn_compare_ads( - finnkoder: list[str], - include_eiendom_no: bool = True, - include_comps: bool = True, -) -> str: - """Compare multiple ads.""" - try: - result = await compare_ads( - finnkoder, - include_eiendom_no=include_eiendom_no, - include_comps=include_comps, - ) - return render_comparison(result, "json") - except Exception as e: - logger.error(f"Error comparing ads: {e}") - return json.dumps({"error": True, "message": str(e)}) + # Single string input → single object; list input → list (preserves order). + payload: Any = results[0] if isinstance(finnkode, str) else results + return json.dumps(payload, default=str) @mcp.tool( @@ -467,13 +345,13 @@ async def finn_save_feedback(finnkode: str, verdict: str, notes: str | None = No @mcp.tool( - description="Fetch the stored shortlist from a previous search run. " - "Returns the ranked listings with all enrichment data." + description="Fetch the shortlist of listings you have given a verdict " + "(liked, disliked, maybe, visited). Enriched with cached score and price data." ) -def finn_get_shortlist(run_id: int | None = None, limit: int = 10) -> str: - """Get stored shortlist.""" +def finn_get_shortlist(verdict: str = "liked", limit: int = 10) -> str: + """Get stored shortlist filtered by verdict.""" try: - result = get_shortlist(run_id, limit) + result = get_shortlist(verdict, limit) return render_shortlist(result, "json") except Exception as e: logger.error(f"Error fetching shortlist: {e}") @@ -502,4 +380,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/finn_eiendom/service.py b/finn_eiendom/service.py index 162d95f..f6e98aa 100644 --- a/finn_eiendom/service.py +++ b/finn_eiendom/service.py @@ -24,7 +24,9 @@ from .ad import fetch_ad_details from .analysis import analyze_search as run_analysis_search from .cache import ( get_eiendom_unit as get_cached_eiendom_unit, + get_feedback_by_verdict, get_finn_ad, + get_latest_analysis, get_similar_units as get_cached_similar_units, init_db, invalidate_analysis, @@ -137,11 +139,7 @@ async def get_or_fetch_eiendom_unit( conn = init_db(FINN_CACHE_PATH) # Convert structural TTL from days to hours ttl_hours = EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS * 24 - unit = ( - None - if force_refresh - else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours) - ) + unit = None if force_refresh else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours) if unit is not None: return unit @@ -182,9 +180,7 @@ async def get_or_fetch_similar_units( conn, unit_code, listing_status, ttl_hours=ttl_hours ) if cached_similar: - logger.debug( - "Using cached similar units for %s (status=%s)", unit_code, listing_status - ) + logger.debug("Using cached similar units for %s (status=%s)", unit_code, listing_status) return cached_similar # Cache miss or force_refresh: fetch from remote. @@ -377,10 +373,52 @@ def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict return save_feedback_impl(finnkode, verdict, notes) -def get_shortlist(run_id: int | None = None, limit: int = 10) -> dict[str, Any]: - """Fetch stored shortlist from a search run.""" - # TODO: implement via search_runs table in cache.py - return {"shortlist": [], "run_id": run_id, "limit": limit} +def get_shortlist( + verdict: str = "liked", limit: int = 10 +) -> dict[str, Any]: + """Fetch the shortlist of listings the user has given *verdict*. + + Reads from the ``user_feedback`` table and enriches each finnkode with + its most recent cached analysis (score, price, categories) when available. + Entries with no cached analysis still appear, carrying the stored verdict + and notes so nothing the user flagged is silently dropped. + """ + conn = init_db(FINN_CACHE_PATH) + feedback_rows = get_feedback_by_verdict(conn, verdict, limit=limit) + + shortlist: list[dict[str, Any]] = [] + for fb in feedback_rows: + finnkode = fb["finnkode"] + entry: dict[str, Any] = { + "finnkode": finnkode, + "verdict": fb["verdict"], + "notes": fb["notes"], + "url": f"https://www.finn.no/realestate/homes/ad.html?finnkode={finnkode}", + } + + analysis = get_latest_analysis(conn, finnkode) + if analysis: + score = analysis.get("score") or {} + eiendom = analysis.get("eiendom_unit") or {} + entry.update( + { + "title": analysis.get("title"), + "address": analysis.get("address"), + "area_m2": analysis.get("area_m2"), + "total_price": analysis.get("total_price"), + "asking_price": analysis.get("asking_price"), + "score": score.get("total"), + "categories": analysis.get("categories", []), + "market_placement": eiendom.get("market_placement_score"), + } + ) + + shortlist.append(entry) + + # Highest score first; un-enriched entries (score None) sink to the bottom. + shortlist.sort(key=lambda e: (e.get("score") is not None, e.get("score") or 0), reverse=True) + + return {"shortlist": shortlist, "verdict": verdict, "limit": limit} def get_new_ads_since_last_run(search_url: str) -> dict[str, Any]: diff --git a/scripts/recompute_analysis_cache.py b/scripts/recompute_analysis_cache.py index dd46d17..eacfda8 100644 --- a/scripts/recompute_analysis_cache.py +++ b/scripts/recompute_analysis_cache.py @@ -75,7 +75,7 @@ async def main() -> None: logger.info(f"analysis_cache now has {cache_count} rows") cursor.execute( - 'SELECT COUNT(*) FROM finn_ads ' + "SELECT COUNT(*) FROM finn_ads " 'WHERE json_extract(payload, "$.eiendom_unit_code") IS NOT NULL ' 'AND json_extract(payload, "$.eiendom_unit_code") != "null"' )