Refactor and enhance various components of the FINN real estate analysis tool

- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
This commit is contained in:
Ole
2026-05-29 15:17:11 +00:00
parent 55d93894ac
commit eb95b98111
10 changed files with 295 additions and 343 deletions
+3
View File
@@ -8,6 +8,9 @@ services:
# Production image should be pre-built and tagged
image: finn-mcp:latest
# TODO: Remove on actual production deployment
volumes:
- ./data:/app/data
# Environment overrides for production
environment:
PYTHONUNBUFFERED: 1
+6 -5
View File
@@ -53,9 +53,10 @@ services:
max-size: "10m"
max-file: "3"
volumes:
- finn-cache:/app/data
- ./data:/app/data
# - finn-cache:/app/data
volumes:
finn-cache:
# For development, you can override with:
# docker-compose -f docker-compose.yml -f docker-compose.override.yml up
# volumes:
# finn-cache:
# For development, you can override with:
# docker-compose -f docker-compose.yml -f docker-compose.override.yml up
+11 -13
View File
@@ -132,9 +132,7 @@ def _compute_deps_hash(
"""
ad_hash = get_finn_ad_hash(conn, finnkode)
unit_hash = get_eiendom_unit_hash(conn, unit_code) if unit_code else None
comps_hash = (
get_similar_units_hash(conn, unit_code, listing_status) if unit_code else None
)
comps_hash = get_similar_units_hash(conn, unit_code, listing_status) if unit_code else None
return combine_hashes(ad_hash, unit_hash, comps_hash)
@@ -239,11 +237,11 @@ async def analyze_ad(
last_verified = db_row["last_verified_at"]
if last_verified:
last_verified_at = datetime.fromisoformat(last_verified)
structural_age_days = (datetime.now(UTC) - fetched_at).days
price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600
structural_age_mins = (datetime.now(UTC) - fetched_at).total_seconds() / 60
price_age_mins = (datetime.now(UTC) - last_verified_at).total_seconds() / 60
cache_age = {
"structural_days": structural_age_days,
"price_hours": round(price_age_hours, 1),
"structural_minutes": round(structural_age_mins, 1),
"price_minutes": round(price_age_mins, 1),
}
result = {
@@ -282,6 +280,7 @@ async def analyze_ad(
# Round-trip through JSON to guarantee all values are serialisable
# (catches any datetime that survives model_dump, e.g. from scoring).
import json as _json
result = _json.loads(_json.dumps(result, default=str))
save_analysis(conn, finn_ad.finnkode, deps_hash, result)
@@ -301,7 +300,9 @@ async def _fetch_card_to_db(
treats None as a skip without aborting the whole batch.
"""
try:
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24)
finn_ad = cache.get_finn_ad(
conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24
)
if finn_ad is None:
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
save_finn_ad(conn, finn_ad)
@@ -364,9 +365,7 @@ async def analyze_search(
skipped_count = len(cards[:detail_limit]) - len(resale_cards)
if ctx is not None:
await ctx.info(
f"Found {len(cards)} listings, {len(resale_cards)} resale ads to fetch."
)
await ctx.info(f"Found {len(cards)} listings, {len(resale_cards)} resale ads to fetch.")
# ------------------------------------------------------------------
# Phase 1: parallel fetch to DB
@@ -424,8 +423,7 @@ async def analyze_search(
if ctx is not None:
await ctx.info(
f"Done. {len(results)} analyzed, {enriched_count} enriched, "
f"{skipped_count} skipped."
f"Done. {len(results)} analyzed, {enriched_count} enriched, {skipped_count} skipped."
)
# Record this search run in the database
+85 -52
View File
@@ -30,6 +30,8 @@ import sqlite3
from datetime import UTC, datetime, timedelta
from typing import Any
from pathlib import Path
from .config import FINN_CACHE_PATH
from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit
@@ -70,7 +72,10 @@ def get_connection(path: str | None = None) -> sqlite3.Connection:
def init_db(path: str | None = None) -> sqlite3.Connection:
conn = get_connection(path)
# Ensure parent directory exists — sqlite3.connect() won't create it.
db_path = Path(path or FINN_CACHE_PATH)
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = get_connection(str(db_path))
cursor = conn.cursor()
cursor.execute(
@@ -163,7 +168,9 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
)
"""
)
cursor.execute("CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)")
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)"
)
cursor.execute(
"""
@@ -175,20 +182,24 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
)
"""
)
cursor.execute("CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)")
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)"
)
# Create indexes for efficient staleness queries
cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)")
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)"
)
conn.commit()
return conn
def _add_column_if_missing(
cursor: sqlite3.Cursor, table: str, column: str, col_type: str
) -> None:
def _add_column_if_missing(cursor: sqlite3.Cursor, table: str, column: str, col_type: str) -> None:
"""ALTER TABLE … ADD COLUMN is idempotent via this guard."""
cursor.execute(f"PRAGMA table_info({table})")
existing = {row["name"] for row in cursor.fetchall()}
@@ -300,27 +311,38 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]:
payload = ad.model_dump(mode="json")
new_hash = compute_content_hash(payload)
fetched_at = (
ad.detail_fetched_at.isoformat()
if ad.detail_fetched_at
else datetime.now(UTC).isoformat()
ad.detail_fetched_at.isoformat() if ad.detail_fetched_at else datetime.now(UTC).isoformat()
)
# Update last_verified_at to now when saving (indicates we just checked the data)
last_verified_at = datetime.now(UTC).isoformat()
# Check existing hash before writing.
cursor.execute(
"SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,)
)
cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,))
row = cursor.fetchone()
if row and row["content_hash"] == new_hash:
logger.debug("finn_ad %s unchanged (hash match)", ad.finnkode)
# Data unchanged: skip the full rewrite (preserves analysis_cache),
# but still record that we verified it just now so the price-freshness
# timer (last_verified_at) advances and cache_age.price_hours resets.
cursor.execute(
"UPDATE finn_ads SET last_verified_at = ? WHERE finnkode = ?",
(last_verified_at, ad.finnkode),
)
conn.commit()
logger.debug("finn_ad %s unchanged (hash match, verified bumped)", ad.finnkode)
return new_hash, False
cursor.execute(
"INSERT OR REPLACE INTO finn_ads"
" (finnkode, url, payload, content_hash, fetched_at, last_verified_at)"
" VALUES (?, ?, ?, ?, ?, ?)",
(ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at, last_verified_at),
(
ad.finnkode,
ad.url,
json.dumps(payload, default=_json_default),
new_hash,
fetched_at,
last_verified_at,
),
)
conn.commit()
logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8])
@@ -331,9 +353,7 @@ def get_finn_ad(
conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None
) -> FinnAd | None:
cursor = conn.cursor()
cursor.execute(
"SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,)
)
cursor.execute("SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,))
row = cursor.fetchone()
if not row:
return None
@@ -345,9 +365,7 @@ def get_finn_ad(
def get_finn_ad_hash(conn: sqlite3.Connection, finnkode: str) -> str | None:
"""Return the stored content_hash for *finnkode*, or None if not cached."""
cursor = conn.cursor()
cursor.execute(
"SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,)
)
cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,))
row = cursor.fetchone()
return row["content_hash"] if row else None
@@ -366,9 +384,7 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str,
payload = unit.model_dump(mode="json")
new_hash = compute_content_hash(payload)
cursor.execute(
"SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,)
)
cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,))
row = cursor.fetchone()
if row and row["content_hash"] == new_hash:
logger.debug("eiendom_unit %s unchanged (hash match)", unit.unit_code)
@@ -378,7 +394,12 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str,
"INSERT OR REPLACE INTO eiendom_units"
" (unit_code, payload, content_hash, fetched_at)"
" VALUES (?, ?, ?, ?)",
(unit.unit_code, json.dumps(payload, default=_json_default), new_hash, unit.fetched_at.isoformat()),
(
unit.unit_code,
json.dumps(payload, default=_json_default),
new_hash,
unit.fetched_at.isoformat(),
),
)
conn.commit()
logger.debug("eiendom_unit %s saved (hash=%s)", unit.unit_code, new_hash[:8])
@@ -405,9 +426,7 @@ def get_eiendom_unit(
def get_eiendom_unit_hash(conn: sqlite3.Connection, unit_code: str) -> str | None:
"""Return the stored content_hash for *unit_code*, or None if not cached."""
cursor = conn.cursor()
cursor.execute(
"SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,)
)
cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,))
row = cursor.fetchone()
return row["content_hash"] if row else None
@@ -439,9 +458,7 @@ def save_similar_units(
)
row = cursor.fetchone()
if row and row["content_hash"] == new_hash:
logger.debug(
"similar_units %s/%s unchanged (hash match)", unit_code, listing_status
)
logger.debug("similar_units %s/%s unchanged (hash match)", unit_code, listing_status)
return new_hash, False
cursor.execute(
@@ -457,9 +474,7 @@ def save_similar_units(
),
)
conn.commit()
logger.debug(
"similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8]
)
logger.debug("similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8])
return new_hash, True
@@ -504,9 +519,7 @@ def get_similar_units_hash(
# ---------------------------------------------------------------------------
def get_analysis(
conn: sqlite3.Connection, finnkode: str, deps_hash: str
) -> dict[str, Any] | None:
def get_analysis(conn: sqlite3.Connection, finnkode: str, deps_hash: str) -> dict[str, Any] | None:
"""Return cached analysis for *finnkode* if deps_hash still matches.
``deps_hash`` encodes the combined hashes of the ad, eiendom unit, and
@@ -533,6 +546,24 @@ def get_analysis(
return json.loads(row["payload"])
def get_latest_analysis(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None:
"""Return the most recent cached analysis for *finnkode*, ignoring deps_hash.
Unlike :func:`get_analysis`, this does not validate freshness -- it returns
whatever was last computed. Used by the shortlist where showing slightly
stale enrichment is preferable to recomputing on every read.
"""
cursor = conn.cursor()
cursor.execute(
"SELECT payload FROM analysis_cache WHERE finnkode = ?",
(finnkode,),
)
row = cursor.fetchone()
if not row:
return None
return json.loads(row["payload"])
def _json_default(obj: Any) -> Any:
"""Fallback serialiser for json.dumps.
Converts datetime/date → ISO string; anything else → repr string.
@@ -556,7 +587,12 @@ def save_analysis(
"INSERT OR REPLACE INTO analysis_cache"
" (finnkode, deps_hash, payload, computed_at)"
" VALUES (?, ?, ?, ?)",
(finnkode, deps_hash, json.dumps(result, default=_json_default), datetime.now(UTC).isoformat()),
(
finnkode,
deps_hash,
json.dumps(result, default=_json_default),
datetime.now(UTC).isoformat(),
),
)
conn.commit()
logger.debug("analysis_cache saved for %s (deps_hash=%s)", finnkode, deps_hash[:8])
@@ -564,9 +600,7 @@ def save_analysis(
def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None:
"""Remove any cached analysis for *finnkode* (call after raw data changes)."""
conn.cursor().execute(
"DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,)
)
conn.cursor().execute("DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,))
conn.commit()
@@ -653,10 +687,14 @@ def save_price_history(
(finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()),
)
conn.commit()
logger.debug("price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price)
logger.debug(
"price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price
)
def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) -> list[dict[str, Any]]:
def get_price_history(
conn: sqlite3.Connection, finnkode: str, limit: int = 100
) -> list[dict[str, Any]]:
"""Retrieve price history for a listing."""
cursor = conn.cursor()
cursor.execute(
@@ -680,15 +718,12 @@ def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100)
# ---------------------------------------------------------------------------
def save_search_run(
conn: sqlite3.Connection, search_url: str, finnkodes: list[str]
) -> None:
def save_search_run(conn: sqlite3.Connection, search_url: str, finnkodes: list[str]) -> None:
"""Record a search run with the finnkodes found."""
cursor = conn.cursor()
finnkodes_json = json.dumps(finnkodes)
cursor.execute(
"INSERT INTO search_runs (search_url, finnkodes, created_at)"
" VALUES (?, ?, ?)",
"INSERT INTO search_runs (search_url, finnkodes, created_at) VALUES (?, ?, ?)",
(search_url, finnkodes_json, datetime.now(UTC).isoformat()),
)
conn.commit()
@@ -730,6 +765,4 @@ def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]:
def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool:
if ttl_hours is None:
return True
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(
hours=ttl_hours
)
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours)
+4 -2
View File
@@ -356,14 +356,16 @@ def stats() -> None:
# Special checks for finn_ads
cursor.execute(
'SELECT COUNT(*) FROM finn_ads '
"SELECT COUNT(*) FROM finn_ads "
'WHERE json_extract(payload, "$.eiendom_unit_code") IS NOT NULL '
'AND json_extract(payload, "$.eiendom_unit_code") != "null"'
)
ads_with_unit_code = cursor.fetchone()[0]
if "finn_ads" in stats and stats["finn_ads"]["total_rows"] > 0:
stats["finn_ads"]["with_eiendom_unit_code"] = ads_with_unit_code
stats["finn_ads"]["pct_with_unit_code"] = round(100 * ads_with_unit_code / stats["finn_ads"]["total_rows"], 1)
stats["finn_ads"]["pct_with_unit_code"] = round(
100 * ads_with_unit_code / stats["finn_ads"]["total_rows"], 1
)
# Get fetched_at date ranges
for table in ["finn_ads", "eiendom_units", "similar_units"]:
+3 -9
View File
@@ -14,9 +14,7 @@ FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0
# Cache TTLs (refactor v2)
# Structural data (address, area, year, etc.) changes rarely; long TTL
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int(
os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30")
)
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int(os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30"))
# Price/status changes frequently; short TTL for lightweight verification
FINN_CACHE_TTL_AD_PRICE_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_PRICE_HOURS", "6"))
# Search pages/cards also TTL-based (content changes with added/removed listings)
@@ -27,12 +25,8 @@ EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true"
EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1")
EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1"))
# Structural data (lat, lng, property_type) has long TTL; estimates have shorter TTL
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int(
os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30")
)
EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int(
os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7")
)
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int(os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30"))
EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int(os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7"))
EIENDOM_NO_SIMILAR_UNITS_ENABLED = (
os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true"
)
+5
View File
@@ -4,8 +4,13 @@ from starlette.responses import JSONResponse
from starlette.requests import Request
from starlette.middleware.cors import CORSMiddleware
from mcp.server.transport_security import TransportSecuritySettings
from finn_eiendom.cache import init_db
from finn_eiendom.mcp_server import mcp
# Initialise the database (and create the data/ directory) eagerly at
# startup so the first tool call never fails on a missing directory.
init_db()
mcp.transport_security = TransportSecuritySettings(enable_dns_rebinding_protection=False)
app = mcp.sse_app()
+124 -246
View File
@@ -11,30 +11,14 @@ from mcp.server.transport_security import TransportSecuritySettings
from mcp.server.fastmcp import Context, FastMCP
from mcp.types import ImageContent, TextContent
from .eiendom_no import (
build_unit_vector,
decode_unit_vector,
get_similar_units,
get_unit,
search_unit_from_finn_url,
)
from .formatting import (
render_ad,
render_comparison,
render_diff,
render_shortlist,
render_similar_units,
render_unit_images,
)
from .service import (
analyze_ad,
analyze_ad_against_comps,
analyze_search,
compare_ads,
find_similar_to_liked,
get_new_ads_since_last_run,
get_or_fetch_ad,
get_or_fetch_eiendom_unit,
get_shortlist,
get_unit_images,
save_feedback,
@@ -48,6 +32,55 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
def _slim_comp(c: dict) -> dict:
"""Drop internal IDs, coords, redundant status fields from a comparable unit."""
return {
"unit_code": c.get("unit_code"),
"address": c.get("address"),
"usable_area": c.get("usable_area"),
"rooms": c.get("rooms"),
"floor": c.get("floor"),
"construction_year": c.get("construction_year"),
"listing_price": c.get("listing_price"),
"selling_price": c.get("selling_price"),
"shared_debt": c.get("shared_debt"),
"sqm_price": c.get("sqm_price"),
"common_costs": c.get("common_costs"),
"days_on_market": c.get("days_on_market"),
"finalized_at": (c.get("finalized_at") or "")[:10],
}
def _slim_comps(comps: list[dict], keep: int = 15) -> list[dict]:
"""Sort comps by recency, keep the N most recent — older comps lose relevance fast."""
sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True)
return [_slim_comp(c) for c in sorted_comps[:keep]]
def _avg_comp_sqm(comps: list[dict]) -> int | None:
sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
return round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
def _slim_eiendom(eu: dict, comps: list[dict]) -> dict:
"""Compact Eiendom.no unit view. Drops unit_images, unit_vector, lat/lng, timestamps."""
return {
"unit_code": eu.get("unit_code"),
"usable_area": eu.get("usable_area"),
"estimated_price": eu.get("estimated_selling_price"),
"estimated_range": [
eu.get("estimated_selling_price_lower"),
eu.get("estimated_selling_price_upper"),
],
"listing_sqm_price": eu.get("listing_sqm_price"),
"market_placement": eu.get("market_placement_score"),
"sale_status": eu.get("sale_status"),
"days_on_market": eu.get("days_on_market"),
"avg_comp_sqm_price": _avg_comp_sqm(comps),
"comp_count": len(comps),
}
def _slim_listing(rank: int, item: dict) -> dict:
"""Collapse one full analyze_ad result into a compact listing card.
@@ -57,57 +90,9 @@ def _slim_listing(rank: int, item: dict) -> dict:
"""
eu = item.get("eiendom_unit") or {}
comps = item.get("similar_units") or []
sqm_prices = [c["sqm_price"] for c in comps if c.get("sqm_price")]
avg_comp_sqm = round(sum(sqm_prices) / len(sqm_prices)) if sqm_prices else None
# Slim comps: drop internal IDs, coords, redundant status fields.
# Sort by recency, keep 15 most recent — older comps lose relevance fast.
def _slim_comp(c: dict) -> dict:
return {
"unit_code": c.get("unit_code"),
"address": c.get("address"),
"usable_area": c.get("usable_area"),
"rooms": c.get("rooms"),
"floor": c.get("floor"),
"construction_year": c.get("construction_year"),
"listing_price": c.get("listing_price"),
"selling_price": c.get("selling_price"),
"shared_debt": c.get("shared_debt"),
"sqm_price": c.get("sqm_price"),
"common_costs": c.get("common_costs"),
"days_on_market": c.get("days_on_market"),
"finalized_at": (c.get("finalized_at") or "")[:10],
}
sorted_comps = sorted(comps, key=lambda c: c.get("finalized_at") or "", reverse=True)
slim_comps = [_slim_comp(c) for c in sorted_comps[:15]]
score = item.get("score") or {}
summary = item.get("summary") or {}
price_history = item.get("price_history") or []
cache_age = item.get("cache_age")
# Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal.
# Drop nothing from scores.
slim_score = {k: v for k, v in score.items()}
eiendom: dict | None = None
if eu:
eiendom = {
"unit_code": eu.get("unit_code"),
"usable_area": eu.get("usable_area"),
"estimated_price": eu.get("estimated_selling_price"),
"estimated_range": [
eu.get("estimated_selling_price_lower"),
eu.get("estimated_selling_price_upper"),
],
"listing_sqm_price": eu.get("listing_sqm_price"),
"market_placement": eu.get("market_placement_score"),
"sale_status": eu.get("sale_status"),
"days_on_market": eu.get("days_on_market"),
"avg_comp_sqm_price": avg_comp_sqm,
"comp_count": len(comps),
}
return {
"rank": rank,
@@ -134,17 +119,59 @@ def _slim_listing(rank: int, item: dict) -> dict:
"has_parking": item.get("has_parking"),
"has_garage": item.get("has_garage"),
"eiendom_unit_code": item.get("eiendom_unit_code"),
"score": slim_score,
"score": dict(score),
"categories": item.get("categories"),
"why_interesting": summary.get("why_interesting"),
"risks": summary.get("risks"),
"cache_age": cache_age,
"cache_age": item.get("cache_age"),
"price_history": price_history[:5], # Last 5 price records
"eiendom": eiendom,
"similar_units": slim_comps,
"eiendom": _slim_eiendom(eu, comps) if eu else None,
"similar_units": _slim_comps(comps),
}
def _slim_analyze_ad(result: dict) -> dict:
"""Shape the single-ad analyze_ad result for MCP output.
The service returns {ad: FinnAd, eiendom_unit: EiendomUnit, similar_units: [...]}.
Flatten the ad fields up, keep listing_description, attach slim eiendom + comps,
and strip unit_images / unit_vector / lat / lng / internal timestamps.
"""
ad = result.get("ad") or {}
eu = result.get("eiendom_unit") or {}
comps = result.get("similar_units") or []
out: dict[str, Any] = {
"finnkode": ad.get("finnkode"),
"url": ad.get("url"),
"title": ad.get("title"),
"address": ad.get("address"),
"district": ad.get("district"),
"listing_description": ad.get("listing_description"),
"property_type": ad.get("property_type"),
"ownership_type": ad.get("ownership_type"),
"floor": ad.get("floor"),
"area_m2": ad.get("area_m2"),
"rooms": ad.get("rooms"),
"bedrooms": ad.get("bedrooms"),
"total_price": ad.get("total_price"),
"asking_price": ad.get("asking_price"),
"shared_debt": ad.get("shared_debt"),
"common_costs": ad.get("common_costs"),
"construction_year": ad.get("construction_year"),
"energy_rating": ad.get("energy_rating"),
"has_balcony": ad.get("has_balcony"),
"has_terrace": ad.get("has_terrace"),
"has_elevator": ad.get("has_elevator"),
"has_parking": ad.get("has_parking"),
"has_garage": ad.get("has_garage"),
"eiendom_unit_code": ad.get("eiendom_unit_code"),
"eiendom": _slim_eiendom(eu, comps) if eu else None,
"similar_units": _slim_comps(comps),
}
return out
def _build_slim_search_result(full: dict) -> dict:
"""Convert full analyze_search output to a compact MCP-safe response.
@@ -152,8 +179,7 @@ def _build_slim_search_result(full: dict) -> dict:
listings. Target: <200KB for 30 analyzed ads.
"""
listings = [
_slim_listing(rank + 1, item)
for rank, item in enumerate(full.get("analysis") or [])
_slim_listing(rank + 1, item) for rank, item in enumerate(full.get("analysis") or [])
]
return {
"search_url": full.get("search_url"),
@@ -208,65 +234,6 @@ async def finn_analyze_search(
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description=(
"Fetch full detail for a FINN listing by finnkode."
" Checks cache first; use force_refresh=True to bypass."
)
)
async def finn_get_ad(finnkode: str, force_refresh: bool = False) -> str:
"""Fetch FINN ad details by finnkode."""
try:
ad = await get_or_fetch_ad(finnkode, force_refresh=force_refresh)
return ad.model_dump_json()
except Exception as e:
logger.error(f"Error fetching ad {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description="Resolve an Eiendom.no unit_code from a FINN listing URL. "
"Returns unit_code, address, lat, lng or an error if not found."
)
async def finn_resolve_eiendom_unit(finn_url: str) -> str:
"""Resolve Eiendom.no unit from FINN URL."""
try:
unit = await search_unit_from_finn_url(finn_url)
if unit is None:
return json.dumps(
{
"error": True,
"message": "Eiendom.no unit could not be resolved from FINN URL",
}
)
return json.dumps(
{
"unit_code": unit.unit_code,
"address": unit.address,
"lat": unit.lat,
"lng": unit.lng,
}
)
except Exception as e:
logger.error(f"Error resolving unit from {finn_url}: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description="Fetch full Eiendom.no unit data by unit_code. Checks SQLite cache (24h TTL)."
)
async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) -> str:
"""Fetch Eiendom.no unit details by unit_code."""
try:
unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
if unit is None:
return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
return unit.model_dump_json()
except Exception as e:
logger.error(f"Error fetching unit {unit_code}: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description=(
"Fetch and analyze unit images for visual assessment of a property. "
@@ -305,6 +272,7 @@ async def finn_analyze_unit_images(
# within the 1MB MCP tool result limit across multiple images.
from PIL import Image
import io
img = Image.open(io.BytesIO(resp.content))
img.thumbnail((1024, 1024), Image.LANCZOS)
if img.mode in ("RGBA", "P"):
@@ -326,50 +294,6 @@ async def finn_analyze_unit_images(
return [TextContent(type="text", text=json.dumps({"error": True, "message": str(e)}))]
@mcp.tool(
description="Fetch comparable recently-sold or for-sale units from Eiendom.no using a "
"base64-encoded unit vector. Returns list of similar units with sale prices."
)
async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENTLY_SOLD") -> str:
"""Fetch similar units from Eiendom.no."""
try:
units = await get_similar_units(unit_vector, listing_status)
return json.dumps([unit.model_dump() for unit in units], default=str)
except Exception as e:
logger.error(f"Error fetching similar units: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description="Build a base64-encoded unit vector for a given Eiendom.no unit_code. "
"The vector is used as input to finn_get_similar_units."
)
async def finn_build_unit_vector(unit_code: str) -> str:
"""Build unit vector for Eiendom.no unit."""
try:
unit = await get_unit(unit_code)
if unit is None:
return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
return json.dumps({"unit_code": unit.unit_code, "unit_vector": build_unit_vector(unit)})
except Exception as e:
logger.error(f"Error building unit vector for {unit_code}: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(
description="Decode a base64 unit vector into human-readable JSON (lat, lon, property type, "
"floor, rooms, construction year, area, price)."
)
def finn_decode_unit_vector(unit_vector: str) -> str:
"""Decode unit vector to readable format."""
try:
result = decode_unit_vector(unit_vector)
return json.dumps(result)
except Exception as e:
logger.error(f"Error decoding unit vector: {e}")
return json.dumps({"error": True, "message": str(e)})
# ============================================================================
# Additional analysis and enrichment tools
# ============================================================================
@@ -377,79 +301,33 @@ def finn_decode_unit_vector(unit_vector: str) -> str:
@mcp.tool(
description=(
"Fetch and enrich a single FINN ad with optional Eiendom.no data and comparable units."
"Deep-dive one or more FINN listings. Accepts a single finnkode or a list "
"(batched in one call). Always enriches with Eiendom.no data and comparable "
"sold units. Returns listing_description plus slim eiendom/comps; excludes "
"image URLs and internal vectors (use finn_analyze_unit_images for visuals)."
)
)
async def finn_analyze_ad(
finnkode: str,
include_eiendom_no: bool = True,
include_similar_units: bool = False,
) -> str:
"""Analyze and enrich a single FINN ad."""
try:
result = await analyze_ad(
finnkode,
include_eiendom_no=include_eiendom_no,
include_similar_units=include_similar_units,
)
return json.dumps(result, default=str)
except Exception as e:
logger.error(f"Error analyzing ad {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)})
async def finn_analyze_ad(finnkode: str | list[str]) -> str:
"""Analyze and enrich one or more FINN ads. Batch input returns a list."""
finnkoder = [finnkode] if isinstance(finnkode, str) else list(finnkode)
async def _one(fk: str) -> dict:
try:
result = await analyze_ad(
fk,
include_eiendom_no=True,
include_similar_units=True,
)
return _slim_analyze_ad(result)
except Exception as e: # noqa: BLE001 — per-item isolation, batch must not abort
logger.error(f"Error analyzing ad {fk}: {e}")
return {"finnkode": fk, "error": True, "message": str(e)}
@mcp.tool(
description=(
"Evaluate one FINN listing against comparable recently-sold properties from Eiendom.no."
)
)
async def finn_analyze_ad_against_comps(
finnkode: str, listing_status: str = "RECENTLY_SOLD"
) -> str:
"""Analyze ad against comparable sales."""
try:
result = await analyze_ad_against_comps(finnkode, listing_status=listing_status)
return json.dumps(result, default=str)
except Exception as e:
logger.error(f"Error analyzing ad {finnkode} against comps: {e}")
return json.dumps({"error": True, "message": str(e)})
results = await asyncio.gather(*[_one(fk) for fk in finnkoder])
@mcp.tool(
description=(
"Find properties similar to a listing the user has liked. "
"Requires that the user has marked the listing with verdict='liked'."
)
)
async def finn_find_similar_to_liked_ad(
finnkode: str, mode: str = "recommendations", listing_status: str = "FOR_SALE"
) -> str:
"""Find properties similar to a liked ad."""
try:
result = await find_similar_to_liked(finnkode, mode=mode, listing_status=listing_status)
return render_similar_units(result, "json")
except Exception as e:
logger.error(f"Error finding similar to {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)})
@mcp.tool(description="Compare multiple FINN listings side by side with optional enrichment.")
async def finn_compare_ads(
finnkoder: list[str],
include_eiendom_no: bool = True,
include_comps: bool = True,
) -> str:
"""Compare multiple ads."""
try:
result = await compare_ads(
finnkoder,
include_eiendom_no=include_eiendom_no,
include_comps=include_comps,
)
return render_comparison(result, "json")
except Exception as e:
logger.error(f"Error comparing ads: {e}")
return json.dumps({"error": True, "message": str(e)})
# Single string input → single object; list input → list (preserves order).
payload: Any = results[0] if isinstance(finnkode, str) else results
return json.dumps(payload, default=str)
@mcp.tool(
@@ -467,13 +345,13 @@ async def finn_save_feedback(finnkode: str, verdict: str, notes: str | None = No
@mcp.tool(
description="Fetch the stored shortlist from a previous search run. "
"Returns the ranked listings with all enrichment data."
description="Fetch the shortlist of listings you have given a verdict "
"(liked, disliked, maybe, visited). Enriched with cached score and price data."
)
def finn_get_shortlist(run_id: int | None = None, limit: int = 10) -> str:
"""Get stored shortlist."""
def finn_get_shortlist(verdict: str = "liked", limit: int = 10) -> str:
"""Get stored shortlist filtered by verdict."""
try:
result = get_shortlist(run_id, limit)
result = get_shortlist(verdict, limit)
return render_shortlist(result, "json")
except Exception as e:
logger.error(f"Error fetching shortlist: {e}")
+50 -12
View File
@@ -24,7 +24,9 @@ from .ad import fetch_ad_details
from .analysis import analyze_search as run_analysis_search
from .cache import (
get_eiendom_unit as get_cached_eiendom_unit,
get_feedback_by_verdict,
get_finn_ad,
get_latest_analysis,
get_similar_units as get_cached_similar_units,
init_db,
invalidate_analysis,
@@ -137,11 +139,7 @@ async def get_or_fetch_eiendom_unit(
conn = init_db(FINN_CACHE_PATH)
# Convert structural TTL from days to hours
ttl_hours = EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS * 24
unit = (
None
if force_refresh
else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours)
)
unit = None if force_refresh else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours)
if unit is not None:
return unit
@@ -182,9 +180,7 @@ async def get_or_fetch_similar_units(
conn, unit_code, listing_status, ttl_hours=ttl_hours
)
if cached_similar:
logger.debug(
"Using cached similar units for %s (status=%s)", unit_code, listing_status
)
logger.debug("Using cached similar units for %s (status=%s)", unit_code, listing_status)
return cached_similar
# Cache miss or force_refresh: fetch from remote.
@@ -377,10 +373,52 @@ def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict
return save_feedback_impl(finnkode, verdict, notes)
def get_shortlist(run_id: int | None = None, limit: int = 10) -> dict[str, Any]:
"""Fetch stored shortlist from a search run."""
# TODO: implement via search_runs table in cache.py
return {"shortlist": [], "run_id": run_id, "limit": limit}
def get_shortlist(
verdict: str = "liked", limit: int = 10
) -> dict[str, Any]:
"""Fetch the shortlist of listings the user has given *verdict*.
Reads from the ``user_feedback`` table and enriches each finnkode with
its most recent cached analysis (score, price, categories) when available.
Entries with no cached analysis still appear, carrying the stored verdict
and notes so nothing the user flagged is silently dropped.
"""
conn = init_db(FINN_CACHE_PATH)
feedback_rows = get_feedback_by_verdict(conn, verdict, limit=limit)
shortlist: list[dict[str, Any]] = []
for fb in feedback_rows:
finnkode = fb["finnkode"]
entry: dict[str, Any] = {
"finnkode": finnkode,
"verdict": fb["verdict"],
"notes": fb["notes"],
"url": f"https://www.finn.no/realestate/homes/ad.html?finnkode={finnkode}",
}
analysis = get_latest_analysis(conn, finnkode)
if analysis:
score = analysis.get("score") or {}
eiendom = analysis.get("eiendom_unit") or {}
entry.update(
{
"title": analysis.get("title"),
"address": analysis.get("address"),
"area_m2": analysis.get("area_m2"),
"total_price": analysis.get("total_price"),
"asking_price": analysis.get("asking_price"),
"score": score.get("total"),
"categories": analysis.get("categories", []),
"market_placement": eiendom.get("market_placement_score"),
}
)
shortlist.append(entry)
# Highest score first; un-enriched entries (score None) sink to the bottom.
shortlist.sort(key=lambda e: (e.get("score") is not None, e.get("score") or 0), reverse=True)
return {"shortlist": shortlist, "verdict": verdict, "limit": limit}
def get_new_ads_since_last_run(search_url: str) -> dict[str, Any]:
+1 -1
View File
@@ -75,7 +75,7 @@ async def main() -> None:
logger.info(f"analysis_cache now has {cache_count} rows")
cursor.execute(
'SELECT COUNT(*) FROM finn_ads '
"SELECT COUNT(*) FROM finn_ads "
'WHERE json_extract(payload, "$.eiendom_unit_code") IS NOT NULL '
'AND json_extract(payload, "$.eiendom_unit_code") != "null"'
)