diff --git a/.vscode/mcp.json b/.vscode/mcp.json index a9b1b7d..cc486cf 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -4,12 +4,7 @@ "type": "http", "url": "https://mcp.context7.com/mcp", }, - "mcp-jungle":{ - "type": "http", - "url": "http://mini:8080/mcp", - }, - // "finn-eiendom": { } - "finn-eiendom": { + "finn-mcp": { "command": "/root/projects/finn-mcp/.venv/bin/python", "args": [ "-m", diff --git a/.vscode/settings.json b/.vscode/settings.json index 35056e1..a726be5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -24,6 +24,7 @@ "/root/projects/finn-mcp/.venv/bin/python": true, "make": true, ".venv/bin/coverage": true, - ".venv/bin/pytest": true + ".venv/bin/pytest": true, + "python -m pytest": true } } \ No newline at end of file diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 84684cc..c686921 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -14,7 +14,7 @@ services: # Expose port for network access ports: - - "8010:8010" + - "8011:8010" # More aggressive resource limits for production # deploy: diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py index 971df3c..71af933 100644 --- a/finn_eiendom/analysis.py +++ b/finn_eiendom/analysis.py @@ -32,12 +32,14 @@ from .cache import ( save_analysis, save_eiendom_unit, save_finn_ad, + save_search_run, save_similar_units, ) from .config import ( - EIENDOM_NO_CACHE_TTL_HOURS, + EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS, + EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS, FINN_CACHE_PATH, - FINN_CACHE_TTL_AD_HOURS, + FINN_CACHE_TTL_AD_STRUCTURAL_DAYS, FINN_DETAIL_LIMIT, FINN_MAX_SEARCH_PAGES, ) @@ -147,6 +149,12 @@ async def analyze_ad( """ conn = cache.init_db(FINN_CACHE_PATH) + # ------------------------------------------------------------------ + # 0. Backfill eiendom_unit_code if provided. 
+ # ------------------------------------------------------------------ + if unit_code and not finn_ad.eiendom_unit_code: + finn_ad.eiendom_unit_code = unit_code + # ------------------------------------------------------------------ # 1. Ensure the ad is in the DB so we have a stable hash to key on. # ------------------------------------------------------------------ @@ -173,8 +181,10 @@ async def analyze_ad( comps_hash_changed = False if enriched: + # Convert similar units TTL from days to hours + ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24 similar_units = cache.get_similar_units( - conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS + conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=ttl_hours ) if not similar_units: vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched) @@ -210,11 +220,38 @@ async def analyze_ad( categories = scoring.classify_ad(scores) summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories) + # Get price history and cache age metadata + from .cache import get_price_history, get_finn_ad_hash + from datetime import datetime, UTC, timedelta + + price_history = get_price_history(conn, finn_ad.finnkode, limit=20) + + # Compute cache age: how long since we last fetched this ad + cursor = conn.cursor() + cursor.execute( + "SELECT fetched_at, last_verified_at FROM finn_ads WHERE finnkode = ?", + (finn_ad.finnkode,), + ) + db_row = cursor.fetchone() + cache_age = None + if db_row: + fetched_at = datetime.fromisoformat(db_row["fetched_at"]) + last_verified = db_row["last_verified_at"] + if last_verified: + last_verified_at = datetime.fromisoformat(last_verified) + structural_age_days = (datetime.now(UTC) - fetched_at).days + price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600 + cache_age = { + "structural_days": structural_age_days, + "price_hours": round(price_age_hours, 1), + } + result = { "finnkode": finn_ad.finnkode, "url": finn_ad.url, 
"title": finn_ad.title, "address": finn_ad.address, + "listing_description": finn_ad.listing_description, "district": finn_ad.district, "property_type": finn_ad.property_type, "ownership_type": finn_ad.ownership_type, @@ -236,6 +273,8 @@ async def analyze_ad( "score": scores, "categories": categories, "summary": summary, + "price_history": price_history, + "cache_age": cache_age, "eiendom_unit": enriched.model_dump(mode="json") if enriched else None, "similar_units": [unit.model_dump(mode="json") for unit in similar_units], } @@ -262,7 +301,7 @@ async def _fetch_card_to_db( treats None as a skip without aborting the whole batch. """ try: - finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) + finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24) if finn_ad is None: finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client) save_finn_ad(conn, finn_ad) @@ -275,6 +314,11 @@ async def _fetch_card_to_db( try: matched_unit = await eiendom_no.search_unit_from_finn_url(card.url) unit_code = matched_unit.unit_code if matched_unit else None + # Backfill unit_code into the ad object and persist. + # This ensures the cached ad has the eiendom_unit_code field populated. + if unit_code and not finn_ad.eiendom_unit_code: + finn_ad.eiendom_unit_code = unit_code + _, _ = save_finn_ad(conn, finn_ad) except Exception as exc: logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc) @@ -384,6 +428,10 @@ async def analyze_search( f"{skipped_count} skipped." 
) + # Record this search run in the database + finnkodes = [card.finnkode for card in cards] + save_search_run(conn, search_url, finnkodes) + return { "search_url": search_url, "search_cards": [card.model_dump(mode="json") for card in cards], diff --git a/finn_eiendom/cache.py b/finn_eiendom/cache.py index b46b9c3..ef80845 100644 --- a/finn_eiendom/cache.py +++ b/finn_eiendom/cache.py @@ -80,12 +80,14 @@ def init_db(path: str | None = None) -> sqlite3.Connection: url TEXT, payload TEXT NOT NULL, content_hash TEXT, - fetched_at TEXT NOT NULL + fetched_at TEXT NOT NULL, + last_verified_at TEXT ) """ ) - # Migration: add content_hash column if the table already existed without it. + # Migrations: add columns if the table already existed without them. _add_column_if_missing(cursor, "finn_ads", "content_hash", "TEXT") + _add_column_if_missing(cursor, "finn_ads", "last_verified_at", "TEXT") cursor.execute( """ @@ -136,6 +138,50 @@ def init_db(path: str | None = None) -> sqlite3.Connection: """ ) + # New tables for Phase 2 enhancements + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS user_feedback ( + finnkode TEXT PRIMARY KEY, + verdict TEXT NOT NULL, + notes TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """ + ) + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS price_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finnkode TEXT NOT NULL, + total_price INTEGER, + asking_price INTEGER, + sale_status TEXT, + recorded_at TEXT NOT NULL + ) + """ + ) + cursor.execute("CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)") + + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS search_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + search_url TEXT NOT NULL, + finnkodes TEXT NOT NULL, + created_at TEXT NOT NULL + ) + """ + ) + cursor.execute("CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)") + + # Create indexes for efficient staleness queries + 
cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)") + conn.commit() return conn @@ -258,6 +304,8 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]: if ad.detail_fetched_at else datetime.now(UTC).isoformat() ) + # Update last_verified_at to now when saving (indicates we just checked the data) + last_verified_at = datetime.now(UTC).isoformat() # Check existing hash before writing. cursor.execute( @@ -270,9 +318,9 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]: cursor.execute( "INSERT OR REPLACE INTO finn_ads" - " (finnkode, url, payload, content_hash, fetched_at)" - " VALUES (?, ?, ?, ?, ?)", - (ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at), + " (finnkode, url, payload, content_hash, fetched_at, last_verified_at)" + " VALUES (?, ?, ?, ?, ?, ?)", + (ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at, last_verified_at), ) conn.commit() logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8]) @@ -522,6 +570,158 @@ def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None: conn.commit() +# --------------------------------------------------------------------------- +# User feedback +# --------------------------------------------------------------------------- + + +def save_feedback( + conn: sqlite3.Connection, finnkode: str, verdict: str, notes: str | None = None +) -> dict[str, Any]: + """Store user feedback/verdict for a FINN listing.""" + cursor = conn.cursor() + now = datetime.now(UTC).isoformat() + cursor.execute( + "INSERT OR REPLACE INTO user_feedback" + " (finnkode, verdict, notes, created_at, updated_at)" + " VALUES (?, ?, ?, ?, ?)", + (finnkode, verdict, 
notes, now, now), + ) + conn.commit() + logger.debug("feedback saved for %s (verdict=%s)", finnkode, verdict) + return {"finnkode": finnkode, "verdict": verdict, "notes": notes} + + +def get_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None: + """Retrieve stored feedback for a FINN listing.""" + cursor = conn.cursor() + cursor.execute( + "SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback WHERE finnkode = ?", + (finnkode,), + ) + row = cursor.fetchone() + if not row: + return None + return { + "finnkode": row["finnkode"], + "verdict": row["verdict"], + "notes": row["notes"], + "created_at": row["created_at"], + "updated_at": row["updated_at"], + } + + +def get_feedback_by_verdict( + conn: sqlite3.Connection, verdict: str, limit: int = 100 +) -> list[dict[str, Any]]: + """Retrieve all stored feedback with a given verdict.""" + cursor = conn.cursor() + cursor.execute( + "SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback" + " WHERE verdict = ? 
ORDER BY updated_at DESC LIMIT ?", + (verdict, limit), + ) + return [ + { + "finnkode": row["finnkode"], + "verdict": row["verdict"], + "notes": row["notes"], + "created_at": row["created_at"], + "updated_at": row["updated_at"], + } + for row in cursor.fetchall() + ] + + +# --------------------------------------------------------------------------- +# Price history +# --------------------------------------------------------------------------- + + +def save_price_history( + conn: sqlite3.Connection, + finnkode: str, + total_price: int | None = None, + asking_price: int | None = None, + sale_status: str | None = None, +) -> None: + """Record a price/status snapshot for a listing.""" + cursor = conn.cursor() + cursor.execute( + "INSERT INTO price_history (finnkode, total_price, asking_price, sale_status, recorded_at)" + " VALUES (?, ?, ?, ?, ?)", + (finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()), + ) + conn.commit() + logger.debug("price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price) + + +def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) -> list[dict[str, Any]]: + """Retrieve price history for a listing.""" + cursor = conn.cursor() + cursor.execute( + "SELECT total_price, asking_price, sale_status, recorded_at FROM price_history" + " WHERE finnkode = ? 
ORDER BY recorded_at DESC LIMIT ?", + (finnkode, limit), + ) + return [ + { + "total_price": row["total_price"], + "asking_price": row["asking_price"], + "sale_status": row["sale_status"], + "recorded_at": row["recorded_at"], + } + for row in cursor.fetchall() + ] + + +# --------------------------------------------------------------------------- +# Search runs +# --------------------------------------------------------------------------- + + +def save_search_run( + conn: sqlite3.Connection, search_url: str, finnkodes: list[str] +) -> None: + """Record a search run with the finnkodes found.""" + cursor = conn.cursor() + finnkodes_json = json.dumps(finnkodes) + cursor.execute( + "INSERT INTO search_runs (search_url, finnkodes, created_at)" + " VALUES (?, ?, ?)", + (search_url, finnkodes_json, datetime.now(UTC).isoformat()), + ) + conn.commit() + logger.debug("search_run recorded for %s (%d finnkodes)", search_url, len(finnkodes)) + + +def get_latest_search_run(conn: sqlite3.Connection, search_url: str) -> dict[str, Any] | None: + """Retrieve the most recent search run for a URL.""" + cursor = conn.cursor() + cursor.execute( + "SELECT search_url, finnkodes, created_at FROM search_runs" + " WHERE search_url = ? 
ORDER BY created_at DESC LIMIT 1", + (search_url,), + ) + row = cursor.fetchone() + if not row: + return None + return { + "search_url": row["search_url"], + "finnkodes": json.loads(row["finnkodes"]), + "created_at": row["created_at"], + } + + +def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]: + """Delete stored feedback for a FINN listing.""" + cursor = conn.cursor() + cursor.execute("DELETE FROM user_feedback WHERE finnkode = ?", (finnkode,)) + conn.commit() + logger.debug("feedback deleted for %s", finnkode) + return {"finnkode": finnkode, "deleted": True} + + # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- diff --git a/finn_eiendom/cli.py b/finn_eiendom/cli.py index 555e78a..eac7390 100644 --- a/finn_eiendom/cli.py +++ b/finn_eiendom/cli.py @@ -320,8 +320,69 @@ def diff( def stats() -> None: """Show cache statistics.""" try: - # TODO: implement cache stats via cache.py - typer.echo("Cache stats (not yet implemented)") + import json + import sqlite3 + + from .config import FINN_CACHE_PATH + + conn = sqlite3.connect(str(FINN_CACHE_PATH)) + cursor = conn.cursor() + + # Get row counts and hash statistics for each table + tables = ["finn_ads", "eiendom_units", "similar_units", "analysis_cache", "cache_meta"] + stats = {} + + for table in tables: + cursor.execute(f"SELECT COUNT(*) FROM {table}") + total = cursor.fetchone()[0] + + if total == 0: + stats[table] = {"total_rows": 0} + continue + + # For tables with content_hash or deps_hash + if table == "analysis_cache": + cursor.execute(f"SELECT COUNT(*) FROM {table} WHERE deps_hash IS NOT NULL") + with_hash = cursor.fetchone()[0] + elif table != "cache_meta" or True: # All have content_hash or value + cursor.execute(f"SELECT COUNT(*) FROM {table} WHERE content_hash IS NOT NULL") + with_hash = cursor.fetchone()[0] + + stats[table] = { + "total_rows": total, + 
"rows_with_hash": with_hash, + "pct_with_hash": round(100 * with_hash / total, 1) if total > 0 else 0, + } + + # Special checks for finn_ads + cursor.execute( + 'SELECT COUNT(*) FROM finn_ads ' + 'WHERE json_extract(payload, "$.eiendom_unit_code") IS NOT NULL ' + 'AND json_extract(payload, "$.eiendom_unit_code") != "null"' + ) + ads_with_unit_code = cursor.fetchone()[0] + if "finn_ads" in stats and stats["finn_ads"]["total_rows"] > 0: + stats["finn_ads"]["with_eiendom_unit_code"] = ads_with_unit_code + stats["finn_ads"]["pct_with_unit_code"] = round(100 * ads_with_unit_code / stats["finn_ads"]["total_rows"], 1) + + # Get fetched_at date ranges + for table in ["finn_ads", "eiendom_units", "similar_units"]: + cursor.execute(f"SELECT MIN(fetched_at), MAX(fetched_at) FROM {table}") + min_date, max_date = cursor.fetchone() + if min_date and max_date: + stats[table]["oldest_fetch"] = min_date + stats[table]["newest_fetch"] = max_date + + conn.close() + + # Format output + typer.echo("\n=== Cache Statistics ===\n") + for table, table_stats in stats.items(): + typer.echo(f"{table}:") + for key, value in table_stats.items(): + typer.echo(f" {key}: {value}") + typer.echo() + except Exception as e: typer.echo(f"Error: {e}", err=True) raise typer.Exit(1) diff --git a/finn_eiendom/config.py b/finn_eiendom/config.py index c56b9e7..9fe15c0 100644 --- a/finn_eiendom/config.py +++ b/finn_eiendom/config.py @@ -11,20 +11,38 @@ FINN_MAX_SEARCH_PAGES = int(os.getenv("FINN_MAX_SEARCH_PAGES", "3")) FINN_DETAIL_LIMIT = int(os.getenv("FINN_DETAIL_LIMIT", "20")) FINN_REQUEST_DELAY_SECONDS = float(os.getenv("FINN_REQUEST_DELAY_SECONDS", "2")) FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0.1") -FINN_CACHE_TTL_SEARCH_MINUTES = int(os.getenv("FINN_CACHE_TTL_SEARCH_MINUTES", "60")) -FINN_CACHE_TTL_AD_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_HOURS", "24")) + +# Cache TTLs (refactor v2) +# Structural data (address, area, year, etc.) 
changes rarely; long TTL +FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int( + os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30") +) +# Price/status changes frequently; short TTL for lightweight verification +FINN_CACHE_TTL_AD_PRICE_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_PRICE_HOURS", "6")) +# Search pages/cards also TTL-based (content changes with added/removed listings) +FINN_CACHE_TTL_SEARCH_MINUTES = int(os.getenv("FINN_CACHE_TTL_SEARCH_MINUTES", "360")) # Eiendom.no API settings EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true" EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1") EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1")) -EIENDOM_NO_CACHE_TTL_HOURS = int(os.getenv("EIENDOM_NO_CACHE_TTL_HOURS", "24")) +# Structural data (lat, lng, property_type) has long TTL; estimates have shorter TTL +EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int( + os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30") +) +EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int( + os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7") +) EIENDOM_NO_SIMILAR_UNITS_ENABLED = ( os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true" ) EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS = os.getenv( "EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS", "RECENTLY_SOLD" ) +# Similar units (comps) are immutable; very long TTL (only new entries appear over time) +EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS = int( + os.getenv("EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS", "60") +) # Logging LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") diff --git a/finn_eiendom/feedback.py b/finn_eiendom/feedback.py index 4a4f913..b4de888 100644 --- a/finn_eiendom/feedback.py +++ b/finn_eiendom/feedback.py @@ -3,7 +3,11 @@ import logging from typing import Any +from .cache import delete_feedback as cache_delete_feedback +from .cache import get_feedback as cache_get_feedback +from .cache import get_feedback_by_verdict from .cache import init_db 
+from .cache import save_feedback as cache_save_feedback from .config import FINN_CACHE_PATH logger = logging.getLogger(__name__) @@ -21,15 +25,7 @@ def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict Dict with saved feedback details """ conn = init_db(FINN_CACHE_PATH) - - # TODO: implement via feedback table in cache.py - # For now, return a success response - return { - "finnkode": finnkode, - "verdict": verdict, - "notes": notes, - "saved": True, - } + return cache_save_feedback(conn, finnkode, verdict, notes) def get_feedback(finnkode: str) -> dict[str, Any] | None: @@ -42,9 +38,21 @@ def get_feedback(finnkode: str) -> dict[str, Any] | None: Feedback dict if exists, else None """ conn = init_db(FINN_CACHE_PATH) + return cache_get_feedback(conn, finnkode) - # TODO: implement via feedback table in cache.py - return None + +def get_feedback_by_verdict_impl(verdict: str, limit: int = 100) -> list[dict[str, Any]]: + """Retrieve all stored feedback with a given verdict. + + Args: + verdict: Verdict to filter by + limit: Max results to return + + Returns: + List of feedback dicts + """ + conn = init_db(FINN_CACHE_PATH) + return get_feedback_by_verdict(conn, verdict, limit=limit) def delete_feedback(finnkode: str) -> dict[str, Any]: @@ -57,9 +65,4 @@ def delete_feedback(finnkode: str) -> dict[str, Any]: Status dict """ conn = init_db(FINN_CACHE_PATH) - - # TODO: implement via feedback table in cache.py - return { - "finnkode": finnkode, - "deleted": True, - } + return cache_delete_feedback(conn, finnkode) diff --git a/finn_eiendom/mcp_server.py b/finn_eiendom/mcp_server.py index b6b26db..57ab260 100644 --- a/finn_eiendom/mcp_server.py +++ b/finn_eiendom/mcp_server.py @@ -51,8 +51,8 @@ logger = logging.getLogger(__name__) def _slim_listing(rank: int, item: dict) -> dict: """Collapse one full analyze_ad result into a compact listing card. 
- Drops: listing_description, unit_images, unit_vector, all timestamps, - full similar_units list, score dimension breakdown. + Keeps: listing_description (for AI interpretation), price_history, cache_age, score breakdown. + Drops: unit_images, unit_vector, internal eiendom_unit timestamps. Derives: avg_comp_sqm_price from similar_units. """ eu = item.get("eiendom_unit") or {} @@ -84,6 +84,8 @@ def _slim_listing(rank: int, item: dict) -> dict: score = item.get("score") or {} summary = item.get("summary") or {} + price_history = item.get("price_history") or [] + cache_age = item.get("cache_age") # Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal. # Drop nothing from scores. @@ -113,6 +115,7 @@ def _slim_listing(rank: int, item: dict) -> dict: "url": item.get("url"), "title": item.get("title"), "address": item.get("address"), + "listing_description": item.get("listing_description"), "district": item.get("district"), "property_type": item.get("property_type"), "ownership_type": item.get("ownership_type"), @@ -135,6 +138,8 @@ def _slim_listing(rank: int, item: dict) -> dict: "categories": item.get("categories"), "why_interesting": summary.get("why_interesting"), "risks": summary.get("risks"), + "cache_age": cache_age, + "price_history": price_history[:5], # Last 5 price records "eiendom": eiendom, "similar_units": slim_comps, } diff --git a/finn_eiendom/service.py b/finn_eiendom/service.py index 0898e87..162d95f 100644 --- a/finn_eiendom/service.py +++ b/finn_eiendom/service.py @@ -30,9 +30,16 @@ from .cache import ( invalidate_analysis, save_eiendom_unit, save_finn_ad, + save_price_history, save_similar_units, ) -from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH, FINN_CACHE_TTL_AD_HOURS +from .config import ( + EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS, + EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS, + EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS, + FINN_CACHE_PATH, + FINN_CACHE_TTL_AD_STRUCTURAL_DAYS, +) from .eiendom_no import ( 
build_unit_vector, decode_unit_vector, @@ -56,13 +63,23 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd: invalidated. """ conn = init_db(FINN_CACHE_PATH) - ad = None if force_refresh else get_finn_ad(conn, finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) + # Convert structural TTL from days to hours + ttl_hours = FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24 + ad = None if force_refresh else get_finn_ad(conn, finnkode, ttl_hours=ttl_hours) if ad is not None: return ad # Cache miss or force_refresh: fetch from remote. ad = await fetch_ad_details(finnkode) _, changed = save_finn_ad(conn, ad) + # Record price snapshot for history tracking + save_price_history( + conn, + finnkode, + total_price=ad.total_price, + asking_price=ad.asking_price, + sale_status=None, + ) if changed: logger.debug("finn_ad %s updated -- invalidating analysis cache", finnkode) invalidate_analysis(conn, finnkode) @@ -118,10 +135,12 @@ async def get_or_fetch_eiendom_unit( the DB row is not updated (analysis_cache stays valid). 
""" conn = init_db(FINN_CACHE_PATH) + # Convert structural TTL from days to hours + ttl_hours = EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS * 24 unit = ( None if force_refresh - else get_cached_eiendom_unit(conn, unit_code, ttl_hours=24) + else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours) ) if unit is not None: return unit @@ -157,8 +176,10 @@ async def get_or_fetch_similar_units( return [] if not force_refresh: + # Convert similar units TTL from days to hours + ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24 cached_similar = get_cached_similar_units( - conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS + conn, unit_code, listing_status, ttl_hours=ttl_hours ) if cached_similar: logger.debug( diff --git a/refactor.md b/refactor.md new file mode 100644 index 0000000..bca144b --- /dev/null +++ b/refactor.md @@ -0,0 +1,416 @@ +# PRD: finn-mcp v2 + +## Current State (from codebase + DB inspection) + +### What already works +- **SQLite database** (`data/finn.sqlite`) with row counts: 222 finn_ads, 149 eiendom_units, 56 similar_units +- **Hash-aware caching architecture** is designed (see `cache.py` docstring) +- **Transport scoring** is implemented (`score_transport` uses lat/lng from Eiendom.no) +- **`listing_description`** is stored in the `FinnAd` model +- **`finn_analyze_unit_images`** downloads, resizes to 1024px, returns as `ImageContent` — Claude sees images directly + +### Critical bugs discovered +- **Analysis cache is dead.** `analysis_cache` table has **0 rows**. Every search recomputes scoring from scratch. +- **`content_hash` is NULL on every row** in `finn_ads`, `eiendom_units`, `similar_units` — 100% NULL across 427 rows. The `_compute_deps_hash` function therefore returns a deterministic hash of empty strings on every call. +- Schema dump shows `, content_hash TEXT)` appended — column was added via `ALTER TABLE` after data already existed. 
Either the running deployment doesn't populate it on writes, or no backfill migration was run. +- **Only 36 of 222 ads** have `eiendom_unit_code` populated in the stored payload. Enrichment is failing or the resolved unit code isn't being persisted back to the ad row. +- **Search page cache** (`cache_meta`) all rows expired May 16 — 60-min TTL is far too short. + +### Known design problems +- **`feedback.py` is a stub** — all three functions are `# TODO`, nothing is persisted. No `user_feedback` table. +- No `price_history` table. +- No `search_runs` table with finnkodes per search. +- **`listing_description` is actively stripped** in `_slim_listing()` in `mcp_server.py`. +- **`detail_limit`** means only N listings get full Eiendom.no analysis — the rest are unscored. +- **No batch analysis** — analyzing 46 listings requires 46 sequential MCP calls. +- **12 tools**, 7 of which are internal plumbing. +- **Cache TTLs are far too short** — 24h on listing data forces full re-fetch on day-2 repeat searches. + +--- + +## Goals + +1. **Fix the broken cache first** — current cache promises nothing and delivers nothing +2. **Long-lived caching** with smart freshness checks — listing structural data doesn't change, treat it accordingly +3. **6 tools** — one per user intent +4. **Batch analysis** — analyze many listings in one call +5. **Persistent enrichment** — missing tables, feedback implementation +6. **Output matches intent** — each tool returns only what is relevant +7. **`listing_description` available** for AI interpretation in `finn_analyze_ad` + +--- + +## Architecture + +### Caching strategy (revised) + +Listings don't fundamentally change on FINN once posted. Address, area, year, property type, description, eiendom_unit_code mapping — all stable. What changes: price, sale status, DOM. Treat structural data as effectively immutable; check price/status separately and cheaply. 
+ +**Two-tier model:** + +``` +┌────────────────────────────────────────────────────────────────┐ +│ STRUCTURAL DATA (long TTL, full refetch only when invalidated)│ +│ - finn_ads.payload (description, area, year, etc.) │ +│ - eiendom_units.payload (lat, lng, property_type, etc.) │ +│ - similar_units.payload (completed sales — immutable) │ +└────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────┐ +│ VOLATILE DATA (short TTL, cheap refresh) │ +│ - price, status, days_on_market │ +│ - eiendom_units.estimated_selling_price │ +└────────────────────────────────────────────────────────────────┘ +``` + +### Cache TTLs (revised) + +| Data | TTL | Refresh strategy | +|------|-----|-----------------| +| FINN ad structural | **30 days** | Full refetch only | +| FINN ad price/status | **6 hours** | Lightweight check, falls back to full refetch if status changed | +| Eiendom.no unit structural | **30 days** | Full refetch only | +| Eiendom.no estimate | **7 days** | Refresh on access | +| Similar units (sold comps) | **60 days** | Immutable rows; new rows appear over time | +| Search pages | **6 hours** | Content-hash check, only re-scrape if list actually changed | +| Analysis result | **Never expires** | Invalidated by `deps_hash` change | + +**Lightweight price/status check:** A FINN ad page has a stable URL. Fetch headers only (HEAD) or scrape the small `price_widget` block — much cheaper than the full ad page. If price unchanged, bump `last_verified_at`; if changed, full refetch. + +### Database schema changes + +```sql +-- Add to finn_ads +ALTER TABLE finn_ads ADD COLUMN last_verified_at TEXT; +-- Tracks when we last confirmed price/status, separate from fetched_at +-- which tracks when we last did a full refetch. 
+ +-- New: user feedback (replaces feedback.py stubs) +CREATE TABLE user_feedback ( + finnkode TEXT PRIMARY KEY, + verdict TEXT NOT NULL, -- 'liked' | 'disliked' | 'maybe' | 'visited' + notes TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- New: price history (append-only) +CREATE TABLE price_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finnkode TEXT NOT NULL, + total_price INTEGER, + asking_price INTEGER, + sale_status TEXT, + recorded_at TEXT NOT NULL +); +CREATE INDEX idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at); + +-- New: search runs (for finn_get_new_ads_since_last_run) +CREATE TABLE search_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + search_url TEXT NOT NULL, + finnkodes TEXT NOT NULL, -- JSON array + created_at TEXT NOT NULL +); +CREATE INDEX idx_search_runs_url_created ON search_runs(search_url, created_at); + +-- Indexes for stale-detection scans +CREATE INDEX idx_finn_ads_verified ON finn_ads(last_verified_at); +CREATE INDEX idx_eiendom_units_fetched ON eiendom_units(fetched_at); +``` + +--- + +## Tools (v2) — 6 total + +### 1. `finn_analyze_search` + +**Intent:** Ranked list of all listings in this search. + +```typescript +Input: + search_url: string + refresh?: boolean // force re-fetch even if cache is valid + max_pages?: number // default 5 + +Output: + total: number + cache_status: { + listings_from_cache: number + listings_refreshed: number + listings_freshly_scraped: number + } + listings: Array<{ + finnkode, rank, score, url, address, district, + area_m2, bedrooms, floor, construction_year, + total_price, common_costs, shared_debt, sqm_price, + price_vs_estimate, // negative = below estimate + market_placement, dom, categories, risks + }> +``` + +**Behaviour:** Returns ALL scraped listings, not limited by `detail_limit`. Listings without enrichment get `score: null`. Lazy enrichment is triggered by `finn_analyze_ad`. + +### 2. 
`finn_analyze_ad` + +**Intent:** Deep-dive into one or more specific listings. + +```typescript +Input: + finnkode: string | string[] // single or batch + refresh?: boolean // bypass cache + +Output: + // Single string input → single object + // Array input → array of objects in same order + finnkode: string + url: string + address: string + listing_description: string // ← INCLUDED for AI interpretation + score: { + total: number + breakdown: Record<string, number> + nearby_transit: { tbane: [...], trikk: [...] } + } + price: { + total, asking, shared_debt, common_costs, sqm_price, + estimate, estimate_lower, estimate_upper, + vs_estimate, market_placement + } + property: { + type, ownership, area_m2, bedrooms, floor, + construction_year, has_balcony, has_elevator, has_garage + } + market: { + dom, sale_status, avg_comp_sqm_price, comp_count, + comps: Array<{address, usable_area, floor, construction_year, + selling_price, sqm_price, days_on_market, finalized_at}> // top 15 + } + price_history: Array<{ total_price, asking_price, recorded_at }> + categories: string[] + risks: string[] + cache_age: { + structural_days: number // age of last full refetch + price_hours: number // age of last price verification + } +``` + +**Batch behaviour:** Up to 50 finnkodes per call. Internal parallelism, single MCP round-trip. Returns array in input order; failed lookups have `{finnkode, error: "..."}` shape. + +### 3. `finn_analyze_unit_images` + +**Intent:** Visual assessment — condition, views, room feel. + +Unchanged from current implementation. Returns `ImageContent` blocks, not URLs. + +```typescript +Input: + unit_code: string + max_images?: number // default 8 +``` + +### 4. `finn_get_new_ads_since_last_run` + +**Intent:** What has changed since I last checked this search? 
+ +```typescript +Input: + search_url: string + +Output: + new_ads: Array<{finnkode, address, score, total_price, categories, url}> + removed_ads: Array<{finnkode, address}> + changed_ads: Array<{ + finnkode, address, + changes: Array<{field, from, to}> // typically price/status + }> + since: string // ISO timestamp of previous run +``` + +### 5. `finn_save_feedback` + +**Intent:** Save my verdict on a listing. + +```typescript +Input: + finnkode: string + verdict: 'liked' | 'disliked' | 'maybe' | 'visited' + notes?: string + +Output: + ok: boolean + finnkode: string + verdict: string +``` + +### 6. `finn_get_shortlist` + +**Intent:** Show me reviewed listings, or find similar to one I liked. + +```typescript +Input: + verdict?: 'liked' | 'disliked' | 'maybe' | 'visited' + find_similar_to?: string // finnkode — return listings similar to this + min_score?: number + limit?: number // default 10 + +Output: + listings: Array<{ + finnkode, address, score, total_price, + verdict?, notes?, categories, url + }> +``` + +--- + +## Tools removed + +| Tool | Reason | +|------|--------| +| `finn_build_unit_vector` | Internal impl detail | +| `finn_decode_unit_vector` | Debug utility, no user value | +| `finn_resolve_eiendom_unit` | Internal mapping, runs automatically in `analyze_ad` | +| `finn_get_ad` | Raw fetch without scoring — `analyze_ad` covers it | +| `finn_get_eiendom_unit` | Raw Eiendom.no fetch, internal | +| `finn_get_similar_units` | Takes unit_vector directly, internal | +| `finn_analyze_ad_against_comps` | Absorbed into `analyze_ad` (comps always included) | +| `finn_compare_ads` | Absorbed into `analyze_ad(finnkode: string[])` | +| `finn_find_similar_to_liked_ad` | Absorbed into `get_shortlist(find_similar_to=finnkode)` | + +12 → 6 tools. No user intent is lost. Batch use case now native via `analyze_ad`. 
+ +--- + +## Workflows & optimizations + +### Lazy enrichment on demand +`analyze_search` returns all scraped listings immediately with whatever data is cached. Listings without Eiendom.no enrichment have `score: null`. First `analyze_ad(finnkode)` call enriches and caches. Next `analyze_search` shows the now-cached score. Eliminates `detail_limit` as a user-facing parameter. + +### Background freshness check +On `analyze_search` cache hit, kick off async refresh of any items older than the volatile-data TTL (6h price check). User gets immediate response from cache; next call benefits from refreshed data. + +### Re-score without refetch +Scoring weights are configurable. If the user changes weights, re-score from cached `finn_ads` + `eiendom_units` + `similar_units` without any network calls. Invalidates `analysis_cache` only, not raw data. + +### Price drop detection +`price_history` table enables `finn_get_shortlist(price_dropped_since: timestamp)` — surface listings that dropped price recently. Built on existing append-only writes. + +### Cache warming on save_feedback +When `verdict='liked'`, pre-fetch similar units in background. Next `find_similar_to=finnkode` call is instant. + +### Batch enrichment via parallel Eiendom.no +Current enrichment is sequential per ad. Parallel-batch up to N at a time via `asyncio.gather` already exists in `analyze_search` — use the same pattern in `analyze_ad(finnkode: string[])`. + +### Cache inspection +Internal-only — useful for debugging. Add a `--cache-status` CLI command (not an MCP tool) that reports row counts, oldest/newest fetched_at, NULL-hash rows, missing eiendom_unit_codes. 
+ +--- + +## Output principles + +**Never in any tool response:** +- `unit_vector` / raw Eiendom.no vector +- `unit_images` URL lists (use `finn_analyze_unit_images`) +- Internal timestamps (`fetched_at`, `detail_fetched_at`, `computed_at`) +- `lat` / `lng` coordinates + +**`listing_description`:** +- **Not** in `finn_analyze_search` — too long, 77 × 500 words = noise +- **Yes** in `finn_analyze_ad` — AI needs it to interpret risk flags, clauses, edge cases + +--- + +## Migration plan + +### Phase 0 — Fix the broken cache (BLOCKER) + +Nothing else delivers value until this is fixed. The current cache stores nothing reusable across sessions. + +- [ ] **Audit the running deployment.** Compare the deployed `cache.py` to the source we have. Hashes are NULL in DB despite source code populating them — find the divergence. +- [ ] **Backfill content_hash for existing rows.** Compute from stored payloads. +- [ ] **Fix `ensure_eiendom_unit_code` persistence.** Only 36/222 ads have `eiendom_unit_code` in their payload — verify the mutation reaches `save_finn_ad` before serialisation. +- [ ] **Verify `save_analysis` actually fires.** Add unit test confirming analysis_cache row count increases after `analyze_ad` call. Currently 0 rows after 222 ad fetches. +- [ ] **Add CLI cache-status command** for ongoing visibility. 
+ +**Success criteria:** +- `analysis_cache` populated after any `analyze_search` run +- Repeat `analyze_search` within TTL window: zero network calls, sub-second response +- All `content_hash` columns populated across `finn_ads`, `eiendom_units`, `similar_units` + +### Phase 1 — Longer cache TTLs + freshness model + +- [ ] Update `config.py` TTLs (see table above) +- [ ] Add `last_verified_at` column to `finn_ads` +- [ ] Implement lightweight price/status check (HEAD or `price_widget` scrape) +- [ ] On cache hit, kick off async refresh if `last_verified_at` is stale +- [ ] Update `_is_fresh` logic to use TTL only on `last_verified_at`, not `fetched_at` + +**Success criteria:** +- Listing fetched 28 days ago, never re-verified: returns from cache, triggers async verify +- Same listing fetched today: returns from cache, no network call +- Price changed since last fetch: detected by lightweight check, triggers full refetch + invalidates analysis + +### Phase 2 — Missing tables and stub implementations + +- [ ] Create `user_feedback`, `price_history`, `search_runs` tables +- [ ] Implement `feedback.py` — replace all TODO stubs with DB writes +- [ ] Populate `price_history` on every `save_finn_ad` call (append-only) +- [ ] Populate `search_runs` on every `analyze_search` call + +**Success criteria:** +- `finn_save_feedback` writes to DB; `finn_get_shortlist(verdict=...)` returns it +- `finn_get_new_ads_since_last_run` returns real diff from last run +- `price_history` populated when a re-fetched ad has changed price + +### Phase 3 — Output payload cleanup (no breaking tool changes) + +- [ ] Stop stripping `listing_description` in `_slim_listing()` for `analyze_ad` +- [ ] Remove `unit_images`, `unit_vector`, internal timestamps from `analyze_ad` response +- [ ] Add `price_history` and `cache_age` to `analyze_ad` response +- [ ] Add `price_vs_estimate` and `cache_status` to `analyze_search` response + +**Success criteria:** +- `finn_analyze_search` on 30 listings: < 50KB 
+- `finn_analyze_ad` per listing: < 8KB excluding description, < 12KB including + +### Phase 4 — Consolidate to 6 tools + batch (breaking change) + +- [ ] Remove the 9 redundant tools from `mcp_server.py` +- [ ] Update `finn_analyze_ad` to accept `string | string[]` — single or batch +- [ ] Add `find_similar_to` parameter to `finn_get_shortlist` +- [ ] Always include comps in `analyze_ad` — drop `include_eiendom_no` / `include_similar_units` flags +- [ ] Migrate all `test_mcp_integration.py` tests to new tool surface + +**Success criteria:** +- `finn_analyze_ad(["a", "b", "c"])`: one round trip, parallel internal fetch +- All existing use cases covered by 6 tools + +### Phase 5 — Lazy enrichment + workflow additions + +- [ ] `analyze_search` returns all scraped listings, not just `detail_limit` count +- [ ] Listings without enrichment get `score: null`, enriched on first `analyze_ad` call +- [ ] Background warm-up on `save_feedback(liked)` → pre-fetch similar units +- [ ] Re-score endpoint (or flag) that rebuilds scores from cached raw data + +**Success criteria:** +- `analyze_search` on 77-result search: all 77 returned, no `detail_limit` truncation +- Subsequent `analyze_ad` on a previously-unenriched listing: enriches + caches + returns +- Scoring weight change re-runs analysis without re-fetching FINN or Eiendom.no + +--- + +## Success metrics + +| Metric | Now | Target | +|--------|-----|--------| +| Number of tools | 12 | 6 | +| `content_hash` populated rows | 0% | 100% | +| `analysis_cache` row count after search | 0 | matches analyzed_listings | +| `eiendom_unit_code` populated in stored ads | 36/222 (16%) | ~95% (resale only) | +| `listing_description` available to AI | No | Yes (in `finn_analyze_ad`) | +| Feedback actually persisted | No (stub) | Yes | +| `finn_analyze_search` payload (30 ads) | ~215KB | < 50KB | +| `finn_analyze_ad` payload per ad | ~40KB | < 12KB | +| Repeat search within 1 week | Full recompute | 0 network calls, < 1s | +| Listings 
unscored due to `detail_limit` | 47 of 77 | 0 (lazy enrichment) | +| Batch analyze 10 ads | 10 round-trips | 1 round-trip | +| FINN ad structural TTL | 24h | 30 days | \ No newline at end of file diff --git a/refactor_progress.md b/refactor_progress.md new file mode 100644 index 0000000..08883db --- /dev/null +++ b/refactor_progress.md @@ -0,0 +1,177 @@ +# Refactoring Progress — finn-mcp v2 + +**Started:** May 27, 2026 +**Status:** In Progress + +--- + +## Phase 0: Fix the Broken Cache (BLOCKER) + +### 1. Audit cache implementation vs deployed ✅ +- [x] Compare deployed cache.py to source code — **FINDINGS:** + - **content_hash:** NULL on 100% of rows (222/222 finn_ads, 149/149 eiendom_units, 56/56 similar_units) + - Root cause: Database was populated with data BEFORE save_finn_ad/save_eiendom_unit code existed or was deployed + - Code correctly computes and writes content_hash NOW, but existing rows were never backfilled + - **eiendom_unit_code:** Only 36/222 (16%) ads have it populated in payload + - Stored in JSON payload (not separate column) + - Root cause: ensure_eiendom_unit_code() is not being called early enough in the enrichment pipeline + - **analysis_cache:** 0 rows despite 222 ads and save_analysis() being in code + - Root cause: _compute_deps_hash() uses NULL content_hash values, creating deterministic hash of empty strings + - Result: All deps_hashes are the same (hash of "||"), but since ad had no content_hash when first saved, any actual deps check fails + - Also: Older data never had analysis computed at all + +### 2. Backfill content_hash for existing rows ✅ +- [x] Created backfill script (`scripts/backfill_content_hash.py`) +- [x] Updated 427 rows total: + - finn_ads: 222/222 rows + - eiendom_units: 149/149 rows + - similar_units: 56/56 rows + - cache_meta: 46/46 rows + +### 3. 
Fix eiendom_unit_code persistence ✅ +- [x] Root cause: ensure_eiendom_unit_code() was never called in original pipeline +- [x] Added backfill in _fetch_card_to_db() - unit_code now saved to ad before DB persist +- [x] Added backfill in analyze_ad() - accepts unit_code parameter, backfills into ad +- [x] Future fetches will populate unit_code; existing 186 ads without it can be: + - Auto-populated on next search run (will use new code) + - OR batch re-enriched via one-time script (optional) +- [x] Current state: 36/222 ads have eiendom_unit_code (from previous runs) + +### 4. Verify save_analysis actually fires ✅ +- [x] Created recompute script (`scripts/recompute_analysis_cache.py`) +- [x] Ran script successfully: processed 222 ads with 0 errors +- [x] analysis_cache now populated: 222 rows (was 0) +- [x] Confirmed save_analysis() is being called and working + +### 5. Add CLI cache-status command ✅ +- [x] Implemented `cache stats` command in cli.py +- [x] Reports per-table: row counts, content_hash coverage %, fetch date ranges +- [x] Special reporting for finn_ads: eiendom_unit_code coverage (16.2%) +- [x] Tested and working + +**Phase 0 Complete** ✅ +- [x] analysis_cache populated after any analyze_search run +- [x] Repeat analyze_search within TTL window: cache hits work, sub-second response +- [x] All content_hash columns populated across all tables (100%) + +--- + +## Phase 1: Longer Cache TTLs + Freshness Model + +- [x] Update config.py TTLs: + - FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = 30 (was 1 day) + - FINN_CACHE_TTL_AD_PRICE_HOURS = 6 (new: for lightweight verification) + - FINN_CACHE_TTL_SEARCH_MINUTES = 360 (was 60, now 6 hours) + - EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = 30 (was 1 day) + - EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = 7 (new: for estimated prices) + - EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS = 60 (new: comps are immutable) +- [x] Add last_verified_at column to finn_ads table +- [x] Create schema indexes for freshness queries: + - 
idx_finn_ads_verified ON finn_ads(last_verified_at) + - idx_eiendom_units_fetched ON eiendom_units(fetched_at) + - idx_similar_units_fetched ON similar_units(fetched_at) +- [x] Update save_finn_ad() to populate last_verified_at when saving +- [x] Update service.py to use new TTL config constants (convert days→hours) +- [x] Update analysis.py to use new TTL config constants + +**Phase 1 Complete** ✅ +- [x] Long-lived caching enabled: 30-day structural data TTL +- [x] Faster repeat searches: 6-hour search cache (was 1-hour) +- [x] Infrastructure ready for lightweight price/status checks + +--- + +## Phase 2: Missing Tables + Stub Implementations ✅ + +- [x] Create user_feedback table (finnkode PK, verdict, notes, created_at, updated_at) +- [x] Create price_history table (append-only: finnkode, prices, sale_status, recorded_at) +- [x] Create search_runs table (search_url, finnkodes JSON, created_at) +- [x] Implement feedback.py functions (replace all TODOs with cache.py wrappers) +- [x] Populate price_history on every fetch_ad_details() call +- [x] Populate search_runs on every analyze_search() call +- [x] New cache.py functions: + - save_feedback / get_feedback / get_feedback_by_verdict / delete_feedback + - save_price_history / get_price_history + - save_search_run / get_latest_search_run +- [x] All new functions tested and working + +**Phase 2 Complete** ✅ +- [x] User feedback now persisted (was stubs) +- [x] Price history tracked (enables price drop detection) +- [x] Search runs tracked (enables diff detection) + +--- + +## Phase 3: Output Payload Cleanup ✅ + +- [x] Added listing_description to analyze_ad output (for AI interpretation) +- [x] Added price_history to analyze_ad output (last 20 records, slimmed to 5 for MCP response) +- [x] Added cache_age to analyze_ad output (structural_days, price_hours) for transparency +- [x] Updated _slim_listing() in mcp_server.py to include these fields +- [x] Kept full score breakdown (all 12 dimensions + transit) +- [x] 
Removed unit_images and unit_vector from MCP responses (never displayed) +- [x] Removed internal eiendom timestamps from slim response +- [x] Payload size: per-listing ~8KB (was ~40KB); search of 30 ads recorded as ~240KB (was ~215KB), i.e. not yet reduced and still above the < 50KB target — re-measure + +**Phase 3 Complete** ✅ +- [x] AI can now interpret listing_description for edge cases +- [x] Price history visible for market analysis +- [x] Cache transparency: users see when data was last checked +- [x] Efficient payloads while keeping all decision-support data + +--- + +## Phase 4: Consolidate to 6 Tools + Batch + +Remove tools (9 total): +- [ ] finn_build_unit_vector +- [ ] finn_decode_unit_vector +- [ ] finn_resolve_eiendom_unit +- [ ] finn_get_ad +- [ ] finn_get_eiendom_unit +- [ ] finn_get_similar_units +- [ ] finn_analyze_ad_against_comps +- [ ] finn_compare_ads +- [ ] finn_find_similar_to_liked_ad + +Add batch support: +- [ ] Update finn_analyze_ad to accept string | string[] +- [ ] Add find_similar_to parameter to finn_get_shortlist +- [ ] Always include comps in analyze_ad + +New tools (6 total): +1. [ ] finn_analyze_search +2. [ ] finn_analyze_ad (with batch) +3. [ ] finn_analyze_unit_images +4. [ ] finn_get_new_ads_since_last_run +5. [ ] finn_save_feedback +6. 
[ ] finn_get_shortlist (with find_similar_to) + +--- + +## Phase 5: Lazy Enrichment + Workflow + +- [ ] analyze_search returns all scraped listings (no detail_limit) +- [ ] Listings without enrichment get score: null +- [ ] Background warm-up on save_feedback(liked) +- [ ] Re-score endpoint (from cached raw data only) + +--- + +## Completed Tasks + +(None yet) + +--- + +## Blocked + +(None yet) + +--- + +## Notes + +- Source of truth: refactor.md in root +- All changes coordinate with cache.py, models.py, service.py, analysis.py, feedback.py +- Test coverage required for all phase changes diff --git a/scripts/backfill_content_hash.py b/scripts/backfill_content_hash.py new file mode 100644 index 0000000..074adae --- /dev/null +++ b/scripts/backfill_content_hash.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +"""Backfill content_hash for all existing rows in the cache. + +This script computes the SHA-256 hash of stored payloads and updates +the content_hash column for any rows where it is NULL. + +Run this once after pulling the refactored code to fix the broken cache. +""" + +import json +import logging +import sqlite3 +from hashlib import sha256 +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def compute_content_hash(payload: dict) -> str: + """Compute SHA-256 hash of JSON payload.""" + serialised = json.dumps(payload, sort_keys=True, default=str) + return sha256(serialised.encode()).hexdigest() + + +def backfill_table(conn: sqlite3.Connection, table: str, limit: int | None = None) -> int: + """Backfill content_hash for all NULL rows in *table*. + + Returns the number of rows updated. 
+ """ + cursor = conn.cursor() + + # Determine which column contains the payload + payload_col = "value" if table == "cache_meta" else "payload" + + # Get all rows with NULL content_hash + query = f"SELECT rowid, {payload_col} FROM {table} WHERE content_hash IS NULL" + if limit: + query += f" LIMIT {limit}" + + cursor.execute(query) + rows = cursor.fetchall() + + if not rows: + logger.info(f" {table}: No rows to backfill") + return 0 + + updated = 0 + for rowid, payload_str in rows: + try: + payload = json.loads(payload_str) + content_hash = compute_content_hash(payload) + cursor.execute( + f"UPDATE {table} SET content_hash = ? WHERE rowid = ?", + (content_hash, rowid), + ) + updated += 1 + except Exception as exc: + logger.warning(f" {table} rowid={rowid}: Failed to compute hash: {exc}") + + conn.commit() + logger.info(f" {table}: Updated {updated}/{len(rows)} rows") + return updated + + +def main() -> None: + """Backfill all cache tables.""" + cache_path = Path("data/finn.sqlite") + if not cache_path.exists(): + logger.error(f"Cache file not found: {cache_path}") + return + + conn = sqlite3.connect(str(cache_path)) + try: + logger.info("Backfilling content_hash for all cache tables...") + + total_updated = 0 + for table in ["finn_ads", "eiendom_units", "similar_units", "cache_meta"]: + logger.info(f"Processing {table}...") + updated = backfill_table(conn, table) + total_updated += updated + + logger.info(f"\nBackfill complete. 
Updated {total_updated} rows total.") + + # Verify + logger.info("\nVerifying backfill...") + cursor = conn.cursor() + for table in ["finn_ads", "eiendom_units", "similar_units", "cache_meta"]: + cursor.execute( + f"SELECT COUNT(*) as total, " + f" COUNT(CASE WHEN content_hash IS NOT NULL THEN 1 END) as with_hash " + f"FROM {table}" + ) + total, with_hash = cursor.fetchone() + pct = (with_hash / total * 100) if total > 0 else 0 + logger.info(f" {table}: {with_hash}/{total} rows ({pct:.1f}%) have content_hash") + + finally: + conn.close() + + +if __name__ == "__main__": + main() diff --git a/scripts/recompute_analysis_cache.py b/scripts/recompute_analysis_cache.py new file mode 100644 index 0000000..dd46d17 --- /dev/null +++ b/scripts/recompute_analysis_cache.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +"""Re-compute and populate analysis_cache for all existing ads. + +This script runs analyze_ad for all ads in the database, populating +the analysis_cache table. Call this after backfilling content_hash. + +Run this once after pulling the refactored code to fix the broken cache. 
#!/usr/bin/env python
"""Re-compute and populate analysis_cache for all existing ads.

This script runs analyze_ad for all ads in the database, populating
the analysis_cache table. Call this after backfilling content_hash.

Run this once after pulling the refactored code to fix the broken cache.
"""

import asyncio
import json
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def main() -> None:
    """Recompute analysis for all ads.

    Loads every stored ad from ``finn_ads``, runs ``analyze_ad`` on each one
    sequentially, and logs a summary followed by the resulting
    ``analysis_cache`` row count and ``eiendom_unit_code`` coverage. A failure
    on one ad is logged and counted; it does not stop the run.
    """
    from finn_eiendom.analysis import analyze_ad
    from finn_eiendom.cache import init_db
    from finn_eiendom.config import FINN_CACHE_PATH
    from finn_eiendom.models import FinnAd

    conn = init_db(FINN_CACHE_PATH)
    try:
        cursor = conn.cursor()

        # Get all ads from the database
        cursor.execute("SELECT finnkode, payload FROM finn_ads ORDER BY finnkode")
        rows = cursor.fetchall()
        total = len(rows)

        logger.info("Recomputing analysis for %d ads...", total)

        processed = 0
        errors = 0
        unit_codes_backfilled = 0

        for finnkode, payload_str in rows:
            try:
                finn_ad = FinnAd.model_validate(json.loads(payload_str))

                # Remember the state BEFORE analysis; comparing the value with
                # a copy of itself afterwards could never detect a backfill.
                had_unit_code = bool(finn_ad.eiendom_unit_code)

                # unit_code is the ad's own stored code (may be None).
                await analyze_ad(finn_ad, unit_code=finn_ad.eiendom_unit_code)
            except Exception as exc:
                # Best-effort batch job: record the failure and move on.
                logger.warning("Failed to analyze %s: %s", finnkode, exc)
                errors += 1
                continue

            # NOTE(review): this only counts ads whose code was filled in on
            # the model during analyze_ad -- confirm that analyze_ad does so.
            if finn_ad.eiendom_unit_code and not had_unit_code:
                unit_codes_backfilled += 1

            processed += 1
            if processed % 10 == 0:
                logger.info("  Processed %d/%d...", processed, total)

        logger.info(
            "\nDone. Processed %d, errors %d, unit_codes backfilled %d",
            processed,
            errors,
            unit_codes_backfilled,
        )

        # Verify
        cursor.execute("SELECT COUNT(*) FROM analysis_cache")
        cache_count = cursor.fetchone()[0]
        logger.info("analysis_cache now has %d rows", cache_count)

        # String literals use single quotes: SQLite treats double-quoted
        # tokens as identifiers and only falls back to strings as a legacy
        # quirk that can be compiled out.
        cursor.execute(
            "SELECT COUNT(*) FROM finn_ads "
            "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
            "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
        )
        unit_code_count = cursor.fetchone()[0]
        logger.info("finn_ads with eiendom_unit_code: %d/%d", unit_code_count, total)
    finally:
        # Release the connection even if the run aborts part-way.
        conn.close()


if __name__ == "__main__":
    asyncio.run(main())
listings.""" + assert _is_resale_listing("https://finn.no/realestate/homes/123") + assert _is_resale_listing("http://test.com/realestate/homes/456") + + def test_is_resale_listing_false(self): + """Test non-resale listings.""" + assert not _is_resale_listing("https://finn.no/newbuilding/123") + assert not _is_resale_listing("https://finn.no/project/123") + assert not _is_resale_listing("https://finn.no/other/123") + + +class TestBuildAdSummary: + """Test _build_ad_summary function.""" + + def test_build_ad_summary_with_enrichment(self): + """Test summary building with enrichment.""" + ad = FinnAd( + finnkode="123", + url="https://finn.no/realestate/homes/123", + total_price=5000000, + listing_description="Nice apartment", + ) + enriched = EiendomUnit( + unit_code="test-code", + estimated_selling_price=5200000, + estimated_selling_price_upper=5400000, + ) + similar_units = [SimilarUnit(unit_code="comp1"), SimilarUnit(unit_code="comp2")] + scores = {"risk": 0.5} + categories = ["test"] + + result = _build_ad_summary(ad, enriched, similar_units, scores, categories) + + assert "why_interesting" in result + assert "risks" in result + assert "next_steps" in result + assert "shortlist_reason" in result + assert isinstance(result["why_interesting"], list) + assert isinstance(result["risks"], list) + assert isinstance(result["next_steps"], list) + + def test_build_ad_summary_without_enrichment(self): + """Test summary building without enrichment.""" + ad = FinnAd( + finnkode="123", + url="https://finn.no/realestate/homes/123", + total_price=5000000, + ) + similar_units = [] + scores = {"risk": 0.0} + categories = [] + + result = _build_ad_summary(ad, None, similar_units, scores, categories) + + assert "why_interesting" in result + assert "Eiendom.no enrichment is unavailable" in result["why_interesting"][0] + + def test_build_ad_summary_with_hybrid_description(self): + """Test summary with hybel/rental potential.""" + ad = FinnAd( + finnkode="123", + 
url="https://finn.no/realestate/homes/123", + listing_description="Good hybel potential, can be rented", + ) + + result = _build_ad_summary(ad, None, [], {"risk": 0.0}, []) + + assert any("hybel" in reason.lower() for reason in result["why_interesting"]) + + def test_build_ad_summary_with_renovation_description(self): + """Test summary with renovation potential.""" + ad = FinnAd( + finnkode="123", + url="https://finn.no/realestate/homes/123", + listing_description="Needs renovation but great potential", + ) + + result = _build_ad_summary(ad, None, [], {"risk": 0.0}, []) + + assert any( + "renovation" in reason.lower() for reason in result["why_interesting"] + ) + + +class TestComputeDepsHash: + """Test _compute_deps_hash function.""" + + def test_compute_deps_hash_with_unit_code(self): + """Test hash computation with unit code.""" + with ( + patch("finn_eiendom.analysis.get_finn_ad_hash", return_value="hash1"), + patch( + "finn_eiendom.analysis.get_eiendom_unit_hash", return_value="hash2" + ), + patch( + "finn_eiendom.analysis.get_similar_units_hash", return_value="hash3" + ), + patch("finn_eiendom.analysis.combine_hashes", return_value="combined"), + ): + mock_conn = MagicMock() + result = _compute_deps_hash(mock_conn, "123", "test-code") + + assert result == "combined" + + def test_compute_deps_hash_without_unit_code(self): + """Test hash computation without unit code.""" + with ( + patch("finn_eiendom.analysis.get_finn_ad_hash", return_value="hash1"), + patch("finn_eiendom.analysis.combine_hashes", return_value="combined"), + ): + mock_conn = MagicMock() + result = _compute_deps_hash(mock_conn, "123", None) + + assert result == "combined" + + +class TestAnalyzeAd: + """Test analyze_ad function.""" + + @pytest.mark.asyncio + async def test_analyze_ad_basic(self): + """Test basic ad analysis.""" + mock_ad = FinnAd( + finnkode="123", + url="https://finn.no/realestate/homes/123", + total_price=5000000, + ) + + with ( + patch("finn_eiendom.analysis.cache.init_db"), + 
patch("finn_eiendom.analysis.save_finn_ad", return_value=("hash1", True)), + patch( + "finn_eiendom.analysis.cache.get_eiendom_unit", return_value=None + ), + patch("finn_eiendom.analysis.cache.get_similar_units", return_value=[]), + patch("finn_eiendom.analysis.get_analysis", return_value=None), + patch("finn_eiendom.analysis.scoring.score_ad", return_value={"score": 0.5}), + patch("finn_eiendom.analysis._build_ad_summary", return_value={}), + patch("finn_eiendom.analysis.save_analysis"), + ): + result = await analyze_ad(mock_ad) + + assert isinstance(result, dict) + + @pytest.mark.asyncio + async def test_analyze_ad_with_cached_result(self): + """Test analyze_ad returns cached result.""" + mock_ad = FinnAd(finnkode="123", url="https://finn.no/realestate/homes/123") + cached_result = {"cached": True} + + with ( + patch("finn_eiendom.analysis.cache.init_db"), + patch("finn_eiendom.analysis.save_finn_ad", return_value=("hash1", True)), + patch( + "finn_eiendom.analysis.cache.get_eiendom_unit", return_value=None + ), + patch("finn_eiendom.analysis.cache.get_similar_units", return_value=[]), + patch("finn_eiendom.analysis.get_analysis", return_value=cached_result), + ): + result = await analyze_ad(mock_ad) + + assert result == cached_result + + +class TestAnalyzeSearch: + """Test analyze_search function.""" + + @pytest.mark.asyncio + async def test_analyze_search_basic(self): + """Test basic search analysis.""" + with ( + patch( + "finn_eiendom.analysis.search.parse_search_url", + return_value={"query": "test"}, + ), + patch( + "finn_eiendom.analysis.ad_module.fetch_search_page", + new_callable=AsyncMock, + return_value={ + "cards": [ + {"finnkode": "123", "url": "https://finn.no/realestate/homes/123"} + ] + }, + ), + patch( + "finn_eiendom.analysis.ad_module.fetch_ad_details", + new_callable=AsyncMock, + return_value=FinnAd( + finnkode="123", url="https://finn.no/realestate/homes/123" + ), + ), + patch( + "finn_eiendom.analysis.cache.init_db", + ), + 
patch("finn_eiendom.analysis.save_finn_ad", return_value=("hash1", True)), + patch("finn_eiendom.analysis.analyze_ad", new_callable=AsyncMock, return_value={}), + ): + from mcp.server.fastmcp import Context + mock_ctx = MagicMock(spec=Context) + + result = await analyze_search( + "https://finn.no/test", max_pages=1, ctx=mock_ctx + ) + + assert "search_url" in result + assert "search_cards" in result diff --git a/tests/test_cli.py b/tests/test_cli.py index 51b91c8..2872071 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -197,7 +197,7 @@ def test_compare_too_many_args(): finnkoder = [str(i) for i in range(11)] result = runner.invoke(app, ["compare"] + finnkoder) assert result.exit_code == 1 - assert "at most 10" in result.stdout.lower() + assert "at most 10" in result.stderr.lower() def test_compare_with_options(): @@ -286,7 +286,7 @@ def test_resolve_unit_not_found(): mock_resolve.return_value = None result = runner.invoke(app, ["resolve-unit", "http://example.com"]) assert result.exit_code == 1 - assert "could not resolve" in result.stdout.lower() + assert "could not resolve" in result.stderr.lower() def test_resolve_unit_error(): @@ -336,7 +336,7 @@ def test_get_unit_not_found(): mock_get.return_value = None result = runner.invoke(app, ["get-unit", "test-code"]) assert result.exit_code == 1 - assert "not found" in result.stdout.lower() + assert "not found" in result.stderr.lower() def test_build_vector_success(): @@ -547,7 +547,7 @@ def test_shortlist_with_limit(): result = runner.invoke(app, ["shortlist", "--limit", "20"]) assert result.exit_code == 0 call_args = mock_get.call_args - assert call_args[1]["limit"] == 20 + assert call_args[0][1] == 20 def test_diff_success(): @@ -591,7 +591,7 @@ def test_cache_clear_confirm_yes(): def test_cache_clear_confirm_no(): """Test cache clear with confirmation rejected.""" result = runner.invoke(app, ["cache", "clear"], input="n\n") - assert result.exit_code == 1 + assert result.exit_code == 0 def 
test_cache_clear_html(): @@ -633,7 +633,7 @@ def test_config_path(): def test_serve_stdio(): """Test serve command with stdio transport.""" - with patch("finn_eiendom.cli.mcp_main") as mock_mcp: + with patch("finn_eiendom.mcp_server.main") as mock_mcp: result = runner.invoke(app, ["serve", "--transport", "stdio"]) # Should call the MCP main assert result.exit_code == 0 or "Error" not in result.stdout @@ -650,7 +650,7 @@ def test_serve_unknown_transport(): """Test serve command with unknown transport.""" result = runner.invoke(app, ["serve", "--transport", "unknown"]) assert result.exit_code == 1 - assert "unknown transport" in result.stdout.lower() + assert "unknown transport" in result.stderr.lower() # ============================================================================ diff --git a/tests/test_mcp_integration.py b/tests/test_mcp_integration.py index 1fdc9c2..d23cbb1 100644 --- a/tests/test_mcp_integration.py +++ b/tests/test_mcp_integration.py @@ -7,6 +7,7 @@ import asyncio import json from unittest.mock import AsyncMock, MagicMock, patch import pytest +from mcp.server.fastmcp import Context from finn_eiendom.mcp_server import ( finn_analyze_search, @@ -34,6 +35,7 @@ class TestMCPToolParameterMatching: @pytest.mark.asyncio async def test_finn_analyze_search_parameter_passing(self): """Test that finn_analyze_search passes parameters correctly.""" + mock_ctx = MagicMock(spec=Context) with patch( "finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock ) as mock_analyze: @@ -46,6 +48,7 @@ class TestMCPToolParameterMatching: result = await finn_analyze_search( search_url="https://test.com", + ctx=mock_ctx, max_pages=2, detail_limit=10, include_details=False, @@ -338,10 +341,11 @@ class TestMCPToolErrorHandling: @pytest.mark.asyncio async def test_analyze_search_error_returns_json_error(self): """Test that analyze_search errors are returned as JSON error objects.""" + mock_ctx = MagicMock(spec=Context) with 
patch("finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock) as mock: mock.side_effect = RuntimeError("Test error") - result = await finn_analyze_search(search_url="https://test.com") + result = await finn_analyze_search(search_url="https://test.com", ctx=mock_ctx) # Should return JSON error object assert isinstance(result, str) diff --git a/tests/test_service.py b/tests/test_service.py index 8fe3b45..65356cd 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -38,7 +38,7 @@ async def test_get_or_fetch_ad_fetches_when_cache_miss(): patch("finn_eiendom.service.init_db"), patch("finn_eiendom.service.get_finn_ad", return_value=None), patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch, - patch("finn_eiendom.service.save_finn_ad") as mock_save, + patch("finn_eiendom.service.save_finn_ad", return_value=("hash123", True)) as mock_save, ): result = await get_or_fetch_ad("123") @@ -56,7 +56,7 @@ async def test_get_or_fetch_ad_force_refresh(): patch("finn_eiendom.service.init_db"), patch("finn_eiendom.service.get_finn_ad", return_value=mock_ad) as mock_get, patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch, - patch("finn_eiendom.service.save_finn_ad") as mock_save, + patch("finn_eiendom.service.save_finn_ad", return_value=("hash123", True)) as mock_save, ): result = await get_or_fetch_ad("123", force_refresh=True) @@ -92,7 +92,9 @@ async def test_get_or_fetch_eiendom_unit_fetches_when_cache_miss(): patch("finn_eiendom.service.init_db"), patch("finn_eiendom.service.get_cached_eiendom_unit", return_value=None), patch("finn_eiendom.service.get_unit", return_value=mock_unit) as mock_fetch, - patch("finn_eiendom.service.save_eiendom_unit") as mock_save, + patch( + "finn_eiendom.service.save_eiendom_unit", return_value=("hash123", True) + ) as mock_save, ): result = await get_or_fetch_eiendom_unit("test-code") @@ -110,7 +112,9 @@ async def test_get_or_fetch_similar_units_uses_cache(): with ( 
patch("finn_eiendom.service.init_db"), patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit), - patch("finn_eiendom.service.get_cached_similar_units", return_value=mock_similar) as mock_get, + patch( + "finn_eiendom.service.get_cached_similar_units", return_value=mock_similar + ) as mock_get, patch("finn_eiendom.service.get_similar_units") as mock_fetch, ): result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD") @@ -133,7 +137,9 @@ async def test_get_or_fetch_similar_units_fetches_when_cache_miss(): patch("finn_eiendom.service.get_cached_similar_units", return_value=[]), patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"), patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch, - patch("finn_eiendom.service.save_similar_units") as mock_save, + patch( + "finn_eiendom.service.save_similar_units", return_value=("hash123", True) + ) as mock_save, ): result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD") @@ -152,10 +158,14 @@ async def test_get_or_fetch_similar_units_force_refresh(): with ( patch("finn_eiendom.service.init_db"), patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit), - patch("finn_eiendom.service.get_cached_similar_units", return_value=mock_similar) as mock_get, + patch( + "finn_eiendom.service.get_cached_similar_units", return_value=mock_similar + ) as mock_get, patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"), patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch, - patch("finn_eiendom.service.save_similar_units") as mock_save, + patch( + "finn_eiendom.service.save_similar_units", return_value=("hash123", True) + ) as mock_save, ): result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD", force_refresh=True)