feat(refactor): Document refactoring progress and phases in markdown
feat(scripts): Add backfill script for content_hash in cache tables
feat(scripts): Create recompute script for analysis_cache population
test(tests): Implement comprehensive tests for analysis module functions
fix(tests): Update CLI tests to assert errors on stderr instead of stdout
fix(tests): Adjust MCP integration tests to pass context parameter correctly
fix(tests): Modify service tests to return hash on save functions for consistency
This commit is contained in:
Vendored
+1
-6
@@ -4,12 +4,7 @@
|
|||||||
"type": "http",
|
"type": "http",
|
||||||
"url": "https://mcp.context7.com/mcp",
|
"url": "https://mcp.context7.com/mcp",
|
||||||
},
|
},
|
||||||
"mcp-jungle":{
|
"finn-mcp": {
|
||||||
"type": "http",
|
|
||||||
"url": "http://mini:8080/mcp",
|
|
||||||
},
|
|
||||||
// "finn-eiendom": { }
|
|
||||||
"finn-eiendom": {
|
|
||||||
"command": "/root/projects/finn-mcp/.venv/bin/python",
|
"command": "/root/projects/finn-mcp/.venv/bin/python",
|
||||||
"args": [
|
"args": [
|
||||||
"-m",
|
"-m",
|
||||||
|
|||||||
Vendored
+2
-1
@@ -24,6 +24,7 @@
|
|||||||
"/root/projects/finn-mcp/.venv/bin/python": true,
|
"/root/projects/finn-mcp/.venv/bin/python": true,
|
||||||
"make": true,
|
"make": true,
|
||||||
".venv/bin/coverage": true,
|
".venv/bin/coverage": true,
|
||||||
".venv/bin/pytest": true
|
".venv/bin/pytest": true,
|
||||||
|
"python -m pytest": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -14,7 +14,7 @@ services:
|
|||||||
|
|
||||||
# Expose port for network access
|
# Expose port for network access
|
||||||
ports:
|
ports:
|
||||||
- "8010:8010"
|
- "8011:8010"
|
||||||
|
|
||||||
# More aggressive resource limits for production
|
# More aggressive resource limits for production
|
||||||
# deploy:
|
# deploy:
|
||||||
|
|||||||
@@ -32,12 +32,14 @@ from .cache import (
|
|||||||
save_analysis,
|
save_analysis,
|
||||||
save_eiendom_unit,
|
save_eiendom_unit,
|
||||||
save_finn_ad,
|
save_finn_ad,
|
||||||
|
save_search_run,
|
||||||
save_similar_units,
|
save_similar_units,
|
||||||
)
|
)
|
||||||
from .config import (
|
from .config import (
|
||||||
EIENDOM_NO_CACHE_TTL_HOURS,
|
EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS,
|
||||||
|
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS,
|
||||||
FINN_CACHE_PATH,
|
FINN_CACHE_PATH,
|
||||||
FINN_CACHE_TTL_AD_HOURS,
|
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS,
|
||||||
FINN_DETAIL_LIMIT,
|
FINN_DETAIL_LIMIT,
|
||||||
FINN_MAX_SEARCH_PAGES,
|
FINN_MAX_SEARCH_PAGES,
|
||||||
)
|
)
|
||||||
@@ -147,6 +149,12 @@ async def analyze_ad(
|
|||||||
"""
|
"""
|
||||||
conn = cache.init_db(FINN_CACHE_PATH)
|
conn = cache.init_db(FINN_CACHE_PATH)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# 0. Backfill eiendom_unit_code if provided.
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
if unit_code and not finn_ad.eiendom_unit_code:
|
||||||
|
finn_ad.eiendom_unit_code = unit_code
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# 1. Ensure the ad is in the DB so we have a stable hash to key on.
|
# 1. Ensure the ad is in the DB so we have a stable hash to key on.
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -173,8 +181,10 @@ async def analyze_ad(
|
|||||||
comps_hash_changed = False
|
comps_hash_changed = False
|
||||||
|
|
||||||
if enriched:
|
if enriched:
|
||||||
|
# Convert similar units TTL from days to hours
|
||||||
|
ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24
|
||||||
similar_units = cache.get_similar_units(
|
similar_units = cache.get_similar_units(
|
||||||
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
|
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=ttl_hours
|
||||||
)
|
)
|
||||||
if not similar_units:
|
if not similar_units:
|
||||||
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
|
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
|
||||||
@@ -210,11 +220,38 @@ async def analyze_ad(
|
|||||||
categories = scoring.classify_ad(scores)
|
categories = scoring.classify_ad(scores)
|
||||||
summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)
|
summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)
|
||||||
|
|
||||||
|
# Get price history and cache age metadata
|
||||||
|
from .cache import get_price_history, get_finn_ad_hash
|
||||||
|
from datetime import datetime, UTC, timedelta
|
||||||
|
|
||||||
|
price_history = get_price_history(conn, finn_ad.finnkode, limit=20)
|
||||||
|
|
||||||
|
# Compute cache age: how long since we last fetched this ad
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"SELECT fetched_at, last_verified_at FROM finn_ads WHERE finnkode = ?",
|
||||||
|
(finn_ad.finnkode,),
|
||||||
|
)
|
||||||
|
db_row = cursor.fetchone()
|
||||||
|
cache_age = None
|
||||||
|
if db_row:
|
||||||
|
fetched_at = datetime.fromisoformat(db_row["fetched_at"])
|
||||||
|
last_verified = db_row["last_verified_at"]
|
||||||
|
if last_verified:
|
||||||
|
last_verified_at = datetime.fromisoformat(last_verified)
|
||||||
|
structural_age_days = (datetime.now(UTC) - fetched_at).days
|
||||||
|
price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600
|
||||||
|
cache_age = {
|
||||||
|
"structural_days": structural_age_days,
|
||||||
|
"price_hours": round(price_age_hours, 1),
|
||||||
|
}
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"finnkode": finn_ad.finnkode,
|
"finnkode": finn_ad.finnkode,
|
||||||
"url": finn_ad.url,
|
"url": finn_ad.url,
|
||||||
"title": finn_ad.title,
|
"title": finn_ad.title,
|
||||||
"address": finn_ad.address,
|
"address": finn_ad.address,
|
||||||
|
"listing_description": finn_ad.listing_description,
|
||||||
"district": finn_ad.district,
|
"district": finn_ad.district,
|
||||||
"property_type": finn_ad.property_type,
|
"property_type": finn_ad.property_type,
|
||||||
"ownership_type": finn_ad.ownership_type,
|
"ownership_type": finn_ad.ownership_type,
|
||||||
@@ -236,6 +273,8 @@ async def analyze_ad(
|
|||||||
"score": scores,
|
"score": scores,
|
||||||
"categories": categories,
|
"categories": categories,
|
||||||
"summary": summary,
|
"summary": summary,
|
||||||
|
"price_history": price_history,
|
||||||
|
"cache_age": cache_age,
|
||||||
"eiendom_unit": enriched.model_dump(mode="json") if enriched else None,
|
"eiendom_unit": enriched.model_dump(mode="json") if enriched else None,
|
||||||
"similar_units": [unit.model_dump(mode="json") for unit in similar_units],
|
"similar_units": [unit.model_dump(mode="json") for unit in similar_units],
|
||||||
}
|
}
|
||||||
@@ -262,7 +301,7 @@ async def _fetch_card_to_db(
|
|||||||
treats None as a skip without aborting the whole batch.
|
treats None as a skip without aborting the whole batch.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
|
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24)
|
||||||
if finn_ad is None:
|
if finn_ad is None:
|
||||||
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
|
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
|
||||||
save_finn_ad(conn, finn_ad)
|
save_finn_ad(conn, finn_ad)
|
||||||
@@ -275,6 +314,11 @@ async def _fetch_card_to_db(
|
|||||||
try:
|
try:
|
||||||
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
|
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
|
||||||
unit_code = matched_unit.unit_code if matched_unit else None
|
unit_code = matched_unit.unit_code if matched_unit else None
|
||||||
|
# Backfill unit_code into the ad object and persist.
|
||||||
|
# This ensures the cached ad has the eiendom_unit_code field populated.
|
||||||
|
if unit_code and not finn_ad.eiendom_unit_code:
|
||||||
|
finn_ad.eiendom_unit_code = unit_code
|
||||||
|
_, _ = save_finn_ad(conn, finn_ad)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
|
logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
|
||||||
|
|
||||||
@@ -384,6 +428,10 @@ async def analyze_search(
|
|||||||
f"{skipped_count} skipped."
|
f"{skipped_count} skipped."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Record this search run in the database
|
||||||
|
finnkodes = [card.finnkode for card in cards]
|
||||||
|
save_search_run(conn, search_url, finnkodes)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"search_url": search_url,
|
"search_url": search_url,
|
||||||
"search_cards": [card.model_dump(mode="json") for card in cards],
|
"search_cards": [card.model_dump(mode="json") for card in cards],
|
||||||
|
|||||||
+205
-5
@@ -80,12 +80,14 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
|
|||||||
url TEXT,
|
url TEXT,
|
||||||
payload TEXT NOT NULL,
|
payload TEXT NOT NULL,
|
||||||
content_hash TEXT,
|
content_hash TEXT,
|
||||||
fetched_at TEXT NOT NULL
|
fetched_at TEXT NOT NULL,
|
||||||
|
last_verified_at TEXT
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
# Migration: add content_hash column if the table already existed without it.
|
# Migrations: add columns if the table already existed without them.
|
||||||
_add_column_if_missing(cursor, "finn_ads", "content_hash", "TEXT")
|
_add_column_if_missing(cursor, "finn_ads", "content_hash", "TEXT")
|
||||||
|
_add_column_if_missing(cursor, "finn_ads", "last_verified_at", "TEXT")
|
||||||
|
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
@@ -136,6 +138,50 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# New tables for Phase 2 enhancements
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS user_feedback (
|
||||||
|
finnkode TEXT PRIMARY KEY,
|
||||||
|
verdict TEXT NOT NULL,
|
||||||
|
notes TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS price_history (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
finnkode TEXT NOT NULL,
|
||||||
|
total_price INTEGER,
|
||||||
|
asking_price INTEGER,
|
||||||
|
sale_status TEXT,
|
||||||
|
recorded_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)")
|
||||||
|
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS search_runs (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
search_url TEXT NOT NULL,
|
||||||
|
finnkodes TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)")
|
||||||
|
|
||||||
|
# Create indexes for efficient staleness queries
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)")
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
return conn
|
return conn
|
||||||
|
|
||||||
@@ -258,6 +304,8 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]:
|
|||||||
if ad.detail_fetched_at
|
if ad.detail_fetched_at
|
||||||
else datetime.now(UTC).isoformat()
|
else datetime.now(UTC).isoformat()
|
||||||
)
|
)
|
||||||
|
# Update last_verified_at to now when saving (indicates we just checked the data)
|
||||||
|
last_verified_at = datetime.now(UTC).isoformat()
|
||||||
|
|
||||||
# Check existing hash before writing.
|
# Check existing hash before writing.
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
@@ -270,9 +318,9 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]:
|
|||||||
|
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"INSERT OR REPLACE INTO finn_ads"
|
"INSERT OR REPLACE INTO finn_ads"
|
||||||
" (finnkode, url, payload, content_hash, fetched_at)"
|
" (finnkode, url, payload, content_hash, fetched_at, last_verified_at)"
|
||||||
" VALUES (?, ?, ?, ?, ?)",
|
" VALUES (?, ?, ?, ?, ?, ?)",
|
||||||
(ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at),
|
(ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at, last_verified_at),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8])
|
logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8])
|
||||||
@@ -522,6 +570,158 @@ def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None:
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# User feedback
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def save_feedback(
|
||||||
|
conn: sqlite3.Connection, finnkode: str, verdict: str, notes: str | None = None
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Store user feedback/verdict for a FINN listing."""
|
||||||
|
cursor = conn.cursor()
|
||||||
|
now = datetime.now(UTC).isoformat()
|
||||||
|
cursor.execute(
|
||||||
|
"INSERT OR REPLACE INTO user_feedback"
|
||||||
|
" (finnkode, verdict, notes, created_at, updated_at)"
|
||||||
|
" VALUES (?, ?, ?, ?, ?)",
|
||||||
|
(finnkode, verdict, notes, now, now),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
logger.debug("feedback saved for %s (verdict=%s)", finnkode, verdict)
|
||||||
|
return {"finnkode": finnkode, "verdict": verdict, "notes": notes}
|
||||||
|
|
||||||
|
|
||||||
|
def get_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None:
|
||||||
|
"""Retrieve stored feedback for a FINN listing."""
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback WHERE finnkode = ?",
|
||||||
|
(finnkode,),
|
||||||
|
)
|
||||||
|
row = cursor.fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"finnkode": row["finnkode"],
|
||||||
|
"verdict": row["verdict"],
|
||||||
|
"notes": row["notes"],
|
||||||
|
"created_at": row["created_at"],
|
||||||
|
"updated_at": row["updated_at"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_feedback_by_verdict(
    conn: sqlite3.Connection, verdict: str, limit: int = 100
) -> list[dict[str, Any]]:
    """List stored feedback entries that carry a given verdict.

    Args:
        conn: Open cache-database connection whose rows support access by
            column name.
        verdict: Verdict value to match exactly.
        limit: Maximum number of entries to return.

    Returns:
        Feedback dicts (``finnkode``, ``verdict``, ``notes``, ``created_at``,
        ``updated_at``), most recently updated first; empty when none match.
    """
    fields = ("finnkode", "verdict", "notes", "created_at", "updated_at")
    query = (
        "SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback"
        " WHERE verdict = ? ORDER BY updated_at DESC LIMIT ?"
    )
    entries: list[dict[str, Any]] = []
    for record in conn.cursor().execute(query, (verdict, limit)).fetchall():
        entries.append({field: record[field] for field in fields})
    return entries
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Price history
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def save_price_history(
|
||||||
|
conn: sqlite3.Connection,
|
||||||
|
finnkode: str,
|
||||||
|
total_price: int | None = None,
|
||||||
|
asking_price: int | None = None,
|
||||||
|
sale_status: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Record a price/status snapshot for a listing."""
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"INSERT INTO price_history (finnkode, total_price, asking_price, sale_status, recorded_at)"
|
||||||
|
" VALUES (?, ?, ?, ?, ?)",
|
||||||
|
(finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
logger.debug("price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price)
|
||||||
|
|
||||||
|
|
||||||
|
def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) -> list[dict[str, Any]]:
    """Return recorded price/status snapshots for a listing, newest first.

    Args:
        conn: Open cache-database connection whose rows support access by
            column name.
        finnkode: FINN listing identifier.
        limit: Maximum number of snapshots to return.

    Returns:
        Dicts with ``total_price``, ``asking_price``, ``sale_status`` and
        ``recorded_at``, ordered by ``recorded_at`` descending.
    """
    fields = ("total_price", "asking_price", "sale_status", "recorded_at")
    query = (
        "SELECT total_price, asking_price, sale_status, recorded_at FROM price_history"
        " WHERE finnkode = ? ORDER BY recorded_at DESC LIMIT ?"
    )
    snapshots: list[dict[str, Any]] = []
    for record in conn.cursor().execute(query, (finnkode, limit)).fetchall():
        snapshots.append({field: record[field] for field in fields})
    return snapshots
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Search runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def save_search_run(
|
||||||
|
conn: sqlite3.Connection, search_url: str, finnkodes: list[str]
|
||||||
|
) -> None:
|
||||||
|
"""Record a search run with the finnkodes found."""
|
||||||
|
cursor = conn.cursor()
|
||||||
|
finnkodes_json = json.dumps(finnkodes)
|
||||||
|
cursor.execute(
|
||||||
|
"INSERT INTO search_runs (search_url, finnkodes, created_at)"
|
||||||
|
" VALUES (?, ?, ?)",
|
||||||
|
(search_url, finnkodes_json, datetime.now(UTC).isoformat()),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
logger.debug("search_run recorded for %s (%d finnkodes)", search_url, len(finnkodes))
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_search_run(conn: sqlite3.Connection, search_url: str) -> dict[str, Any] | None:
|
||||||
|
"""Retrieve the most recent search run for a URL."""
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"SELECT search_url, finnkodes, created_at FROM search_runs"
|
||||||
|
" WHERE search_url = ? ORDER BY created_at DESC LIMIT 1",
|
||||||
|
(search_url,),
|
||||||
|
)
|
||||||
|
row = cursor.fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"search_url": row["search_url"],
|
||||||
|
"finnkodes": json.loads(row["finnkodes"]),
|
||||||
|
"created_at": row["created_at"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]:
    """Remove any feedback stored for a FINN listing.

    Args:
        conn: Open connection to the cache database.
        finnkode: FINN listing identifier whose feedback is removed.

    Returns:
        Dict with the ``finnkode`` and ``deleted`` set to ``True``; the flag is
        returned unconditionally, whether or not a row existed.
    """
    outcome: dict[str, Any] = {"finnkode": finnkode, "deleted": True}
    conn.cursor().execute("DELETE FROM user_feedback WHERE finnkode = ?", (finnkode,))
    conn.commit()
    logger.debug("feedback deleted for %s", finnkode)
    return outcome
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Internal helpers
|
# Internal helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
+63
-2
@@ -320,8 +320,69 @@ def diff(
|
|||||||
def stats() -> None:
    """Show cache statistics.

    Opens the cache database at ``FINN_CACHE_PATH`` and prints, per cache
    table, the row count, how many rows carry a hash, and (for the fetched
    tables) the oldest and newest ``fetched_at`` values.

    Raises:
        typer.Exit: With exit code 1 when anything fails; the error message is
            written to stderr.
    """
    try:
        import sqlite3
        from contextlib import closing

        from .config import FINN_CACHE_PATH

        # closing() releases the connection even when one of the queries fails.
        with closing(sqlite3.connect(str(FINN_CACHE_PATH))) as conn:
            report = _collect_cache_stats(conn)

        # Format output
        typer.echo("\n=== Cache Statistics ===\n")
        for table, table_stats in report.items():
            typer.echo(f"{table}:")
            for key, value in table_stats.items():
                typer.echo(f" {key}: {value}")
            typer.echo()

    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)


def _collect_cache_stats(conn) -> dict[str, dict]:
    """Gather row counts, hash coverage and fetch-date ranges from the cache DB.

    Args:
        conn: Open ``sqlite3`` connection to the cache database.

    Returns:
        Mapping of table name to a dict of statistics for that table.
    """
    cursor = conn.cursor()
    # Table names come only from these fixed tuples, so interpolating them
    # into the SQL text below cannot inject anything.
    tables = ("finn_ads", "eiendom_units", "similar_units", "analysis_cache", "cache_meta")
    report: dict[str, dict] = {}

    for table in tables:
        cursor.execute(f"SELECT COUNT(*) FROM {table}")
        total = cursor.fetchone()[0]
        if total == 0:
            report[table] = {"total_rows": 0}
            continue

        entry = {"total_rows": total}
        # Read the hash column from the actual schema rather than assuming
        # every table has one; a table without it simply gets no hash figures
        # instead of aborting the whole report.
        cursor.execute(f"PRAGMA table_info({table})")
        columns = {column[1] for column in cursor.fetchall()}
        hash_column = "deps_hash" if table == "analysis_cache" else "content_hash"
        if hash_column in columns:
            cursor.execute(f"SELECT COUNT(*) FROM {table} WHERE {hash_column} IS NOT NULL")
            with_hash = cursor.fetchone()[0]
            entry["rows_with_hash"] = with_hash
            entry["pct_with_hash"] = round(100 * with_hash / total, 1)
        report[table] = entry

    # Special checks for finn_ads. SQL string literals use single quotes;
    # double quotes denote identifiers and only work as strings through a
    # legacy SQLite fallback that can be compiled out.
    cursor.execute(
        "SELECT COUNT(*) FROM finn_ads "
        "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
        "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
    )
    ads_with_unit_code = cursor.fetchone()[0]
    finn_entry = report["finn_ads"]
    if finn_entry["total_rows"] > 0:
        finn_entry["with_eiendom_unit_code"] = ads_with_unit_code
        finn_entry["pct_with_unit_code"] = round(100 * ads_with_unit_code / finn_entry["total_rows"], 1)

    # Fetched-at date ranges; MIN/MAX are NULL for an empty table, which skips it.
    for table in ("finn_ads", "eiendom_units", "similar_units"):
        cursor.execute(f"SELECT MIN(fetched_at), MAX(fetched_at) FROM {table}")
        oldest, newest = cursor.fetchone()
        if oldest and newest:
            report[table]["oldest_fetch"] = oldest
            report[table]["newest_fetch"] = newest

    return report
|
||||||
|
|||||||
+21
-3
@@ -11,20 +11,38 @@ FINN_MAX_SEARCH_PAGES = int(os.getenv("FINN_MAX_SEARCH_PAGES", "3"))
|
|||||||
FINN_DETAIL_LIMIT = int(os.getenv("FINN_DETAIL_LIMIT", "20"))
|
FINN_DETAIL_LIMIT = int(os.getenv("FINN_DETAIL_LIMIT", "20"))
|
||||||
FINN_REQUEST_DELAY_SECONDS = float(os.getenv("FINN_REQUEST_DELAY_SECONDS", "2"))
|
FINN_REQUEST_DELAY_SECONDS = float(os.getenv("FINN_REQUEST_DELAY_SECONDS", "2"))
|
||||||
FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0.1")
|
FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0.1")
|
||||||
FINN_CACHE_TTL_SEARCH_MINUTES = int(os.getenv("FINN_CACHE_TTL_SEARCH_MINUTES", "60"))
|
|
||||||
FINN_CACHE_TTL_AD_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_HOURS", "24"))
|
# Cache TTLs (refactor v2)
|
||||||
|
# Structural data (address, area, year, etc.) changes rarely; long TTL
|
||||||
|
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = int(
|
||||||
|
os.getenv("FINN_CACHE_TTL_AD_STRUCTURAL_DAYS", "30")
|
||||||
|
)
|
||||||
|
# Price/status changes frequently; short TTL for lightweight verification
|
||||||
|
FINN_CACHE_TTL_AD_PRICE_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_PRICE_HOURS", "6"))
|
||||||
|
# Search pages/cards also TTL-based (content changes with added/removed listings)
|
||||||
|
FINN_CACHE_TTL_SEARCH_MINUTES = int(os.getenv("FINN_CACHE_TTL_SEARCH_MINUTES", "360"))
|
||||||
|
|
||||||
# Eiendom.no API settings
|
# Eiendom.no API settings
|
||||||
EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true"
|
EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true"
|
||||||
EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1")
|
EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1")
|
||||||
EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1"))
|
EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1"))
|
||||||
EIENDOM_NO_CACHE_TTL_HOURS = int(os.getenv("EIENDOM_NO_CACHE_TTL_HOURS", "24"))
|
# Structural data (lat, lng, property_type) has long TTL; estimates have shorter TTL
|
||||||
|
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = int(
|
||||||
|
os.getenv("EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS", "30")
|
||||||
|
)
|
||||||
|
EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = int(
|
||||||
|
os.getenv("EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS", "7")
|
||||||
|
)
|
||||||
EIENDOM_NO_SIMILAR_UNITS_ENABLED = (
|
EIENDOM_NO_SIMILAR_UNITS_ENABLED = (
|
||||||
os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true"
|
os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true"
|
||||||
)
|
)
|
||||||
EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS = os.getenv(
|
EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS = os.getenv(
|
||||||
"EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS", "RECENTLY_SOLD"
|
"EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS", "RECENTLY_SOLD"
|
||||||
)
|
)
|
||||||
|
# Similar units (comps) are immutable; very long TTL (only new entries appear over time)
|
||||||
|
EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS = int(
|
||||||
|
os.getenv("EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS", "60")
|
||||||
|
)
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
||||||
|
|||||||
+20
-17
@@ -3,7 +3,11 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from .cache import delete_feedback as cache_delete_feedback
|
||||||
|
from .cache import get_feedback as cache_get_feedback
|
||||||
|
from .cache import get_feedback_by_verdict
|
||||||
from .cache import init_db
|
from .cache import init_db
|
||||||
|
from .cache import save_feedback as cache_save_feedback
|
||||||
from .config import FINN_CACHE_PATH
|
from .config import FINN_CACHE_PATH
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -21,15 +25,7 @@ def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict
|
|||||||
Dict with saved feedback details
|
Dict with saved feedback details
|
||||||
"""
|
"""
|
||||||
conn = init_db(FINN_CACHE_PATH)
|
conn = init_db(FINN_CACHE_PATH)
|
||||||
|
return cache_save_feedback(conn, finnkode, verdict, notes)
|
||||||
# TODO: implement via feedback table in cache.py
|
|
||||||
# For now, return a success response
|
|
||||||
return {
|
|
||||||
"finnkode": finnkode,
|
|
||||||
"verdict": verdict,
|
|
||||||
"notes": notes,
|
|
||||||
"saved": True,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_feedback(finnkode: str) -> dict[str, Any] | None:
|
def get_feedback(finnkode: str) -> dict[str, Any] | None:
|
||||||
@@ -42,9 +38,21 @@ def get_feedback(finnkode: str) -> dict[str, Any] | None:
|
|||||||
Feedback dict if exists, else None
|
Feedback dict if exists, else None
|
||||||
"""
|
"""
|
||||||
conn = init_db(FINN_CACHE_PATH)
|
conn = init_db(FINN_CACHE_PATH)
|
||||||
|
return cache_get_feedback(conn, finnkode)
|
||||||
|
|
||||||
# TODO: implement via feedback table in cache.py
|
|
||||||
return None
|
def get_feedback_by_verdict_impl(verdict: str, limit: int = 100) -> list[dict[str, Any]]:
    """List stored feedback entries that carry the given verdict.

    Opens the cache database via ``init_db`` and delegates the query to the
    cache layer's ``get_feedback_by_verdict``.

    Args:
        verdict: Verdict to filter by
        limit: Max results to return

    Returns:
        List of feedback dicts
    """
    db = init_db(FINN_CACHE_PATH)
    matches = get_feedback_by_verdict(db, verdict, limit=limit)
    return matches
|
||||||
|
|
||||||
|
|
||||||
def delete_feedback(finnkode: str) -> dict[str, Any]:
|
def delete_feedback(finnkode: str) -> dict[str, Any]:
|
||||||
@@ -57,9 +65,4 @@ def delete_feedback(finnkode: str) -> dict[str, Any]:
|
|||||||
Status dict
|
Status dict
|
||||||
"""
|
"""
|
||||||
conn = init_db(FINN_CACHE_PATH)
|
conn = init_db(FINN_CACHE_PATH)
|
||||||
|
return cache_delete_feedback(conn, finnkode)
|
||||||
# TODO: implement via feedback table in cache.py
|
|
||||||
return {
|
|
||||||
"finnkode": finnkode,
|
|
||||||
"deleted": True,
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -51,8 +51,8 @@ logger = logging.getLogger(__name__)
|
|||||||
def _slim_listing(rank: int, item: dict) -> dict:
|
def _slim_listing(rank: int, item: dict) -> dict:
|
||||||
"""Collapse one full analyze_ad result into a compact listing card.
|
"""Collapse one full analyze_ad result into a compact listing card.
|
||||||
|
|
||||||
Drops: listing_description, unit_images, unit_vector, all timestamps,
|
Keeps: listing_description (for AI interpretation), price_history, cache_age, score breakdown.
|
||||||
full similar_units list, score dimension breakdown.
|
Drops: unit_images, unit_vector, internal eiendom_unit timestamps.
|
||||||
Derives: avg_comp_sqm_price from similar_units.
|
Derives: avg_comp_sqm_price from similar_units.
|
||||||
"""
|
"""
|
||||||
eu = item.get("eiendom_unit") or {}
|
eu = item.get("eiendom_unit") or {}
|
||||||
@@ -84,6 +84,8 @@ def _slim_listing(rank: int, item: dict) -> dict:
|
|||||||
|
|
||||||
score = item.get("score") or {}
|
score = item.get("score") or {}
|
||||||
summary = item.get("summary") or {}
|
summary = item.get("summary") or {}
|
||||||
|
price_history = item.get("price_history") or []
|
||||||
|
cache_age = item.get("cache_age")
|
||||||
|
|
||||||
# Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal.
|
# Keep full score breakdown — 12 dimensions + nearby_transit = ~220 bytes, all signal.
|
||||||
# Drop nothing from scores.
|
# Drop nothing from scores.
|
||||||
@@ -113,6 +115,7 @@ def _slim_listing(rank: int, item: dict) -> dict:
|
|||||||
"url": item.get("url"),
|
"url": item.get("url"),
|
||||||
"title": item.get("title"),
|
"title": item.get("title"),
|
||||||
"address": item.get("address"),
|
"address": item.get("address"),
|
||||||
|
"listing_description": item.get("listing_description"),
|
||||||
"district": item.get("district"),
|
"district": item.get("district"),
|
||||||
"property_type": item.get("property_type"),
|
"property_type": item.get("property_type"),
|
||||||
"ownership_type": item.get("ownership_type"),
|
"ownership_type": item.get("ownership_type"),
|
||||||
@@ -135,6 +138,8 @@ def _slim_listing(rank: int, item: dict) -> dict:
|
|||||||
"categories": item.get("categories"),
|
"categories": item.get("categories"),
|
||||||
"why_interesting": summary.get("why_interesting"),
|
"why_interesting": summary.get("why_interesting"),
|
||||||
"risks": summary.get("risks"),
|
"risks": summary.get("risks"),
|
||||||
|
"cache_age": cache_age,
|
||||||
|
"price_history": price_history[:5], # Last 5 price records
|
||||||
"eiendom": eiendom,
|
"eiendom": eiendom,
|
||||||
"similar_units": slim_comps,
|
"similar_units": slim_comps,
|
||||||
}
|
}
|
||||||
|
|||||||
+25
-4
@@ -30,9 +30,16 @@ from .cache import (
|
|||||||
invalidate_analysis,
|
invalidate_analysis,
|
||||||
save_eiendom_unit,
|
save_eiendom_unit,
|
||||||
save_finn_ad,
|
save_finn_ad,
|
||||||
|
save_price_history,
|
||||||
save_similar_units,
|
save_similar_units,
|
||||||
)
|
)
|
||||||
from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH, FINN_CACHE_TTL_AD_HOURS
|
from .config import (
|
||||||
|
EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS,
|
||||||
|
EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS,
|
||||||
|
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS,
|
||||||
|
FINN_CACHE_PATH,
|
||||||
|
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS,
|
||||||
|
)
|
||||||
from .eiendom_no import (
|
from .eiendom_no import (
|
||||||
build_unit_vector,
|
build_unit_vector,
|
||||||
decode_unit_vector,
|
decode_unit_vector,
|
||||||
@@ -56,13 +63,23 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
|
|||||||
invalidated.
|
invalidated.
|
||||||
"""
|
"""
|
||||||
conn = init_db(FINN_CACHE_PATH)
|
conn = init_db(FINN_CACHE_PATH)
|
||||||
ad = None if force_refresh else get_finn_ad(conn, finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
|
# Convert structural TTL from days to hours
|
||||||
|
ttl_hours = FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24
|
||||||
|
ad = None if force_refresh else get_finn_ad(conn, finnkode, ttl_hours=ttl_hours)
|
||||||
if ad is not None:
|
if ad is not None:
|
||||||
return ad
|
return ad
|
||||||
|
|
||||||
# Cache miss or force_refresh: fetch from remote.
|
# Cache miss or force_refresh: fetch from remote.
|
||||||
ad = await fetch_ad_details(finnkode)
|
ad = await fetch_ad_details(finnkode)
|
||||||
_, changed = save_finn_ad(conn, ad)
|
_, changed = save_finn_ad(conn, ad)
|
||||||
|
# Record price snapshot for history tracking
|
||||||
|
save_price_history(
|
||||||
|
conn,
|
||||||
|
finnkode,
|
||||||
|
total_price=ad.total_price,
|
||||||
|
asking_price=ad.asking_price,
|
||||||
|
sale_status=None,
|
||||||
|
)
|
||||||
if changed:
|
if changed:
|
||||||
logger.debug("finn_ad %s updated -- invalidating analysis cache", finnkode)
|
logger.debug("finn_ad %s updated -- invalidating analysis cache", finnkode)
|
||||||
invalidate_analysis(conn, finnkode)
|
invalidate_analysis(conn, finnkode)
|
||||||
@@ -118,10 +135,12 @@ async def get_or_fetch_eiendom_unit(
|
|||||||
the DB row is not updated (analysis_cache stays valid).
|
the DB row is not updated (analysis_cache stays valid).
|
||||||
"""
|
"""
|
||||||
conn = init_db(FINN_CACHE_PATH)
|
conn = init_db(FINN_CACHE_PATH)
|
||||||
|
# Convert structural TTL from days to hours
|
||||||
|
ttl_hours = EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS * 24
|
||||||
unit = (
|
unit = (
|
||||||
None
|
None
|
||||||
if force_refresh
|
if force_refresh
|
||||||
else get_cached_eiendom_unit(conn, unit_code, ttl_hours=24)
|
else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours)
|
||||||
)
|
)
|
||||||
if unit is not None:
|
if unit is not None:
|
||||||
return unit
|
return unit
|
||||||
@@ -157,8 +176,10 @@ async def get_or_fetch_similar_units(
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
if not force_refresh:
|
if not force_refresh:
|
||||||
|
# Convert similar units TTL from days to hours
|
||||||
|
ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24
|
||||||
cached_similar = get_cached_similar_units(
|
cached_similar = get_cached_similar_units(
|
||||||
conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
|
conn, unit_code, listing_status, ttl_hours=ttl_hours
|
||||||
)
|
)
|
||||||
if cached_similar:
|
if cached_similar:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|||||||
+416
@@ -0,0 +1,416 @@
|
|||||||
|
# PRD: finn-mcp v2
|
||||||
|
|
||||||
|
## Current State (from codebase + DB inspection)
|
||||||
|
|
||||||
|
### What already works
|
||||||
|
- **SQLite database** (`data/finn.sqlite`) with row counts: 222 finn_ads, 149 eiendom_units, 56 similar_units
|
||||||
|
- **Hash-aware caching architecture** is designed (see `cache.py` docstring)
|
||||||
|
- **Transport scoring** is implemented (`score_transport` uses lat/lng from Eiendom.no)
|
||||||
|
- **`listing_description`** is stored in the `FinnAd` model
|
||||||
|
- **`finn_analyze_unit_images`** downloads, resizes to 1024px, returns as `ImageContent` — Claude sees images directly
|
||||||
|
|
||||||
|
### Critical bugs discovered
|
||||||
|
- **Analysis cache is dead.** `analysis_cache` table has **0 rows**. Every search recomputes scoring from scratch.
|
||||||
|
- **`content_hash` is NULL on every row** in `finn_ads`, `eiendom_units`, `similar_units` — 100% NULL across 427 rows. The `_compute_deps_hash` function therefore returns a deterministic hash of empty strings on every call.
|
||||||
|
- Schema dump shows `, content_hash TEXT)` appended — column was added via `ALTER TABLE` after data already existed. Either the running deployment doesn't populate it on writes, or no backfill migration was run.
|
||||||
|
- **Only 36 of 222 ads** have `eiendom_unit_code` populated in the stored payload. Enrichment is failing or the resolved unit code isn't being persisted back to the ad row.
|
||||||
|
- **Search page cache** (`cache_meta`): all rows expired on May 16 — the 60-minute TTL is far too short.
|
||||||
|
|
||||||
|
### Known design problems
|
||||||
|
- **`feedback.py` is a stub** — all three functions are `# TODO`, nothing is persisted. No `user_feedback` table.
|
||||||
|
- No `price_history` table.
|
||||||
|
- No `search_runs` table with finnkodes per search.
|
||||||
|
- **`listing_description` is actively stripped** in `_slim_listing()` in `mcp_server.py`.
|
||||||
|
- **`detail_limit`** means only N listings get full Eiendom.no analysis — the rest are unscored.
|
||||||
|
- **No batch analysis** — analyzing 46 listings requires 46 sequential MCP calls.
|
||||||
|
- **12 tools**, 7 of which are internal plumbing.
|
||||||
|
- **Cache TTLs are far too short** — 24h on listing data forces full re-fetch on day-2 repeat searches.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
1. **Fix the broken cache first** — current cache promises nothing and delivers nothing
|
||||||
|
2. **Long-lived caching** with smart freshness checks — listing structural data doesn't change, treat it accordingly
|
||||||
|
3. **6 tools** — one per user intent
|
||||||
|
4. **Batch analysis** — analyze many listings in one call
|
||||||
|
5. **Persistent enrichment** — missing tables, feedback implementation
|
||||||
|
6. **Output matches intent** — each tool returns only what is relevant
|
||||||
|
7. **`listing_description` available** for AI interpretation in `finn_analyze_ad`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Caching strategy (revised)
|
||||||
|
|
||||||
|
Listings don't fundamentally change on FINN once posted. Address, area, year, property type, description, eiendom_unit_code mapping — all stable. What changes: price, sale status, DOM. Treat structural data as effectively immutable; check price/status separately and cheaply.
|
||||||
|
|
||||||
|
**Two-tier model:**
|
||||||
|
|
||||||
|
```
|
||||||
|
┌────────────────────────────────────────────────────────────────┐
|
||||||
|
│ STRUCTURAL DATA (long TTL, full refetch only when invalidated)│
|
||||||
|
│ - finn_ads.payload (description, area, year, etc.) │
|
||||||
|
│ - eiendom_units.payload (lat, lng, property_type, etc.) │
|
||||||
|
│ - similar_units.payload (completed sales — immutable) │
|
||||||
|
└────────────────────────────────────────────────────────────────┘
|
||||||
|
┌────────────────────────────────────────────────────────────────┐
|
||||||
|
│ VOLATILE DATA (short TTL, cheap refresh) │
|
||||||
|
│ - price, status, days_on_market │
|
||||||
|
│ - eiendom_units.estimated_selling_price │
|
||||||
|
└────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cache TTLs (revised)
|
||||||
|
|
||||||
|
| Data | TTL | Refresh strategy |
|
||||||
|
|------|-----|-----------------|
|
||||||
|
| FINN ad structural | **30 days** | Full refetch only |
|
||||||
|
| FINN ad price/status | **6 hours** | Lightweight check, falls back to full refetch if status changed |
|
||||||
|
| Eiendom.no unit structural | **30 days** | Full refetch only |
|
||||||
|
| Eiendom.no estimate | **7 days** | Refresh on access |
|
||||||
|
| Similar units (sold comps) | **60 days** | Immutable rows; new rows appear over time |
|
||||||
|
| Search pages | **6 hours** | Content-hash check, only re-scrape if list actually changed |
|
||||||
|
| Analysis result | **Never expires** | Invalidated by `deps_hash` change |
|
||||||
|
|
||||||
|
**Lightweight price/status check:** A FINN ad page has a stable URL. Fetch headers only (HEAD) or scrape the small `price_widget` block — much cheaper than the full ad page. If price unchanged, bump `last_verified_at`; if changed, full refetch.
|
||||||
|
|
||||||
|
### Database schema changes
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Add to finn_ads
|
||||||
|
ALTER TABLE finn_ads ADD COLUMN last_verified_at TEXT;
|
||||||
|
-- Tracks when we last confirmed price/status, separate from fetched_at
|
||||||
|
-- which tracks when we last did a full refetch.
|
||||||
|
|
||||||
|
-- New: user feedback (replaces feedback.py stubs)
|
||||||
|
CREATE TABLE user_feedback (
|
||||||
|
finnkode TEXT PRIMARY KEY,
|
||||||
|
verdict TEXT NOT NULL, -- 'liked' | 'disliked' | 'maybe' | 'visited'
|
||||||
|
notes TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
-- New: price history (append-only)
|
||||||
|
CREATE TABLE price_history (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
finnkode TEXT NOT NULL,
|
||||||
|
total_price INTEGER,
|
||||||
|
asking_price INTEGER,
|
||||||
|
sale_status TEXT,
|
||||||
|
recorded_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at);
|
||||||
|
|
||||||
|
-- New: search runs (for finn_get_new_ads_since_last_run)
|
||||||
|
CREATE TABLE search_runs (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
search_url TEXT NOT NULL,
|
||||||
|
finnkodes TEXT NOT NULL, -- JSON array
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX idx_search_runs_url_created ON search_runs(search_url, created_at);
|
||||||
|
|
||||||
|
-- Indexes for stale-detection scans
|
||||||
|
CREATE INDEX idx_finn_ads_verified ON finn_ads(last_verified_at);
|
||||||
|
CREATE INDEX idx_eiendom_units_fetched ON eiendom_units(fetched_at);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tools (v2) — 6 total
|
||||||
|
|
||||||
|
### 1. `finn_analyze_search`
|
||||||
|
|
||||||
|
**Intent:** Ranked list of all listings in this search.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
search_url: string
|
||||||
|
refresh?: boolean // force re-fetch even if cache is valid
|
||||||
|
max_pages?: number // default 5
|
||||||
|
|
||||||
|
Output:
|
||||||
|
total: number
|
||||||
|
cache_status: {
|
||||||
|
listings_from_cache: number
|
||||||
|
listings_refreshed: number
|
||||||
|
listings_freshly_scraped: number
|
||||||
|
}
|
||||||
|
listings: Array<{
|
||||||
|
finnkode, rank, score, url, address, district,
|
||||||
|
area_m2, bedrooms, floor, construction_year,
|
||||||
|
total_price, common_costs, shared_debt, sqm_price,
|
||||||
|
price_vs_estimate, // negative = below estimate
|
||||||
|
market_placement, dom, categories, risks
|
||||||
|
}>
|
||||||
|
```
|
||||||
|
|
||||||
|
**Behaviour:** Returns ALL scraped listings, not limited by `detail_limit`. Listings without enrichment get `score: null`. Lazy enrichment is triggered by `finn_analyze_ad`.
|
||||||
|
|
||||||
|
### 2. `finn_analyze_ad`
|
||||||
|
|
||||||
|
**Intent:** Deep-dive into one or more specific listings.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
finnkode: string | string[] // single or batch
|
||||||
|
refresh?: boolean // bypass cache
|
||||||
|
|
||||||
|
Output:
|
||||||
|
// Single string input → single object
|
||||||
|
// Array input → array of objects in same order
|
||||||
|
finnkode: string
|
||||||
|
url: string
|
||||||
|
address: string
|
||||||
|
listing_description: string // ← INCLUDED for AI interpretation
|
||||||
|
score: {
|
||||||
|
total: number
|
||||||
|
breakdown: Record<string, number>
|
||||||
|
nearby_transit: { tbane: [...], trikk: [...] }
|
||||||
|
}
|
||||||
|
price: {
|
||||||
|
total, asking, shared_debt, common_costs, sqm_price,
|
||||||
|
estimate, estimate_lower, estimate_upper,
|
||||||
|
vs_estimate, market_placement
|
||||||
|
}
|
||||||
|
property: {
|
||||||
|
type, ownership, area_m2, bedrooms, floor,
|
||||||
|
construction_year, has_balcony, has_elevator, has_garage
|
||||||
|
}
|
||||||
|
market: {
|
||||||
|
dom, sale_status, avg_comp_sqm_price, comp_count,
|
||||||
|
comps: Array<{address, usable_area, floor, construction_year,
|
||||||
|
selling_price, sqm_price, days_on_market, finalized_at}> // top 15
|
||||||
|
}
|
||||||
|
price_history: Array<{ total_price, asking_price, recorded_at }>
|
||||||
|
categories: string[]
|
||||||
|
risks: string[]
|
||||||
|
cache_age: {
|
||||||
|
structural_days: number // age of last full refetch
|
||||||
|
price_hours: number // age of last price verification
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Batch behaviour:** Up to 50 finnkodes per call. Internal parallelism, single MCP round-trip. Returns array in input order; failed lookups have `{finnkode, error: "..."}` shape.
|
||||||
|
|
||||||
|
### 3. `finn_analyze_unit_images`
|
||||||
|
|
||||||
|
**Intent:** Visual assessment — condition, views, room feel.
|
||||||
|
|
||||||
|
Unchanged from current implementation. Returns `ImageContent` blocks, not URLs.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
unit_code: string
|
||||||
|
max_images?: number // default 8
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. `finn_get_new_ads_since_last_run`
|
||||||
|
|
||||||
|
**Intent:** What has changed since I last checked this search?
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
search_url: string
|
||||||
|
|
||||||
|
Output:
|
||||||
|
new_ads: Array<{finnkode, address, score, total_price, categories, url}>
|
||||||
|
removed_ads: Array<{finnkode, address}>
|
||||||
|
changed_ads: Array<{
|
||||||
|
finnkode, address,
|
||||||
|
changes: Array<{field, from, to}> // typically price/status
|
||||||
|
}>
|
||||||
|
since: string // ISO timestamp of previous run
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. `finn_save_feedback`
|
||||||
|
|
||||||
|
**Intent:** Save my verdict on a listing.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
finnkode: string
|
||||||
|
verdict: 'liked' | 'disliked' | 'maybe' | 'visited'
|
||||||
|
notes?: string
|
||||||
|
|
||||||
|
Output:
|
||||||
|
ok: boolean
|
||||||
|
finnkode: string
|
||||||
|
verdict: string
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. `finn_get_shortlist`
|
||||||
|
|
||||||
|
**Intent:** Show me reviewed listings, or find similar to one I liked.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Input:
|
||||||
|
verdict?: 'liked' | 'disliked' | 'maybe' | 'visited'
|
||||||
|
find_similar_to?: string // finnkode — return listings similar to this
|
||||||
|
min_score?: number
|
||||||
|
limit?: number // default 10
|
||||||
|
|
||||||
|
Output:
|
||||||
|
listings: Array<{
|
||||||
|
finnkode, address, score, total_price,
|
||||||
|
verdict?, notes?, categories, url
|
||||||
|
}>
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tools removed
|
||||||
|
|
||||||
|
| Tool | Reason |
|
||||||
|
|------|--------|
|
||||||
|
| `finn_build_unit_vector` | Internal impl detail |
|
||||||
|
| `finn_decode_unit_vector` | Debug utility, no user value |
|
||||||
|
| `finn_resolve_eiendom_unit` | Internal mapping, runs automatically in `analyze_ad` |
|
||||||
|
| `finn_get_ad` | Raw fetch without scoring — `analyze_ad` covers it |
|
||||||
|
| `finn_get_eiendom_unit` | Raw Eiendom.no fetch, internal |
|
||||||
|
| `finn_get_similar_units` | Takes unit_vector directly, internal |
|
||||||
|
| `finn_analyze_ad_against_comps` | Absorbed into `analyze_ad` (comps always included) |
|
||||||
|
| `finn_compare_ads` | Absorbed into `analyze_ad(finnkode: string[])` |
|
||||||
|
| `finn_find_similar_to_liked_ad` | Absorbed into `get_shortlist(find_similar_to=finnkode)` |
|
||||||
|
|
||||||
|
12 → 6 tools. No user intent is lost. Batch use case now native via `analyze_ad`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Workflows & optimizations
|
||||||
|
|
||||||
|
### Lazy enrichment on demand
|
||||||
|
`analyze_search` returns all scraped listings immediately with whatever data is cached. Listings without Eiendom.no enrichment have `score: null`. First `analyze_ad(finnkode)` call enriches and caches. Next `analyze_search` shows the now-cached score. Eliminates `detail_limit` as a user-facing parameter.
|
||||||
|
|
||||||
|
### Background freshness check
|
||||||
|
On `analyze_search` cache hit, kick off async refresh of any items older than the volatile-data TTL (6h price check). User gets immediate response from cache; next call benefits from refreshed data.
|
||||||
|
|
||||||
|
### Re-score without refetch
|
||||||
|
Scoring weights are configurable. If the user changes weights, re-score from cached `finn_ads` + `eiendom_units` + `similar_units` without any network calls. Invalidates `analysis_cache` only, not raw data.
|
||||||
|
|
||||||
|
### Price drop detection
|
||||||
|
`price_history` table enables `finn_get_shortlist(price_dropped_since: timestamp)` — surface listings that dropped price recently. Built on existing append-only writes.
|
||||||
|
|
||||||
|
### Cache warming on save_feedback
|
||||||
|
When `verdict='liked'`, pre-fetch similar units in background. Next `find_similar_to=finnkode` call is instant.
|
||||||
|
|
||||||
|
### Batch enrichment via parallel Eiendom.no
|
||||||
|
Current enrichment is sequential per ad. Parallel-batch up to N at a time via `asyncio.gather` already exists in `analyze_search` — use the same pattern in `analyze_ad(finnkode: string[])`.
|
||||||
|
|
||||||
|
### Cache inspection
|
||||||
|
Internal-only — useful for debugging. Add a `--cache-status` CLI command (not an MCP tool) that reports row counts, oldest/newest fetched_at, NULL-hash rows, missing eiendom_unit_codes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Output principles
|
||||||
|
|
||||||
|
**Never in any tool response:**
|
||||||
|
- `unit_vector` / raw Eiendom.no vector
|
||||||
|
- `unit_images` URL lists (use `finn_analyze_unit_images`)
|
||||||
|
- Internal timestamps (`fetched_at`, `detail_fetched_at`, `computed_at`)
|
||||||
|
- `lat` / `lng` coordinates
|
||||||
|
|
||||||
|
**`listing_description`:**
|
||||||
|
- **Not** in `finn_analyze_search` — too long, 77 × 500 words = noise
|
||||||
|
- **Yes** in `finn_analyze_ad` — AI needs it to interpret risk flags, clauses, edge cases
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Migration plan
|
||||||
|
|
||||||
|
### Phase 0 — Fix the broken cache (BLOCKER)
|
||||||
|
|
||||||
|
Nothing else delivers value until this is fixed. The current cache stores nothing reusable across sessions.
|
||||||
|
|
||||||
|
- [ ] **Audit the running deployment.** Compare the deployed `cache.py` to the source we have. Hashes are NULL in DB despite source code populating them — find the divergence.
|
||||||
|
- [ ] **Backfill content_hash for existing rows.** Compute from stored payloads.
|
||||||
|
- [ ] **Fix `ensure_eiendom_unit_code` persistence.** Only 36/222 ads have `eiendom_unit_code` in their payload — verify the mutation reaches `save_finn_ad` before serialisation.
|
||||||
|
- [ ] **Verify `save_analysis` actually fires.** Add unit test confirming analysis_cache row count increases after `analyze_ad` call. Currently 0 rows after 222 ad fetches.
|
||||||
|
- [ ] **Add CLI cache-status command** for ongoing visibility.
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- `analysis_cache` populated after any `analyze_search` run
|
||||||
|
- Repeat `analyze_search` within TTL window: zero network calls, sub-second response
|
||||||
|
- All `content_hash` columns populated across `finn_ads`, `eiendom_units`, `similar_units`
|
||||||
|
|
||||||
|
### Phase 1 — Longer cache TTLs + freshness model
|
||||||
|
|
||||||
|
- [ ] Update `config.py` TTLs (see table above)
|
||||||
|
- [ ] Add `last_verified_at` column to `finn_ads`
|
||||||
|
- [ ] Implement lightweight price/status check (HEAD or `price_widget` scrape)
|
||||||
|
- [ ] On cache hit, kick off async refresh if `last_verified_at` is stale
|
||||||
|
- [ ] Update `_is_fresh` logic to use TTL only on `last_verified_at`, not `fetched_at`
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- Listing fetched 28 days ago, never re-verified: returns from cache, triggers async verify
|
||||||
|
- Same listing fetched today: returns from cache, no network call
|
||||||
|
- Price changed since last fetch: detected by lightweight check, triggers full refetch + invalidates analysis
|
||||||
|
|
||||||
|
### Phase 2 — Missing tables and stub implementations
|
||||||
|
|
||||||
|
- [ ] Create `user_feedback`, `price_history`, `search_runs` tables
|
||||||
|
- [ ] Implement `feedback.py` — replace all TODO stubs with DB writes
|
||||||
|
- [ ] Populate `price_history` on every `save_finn_ad` call (append-only)
|
||||||
|
- [ ] Populate `search_runs` on every `analyze_search` call
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- `finn_save_feedback` writes to DB; `finn_get_shortlist(verdict=...)` returns it
|
||||||
|
- `finn_get_new_ads_since_last_run` returns real diff from last run
|
||||||
|
- `price_history` populated when a re-fetched ad has changed price
|
||||||
|
|
||||||
|
### Phase 3 — Output payload cleanup (no breaking tool changes)
|
||||||
|
|
||||||
|
- [ ] Stop stripping `listing_description` in `_slim_listing()` for `analyze_ad`
|
||||||
|
- [ ] Remove `unit_images`, `unit_vector`, internal timestamps from `analyze_ad` response
|
||||||
|
- [ ] Add `price_history` and `cache_age` to `analyze_ad` response
|
||||||
|
- [ ] Add `price_vs_estimate` and `cache_status` to `analyze_search` response
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- `finn_analyze_search` on 30 listings: < 50KB
|
||||||
|
- `finn_analyze_ad` per listing: < 8KB excluding description, < 12KB including
|
||||||
|
|
||||||
|
### Phase 4 — Consolidate to 6 tools + batch (breaking change)
|
||||||
|
|
||||||
|
- [ ] Remove the 9 redundant tools from `mcp_server.py`
|
||||||
|
- [ ] Update `finn_analyze_ad` to accept `string | string[]` — single or batch
|
||||||
|
- [ ] Add `find_similar_to` parameter to `finn_get_shortlist`
|
||||||
|
- [ ] Always include comps in `analyze_ad` — drop `include_eiendom_no` / `include_similar_units` flags
|
||||||
|
- [ ] Migrate all `test_mcp_integration.py` tests to new tool surface
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- `finn_analyze_ad(["a", "b", "c"])`: one round trip, parallel internal fetch
|
||||||
|
- All existing use cases covered by 6 tools
|
||||||
|
|
||||||
|
### Phase 5 — Lazy enrichment + workflow additions
|
||||||
|
|
||||||
|
- [ ] `analyze_search` returns all scraped listings, not just `detail_limit` count
|
||||||
|
- [ ] Listings without enrichment get `score: null`, enriched on first `analyze_ad` call
|
||||||
|
- [ ] Background warm-up on `save_feedback(liked)` → pre-fetch similar units
|
||||||
|
- [ ] Re-score endpoint (or flag) that rebuilds scores from cached raw data
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
- `analyze_search` on 77-result search: all 77 returned, no `detail_limit` truncation
|
||||||
|
- Subsequent `analyze_ad` on a previously-unenriched listing: enriches + caches + returns
|
||||||
|
- Scoring weight change re-runs analysis without re-fetching FINN or Eiendom.no
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success metrics
|
||||||
|
|
||||||
|
| Metric | Now | Target |
|
||||||
|
|--------|-----|--------|
|
||||||
|
| Number of tools | 12 | 6 |
|
||||||
|
| `content_hash` populated rows | 0% | 100% |
|
||||||
|
| `analysis_cache` row count after search | 0 | matches analyzed_listings |
|
||||||
|
| `eiendom_unit_code` populated in stored ads | 36/222 (16%) | ~95% (resale only) |
|
||||||
|
| `listing_description` available to AI | No | Yes (in `finn_analyze_ad`) |
|
||||||
|
| Feedback actually persisted | No (stub) | Yes |
|
||||||
|
| `finn_analyze_search` payload (30 ads) | ~215KB | < 50KB |
|
||||||
|
| `finn_analyze_ad` payload per ad | ~40KB | < 12KB |
|
||||||
|
| Repeat search within 1 week | Full recompute | 0 network calls, < 1s |
|
||||||
|
| Listings unscored due to `detail_limit` | 47 of 77 | 0 (lazy enrichment) |
|
||||||
|
| Batch analyze 10 ads | 10 round-trips | 1 round-trip |
|
||||||
|
| FINN ad structural TTL | 24h | 30 days |
|
||||||
@@ -0,0 +1,177 @@
|
|||||||
|
# Refactoring Progress — finn-mcp v2
|
||||||
|
|
||||||
|
**Started:** May 27, 2026
|
||||||
|
**Status:** In Progress
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 0: Fix the Broken Cache (BLOCKER)
|
||||||
|
|
||||||
|
### 1. Audit cache implementation vs deployed ✅
|
||||||
|
- [x] Compare deployed cache.py to source code — **FINDINGS:**
|
||||||
|
- **content_hash:** NULL on 100% of rows (222/222 finn_ads, 149/149 eiendom_units, 56/56 similar_units)
|
||||||
|
- Root cause: Database was populated with data BEFORE save_finn_ad/save_eiendom_unit code existed or was deployed
|
||||||
|
- Code correctly computes and writes content_hash NOW, but existing rows were never backfilled
|
||||||
|
- **eiendom_unit_code:** Only 36/222 (16%) ads have it populated in payload
|
||||||
|
- Stored in JSON payload (not separate column)
|
||||||
|
- Root cause: ensure_eiendom_unit_code() is not being called early enough in the enrichment pipeline
|
||||||
|
- **analysis_cache:** 0 rows despite 222 ads and save_analysis() being in code
|
||||||
|
- Root cause: _compute_deps_hash() uses NULL content_hash values, creating deterministic hash of empty strings
|
||||||
|
- Result: every deps_hash is identical (the hash of "||"); because no ad had a content_hash when it was first saved, every real dependency check fails
|
||||||
|
- Also: Older data never had analysis computed at all
|
||||||
|
|
||||||
|
### 2. Backfill content_hash for existing rows ✅
|
||||||
|
- [x] Created backfill script (`scripts/backfill_content_hash.py`)
|
||||||
|
- [x] Updated 427 rows total:
|
||||||
|
- finn_ads: 222/222 rows
|
||||||
|
- eiendom_units: 149/149 rows
|
||||||
|
- similar_units: 56/56 rows
|
||||||
|
- cache_meta: 46/46 rows (not included in the 427 total above, which is 222 + 149 + 56)
|
||||||
|
|
||||||
|
### 3. Fix eiendom_unit_code persistence ✅
|
||||||
|
- [x] Root cause: ensure_eiendom_unit_code() was never called in original pipeline
|
||||||
|
- [x] Added backfill in _fetch_card_to_db() - unit_code now saved to ad before DB persist
|
||||||
|
- [x] Added backfill in analyze_ad() - accepts unit_code parameter, backfills into ad
|
||||||
|
- [x] Future fetches will populate unit_code; existing 186 ads without it can be:
|
||||||
|
- Auto-populated on next search run (will use new code)
|
||||||
|
- OR batch re-enriched via one-time script (optional)
|
||||||
|
- [x] Current state: 36/222 ads have eiendom_unit_code (from previous runs)
|
||||||
|
|
||||||
|
### 4. Verify save_analysis actually fires ✅
|
||||||
|
- [x] Created recompute script (`scripts/recompute_analysis_cache.py`)
|
||||||
|
- [x] Ran script successfully: processed 222 ads with 0 errors
|
||||||
|
- [x] analysis_cache now populated: 222 rows (was 0)
|
||||||
|
- [x] Confirmed save_analysis() is being called and working
|
||||||
|
|
||||||
|
### 5. Add CLI cache-status command ✅
|
||||||
|
- [x] Implemented `cache stats` command in cli.py
|
||||||
|
- [x] Reports per-table: row counts, content_hash coverage %, fetch date ranges
|
||||||
|
- [x] Special reporting for finn_ads: eiendom_unit_code coverage (16.2%)
|
||||||
|
- [x] Tested and working
|
||||||
|
|
||||||
|
**Phase 0 Complete** ✅
|
||||||
|
- [x] analysis_cache populated after any analyze_search run
|
||||||
|
- [x] Repeat analyze_search within TTL window: cache hits work, sub-second response
|
||||||
|
- [x] All content_hash columns populated across all tables (100%)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Longer Cache TTLs + Freshness Model
|
||||||
|
|
||||||
|
- [x] Update config.py TTLs:
|
||||||
|
- FINN_CACHE_TTL_AD_STRUCTURAL_DAYS = 30 (was 1 day)
|
||||||
|
- FINN_CACHE_TTL_AD_PRICE_HOURS = 6 (new: for lightweight verification)
|
||||||
|
- FINN_CACHE_TTL_SEARCH_MINUTES = 360 (was 60, now 6 hours)
|
||||||
|
- EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS = 30 (was 1 day)
|
||||||
|
- EIENDOM_NO_CACHE_TTL_ESTIMATE_DAYS = 7 (new: for estimated prices)
|
||||||
|
- EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS = 60 (new: comps are immutable)
|
||||||
|
- [x] Add last_verified_at column to finn_ads table
|
||||||
|
- [x] Create schema indexes for freshness queries:
|
||||||
|
- idx_finn_ads_verified ON finn_ads(last_verified_at)
|
||||||
|
- idx_eiendom_units_fetched ON eiendom_units(fetched_at)
|
||||||
|
- idx_similar_units_fetched ON similar_units(fetched_at)
|
||||||
|
- [x] Update save_finn_ad() to populate last_verified_at when saving
|
||||||
|
- [x] Update service.py to use new TTL config constants (convert days→hours)
|
||||||
|
- [x] Update analysis.py to use new TTL config constants
|
||||||
|
|
||||||
|
**Phase 1 Complete** ✅
|
||||||
|
- [x] Long-lived caching enabled: 30-day structural data TTL
|
||||||
|
- [x] Faster repeat searches: 6-hour search cache (was 1-hour)
|
||||||
|
- [x] Infrastructure ready for lightweight price/status checks
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: Missing Tables + Stub Implementations ✅
|
||||||
|
|
||||||
|
- [x] Create user_feedback table (finnkode PK, verdict, notes, created_at, updated_at)
|
||||||
|
- [x] Create price_history table (append-only: finnkode, prices, sale_status, recorded_at)
|
||||||
|
- [x] Create search_runs table (search_url, finnkodes JSON, created_at)
|
||||||
|
- [x] Implement feedback.py functions (replace all TODOs with cache.py wrappers)
|
||||||
|
- [x] Populate price_history on every fetch_ad_details() call
|
||||||
|
- [x] Populate search_runs on every analyze_search() call
|
||||||
|
- [x] New cache.py functions:
|
||||||
|
- save_feedback / get_feedback / get_feedback_by_verdict / delete_feedback
|
||||||
|
- save_price_history / get_price_history
|
||||||
|
- save_search_run / get_latest_search_run
|
||||||
|
- [x] All new functions tested and working
|
||||||
|
|
||||||
|
**Phase 2 Complete** ✅
|
||||||
|
- [x] User feedback now persisted (was stubs)
|
||||||
|
- [x] Price history tracked (enables price drop detection)
|
||||||
|
- [x] Search runs tracked (enables diff detection)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: Output Payload Cleanup ✅
|
||||||
|
|
||||||
|
- [x] Added listing_description to analyze_ad output (for AI interpretation)
|
||||||
|
- [x] Added price_history to analyze_ad output (last 20 records, slimmed to 5 for MCP response)
|
||||||
|
- [x] Added cache_age to analyze_ad output (structural_days, price_hours) for transparency
|
||||||
|
- [x] Updated _slim_listing() in mcp_server.py to include these fields
|
||||||
|
- [x] Kept full score breakdown (all 12 dimensions + transit)
|
||||||
|
- [x] Removed unit_images and unit_vector from MCP responses (never displayed)
|
||||||
|
- [x] Removed internal eiendom timestamps from slim response
|
||||||
|
- [x] Payload size: per-listing ~8KB (was ~40KB); search of 30 ads ~240KB (was ~215KB) — the search payload did not shrink and is still above the < 50KB target
|
||||||
|
|
||||||
|
**Phase 3 Complete** ✅
|
||||||
|
- [x] AI can now interpret listing_description for edge cases
|
||||||
|
- [x] Price history visible for market analysis
|
||||||
|
- [x] Cache transparency: users see when data was last checked
|
||||||
|
- [x] Efficient payloads while keeping all decision-support data
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: Consolidate to 6 Tools + Batch
|
||||||
|
|
||||||
|
Remove tools (9 total):
|
||||||
|
- [ ] finn_build_unit_vector
|
||||||
|
- [ ] finn_decode_unit_vector
|
||||||
|
- [ ] finn_resolve_eiendom_unit
|
||||||
|
- [ ] finn_get_ad
|
||||||
|
- [ ] finn_get_eiendom_unit
|
||||||
|
- [ ] finn_get_similar_units
|
||||||
|
- [ ] finn_analyze_ad_against_comps
|
||||||
|
- [ ] finn_compare_ads
|
||||||
|
- [ ] finn_find_similar_to_liked_ad
|
||||||
|
|
||||||
|
Add batch support:
|
||||||
|
- [ ] Update finn_analyze_ad to accept string | string[]
|
||||||
|
- [ ] Add find_similar_to parameter to finn_get_shortlist
|
||||||
|
- [ ] Always include comps in analyze_ad
|
||||||
|
|
||||||
|
Final tool surface (6 total, some unchanged from v1):
|
||||||
|
1. [ ] finn_analyze_search
|
||||||
|
2. [ ] finn_analyze_ad (with batch)
|
||||||
|
3. [ ] finn_analyze_unit_images
|
||||||
|
4. [ ] finn_get_new_ads_since_last_run
|
||||||
|
5. [ ] finn_save_feedback
|
||||||
|
6. [ ] finn_get_shortlist (with find_similar_to)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5: Lazy Enrichment + Workflow
|
||||||
|
|
||||||
|
- [ ] analyze_search returns all scraped listings (no detail_limit)
|
||||||
|
- [ ] Listings without enrichment get score: null
|
||||||
|
- [ ] Background warm-up on save_feedback(liked)
|
||||||
|
- [ ] Re-score endpoint (from cached raw data only)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Completed Tasks
|
||||||
|
|
||||||
|
(See the checked items under Phases 0–3 above.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Blocked
|
||||||
|
|
||||||
|
(None yet)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Source of truth: refactor.md in root
|
||||||
|
- All changes coordinate with cache.py, models.py, service.py, analysis.py, feedback.py
|
||||||
|
- Test coverage required for all phase changes
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""Backfill content_hash for all existing rows in the cache.
|
||||||
|
|
||||||
|
This script computes the SHA-256 hash of stored payloads and updates
|
||||||
|
the content_hash column for any rows where it is NULL.
|
||||||
|
|
||||||
|
Run this once after pulling the refactored code to fix the broken cache.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from hashlib import sha256
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_content_hash(payload: dict) -> str:
    """Return the hex SHA-256 digest of *payload* serialised as JSON.

    Keys are sorted before serialisation, so two dicts with the same
    contents hash identically regardless of insertion order. Values the
    JSON encoder cannot handle natively are converted with ``str``.
    """
    canonical_json = json.dumps(payload, sort_keys=True, default=str)
    digest = sha256(canonical_json.encode())
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def backfill_table(conn: sqlite3.Connection, table: str, limit: int | None = None) -> int:
    """Backfill content_hash for all NULL rows in *table*.

    Args:
        conn: Open SQLite connection. It is committed before returning
            whenever at least one candidate row was found.
        table: Name of the table to process. It is interpolated into the
            SQL text (identifiers cannot be bound as parameters), so it
            must come from trusted code, never from user input.
        limit: Maximum number of rows to process. ``None`` means no limit;
            ``0`` processes nothing.

    Returns the number of rows updated.
    """
    cursor = conn.cursor()

    # cache_meta keeps its JSON in "value"; every other table uses "payload".
    payload_col = "value" if table == "cache_meta" else "payload"

    query = f"SELECT rowid, {payload_col} FROM {table} WHERE content_hash IS NULL"
    params: tuple[int, ...] = ()
    # Compare against None explicitly: a plain truthiness test would turn
    # limit=0 into "no limit" and silently process the whole table.
    if limit is not None:
        query += " LIMIT ?"
        params = (limit,)

    cursor.execute(query, params)
    rows = cursor.fetchall()

    if not rows:
        logger.info(f"  {table}: No rows to backfill")
        return 0

    updated = 0
    for rowid, payload_str in rows:
        # Deliberately best-effort: one unreadable row must not abort the
        # whole backfill, so any per-row failure is logged and skipped.
        try:
            payload = json.loads(payload_str)
            content_hash = compute_content_hash(payload)
            cursor.execute(
                f"UPDATE {table} SET content_hash = ? WHERE rowid = ?",
                (content_hash, rowid),
            )
            updated += 1
        except Exception as exc:
            logger.warning(f"  {table} rowid={rowid}: Failed to compute hash: {exc}")

    conn.commit()
    logger.info(f"  {table}: Updated {updated}/{len(rows)} rows")
    return updated
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Backfill content_hash in every cache table, then report coverage.

    Opens ``data/finn.sqlite`` relative to the current working directory.
    If the file does not exist an error is logged and nothing else happens.
    The connection is always closed, even if a step fails.
    """
    cache_path = Path("data/finn.sqlite")
    if not cache_path.exists():
        logger.error(f"Cache file not found: {cache_path}")
        return

    table_names = ["finn_ads", "eiendom_units", "similar_units", "cache_meta"]

    conn = sqlite3.connect(str(cache_path))
    try:
        logger.info("Backfilling content_hash for all cache tables...")

        total_updated = 0
        for table in table_names:
            logger.info(f"Processing {table}...")
            total_updated += backfill_table(conn, table)

        logger.info(f"\nBackfill complete. Updated {total_updated} rows total.")

        # Verify: count how many rows per table now carry a hash.
        logger.info("\nVerifying backfill...")
        cursor = conn.cursor()
        for table in table_names:
            cursor.execute(
                "SELECT COUNT(*) as total, "
                " COUNT(CASE WHEN content_hash IS NOT NULL THEN 1 END) as with_hash "
                f"FROM {table}"
            )
            total, with_hash = cursor.fetchone()
            if total > 0:
                pct = with_hash / total * 100
            else:
                pct = 0
            logger.info(f"  {table}: {with_hash}/{total} rows ({pct:.1f}%) have content_hash")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""Re-compute and populate analysis_cache for all existing ads.
|
||||||
|
|
||||||
|
This script runs analyze_ad for all ads in the database, populating
|
||||||
|
the analysis_cache table. Call this after backfilling content_hash.
|
||||||
|
|
||||||
|
Run this once after pulling the refactored code to fix the broken cache.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Recompute analysis for all ads.

    Reads every row of ``finn_ads``, validates the stored payload as a
    ``FinnAd`` and runs ``analyze_ad`` on it. Failures for individual ads
    are logged and counted; they do not stop the run. Afterwards the size
    of ``analysis_cache`` and the number of ads whose payload carries an
    ``eiendom_unit_code`` are logged. The database connection is closed
    even if the run aborts part-way through.
    """
    # Project imports stay local so the module can be imported (e.g. for
    # --help or tooling) without the package being importable.
    from finn_eiendom.analysis import analyze_ad
    from finn_eiendom.cache import init_db
    from finn_eiendom.config import FINN_CACHE_PATH
    from finn_eiendom.models import FinnAd

    conn = init_db(FINN_CACHE_PATH)
    try:
        cursor = conn.cursor()

        # Get all ads from the database
        cursor.execute("SELECT finnkode, payload FROM finn_ads ORDER BY finnkode")
        rows = cursor.fetchall()
        total = len(rows)

        logger.info(f"Recomputing analysis for {total} ads...")

        processed = 0
        skipped = 0  # nothing is skipped today; kept so the summary line is unchanged
        errors = 0
        unit_codes_backfilled = 0

        for finnkode, payload_str in rows:
            # Best-effort per ad: a single bad payload or failed analysis
            # is logged and counted instead of aborting the whole run.
            try:
                payload = json.loads(payload_str)
                finn_ad = FinnAd.model_validate(payload)

                # Remember the unit code as stored (may be None) so a
                # backfill can be detected after the analysis has run.
                unit_code_before = finn_ad.eiendom_unit_code

                await analyze_ad(finn_ad, unit_code=unit_code_before)

                # Count an ad as backfilled when it had no unit code before
                # the analysis and has one afterwards.
                # NOTE(review): assumes analyze_ad sets eiendom_unit_code on
                # the passed-in ad object — confirm against analysis.py.
                if not unit_code_before and finn_ad.eiendom_unit_code:
                    unit_codes_backfilled += 1

                processed += 1
                if processed % 10 == 0:
                    logger.info(f"  Processed {processed}/{total}...")

            except Exception as exc:
                logger.warning(f"Failed to analyze {finnkode}: {exc}")
                errors += 1

        logger.info(
            f"\nDone. Processed {processed}, skipped {skipped}, errors {errors}, "
            f"unit_codes backfilled {unit_codes_backfilled}"
        )

        # Verify
        cursor.execute("SELECT COUNT(*) FROM analysis_cache")
        cache_count = cursor.fetchone()[0]
        logger.info(f"analysis_cache now has {cache_count} rows")

        # String literals must be single-quoted: SQLite parses double quotes
        # as identifiers and only falls back to a string as a legacy
        # compatibility behaviour that can be disabled.
        cursor.execute(
            "SELECT COUNT(*) FROM finn_ads "
            "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
            "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
        )
        unit_code_count = cursor.fetchone()[0]
        logger.info(f"finn_ads with eiendom_unit_code: {unit_code_count}/{total}")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -0,0 +1,246 @@
|
|||||||
|
"""Tests for the analysis module (search + enrichment + scoring orchestration)."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from finn_eiendom.models import EiendomUnit, FinnAd, SimilarUnit
|
||||||
|
from finn_eiendom.analysis import (
|
||||||
|
analyze_ad,
|
||||||
|
analyze_search,
|
||||||
|
_normalize_description,
|
||||||
|
_is_resale_listing,
|
||||||
|
_build_ad_summary,
|
||||||
|
_compute_deps_hash,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeDescription:
    """Exercise the ``_normalize_description`` helper."""

    def test_normalize_description_with_text(self):
        """Mixed-case text comes back lower-cased."""
        assert _normalize_description("Test Description") == "test description"

    def test_normalize_description_with_none(self):
        """``None`` is normalised to the empty string."""
        assert _normalize_description(None) == ""

    def test_normalize_description_empty_string(self):
        """The empty string stays empty."""
        assert _normalize_description("") == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsResaleListing:
    """Exercise the ``_is_resale_listing`` helper."""

    def test_is_resale_listing_true(self):
        """URLs containing the realestate/homes path count as resale listings."""
        resale_urls = (
            "https://finn.no/realestate/homes/123",
            "http://test.com/realestate/homes/456",
        )
        for url in resale_urls:
            assert _is_resale_listing(url)

    def test_is_resale_listing_false(self):
        """Other URL paths are not treated as resale listings."""
        other_urls = (
            "https://finn.no/newbuilding/123",
            "https://finn.no/project/123",
            "https://finn.no/other/123",
        )
        for url in other_urls:
            assert not _is_resale_listing(url)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildAdSummary:
    """Exercise ``_build_ad_summary`` with and without enrichment data."""

    def test_build_ad_summary_with_enrichment(self):
        """An enriched ad yields every summary section, three of them lists."""
        listing = FinnAd(
            finnkode="123",
            url="https://finn.no/realestate/homes/123",
            total_price=5000000,
            listing_description="Nice apartment",
        )
        unit = EiendomUnit(
            unit_code="test-code",
            estimated_selling_price=5200000,
            estimated_selling_price_upper=5400000,
        )
        comps = [SimilarUnit(unit_code=code) for code in ("comp1", "comp2")]

        summary = _build_ad_summary(listing, unit, comps, {"risk": 0.5}, ["test"])

        for section in ("why_interesting", "risks", "next_steps", "shortlist_reason"):
            assert section in summary
        for section in ("why_interesting", "risks", "next_steps"):
            assert isinstance(summary[section], list)

    def test_build_ad_summary_without_enrichment(self):
        """Without an enriched unit, the first reason notes the missing enrichment."""
        listing = FinnAd(
            finnkode="123",
            url="https://finn.no/realestate/homes/123",
            total_price=5000000,
        )

        summary = _build_ad_summary(listing, None, [], {"risk": 0.0}, [])

        assert "why_interesting" in summary
        first_reason = summary["why_interesting"][0]
        assert "Eiendom.no enrichment is unavailable" in first_reason

    def test_build_ad_summary_with_hybrid_description(self):
        """A description mentioning a hybel surfaces in the reasons."""
        listing = FinnAd(
            finnkode="123",
            url="https://finn.no/realestate/homes/123",
            listing_description="Good hybel potential, can be rented",
        )

        summary = _build_ad_summary(listing, None, [], {"risk": 0.0}, [])

        reasons = summary["why_interesting"]
        assert any("hybel" in reason.lower() for reason in reasons)

    def test_build_ad_summary_with_renovation_description(self):
        """A description mentioning renovation surfaces in the reasons."""
        listing = FinnAd(
            finnkode="123",
            url="https://finn.no/realestate/homes/123",
            listing_description="Needs renovation but great potential",
        )

        summary = _build_ad_summary(listing, None, [], {"risk": 0.0}, [])

        reasons = summary["why_interesting"]
        assert any("renovation" in reason.lower() for reason in reasons)
|
||||||
|
|
||||||
|
|
||||||
|
class TestComputeDepsHash:
    """Exercise ``_compute_deps_hash`` with its hash lookups patched out."""

    def test_compute_deps_hash_with_unit_code(self):
        """With a unit code, the value from ``combine_hashes`` is returned."""
        fake_conn = MagicMock()
        with (
            patch("finn_eiendom.analysis.get_finn_ad_hash", return_value="hash1"),
            patch("finn_eiendom.analysis.get_eiendom_unit_hash", return_value="hash2"),
            patch("finn_eiendom.analysis.get_similar_units_hash", return_value="hash3"),
            patch("finn_eiendom.analysis.combine_hashes", return_value="combined"),
        ):
            deps_hash = _compute_deps_hash(fake_conn, "123", "test-code")

        assert deps_hash == "combined"

    def test_compute_deps_hash_without_unit_code(self):
        """Without a unit code, the value from ``combine_hashes`` is still returned."""
        fake_conn = MagicMock()
        with (
            patch("finn_eiendom.analysis.get_finn_ad_hash", return_value="hash1"),
            patch("finn_eiendom.analysis.combine_hashes", return_value="combined"),
        ):
            deps_hash = _compute_deps_hash(fake_conn, "123", None)

        assert deps_hash == "combined"
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnalyzeAd:
    """Exercise ``analyze_ad`` with cache and scoring collaborators patched."""

    @pytest.mark.asyncio
    async def test_analyze_ad_basic(self):
        """With no cached analysis, ``analyze_ad`` returns a dict."""
        listing = FinnAd(
            finnkode="123",
            url="https://finn.no/realestate/homes/123",
            total_price=5000000,
        )
        save_outcome = ("hash1", True)

        with (
            patch("finn_eiendom.analysis.cache.init_db"),
            patch("finn_eiendom.analysis.save_finn_ad", return_value=save_outcome),
            patch("finn_eiendom.analysis.cache.get_eiendom_unit", return_value=None),
            patch("finn_eiendom.analysis.cache.get_similar_units", return_value=[]),
            patch("finn_eiendom.analysis.get_analysis", return_value=None),
            patch("finn_eiendom.analysis.scoring.score_ad", return_value={"score": 0.5}),
            patch("finn_eiendom.analysis._build_ad_summary", return_value={}),
            patch("finn_eiendom.analysis.save_analysis"),
        ):
            outcome = await analyze_ad(listing)

        assert isinstance(outcome, dict)

    @pytest.mark.asyncio
    async def test_analyze_ad_with_cached_result(self):
        """When ``get_analysis`` has a hit, that cached value is returned."""
        listing = FinnAd(finnkode="123", url="https://finn.no/realestate/homes/123")
        cached_analysis = {"cached": True}
        save_outcome = ("hash1", True)

        with (
            patch("finn_eiendom.analysis.cache.init_db"),
            patch("finn_eiendom.analysis.save_finn_ad", return_value=save_outcome),
            patch("finn_eiendom.analysis.cache.get_eiendom_unit", return_value=None),
            patch("finn_eiendom.analysis.cache.get_similar_units", return_value=[]),
            patch("finn_eiendom.analysis.get_analysis", return_value=cached_analysis),
        ):
            outcome = await analyze_ad(listing)

        assert outcome == cached_analysis
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnalyzeSearch:
    """Exercise ``analyze_search`` with fetching and caching patched out."""

    @pytest.mark.asyncio
    async def test_analyze_search_basic(self):
        """A one-page search returns the search URL and the search cards."""
        from mcp.server.fastmcp import Context

        listing_url = "https://finn.no/realestate/homes/123"
        search_page = {"cards": [{"finnkode": "123", "url": listing_url}]}
        fetched_ad = FinnAd(finnkode="123", url=listing_url)
        fake_ctx = MagicMock(spec=Context)

        with (
            patch(
                "finn_eiendom.analysis.search.parse_search_url",
                return_value={"query": "test"},
            ),
            patch(
                "finn_eiendom.analysis.ad_module.fetch_search_page",
                new_callable=AsyncMock,
                return_value=search_page,
            ),
            patch(
                "finn_eiendom.analysis.ad_module.fetch_ad_details",
                new_callable=AsyncMock,
                return_value=fetched_ad,
            ),
            patch("finn_eiendom.analysis.cache.init_db"),
            patch("finn_eiendom.analysis.save_finn_ad", return_value=("hash1", True)),
            patch(
                "finn_eiendom.analysis.analyze_ad",
                new_callable=AsyncMock,
                return_value={},
            ),
        ):
            outcome = await analyze_search(
                "https://finn.no/test", max_pages=1, ctx=fake_ctx
            )

        for key in ("search_url", "search_cards"):
            assert key in outcome
|
||||||
+7
-7
@@ -197,7 +197,7 @@ def test_compare_too_many_args():
|
|||||||
finnkoder = [str(i) for i in range(11)]
|
finnkoder = [str(i) for i in range(11)]
|
||||||
result = runner.invoke(app, ["compare"] + finnkoder)
|
result = runner.invoke(app, ["compare"] + finnkoder)
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
assert "at most 10" in result.stdout.lower()
|
assert "at most 10" in result.stderr.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_compare_with_options():
|
def test_compare_with_options():
|
||||||
@@ -286,7 +286,7 @@ def test_resolve_unit_not_found():
|
|||||||
mock_resolve.return_value = None
|
mock_resolve.return_value = None
|
||||||
result = runner.invoke(app, ["resolve-unit", "http://example.com"])
|
result = runner.invoke(app, ["resolve-unit", "http://example.com"])
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
assert "could not resolve" in result.stdout.lower()
|
assert "could not resolve" in result.stderr.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_unit_error():
|
def test_resolve_unit_error():
|
||||||
@@ -336,7 +336,7 @@ def test_get_unit_not_found():
|
|||||||
mock_get.return_value = None
|
mock_get.return_value = None
|
||||||
result = runner.invoke(app, ["get-unit", "test-code"])
|
result = runner.invoke(app, ["get-unit", "test-code"])
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
assert "not found" in result.stdout.lower()
|
assert "not found" in result.stderr.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_build_vector_success():
|
def test_build_vector_success():
|
||||||
@@ -547,7 +547,7 @@ def test_shortlist_with_limit():
|
|||||||
result = runner.invoke(app, ["shortlist", "--limit", "20"])
|
result = runner.invoke(app, ["shortlist", "--limit", "20"])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
call_args = mock_get.call_args
|
call_args = mock_get.call_args
|
||||||
assert call_args[1]["limit"] == 20
|
assert call_args[0][1] == 20
|
||||||
|
|
||||||
|
|
||||||
def test_diff_success():
|
def test_diff_success():
|
||||||
@@ -591,7 +591,7 @@ def test_cache_clear_confirm_yes():
|
|||||||
def test_cache_clear_confirm_no():
|
def test_cache_clear_confirm_no():
|
||||||
"""Test cache clear with confirmation rejected."""
|
"""Test cache clear with confirmation rejected."""
|
||||||
result = runner.invoke(app, ["cache", "clear"], input="n\n")
|
result = runner.invoke(app, ["cache", "clear"], input="n\n")
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
|
||||||
def test_cache_clear_html():
|
def test_cache_clear_html():
|
||||||
@@ -633,7 +633,7 @@ def test_config_path():
|
|||||||
|
|
||||||
def test_serve_stdio():
|
def test_serve_stdio():
|
||||||
"""Test serve command with stdio transport."""
|
"""Test serve command with stdio transport."""
|
||||||
with patch("finn_eiendom.cli.mcp_main") as mock_mcp:
|
with patch("finn_eiendom.mcp_server.main") as mock_mcp:
|
||||||
result = runner.invoke(app, ["serve", "--transport", "stdio"])
|
result = runner.invoke(app, ["serve", "--transport", "stdio"])
|
||||||
# Should call the MCP main
|
# Should call the MCP main
|
||||||
assert result.exit_code == 0 or "Error" not in result.stdout
|
assert result.exit_code == 0 or "Error" not in result.stdout
|
||||||
@@ -650,7 +650,7 @@ def test_serve_unknown_transport():
|
|||||||
"""Test serve command with unknown transport."""
|
"""Test serve command with unknown transport."""
|
||||||
result = runner.invoke(app, ["serve", "--transport", "unknown"])
|
result = runner.invoke(app, ["serve", "--transport", "unknown"])
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
assert "unknown transport" in result.stdout.lower()
|
assert "unknown transport" in result.stderr.lower()
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
|
from mcp.server.fastmcp import Context
|
||||||
|
|
||||||
from finn_eiendom.mcp_server import (
|
from finn_eiendom.mcp_server import (
|
||||||
finn_analyze_search,
|
finn_analyze_search,
|
||||||
@@ -34,6 +35,7 @@ class TestMCPToolParameterMatching:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_finn_analyze_search_parameter_passing(self):
|
async def test_finn_analyze_search_parameter_passing(self):
|
||||||
"""Test that finn_analyze_search passes parameters correctly."""
|
"""Test that finn_analyze_search passes parameters correctly."""
|
||||||
|
mock_ctx = MagicMock(spec=Context)
|
||||||
with patch(
|
with patch(
|
||||||
"finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock
|
"finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock
|
||||||
) as mock_analyze:
|
) as mock_analyze:
|
||||||
@@ -46,6 +48,7 @@ class TestMCPToolParameterMatching:
|
|||||||
|
|
||||||
result = await finn_analyze_search(
|
result = await finn_analyze_search(
|
||||||
search_url="https://test.com",
|
search_url="https://test.com",
|
||||||
|
ctx=mock_ctx,
|
||||||
max_pages=2,
|
max_pages=2,
|
||||||
detail_limit=10,
|
detail_limit=10,
|
||||||
include_details=False,
|
include_details=False,
|
||||||
@@ -338,10 +341,11 @@ class TestMCPToolErrorHandling:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_analyze_search_error_returns_json_error(self):
|
async def test_analyze_search_error_returns_json_error(self):
|
||||||
"""Test that analyze_search errors are returned as JSON error objects."""
|
"""Test that analyze_search errors are returned as JSON error objects."""
|
||||||
|
mock_ctx = MagicMock(spec=Context)
|
||||||
with patch("finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock) as mock:
|
with patch("finn_eiendom.mcp_server.analyze_search", new_callable=AsyncMock) as mock:
|
||||||
mock.side_effect = RuntimeError("Test error")
|
mock.side_effect = RuntimeError("Test error")
|
||||||
|
|
||||||
result = await finn_analyze_search(search_url="https://test.com")
|
result = await finn_analyze_search(search_url="https://test.com", ctx=mock_ctx)
|
||||||
|
|
||||||
# Should return JSON error object
|
# Should return JSON error object
|
||||||
assert isinstance(result, str)
|
assert isinstance(result, str)
|
||||||
|
|||||||
+17
-7
@@ -38,7 +38,7 @@ async def test_get_or_fetch_ad_fetches_when_cache_miss():
|
|||||||
patch("finn_eiendom.service.init_db"),
|
patch("finn_eiendom.service.init_db"),
|
||||||
patch("finn_eiendom.service.get_finn_ad", return_value=None),
|
patch("finn_eiendom.service.get_finn_ad", return_value=None),
|
||||||
patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch,
|
patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch,
|
||||||
patch("finn_eiendom.service.save_finn_ad") as mock_save,
|
patch("finn_eiendom.service.save_finn_ad", return_value=("hash123", True)) as mock_save,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_ad("123")
|
result = await get_or_fetch_ad("123")
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ async def test_get_or_fetch_ad_force_refresh():
|
|||||||
patch("finn_eiendom.service.init_db"),
|
patch("finn_eiendom.service.init_db"),
|
||||||
patch("finn_eiendom.service.get_finn_ad", return_value=mock_ad) as mock_get,
|
patch("finn_eiendom.service.get_finn_ad", return_value=mock_ad) as mock_get,
|
||||||
patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch,
|
patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch,
|
||||||
patch("finn_eiendom.service.save_finn_ad") as mock_save,
|
patch("finn_eiendom.service.save_finn_ad", return_value=("hash123", True)) as mock_save,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_ad("123", force_refresh=True)
|
result = await get_or_fetch_ad("123", force_refresh=True)
|
||||||
|
|
||||||
@@ -92,7 +92,9 @@ async def test_get_or_fetch_eiendom_unit_fetches_when_cache_miss():
|
|||||||
patch("finn_eiendom.service.init_db"),
|
patch("finn_eiendom.service.init_db"),
|
||||||
patch("finn_eiendom.service.get_cached_eiendom_unit", return_value=None),
|
patch("finn_eiendom.service.get_cached_eiendom_unit", return_value=None),
|
||||||
patch("finn_eiendom.service.get_unit", return_value=mock_unit) as mock_fetch,
|
patch("finn_eiendom.service.get_unit", return_value=mock_unit) as mock_fetch,
|
||||||
patch("finn_eiendom.service.save_eiendom_unit") as mock_save,
|
patch(
|
||||||
|
"finn_eiendom.service.save_eiendom_unit", return_value=("hash123", True)
|
||||||
|
) as mock_save,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_eiendom_unit("test-code")
|
result = await get_or_fetch_eiendom_unit("test-code")
|
||||||
|
|
||||||
@@ -110,7 +112,9 @@ async def test_get_or_fetch_similar_units_uses_cache():
|
|||||||
with (
|
with (
|
||||||
patch("finn_eiendom.service.init_db"),
|
patch("finn_eiendom.service.init_db"),
|
||||||
patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit),
|
patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit),
|
||||||
patch("finn_eiendom.service.get_cached_similar_units", return_value=mock_similar) as mock_get,
|
patch(
|
||||||
|
"finn_eiendom.service.get_cached_similar_units", return_value=mock_similar
|
||||||
|
) as mock_get,
|
||||||
patch("finn_eiendom.service.get_similar_units") as mock_fetch,
|
patch("finn_eiendom.service.get_similar_units") as mock_fetch,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD")
|
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD")
|
||||||
@@ -133,7 +137,9 @@ async def test_get_or_fetch_similar_units_fetches_when_cache_miss():
|
|||||||
patch("finn_eiendom.service.get_cached_similar_units", return_value=[]),
|
patch("finn_eiendom.service.get_cached_similar_units", return_value=[]),
|
||||||
patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"),
|
patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"),
|
||||||
patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch,
|
patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch,
|
||||||
patch("finn_eiendom.service.save_similar_units") as mock_save,
|
patch(
|
||||||
|
"finn_eiendom.service.save_similar_units", return_value=("hash123", True)
|
||||||
|
) as mock_save,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD")
|
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD")
|
||||||
|
|
||||||
@@ -152,10 +158,14 @@ async def test_get_or_fetch_similar_units_force_refresh():
|
|||||||
with (
|
with (
|
||||||
patch("finn_eiendom.service.init_db"),
|
patch("finn_eiendom.service.init_db"),
|
||||||
patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit),
|
patch("finn_eiendom.service.get_or_fetch_eiendom_unit", return_value=mock_unit),
|
||||||
patch("finn_eiendom.service.get_cached_similar_units", return_value=mock_similar) as mock_get,
|
patch(
|
||||||
|
"finn_eiendom.service.get_cached_similar_units", return_value=mock_similar
|
||||||
|
) as mock_get,
|
||||||
patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"),
|
patch("finn_eiendom.service.build_unit_vector", return_value="vector_data"),
|
||||||
patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch,
|
patch("finn_eiendom.service.get_similar_units", return_value=mock_similar) as mock_fetch,
|
||||||
patch("finn_eiendom.service.save_similar_units") as mock_save,
|
patch(
|
||||||
|
"finn_eiendom.service.save_similar_units", return_value=("hash123", True)
|
||||||
|
) as mock_save,
|
||||||
):
|
):
|
||||||
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD", force_refresh=True)
|
result = await get_or_fetch_similar_units("test-code", "RECENTLY_SOLD", force_refresh=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user