Refactor and enhance various components of the FINN real estate analysis tool
- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
This commit is contained in:
+85
-52
@@ -30,6 +30,8 @@ import sqlite3
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .config import FINN_CACHE_PATH
|
||||
from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit
|
||||
|
||||
@@ -70,7 +72,10 @@ def get_connection(path: str | None = None) -> sqlite3.Connection:
|
||||
|
||||
|
||||
def init_db(path: str | None = None) -> sqlite3.Connection:
|
||||
conn = get_connection(path)
|
||||
# Ensure parent directory exists — sqlite3.connect() won't create it.
|
||||
db_path = Path(path or FINN_CACHE_PATH)
|
||||
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = get_connection(str(db_path))
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute(
|
||||
@@ -163,7 +168,9 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
|
||||
)
|
||||
"""
|
||||
)
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)")
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)"
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
@@ -175,20 +182,24 @@ def init_db(path: str | None = None) -> sqlite3.Connection:
|
||||
)
|
||||
"""
|
||||
)
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)")
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)"
|
||||
)
|
||||
|
||||
# Create indexes for efficient staleness queries
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)")
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)"
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
return conn
|
||||
|
||||
|
||||
def _add_column_if_missing(
|
||||
cursor: sqlite3.Cursor, table: str, column: str, col_type: str
|
||||
) -> None:
|
||||
def _add_column_if_missing(cursor: sqlite3.Cursor, table: str, column: str, col_type: str) -> None:
|
||||
"""ALTER TABLE … ADD COLUMN is idempotent via this guard."""
|
||||
cursor.execute(f"PRAGMA table_info({table})")
|
||||
existing = {row["name"] for row in cursor.fetchall()}
|
||||
@@ -300,27 +311,38 @@ def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]:
|
||||
payload = ad.model_dump(mode="json")
|
||||
new_hash = compute_content_hash(payload)
|
||||
fetched_at = (
|
||||
ad.detail_fetched_at.isoformat()
|
||||
if ad.detail_fetched_at
|
||||
else datetime.now(UTC).isoformat()
|
||||
ad.detail_fetched_at.isoformat() if ad.detail_fetched_at else datetime.now(UTC).isoformat()
|
||||
)
|
||||
# Update last_verified_at to now when saving (indicates we just checked the data)
|
||||
last_verified_at = datetime.now(UTC).isoformat()
|
||||
|
||||
# Check existing hash before writing.
|
||||
cursor.execute(
|
||||
"SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,)
|
||||
)
|
||||
cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,))
|
||||
row = cursor.fetchone()
|
||||
if row and row["content_hash"] == new_hash:
|
||||
logger.debug("finn_ad %s unchanged (hash match)", ad.finnkode)
|
||||
# Data unchanged: skip the full rewrite (preserves analysis_cache),
|
||||
# but still record that we verified it just now so the price-freshness
|
||||
# timer (last_verified_at) advances and cache_age.price_hours resets.
|
||||
cursor.execute(
|
||||
"UPDATE finn_ads SET last_verified_at = ? WHERE finnkode = ?",
|
||||
(last_verified_at, ad.finnkode),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug("finn_ad %s unchanged (hash match, verified bumped)", ad.finnkode)
|
||||
return new_hash, False
|
||||
|
||||
cursor.execute(
|
||||
"INSERT OR REPLACE INTO finn_ads"
|
||||
" (finnkode, url, payload, content_hash, fetched_at, last_verified_at)"
|
||||
" VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(ad.finnkode, ad.url, json.dumps(payload, default=_json_default), new_hash, fetched_at, last_verified_at),
|
||||
(
|
||||
ad.finnkode,
|
||||
ad.url,
|
||||
json.dumps(payload, default=_json_default),
|
||||
new_hash,
|
||||
fetched_at,
|
||||
last_verified_at,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, new_hash[:8])
|
||||
@@ -331,9 +353,7 @@ def get_finn_ad(
|
||||
conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None
|
||||
) -> FinnAd | None:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,)
|
||||
)
|
||||
cursor.execute("SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,))
|
||||
row = cursor.fetchone()
|
||||
if not row:
|
||||
return None
|
||||
@@ -345,9 +365,7 @@ def get_finn_ad(
|
||||
def get_finn_ad_hash(conn: sqlite3.Connection, finnkode: str) -> str | None:
|
||||
"""Return the stored content_hash for *finnkode*, or None if not cached."""
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,)
|
||||
)
|
||||
cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,))
|
||||
row = cursor.fetchone()
|
||||
return row["content_hash"] if row else None
|
||||
|
||||
@@ -366,9 +384,7 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str,
|
||||
payload = unit.model_dump(mode="json")
|
||||
new_hash = compute_content_hash(payload)
|
||||
|
||||
cursor.execute(
|
||||
"SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,)
|
||||
)
|
||||
cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,))
|
||||
row = cursor.fetchone()
|
||||
if row and row["content_hash"] == new_hash:
|
||||
logger.debug("eiendom_unit %s unchanged (hash match)", unit.unit_code)
|
||||
@@ -378,7 +394,12 @@ def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str,
|
||||
"INSERT OR REPLACE INTO eiendom_units"
|
||||
" (unit_code, payload, content_hash, fetched_at)"
|
||||
" VALUES (?, ?, ?, ?)",
|
||||
(unit.unit_code, json.dumps(payload, default=_json_default), new_hash, unit.fetched_at.isoformat()),
|
||||
(
|
||||
unit.unit_code,
|
||||
json.dumps(payload, default=_json_default),
|
||||
new_hash,
|
||||
unit.fetched_at.isoformat(),
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug("eiendom_unit %s saved (hash=%s)", unit.unit_code, new_hash[:8])
|
||||
@@ -405,9 +426,7 @@ def get_eiendom_unit(
|
||||
def get_eiendom_unit_hash(conn: sqlite3.Connection, unit_code: str) -> str | None:
|
||||
"""Return the stored content_hash for *unit_code*, or None if not cached."""
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,)
|
||||
)
|
||||
cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,))
|
||||
row = cursor.fetchone()
|
||||
return row["content_hash"] if row else None
|
||||
|
||||
@@ -439,9 +458,7 @@ def save_similar_units(
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
if row and row["content_hash"] == new_hash:
|
||||
logger.debug(
|
||||
"similar_units %s/%s unchanged (hash match)", unit_code, listing_status
|
||||
)
|
||||
logger.debug("similar_units %s/%s unchanged (hash match)", unit_code, listing_status)
|
||||
return new_hash, False
|
||||
|
||||
cursor.execute(
|
||||
@@ -457,9 +474,7 @@ def save_similar_units(
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug(
|
||||
"similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8]
|
||||
)
|
||||
logger.debug("similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8])
|
||||
return new_hash, True
|
||||
|
||||
|
||||
@@ -504,9 +519,7 @@ def get_similar_units_hash(
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_analysis(
|
||||
conn: sqlite3.Connection, finnkode: str, deps_hash: str
|
||||
) -> dict[str, Any] | None:
|
||||
def get_analysis(conn: sqlite3.Connection, finnkode: str, deps_hash: str) -> dict[str, Any] | None:
|
||||
"""Return cached analysis for *finnkode* if deps_hash still matches.
|
||||
|
||||
``deps_hash`` encodes the combined hashes of the ad, eiendom unit, and
|
||||
@@ -533,6 +546,24 @@ def get_analysis(
|
||||
return json.loads(row["payload"])
|
||||
|
||||
|
||||
def get_latest_analysis(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None:
|
||||
"""Return the most recent cached analysis for *finnkode*, ignoring deps_hash.
|
||||
|
||||
Unlike :func:`get_analysis`, this does not validate freshness -- it returns
|
||||
whatever was last computed. Used by the shortlist where showing slightly
|
||||
stale enrichment is preferable to recomputing on every read.
|
||||
"""
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT payload FROM analysis_cache WHERE finnkode = ?",
|
||||
(finnkode,),
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
if not row:
|
||||
return None
|
||||
return json.loads(row["payload"])
|
||||
|
||||
|
||||
def _json_default(obj: Any) -> Any:
|
||||
"""Fallback serialiser for json.dumps.
|
||||
Converts datetime/date → ISO string; anything else → repr string.
|
||||
@@ -556,7 +587,12 @@ def save_analysis(
|
||||
"INSERT OR REPLACE INTO analysis_cache"
|
||||
" (finnkode, deps_hash, payload, computed_at)"
|
||||
" VALUES (?, ?, ?, ?)",
|
||||
(finnkode, deps_hash, json.dumps(result, default=_json_default), datetime.now(UTC).isoformat()),
|
||||
(
|
||||
finnkode,
|
||||
deps_hash,
|
||||
json.dumps(result, default=_json_default),
|
||||
datetime.now(UTC).isoformat(),
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug("analysis_cache saved for %s (deps_hash=%s)", finnkode, deps_hash[:8])
|
||||
@@ -564,9 +600,7 @@ def save_analysis(
|
||||
|
||||
def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None:
    """Remove any cached analysis for *finnkode* (call after raw data changes).

    Args:
        conn: Open SQLite connection; the deletion is committed before
            returning.
        finnkode: Listing identifier whose ``analysis_cache`` rows are deleted.
            Deleting a finnkode with no cached rows is a no-op.
    """
    # One parameterised DELETE, then commit so the removal is durable.
    conn.cursor().execute("DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,))
    conn.commit()
|
||||
|
||||
|
||||
@@ -653,10 +687,14 @@ def save_price_history(
|
||||
(finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()),
|
||||
)
|
||||
conn.commit()
|
||||
logger.debug("price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price)
|
||||
logger.debug(
|
||||
"price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price
|
||||
)
|
||||
|
||||
|
||||
def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100) -> list[dict[str, Any]]:
|
||||
def get_price_history(
|
||||
conn: sqlite3.Connection, finnkode: str, limit: int = 100
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Retrieve price history for a listing."""
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
@@ -680,15 +718,12 @@ def get_price_history(conn: sqlite3.Connection, finnkode: str, limit: int = 100)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def save_search_run(
|
||||
conn: sqlite3.Connection, search_url: str, finnkodes: list[str]
|
||||
) -> None:
|
||||
def save_search_run(conn: sqlite3.Connection, search_url: str, finnkodes: list[str]) -> None:
|
||||
"""Record a search run with the finnkodes found."""
|
||||
cursor = conn.cursor()
|
||||
finnkodes_json = json.dumps(finnkodes)
|
||||
cursor.execute(
|
||||
"INSERT INTO search_runs (search_url, finnkodes, created_at)"
|
||||
" VALUES (?, ?, ?)",
|
||||
"INSERT INTO search_runs (search_url, finnkodes, created_at) VALUES (?, ?, ?)",
|
||||
(search_url, finnkodes_json, datetime.now(UTC).isoformat()),
|
||||
)
|
||||
conn.commit()
|
||||
@@ -730,6 +765,4 @@ def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]:
|
||||
def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool:
|
||||
if ttl_hours is None:
|
||||
return True
|
||||
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(
|
||||
hours=ttl_hours
|
||||
)
|
||||
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours)
|
||||
|
||||
Reference in New Issue
Block a user