eb95b98111
- Updated docker-compose files to use local data volumes for development. - Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations. - Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting. - Modified cli.py to improve logging and statistics reporting for finn_ads. - Updated config.py to streamline environment variable handling. - Initialized the database eagerly in http_server.py to prevent runtime errors. - Refactored mcp_server.py to slim down data structures and improve response formatting for API calls. - Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned. - Updated recompute_analysis_cache.py for better SQL query formatting.
769 lines
26 KiB
Python
769 lines
26 KiB
Python
"""SQLite cache and persistence for FINN and Eiendom.no data.
|
|
|
|
Caching strategy
|
|
----------------
|
|
Raw data (finn_ads, eiendom_units, similar_units)
|
|
Stored with a SHA-256 content_hash of the serialised payload.
|
|
On write: compare incoming hash to stored hash. If equal the remote
|
|
data has not changed -- the row is left untouched and the caller gets
|
|
back ``changed=False``, which preserves a valid analysis_cache entry.
|
|
|
|
Analysis results (analysis_cache)
|
|
Keyed by ``(finnkode, deps_hash)`` where deps_hash = SHA-256 of the
|
|
combined raw payloads of the ad, eiendom unit, and comps that were used
|
|
to produce the result. A cache hit is only valid when the deps_hash
|
|
still matches, i.e. none of the underlying data has changed.
|
|
This means analysis is re-run *only* when remote data actually changes,
|
|
not on every TTL tick.
|
|
|
|
Search pages / cards (cache_meta)
|
|
Still TTL-based -- these change frequently and a content-hash over a
|
|
full HTML page is cheap but the semantics of "changed" are less clear
|
|
(ads added/removed vs. cosmetic HTML tweaks). Hash is stored anyway so
|
|
callers can detect real list changes if desired.
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import sqlite3
|
|
from datetime import UTC, datetime, timedelta
|
|
from typing import Any
|
|
|
|
from pathlib import Path
|
|
|
|
from .config import FINN_CACHE_PATH
|
|
from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Hashing helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def compute_content_hash(payload: Any) -> str:
    """Hash *payload* into a stable SHA-256 hex digest.

    The payload is serialised to JSON with sorted keys, so two dicts that
    hold the same items in a different insertion order yield the same digest.
    Values the JSON encoder cannot handle natively are converted with ``str``.
    """
    canonical_json = json.dumps(payload, sort_keys=True, default=str)
    digest = hashlib.sha256()
    digest.update(canonical_json.encode())
    return digest.hexdigest()
|
|
|
|
|
|
def combine_hashes(*hashes: str | None) -> str:
|
|
"""Combine multiple content hashes into one deterministic deps_hash."""
|
|
combined = "|".join(h or "" for h in hashes)
|
|
return hashlib.sha256(combined.encode()).hexdigest()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Connection / schema
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def get_connection(path: str | None = None) -> sqlite3.Connection:
|
|
db_path = path or FINN_CACHE_PATH
|
|
conn = sqlite3.connect(str(db_path), detect_types=sqlite3.PARSE_DECLTYPES)
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
|
|
def init_db(path: str | None = None) -> sqlite3.Connection:
    """Create the cache database and its schema, returning an open connection.

    The parent directory of the database file is created when missing. Every
    table and index is created with ``IF NOT EXISTS`` and the ``content_hash``
    / ``last_verified_at`` columns are added to pre-existing tables, so the
    function is safe to call on every start-up.

    Falls back to ``FINN_CACHE_PATH`` when *path* is ``None`` or empty. If any
    schema statement fails, the connection is closed before the exception is
    re-raised so that the handle is not leaked.
    """
    # Ensure parent directory exists — sqlite3.connect() won't create it.
    db_path = Path(path or FINN_CACHE_PATH)
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = get_connection(str(db_path))

    try:
        cursor = conn.cursor()

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS finn_ads (
                finnkode TEXT PRIMARY KEY,
                url TEXT,
                payload TEXT NOT NULL,
                content_hash TEXT,
                fetched_at TEXT NOT NULL,
                last_verified_at TEXT
            )
            """
        )
        # Migrations: add columns if the table already existed without them.
        _add_column_if_missing(cursor, "finn_ads", "content_hash", "TEXT")
        _add_column_if_missing(cursor, "finn_ads", "last_verified_at", "TEXT")

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS eiendom_units (
                unit_code TEXT PRIMARY KEY,
                payload TEXT NOT NULL,
                content_hash TEXT,
                fetched_at TEXT NOT NULL
            )
            """
        )
        _add_column_if_missing(cursor, "eiendom_units", "content_hash", "TEXT")

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS similar_units (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                unit_code TEXT NOT NULL,
                listing_status TEXT NOT NULL,
                payload TEXT NOT NULL,
                content_hash TEXT,
                fetched_at TEXT NOT NULL
            )
            """
        )
        _add_column_if_missing(cursor, "similar_units", "content_hash", "TEXT")

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS cache_meta (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                content_hash TEXT,
                expires_at TEXT
            )
            """
        )
        _add_column_if_missing(cursor, "cache_meta", "content_hash", "TEXT")

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS analysis_cache (
                finnkode TEXT PRIMARY KEY,
                deps_hash TEXT NOT NULL,
                payload TEXT NOT NULL,
                computed_at TEXT NOT NULL
            )
            """
        )

        # New tables for Phase 2 enhancements
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS user_feedback (
                finnkode TEXT PRIMARY KEY,
                verdict TEXT NOT NULL,
                notes TEXT,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
            """
        )

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS price_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                finnkode TEXT NOT NULL,
                total_price INTEGER,
                asking_price INTEGER,
                sale_status TEXT,
                recorded_at TEXT NOT NULL
            )
            """
        )
        cursor.execute(
            "CREATE INDEX IF NOT EXISTS idx_price_history_finnkode_recorded ON price_history(finnkode, recorded_at)"
        )

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS search_runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                search_url TEXT NOT NULL,
                finnkodes TEXT NOT NULL,
                created_at TEXT NOT NULL
            )
            """
        )
        cursor.execute(
            "CREATE INDEX IF NOT EXISTS idx_search_runs_url_created ON search_runs(search_url, created_at)"
        )

        # Create indexes for efficient staleness queries
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_finn_ads_verified ON finn_ads(last_verified_at)")
        cursor.execute(
            "CREATE INDEX IF NOT EXISTS idx_eiendom_units_fetched ON eiendom_units(fetched_at)"
        )
        cursor.execute(
            "CREATE INDEX IF NOT EXISTS idx_similar_units_fetched ON similar_units(fetched_at)"
        )

        conn.commit()
    except Exception:
        # The caller never receives the connection on failure, so release it
        # here instead of leaving the handle open until garbage collection.
        conn.close()
        raise
    return conn
|
|
|
|
|
|
def _add_column_if_missing(cursor: sqlite3.Cursor, table: str, column: str, col_type: str) -> None:
    """Add *column* to *table* unless ``PRAGMA table_info`` already lists it.

    This guard makes the ADD COLUMN migration safe to repeat. The cursor must
    yield rows addressable by name (``row["name"]``). *table*, *column* and
    *col_type* are interpolated into the SQL text, so they must be trusted
    identifiers, never user input.
    """
    cursor.execute(f"PRAGMA table_info({table})")
    known_columns = set()
    for info in cursor.fetchall():
        known_columns.add(info["name"])
    if column in known_columns:
        return
    cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Generic cache_meta helpers (search pages, search cards)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def cache_get(conn: sqlite3.Connection, key: str) -> dict[str, Any] | None:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value, expires_at FROM cache_meta WHERE key = ?", (key,))
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
expires_at = row["expires_at"]
|
|
if expires_at and datetime.fromisoformat(expires_at) < datetime.now(UTC):
|
|
cursor.execute("DELETE FROM cache_meta WHERE key = ?", (key,))
|
|
conn.commit()
|
|
return None
|
|
return json.loads(row["value"])
|
|
|
|
|
|
def cache_set(
    conn: sqlite3.Connection,
    key: str,
    payload: dict[str, Any],
    ttl_hours: int | None = None,
    ttl_minutes: int | None = None,
) -> str:
    """Write *payload* to cache_meta under *key* and return its content_hash.

    The expiry is derived from *ttl_minutes* when given, otherwise from
    *ttl_hours*. With neither, the entry is stored without an expiry. Any
    existing entry for *key* is replaced and the write is committed.
    """
    if ttl_minutes is not None:
        lifetime = timedelta(minutes=ttl_minutes)
    elif ttl_hours is not None:
        lifetime = timedelta(hours=ttl_hours)
    else:
        lifetime = None

    expires_at = None if lifetime is None else (datetime.now(UTC) + lifetime).isoformat()

    digest = compute_content_hash(payload)
    serialised = json.dumps(payload, default=_json_default)
    conn.cursor().execute(
        "INSERT OR REPLACE INTO cache_meta (key, value, content_hash, expires_at)"
        " VALUES (?, ?, ?, ?)",
        (key, serialised, digest, expires_at),
    )
    conn.commit()
    return digest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Search page / cards helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_search_page(
    conn: sqlite3.Connection,
    url: str,
    html: str,
    ttl_minutes: int = 60,
) -> str:
    """Store the raw HTML of a search page and return its content_hash.

    The entry lives in cache_meta under ``search_page:<url>`` and expires
    after *ttl_minutes*.
    """
    cache_key = f"search_page:{url}"
    document = {"html": html}
    return cache_set(conn, cache_key, document, ttl_minutes=ttl_minutes)
|
|
|
|
|
|
def get_search_page(conn: sqlite3.Connection, url: str) -> str | None:
    """Return the cached HTML for *url*, or ``None`` when nothing usable is cached."""
    cached = cache_get(conn, f"search_page:{url}")
    return cached.get("html") if cached else None
|
|
|
|
|
|
def save_search_cards(
    conn: sqlite3.Connection,
    url: str,
    cards: list[FinnSearchCard],
    ttl_minutes: int = 60,
) -> str:
    """Store the parsed search cards for *url* and return their content_hash.

    Cards are dumped in JSON mode and kept in cache_meta under
    ``search_cards:<url>`` for *ttl_minutes*.
    """
    dumped_cards = []
    for card in cards:
        dumped_cards.append(card.model_dump(mode="json"))
    cache_key = f"search_cards:{url}"
    return cache_set(conn, cache_key, dumped_cards, ttl_minutes=ttl_minutes)
|
|
|
|
|
|
def get_search_cards(conn: sqlite3.Connection, url: str) -> list[FinnSearchCard]:
    """Return the cached search cards for *url*.

    Yields an empty list when nothing is cached, the entry has expired, or
    the cached list itself is empty.
    """
    cached = cache_get(conn, f"search_cards:{url}")
    cards: list[FinnSearchCard] = []
    if cached:
        for raw_card in cached:
            cards.append(FinnSearchCard.model_validate(raw_card))
    return cards
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# FinnAd
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> tuple[str, bool]:
    """Write *ad* to finn_ads unless the stored copy is already identical.

    Returns ``(content_hash, changed)``. ``changed`` is ``False`` when the
    stored content_hash equals the hash of the incoming payload. In that case
    only ``last_verified_at`` is moved forward and the rest of the row is left
    alone, so callers can skip recomputing analysis.
    """
    snapshot = ad.model_dump(mode="json")
    digest = compute_content_hash(snapshot)

    if ad.detail_fetched_at:
        fetched_at = ad.detail_fetched_at.isoformat()
    else:
        fetched_at = datetime.now(UTC).isoformat()
    # Saving means the remote data was checked just now, changed or not.
    verified_at = datetime.now(UTC).isoformat()

    cur = conn.cursor()
    cur.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (ad.finnkode,))
    stored = cur.fetchone()
    changed = stored is None or stored["content_hash"] != digest

    if changed:
        cur.execute(
            "INSERT OR REPLACE INTO finn_ads"
            " (finnkode, url, payload, content_hash, fetched_at, last_verified_at)"
            " VALUES (?, ?, ?, ?, ?, ?)",
            (
                ad.finnkode,
                ad.url,
                json.dumps(snapshot, default=_json_default),
                digest,
                fetched_at,
                verified_at,
            ),
        )
        conn.commit()
        logger.debug("finn_ad %s saved (hash=%s)", ad.finnkode, digest[:8])
    else:
        # Identical payload: keep the row (and any analysis built on it) but
        # advance the verification timestamp so freshness timers reset.
        cur.execute(
            "UPDATE finn_ads SET last_verified_at = ? WHERE finnkode = ?",
            (verified_at, ad.finnkode),
        )
        conn.commit()
        logger.debug("finn_ad %s unchanged (hash match, verified bumped)", ad.finnkode)

    return digest, changed
|
|
|
|
|
|
def get_finn_ad(
    conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None
) -> FinnAd | None:
    """Load the stored ad for *finnkode*.

    Returns ``None`` when no row exists, or when *ttl_hours* is given and the
    row's ``fetched_at`` is older than that many hours.
    """
    cur = conn.cursor()
    cur.execute("SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,))
    stored = cur.fetchone()
    if stored is None:
        return None

    too_old = ttl_hours is not None and not _is_fresh(stored["fetched_at"], ttl_hours)
    if too_old:
        return None

    raw_ad = json.loads(stored["payload"])
    return FinnAd.model_validate(raw_ad)
|
|
|
|
|
|
def get_finn_ad_hash(conn: sqlite3.Connection, finnkode: str) -> str | None:
|
|
"""Return the stored content_hash for *finnkode*, or None if not cached."""
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT content_hash FROM finn_ads WHERE finnkode = ?", (finnkode,))
|
|
row = cursor.fetchone()
|
|
return row["content_hash"] if row else None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# EiendomUnit
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> tuple[str, bool]:
    """Write *unit* to eiendom_units unless the stored copy is identical.

    Returns ``(content_hash, changed)``. When the stored hash already equals
    the incoming one, nothing is written and ``changed`` is ``False``.
    """
    snapshot = unit.model_dump(mode="json")
    digest = compute_content_hash(snapshot)

    cur = conn.cursor()
    cur.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit.unit_code,))
    stored = cur.fetchone()
    if stored is not None and stored["content_hash"] == digest:
        logger.debug("eiendom_unit %s unchanged (hash match)", unit.unit_code)
        return digest, False

    row_values = (
        unit.unit_code,
        json.dumps(snapshot, default=_json_default),
        digest,
        unit.fetched_at.isoformat(),
    )
    cur.execute(
        "INSERT OR REPLACE INTO eiendom_units"
        " (unit_code, payload, content_hash, fetched_at)"
        " VALUES (?, ?, ?, ?)",
        row_values,
    )
    conn.commit()
    logger.debug("eiendom_unit %s saved (hash=%s)", unit.unit_code, digest[:8])
    return digest, True
|
|
|
|
|
|
def get_eiendom_unit(
    conn: sqlite3.Connection,
    unit_code: str,
    ttl_hours: int | None = None,
) -> EiendomUnit | None:
    """Load the stored Eiendom unit for *unit_code*.

    Returns ``None`` when no row exists, or when *ttl_hours* is given and the
    row's ``fetched_at`` is older than that many hours.
    """
    cur = conn.cursor()
    cur.execute(
        "SELECT payload, fetched_at FROM eiendom_units WHERE unit_code = ?", (unit_code,)
    )
    stored = cur.fetchone()
    if stored is None:
        return None

    too_old = ttl_hours is not None and not _is_fresh(stored["fetched_at"], ttl_hours)
    if too_old:
        return None

    raw_unit = json.loads(stored["payload"])
    return EiendomUnit.model_validate(raw_unit)
|
|
|
|
|
|
def get_eiendom_unit_hash(conn: sqlite3.Connection, unit_code: str) -> str | None:
|
|
"""Return the stored content_hash for *unit_code*, or None if not cached."""
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT content_hash FROM eiendom_units WHERE unit_code = ?", (unit_code,))
|
|
row = cursor.fetchone()
|
|
return row["content_hash"] if row else None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SimilarUnits
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_similar_units(
    conn: sqlite3.Connection,
    unit_code: str,
    listing_status: str,
    similar_units: list[SimilarUnit],
) -> tuple[str, bool]:
    """Persist *similar_units* for (unit_code, listing_status).

    The incoming list is hashed and compared with the hash of the most recent
    row (highest ``id``) for the same key. When they match nothing is written.
    Otherwise a new row is appended and committed.

    Returns ``(content_hash, changed)``.
    """
    cursor = conn.cursor()
    payload_list = [item.model_dump(mode="json") for item in similar_units]
    new_hash = compute_content_hash(payload_list)

    # Only the hash is needed for the comparison; selecting the payload too
    # would read a potentially large JSON blob on every save for no benefit.
    cursor.execute(
        "SELECT content_hash FROM similar_units"
        " WHERE unit_code = ? AND listing_status = ?"
        " ORDER BY id DESC LIMIT 1",
        (unit_code, listing_status),
    )
    row = cursor.fetchone()
    if row is not None and row["content_hash"] == new_hash:
        logger.debug("similar_units %s/%s unchanged (hash match)", unit_code, listing_status)
        return new_hash, False

    cursor.execute(
        "INSERT INTO similar_units"
        " (unit_code, listing_status, payload, content_hash, fetched_at)"
        " VALUES (?, ?, ?, ?, ?)",
        (
            unit_code,
            listing_status,
            json.dumps(payload_list, default=_json_default),
            new_hash,
            datetime.now(UTC).isoformat(),
        ),
    )
    conn.commit()
    logger.debug("similar_units %s/%s saved (hash=%s)", unit_code, listing_status, new_hash[:8])
    return new_hash, True
|
|
|
|
|
|
def get_similar_units(
    conn: sqlite3.Connection,
    unit_code: str,
    listing_status: str,
    ttl_hours: int | None = None,
) -> list[SimilarUnit]:
    """Load the newest stored comps for (unit_code, listing_status).

    Returns an empty list when no row exists, or when *ttl_hours* is given
    and the newest row's ``fetched_at`` is older than that many hours.
    """
    cur = conn.cursor()
    cur.execute(
        "SELECT payload, fetched_at FROM similar_units"
        " WHERE unit_code = ? AND listing_status = ?"
        " ORDER BY id DESC LIMIT 1",
        (unit_code, listing_status),
    )
    newest = cur.fetchone()
    if newest is None:
        return []

    too_old = ttl_hours is not None and not _is_fresh(newest["fetched_at"], ttl_hours)
    if too_old:
        return []

    comps: list[SimilarUnit] = []
    for raw_unit in json.loads(newest["payload"]):
        comps.append(SimilarUnit.model_validate(raw_unit))
    return comps
|
|
|
|
|
|
def get_similar_units_hash(
|
|
conn: sqlite3.Connection, unit_code: str, listing_status: str
|
|
) -> str | None:
|
|
"""Return the stored content_hash for (unit_code, listing_status), or None."""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT content_hash FROM similar_units"
|
|
" WHERE unit_code = ? AND listing_status = ?"
|
|
" ORDER BY id DESC LIMIT 1",
|
|
(unit_code, listing_status),
|
|
)
|
|
row = cursor.fetchone()
|
|
return row["content_hash"] if row else None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Analysis cache
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def get_analysis(conn: sqlite3.Connection, finnkode: str, deps_hash: str) -> dict[str, Any] | None:
|
|
"""Return cached analysis for *finnkode* if deps_hash still matches.
|
|
|
|
``deps_hash`` encodes the combined hashes of the ad, eiendom unit, and
|
|
comps that were used to produce the analysis. Any change to underlying
|
|
data produces a different deps_hash and the cache is considered stale.
|
|
"""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT payload, deps_hash FROM analysis_cache WHERE finnkode = ?",
|
|
(finnkode,),
|
|
)
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
if row["deps_hash"] != deps_hash:
|
|
logger.debug(
|
|
"analysis_cache miss for %s (deps_hash changed %s→%s)",
|
|
finnkode,
|
|
row["deps_hash"][:8],
|
|
deps_hash[:8],
|
|
)
|
|
return None
|
|
logger.debug("analysis_cache hit for %s", finnkode)
|
|
return json.loads(row["payload"])
|
|
|
|
|
|
def get_latest_analysis(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None:
|
|
"""Return the most recent cached analysis for *finnkode*, ignoring deps_hash.
|
|
|
|
Unlike :func:`get_analysis`, this does not validate freshness -- it returns
|
|
whatever was last computed. Used by the shortlist where showing slightly
|
|
stale enrichment is preferable to recomputing on every read.
|
|
"""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT payload FROM analysis_cache WHERE finnkode = ?",
|
|
(finnkode,),
|
|
)
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
return json.loads(row["payload"])
|
|
|
|
|
|
def _json_default(obj: Any) -> Any:
    """Serialise values that ``json.dumps`` cannot encode by itself.

    Objects exposing ``isoformat`` (datetime, date, ...) become their ISO
    string. Everything else becomes its ``repr``. With this fallback the
    callers' ``json.dumps`` never raises ``TypeError`` for an unknown type.
    """
    try:
        to_iso = obj.isoformat
    except AttributeError:
        return repr(obj)
    return to_iso()
|
|
|
|
|
|
def save_analysis(
    conn: sqlite3.Connection,
    finnkode: str,
    deps_hash: str,
    result: dict[str, Any],
) -> None:
    """Store *result* as the analysis for *finnkode*, tagged with *deps_hash*.

    Any previously stored analysis for the same finnkode is replaced. The
    computation time is recorded as the current UTC time and the write is
    committed.
    """
    serialised = json.dumps(result, default=_json_default)
    computed_at = datetime.now(UTC).isoformat()
    conn.cursor().execute(
        "INSERT OR REPLACE INTO analysis_cache"
        " (finnkode, deps_hash, payload, computed_at)"
        " VALUES (?, ?, ?, ?)",
        (finnkode, deps_hash, serialised, computed_at),
    )
    conn.commit()
    logger.debug("analysis_cache saved for %s (deps_hash=%s)", finnkode, deps_hash[:8])
|
|
|
|
|
|
def invalidate_analysis(conn: sqlite3.Connection, finnkode: str) -> None:
    """Drop the cached analysis for *finnkode*, if any, and commit.

    Meant to be called after the raw data behind the analysis has changed.
    """
    cur = conn.cursor()
    cur.execute("DELETE FROM analysis_cache WHERE finnkode = ?", (finnkode,))
    conn.commit()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# User feedback
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_feedback(
|
|
conn: sqlite3.Connection, finnkode: str, verdict: str, notes: str | None = None
|
|
) -> dict[str, Any]:
|
|
"""Store user feedback/verdict for a FINN listing."""
|
|
cursor = conn.cursor()
|
|
now = datetime.now(UTC).isoformat()
|
|
cursor.execute(
|
|
"INSERT OR REPLACE INTO user_feedback"
|
|
" (finnkode, verdict, notes, created_at, updated_at)"
|
|
" VALUES (?, ?, ?, ?, ?)",
|
|
(finnkode, verdict, notes, now, now),
|
|
)
|
|
conn.commit()
|
|
logger.debug("feedback saved for %s (verdict=%s)", finnkode, verdict)
|
|
return {"finnkode": finnkode, "verdict": verdict, "notes": notes}
|
|
|
|
|
|
def get_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any] | None:
|
|
"""Retrieve stored feedback for a FINN listing."""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback WHERE finnkode = ?",
|
|
(finnkode,),
|
|
)
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
return {
|
|
"finnkode": row["finnkode"],
|
|
"verdict": row["verdict"],
|
|
"notes": row["notes"],
|
|
"created_at": row["created_at"],
|
|
"updated_at": row["updated_at"],
|
|
}
|
|
|
|
|
|
def get_feedback_by_verdict(
    conn: sqlite3.Connection, verdict: str, limit: int = 100
) -> list[dict[str, Any]]:
    """List feedback rows with the given *verdict*, most recently updated first.

    At most *limit* rows are returned. Each one is a dict with the keys
    ``finnkode``, ``verdict``, ``notes``, ``created_at`` and ``updated_at``.
    """
    columns = ("finnkode", "verdict", "notes", "created_at", "updated_at")
    cur = conn.cursor()
    cur.execute(
        "SELECT finnkode, verdict, notes, created_at, updated_at FROM user_feedback"
        " WHERE verdict = ? ORDER BY updated_at DESC LIMIT ?",
        (verdict, limit),
    )
    matches: list[dict[str, Any]] = []
    for entry in cur.fetchall():
        matches.append({column: entry[column] for column in columns})
    return matches
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Price history
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_price_history(
|
|
conn: sqlite3.Connection,
|
|
finnkode: str,
|
|
total_price: int | None = None,
|
|
asking_price: int | None = None,
|
|
sale_status: str | None = None,
|
|
) -> None:
|
|
"""Record a price/status snapshot for a listing."""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"INSERT INTO price_history (finnkode, total_price, asking_price, sale_status, recorded_at)"
|
|
" VALUES (?, ?, ?, ?, ?)",
|
|
(finnkode, total_price, asking_price, sale_status, datetime.now(UTC).isoformat()),
|
|
)
|
|
conn.commit()
|
|
logger.debug(
|
|
"price_history recorded for %s (total=%s, asking=%s)", finnkode, total_price, asking_price
|
|
)
|
|
|
|
|
|
def get_price_history(
    conn: sqlite3.Connection, finnkode: str, limit: int = 100
) -> list[dict[str, Any]]:
    """List the recorded price snapshots for *finnkode*, newest first.

    At most *limit* rows are returned. Each one is a dict with the keys
    ``total_price``, ``asking_price``, ``sale_status`` and ``recorded_at``.
    """
    columns = ("total_price", "asking_price", "sale_status", "recorded_at")
    cur = conn.cursor()
    cur.execute(
        "SELECT total_price, asking_price, sale_status, recorded_at FROM price_history"
        " WHERE finnkode = ? ORDER BY recorded_at DESC LIMIT ?",
        (finnkode, limit),
    )
    history: list[dict[str, Any]] = []
    for entry in cur.fetchall():
        history.append({column: entry[column] for column in columns})
    return history
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Search runs
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def save_search_run(conn: sqlite3.Connection, search_url: str, finnkodes: list[str]) -> None:
|
|
"""Record a search run with the finnkodes found."""
|
|
cursor = conn.cursor()
|
|
finnkodes_json = json.dumps(finnkodes)
|
|
cursor.execute(
|
|
"INSERT INTO search_runs (search_url, finnkodes, created_at) VALUES (?, ?, ?)",
|
|
(search_url, finnkodes_json, datetime.now(UTC).isoformat()),
|
|
)
|
|
conn.commit()
|
|
logger.debug("search_run recorded for %s (%d finnkodes)", search_url, len(finnkodes))
|
|
|
|
|
|
def get_latest_search_run(conn: sqlite3.Connection, search_url: str) -> dict[str, Any] | None:
|
|
"""Retrieve the most recent search run for a URL."""
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT search_url, finnkodes, created_at FROM search_runs"
|
|
" WHERE search_url = ? ORDER BY created_at DESC LIMIT 1",
|
|
(search_url,),
|
|
)
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
return {
|
|
"search_url": row["search_url"],
|
|
"finnkodes": json.loads(row["finnkodes"]),
|
|
"created_at": row["created_at"],
|
|
}
|
|
|
|
|
|
def delete_feedback(conn: sqlite3.Connection, finnkode: str) -> dict[str, Any]:
    """Remove the feedback row for *finnkode* and commit.

    The returned dict always reports ``"deleted": True``, whether or not a
    row existed beforehand.
    """
    conn.cursor().execute("DELETE FROM user_feedback WHERE finnkode = ?", (finnkode,))
    conn.commit()
    logger.debug("feedback deleted for %s", finnkode)
    outcome: dict[str, Any] = {"finnkode": finnkode, "deleted": True}
    return outcome
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internal helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool:
|
|
if ttl_hours is None:
|
|
return True
|
|
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours)
|