244 lines
6.9 KiB
Python
244 lines
6.9 KiB
Python
"""SQLite cache and persistence for FINN and Eiendom.no data."""
|
|
|
|
import json
|
|
import logging
|
|
import sqlite3
|
|
from datetime import UTC, datetime, timedelta
|
|
from typing import Any
|
|
|
|
from .config import FINN_CACHE_PATH
|
|
from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_connection(path: str | None = None) -> sqlite3.Connection:
|
|
db_path = path or FINN_CACHE_PATH
|
|
conn = sqlite3.connect(str(db_path), detect_types=sqlite3.PARSE_DECLTYPES)
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
|
|
def init_db(path: str | None = None) -> sqlite3.Connection:
    """Open the database and make sure every table this module uses exists.

    Creates ``finn_ads``, ``eiendom_units``, ``similar_units`` and
    ``cache_meta`` with ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already-initialised database leaves existing tables untouched.

    Args:
        path: Database file, forwarded unchanged to ``get_connection``.

    Returns:
        The open connection, after the schema statements have been committed.
    """
    schema = (
        """
        CREATE TABLE IF NOT EXISTS finn_ads (
            finnkode TEXT PRIMARY KEY,
            url TEXT,
            payload TEXT NOT NULL,
            fetched_at TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS eiendom_units (
            unit_code TEXT PRIMARY KEY,
            payload TEXT NOT NULL,
            fetched_at TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS similar_units (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unit_code TEXT NOT NULL,
            listing_status TEXT NOT NULL,
            payload TEXT NOT NULL,
            fetched_at TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS cache_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL,
            expires_at TEXT
        )
        """,
    )

    conn = get_connection(path)
    cur = conn.cursor()
    # Statements run in declaration order; the tables do not reference each other.
    for ddl in schema:
        cur.execute(ddl)
    conn.commit()
    return conn
|
|
|
|
|
|
def cache_get(conn: sqlite3.Connection, key: str) -> dict[str, Any] | None:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value, expires_at FROM cache_meta WHERE key = ?", (key,))
|
|
row = cursor.fetchone()
|
|
if not row:
|
|
return None
|
|
|
|
expires_at = row["expires_at"]
|
|
if expires_at and datetime.fromisoformat(expires_at) < datetime.now(UTC):
|
|
cursor.execute("DELETE FROM cache_meta WHERE key = ?", (key,))
|
|
conn.commit()
|
|
return None
|
|
|
|
return json.loads(row["value"])
|
|
|
|
|
|
def cache_set(
|
|
conn: sqlite3.Connection,
|
|
key: str,
|
|
payload: dict[str, Any],
|
|
ttl_hours: int | None = None,
|
|
ttl_minutes: int | None = None,
|
|
) -> None:
|
|
expires_at = None
|
|
if ttl_minutes is not None:
|
|
expires_at = (datetime.now(UTC) + timedelta(minutes=ttl_minutes)).isoformat()
|
|
elif ttl_hours is not None:
|
|
expires_at = (datetime.now(UTC) + timedelta(hours=ttl_hours)).isoformat()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"INSERT OR REPLACE INTO cache_meta (key, value, expires_at) VALUES (?, ?, ?)",
|
|
(key, json.dumps(payload), expires_at),
|
|
)
|
|
conn.commit()
|
|
|
|
|
|
def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool:
|
|
if ttl_hours is None:
|
|
return True
|
|
return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours)
|
|
|
|
|
|
def save_search_page(
    conn: sqlite3.Connection,
    url: str,
    html: str,
    ttl_minutes: int = 60,
) -> None:
    """Cache the raw HTML of a search page, keyed by its URL.

    Args:
        conn: Open cache database connection.
        url: Search page URL; the cache key is ``search_page:<url>``.
        html: Page markup to store.
        ttl_minutes: Minutes until the cached page expires.
    """
    cache_key = f"search_page:{url}"
    body = {"html": html}
    cache_set(conn, cache_key, body, ttl_minutes=ttl_minutes)
|
|
|
|
|
|
def get_search_page(conn: sqlite3.Connection, url: str) -> str | None:
    """Return the cached HTML for a search page URL, if there is any.

    Args:
        conn: Open cache database connection.
        url: Search page URL; looked up under ``search_page:<url>``.

    Returns:
        The value stored under the ``"html"`` key, or ``None`` when
        ``cache_get`` yields nothing (or an empty payload) for this URL.
    """
    cached = cache_get(conn, f"search_page:{url}")
    return cached.get("html") if cached else None
|
|
|
|
|
|
def save_search_cards(
    conn: sqlite3.Connection,
    url: str,
    cards: list[FinnSearchCard],
    ttl_minutes: int = 60,
) -> None:
    """Cache the search cards parsed from a search page, keyed by its URL.

    Args:
        conn: Open cache database connection.
        url: Search page URL; the cache key is ``search_cards:<url>``.
        cards: Cards to store; each is dumped with ``model_dump(mode="json")``.
        ttl_minutes: Minutes until the cached cards expire.
    """
    serialised = [card.model_dump(mode="json") for card in cards]
    cache_set(conn, f"search_cards:{url}", serialised, ttl_minutes=ttl_minutes)
|
|
|
|
|
|
def get_search_cards(conn: sqlite3.Connection, url: str) -> list[FinnSearchCard]:
    """Return the cached search cards for a search page URL.

    Args:
        conn: Open cache database connection.
        url: Search page URL; looked up under ``search_cards:<url>``.

    Returns:
        One ``FinnSearchCard`` per cached item, rebuilt with
        ``model_validate``; an empty list when nothing usable is cached.
    """
    cached = cache_get(conn, f"search_cards:{url}")
    cards: list[FinnSearchCard] = []
    # A missing, expired or empty cache entry all produce an empty result.
    for item in cached or ():
        cards.append(FinnSearchCard.model_validate(item))
    return cards
|
|
|
|
|
|
def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> None:
    """Insert or overwrite the ``finn_ads`` row for ``ad`` and commit.

    The row is keyed by ``ad.finnkode``. ``fetched_at`` is taken from
    ``ad.detail_fetched_at`` when that is set; otherwise the current UTC time
    is recorded.

    Args:
        conn: Open database connection containing ``finn_ads``.
        ad: Ad to persist; stored as the JSON dump of ``model_dump(mode="json")``.
    """
    cur = conn.cursor()
    encoded = json.dumps(ad.model_dump(mode="json"))
    fetched = ad.detail_fetched_at or datetime.now(UTC)
    record = (ad.finnkode, ad.url, encoded, fetched.isoformat())
    cur.execute(
        "INSERT OR REPLACE INTO finn_ads (finnkode, url, payload, fetched_at) VALUES (?, ?, ?, ?)",
        record,
    )
    conn.commit()
|
|
|
|
|
|
def get_finn_ad(
    conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None
) -> FinnAd | None:
    """Load a stored ad by its finnkode, optionally rejecting stale rows.

    Args:
        conn: Connection whose rows can be indexed by column name.
        finnkode: Primary key of the ad in ``finn_ads``.
        ttl_hours: Maximum accepted age of the row in hours; ``None`` accepts
            a row of any age.

    Returns:
        The ad rebuilt with ``FinnAd.model_validate``, or ``None`` when no row
        exists or the row is older than ``ttl_hours``.
    """
    found = (
        conn.cursor()
        .execute("SELECT payload, fetched_at FROM finn_ads WHERE finnkode = ?", (finnkode,))
        .fetchone()
    )
    if not found:
        return None

    stale = ttl_hours is not None and not _is_fresh(found["fetched_at"], ttl_hours)
    if stale:
        return None

    data = json.loads(found["payload"])
    return FinnAd.model_validate(data)
|
|
|
|
|
|
def save_eiendom_unit(conn: sqlite3.Connection, unit: EiendomUnit) -> None:
    """Insert or overwrite the ``eiendom_units`` row for ``unit`` and commit.

    The row is keyed by ``unit.unit_code`` and stamped with
    ``unit.fetched_at`` in ISO-8601 form.

    Args:
        conn: Open database connection containing ``eiendom_units``.
        unit: Unit to persist; stored as the JSON dump of
            ``model_dump(mode="json")``.
    """
    cur = conn.cursor()
    encoded = json.dumps(unit.model_dump(mode="json"))
    stamp = unit.fetched_at.isoformat()
    cur.execute(
        "INSERT OR REPLACE INTO eiendom_units (unit_code, payload, fetched_at) VALUES (?, ?, ?)",
        (unit.unit_code, encoded, stamp),
    )
    conn.commit()
|
|
|
|
|
|
def get_eiendom_unit(
    conn: sqlite3.Connection,
    unit_code: str,
    ttl_hours: int | None = None,
) -> EiendomUnit | None:
    """Load a stored unit by its unit code, optionally rejecting stale rows.

    Args:
        conn: Connection whose rows can be indexed by column name.
        unit_code: Primary key of the unit in ``eiendom_units``.
        ttl_hours: Maximum accepted age of the row in hours; ``None`` accepts
            a row of any age.

    Returns:
        The unit rebuilt with ``EiendomUnit.model_validate``, or ``None`` when
        no row exists or the row is older than ``ttl_hours``.
    """
    query = "SELECT payload, fetched_at FROM eiendom_units WHERE unit_code = ?"
    found = conn.cursor().execute(query, (unit_code,)).fetchone()
    if not found:
        return None

    stale = ttl_hours is not None and not _is_fresh(found["fetched_at"], ttl_hours)
    if stale:
        return None

    data = json.loads(found["payload"])
    return EiendomUnit.model_validate(data)
|
|
|
|
|
|
def save_similar_units(
    conn: sqlite3.Connection,
    unit_code: str,
    listing_status: str,
    similar_units: list[SimilarUnit],
) -> None:
    """Append a snapshot of similar units for a unit and listing status.

    The whole list is stored as one JSON array in a single new row, stamped
    with the current UTC time, and the insert is committed.

    Args:
        conn: Open database connection containing ``similar_units``.
        unit_code: Unit the similar units belong to.
        listing_status: Listing status the snapshot was fetched for.
        similar_units: Items to store; each is dumped with
            ``model_dump(mode="json")``.
    """
    # NOTE(review): this is a plain INSERT, so earlier snapshots for the same
    # (unit_code, listing_status) stay in the table; get_similar_units reads
    # only the newest row.
    cur = conn.cursor()
    dumped = [item.model_dump(mode="json") for item in similar_units]
    encoded = json.dumps(dumped)
    statement = (
        "INSERT INTO similar_units"
        " (unit_code, listing_status, payload, fetched_at)"
        " VALUES (?, ?, ?, ?)"
    )
    now_iso = datetime.now(UTC).isoformat()
    cur.execute(statement, (unit_code, listing_status, encoded, now_iso))
    conn.commit()
|
|
|
|
|
|
def get_similar_units(
    conn: sqlite3.Connection,
    unit_code: str,
    listing_status: str,
    ttl_hours: int | None = None,
) -> list[SimilarUnit]:
    """Return the most recently stored similar units for a unit and status.

    Args:
        conn: Connection whose rows can be indexed by column name.
        unit_code: Unit whose similar units are requested.
        listing_status: Listing status the snapshot was stored under.
        ttl_hours: Maximum accepted age of the snapshot in hours; ``None``
            accepts a snapshot of any age.

    Returns:
        The items of the newest matching row (highest ``id``), each rebuilt
        with ``SimilarUnit.model_validate``; an empty list when no row matches
        or the newest row is older than ``ttl_hours``.
    """
    query = (
        "SELECT payload, fetched_at FROM similar_units"
        " WHERE unit_code = ? AND listing_status = ?"
        " ORDER BY id DESC LIMIT 1"
    )
    latest = conn.cursor().execute(query, (unit_code, listing_status)).fetchone()
    if not latest:
        return []

    stale = ttl_hours is not None and not _is_fresh(latest["fetched_at"], ttl_hours)
    if stale:
        return []

    units: list[SimilarUnit] = []
    for item in json.loads(latest["payload"]):
        units.append(SimilarUnit.model_validate(item))
    return units
|