Refactor and enhance various components of the FINN real estate analysis tool

- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
This commit is contained in:
Ole
2026-05-29 15:17:11 +00:00
parent 55d93894ac
commit eb95b98111
10 changed files with 295 additions and 343 deletions
+50 -12
View File
@@ -24,7 +24,9 @@ from .ad import fetch_ad_details
from .analysis import analyze_search as run_analysis_search
from .cache import (
get_eiendom_unit as get_cached_eiendom_unit,
get_feedback_by_verdict,
get_finn_ad,
get_latest_analysis,
get_similar_units as get_cached_similar_units,
init_db,
invalidate_analysis,
@@ -137,11 +139,7 @@ async def get_or_fetch_eiendom_unit(
conn = init_db(FINN_CACHE_PATH)
# Convert structural TTL from days to hours
ttl_hours = EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS * 24
unit = (
None
if force_refresh
else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours)
)
unit = None if force_refresh else get_cached_eiendom_unit(conn, unit_code, ttl_hours=ttl_hours)
if unit is not None:
return unit
@@ -182,9 +180,7 @@ async def get_or_fetch_similar_units(
conn, unit_code, listing_status, ttl_hours=ttl_hours
)
if cached_similar:
logger.debug(
"Using cached similar units for %s (status=%s)", unit_code, listing_status
)
logger.debug("Using cached similar units for %s (status=%s)", unit_code, listing_status)
return cached_similar
# Cache miss or force_refresh: fetch from remote.
@@ -377,10 +373,52 @@ def save_feedback(finnkode: str, verdict: str, notes: str | None = None) -> dict
return save_feedback_impl(finnkode, verdict, notes)
def get_shortlist(run_id: int | None = None, limit: int = 10) -> dict[str, Any]:
"""Fetch stored shortlist from a search run."""
# TODO: implement via search_runs table in cache.py
return {"shortlist": [], "run_id": run_id, "limit": limit}
def get_shortlist(
    verdict: str = "liked", limit: int = 10
) -> dict[str, Any]:
    """Return the listings the user has marked with *verdict*, best score first.

    Feedback rows are loaded with ``get_feedback_by_verdict`` and every
    finnkode is enriched with the result of ``get_latest_analysis`` (score,
    prices, categories, ...) when one is available. Entries with no cached
    analysis still appear, carrying only finnkode, verdict, notes and url, so
    nothing the user flagged is silently dropped.

    Note that *limit* is handed to the feedback query, i.e. it is applied
    before the entries are sorted by score.

    Args:
        verdict: Feedback verdict to select.
        limit: Forwarded to ``get_feedback_by_verdict`` as ``limit``.

    Returns:
        A dict with ``shortlist`` (entry dicts sorted by score descending,
        entries without a score last) plus the requested ``verdict`` and
        ``limit``.
    """
    conn = init_db(FINN_CACHE_PATH)
    feedback_rows = get_feedback_by_verdict(conn, verdict, limit=limit)

    shortlist: list[dict[str, Any]] = []
    for fb in feedback_rows:
        finnkode = fb["finnkode"]
        entry: dict[str, Any] = {
            "finnkode": finnkode,
            "verdict": fb["verdict"],
            "notes": fb["notes"],
            "url": f"https://www.finn.no/realestate/homes/ad.html?finnkode={finnkode}",
        }

        analysis = get_latest_analysis(conn, finnkode)
        if analysis:
            # ``or`` (rather than a .get default) also covers keys that are
            # present but hold None, so the lookups below never hit None.
            score = analysis.get("score") or {}
            eiendom = analysis.get("eiendom_unit") or {}
            entry.update(
                {
                    "title": analysis.get("title"),
                    "address": analysis.get("address"),
                    "area_m2": analysis.get("area_m2"),
                    "total_price": analysis.get("total_price"),
                    "asking_price": analysis.get("asking_price"),
                    "score": score.get("total"),
                    # Always a list, even when the key exists with a None value.
                    "categories": analysis.get("categories") or [],
                    "market_placement": eiendom.get("market_placement_score"),
                }
            )

        shortlist.append(entry)

    # Highest score first; un-enriched entries (score None) sink to the bottom.
    # The leading boolean keeps None out of the numeric comparison.
    shortlist.sort(key=lambda e: (e.get("score") is not None, e.get("score") or 0), reverse=True)

    return {"shortlist": shortlist, "verdict": verdict, "limit": limit}
def get_new_ads_since_last_run(search_url: str) -> dict[str, Any]: