feat(refactor): Document refactoring progress and phases in markdown

feat(scripts): Add backfill script for content_hash in cache tables

feat(scripts): Create recompute script for analysis_cache population

test(tests): Implement comprehensive tests for analysis module functions

fix(tests): Update CLI tests to assert errors on stderr instead of stdout

fix(tests): Adjust MCP integration tests to pass context parameter correctly

fix(tests): Modify service tests to return hash on save functions for consistency
This commit is contained in:
Ole
2026-05-29 15:16:57 +00:00
parent 5b772b2ae5
commit 55d93894ac
18 changed files with 1457 additions and 60 deletions
+52 -4
View File
@@ -32,12 +32,14 @@ from .cache import (
save_analysis,
save_eiendom_unit,
save_finn_ad,
save_search_run,
save_similar_units,
)
from .config import (
EIENDOM_NO_CACHE_TTL_HOURS,
EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS,
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS,
FINN_CACHE_PATH,
FINN_CACHE_TTL_AD_HOURS,
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS,
FINN_DETAIL_LIMIT,
FINN_MAX_SEARCH_PAGES,
)
@@ -147,6 +149,12 @@ async def analyze_ad(
"""
conn = cache.init_db(FINN_CACHE_PATH)
# ------------------------------------------------------------------
# 0. Backfill eiendom_unit_code from the unit_code argument when the ad has none.
# ------------------------------------------------------------------
if unit_code and not finn_ad.eiendom_unit_code:
finn_ad.eiendom_unit_code = unit_code
# ------------------------------------------------------------------
# 1. Ensure the ad is in the DB so we have a stable hash to key on.
# ------------------------------------------------------------------
@@ -173,8 +181,10 @@ async def analyze_ad(
comps_hash_changed = False
if enriched:
# Convert similar units TTL from days to hours
ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24
similar_units = cache.get_similar_units(
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=ttl_hours
)
if not similar_units:
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
@@ -210,11 +220,38 @@ async def analyze_ad(
categories = scoring.classify_ad(scores)
summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)
# Get price history and cache age metadata
from .cache import get_price_history, get_finn_ad_hash
from datetime import datetime, UTC, timedelta
price_history = get_price_history(conn, finn_ad.finnkode, limit=20)
# Compute cache age: days since the ad was fetched (fetched_at) and hours since it was last verified (last_verified_at)
cursor = conn.cursor()
cursor.execute(
"SELECT fetched_at, last_verified_at FROM finn_ads WHERE finnkode = ?",
(finn_ad.finnkode,),
)
db_row = cursor.fetchone()
cache_age = None
if db_row:
fetched_at = datetime.fromisoformat(db_row["fetched_at"])
last_verified = db_row["last_verified_at"]
if last_verified:
last_verified_at = datetime.fromisoformat(last_verified)
structural_age_days = (datetime.now(UTC) - fetched_at).days
price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600
cache_age = {
"structural_days": structural_age_days,
"price_hours": round(price_age_hours, 1),
}
result = {
"finnkode": finn_ad.finnkode,
"url": finn_ad.url,
"title": finn_ad.title,
"address": finn_ad.address,
"listing_description": finn_ad.listing_description,
"district": finn_ad.district,
"property_type": finn_ad.property_type,
"ownership_type": finn_ad.ownership_type,
@@ -236,6 +273,8 @@ async def analyze_ad(
"score": scores,
"categories": categories,
"summary": summary,
"price_history": price_history,
"cache_age": cache_age,
"eiendom_unit": enriched.model_dump(mode="json") if enriched else None,
"similar_units": [unit.model_dump(mode="json") for unit in similar_units],
}
@@ -262,7 +301,7 @@ async def _fetch_card_to_db(
treats None as a skip without aborting the whole batch.
"""
try:
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24)
if finn_ad is None:
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
save_finn_ad(conn, finn_ad)
@@ -275,6 +314,11 @@ async def _fetch_card_to_db(
try:
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
unit_code = matched_unit.unit_code if matched_unit else None
# Backfill unit_code into the ad object and persist.
# This ensures the cached ad has the eiendom_unit_code field populated.
if unit_code and not finn_ad.eiendom_unit_code:
finn_ad.eiendom_unit_code = unit_code
_, _ = save_finn_ad(conn, finn_ad)
except Exception as exc:
logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
@@ -384,6 +428,10 @@ async def analyze_search(
f"{skipped_count} skipped."
)
# Record this search run in the database
finnkodes = [card.finnkode for card in cards]
save_search_run(conn, search_url, finnkodes)
return {
"search_url": search_url,
"search_cards": [card.model_dump(mode="json") for card in cards],