feat(refactor): Document refactoring progress and phases in markdown
feat(scripts): Add backfill script for content_hash in cache tables
feat(scripts): Create recompute script for analysis_cache population
test(tests): Implement comprehensive tests for analysis module functions
fix(tests): Update CLI tests to assert errors on stderr instead of stdout
fix(tests): Adjust MCP integration tests to pass context parameter correctly
fix(tests): Modify service tests to return hash on save functions for consistency
This commit is contained in:
@@ -32,12 +32,14 @@ from .cache import (
|
||||
save_analysis,
|
||||
save_eiendom_unit,
|
||||
save_finn_ad,
|
||||
save_search_run,
|
||||
save_similar_units,
|
||||
)
|
||||
from .config import (
|
||||
EIENDOM_NO_CACHE_TTL_HOURS,
|
||||
EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS,
|
||||
EIENDOM_NO_CACHE_TTL_STRUCTURAL_DAYS,
|
||||
FINN_CACHE_PATH,
|
||||
FINN_CACHE_TTL_AD_HOURS,
|
||||
FINN_CACHE_TTL_AD_STRUCTURAL_DAYS,
|
||||
FINN_DETAIL_LIMIT,
|
||||
FINN_MAX_SEARCH_PAGES,
|
||||
)
|
||||
@@ -147,6 +149,12 @@ async def analyze_ad(
|
||||
"""
|
||||
conn = cache.init_db(FINN_CACHE_PATH)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 0. Backfill eiendom_unit_code if provided.
|
||||
# ------------------------------------------------------------------
|
||||
if unit_code and not finn_ad.eiendom_unit_code:
|
||||
finn_ad.eiendom_unit_code = unit_code
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1. Ensure the ad is in the DB so we have a stable hash to key on.
|
||||
# ------------------------------------------------------------------
|
||||
@@ -173,8 +181,10 @@ async def analyze_ad(
|
||||
comps_hash_changed = False
|
||||
|
||||
if enriched:
|
||||
# Convert similar units TTL from days to hours
|
||||
ttl_hours = EIENDOM_NO_CACHE_TTL_SIMILAR_UNITS_DAYS * 24
|
||||
similar_units = cache.get_similar_units(
|
||||
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
|
||||
conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=ttl_hours
|
||||
)
|
||||
if not similar_units:
|
||||
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
|
||||
@@ -210,11 +220,38 @@ async def analyze_ad(
|
||||
categories = scoring.classify_ad(scores)
|
||||
summary = _build_ad_summary(finn_ad, enriched, similar_units, scores, categories)
|
||||
|
||||
# Get price history and cache age metadata
|
||||
from .cache import get_price_history, get_finn_ad_hash
|
||||
from datetime import datetime, UTC, timedelta
|
||||
|
||||
price_history = get_price_history(conn, finn_ad.finnkode, limit=20)
|
||||
|
||||
# Compute cache age: how long since we last fetched this ad
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT fetched_at, last_verified_at FROM finn_ads WHERE finnkode = ?",
|
||||
(finn_ad.finnkode,),
|
||||
)
|
||||
db_row = cursor.fetchone()
|
||||
cache_age = None
|
||||
if db_row:
|
||||
fetched_at = datetime.fromisoformat(db_row["fetched_at"])
|
||||
last_verified = db_row["last_verified_at"]
|
||||
if last_verified:
|
||||
last_verified_at = datetime.fromisoformat(last_verified)
|
||||
structural_age_days = (datetime.now(UTC) - fetched_at).days
|
||||
price_age_hours = (datetime.now(UTC) - last_verified_at).total_seconds() / 3600
|
||||
cache_age = {
|
||||
"structural_days": structural_age_days,
|
||||
"price_hours": round(price_age_hours, 1),
|
||||
}
|
||||
|
||||
result = {
|
||||
"finnkode": finn_ad.finnkode,
|
||||
"url": finn_ad.url,
|
||||
"title": finn_ad.title,
|
||||
"address": finn_ad.address,
|
||||
"listing_description": finn_ad.listing_description,
|
||||
"district": finn_ad.district,
|
||||
"property_type": finn_ad.property_type,
|
||||
"ownership_type": finn_ad.ownership_type,
|
||||
@@ -236,6 +273,8 @@ async def analyze_ad(
|
||||
"score": scores,
|
||||
"categories": categories,
|
||||
"summary": summary,
|
||||
"price_history": price_history,
|
||||
"cache_age": cache_age,
|
||||
"eiendom_unit": enriched.model_dump(mode="json") if enriched else None,
|
||||
"similar_units": [unit.model_dump(mode="json") for unit in similar_units],
|
||||
}
|
||||
@@ -262,7 +301,7 @@ async def _fetch_card_to_db(
|
||||
treats None as a skip without aborting the whole batch.
|
||||
"""
|
||||
try:
|
||||
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
|
||||
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_STRUCTURAL_DAYS * 24)
|
||||
if finn_ad is None:
|
||||
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
|
||||
save_finn_ad(conn, finn_ad)
|
||||
@@ -275,6 +314,11 @@ async def _fetch_card_to_db(
|
||||
try:
|
||||
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
|
||||
unit_code = matched_unit.unit_code if matched_unit else None
|
||||
# Backfill unit_code into the ad object and persist.
|
||||
# This ensures the cached ad has the eiendom_unit_code field populated.
|
||||
if unit_code and not finn_ad.eiendom_unit_code:
|
||||
finn_ad.eiendom_unit_code = unit_code
|
||||
_, _ = save_finn_ad(conn, finn_ad)
|
||||
except Exception as exc:
|
||||
logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
|
||||
|
||||
@@ -384,6 +428,10 @@ async def analyze_search(
|
||||
f"{skipped_count} skipped."
|
||||
)
|
||||
|
||||
# Record this search run in the database
|
||||
finnkodes = [card.finnkode for card in cards]
|
||||
save_search_run(conn, search_url, finnkodes)
|
||||
|
||||
return {
|
||||
"search_url": search_url,
|
||||
"search_cards": [card.model_dump(mode="json") for card in cards],
|
||||
|
||||
Reference in New Issue
Block a user