Files
finn-mcp/scripts/recompute_analysis_cache.py
T
ole eb95b98111 Refactor and enhance various components of the FINN real estate analysis tool
- Updated docker-compose files to use local data volumes for development.
- Refactored analysis.py to improve code readability and performance, including changes to cache age calculations and hash computations.
- Enhanced cache.py to ensure the database directory is created if it doesn't exist and improved SQL query formatting.
- Modified cli.py to improve logging and statistics reporting for finn_ads.
- Updated config.py to streamline environment variable handling.
- Initialized the database eagerly in http_server.py to prevent runtime errors.
- Refactored mcp_server.py to slim down data structures and improve response formatting for API calls.
- Enhanced service.py to improve feedback handling and shortlist retrieval, ensuring enriched data is returned.
- Updated recompute_analysis_cache.py for better SQL query formatting.
2026-05-29 15:17:11 +00:00

90 lines
2.7 KiB
Python

#!/usr/bin/env python
"""Re-compute and populate analysis_cache for all existing ads.

This script runs analyze_ad for every ad in the database, populating
the analysis_cache table. Call it after backfilling content_hash; run it
once after pulling the refactored code to fix the broken cache.
"""
import asyncio
import json
import logging
from pathlib import Path
# Configure the root logger at INFO so the progress and summary lines that
# main() emits via logger.info are not filtered out by the default level.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by main() for all progress/summary output.
logger = logging.getLogger(__name__)
async def main() -> None:
    """Recompute analysis for all ads.

    Reads every ``(finnkode, payload)`` row from ``finn_ads``, validates the
    JSON payload as a ``FinnAd`` and awaits ``analyze_ad`` on it. A failure
    on a single ad is logged and counted; it does not abort the run. After
    the loop, summary counters and two verification counts
    (``analysis_cache`` rows, ads whose payload carries an
    ``eiendom_unit_code``) are logged.

    The database connection is always closed, even if a query raises.
    """
    # Project imports are kept function-local, as in the original script.
    from finn_eiendom.analysis import analyze_ad
    from finn_eiendom.cache import init_db
    from finn_eiendom.config import FINN_CACHE_PATH
    from finn_eiendom.models import FinnAd

    conn = init_db(FINN_CACHE_PATH)
    try:
        cursor = conn.cursor()

        # Get all ads from the database
        cursor.execute("SELECT finnkode, payload FROM finn_ads ORDER BY finnkode")
        rows = cursor.fetchall()
        total = len(rows)
        logger.info("Recomputing analysis for %d ads...", total)

        processed = 0
        # Nothing in the loop currently skips an ad; the counter is kept so
        # the summary line keeps its existing format.
        skipped = 0
        errors = 0
        unit_codes_backfilled = 0

        for finnkode, payload_str in rows:
            try:
                finn_ad = FinnAd.model_validate(json.loads(payload_str))

                # Remember whether the ad already carried a unit code so a
                # backfill can be detected by comparing before and after.
                unit_code = finn_ad.eiendom_unit_code  # may be None
                had_unit_code = bool(unit_code)

                await analyze_ad(finn_ad, unit_code=unit_code)

                # NOTE(review): this only registers a backfill if analyze_ad
                # sets eiendom_unit_code on the model it is given - confirm
                # against analyze_ad. The previous check compared the value
                # with itself and could never be true.
                if not had_unit_code and finn_ad.eiendom_unit_code:
                    unit_codes_backfilled += 1

                processed += 1
                if processed % 10 == 0:
                    logger.info(" Processed %d/%d...", processed, total)
            except Exception as exc:
                # Deliberate best-effort: one bad ad must not stop the batch.
                logger.warning("Failed to analyze %s: %s", finnkode, exc)
                errors += 1

        logger.info(
            "\nDone. Processed %d, skipped %d, errors %d, "
            "unit_codes backfilled %d",
            processed,
            skipped,
            errors,
            unit_codes_backfilled,
        )

        # Verify
        cursor.execute("SELECT COUNT(*) FROM analysis_cache")
        cache_count = cursor.fetchone()[0]
        logger.info("analysis_cache now has %d rows", cache_count)

        # SQL string literals must be single-quoted: SQLite parses
        # double-quoted tokens as identifiers and only falls back to string
        # literals through a legacy quirk that builds may disable.
        cursor.execute(
            "SELECT COUNT(*) FROM finn_ads "
            "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
            "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
        )
        unit_code_count = cursor.fetchone()[0]
        logger.info(
            "finn_ads with eiendom_unit_code: %d/%d", unit_code_count, total
        )
    finally:
        conn.close()
if __name__ == "__main__":
    # Executed as a script: drive the async entry point to completion.
    asyncio.run(main())