#!/usr/bin/env python
"""Re-compute and populate analysis_cache for all existing ads.

This script runs analyze_ad for every ad stored in the finn_ads table,
populating the analysis_cache table. Call this after backfilling
content_hash.

Run this once after pulling the refactored code to fix the broken cache.
"""

import asyncio
import json
import logging
import sqlite3
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def count_ads_with_unit_code(cursor: sqlite3.Cursor) -> int:
    """Return how many finn_ads rows carry an eiendom_unit_code in their payload.

    Args:
        cursor: Cursor on a database that contains a ``finn_ads`` table with
            a JSON ``payload`` column.

    Returns:
        The number of rows whose ``$.eiendom_unit_code`` is neither SQL NULL
        nor the literal text ``null``.
    """
    # SQL string literals must be single-quoted: SQLite parses double-quoted
    # tokens as identifiers and only falls back to treating them as strings
    # through a legacy quirk that can be disabled at build time.
    cursor.execute(
        "SELECT COUNT(*) FROM finn_ads "
        "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
        "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
    )
    return cursor.fetchone()[0]


async def main() -> None:
    """Recompute analysis for all ads.

    Loads every (finnkode, payload) row from finn_ads, validates the payload
    into a FinnAd, and awaits analyze_ad on it. A failure on one ad is logged
    and counted, and processing continues with the next ad. A summary and two
    verification counts are logged at the end.
    """
    # Project imports are kept function-local, as in the original script.
    from finn_eiendom.analysis import analyze_ad
    from finn_eiendom.cache import init_db
    from finn_eiendom.config import FINN_CACHE_PATH
    from finn_eiendom.models import FinnAd

    conn = init_db(FINN_CACHE_PATH)
    try:
        cursor = conn.cursor()

        # Get all ads from the database
        cursor.execute("SELECT finnkode, payload FROM finn_ads ORDER BY finnkode")
        rows = cursor.fetchall()
        total = len(rows)

        # Snapshot taken before any analysis runs, so the number of backfilled
        # unit codes can be measured from the stored payloads afterwards.
        unit_codes_before = count_ads_with_unit_code(cursor)

        logger.info("Recomputing analysis for %s ads...", total)

        processed = 0
        # No code path skips an ad; the counter is retained only so the
        # summary line keeps its existing format.
        skipped = 0
        errors = 0

        for finnkode, payload_str in rows:
            try:
                payload = json.loads(payload_str)
                finn_ad = FinnAd.model_validate(payload)

                # unit_code comes straight from the payload (may be None)
                unit_code = finn_ad.eiendom_unit_code

                # NOTE(review): analyze_ad is expected to save to
                # analysis_cache and backfill a missing unit_code -- its
                # implementation is not visible here; confirm.
                await analyze_ad(finn_ad, unit_code=unit_code)

                processed += 1
                if processed % 10 == 0:
                    logger.info("  Processed %s/%s...", processed, total)
            except Exception as exc:
                # Per-ad boundary: one bad ad must not abort the whole run.
                logger.warning("Failed to analyze %s: %s", finnkode, exc)
                errors += 1

        # Verify
        cursor.execute("SELECT COUNT(*) FROM analysis_cache")
        cache_count = cursor.fetchone()[0]

        unit_code_count = count_ads_with_unit_code(cursor)
        # The previous in-loop check compared a value against itself and could
        # never fire; the before/after difference reflects what was stored.
        unit_codes_backfilled = max(unit_code_count - unit_codes_before, 0)

        logger.info(
            "\nDone. Processed %s, skipped %s, errors %s, "
            "unit_codes backfilled %s",
            processed,
            skipped,
            errors,
            unit_codes_backfilled,
        )
        logger.info("analysis_cache now has %s rows", cache_count)
        logger.info(
            "finn_ads with eiendom_unit_code: %s/%s", unit_code_count, total
        )
    finally:
        # Release the connection even if a query above raises.
        conn.close()


if __name__ == "__main__":
    asyncio.run(main())