55d93894ac
feat(scripts): Add backfill script for content_hash in cache tables feat(scripts): Create recompute script for analysis_cache population test(tests): Implement comprehensive tests for analysis module functions fix(tests): Update CLI tests to assert errors on stderr instead of stdout fix(tests): Adjust MCP integration tests to pass context parameter correctly fix(tests): Modify service tests to return hash on save functions for consistency
90 lines
2.7 KiB
Python
90 lines
2.7 KiB
Python
#!/usr/bin/env python
|
|
"""Re-compute and populate analysis_cache for all existing ads.
|
|
|
|
This script runs analyze_ad for all ads in the database, populating
|
|
the analysis_cache table. Call this after backfilling content_hash.
|
|
|
|
Run this once after pulling the refactored code to fix the broken cache.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Emit INFO-level records so the progress and summary messages logged by
# this script are shown when it is run from the command line.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def main() -> None:
    """Recompute analysis for every ad stored in the cache database.

    Reads every ``(finnkode, payload)`` row from ``finn_ads``, validates the
    JSON payload into a ``FinnAd`` and awaits ``analyze_ad`` for it.  A
    failure on a single ad is logged and counted, and the run continues with
    the next ad.  Afterwards the row count of ``analysis_cache`` and the
    number of ``finn_ads`` rows whose payload carries an
    ``eiendom_unit_code`` are logged as a sanity check.

    The database connection is closed even if the run aborts part-way.
    """
    from finn_eiendom.analysis import analyze_ad
    from finn_eiendom.cache import init_db
    from finn_eiendom.config import FINN_CACHE_PATH
    from finn_eiendom.models import FinnAd

    conn = init_db(FINN_CACHE_PATH)
    try:
        cursor = conn.cursor()

        # Load everything up front so the total is known for progress output.
        cursor.execute("SELECT finnkode, payload FROM finn_ads ORDER BY finnkode")
        rows = cursor.fetchall()
        total = len(rows)

        logger.info("Recomputing analysis for %s ads...", total)

        processed = 0
        # Nothing in this script currently skips an ad; the counter is kept so
        # the summary line keeps its established format.
        skipped = 0
        errors = 0
        unit_codes_backfilled = 0

        for finnkode, payload_str in rows:
            try:
                finn_ad = FinnAd.model_validate(json.loads(payload_str))

                # Remember the unit code the ad had *before* analysis (may be
                # None) so a backfill can be detected afterwards.
                unit_code_before = finn_ad.eiendom_unit_code

                await analyze_ad(finn_ad, unit_code=unit_code_before)

                # A backfill means: no unit code going in, one present now.
                # NOTE(review): assumes analyze_ad sets
                # finn_ad.eiendom_unit_code in place when it backfills --
                # confirm against finn_eiendom.analysis.
                if not unit_code_before and finn_ad.eiendom_unit_code:
                    unit_codes_backfilled += 1

                processed += 1
                if processed % 10 == 0:
                    logger.info("  Processed %s/%s...", processed, total)

            except Exception as exc:
                # Deliberate best-effort: one bad ad must not abort the batch.
                logger.warning("Failed to analyze %s: %s", finnkode, exc)
                errors += 1

        logger.info(
            "\nDone. Processed %s, skipped %s, errors %s, "
            "unit_codes backfilled %s",
            processed,
            skipped,
            errors,
            unit_codes_backfilled,
        )

        # Verify
        cursor.execute("SELECT COUNT(*) FROM analysis_cache")
        cache_count = cursor.fetchone()[0]
        logger.info("analysis_cache now has %s rows", cache_count)

        # SQL string literals must be single-quoted; double quotes denote
        # identifiers and only work through SQLite's legacy fallback.
        cursor.execute(
            "SELECT COUNT(*) FROM finn_ads "
            "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
            "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
        )
        unit_code_count = cursor.fetchone()[0]
        logger.info(
            "finn_ads with eiendom_unit_code: %s/%s", unit_code_count, total
        )
    finally:
        conn.close()
|
|
|
|
|
|
# Script entry point: run the async main() to completion when this file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
|