feat(refactor): Document refactoring progress and phases in markdown

feat(scripts): Add backfill script for content_hash in cache tables

feat(scripts): Create recompute script for analysis_cache population

test(tests): Implement comprehensive tests for analysis module functions

fix(tests): Update CLI tests to assert errors on stderr instead of stdout

fix(tests): Adjust MCP integration tests to pass context parameter correctly

fix(tests): Modify service tests to return hash on save functions for consistency
This commit is contained in:
Ole
2026-05-29 15:16:57 +00:00
parent 5b772b2ae5
commit 55d93894ac
18 changed files with 1457 additions and 60 deletions
+63 -2
View File
@@ -320,8 +320,69 @@ def diff(
def stats() -> None:
    """Show cache statistics.

    Opens the SQLite cache at ``FINN_CACHE_PATH`` and prints, for each cache
    table, the row count and how many rows carry a hash (``deps_hash`` for
    ``analysis_cache``, ``content_hash`` for the others).  For ``finn_ads`` it
    also reports how many payloads contain an ``eiendom_unit_code``, and for
    the fetched tables the oldest and newest ``fetched_at`` values.

    Raises:
        typer.Exit: With exit code 1 if anything goes wrong while reading the
            cache; the error message is written to stderr.
    """
    try:
        import sqlite3
        from contextlib import closing

        from .config import FINN_CACHE_PATH

        # Table names are hard-coded here, so interpolating them into the SQL
        # below is safe (identifiers cannot be bound as query parameters).
        tables = ["finn_ads", "eiendom_units", "similar_units", "analysis_cache", "cache_meta"]
        dated_tables = ["finn_ads", "eiendom_units", "similar_units"]
        # analysis_cache tracks deps_hash; every other table defaults to content_hash.
        hash_columns = {"analysis_cache": "deps_hash"}
        table_stats = {}

        # closing() guarantees the connection is released even when a query
        # fails; sqlite3's own context manager only manages transactions.
        with closing(sqlite3.connect(str(FINN_CACHE_PATH))) as conn:
            cursor = conn.cursor()

            # Row counts and hash coverage for each table.
            for table in tables:
                cursor.execute(f"SELECT COUNT(*) FROM {table}")
                total = cursor.fetchone()[0]
                table_stats[table] = {"total_rows": total}
                if total == 0:
                    continue

                hash_column = hash_columns.get(table, "content_hash")
                cursor.execute(f"PRAGMA table_info({table})")
                existing_columns = {row[1] for row in cursor.fetchall()}
                if hash_column not in existing_columns:
                    # Table has no hash column (e.g. a plain key/value table):
                    # report only the row count instead of failing the command.
                    continue

                cursor.execute(
                    f"SELECT COUNT(*) FROM {table} WHERE {hash_column} IS NOT NULL"
                )
                with_hash = cursor.fetchone()[0]
                table_stats[table]["rows_with_hash"] = with_hash
                table_stats[table]["pct_with_hash"] = round(100 * with_hash / total, 1)

            # finn_ads only: how many ads reference an eiendom unit.
            ads_total = table_stats["finn_ads"]["total_rows"]
            if ads_total > 0:
                # Single-quoted SQL string literals: double quotes denote
                # identifiers in SQLite and only work as strings via a legacy
                # fallback that may be disabled.
                cursor.execute(
                    "SELECT COUNT(*) FROM finn_ads "
                    "WHERE json_extract(payload, '$.eiendom_unit_code') IS NOT NULL "
                    "AND json_extract(payload, '$.eiendom_unit_code') != 'null'"
                )
                ads_with_unit_code = cursor.fetchone()[0]
                table_stats["finn_ads"]["with_eiendom_unit_code"] = ads_with_unit_code
                table_stats["finn_ads"]["pct_with_unit_code"] = round(
                    100 * ads_with_unit_code / ads_total, 1
                )

            # fetched_at date ranges; MIN/MAX are NULL for empty tables.
            for table in dated_tables:
                cursor.execute(f"SELECT MIN(fetched_at), MAX(fetched_at) FROM {table}")
                min_date, max_date = cursor.fetchone()
                if min_date and max_date:
                    table_stats[table]["oldest_fetch"] = min_date
                    table_stats[table]["newest_fetch"] = max_date

        # Format output
        typer.echo("\n=== Cache Statistics ===\n")
        for table, entries in table_stats.items():
            typer.echo(f"{table}:")
            for key, value in entries.items():
                typer.echo(f" {key}: {value}")
            typer.echo()
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)