Enhance Docker and Compose configurations; add health check endpoint and caching improvements

- Updated Dockerfile to include FINN_CACHE_PATH and create data directory.
- Modified docker-compose.prod.yml to expose port 8010 and comment out the resource limits.
- Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping.
- Added health check endpoint in http_server.py for container orchestration.
- Improved caching logic in analysis.py and service.py for similar units.
- Refined scoring.py with updated scoring model and constants for better accuracy.

Co-authored-by: Copilot <copilot@github.com>
2026-05-26 12:10:00 +00:00
parent d3f4bfa838
commit 46fd22c277
7 changed files with 315 additions and 233 deletions
@@ -41,7 +41,10 @@ COPY finn_eiendom /app/finn_eiendom
 ENV PATH="/venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    MCP_HOST=0.0.0.0 \
-    MCP_PORT=8010
+    MCP_PORT=8010 \
+    FINN_CACHE_PATH=/app/data/finn.sqlite
+
+RUN mkdir -p /app/data

 # Expose HTTP port
 EXPOSE 8010
@@ -12,15 +12,19 @@ services:
    environment:
      PYTHONUNBUFFERED: 1

+    # Expose port for network access
+    ports:
+      - "8010:8010"
+
    # More aggressive resource limits for production
-    deploy:
-      resources:
-        limits:
-          cpus: '4'
-          memory: 2G
-        reservations:
-          cpus: '2'
-          memory: 1G
+    # deploy:
+    #   resources:
+    #     limits:
+    #       cpus: '4'
+    #       memory: 2G
+    #     reservations:
+    #       cpus: '2'
+    #       memory: 1G

    # Restart policy
    restart: always
@@ -9,13 +9,13 @@ services:

    # Environment configuration
    environment:
+      FINN_CACHE_PATH: /app/data/finn.sqlite
      # MCP HTTP server configuration
      MCP_HOST: 0.0.0.0
      MCP_PORT: 8010

      # Python configuration
      PYTHONUNBUFFERED: 1
-      
      # Optional: FINN/Eiendom.no rate limiting and retry configuration
      # FINN_RATE_LIMIT_DELAY: 0.5
      # HTTP_TIMEOUT: 30
@@ -52,6 +52,10 @@ services:
      options:
        max-size: "10m"
        max-file: "3"
+    volumes:
+      - finn-cache:/app/data

+volumes:
+  finn-cache:
    # For development, you can override with:
    # docker-compose -f docker-compose.yml -f docker-compose.override.yml up
@@ -5,6 +5,7 @@ import logging
 from . import ad as ad_module
 from . import cache, eiendom_no, scoring, search
 from .config import (
+    EIENDOM_NO_CACHE_TTL_HOURS,
    FINN_CACHE_PATH,
    FINN_CACHE_TTL_AD_HOURS,
    FINN_DETAIL_LIMIT,
@@ -102,16 +103,21 @@ async def analyze_ad(
                cache.save_eiendom_unit(conn, enriched)

    if enriched:
-        # EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the
-        # field comes back None. Reading enriched.unit_vector directly leaves
-        # this block dead and similar_units permanently empty. Build the vector
-        # from the unit fields instead (fall back to the field if a future
-        # endpoint ever populates it).
+        # Check cache for similar units first. The cache uses (unit_code,
+        # listing_status) as the key, so we must look it up by unit_code.
+        similar_units = cache.get_similar_units(
+            conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
+        )
+
+        if not similar_units:
+            # Cache miss: build the vector and fetch fresh from Eiendom.no
+            # (unit_vector field from get_unit is None; build locally)
            vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
            if vector:
-            # No dedicated cache table for similar units (per PRD) -- fetch
-            # fresh each call, consistent with service.get_or_fetch_similar_units.
                similar_units = await eiendom_no.get_similar_units(vector)
+                # Save to cache
+                if similar_units:
+                    cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units)

    scores = scoring.score_ad(finn_ad, enriched, similar_units)
    categories = scoring.classify_ad(scores)
@@ -1,4 +1,8 @@
+import json
 import uvicorn
+from starlette.responses import JSONResponse
+from starlette.requests import Request
+from starlette.middleware.cors import CORSMiddleware
 from mcp.server.transport_security import TransportSecuritySettings
 from finn_eiendom.mcp_server import mcp

@@ -6,5 +10,24 @@ mcp.transport_security = TransportSecuritySettings(enable_dns_rebinding_protecti

 app = mcp.sse_app()

+# Add CORS middleware to allow browser-based clients (e.g., MCP Inspector)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# Health check endpoint for container orchestration
+async def health(request: Request) -> JSONResponse:
+    """Return a simple health status for container probes."""
+    return JSONResponse({"status": "ok"})
+
+
+app.add_route("/health", health, methods=["GET"])
+
+
 if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8010, forwarded_allow_ips="*")
@@ -7,19 +7,28 @@ Priority hierarchy (stated):
  MEDIUM  : sameie economy, green areas / walking terrain, price vs market
  BONUS   : renovation upside (acceptable, not required)

-Dimension caps (non-risk total max ≈ 105, clamped to 100):
-  floor             -15..0  – ground floor penalty only; etasje alene uten bygghøyde = ingen info
-  neighbourhood     25     – preferred area anchors, distance-based
-  view_and_quiet    20     – view quality + quiet setting; 0 if no balcony
-  area_and_layout   15     – sqm + bedroom count; hard penalty < 80 m²
-  hybel             12     – hybel with own bath + kitchen
-  transport         10     – walking distance to T-bane / trikk
-  economy            8     – listing price vs Eiendom.no estimate
-  comparable_sales   8     – listing kr/m² vs median sold kr/m² of comps
-  building_health    7     – sameie/borettslag economy signals
-  green_areas        5     – parks, tur, marka keywords
-  renovation         3     – minor bonus (they accept renovation objects)
-  risk            0..-30   – stale listing, high costs, missing data
+Scoring model — explicit weights (sum = 1.0):
+  Each dimension function returns a raw score in [0, DIMENSION_MAX[d]].
+  score_ad normalises each to [0, 1] × weight × 100 → weighted bonus 0..100.
+  Penalties (floor, risk) are absolute deductions applied after weighting.
+  Final total = clamp(weighted_bonus + penalties, 0, 100).
+
+  Dimension         Weight   Max pts
+  ─────────────────────────────────
+  transport          24 %      10
+  view_and_quiet     21 %      20
+  neighbourhood      17 %      25
+  hybel              14 %      12
+  area_and_layout    10 %      15
+  economy             6 %       8
+  comparable_sales    4 %       8
+  building_health     2 %       7
+  green_areas         1 %       5
+  renovation          1 %       3
+  ─────────────────────────────────
+  bonus total       100 %     100
+  floor penalty              0..-15  (ground floor only)
+  risk penalty               0..-30
 """

 import logging
@@ -30,15 +39,54 @@ from .models import EiendomUnit, SimilarUnit

 logger = logging.getLogger(__name__)

+# ---------------------------------------------------------------------------
+# Scoring constants — explicit weights and per-dimension raw maxima
+# ---------------------------------------------------------------------------
+
+DIMENSION_WEIGHTS: dict[str, float] = {
+    "transport": 0.24,  # was 0.11 — MUST-have, now primary signal
+    "view_and_quiet": 0.21,  # was 0.17 — key quality-of-life differentiator
+    "neighbourhood": 0.17,  # was 0.22 — still important, no longer dominant
+    "hybel": 0.14,  # was 0.12 — rental income / flexibility
+    "area_and_layout": 0.10,  # was 0.16 — baseline met by search filters
+    "economy": 0.06,  # was 0.08
+    "comparable_sales": 0.04,  # was 0.06
+    "building_health": 0.02,  # was 0.04
+    "green_areas": 0.01,  # was 0.03
+    "renovation": 0.01,  # unchanged
+}
+
+DIMENSION_MAX: dict[str, float] = {
+    "transport": 10.0,
+    "view_and_quiet": 20.0,
+    "neighbourhood": 25.0,
+    "hybel": 12.0,
+    "area_and_layout": 15.0,
+    "economy": 8.0,
+    "comparable_sales": 8.0,
+    "building_health": 7.0,
+    "green_areas": 5.0,
+    "renovation": 3.0,
+}
+
+assert abs(sum(DIMENSION_WEIGHTS.values()) - 1.0) < 1e-9, "Weights must sum to 1.0"
+assert DIMENSION_WEIGHTS.keys() == DIMENSION_MAX.keys(), "Weight/max key mismatch"
+
+# Risk penalty thresholds
+_SHARED_DEBT_HIGH = 500_000  # per unit — hard red flag
+_SHARED_DEBT_MEDIUM = 200_000  # per unit — notable
+_COMMON_COST_HIGH = 8_000  # kr/mnd
+_COMMON_COST_MEDIUM = 6_000  # kr/mnd
+_DAYS_STALE = 120  # days on market → something is wrong
+_DAYS_SLOW = 60  # days on market → worth investigating
+
 # ---------------------------------------------------------------------------
 # Geometry helpers
 # ---------------------------------------------------------------------------


 def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
-    """Flat-earth approximation — accurate enough within Oslo (~59.9°N).
-    1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude.
-    """
+    """Flat-earth approximation — accurate enough within Oslo (~59.9°N)."""
    dlat = (lat2 - lat1) * 111.0
    dlng = (lng2 - lng1) * 56.0
    return math.sqrt(dlat**2 + dlng**2)
@@ -59,7 +107,6 @@ def _median(values: list[float]) -> float:
 # ---------------------------------------------------------------------------

 _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
-    # (label, lat, lng) — label used only for debug logging
    ("Grünerløkka", 59.9240, 10.7573),
    ("Torshov", 59.9340, 10.7620),
    ("Rodeløkka", 59.9315, 10.7660),
@@ -80,17 +127,10 @@ _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [


 # ---------------------------------------------------------------------------
-# Transit network — all T-bane and trikk stops.
-#
-# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations).
-# TRIKK_STOPS: estimated coordinates (Wikipedia has no trikk coords).
-#
-# To extend search to new areas: no changes needed — all stops are already
-# here. score_transport automatically finds the nearest stop for any address.
+# Transit network
 # ---------------------------------------------------------------------------

 TBANE_STOPS: dict[str, tuple[float, float]] = {
-    # All 101 stations — Wikipedia DMS converted to decimal degrees
    "Ammerud": (59.957922, 10.871165),
    "Avløs": (59.913859, 10.552926),
    "Bekkestua": (59.918097, 10.588031),
@@ -194,126 +234,97 @@ TBANE_STOPS: dict[str, tuple[float, float]] = {
    "Østhorn": (59.956944, 10.749779),
 }

-# Trikk stops — estimated coordinates (Wikipedia has no trikk coords).
-# Grouped by line corridor for readability.
-# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia
-# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring.
-# Source tag format: Wikidata QID | "shared T-bane" | "OSM node <id>" | "Wikipedia"
 TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
-    # ── Briskeby-linjen (l11/19) ─────────────────────────────────────────
-    "Majorstuen": (59.929904, 10.714931),  # shared T-bane
-    "Bogstadveien": (59.92611, 10.72167),  # Q19372022
-    "Rosenborg": (59.92417, 10.72389),  # Q7899658
-    "Briskeby": (59.92048, 10.71767),  # Q11962293
-    "Riddervolds plass": (59.91896, 10.72026),  # Q19386557
-    "Inkognitogata": (59.91565, 10.72114),  # Q11977313
-    "Nationaltheatret": (59.91504, 10.73304),  # shared T-bane
-    # ── Sentrum (shared l11/12/13/17/18/19) ──────────────────────────────
-    "Øvre Slottsgate": (59.9118, 10.7417),  # Q31079249
-    "Dronningens gate": (59.91053, 10.74697),  # Q29828354
-    "Jernbanetorget": (59.912116, 10.751211),  # shared T-bane
-    "Storgata": (59.91396, 10.75141),  # Q109484341
-    "Nybrua": (59.91707, 10.75834),  # Q104867506
-    "Stortorvet": (59.91310, 10.74530),  # Q7620354
-    "Bjørvika": (59.90806, 10.75639),  # Wikipedia
-    # ── Grünerløkka–Torshov-linjen (l11/12/18) ───────────────────────────
-    "Schous plass": (59.92081, 10.75932),  # Q12006491-area / Wikipedia
-    "Olaf Ryes plass": (59.9231, 10.7592),  # Q4993079
-    "Birkelunden": (59.9271, 10.7601),  # Q4916412
-    "Biermanns gate": (59.93028, 10.76104),  # Wikipedia
-    "Sandaker senter": (59.93889, 10.76861),  # Wikipedia
-    "Grefsenveien": (59.94278, 10.77344),  # Q17778424
-    "Storo": (59.944545, 10.778768),  # shared T-bane
-    # ── Kjelsåslinjen (l11/12) ───────────────────────────────────────────
-    "Disen": (59.94627, 10.78729),  # Q11965753
-    "Glads vei": (59.95235, 10.78533),  # Q17776371
-    "Grefsenplatået": (59.9560, 10.78573),  # Q11972531
-    "Grefsen stadion": (59.96008, 10.78475),  # Q11972525
-    "Kjelsås": (59.96611, 10.78278),  # Wikipedia
-    # ── Frogner-linjen (l12) ─────────────────────────────────────────────
-    "Vigelandsparken": (59.92457, 10.70815),  # Q19398059
-    "Frogner plass": (59.92255, 10.70491),  # Q11970372 / OSM node 30560564
-    "Elisenberg": (59.91944, 10.70861),  # Q5361695
-    "Lille Frogner allé": (59.9180, 10.7120),  # Q19379373
-    "Niels Juels gate": (59.91634, 10.71520),  # Q11991378
-    "Solli": (59.91486, 10.71906),  # Q7558364
-    # ── Vika-linjen (l12) ────────────────────────────────────────────────
-    "Aker Brygge": (59.9110, 10.7299),  # Q4700639
-    "Kontraskjæret": (59.91087, 10.73592),  # Q11998807
-    # ── Lilleaker-linjen (l13) ───────────────────────────────────────────
-    "Lilleaker": (59.92074, 10.63580),  # Wikipedia
-    "Sollerud": (59.92104, 10.64309),  # Wikipedia
-    "Furulund": (59.91990, 10.65013),  # Wikipedia
-    "Ullern": (59.92429, 10.65858),  # Wikipedia
-    "Abbediengen": (59.92517, 10.66716),  # Wikipedia
-    "Hoff": (59.92500, 10.67488),  # Wikipedia
-    "Skøyen": (59.92384, 10.68034),  # Wikipedia
-    # ── Skøyen-linjen (l13) ──────────────────────────────────────────────
-    "Thune": (59.92186, 10.68742),  # Wikipedia
-    "Nobels gate": (59.91758, 10.69866),  # Wikipedia
-    "Skarpsno": (59.91430, 10.70234),  # Wikipedia
-    "Skillebekk": (59.91277, 10.71103),  # Wikipedia
-    # ── Ekeberg-linjen (l13/19) ──────────────────────────────────────────
-    "Middelalderparken": (59.90639, 10.76417),  # Q99971403
-    "Oslo Hospital": (59.9032, 10.7674),  # Wikipedia
-    "Ekebergparken": (59.8977, 10.7593),  # Wikipedia
-    "Jomfrubråten": (59.8883, 10.7706),  # Wikipedia
-    "Sportsplassen": (59.8860, 10.7736),  # Wikipedia
-    "Holtet": (59.88151, 10.78415),  # Wikipedia
-    "Sørli": (59.87493, 10.78709),  # Wikipedia
-    "Kastellet": (59.87106, 10.79036),  # Wikipedia
-    "Bråten": (59.86714, 10.79244),  # Wikipedia
-    "Sæter": (59.86102, 10.79870),  # Wikipedia
-    "Ljabru": (59.85335, 10.80089),  # Wikipedia
-    # ── Ullevål Hageby-linjen (l17/18) ───────────────────────────────────
-    "Rikshospitalet": (59.947768, 10.714716),  # Wikipedia
-    "Gaustadalleen": (59.9454, 10.7172),  # Wikipedia
-    "Forskningsparken": (59.943513, 10.720425),  # shared T-bane
-    "Universitetet Blindern": (59.9421, 10.7243),  # Wikipedia
-    "John Collets plass": (59.9403, 10.7290),  # Wikipedia
-    "Ullevål sykehus": (59.9361, 10.7318),  # Wikipedia
-    "Adamstuen": (59.9326, 10.7345),  # Wikipedia
-    "Stensgata": (59.92957, 10.73303),  # Q7607927
-    "Bislett": (59.92599, 10.73108),  # Q11961163
-    "Dalsbergstien": (59.92354, 10.73163),  # Q17764618
-    "Welhavens gate": (59.92131, 10.72968),  # Q12010485
-    "Frydenlund": (59.92086, 10.73317),  # Q19373143
-    "Holbergs plass": (59.91876, 10.73453),  # Q11975623
-    # ── Sinsen-linjen (l17) ──────────────────────────────────────────────
-    "Lakkegata skole": (59.92055, 10.76834),  # Q11982987
-    "Carl Berners plass": (59.926592, 10.778360),  # shared T-bane
-    "Sinsenkrysset": (59.93911, 10.78340),  # Q19388523
-    "Grefsen stasjon": (59.94167, 10.78056),  # Wikipedia
-    # ── Homansbyen-linjen (l19) ───────────────────────────────────────────
-    "Homansbyen": (59.92278, 10.72639),  # Q5887760
+    "Majorstuen": (59.929904, 10.714931),
+    "Bogstadveien": (59.92611, 10.72167),
+    "Rosenborg": (59.92417, 10.72389),
+    "Briskeby": (59.92048, 10.71767),
+    "Riddervolds plass": (59.91896, 10.72026),
+    "Inkognitogata": (59.91565, 10.72114),
+    "Nationaltheatret": (59.91504, 10.73304),
+    "Øvre Slottsgate": (59.9118, 10.7417),
+    "Dronningens gate": (59.91053, 10.74697),
+    "Jernbanetorget": (59.912116, 10.751211),
+    "Storgata": (59.91396, 10.75141),
+    "Nybrua": (59.91707, 10.75834),
+    "Stortorvet": (59.91310, 10.74530),
+    "Bjørvika": (59.90806, 10.75639),
+    "Schous plass": (59.92081, 10.75932),
+    "Olaf Ryes plass": (59.9231, 10.7592),
+    "Birkelunden": (59.9271, 10.7601),
+    "Biermanns gate": (59.93028, 10.76104),
+    "Sandaker senter": (59.93889, 10.76861),
+    "Grefsenveien": (59.94278, 10.77344),
+    "Storo": (59.944545, 10.778768),
+    "Disen": (59.94627, 10.78729),
+    "Glads vei": (59.95235, 10.78533),
+    "Grefsenplatået": (59.9560, 10.78573),
+    "Grefsen stadion": (59.96008, 10.78475),
+    "Kjelsås": (59.96611, 10.78278),
+    "Vigelandsparken": (59.92457, 10.70815),
+    "Frogner plass": (59.92255, 10.70491),
+    "Elisenberg": (59.91944, 10.70861),
+    "Lille Frogner allé": (59.9180, 10.7120),
+    "Niels Juels gate": (59.91634, 10.71520),
+    "Solli": (59.91486, 10.71906),
+    "Aker Brygge": (59.9110, 10.7299),
+    "Kontraskjæret": (59.91087, 10.73592),
+    "Lilleaker": (59.92074, 10.63580),
+    "Sollerud": (59.92104, 10.64309),
+    "Furulund": (59.91990, 10.65013),
+    "Ullern": (59.92429, 10.65858),
+    "Abbediengen": (59.92517, 10.66716),
+    "Hoff": (59.92500, 10.67488),
+    "Skøyen": (59.92384, 10.68034),
+    "Thune": (59.92186, 10.68742),
+    "Nobels gate": (59.91758, 10.69866),
+    "Skarpsno": (59.91430, 10.70234),
+    "Skillebekk": (59.91277, 10.71103),
+    "Middelalderparken": (59.90639, 10.76417),
+    "Oslo Hospital": (59.9032, 10.7674),
+    "Ekebergparken": (59.8977, 10.7593),
+    "Jomfrubråten": (59.8883, 10.7706),
+    "Sportsplassen": (59.8860, 10.7736),
+    "Holtet": (59.88151, 10.78415),
+    "Sørli": (59.87493, 10.78709),
+    "Kastellet": (59.87106, 10.79036),
+    "Bråten": (59.86714, 10.79244),
+    "Sæter": (59.86102, 10.79870),
+    "Ljabru": (59.85335, 10.80089),
+    "Rikshospitalet": (59.947768, 10.714716),
+    "Gaustadalleen": (59.9454, 10.7172),
+    "Forskningsparken": (59.943513, 10.720425),
+    "Universitetet Blindern": (59.9421, 10.7243),
+    "John Collets plass": (59.9403, 10.7290),
+    "Ullevål sykehus": (59.9361, 10.7318),
+    "Adamstuen": (59.9326, 10.7345),
+    "Stensgata": (59.92957, 10.73303),
+    "Bislett": (59.92599, 10.73108),
+    "Dalsbergstien": (59.92354, 10.73163),
+    "Welhavens gate": (59.92131, 10.72968),
+    "Frydenlund": (59.92086, 10.73317),
+    "Holbergs plass": (59.91876, 10.73453),
+    "Lakkegata skole": (59.92055, 10.76834),
+    "Carl Berners plass": (59.926592, 10.778360),
+    "Sinsenkrysset": (59.93911, 10.78340),
+    "Grefsen stasjon": (59.94167, 10.78056),
+    "Homansbyen": (59.92278, 10.72639),
 }

-# Estimated trikk stop coordinates — no Wikidata P625 found.
-# Derived from linear interpolation between verified neighbours,
-# or placed from map/street knowledge. Max error ~150-250 m.
-# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED.
 TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
-    # ── Sentrum ───────────────────────────────────────────────────────────
-    "Tinghuset": (59.9146, 10.7403),  # Ullevål Hageby-l ved Stortinget T
-    # ── Grünerløkka–Torshov-linjen ───────────────────────────────────────
-    "Torshov": (59.9332, 10.7643),  # interp Biermanns gate↔Sandaker
-    # ── Kjelsåslinjen ────────────────────────────────────────────────────
-    "Doktor Smiths vei": (59.9503, 10.7867),  # interp Disen↔Kjelsås t=0.20
-    "Kjelsåsalleen": (59.9641, 10.7833),  # interp Disen↔Kjelsås t=0.90
-    # ── Frogner-linjen ───────────────────────────────────────────────────
-    "Frogner stadion": (59.9167, 10.7038),  # Kirkeveien S for Vigelandsparken
-    # ── Vika-linjen ──────────────────────────────────────────────────────
-    "Ruseløkka": (59.9120, 10.7258),  # interp Solli↔Kontraskjæret
-    # ── Ullevål Hageby-linjen ─────────────────────────────────────────────
-    "Tullinøkka": (59.9163, 10.7349),  # interp Holbergs plass↔Tinghuset
-    # ── Sinsen-linjen ────────────────────────────────────────────────────
-    "Heimdalsgata": (59.9188, 10.7633),  # interp Nybrua↔Lakkegata skole
-    "Sofienberg": (59.9236, 10.7734),  # interp Lakkegata skole↔Carl Berners
-    "Rosenhoff": (59.9307, 10.7800),  # interp Carl Berners↔Sinsenkrysset t=0.33
-    "Sinsenterrassen": (59.9350, 10.7817),  # interp Carl Berners↔Sinsenkrysset t=0.67
+    "Tinghuset": (59.9146, 10.7403),
+    "Torshov": (59.9332, 10.7643),
+    "Doktor Smiths vei": (59.9503, 10.7867),
+    "Kjelsåsalleen": (59.9641, 10.7833),
+    "Frogner stadion": (59.9167, 10.7038),
+    "Ruseløkka": (59.9120, 10.7258),
+    "Tullinøkka": (59.9163, 10.7349),
+    "Heimdalsgata": (59.9188, 10.7633),
+    "Sofienberg": (59.9236, 10.7734),
+    "Rosenhoff": (59.9307, 10.7800),
+    "Sinsenterrassen": (59.9350, 10.7817),
 }

-# Merged — verified takes precedence if a key appears in both (shouldn't happen).
 TRIKK_STOPS: dict[str, tuple[float, float]] = {
    **TRIKK_STOPS_ESTIMATED,
    **TRIKK_STOPS_VERIFIED,
@@ -323,13 +334,12 @@ TRIKK_STOPS: dict[str, tuple[float, float]] = {
 # Transit helpers
 # ---------------------------------------------------------------------------

-_WALK_SPEED_KMH = 5.0  # avg walking speed
+_WALK_SPEED_KMH = 5.0


 def _nearest_stop(
    lat: float, lng: float, stops: dict[str, tuple[float, float]]
 ) -> tuple[str, float]:
-    """Return (stop_name, distance_km) for the nearest stop in a dict."""
    best_name, best_dist = "", float("inf")
    for name, (slat, slng) in stops.items():
        d = _distance_km(lat, lng, slat, slng)
@@ -341,17 +351,7 @@ def _nearest_stop(
 def nearby_transit(
    lat: float, lng: float, max_walk_min: float = 10.0
 ) -> dict[str, list[tuple[str, float]]]:
-    """Return T-bane and trikk stops within max_walk_min minutes walk.
-
-    Returns:
-        {
-          "tbane": [("Carl Berners plass", 0.28), ...],   # sorted by distance
-          "trikk": [("Rosenhoff", 0.19), ...],
-        }
-    All distances in km.
-    """
    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
-
    tbane = sorted(
        [
            (n, _distance_km(lat, lng, la, lo))
@@ -408,19 +408,7 @@ def score_neighbourhood(


 def score_transport(unit: EiendomUnit | None) -> float:
-    """Walking distance to nearest T-bane or trikk stop. Max 10.
-
-    Searches ALL stops in TBANE_STOPS and TRIKK_STOPS — no manual
-    curation needed when adding new search areas.
-
-    Distance bands:
-      < 400 m  →  10 pts  (~5 min walk)
-      < 800 m  →   8 pts  (~10 min — stated threshold)
-      < 1200 m →   4 pts  (~15 min)
-      ≥ 1200 m →   0 pts
-
-    Falls back to 0 when no coordinates available.
-    """
+    """Walking distance to nearest T-bane or trikk stop. Max 10."""
    if unit is None or unit.lat is None or unit.lng is None:
        return 0.0

@@ -529,7 +517,6 @@ def score_hybel(description: str) -> float:
    if is_potential:
        return 2.0

-    # Documented rental income → definitively real hybel
    if "leieinntekt" in d or "skattefri" in d:
        return 12.0

@@ -560,17 +547,10 @@ def score_hybel(description: str) -> float:


 def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
-    """Floor level. Binary signal: ground floor is bad, everything else neutral.
+    """Floor penalty. Ground floor (≤1) = -15. All other floors = 0.

-    Rationale: "toppleilighet i 3-etgs blokk" og "8. etg i høyblokk" er begge
-    topp for sin bygning. Etasjenummer alene sier ingenting om utsikt eller lys
-    uten å kjenne byggets totale høyde. Eneste reelle signal er 1. etg (innsyn,
-    støy, lys) vs ikke-1. etg.
-
-    Scores:
-      ground floor (≤1)  → -15  (hard penalty: innsyn, støy, lys)
-      unknown            →   0  (no data → no penalty)
-      above ground       →   0  (etasjenummer uten bygghøyde = ingen info)
+    Rationale: floor number alone carries no signal without knowing building
+    height. The only reliable signal is ground floor (innsyn, støy, lys).
    """
    floor: int | None = None

@@ -670,22 +650,43 @@ def score_renovation(description: str) -> float:


 def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
-    """Risk penalty. Returns 0 or negative."""
+    """Risk penalties. Returns 0 or negative.
+
+    Triggers:
+      No Eiendom.no data          → -8   (can't price-check)
+      Shared debt > 500k/unit     → -12  (hard red flag — total cost misleading)
+      Shared debt 200-500k/unit   →  -6  (notable, investigate)
+      Common costs > 8 000/mnd    → -10  (structural sameie problem)
+      Common costs 6-8 000/mnd    →  -5
+      Days on market > 120        → -15  (something is wrong)
+      Days on market 60-120       →  -5  (worth investigating)
+      "usikker" in description    →  -5
+    """
    penalty = 0.0

    if unit is None:
        penalty -= 8.0

+    # Shared debt — new: per-unit fellesgjeld signal
+    shared_debt = getattr(ad, "shared_debt", None)
+    if shared_debt is not None:
+        if shared_debt > _SHARED_DEBT_HIGH:
+            penalty -= 12.0
+            logger.debug("High shared debt: %d kr → -12", shared_debt)
+        elif shared_debt > _SHARED_DEBT_MEDIUM:
+            penalty -= 6.0
+            logger.debug("Medium shared debt: %d kr → -6", shared_debt)
+
    fk = ad.common_costs or 0
-    if fk > 8000:
+    if fk > _COMMON_COST_HIGH:
        penalty -= 10.0
-    elif fk > 6000:
+    elif fk > _COMMON_COST_MEDIUM:
        penalty -= 5.0

    if unit and unit.days_on_market:
-        if unit.days_on_market > 120:
-            penalty -= 10.0
-        elif unit.days_on_market > 60:
+        if unit.days_on_market > _DAYS_STALE:
+            penalty -= 15.0  # was -10
+        elif unit.days_on_market > _DAYS_SLOW:
            penalty -= 5.0

    if "usikker" in (ad.listing_description or "").lower():
@@ -702,15 +703,13 @@ def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
 def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
    description = ad.listing_description or ""

-    # Collect nearby transit for informational output (not used in scoring)
    transit_nearby: dict | None = None
    if unit and unit.lat and unit.lng:
        transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
        if transit_nearby["tbane"] or transit_nearby["trikk"]:
            logger.debug("Nearby transit: %s", transit_nearby)

-    scores: dict[str, Any] = {
-        "floor": score_floor(ad, unit),
+    raw: dict[str, float] = {
        "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)),
        "view_and_quiet": score_view_and_quiet(ad, description),
        "area_and_layout": score_area_and_layout(ad, unit),
@@ -724,14 +723,22 @@ def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]
        "building_health": score_building_health(ad, description),
        "green_areas": score_green_areas(description),
        "renovation": score_renovation(description),
+    }
+
+    penalties: dict[str, float] = {
+        "floor": score_floor(ad, unit),
        "risk": score_risk(ad, unit),
    }

-    # Numeric-only sum for total
-    numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))}
-    scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0))
+    weighted_bonus = sum(
+        (raw[d] / DIMENSION_MAX[d]) * DIMENSION_WEIGHTS[d] * 100.0 for d in DIMENSION_WEIGHTS
+    )
+
+    total_penalty = sum(penalties.values())
+    total = float(_clamp(weighted_bonus + total_penalty, 0.0, 100.0))
+
+    scores: dict[str, Any] = {**raw, **penalties, "total": total}

-    # Attach nearby transit as metadata (non-scoring)
    if transit_nearby is not None:
        scores["nearby_transit"] = transit_nearby

@@ -8,11 +8,13 @@ from .analysis import analyze_search as run_analysis_search
 from .cache import (
    get_eiendom_unit as get_cached_eiendom_unit,
    get_finn_ad,
+    get_similar_units as get_cached_similar_units,
    init_db,
    save_eiendom_unit,
    save_finn_ad,
+    save_similar_units,
 )
-from .config import FINN_CACHE_PATH
+from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH
 from .eiendom_no import (
    build_unit_vector,
    decode_unit_vector,
@@ -89,14 +91,47 @@ async def get_or_fetch_eiendom_unit(
 async def get_or_fetch_similar_units(
    unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False
 ) -> list[SimilarUnit]:
-    """Get similar units (comps) from cache or fetch fresh."""
-    # Similar units don't have a separate cache table; fetch fresh each time per PRD
-    # (or cache them in search_runs if doing diff detection)
-    unit = await get_or_fetch_eiendom_unit(unit_code)
+    """Get similar units (comps) from cache or fetch fresh.
+
+    Fetches the unit first to get the unit_vector, then checks cache for similar
+    units by (unit_code, listing_status). On cache miss, fetches fresh from
+    Eiendom.no and saves to cache.
+    """
+    conn = init_db(FINN_CACHE_PATH)
+
+    # First, ensure we have the unit to build its vector
+    unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
    if unit is None:
        return []
+
+    # Check cache for similar units (unless force_refresh)
+    if not force_refresh:
+        cached_similar = get_cached_similar_units(
+            conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
+        )
+        if cached_similar:
+            logger.debug(
+                "Using cached similar units for %s (status=%s)",
+                unit_code,
+                listing_status,
+            )
+            return cached_similar
+
+    # Cache miss or force_refresh: fetch fresh
    vector = build_unit_vector(unit)
-    return await get_similar_units(vector, listing_status=listing_status)
+    similar = await get_similar_units(vector, listing_status=listing_status)
+
+    # Save to cache
+    if similar:
+        save_similar_units(conn, unit_code, listing_status, similar)
+        logger.debug(
+            "Cached %d similar units for %s (status=%s)",
+            len(similar),
+            unit_code,
+            listing_status,
+        )
+
+    return similar


 async def get_unit_images(unit_code: str, force_refresh: bool = False) -> dict[str, Any]: