From 46fd22c277fcad4c5994de8103b21f9ca26ef367 Mon Sep 17 00:00:00 2001 From: Ole Date: Tue, 26 May 2026 12:10:00 +0000 Subject: [PATCH] Enhance Docker and Compose configurations; add health check endpoint and caching improvements - Updated Dockerfile to include FINN_CACHE_PATH and create data directory. - Modified docker-compose.prod.yml to expose port 8010 and adjust resource limits. - Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping. - Added health check endpoint in http_server.py for container orchestration. - Improved caching logic in analysis.py and service.py for similar units. - Refined scoring.py with updated scoring model and constants for better accuracy. Co-authored-by: Copilot --- Dockerfile | 5 +- docker-compose.prod.yml | 34 ++-- docker-compose.yml | 30 +-- finn_eiendom/analysis.py | 26 ++- finn_eiendom/http_server.py | 23 +++ finn_eiendom/scoring.py | 383 ++++++++++++++++++------------------ finn_eiendom/service.py | 47 ++++- 7 files changed, 315 insertions(+), 233 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0379107..2ad409a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,10 @@ COPY finn_eiendom /app/finn_eiendom ENV PATH="/venv/bin:$PATH" \ PYTHONUNBUFFERED=1 \ MCP_HOST=0.0.0.0 \ - MCP_PORT=8010 + MCP_PORT=8010 \ + FINN_CACHE_PATH=/app/data/finn.sqlite + +RUN mkdir -p /app/data # Expose HTTP port EXPOSE 8010 diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 1e7cb9c..84684cc 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -7,31 +7,35 @@ services: mcp-server: # Production image should be pre-built and tagged image: finn-mcp:latest - + # Environment overrides for production environment: PYTHONUNBUFFERED: 1 - + + # Expose port for network access + ports: + - "8010:8010" + # More aggressive resource limits for production - deploy: - resources: - limits: - cpus: '4' - memory: 2G - reservations: - cpus: '2' - memory: 1G - + # deploy: + # resources: + # limits: + 
# cpus: '4' + # memory: 2G + # reservations: + # cpus: '2' + # memory: 1G + # Restart policy restart: always - + # Security options security_opt: - no-new-privileges:true - + # Read-only root filesystem (if cache is not persistent) # read_only: true - + # Logging configuration for production logging: driver: "json-file" @@ -39,7 +43,7 @@ services: max-size: "100m" max-file: "10" labels: "service=finn-mcp" - + # Labels for monitoring/metadata labels: com.example.description: "FINN Eiendom.no MCP Server" diff --git a/docker-compose.yml b/docker-compose.yml index 5372d57..bc21a46 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,33 +6,33 @@ services: context: . dockerfile: Dockerfile container_name: finn-mcp-server - + # Environment configuration environment: + FINN_CACHE_PATH: /app/data/finn.sqlite # MCP HTTP server configuration MCP_HOST: 0.0.0.0 MCP_PORT: 8010 - + # Python configuration PYTHONUNBUFFERED: 1 - # Optional: FINN/Eiendom.no rate limiting and retry configuration # FINN_RATE_LIMIT_DELAY: 0.5 # HTTP_TIMEOUT: 30 # HTTP_MAX_RETRIES: 3 - - # Port mapping for HTTP access + + # Port mapping for HTTP access ports: - "8010:8010" - + # Health check healthcheck: - test: ["CMD", "python", "-c", "import requests; requests.get('http://localhost:8010/health', timeout=5)"] + test: [ "CMD", "python", "-c", "import requests; requests.get('http://localhost:8010/health', timeout=5)" ] interval: 30s timeout: 10s retries: 3 start_period: 10s - + # Resource limits (adjust based on your server) deploy: resources: @@ -42,16 +42,20 @@ services: reservations: cpus: '1' memory: 512M - + # Restart policy restart: unless-stopped - + # Logging configuration logging: driver: "json-file" options: max-size: "10m" max-file: "3" - -# For development, you can override with: -# docker-compose -f docker-compose.yml -f docker-compose.override.yml up + volumes: + - finn-cache:/app/data + +volumes: + finn-cache: + # For development, you can override with: + # docker-compose -f 
docker-compose.yml -f docker-compose.override.yml up diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py index 9a7a5bd..2e816f0 100644 --- a/finn_eiendom/analysis.py +++ b/finn_eiendom/analysis.py @@ -5,6 +5,7 @@ import logging from . import ad as ad_module from . import cache, eiendom_no, scoring, search from .config import ( + EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH, FINN_CACHE_TTL_AD_HOURS, FINN_DETAIL_LIMIT, @@ -102,16 +103,21 @@ async def analyze_ad( cache.save_eiendom_unit(conn, enriched) if enriched: - # EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the - # field comes back None. Reading enriched.unit_vector directly leaves - # this block dead and similar_units permanently empty. Build the vector - # from the unit fields instead (fall back to the field if a future - # endpoint ever populates it). - vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched) - if vector: - # No dedicated cache table for similar units (per PRD) -- fetch - # fresh each call, consistent with service.get_or_fetch_similar_units. - similar_units = await eiendom_no.get_similar_units(vector) + # Check cache for similar units first. The cache uses (unit_code, + # listing_status) as the key, so we must look it up by unit_code. 
+ similar_units = cache.get_similar_units( + conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS + ) + + if not similar_units: + # Cache miss: build the vector and fetch fresh from Eiendom.no + # (unit_vector field from get_unit is None; build locally) + vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched) + if vector: + similar_units = await eiendom_no.get_similar_units(vector) + # Save to cache + if similar_units: + cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units) scores = scoring.score_ad(finn_ad, enriched, similar_units) categories = scoring.classify_ad(scores) diff --git a/finn_eiendom/http_server.py b/finn_eiendom/http_server.py index e50644a..b7cb1c5 100644 --- a/finn_eiendom/http_server.py +++ b/finn_eiendom/http_server.py @@ -1,4 +1,8 @@ +import json import uvicorn +from starlette.responses import JSONResponse +from starlette.requests import Request +from starlette.middleware.cors import CORSMiddleware from mcp.server.transport_security import TransportSecuritySettings from finn_eiendom.mcp_server import mcp @@ -6,5 +10,24 @@ mcp.transport_security = TransportSecuritySettings(enable_dns_rebinding_protecti app = mcp.sse_app() +# Add CORS middleware to allow browser-based clients (e.g., MCP Inspector) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# Health check endpoint for container orchestration +async def health(request: Request) -> JSONResponse: + """Return a simple health status for container probes.""" + return JSONResponse({"status": "ok"}) + + +app.add_route("/health", health, methods=["GET"]) + + if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8010, forwarded_allow_ips="*") diff --git a/finn_eiendom/scoring.py b/finn_eiendom/scoring.py index 01ed412..ce858cb 100644 --- a/finn_eiendom/scoring.py +++ b/finn_eiendom/scoring.py @@ -7,19 +7,28 @@ Priority 
hierarchy (stated): MEDIUM : sameie economy, green areas / walking terrain, price vs market BONUS : renovation upside (acceptable, not required) -Dimension caps (non-risk total max ≈ 105, clamped to 100): - floor -15..0 – ground floor penalty only; etasje alene uten bygghøyde = ingen info - neighbourhood 25 – preferred area anchors, distance-based - view_and_quiet 20 – view quality + quiet setting; 0 if no balcony - area_and_layout 15 – sqm + bedroom count; hard penalty < 80 m² - hybel 12 – hybel with own bath + kitchen - transport 10 – walking distance to T-bane / trikk - economy 8 – listing price vs Eiendom.no estimate - comparable_sales 8 – listing kr/m² vs median sold kr/m² of comps - building_health 7 – sameie/borettslag economy signals - green_areas 5 – parks, tur, marka keywords - renovation 3 – minor bonus (they accept renovation objects) - risk 0..-30 – stale listing, high costs, missing data +Scoring model — explicit weights (sum = 1.0): + Each dimension function returns a raw score in [0, DIMENSION_MAX[d]]. + score_ad normalises each to [0, 1] × weight × 100 → weighted bonus 0..100. + Penalties (floor, risk) are absolute deductions applied after weighting. + Final total = clamp(weighted_bonus + penalties, 0, 100). 
+ + Dimension Weight Max pts + ───────────────────────────────── + transport 24 % 10 + view_and_quiet 21 % 20 + neighbourhood 17 % 25 + hybel 14 % 12 + area_and_layout 10 % 15 + economy 6 % 8 + comparable_sales 4 % 8 + building_health 2 % 7 + green_areas 1 % 5 + renovation 1 % 3 + ───────────────────────────────── + bonus total 100 % 100 + floor penalty 0..-15 (ground floor only) + risk penalty 0..-30 """ import logging @@ -30,15 +39,54 @@ from .models import EiendomUnit, SimilarUnit logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Scoring constants — explicit weights and per-dimension raw maxima +# --------------------------------------------------------------------------- + +DIMENSION_WEIGHTS: dict[str, float] = { + "transport": 0.24, # was 0.11 — MUST-have, now primary signal + "view_and_quiet": 0.21, # was 0.17 — key quality-of-life differentiator + "neighbourhood": 0.17, # was 0.22 — still important, no longer dominant + "hybel": 0.14, # was 0.12 — rental income / flexibility + "area_and_layout": 0.10, # was 0.16 — baseline met by search filters + "economy": 0.06, # was 0.08 + "comparable_sales": 0.04, # was 0.06 + "building_health": 0.02, # was 0.04 + "green_areas": 0.01, # was 0.03 + "renovation": 0.01, # unchanged +} + +DIMENSION_MAX: dict[str, float] = { + "transport": 10.0, + "view_and_quiet": 20.0, + "neighbourhood": 25.0, + "hybel": 12.0, + "area_and_layout": 15.0, + "economy": 8.0, + "comparable_sales": 8.0, + "building_health": 7.0, + "green_areas": 5.0, + "renovation": 3.0, +} + +assert abs(sum(DIMENSION_WEIGHTS.values()) - 1.0) < 1e-9, "Weights must sum to 1.0" +assert DIMENSION_WEIGHTS.keys() == DIMENSION_MAX.keys(), "Weight/max key mismatch" + +# Risk penalty thresholds +_SHARED_DEBT_HIGH = 500_000 # per unit — hard red flag +_SHARED_DEBT_MEDIUM = 200_000 # per unit — notable +_COMMON_COST_HIGH = 8_000 # kr/mnd +_COMMON_COST_MEDIUM = 6_000 # kr/mnd +_DAYS_STALE = 120 # days on 
market → something is wrong +_DAYS_SLOW = 60 # days on market → worth investigating + # --------------------------------------------------------------------------- # Geometry helpers # --------------------------------------------------------------------------- def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float: - """Flat-earth approximation — accurate enough within Oslo (~59.9°N). - 1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude. - """ + """Flat-earth approximation — accurate enough within Oslo (~59.9°N).""" dlat = (lat2 - lat1) * 111.0 dlng = (lng2 - lng1) * 56.0 return math.sqrt(dlat**2 + dlng**2) @@ -59,7 +107,6 @@ def _median(values: list[float]) -> float: # --------------------------------------------------------------------------- _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [ - # (label, lat, lng) — label used only for debug logging ("Grünerløkka", 59.9240, 10.7573), ("Torshov", 59.9340, 10.7620), ("Rodeløkka", 59.9315, 10.7660), @@ -80,17 +127,10 @@ _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [ # --------------------------------------------------------------------------- -# Transit network — all T-bane and trikk stops. -# -# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations). -# TRIKK_STOPS: estimated coordinates (Wikipedia has no trikk coords). -# -# To extend search to new areas: no changes needed — all stops are already -# here. score_transport automatically finds the nearest stop for any address. +# Transit network # --------------------------------------------------------------------------- TBANE_STOPS: dict[str, tuple[float, float]] = { - # All 101 stations — Wikipedia DMS converted to decimal degrees "Ammerud": (59.957922, 10.871165), "Avløs": (59.913859, 10.552926), "Bekkestua": (59.918097, 10.588031), @@ -194,126 +234,97 @@ TBANE_STOPS: dict[str, tuple[float, float]] = { "Østhorn": (59.956944, 10.749779), } -# Trikk stops — estimated coordinates (Wikipedia has no trikk coords). 
-# Grouped by line corridor for readability. -# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia -# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring. -# Source tag format: Wikidata QID | "shared T-bane" | "OSM node " | "Wikipedia" TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = { - # ── Briskeby-linjen (l11/19) ───────────────────────────────────────── - "Majorstuen": (59.929904, 10.714931), # shared T-bane - "Bogstadveien": (59.92611, 10.72167), # Q19372022 - "Rosenborg": (59.92417, 10.72389), # Q7899658 - "Briskeby": (59.92048, 10.71767), # Q11962293 - "Riddervolds plass": (59.91896, 10.72026), # Q19386557 - "Inkognitogata": (59.91565, 10.72114), # Q11977313 - "Nationaltheatret": (59.91504, 10.73304), # shared T-bane - # ── Sentrum (shared l11/12/13/17/18/19) ────────────────────────────── - "Øvre Slottsgate": (59.9118, 10.7417), # Q31079249 - "Dronningens gate": (59.91053, 10.74697), # Q29828354 - "Jernbanetorget": (59.912116, 10.751211), # shared T-bane - "Storgata": (59.91396, 10.75141), # Q109484341 - "Nybrua": (59.91707, 10.75834), # Q104867506 - "Stortorvet": (59.91310, 10.74530), # Q7620354 - "Bjørvika": (59.90806, 10.75639), # Wikipedia - # ── Grünerløkka–Torshov-linjen (l11/12/18) ─────────────────────────── - "Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia - "Olaf Ryes plass": (59.9231, 10.7592), # Q4993079 - "Birkelunden": (59.9271, 10.7601), # Q4916412 - "Biermanns gate": (59.93028, 10.76104), # Wikipedia - "Sandaker senter": (59.93889, 10.76861), # Wikipedia - "Grefsenveien": (59.94278, 10.77344), # Q17778424 - "Storo": (59.944545, 10.778768), # shared T-bane - # ── Kjelsåslinjen (l11/12) ─────────────────────────────────────────── - "Disen": (59.94627, 10.78729), # Q11965753 - "Glads vei": (59.95235, 10.78533), # Q17776371 - "Grefsenplatået": (59.9560, 10.78573), # Q11972531 - "Grefsen stadion": (59.96008, 10.78475), # Q11972525 - "Kjelsås": (59.96611, 10.78278), # Wikipedia - 
# ── Frogner-linjen (l12) ───────────────────────────────────────────── - "Vigelandsparken": (59.92457, 10.70815), # Q19398059 - "Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 30560564 - "Elisenberg": (59.91944, 10.70861), # Q5361695 - "Lille Frogner allé": (59.9180, 10.7120), # Q19379373 - "Niels Juels gate": (59.91634, 10.71520), # Q11991378 - "Solli": (59.91486, 10.71906), # Q7558364 - # ── Vika-linjen (l12) ──────────────────────────────────────────────── - "Aker Brygge": (59.9110, 10.7299), # Q4700639 - "Kontraskjæret": (59.91087, 10.73592), # Q11998807 - # ── Lilleaker-linjen (l13) ─────────────────────────────────────────── - "Lilleaker": (59.92074, 10.63580), # Wikipedia - "Sollerud": (59.92104, 10.64309), # Wikipedia - "Furulund": (59.91990, 10.65013), # Wikipedia - "Ullern": (59.92429, 10.65858), # Wikipedia - "Abbediengen": (59.92517, 10.66716), # Wikipedia - "Hoff": (59.92500, 10.67488), # Wikipedia - "Skøyen": (59.92384, 10.68034), # Wikipedia - # ── Skøyen-linjen (l13) ────────────────────────────────────────────── - "Thune": (59.92186, 10.68742), # Wikipedia - "Nobels gate": (59.91758, 10.69866), # Wikipedia - "Skarpsno": (59.91430, 10.70234), # Wikipedia - "Skillebekk": (59.91277, 10.71103), # Wikipedia - # ── Ekeberg-linjen (l13/19) ────────────────────────────────────────── - "Middelalderparken": (59.90639, 10.76417), # Q99971403 - "Oslo Hospital": (59.9032, 10.7674), # Wikipedia - "Ekebergparken": (59.8977, 10.7593), # Wikipedia - "Jomfrubråten": (59.8883, 10.7706), # Wikipedia - "Sportsplassen": (59.8860, 10.7736), # Wikipedia - "Holtet": (59.88151, 10.78415), # Wikipedia - "Sørli": (59.87493, 10.78709), # Wikipedia - "Kastellet": (59.87106, 10.79036), # Wikipedia - "Bråten": (59.86714, 10.79244), # Wikipedia - "Sæter": (59.86102, 10.79870), # Wikipedia - "Ljabru": (59.85335, 10.80089), # Wikipedia - # ── Ullevål Hageby-linjen (l17/18) ─────────────────────────────────── - "Rikshospitalet": (59.947768, 10.714716), # Wikipedia - 
"Gaustadalleen": (59.9454, 10.7172), # Wikipedia - "Forskningsparken": (59.943513, 10.720425), # shared T-bane - "Universitetet Blindern": (59.9421, 10.7243), # Wikipedia - "John Collets plass": (59.9403, 10.7290), # Wikipedia - "Ullevål sykehus": (59.9361, 10.7318), # Wikipedia - "Adamstuen": (59.9326, 10.7345), # Wikipedia - "Stensgata": (59.92957, 10.73303), # Q7607927 - "Bislett": (59.92599, 10.73108), # Q11961163 - "Dalsbergstien": (59.92354, 10.73163), # Q17764618 - "Welhavens gate": (59.92131, 10.72968), # Q12010485 - "Frydenlund": (59.92086, 10.73317), # Q19373143 - "Holbergs plass": (59.91876, 10.73453), # Q11975623 - # ── Sinsen-linjen (l17) ────────────────────────────────────────────── - "Lakkegata skole": (59.92055, 10.76834), # Q11982987 - "Carl Berners plass": (59.926592, 10.778360), # shared T-bane - "Sinsenkrysset": (59.93911, 10.78340), # Q19388523 - "Grefsen stasjon": (59.94167, 10.78056), # Wikipedia - # ── Homansbyen-linjen (l19) ─────────────────────────────────────────── - "Homansbyen": (59.92278, 10.72639), # Q5887760 + "Majorstuen": (59.929904, 10.714931), + "Bogstadveien": (59.92611, 10.72167), + "Rosenborg": (59.92417, 10.72389), + "Briskeby": (59.92048, 10.71767), + "Riddervolds plass": (59.91896, 10.72026), + "Inkognitogata": (59.91565, 10.72114), + "Nationaltheatret": (59.91504, 10.73304), + "Øvre Slottsgate": (59.9118, 10.7417), + "Dronningens gate": (59.91053, 10.74697), + "Jernbanetorget": (59.912116, 10.751211), + "Storgata": (59.91396, 10.75141), + "Nybrua": (59.91707, 10.75834), + "Stortorvet": (59.91310, 10.74530), + "Bjørvika": (59.90806, 10.75639), + "Schous plass": (59.92081, 10.75932), + "Olaf Ryes plass": (59.9231, 10.7592), + "Birkelunden": (59.9271, 10.7601), + "Biermanns gate": (59.93028, 10.76104), + "Sandaker senter": (59.93889, 10.76861), + "Grefsenveien": (59.94278, 10.77344), + "Storo": (59.944545, 10.778768), + "Disen": (59.94627, 10.78729), + "Glads vei": (59.95235, 10.78533), + "Grefsenplatået": (59.9560, 
10.78573), + "Grefsen stadion": (59.96008, 10.78475), + "Kjelsås": (59.96611, 10.78278), + "Vigelandsparken": (59.92457, 10.70815), + "Frogner plass": (59.92255, 10.70491), + "Elisenberg": (59.91944, 10.70861), + "Lille Frogner allé": (59.9180, 10.7120), + "Niels Juels gate": (59.91634, 10.71520), + "Solli": (59.91486, 10.71906), + "Aker Brygge": (59.9110, 10.7299), + "Kontraskjæret": (59.91087, 10.73592), + "Lilleaker": (59.92074, 10.63580), + "Sollerud": (59.92104, 10.64309), + "Furulund": (59.91990, 10.65013), + "Ullern": (59.92429, 10.65858), + "Abbediengen": (59.92517, 10.66716), + "Hoff": (59.92500, 10.67488), + "Skøyen": (59.92384, 10.68034), + "Thune": (59.92186, 10.68742), + "Nobels gate": (59.91758, 10.69866), + "Skarpsno": (59.91430, 10.70234), + "Skillebekk": (59.91277, 10.71103), + "Middelalderparken": (59.90639, 10.76417), + "Oslo Hospital": (59.9032, 10.7674), + "Ekebergparken": (59.8977, 10.7593), + "Jomfrubråten": (59.8883, 10.7706), + "Sportsplassen": (59.8860, 10.7736), + "Holtet": (59.88151, 10.78415), + "Sørli": (59.87493, 10.78709), + "Kastellet": (59.87106, 10.79036), + "Bråten": (59.86714, 10.79244), + "Sæter": (59.86102, 10.79870), + "Ljabru": (59.85335, 10.80089), + "Rikshospitalet": (59.947768, 10.714716), + "Gaustadalleen": (59.9454, 10.7172), + "Forskningsparken": (59.943513, 10.720425), + "Universitetet Blindern": (59.9421, 10.7243), + "John Collets plass": (59.9403, 10.7290), + "Ullevål sykehus": (59.9361, 10.7318), + "Adamstuen": (59.9326, 10.7345), + "Stensgata": (59.92957, 10.73303), + "Bislett": (59.92599, 10.73108), + "Dalsbergstien": (59.92354, 10.73163), + "Welhavens gate": (59.92131, 10.72968), + "Frydenlund": (59.92086, 10.73317), + "Holbergs plass": (59.91876, 10.73453), + "Lakkegata skole": (59.92055, 10.76834), + "Carl Berners plass": (59.926592, 10.778360), + "Sinsenkrysset": (59.93911, 10.78340), + "Grefsen stasjon": (59.94167, 10.78056), + "Homansbyen": (59.92278, 10.72639), } -# Estimated trikk stop coordinates — no 
Wikidata P625 found. -# Derived from linear interpolation between verified neighbours, -# or placed from map/street knowledge. Max error ~150-250 m. -# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED. TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = { - # ── Sentrum ─────────────────────────────────────────────────────────── - "Tinghuset": (59.9146, 10.7403), # Ullevål Hageby-l ved Stortinget T - # ── Grünerløkka–Torshov-linjen ─────────────────────────────────────── - "Torshov": (59.9332, 10.7643), # interp Biermanns gate↔Sandaker - # ── Kjelsåslinjen ──────────────────────────────────────────────────── - "Doktor Smiths vei": (59.9503, 10.7867), # interp Disen↔Kjelsås t=0.20 - "Kjelsåsalleen": (59.9641, 10.7833), # interp Disen↔Kjelsås t=0.90 - # ── Frogner-linjen ─────────────────────────────────────────────────── - "Frogner stadion": (59.9167, 10.7038), # Kirkeveien S for Vigelandsparken - # ── Vika-linjen ────────────────────────────────────────────────────── - "Ruseløkka": (59.9120, 10.7258), # interp Solli↔Kontraskjæret - # ── Ullevål Hageby-linjen ───────────────────────────────────────────── - "Tullinøkka": (59.9163, 10.7349), # interp Holbergs plass↔Tinghuset - # ── Sinsen-linjen ──────────────────────────────────────────────────── - "Heimdalsgata": (59.9188, 10.7633), # interp Nybrua↔Lakkegata skole - "Sofienberg": (59.9236, 10.7734), # interp Lakkegata skole↔Carl Berners - "Rosenhoff": (59.9307, 10.7800), # interp Carl Berners↔Sinsenkrysset t=0.33 - "Sinsenterrassen": (59.9350, 10.7817), # interp Carl Berners↔Sinsenkrysset t=0.67 + "Tinghuset": (59.9146, 10.7403), + "Torshov": (59.9332, 10.7643), + "Doktor Smiths vei": (59.9503, 10.7867), + "Kjelsåsalleen": (59.9641, 10.7833), + "Frogner stadion": (59.9167, 10.7038), + "Ruseløkka": (59.9120, 10.7258), + "Tullinøkka": (59.9163, 10.7349), + "Heimdalsgata": (59.9188, 10.7633), + "Sofienberg": (59.9236, 10.7734), + "Rosenhoff": (59.9307, 10.7800), + "Sinsenterrassen": 
(59.9350, 10.7817), } -# Merged — verified takes precedence if a key appears in both (shouldn't happen). TRIKK_STOPS: dict[str, tuple[float, float]] = { **TRIKK_STOPS_ESTIMATED, **TRIKK_STOPS_VERIFIED, @@ -323,13 +334,12 @@ TRIKK_STOPS: dict[str, tuple[float, float]] = { # Transit helpers # --------------------------------------------------------------------------- -_WALK_SPEED_KMH = 5.0 # avg walking speed +_WALK_SPEED_KMH = 5.0 def _nearest_stop( lat: float, lng: float, stops: dict[str, tuple[float, float]] ) -> tuple[str, float]: - """Return (stop_name, distance_km) for the nearest stop in a dict.""" best_name, best_dist = "", float("inf") for name, (slat, slng) in stops.items(): d = _distance_km(lat, lng, slat, slng) @@ -341,17 +351,7 @@ def _nearest_stop( def nearby_transit( lat: float, lng: float, max_walk_min: float = 10.0 ) -> dict[str, list[tuple[str, float]]]: - """Return T-bane and trikk stops within max_walk_min minutes walk. - - Returns: - { - "tbane": [("Carl Berners plass", 0.28), ...], # sorted by distance - "trikk": [("Rosenhoff", 0.19), ...], - } - All distances in km. - """ max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH - tbane = sorted( [ (n, _distance_km(lat, lng, la, lo)) @@ -408,19 +408,7 @@ def score_neighbourhood( def score_transport(unit: EiendomUnit | None) -> float: - """Walking distance to nearest T-bane or trikk stop. Max 10. - - Searches ALL stops in TBANE_STOPS and TRIKK_STOPS — no manual - curation needed when adding new search areas. - - Distance bands: - < 400 m → 10 pts (~5 min walk) - < 800 m → 8 pts (~10 min — stated threshold) - < 1200 m → 4 pts (~15 min) - ≥ 1200 m → 0 pts - - Falls back to 0 when no coordinates available. - """ + """Walking distance to nearest T-bane or trikk stop. 
Max 10.""" if unit is None or unit.lat is None or unit.lng is None: return 0.0 @@ -529,7 +517,6 @@ def score_hybel(description: str) -> float: if is_potential: return 2.0 - # Documented rental income → definitively real hybel if "leieinntekt" in d or "skattefri" in d: return 12.0 @@ -560,17 +547,10 @@ def score_hybel(description: str) -> float: def score_floor(ad: Any, unit: EiendomUnit | None) -> float: - """Floor level. Binary signal: ground floor is bad, everything else neutral. + """Floor penalty. Ground floor (≤1) = -15. All other floors = 0. - Rationale: "toppleilighet i 3-etgs blokk" og "8. etg i høyblokk" er begge - topp for sin bygning. Etasjenummer alene sier ingenting om utsikt eller lys - uten å kjenne byggets totale høyde. Eneste reelle signal er 1. etg (innsyn, - støy, lys) vs ikke-1. etg. - - Scores: - ground floor (≤1) → -15 (hard penalty: innsyn, støy, lys) - unknown → 0 (no data → no penalty) - above ground → 0 (etasjenummer uten bygghøyde = ingen info) + Rationale: floor number alone carries no signal without knowing building + height. The only reliable signal is ground floor (innsyn, støy, lys). """ floor: int | None = None @@ -670,22 +650,43 @@ def score_renovation(description: str) -> float: def score_risk(ad: Any, unit: EiendomUnit | None) -> float: - """Risk penalty. Returns 0 or negative.""" + """Risk penalties. Returns 0 or negative. 
+ + Triggers: + No Eiendom.no data → -8 (can't price-check) + Shared debt > 500k/unit → -12 (hard red flag — total cost misleading) + Shared debt 200-500k/unit → -6 (notable, investigate) + Common costs > 8 000/mnd → -10 (structural sameie problem) + Common costs 6-8 000/mnd → -5 + Days on market > 120 → -15 (something is wrong) + Days on market 60-120 → -5 (worth investigating) + "usikker" in description → -5 + """ penalty = 0.0 if unit is None: penalty -= 8.0 + # Shared debt — new: per-unit fellesgjeld signal + shared_debt = getattr(ad, "shared_debt", None) + if shared_debt is not None: + if shared_debt > _SHARED_DEBT_HIGH: + penalty -= 12.0 + logger.debug("High shared debt: %d kr → -12", shared_debt) + elif shared_debt > _SHARED_DEBT_MEDIUM: + penalty -= 6.0 + logger.debug("Medium shared debt: %d kr → -6", shared_debt) + fk = ad.common_costs or 0 - if fk > 8000: + if fk > _COMMON_COST_HIGH: penalty -= 10.0 - elif fk > 6000: + elif fk > _COMMON_COST_MEDIUM: penalty -= 5.0 if unit and unit.days_on_market: - if unit.days_on_market > 120: - penalty -= 10.0 - elif unit.days_on_market > 60: + if unit.days_on_market > _DAYS_STALE: + penalty -= 15.0 # was -10 + elif unit.days_on_market > _DAYS_SLOW: penalty -= 5.0 if "usikker" in (ad.listing_description or "").lower(): @@ -702,15 +703,13 @@ def score_risk(ad: Any, unit: EiendomUnit | None) -> float: def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]: description = ad.listing_description or "" - # Collect nearby transit for informational output (not used in scoring) transit_nearby: dict | None = None if unit and unit.lat and unit.lng: transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0) if transit_nearby["tbane"] or transit_nearby["trikk"]: logger.debug("Nearby transit: %s", transit_nearby) - scores: dict[str, Any] = { - "floor": score_floor(ad, unit), + raw: dict[str, float] = { "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, 
"district", None)), "view_and_quiet": score_view_and_quiet(ad, description), "area_and_layout": score_area_and_layout(ad, unit), @@ -724,14 +723,22 @@ def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit] "building_health": score_building_health(ad, description), "green_areas": score_green_areas(description), "renovation": score_renovation(description), + } + + penalties: dict[str, float] = { + "floor": score_floor(ad, unit), "risk": score_risk(ad, unit), } - # Numeric-only sum for total - numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))} - scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0)) + weighted_bonus = sum( + (raw[d] / DIMENSION_MAX[d]) * DIMENSION_WEIGHTS[d] * 100.0 for d in DIMENSION_WEIGHTS + ) + + total_penalty = sum(penalties.values()) + total = float(_clamp(weighted_bonus + total_penalty, 0.0, 100.0)) + + scores: dict[str, Any] = {**raw, **penalties, "total": total} - # Attach nearby transit as metadata (non-scoring) if transit_nearby is not None: scores["nearby_transit"] = transit_nearby diff --git a/finn_eiendom/service.py b/finn_eiendom/service.py index 6a2bc3d..506f4d4 100644 --- a/finn_eiendom/service.py +++ b/finn_eiendom/service.py @@ -8,11 +8,13 @@ from .analysis import analyze_search as run_analysis_search from .cache import ( get_eiendom_unit as get_cached_eiendom_unit, get_finn_ad, + get_similar_units as get_cached_similar_units, init_db, save_eiendom_unit, save_finn_ad, + save_similar_units, ) -from .config import FINN_CACHE_PATH +from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH from .eiendom_no import ( build_unit_vector, decode_unit_vector, @@ -89,14 +91,47 @@ async def get_or_fetch_eiendom_unit( async def get_or_fetch_similar_units( unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False ) -> list[SimilarUnit]: - """Get similar units (comps) from cache or fetch fresh.""" - # Similar units don't have a separate cache table; fetch 
fresh each time per PRD - # (or cache them in search_runs if doing diff detection) - unit = await get_or_fetch_eiendom_unit(unit_code) + """Get similar units (comps) from cache or fetch fresh. + + Fetches the unit first to get the unit_vector, then checks cache for similar + units by (unit_code, listing_status). On cache miss, fetches fresh from + Eiendom.no and saves to cache. + """ + conn = init_db(FINN_CACHE_PATH) + + # First, ensure we have the unit to build its vector + unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh) if unit is None: return [] + + # Check cache for similar units (unless force_refresh) + if not force_refresh: + cached_similar = get_cached_similar_units( + conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS + ) + if cached_similar: + logger.debug( + "Using cached similar units for %s (status=%s)", + unit_code, + listing_status, + ) + return cached_similar + + # Cache miss or force_refresh: fetch fresh vector = build_unit_vector(unit) - return await get_similar_units(vector, listing_status=listing_status) + similar = await get_similar_units(vector, listing_status=listing_status) + + # Save to cache + if similar: + save_similar_units(conn, unit_code, listing_status, similar) + logger.debug( + "Cached %d similar units for %s (status=%s)", + len(similar), + unit_code, + listing_status, + ) + + return similar async def get_unit_images(unit_code: str, force_refresh: bool = False) -> dict[str, Any]: