Enhance Docker and Compose configurations; add health check endpoint and caching improvements

- Updated Dockerfile to include FINN_CACHE_PATH and create data directory.
- Modified docker-compose.prod.yml to expose port 8010 and comment out (disable) the deploy resource limits.
- Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping.
- Added a /health endpoint and permissive CORS middleware in http_server.py for container orchestration and browser-based clients.
- Improved caching logic in analysis.py and service.py for similar units.
- Refined scoring.py with updated scoring model and constants for better accuracy.

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Ole
2026-05-26 12:10:00 +00:00
parent d3f4bfa838
commit 46fd22c277
7 changed files with 315 additions and 233 deletions
+4 -1
View File
@@ -41,7 +41,10 @@ COPY finn_eiendom /app/finn_eiendom
ENV PATH="/venv/bin:$PATH" \ ENV PATH="/venv/bin:$PATH" \
PYTHONUNBUFFERED=1 \ PYTHONUNBUFFERED=1 \
MCP_HOST=0.0.0.0 \ MCP_HOST=0.0.0.0 \
MCP_PORT=8010 MCP_PORT=8010 \
FINN_CACHE_PATH=/app/data/finn.sqlite
RUN mkdir -p /app/data
# Expose HTTP port # Expose HTTP port
EXPOSE 8010 EXPOSE 8010
+12 -8
View File
@@ -12,15 +12,19 @@ services:
environment: environment:
PYTHONUNBUFFERED: 1 PYTHONUNBUFFERED: 1
# Expose port for network access
ports:
- "8010:8010"
# More aggressive resource limits for production # More aggressive resource limits for production
deploy: # deploy:
resources: # resources:
limits: # limits:
cpus: '4' # cpus: '4'
memory: 2G # memory: 2G
reservations: # reservations:
cpus: '2' # cpus: '2'
memory: 1G # memory: 1G
# Restart policy # Restart policy
restart: always restart: always
+9 -5
View File
@@ -9,25 +9,25 @@ services:
# Environment configuration # Environment configuration
environment: environment:
FINN_CACHE_PATH: /app/data/finn.sqlite
# MCP HTTP server configuration # MCP HTTP server configuration
MCP_HOST: 0.0.0.0 MCP_HOST: 0.0.0.0
MCP_PORT: 8010 MCP_PORT: 8010
# Python configuration # Python configuration
PYTHONUNBUFFERED: 1 PYTHONUNBUFFERED: 1
# Optional: FINN/Eiendom.no rate limiting and retry configuration # Optional: FINN/Eiendom.no rate limiting and retry configuration
# FINN_RATE_LIMIT_DELAY: 0.5 # FINN_RATE_LIMIT_DELAY: 0.5
# HTTP_TIMEOUT: 30 # HTTP_TIMEOUT: 30
# HTTP_MAX_RETRIES: 3 # HTTP_MAX_RETRIES: 3
# Port mapping for HTTP access # Port mapping for HTTP access
ports: ports:
- "8010:8010" - "8010:8010"
# Health check # Health check
healthcheck: healthcheck:
test: ["CMD", "python", "-c", "import requests; requests.get('http://localhost:8010/health', timeout=5)"] test: [ "CMD", "python", "-c", "import requests; requests.get('http://localhost:8010/health', timeout=5)" ]
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 3 retries: 3
@@ -52,6 +52,10 @@ services:
options: options:
max-size: "10m" max-size: "10m"
max-file: "3" max-file: "3"
volumes:
- finn-cache:/app/data
# For development, you can override with: volumes:
# docker-compose -f docker-compose.yml -f docker-compose.override.yml up finn-cache:
# For development, you can override with:
# docker-compose -f docker-compose.yml -f docker-compose.override.yml up
+16 -10
View File
@@ -5,6 +5,7 @@ import logging
from . import ad as ad_module from . import ad as ad_module
from . import cache, eiendom_no, scoring, search from . import cache, eiendom_no, scoring, search
from .config import ( from .config import (
EIENDOM_NO_CACHE_TTL_HOURS,
FINN_CACHE_PATH, FINN_CACHE_PATH,
FINN_CACHE_TTL_AD_HOURS, FINN_CACHE_TTL_AD_HOURS,
FINN_DETAIL_LIMIT, FINN_DETAIL_LIMIT,
@@ -102,16 +103,21 @@ async def analyze_ad(
cache.save_eiendom_unit(conn, enriched) cache.save_eiendom_unit(conn, enriched)
if enriched: if enriched:
# EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the # Check cache for similar units first. The cache uses (unit_code,
# field comes back None. Reading enriched.unit_vector directly leaves # listing_status) as the key, so we must look it up by unit_code.
# this block dead and similar_units permanently empty. Build the vector similar_units = cache.get_similar_units(
# from the unit fields instead (fall back to the field if a future conn, enriched.unit_code, "RECENTLY_SOLD", ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
# endpoint ever populates it). )
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
if vector: if not similar_units:
# No dedicated cache table for similar units (per PRD) -- fetch # Cache miss: build the vector and fetch fresh from Eiendom.no
# fresh each call, consistent with service.get_or_fetch_similar_units. # (unit_vector field from get_unit is None; build locally)
similar_units = await eiendom_no.get_similar_units(vector) vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
if vector:
similar_units = await eiendom_no.get_similar_units(vector)
# Save to cache
if similar_units:
cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units)
scores = scoring.score_ad(finn_ad, enriched, similar_units) scores = scoring.score_ad(finn_ad, enriched, similar_units)
categories = scoring.classify_ad(scores) categories = scoring.classify_ad(scores)
+23
View File
@@ -1,4 +1,8 @@
import json
import uvicorn import uvicorn
from starlette.responses import JSONResponse
from starlette.requests import Request
from starlette.middleware.cors import CORSMiddleware
from mcp.server.transport_security import TransportSecuritySettings from mcp.server.transport_security import TransportSecuritySettings
from finn_eiendom.mcp_server import mcp from finn_eiendom.mcp_server import mcp
@@ -6,5 +10,24 @@ mcp.transport_security = TransportSecuritySettings(enable_dns_rebinding_protecti
app = mcp.sse_app() app = mcp.sse_app()
# Add CORS middleware to allow browser-based clients (e.g., MCP Inspector)
# to reach the SSE app from a different origin.
# NOTE(review): wildcard origins, methods and headers together with
# allow_credentials=True is the most permissive CORS setup available --
# confirm that credentialed cross-origin requests are really required;
# if not, drop allow_credentials or list the trusted origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Health check endpoint for container orchestration
async def health(request: Request) -> JSONResponse:
    """Answer a container liveness probe.

    The incoming request is not inspected; every call gets the same
    JSON body, ``{"status": "ok"}``.
    """
    payload = {"status": "ok"}
    return JSONResponse(payload)
app.add_route("/health", health, methods=["GET"])
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8010, forwarded_allow_ips="*") uvicorn.run(app, host="0.0.0.0", port=8010, forwarded_allow_ips="*")
+195 -188
View File
@@ -7,19 +7,28 @@ Priority hierarchy (stated):
MEDIUM : sameie economy, green areas / walking terrain, price vs market MEDIUM : sameie economy, green areas / walking terrain, price vs market
BONUS : renovation upside (acceptable, not required) BONUS : renovation upside (acceptable, not required)
Dimension caps (non-risk total max ≈ 105, clamped to 100): Scoring model — explicit weights (sum = 1.0):
floor -15..0 ground floor penalty only; etasje alene uten bygghøyde = ingen info Each dimension function returns a raw score in [0, DIMENSION_MAX[d]].
neighbourhood 25 preferred area anchors, distance-based score_ad normalises each to [0, 1] × weight × 100 → weighted bonus 0..100.
view_and_quiet 20 view quality + quiet setting; 0 if no balcony Penalties (floor, risk) are absolute deductions applied after weighting.
area_and_layout 15 sqm + bedroom count; hard penalty < 80 m² Final total = clamp(weighted_bonus + penalties, 0, 100).
hybel 12 hybel with own bath + kitchen
transport 10 walking distance to T-bane / trikk Dimension Weight Max pts
economy 8 listing price vs Eiendom.no estimate ─────────────────────────────────
comparable_sales 8 listing kr/m² vs median sold kr/m² of comps transport 24 % 10
building_health 7 sameie/borettslag economy signals view_and_quiet 21 % 20
green_areas 5 parks, tur, marka keywords neighbourhood 17 % 25
renovation 3 minor bonus (they accept renovation objects) hybel 14 % 12
risk 0..-30 stale listing, high costs, missing data area_and_layout 10 % 15
economy 6 % 8
comparable_sales 4 % 8
building_health 2 % 7
green_areas 1 % 5
renovation 1 % 3
─────────────────────────────────
bonus total 100 % 100
floor penalty 0..-15 (ground floor only)
risk penalty 0..-30
""" """
import logging import logging
@@ -30,15 +39,54 @@ from .models import EiendomUnit, SimilarUnit
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Scoring constants — explicit weights and per-dimension raw maxima
# ---------------------------------------------------------------------------
# Share of the weighted bonus contributed by each dimension (must sum to 1.0).
DIMENSION_WEIGHTS: dict[str, float] = {
    "transport": 0.24,  # was 0.11 — MUST-have, now primary signal
    "view_and_quiet": 0.21,  # was 0.17 — key quality-of-life differentiator
    "neighbourhood": 0.17,  # was 0.22 — still important, no longer dominant
    "hybel": 0.14,  # was 0.12 — rental income / flexibility
    "area_and_layout": 0.10,  # was 0.16 — baseline met by search filters
    "economy": 0.06,  # was 0.08
    "comparable_sales": 0.04,  # was 0.06
    "building_health": 0.02,  # was 0.04
    "green_areas": 0.01,  # was 0.03
    "renovation": 0.01,  # unchanged
}

# Raw maximum of each dimension; score_ad divides the raw score by this
# value before applying the weight.
DIMENSION_MAX: dict[str, float] = {
    "transport": 10.0,
    "view_and_quiet": 20.0,
    "neighbourhood": 25.0,
    "hybel": 12.0,
    "area_and_layout": 15.0,
    "economy": 8.0,
    "comparable_sales": 8.0,
    "building_health": 7.0,
    "green_areas": 5.0,
    "renovation": 3.0,
}

# Import-time sanity checks on the tables above.  They are explicit raises
# rather than `assert` statements so they still run under `python -O`; the
# AssertionError type and messages of the former asserts are retained.
if not abs(sum(DIMENSION_WEIGHTS.values()) - 1.0) < 1e-9:
    raise AssertionError("Weights must sum to 1.0")
if DIMENSION_WEIGHTS.keys() != DIMENSION_MAX.keys():
    raise AssertionError("Weight/max key mismatch")
# A zero maximum would make the normalising division in score_ad fail.
if not all(cap > 0 for cap in DIMENSION_MAX.values()):
    raise AssertionError("Dimension maxima must be positive")

# Risk penalty thresholds
_SHARED_DEBT_HIGH = 500_000  # per unit — hard red flag
_SHARED_DEBT_MEDIUM = 200_000  # per unit — notable
_COMMON_COST_HIGH = 8_000  # kr/mnd
_COMMON_COST_MEDIUM = 6_000  # kr/mnd
_DAYS_STALE = 120  # days on market → something is wrong
_DAYS_SLOW = 60  # days on market → worth investigating
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Geometry helpers # Geometry helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float: def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
"""Flat-earth approximation — accurate enough within Oslo (~59.9°N). """Flat-earth approximation — accurate enough within Oslo (~59.9°N)."""
1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude.
"""
dlat = (lat2 - lat1) * 111.0 dlat = (lat2 - lat1) * 111.0
dlng = (lng2 - lng1) * 56.0 dlng = (lng2 - lng1) * 56.0
return math.sqrt(dlat**2 + dlng**2) return math.sqrt(dlat**2 + dlng**2)
@@ -59,7 +107,6 @@ def _median(values: list[float]) -> float:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [ _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
# (label, lat, lng) — label used only for debug logging
("Grünerløkka", 59.9240, 10.7573), ("Grünerløkka", 59.9240, 10.7573),
("Torshov", 59.9340, 10.7620), ("Torshov", 59.9340, 10.7620),
("Rodeløkka", 59.9315, 10.7660), ("Rodeløkka", 59.9315, 10.7660),
@@ -80,17 +127,10 @@ _PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Transit network — all T-bane and trikk stops. # Transit network
#
# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations).
# TRIKK_STOPS: estimated coordinates (Wikipedia has no trikk coords).
#
# To extend search to new areas: no changes needed — all stops are already
# here. score_transport automatically finds the nearest stop for any address.
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
TBANE_STOPS: dict[str, tuple[float, float]] = { TBANE_STOPS: dict[str, tuple[float, float]] = {
# All 101 stations — Wikipedia DMS converted to decimal degrees
"Ammerud": (59.957922, 10.871165), "Ammerud": (59.957922, 10.871165),
"Avløs": (59.913859, 10.552926), "Avløs": (59.913859, 10.552926),
"Bekkestua": (59.918097, 10.588031), "Bekkestua": (59.918097, 10.588031),
@@ -194,126 +234,97 @@ TBANE_STOPS: dict[str, tuple[float, float]] = {
"Østhorn": (59.956944, 10.749779), "Østhorn": (59.956944, 10.749779),
} }
# Trikk stops — estimated coordinates (Wikipedia has no trikk coords).
# Grouped by line corridor for readability.
# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia
# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring.
# Source tag format: Wikidata QID | "shared T-bane" | "OSM node <id>" | "Wikipedia"
TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = { TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
# ── Briskeby-linjen (l11/19) ───────────────────────────────────────── "Majorstuen": (59.929904, 10.714931),
"Majorstuen": (59.929904, 10.714931), # shared T-bane "Bogstadveien": (59.92611, 10.72167),
"Bogstadveien": (59.92611, 10.72167), # Q19372022 "Rosenborg": (59.92417, 10.72389),
"Rosenborg": (59.92417, 10.72389), # Q7899658 "Briskeby": (59.92048, 10.71767),
"Briskeby": (59.92048, 10.71767), # Q11962293 "Riddervolds plass": (59.91896, 10.72026),
"Riddervolds plass": (59.91896, 10.72026), # Q19386557 "Inkognitogata": (59.91565, 10.72114),
"Inkognitogata": (59.91565, 10.72114), # Q11977313 "Nationaltheatret": (59.91504, 10.73304),
"Nationaltheatret": (59.91504, 10.73304), # shared T-bane "Øvre Slottsgate": (59.9118, 10.7417),
# ── Sentrum (shared l11/12/13/17/18/19) ────────────────────────────── "Dronningens gate": (59.91053, 10.74697),
"Øvre Slottsgate": (59.9118, 10.7417), # Q31079249 "Jernbanetorget": (59.912116, 10.751211),
"Dronningens gate": (59.91053, 10.74697), # Q29828354 "Storgata": (59.91396, 10.75141),
"Jernbanetorget": (59.912116, 10.751211), # shared T-bane "Nybrua": (59.91707, 10.75834),
"Storgata": (59.91396, 10.75141), # Q109484341 "Stortorvet": (59.91310, 10.74530),
"Nybrua": (59.91707, 10.75834), # Q104867506 "Bjørvika": (59.90806, 10.75639),
"Stortorvet": (59.91310, 10.74530), # Q7620354 "Schous plass": (59.92081, 10.75932),
"Bjørvika": (59.90806, 10.75639), # Wikipedia "Olaf Ryes plass": (59.9231, 10.7592),
# ── GrünerløkkaTorshov-linjen (l11/12/18) ─────────────────────────── "Birkelunden": (59.9271, 10.7601),
"Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia "Biermanns gate": (59.93028, 10.76104),
"Olaf Ryes plass": (59.9231, 10.7592), # Q4993079 "Sandaker senter": (59.93889, 10.76861),
"Birkelunden": (59.9271, 10.7601), # Q4916412 "Grefsenveien": (59.94278, 10.77344),
"Biermanns gate": (59.93028, 10.76104), # Wikipedia "Storo": (59.944545, 10.778768),
"Sandaker senter": (59.93889, 10.76861), # Wikipedia "Disen": (59.94627, 10.78729),
"Grefsenveien": (59.94278, 10.77344), # Q17778424 "Glads vei": (59.95235, 10.78533),
"Storo": (59.944545, 10.778768), # shared T-bane "Grefsenplatået": (59.9560, 10.78573),
# ── Kjelsåslinjen (l11/12) ─────────────────────────────────────────── "Grefsen stadion": (59.96008, 10.78475),
"Disen": (59.94627, 10.78729), # Q11965753 "Kjelsås": (59.96611, 10.78278),
"Glads vei": (59.95235, 10.78533), # Q17776371 "Vigelandsparken": (59.92457, 10.70815),
"Grefsenplatået": (59.9560, 10.78573), # Q11972531 "Frogner plass": (59.92255, 10.70491),
"Grefsen stadion": (59.96008, 10.78475), # Q11972525 "Elisenberg": (59.91944, 10.70861),
"Kjelsås": (59.96611, 10.78278), # Wikipedia "Lille Frogner allé": (59.9180, 10.7120),
# ── Frogner-linjen (l12) ───────────────────────────────────────────── "Niels Juels gate": (59.91634, 10.71520),
"Vigelandsparken": (59.92457, 10.70815), # Q19398059 "Solli": (59.91486, 10.71906),
"Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 30560564 "Aker Brygge": (59.9110, 10.7299),
"Elisenberg": (59.91944, 10.70861), # Q5361695 "Kontraskjæret": (59.91087, 10.73592),
"Lille Frogner allé": (59.9180, 10.7120), # Q19379373 "Lilleaker": (59.92074, 10.63580),
"Niels Juels gate": (59.91634, 10.71520), # Q11991378 "Sollerud": (59.92104, 10.64309),
"Solli": (59.91486, 10.71906), # Q7558364 "Furulund": (59.91990, 10.65013),
# ── Vika-linjen (l12) ──────────────────────────────────────────────── "Ullern": (59.92429, 10.65858),
"Aker Brygge": (59.9110, 10.7299), # Q4700639 "Abbediengen": (59.92517, 10.66716),
"Kontraskjæret": (59.91087, 10.73592), # Q11998807 "Hoff": (59.92500, 10.67488),
# ── Lilleaker-linjen (l13) ─────────────────────────────────────────── "Skøyen": (59.92384, 10.68034),
"Lilleaker": (59.92074, 10.63580), # Wikipedia "Thune": (59.92186, 10.68742),
"Sollerud": (59.92104, 10.64309), # Wikipedia "Nobels gate": (59.91758, 10.69866),
"Furulund": (59.91990, 10.65013), # Wikipedia "Skarpsno": (59.91430, 10.70234),
"Ullern": (59.92429, 10.65858), # Wikipedia "Skillebekk": (59.91277, 10.71103),
"Abbediengen": (59.92517, 10.66716), # Wikipedia "Middelalderparken": (59.90639, 10.76417),
"Hoff": (59.92500, 10.67488), # Wikipedia "Oslo Hospital": (59.9032, 10.7674),
"Skøyen": (59.92384, 10.68034), # Wikipedia "Ekebergparken": (59.8977, 10.7593),
# ── Skøyen-linjen (l13) ────────────────────────────────────────────── "Jomfrubråten": (59.8883, 10.7706),
"Thune": (59.92186, 10.68742), # Wikipedia "Sportsplassen": (59.8860, 10.7736),
"Nobels gate": (59.91758, 10.69866), # Wikipedia "Holtet": (59.88151, 10.78415),
"Skarpsno": (59.91430, 10.70234), # Wikipedia "Sørli": (59.87493, 10.78709),
"Skillebekk": (59.91277, 10.71103), # Wikipedia "Kastellet": (59.87106, 10.79036),
# ── Ekeberg-linjen (l13/19) ────────────────────────────────────────── "Bråten": (59.86714, 10.79244),
"Middelalderparken": (59.90639, 10.76417), # Q99971403 "Sæter": (59.86102, 10.79870),
"Oslo Hospital": (59.9032, 10.7674), # Wikipedia "Ljabru": (59.85335, 10.80089),
"Ekebergparken": (59.8977, 10.7593), # Wikipedia "Rikshospitalet": (59.947768, 10.714716),
"Jomfrubråten": (59.8883, 10.7706), # Wikipedia "Gaustadalleen": (59.9454, 10.7172),
"Sportsplassen": (59.8860, 10.7736), # Wikipedia "Forskningsparken": (59.943513, 10.720425),
"Holtet": (59.88151, 10.78415), # Wikipedia "Universitetet Blindern": (59.9421, 10.7243),
"Sørli": (59.87493, 10.78709), # Wikipedia "John Collets plass": (59.9403, 10.7290),
"Kastellet": (59.87106, 10.79036), # Wikipedia "Ullevål sykehus": (59.9361, 10.7318),
"Bråten": (59.86714, 10.79244), # Wikipedia "Adamstuen": (59.9326, 10.7345),
"Sæter": (59.86102, 10.79870), # Wikipedia "Stensgata": (59.92957, 10.73303),
"Ljabru": (59.85335, 10.80089), # Wikipedia "Bislett": (59.92599, 10.73108),
# ── Ullevål Hageby-linjen (l17/18) ─────────────────────────────────── "Dalsbergstien": (59.92354, 10.73163),
"Rikshospitalet": (59.947768, 10.714716), # Wikipedia "Welhavens gate": (59.92131, 10.72968),
"Gaustadalleen": (59.9454, 10.7172), # Wikipedia "Frydenlund": (59.92086, 10.73317),
"Forskningsparken": (59.943513, 10.720425), # shared T-bane "Holbergs plass": (59.91876, 10.73453),
"Universitetet Blindern": (59.9421, 10.7243), # Wikipedia "Lakkegata skole": (59.92055, 10.76834),
"John Collets plass": (59.9403, 10.7290), # Wikipedia "Carl Berners plass": (59.926592, 10.778360),
"Ullevål sykehus": (59.9361, 10.7318), # Wikipedia "Sinsenkrysset": (59.93911, 10.78340),
"Adamstuen": (59.9326, 10.7345), # Wikipedia "Grefsen stasjon": (59.94167, 10.78056),
"Stensgata": (59.92957, 10.73303), # Q7607927 "Homansbyen": (59.92278, 10.72639),
"Bislett": (59.92599, 10.73108), # Q11961163
"Dalsbergstien": (59.92354, 10.73163), # Q17764618
"Welhavens gate": (59.92131, 10.72968), # Q12010485
"Frydenlund": (59.92086, 10.73317), # Q19373143
"Holbergs plass": (59.91876, 10.73453), # Q11975623
# ── Sinsen-linjen (l17) ──────────────────────────────────────────────
"Lakkegata skole": (59.92055, 10.76834), # Q11982987
"Carl Berners plass": (59.926592, 10.778360), # shared T-bane
"Sinsenkrysset": (59.93911, 10.78340), # Q19388523
"Grefsen stasjon": (59.94167, 10.78056), # Wikipedia
# ── Homansbyen-linjen (l19) ───────────────────────────────────────────
"Homansbyen": (59.92278, 10.72639), # Q5887760
} }
# Estimated trikk stop coordinates — no Wikidata P625 found.
# Derived from linear interpolation between verified neighbours,
# or placed from map/street knowledge. Max error ~150-250 m.
# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED.
TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = { TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
# ── Sentrum ─────────────────────────────────────────────────────────── "Tinghuset": (59.9146, 10.7403),
"Tinghuset": (59.9146, 10.7403), # Ullevål Hageby-l ved Stortinget T "Torshov": (59.9332, 10.7643),
# ── GrünerløkkaTorshov-linjen ─────────────────────────────────────── "Doktor Smiths vei": (59.9503, 10.7867),
"Torshov": (59.9332, 10.7643), # interp Biermanns gate↔Sandaker "Kjelsåsalleen": (59.9641, 10.7833),
# ── Kjelsåslinjen ──────────────────────────────────────────────────── "Frogner stadion": (59.9167, 10.7038),
"Doktor Smiths vei": (59.9503, 10.7867), # interp Disen↔Kjelsås t=0.20 "Ruseløkka": (59.9120, 10.7258),
"Kjelsåsalleen": (59.9641, 10.7833), # interp Disen↔Kjelsås t=0.90 "Tullinøkka": (59.9163, 10.7349),
# ── Frogner-linjen ─────────────────────────────────────────────────── "Heimdalsgata": (59.9188, 10.7633),
"Frogner stadion": (59.9167, 10.7038), # Kirkeveien S for Vigelandsparken "Sofienberg": (59.9236, 10.7734),
# ── Vika-linjen ────────────────────────────────────────────────────── "Rosenhoff": (59.9307, 10.7800),
"Ruseløkka": (59.9120, 10.7258), # interp Solli↔Kontraskjæret "Sinsenterrassen": (59.9350, 10.7817),
# ── Ullevål Hageby-linjen ─────────────────────────────────────────────
"Tullinøkka": (59.9163, 10.7349), # interp Holbergs plass↔Tinghuset
# ── Sinsen-linjen ────────────────────────────────────────────────────
"Heimdalsgata": (59.9188, 10.7633), # interp Nybrua↔Lakkegata skole
"Sofienberg": (59.9236, 10.7734), # interp Lakkegata skole↔Carl Berners
"Rosenhoff": (59.9307, 10.7800), # interp Carl Berners↔Sinsenkrysset t=0.33
"Sinsenterrassen": (59.9350, 10.7817), # interp Carl Berners↔Sinsenkrysset t=0.67
} }
# Merged — verified takes precedence if a key appears in both (shouldn't happen).
TRIKK_STOPS: dict[str, tuple[float, float]] = { TRIKK_STOPS: dict[str, tuple[float, float]] = {
**TRIKK_STOPS_ESTIMATED, **TRIKK_STOPS_ESTIMATED,
**TRIKK_STOPS_VERIFIED, **TRIKK_STOPS_VERIFIED,
@@ -323,13 +334,12 @@ TRIKK_STOPS: dict[str, tuple[float, float]] = {
# Transit helpers # Transit helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_WALK_SPEED_KMH = 5.0 # avg walking speed _WALK_SPEED_KMH = 5.0
def _nearest_stop( def _nearest_stop(
lat: float, lng: float, stops: dict[str, tuple[float, float]] lat: float, lng: float, stops: dict[str, tuple[float, float]]
) -> tuple[str, float]: ) -> tuple[str, float]:
"""Return (stop_name, distance_km) for the nearest stop in a dict."""
best_name, best_dist = "", float("inf") best_name, best_dist = "", float("inf")
for name, (slat, slng) in stops.items(): for name, (slat, slng) in stops.items():
d = _distance_km(lat, lng, slat, slng) d = _distance_km(lat, lng, slat, slng)
@@ -341,17 +351,7 @@ def _nearest_stop(
def nearby_transit( def nearby_transit(
lat: float, lng: float, max_walk_min: float = 10.0 lat: float, lng: float, max_walk_min: float = 10.0
) -> dict[str, list[tuple[str, float]]]: ) -> dict[str, list[tuple[str, float]]]:
"""Return T-bane and trikk stops within max_walk_min minutes walk.
Returns:
{
"tbane": [("Carl Berners plass", 0.28), ...], # sorted by distance
"trikk": [("Rosenhoff", 0.19), ...],
}
All distances in km.
"""
max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
tbane = sorted( tbane = sorted(
[ [
(n, _distance_km(lat, lng, la, lo)) (n, _distance_km(lat, lng, la, lo))
@@ -408,19 +408,7 @@ def score_neighbourhood(
def score_transport(unit: EiendomUnit | None) -> float: def score_transport(unit: EiendomUnit | None) -> float:
"""Walking distance to nearest T-bane or trikk stop. Max 10. """Walking distance to nearest T-bane or trikk stop. Max 10."""
Searches ALL stops in TBANE_STOPS and TRIKK_STOPS — no manual
curation needed when adding new search areas.
Distance bands:
< 400 m → 10 pts (~5 min walk)
< 800 m → 8 pts (~10 min — stated threshold)
< 1200 m → 4 pts (~15 min)
≥ 1200 m → 0 pts
Falls back to 0 when no coordinates available.
"""
if unit is None or unit.lat is None or unit.lng is None: if unit is None or unit.lat is None or unit.lng is None:
return 0.0 return 0.0
@@ -529,7 +517,6 @@ def score_hybel(description: str) -> float:
if is_potential: if is_potential:
return 2.0 return 2.0
# Documented rental income → definitively real hybel
if "leieinntekt" in d or "skattefri" in d: if "leieinntekt" in d or "skattefri" in d:
return 12.0 return 12.0
@@ -560,17 +547,10 @@ def score_hybel(description: str) -> float:
def score_floor(ad: Any, unit: EiendomUnit | None) -> float: def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
"""Floor level. Binary signal: ground floor is bad, everything else neutral. """Floor penalty. Ground floor (≤1) = -15. All other floors = 0.
Rationale: "toppleilighet i 3-etgs blokk" og "8. etg i høyblokk" er begge Rationale: floor number alone carries no signal without knowing building
topp for sin bygning. Etasjenummer alene sier ingenting om utsikt eller lys height. The only reliable signal is ground floor (innsyn, støy, lys).
uten å kjenne byggets totale høyde. Eneste reelle signal er 1. etg (innsyn,
støy, lys) vs ikke-1. etg.
Scores:
ground floor (≤1) → -15 (hard penalty: innsyn, støy, lys)
unknown → 0 (no data → no penalty)
above ground → 0 (etasjenummer uten bygghøyde = ingen info)
""" """
floor: int | None = None floor: int | None = None
@@ -670,22 +650,43 @@ def score_renovation(description: str) -> float:
def score_risk(ad: Any, unit: EiendomUnit | None) -> float: def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
"""Risk penalty. Returns 0 or negative.""" """Risk penalties. Returns 0 or negative.
Triggers:
No Eiendom.no data → -8 (can't price-check)
Shared debt > 500k/unit → -12 (hard red flag — total cost misleading)
Shared debt 200-500k/unit → -6 (notable, investigate)
Common costs > 8 000/mnd → -10 (structural sameie problem)
Common costs 6-8 000/mnd → -5
Days on market > 120 → -15 (something is wrong)
Days on market 60-120 → -5 (worth investigating)
"usikker" in description → -5
"""
penalty = 0.0 penalty = 0.0
if unit is None: if unit is None:
penalty -= 8.0 penalty -= 8.0
# Shared debt — new: per-unit fellesgjeld signal
shared_debt = getattr(ad, "shared_debt", None)
if shared_debt is not None:
if shared_debt > _SHARED_DEBT_HIGH:
penalty -= 12.0
logger.debug("High shared debt: %d kr → -12", shared_debt)
elif shared_debt > _SHARED_DEBT_MEDIUM:
penalty -= 6.0
logger.debug("Medium shared debt: %d kr → -6", shared_debt)
fk = ad.common_costs or 0 fk = ad.common_costs or 0
if fk > 8000: if fk > _COMMON_COST_HIGH:
penalty -= 10.0 penalty -= 10.0
elif fk > 6000: elif fk > _COMMON_COST_MEDIUM:
penalty -= 5.0 penalty -= 5.0
if unit and unit.days_on_market: if unit and unit.days_on_market:
if unit.days_on_market > 120: if unit.days_on_market > _DAYS_STALE:
penalty -= 10.0 penalty -= 15.0 # was -10
elif unit.days_on_market > 60: elif unit.days_on_market > _DAYS_SLOW:
penalty -= 5.0 penalty -= 5.0
if "usikker" in (ad.listing_description or "").lower(): if "usikker" in (ad.listing_description or "").lower():
@@ -702,15 +703,13 @@ def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]: def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
description = ad.listing_description or "" description = ad.listing_description or ""
# Collect nearby transit for informational output (not used in scoring)
transit_nearby: dict | None = None transit_nearby: dict | None = None
if unit and unit.lat and unit.lng: if unit and unit.lat and unit.lng:
transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0) transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
if transit_nearby["tbane"] or transit_nearby["trikk"]: if transit_nearby["tbane"] or transit_nearby["trikk"]:
logger.debug("Nearby transit: %s", transit_nearby) logger.debug("Nearby transit: %s", transit_nearby)
scores: dict[str, Any] = { raw: dict[str, float] = {
"floor": score_floor(ad, unit),
"neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)), "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)),
"view_and_quiet": score_view_and_quiet(ad, description), "view_and_quiet": score_view_and_quiet(ad, description),
"area_and_layout": score_area_and_layout(ad, unit), "area_and_layout": score_area_and_layout(ad, unit),
@@ -724,14 +723,22 @@ def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]
"building_health": score_building_health(ad, description), "building_health": score_building_health(ad, description),
"green_areas": score_green_areas(description), "green_areas": score_green_areas(description),
"renovation": score_renovation(description), "renovation": score_renovation(description),
}
penalties: dict[str, float] = {
"floor": score_floor(ad, unit),
"risk": score_risk(ad, unit), "risk": score_risk(ad, unit),
} }
# Numeric-only sum for total weighted_bonus = sum(
numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))} (raw[d] / DIMENSION_MAX[d]) * DIMENSION_WEIGHTS[d] * 100.0 for d in DIMENSION_WEIGHTS
scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0)) )
total_penalty = sum(penalties.values())
total = float(_clamp(weighted_bonus + total_penalty, 0.0, 100.0))
scores: dict[str, Any] = {**raw, **penalties, "total": total}
# Attach nearby transit as metadata (non-scoring)
if transit_nearby is not None: if transit_nearby is not None:
scores["nearby_transit"] = transit_nearby scores["nearby_transit"] = transit_nearby
+41 -6
View File
@@ -8,11 +8,13 @@ from .analysis import analyze_search as run_analysis_search
from .cache import ( from .cache import (
get_eiendom_unit as get_cached_eiendom_unit, get_eiendom_unit as get_cached_eiendom_unit,
get_finn_ad, get_finn_ad,
get_similar_units as get_cached_similar_units,
init_db, init_db,
save_eiendom_unit, save_eiendom_unit,
save_finn_ad, save_finn_ad,
save_similar_units,
) )
from .config import FINN_CACHE_PATH from .config import EIENDOM_NO_CACHE_TTL_HOURS, FINN_CACHE_PATH
from .eiendom_no import ( from .eiendom_no import (
build_unit_vector, build_unit_vector,
decode_unit_vector, decode_unit_vector,
@@ -89,14 +91,47 @@ async def get_or_fetch_eiendom_unit(
async def get_or_fetch_similar_units( async def get_or_fetch_similar_units(
unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False
) -> list[SimilarUnit]: ) -> list[SimilarUnit]:
"""Get similar units (comps) from cache or fetch fresh.""" """Get similar units (comps) from cache or fetch fresh.
# Similar units don't have a separate cache table; fetch fresh each time per PRD
# (or cache them in search_runs if doing diff detection) Fetches the unit first to get the unit_vector, then checks cache for similar
unit = await get_or_fetch_eiendom_unit(unit_code) units by (unit_code, listing_status). On cache miss, fetches fresh from
Eiendom.no and saves to cache.
"""
conn = init_db(FINN_CACHE_PATH)
# First, ensure we have the unit to build its vector
unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
if unit is None: if unit is None:
return [] return []
# Check cache for similar units (unless force_refresh)
if not force_refresh:
cached_similar = get_cached_similar_units(
conn, unit_code, listing_status, ttl_hours=EIENDOM_NO_CACHE_TTL_HOURS
)
if cached_similar:
logger.debug(
"Using cached similar units for %s (status=%s)",
unit_code,
listing_status,
)
return cached_similar
# Cache miss or force_refresh: fetch fresh
vector = build_unit_vector(unit) vector = build_unit_vector(unit)
return await get_similar_units(vector, listing_status=listing_status) similar = await get_similar_units(vector, listing_status=listing_status)
# Save to cache
if similar:
save_similar_units(conn, unit_code, listing_status, similar)
logger.debug(
"Cached %d similar units for %s (status=%s)",
len(similar),
unit_code,
listing_status,
)
return similar
async def get_unit_images(unit_code: str, force_refresh: bool = False) -> dict[str, Any]: async def get_unit_images(unit_code: str, force_refresh: bool = False) -> dict[str, Any]: