diff --git a/Dockerfile b/Dockerfile
index 2163d8d..0379107 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,8 +10,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libxslt1-dev \
&& rm -rf /var/lib/apt/lists/*
-# Copy dependency files
+# Copy source files
COPY pyproject.toml ./
+COPY finn_eiendom ./finn_eiendom
+COPY README.md ./
# Create virtual environment and install dependencies
RUN python -m venv /venv && \
diff --git a/fetch_trikk_coords.py b/fetch_trikk_coords.py
new file mode 100644
index 0000000..d17e901
--- /dev/null
+++ b/fetch_trikk_coords.py
@@ -0,0 +1,238 @@
+"""
+fetch_trikk_coords.py
+Fetches P625 coordinates from Wikidata for all Oslo tram stops,
+resolved via Wikipedia sitelinks. Run: python3 fetch_trikk_coords.py
+Requires: pip install requests beautifulsoup4
+"""
+
+import requests
+from urllib.parse import unquote, urlparse, parse_qs
+from bs4 import BeautifulSoup
+
+# All Wikipedia links from the tram lines (incl. redlinks)
+HTML = """
+Majorstuen
+Bogstadveien
+Rosenborg
+Briskeby
+Riddervolds plass
+Inkognitogata
+Nationaltheatret
+Øvre Slottsgate
+Dronningens gate
+Jernbanetorget
+Storgata
+Nybrua
+Schous plass
+Olaf Ryes plass
+Birkelunden
+Biermanns gate
+Torshov
+Sandaker senter
+Grefsenveien
+Storo
+Disen tram stop
+Doktor Smiths vei
+Glads vei
+Grefsenplatået
+Grefsen stadion
+Kjelsåsalleen
+Kjelsås
+Frogner stadion
+Vigelandsparken
+Frogner plass
+Elisenberg
+Lille Frogner allé
+Niels Juels gate
+Solli
+Ruseløkka
+Akerbrygge
+Kontraskjæret
+Middelalderparken
+Bislett
+Dalsbergstien
+Welhavens gate
+Frydenlund
+Holbergs plass
+Tullinøkka
+Tinghuset
+Stortorvet
+Heimdalsgata
+Lakkegata skole
+Sofienberg
+Carl Berners plass
+Rosenhoff
+Sinsenterrassen
+Sinsenkrysset
+Grefsen stasjon
+Homansbyen
+"""
+
+# Hand-maintained title -> QID overrides. Titles listed here are never sent to
+# the Wikipedia API; a value of None means no Wikidata item is known yet.
+WIKIDATA_DIRECT = {
+    # Redlinks — no Wikipedia page exists, so the QID was looked up on Wikidata
+    "Briskeby_tram_stop": "Q11962293",
+    "Riddervolds_plass_tram_stop": "Q19386557",
+    "Grefsenveien_tram_stop": "Q17778424",
+    "Doktor_Smiths_vei_tram_stop": None,
+    "Glads_vei_tram_stop": "Q17776371",
+    "Grefsenplatået_tram_stop": "Q11972531",
+    "Grefsen_stadion_tram_stop": "Q11972525",
+    "Kjelsåsalleen_tram_stop": None,
+    "Vigelandsparken_tram_stop": "Q19398059",
+    "Frogner_plass_tram_stop": "Q11970372",
+    "Lille_Frogner_allé_tram_stop": "Q19379373",
+    "Niels_Juels_gate_tram_stop": "Q11991378",
+    "Ruseløkka_tram_stop": None,
+    "Bislett_tram_stop": "Q11961163",
+    "Dalsbergstien_tram_stop": "Q17764618",
+    "Welhavens_gate_tram_stop": "Q12010485",
+    "Frydenlund_tram_stop": "Q19373143",
+    "Holbergs_plass_tram_stop": "Q11975623",
+    "Tullinøkka_(station)": None,
+    "Heimdalsgata_tram_stop": None,
+    "Lakkegata_skole_tram_stop": "Q11982987",
+    "Sofienberg_tram_stop": None,
+    "Rosenhoff_tram_stop": None,
+    "Sinsenterrassen_tram_stop": None,
+    "Torshov_(station)": None,
+    # "Sandaker_senter_(station)" used to be listed here with None as well; the
+    # duplicate key was silently overridden by the QID entry further down, so
+    # only that entry is kept.
+    "Frogner_stadion_tram_stop": None,
+    # Wikipedia redirect pages — QID of the redirect target
+    "Stortorvet_(station)": "Q7620354",  # → Stortorvet_tram_stop
+    "Stortinget_(station)": "Q188712",  # → Stortinget T-bane (the Tinghuset tram stop is next to it)
+    "Sinsen_(station)": "Q19388523",  # → Sinsenkrysset tram stop
+    "Biermanns_gate_(station)": "Q19363042",  # → Biermanns gate tram stop
+    "Carl_Berners_plass_(station)": "Q890592",  # → Carl Berners plass metro+tram
+    "Majorstuen_(station)": "Q686510",  # → Majorstuen T-bane
+    "Grefsen_Station": "Q728583",  # → Grefsen railway station
+    "Kjelsås_tram_stop": "Q11981146",  # → Kjelsås
+    "Storo_(station)": "Q932133",  # → Storo T-bane
+    "Schous_plass_tram_stop": "Q12006491",  # → Schous plass
+    "Jernbanetorget_(station)": "Q841481",  # → Jernbanetorget T-bane + tram
+    "Sandaker_senter_(station)": "Q12008217",  # → Sandaker senter
+}
+
+
+def extract_titles(html):
+    """Collect the Wikipedia page titles linked from an HTML fragment.
+
+    Two kinds of anchor href are recognised: ``/wiki/<title>`` article links
+    and ``/w/index.php?title=<title>...`` links. Any other href is ignored,
+    and so is a link whose title comes out empty.
+
+    Args:
+        html: HTML markup to scan for ``<a>`` tags.
+
+    Returns:
+        dict mapping each unique wiki title to the link text of its first
+        occurrence, in document order.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    seen = {}
+    for a in soup.find_all("a"):
+        href = a.get("href", "")
+        if href.startswith("/wiki/"):
+            title = unquote(href.removeprefix("/wiki/"))
+        elif href.startswith("/w/index.php"):
+            # parse_qs already percent-decodes the value; decoding it a second
+            # time would corrupt titles that contain a literal "%".
+            title = parse_qs(urlparse(href).query).get("title", [None])[0]
+        else:
+            continue
+        if title:
+            # setdefault keeps the label of the first link to each title.
+            seen.setdefault(title, a.get_text(strip=True))
+    return seen  # {wiki_title: display_label}
+
+
+HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"}
+
+
+def get_qids_from_wikipedia(titles):
+    """Resolve Wikipedia article titles to Wikidata QIDs.
+
+    Titles present in WIKIDATA_DIRECT are not sent to the API; their known
+    (non-None) QIDs are merged into the result at the end. All other titles
+    are queried on the English Wikipedia API in batches of 50 with redirect
+    following enabled.
+
+    Args:
+        titles: iterable of article titles to resolve.
+
+    Returns:
+        dict of title -> QID. Each resolved page is stored under the page
+        title the API returned (spaces replaced by underscores) and also
+        under the title that was originally requested, so callers can look
+        results up by the title they passed in even when it was normalised
+        or redirected.
+
+    Raises:
+        requests.HTTPError: if the API responds with an error status.
+    """
+    url = "https://en.wikipedia.org/w/api.php"
+    result = {}
+    batch = [t for t in titles if t not in WIKIDATA_DIRECT]
+    for i in range(0, len(batch), 50):
+        chunk = batch[i : i + 50]
+        r = requests.get(
+            url,
+            params={
+                "action": "query",
+                "format": "json",
+                "redirects": "1",
+                "prop": "pageprops",
+                "ppprop": "wikibase_item",
+                "titles": "|".join(chunk),
+            },
+            headers=HEADERS,
+            timeout=30,
+        )
+        print(f"Status: {r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}")
+        # Fail loudly on an HTTP error instead of tripping over a non-JSON body.
+        r.raise_for_status()
+        # An API-level error response carries no "query" section.
+        query = r.json().get("query", {})
+
+        qid_by_page = {}
+        for page in query.get("pages", {}).values():
+            qid = page.get("pageprops", {}).get("wikibase_item")
+            if qid:
+                page_title = page.get("title", "")
+                qid_by_page[page_title] = qid
+                result[page_title.replace(" ", "_")] = qid
+
+        # The response lists every title it rewrote: "normalized" entries
+        # (e.g. underscores -> spaces) and "redirects" entries. Follow those
+        # rewrites so the QID is also filed under the requested title.
+        rewritten = {
+            step["from"]: step["to"]
+            for section in ("normalized", "redirects")
+            for step in query.get(section, [])
+        }
+        for requested in chunk:
+            resolved = requested
+            for _ in range(4):  # normalisation + redirect, bounded against cycles
+                if resolved not in rewritten:
+                    break
+                resolved = rewritten[resolved]
+            qid = qid_by_page.get(resolved) or qid_by_page.get(resolved.replace("_", " "))
+            if qid:
+                result[requested] = qid
+    # Merge known QIDs
+    for t, qid in WIKIDATA_DIRECT.items():
+        if qid:
+            result[t] = qid
+    return result
+
+
+def get_p625(qids):
+    """Fetch the P625 (coordinate location) claim for a set of Wikidata items.
+
+    Args:
+        qids: dict whose values are Wikidata QIDs; duplicates are queried once.
+
+    Returns:
+        dict of QID -> {"label": str, "coords": (lat, lng) | None}. The label
+        is the English label, else the Norwegian Bokmål label, else the QID
+        itself. Coordinates are rounded to 5 decimals; None when the item has
+        no P625 claim carrying a value.
+
+    Raises:
+        requests.HTTPError: if the API responds with an error status.
+    """
+    url = "https://www.wikidata.org/w/api.php"
+    result = {}
+    # Sorted so the batches are the same from run to run.
+    unique = sorted(set(qids.values()))
+    for i in range(0, len(unique), 50):
+        chunk = unique[i : i + 50]
+        r = requests.get(
+            url,
+            params={
+                "action": "wbgetentities",
+                "format": "json",
+                "ids": "|".join(chunk),
+                "props": "claims|labels",
+                "languages": "en|nb",
+            },
+            headers=HEADERS,
+            timeout=30,
+        )
+        r.raise_for_status()
+        # An API-level error response carries no "entities" section.
+        for qid, entity in r.json().get("entities", {}).items():
+            labels = entity.get("labels", {})
+            label = (
+                labels.get("en", {}).get("value")
+                or labels.get("nb", {}).get("value")
+                or qid
+            )
+            coords = None
+            for claim in entity.get("claims", {}).get("P625", []):
+                # "novalue" / "somevalue" snaks have no datavalue; indexing
+                # it directly would raise KeyError.
+                value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
+                if value:
+                    coords = (round(value["latitude"], 5), round(value["longitude"], 5))
+                    break
+            result[qid] = {"label": label, "coords": coords}
+    return result
+
+
+def main():
+    """Resolve every stop title to coordinates and print a report table.
+
+    Pipeline: extract titles from HTML, resolve them to QIDs, fetch P625 for
+    those QIDs, then print one row per title followed by the list of titles
+    that ended up without a QID or without coordinates.
+    """
+    labels_by_title = extract_titles(HTML)
+    print(f"Extracted {len(labels_by_title)} unique titles\n")
+
+    qid_by_title = get_qids_from_wikipedia(labels_by_title)
+    print(f"Resolved {len(qid_by_title)} QIDs\n")
+
+    entity_by_qid = get_p625(qid_by_title)
+
+    print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}")
+    print("-" * 120)
+    missing = []
+    for title in sorted(labels_by_title):
+        qid = qid_by_title.get(title, "—")
+        # Defaults describe the "title never resolved" row.
+        label, coords = "?", "NO QID"
+        if qid != "—":
+            entity = entity_by_qid.get(qid, {})
+            label = entity.get("label", "?")
+            coords = str(entity.get("coords") or "NO P625")
+        print(f"{title:<45} {qid:<12} {label:<35} {coords}")
+        # Both failure markers ("NO QID", "NO P625") contain "NO".
+        if "NO" in coords:
+            missing.append(title)
+
+    print(f"\n\nMissing coords: {len(missing)}")
+    for title in missing:
+        print(f" {title}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py
index 9a62449..9a7a5bd 100644
--- a/finn_eiendom/analysis.py
+++ b/finn_eiendom/analysis.py
@@ -19,6 +19,12 @@ def _normalize_description(text: str | None) -> str:
return text.lower() if text else ""
+def _is_resale_listing(url: str) -> bool:
+    """Report whether *url* points at an ordinary resale ad.
+
+    Resale ads are recognised by the ``/realestate/homes/`` path segment.
+    Project / new-build ads use other URL paths, which fetch_ad_details
+    cannot resolve because it builds a /homes/ URL.
+    """
+    resale_marker = "/realestate/homes/"
+    return url.find(resale_marker) >= 0
+
+
def _build_ad_summary(
ad: FinnAd,
enriched: EiendomUnit | None,
@@ -95,12 +101,17 @@ async def analyze_ad(
if enriched is not None:
cache.save_eiendom_unit(conn, enriched)
- if enriched and enriched.unit_vector:
- similar_units = cache.get_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD")
- if not similar_units:
- similar_units = await eiendom_no.get_similar_units(enriched.unit_vector)
- if similar_units:
- cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units)
+ if enriched:
+ # EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the
+ # field comes back None. Reading enriched.unit_vector directly leaves
+ # this block dead and similar_units permanently empty. Build the vector
+ # from the unit fields instead (fall back to the field if a future
+ # endpoint ever populates it).
+ vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
+ if vector:
+ # No dedicated cache table for similar units (per PRD) -- fetch
+ # fresh each call, consistent with service.get_or_fetch_similar_units.
+ similar_units = await eiendom_no.get_similar_units(vector)
scores = scoring.score_ad(finn_ad, enriched, similar_units)
categories = scoring.classify_ad(scores)
@@ -120,6 +131,26 @@ async def analyze_ad(
return result
+async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
+    """Analyze one search card: load its ad, optionally resolve its unit.
+
+    The ad is taken from the cache when a fresh entry exists, otherwise it is
+    fetched. When include_eiendom_no is set, the Eiendom.no unit is looked up
+    from the card URL; a failure there is logged and the analysis proceeds
+    without a unit code. Any other error propagates — the caller is
+    responsible for catching it and skipping the card.
+    """
+    finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
+    if finn_ad is None:
+        finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
+
+    if not include_eiendom_no:
+        return await analyze_ad(finn_ad, unit_code=None)
+
+    unit_code = None
+    try:
+        matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
+        if matched_unit:
+            unit_code = matched_unit.unit_code
+    except Exception as exc:
+        # A failed unit resolution is non-fatal -- proceed without enrichment.
+        logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
+
+    return await analyze_ad(finn_ad, unit_code=unit_code)
+
+
async def analyze_search(
search_url: str,
max_pages: int = FINN_MAX_SEARCH_PAGES,
@@ -139,21 +170,28 @@ async def analyze_search(
)
results = []
enriched_count = 0
+ skipped_count = 0
if fetch_details:
for card in cards[:detail_limit]:
- finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
- if finn_ad is None:
- finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)
- unit_code = None
- if include_eiendom_no:
- try:
- matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
- unit_code = matched_unit.unit_code if matched_unit else None
- except Exception as exc:
- logger.warning("Eiendom.no unit search failed: %s", exc)
- unit_code = None
- result = await analyze_ad(finn_ad, unit_code=unit_code)
+ # Project / new-build ads are not resale listings and fetch_ad_details
+ # cannot resolve them -- skip up front rather than 404 mid-run.
+ if not _is_resale_listing(card.url):
+ logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
+ skipped_count += 1
+ continue
+
+ # One bad card (stale finnkode, removed ad, transient network error)
+ # must not abort the whole search -- isolate each card.
+ try:
+ result = await _analyze_card(
+ card, conn, include_eiendom_no=include_eiendom_no, client=client
+ )
+ except Exception as exc:
+ logger.warning("Skipping card %s: %s", card.finnkode, exc)
+ skipped_count += 1
+ continue
+
if result.get("eiendom_unit"):
enriched_count += 1
results.append(result)
@@ -166,6 +204,7 @@ async def analyze_search(
"summary": {
"total_listings": len(cards),
"analyzed_listings": len(results),
+ "skipped_listings": skipped_count,
"eiendom_enriched": enriched_count,
},
}
diff --git a/finn_eiendom/mcp_server.py b/finn_eiendom/mcp_server.py
index be38bad..d3c1389 100644
--- a/finn_eiendom/mcp_server.py
+++ b/finn_eiendom/mcp_server.py
@@ -75,7 +75,7 @@ async def finn_analyze_search(
detail_limit=detail_limit,
include_eiendom_no=include_eiendom_no,
)
- return json.dumps(result)
+ return json.dumps(result, default=str)
except Exception as e:
logger.error(f"Error analyzing search: {e}")
return json.dumps({"error": True, "message": str(e)})
@@ -164,7 +164,7 @@ async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENT
"""Fetch similar units from Eiendom.no."""
try:
units = await get_similar_units(unit_vector, listing_status)
- return json.dumps([unit.model_dump() for unit in units])
+ return json.dumps([unit.model_dump() for unit in units], default=str)
except Exception as e:
logger.error(f"Error fetching similar units: {e}")
return json.dumps({"error": True, "message": str(e)})
@@ -222,7 +222,7 @@ async def finn_analyze_ad(
include_eiendom_no=include_eiendom_no,
include_similar_units=include_similar_units,
)
- return render_ad(result.get("ad", {}), "json")
+ return json.dumps(result, default=str)
except Exception as e:
logger.error(f"Error analyzing ad {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)})
diff --git a/finn_eiendom/scoring.py b/finn_eiendom/scoring.py
index 64627f9..01ed412 100644
--- a/finn_eiendom/scoring.py
+++ b/finn_eiendom/scoring.py
@@ -1,146 +1,771 @@
-"""Scoring engine for FINN listings enriched with Eiendom.no data."""
+"""Scoring engine tuned for Ole & partner's apartment search criteria.
+
+Priority hierarchy (stated):
+ MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access
+ HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general),
+ quiet setting, hybel with own bath + kitchen
+ MEDIUM : sameie economy, green areas / walking terrain, price vs market
+ BONUS : renovation upside (acceptable, not required)
+
+Dimension caps (the non-risk caps listed below sum to 113; total clamped to 100):
+ floor -15..0 – ground floor penalty only; a floor number alone, without the building height, carries no information
+ neighbourhood 25 – preferred area anchors, distance-based
+ view_and_quiet 20 – view quality + quiet setting; 0 if no balcony
+ area_and_layout 15 – sqm + bedroom count; hard penalty < 80 m²
+ hybel 12 – hybel with own bath + kitchen
+ transport 10 – walking distance to T-bane / trikk
+ economy 8 – listing price vs Eiendom.no estimate
+ comparable_sales 8 – listing kr/m² vs median sold kr/m² of comps
+ building_health 7 – sameie/borettslag economy signals
+ green_areas 5 – parks, tur, marka keywords
+ renovation 3 – minor bonus (they accept renovation objects)
+ risk 0..-30 – stale listing, high costs, missing data
+"""
import logging
+import math
from typing import Any
from .models import EiendomUnit, SimilarUnit
logger = logging.getLogger(__name__)
-
-def _clamp(value: float, min_value: float, max_value: float) -> float:
- return max(min_value, min(max_value, value))
+# ---------------------------------------------------------------------------
+# Geometry helpers
+# ---------------------------------------------------------------------------
-def score_market_position(unit: EiendomUnit | None) -> float:
- if unit is None or unit.estimated_selling_price is None or unit.listing_price is None:
- return 0.0
- ratio = unit.listing_price / unit.estimated_selling_price
- if ratio <= 0.9:
- return 20.0
- if ratio <= 1.0:
- return 16.0 + (1.0 - ratio) * 40.0
- if ratio <= 1.1:
- return 12.0 - (ratio - 1.0) * 40.0
- return 5.0
+def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
+    """Return the approximate distance in km between two lat/lng points.
+
+    Uses a flat-earth approximation that is accurate enough within Oslo
+    (~59.9°N): one degree of latitude is taken as 111 km and one degree of
+    longitude as 56 km.
+    """
+    km_per_deg_lat, km_per_deg_lng = 111.0, 56.0
+    north_south_km = (lat2 - lat1) * km_per_deg_lat
+    east_west_km = (lng2 - lng1) * km_per_deg_lng
+    return math.sqrt(north_south_km**2 + east_west_km**2)
-def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
- if ad.total_price is None:
- return 0.0
- if unit and unit.estimated_selling_price:
- ratio = ad.total_price / unit.estimated_selling_price
- if ratio <= 0.95:
+def _clamp(value: float, lo: float, hi: float) -> float:
+    """Limit *value* to the range [lo, hi]."""
+    # Apply the upper bound first, then the lower bound.
+    capped = value if value < hi else hi
+    return capped if capped > lo else lo
+
+
+def _median(values: list[float]) -> float:
+    """Return the median of *values*.
+
+    For an even number of values the two middle ones are averaged. An empty
+    list raises IndexError.
+    """
+    ordered = sorted(values)
+    half, is_odd = divmod(len(ordered), 2)
+    if is_odd:
+        return ordered[half]
+    return (ordered[half - 1] + ordered[half]) / 2.0
+
+
+# ---------------------------------------------------------------------------
+# Preferred neighbourhood anchors
+# ---------------------------------------------------------------------------
+
+_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
+ # (label, lat, lng) — label used only for debug logging
+ ("Grünerløkka", 59.9240, 10.7573),
+ ("Torshov", 59.9340, 10.7620),
+ ("Rodeløkka", 59.9315, 10.7660),
+ ("Kampen", 59.9125, 10.7760),
+ ("Sagene", 59.9400, 10.7590),
+ ("Nydalen", 59.9520, 10.7540),
+ ("Storo", 59.9450, 10.7670),
+ ("Grefsen", 59.9580, 10.7720),
+ ("Fagerborg", 59.9280, 10.7300),
+ ("St. Hans Haugen", 59.9300, 10.7400),
+ ("Ullevål", 59.9400, 10.7270),
+ ("Majorstua", 59.9210, 10.7170),
+ ("Frogner", 59.9160, 10.7150),
+ ("Løren", 59.9310, 10.7960),
+ ("Torshovdalen", 59.9295, 10.7630),
+ ("Rosenhoff", 59.9255, 10.7775),
+]
+
+
+# ---------------------------------------------------------------------------
+# Transit network — all T-bane and trikk stops.
+#
+# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations).
+# TRIKK_STOPS: TRIKK_STOPS_VERIFIED (sourced coordinates) merged with TRIKK_STOPS_ESTIMATED (interpolated).
+#
+# To extend search to new areas: no changes needed — all stops are already
+# here. score_transport automatically finds the nearest stop for any address.
+# ---------------------------------------------------------------------------
+
+TBANE_STOPS: dict[str, tuple[float, float]] = {
+ # All 101 stations — Wikipedia DMS converted to decimal degrees
+ "Ammerud": (59.957922, 10.871165),
+ "Avløs": (59.913859, 10.552926),
+ "Bekkestua": (59.918097, 10.588031),
+ "Berg": (59.951142, 10.744801),
+ "Bergkrystallen": (59.867091, 10.821206),
+ "Besserud": (59.957760, 10.673092),
+ "Bjørnsletta": (59.926902, 10.635458),
+ "Blindern": (59.940052, 10.716262),
+ "Bogerud": (59.875833, 10.841944),
+ "Borgen": (59.934548, 10.696000),
+ "Brattlikollen": (59.888076, 10.801191),
+ "Brynseng": (59.909169, 10.811834),
+ "Bøler": (59.884167, 10.845278),
+ "Carl Berners plass": (59.926592, 10.778360),
+ "Eiksmarka": (59.946431, 10.622320),
+ "Ekraveien": (59.950836, 10.635822),
+ "Ellingsrudåsen": (59.936311, 10.916634),
+ "Ensjø": (59.913364, 10.786986),
+ "Forskningsparken": (59.943513, 10.720425),
+ "Frognerseteren": (59.979018, 10.675857),
+ "Frøen": (59.934167, 10.709167),
+ "Furuset": (59.941578, 10.897247),
+ "Gaustad": (59.945625, 10.709814),
+ "Gjettum": (59.906221, 10.527155),
+ "Gjønnes": (59.918097, 10.579877),
+ "Godlia": (59.908523, 10.835352),
+ "Grorud": (59.961413, 10.881701),
+ "Grønland": (59.912912, 10.759563),
+ "Gråkammen": (59.954838, 10.701842),
+ "Gulleråsen": (59.955526, 10.696521),
+ "Hasle": (59.925302, 10.794454),
+ "Haslum": (59.915021, 10.563183),
+ "Hauger": (59.910957, 10.510713),
+ "Haugerud": (59.922592, 10.855350),
+ "Hellerud": (59.910079, 10.829953),
+ "Helsfyr": (59.911514, 10.803680),
+ "Holmen": (59.946296, 10.666609),
+ "Holmenkollen": (59.960489, 10.662446),
+ "Holstein": (59.960403, 10.740552),
+ "Hovseter": (59.946328, 10.654694),
+ "Høyenhall": (59.905769, 10.819860),
+ "Jar": (59.926592, 10.621762),
+ "Jernbanetorget": (59.912116, 10.751211),
+ "Kalbakken": (59.954553, 10.866750),
+ "Karlsrud": (59.880453, 10.805225),
+ "Kolsås": (59.914416, 10.501366),
+ "Kringsjå": (59.963690, 10.734930),
+ "Lambertseter": (59.873289, 10.810440),
+ "Lijordet": (59.940901, 10.616559),
+ "Lillevann": (59.980481, 10.653037),
+ "Lindeberg": (59.932979, 10.882087),
+ "Linderud": (59.940976, 10.839214),
+ "Løren": (59.929972, 10.790806),
+ "Majorstuen": (59.929904, 10.714931),
+ "Makrellbekken": (59.941957, 10.673845),
+ "Manglerud": (59.897957, 10.812435),
+ "Midtstuen": (59.961299, 10.682911),
+ "Montebello": (59.936806, 10.670471),
+ "Mortensrud": (59.849083, 10.828657),
+ "Munkelia": (59.868914, 10.812500),
+ "Nationaltheatret": (59.915045, 10.733039),
+ "Nydalen": (59.948864, 10.765250),
+ "Oppsal": (59.892866, 10.840201),
+ "Ringstabekk": (59.916182, 10.593696),
+ "Ris": (59.948069, 10.705147),
+ "Risløkka": (59.932355, 10.822713),
+ "Rommen": (59.962127, 10.908968),
+ "Romsås": (59.962272, 10.890777),
+ "Ryen": (59.895807, 10.805617),
+ "Røa": (59.946791, 10.643874),
+ "Rødtvet": (59.951416, 10.859535),
+ "Sinsen": (59.938085, 10.781343),
+ "Skogen": (59.975246, 10.647415),
+ "Skullerud": (59.866754, 10.839171),
+ "Skødalen": (59.961787, 10.690789),
+ "Skøyenåsen": (59.898866, 10.836516),
+ "Slemdal": (59.949896, 10.695662),
+ "Smestad": (59.937315, 10.683609),
+ "Sognsvann": (59.967127, 10.733943),
+ "Steinerud": (59.939083, 10.704345),
+ "Storo": (59.944545, 10.778768),
+ "Stortinget": (59.913047, 10.741469),
+ "Stovner": (59.962616, 10.923414),
+ "Trosterud": (59.927152, 10.864041),
+ "Tveita": (59.914354, 10.841961),
+ "Tøyen": (59.915214, 10.774670),
+ "Tåsen": (59.953270, 10.752439),
+ "Ullernåsen": (59.930635, 10.654796),
+ "Ullevål stadion": (59.946629, 10.732226),
+ "Ulsrud": (59.889970, 10.849428),
+ "Veitvet": (59.944700, 10.847304),
+ "Vestli": (59.972324, 10.929337),
+ "Vettakollen": (59.959913, 10.695705),
+ "Vinderen": (59.942803, 10.704761),
+ "Voksenkollen": (59.980076, 10.665193),
+ "Voksenlia": (59.966937, 10.655082),
+ "Vollebekk": (59.935865, 10.831039),
+ "Åsjordet": (59.928764, 10.646889),
+ "Økern": (59.928592, 10.804152),
+ "Østerås": (59.939445, 10.608587),
+ "Østhorn": (59.956944, 10.749779),
+}
+
+# Trikk stops are split into a verified table and an estimated table,
+# each grouped by line corridor for readability.
+# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia
+# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring.
+# Source tag format: Wikidata QID | "shared T-bane" | "OSM node <id>" | "Wikipedia"
+TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
+ # ── Briskeby-linjen (l11/19) ─────────────────────────────────────────
+ "Majorstuen": (59.929904, 10.714931), # shared T-bane
+ "Bogstadveien": (59.92611, 10.72167), # Q19372022
+ "Rosenborg": (59.92417, 10.72389), # Q7899658
+ "Briskeby": (59.92048, 10.71767), # Q11962293
+ "Riddervolds plass": (59.91896, 10.72026), # Q19386557
+ "Inkognitogata": (59.91565, 10.72114), # Q11977313
+ "Nationaltheatret": (59.91504, 10.73304), # shared T-bane
+ # ── Sentrum (shared l11/12/13/17/18/19) ──────────────────────────────
+ "Øvre Slottsgate": (59.9118, 10.7417), # Q31079249
+ "Dronningens gate": (59.91053, 10.74697), # Q29828354
+ "Jernbanetorget": (59.912116, 10.751211), # shared T-bane
+ "Storgata": (59.91396, 10.75141), # Q109484341
+ "Nybrua": (59.91707, 10.75834), # Q104867506
+ "Stortorvet": (59.91310, 10.74530), # Q7620354
+ "Bjørvika": (59.90806, 10.75639), # Wikipedia
+ # ── Grünerløkka–Torshov-linjen (l11/12/18) ───────────────────────────
+ "Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia
+ "Olaf Ryes plass": (59.9231, 10.7592), # Q4993079
+ "Birkelunden": (59.9271, 10.7601), # Q4916412
+ "Biermanns gate": (59.93028, 10.76104), # Wikipedia
+ "Sandaker senter": (59.93889, 10.76861), # Wikipedia
+ "Grefsenveien": (59.94278, 10.77344), # Q17778424
+ "Storo": (59.944545, 10.778768), # shared T-bane
+ # ── Kjelsåslinjen (l11/12) ───────────────────────────────────────────
+ "Disen": (59.94627, 10.78729), # Q11965753
+ "Glads vei": (59.95235, 10.78533), # Q17776371
+ "Grefsenplatået": (59.9560, 10.78573), # Q11972531
+ "Grefsen stadion": (59.96008, 10.78475), # Q11972525
+ "Kjelsås": (59.96611, 10.78278), # Wikipedia
+ # ── Frogner-linjen (l12) ─────────────────────────────────────────────
+ "Vigelandsparken": (59.92457, 10.70815), # Q19398059
+ "Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 30560564
+ "Elisenberg": (59.91944, 10.70861), # Q5361695
+ "Lille Frogner allé": (59.9180, 10.7120), # Q19379373
+ "Niels Juels gate": (59.91634, 10.71520), # Q11991378
+ "Solli": (59.91486, 10.71906), # Q7558364
+ # ── Vika-linjen (l12) ────────────────────────────────────────────────
+ "Aker Brygge": (59.9110, 10.7299), # Q4700639
+ "Kontraskjæret": (59.91087, 10.73592), # Q11998807
+ # ── Lilleaker-linjen (l13) ───────────────────────────────────────────
+ "Lilleaker": (59.92074, 10.63580), # Wikipedia
+ "Sollerud": (59.92104, 10.64309), # Wikipedia
+ "Furulund": (59.91990, 10.65013), # Wikipedia
+ "Ullern": (59.92429, 10.65858), # Wikipedia
+ "Abbediengen": (59.92517, 10.66716), # Wikipedia
+ "Hoff": (59.92500, 10.67488), # Wikipedia
+ "Skøyen": (59.92384, 10.68034), # Wikipedia
+ # ── Skøyen-linjen (l13) ──────────────────────────────────────────────
+ "Thune": (59.92186, 10.68742), # Wikipedia
+ "Nobels gate": (59.91758, 10.69866), # Wikipedia
+ "Skarpsno": (59.91430, 10.70234), # Wikipedia
+ "Skillebekk": (59.91277, 10.71103), # Wikipedia
+ # ── Ekeberg-linjen (l13/19) ──────────────────────────────────────────
+ "Middelalderparken": (59.90639, 10.76417), # Q99971403
+ "Oslo Hospital": (59.9032, 10.7674), # Wikipedia
+ "Ekebergparken": (59.8977, 10.7593), # Wikipedia
+ "Jomfrubråten": (59.8883, 10.7706), # Wikipedia
+ "Sportsplassen": (59.8860, 10.7736), # Wikipedia
+ "Holtet": (59.88151, 10.78415), # Wikipedia
+ "Sørli": (59.87493, 10.78709), # Wikipedia
+ "Kastellet": (59.87106, 10.79036), # Wikipedia
+ "Bråten": (59.86714, 10.79244), # Wikipedia
+ "Sæter": (59.86102, 10.79870), # Wikipedia
+ "Ljabru": (59.85335, 10.80089), # Wikipedia
+ # ── Ullevål Hageby-linjen (l17/18) ───────────────────────────────────
+ "Rikshospitalet": (59.947768, 10.714716), # Wikipedia
+ "Gaustadalleen": (59.9454, 10.7172), # Wikipedia
+ "Forskningsparken": (59.943513, 10.720425), # shared T-bane
+ "Universitetet Blindern": (59.9421, 10.7243), # Wikipedia
+ "John Collets plass": (59.9403, 10.7290), # Wikipedia
+ "Ullevål sykehus": (59.9361, 10.7318), # Wikipedia
+ "Adamstuen": (59.9326, 10.7345), # Wikipedia
+ "Stensgata": (59.92957, 10.73303), # Q7607927
+ "Bislett": (59.92599, 10.73108), # Q11961163
+ "Dalsbergstien": (59.92354, 10.73163), # Q17764618
+ "Welhavens gate": (59.92131, 10.72968), # Q12010485
+ "Frydenlund": (59.92086, 10.73317), # Q19373143
+ "Holbergs plass": (59.91876, 10.73453), # Q11975623
+ # ── Sinsen-linjen (l17) ──────────────────────────────────────────────
+ "Lakkegata skole": (59.92055, 10.76834), # Q11982987
+ "Carl Berners plass": (59.926592, 10.778360), # shared T-bane
+ "Sinsenkrysset": (59.93911, 10.78340), # Q19388523
+ "Grefsen stasjon": (59.94167, 10.78056), # Wikipedia
+ # ── Homansbyen-linjen (l19) ───────────────────────────────────────────
+ "Homansbyen": (59.92278, 10.72639), # Q5887760
+}
+
+# Estimated trikk stop coordinates — no Wikidata P625 found.
+# Derived from linear interpolation between verified neighbours,
+# or placed from map/street knowledge. Max error ~150-250 m.
+# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED.
+TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
+ # ── Sentrum ───────────────────────────────────────────────────────────
+ "Tinghuset": (59.9146, 10.7403), # Ullevål Hageby-l ved Stortinget T
+ # ── Grünerløkka–Torshov-linjen ───────────────────────────────────────
+ "Torshov": (59.9332, 10.7643), # interp Biermanns gate↔Sandaker
+ # ── Kjelsåslinjen ────────────────────────────────────────────────────
+ "Doktor Smiths vei": (59.9503, 10.7867), # interp Disen↔Kjelsås t=0.20
+ "Kjelsåsalleen": (59.9641, 10.7833), # interp Disen↔Kjelsås t=0.90
+ # ── Frogner-linjen ───────────────────────────────────────────────────
+ "Frogner stadion": (59.9167, 10.7038), # Kirkeveien S for Vigelandsparken
+ # ── Vika-linjen ──────────────────────────────────────────────────────
+ "Ruseløkka": (59.9120, 10.7258), # interp Solli↔Kontraskjæret
+ # ── Ullevål Hageby-linjen ─────────────────────────────────────────────
+ "Tullinøkka": (59.9163, 10.7349), # interp Holbergs plass↔Tinghuset
+ # ── Sinsen-linjen ────────────────────────────────────────────────────
+ "Heimdalsgata": (59.9188, 10.7633), # interp Nybrua↔Lakkegata skole
+ "Sofienberg": (59.9236, 10.7734), # interp Lakkegata skole↔Carl Berners
+ "Rosenhoff": (59.9307, 10.7800), # interp Carl Berners↔Sinsenkrysset t=0.33
+ "Sinsenterrassen": (59.9350, 10.7817), # interp Carl Berners↔Sinsenkrysset t=0.67
+}
+
+# Merged — verified takes precedence if a key appears in both (shouldn't happen).
+TRIKK_STOPS: dict[str, tuple[float, float]] = {
+ **TRIKK_STOPS_ESTIMATED,
+ **TRIKK_STOPS_VERIFIED,
+}
+
+# ---------------------------------------------------------------------------
+# Transit helpers
+# ---------------------------------------------------------------------------
+
+_WALK_SPEED_KMH = 5.0 # avg walking speed
+
+
+def _nearest_stop(
+    lat: float, lng: float, stops: dict[str, tuple[float, float]]
+) -> tuple[str, float]:
+    """Find the stop in *stops* closest to the point (lat, lng).
+
+    Returns:
+        (stop_name, distance_km) of the closest stop. For an empty dict the
+        result is ("", inf). On equal distances the earlier entry wins.
+    """
+    winner: tuple[str, float] = ("", float("inf"))
+    for stop_name, coords in stops.items():
+        stop_lat, stop_lng = coords
+        candidate_km = _distance_km(lat, lng, stop_lat, stop_lng)
+        if candidate_km < winner[1]:
+            winner = (stop_name, candidate_km)
+    return winner
+
+
+def _stops_within(
+    lat: float, lng: float, stops: dict[str, tuple[float, float]], max_km: float
+) -> list[tuple[str, float]]:
+    """Return (name, distance_km) for every stop within max_km, nearest first."""
+    # Measure each stop exactly once, then filter and sort on that value.
+    measured = (
+        (name, _distance_km(lat, lng, stop_lat, stop_lng))
+        for name, (stop_lat, stop_lng) in stops.items()
+    )
+    return sorted(
+        (pair for pair in measured if pair[1] <= max_km),
+        key=lambda pair: pair[1],
+    )
+
+
+def nearby_transit(
+    lat: float, lng: float, max_walk_min: float = 10.0
+) -> dict[str, list[tuple[str, float]]]:
+    """Return T-bane and trikk stops within max_walk_min minutes walk.
+
+    The walking-time limit is converted to a distance using _WALK_SPEED_KMH.
+
+    Args:
+        lat: latitude of the origin point.
+        lng: longitude of the origin point.
+        max_walk_min: maximum walking time in minutes.
+
+    Returns:
+        {
+            "tbane": [("Carl Berners plass", 0.28), ...],  # sorted by distance
+            "trikk": [("Rosenhoff", 0.19), ...],
+        }
+        All distances in km.
+    """
+    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
+    return {
+        "tbane": _stops_within(lat, lng, TBANE_STOPS, max_km),
+        "trikk": _stops_within(lat, lng, TRIKK_STOPS, max_km),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Dimension functions
+# ---------------------------------------------------------------------------
+
+
+def score_neighbourhood(
+ unit: EiendomUnit | None,
+ address: str | None = None,
+ district: str | None = None,
+) -> float:
+ """Distance to nearest preferred-area anchor. Max 25."""
+ if unit and unit.lat and unit.lng:
+ distances = [
+ (_distance_km(unit.lat, unit.lng, lat, lng), label)
+ for label, lat, lng in _PREFERRED_ANCHORS
+ ]
+ min_dist, nearest = min(distances)
+ logger.debug("Nearest anchor: %s at %.2f km", nearest, min_dist)
+ if min_dist < 0.5:
+ return 25.0
+ if min_dist < 1.0:
return 20.0
- if ratio <= 1.0:
+ if min_dist < 1.5:
return 15.0
- if ratio <= 1.05:
+ if min_dist < 2.5:
return 10.0
- return 6.0
- if ad.asking_price and ad.total_price <= ad.asking_price:
- return 12.0
- return 8.0
+ if min_dist < 4.0:
+ return 5.0
+ return 2.0
-
-def score_comparable_sales(listings: list[SimilarUnit], listing_price: int | None) -> float:
- if not listings or listing_price is None:
- return 0.0
- selling_prices = [unit.selling_price for unit in listings if unit.selling_price]
- if not selling_prices:
- return 0.0
- average = sum(selling_prices) / len(selling_prices)
- ratio = listing_price / average
- score = (1.0 - abs(ratio - 1.0)) * 20.0
- return float(_clamp(score, 0.0, 20.0))
-
-
-def score_location(address: str | None, district: str | None) -> float:
- if not address and not district:
- return 0.0
- if district and "oslo" in district.lower():
- return 15.0
- if address and "oslo" in address.lower():
- return 12.0
- return 7.0
-
-
-def score_layout_and_potential(description: str | None, rooms: int | None) -> float:
- score = 0.0
- if rooms and rooms >= 4:
- score += 10.0
- if description and "potensial" in description.lower():
- score += 8.0
- return float(_clamp(score, 0.0, 20.0))
-
-
-def score_outdoor_and_view(description: str | None) -> float:
- if not description:
- return 0.0
- score = 5.0 if "utsikt" in description.lower() or "balkong" in description.lower() else 0.0
- return float(_clamp(score, 0.0, 15.0))
-
-
-def score_rental_potential(description: str | None) -> float:
- if not description:
- return 0.0
- score = 10.0 if "hybel" in description.lower() or "leie" in description.lower() else 0.0
- return score
-
-
-def score_renovation_upside(description: str | None, asking_price: int | None) -> float:
- score = 0.0
- if description and "renover" in description.lower():
- score += 10.0
- if asking_price and asking_price > 0:
- score += 5.0
- return float(_clamp(score, 0.0, 15.0))
-
-
-def score_risk(description: str | None, unit: EiendomUnit | None) -> float:
- if unit is None:
- return -10.0
- if description and "usikker" in description.lower():
- return -10.0
+ haystack = " ".join(filter(None, [address, district])).lower()
+ if "oslo" in haystack:
+ return 5.0
return 0.0
-def score_ad(
- ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]
-) -> dict[str, float]:
- scores = {
+def score_transport(unit: EiendomUnit | None) -> float:
+    """Score proximity to the closest T-bane or tram stop (0-10 points).
+
+    Both ``TBANE_STOPS`` and ``TRIKK_STOPS`` are searched in full via
+    ``_nearest_stop``, so no per-area stop list has to be maintained.
+
+    Distance bands (kilometres to the closest stop of either kind):
+        < 0.4  -> 10 pts (~5 min walk)
+        < 0.8  ->  8 pts (~10 min walk, the stated threshold)
+        < 1.2  ->  4 pts (~15 min walk)
+        >= 1.2 ->  0 pts
+
+    Args:
+        unit: Eiendom.no unit carrying ``lat``/``lng``; may be ``None``.
+
+    Returns:
+        The band score, or 0.0 when the unit or either coordinate is missing.
+    """
+    if unit is None:
+        return 0.0
+    lat, lng = unit.lat, unit.lng
+    if lat is None or lng is None:
+        return 0.0
+
+    _tbane_stop, tbane_km = _nearest_stop(lat, lng, TBANE_STOPS)
+    _trikk_stop, trikk_km = _nearest_stop(lat, lng, TRIKK_STOPS)
+    logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_km, trikk_km)
+
+    closest_km = min(tbane_km, trikk_km)
+    # Bands are ordered nearest-first; the first limit not reached wins.
+    for limit_km, points in ((0.4, 10.0), (0.8, 8.0), (1.2, 4.0)):
+        if closest_km < limit_km:
+            return points
+    return 0.0
+
+
+def score_view_and_quiet(ad: Any, description: str) -> float:
+    """Score the view tier plus a quiet/noise adjustment (0-20 points).
+
+    The view score is taken from the first matching keyword tier (sea/fjord
+    15, panorama 13, rooftops 10, generic "utsikt" 7). Quiet keywords add 5
+    and noise keywords subtract 5; the sum is clamped to 0-20.
+
+    Args:
+        ad: Listing exposing ``has_balcony`` and ``has_terrace``.
+        description: Listing text; matched case-insensitively by substring.
+
+    Returns:
+        0.0 when the ad has neither balcony nor terrace, else the clamped sum.
+    """
+    if not ad.has_balcony and not ad.has_terrace:
+        return 0.0
+
+    text = description.lower()
+
+    def mentions(keywords: tuple[str, ...]) -> bool:
+        return any(word in text for word in keywords)
+
+    # Highest-value tier first; only the first hit counts.
+    view_tiers = (
+        (15.0, ("sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt")),
+        (13.0, ("panorama", "panoramautsikt", "vidt utsyn", "vidstrakt")),
+        (10.0, ("over hustak", "hustak", "over takene")),
+        (7.0, ("utsikt",)),
+    )
+    view_points = next((pts for pts, words in view_tiers if mentions(words)), 0.0)
+
+    quiet_words = (
+        "rolig",
+        "tilbaketrukket",
+        "skjermet",
+        "bakgård",
+        "gårdsrom",
+        "stille",
+        "blindvei",
+    )
+    noise_words = ("støy", "bilvei", "trafikkert", "støyutsatt")
+
+    quiet_points = 0.0
+    if mentions(quiet_words):
+        quiet_points += 5.0
+    if mentions(noise_words):
+        quiet_points -= 5.0
+
+    return float(_clamp(view_points + quiet_points, 0.0, 20.0))
+
+
+def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float:
+    """Score main-unit size plus a small bedroom bonus (0-15 points).
+
+    The area is ``unit.usable_area`` when available, else ``ad.area_m2``,
+    else 0. Below 60 m2 scores 0 and below 80 m2 scores a flat 3; in both
+    cases bedrooms are not considered. From 80 m2 the base is 8 / 11 / 14
+    (breakpoints at 90 and 105 m2), plus 1.0 for three or more bedrooms or
+    0.5 for exactly two.
+
+    Args:
+        ad: Listing exposing ``area_m2`` and ``bedrooms``.
+        unit: Eiendom.no unit exposing ``usable_area``; may be ``None``.
+
+    Returns:
+        The combined score clamped to 0-15.
+    """
+    unit_area = unit.usable_area if unit else None
+    size = unit_area or ad.area_m2 or 0
+
+    if size < 60:
+        return 0.0
+    if size < 80:
+        return 3.0
+
+    base = 8.0 if size < 90 else (11.0 if size < 105 else 14.0)
+
+    bedroom_count = ad.bedrooms or 0
+    bonus = 1.0 if bedroom_count >= 3 else (0.5 if bedroom_count == 2 else 0.0)
+
+    return float(_clamp(base + bonus, 0.0, 15.0))
+
+
+def score_hybel(description: str) -> float:
+    """Score a rental unit ("hybel") mentioned in the listing (0-12 points).
+
+    Outcomes, checked in this order:
+        no hybel / sokkelleil / utleiedel mention     -> 0
+        hybel only described as a possibility         -> 2
+        rental income or tax-free wording             -> 12
+        bath and kitchen both mentioned               -> 12
+        exactly one of bath / kitchen mentioned       -> 7
+        neither mentioned                             -> 4
+
+    Args:
+        description: Listing text; matched case-insensitively by substring.
+
+    Returns:
+        One of 0.0, 2.0, 4.0, 7.0 or 12.0.
+    """
+    text = description.lower()
+
+    if not any(marker in text for marker in ("hybel", "sokkelleil", "utleiedel")):
+        return 0.0
+
+    potential_phrases = (
+        "mulighet for hybel",
+        "mulighet til hybel",
+        "mulig hybel",
+        "kan etableres hybel",
+        "kan bygges om til hybel",
+        "tilrettelagt for hybel",
+        "potensial for hybel",
+        "hybelpotensial",
+    )
+    speculative = any(phrase in text for phrase in potential_phrases)
+    if not speculative:
+        # Looser check: "mulighet" and "hybel" within the same sentence.
+        sentences = text.replace("!", ".").replace("?", ".").split(".")
+        speculative = any("mulighet" in s and "hybel" in s for s in sentences)
+    if speculative:
+        return 2.0
+
+    # Documented rental income is treated as proof of a real hybel.
+    if "leieinntekt" in text or "skattefri" in text:
+        return 12.0
+
+    bath_phrases = (
+        "eget bad",
+        "eget wc",
+        "eget toalett",
+        "bad i hybel",
+        "dusj i hybel",
+        "eget dusj",
+    )
+    kitchen_phrases = ("eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken")
+    # Explicit phrases first, then any bath/kitchen mention in the text.
+    bath = any(p in text for p in bath_phrases) or "bad" in text or "dusj" in text
+    kitchen = any(p in text for p in kitchen_phrases) or "kjøkken" in text
+
+    return {2: 12.0, 1: 7.0, 0: 4.0}[int(bath) + int(kitchen)]
+
+
+def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
+    """Penalise ground-floor homes; every other floor is neutral.
+
+    Rationale: a top-floor flat in a three-storey block and the 8th floor of
+    a high-rise are both "top" for their building, so a floor number alone
+    says nothing about view or light without the building's total height.
+    The only real signal is ground floor (overlooked, noise, light) versus
+    anything above it.
+
+    ``unit.floor`` is preferred; otherwise ``ad.floor`` is parsed as an
+    integer after stripping whitespace and trailing dots (e.g. ``"1."``).
+
+    Returns:
+        -15.0 when the floor is 1 or lower; 0.0 for higher floors and when
+        the floor is unknown or cannot be parsed.
+    """
+    level: int | None = unit.floor if unit is not None else None
+
+    if level is None:
+        raw_floor = ad.floor
+        if raw_floor is not None:
+            try:
+                level = int(str(raw_floor).strip().rstrip("."))
+            except (ValueError, TypeError):
+                level = None
+
+    return -15.0 if level is not None and level <= 1 else 0.0
+
+
+def score_building_health(ad: Any, description: str) -> float:
+    """Score housing-association economy signals (0-7 points).
+
+    Components:
+        shared debt:   exactly 0 -> +3, unknown (``None``) -> +1, else 0
+        common costs:  missing/0 -> 0, <= 3500 -> +4, <= 5000 -> +2,
+                       <= 7000 -> 0, above 7000 -> -2
+        description:   any sound-finances keyword -> +2
+
+    Args:
+        ad: Listing exposing ``shared_debt`` and ``common_costs``.
+        description: Listing text; matched case-insensitively by substring.
+
+    Returns:
+        The component sum clamped to 0-7.
+    """
+    text = description.lower()
+
+    debt = ad.shared_debt
+    debt_points = 3.0 if debt == 0 else (1.0 if debt is None else 0.0)
+
+    monthly_fee = ad.common_costs or 0
+    if monthly_fee == 0 or 5000 < monthly_fee <= 7000:
+        fee_points = 0.0
+    elif monthly_fee <= 3500:
+        fee_points = 4.0
+    elif monthly_fee <= 5000:
+        fee_points = 2.0
+    else:
+        fee_points = -2.0
+
+    finance_words = ("veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld")
+    text_points = 2.0 if any(word in text for word in finance_words) else 0.0
+
+    return float(_clamp(debt_points + fee_points + text_points, 0.0, 7.0))
+
+
+def score_green_areas(description: str) -> float:
+    """Score mentions of parks, hiking terrain and greenery (0-5 points).
+
+    Keywords are matched case-insensitively by substring so that inflected
+    and compound forms ("parken", "turområder", "Frognerparken") still count.
+    Common words that merely contain a keyword are blanked out first:
+    "parkett" (parquet), "parkering"/"parkere" (parking) and "markant"
+    would otherwise be counted as "park" / "marka".
+
+    Args:
+        description: Listing text.
+
+    Returns:
+        5.0 when two or more distinct keywords occur, 2.0 for exactly one,
+        otherwise 0.0.
+    """
+    text = description.lower()
+    # Remove look-alike words before the substring test.
+    for false_friend in ("parkett", "parkering", "parkere", "markant"):
+        text = text.replace(false_friend, " ")
+
+    keywords = ("park", "turområde", "turterreng", "marka", "skog", "grønt", "grønne", "friluft")
+    hits = sum(1 for kw in keywords if kw in text)
+    if hits >= 2:
+        return 5.0
+    if hits == 1:
+        return 2.0
+    return 0.0
+
+
+def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
+    """Score the listing price against the Eiendom.no estimate (0-8 points).
+
+    The price is ``ad.total_price``, falling back to ``ad.asking_price``.
+    With ``ratio = price / estimate``:
+        ratio <= 0.92         -> 8.0
+        0.92 < ratio <= 1.00  -> rises linearly from 5.0 (at 1.00) to 8.0
+        1.00 < ratio <= 1.08  -> falls linearly from 5.0 to 2.0 (at 1.08)
+        ratio > 1.08          -> 1.0
+
+    Args:
+        ad: Listing exposing ``total_price`` and ``asking_price``.
+        unit: Eiendom.no unit exposing ``estimated_selling_price``; may be
+            ``None``.
+
+    Returns:
+        The band score, or 0.0 when the unit, the estimate or the price is
+        missing or not positive.
+    """
+    if unit is None:
+        return 0.0
+    estimate = unit.estimated_selling_price
+    # A zero estimate would raise ZeroDivisionError below.
+    if not estimate or estimate <= 0:
+        return 0.0
+    price = ad.total_price or ad.asking_price
+    # A zero price is missing data, not a bargain: it would give ratio 0
+    # and the maximum score.
+    if not price or price <= 0:
+        return 0.0
+
+    ratio = price / estimate
+    if ratio <= 0.92:
+        return 8.0
+    if ratio <= 1.00:
+        return 5.0 + (1.0 - ratio) * 37.5
+    if ratio <= 1.08:
+        return 5.0 - (ratio - 1.0) * 37.5
+    return 1.0
+
+
+def score_comparable_sales(
+    listings: list[SimilarUnit],
+    listing_sqm_price: int | float | None,
+) -> float:
+    """Score listing price per m2 against the comps' median (0-8 points).
+
+    The score is ``8 * (1 - |ratio - 1|)`` where ``ratio`` is the listing's
+    price per m2 divided by the median ``sqm_price`` of the comparable
+    units; comps with a missing or zero ``sqm_price`` are ignored.
+
+    Args:
+        listings: Comparable units exposing ``sqm_price``.
+        listing_sqm_price: Price per m2 of the listing being scored.
+
+    Returns:
+        The score clamped to 0-8, or 0.0 when there are no comps, no usable
+        comp prices, or no listing price.
+    """
+    if listing_sqm_price is None or not listings:
+        return 0.0
+
+    comp_prices = [comp.sqm_price for comp in listings if comp.sqm_price]
+    if not comp_prices:
+        return 0.0
+
+    deviation = abs(listing_sqm_price / _median(comp_prices) - 1.0)
+    return float(_clamp((1.0 - deviation) * 8.0, 0.0, 8.0))
+
+
+def score_renovation(description: str) -> float:
+    """Give a flat bonus when the text hints at renovation upside.
+
+    Args:
+        description: Listing text; matched case-insensitively by substring.
+
+    Returns:
+        3.0 when any renovation keyword occurs, otherwise 0.0.
+    """
+    text = description.lower()
+    for keyword in ("renover", "oppusse", "potensial", "moderniser"):
+        if keyword in text:
+            return 3.0
+    return 0.0
+
+
+def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
+    """Sum the risk penalties for a listing; the result is 0 or negative.
+
+    Penalties:
+        no Eiendom.no unit                 -> -8
+        common costs > 8000 / > 6000       -> -10 / -5
+        days on market > 120 / > 60        -> -10 / -5
+        "usikker" in the listing text      -> -5
+
+    Args:
+        ad: Listing exposing ``common_costs`` and ``listing_description``.
+        unit: Eiendom.no unit exposing ``days_on_market``; may be ``None``.
+
+    Returns:
+        The total penalty.
+    """
+    missing_unit_penalty = -8.0 if unit is None else 0.0
+
+    monthly_fee = ad.common_costs or 0
+    if monthly_fee > 8000:
+        fee_penalty = -10.0
+    elif monthly_fee > 6000:
+        fee_penalty = -5.0
+    else:
+        fee_penalty = 0.0
+
+    days_listed = unit.days_on_market if unit else None
+    stale_penalty = 0.0
+    if days_listed:
+        if days_listed > 120:
+            stale_penalty = -10.0
+        elif days_listed > 60:
+            stale_penalty = -5.0
+
+    listing_text = (ad.listing_description or "").lower()
+    wording_penalty = -5.0 if "usikker" in listing_text else 0.0
+
+    return missing_unit_penalty + fee_penalty + stale_penalty + wording_penalty
+
+
+# ---------------------------------------------------------------------------
+# Orchestration
+# ---------------------------------------------------------------------------
+
+
+def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
+ """Compute every sub-score for one listing and combine them into a total.
+
+ Returns a dict with one entry per scoring function, a ``"total"`` entry
+ (sum of the numeric entries, clamped to 0-100) and, when the unit has
+ coordinates, a non-scoring ``"nearby_transit"`` entry.
+
+ NOTE(review): the coordinate check below uses truthiness, while
+ score_transport tests ``is None``; a coordinate of exactly 0.0 would skip
+ the transit lookup here -- confirm that is intended.
+ """
+ description = ad.listing_description or ""
+
+ # Collect nearby transit for informational output (not used in scoring)
+ transit_nearby: dict | None = None
+ if unit and unit.lat and unit.lng:
+ transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
+ if transit_nearby["tbane"] or transit_nearby["trikk"]:
+ logger.debug("Nearby transit: %s", transit_nearby)
+
+ # One entry per scoring function; risk and floor can be negative.
+ scores: dict[str, Any] = {
+ "floor": score_floor(ad, unit),
+ "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)),
+ "view_and_quiet": score_view_and_quiet(ad, description),
+ "area_and_layout": score_area_and_layout(ad, unit),
+ "hybel": score_hybel(description),
+ "transport": score_transport(unit),
"economy": score_economy(ad, unit),
- "market_position": score_market_position(unit),
"comparable_sales": score_comparable_sales(
- similar_units, ad.total_price or ad.asking_price
+ similar_units,
+ unit.listing_sqm_price if unit else None,
),
- "location": score_location(ad.address, ad.district),
- "layout": score_layout_and_potential(ad.listing_description, ad.rooms),
- "outdoor": score_outdoor_and_view(ad.listing_description),
- "rental_potential": score_rental_potential(ad.listing_description),
- "renovation": score_renovation_upside(ad.listing_description, ad.asking_price),
- "risk": score_risk(ad.listing_description, unit),
+ "building_health": score_building_health(ad, description),
+ "green_areas": score_green_areas(description),
+ "renovation": score_renovation(description),
+ "risk": score_risk(ad, unit),
}
- scores["total"] = float(_clamp(sum(scores.values()), 0.0, 100.0))
+
+ # Numeric-only sum for total
+ numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))}
+ scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0))
+
+ # Attach nearby transit as metadata (non-scoring)
+ # Added after the total is computed, so it never enters the sum.
+ if transit_nearby is not None:
+ scores["nearby_transit"] = transit_nearby
+
return scores
-def classify_ad(scores: dict[str, float]) -> list[str]:
+def classify_ad(scores: dict[str, Any]) -> list[str]:
+ """Translate a score dict into a list of category labels.
+
+ Labels come in two groups: tiers derived from ``scores["total"]`` and
+ feature tags derived from individual sub-scores. Missing keys default
+ to 0.0.
+ """
categories: list[str] = []
total = scores.get("total", 0.0)
- if total >= 70:
- categories.append("bargain_candidate")
+
+ # Tiers by total. The >= 60 check is not exclusive, so a total of 75 or
+ # more yields both "top_match" and "strong_candidate".
+ if total >= 75:
+ categories.append("top_match")
if total >= 60:
- categories.append("safe_candidate")
- if 50 <= total < 70:
- categories.append("lifestyle_candidate")
- if scores.get("renovation", 0.0) >= 8:
- categories.append("renovation_candidate")
- if scores.get("rental_potential", 0.0) >= 5:
- categories.append("hybel_candidate")
- if scores.get("risk", 0.0) < 0:
- categories.append("risk_object")
+ categories.append("strong_candidate")
+ if 45 <= total < 60:
+ categories.append("worth_viewing")
if total < 30:
categories.append("not_interesting")
- if 30 <= total < 60:
+ if 30 <= total < 45:
categories.append("manual_review_required")
+
+ # Feature tags from individual sub-scores.
+ if scores.get("hybel", 0.0) >= 7:
+ categories.append("has_hybel")
+ if scores.get("view_and_quiet", 0.0) >= 13:
+ categories.append("premium_view")
+ if scores.get("neighbourhood", 0.0) == 25:
+ categories.append("preferred_neighbourhood")
+ if scores.get("renovation", 0.0) > 0:
+ categories.append("renovation_candidate")
+ if scores.get("floor", 0.0) < 0:
+ categories.append("ground_floor")
+ if scores.get("risk", 0.0) < -5:
+ categories.append("risk_object")
+ # NOTE(review): a missing "area_and_layout" key defaults to 0.0 and is
+ # therefore tagged "too_small" as well -- confirm that is intended.
+ if scores.get("area_and_layout", 0.0) <= 3:
+ categories.append("too_small")
+
return categories
diff --git a/finn_eiendom/service.py b/finn_eiendom/service.py
index 4562edf..6a2bc3d 100644
--- a/finn_eiendom/service.py
+++ b/finn_eiendom/service.py
@@ -36,6 +36,43 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
return ad
+async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
+    """Return the ad's Eiendom.no unit code, resolving it on first use.
+
+    ``fetch_ad_details`` leaves ``eiendom_unit_code`` empty; only
+    ``search_unit_from_finn_url`` can map a FINN listing to an Eiendom.no
+    unit. Every enrichment path is gated on that field, so skipping this
+    step makes enrichment a silent no-op.
+
+    When a unit is resolved, the code is written onto ``ad`` in place and the
+    ad is saved to the FINN cache, so later cache hits need no network call.
+
+    Call this BEFORE ``ad.model_dump()``; a dump taken earlier still carries
+    ``eiendom_unit_code: None``.
+
+    Returns:
+        The unit code, or ``None`` when the resolver finds no unit or the
+        unit has no code (e.g. new-build project ads, off-market addresses).
+    """
+    known_code = ad.eiendom_unit_code
+    if known_code:
+        return known_code
+
+    resolved = await search_unit_from_finn_url(ad.url)
+    resolved_code = resolved.unit_code if resolved is not None else None
+    if not resolved_code:
+        logger.info("No Eiendom.no unit resolved for finnkode %s", ad.finnkode)
+        return None
+
+    ad.eiendom_unit_code = resolved_code
+
+    # Cache the ad only: the resolver's unit is a partial record, and the
+    # full one is fetched later by get_or_fetch_eiendom_unit -> get_unit().
+    save_finn_ad(init_db(FINN_CACHE_PATH), ad)
+    logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, resolved_code)
+    return ad.eiendom_unit_code
+
+
async def get_or_fetch_eiendom_unit(
unit_code: str, force_refresh: bool = False
) -> EiendomUnit | None:
@@ -84,7 +121,7 @@ async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> EiendomUnit | Non
# ============================================================================
-# Orchestration functions — delegate to analysis.py
+# Orchestration functions -- delegate to analysis.py
# ============================================================================
@@ -96,7 +133,13 @@ async def analyze_search(
include_details: bool = True,
include_eiendom_no: bool = True,
) -> dict[str, Any]:
- """Analyze a FINN search URL and return a ranked shortlist."""
+ """Analyze a FINN search URL and return a ranked shortlist.
+
+ NOTE: enrichment for search results lives in analysis.py. If that path
+ also reports `eiendom_enriched: 0`, it has the same root cause -- each
+ card's eiendom_unit_code must be resolved via ensure_eiendom_unit_code
+ (or search_unit_from_finn_url) before the enrichment gate.
+ """
return await run_analysis_search(
search_url,
max_pages=max_pages,
@@ -114,15 +157,20 @@ async def analyze_ad(
) -> dict[str, Any]:
"""Fetch and enrich a single FINN ad with analysis."""
ad = await get_or_fetch_ad(finnkode)
+
+ # Resolve BEFORE model_dump() so the serialised ad carries the backfilled
+ # eiendom_unit_code instead of a stale None.
+ unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
+
result: dict[str, Any] = {
"ad": ad.model_dump(),
}
- if include_eiendom_no and ad.eiendom_unit_code:
- unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
+ if unit_code:
+ unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
result["eiendom_unit"] = unit.model_dump()
if include_similar_units:
- similar = await get_or_fetch_similar_units(ad.eiendom_unit_code)
+ similar = await get_or_fetch_similar_units(unit_code)
result["similar_units"] = [s.model_dump() for s in similar]
return result
@@ -132,16 +180,18 @@ async def analyze_ad_against_comps(
) -> dict[str, Any]:
"""Evaluate one listing against recent comparable sales."""
ad = await get_or_fetch_ad(finnkode)
+
+ # Resolve before model_dump() -- see analyze_ad.
+ unit_code = await ensure_eiendom_unit_code(ad)
+
result: dict[str, Any] = {
"ad": ad.model_dump(),
}
- if ad.eiendom_unit_code:
- unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
+ if unit_code:
+ unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
result["eiendom_unit"] = unit.model_dump()
- comps = await get_or_fetch_similar_units(
- ad.eiendom_unit_code, listing_status=listing_status
- )
+ comps = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
result["comparable_units"] = [c.model_dump() for c in comps]
return result
@@ -152,17 +202,20 @@ async def find_similar_to_liked(
"""Find properties similar to a listing the user has liked."""
# Requires that feedback.verdict = "liked" exists for this finnkode
ad = await get_or_fetch_ad(finnkode)
- if not ad.eiendom_unit_code:
+
+ unit_code = await ensure_eiendom_unit_code(ad)
+ if not unit_code:
raise ValueError(
- f"Finnkode {finnkode} has no Eiendom.no unit_code; cannot find similar properties"
+ f"Finnkode {finnkode} could not be resolved to an Eiendom.no unit; "
+ "cannot find similar properties"
)
# TODO: verify feedback verdict = "liked" exists
- unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
+ unit = await get_or_fetch_eiendom_unit(unit_code)
if not unit:
raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data")
- similar = await get_or_fetch_similar_units(ad.eiendom_unit_code, listing_status=listing_status)
+ similar = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
return {
"base_ad": ad.model_dump(),
"similar_listings": [s.model_dump() for s in similar],
@@ -177,15 +230,18 @@ async def compare_ads(
ads = []
for finnkode in finnkoder:
ad = await get_or_fetch_ad(finnkode)
- ad_data = ad.model_dump()
- if include_eiendom_no and ad.eiendom_unit_code:
- unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code)
+ # Resolve before model_dump() -- see analyze_ad.
+ unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
+
+ ad_data = ad.model_dump()
+ if unit_code:
+ unit = await get_or_fetch_eiendom_unit(unit_code)
if unit:
ad_data["eiendom_unit"] = unit.model_dump()
if include_comps:
comps = await get_or_fetch_similar_units(
- ad.eiendom_unit_code, listing_status="RECENTLY_SOLD"
+ unit_code, listing_status="RECENTLY_SOLD"
)
ad_data["comps"] = [c.model_dump() for c in comps]
diff --git a/validate_mcp_tools.py b/validate_mcp_tools.py
deleted file mode 100644
index e3ee568..0000000
--- a/validate_mcp_tools.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/env python3
-"""
-Validate that all MCP tool definitions correctly match their service layer functions.
-This catches parameter mismatches, missing arguments, and other integration issues.
-"""
-
-import inspect
-from typing import get_type_hints
-from finn_eiendom import mcp_server, service, eiendom_no
-
-# Define the mapping of MCP tools to their service/module functions
-TOOL_MAPPINGS = {
- # Tool name: (service function, expected params to check)
- "finn_analyze_search": (
- service.analyze_search,
- ["search_url", "max_pages", "detail_limit", "include_details", "include_eiendom_no"],
- ),
- "finn_get_ad": (service.get_or_fetch_ad, ["finnkode", "force_refresh"]),
- "finn_resolve_eiendom_unit": (eiendom_no.search_unit_from_finn_url, ["finn_url"]),
- "finn_get_eiendom_unit": (service.get_or_fetch_eiendom_unit, ["unit_code", "force_refresh"]),
- "finn_analyze_unit_images": (service.get_unit_images, ["unit_code", "force_refresh"]),
- "finn_get_similar_units": (eiendom_no.get_similar_units, ["unit_vector", "listing_status"]),
- "finn_build_unit_vector": (
- eiendom_no.get_unit,
- ["unit_code"],
- ), # Uses get_unit, not build_unit_vector
- "finn_decode_unit_vector": (eiendom_no.decode_unit_vector, ["unit_vector"]),
- "finn_analyze_ad": (
- service.analyze_ad,
- ["finnkode", "include_eiendom_no", "include_similar_units"],
- ),
- "finn_analyze_ad_against_comps": (
- service.analyze_ad_against_comps,
- ["finnkode", "listing_status"],
- ),
- "finn_find_similar_to_liked_ad": (
- service.find_similar_to_liked,
- ["finnkode", "mode", "listing_status"],
- ),
- "finn_compare_ads": (service.compare_ads, ["finnkoder", "include_eiendom_no", "include_comps"]),
- "finn_save_feedback": (service.save_feedback, ["finnkode", "verdict", "notes"]),
- "finn_get_shortlist": (service.get_shortlist, ["run_id", "limit"]),
- "finn_get_new_ads_since_last_run": (service.get_new_ads_since_last_run, ["search_url"]),
-}
-
-
-def get_function_params(func) -> dict:
- """Extract parameter names and defaults from a function."""
- sig = inspect.signature(func)
- params = {}
- for name, param in sig.parameters.items():
- if name in ("self", "cls"):
- continue
- params[name] = {
- "default": param.default,
- "annotation": param.annotation,
- "kind": param.kind.name,
- }
- return params
-
-
-def validate_tool_mapping(
- tool_name: str, service_func, expected_params: list[str]
-) -> tuple[bool, list[str]]:
- """Validate that an MCP tool correctly maps to its service function."""
- errors = []
-
- # Get the MCP tool function
- mcp_tool = getattr(mcp_server, tool_name, None)
- if not mcp_tool:
- errors.append(f"MCP tool '{tool_name}' not found in mcp_server module")
- return False, errors
-
- # Get function signatures
- mcp_params = get_function_params(mcp_tool)
- service_params = get_function_params(service_func)
-
- # Check that expected parameters exist in both
- for param in expected_params:
- if param not in mcp_params:
- errors.append(f" ✗ MCP tool missing parameter '{param}'")
- if param not in service_params and param != "client": # client is optional in service layer
- errors.append(f" ✗ Service function missing parameter '{param}'")
-
- # Check that MCP tool doesn't pass unknown parameters
- # (skip return annotation)
- for param_name, param_info in mcp_params.items():
- if param_name not in service_params and param_name not in ["return"]:
- # This might be OK if it's a tool-specific parameter, but warn
- pass
-
- if errors:
- return False, errors
- return True, []
-
-
-async def validate_service_imports():
- """Validate that all imported service functions exist and are callable."""
- imported_funcs = [
- ("analyze_ad", service.analyze_ad),
- ("analyze_ad_against_comps", service.analyze_ad_against_comps),
- ("analyze_search", service.analyze_search),
- ("compare_ads", service.compare_ads),
- ("find_similar_to_liked", service.find_similar_to_liked),
- ("get_new_ads_since_last_run", service.get_new_ads_since_last_run),
- ("get_or_fetch_ad", service.get_or_fetch_ad),
- ("get_or_fetch_eiendom_unit", service.get_or_fetch_eiendom_unit),
- ("get_shortlist", service.get_shortlist),
- ("get_unit_images", service.get_unit_images),
- ("save_feedback", service.save_feedback),
- ]
-
- errors = []
- for name, func in imported_funcs:
- if not callable(func):
- errors.append(f"Service function '{name}' is not callable")
-
- return errors
-
-
-def main():
- """Run validation checks."""
- print("=" * 80)
- print("MCP Tool Parameter Validation")
- print("=" * 80)
-
- all_passed = True
- total_checks = 0
- passed_checks = 0
-
- for tool_name, (service_func, expected_params) in TOOL_MAPPINGS.items():
- total_checks += 1
- passed, errors = validate_tool_mapping(tool_name, service_func, expected_params)
-
- if passed:
- print(f"✓ {tool_name}")
- passed_checks += 1
- else:
- print(f"✗ {tool_name}")
- for error in errors:
- print(f" {error}")
- all_passed = False
-
- print("\n" + "=" * 80)
- print(f"Results: {passed_checks}/{total_checks} tools validated")
- print("=" * 80)
-
- return 0 if all_passed else 1
-
-
-if __name__ == "__main__":
- exit(main())