scoring and analysis

This commit is contained in:
Ole
2026-05-23 07:43:30 +00:00
parent c9383788de
commit d3f4bfa838
7 changed files with 1113 additions and 305 deletions
+3 -1
View File
@@ -10,8 +10,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libxslt1-dev \ libxslt1-dev \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy dependency files # Copy source files
COPY pyproject.toml ./ COPY pyproject.toml ./
COPY finn_eiendom ./finn_eiendom
COPY README.md ./
# Create virtual environment and install dependencies # Create virtual environment and install dependencies
RUN python -m venv /venv && \ RUN python -m venv /venv && \
+238
View File
@@ -0,0 +1,238 @@
"""
fetch_trikk_coords.py
Fetches P625 coordinates from Wikidata for all Oslo tram stops
via Wikipedia sitelinks. Run: python3 fetch_trikk_coords.py
Requires: pip install requests beautifulsoup4
"""
import requests
from urllib.parse import unquote, urlparse, parse_qs
from bs4 import BeautifulSoup
# All Wikipedia links from the tram line articles (incl. redlinks).
# Two href shapes occur and extract_titles() handles both:
#   /wiki/<Title>                                   -> link to an article
#   /w/index.php?title=<Title>&action=edit&redlink=1 -> redlink (no article)
HTML = """
<a href="/wiki/Majorstuen_(station)">Majorstuen</a>
<a href="/wiki/Bogstadveien_tram_stop">Bogstadveien</a>
<a href="/wiki/Rosenborg_tram_stop">Rosenborg</a>
<a href="/w/index.php?title=Briskeby_tram_stop&action=edit&redlink=1">Briskeby</a>
<a href="/w/index.php?title=Riddervolds_plass_tram_stop&action=edit&redlink=1">Riddervolds plass</a>
<a href="/wiki/Inkognitogata_tram_stop">Inkognitogata</a>
<a href="/wiki/Nationaltheatret_metro_station">Nationaltheatret</a>
<a href="/wiki/%C3%98vre_Slottsgate_tram_stop">Øvre Slottsgate</a>
<a href="/wiki/Dronningens_gate_tram_stop">Dronningens gate</a>
<a href="/wiki/Jernbanetorget_(station)">Jernbanetorget</a>
<a href="/wiki/Storgata_tram_stop">Storgata</a>
<a href="/wiki/Nybrua_tram_stop">Nybrua</a>
<a href="/wiki/Schous_plass_tram_stop">Schous plass</a>
<a href="/wiki/Olaf_Ryes_plass">Olaf Ryes plass</a>
<a href="/wiki/Birkelunden">Birkelunden</a>
<a href="/wiki/Biermanns_gate_(station)">Biermanns gate</a>
<a href="/w/index.php?title=Torshov_(station)&action=edit&redlink=1">Torshov</a>
<a href="/w/index.php?title=Sandaker_senter_(station)&action=edit&redlink=1">Sandaker senter</a>
<a href="/w/index.php?title=Grefsenveien_tram_stop&action=edit&redlink=1">Grefsenveien</a>
<a href="/wiki/Storo_(station)">Storo</a>
<a href="/wiki/Disen_tram_stop">Disen tram stop</a>
<a href="/w/index.php?title=Doktor_Smiths_vei_tram_stop&action=edit&redlink=1">Doktor Smiths vei</a>
<a href="/w/index.php?title=Glads_vei_tram_stop&action=edit&redlink=1">Glads vei</a>
<a href="/w/index.php?title=Grefsenplat%C3%A5et_tram_stop&action=edit&redlink=1">Grefsenplatået</a>
<a href="/w/index.php?title=Grefsen_stadion_tram_stop&action=edit&redlink=1">Grefsen stadion</a>
<a href="/w/index.php?title=Kjels%C3%A5salleen_tram_stop&action=edit&redlink=1">Kjelsåsalleen</a>
<a href="/wiki/Kjels%C3%A5s_tram_stop">Kjelsås</a>
<a href="/wiki/Frogner_stadion_tram_stop">Frogner stadion</a>
<a href="/w/index.php?title=Vigelandsparken_tram_stop&action=edit&redlink=1">Vigelandsparken</a>
<a href="/w/index.php?title=Frogner_plass_tram_stop&action=edit&redlink=1">Frogner plass</a>
<a href="/wiki/Elisenberg_tram_stop">Elisenberg</a>
<a href="/w/index.php?title=Lille_Frogner_all%C3%A9_tram_stop&action=edit&redlink=1">Lille Frogner allé</a>
<a href="/w/index.php?title=Niels_Juels_gate_tram_stop&action=edit&redlink=1">Niels Juels gate</a>
<a href="/wiki/Solli_tram_stop">Solli</a>
<a href="/w/index.php?title=Rusel%C3%B8kka_tram_stop&action=edit&redlink=1">Ruseløkka</a>
<a href="/wiki/Aker_Brygge_tram_stop">Akerbrygge</a>
<a href="/wiki/Kontraskj%C3%A6ret_tram_stop">Kontraskjæret</a>
<a href="/wiki/Middelalderparken_tram_stop">Middelalderparken</a>
<a href="/w/index.php?title=Bislett_tram_stop&action=edit&redlink=1">Bislett</a>
<a href="/w/index.php?title=Dalsbergstien_tram_stop&action=edit&redlink=1">Dalsbergstien</a>
<a href="/w/index.php?title=Welhavens_gate_tram_stop&action=edit&redlink=1">Welhavens gate</a>
<a href="/w/index.php?title=Frydenlund_tram_stop&action=edit&redlink=1">Frydenlund</a>
<a href="/w/index.php?title=Holbergs_plass_tram_stop&action=edit&redlink=1">Holbergs plass</a>
<a href="/w/index.php?title=Tullin%C3%B8kka_(station)&action=edit&redlink=1">Tullinøkka</a>
<a href="/wiki/Stortinget_(station)">Tinghuset</a>
<a href="/wiki/Stortorvet_(station)">Stortorvet</a>
<a href="/w/index.php?title=Heimdalsgata_tram_stop&action=edit&redlink=1">Heimdalsgata</a>
<a href="/w/index.php?title=Lakkegata_skole_tram_stop&action=edit&redlink=1">Lakkegata skole</a>
<a href="/w/index.php?title=Sofienberg_tram_stop&action=edit&redlink=1">Sofienberg</a>
<a href="/wiki/Carl_Berners_plass_(station)">Carl Berners plass</a>
<a href="/w/index.php?title=Rosenhoff_tram_stop&action=edit&redlink=1">Rosenhoff</a>
<a href="/w/index.php?title=Sinsenterrassen_tram_stop&action=edit&redlink=1">Sinsenterrassen</a>
<a href="/wiki/Sinsen_(station)">Sinsenkrysset</a>
<a href="/wiki/Grefsen_Station">Grefsen stasjon</a>
<a href="/wiki/Homansbyen_tram_stop">Homansbyen</a>
"""
# Hand-maintained title -> Wikidata QID overrides.
# Every key listed here is excluded from the Wikipedia lookup in
# get_qids_from_wikipedia(); a value of None means "no QID known", so the
# title ends up reported as missing.
WIKIDATA_DIRECT = {
    # Redlinks — no Wikipedia page, QID looked up directly on Wikidata
    "Briskeby_tram_stop": "Q11962293",
    "Riddervolds_plass_tram_stop": "Q19386557",
    "Grefsenveien_tram_stop": "Q17778424",
    "Doktor_Smiths_vei_tram_stop": None,
    "Glads_vei_tram_stop": "Q17776371",
    "Grefsenplatået_tram_stop": "Q11972531",
    "Grefsen_stadion_tram_stop": "Q11972525",
    "Kjelsåsalleen_tram_stop": None,
    "Vigelandsparken_tram_stop": "Q19398059",
    "Frogner_plass_tram_stop": "Q11970372",
    "Lille_Frogner_allé_tram_stop": "Q19379373",
    "Niels_Juels_gate_tram_stop": "Q11991378",
    "Ruseløkka_tram_stop": None,
    "Bislett_tram_stop": "Q11961163",
    "Dalsbergstien_tram_stop": "Q17764618",
    "Welhavens_gate_tram_stop": "Q12010485",
    "Frydenlund_tram_stop": "Q19373143",
    "Holbergs_plass_tram_stop": "Q11975623",
    "Tullinøkka_(station)": None,
    "Heimdalsgata_tram_stop": None,
    "Lakkegata_skole_tram_stop": "Q11982987",
    "Sofienberg_tram_stop": None,
    "Rosenhoff_tram_stop": None,
    "Sinsenterrassen_tram_stop": None,
    "Torshov_(station)": None,
    # NOTE(review): HTML links Frogner_stadion_tram_stop as a regular /wiki/
    # article, not a redlink. Listing it here with None skips the Wikipedia
    # lookup, so it can never get a QID — confirm this is intended.
    "Frogner_stadion_tram_stop": None,
    # Wikipedia redirect pages — QID of the redirect target
    "Stortorvet_(station)": "Q7620354",  # → Stortorvet_tram_stop
    "Stortinget_(station)": "Q188712",  # → Stortinget T-bane (Tinghuset tram stop is next to it)
    "Sinsen_(station)": "Q19388523",  # → Sinsenkrysset tram stop
    "Biermanns_gate_(station)": "Q19363042",  # → Biermanns gate tram stop
    "Carl_Berners_plass_(station)": "Q890592",  # → Carl Berners plass metro+tram
    "Majorstuen_(station)": "Q686510",  # → Majorstuen T-bane
    "Grefsen_Station": "Q728583",  # → Grefsen stasjon (railway)
    "Kjelsås_tram_stop": "Q11981146",  # → Kjelsås
    "Storo_(station)": "Q932133",  # → Storo T-bane
    "Schous_plass_tram_stop": "Q12006491",  # → Schous plass
    "Jernbanetorget_(station)": "Q841481",  # → Jernbanetorget T+tram
    # Previously also listed above with None; that duplicate key was silently
    # overridden by this entry and has been removed.
    "Sandaker_senter_(station)": "Q12008217",  # → Sandaker senter
}
def extract_titles(html):
    """Collect Wikipedia page titles from the anchors in an HTML snippet.

    Two href shapes are recognised:
      * ``/wiki/<Title>``             — the percent-encoded path is decoded.
      * ``/w/index.php?title=<Title>`` — the ``title`` query value is used.

    Anchors with any other href, or with an empty title, are ignored.

    Args:
        html: HTML text containing ``<a href=...>`` elements.

    Returns:
        dict mapping wiki title (underscored, percent-decoded) to the anchor's
        display text. When a title occurs more than once the first label wins.
    """
    soup = BeautifulSoup(html, "html.parser")
    titles = {}
    for anchor in soup.find_all("a"):
        href = anchor.get("href", "")
        if href.startswith("/wiki/"):
            title = unquote(href.removeprefix("/wiki/"))
        elif href.startswith("/w/index.php"):
            # parse_qs already percent-decodes the value; decoding it a second
            # time would corrupt titles that contain a literal '%' sequence.
            title = parse_qs(urlparse(href).query).get("title", [None])[0]
        else:
            title = None
        if title:
            # setdefault keeps the label of the first occurrence.
            titles.setdefault(title, anchor.get_text(strip=True))
    return titles  # {wiki_title: display_label}
# HTTP headers passed to every requests.get() call in this script.
# NOTE(review): the contact address is an example.com placeholder — confirm
# whether a real contact should be used before running against the live APIs.
HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"}
def get_qids_from_wikipedia(titles):
    """Wikipedia API: article titles → Wikidata QIDs.

    Titles present in WIKIDATA_DIRECT are not sent to Wikipedia; their
    hand-maintained QIDs (when not None) are merged in at the end and take
    precedence over anything the API returned.

    The API is queried in batches of 50 titles with ``redirects=1``. The
    response reports title rewrites in ``query.normalized`` and
    ``query.redirects``; these are followed so that each QID is stored under
    the title that was *requested*, not only under the final page title.

    Args:
        titles: iterable of underscored Wikipedia titles.

    Returns:
        dict mapping title → QID. Titles without a Wikidata item are absent.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    result = {}
    batch = [t for t in titles if t not in WIKIDATA_DIRECT]
    for i in range(0, len(batch), 50):
        chunk = batch[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "query",
                "format": "json",
                "redirects": "1",
                "prop": "pageprops",
                "ppprop": "wikibase_item",
                "titles": "|".join(chunk),
            },
            headers=HEADERS,
            timeout=30,
        )
        print(f"Status: {r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}")
        r.raise_for_status()
        query = r.json().get("query", {})

        # from-title → to-title for every rewrite the API performed.
        rewritten = {}
        for section in ("normalized", "redirects"):
            for entry in query.get(section, []):
                rewritten[entry["from"]] = entry["to"]

        qid_by_page = {}
        for page in query.get("pages", {}).values():
            qid = page.get("pageprops", {}).get("wikibase_item")
            if qid:
                page_title = page.get("title", "")
                qid_by_page[page_title] = qid
                # Keep the historical keying by underscored page title too.
                result[page_title.replace(" ", "_")] = qid

        for requested in chunk:
            final = requested
            visited = set()
            # Follow normalisation and redirects; guard against cycles.
            while final in rewritten and final not in visited:
                visited.add(final)
                final = rewritten[final]
            qid = qid_by_page.get(final) or qid_by_page.get(requested.replace("_", " "))
            if qid:
                result[requested] = qid
    # Merge known QIDs
    for t, qid in WIKIDATA_DIRECT.items():
        if qid:
            result[t] = qid
    return result
def get_p625(qids):
    """Wikidata API: QIDs → P625 coordinates.

    Args:
        qids: dict whose values are Wikidata QIDs (keys are ignored).

    Returns:
        dict mapping QID → {"label": str, "coords": (lat, lng) | None}.
        The label is the English label, else the Norwegian Bokmål label,
        else the QID itself. Coordinates are rounded to 5 decimals and taken
        from the first P625 claim that carries a value.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = "https://www.wikidata.org/w/api.php"
    result = {}
    # Sorted so batches are deterministic between runs; falsy values skipped.
    unique = sorted({qid for qid in qids.values() if qid})
    for i in range(0, len(unique), 50):
        chunk = unique[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "wbgetentities",
                "format": "json",
                "ids": "|".join(chunk),
                "props": "claims|labels",
                "languages": "en|nb",
            },
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        for qid, entity in r.json().get("entities", {}).items():
            labels = entity.get("labels", {})
            label = (
                labels.get("en", {}).get("value")
                or labels.get("nb", {}).get("value")
                or qid
            )
            coords = None
            for claim in entity.get("claims", {}).get("P625", []):
                # "novalue"/"somevalue" snaks have no datavalue; skip them
                # instead of raising KeyError.
                value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
                if value:
                    coords = (round(value["latitude"], 5), round(value["longitude"], 5))
                    break
            result[qid] = {"label": label, "coords": coords}
    return result
def main():
    """Resolve every title in HTML to coordinates and print a report.

    Prints one table row per title (title, QID, label, coordinates) followed
    by the list of titles for which no QID or no P625 claim was found.
    """
    title_to_label = extract_titles(HTML)
    print(f"Extracted {len(title_to_label)} unique titles\n")

    title_to_qid = get_qids_from_wikipedia(title_to_label)
    print(f"Resolved {len(title_to_qid)} QIDs\n")

    qid_to_data = get_p625(title_to_qid)

    print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}")
    print("-" * 120)

    unresolved = []
    for title in sorted(title_to_label):
        qid = title_to_qid.get(title, "")
        if qid != "":
            entry = qid_to_data.get(qid, {})
            label = entry.get("label", "?")
            coords = str(entry.get("coords") or "NO P625")
        else:
            label = "?"
            coords = "NO QID"
        print(f"{title:<45} {qid:<12} {label:<35} {coords}")
        # Both failure markers ("NO QID", "NO P625") contain "NO".
        if "NO" in coords:
            unresolved.append(title)

    print(f"\n\nMissing coords: {len(unresolved)}")
    for title in unresolved:
        print(f" {title}")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
+55 -16
View File
@@ -19,6 +19,12 @@ def _normalize_description(text: str | None) -> str:
return text.lower() if text else "" return text.lower() if text else ""
def _is_resale_listing(url: str) -> bool:
"""True for ordinary resale ads. Project / new-build ads use different URL
paths that fetch_ad_details cannot resolve (it builds a /homes/ URL)."""
return "/realestate/homes/" in url
def _build_ad_summary( def _build_ad_summary(
ad: FinnAd, ad: FinnAd,
enriched: EiendomUnit | None, enriched: EiendomUnit | None,
@@ -95,12 +101,17 @@ async def analyze_ad(
if enriched is not None: if enriched is not None:
cache.save_eiendom_unit(conn, enriched) cache.save_eiendom_unit(conn, enriched)
if enriched and enriched.unit_vector: if enriched:
similar_units = cache.get_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD") # EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the
if not similar_units: # field comes back None. Reading enriched.unit_vector directly leaves
similar_units = await eiendom_no.get_similar_units(enriched.unit_vector) # this block dead and similar_units permanently empty. Build the vector
if similar_units: # from the unit fields instead (fall back to the field if a future
cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units) # endpoint ever populates it).
vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched)
if vector:
# No dedicated cache table for similar units (per PRD) -- fetch
# fresh each call, consistent with service.get_or_fetch_similar_units.
similar_units = await eiendom_no.get_similar_units(vector)
scores = scoring.score_ad(finn_ad, enriched, similar_units) scores = scoring.score_ad(finn_ad, enriched, similar_units)
categories = scoring.classify_ad(scores) categories = scoring.classify_ad(scores)
@@ -120,6 +131,26 @@ async def analyze_ad(
return result return result
async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict:
    """Load one search card's ad, optionally resolve its unit, and analyze it.

    The ad is read from the cache and fetched via ``fetch_ad_details`` only on
    a cache miss. When ``include_eiendom_no`` is set, the Eiendom.no unit is
    looked up from the card URL; a failure there is logged and the analysis
    continues without a unit code. Any other exception propagates — the
    caller is responsible for catching it and skipping the card.
    """
    ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS)
    if ad is None:
        ad = await ad_module.fetch_ad_details(card.finnkode, client=client)

    resolved_code = None
    if include_eiendom_no:
        try:
            match = await eiendom_no.search_unit_from_finn_url(card.url)
            if match:
                resolved_code = match.unit_code
        except Exception as exc:
            # Unit resolution is best-effort: log and carry on unenriched.
            logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc)
            resolved_code = None

    return await analyze_ad(ad, unit_code=resolved_code)
async def analyze_search( async def analyze_search(
search_url: str, search_url: str,
max_pages: int = FINN_MAX_SEARCH_PAGES, max_pages: int = FINN_MAX_SEARCH_PAGES,
@@ -139,21 +170,28 @@ async def analyze_search(
) )
results = [] results = []
enriched_count = 0 enriched_count = 0
skipped_count = 0
if fetch_details: if fetch_details:
for card in cards[:detail_limit]: for card in cards[:detail_limit]:
finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) # Project / new-build ads are not resale listings and fetch_ad_details
if finn_ad is None: # cannot resolve them -- skip up front rather than 404 mid-run.
finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client) if not _is_resale_listing(card.url):
unit_code = None logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url)
if include_eiendom_no: skipped_count += 1
continue
# One bad card (stale finnkode, removed ad, transient network error)
# must not abort the whole search -- isolate each card.
try: try:
matched_unit = await eiendom_no.search_unit_from_finn_url(card.url) result = await _analyze_card(
unit_code = matched_unit.unit_code if matched_unit else None card, conn, include_eiendom_no=include_eiendom_no, client=client
)
except Exception as exc: except Exception as exc:
logger.warning("Eiendom.no unit search failed: %s", exc) logger.warning("Skipping card %s: %s", card.finnkode, exc)
unit_code = None skipped_count += 1
result = await analyze_ad(finn_ad, unit_code=unit_code) continue
if result.get("eiendom_unit"): if result.get("eiendom_unit"):
enriched_count += 1 enriched_count += 1
results.append(result) results.append(result)
@@ -166,6 +204,7 @@ async def analyze_search(
"summary": { "summary": {
"total_listings": len(cards), "total_listings": len(cards),
"analyzed_listings": len(results), "analyzed_listings": len(results),
"skipped_listings": skipped_count,
"eiendom_enriched": enriched_count, "eiendom_enriched": enriched_count,
}, },
} }
+3 -3
View File
@@ -75,7 +75,7 @@ async def finn_analyze_search(
detail_limit=detail_limit, detail_limit=detail_limit,
include_eiendom_no=include_eiendom_no, include_eiendom_no=include_eiendom_no,
) )
return json.dumps(result) return json.dumps(result, default=str)
except Exception as e: except Exception as e:
logger.error(f"Error analyzing search: {e}") logger.error(f"Error analyzing search: {e}")
return json.dumps({"error": True, "message": str(e)}) return json.dumps({"error": True, "message": str(e)})
@@ -164,7 +164,7 @@ async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENT
"""Fetch similar units from Eiendom.no.""" """Fetch similar units from Eiendom.no."""
try: try:
units = await get_similar_units(unit_vector, listing_status) units = await get_similar_units(unit_vector, listing_status)
return json.dumps([unit.model_dump() for unit in units]) return json.dumps([unit.model_dump() for unit in units], default=str)
except Exception as e: except Exception as e:
logger.error(f"Error fetching similar units: {e}") logger.error(f"Error fetching similar units: {e}")
return json.dumps({"error": True, "message": str(e)}) return json.dumps({"error": True, "message": str(e)})
@@ -222,7 +222,7 @@ async def finn_analyze_ad(
include_eiendom_no=include_eiendom_no, include_eiendom_no=include_eiendom_no,
include_similar_units=include_similar_units, include_similar_units=include_similar_units,
) )
return render_ad(result.get("ad", {}), "json") return json.dumps(result, default=str)
except Exception as e: except Exception as e:
logger.error(f"Error analyzing ad {finnkode}: {e}") logger.error(f"Error analyzing ad {finnkode}: {e}")
return json.dumps({"error": True, "message": str(e)}) return json.dumps({"error": True, "message": str(e)})
+727 -102
View File
@@ -1,146 +1,771 @@
"""Scoring engine for FINN listings enriched with Eiendom.no data.""" """Scoring engine tuned for Ole & partner's apartment search criteria.
Priority hierarchy (stated):
MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access
HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general),
quiet setting, hybel with own bath + kitchen
MEDIUM : sameie economy, green areas / walking terrain, price vs market
BONUS : renovation upside (acceptable, not required)
Dimension caps (the non-risk maxima listed below sum to 113; total is clamped to 100):
floor -15..0 ground floor penalty only; floor number alone, without building height, carries no information
neighbourhood 25 preferred area anchors, distance-based
view_and_quiet 20 view quality + quiet setting; 0 if no balcony
area_and_layout 15 sqm + bedroom count; hard penalty < 80 m²
hybel 12 hybel with own bath + kitchen
transport 10 walking distance to T-bane / trikk
economy 8 listing price vs Eiendom.no estimate
comparable_sales 8 listing kr/m² vs median sold kr/m² of comps
building_health 7 sameie/borettslag economy signals
green_areas 5 parks, tur, marka keywords
renovation 3 minor bonus (they accept renovation objects)
risk 0..-30 stale listing, high costs, missing data
"""
import logging import logging
import math
from typing import Any from typing import Any
from .models import EiendomUnit, SimilarUnit from .models import EiendomUnit, SimilarUnit
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
def _clamp(value: float, min_value: float, max_value: float) -> float: # Geometry helpers
return max(min_value, min(max_value, value)) # ---------------------------------------------------------------------------
def score_market_position(unit: EiendomUnit | None) -> float: def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
if unit is None or unit.estimated_selling_price is None or unit.listing_price is None: """Flat-earth approximation — accurate enough within Oslo (~59.9°N).
return 0.0 1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude.
ratio = unit.listing_price / unit.estimated_selling_price """
if ratio <= 0.9: dlat = (lat2 - lat1) * 111.0
dlng = (lng2 - lng1) * 56.0
return math.sqrt(dlat**2 + dlng**2)
def _clamp(value: float, lo: float, hi: float) -> float:
return max(lo, min(hi, value))
def _median(values: list[float]) -> float:
s = sorted(values)
mid = len(s) // 2
return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2.0
# ---------------------------------------------------------------------------
# Preferred neighbourhood anchors
# ---------------------------------------------------------------------------
# Reference points for the preferred neighbourhoods. score_neighbourhood()
# scores a unit by its distance to the nearest anchor in this list.
_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
    # (label, lat, lng) — label used only for debug logging
    ("Grünerløkka", 59.9240, 10.7573),
    ("Torshov", 59.9340, 10.7620),
    ("Rodeløkka", 59.9315, 10.7660),
    ("Kampen", 59.9125, 10.7760),
    ("Sagene", 59.9400, 10.7590),
    ("Nydalen", 59.9520, 10.7540),
    ("Storo", 59.9450, 10.7670),
    ("Grefsen", 59.9580, 10.7720),
    ("Fagerborg", 59.9280, 10.7300),
    ("St. Hans Haugen", 59.9300, 10.7400),
    ("Ullevål", 59.9400, 10.7270),
    ("Majorstua", 59.9210, 10.7170),
    ("Frogner", 59.9160, 10.7150),
    ("Løren", 59.9310, 10.7960),
    ("Torshovdalen", 59.9295, 10.7630),
    ("Rosenhoff", 59.9255, 10.7775),
]
# ---------------------------------------------------------------------------
# Transit network — all T-bane and trikk stops.
#
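# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations).
# TRIKK_STOPS: merge of TRIKK_STOPS_VERIFIED (Wikidata P625 / Wikipedia /
#              OpenStreetMap coordinates) and TRIKK_STOPS_ESTIMATED
#              (interpolated or placed by hand where no source was found).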
#
# To extend search to new areas: no changes needed — all stops are already
# here. score_transport automatically finds the nearest stop for any address.
# ---------------------------------------------------------------------------
TBANE_STOPS: dict[str, tuple[float, float]] = {
# All 101 stations — Wikipedia DMS converted to decimal degrees
"Ammerud": (59.957922, 10.871165),
"Avløs": (59.913859, 10.552926),
"Bekkestua": (59.918097, 10.588031),
"Berg": (59.951142, 10.744801),
"Bergkrystallen": (59.867091, 10.821206),
"Besserud": (59.957760, 10.673092),
"Bjørnsletta": (59.926902, 10.635458),
"Blindern": (59.940052, 10.716262),
"Bogerud": (59.875833, 10.841944),
"Borgen": (59.934548, 10.696000),
"Brattlikollen": (59.888076, 10.801191),
"Brynseng": (59.909169, 10.811834),
"Bøler": (59.884167, 10.845278),
"Carl Berners plass": (59.926592, 10.778360),
"Eiksmarka": (59.946431, 10.622320),
"Ekraveien": (59.950836, 10.635822),
"Ellingsrudåsen": (59.936311, 10.916634),
"Ensjø": (59.913364, 10.786986),
"Forskningsparken": (59.943513, 10.720425),
"Frognerseteren": (59.979018, 10.675857),
"Frøen": (59.934167, 10.709167),
"Furuset": (59.941578, 10.897247),
"Gaustad": (59.945625, 10.709814),
"Gjettum": (59.906221, 10.527155),
"Gjønnes": (59.918097, 10.579877),
"Godlia": (59.908523, 10.835352),
"Grorud": (59.961413, 10.881701),
"Grønland": (59.912912, 10.759563),
"Gråkammen": (59.954838, 10.701842),
"Gulleråsen": (59.955526, 10.696521),
"Hasle": (59.925302, 10.794454),
"Haslum": (59.915021, 10.563183),
"Hauger": (59.910957, 10.510713),
"Haugerud": (59.922592, 10.855350),
"Hellerud": (59.910079, 10.829953),
"Helsfyr": (59.911514, 10.803680),
"Holmen": (59.946296, 10.666609),
"Holmenkollen": (59.960489, 10.662446),
"Holstein": (59.960403, 10.740552),
"Hovseter": (59.946328, 10.654694),
"Høyenhall": (59.905769, 10.819860),
"Jar": (59.926592, 10.621762),
"Jernbanetorget": (59.912116, 10.751211),
"Kalbakken": (59.954553, 10.866750),
"Karlsrud": (59.880453, 10.805225),
"Kolsås": (59.914416, 10.501366),
"Kringsjå": (59.963690, 10.734930),
"Lambertseter": (59.873289, 10.810440),
"Lijordet": (59.940901, 10.616559),
"Lillevann": (59.980481, 10.653037),
"Lindeberg": (59.932979, 10.882087),
"Linderud": (59.940976, 10.839214),
"Løren": (59.929972, 10.790806),
"Majorstuen": (59.929904, 10.714931),
"Makrellbekken": (59.941957, 10.673845),
"Manglerud": (59.897957, 10.812435),
"Midtstuen": (59.961299, 10.682911),
"Montebello": (59.936806, 10.670471),
"Mortensrud": (59.849083, 10.828657),
"Munkelia": (59.868914, 10.812500),
"Nationaltheatret": (59.915045, 10.733039),
"Nydalen": (59.948864, 10.765250),
"Oppsal": (59.892866, 10.840201),
"Ringstabekk": (59.916182, 10.593696),
"Ris": (59.948069, 10.705147),
"Risløkka": (59.932355, 10.822713),
"Rommen": (59.962127, 10.908968),
"Romsås": (59.962272, 10.890777),
"Ryen": (59.895807, 10.805617),
"Røa": (59.946791, 10.643874),
"Rødtvet": (59.951416, 10.859535),
"Sinsen": (59.938085, 10.781343),
"Skogen": (59.975246, 10.647415),
"Skullerud": (59.866754, 10.839171),
"Skødalen": (59.961787, 10.690789),
"Skøyenåsen": (59.898866, 10.836516),
"Slemdal": (59.949896, 10.695662),
"Smestad": (59.937315, 10.683609),
"Sognsvann": (59.967127, 10.733943),
"Steinerud": (59.939083, 10.704345),
"Storo": (59.944545, 10.778768),
"Stortinget": (59.913047, 10.741469),
"Stovner": (59.962616, 10.923414),
"Trosterud": (59.927152, 10.864041),
"Tveita": (59.914354, 10.841961),
"Tøyen": (59.915214, 10.774670),
"Tåsen": (59.953270, 10.752439),
"Ullernåsen": (59.930635, 10.654796),
"Ullevål stadion": (59.946629, 10.732226),
"Ulsrud": (59.889970, 10.849428),
"Veitvet": (59.944700, 10.847304),
"Vestli": (59.972324, 10.929337),
"Vettakollen": (59.959913, 10.695705),
"Vinderen": (59.942803, 10.704761),
"Voksenkollen": (59.980076, 10.665193),
"Voksenlia": (59.966937, 10.655082),
"Vollebekk": (59.935865, 10.831039),
"Åsjordet": (59.928764, 10.646889),
"Økern": (59.928592, 10.804152),
"Østerås": (59.939445, 10.608587),
"Østhorn": (59.956944, 10.749779),
}
# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia
# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring.
# Grouped by line corridor for readability. Stops without a verified source
# are kept separately in TRIKK_STOPS_ESTIMATED.
# Source tag format: Wikidata QID | "shared T-bane" | "OSM node <id>" | "Wikipedia"
TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
# ── Briskeby-linjen (l11/19) ─────────────────────────────────────────
"Majorstuen": (59.929904, 10.714931), # shared T-bane
"Bogstadveien": (59.92611, 10.72167), # Q19372022
"Rosenborg": (59.92417, 10.72389), # Q7899658
"Briskeby": (59.92048, 10.71767), # Q11962293
"Riddervolds plass": (59.91896, 10.72026), # Q19386557
"Inkognitogata": (59.91565, 10.72114), # Q11977313
"Nationaltheatret": (59.91504, 10.73304), # shared T-bane
# ── Sentrum (shared l11/12/13/17/18/19) ──────────────────────────────
"Øvre Slottsgate": (59.9118, 10.7417), # Q31079249
"Dronningens gate": (59.91053, 10.74697), # Q29828354
"Jernbanetorget": (59.912116, 10.751211), # shared T-bane
"Storgata": (59.91396, 10.75141), # Q109484341
"Nybrua": (59.91707, 10.75834), # Q104867506
"Stortorvet": (59.91310, 10.74530), # Q7620354
"Bjørvika": (59.90806, 10.75639), # Wikipedia
# ── Grünerløkka–Torshov-linjen (l11/12/18) ──────────────────────────
"Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia
"Olaf Ryes plass": (59.9231, 10.7592), # Q4993079
"Birkelunden": (59.9271, 10.7601), # Q4916412
"Biermanns gate": (59.93028, 10.76104), # Wikipedia
"Sandaker senter": (59.93889, 10.76861), # Wikipedia
"Grefsenveien": (59.94278, 10.77344), # Q17778424
"Storo": (59.944545, 10.778768), # shared T-bane
# ── Kjelsåslinjen (l11/12) ───────────────────────────────────────────
"Disen": (59.94627, 10.78729), # Q11965753
"Glads vei": (59.95235, 10.78533), # Q17776371
"Grefsenplatået": (59.9560, 10.78573), # Q11972531
"Grefsen stadion": (59.96008, 10.78475), # Q11972525
"Kjelsås": (59.96611, 10.78278), # Wikipedia
# ── Frogner-linjen (l12) ─────────────────────────────────────────────
"Vigelandsparken": (59.92457, 10.70815), # Q19398059
"Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 30560564
"Elisenberg": (59.91944, 10.70861), # Q5361695
"Lille Frogner allé": (59.9180, 10.7120), # Q19379373
"Niels Juels gate": (59.91634, 10.71520), # Q11991378
"Solli": (59.91486, 10.71906), # Q7558364
# ── Vika-linjen (l12) ────────────────────────────────────────────────
"Aker Brygge": (59.9110, 10.7299), # Q4700639
"Kontraskjæret": (59.91087, 10.73592), # Q11998807
# ── Lilleaker-linjen (l13) ───────────────────────────────────────────
"Lilleaker": (59.92074, 10.63580), # Wikipedia
"Sollerud": (59.92104, 10.64309), # Wikipedia
"Furulund": (59.91990, 10.65013), # Wikipedia
"Ullern": (59.92429, 10.65858), # Wikipedia
"Abbediengen": (59.92517, 10.66716), # Wikipedia
"Hoff": (59.92500, 10.67488), # Wikipedia
"Skøyen": (59.92384, 10.68034), # Wikipedia
# ── Skøyen-linjen (l13) ──────────────────────────────────────────────
"Thune": (59.92186, 10.68742), # Wikipedia
"Nobels gate": (59.91758, 10.69866), # Wikipedia
"Skarpsno": (59.91430, 10.70234), # Wikipedia
"Skillebekk": (59.91277, 10.71103), # Wikipedia
# ── Ekeberg-linjen (l13/19) ──────────────────────────────────────────
"Middelalderparken": (59.90639, 10.76417), # Q99971403
"Oslo Hospital": (59.9032, 10.7674), # Wikipedia
"Ekebergparken": (59.8977, 10.7593), # Wikipedia
"Jomfrubråten": (59.8883, 10.7706), # Wikipedia
"Sportsplassen": (59.8860, 10.7736), # Wikipedia
"Holtet": (59.88151, 10.78415), # Wikipedia
"Sørli": (59.87493, 10.78709), # Wikipedia
"Kastellet": (59.87106, 10.79036), # Wikipedia
"Bråten": (59.86714, 10.79244), # Wikipedia
"Sæter": (59.86102, 10.79870), # Wikipedia
"Ljabru": (59.85335, 10.80089), # Wikipedia
# ── Ullevål Hageby-linjen (l17/18) ───────────────────────────────────
"Rikshospitalet": (59.947768, 10.714716), # Wikipedia
"Gaustadalleen": (59.9454, 10.7172), # Wikipedia
"Forskningsparken": (59.943513, 10.720425), # shared T-bane
"Universitetet Blindern": (59.9421, 10.7243), # Wikipedia
"John Collets plass": (59.9403, 10.7290), # Wikipedia
"Ullevål sykehus": (59.9361, 10.7318), # Wikipedia
"Adamstuen": (59.9326, 10.7345), # Wikipedia
"Stensgata": (59.92957, 10.73303), # Q7607927
"Bislett": (59.92599, 10.73108), # Q11961163
"Dalsbergstien": (59.92354, 10.73163), # Q17764618
"Welhavens gate": (59.92131, 10.72968), # Q12010485
"Frydenlund": (59.92086, 10.73317), # Q19373143
"Holbergs plass": (59.91876, 10.73453), # Q11975623
# ── Sinsen-linjen (l17) ──────────────────────────────────────────────
"Lakkegata skole": (59.92055, 10.76834), # Q11982987
"Carl Berners plass": (59.926592, 10.778360), # shared T-bane
"Sinsenkrysset": (59.93911, 10.78340), # Q19388523
"Grefsen stasjon": (59.94167, 10.78056), # Wikipedia
# ── Homansbyen-linjen (l19) ───────────────────────────────────────────
"Homansbyen": (59.92278, 10.72639), # Q5887760
}
# Estimated trikk stop coordinates — no Wikidata P625 found.
# Derived from linear interpolation between verified neighbours,
# or placed from map/street knowledge. Max error ~150-250 m.
# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED.
# "interp A↔B t=x" means a point the fraction x of the way from A to B.
TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
    # ── Sentrum ───────────────────────────────────────────────────────────
    "Tinghuset": (59.9146, 10.7403),  # Ullevål Hageby line, next to Stortinget T-bane
    # ── Grünerløkka–Torshov-linjen ──────────────────────────────────────
    "Torshov": (59.9332, 10.7643),  # interp Biermanns gate↔Sandaker
    # ── Kjelsåslinjen ────────────────────────────────────────────────────
    "Doktor Smiths vei": (59.9503, 10.7867),  # interp Disen↔Kjelsås t=0.20
    "Kjelsåsalleen": (59.9641, 10.7833),  # interp Disen↔Kjelsås t=0.90
    # ── Frogner-linjen ───────────────────────────────────────────────────
    # NOTE(review): this latitude lies south of Vigelandsparken (59.92457),
    # Frogner plass (59.92255) and Elisenberg (59.91944) in the verified
    # table — further off than a neighbouring stop would be. Verify.
    "Frogner stadion": (59.9167, 10.7038),  # Kirkeveien, south of Vigelandsparken
    # ── Vika-linjen ──────────────────────────────────────────────────────
    "Ruseløkka": (59.9120, 10.7258),  # interp Solli↔Kontraskjæret
    # ── Ullevål Hageby-linjen ─────────────────────────────────────────────
    "Tullinøkka": (59.9163, 10.7349),  # interp Holbergs plass↔Tinghuset
    # ── Sinsen-linjen ────────────────────────────────────────────────────
    "Heimdalsgata": (59.9188, 10.7633),  # interp Nybrua↔Lakkegata skole
    "Sofienberg": (59.9236, 10.7734),  # interp Lakkegata skole↔Carl Berners
    "Rosenhoff": (59.9307, 10.7800),  # interp Carl Berners↔Sinsenkrysset t=0.33
    "Sinsenterrassen": (59.9350, 10.7817),  # interp Carl Berners↔Sinsenkrysset t=0.67
}
# Merged — verified takes precedence if a key appears in both (shouldn't happen).
# The verified dict is unpacked last, and later entries overwrite earlier ones
# in a dict display, which is what gives it precedence.
TRIKK_STOPS: dict[str, tuple[float, float]] = {
    **TRIKK_STOPS_ESTIMATED,
    **TRIKK_STOPS_VERIFIED,
}
# ---------------------------------------------------------------------------
# Transit helpers
# ---------------------------------------------------------------------------
_WALK_SPEED_KMH = 5.0 # avg walking speed
def _nearest_stop(
    lat: float, lng: float, stops: dict[str, tuple[float, float]]
) -> tuple[str, float]:
    """Find the stop in *stops* closest to (lat, lng).

    Returns (stop_name, distance_km). For an empty mapping the result is
    ``("", inf)``. On equal distances the stop seen first is kept.
    """
    closest = ("", float("inf"))
    for stop_name, coords in stops.items():
        stop_lat, stop_lng = coords
        dist = _distance_km(lat, lng, stop_lat, stop_lng)
        # Strict comparison: only a strictly nearer stop replaces the current one.
        if dist < closest[1]:
            closest = (stop_name, dist)
    return closest
def nearby_transit(
    lat: float, lng: float, max_walk_min: float = 10.0
) -> dict[str, list[tuple[str, float]]]:
    """Return T-bane and trikk stops within max_walk_min minutes walk.

    The walking-time limit is converted to a distance using _WALK_SPEED_KMH
    and compared against the straight-line distance from _distance_km.

    Args:
        lat: latitude of the origin point.
        lng: longitude of the origin point.
        max_walk_min: maximum walking time in minutes.

    Returns:
        {
            "tbane": [("Carl Berners plass", 0.28), ...],  # sorted by distance
            "trikk": [("Rosenhoff", 0.19), ...],
        }
        All distances in km.
    """
    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH

    def _within_reach(stops: dict[str, tuple[float, float]]) -> list[tuple[str, float]]:
        """List (name, km) for stops within max_km, nearest first."""
        hits = []
        for name, (stop_lat, stop_lng) in stops.items():
            # Compute each distance once; it is needed for both filter and result.
            dist = _distance_km(lat, lng, stop_lat, stop_lng)
            if dist <= max_km:
                hits.append((name, dist))
        hits.sort(key=lambda hit: hit[1])
        return hits

    return {"tbane": _within_reach(TBANE_STOPS), "trikk": _within_reach(TRIKK_STOPS)}
# ---------------------------------------------------------------------------
# Dimension functions
# ---------------------------------------------------------------------------
def score_neighbourhood(
    unit: EiendomUnit | None,
    address: str | None = None,
    district: str | None = None,
) -> float:
    """Score proximity to the nearest preferred-area anchor. Max 25.

    With coordinates on ``unit`` the score follows distance bands to the
    closest entry of ``_PREFERRED_ANCHORS``:

        < 0.5 km -> 25, < 1.0 km -> 20, < 1.5 km -> 15,
        < 2.5 km -> 10, < 4.0 km -> 5, otherwise 2.

    Without usable coordinates the function falls back to a text check:
    5 points when "oslo" occurs in the address or district, otherwise 0.

    Args:
        unit: Unit carrying ``lat`` / ``lng``, or None.
        address: Free-text address used by the fallback.
        district: District name used by the fallback.
    """
    if unit and unit.lat and unit.lng:
        distances = [
            (_distance_km(unit.lat, unit.lng, lat, lng), label)
            for label, lat, lng in _PREFERRED_ANCHORS
        ]
        min_dist, nearest = min(distances)
        logger.debug("Nearest anchor: %s at %.2f km", nearest, min_dist)
        if min_dist < 0.5:
            return 25.0
        if min_dist < 1.0:
            return 20.0
        if min_dist < 1.5:
            return 15.0
        if min_dist < 2.5:
            return 10.0
        if min_dist < 4.0:
            return 5.0
        return 2.0

    # No coordinates: a coarse textual match is all that is left.
    haystack = " ".join(filter(None, [address, district])).lower()
    if "oslo" in haystack:
        return 5.0
    return 0.0
def score_transport(unit: EiendomUnit | None) -> float:
    """Score walking distance to the closest T-bane or trikk stop. Max 10.

    Every stop in ``TBANE_STOPS`` and ``TRIKK_STOPS`` is considered, so no
    manual curation is needed when new search areas are added.

    Distance bands (distance to the closer of the two networks):
        < 400 m   -> 10 pts (~5 min walk)
        < 800 m   ->  8 pts (~10 min, the stated threshold)
        < 1200 m  ->  4 pts (~15 min)
        >= 1200 m ->  0 pts

    Returns 0 when ``unit`` is None or has no latitude/longitude.
    """
    if unit is None or unit.lat is None or unit.lng is None:
        return 0.0

    tbane_dist = _nearest_stop(unit.lat, unit.lng, TBANE_STOPS)[1]
    trikk_dist = _nearest_stop(unit.lat, unit.lng, TRIKK_STOPS)[1]
    nearest_km = min(tbane_dist, trikk_dist)
    logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_dist, trikk_dist)

    # (upper bound in km, points), checked from the tightest band outwards.
    for limit_km, points in ((0.4, 10.0), (0.8, 8.0), (1.2, 4.0)):
        if nearest_km < limit_km:
            return points
    return 0.0
def score_view_and_quiet(ad: Any, description: str) -> float:
    """Score view quality plus a quiet/noise adjustment. Max 20.

    Returns 0 straight away when the ad has neither balcony nor terrace.
    Otherwise the best matching view tier (15 / 13 / 10 / 7 / 0) is combined
    with +5 for any quiet keyword and -5 for any noise keyword, and the sum
    is passed through ``_clamp`` with bounds 0 and 20.

    Args:
        ad: Listing object exposing ``has_balcony`` and ``has_terrace``.
        description: Listing text; matched case-insensitively by substring.
    """
    if not ad.has_balcony and not ad.has_terrace:
        return 0.0

    text = description.lower()

    # Ordered from most to least valuable; the first tier that matches wins.
    view_tiers = (
        (("sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt"), 15.0),
        (("panorama", "panoramautsikt", "vidt utsyn", "vidstrakt"), 13.0),
        (("over hustak", "hustak", "over takene"), 10.0),
        (("utsikt",), 7.0),
    )
    view_points = next(
        (points for words, points in view_tiers if any(word in text for word in words)),
        0.0,
    )

    calm_words = (
        "rolig",
        "tilbaketrukket",
        "skjermet",
        "bakgård",
        "gårdsrom",
        "stille",
        "blindvei",
    )
    noise_words = ("støy", "bilvei", "trafikkert", "støyutsatt")

    quiet_points = 0.0
    if any(word in text for word in calm_words):
        quiet_points += 5.0
    if any(word in text for word in noise_words):
        quiet_points -= 5.0

    return float(_clamp(view_points + quiet_points, 0.0, 20.0))
def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float:
    """Score main-unit size plus a small bedroom bonus. Max 15.

    The area is ``unit.usable_area`` when present, otherwise ``ad.area_m2``,
    otherwise 0.

        area < 60   -> 0 (no bedroom bonus)
        area < 80   -> 3 (no bedroom bonus)
        area < 90   -> 8
        area < 105  -> 11
        otherwise   -> 14

    From 80 m2 upwards, 3+ bedrooms add 1.0 and exactly 2 bedrooms add 0.5.
    """
    size = (unit.usable_area if unit else None) or ad.area_m2 or 0

    # Small units exit early and never receive the bedroom bonus.
    if size < 60:
        return 0.0
    if size < 80:
        return 3.0

    base = 8.0 if size < 90 else (11.0 if size < 105 else 14.0)

    bedroom_count = ad.bedrooms or 0
    bonus = 1.0 if bedroom_count >= 3 else (0.5 if bedroom_count == 2 else 0.0)

    return float(_clamp(base + bonus, 0.0, 15.0))
def score_hybel(description: str) -> float:
    """Score a rental unit (hybel) with its own bath and kitchen. Max 12.

    All matching is case-insensitive substring matching on the description.

        no hybel / sokkelleil / utleiedel mention      -> 0
        hybel only described as a possibility          -> 2
        rental income or tax-free wording present      -> 12
        bath and kitchen both mentioned                -> 12
        exactly one of bath / kitchen mentioned        -> 7
        neither mentioned                              -> 4
    """
    text = description.lower()

    if not any(marker in text for marker in ("hybel", "sokkelleil", "utleiedel")):
        return 0.0

    potential_phrases = (
        "mulighet for hybel",
        "mulighet til hybel",
        "mulig hybel",
        "kan etableres hybel",
        "kan bygges om til hybel",
        "tilrettelagt for hybel",
        "potensial for hybel",
        "hybelpotensial",
    )
    speculative = any(phrase in text for phrase in potential_phrases)
    if not speculative and "mulighet" in text and "hybel" in text:
        # Looser check: both words inside the same sentence.
        sentences = text.replace("!", ".").replace("?", ".").split(".")
        speculative = any("mulighet" in s and "hybel" in s for s in sentences)
    if speculative:
        return 2.0

    # Documented rental income is treated as proof of a real hybel.
    if "leieinntekt" in text or "skattefri" in text:
        return 12.0

    bath_phrases = (
        "eget bad",
        "eget wc",
        "eget toalett",
        "bad i hybel",
        "dusj i hybel",
        "eget dusj",
    )
    kitchen_phrases = ("eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken")

    # Specific phrases first, then any bare mention anywhere in the text.
    own_bath = any(p in text for p in bath_phrases) or "bad" in text or "dusj" in text
    own_kitchen = any(p in text for p in kitchen_phrases) or "kjøkken" in text

    facilities = int(own_bath) + int(own_kitchen)
    if facilities == 2:
        return 12.0
    if facilities == 1:
        return 7.0
    return 4.0
def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
    """Score the floor level as a binary signal: ground floor is penalised.

    Rationale: a top flat in a 3-storey block and the 8th floor of a tower
    are both "top" for their building. A floor number alone says nothing
    about view or light without the building's total height, so the only
    real signal is 1st floor (overlooked, noise, light) versus not.

    Scores:
        floor <= 1   -> -15 (hard penalty)
        unknown      ->   0 (no data, no penalty)
        above ground ->   0

    ``unit.floor`` is preferred; otherwise ``ad.floor`` is parsed as an
    integer after stripping whitespace and trailing dots. A value that
    cannot be parsed counts as unknown.
    """
    level: int | None = None
    if unit is not None and unit.floor is not None:
        level = unit.floor
    elif ad.floor is not None:
        raw = str(ad.floor).strip().rstrip(".")
        try:
            level = int(raw)
        except (ValueError, TypeError):
            level = None

    if level is not None and level <= 1:
        return -15.0
    return 0.0
def score_building_health(ad: Any, description: str) -> float:
    """Score economy signals for the sameie / borettslag. Max 7.

    Components, summed and passed through ``_clamp`` with bounds 0 and 7:
        shared debt: exactly 0 -> +3, unknown (None) -> +1
        monthly common costs: 0 or missing -> 0, <= 3500 -> +4,
            <= 5000 -> +2, <= 7000 -> 0, above -> -2
        any "well run / solid economy" keyword in the description -> +2
    """
    text = description.lower()

    if ad.shared_debt == 0:
        debt_points = 3.0
    elif ad.shared_debt is None:
        debt_points = 1.0
    else:
        debt_points = 0.0

    monthly_fee = ad.common_costs or 0
    if monthly_fee == 0:
        fee_points = 0.0
    elif monthly_fee <= 3500:
        fee_points = 4.0
    elif monthly_fee <= 5000:
        fee_points = 2.0
    elif monthly_fee <= 7000:
        fee_points = 0.0
    else:
        fee_points = -2.0

    praise_words = ("veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld")
    praise_points = 2.0 if any(word in text for word in praise_words) else 0.0

    return float(_clamp(debt_points + fee_points + praise_points, 0.0, 7.0))
def score_green_areas(description: str) -> float:
    """Score mentions of parks, walking terrain and green surroundings. Max 5.

    Counts how many distinct keywords occur as substrings of the lower-cased
    description: two or more -> 5, exactly one -> 2, none -> 0.

    Matching is by substring so that compounds such as "Sofienbergparken"
    count. Words that merely start with "park" but mean something else
    (parquet flooring, parking) are blanked out first; otherwise nearly
    every listing would earn green-area points.
    """
    d = description.lower()
    # Replace with a space so the removal cannot glue neighbours into a new match.
    for lookalike in ("parkett", "parkering", "parkere"):
        d = d.replace(lookalike, " ")

    keywords = ["park", "turområde", "turterreng", "marka", "skog", "grønt", "grønne", "friluft"]
    hits = sum(1 for kw in keywords if kw in d)
    if hits >= 2:
        return 5.0
    if hits == 1:
        return 2.0
    return 0.0
def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
    """Score listing price against the Eiendom.no estimated value. Max 8.

    The price is ``ad.total_price`` or, failing that, ``ad.asking_price``;
    ``ratio`` is price divided by ``unit.estimated_selling_price``.

        ratio <= 0.92 -> 8
        ratio <= 1.00 -> 5 + (1 - ratio) * 37.5   (5 .. 8)
        ratio <= 1.08 -> 5 - (ratio - 1) * 37.5   (2 .. 5)
        otherwise     -> 1

    Returns 0 when there is no unit, no usable estimate (missing or zero),
    or no price.
    """
    # A zero estimate is treated like a missing one to avoid dividing by zero.
    if unit is None or not unit.estimated_selling_price:
        return 0.0
    price = ad.total_price or ad.asking_price
    if price is None:
        return 0.0

    ratio = price / unit.estimated_selling_price
    if ratio <= 0.92:
        return 8.0
    if ratio <= 1.00:
        return 5.0 + (1.0 - ratio) * 37.5
    if ratio <= 1.08:
        return 5.0 - (ratio - 1.0) * 37.5
    return 1.0
def score_comparable_sales(
    listings: list[SimilarUnit],
    listing_sqm_price: int | float | None,
) -> float:
    """Score listing kr/m2 against the median sold kr/m2 of comp units. Max 8.

    The score is ``(1 - |ratio - 1|) * 8`` where ``ratio`` is the listing's
    square-metre price divided by the median of the comps' ``sqm_price``
    values, passed through ``_clamp`` with bounds 0 and 8. Comps without a
    truthy ``sqm_price`` are ignored.

    Returns 0 when there are no comps, no listing square-metre price, or no
    comp with a usable ``sqm_price``.
    """
    if not listings or listing_sqm_price is None:
        return 0.0
    sqm_prices = [u.sqm_price for u in listings if u.sqm_price]
    if not sqm_prices:
        return 0.0
    med = _median(sqm_prices)
    ratio = listing_sqm_price / med
    return float(_clamp((1.0 - abs(ratio - 1.0)) * 8.0, 0.0, 8.0))
def score_renovation(description: str) -> float:
    """Give a minor bonus for renovation upside. Max 3.

    Returns 3 when the lower-cased description contains any of the stems
    "renover", "oppusse", "potensial" or "moderniser", otherwise 0.
    """
    d = description.lower()
    if any(kw in d for kw in ["renover", "oppusse", "potensial", "moderniser"]):
        return 3.0
    return 0.0
def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
    """Compute the risk penalty. Returns 0 or a negative number.

    Penalties accumulate:
        no Eiendom.no unit at all           -> -8
        common costs > 8000                 -> -10
        common costs > 6000                 -> -5
        days on market > 120                -> -10
        days on market > 60                 -> -5
        "usikker" in the listing text       -> -5

    Args:
        ad: Listing exposing ``common_costs`` and ``listing_description``.
        unit: Enriched unit exposing ``days_on_market``, or None.
    """
    penalty = 0.0

    if unit is None:
        penalty -= 8.0

    fk = ad.common_costs or 0
    if fk > 8000:
        penalty -= 10.0
    elif fk > 6000:
        penalty -= 5.0

    if unit and unit.days_on_market:
        if unit.days_on_market > 120:
            penalty -= 10.0
        elif unit.days_on_market > 60:
            penalty -= 5.0

    if "usikker" in (ad.listing_description or "").lower():
        penalty -= 5.0

    return penalty
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------


def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
    """Run every scoring dimension for one listing and add the total.

    Args:
        ad: The FINN listing.
        unit: The matching Eiendom.no unit, or None when unresolved.
        similar_units: Comparable units used for the comp-sales dimension.

    Returns:
        A dict with one float per dimension, a ``"total"`` entry (the sum of
        the numeric dimensions passed through ``_clamp`` with bounds 0 and
        100) and, when the unit has coordinates, a ``"nearby_transit"``
        entry holding the ``nearby_transit`` result as non-scoring metadata.
    """
    description = ad.listing_description or ""

    # Collect nearby transit for informational output (not used in scoring)
    transit_nearby: dict | None = None
    if unit and unit.lat and unit.lng:
        transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
        if transit_nearby["tbane"] or transit_nearby["trikk"]:
            logger.debug("Nearby transit: %s", transit_nearby)

    scores: dict[str, Any] = {
        "floor": score_floor(ad, unit),
        "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)),
        "view_and_quiet": score_view_and_quiet(ad, description),
        "area_and_layout": score_area_and_layout(ad, unit),
        "hybel": score_hybel(description),
        "transport": score_transport(unit),
        "economy": score_economy(ad, unit),
        "market_position": score_market_position(unit),
        "comparable_sales": score_comparable_sales(
            similar_units,
            unit.listing_sqm_price if unit else None,
        ),
        "building_health": score_building_health(ad, description),
        "green_areas": score_green_areas(description),
        "renovation": score_renovation(description),
        "risk": score_risk(ad, unit),
    }

    # Numeric-only sum for total
    numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))}
    scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0))

    # Attach nearby transit as metadata (non-scoring); added after the total
    # so it can never take part in the sum.
    if transit_nearby is not None:
        scores["nearby_transit"] = transit_nearby
    return scores
def classify_ad(scores: dict[str, Any]) -> list[str]:
    """Turn a score dict into a list of category tags.

    Tags based on ``scores["total"]`` (missing -> 0):
        >= 75        -> "top_match"
        >= 60        -> "strong_candidate"
        45 .. < 60   -> "worth_viewing"
        30 .. < 45   -> "manual_review_required"
        < 30         -> "not_interesting"

    Tags based on single dimensions (a missing dimension counts as 0):
        hybel >= 7             -> "has_hybel"
        view_and_quiet >= 13   -> "premium_view"
        neighbourhood == 25    -> "preferred_neighbourhood"
        renovation > 0         -> "renovation_candidate"
        floor < 0              -> "ground_floor"
        risk < -5              -> "risk_object"
        area_and_layout <= 3   -> "too_small"

    Because a missing ``area_and_layout`` counts as 0, it also yields
    "too_small".
    """
    categories: list[str] = []
    total = scores.get("total", 0.0)

    if total >= 75:
        categories.append("top_match")
    if total >= 60:
        categories.append("strong_candidate")
    if 45 <= total < 60:
        categories.append("worth_viewing")
    if total < 30:
        categories.append("not_interesting")
    if 30 <= total < 45:
        categories.append("manual_review_required")

    if scores.get("hybel", 0.0) >= 7:
        categories.append("has_hybel")
    if scores.get("view_and_quiet", 0.0) >= 13:
        categories.append("premium_view")
    if scores.get("neighbourhood", 0.0) == 25:
        categories.append("preferred_neighbourhood")
    if scores.get("renovation", 0.0) > 0:
        categories.append("renovation_candidate")
    if scores.get("floor", 0.0) < 0:
        categories.append("ground_floor")
    if scores.get("risk", 0.0) < -5:
        categories.append("risk_object")
    if scores.get("area_and_layout", 0.0) <= 3:
        categories.append("too_small")

    return categories
+74 -18
View File
@@ -36,6 +36,43 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd:
return ad return ad
async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None:
    """Backfill ``ad.eiendom_unit_code`` by resolving it from the FINN URL.

    ``fetch_ad_details`` never populates ``eiendom_unit_code`` -- only the
    Eiendom.no resolver (``search_unit_from_finn_url``) can map a FINN
    listing to an Eiendom.no unit. Every enrichment path gates on this
    field, so without an explicit resolve step the gate is always falsy and
    enrichment silently no-ops.

    An ad that already carries a code is returned unchanged without any
    lookup. Otherwise the code is resolved once, written onto the ad in
    place and persisted to the cache so later cache hits skip the network
    round trip.

    IMPORTANT: callers must run this BEFORE serialising the ad with
    ``model_dump()`` -- otherwise the dumped dict carries a stale
    ``eiendom_unit_code: None`` even though enrichment succeeded.

    Returns:
        The unit code, or ``None`` if the listing cannot be resolved
        (e.g. new-build project ads, off-market addresses).
    """
    known_code = ad.eiendom_unit_code
    if known_code:
        return known_code

    resolved = await search_unit_from_finn_url(ad.url)
    if resolved is None or not resolved.unit_code:
        logger.info("No Eiendom.no unit resolved for finnkode %s", ad.finnkode)
        return None

    ad.eiendom_unit_code = resolved.unit_code

    # Persist only the backfilled ad. The resolver result is deliberately NOT
    # cached: it is a partial record (code + address + coords); the full unit
    # comes from get_or_fetch_eiendom_unit -> get_unit().
    cache_conn = init_db(FINN_CACHE_PATH)
    save_finn_ad(cache_conn, ad)

    logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, resolved.unit_code)
    return ad.eiendom_unit_code
async def get_or_fetch_eiendom_unit( async def get_or_fetch_eiendom_unit(
unit_code: str, force_refresh: bool = False unit_code: str, force_refresh: bool = False
) -> EiendomUnit | None: ) -> EiendomUnit | None:
@@ -84,7 +121,7 @@ async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> EiendomUnit | Non
# ============================================================================ # ============================================================================
# Orchestration functions delegate to analysis.py # Orchestration functions -- delegate to analysis.py
# ============================================================================ # ============================================================================
@@ -96,7 +133,13 @@ async def analyze_search(
include_details: bool = True, include_details: bool = True,
include_eiendom_no: bool = True, include_eiendom_no: bool = True,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Analyze a FINN search URL and return a ranked shortlist.""" """Analyze a FINN search URL and return a ranked shortlist.
NOTE: enrichment for search results lives in analysis.py. If that path
also reports `eiendom_enriched: 0`, it has the same root cause -- each
card's eiendom_unit_code must be resolved via ensure_eiendom_unit_code
(or search_unit_from_finn_url) before the enrichment gate.
"""
return await run_analysis_search( return await run_analysis_search(
search_url, search_url,
max_pages=max_pages, max_pages=max_pages,
@@ -114,15 +157,20 @@ async def analyze_ad(
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Fetch and enrich a single FINN ad with analysis.""" """Fetch and enrich a single FINN ad with analysis."""
ad = await get_or_fetch_ad(finnkode) ad = await get_or_fetch_ad(finnkode)
# Resolve BEFORE model_dump() so the serialised ad carries the backfilled
# eiendom_unit_code instead of a stale None.
unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
result: dict[str, Any] = { result: dict[str, Any] = {
"ad": ad.model_dump(), "ad": ad.model_dump(),
} }
if include_eiendom_no and ad.eiendom_unit_code: if unit_code:
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) unit = await get_or_fetch_eiendom_unit(unit_code)
if unit: if unit:
result["eiendom_unit"] = unit.model_dump() result["eiendom_unit"] = unit.model_dump()
if include_similar_units: if include_similar_units:
similar = await get_or_fetch_similar_units(ad.eiendom_unit_code) similar = await get_or_fetch_similar_units(unit_code)
result["similar_units"] = [s.model_dump() for s in similar] result["similar_units"] = [s.model_dump() for s in similar]
return result return result
@@ -132,16 +180,18 @@ async def analyze_ad_against_comps(
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Evaluate one listing against recent comparable sales.""" """Evaluate one listing against recent comparable sales."""
ad = await get_or_fetch_ad(finnkode) ad = await get_or_fetch_ad(finnkode)
# Resolve before model_dump() -- see analyze_ad.
unit_code = await ensure_eiendom_unit_code(ad)
result: dict[str, Any] = { result: dict[str, Any] = {
"ad": ad.model_dump(), "ad": ad.model_dump(),
} }
if ad.eiendom_unit_code: if unit_code:
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) unit = await get_or_fetch_eiendom_unit(unit_code)
if unit: if unit:
result["eiendom_unit"] = unit.model_dump() result["eiendom_unit"] = unit.model_dump()
comps = await get_or_fetch_similar_units( comps = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
ad.eiendom_unit_code, listing_status=listing_status
)
result["comparable_units"] = [c.model_dump() for c in comps] result["comparable_units"] = [c.model_dump() for c in comps]
return result return result
@@ -152,17 +202,20 @@ async def find_similar_to_liked(
"""Find properties similar to a listing the user has liked.""" """Find properties similar to a listing the user has liked."""
# Requires that feedback.verdict = "liked" exists for this finnkode # Requires that feedback.verdict = "liked" exists for this finnkode
ad = await get_or_fetch_ad(finnkode) ad = await get_or_fetch_ad(finnkode)
if not ad.eiendom_unit_code:
unit_code = await ensure_eiendom_unit_code(ad)
if not unit_code:
raise ValueError( raise ValueError(
f"Finnkode {finnkode} has no Eiendom.no unit_code; cannot find similar properties" f"Finnkode {finnkode} could not be resolved to an Eiendom.no unit; "
"cannot find similar properties"
) )
# TODO: verify feedback verdict = "liked" exists # TODO: verify feedback verdict = "liked" exists
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) unit = await get_or_fetch_eiendom_unit(unit_code)
if not unit: if not unit:
raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data") raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data")
similar = await get_or_fetch_similar_units(ad.eiendom_unit_code, listing_status=listing_status) similar = await get_or_fetch_similar_units(unit_code, listing_status=listing_status)
return { return {
"base_ad": ad.model_dump(), "base_ad": ad.model_dump(),
"similar_listings": [s.model_dump() for s in similar], "similar_listings": [s.model_dump() for s in similar],
@@ -177,15 +230,18 @@ async def compare_ads(
ads = [] ads = []
for finnkode in finnkoder: for finnkode in finnkoder:
ad = await get_or_fetch_ad(finnkode) ad = await get_or_fetch_ad(finnkode)
ad_data = ad.model_dump()
if include_eiendom_no and ad.eiendom_unit_code: # Resolve before model_dump() -- see analyze_ad.
unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None
ad_data = ad.model_dump()
if unit_code:
unit = await get_or_fetch_eiendom_unit(unit_code)
if unit: if unit:
ad_data["eiendom_unit"] = unit.model_dump() ad_data["eiendom_unit"] = unit.model_dump()
if include_comps: if include_comps:
comps = await get_or_fetch_similar_units( comps = await get_or_fetch_similar_units(
ad.eiendom_unit_code, listing_status="RECENTLY_SOLD" unit_code, listing_status="RECENTLY_SOLD"
) )
ad_data["comps"] = [c.model_dump() for c in comps] ad_data["comps"] = [c.model_dump() for c in comps]
-152
View File
@@ -1,152 +0,0 @@
#!/usr/bin/env python3
"""
Validate that all MCP tool definitions correctly match their service layer functions.
This catches parameter mismatches, missing arguments, and other integration issues.
"""
import inspect
from typing import get_type_hints
from finn_eiendom import mcp_server, service, eiendom_no
# Mapping of each MCP tool name to the function it is validated against and the
# parameter names that validate_tool_mapping expects to find on both sides.
TOOL_MAPPINGS = {
    # Tool name: (service function, expected params to check)
    "finn_analyze_search": (
        service.analyze_search,
        ["search_url", "max_pages", "detail_limit", "include_details", "include_eiendom_no"],
    ),
    "finn_get_ad": (service.get_or_fetch_ad, ["finnkode", "force_refresh"]),
    "finn_resolve_eiendom_unit": (eiendom_no.search_unit_from_finn_url, ["finn_url"]),
    "finn_get_eiendom_unit": (service.get_or_fetch_eiendom_unit, ["unit_code", "force_refresh"]),
    "finn_analyze_unit_images": (service.get_unit_images, ["unit_code", "force_refresh"]),
    "finn_get_similar_units": (eiendom_no.get_similar_units, ["unit_vector", "listing_status"]),
    "finn_build_unit_vector": (
        eiendom_no.get_unit,
        ["unit_code"],
    ),  # Checked against get_unit, not build_unit_vector
    "finn_decode_unit_vector": (eiendom_no.decode_unit_vector, ["unit_vector"]),
    "finn_analyze_ad": (
        service.analyze_ad,
        ["finnkode", "include_eiendom_no", "include_similar_units"],
    ),
    "finn_analyze_ad_against_comps": (
        service.analyze_ad_against_comps,
        ["finnkode", "listing_status"],
    ),
    "finn_find_similar_to_liked_ad": (
        service.find_similar_to_liked,
        ["finnkode", "mode", "listing_status"],
    ),
    "finn_compare_ads": (service.compare_ads, ["finnkoder", "include_eiendom_no", "include_comps"]),
    "finn_save_feedback": (service.save_feedback, ["finnkode", "verdict", "notes"]),
    "finn_get_shortlist": (service.get_shortlist, ["run_id", "limit"]),
    "finn_get_new_ads_since_last_run": (service.get_new_ads_since_last_run, ["search_url"]),
}
def get_function_params(func) -> dict:
    """Describe the parameters of ``func`` as a plain dict.

    Args:
        func: Any callable accepted by ``inspect.signature``.

    Returns:
        A dict keyed by parameter name, in signature order, where each value
        holds the parameter's ``"default"``, ``"annotation"`` and ``"kind"``
        (the kind's name, e.g. ``"KEYWORD_ONLY"``). Parameters named
        ``self`` or ``cls`` are left out.
    """
    signature = inspect.signature(func)
    return {
        param_name: {
            "default": spec.default,
            "annotation": spec.annotation,
            "kind": spec.kind.name,
        }
        for param_name, spec in signature.parameters.items()
        if param_name not in ("self", "cls")
    }
def validate_tool_mapping(
    tool_name: str, service_func, expected_params: list[str]
) -> tuple[bool, list[str]]:
    """Check that an MCP tool and its service function share the expected parameters.

    Args:
        tool_name: Attribute name of the tool on the ``mcp_server`` module.
        service_func: The function the tool is validated against.
        expected_params: Parameter names that must exist on both signatures.

    Returns:
        ``(passed, errors)``. ``passed`` is True exactly when ``errors`` is
        empty. A tool that cannot be found on ``mcp_server`` yields a single
        error. Parameters that exist only on the MCP tool are not reported.
    """
    errors: list[str] = []

    # Get the MCP tool function
    mcp_tool = getattr(mcp_server, tool_name, None)
    if not mcp_tool:
        errors.append(f"MCP tool '{tool_name}' not found in mcp_server module")
        return False, errors

    # Get function signatures
    mcp_params = get_function_params(mcp_tool)
    service_params = get_function_params(service_func)

    # Check that expected parameters exist in both
    for param in expected_params:
        if param not in mcp_params:
            errors.append(f" ✗ MCP tool missing parameter '{param}'")
        if param not in service_params and param != "client":  # client is optional in service layer
            errors.append(f" ✗ Service function missing parameter '{param}'")

    return not errors, errors
async def validate_service_imports():
    """Check that every service function used by the MCP layer is callable.

    Returns:
        A list of error messages, one per name on ``service`` that is not
        callable; empty when everything is fine.
    """
    candidates = (
        ("analyze_ad", service.analyze_ad),
        ("analyze_ad_against_comps", service.analyze_ad_against_comps),
        ("analyze_search", service.analyze_search),
        ("compare_ads", service.compare_ads),
        ("find_similar_to_liked", service.find_similar_to_liked),
        ("get_new_ads_since_last_run", service.get_new_ads_since_last_run),
        ("get_or_fetch_ad", service.get_or_fetch_ad),
        ("get_or_fetch_eiendom_unit", service.get_or_fetch_eiendom_unit),
        ("get_shortlist", service.get_shortlist),
        ("get_unit_images", service.get_unit_images),
        ("save_feedback", service.save_feedback),
    )
    return [
        f"Service function '{label}' is not callable"
        for label, target in candidates
        if not callable(target)
    ]
def main():
    """Validate every entry of ``TOOL_MAPPINGS`` and print a report.

    Returns:
        0 when every tool validated, 1 when at least one failed, so the
        value can be used as a process exit code.
    """
    rule = "=" * 80
    print(rule)
    print("MCP Tool Parameter Validation")
    print(rule)

    checked = 0
    succeeded = 0
    for tool_name, (service_func, expected_params) in TOOL_MAPPINGS.items():
        checked += 1
        ok, problems = validate_tool_mapping(tool_name, service_func, expected_params)
        if ok:
            print(f"{tool_name}")
            succeeded += 1
            continue
        # Failed tool: its name followed by one line per problem.
        print(f"{tool_name}")
        for problem in problems:
            print(f" {problem}")

    print("\n" + rule)
    print(f"Results: {succeeded}/{checked} tools validated")
    print(rule)

    return 0 if succeeded == checked else 1
if __name__ == "__main__":
    # Exit with main()'s status. The bare exit() helper is installed by the
    # site module for interactive use and is not guaranteed to exist in every
    # run, so SystemExit is raised directly.
    raise SystemExit(main())