From d3f4bfa8381082618ae60918efc63d54d3154a55 Mon Sep 17 00:00:00 2001 From: Ole Date: Sat, 23 May 2026 07:43:30 +0000 Subject: [PATCH] scoring and analysis --- Dockerfile | 4 +- fetch_trikk_coords.py | 238 +++++++++++ finn_eiendom/analysis.py | 75 +++- finn_eiendom/mcp_server.py | 6 +- finn_eiendom/scoring.py | 851 ++++++++++++++++++++++++++++++++----- finn_eiendom/service.py | 92 +++- validate_mcp_tools.py | 152 ------- 7 files changed, 1113 insertions(+), 305 deletions(-) create mode 100644 fetch_trikk_coords.py delete mode 100644 validate_mcp_tools.py diff --git a/Dockerfile b/Dockerfile index 2163d8d..0379107 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libxslt1-dev \ && rm -rf /var/lib/apt/lists/* -# Copy dependency files +# Copy source files COPY pyproject.toml ./ +COPY finn_eiendom ./finn_eiendom +COPY README.md ./ # Create virtual environment and install dependencies RUN python -m venv /venv && \ diff --git a/fetch_trikk_coords.py b/fetch_trikk_coords.py new file mode 100644 index 0000000..d17e901 --- /dev/null +++ b/fetch_trikk_coords.py @@ -0,0 +1,238 @@ +""" +fetch_trikk_coords.py +Henter P625-koordinater fra Wikidata for alle Oslo-trikkeholdeplasser +via Wikipedia sitelinks. Kjør: python3 fetch_trikk_coords.py +Krever: pip install requests beautifulsoup4 +""" + +import requests +from urllib.parse import unquote, urlparse, parse_qs +from bs4 import BeautifulSoup + +# Alle Wikipedia-lenker fra trikkelinjene (inkl. 
redlinks) +HTML = """ +Majorstuen +Bogstadveien +Rosenborg +Briskeby +Riddervolds plass +Inkognitogata +Nationaltheatret +Øvre Slottsgate +Dronningens gate +Jernbanetorget +Storgata +Nybrua +Schous plass +Olaf Ryes plass +Birkelunden +Biermanns gate +Torshov +Sandaker senter +Grefsenveien +Storo +Disen tram stop +Doktor Smiths vei +Glads vei +Grefsenplatået +Grefsen stadion +Kjelsåsalleen +Kjelsås +Frogner stadion +Vigelandsparken +Frogner plass +Elisenberg +Lille Frogner allé +Niels Juels gate +Solli +Ruseløkka +Akerbrygge +Kontraskjæret +Middelalderparken +Bislett +Dalsbergstien +Welhavens gate +Frydenlund +Holbergs plass +Tullinøkka +Tinghuset +Stortorvet +Heimdalsgata +Lakkegata skole +Sofienberg +Carl Berners plass +Rosenhoff +Sinsenterrassen +Sinsenkrysset +Grefsen stasjon +Homansbyen +""" + +# Redlinks har ingen Wikipedia-side — søk direkte på Wikidata label +WIKIDATA_DIRECT = { + # Redlinks — ingen Wikipedia-side + "Briskeby_tram_stop": "Q11962293", + "Riddervolds_plass_tram_stop": "Q19386557", + "Grefsenveien_tram_stop": "Q17778424", + "Doktor_Smiths_vei_tram_stop": None, + "Glads_vei_tram_stop": "Q17776371", + "Grefsenplatået_tram_stop": "Q11972531", + "Grefsen_stadion_tram_stop": "Q11972525", + "Kjelsåsalleen_tram_stop": None, + "Vigelandsparken_tram_stop": "Q19398059", + "Frogner_plass_tram_stop": "Q11970372", + "Lille_Frogner_allé_tram_stop": "Q19379373", + "Niels_Juels_gate_tram_stop": "Q11991378", + "Ruseløkka_tram_stop": None, + "Bislett_tram_stop": "Q11961163", + "Dalsbergstien_tram_stop": "Q17764618", + "Welhavens_gate_tram_stop": "Q12010485", + "Frydenlund_tram_stop": "Q19373143", + "Holbergs_plass_tram_stop": "Q11975623", + "Tullinøkka_(station)": None, + "Heimdalsgata_tram_stop": None, + "Lakkegata_skole_tram_stop": "Q11982987", + "Sofienberg_tram_stop": None, + "Rosenhoff_tram_stop": None, + "Sinsenterrassen_tram_stop": None, + "Torshov_(station)": None, + "Sandaker_senter_(station)": None, + "Frogner_stadion_tram_stop": None, + # 
Wikipedia-redirect-sider — QID til redirect-målet + "Stortorvet_(station)": "Q7620354", # → Stortorvet_tram_stop + "Stortinget_(station)": "Q188712", # → Stortinget T-bane (Tinghuset tram er ved siden) + "Sinsen_(station)": "Q19388523", # → Sinsenkrysset tram stop + "Biermanns_gate_(station)": "Q19363042", # → Biermanns gate tram stop + "Carl_Berners_plass_(station)": "Q890592", # → Carl Berners plass metro+tram + "Majorstuen_(station)": "Q686510", # → Majorstuen T-bane + "Grefsen_Station": "Q728583", # → Grefsen stasjon jernbane + "Kjelsås_tram_stop": "Q11981146", # → Kjelsås + "Storo_(station)": "Q932133", # → Storo T-bane + "Schous_plass_tram_stop": "Q12006491", # → Schous plass + "Jernbanetorget_(station)": "Q841481", # → Jernbanetorget T+tram + "Sandaker_senter_(station)": "Q12008217", # → Sandaker senter +} + + +def extract_titles(html): + soup = BeautifulSoup(html, "html.parser") + titles = [] + for a in soup.find_all("a"): + href = a.get("href", "") + label = a.get_text(strip=True) + if href.startswith("/wiki/"): + title = unquote(href.removeprefix("/wiki/")) + titles.append((title, label)) + elif href.startswith("/w/index.php"): + qs = parse_qs(urlparse(href).query) + t = qs.get("title", [None])[0] + if t: + titles.append((unquote(t), label)) + seen = {} + for title, label in titles: + seen.setdefault(title, label) + return seen # {wiki_title: display_label} + + +HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"} + + +def get_qids_from_wikipedia(titles): + """Wikipedia API: article titles → Wikidata QIDs.""" + url = "https://en.wikipedia.org/w/api.php" + result = {} + batch = [t for t in titles if t not in WIKIDATA_DIRECT] + for i in range(0, len(batch), 50): + chunk = batch[i : i + 50] + r = requests.get( + url, + params={ + "action": "query", + "format": "json", + "redirects": "1", + "prop": "pageprops", + "ppprop": "wikibase_item", + "titles": "|".join(chunk), + }, + headers=HEADERS, + timeout=30, + ) + print(f"Status: 
{r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}") + for page in r.json()["query"]["pages"].values(): + t = page.get("title", "").replace(" ", "_") + qid = page.get("pageprops", {}).get("wikibase_item") + if qid: + result[t] = qid + # Merge known QIDs + for t, qid in WIKIDATA_DIRECT.items(): + if qid: + result[t] = qid + return result + + +def get_p625(qids): + """Wikidata API: QIDs → P625 coordinates.""" + url = "https://www.wikidata.org/w/api.php" + result = {} + unique = list(set(qids.values())) + for i in range(0, len(unique), 50): + chunk = unique[i : i + 50] + r = requests.get( + url, + params={ + "action": "wbgetentities", + "format": "json", + "ids": "|".join(chunk), + "props": "claims|labels", + "languages": "en|nb", + }, + headers=HEADERS, + timeout=30, + ) + for qid, entity in r.json()["entities"].items(): + label = ( + entity.get("labels", {}).get("en", {}).get("value") + or entity.get("labels", {}).get("nb", {}).get("value") + or qid + ) + p625 = entity.get("claims", {}).get("P625", []) + coords = None + if p625: + v = p625[0]["mainsnak"]["datavalue"]["value"] + coords = (round(v["latitude"], 5), round(v["longitude"], 5)) + result[qid] = {"label": label, "coords": coords} + return result + + +def main(): + title_to_label = extract_titles(HTML) + print(f"Extracted {len(title_to_label)} unique titles\n") + + title_to_qid = get_qids_from_wikipedia(title_to_label) + print(f"Resolved {len(title_to_qid)} QIDs\n") + + qid_to_data = get_p625(title_to_qid) + + print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}") + print("-" * 120) + no_coords = [] + for title in sorted(title_to_label): + qid = title_to_qid.get(title, "—") + if qid == "—": + coords = "NO QID" + label = "?" 
+ else: + d = qid_to_data.get(qid, {}) + coords = str(d.get("coords") or "NO P625") + label = d.get("label", "?") + print(f"{title:<45} {qid:<12} {label:<35} {coords}") + if "NO" in str(coords): + no_coords.append(title) + + print(f"\n\nMissing coords: {len(no_coords)}") + for t in no_coords: + print(f" {t}") + + +if __name__ == "__main__": + main() diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py index 9a62449..9a7a5bd 100644 --- a/finn_eiendom/analysis.py +++ b/finn_eiendom/analysis.py @@ -19,6 +19,12 @@ def _normalize_description(text: str | None) -> str: return text.lower() if text else "" +def _is_resale_listing(url: str) -> bool: + """True for ordinary resale ads. Project / new-build ads use different URL + paths that fetch_ad_details cannot resolve (it builds a /homes/ URL).""" + return "/realestate/homes/" in url + + def _build_ad_summary( ad: FinnAd, enriched: EiendomUnit | None, @@ -95,12 +101,17 @@ async def analyze_ad( if enriched is not None: cache.save_eiendom_unit(conn, enriched) - if enriched and enriched.unit_vector: - similar_units = cache.get_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD") - if not similar_units: - similar_units = await eiendom_no.get_similar_units(enriched.unit_vector) - if similar_units: - cache.save_similar_units(conn, enriched.unit_code, "RECENTLY_SOLD", similar_units) + if enriched: + # EiendomUnit.unit_vector is NOT populated by get_unit / enrich -- the + # field comes back None. Reading enriched.unit_vector directly leaves + # this block dead and similar_units permanently empty. Build the vector + # from the unit fields instead (fall back to the field if a future + # endpoint ever populates it). + vector = enriched.unit_vector or eiendom_no.build_unit_vector(enriched) + if vector: + # No dedicated cache table for similar units (per PRD) -- fetch + # fresh each call, consistent with service.get_or_fetch_similar_units. 
+ similar_units = await eiendom_no.get_similar_units(vector) scores = scoring.score_ad(finn_ad, enriched, similar_units) categories = scoring.classify_ad(scores) @@ -120,6 +131,26 @@ async def analyze_ad( return result +async def _analyze_card(card, conn, *, include_eiendom_no: bool, client) -> dict: + """Fetch details + enrich a single search card. Raises on unrecoverable + errors; the caller is responsible for catching and skipping.""" + finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) + if finn_ad is None: + finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client) + + unit_code = None + if include_eiendom_no: + try: + matched_unit = await eiendom_no.search_unit_from_finn_url(card.url) + unit_code = matched_unit.unit_code if matched_unit else None + except Exception as exc: + # A failed unit resolution is non-fatal -- proceed without enrichment. + logger.warning("Eiendom.no unit search failed for %s: %s", card.finnkode, exc) + unit_code = None + + return await analyze_ad(finn_ad, unit_code=unit_code) + + async def analyze_search( search_url: str, max_pages: int = FINN_MAX_SEARCH_PAGES, @@ -139,21 +170,28 @@ async def analyze_search( ) results = [] enriched_count = 0 + skipped_count = 0 if fetch_details: for card in cards[:detail_limit]: - finn_ad = cache.get_finn_ad(conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) - if finn_ad is None: - finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client) - unit_code = None - if include_eiendom_no: - try: - matched_unit = await eiendom_no.search_unit_from_finn_url(card.url) - unit_code = matched_unit.unit_code if matched_unit else None - except Exception as exc: - logger.warning("Eiendom.no unit search failed: %s", exc) - unit_code = None - result = await analyze_ad(finn_ad, unit_code=unit_code) + # Project / new-build ads are not resale listings and fetch_ad_details + # cannot resolve them -- skip up front rather than 404 mid-run. 
+ if not _is_resale_listing(card.url): + logger.info("Skipping non-resale card %s (%s)", card.finnkode, card.url) + skipped_count += 1 + continue + + # One bad card (stale finnkode, removed ad, transient network error) + # must not abort the whole search -- isolate each card. + try: + result = await _analyze_card( + card, conn, include_eiendom_no=include_eiendom_no, client=client + ) + except Exception as exc: + logger.warning("Skipping card %s: %s", card.finnkode, exc) + skipped_count += 1 + continue + if result.get("eiendom_unit"): enriched_count += 1 results.append(result) @@ -166,6 +204,7 @@ async def analyze_search( "summary": { "total_listings": len(cards), "analyzed_listings": len(results), + "skipped_listings": skipped_count, "eiendom_enriched": enriched_count, }, } diff --git a/finn_eiendom/mcp_server.py b/finn_eiendom/mcp_server.py index be38bad..d3c1389 100644 --- a/finn_eiendom/mcp_server.py +++ b/finn_eiendom/mcp_server.py @@ -75,7 +75,7 @@ async def finn_analyze_search( detail_limit=detail_limit, include_eiendom_no=include_eiendom_no, ) - return json.dumps(result) + return json.dumps(result, default=str) except Exception as e: logger.error(f"Error analyzing search: {e}") return json.dumps({"error": True, "message": str(e)}) @@ -164,7 +164,7 @@ async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENT """Fetch similar units from Eiendom.no.""" try: units = await get_similar_units(unit_vector, listing_status) - return json.dumps([unit.model_dump() for unit in units]) + return json.dumps([unit.model_dump() for unit in units], default=str) except Exception as e: logger.error(f"Error fetching similar units: {e}") return json.dumps({"error": True, "message": str(e)}) @@ -222,7 +222,7 @@ async def finn_analyze_ad( include_eiendom_no=include_eiendom_no, include_similar_units=include_similar_units, ) - return render_ad(result.get("ad", {}), "json") + return json.dumps(result, default=str) except Exception as e: logger.error(f"Error 
analyzing ad {finnkode}: {e}") return json.dumps({"error": True, "message": str(e)}) diff --git a/finn_eiendom/scoring.py b/finn_eiendom/scoring.py index 64627f9..01ed412 100644 --- a/finn_eiendom/scoring.py +++ b/finn_eiendom/scoring.py @@ -1,146 +1,771 @@ -"""Scoring engine for FINN listings enriched with Eiendom.no data.""" +"""Scoring engine tuned for Ole & partner's apartment search criteria. + +Priority hierarchy (stated): + MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access + HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general), + quiet setting, hybel with own bath + kitchen + MEDIUM : sameie economy, green areas / walking terrain, price vs market + BONUS : renovation upside (acceptable, not required) + +Dimension caps (non-risk total max ≈ 113, clamped to 100): + floor -15..0 – ground floor penalty only; floor number alone without building height = no info + neighbourhood 25 – preferred area anchors, distance-based + view_and_quiet 20 – view quality + quiet setting; 0 if no balcony + area_and_layout 15 – sqm + bedroom count; hard penalty < 80 m² + hybel 12 – hybel with own bath + kitchen + transport 10 – walking distance to T-bane / trikk + economy 8 – listing price vs Eiendom.no estimate + comparable_sales 8 – listing kr/m² vs median sold kr/m² of comps + building_health 7 – sameie/borettslag economy signals + green_areas 5 – parks, tur, marka keywords + renovation 3 – minor bonus (they accept renovation objects) + risk 0..-30 – stale listing, high costs, missing data +""" import logging +import math from typing import Any from .models import EiendomUnit, SimilarUnit logger = logging.getLogger(__name__) - -def _clamp(value: float, min_value: float, max_value: float) -> float: - return max(min_value, min(max_value, value)) +# --------------------------------------------------------------------------- +# Geometry helpers +# --------------------------------------------------------------------------- -def score_market_position(unit: 
EiendomUnit | None) -> float: - if unit is None or unit.estimated_selling_price is None or unit.listing_price is None: - return 0.0 - ratio = unit.listing_price / unit.estimated_selling_price - if ratio <= 0.9: - return 20.0 - if ratio <= 1.0: - return 16.0 + (1.0 - ratio) * 40.0 - if ratio <= 1.1: - return 12.0 - (ratio - 1.0) * 40.0 - return 5.0 +def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float: + """Flat-earth approximation — accurate enough within Oslo (~59.9°N). + 1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude. + """ + dlat = (lat2 - lat1) * 111.0 + dlng = (lng2 - lng1) * 56.0 + return math.sqrt(dlat**2 + dlng**2) -def score_economy(ad: Any, unit: EiendomUnit | None) -> float: - if ad.total_price is None: - return 0.0 - if unit and unit.estimated_selling_price: - ratio = ad.total_price / unit.estimated_selling_price - if ratio <= 0.95: +def _clamp(value: float, lo: float, hi: float) -> float: + return max(lo, min(hi, value)) + + +def _median(values: list[float]) -> float: + s = sorted(values) + mid = len(s) // 2 + return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2.0 + + +# --------------------------------------------------------------------------- +# Preferred neighbourhood anchors +# --------------------------------------------------------------------------- + +_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [ + # (label, lat, lng) — label used only for debug logging + ("Grünerløkka", 59.9240, 10.7573), + ("Torshov", 59.9340, 10.7620), + ("Rodeløkka", 59.9315, 10.7660), + ("Kampen", 59.9125, 10.7760), + ("Sagene", 59.9400, 10.7590), + ("Nydalen", 59.9520, 10.7540), + ("Storo", 59.9450, 10.7670), + ("Grefsen", 59.9580, 10.7720), + ("Fagerborg", 59.9280, 10.7300), + ("St. 
Hans Haugen", 59.9300, 10.7400), + ("Ullevål", 59.9400, 10.7270), + ("Majorstua", 59.9210, 10.7170), + ("Frogner", 59.9160, 10.7150), + ("Løren", 59.9310, 10.7960), + ("Torshovdalen", 59.9295, 10.7630), + ("Rosenhoff", 59.9255, 10.7775), +] + + +# --------------------------------------------------------------------------- +# Transit network — all T-bane and trikk stops. +# +# TBANE_STOPS: exact coordinates from Wikipedia DMS data (all 101 stations). +# TRIKK_STOPS: verified coordinates (Wikidata/Wikipedia/OSM) merged with estimates. +# +# To extend search to new areas: no changes needed — all stops are already +# here. score_transport automatically finds the nearest stop for any address. +# --------------------------------------------------------------------------- + +TBANE_STOPS: dict[str, tuple[float, float]] = { + # All 101 stations — Wikipedia DMS converted to decimal degrees + "Ammerud": (59.957922, 10.871165), + "Avløs": (59.913859, 10.552926), + "Bekkestua": (59.918097, 10.588031), + "Berg": (59.951142, 10.744801), + "Bergkrystallen": (59.867091, 10.821206), + "Besserud": (59.957760, 10.673092), + "Bjørnsletta": (59.926902, 10.635458), + "Blindern": (59.940052, 10.716262), + "Bogerud": (59.875833, 10.841944), + "Borgen": (59.934548, 10.696000), + "Brattlikollen": (59.888076, 10.801191), + "Brynseng": (59.909169, 10.811834), + "Bøler": (59.884167, 10.845278), + "Carl Berners plass": (59.926592, 10.778360), + "Eiksmarka": (59.946431, 10.622320), + "Ekraveien": (59.950836, 10.635822), + "Ellingsrudåsen": (59.936311, 10.916634), + "Ensjø": (59.913364, 10.786986), + "Forskningsparken": (59.943513, 10.720425), + "Frognerseteren": (59.979018, 10.675857), + "Frøen": (59.934167, 10.709167), + "Furuset": (59.941578, 10.897247), + "Gaustad": (59.945625, 10.709814), + "Gjettum": (59.906221, 10.527155), + "Gjønnes": (59.918097, 10.579877), + "Godlia": (59.908523, 10.835352), + "Grorud": (59.961413, 10.881701), + "Grønland": (59.912912, 10.759563), + "Gråkammen": (59.954838, 
10.701842), + "Gulleråsen": (59.955526, 10.696521), + "Hasle": (59.925302, 10.794454), + "Haslum": (59.915021, 10.563183), + "Hauger": (59.910957, 10.510713), + "Haugerud": (59.922592, 10.855350), + "Hellerud": (59.910079, 10.829953), + "Helsfyr": (59.911514, 10.803680), + "Holmen": (59.946296, 10.666609), + "Holmenkollen": (59.960489, 10.662446), + "Holstein": (59.960403, 10.740552), + "Hovseter": (59.946328, 10.654694), + "Høyenhall": (59.905769, 10.819860), + "Jar": (59.926592, 10.621762), + "Jernbanetorget": (59.912116, 10.751211), + "Kalbakken": (59.954553, 10.866750), + "Karlsrud": (59.880453, 10.805225), + "Kolsås": (59.914416, 10.501366), + "Kringsjå": (59.963690, 10.734930), + "Lambertseter": (59.873289, 10.810440), + "Lijordet": (59.940901, 10.616559), + "Lillevann": (59.980481, 10.653037), + "Lindeberg": (59.932979, 10.882087), + "Linderud": (59.940976, 10.839214), + "Løren": (59.929972, 10.790806), + "Majorstuen": (59.929904, 10.714931), + "Makrellbekken": (59.941957, 10.673845), + "Manglerud": (59.897957, 10.812435), + "Midtstuen": (59.961299, 10.682911), + "Montebello": (59.936806, 10.670471), + "Mortensrud": (59.849083, 10.828657), + "Munkelia": (59.868914, 10.812500), + "Nationaltheatret": (59.915045, 10.733039), + "Nydalen": (59.948864, 10.765250), + "Oppsal": (59.892866, 10.840201), + "Ringstabekk": (59.916182, 10.593696), + "Ris": (59.948069, 10.705147), + "Risløkka": (59.932355, 10.822713), + "Rommen": (59.962127, 10.908968), + "Romsås": (59.962272, 10.890777), + "Ryen": (59.895807, 10.805617), + "Røa": (59.946791, 10.643874), + "Rødtvet": (59.951416, 10.859535), + "Sinsen": (59.938085, 10.781343), + "Skogen": (59.975246, 10.647415), + "Skullerud": (59.866754, 10.839171), + "Skødalen": (59.961787, 10.690789), + "Skøyenåsen": (59.898866, 10.836516), + "Slemdal": (59.949896, 10.695662), + "Smestad": (59.937315, 10.683609), + "Sognsvann": (59.967127, 10.733943), + "Steinerud": (59.939083, 10.704345), + "Storo": (59.944545, 10.778768), + 
"Stortinget": (59.913047, 10.741469), + "Stovner": (59.962616, 10.923414), + "Trosterud": (59.927152, 10.864041), + "Tveita": (59.914354, 10.841961), + "Tøyen": (59.915214, 10.774670), + "Tåsen": (59.953270, 10.752439), + "Ullernåsen": (59.930635, 10.654796), + "Ullevål stadion": (59.946629, 10.732226), + "Ulsrud": (59.889970, 10.849428), + "Veitvet": (59.944700, 10.847304), + "Vestli": (59.972324, 10.929337), + "Vettakollen": (59.959913, 10.695705), + "Vinderen": (59.942803, 10.704761), + "Voksenkollen": (59.980076, 10.665193), + "Voksenlia": (59.966937, 10.655082), + "Vollebekk": (59.935865, 10.831039), + "Åsjordet": (59.928764, 10.646889), + "Økern": (59.928592, 10.804152), + "Østerås": (59.939445, 10.608587), + "Østhorn": (59.956944, 10.749779), +} + +# Trikk stops — kept in two tables (verified, then estimated) and merged below. +# Grouped by line corridor for readability. +# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia +# DMS infoboxes, or OpenStreetMap. Keys match display names used in scoring. 
+# Source tag format: Wikidata QID | "shared T-bane" | "OSM node " | "Wikipedia" +TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = { + # ── Briskeby-linjen (l11/19) ───────────────────────────────────────── + "Majorstuen": (59.929904, 10.714931), # shared T-bane + "Bogstadveien": (59.92611, 10.72167), # Q19372022 + "Rosenborg": (59.92417, 10.72389), # Q7899658 + "Briskeby": (59.92048, 10.71767), # Q11962293 + "Riddervolds plass": (59.91896, 10.72026), # Q19386557 + "Inkognitogata": (59.91565, 10.72114), # Q11977313 + "Nationaltheatret": (59.91504, 10.73304), # shared T-bane + # ── Sentrum (shared l11/12/13/17/18/19) ────────────────────────────── + "Øvre Slottsgate": (59.9118, 10.7417), # Q31079249 + "Dronningens gate": (59.91053, 10.74697), # Q29828354 + "Jernbanetorget": (59.912116, 10.751211), # shared T-bane + "Storgata": (59.91396, 10.75141), # Q109484341 + "Nybrua": (59.91707, 10.75834), # Q104867506 + "Stortorvet": (59.91310, 10.74530), # Q7620354 + "Bjørvika": (59.90806, 10.75639), # Wikipedia + # ── Grünerløkka–Torshov-linjen (l11/12/18) ─────────────────────────── + "Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia + "Olaf Ryes plass": (59.9231, 10.7592), # Q4993079 + "Birkelunden": (59.9271, 10.7601), # Q4916412 + "Biermanns gate": (59.93028, 10.76104), # Wikipedia + "Sandaker senter": (59.93889, 10.76861), # Wikipedia + "Grefsenveien": (59.94278, 10.77344), # Q17778424 + "Storo": (59.944545, 10.778768), # shared T-bane + # ── Kjelsåslinjen (l11/12) ─────────────────────────────────────────── + "Disen": (59.94627, 10.78729), # Q11965753 + "Glads vei": (59.95235, 10.78533), # Q17776371 + "Grefsenplatået": (59.9560, 10.78573), # Q11972531 + "Grefsen stadion": (59.96008, 10.78475), # Q11972525 + "Kjelsås": (59.96611, 10.78278), # Wikipedia + # ── Frogner-linjen (l12) ───────────────────────────────────────────── + "Vigelandsparken": (59.92457, 10.70815), # Q19398059 + "Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 
30560564 + "Elisenberg": (59.91944, 10.70861), # Q5361695 + "Lille Frogner allé": (59.9180, 10.7120), # Q19379373 + "Niels Juels gate": (59.91634, 10.71520), # Q11991378 + "Solli": (59.91486, 10.71906), # Q7558364 + # ── Vika-linjen (l12) ──────────────────────────────────────────────── + "Aker Brygge": (59.9110, 10.7299), # Q4700639 + "Kontraskjæret": (59.91087, 10.73592), # Q11998807 + # ── Lilleaker-linjen (l13) ─────────────────────────────────────────── + "Lilleaker": (59.92074, 10.63580), # Wikipedia + "Sollerud": (59.92104, 10.64309), # Wikipedia + "Furulund": (59.91990, 10.65013), # Wikipedia + "Ullern": (59.92429, 10.65858), # Wikipedia + "Abbediengen": (59.92517, 10.66716), # Wikipedia + "Hoff": (59.92500, 10.67488), # Wikipedia + "Skøyen": (59.92384, 10.68034), # Wikipedia + # ── Skøyen-linjen (l13) ────────────────────────────────────────────── + "Thune": (59.92186, 10.68742), # Wikipedia + "Nobels gate": (59.91758, 10.69866), # Wikipedia + "Skarpsno": (59.91430, 10.70234), # Wikipedia + "Skillebekk": (59.91277, 10.71103), # Wikipedia + # ── Ekeberg-linjen (l13/19) ────────────────────────────────────────── + "Middelalderparken": (59.90639, 10.76417), # Q99971403 + "Oslo Hospital": (59.9032, 10.7674), # Wikipedia + "Ekebergparken": (59.8977, 10.7593), # Wikipedia + "Jomfrubråten": (59.8883, 10.7706), # Wikipedia + "Sportsplassen": (59.8860, 10.7736), # Wikipedia + "Holtet": (59.88151, 10.78415), # Wikipedia + "Sørli": (59.87493, 10.78709), # Wikipedia + "Kastellet": (59.87106, 10.79036), # Wikipedia + "Bråten": (59.86714, 10.79244), # Wikipedia + "Sæter": (59.86102, 10.79870), # Wikipedia + "Ljabru": (59.85335, 10.80089), # Wikipedia + # ── Ullevål Hageby-linjen (l17/18) ─────────────────────────────────── + "Rikshospitalet": (59.947768, 10.714716), # Wikipedia + "Gaustadalleen": (59.9454, 10.7172), # Wikipedia + "Forskningsparken": (59.943513, 10.720425), # shared T-bane + "Universitetet Blindern": (59.9421, 10.7243), # Wikipedia + "John Collets 
plass": (59.9403, 10.7290), # Wikipedia + "Ullevål sykehus": (59.9361, 10.7318), # Wikipedia + "Adamstuen": (59.9326, 10.7345), # Wikipedia + "Stensgata": (59.92957, 10.73303), # Q7607927 + "Bislett": (59.92599, 10.73108), # Q11961163 + "Dalsbergstien": (59.92354, 10.73163), # Q17764618 + "Welhavens gate": (59.92131, 10.72968), # Q12010485 + "Frydenlund": (59.92086, 10.73317), # Q19373143 + "Holbergs plass": (59.91876, 10.73453), # Q11975623 + # ── Sinsen-linjen (l17) ────────────────────────────────────────────── + "Lakkegata skole": (59.92055, 10.76834), # Q11982987 + "Carl Berners plass": (59.926592, 10.778360), # shared T-bane + "Sinsenkrysset": (59.93911, 10.78340), # Q19388523 + "Grefsen stasjon": (59.94167, 10.78056), # Wikipedia + # ── Homansbyen-linjen (l19) ─────────────────────────────────────────── + "Homansbyen": (59.92278, 10.72639), # Q5887760 +} + +# Estimated trikk stop coordinates — no Wikidata P625 found. +# Derived from linear interpolation between verified neighbours, +# or placed from map/street knowledge. Max error ~150-250 m. +# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED. 
+TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = { + # ── Sentrum ─────────────────────────────────────────────────────────── + "Tinghuset": (59.9146, 10.7403), # Ullevål Hageby-l ved Stortinget T + # ── Grünerløkka–Torshov-linjen ─────────────────────────────────────── + "Torshov": (59.9332, 10.7643), # interp Biermanns gate↔Sandaker + # ── Kjelsåslinjen ──────────────────────────────────────────────────── + "Doktor Smiths vei": (59.9503, 10.7867), # interp Disen↔Kjelsås t=0.20 + "Kjelsåsalleen": (59.9641, 10.7833), # interp Disen↔Kjelsås t=0.90 + # ── Frogner-linjen ─────────────────────────────────────────────────── + "Frogner stadion": (59.9167, 10.7038), # Kirkeveien S for Vigelandsparken + # ── Vika-linjen ────────────────────────────────────────────────────── + "Ruseløkka": (59.9120, 10.7258), # interp Solli↔Kontraskjæret + # ── Ullevål Hageby-linjen ───────────────────────────────────────────── + "Tullinøkka": (59.9163, 10.7349), # interp Holbergs plass↔Tinghuset + # ── Sinsen-linjen ──────────────────────────────────────────────────── + "Heimdalsgata": (59.9188, 10.7633), # interp Nybrua↔Lakkegata skole + "Sofienberg": (59.9236, 10.7734), # interp Lakkegata skole↔Carl Berners + "Rosenhoff": (59.9307, 10.7800), # interp Carl Berners↔Sinsenkrysset t=0.33 + "Sinsenterrassen": (59.9350, 10.7817), # interp Carl Berners↔Sinsenkrysset t=0.67 +} + +# Merged — verified takes precedence if a key appears in both (shouldn't happen). 
+TRIKK_STOPS: dict[str, tuple[float, float]] = { + **TRIKK_STOPS_ESTIMATED, + **TRIKK_STOPS_VERIFIED, +} + +# --------------------------------------------------------------------------- +# Transit helpers +# --------------------------------------------------------------------------- + +_WALK_SPEED_KMH = 5.0 # avg walking speed + + +def _nearest_stop( + lat: float, lng: float, stops: dict[str, tuple[float, float]] +) -> tuple[str, float]: + """Return (stop_name, distance_km) for the nearest stop in a dict.""" + best_name, best_dist = "", float("inf") + for name, (slat, slng) in stops.items(): + d = _distance_km(lat, lng, slat, slng) + if d < best_dist: + best_dist, best_name = d, name + return best_name, best_dist + + +def nearby_transit( + lat: float, lng: float, max_walk_min: float = 10.0 +) -> dict[str, list[tuple[str, float]]]: + """Return T-bane and trikk stops within max_walk_min minutes walk. + + Returns: + { + "tbane": [("Carl Berners plass", 0.28), ...], # sorted by distance + "trikk": [("Rosenhoff", 0.19), ...], + } + All distances in km. + """ + max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH + + tbane = sorted( + [ + (n, _distance_km(lat, lng, la, lo)) + for n, (la, lo) in TBANE_STOPS.items() + if _distance_km(lat, lng, la, lo) <= max_km + ], + key=lambda x: x[1], + ) + trikk = sorted( + [ + (n, _distance_km(lat, lng, la, lo)) + for n, (la, lo) in TRIKK_STOPS.items() + if _distance_km(lat, lng, la, lo) <= max_km + ], + key=lambda x: x[1], + ) + return {"tbane": tbane, "trikk": trikk} + + +# --------------------------------------------------------------------------- +# Dimension functions +# --------------------------------------------------------------------------- + + +def score_neighbourhood( + unit: EiendomUnit | None, + address: str | None = None, + district: str | None = None, +) -> float: + """Distance to nearest preferred-area anchor. 
Max 25.""" + if unit and unit.lat and unit.lng: + distances = [ + (_distance_km(unit.lat, unit.lng, lat, lng), label) + for label, lat, lng in _PREFERRED_ANCHORS + ] + min_dist, nearest = min(distances) + logger.debug("Nearest anchor: %s at %.2f km", nearest, min_dist) + if min_dist < 0.5: + return 25.0 + if min_dist < 1.0: return 20.0 - if ratio <= 1.0: + if min_dist < 1.5: return 15.0 - if ratio <= 1.05: + if min_dist < 2.5: return 10.0 - return 6.0 - if ad.asking_price and ad.total_price <= ad.asking_price: - return 12.0 - return 8.0 + if min_dist < 4.0: + return 5.0 + return 2.0 - -def score_comparable_sales(listings: list[SimilarUnit], listing_price: int | None) -> float: - if not listings or listing_price is None: - return 0.0 - selling_prices = [unit.selling_price for unit in listings if unit.selling_price] - if not selling_prices: - return 0.0 - average = sum(selling_prices) / len(selling_prices) - ratio = listing_price / average - score = (1.0 - abs(ratio - 1.0)) * 20.0 - return float(_clamp(score, 0.0, 20.0)) - - -def score_location(address: str | None, district: str | None) -> float: - if not address and not district: - return 0.0 - if district and "oslo" in district.lower(): - return 15.0 - if address and "oslo" in address.lower(): - return 12.0 - return 7.0 - - -def score_layout_and_potential(description: str | None, rooms: int | None) -> float: - score = 0.0 - if rooms and rooms >= 4: - score += 10.0 - if description and "potensial" in description.lower(): - score += 8.0 - return float(_clamp(score, 0.0, 20.0)) - - -def score_outdoor_and_view(description: str | None) -> float: - if not description: - return 0.0 - score = 5.0 if "utsikt" in description.lower() or "balkong" in description.lower() else 0.0 - return float(_clamp(score, 0.0, 15.0)) - - -def score_rental_potential(description: str | None) -> float: - if not description: - return 0.0 - score = 10.0 if "hybel" in description.lower() or "leie" in description.lower() else 0.0 - return score - - 
-def score_renovation_upside(description: str | None, asking_price: int | None) -> float: - score = 0.0 - if description and "renover" in description.lower(): - score += 10.0 - if asking_price and asking_price > 0: - score += 5.0 - return float(_clamp(score, 0.0, 15.0)) - - -def score_risk(description: str | None, unit: EiendomUnit | None) -> float: - if unit is None: - return -10.0 - if description and "usikker" in description.lower(): - return -10.0 + haystack = " ".join(filter(None, [address, district])).lower() + if "oslo" in haystack: + return 5.0 return 0.0 -def score_ad( - ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit] -) -> dict[str, float]: - scores = { +def score_transport(unit: EiendomUnit | None) -> float: + """Walking distance to nearest T-bane or trikk stop. Max 10. + + Searches ALL stops in TBANE_STOPS and TRIKK_STOPS — no manual + curation needed when adding new search areas. + + Distance bands: + < 400 m → 10 pts (~5 min walk) + < 800 m → 8 pts (~10 min — stated threshold) + < 1200 m → 4 pts (~15 min) + ≥ 1200 m → 0 pts + + Falls back to 0 when no coordinates available. + """ + if unit is None or unit.lat is None or unit.lng is None: + return 0.0 + + _, tbane_dist = _nearest_stop(unit.lat, unit.lng, TBANE_STOPS) + _, trikk_dist = _nearest_stop(unit.lat, unit.lng, TRIKK_STOPS) + min_dist = min(tbane_dist, trikk_dist) + + logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_dist, trikk_dist) + + if min_dist < 0.4: + return 10.0 + if min_dist < 0.8: + return 8.0 + if min_dist < 1.2: + return 4.0 + return 0.0 + + +def score_view_and_quiet(ad: Any, description: str) -> float: + """View quality × quiet setting. Max 20. 
Returns 0 if no balcony.""" + if not (ad.has_balcony or ad.has_terrace): + return 0.0 + + d = description.lower() + + view = 0.0 + if any(kw in d for kw in ["sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt"]): + view = 15.0 + elif any(kw in d for kw in ["panorama", "panoramautsikt", "vidt utsyn", "vidstrakt"]): + view = 13.0 + elif any(kw in d for kw in ["over hustak", "hustak", "over takene"]): + view = 10.0 + elif "utsikt" in d: + view = 7.0 + + quiet = 0.0 + if any( + kw in d + for kw in [ + "rolig", + "tilbaketrukket", + "skjermet", + "bakgård", + "gårdsrom", + "stille", + "blindvei", + ] + ): + quiet += 5.0 + if any(kw in d for kw in ["støy", "bilvei", "trafikkert", "støyutsatt"]): + quiet -= 5.0 + + return float(_clamp(view + quiet, 0.0, 20.0)) + + +def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float: + """Main unit size + bedroom count. Max 15.""" + area = (unit.usable_area if unit else None) or ad.area_m2 or 0 + + if area < 60: + return 0.0 + if area < 80: + return 3.0 + if area < 90: + area_score = 8.0 + elif area < 105: + area_score = 11.0 + else: + area_score = 14.0 + + bedrooms = ad.bedrooms or 0 + if bedrooms >= 3: + bedroom_bonus = 1.0 + elif bedrooms == 2: + bedroom_bonus = 0.5 + else: + bedroom_bonus = 0.0 + + return float(_clamp(area_score + bedroom_bonus, 0.0, 15.0)) + + +def score_hybel(description: str) -> float: + """Hybel with own bath + kitchen. 
Max 12.""" + d = description.lower() + + if "hybel" not in d and "sokkelleil" not in d and "utleiedel" not in d: + return 0.0 + + _POTENTIAL = [ + "mulighet for hybel", + "mulighet til hybel", + "mulig hybel", + "kan etableres hybel", + "kan bygges om til hybel", + "tilrettelagt for hybel", + "potensial for hybel", + "hybelpotensial", + ] + is_potential = any(sig in d for sig in _POTENTIAL) + if not is_potential and "mulighet" in d and "hybel" in d: + for sentence in d.replace("!", ".").replace("?", ".").split("."): + if "mulighet" in sentence and "hybel" in sentence: + is_potential = True + break + + if is_potential: + return 2.0 + + # Documented rental income → definitively real hybel + if "leieinntekt" in d or "skattefri" in d: + return 12.0 + + has_bath = any( + kw in d + for kw in [ + "eget bad", + "eget wc", + "eget toalett", + "bad i hybel", + "dusj i hybel", + "eget dusj", + ] + ) + has_kitch = any( + kw in d for kw in ["eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken"] + ) + if not has_bath: + has_bath = "bad" in d or "dusj" in d + if not has_kitch: + has_kitch = "kjøkken" in d + + if has_bath and has_kitch: + return 12.0 + if has_bath or has_kitch: + return 7.0 + return 4.0 + + +def score_floor(ad: Any, unit: EiendomUnit | None) -> float: + """Floor level. Binary signal: ground floor is bad, everything else neutral. + + Rationale: "toppleilighet i 3-etgs blokk" og "8. etg i høyblokk" er begge + topp for sin bygning. Etasjenummer alene sier ingenting om utsikt eller lys + uten å kjenne byggets totale høyde. Eneste reelle signal er 1. etg (innsyn, + støy, lys) vs ikke-1. etg. 
+ + Scores: + ground floor (≤1) → -15 (hard penalty: innsyn, støy, lys) + unknown → 0 (no data → no penalty) + above ground → 0 (etasjenummer uten bygghøyde = ingen info) + """ + floor: int | None = None + + if unit is not None and unit.floor is not None: + floor = unit.floor + elif ad.floor is not None: + try: + floor = int(str(ad.floor).strip().rstrip(".")) + except (ValueError, TypeError): + floor = None + + if floor is None: + return 0.0 + if floor <= 1: + return -15.0 + return 0.0 + + +def score_building_health(ad: Any, description: str) -> float: + """Sameie / borettslag economy signals. Max 7.""" + score = 0.0 + d = description.lower() + + if ad.shared_debt == 0: + score += 3.0 + elif ad.shared_debt is None: + score += 1.0 + + fk = ad.common_costs or 0 + if fk == 0: + score += 0.0 + elif fk <= 3500: + score += 4.0 + elif fk <= 5000: + score += 2.0 + elif fk <= 7000: + score += 0.0 + else: + score -= 2.0 + + if any(kw in d for kw in ["veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld"]): + score += 2.0 + + return float(_clamp(score, 0.0, 7.0)) + + +def score_green_areas(description: str) -> float: + """Parks, walking terrain, green surroundings. Max 5.""" + d = description.lower() + keywords = ["park", "turområde", "turterreng", "marka", "skog", "grønt", "grønne", "friluft"] + hits = sum(1 for kw in keywords if kw in d) + if hits >= 2: + return 5.0 + if hits == 1: + return 2.0 + return 0.0 + + +def score_economy(ad: Any, unit: EiendomUnit | None) -> float: + """Listing price vs Eiendom.no estimated value. 
Max 8.""" + if unit is None or unit.estimated_selling_price is None: + return 0.0 + price = ad.total_price or ad.asking_price + if price is None: + return 0.0 + ratio = price / unit.estimated_selling_price + if ratio <= 0.92: + return 8.0 + if ratio <= 1.00: + return 5.0 + (1.0 - ratio) * 37.5 + if ratio <= 1.08: + return 5.0 - (ratio - 1.0) * 37.5 + return 1.0 + + +def score_comparable_sales( + listings: list[SimilarUnit], + listing_sqm_price: int | float | None, +) -> float: + """Listing kr/m² vs median sold kr/m² of comp units. Max 8.""" + if not listings or listing_sqm_price is None: + return 0.0 + sqm_prices = [u.sqm_price for u in listings if u.sqm_price] + if not sqm_prices: + return 0.0 + med = _median(sqm_prices) + ratio = listing_sqm_price / med + return float(_clamp((1.0 - abs(ratio - 1.0)) * 8.0, 0.0, 8.0)) + + +def score_renovation(description: str) -> float: + """Minor bonus for renovation upside. Max 3.""" + d = description.lower() + if any(kw in d for kw in ["renover", "oppusse", "potensial", "moderniser"]): + return 3.0 + return 0.0 + + +def score_risk(ad: Any, unit: EiendomUnit | None) -> float: + """Risk penalty. 
Returns 0 or negative.""" + penalty = 0.0 + + if unit is None: + penalty -= 8.0 + + fk = ad.common_costs or 0 + if fk > 8000: + penalty -= 10.0 + elif fk > 6000: + penalty -= 5.0 + + if unit and unit.days_on_market: + if unit.days_on_market > 120: + penalty -= 10.0 + elif unit.days_on_market > 60: + penalty -= 5.0 + + if "usikker" in (ad.listing_description or "").lower(): + penalty -= 5.0 + + return penalty + + +# --------------------------------------------------------------------------- +# Orchestration +# --------------------------------------------------------------------------- + + +def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]: + description = ad.listing_description or "" + + # Collect nearby transit for informational output (not used in scoring) + transit_nearby: dict | None = None + if unit and unit.lat and unit.lng: + transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0) + if transit_nearby["tbane"] or transit_nearby["trikk"]: + logger.debug("Nearby transit: %s", transit_nearby) + + scores: dict[str, Any] = { + "floor": score_floor(ad, unit), + "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)), + "view_and_quiet": score_view_and_quiet(ad, description), + "area_and_layout": score_area_and_layout(ad, unit), + "hybel": score_hybel(description), + "transport": score_transport(unit), "economy": score_economy(ad, unit), - "market_position": score_market_position(unit), "comparable_sales": score_comparable_sales( - similar_units, ad.total_price or ad.asking_price + similar_units, + unit.listing_sqm_price if unit else None, ), - "location": score_location(ad.address, ad.district), - "layout": score_layout_and_potential(ad.listing_description, ad.rooms), - "outdoor": score_outdoor_and_view(ad.listing_description), - "rental_potential": score_rental_potential(ad.listing_description), - "renovation": score_renovation_upside(ad.listing_description, 
ad.asking_price), - "risk": score_risk(ad.listing_description, unit), + "building_health": score_building_health(ad, description), + "green_areas": score_green_areas(description), + "renovation": score_renovation(description), + "risk": score_risk(ad, unit), } - scores["total"] = float(_clamp(sum(scores.values()), 0.0, 100.0)) + + # Numeric-only sum for total + numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))} + scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0)) + + # Attach nearby transit as metadata (non-scoring) + if transit_nearby is not None: + scores["nearby_transit"] = transit_nearby + return scores -def classify_ad(scores: dict[str, float]) -> list[str]: +def classify_ad(scores: dict[str, Any]) -> list[str]: categories: list[str] = [] total = scores.get("total", 0.0) - if total >= 70: - categories.append("bargain_candidate") + + if total >= 75: + categories.append("top_match") if total >= 60: - categories.append("safe_candidate") - if 50 <= total < 70: - categories.append("lifestyle_candidate") - if scores.get("renovation", 0.0) >= 8: - categories.append("renovation_candidate") - if scores.get("rental_potential", 0.0) >= 5: - categories.append("hybel_candidate") - if scores.get("risk", 0.0) < 0: - categories.append("risk_object") + categories.append("strong_candidate") + if 45 <= total < 60: + categories.append("worth_viewing") if total < 30: categories.append("not_interesting") - if 30 <= total < 60: + if 30 <= total < 45: categories.append("manual_review_required") + + if scores.get("hybel", 0.0) >= 7: + categories.append("has_hybel") + if scores.get("view_and_quiet", 0.0) >= 13: + categories.append("premium_view") + if scores.get("neighbourhood", 0.0) == 25: + categories.append("preferred_neighbourhood") + if scores.get("renovation", 0.0) > 0: + categories.append("renovation_candidate") + if scores.get("floor", 0.0) < 0: + categories.append("ground_floor") + if scores.get("risk", 0.0) < -5: + 
categories.append("risk_object") + if scores.get("area_and_layout", 0.0) <= 3: + categories.append("too_small") + return categories diff --git a/finn_eiendom/service.py b/finn_eiendom/service.py index 4562edf..6a2bc3d 100644 --- a/finn_eiendom/service.py +++ b/finn_eiendom/service.py @@ -36,6 +36,43 @@ async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd: return ad +async def ensure_eiendom_unit_code(ad: FinnAd) -> str | None: + """Backfill ``ad.eiendom_unit_code`` by resolving it from the FINN URL. + + ``fetch_ad_details`` never populates ``eiendom_unit_code`` -- only the + Eiendom.no resolver (``search_unit_from_finn_url``) can map a FINN listing + to an Eiendom.no unit. Every enrichment path gates on this field, so + without an explicit resolve step the gate is always falsy and enrichment + silently no-ops. + + Resolves once, mutates the ad in place, and persists the backfill to the + cache so subsequent cache hits skip the network round trip. + + IMPORTANT: callers must run this BEFORE serialising the ad with + ``model_dump()`` -- otherwise the dumped dict carries a stale + ``eiendom_unit_code: None`` even though enrichment succeeded. + + Returns the unit_code, or ``None`` if the listing cannot be resolved + (e.g. new-build project ads, off-market addresses). + """ + if ad.eiendom_unit_code: + return ad.eiendom_unit_code + + unit = await search_unit_from_finn_url(ad.url) + if unit is None or not unit.unit_code: + logger.info("No Eiendom.no unit resolved for finnkode %s", ad.finnkode) + return None + + ad.eiendom_unit_code = unit.unit_code + conn = init_db(FINN_CACHE_PATH) + save_finn_ad(conn, ad) # persist backfill; do NOT cache `unit` here -- + # the resolver returns a partial record (code + + # address + coords). The full unit comes from + # get_or_fetch_eiendom_unit -> get_unit(). 
+ logger.info("Resolved finnkode %s -> unit %s", ad.finnkode, unit.unit_code) + return ad.eiendom_unit_code + + async def get_or_fetch_eiendom_unit( unit_code: str, force_refresh: bool = False ) -> EiendomUnit | None: @@ -84,7 +121,7 @@ async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> EiendomUnit | Non # ============================================================================ -# Orchestration functions — delegate to analysis.py +# Orchestration functions -- delegate to analysis.py # ============================================================================ @@ -96,7 +133,13 @@ async def analyze_search( include_details: bool = True, include_eiendom_no: bool = True, ) -> dict[str, Any]: - """Analyze a FINN search URL and return a ranked shortlist.""" + """Analyze a FINN search URL and return a ranked shortlist. + + NOTE: enrichment for search results lives in analysis.py. If that path + also reports `eiendom_enriched: 0`, it has the same root cause -- each + card's eiendom_unit_code must be resolved via ensure_eiendom_unit_code + (or search_unit_from_finn_url) before the enrichment gate. + """ return await run_analysis_search( search_url, max_pages=max_pages, @@ -114,15 +157,20 @@ async def analyze_ad( ) -> dict[str, Any]: """Fetch and enrich a single FINN ad with analysis.""" ad = await get_or_fetch_ad(finnkode) + + # Resolve BEFORE model_dump() so the serialised ad carries the backfilled + # eiendom_unit_code instead of a stale None. 
+ unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None + result: dict[str, Any] = { "ad": ad.model_dump(), } - if include_eiendom_no and ad.eiendom_unit_code: - unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) + if unit_code: + unit = await get_or_fetch_eiendom_unit(unit_code) if unit: result["eiendom_unit"] = unit.model_dump() if include_similar_units: - similar = await get_or_fetch_similar_units(ad.eiendom_unit_code) + similar = await get_or_fetch_similar_units(unit_code) result["similar_units"] = [s.model_dump() for s in similar] return result @@ -132,16 +180,18 @@ async def analyze_ad_against_comps( ) -> dict[str, Any]: """Evaluate one listing against recent comparable sales.""" ad = await get_or_fetch_ad(finnkode) + + # Resolve before model_dump() -- see analyze_ad. + unit_code = await ensure_eiendom_unit_code(ad) + result: dict[str, Any] = { "ad": ad.model_dump(), } - if ad.eiendom_unit_code: - unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) + if unit_code: + unit = await get_or_fetch_eiendom_unit(unit_code) if unit: result["eiendom_unit"] = unit.model_dump() - comps = await get_or_fetch_similar_units( - ad.eiendom_unit_code, listing_status=listing_status - ) + comps = await get_or_fetch_similar_units(unit_code, listing_status=listing_status) result["comparable_units"] = [c.model_dump() for c in comps] return result @@ -152,17 +202,20 @@ async def find_similar_to_liked( """Find properties similar to a listing the user has liked.""" # Requires that feedback.verdict = "liked" exists for this finnkode ad = await get_or_fetch_ad(finnkode) - if not ad.eiendom_unit_code: + + unit_code = await ensure_eiendom_unit_code(ad) + if not unit_code: raise ValueError( - f"Finnkode {finnkode} has no Eiendom.no unit_code; cannot find similar properties" + f"Finnkode {finnkode} could not be resolved to an Eiendom.no unit; " + "cannot find similar properties" ) # TODO: verify feedback verdict = "liked" exists - unit = await 
get_or_fetch_eiendom_unit(ad.eiendom_unit_code) + unit = await get_or_fetch_eiendom_unit(unit_code) if not unit: raise ValueError(f"Cannot enrich finnkode {finnkode} with Eiendom.no data") - similar = await get_or_fetch_similar_units(ad.eiendom_unit_code, listing_status=listing_status) + similar = await get_or_fetch_similar_units(unit_code, listing_status=listing_status) return { "base_ad": ad.model_dump(), "similar_listings": [s.model_dump() for s in similar], @@ -177,15 +230,18 @@ async def compare_ads( ads = [] for finnkode in finnkoder: ad = await get_or_fetch_ad(finnkode) - ad_data = ad.model_dump() - if include_eiendom_no and ad.eiendom_unit_code: - unit = await get_or_fetch_eiendom_unit(ad.eiendom_unit_code) + # Resolve before model_dump() -- see analyze_ad. + unit_code = await ensure_eiendom_unit_code(ad) if include_eiendom_no else None + + ad_data = ad.model_dump() + if unit_code: + unit = await get_or_fetch_eiendom_unit(unit_code) if unit: ad_data["eiendom_unit"] = unit.model_dump() if include_comps: comps = await get_or_fetch_similar_units( - ad.eiendom_unit_code, listing_status="RECENTLY_SOLD" + unit_code, listing_status="RECENTLY_SOLD" ) ad_data["comps"] = [c.model_dump() for c in comps] diff --git a/validate_mcp_tools.py b/validate_mcp_tools.py deleted file mode 100644 index e3ee568..0000000 --- a/validate_mcp_tools.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -""" -Validate that all MCP tool definitions correctly match their service layer functions. -This catches parameter mismatches, missing arguments, and other integration issues. 
-""" - -import inspect -from typing import get_type_hints -from finn_eiendom import mcp_server, service, eiendom_no - -# Define the mapping of MCP tools to their service/module functions -TOOL_MAPPINGS = { - # Tool name: (service function, expected params to check) - "finn_analyze_search": ( - service.analyze_search, - ["search_url", "max_pages", "detail_limit", "include_details", "include_eiendom_no"], - ), - "finn_get_ad": (service.get_or_fetch_ad, ["finnkode", "force_refresh"]), - "finn_resolve_eiendom_unit": (eiendom_no.search_unit_from_finn_url, ["finn_url"]), - "finn_get_eiendom_unit": (service.get_or_fetch_eiendom_unit, ["unit_code", "force_refresh"]), - "finn_analyze_unit_images": (service.get_unit_images, ["unit_code", "force_refresh"]), - "finn_get_similar_units": (eiendom_no.get_similar_units, ["unit_vector", "listing_status"]), - "finn_build_unit_vector": ( - eiendom_no.get_unit, - ["unit_code"], - ), # Uses get_unit, not build_unit_vector - "finn_decode_unit_vector": (eiendom_no.decode_unit_vector, ["unit_vector"]), - "finn_analyze_ad": ( - service.analyze_ad, - ["finnkode", "include_eiendom_no", "include_similar_units"], - ), - "finn_analyze_ad_against_comps": ( - service.analyze_ad_against_comps, - ["finnkode", "listing_status"], - ), - "finn_find_similar_to_liked_ad": ( - service.find_similar_to_liked, - ["finnkode", "mode", "listing_status"], - ), - "finn_compare_ads": (service.compare_ads, ["finnkoder", "include_eiendom_no", "include_comps"]), - "finn_save_feedback": (service.save_feedback, ["finnkode", "verdict", "notes"]), - "finn_get_shortlist": (service.get_shortlist, ["run_id", "limit"]), - "finn_get_new_ads_since_last_run": (service.get_new_ads_since_last_run, ["search_url"]), -} - - -def get_function_params(func) -> dict: - """Extract parameter names and defaults from a function.""" - sig = inspect.signature(func) - params = {} - for name, param in sig.parameters.items(): - if name in ("self", "cls"): - continue - params[name] = { - 
"default": param.default, - "annotation": param.annotation, - "kind": param.kind.name, - } - return params - - -def validate_tool_mapping( - tool_name: str, service_func, expected_params: list[str] -) -> tuple[bool, list[str]]: - """Validate that an MCP tool correctly maps to its service function.""" - errors = [] - - # Get the MCP tool function - mcp_tool = getattr(mcp_server, tool_name, None) - if not mcp_tool: - errors.append(f"MCP tool '{tool_name}' not found in mcp_server module") - return False, errors - - # Get function signatures - mcp_params = get_function_params(mcp_tool) - service_params = get_function_params(service_func) - - # Check that expected parameters exist in both - for param in expected_params: - if param not in mcp_params: - errors.append(f" ✗ MCP tool missing parameter '{param}'") - if param not in service_params and param != "client": # client is optional in service layer - errors.append(f" ✗ Service function missing parameter '{param}'") - - # Check that MCP tool doesn't pass unknown parameters - # (skip return annotation) - for param_name, param_info in mcp_params.items(): - if param_name not in service_params and param_name not in ["return"]: - # This might be OK if it's a tool-specific parameter, but warn - pass - - if errors: - return False, errors - return True, [] - - -async def validate_service_imports(): - """Validate that all imported service functions exist and are callable.""" - imported_funcs = [ - ("analyze_ad", service.analyze_ad), - ("analyze_ad_against_comps", service.analyze_ad_against_comps), - ("analyze_search", service.analyze_search), - ("compare_ads", service.compare_ads), - ("find_similar_to_liked", service.find_similar_to_liked), - ("get_new_ads_since_last_run", service.get_new_ads_since_last_run), - ("get_or_fetch_ad", service.get_or_fetch_ad), - ("get_or_fetch_eiendom_unit", service.get_or_fetch_eiendom_unit), - ("get_shortlist", service.get_shortlist), - ("get_unit_images", service.get_unit_images), - 
("save_feedback", service.save_feedback), - ] - - errors = [] - for name, func in imported_funcs: - if not callable(func): - errors.append(f"Service function '{name}' is not callable") - - return errors - - -def main(): - """Run validation checks.""" - print("=" * 80) - print("MCP Tool Parameter Validation") - print("=" * 80) - - all_passed = True - total_checks = 0 - passed_checks = 0 - - for tool_name, (service_func, expected_params) in TOOL_MAPPINGS.items(): - total_checks += 1 - passed, errors = validate_tool_mapping(tool_name, service_func, expected_params) - - if passed: - print(f"✓ {tool_name}") - passed_checks += 1 - else: - print(f"✗ {tool_name}") - for error in errors: - print(f" {error}") - all_passed = False - - print("\n" + "=" * 80) - print(f"Results: {passed_checks}/{total_checks} tools validated") - print("=" * 80) - - return 0 if all_passed else 1 - - -if __name__ == "__main__": - exit(main())