scoring and analysis

This commit is contained in:
Ole
2026-05-23 07:43:30 +00:00
parent c9383788de
commit d3f4bfa838
7 changed files with 1113 additions and 305 deletions
+738 -113
View File
@@ -1,146 +1,771 @@
"""Scoring engine for FINN listings enriched with Eiendom.no data."""
"""Scoring engine tuned for Ole & partner's apartment search criteria.
Priority hierarchy (stated):
MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access
HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general),
quiet setting, hybel with own bath + kitchen
MEDIUM : sameie economy, green areas / walking terrain, price vs market
BONUS : renovation upside (acceptable, not required)
Dimension caps (the non-risk caps listed below sum to 113; the total is clamped to 0..100):
floor -15..0 ground floor penalty only; a floor number without the building height carries no information
neighbourhood 25 preferred area anchors, distance-based
view_and_quiet 20 view quality + quiet setting; 0 if no balcony
area_and_layout 15 sqm + bedroom count; hard penalty < 80 m²
hybel 12 hybel with own bath + kitchen
transport 10 walking distance to T-bane / trikk
economy 8 listing price vs Eiendom.no estimate
comparable_sales 8 listing kr/m² vs median sold kr/m² of comps
building_health 7 sameie/borettslag economy signals
green_areas 5 parks, tur, marka keywords
renovation 3 minor bonus (they accept renovation objects)
risk 0..-30 stale listing, high costs, missing data
"""
import logging
import math
import re
from typing import Any

from .models import EiendomUnit, SimilarUnit
logger = logging.getLogger(__name__)
def _clamp(value: float, min_value: float, max_value: float) -> float:
return max(min_value, min(max_value, value))
# ---------------------------------------------------------------------------
# Geometry helpers
# ---------------------------------------------------------------------------
def score_market_position(unit: EiendomUnit | None) -> float:
    """Score the listing price against the estimated selling price. Max 20.

    The score depends on ``listing_price / estimated_selling_price``:
        ratio <= 0.9        -> 20
        0.9 < ratio <= 1.0  -> linear from 20 down to 16
        1.0 < ratio <= 1.1  -> linear from 12 down to 8
        ratio > 1.1         -> 5

    Returns 0.0 when ``unit`` is missing, when either price is missing, or
    when the estimate is zero or negative (it cannot be used as a divisor).
    """
    if unit is None or unit.listing_price is None:
        return 0.0
    estimate = unit.estimated_selling_price
    # Guard the division: a 0 estimate previously raised ZeroDivisionError.
    if estimate is None or estimate <= 0:
        return 0.0

    ratio = unit.listing_price / estimate
    if ratio <= 0.9:
        return 20.0
    if ratio <= 1.0:
        return 16.0 + (1.0 - ratio) * 40.0
    if ratio <= 1.1:
        return 12.0 - (ratio - 1.0) * 40.0
    return 5.0
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
"""Flat-earth approximation — accurate enough within Oslo (~59.9°N).
1° lat ≈ 111 km, 1° lng ≈ 56 km at this latitude.
"""
dlat = (lat2 - lat1) * 111.0
dlng = (lng2 - lng1) * 56.0
return math.sqrt(dlat**2 + dlng**2)
def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
if ad.total_price is None:
return 0.0
if unit and unit.estimated_selling_price:
ratio = ad.total_price / unit.estimated_selling_price
if ratio <= 0.95:
def _clamp(value: float, lo: float, hi: float) -> float:
return max(lo, min(hi, value))
def _median(values: list[float]) -> float:
s = sorted(values)
mid = len(s) // 2
return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2.0
# ---------------------------------------------------------------------------
# Preferred neighbourhood anchors
# ---------------------------------------------------------------------------
# Reference points for the preferred neighbourhoods. score_neighbourhood
# scores a unit by its distance (via _distance_km, so lat/lng are in degrees)
# to the nearest of these anchors.
_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
    # (label, lat, lng) — label used only for debug logging
    ("Grünerløkka", 59.9240, 10.7573),
    ("Torshov", 59.9340, 10.7620),
    ("Rodeløkka", 59.9315, 10.7660),
    ("Kampen", 59.9125, 10.7760),
    ("Sagene", 59.9400, 10.7590),
    ("Nydalen", 59.9520, 10.7540),
    ("Storo", 59.9450, 10.7670),
    ("Grefsen", 59.9580, 10.7720),
    ("Fagerborg", 59.9280, 10.7300),
    ("St. Hans Haugen", 59.9300, 10.7400),
    ("Ullevål", 59.9400, 10.7270),
    ("Majorstua", 59.9210, 10.7170),
    ("Frogner", 59.9160, 10.7150),
    ("Løren", 59.9310, 10.7960),
    ("Torshovdalen", 59.9295, 10.7630),
    ("Rosenhoff", 59.9255, 10.7775),
]
# ---------------------------------------------------------------------------
# Transit network — all T-bane and trikk stops.
#
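# TBANE_STOPS: coordinates converted from Wikipedia DMS data (all 101 stations).
# TRIKK_STOPS: merge of TRIKK_STOPS_VERIFIED (Wikidata / Wikipedia / OpenStreetMap
#   coordinates) and TRIKK_STOPS_ESTIMATED (interpolated or hand-placed coordinates).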
#
# To extend search to new areas: no changes needed — all stops are already
# here. score_transport automatically finds the nearest stop for any address.
# ---------------------------------------------------------------------------
TBANE_STOPS: dict[str, tuple[float, float]] = {
# All 101 stations — Wikipedia DMS converted to decimal degrees
"Ammerud": (59.957922, 10.871165),
"Avløs": (59.913859, 10.552926),
"Bekkestua": (59.918097, 10.588031),
"Berg": (59.951142, 10.744801),
"Bergkrystallen": (59.867091, 10.821206),
"Besserud": (59.957760, 10.673092),
"Bjørnsletta": (59.926902, 10.635458),
"Blindern": (59.940052, 10.716262),
"Bogerud": (59.875833, 10.841944),
"Borgen": (59.934548, 10.696000),
"Brattlikollen": (59.888076, 10.801191),
"Brynseng": (59.909169, 10.811834),
"Bøler": (59.884167, 10.845278),
"Carl Berners plass": (59.926592, 10.778360),
"Eiksmarka": (59.946431, 10.622320),
"Ekraveien": (59.950836, 10.635822),
"Ellingsrudåsen": (59.936311, 10.916634),
"Ensjø": (59.913364, 10.786986),
"Forskningsparken": (59.943513, 10.720425),
"Frognerseteren": (59.979018, 10.675857),
"Frøen": (59.934167, 10.709167),
"Furuset": (59.941578, 10.897247),
"Gaustad": (59.945625, 10.709814),
"Gjettum": (59.906221, 10.527155),
"Gjønnes": (59.918097, 10.579877),
"Godlia": (59.908523, 10.835352),
"Grorud": (59.961413, 10.881701),
"Grønland": (59.912912, 10.759563),
"Gråkammen": (59.954838, 10.701842),
"Gulleråsen": (59.955526, 10.696521),
"Hasle": (59.925302, 10.794454),
"Haslum": (59.915021, 10.563183),
"Hauger": (59.910957, 10.510713),
"Haugerud": (59.922592, 10.855350),
"Hellerud": (59.910079, 10.829953),
"Helsfyr": (59.911514, 10.803680),
"Holmen": (59.946296, 10.666609),
"Holmenkollen": (59.960489, 10.662446),
"Holstein": (59.960403, 10.740552),
"Hovseter": (59.946328, 10.654694),
"Høyenhall": (59.905769, 10.819860),
"Jar": (59.926592, 10.621762),
"Jernbanetorget": (59.912116, 10.751211),
"Kalbakken": (59.954553, 10.866750),
"Karlsrud": (59.880453, 10.805225),
"Kolsås": (59.914416, 10.501366),
"Kringsjå": (59.963690, 10.734930),
"Lambertseter": (59.873289, 10.810440),
"Lijordet": (59.940901, 10.616559),
"Lillevann": (59.980481, 10.653037),
"Lindeberg": (59.932979, 10.882087),
"Linderud": (59.940976, 10.839214),
"Løren": (59.929972, 10.790806),
"Majorstuen": (59.929904, 10.714931),
"Makrellbekken": (59.941957, 10.673845),
"Manglerud": (59.897957, 10.812435),
"Midtstuen": (59.961299, 10.682911),
"Montebello": (59.936806, 10.670471),
"Mortensrud": (59.849083, 10.828657),
"Munkelia": (59.868914, 10.812500),
"Nationaltheatret": (59.915045, 10.733039),
"Nydalen": (59.948864, 10.765250),
"Oppsal": (59.892866, 10.840201),
"Ringstabekk": (59.916182, 10.593696),
"Ris": (59.948069, 10.705147),
"Risløkka": (59.932355, 10.822713),
"Rommen": (59.962127, 10.908968),
"Romsås": (59.962272, 10.890777),
"Ryen": (59.895807, 10.805617),
"Røa": (59.946791, 10.643874),
"Rødtvet": (59.951416, 10.859535),
"Sinsen": (59.938085, 10.781343),
"Skogen": (59.975246, 10.647415),
"Skullerud": (59.866754, 10.839171),
"Skødalen": (59.961787, 10.690789),
"Skøyenåsen": (59.898866, 10.836516),
"Slemdal": (59.949896, 10.695662),
"Smestad": (59.937315, 10.683609),
"Sognsvann": (59.967127, 10.733943),
"Steinerud": (59.939083, 10.704345),
"Storo": (59.944545, 10.778768),
"Stortinget": (59.913047, 10.741469),
"Stovner": (59.962616, 10.923414),
"Trosterud": (59.927152, 10.864041),
"Tveita": (59.914354, 10.841961),
"Tøyen": (59.915214, 10.774670),
"Tåsen": (59.953270, 10.752439),
"Ullernåsen": (59.930635, 10.654796),
"Ullevål stadion": (59.946629, 10.732226),
"Ulsrud": (59.889970, 10.849428),
"Veitvet": (59.944700, 10.847304),
"Vestli": (59.972324, 10.929337),
"Vettakollen": (59.959913, 10.695705),
"Vinderen": (59.942803, 10.704761),
"Voksenkollen": (59.980076, 10.665193),
"Voksenlia": (59.966937, 10.655082),
"Vollebekk": (59.935865, 10.831039),
"Åsjordet": (59.928764, 10.646889),
"Økern": (59.928592, 10.804152),
"Østerås": (59.939445, 10.608587),
"Østhorn": (59.956944, 10.749779),
}
# Verified trikk stop coordinates — sourced from Wikidata P625, Wikipedia
# DMS infoboxes, or OpenStreetMap. Stops without a verified source are kept
# separately in TRIKK_STOPS_ESTIMATED.
# Grouped by line corridor for readability. Keys match display names used in scoring.
# Source tag format: Wikidata QID | "shared T-bane" | "OSM node <id>" | "Wikipedia"
TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
# ── Briskeby-linjen (l11/19) ─────────────────────────────────────────
"Majorstuen": (59.929904, 10.714931), # shared T-bane
"Bogstadveien": (59.92611, 10.72167), # Q19372022
"Rosenborg": (59.92417, 10.72389), # Q7899658
"Briskeby": (59.92048, 10.71767), # Q11962293
"Riddervolds plass": (59.91896, 10.72026), # Q19386557
"Inkognitogata": (59.91565, 10.72114), # Q11977313
"Nationaltheatret": (59.91504, 10.73304), # shared T-bane
# ── Sentrum (shared l11/12/13/17/18/19) ──────────────────────────────
"Øvre Slottsgate": (59.9118, 10.7417), # Q31079249
"Dronningens gate": (59.91053, 10.74697), # Q29828354
"Jernbanetorget": (59.912116, 10.751211), # shared T-bane
"Storgata": (59.91396, 10.75141), # Q109484341
"Nybrua": (59.91707, 10.75834), # Q104867506
"Stortorvet": (59.91310, 10.74530), # Q7620354
"Bjørvika": (59.90806, 10.75639), # Wikipedia
# ── Grünerløkka–Torshov-linjen (l11/12/18) ──────────────────────────
"Schous plass": (59.92081, 10.75932), # Q12006491-area / Wikipedia
"Olaf Ryes plass": (59.9231, 10.7592), # Q4993079
"Birkelunden": (59.9271, 10.7601), # Q4916412
"Biermanns gate": (59.93028, 10.76104), # Wikipedia
"Sandaker senter": (59.93889, 10.76861), # Wikipedia
"Grefsenveien": (59.94278, 10.77344), # Q17778424
"Storo": (59.944545, 10.778768), # shared T-bane
# ── Kjelsåslinjen (l11/12) ───────────────────────────────────────────
"Disen": (59.94627, 10.78729), # Q11965753
"Glads vei": (59.95235, 10.78533), # Q17776371
"Grefsenplatået": (59.9560, 10.78573), # Q11972531
"Grefsen stadion": (59.96008, 10.78475), # Q11972525
"Kjelsås": (59.96611, 10.78278), # Wikipedia
# ── Frogner-linjen (l12) ─────────────────────────────────────────────
"Vigelandsparken": (59.92457, 10.70815), # Q19398059
"Frogner plass": (59.92255, 10.70491), # Q11970372 / OSM node 30560564
"Elisenberg": (59.91944, 10.70861), # Q5361695
"Lille Frogner allé": (59.9180, 10.7120), # Q19379373
"Niels Juels gate": (59.91634, 10.71520), # Q11991378
"Solli": (59.91486, 10.71906), # Q7558364
# ── Vika-linjen (l12) ────────────────────────────────────────────────
"Aker Brygge": (59.9110, 10.7299), # Q4700639
"Kontraskjæret": (59.91087, 10.73592), # Q11998807
# ── Lilleaker-linjen (l13) ───────────────────────────────────────────
"Lilleaker": (59.92074, 10.63580), # Wikipedia
"Sollerud": (59.92104, 10.64309), # Wikipedia
"Furulund": (59.91990, 10.65013), # Wikipedia
"Ullern": (59.92429, 10.65858), # Wikipedia
"Abbediengen": (59.92517, 10.66716), # Wikipedia
"Hoff": (59.92500, 10.67488), # Wikipedia
"Skøyen": (59.92384, 10.68034), # Wikipedia
# ── Skøyen-linjen (l13) ──────────────────────────────────────────────
"Thune": (59.92186, 10.68742), # Wikipedia
"Nobels gate": (59.91758, 10.69866), # Wikipedia
"Skarpsno": (59.91430, 10.70234), # Wikipedia
"Skillebekk": (59.91277, 10.71103), # Wikipedia
# ── Ekeberg-linjen (l13/19) ──────────────────────────────────────────
"Middelalderparken": (59.90639, 10.76417), # Q99971403
"Oslo Hospital": (59.9032, 10.7674), # Wikipedia
"Ekebergparken": (59.8977, 10.7593), # Wikipedia
"Jomfrubråten": (59.8883, 10.7706), # Wikipedia
"Sportsplassen": (59.8860, 10.7736), # Wikipedia
"Holtet": (59.88151, 10.78415), # Wikipedia
"Sørli": (59.87493, 10.78709), # Wikipedia
"Kastellet": (59.87106, 10.79036), # Wikipedia
"Bråten": (59.86714, 10.79244), # Wikipedia
"Sæter": (59.86102, 10.79870), # Wikipedia
"Ljabru": (59.85335, 10.80089), # Wikipedia
# ── Ullevål Hageby-linjen (l17/18) ───────────────────────────────────
"Rikshospitalet": (59.947768, 10.714716), # Wikipedia
"Gaustadalleen": (59.9454, 10.7172), # Wikipedia
"Forskningsparken": (59.943513, 10.720425), # shared T-bane
"Universitetet Blindern": (59.9421, 10.7243), # Wikipedia
"John Collets plass": (59.9403, 10.7290), # Wikipedia
"Ullevål sykehus": (59.9361, 10.7318), # Wikipedia
"Adamstuen": (59.9326, 10.7345), # Wikipedia
"Stensgata": (59.92957, 10.73303), # Q7607927
"Bislett": (59.92599, 10.73108), # Q11961163
"Dalsbergstien": (59.92354, 10.73163), # Q17764618
"Welhavens gate": (59.92131, 10.72968), # Q12010485
"Frydenlund": (59.92086, 10.73317), # Q19373143
"Holbergs plass": (59.91876, 10.73453), # Q11975623
# ── Sinsen-linjen (l17) ──────────────────────────────────────────────
"Lakkegata skole": (59.92055, 10.76834), # Q11982987
"Carl Berners plass": (59.926592, 10.778360), # shared T-bane
"Sinsenkrysset": (59.93911, 10.78340), # Q19388523
"Grefsen stasjon": (59.94167, 10.78056), # Wikipedia
# ── Homansbyen-linjen (l19) ───────────────────────────────────────────
"Homansbyen": (59.92278, 10.72639), # Q5887760
}
# Estimated trikk stop coordinates — no Wikidata P625 found.
# Derived from linear interpolation between verified neighbours,
# or placed from map/street knowledge. Max error ~150-250 m.
# To update: find Wikidata QID, fetch P625, move entry to TRIKK_STOPS_VERIFIED.
TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
# ── Sentrum ───────────────────────────────────────────────────────────
"Tinghuset": (59.9146, 10.7403), # Ullevål Hageby-l ved Stortinget T
# ── Grünerløkka–Torshov-linjen ──────────────────────────────────────
"Torshov": (59.9332, 10.7643), # interp Biermanns gate↔Sandaker
# ── Kjelsåslinjen ────────────────────────────────────────────────────
"Doktor Smiths vei": (59.9503, 10.7867), # interp Disen↔Kjelsås t=0.20
"Kjelsåsalleen": (59.9641, 10.7833), # interp Disen↔Kjelsås t=0.90
# ── Frogner-linjen ───────────────────────────────────────────────────
"Frogner stadion": (59.9167, 10.7038), # Kirkeveien S for Vigelandsparken
# ── Vika-linjen ──────────────────────────────────────────────────────
"Ruseløkka": (59.9120, 10.7258), # interp Solli↔Kontraskjæret
# ── Ullevål Hageby-linjen ─────────────────────────────────────────────
"Tullinøkka": (59.9163, 10.7349), # interp Holbergs plass↔Tinghuset
# ── Sinsen-linjen ────────────────────────────────────────────────────
"Heimdalsgata": (59.9188, 10.7633), # interp Nybrua↔Lakkegata skole
"Sofienberg": (59.9236, 10.7734), # interp Lakkegata skole↔Carl Berners
"Rosenhoff": (59.9307, 10.7800), # interp Carl Berners↔Sinsenkrysset t=0.33
"Sinsenterrassen": (59.9350, 10.7817), # interp Carl Berners↔Sinsenkrysset t=0.67
}
# Combined trikk stop table. The verified table is the right-hand operand of
# the union, so its coordinates win if a stop name ever appears in both
# tables (which should not happen).
TRIKK_STOPS: dict[str, tuple[float, float]] = TRIKK_STOPS_ESTIMATED | TRIKK_STOPS_VERIFIED
# ---------------------------------------------------------------------------
# Transit helpers
# ---------------------------------------------------------------------------
# Walking speed in km/h; nearby_transit uses it to convert a walking-time
# limit in minutes into a distance limit in km.
_WALK_SPEED_KMH = 5.0 # avg walking speed
def _nearest_stop(
lat: float, lng: float, stops: dict[str, tuple[float, float]]
) -> tuple[str, float]:
"""Return (stop_name, distance_km) for the nearest stop in a dict."""
best_name, best_dist = "", float("inf")
for name, (slat, slng) in stops.items():
d = _distance_km(lat, lng, slat, slng)
if d < best_dist:
best_dist, best_name = d, name
return best_name, best_dist
def _stops_within(
    lat: float,
    lng: float,
    stops: dict[str, tuple[float, float]],
    max_km: float,
) -> list[tuple[str, float]]:
    """Return ``(name, distance_km)`` for every stop within ``max_km``, nearest first."""
    within = [
        (name, dist)
        for name, (stop_lat, stop_lng) in stops.items()
        # Compute each distance once and reuse it for both filter and result.
        if (dist := _distance_km(lat, lng, stop_lat, stop_lng)) <= max_km
    ]
    within.sort(key=lambda item: item[1])
    return within


def nearby_transit(
    lat: float, lng: float, max_walk_min: float = 10.0
) -> dict[str, list[tuple[str, float]]]:
    """Return T-bane and trikk stops within max_walk_min minutes walk.

    The walking-time limit is converted to a distance using _WALK_SPEED_KMH.

    Returns:
        {
            "tbane": [("Carl Berners plass", 0.28), ...],  # sorted by distance
            "trikk": [("Rosenhoff", 0.19), ...],
        }
        All distances in km.
    """
    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
    return {
        "tbane": _stops_within(lat, lng, TBANE_STOPS, max_km),
        "trikk": _stops_within(lat, lng, TRIKK_STOPS, max_km),
    }
# ---------------------------------------------------------------------------
# Dimension functions
# ---------------------------------------------------------------------------
def score_neighbourhood(
    unit: EiendomUnit | None,
    address: str | None = None,
    district: str | None = None,
) -> float:
    """Distance to nearest preferred-area anchor. Max 25.

    With coordinates, the score depends on the distance to the closest entry
    in _PREFERRED_ANCHORS:
        < 0.5 km -> 25, < 1.0 km -> 20, < 1.5 km -> 15,
        < 2.5 km -> 10, < 4.0 km -> 5, otherwise 2.

    Without usable coordinates, falls back to a text check: 5 if "oslo"
    appears in ``address`` or ``district``, else 0.
    """
    if unit and unit.lat and unit.lng:
        distances = [
            (_distance_km(unit.lat, unit.lng, lat, lng), label)
            for label, lat, lng in _PREFERRED_ANCHORS
        ]
        min_dist, nearest = min(distances)
        logger.debug("Nearest anchor: %s at %.2f km", nearest, min_dist)
        # (upper distance limit in km, points) — first matching band wins
        for limit_km, points in (
            (0.5, 25.0),
            (1.0, 20.0),
            (1.5, 15.0),
            (2.5, 10.0),
            (4.0, 5.0),
        ):
            if min_dist < limit_km:
                return points
        return 2.0

    # No coordinates: fall back to a coarse text match on the address fields.
    haystack = " ".join(filter(None, [address, district])).lower()
    if "oslo" in haystack:
        return 5.0
    return 0.0
def score_comparable_sales(listings: list[SimilarUnit], listing_price: int | None) -> float:
    """Score how close the listing price is to the mean sold price of comps. Max 20.

    The score is 20 when the listing price equals the mean selling price and
    falls off linearly with the relative deviation, bottoming out at 0.
    Returns 0.0 when there is no listing price or no comp with a selling price.
    """
    if listing_price is None or not listings:
        return 0.0
    sold = [comp.selling_price for comp in listings if comp.selling_price]
    if not sold:
        return 0.0
    mean_sold = sum(sold) / len(sold)
    ratio = listing_price / mean_sold
    points = (1.0 - abs(ratio - 1.0)) * 20.0
    return float(max(0.0, min(20.0, points)))
def score_location(address: str | None, district: str | None) -> float:
    """Coarse location score based on whether "oslo" appears in the text fields.

    Returns 15 when the district mentions Oslo, 12 when only the address
    does, 7 when some location text exists without mentioning Oslo, and 0
    when neither field has content.
    """
    if district and "oslo" in district.lower():
        return 15.0
    if address and "oslo" in address.lower():
        return 12.0
    return 7.0 if (address or district) else 0.0
def score_layout_and_potential(description: str | None, rooms: int | None) -> float:
    """Score room count and stated potential. Result is limited to 0..20.

    Gives 10 points for four or more rooms and 8 points when the description
    contains "potensial".
    """
    room_points = 10.0 if rooms and rooms >= 4 else 0.0
    potential_points = 8.0 if description and "potensial" in description.lower() else 0.0
    return float(max(0.0, min(20.0, room_points + potential_points)))
def score_outdoor_and_view(description: str | None) -> float:
    """Return 5 when the description mentions "utsikt" or "balkong", else 0."""
    if not description:
        return 0.0
    text = description.lower()
    mentioned = any(word in text for word in ("utsikt", "balkong"))
    points = 5.0 if mentioned else 0.0
    return float(max(0.0, min(15.0, points)))
def score_rental_potential(description: str | None) -> float:
    """Return 10 when the description mentions "hybel" or "leie", else 0."""
    if not description:
        return 0.0
    text = description.lower()
    if any(word in text for word in ("hybel", "leie")):
        return 10.0
    return 0.0
def score_renovation_upside(description: str | None, asking_price: int | None) -> float:
    """Score renovation upside. Result is limited to 0..15.

    Gives 10 points when the description contains "renover" and 5 points
    when a positive asking price is present.
    """
    mention_points = 10.0 if description and "renover" in description.lower() else 0.0
    price_points = 5.0 if asking_price and asking_price > 0 else 0.0
    return float(max(0.0, min(15.0, mention_points + price_points)))
def score_risk(description: str | None, unit: EiendomUnit | None) -> float:
    """Return a flat risk penalty.

    The result is -10 when no Eiendom.no unit is available or when the
    description contains "usikker", and 0 otherwise.
    """
    # The address/district lines that sat here used names that are not
    # parameters of this function, so they are not part of it.
    if unit is None:
        return -10.0
    if description and "usikker" in description.lower():
        return -10.0
    return 0.0
def score_ad(
ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]
) -> dict[str, float]:
scores = {
def score_transport(unit: EiendomUnit | None) -> float:
    """Score the distance to the nearest T-bane or trikk stop. Max 10.

    Every stop in TBANE_STOPS and TRIKK_STOPS is considered, so no manual
    curation is needed when new search areas are added.

    Distance bands:
        < 400 m   -> 10 pts (~5 min walk)
        < 800 m   ->  8 pts (~10 min — stated threshold)
        < 1200 m  ->  4 pts (~15 min)
        >= 1200 m ->  0 pts

    Returns 0 when the unit or either of its coordinates is missing.
    """
    if unit is None or unit.lat is None or unit.lng is None:
        return 0.0

    tbane_km = _nearest_stop(unit.lat, unit.lng, TBANE_STOPS)[1]
    trikk_km = _nearest_stop(unit.lat, unit.lng, TRIKK_STOPS)[1]
    nearest_km = min(tbane_km, trikk_km)
    logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_km, trikk_km)

    # (upper distance limit in km, points) — first matching band wins
    for limit_km, points in ((0.4, 10.0), (0.8, 8.0), (1.2, 4.0)):
        if nearest_km < limit_km:
            return points
    return 0.0
def score_view_and_quiet(ad: Any, description: str) -> float:
    """Score view quality plus quiet setting. Max 20; 0 without balcony or terrace.

    View points come from the best matching tier (sea/fjord 15, panorama 13,
    rooftops 10, any "utsikt" 7). Quiet keywords add 5 and noise keywords
    subtract 5. The sum is limited to 0..20.
    """
    if not ad.has_balcony and not ad.has_terrace:
        return 0.0

    text = description.lower()

    # Ordered best tier first; only the first tier with a hit counts.
    view_tiers = (
        (15.0, ("sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt")),
        (13.0, ("panorama", "panoramautsikt", "vidt utsyn", "vidstrakt")),
        (10.0, ("over hustak", "hustak", "over takene")),
        (7.0, ("utsikt",)),
    )
    view = next(
        (points for points, words in view_tiers if any(word in text for word in words)),
        0.0,
    )

    quiet_words = (
        "rolig",
        "tilbaketrukket",
        "skjermet",
        "bakgård",
        "gårdsrom",
        "stille",
        "blindvei",
    )
    noise_words = ("støy", "bilvei", "trafikkert", "støyutsatt")
    calm_bonus = 5.0 if any(word in text for word in quiet_words) else 0.0
    noise_malus = 5.0 if any(word in text for word in noise_words) else 0.0
    quiet = calm_bonus - noise_malus

    return float(max(0.0, min(20.0, view + quiet)))
def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float:
    """Score main unit size plus bedroom count. Max 15.

    The area is taken from ``unit.usable_area`` when available, otherwise
    from ``ad.area_m2``. Below 60 m² scores 0 and below 80 m² scores a flat
    3, both without any bedroom bonus. Larger units score 8 / 11 / 14 by
    size band, plus 0.5 for two bedrooms or 1 for three or more.
    """
    area = (unit.usable_area if unit else None) or ad.area_m2 or 0
    if area < 60:
        return 0.0
    if area < 80:
        return 3.0

    area_score = 8.0 if area < 90 else 11.0 if area < 105 else 14.0

    bedrooms = ad.bedrooms or 0
    bedroom_bonus = 1.0 if bedrooms >= 3 else 0.5 if bedrooms == 2 else 0.0

    return float(max(0.0, min(15.0, area_score + bedroom_bonus)))
def score_hybel(description: str) -> float:
    """Score a rental unit (hybel) mentioned in the description. Max 12.

    Scores:
        0   no "hybel" / "sokkelleil" / "utleiedel" keyword
        2   a hybel is only described as a possibility
        12  rental income is mentioned, or both bath and kitchen are mentioned
        7   only a bath or only a kitchen is mentioned
        4   a hybel is mentioned without bath or kitchen
    """
    text = description.lower()
    if not any(marker in text for marker in ("hybel", "sokkelleil", "utleiedel")):
        return 0.0

    potential_phrases = (
        "mulighet for hybel",
        "mulighet til hybel",
        "mulig hybel",
        "kan etableres hybel",
        "kan bygges om til hybel",
        "tilrettelagt for hybel",
        "potensial for hybel",
        "hybelpotensial",
    )
    only_potential = any(phrase in text for phrase in potential_phrases)
    if not only_potential and "mulighet" in text and "hybel" in text:
        # Looser check: both words inside the same sentence.
        sentences = text.replace("!", ".").replace("?", ".").split(".")
        only_potential = any(
            "mulighet" in sentence and "hybel" in sentence for sentence in sentences
        )
    if only_potential:
        return 2.0

    # Documented rental income → definitively real hybel
    if "leieinntekt" in text or "skattefri" in text:
        return 12.0

    bath_phrases = (
        "eget bad",
        "eget wc",
        "eget toalett",
        "bad i hybel",
        "dusj i hybel",
        "eget dusj",
    )
    kitchen_phrases = ("eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken")
    # Specific phrases first, then any mention of the bare word.
    has_bath = any(kw in text for kw in bath_phrases) or "bad" in text or "dusj" in text
    has_kitchen = any(kw in text for kw in kitchen_phrases) or "kjøkken" in text

    points_by_feature_count = {2: 12.0, 1: 7.0, 0: 4.0}
    return points_by_feature_count[int(has_bath) + int(has_kitchen)]
def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
    """Floor level. Binary signal: ground floor is bad, everything else neutral.

    Rationale: a top-floor flat in a three-storey block and the 8th floor of
    a high-rise are both the top of their building. A floor number alone says
    nothing about view or light without the building's total height. The only
    real signal is 1st floor (people looking in, noise, light) versus not
    1st floor.

    The floor is read from ``unit.floor`` when set, otherwise parsed from
    ``ad.floor`` (surrounding whitespace and a trailing "." are ignored).

    Scores:
        ground floor (<= 1) -> -15 (hard penalty: looking in, noise, light)
        unknown             ->   0 (no data -> no penalty)
        above ground        ->   0 (floor number without building height = no info)
    """
    level: int | None = unit.floor if unit is not None else None
    if level is None and ad.floor is not None:
        try:
            level = int(str(ad.floor).strip().rstrip("."))
        except (ValueError, TypeError):
            level = None

    if level is not None and level <= 1:
        return -15.0
    return 0.0
def score_building_health(ad: Any, description: str) -> float:
    """Score sameie / borettslag economy signals. Max 7.

    Combines three parts and limits the sum to 0..7:
        shared debt:   3 when it is 0, 1 when unknown (None), else 0
        common costs:  4 up to 3500, 2 up to 5000, 0 up to 7000, -2 above;
                       missing or 0 costs give 0
        description:   2 when it contains a keyword for sound finances
    """
    text = description.lower()

    debt = ad.shared_debt
    debt_points = 3.0 if debt == 0 else 1.0 if debt is None else 0.0

    monthly_fee = ad.common_costs or 0
    if monthly_fee == 0:
        fee_points = 0.0
    elif monthly_fee <= 3500:
        fee_points = 4.0
    elif monthly_fee <= 5000:
        fee_points = 2.0
    elif monthly_fee <= 7000:
        fee_points = 0.0
    else:
        fee_points = -2.0

    healthy_words = ("veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld")
    keyword_points = 2.0 if any(word in text for word in healthy_words) else 0.0

    total = 0.0 + debt_points + fee_points + keyword_points
    return float(max(0.0, min(7.0, total)))
# Green-area keywords as regexes. The negative lookaheads stop everyday
# listing words that merely start with a keyword from counting as green
# surroundings: "parkett" (parquet), "parkering" / "parkere" / "parkert"
# (parking) and "markant" (distinctive).
_GREEN_AREA_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
    re.compile(pattern)
    for pattern in (
        r"park(?!ett|er[eit])",
        r"turområde",
        r"turterreng",
        r"marka(?!nt)",
        r"skog",
        r"grønt",
        r"grønne",
        r"friluft",
    )
)


def score_green_areas(description: str) -> float:
    """Parks, walking terrain, green surroundings. Max 5.

    Counts how many distinct green-area keywords occur in the lower-cased
    description: two or more give 5, exactly one gives 2, none gives 0.
    Compound names such as "Sofienbergparken" still count for "park".
    """
    text = description.lower()
    hits = sum(1 for pattern in _GREEN_AREA_PATTERNS if pattern.search(text))
    if hits >= 2:
        return 5.0
    if hits == 1:
        return 2.0
    return 0.0
def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
    """Listing price vs Eiendom.no estimated value. Max 8.

    The price is ``ad.total_price``, falling back to ``ad.asking_price``.
    The score depends on ``price / estimated_selling_price``:
        ratio <= 0.92         -> 8
        0.92 < ratio <= 1.00  -> linear from 8 down to 5
        1.00 < ratio <= 1.08  -> linear from 5 down to 2
        ratio > 1.08          -> 1

    Returns 0.0 when the unit, the estimate or the price is missing. A zero
    or negative estimate and a zero price are treated as missing, so they
    neither divide by zero nor earn the top score.
    """
    if unit is None:
        return 0.0
    estimate = unit.estimated_selling_price
    if estimate is None or estimate <= 0:
        return 0.0
    price = ad.total_price or ad.asking_price
    # `not price` also rejects 0, which would otherwise give ratio 0 -> 8 pts.
    if not price:
        return 0.0

    ratio = price / estimate
    if ratio <= 0.92:
        return 8.0
    if ratio <= 1.00:
        return 5.0 + (1.0 - ratio) * 37.5
    if ratio <= 1.08:
        return 5.0 - (ratio - 1.0) * 37.5
    return 1.0
def score_comparable_sales(
    listings: list[SimilarUnit],
    listing_sqm_price: int | float | None,
) -> float:
    """Score the listing kr/m² against the median sold kr/m² of the comps. Max 8.

    The score is 8 when the listing matches the median exactly and falls off
    linearly with the relative deviation, bottoming out at 0. Returns 0.0
    when there is no listing kr/m² or no comp with a kr/m² value.
    """
    if listing_sqm_price is None or not listings:
        return 0.0
    comp_prices = [comp.sqm_price for comp in listings if comp.sqm_price]
    if not comp_prices:
        return 0.0
    ratio = listing_sqm_price / _median(comp_prices)
    closeness = 1.0 - abs(ratio - 1.0)
    return float(_clamp(closeness * 8.0, 0.0, 8.0))
def score_renovation(description: str) -> float:
    """Minor bonus for renovation upside. Max 3.

    Returns 3 when the lower-cased description contains one of the renovation
    keyword stems, else 0. The stem "oppuss" is used so that "oppussing" and
    "oppussingsobjekt" match as well as "oppusse".
    """
    text = description.lower()
    stems = ("renover", "oppuss", "potensial", "moderniser")
    if any(stem in text for stem in stems):
        return 3.0
    return 0.0
def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
    """Risk penalty. Returns 0 or a negative number.

    Deductions (all that apply are added up):
        8        no Eiendom.no unit available
        10 / 5   common costs above 8000 / above 6000
        10 / 5   more than 120 / more than 60 days on market
        5        the listing description contains "usikker"
    """
    missing_data = 8.0 if unit is None else 0.0

    monthly_fee = ad.common_costs or 0
    high_costs = 10.0 if monthly_fee > 8000 else 5.0 if monthly_fee > 6000 else 0.0

    days = unit.days_on_market if unit else None
    stale = 0.0
    if days:
        stale = 10.0 if days > 120 else 5.0 if days > 60 else 0.0

    listing_text = (ad.listing_description or "").lower()
    uncertain = 5.0 if "usikker" in listing_text else 0.0

    # Subtract from 0.0 so a clean listing yields 0.0 rather than -0.0.
    return 0.0 - (missing_data + high_costs + stale + uncertain)
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
    """Score one listing on every dimension and return the per-dimension scores.

    The returned dict maps each dimension name to its score and additionally
    contains:
        "total"           sum of all dimension scores, clamped to 0..100
        "nearby_transit"  output of nearby_transit() for a 10-minute walk;
                          only present when the unit has coordinates.
                          Informational, not part of the total.
    """
    description = ad.listing_description or ""

    # Collect nearby transit for informational output (not used in scoring)
    transit_nearby: dict | None = None
    if unit and unit.lat and unit.lng:
        transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
        if transit_nearby["tbane"] or transit_nearby["trikk"]:
            logger.debug("Nearby transit: %s", transit_nearby)

    scores: dict[str, Any] = {
        "floor": score_floor(ad, unit),
        "neighbourhood": score_neighbourhood(unit, ad.address, getattr(ad, "district", None)),
        "view_and_quiet": score_view_and_quiet(ad, description),
        "area_and_layout": score_area_and_layout(ad, unit),
        "hybel": score_hybel(description),
        "transport": score_transport(unit),
        "economy": score_economy(ad, unit),
        # NOTE(review): kept because it looks like an unchanged line of the
        # diff; the module docstring does not list this dimension — confirm.
        "market_position": score_market_position(unit),
        "comparable_sales": score_comparable_sales(
            similar_units,
            unit.listing_sqm_price if unit else None,
        ),
        "building_health": score_building_health(ad, description),
        "green_areas": score_green_areas(description),
        "renovation": score_renovation(description),
        "risk": score_risk(ad, unit),
    }

    # Numeric-only sum for total
    numeric = {k: v for k, v in scores.items() if isinstance(v, (int, float))}
    scores["total"] = float(_clamp(sum(numeric.values()), 0.0, 100.0))

    # Attach nearby transit as metadata (non-scoring), after the total is
    # computed so it can never take part in the sum.
    if transit_nearby is not None:
        scores["nearby_transit"] = transit_nearby
    return scores
def classify_ad(scores: dict[str, Any]) -> list[str]:
    """Translate a score dict from score_ad into a list of category labels.

    Labels based on the total:
        >= 75 "top_match", >= 60 "strong_candidate", 45..<60 "worth_viewing",
        30..<45 "manual_review_required", < 30 "not_interesting".
    Labels based on single dimensions:
        "has_hybel" (hybel >= 7), "premium_view" (view_and_quiet >= 13),
        "preferred_neighbourhood" (neighbourhood == 25),
        "renovation_candidate" (renovation > 0), "ground_floor" (floor < 0),
        "risk_object" (risk < -5), "too_small" (area_and_layout <= 3).

    Missing keys count as 0.0, so a dict without "area_and_layout" is
    labelled "too_small".
    """
    categories: list[str] = []
    total = scores.get("total", 0.0)

    if total >= 75:
        categories.append("top_match")
    if total >= 60:
        categories.append("strong_candidate")
    if 45 <= total < 60:
        categories.append("worth_viewing")
    if total < 30:
        categories.append("not_interesting")
    if 30 <= total < 45:
        categories.append("manual_review_required")

    if scores.get("hybel", 0.0) >= 7:
        categories.append("has_hybel")
    if scores.get("view_and_quiet", 0.0) >= 13:
        categories.append("premium_view")
    if scores.get("neighbourhood", 0.0) == 25:
        categories.append("preferred_neighbourhood")
    if scores.get("renovation", 0.0) > 0:
        categories.append("renovation_candidate")
    if scores.get("floor", 0.0) < 0:
        categories.append("ground_floor")
    if scores.get("risk", 0.0) < -5:
        categories.append("risk_object")
    if scores.get("area_and_layout", 0.0) <= 3:
        categories.append("too_small")
    return categories