Files
finn-mcp/finn_eiendom/scoring.py
T
ole 46fd22c277 Enhance Docker and Compose configurations; add health check endpoint and caching improvements
- Updated Dockerfile to include FINN_CACHE_PATH and create data directory.
- Modified docker-compose.prod.yml to expose port 8010 and adjust resource limits.
- Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping.
- Added health check endpoint in http_server.py for container orchestration.
- Improved caching logic in analysis.py and service.py for similar units.
- Refined scoring.py with updated scoring model and constants for better accuracy.

Co-authored-by: Copilot <copilot@github.com>
2026-05-26 12:10:00 +00:00

779 lines
26 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Scoring engine tuned for Ole & partner's apartment search criteria.
Priority hierarchy (stated):
MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access
HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general),
quiet setting, hybel with own bath + kitchen
MEDIUM : sameie economy, green areas / walking terrain, price vs market
BONUS : renovation upside (acceptable, not required)
Scoring model — explicit weights (sum = 1.0):
Each dimension function returns a raw score in [0, DIMENSION_MAX[d]].
score_ad normalises each to [0, 1] × weight × 100 → weighted bonus 0..100.
Penalties (floor, risk) are absolute deductions applied after weighting.
Final total = clamp(weighted_bonus + penalties, 0, 100).
Dimension Weight Max pts
─────────────────────────────────
transport 24 % 10
view_and_quiet 21 % 20
neighbourhood 17 % 25
hybel 14 % 12
area_and_layout 10 % 15
economy 6 % 8
comparable_sales 4 % 8
building_health 2 % 7
green_areas 1 % 5
renovation 1 % 3
─────────────────────────────────
bonus total 100 % 100
floor penalty 0..-15 (ground floor only)
risk penalty 0..-30
"""
import logging
import math
from typing import Any
from .models import EiendomUnit, SimilarUnit
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Scoring constants — explicit weights and per-dimension raw maxima
# ---------------------------------------------------------------------------
# Share of the final 0..100 weighted bonus contributed by each dimension.
# score_ad computes (raw / DIMENSION_MAX[d]) * DIMENSION_WEIGHTS[d] * 100 per
# dimension and sums the results, so the weights must add up to 1.0.
DIMENSION_WEIGHTS: dict[str, float] = {
"transport": 0.24, # was 0.11 — MUST-have, now primary signal
"view_and_quiet": 0.21, # was 0.17 — key quality-of-life differentiator
"neighbourhood": 0.17, # was 0.22 — still important, no longer dominant
"hybel": 0.14, # was 0.12 — rental income / flexibility
"area_and_layout": 0.10, # was 0.16 — baseline met by search filters
"economy": 0.06, # was 0.08
"comparable_sales": 0.04, # was 0.06
"building_health": 0.02, # was 0.04
"green_areas": 0.01, # was 0.03
"renovation": 0.01, # unchanged
}
# Divisor used by score_ad to normalise each dimension's raw score to [0, 1].
# Each value should equal the highest raw score the matching score_* function
# can return; keep the two in sync when a dimension function changes.
DIMENSION_MAX: dict[str, float] = {
"transport": 10.0,
"view_and_quiet": 20.0,
"neighbourhood": 25.0,
"hybel": 12.0,
"area_and_layout": 15.0,
"economy": 8.0,
"comparable_sales": 8.0,
"building_health": 7.0,
"green_areas": 5.0,
"renovation": 3.0,
}
# Import-time sanity checks: weights sum to 1.0 (within float tolerance) and
# both tables describe exactly the same set of dimensions.
# NOTE: assert statements are skipped when Python runs with -O.
assert abs(sum(DIMENSION_WEIGHTS.values()) - 1.0) < 1e-9, "Weights must sum to 1.0"
assert DIMENSION_WEIGHTS.keys() == DIMENSION_MAX.keys(), "Weight/max key mismatch"
# Risk penalty thresholds
# Read by score_risk, which compares each value with a strict ">" — a listing
# sitting exactly on a threshold does not trigger that tier.
_SHARED_DEBT_HIGH = 500_000 # per unit — hard red flag
_SHARED_DEBT_MEDIUM = 200_000 # per unit — notable
_COMMON_COST_HIGH = 8_000 # kr/mnd
_COMMON_COST_MEDIUM = 6_000 # kr/mnd
_DAYS_STALE = 120 # days on market → something is wrong
_DAYS_SLOW = 60 # days on market → worth investigating
# ---------------------------------------------------------------------------
# Geometry helpers
# ---------------------------------------------------------------------------
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Return the approximate distance in kilometres between two coordinates.

    Uses a flat-earth (equirectangular) approximation with fixed scale
    factors: 111 km per degree of latitude and 56 km per degree of longitude.
    The longitude factor is tuned for Oslo (~59.9°N), so results degrade
    the further the points are from that latitude.
    """
    km_per_degree_lat = 111.0
    km_per_degree_lng = 56.0
    north_south = (lat2 - lat1) * km_per_degree_lat
    east_west = (lng2 - lng1) * km_per_degree_lng
    return math.sqrt(north_south**2 + east_west**2)
def _clamp(value: float, lo: float, hi: float) -> float:
    """Limit *value* to the closed interval [lo, hi].

    The upper bound is applied first and the lower bound last, so when
    ``lo > hi`` the result is ``lo``.
    """
    capped = value if value < hi else hi
    return capped if capped > lo else lo
def _median(values: list[float]) -> float:
    """Return the median of *values* without modifying the input list.

    For an even number of items the two middle values are averaged.
    Raises IndexError when *values* is empty; callers must guard for that.
    """
    ordered = sorted(values)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2.0
# ---------------------------------------------------------------------------
# Preferred neighbourhood anchors
# ---------------------------------------------------------------------------
# Reference points for the preferred neighbourhoods, as (label, lat, lng).
# score_neighbourhood measures the distance from a unit to every anchor and
# scores on the closest one; the label is only used for debug logging.
_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
("Grünerløkka", 59.9240, 10.7573),
("Torshov", 59.9340, 10.7620),
("Rodeløkka", 59.9315, 10.7660),
("Kampen", 59.9125, 10.7760),
("Sagene", 59.9400, 10.7590),
("Nydalen", 59.9520, 10.7540),
("Storo", 59.9450, 10.7670),
("Grefsen", 59.9580, 10.7720),
("Fagerborg", 59.9280, 10.7300),
("St. Hans Haugen", 59.9300, 10.7400),
("Ullevål", 59.9400, 10.7270),
("Majorstua", 59.9210, 10.7170),
("Frogner", 59.9160, 10.7150),
("Løren", 59.9310, 10.7960),
("Torshovdalen", 59.9295, 10.7630),
("Rosenhoff", 59.9255, 10.7775),
]
# ---------------------------------------------------------------------------
# Transit network
# ---------------------------------------------------------------------------
# T-bane (metro) stops keyed by stop name, each mapped to (lat, lng) in
# decimal degrees. Consumed by _nearest_stop and nearby_transit.
# NOTE(review): a few entries share an identical latitude with another stop
# (e.g. "Gjønnes"/"Bekkestua", "Jar"/"Carl Berners plass") — confirm these
# against the source data, they may be copy-paste slips.
TBANE_STOPS: dict[str, tuple[float, float]] = {
"Ammerud": (59.957922, 10.871165),
"Avløs": (59.913859, 10.552926),
"Bekkestua": (59.918097, 10.588031),
"Berg": (59.951142, 10.744801),
"Bergkrystallen": (59.867091, 10.821206),
"Besserud": (59.957760, 10.673092),
"Bjørnsletta": (59.926902, 10.635458),
"Blindern": (59.940052, 10.716262),
"Bogerud": (59.875833, 10.841944),
"Borgen": (59.934548, 10.696000),
"Brattlikollen": (59.888076, 10.801191),
"Brynseng": (59.909169, 10.811834),
"Bøler": (59.884167, 10.845278),
"Carl Berners plass": (59.926592, 10.778360),
"Eiksmarka": (59.946431, 10.622320),
"Ekraveien": (59.950836, 10.635822),
"Ellingsrudåsen": (59.936311, 10.916634),
"Ensjø": (59.913364, 10.786986),
"Forskningsparken": (59.943513, 10.720425),
"Frognerseteren": (59.979018, 10.675857),
"Frøen": (59.934167, 10.709167),
"Furuset": (59.941578, 10.897247),
"Gaustad": (59.945625, 10.709814),
"Gjettum": (59.906221, 10.527155),
"Gjønnes": (59.918097, 10.579877),
"Godlia": (59.908523, 10.835352),
"Grorud": (59.961413, 10.881701),
"Grønland": (59.912912, 10.759563),
"Gråkammen": (59.954838, 10.701842),
"Gulleråsen": (59.955526, 10.696521),
"Hasle": (59.925302, 10.794454),
"Haslum": (59.915021, 10.563183),
"Hauger": (59.910957, 10.510713),
"Haugerud": (59.922592, 10.855350),
"Hellerud": (59.910079, 10.829953),
"Helsfyr": (59.911514, 10.803680),
"Holmen": (59.946296, 10.666609),
"Holmenkollen": (59.960489, 10.662446),
"Holstein": (59.960403, 10.740552),
"Hovseter": (59.946328, 10.654694),
"Høyenhall": (59.905769, 10.819860),
"Jar": (59.926592, 10.621762),
"Jernbanetorget": (59.912116, 10.751211),
"Kalbakken": (59.954553, 10.866750),
"Karlsrud": (59.880453, 10.805225),
"Kolsås": (59.914416, 10.501366),
"Kringsjå": (59.963690, 10.734930),
"Lambertseter": (59.873289, 10.810440),
"Lijordet": (59.940901, 10.616559),
"Lillevann": (59.980481, 10.653037),
"Lindeberg": (59.932979, 10.882087),
"Linderud": (59.940976, 10.839214),
"Løren": (59.929972, 10.790806),
"Majorstuen": (59.929904, 10.714931),
"Makrellbekken": (59.941957, 10.673845),
"Manglerud": (59.897957, 10.812435),
"Midtstuen": (59.961299, 10.682911),
"Montebello": (59.936806, 10.670471),
"Mortensrud": (59.849083, 10.828657),
"Munkelia": (59.868914, 10.812500),
"Nationaltheatret": (59.915045, 10.733039),
"Nydalen": (59.948864, 10.765250),
"Oppsal": (59.892866, 10.840201),
"Ringstabekk": (59.916182, 10.593696),
"Ris": (59.948069, 10.705147),
"Risløkka": (59.932355, 10.822713),
"Rommen": (59.962127, 10.908968),
"Romsås": (59.962272, 10.890777),
"Ryen": (59.895807, 10.805617),
"Røa": (59.946791, 10.643874),
"Rødtvet": (59.951416, 10.859535),
"Sinsen": (59.938085, 10.781343),
"Skogen": (59.975246, 10.647415),
"Skullerud": (59.866754, 10.839171),
"Skødalen": (59.961787, 10.690789),
"Skøyenåsen": (59.898866, 10.836516),
"Slemdal": (59.949896, 10.695662),
"Smestad": (59.937315, 10.683609),
"Sognsvann": (59.967127, 10.733943),
"Steinerud": (59.939083, 10.704345),
"Storo": (59.944545, 10.778768),
"Stortinget": (59.913047, 10.741469),
"Stovner": (59.962616, 10.923414),
"Trosterud": (59.927152, 10.864041),
"Tveita": (59.914354, 10.841961),
"Tøyen": (59.915214, 10.774670),
"Tåsen": (59.953270, 10.752439),
"Ullernåsen": (59.930635, 10.654796),
"Ullevål stadion": (59.946629, 10.732226),
"Ulsrud": (59.889970, 10.849428),
"Veitvet": (59.944700, 10.847304),
"Vestli": (59.972324, 10.929337),
"Vettakollen": (59.959913, 10.695705),
"Vinderen": (59.942803, 10.704761),
"Voksenkollen": (59.980076, 10.665193),
"Voksenlia": (59.966937, 10.655082),
"Vollebekk": (59.935865, 10.831039),
"Åsjordet": (59.928764, 10.646889),
"Økern": (59.928592, 10.804152),
"Østerås": (59.939445, 10.608587),
"Østhorn": (59.956944, 10.749779),
}
# Trikk (tram) stops keyed by stop name, each mapped to (lat, lng) in decimal
# degrees. Merged into TRIKK_STOPS, where these entries take precedence over
# TRIKK_STOPS_ESTIMATED on a name collision.
# NOTE(review): "verified" presumably means the coordinates were checked
# against an authoritative source — confirm with the author.
TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
"Majorstuen": (59.929904, 10.714931),
"Bogstadveien": (59.92611, 10.72167),
"Rosenborg": (59.92417, 10.72389),
"Briskeby": (59.92048, 10.71767),
"Riddervolds plass": (59.91896, 10.72026),
"Inkognitogata": (59.91565, 10.72114),
"Nationaltheatret": (59.91504, 10.73304),
"Øvre Slottsgate": (59.9118, 10.7417),
"Dronningens gate": (59.91053, 10.74697),
"Jernbanetorget": (59.912116, 10.751211),
"Storgata": (59.91396, 10.75141),
"Nybrua": (59.91707, 10.75834),
"Stortorvet": (59.91310, 10.74530),
"Bjørvika": (59.90806, 10.75639),
"Schous plass": (59.92081, 10.75932),
"Olaf Ryes plass": (59.9231, 10.7592),
"Birkelunden": (59.9271, 10.7601),
"Biermanns gate": (59.93028, 10.76104),
"Sandaker senter": (59.93889, 10.76861),
"Grefsenveien": (59.94278, 10.77344),
"Storo": (59.944545, 10.778768),
"Disen": (59.94627, 10.78729),
"Glads vei": (59.95235, 10.78533),
"Grefsenplatået": (59.9560, 10.78573),
"Grefsen stadion": (59.96008, 10.78475),
"Kjelsås": (59.96611, 10.78278),
"Vigelandsparken": (59.92457, 10.70815),
"Frogner plass": (59.92255, 10.70491),
"Elisenberg": (59.91944, 10.70861),
"Lille Frogner allé": (59.9180, 10.7120),
"Niels Juels gate": (59.91634, 10.71520),
"Solli": (59.91486, 10.71906),
"Aker Brygge": (59.9110, 10.7299),
"Kontraskjæret": (59.91087, 10.73592),
"Lilleaker": (59.92074, 10.63580),
"Sollerud": (59.92104, 10.64309),
"Furulund": (59.91990, 10.65013),
"Ullern": (59.92429, 10.65858),
"Abbediengen": (59.92517, 10.66716),
"Hoff": (59.92500, 10.67488),
"Skøyen": (59.92384, 10.68034),
"Thune": (59.92186, 10.68742),
"Nobels gate": (59.91758, 10.69866),
"Skarpsno": (59.91430, 10.70234),
"Skillebekk": (59.91277, 10.71103),
"Middelalderparken": (59.90639, 10.76417),
"Oslo Hospital": (59.9032, 10.7674),
"Ekebergparken": (59.8977, 10.7593),
"Jomfrubråten": (59.8883, 10.7706),
"Sportsplassen": (59.8860, 10.7736),
"Holtet": (59.88151, 10.78415),
"Sørli": (59.87493, 10.78709),
"Kastellet": (59.87106, 10.79036),
"Bråten": (59.86714, 10.79244),
"Sæter": (59.86102, 10.79870),
"Ljabru": (59.85335, 10.80089),
"Rikshospitalet": (59.947768, 10.714716),
"Gaustadalleen": (59.9454, 10.7172),
"Forskningsparken": (59.943513, 10.720425),
"Universitetet Blindern": (59.9421, 10.7243),
"John Collets plass": (59.9403, 10.7290),
"Ullevål sykehus": (59.9361, 10.7318),
"Adamstuen": (59.9326, 10.7345),
"Stensgata": (59.92957, 10.73303),
"Bislett": (59.92599, 10.73108),
"Dalsbergstien": (59.92354, 10.73163),
"Welhavens gate": (59.92131, 10.72968),
"Frydenlund": (59.92086, 10.73317),
"Holbergs plass": (59.91876, 10.73453),
"Lakkegata skole": (59.92055, 10.76834),
"Carl Berners plass": (59.926592, 10.778360),
"Sinsenkrysset": (59.93911, 10.78340),
"Grefsen stasjon": (59.94167, 10.78056),
"Homansbyen": (59.92278, 10.72639),
}
# Additional trikk stops keyed by name, each mapped to (lat, lng).
# NOTE(review): "estimated" presumably means approximate coordinates (they
# carry only four decimals) — confirm before relying on them for precision.
TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
"Tinghuset": (59.9146, 10.7403),
"Torshov": (59.9332, 10.7643),
"Doktor Smiths vei": (59.9503, 10.7867),
"Kjelsåsalleen": (59.9641, 10.7833),
"Frogner stadion": (59.9167, 10.7038),
"Ruseløkka": (59.9120, 10.7258),
"Tullinøkka": (59.9163, 10.7349),
"Heimdalsgata": (59.9188, 10.7633),
"Sofienberg": (59.9236, 10.7734),
"Rosenhoff": (59.9307, 10.7800),
"Sinsenterrassen": (59.9350, 10.7817),
}
# Combined trikk table used for lookups. The verified dict is unpacked last,
# so its coordinates win whenever a stop name appears in both tables.
TRIKK_STOPS: dict[str, tuple[float, float]] = {
**TRIKK_STOPS_ESTIMATED,
**TRIKK_STOPS_VERIFIED,
}
# ---------------------------------------------------------------------------
# Transit helpers
# ---------------------------------------------------------------------------
# Walking speed in km/h; nearby_transit uses it to turn a walking-time budget
# (minutes) into a search radius in kilometres.
_WALK_SPEED_KMH = 5.0
def _nearest_stop(
    lat: float, lng: float, stops: dict[str, tuple[float, float]]
) -> tuple[str, float]:
    """Find the stop in *stops* closest to the given coordinate.

    Returns ``(stop_name, distance_km)``. On a tie the stop that comes first
    in the mapping wins. An empty mapping yields ``("", inf)``.
    """
    closest_name = ""
    closest_km = float("inf")
    for stop_name, (stop_lat, stop_lng) in stops.items():
        km = _distance_km(lat, lng, stop_lat, stop_lng)
        if km < closest_km:
            closest_name = stop_name
            closest_km = km
    return closest_name, closest_km
def _stops_within(
    lat: float,
    lng: float,
    stops: dict[str, tuple[float, float]],
    max_km: float,
) -> list[tuple[str, float]]:
    """Return ``(name, distance_km)`` for stops within *max_km*, nearest first."""
    # Measure every stop exactly once, then filter and sort on that value.
    measured = (
        (name, _distance_km(lat, lng, stop_lat, stop_lng))
        for name, (stop_lat, stop_lng) in stops.items()
    )
    return sorted(
        (pair for pair in measured if pair[1] <= max_km),
        key=lambda pair: pair[1],
    )


def nearby_transit(
    lat: float, lng: float, max_walk_min: float = 10.0
) -> dict[str, list[tuple[str, float]]]:
    """List T-bane and trikk stops reachable on foot from a coordinate.

    Args:
        lat: Latitude of the origin in decimal degrees.
        lng: Longitude of the origin in decimal degrees.
        max_walk_min: Walking-time budget in minutes. It is converted to a
            radius in kilometres using ``_WALK_SPEED_KMH``.

    Returns:
        A dict with the keys ``"tbane"`` and ``"trikk"``. Each value is a list
        of ``(stop_name, distance_km)`` tuples sorted by ascending distance;
        the list is empty when no stop lies within the radius.
    """
    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
    return {
        "tbane": _stops_within(lat, lng, TBANE_STOPS, max_km),
        "trikk": _stops_within(lat, lng, TRIKK_STOPS, max_km),
    }
# ---------------------------------------------------------------------------
# Dimension functions
# ---------------------------------------------------------------------------
def score_neighbourhood(
    unit: EiendomUnit | None,
    address: str | None = None,
    district: str | None = None,
) -> float:
    """Score proximity to the nearest preferred-neighbourhood anchor. Max 25.

    When *unit* carries (truthy) coordinates, the score steps down with the
    distance to the closest entry of ``_PREFERRED_ANCHORS``:
    < 0.5 km → 25, < 1.0 → 20, < 1.5 → 15, < 2.5 → 10, < 4.0 → 5, else 2.

    Without coordinates the function falls back to text: 5 points when
    "oslo" occurs (case-insensitively) in *address* or *district*, else 0.
    """
    if unit and unit.lat and unit.lng:
        min_dist, nearest = min(
            (_distance_km(unit.lat, unit.lng, anchor_lat, anchor_lng), label)
            for label, anchor_lat, anchor_lng in _PREFERRED_ANCHORS
        )
        logger.debug("Nearest anchor: %s at %.2f km", nearest, min_dist)
        # (upper distance bound in km, points), checked from nearest outwards.
        bands = ((0.5, 25.0), (1.0, 20.0), (1.5, 15.0), (2.5, 10.0), (4.0, 5.0))
        for limit_km, points in bands:
            if min_dist < limit_km:
                return points
        return 2.0

    location_text = " ".join(part for part in (address, district) if part).lower()
    return 5.0 if "oslo" in location_text else 0.0
def score_transport(unit: EiendomUnit | None) -> float:
    """Score the distance to the nearest T-bane or trikk stop. Max 10.

    Returns 0 when *unit* is missing or has no coordinates. Otherwise the
    closer of the nearest T-bane and nearest trikk stop decides:
    < 0.4 km → 10, < 0.8 km → 8, < 1.2 km → 4, else 0.
    """
    if unit is None or unit.lat is None or unit.lng is None:
        return 0.0

    tbane_dist = _nearest_stop(unit.lat, unit.lng, TBANE_STOPS)[1]
    trikk_dist = _nearest_stop(unit.lat, unit.lng, TRIKK_STOPS)[1]
    logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_dist, trikk_dist)

    closest_km = min(tbane_dist, trikk_dist)
    for limit_km, points in ((0.4, 10.0), (0.8, 8.0), (1.2, 4.0)):
        if closest_km < limit_km:
            return points
    return 0.0
def score_view_and_quiet(ad: Any, description: str) -> float:
    """Score view quality plus quiet setting from the listing text. Max 20.

    A listing with neither balcony nor terrace scores 0 outright. Otherwise
    the best matching view tier (sea/fjord 15, panorama 13, rooftops 10,
    generic "utsikt" 7) is combined with +5 for quiet keywords and -5 for
    noise keywords, and the sum is limited to 0..20.
    """
    if not ad.has_balcony and not ad.has_terrace:
        return 0.0

    text = description.lower()

    # Tiers are ordered best-first; only the first matching tier counts.
    view_tiers = (
        (15.0, ("sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt")),
        (13.0, ("panorama", "panoramautsikt", "vidt utsyn", "vidstrakt")),
        (10.0, ("over hustak", "hustak", "over takene")),
        (7.0, ("utsikt",)),
    )
    view_points = next(
        (points for points, words in view_tiers if any(w in text for w in words)),
        0.0,
    )

    calm_words = (
        "rolig",
        "tilbaketrukket",
        "skjermet",
        "bakgård",
        "gårdsrom",
        "stille",
        "blindvei",
    )
    noise_words = ("støy", "bilvei", "trafikkert", "støyutsatt")
    is_calm = any(w in text for w in calm_words)
    is_noisy = any(w in text for w in noise_words)
    quiet_points = (5.0 if is_calm else 0.0) - (5.0 if is_noisy else 0.0)

    return float(max(0.0, min(20.0, view_points + quiet_points)))
def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float:
    """Score main-unit size plus bedroom count. Max 15.

    The area comes from ``unit.usable_area`` when available, otherwise from
    ``ad.area_m2`` (missing area counts as 0). Below 60 m² scores 0 and
    60..79 m² scores a flat 3, both without any bedroom bonus. From 80 m² the
    base is 8 (< 90), 11 (< 105) or 14, plus 0.5 for two bedrooms or 1.0 for
    three or more.
    """
    unit_area = unit.usable_area if unit else None
    area = unit_area or ad.area_m2 or 0

    # Under the 80 m² requirement: fixed score, bedrooms are not considered.
    if area < 80:
        return 0.0 if area < 60 else 3.0

    if area < 90:
        base = 8.0
    elif area < 105:
        base = 11.0
    else:
        base = 14.0

    bedrooms = ad.bedrooms or 0
    bonus = 1.0 if bedrooms >= 3 else (0.5 if bedrooms == 2 else 0.0)
    return float(max(0.0, min(15.0, base + bonus)))
def score_hybel(description: str) -> float:
    """Score a rental unit (hybel) mentioned in the listing text. Max 12.

    Outcomes, checked in this order:
      * no hybel / sokkelleil / utleiedel keyword → 0
      * hybel only described as a possibility → 2
      * rental income or tax-free wording → 12
      * bath and kitchen found → 12, only one of them → 7, neither → 4
    """
    text = description.lower()
    if not any(marker in text for marker in ("hybel", "sokkelleil", "utleiedel")):
        return 0.0

    potential_phrases = (
        "mulighet for hybel",
        "mulighet til hybel",
        "mulig hybel",
        "kan etableres hybel",
        "kan bygges om til hybel",
        "tilrettelagt for hybel",
        "potensial for hybel",
        "hybelpotensial",
    )
    only_potential = any(phrase in text for phrase in potential_phrases)
    if not only_potential and "mulighet" in text and "hybel" in text:
        # Looser check: both words inside the same sentence also count.
        sentences = text.replace("!", ".").replace("?", ".").split(".")
        only_potential = any("mulighet" in s and "hybel" in s for s in sentences)
    if only_potential:
        return 2.0

    if "leieinntekt" in text or "skattefri" in text:
        return 12.0

    bath_terms = (
        "eget bad",
        "eget wc",
        "eget toalett",
        "bad i hybel",
        "dusj i hybel",
        "eget dusj",
    )
    kitchen_terms = ("eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken")
    # NOTE(review): the generic fallbacks ("bad", "dusj", "kjøkken") search the
    # whole description, so they likely match the main unit too — confirm that
    # this leniency is intended.
    found_bath = any(term in text for term in bath_terms) or "bad" in text or "dusj" in text
    found_kitchen = any(term in text for term in kitchen_terms) or "kjøkken" in text

    if found_bath and found_kitchen:
        return 12.0
    return 7.0 if (found_bath or found_kitchen) else 4.0
def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
    """Return the floor penalty: -15 for floor 1 or lower, otherwise 0.

    ``unit.floor`` is preferred; when it is missing, ``ad.floor`` is parsed
    as an integer after stripping whitespace and a trailing ".". An unknown
    or unparseable floor gives no penalty.

    Rationale: without the building height a floor number says little; the
    ground floor is the only reliable negative signal (people looking in,
    noise, less light).
    """
    if unit is not None and unit.floor is not None:
        level = unit.floor
    elif ad.floor is None:
        return 0.0
    else:
        try:
            level = int(str(ad.floor).strip().rstrip("."))
        except (ValueError, TypeError):
            return 0.0
    return -15.0 if level <= 1 else 0.0
def score_building_health(ad: Any, description: str) -> float:
    """Score sameie / borettslag economy signals. Max 7.

    Components, summed and then limited to 0..7:
      * shared debt: exactly 0 → +3, unknown (None) → +1, otherwise 0
      * monthly common costs: missing/0 → 0, ≤ 3500 → +4, ≤ 5000 → +2,
        ≤ 7000 → 0, above → -2
      * healthy-economy wording in the description → +2
    """
    text = description.lower()

    debt = ad.shared_debt
    if debt == 0:
        debt_points = 3.0
    elif debt is None:
        debt_points = 1.0
    else:
        debt_points = 0.0

    monthly_fee = ad.common_costs or 0
    if monthly_fee == 0:
        fee_points = 0.0  # missing cost data earns nothing either way
    elif monthly_fee <= 3500:
        fee_points = 4.0
    elif monthly_fee <= 5000:
        fee_points = 2.0
    elif monthly_fee <= 7000:
        fee_points = 0.0
    else:
        fee_points = -2.0

    healthy_words = ("veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld")
    wording_points = 2.0 if any(w in text for w in healthy_words) else 0.0

    total = debt_points + fee_points + wording_points
    return float(max(0.0, min(7.0, total)))
def score_green_areas(description: str) -> float:
    """Score parks, walking terrain and green surroundings. Max 5.

    Counts how many distinct green keywords occur in the lower-cased
    description: two or more → 5, exactly one → 2, none → 0.

    Matching is by substring, so words that merely contain "park" —
    "parkett" (parquet) and the "parkering"/"parkere"/"parkert" (parking)
    family — are removed first. They appear in most listings and would
    otherwise be counted as a park.
    """
    text = description.lower()
    for lookalike in ("parkett", "parkering", "parkere", "parkert"):
        text = text.replace(lookalike, " ")

    keywords = ("park", "turområde", "turterreng", "marka", "skog", "grønt", "grønne", "friluft")
    hits = sum(1 for kw in keywords if kw in text)
    if hits >= 2:
        return 5.0
    if hits == 1:
        return 2.0
    return 0.0
def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
    """Score the listing price against the Eiendom.no estimated value. Max 8.

    The price is ``ad.total_price``, falling back to ``ad.asking_price``.
    With ``ratio = price / estimate``:
      * ratio ≤ 0.92 → 8
      * 0.92 < ratio ≤ 1.00 → linear from 8 down to 5
      * 1.00 < ratio ≤ 1.08 → linear from 5 down to 2
      * ratio > 1.08 → 1

    Returns 0 when there is no unit, or when the estimate or the price is
    missing, zero or negative. A zero estimate would otherwise divide by
    zero, and a zero price would otherwise earn the maximum score.
    """
    if unit is None:
        return 0.0

    estimate = unit.estimated_selling_price
    if not estimate or estimate <= 0:
        return 0.0

    price = ad.total_price or ad.asking_price
    if not price or price <= 0:
        return 0.0

    ratio = price / estimate
    if ratio <= 0.92:
        return 8.0
    if ratio <= 1.00:
        return 5.0 + (1.0 - ratio) * 37.5
    if ratio <= 1.08:
        return 5.0 - (ratio - 1.0) * 37.5
    return 1.0
def score_comparable_sales(
    listings: list[SimilarUnit],
    listing_sqm_price: int | float | None,
) -> float:
    """Score how close the listing's kr/m² is to the comps' median. Max 8.

    Comparable units without a (truthy) ``sqm_price`` are ignored. The score
    is ``8 × (1 − |ratio − 1|)`` limited to 0..8, where ``ratio`` is the
    listing price per m² divided by the median comp price per m². Deviations
    above and below the median are therefore penalised equally.

    Returns 0 when there are no usable comps or no listing price per m².
    """
    if listing_sqm_price is None or not listings:
        return 0.0

    comp_prices = [comp.sqm_price for comp in listings if comp.sqm_price]
    if not comp_prices:
        return 0.0

    ratio = listing_sqm_price / _median(comp_prices)
    deviation = abs(ratio - 1.0)
    return float(_clamp((1.0 - deviation) * 8.0, 0.0, 8.0))
def score_renovation(description: str) -> float:
    """Give a small bonus when the text hints at renovation upside. Max 3.

    Returns 3 when any trigger stem occurs in the lower-cased description,
    otherwise 0.
    """
    text = description.lower()
    # NOTE(review): these stems also match completed work ("renovert",
    # "nyoppusset", "modernisert") — confirm whether that is intended.
    for stem in ("renover", "oppusse", "potensial", "moderniser"):
        if stem in text:
            return 3.0
    return 0.0
def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
    """Sum the risk penalties for a listing. Returns 0 or a negative number.

    Penalties (all thresholds are strict "greater than"):
      * no Eiendom.no unit data                      → -8
      * shared debt above _SHARED_DEBT_HIGH          → -12
      * shared debt above _SHARED_DEBT_MEDIUM        → -6
      * common costs above _COMMON_COST_HIGH         → -10
      * common costs above _COMMON_COST_MEDIUM       → -5
      * days on market above _DAYS_STALE             → -15
      * days on market above _DAYS_SLOW              → -5
      * "usikker" anywhere in the listing text       → -5

    Within each category only the most severe matching tier applies.
    """
    # Missing unit data means the price cannot be cross-checked.
    penalty = -8.0 if unit is None else 0.0

    shared_debt = getattr(ad, "shared_debt", None)
    if shared_debt is not None and shared_debt > _SHARED_DEBT_HIGH:
        penalty -= 12.0
        logger.debug("High shared debt: %d kr → -12", shared_debt)
    elif shared_debt is not None and shared_debt > _SHARED_DEBT_MEDIUM:
        penalty -= 6.0
        logger.debug("Medium shared debt: %d kr → -6", shared_debt)

    monthly_fee = ad.common_costs or 0
    if monthly_fee > _COMMON_COST_HIGH:
        penalty -= 10.0
    elif monthly_fee > _COMMON_COST_MEDIUM:
        penalty -= 5.0

    days_listed = unit.days_on_market if unit else None
    if days_listed:
        if days_listed > _DAYS_STALE:
            penalty -= 15.0
        elif days_listed > _DAYS_SLOW:
            penalty -= 5.0

    listing_text = (ad.listing_description or "").lower()
    if "usikker" in listing_text:
        penalty -= 5.0

    return penalty
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
    """Run every dimension and penalty function and combine them into a total.

    Each raw dimension score is normalised by ``DIMENSION_MAX``, multiplied
    by its ``DIMENSION_WEIGHTS`` entry and by 100, and the results are
    summed. The floor and risk penalties are then added as absolute values
    and the total is limited to 0..100.

    Returns:
        A dict holding the raw score of every dimension, the ``"floor"`` and
        ``"risk"`` penalties and the ``"total"``. When the unit has
        coordinates it also holds ``"nearby_transit"``, the stops within a
        10-minute walk as returned by ``nearby_transit``.
    """
    description = ad.listing_description or ""

    transit_nearby: dict | None = None
    if unit and unit.lat and unit.lng:
        transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
        if transit_nearby["tbane"] or transit_nearby["trikk"]:
            logger.debug("Nearby transit: %s", transit_nearby)

    district = getattr(ad, "district", None)
    raw: dict[str, float] = {
        "neighbourhood": score_neighbourhood(unit, ad.address, district),
        "view_and_quiet": score_view_and_quiet(ad, description),
        "area_and_layout": score_area_and_layout(ad, unit),
        "hybel": score_hybel(description),
        "transport": score_transport(unit),
        "economy": score_economy(ad, unit),
        "comparable_sales": score_comparable_sales(
            similar_units,
            unit.listing_sqm_price if unit else None,
        ),
        "building_health": score_building_health(ad, description),
        "green_areas": score_green_areas(description),
        "renovation": score_renovation(description),
    }
    penalties: dict[str, float] = {
        "floor": score_floor(ad, unit),
        "risk": score_risk(ad, unit),
    }

    # Summation stays on the built-in sum, in weight-table order, so the
    # floating-point result does not depend on this function's layout.
    weighted_bonus = sum(
        (raw[dim] / DIMENSION_MAX[dim]) * weight * 100.0
        for dim, weight in DIMENSION_WEIGHTS.items()
    )
    total = float(_clamp(weighted_bonus + sum(penalties.values()), 0.0, 100.0))

    scores: dict[str, Any] = dict(raw)
    scores.update(penalties)
    scores["total"] = total
    if transit_nearby is not None:
        scores["nearby_transit"] = transit_nearby
    return scores
def classify_ad(scores: dict[str, Any]) -> list[str]:
    """Translate a score dict into a list of category labels.

    The total places the ad in one band (a total of 75 or more yields both
    "top_match" and "strong_candidate"). Independent feature flags follow,
    each based on a single score entry. Missing entries count as 0.0, which
    means a missing "area_and_layout" score is flagged "too_small".
    """
    total = scores.get("total", 0.0)

    if total >= 75:
        categories = ["top_match", "strong_candidate"]
    elif total >= 60:
        categories = ["strong_candidate"]
    elif total >= 45:
        categories = ["worth_viewing"]
    elif total >= 30:
        categories = ["manual_review_required"]
    elif total < 30:
        categories = ["not_interesting"]
    else:
        categories = []

    # (score key, predicate on that score, label), in output order.
    flags = (
        ("hybel", lambda v: v >= 7, "has_hybel"),
        ("view_and_quiet", lambda v: v >= 13, "premium_view"),
        ("neighbourhood", lambda v: v == 25, "preferred_neighbourhood"),
        ("renovation", lambda v: v > 0, "renovation_candidate"),
        ("floor", lambda v: v < 0, "ground_floor"),
        ("risk", lambda v: v < -5, "risk_object"),
        ("area_and_layout", lambda v: v <= 3, "too_small"),
    )
    categories.extend(
        label for key, applies, label in flags if applies(scores.get(key, 0.0))
    )
    return categories