46fd22c277
- Updated Dockerfile to include FINN_CACHE_PATH and create data directory. - Modified docker-compose.prod.yml to expose port 8010 and adjust resource limits. - Updated docker-compose.yml to include FINN_CACHE_PATH and ensure proper port mapping. - Added health check endpoint in http_server.py for container orchestration. - Improved caching logic in analysis.py and service.py for similar units. - Refined scoring.py with updated scoring model and constants for better accuracy. Co-authored-by: Copilot <copilot@github.com>
779 lines
26 KiB
Python
779 lines
26 KiB
Python
"""Scoring engine tuned for Ole & partner's apartment search criteria.
|
||
|
||
Priority hierarchy (stated):
|
||
MUST : balcony, ≥80 m² main unit, 2-3 bedrooms, T-bane/trikk access
|
||
HIGH : preferred neighbourhoods, view (sea/panorama > rooftop > general),
|
||
quiet setting, hybel with own bath + kitchen
|
||
MEDIUM : sameie economy, green areas / walking terrain, price vs market
|
||
BONUS : renovation upside (acceptable, not required)
|
||
|
||
Scoring model — explicit weights (sum = 1.0):
|
||
Each dimension function returns a raw score in [0, DIMENSION_MAX[d]].
|
||
score_ad normalises each to [0, 1] × weight × 100 → weighted bonus 0..100.
|
||
Penalties (floor, risk) are absolute deductions applied after weighting.
|
||
Final total = clamp(weighted_bonus + penalties, 0, 100).
|
||
|
||
Dimension Weight Max pts
|
||
─────────────────────────────────
|
||
transport 24 % 10
|
||
view_and_quiet 21 % 20
|
||
neighbourhood 17 % 25
|
||
hybel 14 % 12
|
||
area_and_layout 10 % 15
|
||
economy 6 % 8
|
||
comparable_sales 4 % 8
|
||
building_health 2 % 7
|
||
green_areas 1 % 5
|
||
renovation 1 % 3
|
||
─────────────────────────────────
|
||
bonus total 100 % 100
|
||
floor penalty 0..-15 (ground floor only)
|
||
risk penalty 0..-42 (uncapped sum of the individual risk triggers)
|
||
"""
|
||
|
||
import logging
|
||
import math
|
||
from typing import Any
|
||
|
||
from .models import EiendomUnit, SimilarUnit
|
||
|
||
# Module-level logger; the scoring functions in this file emit their debug traces through it.
logger = logging.getLogger(__name__)
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Scoring constants — explicit weights and per-dimension raw maxima
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Fraction of the 0..100 weighted bonus that each dimension contributes.
# The fractions must add up to 1.0 (checked below).
DIMENSION_WEIGHTS: dict[str, float] = {
    # MUST-have, now the primary signal (previously 0.11).
    "transport": 0.24,
    # Key quality-of-life differentiator (previously 0.17).
    "view_and_quiet": 0.21,
    # Still important, no longer dominant (previously 0.22).
    "neighbourhood": 0.17,
    # Rental income / flexibility (previously 0.12).
    "hybel": 0.14,
    # Baseline already met by the search filters (previously 0.16).
    "area_and_layout": 0.10,
    # Previously 0.08.
    "economy": 0.06,
    # Previously 0.06.
    "comparable_sales": 0.04,
    # Previously 0.04.
    "building_health": 0.02,
    # Previously 0.03.
    "green_areas": 0.01,
    # Unchanged.
    "renovation": 0.01,
}

# Highest raw score each dimension function can return; score_ad divides the
# raw score by this value to normalise it to [0, 1] before weighting.
DIMENSION_MAX: dict[str, float] = {
    "transport": 10.0,
    "view_and_quiet": 20.0,
    "neighbourhood": 25.0,
    "hybel": 12.0,
    "area_and_layout": 15.0,
    "economy": 8.0,
    "comparable_sales": 8.0,
    "building_health": 7.0,
    "green_areas": 5.0,
    "renovation": 3.0,
}

# Import-time sanity checks: the two tables must describe the same dimensions
# and the weights must form a complete 100 % split.
assert abs(sum(DIMENSION_WEIGHTS.values()) - 1.0) < 1e-9, "Weights must sum to 1.0"
assert DIMENSION_WEIGHTS.keys() == DIMENSION_MAX.keys(), "Weight/max key mismatch"
|
||
|
||
# Risk penalty thresholds (consumed by score_risk).

# Shared debt per unit: above HIGH is a hard red flag, above MEDIUM is notable.
_SHARED_DEBT_HIGH = 500_000
_SHARED_DEBT_MEDIUM = 200_000

# Monthly common costs (kr/mnd).
_COMMON_COST_HIGH = 8_000
_COMMON_COST_MEDIUM = 6_000

# Days on market: beyond STALE something is wrong, beyond SLOW it is worth investigating.
_DAYS_STALE = 120
_DAYS_SLOW = 60
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Geometry helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Return the approximate distance in kilometres between two lat/lng points.

    Flat-earth approximation: one degree of latitude is taken as 111 km and one
    degree of longitude as 56 km, which is accurate enough within Oslo
    (~59.9°N). The result is symmetric in the two points.
    """
    north_south_km = (lat2 - lat1) * 111.0
    east_west_km = (lng2 - lng1) * 56.0
    squared_km = north_south_km**2 + east_west_km**2
    return math.sqrt(squared_km)
|
||
|
||
|
||
def _clamp(value: float, lo: float, hi: float) -> float:
    """Restrict *value* to the interval [lo, hi].

    The upper bound is applied first and the lower bound second.
    """
    capped = min(hi, value)
    return max(lo, capped)
|
||
|
||
|
||
def _median(values: list[float]) -> float:
    """Return the median of *values* without modifying the input list.

    For an odd number of items the middle element is returned as-is; for an
    even number the two middle elements are averaged. An empty list is not
    supported (indexing fails with IndexError), so callers must check first.
    """
    ordered = sorted(values)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2.0
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Preferred neighbourhood anchors
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Reference points for the preferred neighbourhoods as (label, latitude, longitude).
# score_neighbourhood rates a unit by its distance to the closest of these.
_PREFERRED_ANCHORS: list[tuple[str, float, float]] = [
    ("Grünerløkka", 59.9240, 10.7573),
    ("Torshov", 59.9340, 10.7620),
    ("Rodeløkka", 59.9315, 10.7660),
    ("Kampen", 59.9125, 10.7760),
    ("Sagene", 59.9400, 10.7590),
    ("Nydalen", 59.9520, 10.7540),
    ("Storo", 59.9450, 10.7670),
    ("Grefsen", 59.9580, 10.7720),
    ("Fagerborg", 59.9280, 10.7300),
    ("St. Hans Haugen", 59.9300, 10.7400),
    ("Ullevål", 59.9400, 10.7270),
    ("Majorstua", 59.9210, 10.7170),
    ("Frogner", 59.9160, 10.7150),
    ("Løren", 59.9310, 10.7960),
    ("Torshovdalen", 59.9295, 10.7630),
    ("Rosenhoff", 59.9255, 10.7775),
]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Transit network
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# T-bane (metro) stations keyed by name; each value is (latitude, longitude).
TBANE_STOPS: dict[str, tuple[float, float]] = {
    "Ammerud": (59.957922, 10.871165),
    "Avløs": (59.913859, 10.552926),
    "Bekkestua": (59.918097, 10.588031),
    "Berg": (59.951142, 10.744801),
    "Bergkrystallen": (59.867091, 10.821206),
    "Besserud": (59.957760, 10.673092),
    "Bjørnsletta": (59.926902, 10.635458),
    "Blindern": (59.940052, 10.716262),
    "Bogerud": (59.875833, 10.841944),
    "Borgen": (59.934548, 10.696000),
    "Brattlikollen": (59.888076, 10.801191),
    "Brynseng": (59.909169, 10.811834),
    "Bøler": (59.884167, 10.845278),
    "Carl Berners plass": (59.926592, 10.778360),
    "Eiksmarka": (59.946431, 10.622320),
    "Ekraveien": (59.950836, 10.635822),
    "Ellingsrudåsen": (59.936311, 10.916634),
    "Ensjø": (59.913364, 10.786986),
    "Forskningsparken": (59.943513, 10.720425),
    "Frognerseteren": (59.979018, 10.675857),
    "Frøen": (59.934167, 10.709167),
    "Furuset": (59.941578, 10.897247),
    "Gaustad": (59.945625, 10.709814),
    "Gjettum": (59.906221, 10.527155),
    "Gjønnes": (59.918097, 10.579877),
    "Godlia": (59.908523, 10.835352),
    "Grorud": (59.961413, 10.881701),
    "Grønland": (59.912912, 10.759563),
    "Gråkammen": (59.954838, 10.701842),
    "Gulleråsen": (59.955526, 10.696521),
    "Hasle": (59.925302, 10.794454),
    "Haslum": (59.915021, 10.563183),
    "Hauger": (59.910957, 10.510713),
    "Haugerud": (59.922592, 10.855350),
    "Hellerud": (59.910079, 10.829953),
    "Helsfyr": (59.911514, 10.803680),
    "Holmen": (59.946296, 10.666609),
    "Holmenkollen": (59.960489, 10.662446),
    "Holstein": (59.960403, 10.740552),
    "Hovseter": (59.946328, 10.654694),
    "Høyenhall": (59.905769, 10.819860),
    "Jar": (59.926592, 10.621762),
    "Jernbanetorget": (59.912116, 10.751211),
    "Kalbakken": (59.954553, 10.866750),
    "Karlsrud": (59.880453, 10.805225),
    "Kolsås": (59.914416, 10.501366),
    "Kringsjå": (59.963690, 10.734930),
    "Lambertseter": (59.873289, 10.810440),
    "Lijordet": (59.940901, 10.616559),
    "Lillevann": (59.980481, 10.653037),
    "Lindeberg": (59.932979, 10.882087),
    "Linderud": (59.940976, 10.839214),
    "Løren": (59.929972, 10.790806),
    "Majorstuen": (59.929904, 10.714931),
    "Makrellbekken": (59.941957, 10.673845),
    "Manglerud": (59.897957, 10.812435),
    "Midtstuen": (59.961299, 10.682911),
    "Montebello": (59.936806, 10.670471),
    "Mortensrud": (59.849083, 10.828657),
    "Munkelia": (59.868914, 10.812500),
    "Nationaltheatret": (59.915045, 10.733039),
    "Nydalen": (59.948864, 10.765250),
    "Oppsal": (59.892866, 10.840201),
    "Ringstabekk": (59.916182, 10.593696),
    "Ris": (59.948069, 10.705147),
    "Risløkka": (59.932355, 10.822713),
    "Rommen": (59.962127, 10.908968),
    "Romsås": (59.962272, 10.890777),
    "Ryen": (59.895807, 10.805617),
    "Røa": (59.946791, 10.643874),
    "Rødtvet": (59.951416, 10.859535),
    "Sinsen": (59.938085, 10.781343),
    "Skogen": (59.975246, 10.647415),
    "Skullerud": (59.866754, 10.839171),
    "Skødalen": (59.961787, 10.690789),
    "Skøyenåsen": (59.898866, 10.836516),
    "Slemdal": (59.949896, 10.695662),
    "Smestad": (59.937315, 10.683609),
    "Sognsvann": (59.967127, 10.733943),
    "Steinerud": (59.939083, 10.704345),
    "Storo": (59.944545, 10.778768),
    "Stortinget": (59.913047, 10.741469),
    "Stovner": (59.962616, 10.923414),
    "Trosterud": (59.927152, 10.864041),
    "Tveita": (59.914354, 10.841961),
    "Tøyen": (59.915214, 10.774670),
    "Tåsen": (59.953270, 10.752439),
    "Ullernåsen": (59.930635, 10.654796),
    "Ullevål stadion": (59.946629, 10.732226),
    "Ulsrud": (59.889970, 10.849428),
    "Veitvet": (59.944700, 10.847304),
    "Vestli": (59.972324, 10.929337),
    "Vettakollen": (59.959913, 10.695705),
    "Vinderen": (59.942803, 10.704761),
    "Voksenkollen": (59.980076, 10.665193),
    "Voksenlia": (59.966937, 10.655082),
    "Vollebekk": (59.935865, 10.831039),
    "Åsjordet": (59.928764, 10.646889),
    "Økern": (59.928592, 10.804152),
    "Østerås": (59.939445, 10.608587),
    "Østhorn": (59.956944, 10.749779),
}
|
||
|
||
# Tram (trikk) stops keyed by name; each value is (latitude, longitude).
# When merged into TRIKK_STOPS these entries take precedence over the estimated ones.
TRIKK_STOPS_VERIFIED: dict[str, tuple[float, float]] = {
    "Majorstuen": (59.929904, 10.714931),
    "Bogstadveien": (59.92611, 10.72167),
    "Rosenborg": (59.92417, 10.72389),
    "Briskeby": (59.92048, 10.71767),
    "Riddervolds plass": (59.91896, 10.72026),
    "Inkognitogata": (59.91565, 10.72114),
    "Nationaltheatret": (59.91504, 10.73304),
    "Øvre Slottsgate": (59.9118, 10.7417),
    "Dronningens gate": (59.91053, 10.74697),
    "Jernbanetorget": (59.912116, 10.751211),
    "Storgata": (59.91396, 10.75141),
    "Nybrua": (59.91707, 10.75834),
    "Stortorvet": (59.91310, 10.74530),
    "Bjørvika": (59.90806, 10.75639),
    "Schous plass": (59.92081, 10.75932),
    "Olaf Ryes plass": (59.9231, 10.7592),
    "Birkelunden": (59.9271, 10.7601),
    "Biermanns gate": (59.93028, 10.76104),
    "Sandaker senter": (59.93889, 10.76861),
    "Grefsenveien": (59.94278, 10.77344),
    "Storo": (59.944545, 10.778768),
    "Disen": (59.94627, 10.78729),
    "Glads vei": (59.95235, 10.78533),
    "Grefsenplatået": (59.9560, 10.78573),
    "Grefsen stadion": (59.96008, 10.78475),
    "Kjelsås": (59.96611, 10.78278),
    "Vigelandsparken": (59.92457, 10.70815),
    "Frogner plass": (59.92255, 10.70491),
    "Elisenberg": (59.91944, 10.70861),
    "Lille Frogner allé": (59.9180, 10.7120),
    "Niels Juels gate": (59.91634, 10.71520),
    "Solli": (59.91486, 10.71906),
    "Aker Brygge": (59.9110, 10.7299),
    "Kontraskjæret": (59.91087, 10.73592),
    "Lilleaker": (59.92074, 10.63580),
    "Sollerud": (59.92104, 10.64309),
    "Furulund": (59.91990, 10.65013),
    "Ullern": (59.92429, 10.65858),
    "Abbediengen": (59.92517, 10.66716),
    "Hoff": (59.92500, 10.67488),
    "Skøyen": (59.92384, 10.68034),
    "Thune": (59.92186, 10.68742),
    "Nobels gate": (59.91758, 10.69866),
    "Skarpsno": (59.91430, 10.70234),
    "Skillebekk": (59.91277, 10.71103),
    "Middelalderparken": (59.90639, 10.76417),
    "Oslo Hospital": (59.9032, 10.7674),
    "Ekebergparken": (59.8977, 10.7593),
    "Jomfrubråten": (59.8883, 10.7706),
    "Sportsplassen": (59.8860, 10.7736),
    "Holtet": (59.88151, 10.78415),
    "Sørli": (59.87493, 10.78709),
    "Kastellet": (59.87106, 10.79036),
    "Bråten": (59.86714, 10.79244),
    "Sæter": (59.86102, 10.79870),
    "Ljabru": (59.85335, 10.80089),
    "Rikshospitalet": (59.947768, 10.714716),
    "Gaustadalleen": (59.9454, 10.7172),
    "Forskningsparken": (59.943513, 10.720425),
    "Universitetet Blindern": (59.9421, 10.7243),
    "John Collets plass": (59.9403, 10.7290),
    "Ullevål sykehus": (59.9361, 10.7318),
    "Adamstuen": (59.9326, 10.7345),
    "Stensgata": (59.92957, 10.73303),
    "Bislett": (59.92599, 10.73108),
    "Dalsbergstien": (59.92354, 10.73163),
    "Welhavens gate": (59.92131, 10.72968),
    "Frydenlund": (59.92086, 10.73317),
    "Holbergs plass": (59.91876, 10.73453),
    "Lakkegata skole": (59.92055, 10.76834),
    "Carl Berners plass": (59.926592, 10.778360),
    "Sinsenkrysset": (59.93911, 10.78340),
    "Grefsen stasjon": (59.94167, 10.78056),
    "Homansbyen": (59.92278, 10.72639),
}
|
||
|
||
# Tram stops keyed by name with (latitude, longitude) values. When merged into
# TRIKK_STOPS, an entry here is overridden by a same-named verified entry.
# NOTE(review): the name suggests these coordinates are approximations — confirm.
TRIKK_STOPS_ESTIMATED: dict[str, tuple[float, float]] = {
    "Tinghuset": (59.9146, 10.7403),
    "Torshov": (59.9332, 10.7643),
    "Doktor Smiths vei": (59.9503, 10.7867),
    "Kjelsåsalleen": (59.9641, 10.7833),
    "Frogner stadion": (59.9167, 10.7038),
    "Ruseløkka": (59.9120, 10.7258),
    # Spelling corrected from "Tullinøkka"; the name is surfaced in nearby_transit output.
    "Tullinløkka": (59.9163, 10.7349),
    "Heimdalsgata": (59.9188, 10.7633),
    "Sofienberg": (59.9236, 10.7734),
    "Rosenhoff": (59.9307, 10.7800),
    "Sinsenterrassen": (59.9350, 10.7817),
}
|
||
|
||
# Every known tram stop. The right-hand operand wins on duplicate names, so a
# verified coordinate replaces an estimated one for the same stop.
TRIKK_STOPS: dict[str, tuple[float, float]] = TRIKK_STOPS_ESTIMATED | TRIKK_STOPS_VERIFIED
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Transit helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Walking speed in km/h; nearby_transit uses it to turn a walking-time budget into a distance.
_WALK_SPEED_KMH = 5.0
|
||
|
||
|
||
def _nearest_stop(
    lat: float, lng: float, stops: dict[str, tuple[float, float]]
) -> tuple[str, float]:
    """Find the stop in *stops* that lies closest to the point (lat, lng).

    Returns:
        ``(name, distance_km)`` of the closest stop. When several stops are
        equally close, the one listed first wins. For an empty *stops* mapping
        the result is ``("", inf)``.
    """
    closest_name = ""
    closest_km = float("inf")
    for stop_name, stop_position in stops.items():
        stop_lat, stop_lng = stop_position
        km = _distance_km(lat, lng, stop_lat, stop_lng)
        if km < closest_km:
            closest_name = stop_name
            closest_km = km
    return closest_name, closest_km
|
||
|
||
|
||
def nearby_transit(
    lat: float, lng: float, max_walk_min: float = 10.0
) -> dict[str, list[tuple[str, float]]]:
    """List the T-bane and trikk stops within walking range of a point.

    Args:
        lat: Latitude of the point.
        lng: Longitude of the point.
        max_walk_min: Walking-time budget in minutes; it is converted to a
            distance using ``_WALK_SPEED_KMH``.

    Returns:
        ``{"tbane": [...], "trikk": [...]}`` where each list holds
        ``(stop_name, distance_km)`` tuples sorted by ascending distance.
        A list is empty when no stop of that kind is within range.
    """
    max_km = (max_walk_min / 60.0) * _WALK_SPEED_KMH
    return {
        "tbane": _stops_within(lat, lng, TBANE_STOPS, max_km),
        "trikk": _stops_within(lat, lng, TRIKK_STOPS, max_km),
    }


def _stops_within(
    lat: float,
    lng: float,
    stops: dict[str, tuple[float, float]],
    max_km: float,
) -> list[tuple[str, float]]:
    """Return ``(name, distance_km)`` for every stop within *max_km*, nearest first."""
    # The walrus binds each distance once so it serves both the range filter
    # and the reported value instead of being computed twice per stop.
    reachable = [
        (name, km)
        for name, (stop_lat, stop_lng) in stops.items()
        if (km := _distance_km(lat, lng, stop_lat, stop_lng)) <= max_km
    ]
    # list.sort is stable, so equally distant stops keep their table order.
    reachable.sort(key=lambda entry: entry[1])
    return reachable
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Dimension functions
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def score_neighbourhood(
    unit: EiendomUnit | None,
    address: str | None = None,
    district: str | None = None,
) -> float:
    """Score proximity to the nearest preferred-neighbourhood anchor. Max 25.

    With usable coordinates on *unit* the score depends only on the distance to
    the closest entry in ``_PREFERRED_ANCHORS``:
    <0.5 km → 25, <1.0 → 20, <1.5 → 15, <2.5 → 10, <4.0 → 5, otherwise 2.

    Without coordinates (no unit, or falsy lat/lng) the address and district
    text is used instead: 5 if it mentions "oslo" (case-insensitive), else 0.
    """
    has_coordinates = bool(unit and unit.lat and unit.lng)
    if not has_coordinates:
        text = " ".join(part for part in (address, district) if part).lower()
        return 5.0 if "oslo" in text else 0.0

    # Tuples compare by distance first, so min() yields the closest anchor.
    nearest_km, nearest_label = min(
        (_distance_km(unit.lat, unit.lng, anchor_lat, anchor_lng), label)
        for label, anchor_lat, anchor_lng in _PREFERRED_ANCHORS
    )
    logger.debug("Nearest anchor: %s at %.2f km", nearest_label, nearest_km)

    distance_bands = (
        (0.5, 25.0),
        (1.0, 20.0),
        (1.5, 15.0),
        (2.5, 10.0),
        (4.0, 5.0),
    )
    for limit_km, points in distance_bands:
        if nearest_km < limit_km:
            return points
    return 2.0
|
||
|
||
|
||
def score_transport(unit: EiendomUnit | None) -> float:
    """Score the distance to the closest T-bane or trikk stop. Max 10.

    Bands on the distance to the nearer of the two networks:
    <0.4 km → 10, <0.8 km → 8, <1.2 km → 4, otherwise 0.
    Returns 0 when the unit or either coordinate is missing.
    """
    if unit is None or unit.lat is None or unit.lng is None:
        return 0.0

    tbane_km = _nearest_stop(unit.lat, unit.lng, TBANE_STOPS)[1]
    trikk_km = _nearest_stop(unit.lat, unit.lng, TRIKK_STOPS)[1]
    nearest_km = min(tbane_km, trikk_km)

    logger.debug("Nearest T-bane: %.2f km, trikk: %.2f km", tbane_km, trikk_km)

    for limit_km, points in ((0.4, 10.0), (0.8, 8.0), (1.2, 4.0)):
        if nearest_km < limit_km:
            return points
    return 0.0
|
||
|
||
|
||
def score_view_and_quiet(ad: Any, description: str) -> float:
    """Score view quality plus quiet setting from the listing text. Max 20.

    Returns 0 outright when the ad has neither balcony nor terrace.

    View (best matching tier only): sea/fjord 15, panorama 13, rooftops 10,
    any other mention of "utsikt" 7. Quiet: +5 when a calm-setting keyword
    occurs and -5 when a noise keyword occurs (the two can cancel out).
    Matching is by case-insensitive substring; the sum is clamped to [0, 20].
    """
    if not ad.has_balcony and not ad.has_terrace:
        return 0.0

    text = description.lower()

    # Ordered best-first; only the first tier with a matching keyword counts.
    view_tiers = (
        (15.0, ("sjøutsikt", "fjordutsikt", "sjøglimt", "fjordglimt")),
        (13.0, ("panorama", "panoramautsikt", "vidt utsyn", "vidstrakt")),
        (10.0, ("over hustak", "hustak", "over takene")),
        (7.0, ("utsikt",)),
    )
    view_points = next(
        (points for points, words in view_tiers if any(word in text for word in words)),
        0.0,
    )

    calm_words = (
        "rolig",
        "tilbaketrukket",
        "skjermet",
        "bakgård",
        "gårdsrom",
        "stille",
        "blindvei",
    )
    noise_words = ("støy", "bilvei", "trafikkert", "støyutsatt")

    quiet_points = 0.0
    if any(word in text for word in calm_words):
        quiet_points += 5.0
    if any(word in text for word in noise_words):
        quiet_points -= 5.0

    return float(_clamp(view_points + quiet_points, 0.0, 20.0))
|
||
|
||
|
||
def score_area_and_layout(ad: Any, unit: EiendomUnit | None) -> float:
    """Score main-unit size plus bedroom count. Max 15.

    The area comes from ``unit.usable_area`` when available, otherwise from
    ``ad.area_m2``, otherwise 0. Below 60 m² scores 0 and 60 to <80 m² scores a
    flat 3, both without any bedroom bonus. From 80 m² the size score is
    8 (<90), 11 (<105) or 14, plus 1.0 for three or more bedrooms or 0.5 for
    exactly two.
    """
    unit_area = unit.usable_area if unit else None
    area = unit_area or ad.area_m2 or 0

    # Below the 80 m² requirement the bedroom count is not considered at all.
    if area < 80:
        return 0.0 if area < 60 else 3.0

    size_points = 8.0 if area < 90 else 11.0 if area < 105 else 14.0

    bedroom_count = ad.bedrooms or 0
    bedroom_points = 1.0 if bedroom_count >= 3 else 0.5 if bedroom_count == 2 else 0.0

    return float(_clamp(size_points + bedroom_points, 0.0, 15.0))
|
||
|
||
|
||
def score_hybel(description: str) -> float:
    """Score a rental unit (hybel) mentioned in the listing text. Max 12.

    All matching is by case-insensitive substring.

    Returns:
        0  — no mention of "hybel", "sokkelleil" or "utleiedel".
        2  — the hybel is only described as a possibility (a known phrase, or
             "mulighet" and "hybel" occurring in the same sentence).
        12 — the text mentions "leieinntekt" or "skattefri", or both a bath
             and a kitchen keyword are found.
        7  — exactly one of bath / kitchen is found.
        4  — a hybel is mentioned but neither bath nor kitchen is found.
    """
    text = description.lower()

    if not any(marker in text for marker in ("hybel", "sokkelleil", "utleiedel")):
        return 0.0

    potential_phrases = (
        "mulighet for hybel",
        "mulighet til hybel",
        "mulig hybel",
        "kan etableres hybel",
        "kan bygges om til hybel",
        "tilrettelagt for hybel",
        "potensial for hybel",
        "hybelpotensial",
    )
    only_potential = any(phrase in text for phrase in potential_phrases)
    if not only_potential and "mulighet" in text and "hybel" in text:
        # Treat "!" and "?" as sentence ends too, then look for one sentence
        # that contains both words.
        sentences = text.replace("!", ".").replace("?", ".").split(".")
        only_potential = any(
            "mulighet" in sentence and "hybel" in sentence for sentence in sentences
        )

    if only_potential:
        return 2.0

    if "leieinntekt" in text or "skattefri" in text:
        return 12.0

    bath_phrases = (
        "eget bad",
        "eget wc",
        "eget toalett",
        "bad i hybel",
        "dusj i hybel",
        "eget dusj",
    )
    kitchen_phrases = ("eget kjøkken", "kjøkken i hybel", "kjøkkenkrok", "tekjøkken")

    # Specific phrases first; otherwise any mention of the room type counts.
    has_bath = any(phrase in text for phrase in bath_phrases) or "bad" in text or "dusj" in text
    has_kitchen = any(phrase in text for phrase in kitchen_phrases) or "kjøkken" in text

    facility_count = int(has_bath) + int(has_kitchen)
    return (4.0, 7.0, 12.0)[facility_count]
|
||
|
||
|
||
def score_floor(ad: Any, unit: EiendomUnit | None) -> float:
    """Return the floor penalty: -15 for ground floor (floor <= 1), else 0.

    Rationale: a floor number alone carries no signal without knowing the
    building height. The only reliable signal is ground floor (people looking
    in, noise, light).

    ``unit.floor`` is preferred when set. Otherwise ``ad.floor`` is parsed as
    an integer after stripping whitespace and trailing dots (so "2." parses as
    2). An unknown or unparseable floor gives no penalty.
    """
    if unit is not None and unit.floor is not None:
        level = unit.floor
    elif ad.floor is None:
        level = None
    else:
        try:
            level = int(str(ad.floor).strip().rstrip("."))
        except (ValueError, TypeError):
            level = None

    if level is not None and level <= 1:
        return -15.0
    return 0.0
|
||
|
||
|
||
def score_building_health(ad: Any, description: str) -> float:
    """Score sameie / borettslag economy signals. Max 7.

    Components, summed and then clamped to [0, 7]:
      * shared debt: exactly 0 → +3, unknown (None) → +1, anything else → 0
      * monthly common costs: missing/0 → 0, ≤3500 → +4, ≤5000 → +2,
        ≤7000 → 0, above 7000 → -2
      * +2 when the text contains a sound-economy keyword
        (case-insensitive substring match)
    """
    text = description.lower()

    shared_debt = ad.shared_debt
    if shared_debt == 0:
        debt_points = 3.0
    elif shared_debt is None:
        debt_points = 1.0
    else:
        debt_points = 0.0

    monthly_costs = ad.common_costs or 0
    if monthly_costs == 0:
        # Missing or zero common costs earn nothing.
        cost_points = 0.0
    elif monthly_costs <= 3500:
        cost_points = 4.0
    elif monthly_costs <= 5000:
        cost_points = 2.0
    elif monthly_costs <= 7000:
        cost_points = 0.0
    else:
        cost_points = -2.0

    economy_words = ("veldrevet", "solid økonomi", "god økonomi", "ingen fellesgjeld")
    keyword_points = 2.0 if any(word in text for word in economy_words) else 0.0

    return float(_clamp(debt_points + cost_points + keyword_points, 0.0, 7.0))
|
||
|
||
|
||
def score_green_areas(description: str) -> float:
    """Score parks, walking terrain and green surroundings. Max 5.

    Counts how many distinct green-area keywords occur in the text
    (case-insensitive substring match): two or more → 5, exactly one → 2,
    none → 0.

    Words that merely contain "park" — parking ("parkering", "parkere",
    "parkert") and parquet flooring ("parkett") — are removed before matching,
    so they no longer count as a park. Compound names such as
    "Frognerparken" still match.
    """
    text = description.lower()

    # Blank out the look-alikes; a space keeps neighbouring fragments apart so
    # the removal cannot splice a new keyword together.
    for lookalike in ("parkering", "parkett", "parkere", "parkert"):
        text = text.replace(lookalike, " ")

    keywords = ("park", "turområde", "turterreng", "marka", "skog", "grønt", "grønne", "friluft")
    hits = sum(1 for keyword in keywords if keyword in text)
    if hits >= 2:
        return 5.0
    if hits == 1:
        return 2.0
    return 0.0
|
||
|
||
|
||
def score_economy(ad: Any, unit: EiendomUnit | None) -> float:
    """Score the listing price against the Eiendom.no estimated value. Max 8.

    ``ratio = price / estimate`` where price is ``ad.total_price``, falling
    back to ``ad.asking_price``:
      * ratio ≤ 0.92 → 8
      * 0.92 < ratio ≤ 1.08 → linear from 8 down to 2 (5 at ratio 1.00)
      * ratio > 1.08 → 1

    Returns 0 when the unit, the estimate or the price is missing, or when
    either amount is not positive.
    """
    if unit is None or unit.estimated_selling_price is None:
        return 0.0
    estimate = unit.estimated_selling_price

    price = ad.total_price or ad.asking_price
    if price is None:
        return 0.0

    # A zero estimate would raise ZeroDivisionError, and a zero price would
    # give ratio 0 and wrongly earn the maximum score; neither is usable data.
    if estimate <= 0 or price <= 0:
        return 0.0

    ratio = price / estimate
    if ratio <= 0.92:
        return 8.0
    if ratio <= 1.08:
        # One line covers both sides of 1.00: 37.5 points per unit of ratio,
        # i.e. 3 points across each 0.08-wide band.
        return 5.0 + (1.0 - ratio) * 37.5
    return 1.0
|
||
|
||
|
||
def score_comparable_sales(
    listings: list[SimilarUnit],
    listing_sqm_price: int | float | None,
) -> float:
    """Score the listing's kr/m² against the median kr/m² of comparable units. Max 8.

    The score is ``8 × (1 − |ratio − 1|)`` clamped to [0, 8], where ratio is the
    listing's square-metre price divided by the median of the comparables. It
    is therefore highest when the listing sits exactly on the median and falls
    off equally in both directions.

    Returns 0 when there are no comparables, none of them has a usable
    ``sqm_price``, or the listing's own square-metre price is unknown.
    """
    if listing_sqm_price is None or not listings:
        return 0.0

    comparable_prices = [comp.sqm_price for comp in listings if comp.sqm_price]
    if not comparable_prices:
        return 0.0

    ratio = listing_sqm_price / _median(comparable_prices)
    closeness = 1.0 - abs(ratio - 1.0)
    return float(_clamp(closeness * 8.0, 0.0, 8.0))
|
||
|
||
|
||
def score_renovation(description: str) -> float:
    """Give a minor bonus for renovation upside. Max 3.

    Returns 3 when the text contains any renovation-related stem
    (case-insensitive substring match), otherwise 0.

    The stem "oppuss" covers the verb and adjective forms ("oppusse",
    "oppusset") as well as the noun "oppussing", as in "oppussingsobjekt" and
    "oppussingsbehov".

    NOTE(review): the stems also match completed work ("nyoppusset",
    "renovert", "modernisert") — confirm whether those should earn the bonus.
    """
    text = description.lower()
    stems = ("renover", "oppuss", "potensial", "moderniser")
    if any(stem in text for stem in stems):
        return 3.0
    return 0.0
|
||
|
||
|
||
def score_risk(ad: Any, unit: EiendomUnit | None) -> float:
    """Sum the risk penalties for a listing. Returns 0 or a negative number.

    Triggers (independent of each other, summed without a cap):
        No Eiendom.no data (unit is None) → -8   (can't price-check)
        Shared debt > 500k/unit           → -12  (hard red flag, total cost misleading)
        Shared debt 200-500k/unit         → -6   (notable, investigate)
        Common costs > 8 000/mnd          → -10  (structural sameie problem)
        Common costs 6-8 000/mnd          → -5
        Days on market > 120              → -15  (something is wrong)
        Days on market 60-120             → -5   (worth investigating)
        "usikker" in the description      → -5
    """
    penalty = 0.0 if unit is not None else -8.0

    # Per-unit shared debt (fellesgjeld); the attribute may be absent on the ad.
    shared_debt = getattr(ad, "shared_debt", None)
    if shared_debt is not None:
        if shared_debt > _SHARED_DEBT_HIGH:
            penalty -= 12.0
            logger.debug("High shared debt: %d kr → -12", shared_debt)
        elif shared_debt > _SHARED_DEBT_MEDIUM:
            penalty -= 6.0
            logger.debug("Medium shared debt: %d kr → -6", shared_debt)

    monthly_costs = ad.common_costs or 0
    if monthly_costs > _COMMON_COST_HIGH:
        penalty -= 10.0
    elif monthly_costs > _COMMON_COST_MEDIUM:
        penalty -= 5.0

    days_listed = unit.days_on_market if unit else None
    if days_listed:
        if days_listed > _DAYS_STALE:
            penalty -= 15.0
        elif days_listed > _DAYS_SLOW:
            penalty -= 5.0

    description = (ad.listing_description or "").lower()
    if "usikker" in description:
        penalty -= 5.0

    return penalty
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Orchestration
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def score_ad(ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit]) -> dict[str, Any]:
    """Score one listing on every dimension and combine the results.

    Each raw dimension score is normalised by ``DIMENSION_MAX``, multiplied by
    its ``DIMENSION_WEIGHTS`` entry and scaled by 100; the sum is the weighted
    bonus. The floor and risk penalties are then added as absolute deductions
    and the result is clamped to [0, 100].

    Returns:
        A dict with the raw score per dimension, the ``"floor"`` and ``"risk"``
        penalties, the final ``"total"`` and — only when the unit has usable
        coordinates — ``"nearby_transit"`` with the stops within a 10-minute
        walk.
    """
    description = ad.listing_description or ""

    transit_nearby: dict | None = None
    if unit and unit.lat and unit.lng:
        transit_nearby = nearby_transit(unit.lat, unit.lng, max_walk_min=10.0)
        if transit_nearby["tbane"] or transit_nearby["trikk"]:
            logger.debug("Nearby transit: %s", transit_nearby)

    # Insertion order is preserved in the returned dict.
    raw: dict[str, float] = {}
    raw["neighbourhood"] = score_neighbourhood(unit, ad.address, getattr(ad, "district", None))
    raw["view_and_quiet"] = score_view_and_quiet(ad, description)
    raw["area_and_layout"] = score_area_and_layout(ad, unit)
    raw["hybel"] = score_hybel(description)
    raw["transport"] = score_transport(unit)
    raw["economy"] = score_economy(ad, unit)
    listing_sqm_price = unit.listing_sqm_price if unit else None
    raw["comparable_sales"] = score_comparable_sales(similar_units, listing_sqm_price)
    raw["building_health"] = score_building_health(ad, description)
    raw["green_areas"] = score_green_areas(description)
    raw["renovation"] = score_renovation(description)

    penalties: dict[str, float] = {}
    penalties["floor"] = score_floor(ad, unit)
    penalties["risk"] = score_risk(ad, unit)

    weighted_bonus = sum(
        (raw[name] / DIMENSION_MAX[name]) * weight * 100.0
        for name, weight in DIMENSION_WEIGHTS.items()
    )
    total_penalty = sum(penalties.values())
    total = float(_clamp(weighted_bonus + total_penalty, 0.0, 100.0))

    scores: dict[str, Any] = dict(raw)
    scores.update(penalties)
    scores["total"] = total
    if transit_nearby is not None:
        scores["nearby_transit"] = transit_nearby

    return scores
|
||
|
||
|
||
def classify_ad(scores: dict[str, Any]) -> list[str]:
    """Turn a score dict (as produced by score_ad) into category labels.

    Missing keys are treated as 0. Labels are returned in a fixed order: first
    those based on the total score, then the feature and warning labels. Note
    that a total of 75 or more yields both "top_match" and "strong_candidate".
    """
    total = scores.get("total", 0.0)

    # (label, applies) pairs in output order.
    rules = (
        ("top_match", total >= 75),
        ("strong_candidate", total >= 60),
        ("worth_viewing", 45 <= total < 60),
        ("not_interesting", total < 30),
        ("manual_review_required", 30 <= total < 45),
        ("has_hybel", scores.get("hybel", 0.0) >= 7),
        ("premium_view", scores.get("view_and_quiet", 0.0) >= 13),
        ("preferred_neighbourhood", scores.get("neighbourhood", 0.0) == 25),
        ("renovation_candidate", scores.get("renovation", 0.0) > 0),
        ("ground_floor", scores.get("floor", 0.0) < 0),
        ("risk_object", scores.get("risk", 0.0) < -5),
        ("too_small", scores.get("area_and_layout", 0.0) <= 3),
    )
    return [label for label, applies in rules if applies]
|