Files
ole c9383788de update
Co-authored-by: Copilot <copilot@github.com>
2026-05-18 21:31:52 +00:00

88 lines
2.3 KiB
Python

"""Normalization and parsing helpers."""
import re
def normalize_price(price_str: str | None) -> int | None:
"""
Normalize Norwegian formatted price to integer.
Example: "7 200 991 kr" -> 7200991
"""
if not price_str:
return None
# Remove "kr" and spaces, keep only digits
normalized = re.sub(r"[^\d]", "", price_str)
try:
return int(normalized) if normalized else None
except ValueError:
return None
def normalize_area(area_str: str | None) -> int | None:
"""
Normalize area string to integer.
Example: "77 m²" -> 77
"""
if not area_str:
return None
cleaned = area_str.replace(" ", "")
match = re.search(r"(\d+(?:[.,]\d+)?)", cleaned)
if match:
value = match.group(1).replace(",", ".")
try:
return int(float(value))
except ValueError:
return None
return None
def normalize_number(num_str: str | None) -> int | None:
"""
Normalize Norwegian formatted number to integer.
Handles text like "3 500 kr/mnd" and "7,2".
"""
if not num_str:
return None
cleaned = re.sub(r"[^\d,\.]", "", num_str)
if "," in cleaned:
cleaned = cleaned.replace(".", "").replace(",", ".")
else:
cleaned = cleaned.replace(".", "")
try:
return int(float(cleaned)) if cleaned else None
except ValueError:
return None
def normalize_finnkode(finnkode: str | None) -> str | None:
"""Normalize finnkode to string, strip whitespace."""
if not finnkode:
return None
return str(finnkode).strip()
def extract_finnkode_from_url(url: str) -> str | None:
"""
Extract finnkode from FINN URL.
Example: https://www.finn.no/realestate/homes/ad.html?finnkode=462400360 -> 462400360
"""
match = re.search(r"finnkode=(\d+)", url)
if match:
return match.group(1)
return None
def text_to_bool(text: str | None) -> bool:
"""Convert text to boolean."""
if not text:
return False
return text.lower() in ("ja", "yes", "true", "1", "y")
def clean_text(text: str | None) -> str | None:
"""Clean and normalize text: strip, collapse whitespace."""
if not text:
return None
cleaned = " ".join(text.split())
return cleaned if cleaned else None