initial
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
"""Normalization and parsing helpers."""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def normalize_price(price_str: str | None) -> int | None:
|
||||
"""
|
||||
Normalize Norwegian formatted price to integer.
|
||||
Example: "7 200 991 kr" -> 7200991
|
||||
"""
|
||||
if not price_str:
|
||||
return None
|
||||
# Remove "kr" and spaces, keep only digits
|
||||
normalized = re.sub(r"[^\d]", "", price_str)
|
||||
try:
|
||||
return int(normalized) if normalized else None
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def normalize_area(area_str: str | None) -> int | None:
|
||||
"""
|
||||
Normalize area string to integer.
|
||||
Example: "77 m²" -> 77
|
||||
"""
|
||||
if not area_str:
|
||||
return None
|
||||
cleaned = area_str.replace(" ", "")
|
||||
match = re.search(r"(\d+(?:[.,]\d+)?)", cleaned)
|
||||
if match:
|
||||
value = match.group(1).replace(",", ".")
|
||||
try:
|
||||
return int(float(value))
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def normalize_number(num_str: str | None) -> int | None:
|
||||
"""
|
||||
Normalize Norwegian formatted number to integer.
|
||||
Handles text like "3 500 kr/mnd" and "7,2".
|
||||
"""
|
||||
if not num_str:
|
||||
return None
|
||||
cleaned = re.sub(r"[^\d,\.]", "", num_str)
|
||||
cleaned = cleaned.replace(" ", "")
|
||||
if "," in cleaned:
|
||||
cleaned = cleaned.replace(".", "").replace(",", ".")
|
||||
else:
|
||||
cleaned = cleaned.replace(".", "")
|
||||
try:
|
||||
return int(float(cleaned)) if cleaned else None
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def normalize_finnkode(finnkode: str | None) -> str | None:
|
||||
"""Normalize finnkode to string, strip whitespace."""
|
||||
if not finnkode:
|
||||
return None
|
||||
return str(finnkode).strip()
|
||||
|
||||
|
||||
def extract_finnkode_from_url(url: str) -> str | None:
|
||||
"""
|
||||
Extract finnkode from FINN URL.
|
||||
Example: https://www.finn.no/realestate/homes/ad.html?finnkode=462400360 -> 462400360
|
||||
"""
|
||||
match = re.search(r"finnkode=(\d+)", url)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
|
||||
def text_to_bool(text: str | None) -> bool:
|
||||
"""Convert text to boolean."""
|
||||
if not text:
|
||||
return False
|
||||
return text.lower() in ("ja", "yes", "true", "1", "y")
|
||||
|
||||
|
||||
def clean_text(text: str | None) -> str | None:
|
||||
"""Clean and normalize text: strip, collapse whitespace."""
|
||||
if not text:
|
||||
return None
|
||||
cleaned = " ".join(text.split())
|
||||
return cleaned if cleaned else None
|
||||
Reference in New Issue
Block a user