"""Normalization and parsing helpers.""" import re def normalize_price(price_str: str | None) -> int | None: """ Normalize Norwegian formatted price to integer. Example: "7 200 991 kr" -> 7200991 """ if not price_str: return None # Remove "kr" and spaces, keep only digits normalized = re.sub(r"[^\d]", "", price_str) try: return int(normalized) if normalized else None except ValueError: return None def normalize_area(area_str: str | None) -> int | None: """ Normalize area string to integer. Example: "77 m²" -> 77 """ if not area_str: return None cleaned = area_str.replace(" ", "") match = re.search(r"(\d+(?:[.,]\d+)?)", cleaned) if match: value = match.group(1).replace(",", ".") try: return int(float(value)) except ValueError: return None return None def normalize_number(num_str: str | None) -> int | None: """ Normalize Norwegian formatted number to integer. Handles text like "3 500 kr/mnd" and "7,2". """ if not num_str: return None cleaned = re.sub(r"[^\d,\.]", "", num_str) cleaned = cleaned.replace(" ", "") if "," in cleaned: cleaned = cleaned.replace(".", "").replace(",", ".") else: cleaned = cleaned.replace(".", "") try: return int(float(cleaned)) if cleaned else None except ValueError: return None def normalize_finnkode(finnkode: str | None) -> str | None: """Normalize finnkode to string, strip whitespace.""" if not finnkode: return None return str(finnkode).strip() def extract_finnkode_from_url(url: str) -> str | None: """ Extract finnkode from FINN URL. Example: https://www.finn.no/realestate/homes/ad.html?finnkode=462400360 -> 462400360 """ match = re.search(r"finnkode=(\d+)", url) if match: return match.group(1) return None def text_to_bool(text: str | None) -> bool: """Convert text to boolean.""" if not text: return False return text.lower() in ("ja", "yes", "true", "1", "y") def clean_text(text: str | None) -> str | None: """Clean and normalize text: strip, collapse whitespace.""" if not text: return None cleaned = " ".join(text.split()) return cleaned if cleaned else None