commit 1399f61c1a88824c69c6745ec95f6c499316de31 Author: Ole Date: Sat May 16 06:54:17 2026 +0000 initial diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..bf75d09 --- /dev/null +++ b/.env.example @@ -0,0 +1,18 @@ +FINN_CACHE_PATH=/data/finn.sqlite +FINN_MAX_SEARCH_PAGES=3 +FINN_DETAIL_LIMIT=20 +FINN_REQUEST_DELAY_SECONDS=2 +FINN_CACHE_TTL_SEARCH_MINUTES=60 +FINN_CACHE_TTL_AD_HOURS=24 +FINN_USER_AGENT=personal-finn-eiendom-analyzer/0.1 + +EIENDOM_NO_ENABLED=true +EIENDOM_NO_BASE_URL=https://api.eiendom.no/api/v1 +EIENDOM_NO_CACHE_TTL_HOURS=24 +EIENDOM_NO_REQUEST_DELAY_SECONDS=1 +EIENDOM_NO_SIMILAR_UNITS_ENABLED=true +EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS=RECENTLY_SOLD + +LOG_LEVEL=DEBUG +MCP_HOST=0.0.0.0 +MCP_PORT=8000 \ No newline at end of file diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..9ca9de4 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,181 @@ +# Copilot instructions for finn-eiendom-mcp + +This project is a private, self-hosted Python platform for analyzing FINN real-estate listings. It exposes the same code through three coordinated front ends: + +1. A **Python library** (`finn_eiendom`) — source of truth. +2. An **MCP server** (FastMCP, stdio + optional HTTP) over `finn_eiendom/mcp_server.py`. +3. A **CLI** (`finn-eiendom`) over `finn_eiendom/cli.py`. + +All three share the same `service.py`, `formatting.py`, `cache.py`, and `models.py`. Code lives in exactly one place and is called from both front ends. See `PRD.md` §17 for the full ownership rules — that section is the constitution. + +--- + +## Source of truth + +Read in this order: + +1. `PRD.md` — product and architecture, especially §17. +2. `PROJECT.md` — module map. +3. `AGENTS.md` — workflow. +4. `.github/instructions/*.md` — per-topic rules. 
+ +--- + +## Module layout + +``` +finn_eiendom/ + config.py # env vars, defaults, TTLs + models.py # Pydantic v2 models + parser.py # number/area/date/URL/finnkode normalization + http.py # async HTTP (httpx) with delay + retry + user-agent + cache.py # SQLite (sqlite3) schema + persistence + search.py # FINN search HTML parsing + pagination + ad.py # FINN listing HTML parsing + eiendom_no.py # Eiendom.no unit search/detail, unit_vector, similar-units + scoring.py # score model + classifications + feedback.py # verdicts + soft preference signal + analysis.py # orchestration + shortlist + summary + service.py # get_or_fetch_* + thin facade for MCP and CLI + formatting.py # render_* helpers shared by MCP and CLI + mcp_server.py # FastMCP wrappers around service.py + cli.py # typer-based CLI wrappers around service.py + __main__.py # python -m finn_eiendom → CLI entry +``` + +--- + +## The five hard rules + +Enforced by `tests/test_architecture.py`: + +1. **`mcp_server.py` and `cli.py` are siblings.** They never import from each other. Both import only from `service`, `formatting`, `models`, `config`, stdlib, and their own framework (`mcp` / `typer`). +2. **`service.py` is the only orchestrator.** Nothing above it touches HTTP or SQLite directly. +3. **`httpx` lives only in `http.py`.** +4. **`sqlite3` lives only in `cache.py`.** +5. **Output formatting lives only in `formatting.py`.** Never inline in CLI or MCP tool bodies. + +--- + +## Development workflow — local venv + +Default runtime is a project-local virtualenv. Docker is supported for packaging but optional for development. + +```bash +uv venv # or: python3.12 -m venv .venv +source .venv/bin/activate +uv pip install -e ".[dev]" # or: pip install -e ".[dev]" + +# from now on: +pytest +ruff check . +ruff format . +mypy finn_eiendom +finn-eiendom --help +finn-eiendom-mcp # stdio MCP server +``` + +**Never** install packages globally. **Never** add a dependency without updating `pyproject.toml`. 
+ +--- + +## Coding rules + +* Python 3.12+. +* Pydantic v2 with `model_config = ConfigDict(...)`. No v1 `class Config:` blocks. +* Type hints on every function signature. +* Async I/O for all network and DB code paths through `service.py`. +* Dependency injection for HTTP/cache clients in tests. +* Small, focused functions. One job per function. See `clean-code.instructions.md`. +* Errors raise with actionable messages; the MCP boundary translates them to `{"error": True, "code": ..., "message": ...}`. +* stdio MCP servers log to **stderr only**. + +--- + +## Code ownership — the short version + +| Concern | Lives in | +| -------------------------------------- | ------------------------------ | +| FINN search HTML parsing | `search.py` | +| FINN listing HTML parsing | `ad.py` | +| Norwegian number / area / URL regexes | `parser.py` | +| HTTP fetching + retry + delay | `http.py` | +| SQLite reads / writes | `cache.py` | +| Eiendom.no unit search/detail/comps | `eiendom_no.py` | +| `unit_vector` encode/decode (msgpack) | `eiendom_no.py` | +| Scoring + classification | `scoring.py` | +| Feedback storage | `feedback.py` | +| Cache-aware orchestration | `service.py` (`get_or_fetch_*`)| +| Shortlist + summary assembly | `analysis.py` | +| End-to-end runs | `service.py` (`analyze_search`)| +| MCP tool definitions | `mcp_server.py` | +| CLI command definitions | `cli.py` | +| Output rendering | `formatting.py` | +| Env-var defaults | `config.py` | +| Pydantic models | `models.py` | + +Full table with "never lives in" column is in `PRD.md` §17.2. + +--- + +## Adding a feature + +1. Decide the home using the table above (and `PRD.md` §17.2). +2. Implement in `service.py` (or `analysis.py` if pure orchestration). +3. Add a service-level test. +4. Add a thin MCP tool — `response_format`-aware. +5. Add a thin CLI command — `--format`-aware. +6. Add a renderer in `formatting.py`. +7. Test MCP and CLI registration. +8. Update PRD and instruction docs. 
+ +If the MCP tool body or CLI command body grows past ~20 lines, push logic down to `service.py`. + +--- + +## Documentation lookups — use context7 + +When uncertain about an external library API (FastMCP, Pydantic v2, Typer, httpx, msgpack, pytest-asyncio, respx, BeautifulSoup), call the **`context7` MCP server** *before* writing code. Don't rely on training-data memory. + +``` +context7:resolve-library-id → library_id +context7:query-docs(library_id, topic) → authoritative snippets +``` + +Details in `.github/instructions/docs.instructions.md`. + +--- + +## Clean code is a hard requirement + +See `clean-code.instructions.md`. DRY, single-responsibility, descriptive names, type hints, no dead code, comments explain why not what. If duplication slips in, the right answer is to extract it — not to copy the second instance. + +--- + +## Product behavior + +The MVP does one thing well: + +``` +FINN search URL in + → relevant property candidates out + → enriched with Eiendom.no estimates + → similar-units / comps + → explanations + → risks + → next steps + → broker questions +``` + +Always explain: + +* why a property is interesting, +* price vs estimate, +* price vs comparable sales, +* renovation upside, +* hybel / rental potential, +* technical / legal risks, +* uncertainty / confidence, +* next questions for the broker. + +Scores and estimates are decision support, not advice. Surface uncertainty, never hide it. \ No newline at end of file diff --git a/.github/instructions/clean-code.instructions.md b/.github/instructions/clean-code.instructions.md new file mode 100644 index 0000000..1ba8f38 --- /dev/null +++ b/.github/instructions/clean-code.instructions.md @@ -0,0 +1,150 @@ +--- +name: Clean code rules +description: Best-practice standards for all production and test code +applyTo: "**/*.py" +--- + +# Clean code rules + +These rules apply everywhere — every module, every function, every test. They are intentionally opinionated. 
If a rule conflicts with the architecture rules in `PRD.md` §17, the architecture rules win. If it conflicts with another best practice here, pick the one that produces the simpler, more readable result. + +## Single responsibility + +* One job per function. If a function name needs "and" to describe it, it's two functions. +* One job per module. `parser.py` parses. `cache.py` caches. `formatting.py` formats. Don't mix. +* One job per class. We rarely need classes outside Pydantic models, dataclasses, and the `HTTPClient`. Avoid OO for OO's sake. + +## Function size + +* Aim for under **30 lines** of body. +* Past **50 lines** it's a code smell — extract helpers. +* If you've got more than **3 levels of nesting**, the function wants splitting (extract the inner block into a helper named after what it does). + +## Naming + +* Names describe **intent**, not implementation. `get_or_fetch_ad`, not `process_ad`. `render_shortlist_markdown`, not `format2`. +* Verbs for actions (`fetch_`, `parse_`, `score_`, `render_`). +* Nouns for data (`FinnAd`, `EiendomUnit`, `shortlist`). +* Boolean variables / parameters read as predicates: `force_refresh`, `include_eiendom_no`, `is_recently_sold`. Not `flag`, not `do_thing`. +* Avoid abbreviations except those well-established in the domain (`url`, `ad`, `nok`, `bra`, `sqm`). +* Norwegian terms stay Norwegian when they're domain vocabulary (`hybel`, `fellesgjeld`, `finnkode`). Don't translate `finnkode` to `finn_code` — it's a proper noun. + +## Type hints + +Required on every function signature, including private helpers. Mypy in strict mode is the goal. + +```python +# ❌ +def parse(html, base_url=None): + ... + +# ✅ +def parse(html: str, base_url: str | None = None) -> FinnAd | None: + ... +``` + +Use modern syntax: `X | None` over `Optional[X]`, `list[int]` over `List[int]`, `dict[str, Any]` over `Dict[str, Any]`. + +## Comments + +* Comments explain **WHY**, never **WHAT**. The code already says what. 
+* If a comment is needed to explain *what* a line does, the line wants renaming or extracting. +* Use docstrings for public functions, classes, and modules. One-line summary, blank line, optional details and examples. +* No commented-out code. Delete it. Git remembers. +* No `# TODO` without a date or issue reference. `# TODO(2026-05): replace once Eiendom.no confirms ...` is fine. + +## DRY — Don't Repeat Yourself + +If you write the same logic, regex, SQL, or format string **twice**, extract it. The decision table in `PRD.md` §17.2 tells you where it belongs. + +The pre-merge anti-duplication checklist (from `PRD.md` §17.4): + +1. Is this logic already implemented somewhere? (`grep` the function name and obvious keywords.) +2. If I'm copy-pasting from another file, am I about to duplicate behavior that should live in one shared function? +3. Can a new caller use an existing `service.py` function instead of writing its own orchestration? +4. Is the same Pydantic field defined in two models? Factor out a base model. +5. Am I formatting output in two places (CLI + MCP)? Move it to `formatting.py`. +6. Am I opening a SQLite connection outside `cache.py`? Move it. +7. Am I building an httpx call outside `http.py`? Move it. +8. Am I writing a Norwegian-number / area / finnkode regex outside `parser.py`? Move it. +9. Am I adding an env-var lookup outside `config.py`? Move it. +10. Did I add a new behavior with only one front end (MCP or CLI)? If it should exist in both, the service function is missing. + +A small amount of duplication is acceptable to keep boundaries clean — see `PRD.md` §17.8. Past a handful of lines, extract. + +## Errors + +* **Fail loudly** with actionable messages. + + ```python + # ❌ + raise ValueError("bad input") + + # ✅ + raise ValueError(f"Unknown listing_status {status!r}; expected one of {VALID_LISTING_STATUSES}") + ``` + +* **No silent failures.** `except Exception: pass` is forbidden. 
Catch the specific exception, log it, and either recover or re-raise. + +* **Service raises; MCP wraps.** Service functions raise normal exceptions. The MCP tool boundary translates them into `{"error": True, "code": ..., "message": ...}`. CLI lets typer handle non-zero exits. + +* **Graceful degradation is explicit.** If Eiendom.no enrichment fails, return a result with `eiendom_unit=None` and a warning, not a silently-missing field. + +## State + +* No global mutable state. The only module-level constants allowed are configuration values loaded from env in `config.py`. +* No module-level caches (dicts, lists) that mutate. Use `cache.py` if you need persistence. +* Pass dependencies in (HTTP clients, DB connections) for testability. + +## Dead code + +* No commented-out code. +* No unused imports (ruff catches these — fix them, don't add `# noqa`). +* No unused parameters (use `_` or remove). +* No `if False:` blocks "for later". +* Functions and classes that aren't called anywhere — delete them. Git keeps history. + +## Magic numbers and strings + +Anything that influences behavior and isn't self-explanatory belongs in `config.py` (env-controlled) or as a named module-level constant near the top of the file. + +```python +# ❌ +if days > 90: + confidence = "low" + +# ✅ +COMPS_STALE_AFTER_DAYS = 90 + +if days > COMPS_STALE_AFTER_DAYS: + confidence = "low" +``` + +URLs, timeouts, retries, TTLs, status codes — never inline. + +## Imports + +* Standard library first, third-party second, local last, separated by blank lines. +* Ruff's `I` rules sort and group these — run `ruff check . --fix`. +* No wildcard imports. +* No relative imports above one level (`from ..thing import x` is a smell; refactor). +* Each module's allowed import set is enforced by `tests/test_architecture.py`. + +## Tests are first-class code + +Same rules. Same type hints. Same naming. Same DRY. If a fixture is used in three test files, it goes in `conftest.py`. 
If three tests share a setup, factor it into a fixture. + +## Reviewing your own change before commit + +A 60-second self-review: + +1. Did I add a function that already exists somewhere? (`grep` it.) +2. Did I bypass `service.py`, `http.py`, `cache.py`, or `formatting.py`? +3. Is everything typed? +4. Did I leave a `print()`, `breakpoint()`, or commented-out block behind? +5. Does the test for this change actually fail without the change? +6. Did I update `PRD.md` or the relevant instruction file if I changed an architectural rule? + +## When in doubt about a library API + +Use the `context7` MCP server instead of guessing. See `docs.instructions.md`. Training-data memory of `pydantic.field_validator`, `typer.Option`, `mcp.tool` annotations, or `httpx.AsyncClient` is unreliable — they all change between versions. \ No newline at end of file diff --git a/.github/instructions/cli.instructions.md b/.github/instructions/cli.instructions.md new file mode 100644 index 0000000..4f2692b --- /dev/null +++ b/.github/instructions/cli.instructions.md @@ -0,0 +1,158 @@ +--- +name: CLI rules +description: Rules for the typer-based finn-eiendom CLI +applyTo: "finn_eiendom/cli.py,finn_eiendom/__main__.py" +--- + +# CLI rules + +The CLI is a **thin wrapper** over `service.py`. It is a sibling of `mcp_server.py` — they never call each other and they share the same underlying service functions. Every CLI command maps 1:1 to a service function with the same parameters and defaults. + +## Framework + +Built with [`typer`](https://typer.tiangolo.com/). One `typer.Typer` app: + +```python +# finn_eiendom/cli.py +import asyncio, typer +from . 
import service, formatting + +app = typer.Typer(no_args_is_help=True, add_completion=False) +``` + +Entry points in `pyproject.toml`: + +```toml +[project.scripts] +finn-eiendom-mcp = "finn_eiendom.mcp_server:main" +finn-eiendom = "finn_eiendom.cli:app" +``` + +Plus `finn_eiendom/__main__.py`: + +```python +from .cli import app + +if __name__ == "__main__": + app() +``` + +So `python -m finn_eiendom ...` works without installation. + +## Command body shape + +```python +@app.command() +def analyze_search( + url: str, + max_pages: int = 3, + detail_limit: int = 20, + no_details: bool = typer.Option(False, "--no-details"), + no_eiendom: bool = typer.Option(False, "--no-eiendom"), + with_similar: bool = typer.Option(False, "--with-similar"), + format: str = typer.Option("json", "--format"), +) -> None: + """Analyze a FINN search URL and return a ranked shortlist.""" + result = asyncio.run(service.analyze_search( + search_url=url, + max_pages=max_pages, + detail_limit=detail_limit, + include_details=not no_details, + include_eiendom_no=not no_eiendom, + include_similar_units_for_shortlist=with_similar, + )) + typer.echo(formatting.render_shortlist(result, format)) +``` + +Rules: + +* The command body has at most three sections: option parsing (handled by typer), one `service.{function}(...)` call, one `typer.echo(formatting.render_{thing}(result, format))`. +* If the body has more than ~20 lines, the logic belongs in `service.py`. +* No `print()` — use `typer.echo()` for stdout, `typer.echo(..., err=True)` for stderr. +* No business logic, no rendering, no SQLite, no HTTP, no parsing. + +## Formats + +Every command that produces structured output accepts `--format`: + +* `--format json` (default) — full structured output, pipeable into `jq`. +* `--format markdown` — human-readable. +* `--format table` — terminal table (only where it makes sense: `analyze-search`, `compare`, `shortlist`, `diff`). + +All three render paths are produced by `formatting.py`. Never format inline in `cli.py`. 
Unsupported values raise `ValueError` with a list of supported formats — typer surfaces this as a non-zero exit. + +## Commands + +```text +finn-eiendom analyze-search [--max-pages 3] [--detail-limit 20] [--no-details] [--no-eiendom] [--with-similar] [--format ...] +finn-eiendom get-ad [--force-refresh] [--no-eiendom] [--with-similar] [--format ...] +finn-eiendom compare [--no-eiendom] [--no-comps] [--format ...] +finn-eiendom save-feedback [--notes "..."] +finn-eiendom shortlist [--run-id ID] [--limit 10] [--format ...] +finn-eiendom diff [--format ...] +finn-eiendom resolve-unit +finn-eiendom get-unit [--force-refresh] +finn-eiendom enrich-ad [--with-similar] +finn-eiendom build-vector +finn-eiendom decode-vector +finn-eiendom similar-units [--status RECENTLY_SOLD|FOR_SALE|CURRENT] +finn-eiendom similar-to-liked [--mode recommendations|comps] [--status ...] +finn-eiendom analyze-against-comps +finn-eiendom cache stats | clear | clear-html | clear-json +finn-eiendom serve [--transport stdio|http] [--host 127.0.0.1] [--port 8010] +finn-eiendom config show | path +finn-eiendom doctor +finn-eiendom version +``` + +Sub-command groups (`cache`, `config`) use `typer.Typer` sub-apps: + +```python +cache_app = typer.Typer(help="Cache management") +app.add_typer(cache_app, name="cache") + +@cache_app.command("stats") +def cache_stats() -> None: + typer.echo(formatting.render_cache_stats(service.get_cache_stats(), "json")) +``` + +## Async glue + +Service functions are async; CLI commands are sync. Always use `asyncio.run(service.{function}(...))` at the call boundary. Don't sprinkle `async def` across CLI commands — typer expects sync handlers. + +## Exit codes + +* `0` — success. +* `1` — runtime error (raised exception in service). +* `2` — usage error (typer's default for bad options). + +Let exceptions propagate from `service.py` and rely on typer's default handling. Only catch where you want a more specific exit code or message. 
+ +## What stays out of cli.py + +* `import httpx`, `import sqlite3`, `import msgpack` — never. +* `from .ad import ...`, `from .search import ...`, `from .eiendom_no import ...`, `from .scoring import ...`, `from .cache import ...`, `from .http import ...` — never. +* Inline formatting logic — goes in `formatting.py`. +* MCP imports (no `from .mcp_server import ...`). + +Allowed imports in `cli.py`: + +```python +import asyncio, json, sys +import typer +from . import service, formatting, config +from .models import FinnAd, EiendomUnit, SimilarUnit # only for type hints +``` + +`tests/test_architecture.py` enforces this. + +## When uncertain about typer + +Use `context7` instead of guessing: + +``` +context7:resolve-library-id → "fastapi/typer" +context7:query-docs(id, "Typer sub-apps and option groups") +``` + +See `docs.instructions.md`. \ No newline at end of file diff --git a/.github/instructions/docs.instructions.md b/.github/instructions/docs.instructions.md new file mode 100644 index 0000000..0278246 --- /dev/null +++ b/.github/instructions/docs.instructions.md @@ -0,0 +1,118 @@ +--- +name: Documentation lookups via context7 MCP +description: How and when to use the context7 MCP server for library documentation +applyTo: "**/*.py,**/*.md,**/*.toml,**/*.yaml,**/*.yml" +--- + +# Documentation lookups — use context7 + +When you are uncertain about a library's API, **call the `context7` MCP server before writing code**. Do not rely on training-data memory. Pydantic, FastMCP, Typer, httpx, and pytest all evolve quickly; what was true two releases ago is often wrong now. + +## When to use context7 + +Use it **before** writing code involving any of these: + +* **FastMCP / MCP Python SDK** — `@mcp.tool()` signatures, `ToolAnnotations`, `mcp.run(transport=...)`, resource and prompt decorators, server lifecycle, streamable-HTTP setup. 
+* **Pydantic v2** — `BaseModel`, `Field`, `ConfigDict`, `model_validator`, `field_validator`, `model_dump` / `model_dump_json`, discriminated unions, `Annotated[...]` with validators. +* **Typer** — `Typer()` apps, `typer.Option`, `typer.Argument`, sub-apps via `add_typer`, callbacks, exit codes, testing with `CliRunner`. +* **httpx** — `AsyncClient`, timeouts, transports, retries, `Response` API. +* **respx** — mocking httpx, `respx.mock`, `route.mock`, match patterns. +* **msgpack** — packing/unpacking, type extensions, raw vs string mode. +* **base64** — `urlsafe_b64encode`, padding handling. +* **pytest** / **pytest-asyncio** — fixtures, parametrize, async tests, markers, `tmp_path`, `monkeypatch`. +* **BeautifulSoup** / **lxml** — selectors, parser flavors, element traversal. +* **typer.testing.CliRunner** — invoking apps, asserting on stdout/stderr/exit codes. + +Use it **also** when: + +* A test fails with an error like `AttributeError: 'BaseModel' object has no attribute 'dict'` (Pydantic v1 vs v2 confusion). +* You see a `DeprecationWarning` from a third-party library and aren't sure of the modern replacement. +* You're about to copy a code pattern from memory that feels "old". + +## When NOT to use it + +* Pure Python stdlib (`json`, `pathlib`, `dataclasses`, `typing`) — these are stable and well-known. +* Project-internal modules — read the source. +* Generic programming questions ("what's a list comprehension") — use your own knowledge. +* FINN / Eiendom.no API behavior — these are not in context7. Use fixtures from prior runs in `tests/fixtures/` and the endpoint notes in `PRD.md` §9. + +## How to use it + +Two-step pattern: + +### 1. Resolve the library ID + +``` +context7:resolve-library-id(query="fastmcp") +context7:resolve-library-id(query="pydantic") +context7:resolve-library-id(query="typer") +``` + +Returns the canonical library ID (e.g. `pydantic/pydantic`, `fastapi/typer`). Pick the most-starred / official-looking match. + +### 2. 
Query the docs + +``` +context7:query-docs( + context7CompatibleLibraryID="pydantic/pydantic", + topic="field validators v2 mode after", + tokens=3000, +) +``` + +* **Keep the topic focused.** "Pydantic v2 field validators with mode=after on Optional[str]" beats "Pydantic validation". +* **Cap tokens** to roughly what you need (1500–4000 is usually plenty). The default is fine for most calls. +* **Use library-specific terminology** in the topic — "discriminator field" for Pydantic, "tool annotations" for FastMCP, "sub-apps" for Typer. + +### Worked examples + +**Q: How do I declare a FastMCP tool with read-only annotations?** + +``` +context7:resolve-library-id(query="modelcontextprotocol python sdk") +context7:query-docs(context7CompatibleLibraryID="{resolved_library_id}", + topic="FastMCP @mcp.tool ToolAnnotations readOnlyHint") +``` + +**Q: How do I write a Pydantic v2 model_validator that runs after field validation?** + +``` +context7:resolve-library-id(query="pydantic") +context7:query-docs(context7CompatibleLibraryID="pydantic/pydantic", + topic="model_validator mode='after' v2") +``` + +**Q: How do I mock an async httpx POST with respx?** + +``` +context7:resolve-library-id(query="respx") +context7:query-docs(context7CompatibleLibraryID="{resolved_library_id}", + topic="respx mock async httpx POST json body") +``` + +**Q: How do I add a Typer sub-app for `cache` commands?** + +``` +context7:resolve-library-id(query="typer") +context7:query-docs(context7CompatibleLibraryID="{resolved_library_id}", + topic="Typer add_typer sub-application command groups") +``` + +## After the lookup + +* Cite or summarize what you found in a code comment **only when** the snippet documents a non-obvious API choice — otherwise the code is enough. +* If context7 returns nothing useful, fall back to: + 1. The library's official docs site. + 2. The library's repo `README` / `examples/`. + 3. The smallest possible spike (a 5-line script in the venv) to verify behavior. 
+ +## Anti-patterns + +* **Don't** invent a method signature from memory and hope. If you're not 100% sure of an API, look it up. +* **Don't** copy patterns from old Stack Overflow answers without verifying — Pydantic, FastMCP, and Typer all had breaking changes recently. +* **Don't** silence a warning instead of fixing the deprecation. Look up the modern API. +* **Don't** query context7 for FINN or Eiendom.no — those endpoints aren't in any public docs index. Use `tests/fixtures/` and `PRD.md` §9. + +## Network configuration note + +`context7` is configured as a connected MCP server in this environment. If a call fails with a connection error, surface it clearly — don't fall back to guessing. \ No newline at end of file diff --git a/.github/instructions/mcp.instructions.md b/.github/instructions/mcp.instructions.md new file mode 100644 index 0000000..4f62fc2 --- /dev/null +++ b/.github/instructions/mcp.instructions.md @@ -0,0 +1,192 @@ +--- +name: MCP rules +description: Rules for FastMCP tools, resources, and prompts +applyTo: "finn_eiendom/mcp_server.py,finn_eiendom/**/*mcp*.py" +--- + +# MCP server rules + +The MCP server is a **thin wrapper** over `service.py`. It owns: + +* Tool registration with `@mcp.tool()` and annotations. +* Pydantic input schemas (these double as tool documentation). +* Error wrapping at the protocol boundary. +* JSON / markdown response formatting via `formatting.py`. + +It does **not** own: + +* Parsing, scraping, scoring, cache, or HTTP fetching logic. +* SQLite or `httpx` access. +* Any orchestration of "check cache, else fetch, else save" — that's `service.py`. + +## Server bootstrap + +```python +# finn_eiendom/mcp_server.py +import sys, logging +from mcp.server.fastmcp import FastMCP + +logging.basicConfig(stream=sys.stderr, level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s") + +mcp = FastMCP("finn_eiendom_mcp") + +# ... tools registered here ... 
+ +def main() -> None: + mcp.run(transport="stdio") + +if __name__ == "__main__": + main() +``` + +stdio servers **must** log to stderr only — anything on stdout breaks the JSON-RPC frame. + +## Tool naming + +All tools use the `finn_` prefix so they don't collide with other MCP servers running in the same Claude Desktop: + +* `finn_analyze_search` +* `finn_get_ad` +* `finn_compare_ads` +* `finn_save_feedback` +* `finn_get_shortlist` +* `finn_get_new_ads_since_last_run` +* `finn_resolve_eiendom_unit` +* `finn_get_eiendom_unit` +* `finn_enrich_ad` +* `finn_build_unit_vector` +* `finn_decode_unit_vector` +* `finn_get_similar_units` +* `finn_find_similar_to_liked_ad` +* `finn_analyze_ad_against_comps` + +## Tool body shape + +Every tool body looks like this: + +```python +@mcp.tool( + annotations=ToolAnnotations( + title="Analyze a FINN search URL", + readOnlyHint=True, + destructiveHint=False, + openWorldHint=True, + ) +) +async def finn_analyze_search(input: AnalyzeSearchInput) -> str: + """Analyze a FINN search URL and return a ranked shortlist.""" + try: + result = await service.analyze_search( + search_url=input.search_url, + max_pages=input.max_pages, + detail_limit=input.detail_limit, + include_details=input.include_details, + include_eiendom_no=input.include_eiendom_no, + include_similar_units_for_shortlist=input.include_similar_units_for_shortlist, + ) + return formatting.render_shortlist(result, input.response_format) + except Exception as e: + log.exception("finn_analyze_search failed") + return json.dumps({ + "error": True, + "code": type(e).__name__, + "message": str(e), + }) +``` + +Notes: + +* Every tool delegates to `service.{function}(...)` in one call. +* Every tool wraps in try/except and returns the error envelope as a JSON string. +* Output rendering goes through `formatting.py`, never inline. +* If the tool body needs more than ~20 lines, logic has leaked out of the service layer — push it back down. 
+ +## Input schemas + +Every tool has a Pydantic v2 input model. Schemas live with the tool in `mcp_server.py` (they document the tool to LLM clients). Reuse from `models.py` only when the same shape is also a domain object — otherwise keep them as tool-local input types. + +```python +class AnalyzeSearchInput(BaseModel): + search_url: str = Field(..., description="Full FINN search URL") + max_pages: int = Field(default=3, ge=1, le=10) + detail_limit: int = Field(default=20, ge=1, le=100) + include_details: bool = True + include_eiendom_no: bool = True + include_similar_units_for_shortlist: bool = False + response_format: Literal["json", "markdown"] = "json" +``` + +## Annotations + +Set the right hints: + +* Read-only tools (most of them): `readOnlyHint=True, destructiveHint=False, openWorldHint=True`. +* `finn_save_feedback`: `readOnlyHint=False, destructiveHint=False, idempotentHint=False`. + +## Response format + +Tools accept a `response_format` parameter (`"json"` or `"markdown"`): + +* `"json"` — return `formatting.render_{thing}(result, "json")` (the JSON-serialized result). +* `"markdown"` — return `formatting.render_{thing}(result, "markdown")`. + +Errors are always returned as the JSON error envelope regardless of `response_format`. + +## What stays out of mcp_server.py + +* `import httpx` — never. +* `import sqlite3` — never. +* `from .ad import ...`, `from .search import ...`, `from .eiendom_no import ...`, `from .scoring import ...`, `from .cache import ...`, `from .http import ...` — never. Go through `service`. +* Output formatting logic — goes in `formatting.py`. +* Cache management — goes in `service.py`. + +Allowed imports in `mcp_server.py`: + +```python +import json, logging, sys +from typing import Literal, Optional +from mcp.server.fastmcp import FastMCP +from mcp.types import ToolAnnotations +from pydantic import BaseModel, Field +from . import service, formatting +from .models import FinnAd, EiendomUnit, SimilarUnit # only if needed for type hints +from . 
import config +``` + +`tests/test_architecture.py` enforces this. + +## Resources and prompts + +When you add resources or prompts, they follow the same rule: thin wrappers over `service.py` and `formatting.py`. Resources: + +``` +finn://preferences/current +finn://search-runs/latest +finn://search-runs/{id} +finn://ads/{finnkode} +finn://ads/{finnkode}/enriched +finn://shortlist/latest +finn://feedback/{finnkode} +finn://eiendom-units/{unitCode} +finn://eiendom-units/{unitCode}/similar/{listingStatus} +``` + +Prompts: `evaluate_property_for_user`, `compare_properties_for_user`, `refine_search_from_feedback`, `find_more_like_this`. + +## When uncertain about FastMCP + +Use `context7` for FastMCP / MCP SDK questions instead of guessing: + +``` +context7:resolve-library-id → "modelcontextprotocol/python-sdk" or similar +context7:query-docs(id, "FastMCP tool annotations") → snippets +``` + +See `docs.instructions.md`. + +## Transports + +* Default: stdio. `finn-eiendom-mcp` is the entry point. +* Optional: Streamable HTTP via `finn-eiendom serve --transport http --port 8010`. Path: `POST /mcp`. Operational endpoints: `GET /health`, `GET /version`, `GET /debug/config`. +* Keep tools transport-agnostic. No request/response shape depends on the transport. \ No newline at end of file diff --git a/.github/instructions/python.instructions.md b/.github/instructions/python.instructions.md new file mode 100644 index 0000000..80dadc7 --- /dev/null +++ b/.github/instructions/python.instructions.md @@ -0,0 +1,80 @@ +--- +name: Python project rules +description: Python conventions for the FINN/Eiendom MCP server +applyTo: "**/*.py" +--- + +# Python conventions + +## Runtime + +* Python **3.12+**. +* Project-local virtualenv at `.venv/` (created by `uv venv` or `python3.12 -m venv .venv`). +* All commands run inside the activated venv. +* Editable install: `uv pip install -e ".[dev]"` (or `pip install -e ".[dev]"`). 
+* Never install packages globally; never use `sudo pip`; never mutate host Python. +* Add new dependencies to `pyproject.toml` in the same change that uses them. + +## Language + +* Use Python 3.12 syntax. Prefer `X | None` over `Optional[X]`, `list[int]` over `List[int]`, structural pattern matching where it actually helps. +* **Type hints on every function signature**, including private helpers. `mypy --strict finn_eiendom` is the target. +* Async-first for I/O. Sync code is fine for parsing, scoring, and cache access (SQLite). +* Pydantic v2 for all structured domain models, with `model_config = ConfigDict(...)`. No v1 `class Config:` blocks. + +## Prefer + +* Small, pure functions for parsing, normalization, and scoring. +* Explicit return types and explicit exceptions. +* Dependency injection for HTTP clients and DB connections in tests (pass `client` / `conn` as args; let services own the defaults). +* Domain names from the PRD (`FinnAd`, `EiendomUnit`, `SimilarUnit`, `analyze_search`, `get_or_fetch_ad`). +* `dataclass` for internal value objects that don't cross the API boundary; Pydantic for anything serialized or validated. + +## Avoid + +* Global mutable state (module-level dicts as caches, etc.). The only allowed module-level state is configuration loaded from env in `config.py`. +* Hardcoded URLs, credentials, paths, or magic numbers anywhere outside `config.py`. +* `httpx` imports anywhere except `finn_eiendom/http.py`. +* `sqlite3` imports anywhere except `finn_eiendom/cache.py`. +* `BeautifulSoup` imports anywhere except `finn_eiendom/search.py` and `finn_eiendom/ad.py`. +* `msgpack` imports anywhere except `finn_eiendom/eiendom_no.py`. +* Scraping, scoring, cache, or HTTP fetching logic inside MCP tool or CLI command bodies. +* Direct network calls in unit tests — use `respx` and fixtures. +* `print()` for logging — use the `logging` module. stdio MCP server logs go to **stderr only**. 
+* Bare `except:` or `except Exception: pass` — catch the specific exception or let it propagate. + +## External fetches + +All external fetches must support: + +* Configurable request delay (`FINN_REQUEST_DELAY_SECONDS`, `EIENDOM_NO_REQUEST_DELAY_SECONDS`). +* Cache lookup before fetch. +* Retry on 5xx with exponential backoff (`1s, 2s, 4s`). +* Graceful failure that returns `None` or empty rather than raising, when the caller can degrade. +* Structured logging at INFO for success, WARNING for retry, ERROR for final failure. + +## Best practices + +* **Single responsibility per function.** If a function name needs "and" to describe it, it's two functions. +* **Function length:** aim for under 30 lines. Past 50 lines it's a code smell — extract helpers. +* **Cyclomatic complexity:** if you've got more than 3 levels of nesting, the function wants splitting. +* **Naming:** `get_or_fetch_ad`, not `process_ad`. Verbs for actions, nouns for data. Avoid abbreviations except those well-known in the domain (`url`, `ad`, `nok`). +* **DRY:** if you write the same logic, regex, SQL, or format string twice, extract it. The decision table in `PRD.md` §17.2 tells you where it belongs. +* **Comments explain WHY**, not WHAT. The code already says what. +* **Errors are loud:** raise with actionable messages (`f"Unknown listing_status {status!r}; expected one of {VALID_STATUSES}"`). The MCP boundary wraps them as `{"error": True, ...}`. + +## When uncertain about a library API + +Use the `context7` MCP server **before** writing code: + +1. `context7:resolve-library-id` with the package name → canonical library ID. +2. `context7:query-docs` with that ID + focused topic. + +See `docs.instructions.md`. Don't guess from training memory — Pydantic, FastMCP, and Typer all change. + +## Tooling + +* `ruff check .` — lint. Target Python 3.12. Active rules: `E F I UP B SIM`. +* `ruff format .` — format. Line length 100. +* `mypy --strict finn_eiendom` — type-check. 
+* `pytest` — run the full suite. \ No newline at end of file diff --git a/.github/instructions/tests.instructions.md b/.github/instructions/tests.instructions.md new file mode 100644 index 0000000..920e54e --- /dev/null +++ b/.github/instructions/tests.instructions.md @@ -0,0 +1,199 @@ +--- +name: Test rules +description: Testing conventions for parser, cache, scoring, service, MCP, CLI, and architecture +applyTo: "tests/**/*.py" +--- + +# Test rules + +## Runtime + +Tests run in the project-local `.venv`. From the project root with the venv activated: + +```bash +pytest # full suite +pytest tests/test_service.py -v # one file +pytest -k "shortlist" # one keyword +pytest --lf # rerun last failures +``` + +`pytest-asyncio` is in `[tool.pytest.ini_options]` with `asyncio_mode = "auto"` — `async def` tests run without an `@pytest.mark.asyncio` decorator. + +## Never do live network calls + +No real HTTP in unit tests. Mock with `respx` (sits in front of `httpx.AsyncClient`): + +```python +import respx, httpx +from finn_eiendom import http as http_module + +@respx.mock +async def test_finn_search_fetch_uses_user_agent(): + route = respx.get("https://www.finn.no/realestate/homes/search.html").mock( + return_value=httpx.Response(200, html=SAMPLE_FINN_SEARCH_HTML) + ) + client = http_module.HTTPClient(user_agent="test-agent") + resp = await client.get("https://www.finn.no/realestate/homes/search.html") + assert resp.status_code == 200 + assert route.calls.last.request.headers["user-agent"] == "test-agent" +``` + +## Fixtures + +Fixture-driven testing for parsers and APIs: + +* FINN search HTML → `tests/fixtures/finn_search.html`. +* FINN listing HTML → `tests/fixtures/finn_ad_*.html`. +* Eiendom.no unit search JSON → `tests/fixtures/eiendom_unit_search.json`. +* Eiendom.no unit detail JSON → `tests/fixtures/eiendom_unit_detail.json`. +* Eiendom.no similar-units JSON → `tests/fixtures/eiendom_similar.json`. + +Loader helpers in `tests/fixtures.py` (e.g. 
`SAMPLE_FINN_SEARCH_HTML`, `SAMPLE_EIENDOM_UNIT_JSON`). Add new fixtures here, don't inline large strings in test files. + +## Test layout + +``` +tests/ + fixtures/ # raw HTML / JSON inputs + fixtures.py # loader helpers + conftest.py # shared pytest fixtures (tmp DB, http client, etc.) + test_parser.py # number/area/date/URL/finnkode normalization + test_search.py # FINN search HTML → cards + test_ad.py # FINN listing HTML → FinnAd + test_eiendom_no.py # unit search/detail/similar JSON, unit_vector encode/decode + test_scoring.py # all scoring components + classifier + test_cache.py # SQLite read/write/TTL + test_http.py # retry on 5xx, raise on 4xx, delay applied (new) + test_service.py # get_or_fetch_*, analyze_* (new) + test_formatting.py # render_* json/markdown/table (new) + test_mcp_server.py # tool registration + error envelope (expanded) + test_cli.py # typer CliRunner (new) + test_architecture.py # import-graph invariants (new) +``` + +## What to test per category + +### Parsers (`test_parser`, `test_search`, `test_ad`, `test_eiendom_no`) + +* Missing fields → `None`, not exception. +* Norwegian number formats: `7 200 991 kr`, `kr 7 200 991`, `7.200.991`. +* URL normalization (relative → absolute). +* Finnkode extraction from various URL shapes. +* Area parsing: `77 m²`, `77m2`, `77 kvm`. +* Price parsing (asking vs total vs shared debt). +* Eiendom.no JSON edge cases: empty `units`, missing `valuation`, missing `latestMarketData`. + +### Unit vectors (`test_eiendom_no`) + +* msgpack encoding + base64url without padding. +* Decode roundtrip. +* Missing optional fields (floor, rooms, built). +* Both lon/lat orderings handled. + +### Scoring (`test_scoring`) + +* Each component in isolation. +* Total clamped to 0–100. +* Risk penalties applied (negative range). +* Bargain classification triggers on the expected signal mix. +* Hybel classification: documented / possible / unclear / not relevant. 
+* Explainability: explanation list non-empty when score is non-trivial. + +### Cache (`test_cache`) + +* Read after write returns same object. +* TTL expiry returns `None`. +* JSON roundtrip preserves all fields. +* `init_db` is idempotent on existing DBs. + +### HTTP (`test_http`) + +* Retries on 500/502/503/504 with backoff (count exactly N retries). +* Raises immediately on 404 / 4xx. +* Applies `request_delay` between calls. +* Honors `user_agent`. + +### Service (`test_service`) + +The service tests are the heart of the suite. They cover orchestration end-to-end against fixtures. + +* `test_get_or_fetch_ad_uses_cache` — second call hits cache, no HTTP. +* `test_get_or_fetch_ad_fetches_when_cache_miss` — first call hits HTTP, then writes cache. +* `test_get_or_fetch_ad_force_refresh` — `force_refresh=True` bypasses cache. +* `test_analyze_search_with_fixtures` — full run from search HTML → shortlist. +* `test_find_similar_to_liked_uses_liked_feedback` — only seeds from `liked` verdicts. + +Use a tmp SQLite DB via the `tmp_path` pytest fixture: + +```python +@pytest.fixture +def tmp_db(tmp_path, monkeypatch): + db_path = tmp_path / "finn.sqlite" + monkeypatch.setenv("FINN_CACHE_PATH", str(db_path)) + return db_path +``` + +### Formatting (`test_formatting`) + +* `render_shortlist(result, "json")` is parseable JSON and roundtrips. +* `render_shortlist(result, "markdown")` contains the score and at least one risk. +* `render_(result, "xml")` raises `ValueError` listing supported formats. + +### MCP (`test_mcp_server`) + +* `test_mcp_server_has_correct_tools` — all 14 `finn_*` tool names registered. +* `test_finn_decode_unit_vector_returns_json` — happy path. +* `test_finn_analyze_search_handles_error` — error envelope shape: `{"error": True, "code": ..., "message": ...}`. + +Use the `mcp` SDK's testing helpers; don't spawn a subprocess. 
+ +### CLI (`test_cli`) + +Use Typer's `CliRunner`: + +```python +from typer.testing import CliRunner +from finn_eiendom.cli import app + +runner = CliRunner() + +def test_cli_help(): + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "analyze-search" in result.stdout +``` + +Patch `service.` with `monkeypatch` so CLI tests don't exercise the full stack — that's covered by `test_service.py`. + +### Architecture (`test_architecture`) + +Static checks of the module dependency graph: + +* No `import httpx` outside `finn_eiendom/http.py`. +* No `import sqlite3` outside `finn_eiendom/cache.py`. +* No `BeautifulSoup` import outside `search.py` and `ad.py`. +* No `msgpack` import outside `eiendom_no.py`. +* `mcp_server.py` only imports from `service`, `formatting`, `models`, `config`, `mcp`, stdlib, `pydantic`. +* `cli.py` only imports from `service`, `formatting`, `models`, `config`, `typer`, stdlib. +* `service.py` does not import from `mcp_server` or `cli`. + +Implementation: walk `.py` files under `finn_eiendom/` with `ast`, collect imports, assert allowed sets per module. + +## Best practices + +* One assertion per test (or per closely related group). Long tests die in painful ways. +* Test names describe the behavior: `test_get_or_fetch_ad_uses_cache_within_ttl`. +* Use `monkeypatch` for env vars and `tmp_path` for files. No `os.environ` mutation. +* No `time.sleep` — use `freezegun` if a test depends on time, or refactor the code under test to take a `now` parameter. +* No "smoke tests" that ping real servers — those go under a separately-marked `pytest -m live` suite and are not part of CI. + +## When uncertain about test tooling + +Use `context7` for pytest, respx, freezegun, or Typer testing: + +``` +context7:resolve-library-id → "pytest-dev/pytest" / "lundberg/respx" +context7:query-docs(id, "respx mock httpx async post") +``` + +See `docs.instructions.md`. 
+ \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4d8bd9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.coverage +htmlcov/ + +# Virtualenvs +.venv/ +venv/ + +# uv +# uv.lock + +# Env +.env +.env.local + +# Data/cache +data/*.sqlite +data/*.sqlite-* +data/*.db +data/*.db-* + +# Editor +.DS_Store +.idea/ + +# Logs +*.log \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..82ee0ac --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,10 @@ +{ + "recommendations": [ + "github.copilot", + "github.copilot-chat", + "ms-python.python", + "charliermarsh.ruff", + "ms-azuretools.vscode-docker", + "tamasfe.even-better-toml" + ] +} \ No newline at end of file diff --git a/.vscode/mcp.json b/.vscode/mcp.json new file mode 100644 index 0000000..38b62ae --- /dev/null +++ b/.vscode/mcp.json @@ -0,0 +1,8 @@ +{ + "servers": { + "context7": { + "type": "http", + "url": "https://mcp.context7.com/mcp" + } + } +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..375eed9 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,23 @@ +{ + "python.defaultInterpreterPath": ".venv/bin/python", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "python.testing.pytestArgs": [ + "tests" + ], + "editor.formatOnSave": true, + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff" + }, + "ruff.enable": true, + "chat.instructionsFilesLocations": { + ".github/instructions": true + }, + "github.copilot.chat.codeGeneration.useInstructionFiles": true, + "files.exclude": { + "**/__pycache__": true, + "**/.pytest_cache": true, + "**/.mypy_cache": true, + "**/.ruff_cache": true + } +} \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 
0000000..af932a7 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,178 @@ +# AGENTS.md — Workflow for AI agents on finn-eiendom-mcp + +This is the master doc for any AI agent (Claude, Copilot, Cursor, etc.) working in this repo. Read this first, then the more specific files it references. + +--- + +## Read order + +Before changing code, read: + +1. **`PRD.md`** — what we're building and why. Especially §17 ("Code ownership and anti-duplication") — that section is the constitution. +2. **`PROJECT.md`** — module map. +3. This file — workflow. +4. The relevant `.github/instructions/*.md`: + * `python.instructions.md` — Python conventions. + * `mcp.instructions.md` — MCP tool rules. + * `cli.instructions.md` — CLI command rules. + * `tests.instructions.md` — testing conventions. + * `clean-code.instructions.md` — best practices and DRY enforcement. + * `docs.instructions.md` — when and how to use the **context7** MCP server for library documentation. + +If something in code contradicts the PRD, the PRD wins. If you change behavior, update both the PRD and the relevant instruction file in the same change. + +--- + +## Runtime — local venv (default) + +This project runs in a project-local virtualenv. Docker is supported for packaging but is not required for development. + +### One-time setup + +```bash +# from the project root +uv venv # or: python3.12 -m venv .venv +source .venv/bin/activate +uv pip install -e ".[dev]" # or: pip install -e ".[dev]" +``` + +Python **3.12+** is required. + +### Daily commands + +All commands are run inside the activated `.venv`: + +```bash +pytest # tests +ruff check . # lint +ruff format . # format +mypy finn_eiendom # type-check +finn-eiendom --help # CLI entrypoint +finn-eiendom-mcp # MCP server (stdio) +finn-eiendom serve --transport http --port 8010 # MCP server (HTTP) +``` + +### Never + +* Install packages globally (`pip install ...` outside a venv). +* Use `sudo pip`. +* Mutate the host Python. 
+* Add dependencies without updating `pyproject.toml`. + +### Adding a dependency + +```bash +uv pip install <package> # ad-hoc, then: +# edit pyproject.toml to record it +uv pip install -e ".[dev]" # reinstall in editable mode +``` + +--- + +## Architecture in one screen + +``` +cli.py (typer) mcp_server.py (FastMCP) ← thin, parallel front ends + \ / + \ / + service.py ← orchestration: get_or_fetch, analyze_* + ↓ + analysis.py ← shortlist + summary + ↓ + search / ad / eiendom_no / scoring / feedback + ↓ + parser / http / cache + ↓ + FINN HTML + Eiendom.no JSON + SQLite +``` + +`formatting.py` sits next to `service.py` and is shared by both CLI and MCP for `json`, `markdown`, and `table` rendering. + +**The single-home rule:** every piece of logic has exactly one home. If you're tempted to add it in two places, you're wrong about one — push it down a layer and call it from both. See `PRD.md` §17.2 for the full ownership table. + +--- + +## The five hard rules + +These are non-negotiable. Architecture tests in `tests/test_architecture.py` enforce them. + +1. **`mcp_server.py` and `cli.py` are siblings.** They never call each other. Both call only `service`, `formatting`, `models`, and `config`. +2. **`service.py` is the only place that combines cache + fetch.** Nothing above it touches HTTP or SQLite directly. +3. **`httpx` lives in `http.py`. Nowhere else.** +4. **`sqlite3` lives in `cache.py`. Nowhere else.** +5. **Output formatting lives in `formatting.py`.** No inline rendering in CLI or MCP tool bodies. + +If you have to break one of these to ship a feature, the feature is wrong — fix the design first. + +--- + +## Adding a feature — the checklist + +For any new tool / command / behavior: + +1. Decide the home using the table in `PRD.md` §17.2. +2. Write the function in `service.py` (or extend `analysis.py` if it's pure orchestration). +3. Add a test in `tests/test_service.py`. +4. Add a thin MCP tool in `mcp_server.py` — `response_format` aware. +5. 
Add a thin CLI command in `cli.py` — `--format` aware. +6. Add the renderer in `formatting.py` if output is non-trivial. +7. Add tests in `tests/test_mcp_server.py` and `tests/test_cli.py`. +8. Update `PRD.md` and any affected `.github/instructions/*.md`. + +If steps 4 or 5 need more than ~20 lines, logic has leaked out of the service layer. Push it back down. + +--- + +## Clean code + +See `.github/instructions/clean-code.instructions.md`. Highlights: + +* Type hints everywhere. +* Functions stay small; one job per function. +* Names describe intent (`get_or_fetch_ad`, not `process`). +* Comments explain **why**, never **what** the code already says. +* DRY: if you write the same regex / SQL / format string twice, extract it. +* Errors fail loudly with actionable messages. No silent `except: pass`. +* No dead code, no commented-out blocks left in the tree. + +--- + +## Documentation lookups — use context7 + +When uncertain about a library's API (FastMCP decorators, Pydantic v2 validators, Typer command patterns, httpx async, msgpack, pytest-asyncio, respx, BeautifulSoup selectors, etc.), **use the `context7` MCP server**. Do not guess from training-data memory. + +Pattern (full details in `.github/instructions/docs.instructions.md`): + +1. `context7:resolve-library-id` with the library name → get the canonical ID. +2. `context7:query-docs` with that ID + a focused topic. + +Use context7 *before* writing the code, not after a test fails. If context7 returns nothing useful, search the library's official docs, then write the smallest possible spike to verify. + +--- + +## Safety and compliance + +* Private, low-frequency use only. +* Respect FINN / Eiendom.no rate limits and bot protection. +* Cache aggressively; never bulk-harvest. +* stdio MCP servers log to **stderr only** — anything on stdout breaks the JSON-RPC frame. +* Scores and estimates are decision support, never legal / technical / financial advice. 
+ +--- + +## Implementation order (Phase 2) + +Follow `PRD.md` §29 step-by-step. Each step is independently mergeable: + +1. Switch dev workflow to local venv + update instruction files (this change). +2. Pydantic v2 cleanup. +3. Service layer + tests. +4. Formatting layer + tests. +5. HTTP retry on 5xx + tests. +6. Replace FastAPI with FastMCP stdio server. +7. CLI with typer. +8. Diff workflow. +9. Compare workflow. +10. Similar-to-liked. +11. Architecture tests. +12. README + Claude Desktop config. \ No newline at end of file diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md new file mode 100644 index 0000000..ab35d52 --- /dev/null +++ b/IMPLEMENTATION.md @@ -0,0 +1,384 @@ +# IMPLEMENTATION.md — Phase 2 build runbook + +How to drive Phase 2 (the 12 steps in `PRD.md` §29) to completion using an AI agent. Each step has its own kickoff prompt, files affected, and "done" criteria. Run them in order. Each step is independently mergeable. + +--- + +## 0. Pre-flight + +Before starting step 1: + +1. **Working directory is the project root.** Run `ls -la` and confirm the repository files (`PRD.md`, `pyproject.toml`, `finn_eiendom/`) are listed. + +2. **Venv is healthy.** From the project root: + + ```bash + source .venv/bin/activate + pytest -x # green except for any pre-existing FastMCP-related skips + ruff check . # zero issues + ``` + +3. **Docs are in place.** Re-confirm `PRD.md` §17 (code ownership) is current — every step below references it. + +If any of these fail, stop and fix before proceeding. + +--- + +## How to use this runbook + +For each step: + +1. Create a feature branch: `git checkout -b feat/phase2-step-<N>-<slug>` off `chore/cleanup-phase-2-prep`. +2. Open a fresh agent chat with repo access. Paste the kickoff prompt verbatim. +3. Let the agent propose, implement, and test. Push back where it skips tests or violates §17. +4. When all "done" boxes are checked, merge into `chore/cleanup-phase-2-prep`. +5. Move to the next step. 
+ +Each kickoff prompt assumes the agent reads PRD.md, AGENTS.md, and the relevant instruction files first — that's encoded in the prompt. 
+ +After step 12, merge `chore/cleanup-phase-2-prep` into `main`. + +--- + +## Step 1 — Dev workflow already switched to local venv + +This step is **done** by the time `CLEANUP.md` is merged. The instruction files and `AGENTS.md` already use local venv. Sanity check: + +```bash +source .venv/bin/activate +which finn-eiendom 2>/dev/null || echo "expected: not yet installed; entry points come in steps 6 and 7" +ruff check . # zero issues +pytest -x # green (allow mcp_server failures) +``` + +Move on. + +--- + +## Step 2 — Pydantic v2 cleanup + +### Kickoff prompt + +> Read **PRD.md** (especially §17 code ownership and A8 acceptance criterion), **`.github/instructions/python.instructions.md`**, and **`.github/instructions/clean-code.instructions.md`**. +> +> Implement Phase 2 step 2: convert every Pydantic model in `finn_eiendom/models.py` from v1 (`class Config:`) to v2 (`model_config = ConfigDict(...)`). Use `context7:query-docs` on `pydantic/pydantic` if you're not sure of the v2 syntax — don't guess. +> +> Add `tests/test_models.py` with a JSON roundtrip test per model. +> +> Run `ruff check .`, `ruff format .`, and `pytest tests/test_models.py -v` before declaring done. + +### Files + +* `finn_eiendom/models.py` (edit) +* `tests/test_models.py` (new) + +### Done when + +* `grep -rn "class Config:" finn_eiendom/` produces zero output. +* `pytest tests/test_models.py` is green. +* Existing tests still pass. + +--- + +## Step 3 — Service layer + +### Kickoff prompt + +> Read **PRD.md** §16 (Service layer) and §17 (code ownership), **`.github/instructions/python.instructions.md`** and **`.github/instructions/clean-code.instructions.md`**. 
+> +> Create `finn_eiendom/service.py` with the public surface listed in PRD §16: `get_or_fetch_ad`, `get_or_fetch_eiendom_unit`, `get_or_fetch_similar_units`, `analyze_search`, `analyze_ad`, `analyze_ad_against_comps`, `find_similar_to_liked`, `compare_ads`, `resolve_eiendom_unit_from_finn_url`, `build_unit_vector_for_unit_code`, `decode_unit_vector_to_dict`, `save_feedback`, `get_shortlist`, `get_new_ads_since_last_run`. +> +> Each function: +> 1. Opens its own SQLite connection via `cache.init_db(FINN_CACHE_PATH)`. +> 2. Reads cache first with TTLs from `config.py`. +> 3. On miss or `force_refresh=True`, calls the fetcher in `ad.py` / `eiendom_no.py`. +> 4. Writes the fresh result back. +> 5. Returns a typed model or dict. +> +> Do not duplicate behavior from `analysis.py` — delegate to it. Add `tests/test_service.py` covering the five service tests listed in PRD §25.2. + +### Files + +* `finn_eiendom/service.py` (new) +* `tests/test_service.py` (new) +* `tests/conftest.py` (may need a `tmp_db` fixture if it doesn't exist) + +### Done when + +* `pytest tests/test_service.py` is green. +* `service.py` imports only from `models`, `config`, `cache`, `analysis`, `ad`, `eiendom_no`, `feedback`, `scoring`, stdlib. +* No `import httpx` or `import sqlite3` outside their owners. + +--- + +## Step 4 — Formatting layer + +### Kickoff prompt + +> Read **PRD.md** §17.6 (shared formatting module) and §19 (output formats), **`.github/instructions/clean-code.instructions.md`**. +> +> Create `finn_eiendom/formatting.py` with these renderers (signatures in PRD §17.6): `render_ad`, `render_shortlist`, `render_comparison`, `render_diff`, `render_similar_units`, `render_unit`, `render_score_breakdown`, plus `render_cache_stats` for the CLI cache subcommand. +> +> Each renderer accepts `(payload, fmt: Literal["json","markdown","table"]) -> str`. Unsupported formats raise `ValueError` listing supported options. 
Table rendering only applies where it makes sense (shortlist, comparison, diff, similar-units). +> +> Add `tests/test_formatting.py` covering the three tests listed in PRD §25.5. + +### Files + +* `finn_eiendom/formatting.py` (new) +* `tests/test_formatting.py` (new) + +### Done when + +* `pytest tests/test_formatting.py` is green. +* `render_*` is the *only* place that formats output. No inline rendering anywhere else (verified by reading diffs of steps 6 and 7). + +--- + +## Step 5 — HTTP retry on 5xx + +### Kickoff prompt + +> Read **PRD.md** A9 (acceptance criterion), **`.github/instructions/python.instructions.md`**. +> +> Extend `HTTPClient.get()` in `finn_eiendom/http.py` to retry on 5xx responses (500/502/503/504) with exponential backoff `1s, 2s, 4s`, up to `retries` attempts (default 3). Surface 4xx as `httpx.HTTPStatusError` immediately. Apply the existing `request_delay` between any two calls. +> +> If you're unsure about `httpx` retry semantics or `respx` test patterns, use `context7`. +> +> Add `tests/test_http.py` covering the three tests listed in PRD §25.6 using `respx`. + +### Files + +* `finn_eiendom/http.py` (edit) +* `tests/test_http.py` (new) + +### Done when + +* `pytest tests/test_http.py` is green. +* `httpx` imports remain confined to `http.py`. + +--- + +## Step 6 — Replace FastAPI with FastMCP + +### Kickoff prompt + +> Read **PRD.md** §14 (MCP design — every tool and input schema), §17 (code ownership), and **`.github/instructions/mcp.instructions.md`** end-to-end. +> +> Rewrite `finn_eiendom/mcp_server.py` from scratch: +> - Use `from mcp.server.fastmcp import FastMCP`. +> - Configure stderr-only logging. +> - Register all 14 tools listed in PRD §14.1 with the `finn_` prefix. +> - Each tool body has the shape in `mcp.instructions.md` §"Tool body shape": one `service.` call, one `formatting.render_*` call, try/except returning the JSON error envelope. +> - Input schemas as in PRD §14.2. 
+> - Annotations: `readOnlyHint=True` for all except `finn_save_feedback`. +> - `main()` calls `mcp.run(transport="stdio")`. +> - Add `finn-eiendom-mcp = "finn_eiendom.mcp_server:main"` to `[project.scripts]` in `pyproject.toml`. +> +> If unsure about FastMCP annotations or transport options, use `context7:query-docs` on the MCP Python SDK. +> +> Rewrite `tests/test_mcp_server.py` to cover the three tests in PRD §25.3. Use the SDK's testing helpers — do not spawn a subprocess. +> +> Verify: `finn-eiendom-mcp` boots over stdio, `mcp dev finn_eiendom/mcp_server.py` lists all 14 tools. + +### Files + +* `finn_eiendom/mcp_server.py` (full rewrite) +* `tests/test_mcp_server.py` (full rewrite) +* `pyproject.toml` (edit `[project.scripts]`) + +### Done when + +* `mcp_server.py` imports only `service`, `formatting`, `models`, `config`, stdlib, `mcp`, `pydantic`. +* All 14 tools registered. +* `pytest tests/test_mcp_server.py` is green. +* `grep -rn "FastAPI" finn_eiendom/` is empty. + +--- + +## Step 7 — CLI + +### Kickoff prompt + +> Read **PRD.md** §15 (CLI design — every command and option) and **`.github/instructions/cli.instructions.md`** end-to-end. +> +> Create `finn_eiendom/cli.py` with a `typer.Typer` app exposing all commands in PRD §15.1, plus `finn_eiendom/__main__.py` that calls the app. Add to `pyproject.toml`: +> ``` +> [project.scripts] +> finn-eiendom = "finn_eiendom.cli:app" +> ``` +> +> Each command: +> - Translates options into a `service.` call. +> - Calls `formatting.render_*(result, format)` and `typer.echo(...)`. +> - No business logic, no inline rendering. +> - Body under ~20 lines. +> +> Sub-app for `cache` (stats/clear/clear-html/clear-json) and `config` (show/path). `serve` accepts `--transport stdio|http` and dispatches to `mcp_server.main()` or the HTTP transport. +> +> If unsure about Typer sub-apps or `CliRunner`, use `context7`. +> +> Add `tests/test_cli.py` covering the five tests in PRD §25.4 using `typer.testing.CliRunner`. 
Mock `service.*` with `monkeypatch` — do not exercise the full stack here, that's `test_service.py`. + +### Files + +* `finn_eiendom/cli.py` (new) +* `finn_eiendom/__main__.py` (new) +* `tests/test_cli.py` (new) +* `pyproject.toml` (edit) + +### Done when + +* `finn-eiendom --help` lists every command in PRD §15.1. +* `cli.py` imports only `service`, `formatting`, `models`, `config`, stdlib, `typer`. +* `pytest tests/test_cli.py` is green. + +--- + +## Step 8 — Diff workflow (new / removed / changed) + +### Kickoff prompt + +> Read **PRD.md** §10.8, §13 (search_runs table), workflow I in §18, and **`.github/instructions/clean-code.instructions.md`**. +> +> Implement: +> 1. `search_runs` and `scores` tables in `cache.py` (use existing migration pattern). +> 2. `service.get_new_ads_since_last_run(search_url)` that compares against the previous run for the same `normalized_url` and returns `{new_ads, removed_ads, changed_ads}` with price/common_costs/status diffs on changed. +> 3. `finn_get_new_ads_since_last_run` MCP tool. +> 4. `finn-eiendom diff <search_url>` CLI command. +> 5. `formatting.render_diff(result, fmt)`. +> +> Add tests covering: empty previous-run case, all-new case, mixed new+removed+changed case. + +### Done when + +* The three new tests pass. +* MCP and CLI both expose the same behavior with identical defaults. + +--- + +## Step 9 — Compare workflow + +### Kickoff prompt + +> Read **PRD.md** workflow K in §18 and §14.2 (`CompareAdsInput`). +> +> Implement `service.compare_ads(finnkoder, include_eiendom_no=True, include_comps=True)` returning a comparison table + winners by category (best value / lifestyle / hybel / bargain / safest / highest risk / most overpriced). +> +> Wire `finn_compare_ads` MCP tool and `finn-eiendom compare <finnkode> <finnkode> ...` CLI command. Add `formatting.render_comparison`. Tests for service and CLI. + +### Done when + +* `finn-eiendom compare 462400360 461153194 --format markdown` produces a readable comparison. 
+* Service test covers the winners-by-category logic. + +--- + +## Step 10 — Similar-to-liked + +### Kickoff prompt + +> Read **PRD.md** workflow G in §18 and `FindSimilarToLikedInput` in §14.2. +> +> Implement `service.find_similar_to_liked(finnkode, mode, listing_status)`: +> 1. Load FinnAd; verify `feedback` has `verdict=liked` for this finnkode. +> 2. Ensure Eiendom.no enrichment + unit_vector exist. +> 3. Fetch similar-units (prefer `FOR_SALE` for recommendations, `RECENTLY_SOLD` for comps). +> 4. Score candidates against user preferences. +> 5. Return ranked recommendations. +> +> Wire MCP tool and CLI command. Tests covering: no liked feedback raises clear error; happy path returns ranked list. + +### Done when + +* `finn-eiendom similar-to-liked 462400360` returns ranked candidates when the listing has a liked verdict, and a clear error otherwise. + +--- + +## Step 11 — Architecture tests + +### Kickoff prompt + +> Read **PRD.md** A10 (architecture acceptance criterion) and §17.3 (layering invariants). +> +> Create `tests/test_architecture.py` that walks every `.py` file under `finn_eiendom/` with `ast`, collects all `import` and `from X import Y` statements, and asserts the layering invariants in PRD A10: +> - No `httpx` outside `http.py`. +> - No `sqlite3` outside `cache.py`. +> - No `BeautifulSoup` outside `search.py` / `ad.py`. +> - No `msgpack` outside `eiendom_no.py`. +> - `mcp_server.py` and `cli.py` import only from the allowed set. +> - `service.py` never imports `mcp_server` or `cli`. +> +> Add a parametrized test per invariant so failures show which module violated which rule. Failures should print the offending import line and module. + +### Done when + +* `pytest tests/test_architecture.py` is green. +* Deliberately introducing a violation (e.g. `import httpx` in `service.py`) makes a test fail with a clear message. 
+ +--- + +## Step 12 — README + Claude Desktop config + final verification + +### Kickoff prompt + +> Read **PRD.md** §21 (deployment), §22 (MVP scope), §24 (all acceptance criteria), **README.md** and **USAGE.md**. +> +> Update `README.md` and `USAGE.md` so every command, env var, and Claude Desktop snippet matches what was actually built in steps 1–11. Verify with the user's exact paths. +> +> Run the full A1–A11 acceptance check: +> +> - A1: `finn-eiendom-mcp` boots over stdio; `mcp dev finn_eiendom/mcp_server.py` lists all 14 tools. +> - A2: `finn-eiendom --help` lists every §15.1 command; each command runs against fixtures. +> - A3 – A9: matching service tests pass. +> - A10: `pytest tests/test_architecture.py` is green. +> - A11: `ruff check .` is clean; `pytest` is fully green; `mypy --strict finn_eiendom` passes or is documented as a gap. +> +> Report any failures with specific file/line references — don't paper over them. + +### Files + +* `README.md` (edit to match reality) +* `USAGE.md` (edit to match reality) + +### Done when + +* All 11 acceptance criteria in PRD §24 pass. +* README + USAGE quickstart examples actually work end-to-end on a fresh clone. + +--- + +## Definition of done for the whole phase + +Merge `chore/cleanup-phase-2-prep` into `main` when **every** box is checked: + +* [ ] All 12 steps merged in order. +* [ ] `finn-eiendom-mcp` boots over stdio with all 14 tools. +* [ ] `finn-eiendom --help` lists every command in PRD §15.1. +* [ ] `pytest` is green, including the new `test_service.py`, `test_cli.py`, `test_http.py`, `test_formatting.py`, `test_models.py`, `test_architecture.py`. +* [ ] `ruff check .` is clean. +* [ ] `mypy --strict finn_eiendom` passes or has a documented exception list. +* [ ] `README.md` and `USAGE.md` quickstart examples work on a fresh clone in under 5 minutes. +* [ ] Claude Desktop config in USAGE.md is verified to work against your installation. 
+ +--- + +## When a step blocks + +If a step blocks on an unclear requirement: + +1. Re-read the relevant PRD section. +2. Check `PRD.md` §28 (open questions) — the answer may be a deferred decision. +3. If still unclear, write the question down, pick the simplest interpretation, mark the spot in code with a `# TODO(owner): revisit ...` comment that names the open question, and move on. + +If a step blocks on a library question (FastMCP, Pydantic v2, Typer, httpx, msgpack, respx): + +1. Use `context7` — see `.github/instructions/docs.instructions.md`. +2. If context7 returns nothing useful, write the smallest possible spike in `scratch/` (gitignored) to verify behavior. + +If a step blocks on §17 (code ownership) — i.e. it feels like the right answer requires putting logic in the "wrong" place: + +1. Stop. +2. Re-read PRD §17.2 (decision table) and §17.3 (layering invariants). +3. Ask whether the service layer is actually missing a function. Usually it is. +4. Add the missing service function instead of bending the layering. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a1c0a36 --- /dev/null +++ b/Makefile @@ -0,0 +1,47 @@ +.PHONY: help venv install dev test test-fast lint format typecheck check clean serve mcp doctor + +PYTHON ?= python3.12 +VENV ?= .venv +BIN = $(VENV)/bin + +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-12s\033[0m %s\n", $$1, $$2}' + +venv: ## Create the local virtualenv + uv venv $(VENV) 2>/dev/null || $(PYTHON) -m venv $(VENV) + @echo "Activate with: source $(BIN)/activate" + +install: venv ## Install the package (editable) with dev extras + uv pip install --python $(BIN)/python -e ".[dev]" 2>/dev/null || $(BIN)/pip install -e ".[dev]" + +dev: install ## Alias for install + +test: ## Run the full test suite + $(BIN)/pytest + +test-fast: ## Run tests, fail fast, verbose + $(BIN)/pytest -x -v + +lint: ## Lint with ruff + $(BIN)/ruff check . + +format: ## Auto-format with ruff + $(BIN)/ruff format . 
+ +typecheck: ## Static type-check with mypy + $(BIN)/mypy finn_eiendom + +check: lint typecheck test ## Run lint + typecheck + tests + +clean: ## Remove caches and build artifacts + rm -rf .pytest_cache .ruff_cache .mypy_cache build dist *.egg-info + find . -type d -name __pycache__ -prune -exec rm -rf {} + + +serve: ## Start the MCP server over HTTP on port 8010 + $(BIN)/finn-eiendom serve --transport http --port 8010 + +mcp: ## Start the MCP server over stdio + $(BIN)/finn-eiendom-mcp + +doctor: ## Smoke-check the install + $(BIN)/finn-eiendom doctor diff --git a/PRD.md b/PRD.md new file mode 100644 index 0000000..482c96e --- /dev/null +++ b/PRD.md @@ -0,0 +1,1556 @@ +# PRD: finn-eiendom-mcp — Personal Real Estate Scout + +> Private, self-hosted property analysis platform built around a FINN scraper, an Eiendom.no enrichment layer, a scoring engine, and a SQLite cache. Exposed through three coordinated entry points: a **Python library** (`finn_eiendom`), an **MCP server** (FastMCP, stdio + optional HTTP), and a **CLI** (`finn-eiendom`). The Python library is the source of truth — MCP and CLI are thin, parallel front ends over the same service layer. + +--- + +## 1. Summary + +`finn-eiendom-mcp` analyzes a FINN real-estate search URL and returns a ranked shortlist of properties enriched with Eiendom.no estimates, comparable recently-sold units, scoring, risk flags, and broker questions. The same domain code powers: + +1. **MCP tools** for Claude Desktop / AI clients / n8n / agents. +2. **A CLI** for terminal-driven manual analysis and shell scripting. +3. **A Python library** that tests and notebooks can call directly. + +```text +FINN search URL + → listings (search cards) + → FINN details + → Eiendom.no enrichment (unit search + unit detail) + → unit_vector (built locally) + → similar-units / comps + → scoring + categorization + → shortlist + risks + next steps + broker questions +``` + +This is a **private, low-frequency decision-support tool**. 
Not a SaaS, not a crawler, not a bidding tool, not legal/technical/financial advice. + +--- + +## 2. Why three entry points + +| Layer | Audience | Transport | Purpose | +| ---------------- | ------------------------------------- | -------------------- | ----------------------------------------------------------------------------------- | +| Python library | tests, notebooks, custom scripts | in-process | Source of truth. Pure functions + async I/O. No global state beyond SQLite path. | +| MCP server | Claude Desktop, n8n, AI agents | stdio + streamableHttp | LLM-driven analysis, shortlisting, broker prep. | +| CLI | terminal, cron, ad-hoc debugging | stdio | Quick checks, smoke tests, scripted runs, demonstrations of new behavior. | + +The architectural rule: **all three layers call the same service functions**. MCP tools and CLI commands are thin wrappers around `service.py`. If a change goes into one, equivalent behavior appears in the others. + +--- + +## 3. User context & preferences + +User and partner are searching for a home in the Oslo area, roughly 9–12 MNOK depending on total monthly cost, rental/hybel potential, and property quality. Important preferences: + +* Good location and quality of life. +* Enough space and strong floor plan. +* Minimum 2 bedrooms, preferably more. +* Balcony, terrace, views, sun, sea/nature proximity. +* Hybel/rental potential or flexible layout. +* Willing to renovate themselves if the price is right. +* Renovation need is **not** automatically negative. +* Strong interest in **bargain candidates** where competition may be lower due to older standard or poor presentation. +* Avoid uncontrolled technical/legal risk: moisture, rot, illegal hybel, unapproved changes, severe TG3, unclear housing-association finances. + +--- + +## 4. Problem + +FINN search results are not ranked by the user's actual decision criteria. Manually triaging dozens of listings is slow and inconsistent. 
The current process lacks: + +* Automated extraction of FINN search and listing data. +* Linking FINN listings to structured Eiendom.no units. +* Price evaluation against Eiendom.no estimates and comparable sales. +* Similar-property discovery from listings the user already likes. +* Consistent scoring of price, location, layout, risk, renovation upside, hybel potential. +* Local history of seen listings, changes, scores, and feedback. +* Integration with AI clients and shell tooling. + +--- + +## 5. Goals + +The system shall: + +1. Accept a FINN real estate search URL via library, MCP tool, or CLI command. +2. Parse FINN search pages and extract listing cards, URLs, and finnkoder. +3. Fetch FINN listing detail pages and parse into a structured `FinnAd`. +4. Normalize Norwegian numbers, areas, currencies, dates, URLs. +5. Resolve each FINN URL to an Eiendom.no `unitCode` and fetch the unit detail. +6. Build a base64url-encoded `unit_vector` from unit detail and fetch similar-units / comps. +7. Score each listing using FINN data, Eiendom.no estimates, comps, user preferences, and risk signals. +8. Return a ranked shortlist with reasons, risks, next steps, and broker questions. +9. Cache HTML, JSON, parsed ads, units, comps, scores, and feedback in SQLite. +10. Detect new/removed/changed listings between runs of the same search URL. +11. Store user feedback (`liked`, `rejected`, `interesting`, `risk`, `viewing_candidate`, etc.) and surface it in subsequent runs. +12. Expose all of the above through MCP tools, CLI commands, and Python functions with consistent semantics. +13. Run locally in a project-local virtualenv. Docker is supported but optional. + +--- + +## 6. Non-goals + +MVP shall not: + +* Crawl all of FINN or Eiendom.no. +* Bypass rate limits, bot protection, authentication, or access controls. +* Bulk-harvest or redistribute data. +* Contact brokers automatically. +* Place bids automatically. +* Interpret full PDF condition reports. 
+* Provide official valuation, legal advice, technical inspection, or mortgage advice. +* Expose a public SaaS service. +* Build a web UI. + +--- + +## 7. Primary use cases + +| ID | Use case | Description | +| ---- | ----------------------------- | ------------------------------------------------------------------------------------ | +| UC1 | Analyze FINN search | Paste a FINN search URL → ranked shortlist with reasons/risks/next steps. | +| UC2 | Find bargain candidates | Surface listings with renovation need or weak presentation that may be underpriced. | +| UC3 | Separate renovation from risk | Treat cosmetic renovation as upside; flag technical/legal risk. | +| UC4 | Compare listings | Side-by-side comparison of multiple finnkoder. | +| UC5 | Save feedback | Mark listings as liked, rejected, interesting, risk, viewing candidate, etc. | +| UC6 | Find new listings since last run | Show new/removed/changed listings vs the prior run of the same search URL. | +| UC7 | Broker questions | Generate concrete questions based on risks, deviations, hybel status, comps. | +| UC8 | Eiendom.no enrichment | Add estimates, coordinates, area, rooms, floor, year, market data. | +| UC9 | Price fairness | Classify price as cheap / fair / expensive vs estimate and comps. | +| UC10 | Similar to liked | Find properties similar to listings the user has explicitly liked. | +| UC11 | Comparable sales | Fetch similar recently sold units to support valuation and bargain scoring. | + +--- + +## 8. Inputs + +Supported inputs across all three layers: + +* FINN search URL. +* FINN listing URL. +* Finnkode (string of digits). +* List of finnkoder. +* Eiendom.no `unitCode`. +* Eiendom.no `unit_vector` (base64url string). +* User feedback verdict + notes. +* Optional scoring/preference overrides (JSON or env). + +Example FINN search URL: + +```text +https://www.finn.no/realestate/homes/search.html?bbox=...&area_from=60&min_bedrooms=2&price_collective_to=12000000&... +``` + +--- + +## 9. 
External endpoints + +### 9.1 FINN HTML + +Not JSON. Parse HTML, cache aggressively, run at low frequency. + +| Method | URL pattern | Purpose | +| ------ | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| GET | `https://www.finn.no/realestate/homes/search.html?{query_params}` | Parse search result cards, listing URLs, finnkoder. | +| GET | `https://www.finn.no/realestate/homes/search.html?{query_params}&page={N}` | Pagination. | +| GET | `https://www.finn.no/realestate/homes/ad.html?finnkode={finnkode}` | Parse listing detail page. | +| GET | `{calendar_ics_url_from_listing_html}` | Optional: parse viewing times (prefer parsing from listing HTML first). | + +Important search params: `bbox`, `location`, `area_from`, `area_to`, `price_collective_to`, `price_collective_from`, `min_bedrooms`, `facilities`, `floor_navigator`, `lifecycle`, `page`, `stored-id`. + +### 9.2 Eiendom.no + +Real JSON API. Used for enrichment, valuation, and similar-units. + +#### 9.2.1 Resolve FINN listing → Eiendom.no unitCode + +``` +GET https://api.eiendom.no/api/v1/geodata/units/search/?search={url_encoded_finn_listing_url_or_address} +``` + +Returns: + +```json +{ + "units": [ + { + "unitCode": "c-gxw-xmyum-s2a", + "address": "Gunnar Schjelderups v. 
11D H0502, Oslo", + "geometry": { "type": "Point", "coordinates": [10.77, 59.95] } + } + ], + "summary": { "totalUnitsFound": 1, "totalCitiesFound": 1 } +} +``` + +#### 9.2.2 Fetch unit detail + +``` +GET https://api.eiendom.no/api/v1/geodata/units/{unitCode}/ +``` + +Important response fields: `unitCode`, `address`, `unitName`, `streetAddress`, `postalName`, `registrationCode`, `geometry.coordinates`, `specification.{propertyType, floor, rooms, constructionYear, usableArea}`, `valuation.{estimatedSellingPrice, estimatedSellingPriceLower, estimatedSellingPriceUpper}`, `latestMarketData.{listingPrice, monthlyCosts, squareMeterPrice, daysOnMarket, saleStatus, marketPlacementScore}`. + +#### 9.2.3 Build `unit_vector` (local, not HTTP) + +Encoding step before similar-units. Generated from unit detail data: + +```json +{ + "lon": 10.7803, + "lat": 59.9287, + "ptype": "APARTMENT", + "floor": 8, + "rooms": 5, + "built": 2005, + "area": 80, + "price": 8491082 +} +``` + +Encoding: `unit_vector = base64url_without_padding(msgpack(payload))`. + +Library functions (in `eiendom_no.py` only): + +* `build_unit_vector(unit) -> str` +* `decode_unit_vector(unit_vector) -> dict` + +#### 9.2.4 Fetch similar-units + +``` +GET https://api.eiendom.no/api/v1/geodata/units/similar/?unit_vector={unit_vector} +``` + +Returns a list of comparable units with `unitCode`, `address`, `geometry`, `specification`, and `marketData.{listingPrice, jointDebt, monthlyCosts, sellingPrice, squareMeterPrice, daysOnMarket, saleStatus, finalizedAt}`. + +`listing_status` (RECENTLY_SOLD / FOR_SALE / CURRENT) is implemented as a **local filter** over the returned `marketData.saleStatus` and `finalizedAt`. Only pass it to the API if later experimentation confirms server-side support. 
+ +### 9.3 Optional Hjemla (disabled by default) + +``` +GET https://consumer-service-hjemla-prod.propcloud.no/public/market/address-list +``` + +Params: `marketType`, `period`, `marketStates`, `unittypes`, bbox (`swLat`, `neLat`, `swLng`, `neLng`), `limit`, `randomize`. + +Useful for bbox-level market snapshots. Disabled in MVP via `HJEMLA_ENABLED=false`. + +### 9.4 MCP server endpoint + +stdio is the default. Optional Streamable HTTP on `POST http://{host}:8010/mcp`. Operational endpoints when running HTTP: `GET /health`, `GET /version`, `GET /debug/config`. + +--- + +## 10. Functional requirements + +### 10.1 FINN search extraction + +Fetch and parse FINN search pages. Extract and deduplicate by finnkode. Support pagination via `page=N` and respect `FINN_MAX_SEARCH_PAGES`. Search-card fields when available: finnkode, URL, title, address/area, area, asking_price, total_price, common_costs, ownership_type, property_type, bedrooms, floor, viewing time, broker. + +### 10.2 FINN listing detail extraction + +Fetch and parse individual listing pages. Fields when available: finnkode, URL, title, address, postal_area, district, property_type, ownership_type, asking_price, total_price, shared_debt, common_costs, fees, municipal_fees, BRA/BRA-i/BRA-e/BRA-b, P-room, rooms, bedrooms, floor, construction_year, energy_rating, heating, balcony/terrace, elevator, parking/garage, viewings, listing_description, broker_name, broker_company, document_links. + +### 10.3 Normalization + +* Norwegian formatted numbers: `7 200 991 kr` → `7200991`. +* Areas: `77 m²` → `77`. +* Dates/viewings → ISO 8601. +* URLs → absolute. +* Missing values → `null`. +* Finnkode and Eiendom.no unitCode as strings. + +### 10.4 Eiendom.no enrichment + +Enabled by default. Flow: FINN listing URL → unit search → `unitCode` → unit detail → structured market data. 
Store: unit_code, address, coordinates, registration code, property_type, floor, rooms, construction_year, usable_area, estimated_selling_price + lower/upper, latest market data (listing_price, sqm_price, monthly_costs, days_on_market, sale_status), market_placement, raw JSON. + +If enrichment fails, the analysis continues with FINN data only and marks enrichment as `unavailable`. + +### 10.5 Similar-units / `unit_vector` + +Required functions: `build_unit_vector(unit)`, `decode_unit_vector(unit_vector)`, `get_similar_units(unit_vector, listing_status)`. Supported listing statuses: `RECENTLY_SOLD` (default for comps), `FOR_SALE` (active recommendations), `CURRENT` (if confirmed). Similar-unit fields when available: unit_code, address, coordinates, property_type, floor, rooms, construction_year, area, listing_price, selling_price, shared_debt, common_costs, sqm_price, days_on_market, sale_status, finalized_at, raw JSON. + +### 10.6 Cache and history + +SQLite. Default TTLs: + +| Data | Default TTL | +| -------------------- | ----------------------: | +| Search results | 30–60 minutes | +| FINN listing details | 6–24 hours | +| Eiendom.no unit data | 24 hours | +| Similar-units | 24 hours | +| Feedback/history | Permanent until deleted | + +### 10.7 Feedback + +Verdict vocabulary: `liked`, `rejected`, `interesting`, `bargain_candidate`, `risk_object`, `viewing_candidate`, `viewed`, `too_expensive`, `too_small`, `too_far_out`, `too_high_risk`, `likes_location`, `likes_layout`, `dislikes_area`. Stored permanently. `liked` listings are used as seeds for similar-to-liked recommendations. Feedback can be used as a soft scoring signal. + +### 10.8 Diffs between runs + +For a normalized search URL, the system shall compare finnkoder against the previous run and report `new_ads`, `removed_ads`, and `changed_ads` (price, common costs, status). Optionally re-fetch only new or changed details. + +--- + +## 11. 
Scoring and classification + +### 11.1 Score model (clamped to 0–100) + +| Category | Range | +| ------------------------------------- | ----: | +| Economy / total cost | 0–20 | +| Eiendom.no estimate / market position | 0–20 | +| Comparable sales / similar-units | 0–20 | +| Location | 0–15 | +| Layout and potential | 0–20 | +| Outdoor space / view / sun | 0–15 | +| Hybel / rental potential | 0–10 | +| Renovation / bargain upside | 0–15 | +| Technical / legal risk | -20–0 | + +### 11.2 Categories + +`bargain_candidate`, `safe_candidate`, `lifestyle_candidate`, `hybel_candidate`, `renovation_candidate`, `similar_to_liked`, `comparable_sale_match`, `risk_object`, `too_expensive`, `not_interesting`, `manual_review_required`. + +### 11.3 Bargain candidate logic + +A listing may be a bargain candidate when several of these are true: low sqm price vs comps, listing price below estimate, price near lower estimate interval, sqm price below similar recently sold, older standard / renovation need / weak presentation, strong underlying location/layout, suitable size, risk appears controllable. + +### 11.4 Renovation logic + +Renovation need is not automatically negative. + +* **Opportunity:** older standard, modernization need, renovation object, cosmetic wear, outdated kitchen/surfaces, weak presentation, layout improvement potential. +* **Risk:** moisture, rot, mold, drainage issues, load-bearing concerns, illegal/unapproved changes, non-approved hybel, serious electrical/wet-room deviations, TG3 with high cost or safety implications. + +### 11.5 Hybel / rental logic + +* **Positive:** hybel, rental unit, separate entrance, extra bathroom/kitchenette, basement/sokkel, secondary section, stated rental income. +* **Risk:** not approved, not applied for, not building-reported, only "disposable room", not approved for permanent residence, board approval required. + +Output classifies as: documented legal hybel / possible hybel potential / unclear/risky hybel / not relevant. 
+ +### 11.6 Market and comparable outputs + +Market estimate: `market_score`, `price_vs_estimate_pct`, `price_position` (`below_estimate` / `within_estimate_range` / `above_estimate` / `unknown`), `sqm_price_position` (`cheap` / `normal` / `expensive` / `unknown`). + +Comparable: `comparable_score`, `comps_count`, `avg_selling_price`, `median_selling_price` (where possible), `avg_sqm_price`, `sqm_price_delta_pct`, `price_delta_pct`, `confidence` (`low` / `medium` / `high`). + +Risk factors: too few comps, comps too far away, large differences in area/rooms/floor/year, old sale dates, low confidence. + +--- + +## 12. Technical architecture + +```text +AI client / Claude Desktop / n8n / agent ← MCP layer + ↓ + FastMCP (stdio | streamable HTTP) + +User in a terminal ← CLI layer + ↓ + finn-eiendom CLI (typer) + +Python tests / notebooks / custom scripts ← Library layer + ↓ + import finn_eiendom + + ──────── all three above share ──────── + + finn_eiendom.formatting ← render_* for json/markdown/table + ↓ + finn_eiendom.service ← orchestration: get_or_fetch, analyze_* + ↓ + finn_eiendom.analysis ← shortlist + summary building + ↓ + search / ad / eiendom_no / scoring / feedback + ↓ + finn_eiendom.cache (SQLite) ← html, json, ads, units, comps, scores, feedback + ↓ + finn_eiendom.http (httpx) ← delay, retry, user-agent + ↓ + FINN HTML + Eiendom.no JSON (+ optional Hjemla) +``` + +### 12.1 Module layout + +```text +finn_eiendom/ + __init__.py + config.py # env / defaults / TTLs + models.py # Pydantic v2 models + parser.py # number/area/date/URL/finnkode normalization + http.py # async HTTP with delay, retry, user-agent + cache.py # SQLite schema + persistence + search.py # FINN search HTML parsing + pagination + ad.py # FINN listing HTML parsing + eiendom_no.py # unit search/detail, unit_vector, similar-units + scoring.py # score model + classifications + feedback.py # verdicts + soft preference signal + analysis.py # orchestration + shortlist + summary + service.py # 
get_or_fetch_* + thin facade for MCP and CLI + formatting.py # render_* helpers shared by MCP and CLI + mcp_server.py # FastMCP wrappers around service + cli.py # typer-based CLI wrappers around service + __main__.py # python -m finn_eiendom → CLI entry +``` + +### 12.2 Layering rules + +* `mcp_server.py` and `cli.py` are **thin**. They translate inputs to service calls and format outputs via `formatting.py`. +* `service.py` orchestrates cache + fetch. Every read should consult the cache first; every fresh fetch should write back. +* `analysis.py` orchestrates the full shortlist run: search → details → enrichment → comps → scoring → summary. +* Domain modules (`search`, `ad`, `eiendom_no`, `scoring`, `feedback`) are pure or only depend on `http`/`cache`. +* No layer above the service may call `httpx` or `sqlite3` directly. + +--- + +## 13. Data model + +SQLite. Existing schema already implements `finn_ads`, `eiendom_units`, `similar_units`, and `cache_meta`. MVP additions: `search_runs`, `scores`, `feedback`. + +```sql +CREATE TABLE finn_ads ( + finnkode TEXT PRIMARY KEY, + url TEXT, + payload TEXT NOT NULL, -- JSON-serialized FinnAd + fetched_at TEXT NOT NULL +); + +CREATE TABLE eiendom_units ( + unit_code TEXT PRIMARY KEY, + payload TEXT NOT NULL, -- JSON-serialized EiendomUnit + fetched_at TEXT NOT NULL +); + +CREATE TABLE similar_units ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + unit_code TEXT NOT NULL, + listing_status TEXT NOT NULL, + payload TEXT NOT NULL, -- JSON array of SimilarUnit + fetched_at TEXT NOT NULL +); + +CREATE TABLE cache_meta ( + key TEXT PRIMARY KEY, -- e.g. 
search_page:{url}, search_cards:{url} + value TEXT NOT NULL, + expires_at TEXT +); + +CREATE TABLE search_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + search_url TEXT NOT NULL, + normalized_url TEXT NOT NULL, + created_at TEXT NOT NULL, + total_found INTEGER, + total_parsed INTEGER, + total_scored INTEGER, + result_json TEXT -- shortlist snapshot +); + +CREATE TABLE scores ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finnkode TEXT NOT NULL, + search_run_id INTEGER, + total_score REAL, + economy REAL, + market_position REAL, + comparable_sales REAL, + location REAL, + layout REAL, + outdoor REAL, + rental_potential REAL, + renovation REAL, + risk REAL, + categories_json TEXT, + explanation_json TEXT, + created_at TEXT NOT NULL +); + +CREATE TABLE feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finnkode TEXT NOT NULL, + verdict TEXT NOT NULL, + notes TEXT, + created_at TEXT NOT NULL +); +``` + +--- + +## 14. MCP design + +### 14.1 Tools + +All tool names use the `finn_` prefix to avoid collisions when the server runs alongside others. + +| Tool | Purpose | Read-only | +| ------------------------------------- | ---------------------------------------------------------------- | :-------: | +| `finn_analyze_search` | Analyze a FINN search URL and return a ranked shortlist. | yes | +| `finn_get_ad` | Fetch structured data for one finnkode. | yes | +| `finn_compare_ads` | Compare multiple listings side by side. | yes | +| `finn_save_feedback` | Store feedback/verdict/notes. | no | +| `finn_get_shortlist` | Fetch stored shortlist from a search run. | yes | +| `finn_get_new_ads_since_last_run` | Detect new/removed/changed listings vs the previous run. | yes | +| `finn_resolve_eiendom_unit` | Map FINN URL → Eiendom.no `unitCode`. | yes | +| `finn_get_eiendom_unit` | Fetch Eiendom.no unit detail by `unitCode`. | yes | +| `finn_enrich_ad` | Combine FINN listing and Eiendom.no enrichment. 
| yes | +| `finn_build_unit_vector` | Build a base64url `unit_vector` from a `unitCode`. | yes | +| `finn_decode_unit_vector` | Decode a `unit_vector` for inspection/debugging. | yes | +| `finn_get_similar_units` | Fetch comps/recommendations from `unit_vector`. | yes | +| `finn_find_similar_to_liked_ad` | Find properties similar to a listing the user has liked. | yes | +| `finn_analyze_ad_against_comps` | Evaluate one listing against `RECENTLY_SOLD` comps. | yes | + +All read-only tools set `readOnlyHint=True, destructiveHint=False, openWorldHint=True`. `finn_save_feedback` sets `readOnlyHint=False, destructiveHint=False, idempotentHint=False`. + +### 14.2 Tool input schemas (Pydantic v2) + +```python +class AnalyzeSearchInput(BaseModel): + search_url: str = Field(..., description="Full FINN search URL") + max_pages: int = Field(default=3, ge=1, le=10) + detail_limit: int = Field(default=20, ge=1, le=100) + include_details: bool = True + include_eiendom_no: bool = True + include_similar_units_for_shortlist: bool = False + response_format: Literal["json", "markdown"] = "json" + +class GetAdInput(BaseModel): + finnkode: str = Field(..., pattern=r"^\d+$") + force_refresh: bool = False + include_eiendom_no: bool = True + include_similar_units: bool = False + +class ResolveUnitInput(BaseModel): + finn_url: str + +class GetUnitInput(BaseModel): + unit_code: str + force_refresh: bool = False + +class BuildUnitVectorInput(BaseModel): + unit_code: str + +class DecodeUnitVectorInput(BaseModel): + unit_vector: str + +class SimilarUnitsInput(BaseModel): + unit_vector: str + listing_status: Literal["RECENTLY_SOLD", "FOR_SALE", "CURRENT"] = "RECENTLY_SOLD" + force_refresh: bool = False + +class FindSimilarToLikedInput(BaseModel): + finnkode: str + mode: Literal["recommendations", "comps"] = "recommendations" + listing_status: Literal["RECENTLY_SOLD", "FOR_SALE", "CURRENT"] = "FOR_SALE" + +class AnalyzeAgainstCompsInput(BaseModel): + finnkode: str + listing_status: 
Literal["RECENTLY_SOLD"] = "RECENTLY_SOLD" + +class SaveFeedbackInput(BaseModel): + finnkode: str + verdict: str + notes: Optional[str] = None + +class CompareAdsInput(BaseModel): + finnkoder: List[str] = Field(..., min_length=2, max_length=10) + include_eiendom_no: bool = True + include_comps: bool = True +``` + +### 14.3 Tool response convention + +Every tool body wraps execution in try/except and returns a JSON string. Errors return: + +```python +return json.dumps({"error": True, "code": "", "message": str(e)}) +``` + +This keeps the protocol layer happy and lets the LLM react to recoverable failures. + +When `response_format="markdown"`, return human-readable formatted text instead of JSON — produced by `formatting.py`, never inline. + +### 14.4 Resources + +```text +finn://preferences/current +finn://search-runs/latest +finn://search-runs/{id} +finn://ads/{finnkode} +finn://ads/{finnkode}/enriched +finn://shortlist/latest +finn://feedback/{finnkode} +finn://eiendom-units/{unitCode} +finn://eiendom-units/{unitCode}/similar/{listingStatus} +``` + +### 14.5 Prompts + +* `evaluate_property_for_user` +* `compare_properties_for_user` +* `refine_search_from_feedback` +* `find_more_like_this` + +Evaluation prompt template output: category, score, short assessment, why interesting, Eiendom.no estimate, comparable sales, main risks, bargain potential, questions for broker, should we view it. + +### 14.6 Entry point + +```python +# finn_eiendom/mcp_server.py +from mcp.server.fastmcp import FastMCP +mcp = FastMCP("finn_eiendom_mcp") + +# ... tools defined here ... + +def main() -> None: + mcp.run(transport="stdio") + +if __name__ == "__main__": + main() +``` + +`pyproject.toml`: + +```toml +[project.scripts] +finn-eiendom-mcp = "finn_eiendom.mcp_server:main" +finn-eiendom = "finn_eiendom.cli:app" +``` + +--- + +## 15. CLI design + +Built with `typer`. Every command maps 1:1 to a service function — same parameters, same defaults, same outputs. 
+ +### 15.1 Commands + +```text +finn-eiendom analyze-search SEARCH_URL [--max-pages 3] [--detail-limit 20] [--no-details] [--no-eiendom] [--with-similar] [--format json|markdown|table] +finn-eiendom get-ad FINNKODE [--force-refresh] [--no-eiendom] [--with-similar] [--format ...] +finn-eiendom compare FINNKODE FINNKODE... [--no-eiendom] [--no-comps] [--format ...] +finn-eiendom save-feedback FINNKODE VERDICT [--notes "..."] +finn-eiendom shortlist [--run-id ID] [--limit 10] [--format ...] +finn-eiendom diff SEARCH_URL [--format ...] ← new / removed / changed +finn-eiendom resolve-unit FINN_URL +finn-eiendom get-unit UNIT_CODE [--force-refresh] +finn-eiendom enrich-ad FINNKODE [--with-similar] +finn-eiendom build-vector UNIT_CODE +finn-eiendom decode-vector UNIT_VECTOR +finn-eiendom similar-units UNIT_CODE [--status RECENTLY_SOLD|FOR_SALE|CURRENT] +finn-eiendom similar-to-liked FINNKODE [--mode recommendations|comps] [--status ...] +finn-eiendom analyze-against-comps FINNKODE +finn-eiendom cache stats | clear | clear-html | clear-json +finn-eiendom serve [--transport stdio|http] [--host 127.0.0.1] [--port 8010] +finn-eiendom config show | path +finn-eiendom doctor ← run a few smoke checks: cache reachable, eiendom.no reachable, finn reachable +finn-eiendom version +``` + +### 15.2 Output formats + +* `--format json` — full structured output (default for piping into `jq`). +* `--format markdown` — same data, human-readable. +* `--format table` — concise terminal table (for `analyze-search`, `compare`, `shortlist`, `diff`). + +All three are produced by `finn_eiendom.formatting`. CLI never formats inline. + +### 15.3 Examples + +```bash +# Triage a search live +finn-eiendom analyze-search 'https://www.finn.no/realestate/homes/search.html?location=...'
--format table + +# Drill into one listing +finn-eiendom get-ad 462400360 --format markdown + +# Compare two finalists +finn-eiendom compare 462400360 461153194 --format markdown + +# Mark a listing as liked, then ask for similar +finn-eiendom save-feedback 462400360 liked --notes "great layout, check fellesgjeld" +finn-eiendom similar-to-liked 462400360 + +# Operate the MCP server in HTTP mode for n8n +finn-eiendom serve --transport http --port 8010 +``` + +### 15.4 CLI implementation pattern + +```python +# finn_eiendom/cli.py +import asyncio, typer +from . import service, formatting + +app = typer.Typer(no_args_is_help=True, add_completion=False) + +@app.command() +def analyze_search( + url: str, + max_pages: int = 3, + detail_limit: int = 20, + no_details: bool = typer.Option(False, "--no-details"), + no_eiendom: bool = typer.Option(False, "--no-eiendom"), + with_similar: bool = typer.Option(False, "--with-similar"), + format: str = typer.Option("json", "--format"), +) -> None: + result = asyncio.run(service.analyze_search( + search_url=url, + max_pages=max_pages, + detail_limit=detail_limit, + include_details=not no_details, + include_eiendom_no=not no_eiendom, + include_similar_units_for_shortlist=with_similar, + )) + typer.echo(formatting.render_shortlist(result, format)) +``` + +CLI commands are wrappers — no business logic, no rendering. If you need to add behavior, it goes in `service.py` and gets a matching MCP tool. If you need to change rendering, edit `formatting.py`. + +--- + +## 16. Service layer + +The keystone of the architecture. + +```python +# finn_eiendom/service.py — public surface + +async def get_or_fetch_ad(finnkode: str, force_refresh: bool = False) -> FinnAd: ... +async def get_or_fetch_eiendom_unit(unit_code: str, force_refresh: bool = False) -> Optional[EiendomUnit]: ... +async def get_or_fetch_similar_units(unit_code: str, listing_status: str = "RECENTLY_SOLD", force_refresh: bool = False) -> list[SimilarUnit]: ... 
+ +async def analyze_search(search_url: str, *, max_pages=3, detail_limit=20, include_details=True, include_eiendom_no=True, include_similar_units_for_shortlist=False) -> dict: ... +async def analyze_ad(finnkode: str, *, include_eiendom_no=True, include_similar_units=False) -> dict: ... +async def analyze_ad_against_comps(finnkode: str, listing_status: str = "RECENTLY_SOLD") -> dict: ... +async def find_similar_to_liked(finnkode: str, *, mode="recommendations", listing_status="FOR_SALE") -> dict: ... +async def compare_ads(finnkoder: list[str], *, include_eiendom_no=True, include_comps=True) -> dict: ... + +async def resolve_eiendom_unit_from_finn_url(finn_url: str) -> Optional[EiendomUnit]: ... +def build_unit_vector_for_unit_code(unit_code: str) -> dict: ... +def decode_unit_vector_to_dict(unit_vector: str) -> dict: ... + +def save_feedback(finnkode: str, verdict: str, notes: Optional[str] = None) -> dict: ... +def get_shortlist(run_id: Optional[int] = None, limit: int = 10) -> dict: ... +def get_new_ads_since_last_run(search_url: str) -> dict: ... +``` + +Every function: + +1. Opens its own SQLite connection via `cache.init_db(FINN_CACHE_PATH)`. +2. Reads from cache first, with TTLs from `config.py`. +3. On cache miss (or `force_refresh=True`), calls the relevant fetch function in `ad.py` / `eiendom_no.py`. +4. Writes the fresh result back to the cache. +5. Returns a typed model or dict, never `None` unexpectedly — failures raise with clear messages. + +--- + +## 17. Code ownership and anti-duplication + +This section is the constitution. Everything else flexes; this does not. The goal is one home for every piece of logic and one obvious answer to "where does this go?". + +### 17.1 The single-home rule + +Every piece of logic has exactly one home. If you're tempted to add it in two places, you're wrong about one of them — push it down a layer and call it from both. + +### 17.2 Decision table — "where does this go?" 
+ +| Concern | Lives in | Never in | +| -------------------------------------------------- | --------------------------------- | -------------------------------------------------------------- | +| Parsing FINN search HTML | `search.py` | `mcp_server`, `cli`, `analysis`, `scripts` | +| Parsing FINN listing HTML | `ad.py` | `mcp_server`, `cli`, `analysis`, `scripts` | +| Norwegian number / date / URL / finnkode normalization | `parser.py` | inline anywhere — if you write a regex twice, extract it | +| HTTP requests, retry, delay, user-agent | `http.py` | `search` / `ad` / `eiendom_no` using `httpx` directly | +| SQLite reads/writes | `cache.py` | every other module — go through cache helpers | +| Eiendom.no unit search / unit detail | `eiendom_no.py` | `ad`, `search`, `analysis` (call eiendom_no, don't reimplement)| +| `unit_vector` encode / decode | `eiendom_no.py` | `mcp_server`, `cli` (call it; don't pack msgpack inline) | +| Similar-units fetching + local filtering | `eiendom_no.py` | `analysis`, `service` (call `get_similar_units`) | +| Score components | `scoring.py` | `analysis` (use `score_ad`), `mcp_server`, `cli` | +| Category assignment | `scoring.py` (`classify_ad`) | `analysis`, `mcp_server`, `cli` | +| Feedback storage + retrieval | `feedback.py` | `mcp_server`, `cli`, `analysis` | +| "Get from cache, else fetch, else save" | `service.py` (`get_or_fetch_*`) | `mcp_server`, `cli`, `analysis` (always go through service) | +| Shortlist + summary assembly | `analysis.py` | `mcp_server`, `cli` | +| End-to-end orchestration (search → shortlist) | `service.py` (`analyze_search`) | `mcp_server`, `cli` (they just call it) | +| MCP tool definitions + annotations | `mcp_server.py` | `service`, `cli` | +| MCP error wrapping `{"error": True, ...}` | `mcp_server.py` only | `service` (which raises), `cli` (which has its own exit codes) | +| CLI command definitions + Typer plumbing | `cli.py` | `service`, `mcp_server` | +| Output formatting (json / markdown / table) | 
`formatting.py` | inline in `mcp_server.py` or `cli.py` | +| Env-var defaults | `config.py` | hardcoded anywhere | +| Pydantic models | `models.py` | redefined locally; subclass only if needed | + +### 17.3 Layering invariants + +The dependency graph is acyclic and points downward: + +``` +cli.py ─┐ + ├──> service.py ──> analysis.py ──> search / ad / eiendom_no / scoring / feedback +mcp_server.py ─┘ │ + │ ├──> parser.py + │ └──> http.py / cache.py + └──> formatting.py +``` + +Hard rules: + +* `mcp_server.py` and `cli.py` are **siblings** and never call each other. +* Neither MCP nor CLI imports from `search`, `ad`, `eiendom_no`, `scoring`, `feedback`, `cache`, or `http`. They import only from `service`, `formatting`, `models`, and `config`, plus stdlib and their own framework (`mcp` / `typer`). +* `service.py` does not import from `mcp_server` or `cli`. +* `analysis.py` does not open SQLite connections directly — it goes through `cache.py` functions. +* `search.py`, `ad.py`, `eiendom_no.py` do not open SQLite directly — they call cache helpers passed in or imported from `cache.py`. +* Nothing except `http.py` uses `httpx` directly. If `import httpx` appears anywhere else, move it. +* Nothing except `cache.py` uses `sqlite3` directly. +* Nothing except `parser.py` defines Norwegian-text regexes. + +### 17.4 Anti-duplication checklist + +Before merging any change, ask: + +1. Is this logic already implemented somewhere? (`grep` the function name and obvious keywords.) +2. If I'm copy-pasting from another file, am I about to duplicate behavior that should live in one shared function? +3. Can a new caller use an existing `service.py` function instead of writing its own orchestration? +4. Is the same Pydantic field defined in two models? If yes, factor out a base model. +5. Am I formatting output in two places (CLI + MCP)? Move it to `formatting.py`. +6. Am I opening a SQLite connection outside `cache.py`? Move it. +7. Am I building an httpx call outside `http.py`? Move it. +8.
Am I writing a Norwegian-number / area / finnkode regex outside `parser.py`? Move it. +9. Am I adding an env-var lookup outside `config.py`? Move it. +10. Did I add a new behavior with only one front end (MCP or CLI)? If it should exist in both, the service function is missing. + +### 17.5 Examples — what NOT to do + +**Bad:** MCP tool reaches into `ad.py` directly. + +```python +# ❌ in mcp_server.py +from .ad import fetch_ad_details +@mcp.tool() +async def finn_get_ad(...): + ad = await fetch_ad_details(...) # bypasses cache! +``` + +**Good:** MCP tool goes through `service.py`. + +```python +# ✅ in mcp_server.py +from .service import get_or_fetch_ad +@mcp.tool() +async def finn_get_ad(...): + ad = await get_or_fetch_ad(finnkode, force_refresh=force_refresh) + return ad.model_dump_json() +``` + +**Bad:** CLI formats output inline that MCP also needs. + +```python +# ❌ in cli.py +def _render_shortlist_markdown(result): ... # 80 lines of formatting +# later in mcp_server.py, the same 80 lines copy-pasted +``` + +**Good:** Shared formatter. + +```python +# ✅ in finn_eiendom/formatting.py +def render_shortlist(result: dict, fmt: str) -> str: ... +# cli.py and mcp_server.py both call render_shortlist(result, fmt) +``` + +**Bad:** Service inlines parsing or HTTP. + +```python +# ❌ in service.py +async def get_or_fetch_ad(...): + html = await httpx.AsyncClient().get(url) # http belongs in http.py + soup = BeautifulSoup(html.text, "html.parser") # parsing belongs in ad.py +``` + +**Good:** Service delegates. 
+ +```python +# ✅ in service.py +async def get_or_fetch_ad(finnkode, force_refresh=False): + conn = cache.init_db(FINN_CACHE_PATH) + if not force_refresh: + cached = cache.get_finn_ad(conn, finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS) + if cached: + return cached + ad = await ad_module.fetch_ad_details(finnkode) + cache.save_finn_ad(conn, ad) + return ad +``` + +### 17.6 The shared `formatting.py` module + +Output formatting (JSON / markdown / table) is shared between CLI (`--format`) and MCP (`response_format`). Centralize all renderers here: + +```python +# finn_eiendom/formatting.py +def render_ad(ad: FinnAd, fmt: str) -> str: ... +def render_shortlist(result: dict, fmt: str) -> str: ... +def render_comparison(result: dict, fmt: str) -> str: ... +def render_diff(result: dict, fmt: str) -> str: ... +def render_similar_units(units: list[SimilarUnit], fmt: str) -> str: ... +def render_unit(unit: EiendomUnit, fmt: str) -> str: ... +def render_score_breakdown(scores: dict, fmt: str) -> str: ... +``` + +CLI and MCP both call these. Neither has its own renderer. `fmt` accepts `"json"`, `"markdown"`, `"table"` (only where table makes sense). Unsupported values raise `ValueError` with a list of supported formats. + +### 17.7 Adding a new feature — the checklist + +For any new tool / command / behavior: + +1. **Decide the home.** Use the table in §17.2. +2. **Write the service function** in `service.py` (or extend `analysis.py` if it's pure orchestration of existing services). +3. **Add a test** for the service function in `tests/test_service.py`. +4. **Add the MCP tool** in `mcp_server.py` — thin wrapper, `response_format` aware. +5. **Add the CLI command** in `cli.py` — thin wrapper, `--format` aware. +6. **Add formatter** in `formatting.py` if output is non-trivial. +7. **Add a test** for the MCP tool registration in `tests/test_mcp_server.py`. +8. **Add a test** for the CLI command in `tests/test_cli.py`. +9. 
**Update docs** — README and the relevant `.github/instructions/*.md` if new patterns are introduced. + +If step 4 or 5 needs more than ~20 lines, you've put logic in the wrong layer. Push it down. + +### 17.8 Acceptable duplication + +A few small repetitions are tolerated to keep boundaries clean: + +* Trivial `model_dump()` / `model_dump_json()` calls at MCP and CLI boundaries. +* `try/except → format error` blocks at each MCP tool (kept identical via a helper if it grows). +* Pydantic input schema declarations at each MCP tool (they document the tool). + +Anything beyond a handful of lines is duplication and goes into a helper. + +--- + +## 18. Workflows + +### A. Analyze FINN search + +``` +Input: FINN search URL +Steps: + 1. Normalize URL. + 2. Check search-page cache (TTL 60min). + 3. Fetch page 1, parse cards. + 4. If max_pages > 1, fetch page 2..N. + 5. Deduplicate by finnkode. + 6. Record a search_run. + 7. Pre-score from card data. + 8. Select top N for detail fetch. + 9. Run workflow B for each. + 10. Score + classify each. + 11. Sort by total score. + 12. Persist scores; persist shortlist snapshot. + 13. Return shortlist + summary. +``` + +### B. Fetch and parse FINN listing + +``` +Input: finnkode +Steps: + 1. Build https://www.finn.no/realestate/homes/ad.html?finnkode={n}. + 2. Check finn_ads cache (TTL 24h). + 3. Fetch HTML, parse with ad.scrape_ad(). + 4. Normalize numbers/areas/dates via parser.py. + 5. save_finn_ad(). +Output: FinnAd. +``` + +### C. Eiendom.no enrichment + +``` +Input: FINN listing URL or finnkode +Steps: + 1. Build full FINN URL. + 2. Cache check on unit search. + 3. eiendom_no.search_unit_from_finn_url(). + 4. Pick best match. + 5. Save unitCode on the ad. + 6. Cache check on unit detail. + 7. eiendom_no.get_unit(unitCode). + 8. save_eiendom_unit(). + 9. Compute FINN-vs-Eiendom.no mismatch warnings. +Output: EiendomUnit + mismatch list (or unavailable). +``` + +### D. Build unit_vector + +``` +Input: EiendomUnit +Steps: + 1. 
Extract lon/lat from geometry. + 2. propertyType → ptype. + 3. floor / rooms / constructionYear / usableArea. + 4. Choose price: listingPrice → estimatedSellingPrice → FINN total_price. + 5. msgpack.packb + urlsafe_b64encode (strip "="). + 6. Persist unit_vector on eiendom_units. +Output: unit_vector + payload. +``` + +### E. Fetch similar-units / comps + +``` +Input: unitCode, listing_status=RECENTLY_SOLD +Steps: + 1. Load EiendomUnit; ensure unit_vector exists. + 2. Cache check on similar_units. + 3. eiendom_no.get_similar_units(unit_vector). + 4. Normalize and filter locally: + RECENTLY_SOLD → saleStatus=SOLD and finalizedAt is set + FOR_SALE → saleStatus=FORSALE + 5. Compute summary: count, avg/median selling price, avg sqm price, avg DOM. + 6. save_similar_units(). +Output: similar_units[] + comps_summary + confidence. +``` + +### F. Score property + +``` +Input: FinnAd, EiendomUnit, similar_units, user_prefs, feedback +Steps: + 1. economy / market / comparable / location / layout / outdoor / hybel / renovation / risk. + 2. Clamp total to 0–100. + 3. Assign categories. + 4. Build explanation: why_interesting, risks, next_steps, broker_questions. +Output: scores dict + categories + summary. +``` + +### G. Find similar to liked + +``` +Input: finnkode with verdict=liked +Steps: + 1. Load FinnAd. + 2. Ensure Eiendom.no enrichment + unit_vector. + 3. Fetch similar-units (prefer FOR_SALE). + 4. Score candidates against user preferences. + 5. Return ranked recommendations. +``` + +### H. Analyze one listing against comps + +``` +Input: finnkode +Steps: + 1. workflow B → enrich (C) → comps (E, RECENTLY_SOLD). + 2. Compare listing price vs comp avg/median; sqm price vs comp avg. + 3. Compute confidence and classify cheap/fair/expensive. +Output: price_position, sqm_price_position, comparable_score, confidence, comps_summary, warnings. +``` + +### I. Detect new / removed / changed listings + +``` +Input: FINN search URL +Steps: + 1. workflow A (no detail fetch needed). 
+ 2. Compare finnkoder against previous search_run for same normalized_url. + 3. For changed ads, diff price/common_costs/status. + 4. Optionally workflow B on new + changed only. +Output: new_ads[], removed_ads[], changed_ads[]. +``` + +### J. Feedback loop + +``` +Input: finnkode + verdict + notes +Steps: + 1. INSERT into feedback. + 2. Update ad status. + 3. If verdict=liked: mark as seed for similar-to-liked recommendations. + 4. If verdict=rejected: store rejection reason. + 5. Future analyses use feedback as a soft preference signal. +``` + +### K. Compare multiple listings + +``` +Input: finnkoder[] +Steps: + 1. workflow B + C for each. + 2. Optionally workflow E. + 3. Build comparison table. + 4. Identify winners by category: best value / lifestyle / hybel / bargain / safest / highest risk / most overpriced. +Output: comparison_table + winners_by_category + recommendation + risks + broker_questions. +``` + +--- + +## 19. Output formats + +### 19.1 Shortlist item + +```text +1. [Title/address] – Score 84/100 + Category: Bargain candidate + Price: 7,200,000 total / 77 m² / 93,500 NOK per m² + Eiendom.no: Estimate 7,650,000 / range 6,900,000–8,400,000 + Comps: 12 similar recently sold / avg 98,000 NOK per m² + + Why interesting: + - Good size for price. + - Balcony and view. + - Renovation need may reduce competition. + - Flexible layout. + - Price looks low vs estimate and comps. + + Risks: + - Check wet rooms in condition report. + - Common costs need review. + - Hybel potential is not documented. + - Comparable confidence is medium. + + Next steps: + - Open listing. + - Read condition report. + - Check FINN vs Eiendom.no mismatches. + - Ask broker about planned cost increases. + - Consider viewing. +``` + +### 19.2 Analysis summary + +```text +Analyzed 83 listings. +Fetched details for 20. +Eiendom.no-enriched 18. +Fetched similar-units for 7 shortlisted listings. +Shortlisted 8. + +Best bargain candidate: ... +Best safe candidate: ... 
+Best hybel candidate: ... +Best price vs estimate: ... +Best price vs comps: ... +Highest risk: ... +Most overpriced: ... +``` + +--- + +## 20. Configuration + +| Variable | Default | Purpose | +| ----------------------------------------- | -------------------------------: | -------------------------------- | +| `FINN_CACHE_PATH` | `data/finn.sqlite` | SQLite DB path | +| `FINN_MAX_SEARCH_PAGES` | `3` | Max search pages | +| `FINN_DETAIL_LIMIT` | `20` | Max detailed listings per run | +| `FINN_REQUEST_DELAY_SECONDS` | `2` | Delay between FINN requests | +| `FINN_USER_AGENT` | `personal-finn-eiendom-analyzer/0.1` | HTTP User-Agent | +| `FINN_CACHE_TTL_SEARCH_MINUTES` | `60` | Search cache TTL | +| `FINN_CACHE_TTL_AD_HOURS` | `24` | Listing cache TTL | +| `EIENDOM_NO_ENABLED` | `true` | Enable Eiendom.no enrichment | +| `EIENDOM_NO_BASE_URL` | `https://api.eiendom.no/api/v1` | API base URL | +| `EIENDOM_NO_CACHE_TTL_HOURS` | `24` | Unit/similar cache TTL | +| `EIENDOM_NO_REQUEST_DELAY_SECONDS` | `1` | Delay between Eiendom.no calls | +| `EIENDOM_NO_SIMILAR_UNITS_ENABLED` | `true` | Enable similar-units | +| `EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS` | `RECENTLY_SOLD` | Default comps status | +| `HJEMLA_ENABLED` | `false` | Enable optional Hjemla API | +| `LOG_LEVEL` | `INFO` | Logging level | +| `MCP_TRANSPORT` | `stdio` | `stdio` or `streamable_http` | +| `MCP_HTTP_HOST` | `127.0.0.1` | Streamable HTTP bind | +| `MCP_HTTP_PORT` | `8010` | Streamable HTTP port | + +--- + +## 21. Deployment + +The default runtime is a project-local virtualenv. Docker is supported but optional. + +### 21.1 Local install (default) + +```bash +# in the project root +uv venv # or: python3.12 -m venv .venv +source .venv/bin/activate +uv pip install -e ".[dev]" # or: pip install -e ".[dev]" + +# now available: +finn-eiendom --help # CLI +finn-eiendom-mcp # MCP server over stdio +finn-eiendom serve --transport http --port 8010 # MCP server over HTTP +pytest # tests +ruff check . 
# lint +``` + +For a global CLI install: + +```bash +uv tool install . +# or +pipx install . +``` + +### 21.2 Claude Desktop integration (stdio) + +`~/Library/Application Support/Claude/claude_desktop_config.json`: + +```json +{ + "mcpServers": { + "finn-eiendom": { + "command": "/Users/ole/code/finn-mcp/.venv/bin/finn-eiendom-mcp", + "args": [], + "env": { + "FINN_CACHE_PATH": "/Users/ole/code/finn-mcp/data/finn.sqlite", + "EIENDOM_NO_ENABLED": "true" + } + } + } +} +``` + +Or, with `uv` from the project root: + +```json +{ + "mcpServers": { + "finn-eiendom": { + "command": "uv", + "args": ["run", "finn-eiendom-mcp"], + "cwd": "/Users/ole/code/finn-mcp" + } + } +} +``` + +### 21.3 Docker Compose (optional) + +```yaml +services: + finn-eiendom-mcp: + build: . + container_name: finn-eiendom-mcp + restart: unless-stopped + ports: + - "8010:8010" + environment: + FINN_CACHE_PATH: /data/finn.sqlite + EIENDOM_NO_ENABLED: "true" + EIENDOM_NO_SIMILAR_UNITS_ENABLED: "true" + MCP_TRANSPORT: streamable_http + MCP_HTTP_HOST: 0.0.0.0 + MCP_HTTP_PORT: "8010" + volumes: + - ./data:/data + command: ["finn-eiendom", "serve", "--transport", "http", "--host", "0.0.0.0", "--port", "8010"] +``` + +### 21.4 Dockerfile + +```dockerfile +FROM python:3.12-slim +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends gcc \ + && rm -rf /var/lib/apt/lists/* + +COPY pyproject.toml . +COPY finn_eiendom ./finn_eiendom +RUN pip install --no-cache-dir . + +EXPOSE 8010 +CMD ["finn-eiendom-mcp"] +``` + +--- + +## 22. MVP scope + +### Must have + +* Local venv install (`uv venv` + `pip install -e .[dev]`). +* Python core package with all modules listed in §12.1. +* `service.py` with `get_or_fetch_*` helpers. +* `formatting.py` shared between CLI and MCP. +* SQLite cache/history (existing schema retained, `search_runs` + `scores` + `feedback` added). +* FastMCP server with all tools in §14.1 except `finn_compare_ads` (deferred to "should have"). 
+* CLI with all commands in §15.1 except `serve --transport http` and `cache clear-*` variants (deferred). +* FINN search + listing extraction. +* Eiendom.no enrichment enabled by default. +* `unit_vector` build + decode. +* Similar-units/comps with local filtering. +* Scoring on all nine components with category assignment. +* Feedback storage. +* Shortlist output with reasons, risks, next steps, broker questions. +* Pydantic v2 models with `model_config` (no v1 `Config`). +* HTTP retry on 5xx in addition to connection errors. +* MCP entry-point registered in `pyproject.toml`. +* README + `.github/instructions/*.md` describing the architecture and ownership rules. + +### Should have + +* Pagination. +* Price per m² across the board. +* Component score breakdown in output. +* Generated broker questions. +* `finn_get_new_ads_since_last_run` / `finn-eiendom diff`. +* `finn_compare_ads` / `finn-eiendom compare`. +* Feedback-based scoring adjustment. +* `finn_find_similar_to_liked_ad` / `finn-eiendom similar-to-liked`. +* CLI `--format markdown` + `--format table`. +* CLI `serve --transport http`. +* CLI `cache stats|clear|clear-html|clear-json`. + +### Later + +* Web UI / dashboard. +* n8n workflow templates. +* PDF condition-report analysis. +* Geocoding / travel-time / sun / noise overlays. +* Push notifications. +* Price-drop monitoring. +* LLM-based listing-text scoring. +* Optional Hjemla integration. + +--- + +## 23. Roadmap + +### Phase 0 — Spike (largely done) + +* Parse one FINN search result, extract finnkoder, parse 3–5 listings. +* Resolve FINN URL → Eiendom.no `unitCode`, fetch unit detail, generate `unit_vector`, fetch similar-units with `RECENTLY_SOLD`. + +### Phase 1 — Core MVP (mostly done) + +* Stable parser, SQLite cache, Eiendom.no enrichment, similar-units/comps, basic scoring. +* Fixture-based tests for parsers, cache, scoring. + +### Phase 2 — MCP / CLI MVP (this PRD) + +* Replace FastAPI with FastMCP stdio server. 
+* Add `service.py` and `formatting.py`. +* Add `cli.py` (typer) and `__main__.py`. +* Wire MCP tools and CLI commands into the service + formatting layers. +* Pydantic v2 `model_config` cleanup. +* HTTP retry on 5xx. +* New tests: `tests/test_service.py`, expanded `tests/test_mcp_server.py`, new `tests/test_cli.py`, new `tests/test_http.py`, new `tests/test_formatting.py`, new `tests/test_architecture.py`. +* Switch from Docker-only workflow to local venv as default; keep Docker as an optional packaging path. + +### Phase 3 — Personal scoring v2 + +* Tighter user-preference weights, stronger bargain/risk/hybel logic, better confidence handling, generated broker questions. + +### Phase 4 — Agent / workflow + +* Cron / scheduled runs, diff notifications, n8n templates, Slack/Discord output. + +### Phase 5 — Dashboard + +* React/TanStack UI for shortlist, feedback, comps, history. + +--- + +## 24. Acceptance criteria + +### A1. MCP server + +Given a fresh local venv install, `finn-eiendom-mcp` starts via `mcp.run(transport="stdio")` without error. Running `mcp dev finn_eiendom/mcp_server.py` shows all tools listed in §14.1. + +### A2. CLI + +Given `pip install -e .`, `finn-eiendom --help` lists every command in §15.1. Each command runs end-to-end against cached fixtures with no live network calls and produces JSON, markdown, or table output as requested via `formatting.py`. + +### A3. Search analysis + +Given a valid FINN search URL, `service.analyze_search()` returns a ranked shortlist sorted by total score, with at least the fields: `summary`, `shortlist`, `search_url`. Cards are deduplicated by finnkode. Identical reruns within the search-cache TTL are served from cache. + +### A4. Listing detail + +Given a valid finnkode, `service.get_or_fetch_ad()` returns a `FinnAd` with at least `finnkode`, `url`, `title`, `address`, `total_price`, `area_m2`, `listing_description`. Missing fields are `None`, not raised. Subsequent calls within the TTL hit the cache. + +### A5. 
Feedback + +Given a finnkode and verdict, `service.save_feedback()` writes a `feedback` row. `liked` verdicts are surfaced by `service.find_similar_to_liked()`. + +### A6. Eiendom.no enrichment + +Given a FINN listing URL, the system resolves a `unitCode`, fetches the unit detail, stores estimate / coordinates / area / rooms / year / market data, and uses them in scoring. Enrichment failures degrade gracefully — the `eiendom_unit` field is `None` in the result, no exception escapes the service. + +### A7. Similar-units + +Given a `unitCode`, the system builds (or loads) a cached `unit_vector`, calls similar-units with the requested `listing_status`, returns structured comps, caches the result, and emits a comps summary with count, average price, average sqm price. + +### A8. Pydantic v2 + +`FinnAd`, `EiendomUnit`, `SimilarUnit` use `model_config = ConfigDict(...)`. No `class Config:` blocks remain. + +### A9. HTTP retry + +`HTTPClient.get()` retries 5xx responses with exponential backoff (`1s, 2s, 4s`) up to `retries` attempts, and surfaces 4xx as `httpx.HTTPStatusError` immediately. + +### A10. No-duplication / architecture invariants + +A static check (`tests/test_architecture.py`) verifies: + +* No `import httpx` outside `finn_eiendom/http.py`. +* No `import sqlite3` outside `finn_eiendom/cache.py`. +* No `BeautifulSoup` import outside `finn_eiendom/search.py` or `finn_eiendom/ad.py`. +* No `msgpack` import outside `finn_eiendom/eiendom_no.py`. +* `mcp_server.py` only imports from `service`, `formatting`, `models`, `config`, and stdlib + `mcp`. +* `cli.py` only imports from `service`, `formatting`, `models`, `config`, and stdlib + `typer`. + +### A11. Tooling + +`ruff check .` returns zero issues. `pytest` passes. `mypy --strict finn_eiendom` passes (or is documented as a known gap). + +--- + +## 25. Test strategy + +### 25.1 Unit tests + +* `tests/test_parser.py` — number/date/URL/finnkode normalization. +* `tests/test_search.py` — FINN search HTML → cards. 
+* `tests/test_ad.py` — FINN listing HTML → FinnAd. +* `tests/test_eiendom_no.py` — unit search/detail/similar JSON parsers, `unit_vector` encode/decode. +* `tests/test_scoring.py` — all scoring components + classifier. +* `tests/test_cache.py` — read/write/TTL behavior. + +### 25.2 Service tests (new) + +* `tests/test_service.py` + * `test_get_or_fetch_ad_uses_cache` + * `test_get_or_fetch_ad_fetches_when_cache_miss` + * `test_get_or_fetch_ad_force_refresh` + * `test_analyze_search_with_fixtures` + * `test_find_similar_to_liked_uses_liked_feedback` + +### 25.3 MCP tests + +* `tests/test_mcp_server.py` + * `test_mcp_server_has_correct_tools` + * `test_finn_decode_unit_vector_returns_json` + * `test_finn_analyze_search_handles_error` + +### 25.4 CLI tests (new) + +Use Typer's `CliRunner`. + +* `tests/test_cli.py` + * `test_cli_help` + * `test_cli_analyze_search_table_format` + * `test_cli_get_ad_json_format` + * `test_cli_save_feedback_persists_row` + * `test_cli_decode_vector` + +### 25.5 Formatting tests (new) + +* `tests/test_formatting.py` + * `test_render_shortlist_json_roundtrips` + * `test_render_shortlist_markdown_contains_score` + * `test_render_unsupported_format_raises_valueerror` + +### 25.6 HTTP tests (new) + +Use `respx`. + +* `tests/test_http.py` + * `test_get_retries_on_500` + * `test_get_raises_on_404` + * `test_post_delay_applied` + +### 25.7 Architecture tests (new) + +* `tests/test_architecture.py` — static import-graph checks listed in A10. + +### 25.8 Manual / smoke tests + +* `finn-eiendom doctor` runs. +* Real FINN URL run; compare top-3 with manual judgment. +* Save 5 feedback rows; rerun; verify scoring shift. +* Mark one ad liked; run `similar-to-liked`; sanity-check candidates. + +--- + +## 26. 
Logging, safety, compliance + +Log: start/end of analysis, pages/listings/details fetched, Eiendom.no enrichments attempted/found/failed, similar-units attempted/found/failed, cache hits/misses, parse errors, request errors, debug-level scoring details. + +Safety / compliance: + +* Private, low-frequency, user-triggered use only. +* Configurable request delays and User-Agent. +* Cache aggressively to minimize requests. +* No public redistribution of FINN/Eiendom.no data. +* No public exposure without auth — prefer LAN / Tailscale / reverse proxy. +* Scores, estimates, and comps are decision support, not official valuation, legal, or technical advice. +* stdio MCP servers must log to **stderr only** (`logging.basicConfig(stream=sys.stderr, ...)`). + +--- + +## 27. Risks & mitigations + +| Risk | Impact | Mitigation | +| ------------------------------------ | ---------------------- | -------------------------------------------- | +| FINN HTML changes | Parser breaks | Fixture tests, resilient selectors | +| Eiendom.no API/JSON changes | Enrichment/comps break | JSON fixtures, graceful fallback | +| Unit-vector format changes | Similar-units breaks | Unit tests, fall back to cache, mark unavailable | +| Too many requests | Blocking / unwanted load | Delay, cache, low-frequency use | +| Bad scoring | Poor recommendations | Explain score and uncertainty | +| Legal/technical interpretation wrong | Bad decisions | Present as broker questions, not facts | +| User overtrusts score | Missed risks | Always show risks and next steps | +| Public MCP exposure | Misuse | LAN / Tailscale / auth-only | +| stdio server writes to stdout | Breaks JSON-RPC frame | Configure logging to stderr; architecture test| +| Duplication of logic | Drift between MCP/CLI/library | Code-ownership table + architecture tests | + +--- + +## 28. Open questions + +1. Should `service.py` open one shared `sqlite3.Connection` per process or one per call? 
(current code opens per call — fine but worth measuring.) +2. Store raw HTML permanently or only parsed output? Default: only parsed, raw HTML under TTL. +3. How aggressively to detail-fetch in `analyze_search`? Default: top 20 cards. +4. Hardcode scoring weights or expose via YAML / env? Default: hardcoded for MVP; YAML in Phase 3. +5. Should feedback affect scoring in MVP, or only be stored? Default: stored only; soft signal in Phase 3. +6. Multiple scoring profiles (lifestyle / bargain / hybel / safe)? Default: single profile in MVP. +7. Permanently store Eiendom.no data or TTL only? Default: TTL only; review later. +8. How to handle FINN-vs-Eiendom.no mismatches (area, price)? Default: store both, surface as warning, never silently overwrite. +9. Which `listing_status` values does similar-units accept server-side? Verify in spike before relying on it. +10. Should recommendations use only `liked` listings, or also high-scoring listings without feedback? Default: liked only. +11. Should `serve --transport http` ship in MVP? Default: yes for cron/n8n users; stdio still default for Claude Desktop. + +--- + +## 29. First implementation plan (Phase 2) + +Step by step, each step independently mergeable. + +1. **Switch dev workflow to local venv.** Update `AGENTS.md`, `copilot-instructions.md`, `python.instructions.md`, `tests.instructions.md`. Add `clean-code.instructions.md`, `cli.instructions.md`, and `docs.instructions.md`. +2. **Pydantic v2 cleanup** — replace `class Config` with `model_config = ConfigDict(...)` in `models.py`. Add roundtrip test. +3. **Service layer** — create `finn_eiendom/service.py` with `get_or_fetch_*` and orchestration helpers. Add `tests/test_service.py`. +4. **Formatting layer** — create `finn_eiendom/formatting.py` with all `render_*` helpers. Add `tests/test_formatting.py`. +5. **HTTP retry** — extend `HTTPClient.get()` with 5xx retry + exponential backoff. Add `tests/test_http.py`. +6. 
**Replace FastAPI with FastMCP** — rewrite `finn_eiendom/mcp_server.py` against `service.py` + `formatting.py`. Add stdio `main()`. Add `[project.scripts]` entry `finn-eiendom-mcp`. Expand `tests/test_mcp_server.py`. +7. **CLI** — create `finn_eiendom/cli.py` (typer) and `finn_eiendom/__main__.py`. Add `[project.scripts]` entry `finn-eiendom`. Add `tests/test_cli.py`. +8. **Diff workflow** — implement `search_runs` table + `service.get_new_ads_since_last_run` + matching MCP tool + CLI `diff` command. +9. **Compare workflow** — implement `service.compare_ads` + MCP tool + CLI `compare` command. +10. **Similar-to-liked** — implement `service.find_similar_to_liked` + MCP tool + CLI `similar-to-liked` command. +11. **Architecture tests** — `tests/test_architecture.py` enforcing A10. +12. **README + Claude Desktop config** — document install paths for both CLI and MCP using local venv. + +Definition of done for the whole phase: + +* [ ] `finn-eiendom-mcp` boots over stdio with all tools listed. +* [ ] `finn-eiendom --help` lists every command in §15.1. +* [ ] `pytest` is green, including new `test_service.py`, `test_cli.py`, `test_http.py`, `test_formatting.py`, `test_architecture.py`. +* [ ] `ruff check .` is clean. +* [ ] README documents Claude Desktop config and a CLI quickstart using local venv. +* [ ] All acceptance criteria in §24 pass. + +--- + +## 30. Final product statement + +> **Build a compact, private, self-hosted property analysis platform whose source of truth is a typed Python library, and whose user-facing surfaces are (a) an MCP server for LLM agents, (b) a CLI for terminals and cron, and (c) a Python API for tests and notebooks. 
All three share the same service layer, the same formatting layer, and the same SQLite cache.** + +The MVP does one thing well: + +> **FINN search in → relevant property candidates out, enriched with Eiendom.no estimates, similar-units, explanation, risk, and next steps.** \ No newline at end of file diff --git a/PROJECT.md b/PROJECT.md new file mode 100644 index 0000000..3b6f4b9 --- /dev/null +++ b/PROJECT.md @@ -0,0 +1,162 @@ +# PROJECT.md — module map + +The repo at a glance. For the why and the rules, read [`PRD.md`](PRD.md) §12 and §17. For the workflow, read [`AGENTS.md`](AGENTS.md). + +--- + +## Source tree + +``` +finn-mcp/ +├── pyproject.toml +├── Makefile +├── README.md ← user-facing overview +├── USAGE.md ← full user guide +├── PRD.md ← product spec + architecture (§17 = constitution) +├── PROJECT.md ← this file +├── AGENTS.md ← workflow for AI agents and contributors +├── CLEANUP.md ← pre-Phase-2 cleanup runbook +├── IMPLEMENTATION.md ← Phase 2 build runbook (12 steps) +│ +├── .github/ +│ ├── copilot-instructions.md +│ └── instructions/ +│ ├── python.instructions.md +│ ├── mcp.instructions.md +│ ├── cli.instructions.md +│ ├── tests.instructions.md +│ ├── clean-code.instructions.md +│ └── docs.instructions.md ← context7 lookup rules +│ +├── finn_eiendom/ ← the package +│ ├── __init__.py +│ ├── __main__.py ← python -m finn_eiendom → CLI +│ ├── config.py ← env vars, defaults, TTLs +│ ├── models.py ← Pydantic v2 models +│ ├── parser.py ← Norwegian number/area/URL/finnkode normalization +│ ├── http.py ← async httpx client w/ retry + delay +│ ├── cache.py ← SQLite schema + persistence +│ ├── search.py ← FINN search HTML parsing +│ ├── ad.py ← FINN listing HTML parsing +│ ├── eiendom_no.py ← Eiendom.no unit search/detail, unit_vector, comps +│ ├── scoring.py ← score model + classifications +│ ├── feedback.py ← verdicts + soft preference signal +│ ├── analysis.py ← shortlist + summary assembly +│ ├── service.py ← get_or_fetch_* + thin facade for MCP and CLI +│ 
├── formatting.py ← render_* helpers (json/markdown/table) — shared by MCP and CLI +│ ├── mcp_server.py ← FastMCP wrappers around service.py +│ └── cli.py ← typer wrappers around service.py +│ +├── tests/ +│ ├── conftest.py +│ ├── fixtures.py +│ ├── fixtures/ ← HTML + JSON samples +│ ├── test_parser.py +│ ├── test_search.py +│ ├── test_ad.py +│ ├── test_eiendom_no.py +│ ├── test_scoring.py +│ ├── test_cache.py +│ ├── test_http.py ← retry + delay behavior +│ ├── test_service.py ← get_or_fetch_* + analyze_* +│ ├── test_formatting.py ← render_* roundtrips +│ ├── test_models.py ← Pydantic v2 roundtrips +│ ├── test_mcp_server.py ← tool registration + error envelope +│ ├── test_cli.py ← Typer CliRunner +│ └── test_architecture.py ← import-graph invariants (PRD A10) +│ +└── data/ ← gitignored; SQLite cache lives here + └── finn.sqlite +``` + +--- + +## Module responsibilities + +Single-home rule: every concern lives in exactly one module. See `PRD.md` §17.2 for the full table. + +| Module | Owns | Imports allowed | +| --------------- | --------------------------------------------------------------------- | ---------------------------------------------------------- | +| `config.py` | env-var loading, defaults, TTL constants | stdlib | +| `models.py` | Pydantic v2 models | stdlib, `pydantic` | +| `parser.py` | Norwegian text normalization (numbers, dates, URLs, finnkode) | stdlib | +| `http.py` | async `httpx.AsyncClient`, retry on 5xx, delay, user-agent | stdlib, `httpx` | +| `cache.py` | SQLite schema, reads, writes, TTL | stdlib, `sqlite3`, `models` | +| `search.py` | FINN search HTML → cards (BeautifulSoup) | stdlib, `bs4`, `parser`, `http`, `cache`, `models` | +| `ad.py` | FINN listing HTML → `FinnAd` (BeautifulSoup) | stdlib, `bs4`, `parser`, `http`, `cache`, `models` | +| `eiendom_no.py` | Eiendom.no unit search/detail, unit_vector, similar-units (msgpack) | stdlib, `msgpack`, `http`, `cache`, `models` | +| `scoring.py` | 9 score components, total clamping, category 
classifier | stdlib, `models` | +| `feedback.py` | feedback storage and retrieval | stdlib, `cache`, `models` | +| `analysis.py` | shortlist + summary assembly | stdlib, `search`, `ad`, `eiendom_no`, `scoring`, `feedback`| +| `service.py` | cache-aware orchestration; the only place that combines fetch + cache | stdlib, `config`, `cache`, `analysis`, `ad`, `eiendom_no`, `feedback`, `scoring`, `models` | +| `formatting.py` | render_* helpers (json/markdown/table) | stdlib, `models` | +| `mcp_server.py` | FastMCP tool definitions, error wrapping, stdio/HTTP entry | stdlib, `mcp`, `pydantic`, `service`, `formatting`, `config`, `models` | +| `cli.py` | typer command definitions, --format dispatch | stdlib, `typer`, `service`, `formatting`, `config`, `models` | + +`mcp_server.py` and `cli.py` are siblings — they never import each other. `service.py` never imports `mcp_server` or `cli`. `tests/test_architecture.py` enforces all of this. + +--- + +## Entry points + +Defined in `pyproject.toml`: + +```toml +[project.scripts] +finn-eiendom-mcp = "finn_eiendom.mcp_server:main" +finn-eiendom = "finn_eiendom.cli:app" +``` + +So you have: + +* `finn-eiendom-mcp` — MCP server over stdio (what Claude Desktop calls). +* `finn-eiendom` — CLI with all subcommands. +* `python -m finn_eiendom` — same as `finn-eiendom` (via `__main__.py`). +* `import finn_eiendom` — the library, for tests and notebooks. + +--- + +## Dependency graph + +``` + cli.py mcp_server.py + ↓ ↓ + └──> formatting.py <──┘ + │ + ↓ + service.py + ↓ + analysis.py + ↓ + ┌───────────┼──────────────┐ + ↓ ↓ ↓ + search.py ad.py eiendom_no.py scoring.py feedback.py + │ │ │ │ │ + ↓ ↓ ↓ ↓ ↓ + parser.py parser.py cache.py models.py cache.py + │ │ │ + ↓ ↓ ↓ + http.py http.py http.py +``` + +Bottom layer: `parser.py`, `http.py`, `cache.py`, `models.py`, `config.py`. They depend only on stdlib + one third-party library each. + +The graph is acyclic and points downward. Every arrow can be drawn; no arrow can be drawn upward. 
+ +--- + +## Where to add things + +| You want to… | Add it to… | +| ----------------------------------------- | --------------------------------------- | +| Parse a new FINN field | `ad.py` or `search.py` + `models.py` | +| Add a new score component | `scoring.py` | +| Add a new env var | `config.py` | +| Add a new MCP tool | `mcp_server.py` (after `service.py`) | +| Add a new CLI command | `cli.py` (after `service.py`) | +| Change how something renders | `formatting.py` | +| Add a new orchestration / workflow | `service.py` (then add MCP + CLI) | +| Speak to a new external API | new module next to `eiendom_no.py` | +| Add a new SQLite table | `cache.py` | + +For anything else — read `PRD.md` §17.2 and §17.7. diff --git a/README.md b/README.md new file mode 100644 index 0000000..65fc26d --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +# finn-eiendom-mcp + +> **Private, self-hosted property analysis platform for Norwegian real estate.** Analyzes FINN listings, enriches with Eiendom.no estimates, scores against personal preferences, and surfaces bargain candidates, hybel potential, renovation upside, and risk flags. Exposed through an MCP server, a CLI, and a Python library — all sharing one service layer. + +This is a **personal tool**. Not a SaaS, not a crawler, not legal/financial advice. Run locally, low frequency, your own data. + +--- + +## What it does + +``` +FINN search URL → ranked shortlist of homes + with reasons, risks, comps, broker questions +``` + +Specifically: + +* Parses FINN search and listing pages. +* Resolves each listing to an Eiendom.no `unitCode` for valuation and similar-units. +* Builds a `unit_vector` and fetches recently-sold comparables. +* Scores 9 components (economy, market position, comps, location, layout, outdoor, hybel, renovation, risk). +* Classifies as *bargain*, *safe*, *hybel*, *renovation*, *lifestyle*, or *risk*. +* Caches everything in SQLite; remembers what you've liked or rejected. 
+* Detects new / removed / changed listings between runs. + +--- + +## Three ways to use it + +| Surface | When you want… | Entry point | +| --------------- | -------------------------------------------------------------- | ----------------------- | +| **CLI** | Quick triage in a terminal, scripting, cron | `finn-eiendom ...` | +| **MCP server** | Claude Desktop, n8n, AI agents — conversational analysis | `finn-eiendom-mcp` | +| **Python lib** | Tests, notebooks, custom scripts | `import finn_eiendom` | + +All three call the same underlying `service.py` — same defaults, same semantics, same results. + +--- + +## Quick start + +### Requirements + +* Python **3.12+** +* `uv` (recommended) or `pip` + +### Install + +```bash +git clone <repo-url> finn-mcp +cd finn-mcp + +uv venv # or: python3.12 -m venv .venv +source .venv/bin/activate +uv pip install -e ".[dev]" # or: pip install -e ".[dev]" +``` + +### First run (CLI) + +```bash +# Triage a FINN search +finn-eiendom analyze-search 'https://www.finn.no/realestate/homes/search.html?location=...' --format table + +# Drill into one listing +finn-eiendom get-ad 462400360 --format markdown + +# Mark a listing as liked +finn-eiendom save-feedback 462400360 liked --notes "great layout, check fellesgjeld" + +# Find similar properties to liked listings +finn-eiendom similar-to-liked 462400360 +``` + +### First run (Claude Desktop) + +Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or the equivalent on Linux: + +```json +{ + "mcpServers": { + "finn-eiendom": { + "command": "/absolute/path/to/finn-mcp/.venv/bin/finn-eiendom-mcp", + "env": { + "FINN_CACHE_PATH": "/absolute/path/to/finn-mcp/data/finn.sqlite", + "EIENDOM_NO_ENABLED": "true" + } + } + } +} +``` + +Restart Claude Desktop. 
Then in any chat: + +> Analyze this FINN search and shortlist the top 5 for a couple in Oslo with a 9–12 MNOK budget, willing to renovate, prefer hybel potential: +> `https://www.finn.no/realestate/homes/search.html?location=...` + +For deep usage — every command, every MCP tool, every env var — see [`USAGE.md`](USAGE.md). + +--- + +## Architecture in one screen + +``` + CLI (typer) MCP server (FastMCP) ← thin, parallel front ends + \ / + \ / + service.py ← cache + fetch orchestration + ↓ + analysis.py ← shortlist + summary + ↓ + search / ad / eiendom_no / scoring / feedback + ↓ + parser / http / cache (SQLite) + ↓ + FINN HTML + Eiendom.no JSON +``` + +`formatting.py` lives next to `service.py` and is shared by both CLI and MCP for JSON / markdown / table rendering. + +**Key rule:** CLI and MCP are siblings. They never call each other. Both call the same `service.py` functions. See [`PRD.md`](PRD.md) §17 for the full code-ownership constitution. + +--- + +## Project documents + +Read in this order depending on what you're doing: + +| If you want to… | Read | +| ------------------------------------- | --------------------------------------------------- | +| Use the tool | This README, then [`USAGE.md`](USAGE.md) | +| Understand the design | [`PRD.md`](PRD.md), especially §1, §12, §17 | +| Contribute / extend / hack on it | [`AGENTS.md`](AGENTS.md), then [`PROJECT.md`](PROJECT.md), then `.github/instructions/*.md` | +| Run the cleanup pass on the repo | [`CLEANUP.md`](CLEANUP.md) | +| Build out unfinished features | [`IMPLEMENTATION.md`](IMPLEMENTATION.md) | + +--- + +## Status + +* **Phase 0 (spike):** done. +* **Phase 1 (core MVP):** mostly done. +* **Phase 2 (MCP + CLI):** in progress — driven by [`IMPLEMENTATION.md`](IMPLEMENTATION.md). +* **Phase 3+ (scoring v2, agent workflows, dashboard):** future. + +--- + +## Safety and compliance + +* Private, low-frequency, user-triggered use only. No public deployment. 
+* Configurable request delays (`FINN_REQUEST_DELAY_SECONDS`, `EIENDOM_NO_REQUEST_DELAY_SECONDS`) — defaults are conservative. +* Aggressive caching to minimize external requests. +* No bypassing of rate limits, bot protection, authentication, or access controls. +* No public redistribution of FINN or Eiendom.no data. +* Scores, estimates, and comparable sales are **decision support, not advice**. Don't substitute this for a real broker, lawyer, or technical inspector. + +--- + +## License / use + +Personal project. Not for redistribution. Don't expose the MCP HTTP transport on a public interface — keep it on LAN, Tailscale, or behind auth. diff --git a/USAGE.md b/USAGE.md new file mode 100644 index 0000000..9bbb039 --- /dev/null +++ b/USAGE.md @@ -0,0 +1,503 @@ +# USAGE.md — finn-eiendom user guide + +How to use the tool day-to-day. Covers installation, every CLI command, every MCP tool, Claude Desktop integration, common workflows, environment variables, and troubleshooting. + +For the why and the architecture, see [`README.md`](README.md) and [`PRD.md`](PRD.md). + +--- + +## 1. Installation + +### Requirements + +* Python **3.12 or newer** (check with `python3 --version`) +* `uv` (recommended) or `pip` +* macOS, Linux, or WSL2 on Windows + +### Install + +```bash +git clone <repo-url> finn-mcp +cd finn-mcp + +# Option A: uv (preferred — fast) +uv venv +source .venv/bin/activate +uv pip install -e ".[dev]" + +# Option B: pip +python3.12 -m venv .venv +source .venv/bin/activate +pip install -e ".[dev]" +``` + +Verify: + +```bash +finn-eiendom --help +finn-eiendom-mcp --help # may exit immediately on stdio mode; that's fine +finn-eiendom doctor # smoke-checks cache, FINN, Eiendom.no reachability +``` + +### Updating + +```bash +git pull +source .venv/bin/activate +uv pip install -e ".[dev]" +``` + +If `pyproject.toml` added dependencies, the last command picks them up. 
+ +### Global install (optional) + +If you want `finn-eiendom` available system-wide without activating the venv: + +```bash +uv tool install . +# or +pipx install . +``` + +--- + +## 2. First-time setup + +### Set up the data directory + +```bash +mkdir -p data +``` + +SQLite cache lives there at `data/finn.sqlite` by default. Override with `FINN_CACHE_PATH` if you want it elsewhere. + +### Optional: environment file + +Create `.env` in the project root for your usual settings: + +```bash +FINN_CACHE_PATH=data/finn.sqlite +FINN_MAX_SEARCH_PAGES=3 +FINN_DETAIL_LIMIT=20 +EIENDOM_NO_ENABLED=true +EIENDOM_NO_SIMILAR_UNITS_ENABLED=true +LOG_LEVEL=INFO +``` + +See §7 for the full list of variables. + +### Verify + +```bash +finn-eiendom doctor +``` + +This pings the cache, reaches FINN once, reaches Eiendom.no once, and reports any failures. + +--- + +## 3. CLI reference + +Every command runs inside the activated venv. + +### 3.1 Analyze a FINN search + +```bash +finn-eiendom analyze-search '' [options] +``` + +| Option | Default | Purpose | +| ------------------- | ------- | ---------------------------------------------------------- | +| `--max-pages N` | `3` | Pages of search results to fetch. | +| `--detail-limit N` | `20` | How many listings to detail-fetch from the result set. | +| `--no-details` | off | Skip detail fetches; use only search-card data. | +| `--no-eiendom` | off | Skip Eiendom.no enrichment. | +| `--with-similar` | off | Fetch similar-units / comps for shortlisted listings. | +| `--format FMT` | `json` | `json`, `markdown`, or `table`. 
| + +Examples: + +```bash +# Triage in the terminal +finn-eiendom analyze-search 'https://www.finn.no/realestate/homes/search.html?location=0.20061&min_bedrooms=2&price_collective_to=12000000' --format table + +# Full JSON for piping into jq +finn-eiendom analyze-search '<search-url>' --format json | jq '.shortlist[].title' + +# Detailed run with comps +finn-eiendom analyze-search '<search-url>' --detail-limit 30 --with-similar --format markdown +``` + +### 3.2 Drill into one listing + +```bash +finn-eiendom get-ad <finnkode> [options] +``` + +| Option | Default | Purpose | +| ------------------- | ------- | -------------------------------------------------- | +| `--force-refresh` | off | Bypass the 24h cache and refetch. | +| `--no-eiendom` | off | Skip Eiendom.no enrichment. | +| `--with-similar` | off | Fetch similar-units / comps. | +| `--format FMT` | `json` | `json` or `markdown`. | + +```bash +finn-eiendom get-ad 462400360 --format markdown +finn-eiendom get-ad 462400360 --force-refresh --with-similar +``` + +### 3.3 Compare listings + +```bash +finn-eiendom compare <finnkode> <finnkode> [...] [options] +``` + +| Option | Default | Purpose | +| ---------------- | ------- | -------------------------------------- | +| `--no-eiendom` | off | Skip Eiendom.no enrichment. | +| `--no-comps` | off | Skip similar-units / comps. | +| `--format FMT` | `json` | `json`, `markdown`, or `table`. | + +```bash +finn-eiendom compare 462400360 461153194 --format markdown +finn-eiendom compare 462400360 461153194 459333210 --format table +``` + +Up to 10 finnkoder per call. + +### 3.4 Feedback + +```bash +finn-eiendom save-feedback <finnkode> <verdict> [--notes "..."] +``` + +Verdict vocabulary: `liked`, `rejected`, `interesting`, `bargain_candidate`, `risk_object`, `viewing_candidate`, `viewed`, `too_expensive`, `too_small`, `too_far_out`, `too_high_risk`, `likes_location`, `likes_layout`, `dislikes_area`. 
+ +```bash +finn-eiendom save-feedback 462400360 liked --notes "balcony, view, check wet rooms" +finn-eiendom save-feedback 461153194 rejected --notes "too far from city center" +``` + +`liked` verdicts feed the `similar-to-liked` command. + +### 3.5 New / removed / changed listings + +```bash +finn-eiendom diff '' [--format FMT] +``` + +Compares the current search results against the previous run for the same normalized URL and reports new finnkoder, removed finnkoder, and changed listings (price, common costs, status). + +```bash +finn-eiendom diff '' --format table +``` + +Useful as a daily cron: + +```bash +0 9 * * * cd /path/to/finn-mcp && .venv/bin/finn-eiendom diff 'https://www.finn.no/...' --format markdown >> diff.log +``` + +### 3.6 Shortlist history + +```bash +finn-eiendom shortlist [--run-id ID] [--limit N] [--format FMT] +``` + +Without `--run-id`, returns the latest saved shortlist. + +### 3.7 Eiendom.no commands + +```bash +finn-eiendom resolve-unit '' # find unitCode for a FINN listing +finn-eiendom get-unit [--force-refresh] # fetch unit detail +finn-eiendom enrich-ad [--with-similar] # FINN + Eiendom.no combined +finn-eiendom build-vector # build the base64url unit_vector +finn-eiendom decode-vector # decode for inspection +finn-eiendom similar-units [--status RECENTLY_SOLD|FOR_SALE|CURRENT] +``` + +### 3.8 Find similar to liked + +```bash +finn-eiendom similar-to-liked [--mode recommendations|comps] [--status STATUS] +``` + +The listing must have a `liked` feedback row. Defaults to `mode=recommendations`, `status=FOR_SALE` — i.e. find active listings similar to this one. Use `--mode comps --status RECENTLY_SOLD` to get comparable sales instead. + +### 3.9 Price analysis against comps + +```bash +finn-eiendom analyze-against-comps +``` + +Returns `price_position` (`below_estimate` / `within_range` / `above_estimate`), `sqm_price_position` (`cheap` / `normal` / `expensive`), `comparable_score`, and a `confidence` label. 
+ +### 3.10 Cache management + +```bash +finn-eiendom cache stats # row counts and TTL summary +finn-eiendom cache clear # purge everything except feedback +finn-eiendom cache clear-html # only purge raw HTML +finn-eiendom cache clear-json # only purge raw JSON +``` + +Feedback is never purged by `cache clear` — feedback is permanent until explicitly deleted via SQL. + +### 3.11 MCP server + +```bash +finn-eiendom serve # stdio (default) +finn-eiendom serve --transport http --port 8010 # HTTP for n8n / multi-client +``` + +In HTTP mode the server listens on `http://127.0.0.1:8010/mcp` with operational endpoints `GET /health`, `GET /version`, `GET /debug/config`. + +There's also a shorthand `finn-eiendom-mcp` that starts stdio mode directly — that's what Claude Desktop calls. + +### 3.12 Misc + +```bash +finn-eiendom config show # print resolved configuration +finn-eiendom config path # print SQLite cache path +finn-eiendom doctor # smoke checks +finn-eiendom version +``` + +--- + +## 4. MCP tools (for Claude Desktop / n8n / agents) + +All tools use the `finn_` prefix. They mirror the CLI commands 1:1 — same defaults, same semantics. + +| Tool | Purpose | +| ------------------------------------- | ---------------------------------------------------------------- | +| `finn_analyze_search` | Analyze a FINN search URL and return a ranked shortlist. | +| `finn_get_ad` | Fetch structured data for one finnkode. | +| `finn_compare_ads` | Compare multiple listings side by side. | +| `finn_save_feedback` | Store feedback/verdict/notes. | +| `finn_get_shortlist` | Fetch a stored shortlist from a previous run. | +| `finn_get_new_ads_since_last_run` | Detect new / removed / changed listings. | +| `finn_resolve_eiendom_unit` | Map FINN URL → Eiendom.no `unitCode`. | +| `finn_get_eiendom_unit` | Fetch Eiendom.no unit detail by `unitCode`. | +| `finn_enrich_ad` | Combine FINN listing + Eiendom.no enrichment. | +| `finn_build_unit_vector` | Build a `unit_vector` from a `unitCode`. 
| +| `finn_decode_unit_vector` | Decode a `unit_vector` for inspection. | +| `finn_get_similar_units` | Fetch comps / recommendations. | +| `finn_find_similar_to_liked_ad` | Find properties similar to one you liked. | +| `finn_analyze_ad_against_comps` | Evaluate a listing against `RECENTLY_SOLD` comps. | + +Every tool accepts a `response_format` parameter (`"json"` or `"markdown"`). Errors come back as `{"error": true, "code": "", "message": "..."}`. + +--- + +## 5. Claude Desktop setup + +### Config file + +* macOS: `~/Library/Application Support/Claude/claude_desktop_config.json` +* Linux: `~/.config/Claude/claude_desktop_config.json` + +### Direct entry-point (recommended) + +```json +{ + "mcpServers": { + "finn-eiendom": { + "command": "/absolute/path/to/finn-mcp/.venv/bin/finn-eiendom-mcp", + "env": { + "FINN_CACHE_PATH": "/absolute/path/to/finn-mcp/data/finn.sqlite", + "EIENDOM_NO_ENABLED": "true", + "EIENDOM_NO_SIMILAR_UNITS_ENABLED": "true", + "LOG_LEVEL": "INFO" + } + } + } +} +``` + +The `command` **must** be the absolute path to the venv's `finn-eiendom-mcp` binary. Don't rely on `$PATH` here — Claude Desktop doesn't inherit your shell environment. + +### Alternative: via `uv` + +```json +{ + "mcpServers": { + "finn-eiendom": { + "command": "uv", + "args": ["run", "finn-eiendom-mcp"], + "cwd": "/absolute/path/to/finn-mcp" + } + } +} +``` + +### Verify + +1. Restart Claude Desktop. +2. Look for `finn-eiendom` in the MCP servers indicator (usually a hammer icon). +3. Ask in any chat: *"Use the finn-eiendom server to analyze this search: ..."* + +If it doesn't show up, check the Claude Desktop logs: + +* macOS: `~/Library/Logs/Claude/mcp-server-finn-eiendom.log` +* Linux: `~/.local/share/Claude/logs/mcp-server-finn-eiendom.log` + +stdout output from the server is a fatal error — the server must only log to stderr. + +--- + +## 6. Common workflows + +### 6.1 Daily triage + +```bash +# Morning routine +finn-eiendom diff 'https://www.finn.no/...' 
--format table +# Detail-fetch only what's new or changed +finn-eiendom analyze-search 'https://www.finn.no/...' --detail-limit 10 --format markdown +``` + +### 6.2 Weekly deep dive in Claude Desktop + +> Read my latest finn-eiendom shortlist and group the top 10 by category (bargain / safe / hybel / lifestyle). For each, summarize the three most important risks and the three most important broker questions. + +### 6.3 Pre-viewing prep + +```bash +# Mark candidates for viewing +finn-eiendom save-feedback 462400360 viewing_candidate --notes "Saturday 14:00" +# Get the full data + comps +finn-eiendom get-ad 462400360 --with-similar --format markdown > viewing_prep_462400360.md +``` + +Then in Claude Desktop: + +> Read the saved markdown for finnkode 462400360 and prepare a viewing checklist: wet rooms to inspect, common-costs questions, hybel-approval question, neighbor questions. + +### 6.4 Comparing finalists + +```bash +finn-eiendom compare 462400360 461153194 459333210 --format markdown > finalists.md +``` + +### 6.5 Build a recommendation set from liked properties + +```bash +# After you've liked a few +finn-eiendom save-feedback 462400360 liked +finn-eiendom save-feedback 461153194 liked + +# Get recommendations similar to each +finn-eiendom similar-to-liked 462400360 --mode recommendations --status FOR_SALE +finn-eiendom similar-to-liked 461153194 --mode recommendations --status FOR_SALE +``` + +--- + +## 7. 
Environment variables + +| Variable | Default | Purpose | +| ----------------------------------------- | -------------------------------: | -------------------------------- | +| `FINN_CACHE_PATH` | `data/finn.sqlite` | SQLite DB path | +| `FINN_MAX_SEARCH_PAGES` | `3` | Max search pages per analyze | +| `FINN_DETAIL_LIMIT` | `20` | Max detail fetches per analyze | +| `FINN_REQUEST_DELAY_SECONDS` | `2` | Seconds between FINN requests | +| `FINN_USER_AGENT` | `personal-finn-eiendom-analyzer/0.1` | HTTP User-Agent | +| `FINN_CACHE_TTL_SEARCH_MINUTES` | `60` | Search cache TTL | +| `FINN_CACHE_TTL_AD_HOURS` | `24` | Listing cache TTL | +| `EIENDOM_NO_ENABLED` | `true` | Enable Eiendom.no enrichment | +| `EIENDOM_NO_BASE_URL` | `https://api.eiendom.no/api/v1` | API base URL | +| `EIENDOM_NO_CACHE_TTL_HOURS` | `24` | Unit/similar cache TTL | +| `EIENDOM_NO_REQUEST_DELAY_SECONDS` | `1` | Seconds between Eiendom.no calls | +| `EIENDOM_NO_SIMILAR_UNITS_ENABLED` | `true` | Enable similar-units | +| `EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS` | `RECENTLY_SOLD` | Default comps status | +| `HJEMLA_ENABLED` | `false` | Enable optional Hjemla API | +| `LOG_LEVEL` | `INFO` | Log level | +| `MCP_TRANSPORT` | `stdio` | `stdio` or `streamable_http` | +| `MCP_HTTP_HOST` | `127.0.0.1` | HTTP bind address | +| `MCP_HTTP_PORT` | `8010` | HTTP port | + +Set them in `.env`, in your shell, or in the Claude Desktop `env` block per §5. + +--- + +## 8. Troubleshooting + +### Claude Desktop doesn't see the server + +1. The `command` path must be absolute and point at the venv's binary. +2. Check `~/Library/Logs/Claude/mcp-server-finn-eiendom.log` (macOS) for a Python traceback. +3. The server **must not** write to stdout — any `print()` in the code breaks JSON-RPC. If you're hacking on it and see a frame parse error, that's the cause. +4. Restart Claude Desktop after config changes (`Cmd+Q`, not just close the window). 
+ +### "Module not found" when running CLI + +The venv isn't activated, or the package isn't installed in editable mode. + +```bash +source .venv/bin/activate +uv pip install -e ".[dev]" +``` + +### Eiendom.no enrichment is `unavailable` + +This is graceful degradation when: + +* The FINN URL can't be matched to a `unitCode` (rare, but happens for unusual addresses). +* Eiendom.no rate-limited or returned 5xx. +* The unit was deleted from Eiendom.no's index. + +Check the log for the warning. The listing analysis continues without enrichment. + +### Similar-units returns nothing + +* Verify `EIENDOM_NO_SIMILAR_UNITS_ENABLED=true`. +* The `unit_vector` might be empty / malformed — check `finn-eiendom decode-vector `. +* Try `--status FOR_SALE` if `RECENTLY_SOLD` is sparse, or vice versa. + +### Slow first run + +The first analyze fills the cache. Subsequent runs are much faster. Tune `FINN_REQUEST_DELAY_SECONDS` and `EIENDOM_NO_REQUEST_DELAY_SECONDS` if you're impatient — but don't drop them too low, the whole point of caching is to be polite. + +### Stale results + +Cache TTLs: + +* Search: 60 minutes +* FINN listing: 24 hours +* Eiendom.no unit: 24 hours +* Similar-units: 24 hours + +Force a refresh with `--force-refresh` on `get-ad` or `get-unit`, or wipe with `finn-eiendom cache clear`. + +### `pytest` fails after pulling new changes + +```bash +source .venv/bin/activate +uv pip install -e ".[dev]" # re-sync dependencies +pytest -x # find the first failure +``` + +If a test fails with a network-related error, that's a bug — tests should never hit the network. Report it. + +--- + +## 9. What this tool is not + +* Not a public API. Don't expose the HTTP transport on the open internet. +* Not financial, legal, or valuation advice. Scores and estimates are decision support. +* Not a bidding agent. It will never contact a broker or place a bid for you. +* Not a crawler. Use it for the searches you'd be manually browsing anyway — at your own pace. 
+* Not a substitute for a real condition report (`tilstandsrapport`), a real lawyer, or a real broker. + +--- + +## 10. Getting help + +* [`README.md`](README.md) — overview +* [`PRD.md`](PRD.md) — full product spec and architecture +* [`AGENTS.md`](AGENTS.md) — workflow rules for contributors +* [`.github/instructions/*.md`](.github/instructions/) — per-topic conventions + +For bugs, open an issue in the repo with: the exact command run, the full traceback or unexpected output, the version (`finn-eiendom version`), and a redacted FINN URL if relevant. diff --git a/finn_eiendom/__init__.py b/finn_eiendom/__init__.py new file mode 100644 index 0000000..abbba63 --- /dev/null +++ b/finn_eiendom/__init__.py @@ -0,0 +1,36 @@ +"""FINN Real Estate MCP Server - Private property analysis platform.""" + +__version__ = "0.1.0" +__author__ = "FINN Scout" + +from . import ad, analysis, cache, config, eiendom_no, scoring, search +from .http import HTTPClient +from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit, UnitVector +from .parser import ( + extract_finnkode_from_url, + normalize_area, + normalize_finnkode, + normalize_number, + normalize_price, +) + +__all__ = [ + "config", + "FinnAd", + "FinnSearchCard", + "EiendomUnit", + "SimilarUnit", + "UnitVector", + "normalize_price", + "normalize_area", + "normalize_number", + "normalize_finnkode", + "extract_finnkode_from_url", + "HTTPClient", + "ad", + "analysis", + "cache", + "eiendom_no", + "scoring", + "search", +] diff --git a/finn_eiendom/ad.py b/finn_eiendom/ad.py new file mode 100644 index 0000000..e4db961 --- /dev/null +++ b/finn_eiendom/ad.py @@ -0,0 +1,193 @@ +"""FINN listing detail scraping and normalization.""" + +import logging +import re +from datetime import UTC, datetime + +from bs4 import BeautifulSoup + +from .http import HTTPClient +from .models import FinnAd +from .parser import ( + clean_text, + extract_finnkode_from_url, + normalize_area, + normalize_finnkode, + normalize_number, + 
normalize_price, + text_to_bool, +) + +logger = logging.getLogger(__name__) + +FINN_AD_URL_TEMPLATE = "https://www.finn.no/realestate/homes/ad.html?finnkode={}" + + +async def fetch_ad(finnkode: str, client: HTTPClient | None = None) -> str: + """Fetch FINN listing HTML by finnkode.""" + client = client or HTTPClient(request_delay_seconds=0.0) + url = FINN_AD_URL_TEMPLATE.format(finnkode) + response = await client.get(url) + return response.text + + +def _load_property_map(soup: BeautifulSoup) -> dict[str, str]: + properties: dict[str, str] = {} + for dt, dd in zip(soup.find_all("dt"), soup.find_all("dd"), strict=False): + key = clean_text(dt.get_text()) or "" + value = clean_text(dd.get_text()) or "" + properties[key.lower()] = value + return properties + + +def _get_data_testid_value(soup: BeautifulSoup, testid: str) -> str | None: + node = soup.select_one(f'[data-testid="{testid}"]') + if not node: + return None + return clean_text(node.get_text(" ", strip=True)) + + +def _strip_labelled_text(text: str | None, labels: list[str]) -> str | None: + if not text: + return None + for label in labels: + if text.lower().startswith(label.lower()): + return clean_text(text[len(label) :]) + return text + + +def _extract_floor_from_text(text: str | None) -> str | None: + if not text: + return None + match = re.search(r"(\d+)\s*\.?\s*etasje", text, re.IGNORECASE) + if match: + return f"{match.group(1)}. 
etasje" + return None + + +def _clean_description(text: str | None) -> str | None: + if not text: + return None + cleaned = re.sub(r"(?i)^om boligen", "", text).strip() + cleaned = re.sub(r"(?i)^beskrivelse", "", cleaned).strip() + return clean_text(cleaned) + + +def _load_feature_text(soup: BeautifulSoup) -> str: + return _get_data_testid_value(soup, "object-facilities") or "" + + +def _extract_description(soup: BeautifulSoup) -> str | None: + node = soup.select_one('[data-testid="om boligen"]') or soup.select_one(".description") + if not node: + return None + paragraphs = [clean_text(p.get_text()) for p in node.select("p") if clean_text(p.get_text())] + if paragraphs: + return "\n".join(paragraphs) + return _clean_description(node.get_text(" ", strip=True)) + + +def scrape_ad(html: str, url: str | None = None) -> FinnAd: + """Scrape a FINN listing HTML page into a FinnAd model.""" + soup = BeautifulSoup(html, "html.parser") + title_node = soup.select_one("h1") + broker_name = soup.select_one(".broker-name") + + properties = _load_property_map(soup) + feature_text = _load_feature_text(soup).lower() + finnkode = normalize_finnkode(extract_finnkode_from_url(url or "")) or "" + address = _get_data_testid_value(soup, "object-address") or properties.get("adresse") + district = _get_data_testid_value(soup, "local-area-name") or properties.get("område") + ownership_type = _strip_labelled_text( + _get_data_testid_value(soup, "info-ownership-type"), ["Eieform", "Eiendomstype"] + ) or properties.get("eierform") + property_type = _strip_labelled_text( + _get_data_testid_value(soup, "info-property-type"), ["Boligtype", "Eiendomstype"] + ) or properties.get("eiendomstype") + + asking_price = normalize_price( + properties.get("prisantydning") or _get_data_testid_value(soup, "pricing-incicative-price") + ) + total_price_value = normalize_price( + properties.get("totalpris") or _get_data_testid_value(soup, "pricing-total-price") + ) + shared_debt = normalize_price( + 
properties.get("fellesgjeld") or _get_data_testid_value(soup, "pricing-joint-debt") + ) + common_costs = normalize_number( + properties.get("felles utgifter") + or _get_data_testid_value(soup, "pricing-common-monthly-cost") + ) + area_m2 = normalize_area( + properties.get("boligareal") + or _get_data_testid_value(soup, "info-usable-i-area") + or _get_data_testid_value(soup, "info-usable-area") + ) + rooms = normalize_number(properties.get("rom") or _get_data_testid_value(soup, "info-rooms")) + bedrooms = normalize_number( + properties.get("soverom") or _get_data_testid_value(soup, "info-bedrooms") + ) + floor = ( + properties.get("etasje") + or _extract_floor_from_text(title_node.get_text() if title_node else "") + or _get_data_testid_value(soup, "info-floor") + ) + construction_year = normalize_number( + properties.get("byggeår") or _get_data_testid_value(soup, "info-construction-year") + ) + energy_rating = properties.get("energimerking") + heating = properties.get("oppvarming") + has_balcony = text_to_bool(properties.get("balkonger/terrasser")) or "balkong" in feature_text + has_terrace = "terrasse" in feature_text + has_elevator = text_to_bool(properties.get("heis")) or "heis" in feature_text + has_parking = ( + bool(properties.get("parkering/garasje")) + or "parkering" in feature_text + or "garasje" in feature_text + ) + broker_company = None + if broker_name: + broker_company = clean_text(broker_name.get_text()) + + listing_description = _extract_description(soup) + + ad = FinnAd( + finnkode=finnkode, + url=url or "", + title=clean_text(title_node.get_text()) if title_node else None, + address=address, + postal_area=properties.get("postnummer"), + district=district, + property_type=property_type, + ownership_type=ownership_type, + asking_price=asking_price, + total_price=total_price_value, + shared_debt=shared_debt, + common_costs=common_costs, + municipal_fee=normalize_number(properties.get("kommunale avgifter")), + 
other_fees=normalize_number(properties.get("andre utgifter")), + area_m2=area_m2, + rooms=rooms, + bedrooms=bedrooms, + floor=floor, + construction_year=construction_year, + energy_rating=energy_rating, + heating=heating, + has_balcony=has_balcony, + has_terrace=has_terrace, + has_elevator=has_elevator, + has_parking=has_parking, + listing_description=listing_description, + broker_name=None, + broker_company=broker_company, + detail_fetched_at=None, + ) + return ad + + +async def fetch_ad_details(finnkode: str, client: HTTPClient | None = None) -> FinnAd: + """Fetch FINN listing HTML and return a parsed FinnAd object.""" + html = await fetch_ad(finnkode, client=client) + ad = scrape_ad(html, url=FINN_AD_URL_TEMPLATE.format(finnkode)) + ad.detail_fetched_at = datetime.now(UTC) + return ad diff --git a/finn_eiendom/analysis.py b/finn_eiendom/analysis.py new file mode 100644 index 0000000..01620f6 --- /dev/null +++ b/finn_eiendom/analysis.py @@ -0,0 +1,175 @@ +"""Orchestration for FINN search + Eiendom.no enrichment + scoring.""" + +import logging + +from . import ad as ad_module +from . 
def _build_ad_summary(
    ad: FinnAd,
    enriched: EiendomUnit | None,
    similar_units: list[SimilarUnit],
    scores: dict,
    categories: list[str],
) -> dict:
    """Assemble the human-readable reasons, risks and next steps for one listing.

    Args:
        ad: Parsed FINN listing.
        enriched: Eiendom.no unit data, or ``None`` when enrichment is missing.
        similar_units: Comparable units; only whether the list is empty is inspected.
        scores: Score mapping; only the ``"risk"`` entry is read.
        categories: Accepted for interface compatibility; not used by this function.

    Returns:
        A dict with the keys ``why_interesting``, ``risks``, ``next_steps`` and
        ``shortlist_reason``.
    """
    description = _normalize_description(ad.listing_description)
    reasons: list[str] = []
    risks: list[str] = []
    next_steps = [
        "Open the FINN listing and condition report.",
        "Review the Eiendom.no estimate and comparable sales.",
        "Ask the broker about renovation status and approvals.",
    ]

    if enriched and enriched.estimated_selling_price and ad.total_price:
        estimate = enriched.estimated_selling_price
        upper = enriched.estimated_selling_price_upper
        if ad.total_price < estimate:
            reasons.append("Listing price is below Eiendom.no estimate.")
        elif upper is None:
            # The estimate can arrive without an upper bound; comparing an int
            # with None would raise TypeError, so report against the point
            # estimate instead of the range.
            reasons.append("Listing price is at or above the Eiendom.no estimate.")
        elif ad.total_price <= upper:
            reasons.append("Price sits within the local estimate range.")
        else:
            reasons.append("Listing price is above the estimate range.")
    else:
        reasons.append("Eiendom.no enrichment is unavailable or incomplete.")

    if "utsikt" in description or ad.has_balcony or ad.has_terrace:
        reasons.append("Outdoor space or view potential is positive.")
    if "hybel" in description or "leie" in description:
        reasons.append("Potential hybel/rental opportunity is mentioned.")
    if "potensial" in description or "renover" in description:
        reasons.append("Renovation or improvement potential is highlighted.")

    if scores.get("risk", 0.0) < 0:
        risks.append("Risk flags are detected in description or metadata.")
    if ad.common_costs and ad.common_costs > 5000:
        risks.append("Common costs are relatively high and should be reviewed.")
    if enriched and enriched.sale_status:
        # The Eiendom.no client in this project matches the spelling "FORSALE",
        # so compare underscore-insensitively: both "FORSALE" and "FOR_SALE"
        # count as an active sale.
        status = enriched.sale_status.upper().replace("_", "")
        if status != "FORSALE":
            risks.append("Eiendom.no sale status does not indicate an active sale.")
    if not enriched:
        risks.append("Missing Eiendom.no data increases uncertainty.")

    # The default steps above already include an Eiendom.no review step, so no
    # extra "verify on Eiendom.no" step is ever needed here.
    if similar_units:
        next_steps.append("Review the comparable units and average sqm prices.")
    else:
        next_steps.append("Comparable sales are unavailable; treat valuation with caution.")

    return {
        "why_interesting": reasons,
        "risks": risks,
        "next_steps": next_steps,
        "shortlist_reason": ", ".join(reasons[:3])
        if reasons
        else "Review details and seller disclosures.",
    }
async def analyze_search(
    search_url: str,
    max_pages: int = FINN_MAX_SEARCH_PAGES,
    fetch_details: bool = True,
    detail_limit: int = FINN_DETAIL_LIMIT,
    include_eiendom_no: bool = True,
    client=None,
    use_cache: bool = True,
) -> dict:
    """Analyze a FINN search URL and enrich matching listings.

    Args:
        search_url: FINN search result URL to crawl.
        max_pages: Maximum number of result pages to fetch.
        fetch_details: When false, only the search cards are returned.
        detail_limit: Maximum number of cards to analyze in detail; negative
            values are treated as zero.
        include_eiendom_no: Whether to look up an Eiendom.no unit per listing.
        client: Optional HTTP client forwarded to the FINN fetchers.
        use_cache: Forwarded to the search-page fetcher.
            NOTE(review): the listing-detail cache below is consulted regardless
            of this flag — confirm whether that is intended.

    Returns:
        A dict with ``search_url``, ``search_cards``, ``analysis`` (sorted by
        total score, best first) and a ``summary`` of counts.
    """
    conn = cache.init_db(FINN_CACHE_PATH)
    try:
        cards = await search.fetch_search_pages(
            search_url,
            max_pages=max_pages,
            client=client,
            use_cache=use_cache,
        )
        results = []
        enriched_count = 0

        if fetch_details:
            # A negative limit would slice from the end of the list instead of
            # limiting the number of analyzed cards.
            limit = None if detail_limit is None else max(detail_limit, 0)
            for card in cards[:limit]:
                finn_ad = cache.get_finn_ad(
                    conn, card.finnkode, ttl_hours=FINN_CACHE_TTL_AD_HOURS
                )
                if finn_ad is None:
                    finn_ad = await ad_module.fetch_ad_details(card.finnkode, client=client)

                unit_code = None
                if include_eiendom_no:
                    try:
                        matched_unit = await eiendom_no.search_unit_from_finn_url(card.url)
                    except Exception as exc:
                        # Enrichment is best-effort: log and continue without it.
                        logger.warning("Eiendom.no unit search failed: %s", exc)
                        matched_unit = None
                    unit_code = (
                        matched_unit.unit_code
                        if matched_unit
                        else eiendom_no.resolve_unit_from_finn_url(card.url)
                    )

                result = await analyze_ad(finn_ad, unit_code=unit_code)
                if result.get("eiendom_unit"):
                    enriched_count += 1
                results.append(result)
    finally:
        # The connection was previously left open on every call.
        conn.close()

    results.sort(key=lambda item: item["score"].get("total", 0.0), reverse=True)
    return {
        "search_url": search_url,
        "search_cards": [card.model_dump() for card in cards],
        "analysis": results,
        "summary": {
            "total_listings": len(cards),
            "analyzed_listings": len(results),
            "eiendom_enriched": enriched_count,
        },
    }
FINN_CACHE_PATH +from .models import EiendomUnit, FinnAd, FinnSearchCard, SimilarUnit + +logger = logging.getLogger(__name__) + + +def get_connection(path: str | None = None) -> sqlite3.Connection: + db_path = path or FINN_CACHE_PATH + conn = sqlite3.connect(str(db_path), detect_types=sqlite3.PARSE_DECLTYPES) + conn.row_factory = sqlite3.Row + return conn + + +def init_db(path: str | None = None) -> sqlite3.Connection: + conn = get_connection(path) + cursor = conn.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS finn_ads ( + finnkode TEXT PRIMARY KEY, + url TEXT, + payload TEXT NOT NULL, + fetched_at TEXT NOT NULL + ) + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS eiendom_units ( + unit_code TEXT PRIMARY KEY, + payload TEXT NOT NULL, + fetched_at TEXT NOT NULL + ) + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS similar_units ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + unit_code TEXT NOT NULL, + listing_status TEXT NOT NULL, + payload TEXT NOT NULL, + fetched_at TEXT NOT NULL + ) + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS cache_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, + expires_at TEXT + ) + """ + ) + conn.commit() + return conn + + +def cache_get(conn: sqlite3.Connection, key: str) -> dict[str, Any] | None: + cursor = conn.cursor() + cursor.execute("SELECT value, expires_at FROM cache_meta WHERE key = ?", (key,)) + row = cursor.fetchone() + if not row: + return None + + expires_at = row["expires_at"] + if expires_at and datetime.fromisoformat(expires_at) < datetime.now(UTC): + cursor.execute("DELETE FROM cache_meta WHERE key = ?", (key,)) + conn.commit() + return None + + return json.loads(row["value"]) + + +def cache_set( + conn: sqlite3.Connection, + key: str, + payload: dict[str, Any], + ttl_hours: int | None = None, + ttl_minutes: int | None = None, +) -> None: + expires_at = None + if ttl_minutes is not None: + expires_at = (datetime.now(UTC) + 
timedelta(minutes=ttl_minutes)).isoformat() + elif ttl_hours is not None: + expires_at = (datetime.now(UTC) + timedelta(hours=ttl_hours)).isoformat() + cursor = conn.cursor() + cursor.execute( + "INSERT OR REPLACE INTO cache_meta (key, value, expires_at) VALUES (?, ?, ?)", + (key, json.dumps(payload), expires_at), + ) + conn.commit() + + +def _is_fresh(fetched_at: str, ttl_hours: int | None) -> bool: + if ttl_hours is None: + return True + return datetime.fromisoformat(fetched_at) >= datetime.now(UTC) - timedelta(hours=ttl_hours) + + +def save_search_page( + conn: sqlite3.Connection, + url: str, + html: str, + ttl_minutes: int = 60, +) -> None: + cache_set(conn, f"search_page:{url}", {"html": html}, ttl_minutes=ttl_minutes) + + +def get_search_page(conn: sqlite3.Connection, url: str) -> str | None: + payload = cache_get(conn, f"search_page:{url}") + if not payload: + return None + return payload.get("html") + + +def save_search_cards( + conn: sqlite3.Connection, + url: str, + cards: list[FinnSearchCard], + ttl_minutes: int = 60, +) -> None: + cache_set( + conn, + f"search_cards:{url}", + [card.model_dump(mode="json") for card in cards], + ttl_minutes=ttl_minutes, + ) + + +def get_search_cards(conn: sqlite3.Connection, url: str) -> list[FinnSearchCard]: + payload = cache_get(conn, f"search_cards:{url}") + if not payload: + return [] + return [FinnSearchCard.model_validate(item) for item in payload] + + +def save_finn_ad(conn: sqlite3.Connection, ad: FinnAd) -> None: + cursor = conn.cursor() + payload = ad.model_dump(mode="json") + cursor.execute( + "INSERT OR REPLACE INTO finn_ads (finnkode, url, payload, fetched_at) VALUES (?, ?, ?, ?)", + ( + ad.finnkode, + ad.url, + json.dumps(payload), + ad.detail_fetched_at.isoformat() + if ad.detail_fetched_at + else datetime.now(UTC).isoformat(), + ), + ) + conn.commit() + + +def get_finn_ad( + conn: sqlite3.Connection, finnkode: str, ttl_hours: int | None = None +) -> FinnAd | None: + cursor = conn.cursor() + 
def save_similar_units(
    conn: sqlite3.Connection,
    unit_code: str,
    listing_status: str,
    similar_units: list[SimilarUnit],
) -> None:
    """Store the latest similar-units snapshot for a unit and listing status.

    Earlier snapshots for the same ``(unit_code, listing_status)`` pair are
    removed in the same transaction, because the reader only uses the newest
    row and the table would otherwise grow with every refresh.

    Args:
        conn: Open cache connection.
        unit_code: Eiendom.no unit the snapshot belongs to.
        listing_status: Status filter the snapshot was fetched with.
        similar_units: Units to serialize into the snapshot payload.

    Raises:
        sqlite3.Error: If the write fails; the transaction is rolled back first.
    """
    payload = json.dumps([item.model_dump(mode="json") for item in similar_units])
    cursor = conn.cursor()
    try:
        cursor.execute(
            "DELETE FROM similar_units WHERE unit_code = ? AND listing_status = ?",
            (unit_code, listing_status),
        )
        cursor.execute(
            (
                "INSERT INTO similar_units"
                " (unit_code, listing_status, payload, fetched_at)"
                " VALUES (?, ?, ?, ?)"
            ),
            (unit_code, listing_status, payload, datetime.now(UTC).isoformat()),
        )
    except sqlite3.Error:
        # Do not leave the delete half-applied if the insert fails.
        conn.rollback()
        raise
    conn.commit()
+ " ORDER BY id DESC LIMIT 1" + ), + (unit_code, listing_status), + ) + row = cursor.fetchone() + if not row: + return [] + if ttl_hours is not None and not _is_fresh(row["fetched_at"], ttl_hours): + return [] + return [SimilarUnit.model_validate(item) for item in json.loads(row["payload"])] diff --git a/finn_eiendom/config.py b/finn_eiendom/config.py new file mode 100644 index 0000000..c56b9e7 --- /dev/null +++ b/finn_eiendom/config.py @@ -0,0 +1,30 @@ +"""Configuration and environment variables.""" + +import os +from pathlib import Path + +# Cache and database +FINN_CACHE_PATH = os.getenv("FINN_CACHE_PATH", str(Path("data/finn.sqlite"))) + +# FINN API settings +FINN_MAX_SEARCH_PAGES = int(os.getenv("FINN_MAX_SEARCH_PAGES", "3")) +FINN_DETAIL_LIMIT = int(os.getenv("FINN_DETAIL_LIMIT", "20")) +FINN_REQUEST_DELAY_SECONDS = float(os.getenv("FINN_REQUEST_DELAY_SECONDS", "2")) +FINN_USER_AGENT = os.getenv("FINN_USER_AGENT", "personal-finn-eiendom-analyzer/0.1") +FINN_CACHE_TTL_SEARCH_MINUTES = int(os.getenv("FINN_CACHE_TTL_SEARCH_MINUTES", "60")) +FINN_CACHE_TTL_AD_HOURS = int(os.getenv("FINN_CACHE_TTL_AD_HOURS", "24")) + +# Eiendom.no API settings +EIENDOM_NO_ENABLED = os.getenv("EIENDOM_NO_ENABLED", "true").lower() == "true" +EIENDOM_NO_BASE_URL = os.getenv("EIENDOM_NO_BASE_URL", "https://api.eiendom.no/api/v1") +EIENDOM_NO_REQUEST_DELAY_SECONDS = float(os.getenv("EIENDOM_NO_REQUEST_DELAY_SECONDS", "1")) +EIENDOM_NO_CACHE_TTL_HOURS = int(os.getenv("EIENDOM_NO_CACHE_TTL_HOURS", "24")) +EIENDOM_NO_SIMILAR_UNITS_ENABLED = ( + os.getenv("EIENDOM_NO_SIMILAR_UNITS_ENABLED", "true").lower() == "true" +) +EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS = os.getenv( + "EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS", "RECENTLY_SOLD" +) + +# Logging +LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") diff --git a/finn_eiendom/eiendom_no.py b/finn_eiendom/eiendom_no.py new file mode 100644 index 0000000..64bb5c0 --- /dev/null +++ b/finn_eiendom/eiendom_no.py @@ -0,0 +1,236 @@ +"""Eiendom.no 
enrichment, unit vector, and similar units client.""" + +import base64 +import logging +from typing import Any + +import msgpack + +from .config import ( + EIENDOM_NO_BASE_URL, + EIENDOM_NO_ENABLED, + EIENDOM_NO_REQUEST_DELAY_SECONDS, + EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS, +) +from .http import HTTPClient +from .models import EiendomUnit, SimilarUnit, UnitVector +from .parser import extract_finnkode_from_url, normalize_finnkode + +logger = logging.getLogger(__name__) + + +def _extract_coordinates(geometry: dict) -> tuple[float | None, float | None]: + if not isinstance(geometry, dict): + return None, None + coords = geometry.get("coordinates") or [] + if isinstance(coords, (list, tuple)) and len(coords) >= 2: + return coords[0], coords[1] + return None, None + + +def parse_eiendom_unit_json(unit_data: dict) -> EiendomUnit: + geometry = unit_data.get("geometry", {}) + lon, lat = _extract_coordinates(geometry) + specification = unit_data.get("specification", {}) + valuation = unit_data.get("valuation", {}) + market = unit_data.get("latestMarketData", {}) + + return EiendomUnit( + unit_code=unit_data.get("unitCode", ""), + address=unit_data.get("address") or unit_data.get("streetAddress"), + lat=lat or unit_data.get("lat"), + lng=lon or unit_data.get("lon"), + property_type=specification.get("propertyType") or unit_data.get("propertyType"), + floor=specification.get("floor") or unit_data.get("floor"), + rooms=specification.get("rooms") or unit_data.get("rooms"), + construction_year=specification.get("constructionYear") + or unit_data.get("constructionYear"), + usable_area=specification.get("usableArea") or unit_data.get("usableArea"), + estimated_selling_price=valuation.get("estimatedSellingPrice") + or unit_data.get("estimatedSellingPrice"), + estimated_selling_price_lower=valuation.get("estimatedSellingPriceLower") + or unit_data.get("estimatedSellingPriceLower"), + estimated_selling_price_upper=valuation.get("estimatedSellingPriceUpper") + or 
def _first_present(*candidates):
    """Return the first candidate that is neither ``None`` nor an empty string."""
    for candidate in candidates:
        if candidate is not None and candidate != "":
            return candidate
    return None


def parse_similar_units_json(response_data: dict) -> list[SimilarUnit]:
    """Convert a similar-units response payload into ``SimilarUnit`` models.

    Each field is read from its nested section (``specification`` /
    ``marketData`` / ``geometry``) first and from the item's top level second.
    Legitimate zero values (for example zero shared debt or zero days on
    market) are kept; a plain ``a or b`` fallback would discard them.

    Args:
        response_data: Decoded JSON body; its ``units`` entry is iterated.
            Missing or ``null`` sections are treated as empty.

    Returns:
        One ``SimilarUnit`` per entry in ``units``, in response order.
    """
    units: list[SimilarUnit] = []
    for item in response_data.get("units") or []:
        lon, lat = _extract_coordinates(item.get("geometry") or {})
        specification = item.get("specification") or {}
        market = item.get("marketData") or {}
        units.append(
            SimilarUnit(
                unit_code=item.get("unitCode", ""),
                address=item.get("address"),
                lat=_first_present(lat, item.get("lat")),
                lng=_first_present(lon, item.get("lon")),
                property_type=_first_present(
                    specification.get("propertyType"), item.get("propertyType")
                ),
                floor=_first_present(specification.get("floor"), item.get("floor")),
                rooms=_first_present(specification.get("rooms"), item.get("rooms")),
                construction_year=_first_present(
                    specification.get("constructionYear"), item.get("constructionYear")
                ),
                usable_area=_first_present(
                    specification.get("usableArea"), item.get("usableArea")
                ),
                listing_price=_first_present(
                    market.get("listingPrice"), item.get("listingPrice")
                ),
                selling_price=_first_present(
                    market.get("sellingPrice"), item.get("sellingPrice")
                ),
                shared_debt=_first_present(market.get("jointDebt"), item.get("sharedDebt")),
                common_costs=_first_present(
                    market.get("monthlyCosts"), item.get("commonCosts")
                ),
                sqm_price=_first_present(
                    market.get("squareMeterPrice"), item.get("squareMeterPrice")
                ),
                days_on_market=_first_present(
                    market.get("daysOnMarket"), item.get("daysOnMarket")
                ),
                sale_status=_first_present(market.get("saleStatus"), item.get("saleStatus")),
                finalized_at=_first_present(
                    item.get("finalizedAt"), market.get("finalizedAt")
                ),
                listing_status=item.get("listingStatus", "RECENTLY_SOLD"),
            )
        )
    return units
async def get_similar_units(
    unit_vector: str,
    listing_status: str = EIENDOM_NO_SIMILAR_UNITS_DEFAULT_STATUS,
    client: HTTPClient | None = None,
) -> list[SimilarUnit]:
    """Fetch units similar to *unit_vector* and filter them by listing status.

    Args:
        unit_vector: Encoded unit vector sent as the ``unit_vector`` query
            parameter.
        listing_status: ``RECENTLY_SOLD`` keeps sold units that have a
            ``finalized_at`` value; ``FOR_SALE`` keeps units currently for sale.
            Any other value returns the parsed units unfiltered. Matching is
            case-insensitive and ignores underscores.
        client: Optional HTTP client; a client for the Eiendom.no base URL is
            created when omitted.

    Returns:
        The filtered list, or an empty list when Eiendom.no access is disabled.
    """
    if not EIENDOM_NO_ENABLED:
        logger.info("Eiendom.no similar-units disabled")
        return []

    client = client or HTTPClient(
        base_url=EIENDOM_NO_BASE_URL,
        request_delay_seconds=EIENDOM_NO_REQUEST_DELAY_SECONDS,
    )
    response = await client.get(
        "/geodata/units/similar/",
        params={"unit_vector": unit_vector},
    )
    units = parse_similar_units_json(response.json())

    def _canonical(value: str | None) -> str:
        # Elsewhere in the project the status is spelled both "FORSALE" and
        # "FOR_SALE"; dropping underscores makes both spellings compare equal.
        return (value or "").upper().replace("_", "")

    wanted = _canonical(listing_status)
    if wanted == "RECENTLYSOLD":
        return [
            unit
            for unit in units
            if _canonical(unit.sale_status) == "SOLD" and unit.finalized_at
        ]
    if wanted == "FORSALE":
        return [unit for unit in units if _canonical(unit.sale_status) == "FORSALE"]
    return units
class HTTPClient:
    """Async HTTP helper adding a User-Agent, a fixed pre-request delay and 5xx retries."""

    def __init__(
        self,
        base_url: str = "",
        user_agent: str = "personal-finn-eiendom-analyzer/0.1",
        request_delay_seconds: float = 0.0,
        retries: int = 1,
        timeout_seconds: float = 30.0,
    ):
        """
        Initialize HTTP client.

        Args:
            base_url: Base URL prepended to relative request URLs
            user_agent: User-Agent header value used when the caller sets none
            request_delay_seconds: Delay applied before every request attempt
            retries: Extra attempts made after a 5xx response (negative -> 0)
            timeout_seconds: Request timeout
        """
        self.base_url = base_url
        self.user_agent = user_agent
        self.request_delay_seconds = request_delay_seconds
        # Keep the retry count on the client itself. It used to be read back
        # from a private attribute of the transport, which fell back to 1
        # whenever that attribute was absent and so ignored this argument.
        self.retries = max(0, int(retries))
        self.timeout = httpx.Timeout(timeout_seconds)
        # Retained for backward compatibility; requests below do not use it.
        self.transport = httpx.AsyncHTTPTransport(retries=self.retries)
        self.last_request_time: float | None = None

    async def get(self, url: str, **kwargs) -> httpx.Response:
        """
        Make async GET request with delay and error handling.

        Args:
            url: URL to fetch (absolute, or relative to ``base_url``)
            **kwargs: Additional httpx arguments

        Returns:
            httpx.Response

        Raises:
            httpx.HTTPStatusError if ``raise_for_status`` rejects the response
            httpx.RequestError if the request could not be completed
        """
        return await self._request("GET", url, **kwargs)

    def _get_retries(self) -> int:
        """Return the number of extra attempts made after a 5xx response."""
        return self.retries

    async def post(self, url: str, **kwargs) -> httpx.Response:
        """Make async POST request with delay and error handling (same contract as ``get``)."""
        return await self._request("POST", url, **kwargs)

    async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """Send one request, retrying 5xx responses with exponential backoff."""
        # Copy so the caller's dict is never mutated by the User-Agent default.
        headers = dict(kwargs.pop("headers", None) or {})
        if not any(str(name).lower() == "user-agent" for name in headers):
            headers["User-Agent"] = self.user_agent

        attempts = self._get_retries() + 1
        for attempt in range(attempts):
            await self._apply_delay()

            async with httpx.AsyncClient(
                timeout=self.timeout,
                base_url=self.base_url if not url.startswith("http") else "",
            ) as client:
                try:
                    response = await client.request(method, url, headers=headers, **kwargs)
                except httpx.RequestError as e:
                    logger.error("Request failed for %s: %s", url, e)
                    raise

            # Only server errors are retried; everything else is settled now.
            if response.status_code >= 500 and attempt < attempts - 1:
                await asyncio.sleep(2**attempt)
                continue

            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                logger.error("HTTP %s for %s", e.response.status_code, url)
                raise
            logger.debug("%s %s -> %s", method, url, response.status_code)
            return response

        # attempts is always >= 1 and the last attempt returns or raises.
        raise AssertionError("retry loop finished without a response")

    async def _apply_delay(self):
        """Apply delay between requests if configured."""
        if self.request_delay_seconds > 0:
            await asyncio.sleep(self.request_delay_seconds)
@mcp.tool(
    description="Resolve an Eiendom.no unit_code from a FINN listing URL. "
    "Returns unit_code, address, lat, lng or an error if not found."
)
async def finn_resolve_eiendom_unit(finn_url: str) -> str:
    """Resolve the Eiendom.no unit behind a FINN listing URL.

    Returns a JSON object with ``unit_code``, ``address``, ``lat`` and ``lng``,
    or ``{"error": true, "message": ...}`` when nothing is found or the lookup
    raises.
    """
    try:
        unit = await search_unit_from_finn_url(finn_url)
        if unit is None:
            return json.dumps(
                {
                    "error": True,
                    "message": "Eiendom.no unit could not be resolved from FINN URL",
                }
            )
        return json.dumps(
            {
                "unit_code": unit.unit_code,
                "address": unit.address,
                "lat": unit.lat,
                "lng": unit.lng,
            }
        )
    except Exception as e:
        # Tool boundary: report the failure as JSON, but keep the traceback in the log.
        logger.exception("Error resolving unit from %s: %s", finn_url, e)
        return json.dumps({"error": True, "message": str(e)})


@mcp.tool(
    description="Fetch full Eiendom.no unit data by unit_code. Checks SQLite cache (24h TTL)."
)
async def finn_get_eiendom_unit(unit_code: str, force_refresh: bool = False) -> str:
    """Return the Eiendom.no unit for ``unit_code`` as JSON.

    ``force_refresh`` is passed through to ``get_or_fetch_eiendom_unit``. An
    error object is returned when the unit is missing or the call raises.
    """
    try:
        unit = await get_or_fetch_eiendom_unit(unit_code, force_refresh=force_refresh)
        if unit is None:
            return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
        return unit.model_dump_json()
    except Exception as e:
        logger.exception("Error fetching unit %s: %s", unit_code, e)
        return json.dumps({"error": True, "message": str(e)})


@mcp.tool(
    description="Fetch comparable recently-sold or for-sale units from Eiendom.no using a "
    "base64-encoded unit vector. Returns list of similar units with sale prices."
)
async def finn_get_similar_units(unit_vector: str, listing_status: str = "RECENTLY_SOLD") -> str:
    """Return the units similar to ``unit_vector`` as a JSON array.

    Each unit is dumped in pydantic's JSON mode so that ``datetime`` fields
    become strings. A plain ``model_dump()`` keeps ``datetime`` objects, which
    ``json.dumps`` cannot encode, so the tool used to answer with an error
    payload as soon as one unit carried a ``finalized_at`` value.
    """
    try:
        units = await get_similar_units(unit_vector, listing_status)
        return json.dumps([unit.model_dump(mode="json") for unit in units])
    except Exception as e:
        logger.exception("Error fetching similar units: %s", e)
        return json.dumps({"error": True, "message": str(e)})


@mcp.tool(
    description="Build a base64-encoded unit vector for a given Eiendom.no unit_code. "
    "The vector is used as input to finn_get_similar_units."
)
async def finn_build_unit_vector(unit_code: str) -> str:
    """Return ``{"unit_code", "unit_vector"}`` as JSON for an Eiendom.no unit.

    The unit is loaded with ``get_unit`` and encoded with ``build_unit_vector``.
    An error object is returned when the unit is missing or the call raises.
    """
    try:
        unit = await get_unit(unit_code)
        if unit is None:
            return json.dumps({"error": True, "message": "Eiendom.no unit not found"})
        return json.dumps({"unit_code": unit.unit_code, "unit_vector": build_unit_vector(unit)})
    except Exception as e:
        logger.exception("Error building unit vector for %s: %s", unit_code, e)
        return json.dumps({"error": True, "message": str(e)})
+) +def finn_decode_unit_vector(unit_vector: str) -> str: + """Decode unit vector to readable format.""" + try: + result = decode_unit_vector(unit_vector) + return json.dumps(result) + except Exception as e: + logger.error(f"Error decoding unit vector: {e}") + return json.dumps({"error": True, "message": str(e)}) + + +def main() -> None: + """Run the FastMCP stdio server.""" + mcp.run(transport="stdio") + + +if __name__ == "__main__": + main() diff --git a/finn_eiendom/models.py b/finn_eiendom/models.py new file mode 100644 index 0000000..7ef876f --- /dev/null +++ b/finn_eiendom/models.py @@ -0,0 +1,128 @@ +"""Pydantic models for FINN ads and Eiendom.no units.""" + +from datetime import UTC, datetime + +from pydantic import BaseModel, ConfigDict, Field + + +class FinnSearchCard(BaseModel): + """FINN search result card (minimal fields from search listing).""" + + finnkode: str + url: str + title: str | None = None + address: str | None = None + area_m2: int | None = None + asking_price: int | None = None + total_price: int | None = None + common_costs: int | None = None + property_type: str | None = None + ownership_type: str | None = None + bedrooms: int | None = None + floor: str | None = None + broker_company: str | None = None + + +class FinnAd(BaseModel): + """FINN listing detail with all available fields.""" + + finnkode: str + url: str + title: str | None = None + address: str | None = None + postal_area: str | None = None + district: str | None = None + property_type: str | None = None + ownership_type: str | None = None + asking_price: int | None = None + total_price: int | None = None + shared_debt: int | None = None + common_costs: int | None = None + municipal_fee: int | None = None + other_fees: int | None = None + area_m2: int | None = None + rooms: int | None = None + bedrooms: int | None = None + floor: str | None = None + construction_year: int | None = None + energy_rating: str | None = None + heating: str | None = None + has_balcony: bool | None = 
class EiendomUnit(BaseModel):
    """Eiendom.no unit detail with market data.

    Only ``unit_code`` is required; every other field defaults to ``None``.
    ``fetched_at`` defaults to the current UTC time when the model is created.
    """

    unit_code: str
    address: str | None = None
    lat: float | None = None
    lng: float | None = None
    property_type: str | None = None
    floor: int | None = None
    rooms: int | None = None
    construction_year: int | None = None
    usable_area: int | None = None
    estimated_selling_price: int | None = None
    estimated_selling_price_lower: int | None = None
    estimated_selling_price_upper: int | None = None
    listing_price: int | None = None
    listing_sqm_price: int | None = None
    common_costs: int | None = None
    days_on_market: int | None = None
    sale_status: str | None = None
    market_placement_score: str | None = None
    unit_vector: str | None = None
    fetched_at: datetime = Field(default_factory=lambda: datetime.now(UTC))

    # The former ``serializers={datetime: ...}`` entry was dropped: pydantic v2's
    # ConfigDict has no such key, so it had no effect at runtime and is rejected
    # by strict type checking. ``model_dump_json()`` / ``model_dump(mode="json")``
    # already emit ISO 8601 strings for datetime fields.
    # NOTE(review): if python-mode ``model_dump()`` should also return strings,
    # add an explicit ``@field_serializer`` instead.
    model_config = ConfigDict()
int | None = None + sale_status: str | None = None + finalized_at: datetime | None = None + listing_status: str = Field(default="RECENTLY_SOLD") + + model_config = ConfigDict(serializers={datetime: lambda v: v.isoformat() if v else None}) + + +class UnitVector(BaseModel): + """Unit vector payload for similar-units API.""" + + lon: float + lat: float + ptype: str # property type: APARTMENT, HOUSE, etc. + floor: int | None = None + rooms: int | None = None + built: int | None = None # construction year + area: int | None = None # usable area + price: int | None = None # listing or estimated price diff --git a/finn_eiendom/parser.py b/finn_eiendom/parser.py new file mode 100644 index 0000000..147a0a2 --- /dev/null +++ b/finn_eiendom/parser.py @@ -0,0 +1,88 @@ +"""Normalization and parsing helpers.""" + +import re + + +def normalize_price(price_str: str | None) -> int | None: + """ + Normalize Norwegian formatted price to integer. + Example: "7 200 991 kr" -> 7200991 + """ + if not price_str: + return None + # Remove "kr" and spaces, keep only digits + normalized = re.sub(r"[^\d]", "", price_str) + try: + return int(normalized) if normalized else None + except ValueError: + return None + + +def normalize_area(area_str: str | None) -> int | None: + """ + Normalize area string to integer. + Example: "77 m²" -> 77 + """ + if not area_str: + return None + cleaned = area_str.replace(" ", "") + match = re.search(r"(\d+(?:[.,]\d+)?)", cleaned) + if match: + value = match.group(1).replace(",", ".") + try: + return int(float(value)) + except ValueError: + return None + return None + + +def normalize_number(num_str: str | None) -> int | None: + """ + Normalize Norwegian formatted number to integer. + Handles text like "3 500 kr/mnd" and "7,2". 
+ """ + if not num_str: + return None + cleaned = re.sub(r"[^\d,\.]", "", num_str) + cleaned = cleaned.replace(" ", "") + if "," in cleaned: + cleaned = cleaned.replace(".", "").replace(",", ".") + else: + cleaned = cleaned.replace(".", "") + try: + return int(float(cleaned)) if cleaned else None + except ValueError: + return None + + +def normalize_finnkode(finnkode: str | None) -> str | None: + """Normalize finnkode to string, strip whitespace.""" + if not finnkode: + return None + return str(finnkode).strip() + + +def extract_finnkode_from_url(url: str) -> str | None: + """ + Extract finnkode from FINN URL. + Example: https://www.finn.no/realestate/homes/ad.html?finnkode=462400360 -> 462400360 + """ + match = re.search(r"finnkode=(\d+)", url) + if match: + return match.group(1) + return None + + +def text_to_bool(text: str | None) -> bool: + """Convert text to boolean.""" + if not text: + return False + return text.lower() in ("ja", "yes", "true", "1", "y") + + +def clean_text(text: str | None) -> str | None: + """Clean and normalize text: strip, collapse whitespace.""" + if not text: + return None + cleaned = " ".join(text.split()) + return cleaned if cleaned else None diff --git a/finn_eiendom/scoring.py b/finn_eiendom/scoring.py new file mode 100644 index 0000000..64627f9 --- /dev/null +++ b/finn_eiendom/scoring.py @@ -0,0 +1,146 @@ +"""Scoring engine for FINN listings enriched with Eiendom.no data.""" + +import logging +from typing import Any + +from .models import EiendomUnit, SimilarUnit + +logger = logging.getLogger(__name__) + + +def _clamp(value: float, min_value: float, max_value: float) -> float: + return max(min_value, min(max_value, value)) + + +def score_market_position(unit: EiendomUnit | None) -> float: + if unit is None or unit.estimated_selling_price is None or unit.listing_price is None: + return 0.0 + ratio = unit.listing_price / unit.estimated_selling_price + if ratio <= 0.9: + return 20.0 + if ratio <= 1.0: + return 16.0 + (1.0 - ratio) * 
40.0 + if ratio <= 1.1: + return 12.0 - (ratio - 1.0) * 40.0 + return 5.0 + + +def score_economy(ad: Any, unit: EiendomUnit | None) -> float: + if ad.total_price is None: + return 0.0 + if unit and unit.estimated_selling_price: + ratio = ad.total_price / unit.estimated_selling_price + if ratio <= 0.95: + return 20.0 + if ratio <= 1.0: + return 15.0 + if ratio <= 1.05: + return 10.0 + return 6.0 + if ad.asking_price and ad.total_price <= ad.asking_price: + return 12.0 + return 8.0 + + +def score_comparable_sales(listings: list[SimilarUnit], listing_price: int | None) -> float: + if not listings or listing_price is None: + return 0.0 + selling_prices = [unit.selling_price for unit in listings if unit.selling_price] + if not selling_prices: + return 0.0 + average = sum(selling_prices) / len(selling_prices) + ratio = listing_price / average + score = (1.0 - abs(ratio - 1.0)) * 20.0 + return float(_clamp(score, 0.0, 20.0)) + + +def score_location(address: str | None, district: str | None) -> float: + if not address and not district: + return 0.0 + if district and "oslo" in district.lower(): + return 15.0 + if address and "oslo" in address.lower(): + return 12.0 + return 7.0 + + +def score_layout_and_potential(description: str | None, rooms: int | None) -> float: + score = 0.0 + if rooms and rooms >= 4: + score += 10.0 + if description and "potensial" in description.lower(): + score += 8.0 + return float(_clamp(score, 0.0, 20.0)) + + +def score_outdoor_and_view(description: str | None) -> float: + if not description: + return 0.0 + score = 5.0 if "utsikt" in description.lower() or "balkong" in description.lower() else 0.0 + return float(_clamp(score, 0.0, 15.0)) + + +def score_rental_potential(description: str | None) -> float: + if not description: + return 0.0 + score = 10.0 if "hybel" in description.lower() or "leie" in description.lower() else 0.0 + return score + + +def score_renovation_upside(description: str | None, asking_price: int | None) -> float: + score = 
0.0 + if description and "renover" in description.lower(): + score += 10.0 + if asking_price and asking_price > 0: + score += 5.0 + return float(_clamp(score, 0.0, 15.0)) + + +def score_risk(description: str | None, unit: EiendomUnit | None) -> float: + if unit is None: + return -10.0 + if description and "usikker" in description.lower(): + return -10.0 + return 0.0 + + +def score_ad( + ad: Any, unit: EiendomUnit | None, similar_units: list[SimilarUnit] +) -> dict[str, float]: + scores = { + "economy": score_economy(ad, unit), + "market_position": score_market_position(unit), + "comparable_sales": score_comparable_sales( + similar_units, ad.total_price or ad.asking_price + ), + "location": score_location(ad.address, ad.district), + "layout": score_layout_and_potential(ad.listing_description, ad.rooms), + "outdoor": score_outdoor_and_view(ad.listing_description), + "rental_potential": score_rental_potential(ad.listing_description), + "renovation": score_renovation_upside(ad.listing_description, ad.asking_price), + "risk": score_risk(ad.listing_description, unit), + } + scores["total"] = float(_clamp(sum(scores.values()), 0.0, 100.0)) + return scores + + +def classify_ad(scores: dict[str, float]) -> list[str]: + categories: list[str] = [] + total = scores.get("total", 0.0) + if total >= 70: + categories.append("bargain_candidate") + if total >= 60: + categories.append("safe_candidate") + if 50 <= total < 70: + categories.append("lifestyle_candidate") + if scores.get("renovation", 0.0) >= 8: + categories.append("renovation_candidate") + if scores.get("rental_potential", 0.0) >= 5: + categories.append("hybel_candidate") + if scores.get("risk", 0.0) < 0: + categories.append("risk_object") + if total < 30: + categories.append("not_interesting") + if 30 <= total < 60: + categories.append("manual_review_required") + return categories diff --git a/finn_eiendom/search.py b/finn_eiendom/search.py new file mode 100644 index 0000000..86ea72c --- /dev/null +++ 
b/finn_eiendom/search.py @@ -0,0 +1,194 @@ +"""FINN search scraping and parsing.""" + +import logging +import re + +from bs4 import BeautifulSoup + +from . import cache +from .config import FINN_CACHE_TTL_SEARCH_MINUTES +from .http import HTTPClient +from .models import FinnSearchCard +from .parser import ( + clean_text, + extract_finnkode_from_url, + normalize_area, + normalize_finnkode, + normalize_number, + normalize_price, +) + +logger = logging.getLogger(__name__) + + +async def fetch_search_page(url: str, client: HTTPClient | None = None) -> str: + """Fetch a FINN search page HTML.""" + client = client or HTTPClient(request_delay_seconds=0.0) + response = await client.get(url) + return response.text + + +async def fetch_search_page_cached( + url: str, + client: HTTPClient | None = None, + conn: cache.sqlite3.Connection | None = None, + use_cache: bool = True, +) -> str: + """Fetch a FINN search page with optional SQLite caching.""" + client = client or HTTPClient(request_delay_seconds=0.0) + conn = conn or cache.init_db() + if use_cache: + cached_html = cache.get_search_page(conn, url) + if cached_html: + logger.debug("Using cached search page: %s", url) + return cached_html + + html = await fetch_search_page(url, client=client) + cache.save_search_page(conn, url, html, ttl_minutes=FINN_CACHE_TTL_SEARCH_MINUTES) + return html + + +def extract_ad_links(html: str) -> list[str]: + """Extract listing URLs from FINN search HTML.""" + soup = BeautifulSoup(html, "html.parser") + links = [] + for article in soup.select("article.listing-card, article.sf-search-ad"): + anchor = article.select_one("a[href*='finnkode']") + if anchor and anchor.get("href"): + links.append(clean_text(anchor.get("href")) or "") + return links + + +def _extract_int_from_text(text: str, pattern: str) -> int | None: + match = re.search(pattern, text, re.I) + if match: + return normalize_number(match.group(1)) + return None + + +def _extract_area_from_text(text: str) -> int | None: + matches = 
re.findall(r"(\d+(?:[.,]\d+)?)\s*(?:m²|m2|kvm)", text, re.I) + if matches: + return normalize_area(matches[-1]) + return None + + +def _extract_price_from_text(text: str, label: str) -> int | None: + pattern = rf"{label}[:\s]*([\d\s]+kr)" + match = re.search(pattern, text, re.I) + if match: + return normalize_price(match.group(1)) + return None + + +def extract_search_cards(html: str) -> list[FinnSearchCard]: + """Parse FINN search HTML and return a list of FinnSearchCard objects.""" + logger.debug("Extracting FINN search cards") + soup = BeautifulSoup(html, "html.parser") + cards: list[FinnSearchCard] = [] + + for card in soup.select("article.listing-card, article.sf-search-ad"): + data_id = card.get("data-id") + anchor = card.select_one("a[href*='finnkode']") + url = anchor.get("href") if anchor else "" + finnkode = normalize_finnkode(data_id or extract_finnkode_from_url(url)) + if not finnkode: + logger.debug("Skipping card with missing finnkode") + continue + + title_elem = card.select_one(".title, h2.sf-realestate-heading, a.sf-search-ad-link") + address_elem = card.select_one(".location, .sf-realestate-location") + area_elem = card.select_one(".area") + price_elem = card.select_one(".price") + common_costs_elem = card.select_one(".common-costs") + bedrooms_elem = card.select_one(".bedrooms") + property_type_elem = card.select_one(".property-type") + ownership_type_elem = card.select_one(".ownership-type") + broker_elem = card.select_one(".broker-company") + + card_text = clean_text(card.get_text(" ") or "") + + bedrooms = None + if bedrooms_elem: + bedrooms = normalize_number(bedrooms_elem.get_text()) + elif card_text: + bedrooms = _extract_int_from_text(card_text, r"(\d+)\s*soverom") + + common_costs = None + if common_costs_elem: + common_costs = normalize_number(common_costs_elem.get_text()) + elif card_text: + common_costs = _extract_int_from_text( + card_text, r"(?:Fellesutg|Felleskost(?:er)?)[^\d]*(\d+[\d\s]*)kr" + ) + + total_price = None + if 
def find_next_page_url(html: str) -> str | None:
    """Return the href of the ``a[rel='next']`` link in FINN search HTML, if present.

    The href is returned as written in the page (whitespace-normalized by
    ``clean_text``), so it may be relative; callers resolve it against the
    URL of the page it came from.
    """
    soup = BeautifulSoup(html, "html.parser")
    next_link = soup.select_one("a[rel='next']")
    if next_link and next_link.get("href"):
        return clean_text(next_link.get("href"))
    return None


async def fetch_search_pages(
    start_url: str,
    max_pages: int = 1,
    client: HTTPClient | None = None,
    use_cache: bool = True,
) -> list[FinnSearchCard]:
    """Fetch paginated FINN search pages and parse their search cards.

    Args:
        start_url: URL of the first search page.
        max_pages: Upper bound on the number of pages fetched.
        client: HTTP client to use; a client without request delay is created
            when omitted.
        use_cache: Forwarded to ``fetch_search_page_cached``.

    Returns:
        The cards of all fetched pages, in page order.
    """
    from urllib.parse import urljoin

    client = client or HTTPClient(request_delay_seconds=0.0)
    conn = cache.init_db()
    url = start_url
    all_cards: list[FinnSearchCard] = []
    visited = {url}

    for _ in range(max_pages):
        html = await fetch_search_page_cached(url, client=client, conn=conn, use_cache=use_cache)
        all_cards.extend(extract_search_cards(html))
        next_href = find_next_page_url(html)
        if not next_href:
            break
        # The next link may be relative ("?page=2"); resolve it against the page
        # it was found on so the request and the cache key are absolute URLs.
        # An absolute href passes through urljoin unchanged.
        next_url = urljoin(url, next_href)
        if next_url in visited:
            # A next link pointing at an already fetched page would only
            # duplicate its cards.
            break
        visited.add(next_url)
        url = next_url
        logger.debug("Following next page link: %s", url)

    return all_cards
"mcp[cli]>=1.0.0", + "msgpack>=1.0.0", + "pydantic>=2.8.0", + "pydantic-settings>=2.4.0", + "python-dotenv>=1.0.0", +] + +[project.scripts] +finn-eiendom-mcp = "finn_eiendom.mcp_server:main" + +[dependency-groups] +dev = [ + "ipython>=8.0.0", + "mypy>=1.10.0", + "pytest>=8.0.0", + "pytest-asyncio>=0.23.0", + "respx>=0.21.0", + "ruff>=0.6.0", +] + +[tool.ruff] +line-length = 100 +target-version = "py312" + +[tool.ruff.lint] +select = ["E", "F", "I", "UP", "B", "SIM"] +ignore = [] + +[tool.ruff.lint.per-file-ignores] +"tests/fixtures.py" = ["E501"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +asyncio_mode = "auto" + +[tool.mypy] +python_version = "3.12" +strict = true +plugins = [] \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..4e9d29c --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test fixtures and utilities.""" diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 0000000..aeab59a --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,236 @@ +"""Fixture data for testing without hitting live APIs.""" +# noqa: E501 + +SAMPLE_FINN_SEARCH_HTML = """ + + +FINN.no - Leiligheter til salgs + + + + +""" + +# noqa: E501 +SAMPLE_FINN_SEARCH_HTML_NEW = """ + + +FINN.no - Leiligheter til salgs + +
+ +
+ + +""" + +SAMPLE_FINN_LISTING_HTML = """ + + +Flott 3-roms i Ferner - FINN.no + +
+
+

Flott 3-roms i Ferner

+
Totalpris: 7 200 991 kr
+
+
+
+
Adresse
+
Fernerveien 42, 0554 Oslo
+
Område
+
Grünerløkka
+
Postnummer
+
0554
+
Eierform
+
Eierbolig
+
Eiendomstype
+
Leilighet
+
Prisantydning
+
7 200 000 kr
+
Totalpris
+
7 200 991 kr
+
Fellesgjeld
+
0 kr
+
Felles utgifter
+
3 500 kr/mnd
+
Boligareal
+
77 m²
+
Rom
+
4
+
Soverom
+
3
+
Etasje
+
4. etasje
+
Byggeår
+
2005
+
Energimerking
+
C
+
Oppvarming
+
Fjernvarme
+
Balkonger/terrasser
+
Ja, balkonger
+
Heis
+
Ja
+
Parkering/garasje
+
Privat parkering
+
+
+
+

Beskrivelse

+

Flott beliggenhet med fin utsikt over Oslo. Moderne kjøkken og bad.

+

Klar til visning!

+
+
+
+ Meglerhuset AS + Telefon: 21 00 00 00 +
+
+
+ + +""" + +SAMPLE_FINN_LISTING_HTML_NEW = """ + + +Romslig 5-roms i 5.etasje med heisadkomst + +
+

Romslig 5-roms i 5.etasje med heisadkomst | 2 hybler | 4 balkonger | Ingen dokavgift!

+ Hegdehaugsveien 3, 0352 Oslo + Homansbyen +
+
Prisantydning10 900 000 kr
+
Totalpris
10 986 901 kr
+
Fellesgjeld
76 911 kr
+
Felleskost/mnd.
12 011 kr
+
+
+
BoligtypeLeilighet
+
EieformAndel
+
Soverom2
+
Rom5
+
Byggeår1938
+
Internt bruksareal124 m² (BRA-i)
+
+
FasiliteterBalkong/TerrasseParkettHeis
+
+

Om boligen

+

Her bor du med kort vei til daglige behov og offentlig transport.

+
+
+ + +""" + +SAMPLE_EIENDOM_UNIT_JSON = { + "units": [ + { + "unitCode": "c-gxw-xmyum-s2a", + "address": "Fernerveien 42, 0554 Oslo", + "municipality": "Oslo", + "lat": 59.9287, + "lon": 10.7803, + "propertyType": "APARTMENT", + "floor": 4, + "rooms": 4, + "constructionYear": 2005, + "usableArea": 77, + "estimatedSellingPrice": 7650000, + "estimatedSellingPriceLower": 6900000, + "estimatedSellingPriceUpper": 8400000, + "listingPrice": 7200000, + "listingSquareMeterPrice": 93500, + "commonCosts": 3500, + "daysOnMarket": 12, + "saleStatus": "FOR_SALE", + "marketPlacementScore": "ABOVE_AVERAGE", + "similarUnitCount": 12, + "averageSquareMeterPrice": 98000, + } + ] +} + +SAMPLE_EIENDOM_SIMILAR_UNITS_JSON = { + "units": [ + { + "unitCode": "c-recent-1", + "address": "Birketveien 10, 0554 Oslo", + "lat": 59.9290, + "lon": 10.7810, + "propertyType": "APARTMENT", + "floor": 3, + "rooms": 3, + "constructionYear": 2004, + "usableArea": 75, + "listingPrice": 7100000, + "sellingPrice": 7050000, + "sharedDebt": 0, + "commonCosts": 3400, + "squareMeterPrice": 94000, + "daysOnMarket": 18, + "saleStatus": "SOLD", + "finalizedAt": "2024-05-01", + }, + { + "unitCode": "c-recent-2", + "address": "Sommers gate 5, 0554 Oslo", + "lat": 59.9280, + "lon": 10.7820, + "propertyType": "APARTMENT", + "floor": 2, + "rooms": 4, + "constructionYear": 2006, + "usableArea": 80, + "listingPrice": 7400000, + "sellingPrice": 7350000, + "sharedDebt": 0, + "commonCosts": 3600, + "squareMeterPrice": 91875, + "daysOnMarket": 22, + "saleStatus": "SOLD", + "finalizedAt": "2024-04-28", + }, + ] +} diff --git a/tests/test_ad.py b/tests/test_ad.py new file mode 100644 index 0000000..6f6450f --- /dev/null +++ b/tests/test_ad.py @@ -0,0 +1,45 @@ +from finn_eiendom.ad import scrape_ad +from tests.fixtures import SAMPLE_FINN_LISTING_HTML, SAMPLE_FINN_LISTING_HTML_NEW + + +def test_scrape_ad(): + ad = scrape_ad( + SAMPLE_FINN_LISTING_HTML, + url="https://www.finn.no/realestate/homes/ad.html?finnkode=462400360", + ) 
+ assert ad.finnkode == "462400360" + assert ad.title == "Flott 3-roms i Ferner" + assert ad.address == "Fernerveien 42, 0554 Oslo" + assert ad.area_m2 == 77 + assert ad.asking_price == 7200000 + assert ad.total_price == 7200991 + assert ad.common_costs == 3500 + assert ad.rooms == 4 + assert ad.bedrooms == 3 + assert ad.floor == "4. etasje" + assert ad.construction_year == 2005 + assert ad.energy_rating == "C" + assert ad.heating == "Fjernvarme" + assert "Moderne kjøkken" in ad.listing_description + assert ad.broker_company == "Meglerhuset AS" + + +def test_scrape_ad_new_structure(): + ad = scrape_ad( + SAMPLE_FINN_LISTING_HTML_NEW, + url="https://www.finn.no/realestate/homes/ad.html?finnkode=455978973", + ) + assert ad.finnkode == "455978973" + assert ad.title.startswith("Romslig 5-roms i 5.etasje") + assert ad.address == "Hegdehaugsveien 3, 0352 Oslo" + assert ad.property_type == "Leilighet" + assert ad.ownership_type == "Andel" + assert ad.asking_price == 10900000 + assert ad.total_price == 10986901 + assert ad.common_costs == 12011 + assert ad.area_m2 == 124 + assert ad.rooms == 5 + assert ad.bedrooms == 2 + assert ad.construction_year == 1938 + assert ad.floor == "5. 
etasje" + assert "kort vei" in ad.listing_description.lower() diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..d35b718 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,71 @@ +import tempfile +from datetime import UTC, datetime, timedelta +from pathlib import Path + +from finn_eiendom.cache import ( + get_eiendom_unit, + get_finn_ad, + get_search_page, + get_similar_units, + init_db, + save_eiendom_unit, + save_finn_ad, + save_search_page, + save_similar_units, +) +from finn_eiendom.models import EiendomUnit, FinnAd, SimilarUnit + + +def test_cache_roundtrip(): + with tempfile.TemporaryDirectory() as tmpdir: + db_path = Path(tmpdir) / "cache.sqlite" + conn = init_db(str(db_path)) + + ad = FinnAd(finnkode="1234", url="https://example.com", title="Test") + save_finn_ad(conn, ad) + loaded_ad = get_finn_ad(conn, "1234") + assert loaded_ad is not None + assert loaded_ad.finnkode == "1234" + assert loaded_ad.url == "https://example.com" + + unit = EiendomUnit(unit_code="abc", address="Oslo") + save_eiendom_unit(conn, unit) + loaded_unit = get_eiendom_unit(conn, "abc") + assert loaded_unit is not None + assert loaded_unit.address == "Oslo" + + comps = [ + SimilarUnit(unit_code="x1"), + SimilarUnit(unit_code="x2"), + ] + save_similar_units(conn, "abc", "RECENTLY_SOLD", comps) + loaded_comps = get_similar_units(conn, "abc", "RECENTLY_SOLD") + assert len(loaded_comps) == 2 + assert loaded_comps[0].unit_code == "x1" + + +def test_search_page_cache_roundtrip(): + with tempfile.TemporaryDirectory() as tmpdir: + conn = init_db(str(Path(tmpdir) / "cache.sqlite")) + + html = "search page" + url = "https://www.finn.no/realestate/homes/search.html" + + save_search_page(conn, url, html, ttl_minutes=5) + loaded_html = get_search_page(conn, url) + assert loaded_html == html + + +def test_finn_ad_cache_ttl_expiration(): + with tempfile.TemporaryDirectory() as tmpdir: + conn = init_db(str(Path(tmpdir) / "cache.sqlite")) + + ad = FinnAd( + 
finnkode="1234", + url="https://example.com", + title="Test", + detail_fetched_at=datetime.now(UTC) - timedelta(hours=2), + ) + save_finn_ad(conn, ad) + expired_ad = get_finn_ad(conn, "1234", ttl_hours=1) + assert expired_ad is None diff --git a/tests/test_eiendom_no.py b/tests/test_eiendom_no.py new file mode 100644 index 0000000..43eba03 --- /dev/null +++ b/tests/test_eiendom_no.py @@ -0,0 +1,44 @@ +from finn_eiendom.eiendom_no import ( + build_unit_vector, + decode_unit_vector, + parse_eiendom_unit_json, + parse_similar_units_json, + resolve_unit_from_finn_url, +) +from tests.fixtures import ( + SAMPLE_EIENDOM_SIMILAR_UNITS_JSON, + SAMPLE_EIENDOM_UNIT_JSON, +) + + +def test_parse_eiendom_unit_json(): + unit = parse_eiendom_unit_json(SAMPLE_EIENDOM_UNIT_JSON["units"][0]) + assert unit.unit_code == "c-gxw-xmyum-s2a" + assert unit.address == "Fernerveien 42, 0554 Oslo" + assert unit.estimated_selling_price == 7650000 + assert unit.listing_sqm_price == 93500 + + +def test_unit_vector_roundtrip(): + unit = parse_eiendom_unit_json(SAMPLE_EIENDOM_UNIT_JSON["units"][0]) + vector = build_unit_vector(unit) + decoded = decode_unit_vector(vector) + assert decoded["ptype"] == "APARTMENT" + assert decoded["area"] == 77 + assert decoded["price"] == 7200000 + assert isinstance(decoded, dict) + assert decoded["lon"] == unit.lng + + +def test_parse_similar_units_json(): + comps = parse_similar_units_json(SAMPLE_EIENDOM_SIMILAR_UNITS_JSON) + assert len(comps) == 2 + assert comps[0].unit_code == "c-recent-1" + assert comps[1].selling_price == 7350000 + + +def test_resolve_unit_from_finn_url(): + unit_code = resolve_unit_from_finn_url( + "https://www.finn.no/realestate/homes/ad.html?finnkode=462400360" + ) + assert unit_code == "462400360" diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 0000000..81506ba --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,83 @@ +"""Tests for HTTP client retry logic.""" + +import httpx +import pytest +import respx + +from 
finn_eiendom.http import HTTPClient + + +@pytest.mark.asyncio +async def test_get_retries_on_500(): + """Test that HTTPClient retries on 500 errors and succeeds on second attempt.""" + client = HTTPClient(request_delay_seconds=0.0, retries=2) + + with respx.mock: + route = respx.get("https://example.com/api") + route.side_effect = [ + httpx.Response(500, text="Server Error"), + httpx.Response(200, text="Success"), + ] + + response = await client.get("https://example.com/api") + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_get_raises_on_404(): + """Test that HTTPClient raises on 4xx errors immediately.""" + client = HTTPClient(request_delay_seconds=0.0, retries=2) + + with respx.mock: + respx.get("https://example.com/api").mock(return_value=httpx.Response(404)) + + with pytest.raises(httpx.HTTPStatusError) as exc_info: + await client.get("https://example.com/api") + + assert exc_info.value.response.status_code == 404 + + +@pytest.mark.asyncio +async def test_get_retries_on_502_bad_gateway(): + """Test that HTTPClient retries on 502 Bad Gateway.""" + client = HTTPClient(request_delay_seconds=0.0, retries=2) + + with respx.mock: + route = respx.get("https://example.com/api") + route.side_effect = [ + httpx.Response(502, text="Bad Gateway"), + httpx.Response(200, text="Success"), + ] + + response = await client.get("https://example.com/api") + assert response.status_code == 200 + + +@pytest.mark.asyncio +async def test_post_retries_on_503(): + """Test that HTTPClient retries POST on 503 Service Unavailable.""" + client = HTTPClient(request_delay_seconds=0.0, retries=2) + + with respx.mock: + route = respx.post("https://example.com/api") + route.side_effect = [ + httpx.Response(503, text="Service Unavailable"), + httpx.Response(201, json={"success": True}), + ] + + response = await client.post("https://example.com/api", json={"test": "data"}) + assert response.status_code == 201 + + +@pytest.mark.asyncio +async def 
test_get_eventually_fails_on_persistent_500(): + """Test that HTTPClient gives up after max retries.""" + client = HTTPClient(request_delay_seconds=0.0, retries=1) + + with respx.mock: + respx.get("https://example.com/api").mock(return_value=httpx.Response(500)) + + with pytest.raises(httpx.HTTPStatusError) as exc_info: + await client.get("https://example.com/api") + + assert exc_info.value.response.status_code == 500 diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py new file mode 100644 index 0000000..cd9d92b --- /dev/null +++ b/tests/test_mcp_server.py @@ -0,0 +1,69 @@ +"""Tests for the MCP server tools.""" + +import json + +from finn_eiendom.mcp_server import ( + finn_decode_unit_vector, + mcp, +) + + +def test_mcp_server_has_correct_tools(): + """Assert that the MCP server has all expected tools.""" + import asyncio + + async def check_tools(): + tools = await mcp.list_tools() + tool_names = {tool.name for tool in tools} + expected_tools = { + "finn_analyze_search", + "finn_get_ad", + "finn_resolve_eiendom_unit", + "finn_get_eiendom_unit", + "finn_get_similar_units", + "finn_build_unit_vector", + "finn_decode_unit_vector", + } + assert expected_tools.issubset(tool_names), f"Missing tools: {expected_tools - tool_names}" + + asyncio.run(check_tools()) + + +def test_finn_decode_unit_vector_returns_json(): + """Test that finn_decode_unit_vector returns valid JSON with expected keys.""" + from unittest.mock import patch + + test_vector = { + "lon": 10.7, + "lat": 59.9, + "ptype": "APARTMENT", + "floor": 3, + "rooms": 3, + "built": 2000, + "area": 80, + "price": 5000000, + } + + with patch("finn_eiendom.mcp_server.decode_unit_vector", return_value=test_vector): + result = finn_decode_unit_vector("dGVzdA==") + + data = json.loads(result) + assert "lon" in data + assert "lat" in data + assert "ptype" in data + assert data["lat"] == 59.9 + assert data["lon"] == 10.7 + + +def test_finn_decode_unit_vector_error_handling(): + """Test that 
finn_decode_unit_vector handles errors gracefully.""" + from unittest.mock import patch + + with patch( + "finn_eiendom.mcp_server.decode_unit_vector", side_effect=Exception("decode failed") + ): + result = finn_decode_unit_vector("invalid") + + data = json.loads(result) + assert data.get("error") is True + assert "message" in data diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..5dba2cf --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,45 @@ +from finn_eiendom.parser import ( + clean_text, + extract_finnkode_from_url, + normalize_area, + normalize_finnkode, + normalize_number, + normalize_price, +) + + +def test_normalize_price(): + assert normalize_price("7 200 991 kr") == 7200991 + assert normalize_price("1 234") == 1234 + assert normalize_price(None) is None + + +def test_normalize_area(): + assert normalize_area("77 m²") == 77 + assert normalize_area("100,5 m²") == 100 + assert normalize_area("") is None + + +def test_normalize_number(): + assert normalize_number("3 500 kr/mnd") == 3500 + assert normalize_number("7,2") == 7 + assert normalize_number("1.234") == 1234 + assert normalize_number(None) is None + + +def test_normalize_finnkode(): + assert normalize_finnkode(" 462400360 ") == "462400360" + assert normalize_finnkode(None) is None + + +def test_extract_finnkode_from_url(): + assert ( + extract_finnkode_from_url("https://www.finn.no/realestate/homes/ad.html?finnkode=462400360") + == "462400360" + ) + assert extract_finnkode_from_url("https://www.finn.no/realestate/homes/ad.html") is None + + +def test_clean_text(): + assert clean_text(" Hello world \n") == "Hello world" + assert clean_text(None) is None diff --git a/tests/test_scoring.py b/tests/test_scoring.py new file mode 100644 index 0000000..33f2029 --- /dev/null +++ b/tests/test_scoring.py @@ -0,0 +1,22 @@ +from finn_eiendom.models import EiendomUnit, FinnAd +from finn_eiendom.scoring import classify_ad, score_ad + + +def test_score_ad_and_classify(): 
+ ad = FinnAd( + finnkode="462400360", + url="https://www.finn.no/realestate/homes/ad.html?finnkode=462400360", + title="Flott 3-roms i Ferner", + ) + unit = EiendomUnit( + unit_code="c-gxw-xmyum-s2a", + estimated_selling_price=7650000, + listing_price=7200000, + property_type="APARTMENT", + usable_area=77, + rooms=4, + ) + scores = score_ad(ad, unit, []) + assert scores["market_position"] >= 0 + categories = classify_ad(scores) + assert isinstance(categories, list) diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..495918a --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,38 @@ +from finn_eiendom.search import extract_ad_links, extract_search_cards +from tests.fixtures import SAMPLE_FINN_SEARCH_HTML, SAMPLE_FINN_SEARCH_HTML_NEW + + +def test_extract_search_cards(): + cards = extract_search_cards(SAMPLE_FINN_SEARCH_HTML) + assert len(cards) == 2 + assert cards[0].finnkode == "462400360" + assert cards[0].url.endswith("finnkode=462400360") + assert cards[0].area_m2 == 77 + assert cards[0].total_price == 7200991 + assert cards[0].common_costs == 3500 + assert cards[1].bedrooms == 2 + + +def test_extract_search_cards_new_format(): + cards = extract_search_cards(SAMPLE_FINN_SEARCH_HTML_NEW) + assert len(cards) == 1 + assert cards[0].finnkode == "462880791" + assert cards[0].url.endswith("finnkode=462880791") + assert cards[0].address == "Lofotgata 4B, Oslo" + assert cards[0].area_m2 == 62 + assert cards[0].total_price == 7253377 + assert cards[0].common_costs == 7067 + assert cards[0].bedrooms == 2 + + +def test_extract_ad_links(): + links = extract_ad_links(SAMPLE_FINN_SEARCH_HTML) + assert len(links) == 2 + assert "finnkode=462400360" in links[0] + assert "finnkode=460784945" in links[1] + + +def test_extract_ad_links_new_format(): + links = extract_ad_links(SAMPLE_FINN_SEARCH_HTML_NEW) + assert len(links) == 1 + assert "finnkode=462880791" in links[0] diff --git a/tests/test_service.py b/tests/test_service.py new file mode 
100644 index 0000000..f6a69f7 --- /dev/null +++ b/tests/test_service.py @@ -0,0 +1,97 @@ +"""Tests for the service layer (cache-aware fetching).""" + +from unittest.mock import patch + +import pytest + +from finn_eiendom.models import EiendomUnit, FinnAd +from finn_eiendom.service import get_or_fetch_ad, get_or_fetch_eiendom_unit + + +@pytest.mark.asyncio +async def test_get_or_fetch_ad_uses_cache(): + """Test that get_or_fetch_ad returns cached ad without fetching.""" + mock_ad = FinnAd(finnkode="123", url="http://example.com") + + with ( + patch("finn_eiendom.service.init_db"), + patch("finn_eiendom.service.get_finn_ad", return_value=mock_ad) as mock_get, + patch("finn_eiendom.service.fetch_ad_details") as mock_fetch, + ): + result = await get_or_fetch_ad("123") + + assert result.finnkode == "123" + mock_get.assert_called_once() + mock_fetch.assert_not_called() + + +@pytest.mark.asyncio +async def test_get_or_fetch_ad_fetches_when_cache_miss(): + """Test that get_or_fetch_ad fetches when cache is empty.""" + mock_ad = FinnAd(finnkode="123", url="http://example.com") + + with ( + patch("finn_eiendom.service.init_db"), + patch("finn_eiendom.service.get_finn_ad", return_value=None), + patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch, + patch("finn_eiendom.service.save_finn_ad") as mock_save, + ): + result = await get_or_fetch_ad("123") + + assert result.finnkode == "123" + mock_fetch.assert_called_once_with("123") + mock_save.assert_called_once() + + +@pytest.mark.asyncio +async def test_get_or_fetch_ad_force_refresh(): + """Test that force_refresh=True bypasses cache.""" + mock_ad = FinnAd(finnkode="123", url="http://example.com") + + with ( + patch("finn_eiendom.service.init_db"), + patch("finn_eiendom.service.get_finn_ad", return_value=mock_ad) as mock_get, + patch("finn_eiendom.service.fetch_ad_details", return_value=mock_ad) as mock_fetch, + patch("finn_eiendom.service.save_finn_ad") as mock_save, + ): + result = await 
get_or_fetch_ad("123", force_refresh=True) + + assert result.finnkode == "123" + mock_get.assert_not_called() + mock_fetch.assert_called_once_with("123") + mock_save.assert_called_once() + + +@pytest.mark.asyncio +async def test_get_or_fetch_eiendom_unit_uses_cache(): + """Test that get_or_fetch_eiendom_unit returns cached unit without fetching.""" + mock_unit = EiendomUnit(unit_code="test-code") + + with ( + patch("finn_eiendom.service.init_db"), + patch("finn_eiendom.service.get_cached_eiendom_unit", return_value=mock_unit) as mock_get, + patch("finn_eiendom.service.get_unit") as mock_fetch, + ): + result = await get_or_fetch_eiendom_unit("test-code") + + assert result.unit_code == "test-code" + mock_get.assert_called_once() + mock_fetch.assert_not_called() + + +@pytest.mark.asyncio +async def test_get_or_fetch_eiendom_unit_fetches_when_cache_miss(): + """Test that get_or_fetch_eiendom_unit fetches when cache is empty.""" + mock_unit = EiendomUnit(unit_code="test-code") + + with ( + patch("finn_eiendom.service.init_db"), + patch("finn_eiendom.service.get_cached_eiendom_unit", return_value=None), + patch("finn_eiendom.service.get_unit", return_value=mock_unit) as mock_fetch, + patch("finn_eiendom.service.save_eiendom_unit") as mock_save, + ): + result = await get_or_fetch_eiendom_unit("test-code") + + assert result.unit_code == "test-code" + mock_fetch.assert_called_once_with("test-code") + mock_save.assert_called_once()