Files
finn-mcp/fetch_trikk_coords.py
T
2026-05-23 07:43:30 +00:00

239 lines
10 KiB
Python

"""
fetch_trikk_coords.py
Fetches P625 coordinates from Wikidata for all Oslo tram stops
via Wikipedia sitelinks. Run: python3 fetch_trikk_coords.py
Requires: pip install requests beautifulsoup4
"""
import requests
from urllib.parse import unquote, urlparse, parse_qs
from bs4 import BeautifulSoup
# All Wikipedia links from the tram lines (including redlinks).
# Existing articles are linked as /wiki/<title>; redlinks use the
# /w/index.php?title=<title>&action=edit&redlink=1 form.
HTML = """
<a href="/wiki/Majorstuen_(station)">Majorstuen</a>
<a href="/wiki/Bogstadveien_tram_stop">Bogstadveien</a>
<a href="/wiki/Rosenborg_tram_stop">Rosenborg</a>
<a href="/w/index.php?title=Briskeby_tram_stop&action=edit&redlink=1">Briskeby</a>
<a href="/w/index.php?title=Riddervolds_plass_tram_stop&action=edit&redlink=1">Riddervolds plass</a>
<a href="/wiki/Inkognitogata_tram_stop">Inkognitogata</a>
<a href="/wiki/Nationaltheatret_metro_station">Nationaltheatret</a>
<a href="/wiki/%C3%98vre_Slottsgate_tram_stop">Øvre Slottsgate</a>
<a href="/wiki/Dronningens_gate_tram_stop">Dronningens gate</a>
<a href="/wiki/Jernbanetorget_(station)">Jernbanetorget</a>
<a href="/wiki/Storgata_tram_stop">Storgata</a>
<a href="/wiki/Nybrua_tram_stop">Nybrua</a>
<a href="/wiki/Schous_plass_tram_stop">Schous plass</a>
<a href="/wiki/Olaf_Ryes_plass">Olaf Ryes plass</a>
<a href="/wiki/Birkelunden">Birkelunden</a>
<a href="/wiki/Biermanns_gate_(station)">Biermanns gate</a>
<a href="/w/index.php?title=Torshov_(station)&action=edit&redlink=1">Torshov</a>
<a href="/w/index.php?title=Sandaker_senter_(station)&action=edit&redlink=1">Sandaker senter</a>
<a href="/w/index.php?title=Grefsenveien_tram_stop&action=edit&redlink=1">Grefsenveien</a>
<a href="/wiki/Storo_(station)">Storo</a>
<a href="/wiki/Disen_tram_stop">Disen tram stop</a>
<a href="/w/index.php?title=Doktor_Smiths_vei_tram_stop&action=edit&redlink=1">Doktor Smiths vei</a>
<a href="/w/index.php?title=Glads_vei_tram_stop&action=edit&redlink=1">Glads vei</a>
<a href="/w/index.php?title=Grefsenplat%C3%A5et_tram_stop&action=edit&redlink=1">Grefsenplatået</a>
<a href="/w/index.php?title=Grefsen_stadion_tram_stop&action=edit&redlink=1">Grefsen stadion</a>
<a href="/w/index.php?title=Kjels%C3%A5salleen_tram_stop&action=edit&redlink=1">Kjelsåsalleen</a>
<a href="/wiki/Kjels%C3%A5s_tram_stop">Kjelsås</a>
<a href="/wiki/Frogner_stadion_tram_stop">Frogner stadion</a>
<a href="/w/index.php?title=Vigelandsparken_tram_stop&action=edit&redlink=1">Vigelandsparken</a>
<a href="/w/index.php?title=Frogner_plass_tram_stop&action=edit&redlink=1">Frogner plass</a>
<a href="/wiki/Elisenberg_tram_stop">Elisenberg</a>
<a href="/w/index.php?title=Lille_Frogner_all%C3%A9_tram_stop&action=edit&redlink=1">Lille Frogner allé</a>
<a href="/w/index.php?title=Niels_Juels_gate_tram_stop&action=edit&redlink=1">Niels Juels gate</a>
<a href="/wiki/Solli_tram_stop">Solli</a>
<a href="/w/index.php?title=Rusel%C3%B8kka_tram_stop&action=edit&redlink=1">Ruseløkka</a>
<a href="/wiki/Aker_Brygge_tram_stop">Akerbrygge</a>
<a href="/wiki/Kontraskj%C3%A6ret_tram_stop">Kontraskjæret</a>
<a href="/wiki/Middelalderparken_tram_stop">Middelalderparken</a>
<a href="/w/index.php?title=Bislett_tram_stop&action=edit&redlink=1">Bislett</a>
<a href="/w/index.php?title=Dalsbergstien_tram_stop&action=edit&redlink=1">Dalsbergstien</a>
<a href="/w/index.php?title=Welhavens_gate_tram_stop&action=edit&redlink=1">Welhavens gate</a>
<a href="/w/index.php?title=Frydenlund_tram_stop&action=edit&redlink=1">Frydenlund</a>
<a href="/w/index.php?title=Holbergs_plass_tram_stop&action=edit&redlink=1">Holbergs plass</a>
<a href="/w/index.php?title=Tullin%C3%B8kka_(station)&action=edit&redlink=1">Tullinøkka</a>
<a href="/wiki/Stortinget_(station)">Tinghuset</a>
<a href="/wiki/Stortorvet_(station)">Stortorvet</a>
<a href="/w/index.php?title=Heimdalsgata_tram_stop&action=edit&redlink=1">Heimdalsgata</a>
<a href="/w/index.php?title=Lakkegata_skole_tram_stop&action=edit&redlink=1">Lakkegata skole</a>
<a href="/w/index.php?title=Sofienberg_tram_stop&action=edit&redlink=1">Sofienberg</a>
<a href="/wiki/Carl_Berners_plass_(station)">Carl Berners plass</a>
<a href="/w/index.php?title=Rosenhoff_tram_stop&action=edit&redlink=1">Rosenhoff</a>
<a href="/w/index.php?title=Sinsenterrassen_tram_stop&action=edit&redlink=1">Sinsenterrassen</a>
<a href="/wiki/Sinsen_(station)">Sinsenkrysset</a>
<a href="/wiki/Grefsen_Station">Grefsen stasjon</a>
<a href="/wiki/Homansbyen_tram_stop">Homansbyen</a>
"""
# Hand-maintained overrides: Wikipedia title (URL form, underscores kept) → Wikidata QID.
# Any title listed here is NOT looked up through the Wikipedia API. A value of
# None means no QID is known for that title, so it ends up reported as "NO QID".
WIKIDATA_DIRECT = {
    # Redlinks — no Wikipedia page exists, so the QID was looked up on Wikidata directly
    "Briskeby_tram_stop": "Q11962293",
    "Riddervolds_plass_tram_stop": "Q19386557",
    "Grefsenveien_tram_stop": "Q17778424",
    "Doktor_Smiths_vei_tram_stop": None,
    "Glads_vei_tram_stop": "Q17776371",
    "Grefsenplatået_tram_stop": "Q11972531",
    "Grefsen_stadion_tram_stop": "Q11972525",
    "Kjelsåsalleen_tram_stop": None,
    "Vigelandsparken_tram_stop": "Q19398059",
    "Frogner_plass_tram_stop": "Q11970372",
    "Lille_Frogner_allé_tram_stop": "Q19379373",
    "Niels_Juels_gate_tram_stop": "Q11991378",
    "Ruseløkka_tram_stop": None,
    "Bislett_tram_stop": "Q11961163",
    "Dalsbergstien_tram_stop": "Q17764618",
    "Welhavens_gate_tram_stop": "Q12010485",
    "Frydenlund_tram_stop": "Q19373143",
    "Holbergs_plass_tram_stop": "Q11975623",
    "Tullinøkka_(station)": None,
    "Heimdalsgata_tram_stop": None,
    "Lakkegata_skole_tram_stop": "Q11982987",
    "Sofienberg_tram_stop": None,
    "Rosenhoff_tram_stop": None,
    "Sinsenterrassen_tram_stop": None,
    "Torshov_(station)": None,
    # This key used to be listed twice (None here, the QID further down); the
    # later value always won, so only the effective entry is kept.
    "Sandaker_senter_(station)": "Q12008217",  # → Sandaker senter
    # NOTE(review): this title is linked as a regular /wiki/ page in HTML, not
    # as a redlink; the None entry suppresses its Wikipedia lookup — confirm
    # that this is intentional.
    "Frogner_stadion_tram_stop": None,
    # Wikipedia redirect pages — QID of the redirect target
    "Stortorvet_(station)": "Q7620354",  # → Stortorvet_tram_stop
    "Stortinget_(station)": "Q188712",  # → Stortinget metro station (the Tinghuset tram stop is next to it)
    "Sinsen_(station)": "Q19388523",  # → Sinsenkrysset tram stop
    "Biermanns_gate_(station)": "Q19363042",  # → Biermanns gate tram stop
    "Carl_Berners_plass_(station)": "Q890592",  # → Carl Berners plass metro + tram
    "Majorstuen_(station)": "Q686510",  # → Majorstuen metro station
    "Grefsen_Station": "Q728583",  # → Grefsen railway station
    "Kjelsås_tram_stop": "Q11981146",  # → Kjelsås
    "Storo_(station)": "Q932133",  # → Storo metro station
    "Schous_plass_tram_stop": "Q12006491",  # → Schous plass
    "Jernbanetorget_(station)": "Q841481",  # → Jernbanetorget metro + tram
}
def extract_titles(html):
    """Collect the Wikipedia article titles referenced by the anchors in *html*.

    Two link shapes are recognised:

    * ``/wiki/<title>`` — the title is the percent-decoded remainder of the
      path.
    * ``/w/index.php?title=<title>&...`` — the title is the ``title`` query
      parameter.

    Anchors with any other ``href``, or with an empty title, are ignored.

    Args:
        html: HTML markup containing ``<a>`` elements.

    Returns:
        A dict mapping each distinct title (underscores kept, as in the URL)
        to the link text of the first anchor that referenced it, in document
        order.
    """
    soup = BeautifulSoup(html, "html.parser")
    titles = {}
    for anchor in soup.find_all("a"):
        href = anchor.get("href", "")
        if href.startswith("/wiki/"):
            title = unquote(href.removeprefix("/wiki/"))
        elif href.startswith("/w/index.php"):
            # parse_qs already percent-decodes the value; decoding it a second
            # time would corrupt titles that contain a literal "%xx" sequence.
            title = parse_qs(urlparse(href).query).get("title", [None])[0]
        else:
            continue
        if title:
            # setdefault keeps the label of the first occurrence of a title.
            titles.setdefault(title, anchor.get_text(strip=True))
    return titles  # {wiki_title: display_label}
# HTTP headers passed as ``headers=`` to the requests.get calls in
# get_qids_from_wikipedia and get_p625; identifies this script and a contact address.
HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"}
def _requested_title_qids(chunk, query):
    """Map each requested title in *chunk* to its QID using one API ``query`` object."""
    # The API reports title rewrites as lists of {"from": ..., "to": ...}:
    # normalisation first (e.g. underscores → spaces), then redirects.
    normalized = {entry["from"]: entry["to"] for entry in query.get("normalized", [])}
    redirects = {entry["from"]: entry["to"] for entry in query.get("redirects", [])}

    qid_by_page_title = {}
    for page in query.get("pages", {}).values():
        qid = page.get("pageprops", {}).get("wikibase_item")
        if qid:
            qid_by_page_title[page.get("title", "")] = qid

    found = {}
    for requested in chunk:
        # Fall back to the plain underscore → space rewrite if the API did not
        # report a normalisation for this title.
        final = normalized.get(requested, requested.replace("_", " "))
        visited = set()
        # Follow redirect hops; `visited` guards against a redirect loop.
        while final in redirects and final not in visited:
            visited.add(final)
            final = redirects[final]
        qid = qid_by_page_title.get(final)
        if qid:
            found[requested] = qid
    return found


def get_qids_from_wikipedia(titles):
    """Wikipedia API: article titles → Wikidata QIDs.

    Titles that appear in ``WIKIDATA_DIRECT`` are not sent to Wikipedia. The
    remaining titles are queried on English Wikipedia in batches of 50 with
    redirect resolution enabled, and each result is keyed by the title that
    was *requested* (not by the redirect target), so callers can look it up
    with the title they passed in.

    Args:
        titles: Iterable of article titles in URL form (underscores).

    Returns:
        A dict ``{title: QID}``. It contains every requested title that
        resolved to a page with a Wikidata item, plus every
        ``WIKIDATA_DIRECT`` entry whose QID is not ``None`` (whether or not
        that title was in *titles*).

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    url = "https://en.wikipedia.org/w/api.php"
    result = {}
    batch = [t for t in titles if t not in WIKIDATA_DIRECT]
    for start in range(0, len(batch), 50):
        chunk = batch[start : start + 50]
        r = requests.get(
            url,
            params={
                "action": "query",
                "format": "json",
                "redirects": "1",
                "prop": "pageprops",
                "ppprop": "wikibase_item",
                "titles": "|".join(chunk),
            },
            headers=HEADERS,
            timeout=30,
        )
        # Diagnostic output: shows what the API returned for this batch.
        print(f"Status: {r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}")
        r.raise_for_status()
        # A response without a "query" object simply contributes no QIDs.
        result.update(_requested_title_qids(chunk, r.json().get("query", {})))
    # Merge known QIDs
    for t, qid in WIKIDATA_DIRECT.items():
        if qid:
            result[t] = qid
    return result
def _first_coordinate(statements):
    """Return ``(lat, lon)`` rounded to 5 decimals from the first statement that has a value."""
    for statement in statements:
        # A statement whose main snak carries no "datavalue" has no
        # coordinate to read, so it is skipped instead of raising KeyError.
        value = statement.get("mainsnak", {}).get("datavalue", {}).get("value")
        if value:
            return (round(value["latitude"], 5), round(value["longitude"], 5))
    return None


def get_p625(qids):
    """Wikidata API: QIDs → P625 coordinates.

    Args:
        qids: Dict whose values are Wikidata QIDs (the keys are not used).
            Falsy values are ignored and duplicates are fetched only once.

    Returns:
        A dict ``{qid: {"label": str, "coords": (lat, lon) | None}}`` with one
        entry per entity in the API response. ``label`` is the English label,
        else the Norwegian Bokmål label, else the QID itself. ``coords`` is
        rounded to 5 decimals, or ``None`` when the entity has no P625
        statement with a value.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    url = "https://www.wikidata.org/w/api.php"
    result = {}
    # Sorted so that the batches sent to the API are deterministic.
    unique = sorted({qid for qid in qids.values() if qid})
    for start in range(0, len(unique), 50):
        chunk = unique[start : start + 50]
        r = requests.get(
            url,
            params={
                "action": "wbgetentities",
                "format": "json",
                "ids": "|".join(chunk),
                "props": "claims|labels",
                "languages": "en|nb",
            },
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        for qid, entity in r.json().get("entities", {}).items():
            labels = entity.get("labels", {})
            label = (
                labels.get("en", {}).get("value")
                or labels.get("nb", {}).get("value")
                or qid
            )
            coords = _first_coordinate(entity.get("claims", {}).get("P625", []))
            result[qid] = {"label": label, "coords": coords}
    return result
def main():
    """Resolve every title in ``HTML`` to coordinates and print a report.

    Prints one table row per title (sorted by title) with its QID, Wikidata
    label and coordinates, followed by the list of titles for which no
    coordinates were found (either no QID or no P625 value).
    """
    title_to_label = extract_titles(HTML)
    print(f"Extracted {len(title_to_label)} unique titles\n")
    title_to_qid = get_qids_from_wikipedia(title_to_label)
    print(f"Resolved {len(title_to_qid)} QIDs\n")
    qid_to_data = get_p625(title_to_qid)
    print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}")
    print("-" * 120)
    no_coords = []
    for title in sorted(title_to_label):
        qid = title_to_qid.get(title, "")
        if not qid:
            coords = None
            label = "?"
            shown = "NO QID"
        else:
            data = qid_to_data.get(qid, {})
            coords = data.get("coords")
            label = data.get("label", "?")
            shown = str(coords) if coords else "NO P625"
        print(f"{title:<45} {qid:<12} {label:<35} {shown}")
        # Decide on the data itself rather than on a substring of the
        # rendered cell text.
        if not coords:
            no_coords.append(title)
    print(f"\n\nMissing coords: {len(no_coords)}")
    for t in no_coords:
        print(f"  {t}")


if __name__ == "__main__":
    main()