"""
fetch_trikk_coords.py
Fetches P625 coordinates from Wikidata for all Oslo tram stops
via Wikipedia sitelinks. Run: python3 fetch_trikk_coords.py
Requires: pip install requests beautifulsoup4
"""
import requests
from urllib.parse import unquote, urlparse, parse_qs
from bs4 import BeautifulSoup
# All Wikipedia links from the tram lines (incl. redlinks).
# This string is the input that main() passes to extract_titles().
# NOTE(review): extract_titles() only collects <a> elements with an href;
# the text as it appears here contains no anchor tags — confirm that the
# original markup has not been lost.
HTML = """
Majorstuen
Bogstadveien
Rosenborg
Briskeby
Riddervolds plass
Inkognitogata
Nationaltheatret
Øvre Slottsgate
Dronningens gate
Jernbanetorget
Storgata
Nybrua
Schous plass
Olaf Ryes plass
Birkelunden
Biermanns gate
Torshov
Sandaker senter
Grefsenveien
Storo
Disen tram stop
Doktor Smiths vei
Glads vei
Grefsenplatået
Grefsen stadion
Kjelsåsalleen
Kjelsås
Frogner stadion
Vigelandsparken
Frogner plass
Elisenberg
Lille Frogner allé
Niels Juels gate
Solli
Ruseløkka
Akerbrygge
Kontraskjæret
Middelalderparken
Bislett
Dalsbergstien
Welhavens gate
Frydenlund
Holbergs plass
Tullinøkka
Tinghuset
Stortorvet
Heimdalsgata
Lakkegata skole
Sofienberg
Carl Berners plass
Rosenhoff
Sinsenterrassen
Sinsenkrysset
Grefsen stasjon
Homansbyen
"""
# Hand-curated overrides: Wikipedia link title -> Wikidata QID.
# Titles listed here are excluded from the Wikipedia API lookup in
# get_qids_from_wikipedia(); entries whose value is None are skipped when the
# overrides are merged, so those titles end up without a QID.
# Every key must appear only once: in a dict literal a repeated key silently
# keeps the last value.
WIKIDATA_DIRECT = {
    # Redlinks — no Wikipedia page
    "Briskeby_tram_stop": "Q11962293",
    "Riddervolds_plass_tram_stop": "Q19386557",
    "Grefsenveien_tram_stop": "Q17778424",
    "Doktor_Smiths_vei_tram_stop": None,
    "Glads_vei_tram_stop": "Q17776371",
    "Grefsenplatået_tram_stop": "Q11972531",
    "Grefsen_stadion_tram_stop": "Q11972525",
    "Kjelsåsalleen_tram_stop": None,
    "Vigelandsparken_tram_stop": "Q19398059",
    "Frogner_plass_tram_stop": "Q11970372",
    "Lille_Frogner_allé_tram_stop": "Q19379373",
    "Niels_Juels_gate_tram_stop": "Q11991378",
    "Ruseløkka_tram_stop": None,
    "Bislett_tram_stop": "Q11961163",
    "Dalsbergstien_tram_stop": "Q17764618",
    "Welhavens_gate_tram_stop": "Q12010485",
    "Frydenlund_tram_stop": "Q19373143",
    "Holbergs_plass_tram_stop": "Q11975623",
    "Tullinøkka_(station)": None,
    "Heimdalsgata_tram_stop": None,
    "Lakkegata_skole_tram_stop": "Q11982987",
    "Sofienberg_tram_stop": None,
    "Rosenhoff_tram_stop": None,
    "Sinsenterrassen_tram_stop": None,
    "Torshov_(station)": None,
    "Frogner_stadion_tram_stop": None,
    # Wikipedia redirect pages — QID of the redirect target
    "Stortorvet_(station)": "Q7620354",  # → Stortorvet_tram_stop
    "Stortinget_(station)": "Q188712",  # → Stortinget metro (the Tinghuset tram stop is next to it)
    "Sinsen_(station)": "Q19388523",  # → Sinsenkrysset tram stop
    "Biermanns_gate_(station)": "Q19363042",  # → Biermanns gate tram stop
    "Carl_Berners_plass_(station)": "Q890592",  # → Carl Berners plass metro+tram
    "Majorstuen_(station)": "Q686510",  # → Majorstuen metro
    "Grefsen_Station": "Q728583",  # → Grefsen railway station
    "Kjelsås_tram_stop": "Q11981146",  # → Kjelsås
    "Storo_(station)": "Q932133",  # → Storo metro
    "Schous_plass_tram_stop": "Q12006491",  # → Schous plass
    "Jernbanetorget_(station)": "Q841481",  # → Jernbanetorget metro+tram
    "Sandaker_senter_(station)": "Q12008217",  # → Sandaker senter
}
def extract_titles(html):
    """Collect Wikipedia page titles from the anchor tags in *html*.

    Two href shapes are recognised:

    * ``/wiki/<title>`` — the title is the percent-decoded remainder of the
      href.
    * ``/w/index.php?...`` — the title is read from the ``title`` query
      parameter; links without that parameter are ignored.

    Anchors with any other href (or none) are skipped.

    Returns:
        dict ``{wiki_title: display_label}`` in first-seen order. When a title
        occurs more than once, the label of its first occurrence is kept.
    """
    soup = BeautifulSoup(html, "html.parser")
    seen = {}
    for a in soup.find_all("a"):
        href = a.get("href", "")
        label = a.get_text(strip=True)
        if href.startswith("/wiki/"):
            seen.setdefault(unquote(href.removeprefix("/wiki/")), label)
        elif href.startswith("/w/index.php"):
            # parse_qs already percent-decodes the values; decoding a second
            # time would corrupt titles that contain a literal "%".
            title = parse_qs(urlparse(href).query).get("title", [None])[0]
            if title:
                seen.setdefault(title, label)
    return seen  # {wiki_title: display_label}
# HTTP headers (a descriptive User-Agent with a contact address) passed to
# every requests.get() call in this script.
HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"}
def get_qids_from_wikipedia(titles):
    """Wikipedia API: article titles → Wikidata QIDs.

    Titles present in WIKIDATA_DIRECT are not sent to the API; the curated
    QIDs from that table (where not None) are merged into the result at the
    end and take precedence over API results for the same key.

    The API is queried with ``redirects=1`` and reports each page under its
    canonical title, which differs from the requested title whenever the
    title was normalised or is a redirect. The result therefore contains the
    QID both under the canonical title (spaces replaced by underscores) and
    under the title that was actually requested, so callers can look up the
    titles they passed in.

    Args:
        titles: iterable of Wikipedia page titles.

    Returns:
        dict mapping title -> QID. Titles for which no QID was found are
        absent.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    result = {}
    batch = [t for t in titles if t not in WIKIDATA_DIRECT]
    # The API is queried in chunks of 50 titles.
    for i in range(0, len(batch), 50):
        chunk = batch[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "query",
                "format": "json",
                "redirects": "1",
                "prop": "pageprops",
                "ppprop": "wikibase_item",
                "titles": "|".join(chunk),
            },
            headers=HEADERS,
            timeout=30,
        )
        print(f"Status: {r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}")
        # Fail with a clear HTTP error instead of a confusing JSON/Key error.
        r.raise_for_status()
        query = r.json()["query"]

        qid_by_page_title = {}
        for page in query.get("pages", {}).values():
            qid = page.get("pageprops", {}).get("wikibase_item")
            if qid:
                page_title = page.get("title", "")
                qid_by_page_title[page_title] = qid
                result[page_title.replace(" ", "_")] = qid

        # Map every requested title back to the page it resolved to:
        # requested --normalized--> display form --redirects--> canonical.
        normalized = {n["from"]: n["to"] for n in query.get("normalized", [])}
        redirects = {rd["from"]: rd["to"] for rd in query.get("redirects", [])}
        for requested in chunk:
            resolved = normalized.get(requested, requested)
            # Bounded walk so a redirect cycle cannot loop forever.
            for _ in range(len(redirects)):
                if resolved not in redirects:
                    break
                resolved = redirects[resolved]
            qid = qid_by_page_title.get(resolved)
            if qid:
                result[requested] = qid

    # Merge known QIDs
    for t, qid in WIKIDATA_DIRECT.items():
        if qid:
            result[t] = qid
    return result
def get_p625(qids):
    """Wikidata API: QIDs → P625 coordinates.

    Args:
        qids: dict whose values are Wikidata QIDs; the keys are not used.

    Returns:
        dict ``{qid: {"label": ..., "coords": ...}}`` for every entity in the
        API response. ``label`` is the English label, else the ``nb`` label,
        else the QID itself. ``coords`` is ``(latitude, longitude)`` rounded
        to 5 decimals, taken from the first P625 statement that carries a
        value, or ``None`` when there is no such statement.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
    """
    url = "https://www.wikidata.org/w/api.php"
    result = {}
    # De-duplicate while keeping first-seen order, so the batches are the
    # same from run to run (a set would give an arbitrary order).
    unique = list(dict.fromkeys(qids.values()))
    # The API is queried in chunks of 50 ids.
    for i in range(0, len(unique), 50):
        chunk = unique[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "wbgetentities",
                "format": "json",
                "ids": "|".join(chunk),
                "props": "claims|labels",
                "languages": "en|nb",
            },
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        for qid, entity in r.json()["entities"].items():
            labels = entity.get("labels", {})
            label = (
                labels.get("en", {}).get("value")
                or labels.get("nb", {}).get("value")
                or qid
            )
            coords = None
            for statement in entity.get("claims", {}).get("P625", []):
                # "novalue"/"somevalue" snaks have no datavalue; skip them
                # instead of raising KeyError.
                value = (
                    statement.get("mainsnak", {}).get("datavalue", {}).get("value")
                )
                if value:
                    coords = (
                        round(value["latitude"], 5),
                        round(value["longitude"], 5),
                    )
                    break
            result[qid] = {"label": label, "coords": coords}
    return result
def main():
    """Run the whole pipeline and print a coordinate table.

    Extracts the titles from HTML, resolves them to QIDs, fetches the P625
    data, then prints one row per title (sorted by title) followed by the
    list of titles for which no coordinates were found.
    """
    labels_by_title = extract_titles(HTML)
    print(f"Extracted {len(labels_by_title)} unique titles\n")

    qid_by_title = get_qids_from_wikipedia(labels_by_title)
    print(f"Resolved {len(qid_by_title)} QIDs\n")

    info_by_qid = get_p625(qid_by_title)

    print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}")
    print("-" * 120)

    missing = []
    for title in sorted(labels_by_title):
        # "—" is the placeholder shown in the QID column for unresolved titles.
        qid = qid_by_title.get(title, "—")
        if qid != "—":
            entry = info_by_qid.get(qid, {})
            label = entry.get("label", "?")
            coords = str(entry.get("coords") or "NO P625")
        else:
            label, coords = "?", "NO QID"
        print(f"{title:<45} {qid:<12} {label:<35} {coords}")
        # Both failure markers ("NO QID", "NO P625") contain "NO".
        if "NO" in coords:
            missing.append(title)

    print(f"\n\nMissing coords: {len(missing)}")
    for title in missing:
        print(f" {title}")


if __name__ == "__main__":
    main()