feat: Add .tool-versions file, remove unused Docker documentation, and create repository summary and story files; enhance analysis.py and add fetch_trikk_coords.py script
This commit is contained in:
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
fetch_trikk_coords.py
|
||||
Fetches P625 coordinates from Wikidata for every Oslo tram stop,
resolved via Wikipedia sitelinks. Run: python3 fetch_trikk_coords.py
Requires: pip install requests beautifulsoup4
|
||||
"""
|
||||
|
||||
import requests
|
||||
from urllib.parse import unquote, urlparse, parse_qs
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Alle Wikipedia-lenker fra trikkelinjene (inkl. redlinks)
|
||||
HTML = """
|
||||
<a href="/wiki/Majorstuen_(station)">Majorstuen</a>
|
||||
<a href="/wiki/Bogstadveien_tram_stop">Bogstadveien</a>
|
||||
<a href="/wiki/Rosenborg_tram_stop">Rosenborg</a>
|
||||
<a href="/w/index.php?title=Briskeby_tram_stop&action=edit&redlink=1">Briskeby</a>
|
||||
<a href="/w/index.php?title=Riddervolds_plass_tram_stop&action=edit&redlink=1">Riddervolds plass</a>
|
||||
<a href="/wiki/Inkognitogata_tram_stop">Inkognitogata</a>
|
||||
<a href="/wiki/Nationaltheatret_metro_station">Nationaltheatret</a>
|
||||
<a href="/wiki/%C3%98vre_Slottsgate_tram_stop">Øvre Slottsgate</a>
|
||||
<a href="/wiki/Dronningens_gate_tram_stop">Dronningens gate</a>
|
||||
<a href="/wiki/Jernbanetorget_(station)">Jernbanetorget</a>
|
||||
<a href="/wiki/Storgata_tram_stop">Storgata</a>
|
||||
<a href="/wiki/Nybrua_tram_stop">Nybrua</a>
|
||||
<a href="/wiki/Schous_plass_tram_stop">Schous plass</a>
|
||||
<a href="/wiki/Olaf_Ryes_plass">Olaf Ryes plass</a>
|
||||
<a href="/wiki/Birkelunden">Birkelunden</a>
|
||||
<a href="/wiki/Biermanns_gate_(station)">Biermanns gate</a>
|
||||
<a href="/w/index.php?title=Torshov_(station)&action=edit&redlink=1">Torshov</a>
|
||||
<a href="/w/index.php?title=Sandaker_senter_(station)&action=edit&redlink=1">Sandaker senter</a>
|
||||
<a href="/w/index.php?title=Grefsenveien_tram_stop&action=edit&redlink=1">Grefsenveien</a>
|
||||
<a href="/wiki/Storo_(station)">Storo</a>
|
||||
<a href="/wiki/Disen_tram_stop">Disen tram stop</a>
|
||||
<a href="/w/index.php?title=Doktor_Smiths_vei_tram_stop&action=edit&redlink=1">Doktor Smiths vei</a>
|
||||
<a href="/w/index.php?title=Glads_vei_tram_stop&action=edit&redlink=1">Glads vei</a>
|
||||
<a href="/w/index.php?title=Grefsenplat%C3%A5et_tram_stop&action=edit&redlink=1">Grefsenplatået</a>
|
||||
<a href="/w/index.php?title=Grefsen_stadion_tram_stop&action=edit&redlink=1">Grefsen stadion</a>
|
||||
<a href="/w/index.php?title=Kjels%C3%A5salleen_tram_stop&action=edit&redlink=1">Kjelsåsalleen</a>
|
||||
<a href="/wiki/Kjels%C3%A5s_tram_stop">Kjelsås</a>
|
||||
<a href="/wiki/Frogner_stadion_tram_stop">Frogner stadion</a>
|
||||
<a href="/w/index.php?title=Vigelandsparken_tram_stop&action=edit&redlink=1">Vigelandsparken</a>
|
||||
<a href="/w/index.php?title=Frogner_plass_tram_stop&action=edit&redlink=1">Frogner plass</a>
|
||||
<a href="/wiki/Elisenberg_tram_stop">Elisenberg</a>
|
||||
<a href="/w/index.php?title=Lille_Frogner_all%C3%A9_tram_stop&action=edit&redlink=1">Lille Frogner allé</a>
|
||||
<a href="/w/index.php?title=Niels_Juels_gate_tram_stop&action=edit&redlink=1">Niels Juels gate</a>
|
||||
<a href="/wiki/Solli_tram_stop">Solli</a>
|
||||
<a href="/w/index.php?title=Rusel%C3%B8kka_tram_stop&action=edit&redlink=1">Ruseløkka</a>
|
||||
<a href="/wiki/Aker_Brygge_tram_stop">Akerbrygge</a>
|
||||
<a href="/wiki/Kontraskj%C3%A6ret_tram_stop">Kontraskjæret</a>
|
||||
<a href="/wiki/Middelalderparken_tram_stop">Middelalderparken</a>
|
||||
<a href="/w/index.php?title=Bislett_tram_stop&action=edit&redlink=1">Bislett</a>
|
||||
<a href="/w/index.php?title=Dalsbergstien_tram_stop&action=edit&redlink=1">Dalsbergstien</a>
|
||||
<a href="/w/index.php?title=Welhavens_gate_tram_stop&action=edit&redlink=1">Welhavens gate</a>
|
||||
<a href="/w/index.php?title=Frydenlund_tram_stop&action=edit&redlink=1">Frydenlund</a>
|
||||
<a href="/w/index.php?title=Holbergs_plass_tram_stop&action=edit&redlink=1">Holbergs plass</a>
|
||||
<a href="/w/index.php?title=Tullin%C3%B8kka_(station)&action=edit&redlink=1">Tullinøkka</a>
|
||||
<a href="/wiki/Stortinget_(station)">Tinghuset</a>
|
||||
<a href="/wiki/Stortorvet_(station)">Stortorvet</a>
|
||||
<a href="/w/index.php?title=Heimdalsgata_tram_stop&action=edit&redlink=1">Heimdalsgata</a>
|
||||
<a href="/w/index.php?title=Lakkegata_skole_tram_stop&action=edit&redlink=1">Lakkegata skole</a>
|
||||
<a href="/w/index.php?title=Sofienberg_tram_stop&action=edit&redlink=1">Sofienberg</a>
|
||||
<a href="/wiki/Carl_Berners_plass_(station)">Carl Berners plass</a>
|
||||
<a href="/w/index.php?title=Rosenhoff_tram_stop&action=edit&redlink=1">Rosenhoff</a>
|
||||
<a href="/w/index.php?title=Sinsenterrassen_tram_stop&action=edit&redlink=1">Sinsenterrassen</a>
|
||||
<a href="/wiki/Sinsen_(station)">Sinsenkrysset</a>
|
||||
<a href="/wiki/Grefsen_Station">Grefsen stasjon</a>
|
||||
<a href="/wiki/Homansbyen_tram_stop">Homansbyen</a>
|
||||
"""
|
||||
|
||||
# Hard-coded title -> Wikidata QID overrides.
#
# Any title listed here is NOT looked up through the Wikipedia API; a QID
# value is used as-is, and a None value means the title ends up without a QID.
# Each key appears exactly once (a repeated key in a dict literal would
# silently override the earlier value).
WIKIDATA_DIRECT = {
    # Redlinks -- no Wikipedia page exists for these titles.
    "Briskeby_tram_stop": "Q11962293",
    "Riddervolds_plass_tram_stop": "Q19386557",
    "Grefsenveien_tram_stop": "Q17778424",
    "Doktor_Smiths_vei_tram_stop": None,
    "Glads_vei_tram_stop": "Q17776371",
    "Grefsenplatået_tram_stop": "Q11972531",
    "Grefsen_stadion_tram_stop": "Q11972525",
    "Kjelsåsalleen_tram_stop": None,
    "Vigelandsparken_tram_stop": "Q19398059",
    "Frogner_plass_tram_stop": "Q11970372",
    "Lille_Frogner_allé_tram_stop": "Q19379373",
    "Niels_Juels_gate_tram_stop": "Q11991378",
    "Ruseløkka_tram_stop": None,
    "Bislett_tram_stop": "Q11961163",
    "Dalsbergstien_tram_stop": "Q17764618",
    "Welhavens_gate_tram_stop": "Q12010485",
    "Frydenlund_tram_stop": "Q19373143",
    "Holbergs_plass_tram_stop": "Q11975623",
    "Tullinøkka_(station)": None,
    "Heimdalsgata_tram_stop": None,
    "Lakkegata_skole_tram_stop": "Q11982987",
    "Sofienberg_tram_stop": None,
    "Rosenhoff_tram_stop": None,
    "Sinsenterrassen_tram_stop": None,
    "Torshov_(station)": None,
    "Sandaker_senter_(station)": "Q12008217",  # → Sandaker senter
    # NOTE(review): HTML links this title as an existing /wiki/ page, not a
    # redlink; listing it here with None suppresses the Wikipedia lookup.
    # Confirm that this is intended.
    "Frogner_stadion_tram_stop": None,
    # Wikipedia redirect pages -- QID of the redirect target.
    "Stortorvet_(station)": "Q7620354",  # → Stortorvet_tram_stop
    "Stortinget_(station)": "Q188712",  # → Stortinget metro (the Tinghuset tram stop is next to it)
    "Sinsen_(station)": "Q19388523",  # → Sinsenkrysset tram stop
    "Biermanns_gate_(station)": "Q19363042",  # → Biermanns gate tram stop
    "Carl_Berners_plass_(station)": "Q890592",  # → Carl Berners plass metro+tram
    "Majorstuen_(station)": "Q686510",  # → Majorstuen metro
    "Grefsen_Station": "Q728583",  # → Grefsen railway station
    "Kjelsås_tram_stop": "Q11981146",  # → Kjelsås
    "Storo_(station)": "Q932133",  # → Storo metro
    "Schous_plass_tram_stop": "Q12006491",  # → Schous plass
    "Jernbanetorget_(station)": "Q841481",  # → Jernbanetorget metro+tram
}
|
||||
|
||||
|
||||
def extract_titles(html):
    """Collect the Wikipedia page titles referenced by the anchors in *html*.

    Two href shapes are recognised:

    * ``/wiki/<title>`` -- the title is the percent-decoded remainder of
      the path.
    * ``/w/index.php?...`` -- the title is the value of the ``title`` query
      parameter.

    Anchors with any other href (or without one), and anchors that yield an
    empty title, are ignored.

    Args:
        html: HTML markup containing ``<a>`` elements.

    Returns:
        dict mapping each distinct title to the stripped text of the first
        anchor that referenced it, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    seen = {}
    for anchor in soup.find_all("a"):
        href = anchor.get("href", "")
        if href.startswith("/wiki/"):
            title = unquote(href.removeprefix("/wiki/"))
        elif href.startswith("/w/index.php"):
            # parse_qs already percent-decodes the values; decoding a second
            # time would corrupt titles that contain a literal "%".
            title = parse_qs(urlparse(href).query).get("title", [None])[0]
        else:
            continue
        if title:
            # setdefault keeps the label of the first occurrence of a title.
            seen.setdefault(title, anchor.get_text(strip=True))
    return seen  # {wiki_title: display_label}
|
||||
|
||||
|
||||
# HTTP headers passed to every requests.get() call in this file; the
# User-Agent names this script, its version and a contact address.
HEADERS = {"User-Agent": "finn-mcp-trikk-coords/1.0 (contact: ole@example.com)"}
|
||||
|
||||
|
||||
def _qids_for_requested(requested, query):
    """Map each requested title to its QID using one ``query`` API result.

    The API lists pages under their canonical title. ``query["normalized"]``
    and ``query["redirects"]`` record how a requested title was rewritten
    (``from`` -> ``to``), so each requested title is followed through those
    steps to find the page that carries its ``wikibase_item``.
    """
    renamed = {}
    for key in ("normalized", "redirects"):
        for step in query.get(key, []):
            renamed[step["from"]] = step["to"]

    qid_by_page = {
        page.get("title"): page.get("pageprops", {}).get("wikibase_item")
        for page in query.get("pages", {}).values()
    }

    resolved = {}
    for title in requested:
        final = title
        visited = set()  # guards against a cyclic from/to chain
        while final in renamed and final not in visited:
            visited.add(final)
            final = renamed[final]
        # Fall back to the space form in case no "normalized" step was listed.
        qid = qid_by_page.get(final) or qid_by_page.get(final.replace("_", " "))
        if qid:
            resolved[title] = qid
    return resolved


def get_qids_from_wikipedia(titles):
    """Wikipedia API: article titles → Wikidata QIDs.

    Titles that have an entry in ``WIKIDATA_DIRECT`` are not sent to
    Wikipedia. All other titles are looked up on English Wikipedia in
    batches of 50, with redirect resolution enabled.

    Args:
        titles: iterable of article titles (iterating a dict yields its keys).

    Returns:
        dict mapping title -> QID. Looked-up titles are keyed by the title
        exactly as it was requested, even when Wikipedia normalised it or
        followed a redirect, so callers can look results up by the title
        they passed in. Titles without a Wikidata item are omitted. Every
        non-None entry of ``WIKIDATA_DIRECT`` is merged in last.

    Raises:
        requests.HTTPError: if a response has an error status code.
    """
    url = "https://en.wikipedia.org/w/api.php"
    result = {}
    batch = [t for t in titles if t not in WIKIDATA_DIRECT]
    for i in range(0, len(batch), 50):
        chunk = batch[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "query",
                "format": "json",
                "redirects": "1",
                "prop": "pageprops",
                "ppprop": "wikibase_item",
                "titles": "|".join(chunk),
            },
            headers=HEADERS,
            timeout=30,
        )
        print(f"Status: {r.status_code}, len: {len(r.text)}, preview: {r.text[:200]!r}")
        # Fail with a clear HTTP error instead of a JSON/KeyError further down.
        r.raise_for_status()
        result.update(_qids_for_requested(chunk, r.json()["query"]))
    # Merge known QIDs
    for t, qid in WIKIDATA_DIRECT.items():
        if qid:
            result[t] = qid
    return result
|
||||
|
||||
|
||||
def get_p625(qids):
    """Wikidata API: QIDs → P625 coordinates.

    Args:
        qids: dict whose values are Wikidata QIDs (the keys are not used).
            Duplicate and falsy values are dropped before querying.

    Returns:
        dict mapping QID -> ``{"label": str, "coords": (lat, lon) or None}``.
        ``label`` is the English label, else the ``nb`` label, else the QID
        itself. ``coords`` is taken from the first P625 claim that carries a
        value, rounded to 5 decimals, or None when there is none.

    Raises:
        requests.HTTPError: if a response has an error status code.
    """
    url = "https://www.wikidata.org/w/api.php"
    result = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # batches are the same on every run (a set has no defined order).
    unique = list(dict.fromkeys(q for q in qids.values() if q))
    for i in range(0, len(unique), 50):
        chunk = unique[i : i + 50]
        r = requests.get(
            url,
            params={
                "action": "wbgetentities",
                "format": "json",
                "ids": "|".join(chunk),
                "props": "claims|labels",
                "languages": "en|nb",
            },
            headers=HEADERS,
            timeout=30,
        )
        r.raise_for_status()
        for qid, entity in r.json()["entities"].items():
            labels = entity.get("labels", {})
            label = (
                labels.get("en", {}).get("value")
                or labels.get("nb", {}).get("value")
                or qid
            )
            coords = None
            for claim in entity.get("claims", {}).get("P625", []):
                # A claim may come without a datavalue; skip it instead of
                # raising KeyError.
                value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
                if value:
                    coords = (round(value["latitude"], 5), round(value["longitude"], 5))
                    break
            result[qid] = {"label": label, "coords": coords}
    return result
|
||||
|
||||
|
||||
def main():
    """Resolve every title in ``HTML`` to coordinates and print a report.

    Prints the number of extracted titles and resolved QIDs, then one table
    row per title (sorted by title) with its QID, label and coordinates,
    and finally the titles for which no QID or no P625 value was found.
    """
    title_to_label = extract_titles(HTML)
    print(f"Extracted {len(title_to_label)} unique titles\n")

    title_to_qid = get_qids_from_wikipedia(title_to_label)
    print(f"Resolved {len(title_to_qid)} QIDs\n")

    qid_to_data = get_p625(title_to_qid)

    print(f"{'Wiki title':<45} {'QID':<12} {'Label':<35} {'Coords'}")
    print("-" * 120)

    no_coords = []
    for title in sorted(title_to_label):
        if title in title_to_qid:
            qid = title_to_qid[title]
            entry = qid_to_data.get(qid, {})
            point = entry.get("coords")
            label = entry.get("label", "?")
            coords = str(point) if point else "NO P625"
            found = bool(point)
        else:
            # No QID known for this title at all.
            qid, label, coords, found = "—", "?", "NO QID", False

        print(f"{title:<45} {qid:<12} {label:<35} {coords}")
        if not found:
            no_coords.append(title)

    print(f"\n\nMissing coords: {len(no_coords)}")
    for missing_title in no_coords:
        print(f" {missing_title}")
|
||||
|
||||
|
||||
# Script entry point: run the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user