manaviel85370
add pages and all
da88570
import json
from pathlib import Path
# Accumulator for the generated URL entries; the loader code further down
# appends one dict per city to this list and then serialises it to JSON.
urls = []

# Path of the city list, resolved next to this script file instead of
# relative to the current working directory.
p = Path(__file__).parent / 'large_cities.txt'

# NOTE: an earlier loader (previously kept here as commented-out code) opened
# the file through `p`, split each stripped line on whitespace and stored the
# third field verbatim as {"url_type": "city_url", "url": <field>}.
def umlaut_to_ascii(text):
    """Return *text* with German umlauts and sharp s spelled out in ASCII.

    The substitutions are ä/ö/ü -> ae/oe/ue, Ä/Ö/Ü -> Ae/Oe/Ue and ß -> ss.
    All other characters are passed through unchanged.
    """
    # A single translation table performs all seven substitutions in one pass.
    transliteration = str.maketrans({
        "ä": "ae",
        "ö": "oe",
        "ü": "ue",
        "Ä": "Ae",
        "Ö": "Oe",
        "Ü": "Ue",
        "ß": "ss",
    })
    return text.translate(transliteration)
# Read the city list, build one URL entry per city from the second
# whitespace-separated field of each line, and write all entries to
# large_city_urls.json.
with open('large_cities.txt', 'r', encoding='utf-8') as f:
    header_skipped = False
    for line in f:
        # Transliterate umlauts to ASCII before the name is placed in the URL.
        text = umlaut_to_ascii(line.strip())
        print(text)
        if not text:
            # Blank lines have no fields; indexing them would raise IndexError.
            continue
        if not header_skipped:
            # The first non-blank line never produces an entry (the original
            # code appended it and then removed it again with urls.pop(0)) —
            # presumably it is a header row; confirm against the input file.
            header_skipped = True
            continue
        fields = text.split()
        if len(fields) < 2:
            # Lines without a second field cannot yield a city name.
            continue
        # NOTE(review): only the second field is used, so a city name made of
        # several words would be cut to its first word — confirm input format.
        city = fields[1]
        urls.append({
            "url_type": "city",
            "url": f"https://www.{city}.de",
            "meta": {
                "website_host": city,
                "location": city,
            },
        })

# ensure_ascii=False keeps any remaining non-ASCII characters readable in the
# output instead of escaping them as \uXXXX sequences.
with open('large_city_urls.json', 'w', encoding='utf-8') as f:
    json.dump(urls, f, ensure_ascii=False, indent=4)