import json
from pathlib import Path

# Location of the input list, resolved next to this script instead of
# relative to the current working directory.
p = Path(__file__).with_name('large_cities.txt')

# Accumulates the URL records (plain dicts) that are later dumped as JSON.
urls = []


# Translation table mapping each German special character to its ASCII
# transliteration. Built once so every call is a single pass over the text.
_UMLAUT_TABLE = str.maketrans({
    "ä": "ae",
    "ö": "oe",
    "ü": "ue",
    "Ä": "Ae",
    "Ö": "Oe",
    "Ü": "Ue",
    "ß": "ss",
})


def umlaut_to_ascii(text):
    """Return *text* with German umlauts and sharp s spelled out in ASCII.

    The replacements are ä→ae, ö→oe, ü→ue, Ä→Ae, Ö→Oe, Ü→Ue and ß→ss.
    All other characters are returned unchanged.

    Args:
        text: The string to transliterate.

    Returns:
        A new string with the seven characters above replaced.
    """
    return text.translate(_UMLAUT_TABLE)


# Read the city list and build one URL record per usable line.
# The file is opened through `p` (the script-relative path computed above)
# so the script works regardless of the current working directory.
with open(p, 'r', encoding='utf-8') as f:
    for line_no, line in enumerate(f):
        cleaned = umlaut_to_ascii(line.strip())
        print(cleaned)

        # The first line of the file never reaches the output (presumably a
        # header row -- confirm against large_cities.txt). Skipping it here
        # replaces the former `urls.pop(0)`, which failed on an empty file.
        if line_no == 0:
            continue

        fields = cleaned.split()
        # Blank or single-token lines have no second field to read; skip
        # them instead of raising IndexError.
        if len(fields) < 2:
            continue

        # The second whitespace-separated field is used as both the host
        # name and the location.
        name = fields[1]
        urls.append({
            "url_type": "city",
            "url": f"https://www.{name}.de",
            "meta": {
                "website_host": f"{name}",
                "location": f"{name}",
            },
        })

# Write all collected records as indented JSON; ensure_ascii=False keeps any
# remaining non-ASCII characters readable in the output file.
with open('large_city_urls.json', 'w', encoding='utf-8') as f:
    json.dump(urls, f, ensure_ascii=False, indent=4)