|
from src.persistence.db import init_db |
|
from src.utils.apis.googlemaps_api import GoogleMapsAPI |
|
from src.utils.apis.gpt_api import remove_boilerplate |
|
from src.utils.markdown_processing.md_preprocessing import convert_html_to_md |
|
|
|
# Backfill the "markdown" field of every final EventDetail document in
# event_urls: convert its stored "cleaned_html" to Markdown, pass the result
# through remove_boilerplate, and write it back to the same document.
db = init_db()

# NOTE(review): not referenced anywhere in the visible script. Kept because
# constructing the client may have side effects -- confirm, then remove.
google_maps_api = GoogleMapsAPI()

# Project only the two fields the loop actually reads.
event_urls = db.event_urls.find(
    {"final": True, "class": "EventDetail"},
    {"_id": 1, "cleaned_html": 1},
)

for event in event_urls:
    # The filter above does not require "cleaned_html" to exist, and a
    # projection simply omits fields a document lacks, so look the field up
    # defensively instead of aborting the whole run with a KeyError.
    html = event.get("cleaned_html")
    if not html:
        print(f"Skipping {event['_id']}: no cleaned_html")
        continue

    md = convert_html_to_md(html)
    cleaned_md = remove_boilerplate(md)

    print(cleaned_md)

    db.event_urls.update_one(
        {"_id": event["_id"]},
        {"$set": {"markdown": cleaned_md}},
    )
|
|