manaviel85370
refactor testings
6fc4565
raw
history blame contribute delete
657 Bytes
from src.persistence.db import init_db
from src.utils.apis.googlemaps_api import GoogleMapsAPI
from src.utils.apis.gpt_api import remove_boilerplate
from src.utils.markdown_processing.md_preprocessing import convert_html_to_md
# Script: for every URL document marked final and classified as
# "EventDetail", convert its stored cleaned HTML to Markdown, run the
# boilerplate remover over it, and store the result in the document's
# "markdown" field.
db = init_db()
# NOTE(review): this client is not used anywhere in the visible script; it is
# kept so the module-level name and the constructor call remain unchanged.
google_maps_api = GoogleMapsAPI()

# Request only the two fields the loop below actually reads.
event_urls = db.event_urls.find(
    {"final": True, "class": "EventDetail"},
    {"_id": 1, "cleaned_html": 1},
)

for event in event_urls:
    try:
        html = event["cleaned_html"]
    except KeyError:
        # A document without the field would otherwise abort the whole run
        # part-way through; skip it and carry on with the remaining ones.
        print(f"Skipping {event['_id']}: no cleaned_html")
        continue

    md = convert_html_to_md(html)
    cleaned_md = remove_boilerplate(md)
    print(cleaned_md)  # progress/debug output of the stored Markdown
    db.event_urls.update_one(
        {"_id": event["_id"]},
        {"$set": {"markdown": cleaned_md}},
    )