File size: 657 Bytes
6fc4565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from src.persistence.db import init_db
from src.utils.apis.googlemaps_api import GoogleMapsAPI
from src.utils.apis.gpt_api import remove_boilerplate
from src.utils.markdown_processing.md_preprocessing import convert_html_to_md

# Script: convert the stored "cleaned_html" of every final EventDetail
# document in `event_urls` to markdown, strip boilerplate from it, and
# write the result back to the same document under the "markdown" field.

db = init_db()
# NOTE(review): not referenced anywhere in the visible part of this script;
# kept because constructing it is a module-level side effect and the name
# may be relied on elsewhere — confirm before removing.
google_maps_api = GoogleMapsAPI()

# Only _id and cleaned_html are fetched; the projection silently omits
# cleaned_html for documents that do not have that field.
event_urls = db.event_urls.find(
    {"final": True, "class": "EventDetail"},
    {"_id": 1, "cleaned_html": 1},
)

for event in event_urls:
    # The query does not require cleaned_html to exist, so a plain
    # event["cleaned_html"] would raise KeyError and abort the whole run
    # on the first document without it. Skip such documents instead.
    html = event.get("cleaned_html")
    if html is None:
        print(f"Skipping {event['_id']}: no cleaned_html")
        continue

    md = convert_html_to_md(html)
    cleaned_md = remove_boilerplate(md)

    print(cleaned_md)
    db.event_urls.update_one(
        {"_id": event["_id"]},
        {"$set": {"markdown": cleaned_md}},
    )