Update app.py
Browse files
app.py
CHANGED
@@ -163,58 +163,58 @@ class EventScraper:
|
|
163 |
Only return the json. nothing else. no comments."""
|
164 |
return prompt
|
165 |
|
166 |
-
def parse_llm_response(self, response):
|
167 |
-
|
168 |
-
try:
|
169 |
-
# Clean the response and handle nested lists
|
170 |
-
response = response.strip()
|
171 |
-
|
172 |
-
# Try parsing as JSON, handling potential nested structures
|
173 |
-
def flatten_events(data):
|
174 |
-
if isinstance(data, list):
|
175 |
-
flattened = []
|
176 |
-
for item in data:
|
177 |
-
if isinstance(item, list):
|
178 |
-
flattened.extend(flatten_events(item))
|
179 |
-
elif isinstance(item, dict):
|
180 |
-
flattened.append(item)
|
181 |
-
return flattened
|
182 |
-
return []
|
183 |
-
|
184 |
try:
|
185 |
-
#
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
events = []
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
gr.Warning(f"Parsing error: {str(e)}")
|
217 |
-
return []
|
218 |
|
219 |
def scrape_events(self):
|
220 |
"""Main method to scrape events from all URLs"""
|
|
|
163 |
Only return the json. nothing else. no comments."""
|
164 |
return prompt
|
165 |
|
166 |
def parse_llm_response(self, response):
    """Parse the LLM's text reply into a list of structured event dicts.

    Args:
        response: Raw text returned by the LLM. Expected to contain a JSON
            array of event objects, possibly nested in extra lists or
            surrounded by prose; a single top-level JSON object is also
            accepted (generalization — previously it was silently dropped).

    Returns:
        list[dict]: Events that have a truthy ``name`` key, each with
        ``date``/``time``/``location``/``description`` backfilled to ``""``
        when missing. ``[]`` when nothing parseable is found or on error.
    """
    import re  # fallback path only; kept local like the original

    def flatten_events(data):
        """Recursively flatten nested lists, keeping only dict items."""
        if isinstance(data, dict):
            # LLMs sometimes emit one object instead of a one-element
            # array — treat it as a single-event list.
            return [data]
        if isinstance(data, list):
            flattened = []
            for item in data:
                if isinstance(item, list):
                    flattened.extend(flatten_events(item))
                elif isinstance(item, dict):
                    flattened.append(item)
            return flattened
        return []

    def try_parse(text):
        """json.loads + flatten; None when *text* is not valid JSON."""
        try:
            return flatten_events(json.loads(text))
        except json.JSONDecodeError:
            return None

    try:
        response = response.strip()

        # First attempt: the whole response is JSON.
        events = try_parse(response)
        if events is None:
            # Fallback: extract the outermost JSON array (or object)
            # embedded in surrounding prose. DOTALL lets '.' cross
            # newlines; the old re.MULTILINE flag was a no-op here
            # (the pattern contains no ^ or $).
            match = re.search(r'\[.*\]|\{.*\}', response, re.DOTALL)
            events = try_parse(match.group(0)) if match else None
        if events is None:
            events = []

        # Keep only events with a name; backfill optional fields so
        # downstream rendering can rely on the keys existing.
        cleaned_events = []
        for event in events:
            if event.get('name'):
                event.setdefault('date', '')
                event.setdefault('time', '')
                event.setdefault('location', '')
                event.setdefault('description', '')
                cleaned_events.append(event)

        return cleaned_events

    except Exception as e:
        # Boundary handler: surface the problem in the Gradio UI and
        # degrade to "no events" instead of crashing the scrape.
        gr.Warning(f"Parsing error: {str(e)}")
        return []
|
|
|
|
|
218 |
|
219 |
def scrape_events(self):
|
220 |
"""Main method to scrape events from all URLs"""
|