Chris4K committed
Commit 415d65e · verified · 1 Parent(s): 70989a1

Update app.py

Files changed (1)
  1. app.py +73 -29
app.py CHANGED
@@ -11,10 +11,18 @@ import icalendar
 import uuid
 import re
 import json
+import os
 
 # Hugging Face imports
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
+try:
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    import torch
+    TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    TRANSFORMERS_AVAILABLE = False
+
+# Hugging Face Inference Client
+from huggingface_hub import InferenceClient
 
 class EventScraper:
     def __init__(self, urls, timezone='Europe/Berlin'):
@@ -39,25 +47,69 @@ class EventScraper:
         # Model and tokenizer will be loaded on first use
         self.model = None
         self.tokenizer = None
+        self.client = None
 
     def setup_llm(self):
         """Setup Hugging Face LLM for event extraction"""
-        if self.model is not None and self.tokenizer is not None:
-            return
-
+        # Try local model first
+        if TRANSFORMERS_AVAILABLE:
+            try:
+                model_name = "meta-llama/Llama-3.2-3B-Instruct"
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float16,
+                    return_dict_in_generate=False,
+                    device_map='auto'
+                )
+                return
+            except Exception as local_err:
+                gr.Warning(f"Local model setup failed: {str(local_err)}")
+
+        # Fallback to Inference Client
         try:
-            model_name = "meta-llama/Llama-3.2-3B-Instruct"
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16,
-                return_dict_in_generate=False,
-                device_map='auto'
-            )
+            # Try to get Hugging Face token from environment
+            hf_token = os.getenv('HF_TOKEN')
+
+            # Setup Inference Client
+            if hf_token:
+                self.client = InferenceClient(
+                    model="meta-llama/Llama-3.2-3B-Instruct",
+                    token=hf_token
+                )
+            else:
+                # Public model access without token
+                self.client = InferenceClient(
+                    model="meta-llama/Llama-3.2-3B-Instruct"
+                )
         except Exception as e:
-            gr.Warning(f"LLM Setup Error: {str(e)}")
+            gr.Warning(f"Inference Client setup error: {str(e)}")
             raise
 
+    def generate_with_model(self, prompt):
+        """Generate text using either local model or inference client"""
+        if self.model and self.tokenizer:
+            # Use local model
+            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+            outputs = self.model.generate(
+                inputs.input_ids,
+                max_new_tokens=12000,
+                do_sample=True,
+                temperature=0.9
+            )
+            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        elif self.client:
+            # Use Inference Client
+            return self.client.text_generation(
+                prompt,
+                max_new_tokens=12000,
+                temperature=0.9
+            )
+
+        else:
+            raise ValueError("No model or client available for text generation")
+
     def fetch_webpage_content(self, url):
         """Fetch webpage content"""
         try:
@@ -160,17 +212,8 @@ class EventScraper:
         # Generate prompt
         prompt = self.generate_event_extraction_prompt(text_content)
 
-        # Tokenize and generate
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-        outputs = self.model.generate(
-            inputs.input_ids,
-            max_new_tokens=12000,
-            do_sample=True,
-            temperature=0.9
-        )
-
-        # Decode response
-        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Generate response
+        response = self.generate_with_model(prompt)
 
         # Parse events
         parsed_events = self.parse_llm_response(response)
@@ -250,7 +293,7 @@ def scrape_events_with_urls(urls):
 
     if not url_list:
         gr.Warning("Please provide at least one valid URL.")
-        return [], ""
+        return "", ""
 
     try:
         # Initialize scraper
@@ -269,7 +312,7 @@ def scrape_events_with_urls(urls):
 
     except Exception as e:
        gr.Warning(f"Error in event scraping: {str(e)}")
-        return [], ""
+        return "", ""
 
 # Create Gradio Interface
 def create_gradio_app():
@@ -287,9 +330,9 @@ def create_gradio_app():
 
     with gr.Row():
         with gr.Column():
-            events_output = gr.Textbox(label="Extracted Events (JSON)" )
+            events_output = gr.Textbox(label="Extracted Events (JSON)", lines=10)
         with gr.Column():
-            ical_output = gr.Textbox(label="iCal Export")
+            ical_output = gr.Textbox(label="iCal Export", lines=10)
 
     scrape_btn.click(
         fn=scrape_events_with_urls,
@@ -298,6 +341,7 @@ def create_gradio_app():
     )
 
    gr.Markdown("**Note:** Requires an internet connection and may take a few minutes to process.")
+    gr.Markdown("Set HF_TOKEN environment variable for authenticated access.")
 
     return demo
 
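
For context, the net effect of this commit is a two-tier generation path: a local transformers model when the library loads, otherwise the hosted Inference API. A minimal standalone sketch of that pattern follows, assuming huggingface_hub is installed and HF_TOKEN is optionally set in the environment; the build_generator helper and the smaller max_new_tokens value are illustrative choices, not code from app.py.

import os
from huggingface_hub import InferenceClient

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

def build_generator(model_name="meta-llama/Llama-3.2-3B-Instruct"):
    """Return a generate(prompt) callable, preferring a local model."""
    if TRANSFORMERS_AVAILABLE:
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name, torch_dtype=torch.float16, device_map="auto"
            )

            def generate(prompt):
                # Tokenize, generate, and decode locally
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                outputs = model.generate(
                    inputs.input_ids,
                    max_new_tokens=1024,  # illustrative; the commit uses 12000
                    do_sample=True,
                    temperature=0.9,
                )
                return tokenizer.decode(outputs[0], skip_special_tokens=True)

            return generate
        except Exception:
            pass  # e.g. gated weights or no GPU; fall through to the hosted API

    # Hosted fallback; token may be None for public access
    client = InferenceClient(model=model_name, token=os.getenv("HF_TOKEN"))
    return lambda prompt: client.text_generation(
        prompt, max_new_tokens=1024, temperature=0.9
    )

Note that both branches in the commit request max_new_tokens=12000; hosted text-generation endpoints commonly cap output length well below that, which is why the sketch uses a smaller value.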