Toumaima committed on
Commit
ed10d25
·
verified ·
1 Parent(s): 6061cbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -13,7 +13,8 @@ import moviepy
13
  import gradio as gr
14
  import pandas as pd
15
  from spacy.cli import download
16
-
 
17
 
18
  class BasicAgent:
19
  def __init__(self):
@@ -22,11 +23,15 @@ class BasicAgent:
22
  self.spacy = spacy.load("en_core_web_sm")
23
  except OSError:
24
  download("en_core_web_sm")
25
- self.spacy = spacy.load("en_core_web_sm")
 
26
  self.whisper_model = whisper.load_model("base")
27
  self.qa_pipeline = pipeline("question-answering", truncation=True, padding=True)
28
  self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
29
- self.embedding_model = pipeline("feature-extraction", truncation=True)
 
 
 
30
 
31
  def split_text_into_chunks(self, text, max_length=512):
32
  """Split text into chunks smaller than `max_length` tokens."""
 
13
  import gradio as gr
14
  import pandas as pd
15
  from spacy.cli import download
16
+ from transformers import AutoTokenizer, AutoModel
17
+ import torch
18
 
19
  class BasicAgent:
20
  def __init__(self):
 
23
  self.spacy = spacy.load("en_core_web_sm")
24
  except OSError:
25
  download("en_core_web_sm")
26
+ self.spacy = spacy.load("en_core_web_sm")
27
+
28
  self.whisper_model = whisper.load_model("base")
29
  self.qa_pipeline = pipeline("question-answering", truncation=True, padding=True)
30
  self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
31
+
32
+ # ✅ FIXED: safer embedding model setup
33
+ self.embedding_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
34
+ self.embedding_model = AutoModel.from_pretrained("bert-base-uncased")
35
 
36
  def split_text_into_chunks(self, text, max_length=512):
37
  """Split text into chunks smaller than `max_length` tokens."""