acecalisto3 committed on
Commit
d75df0a
·
verified ·
1 Parent(s): 7919662

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -24,6 +24,15 @@ client = InferenceClient(model=HF_MODEL, token=HF_TOKEN)
24
  # State to manage datasets
25
  datasets_queue = []
26
 
 
 
 
 
 
 
 
 
 
27
  # Helper Functions
28
  def extract_text_from_pdf(file_path):
29
  try:
 
24
  # State to manage datasets
25
  datasets_queue = []
26
 
27
+ def extract_text_from_url(url):
28
+ try:
29
+ response = requests.get(url, timeout=10)
30
+ response.raise_for_status()
31
+ soup = BeautifulSoup(response.content, "lxml") # Specify lxml here
32
+ return soup.get_text()
33
+ except Exception as e:
34
+ return f"Error scraping URL: {e}"
35
+
36
  # Helper Functions
37
  def extract_text_from_pdf(file_path):
38
  try: