Yoxas committed
Commit 8e6b567 · verified · 1 Parent(s): 3453878

Update app.py

Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -2,21 +2,22 @@ import os
  import re
  import pandas as pd
  from PyPDF2 import PdfReader
- from transformers import pipeline, AutoTokenizer
+ from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
  from gradio import Interface, File
  import gradio as gr
  import spaces
 
- # Initialize a list to store the data
- data = []
-
- # Load the LED tokenizer and model
+ # Load the tokenizer and model
  led_tokenizer = AutoTokenizer.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
- classifier = pipeline("text-classification", model="allenai/led-base-16384-multi_lexsum-source-long", tokenizer=led_tokenizer, framework="pt")
-
- # Load the summarization model and tokenizer
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", tokenizer="sshleifer/distilbart-cnn-12-6", framework="pt")
 
+ # Load the model separately
+ model = AutoModelForSeq2SeqLM.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
+
+ # Move the model to CUDA if available
+ if torch.cuda.is_available():
+     model = model.to("cuda")
+
  # Function to clean text by keeping only alphanumeric characters and spaces
  def clean_text(text):
      return re.sub(r'[^a-zA-Z0-9\s]', '', text)
@@ -72,6 +73,7 @@ def extract_title(text, max_length=20):
  # Define the Gradio interface for file upload and download
  @spaces.GPU(duration=120)
  def process_files(pdf_files):
+     data = []
      for pdf_file in pdf_files:
          text = extract_text(pdf_file)
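
Note that the new device-placement code calls torch.cuda.is_available(), but no import torch appears in the import block of this hunk; unless torch is imported elsewhere in app.py, the check would raise a NameError when the module loads. A minimal sketch of the loading sequence with that import added (an assumption about the rest of app.py, not a change made by this commit):

import torch
from transformers import AutoModelForSeq2SeqLM

# Load the LED seq2seq checkpoint and move it to the GPU when one is present
model = AutoModelForSeq2SeqLM.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
if torch.cuda.is_available():
    model = model.to("cuda")  # relies on the torch import above, which the diff itself does not show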
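
The LED checkpoint is now loaded with AutoModelForSeq2SeqLM instead of a text-classification pipeline, but this diff does not show where model is invoked. The sketch below is illustrative only, not code from the commit: it uses the standard transformers generate workflow for a seq2seq model, and the 16384/512 token limits and beam size are assumed values, not settings from app.py.

def led_summarize(text):
    # Tokenize the document, truncating to LED's long-context window (assumed limit)
    inputs = led_tokenizer(text, return_tensors="pt", truncation=True, max_length=16384)
    # Keep the input tensors on the same device as the model (CPU or CUDA)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    # Generate a summary and decode it back to text
    summary_ids = model.generate(**inputs, max_length=512, num_beams=4)
    return led_tokenizer.decode(summary_ids[0], skip_special_tokens=True)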
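
The second hunk moves data = [] from module scope into process_files. At module scope the list lives for the lifetime of the Space, so rows from earlier uploads keep accumulating across requests; re-initializing it per call gives each upload batch a fresh table. A sketch of the resulting pattern (the row fields and the DataFrame return are assumptions, not shown in this hunk):

@spaces.GPU(duration=120)
def process_files(pdf_files):
    data = []  # fresh list per request, so earlier uploads don't leak into this batch
    for pdf_file in pdf_files:
        text = extract_text(pdf_file)
        # Hypothetical row; the real columns built by app.py are not visible in this diff
        data.append({"file": getattr(pdf_file, "name", str(pdf_file)), "text": clean_text(text)})
    return pd.DataFrame(data)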