Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,21 +2,22 @@ import os
|
|
2 |
import re
|
3 |
import pandas as pd
|
4 |
from PyPDF2 import PdfReader
|
5 |
-
from transformers import pipeline,
|
6 |
from gradio import Interface, File
|
7 |
import gradio as gr
|
8 |
import spaces
|
9 |
|
10 |
-
#
|
11 |
-
data = []
|
12 |
-
|
13 |
-
# Load the LED tokenizer and model
|
14 |
led_tokenizer = AutoTokenizer.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
|
15 |
-
classifier = pipeline("text-classification", model="allenai/led-base-16384-multi_lexsum-source-long", tokenizer=led_tokenizer, framework="pt")
|
16 |
-
|
17 |
-
# Load the summarization model and tokenizer
|
18 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", tokenizer="sshleifer/distilbart-cnn-12-6", framework="pt")
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def clean_text(text: str) -> str:
    """Return *text* keeping only ASCII letters, digits and whitespace.

    Every character that is not ``a-z``, ``A-Z``, ``0-9`` or whitespace
    (as matched by ``\\s``) is deleted; nothing is substituted in its
    place, so punctuation-separated tokens are joined together
    (``"a-b"`` becomes ``"ab"``).

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; an empty string stays empty.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
@@ -72,6 +73,7 @@ def extract_title(text, max_length=20):
|
|
72 |
# Define the Gradio interface for file upload and download
|
73 |
@spaces.GPU(duration=120)
|
74 |
def process_files(pdf_files):
|
|
|
75 |
for pdf_file in pdf_files:
|
76 |
text = extract_text(pdf_file)
|
77 |
|
|
|
2 |
import re
|
3 |
import pandas as pd
|
4 |
from PyPDF2 import PdfReader
|
5 |
+
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
|
6 |
from gradio import Interface, File
|
7 |
import gradio as gr
|
8 |
import spaces
|
9 |
|
10 |
+
# `torch` is used below for the CUDA availability check but is not among the
# imports at the top of this file; without this import the module fails at
# load time with NameError. It is placed after `import spaces`.
import torch

# Load the tokenizer for the LED checkpoint and the DistilBART summarization pipeline
led_tokenizer = AutoTokenizer.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", tokenizer="sshleifer/distilbart-cnn-12-6", framework="pt")

# Load the LED model separately (it is not wrapped in a pipeline here)
model = AutoModelForSeq2SeqLM.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")

# Move the model to CUDA if available; otherwise it stays on the CPU
if torch.cuda.is_available():
    model = model.to("cuda")
|
20 |
+
|
21 |
def clean_text(text: str) -> str:
    """Return *text* keeping only ASCII letters, digits and whitespace.

    Every character that is not ``a-z``, ``A-Z``, ``0-9`` or whitespace
    (as matched by ``\\s``) is deleted; nothing is substituted in its
    place, so punctuation-separated tokens are joined together
    (``"a-b"`` becomes ``"ab"``).

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; an empty string stays empty.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
|
|
73 |
# Define the Gradio interface for file upload and download
|
74 |
@spaces.GPU(duration=120)
|
75 |
def process_files(pdf_files):
|
76 |
+
data = []
|
77 |
for pdf_file in pdf_files:
|
78 |
text = extract_text(pdf_file)
|
79 |
|