Yoxas committed
Commit 8e6b567 · verified · 1 Parent(s): 3453878

Update app.py

Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -2,21 +2,22 @@ import os
  import re
  import pandas as pd
  from PyPDF2 import PdfReader
- from transformers import pipeline, AutoTokenizer
+ from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
  from gradio import Interface, File
  import gradio as gr
  import spaces
 
- # Initialize a list to store the data
- data = []
-
- # Load the LED tokenizer and model
+ # Load the tokenizer and model
  led_tokenizer = AutoTokenizer.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
- classifier = pipeline("text-classification", model="allenai/led-base-16384-multi_lexsum-source-long", tokenizer=led_tokenizer, framework="pt")
-
- # Load the summarization model and tokenizer
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", tokenizer="sshleifer/distilbart-cnn-12-6", framework="pt")
 
+ # Load the model separately
+ model = AutoModelForSeq2SeqLM.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
+
+ # Move the model to CUDA if available
+ if torch.cuda.is_available():
+     model = model.to("cuda")
+
  # Function to clean text by keeping only alphanumeric characters and spaces
  def clean_text(text):
      return re.sub(r'[^a-zA-Z0-9\s]', '', text)
@@ -72,6 +73,7 @@ def extract_title(text, max_length=20):
  # Define the Gradio interface for file upload and download
  @spaces.GPU(duration=120)
  def process_files(pdf_files):
+     data = []
      for pdf_file in pdf_files:
          text = extract_text(pdf_file)
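
Note that the new device-placement code calls torch.cuda.is_available(), but no import torch appears in the import block of this hunk; unless torch is imported elsewhere in app.py, the check would raise a NameError when the module loads. A minimal sketch of the loading sequence with that import added (an assumption about the rest of app.py, not a change made by this commit):

import torch
from transformers import AutoModelForSeq2SeqLM

# Load the LED seq2seq checkpoint and move it to the GPU when one is present
model = AutoModelForSeq2SeqLM.from_pretrained("allenai/led-base-16384-multi_lexsum-source-long")
if torch.cuda.is_available():
    model = model.to("cuda")  # relies on the torch import above, which the diff itself does not show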
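
The LED checkpoint is now loaded with AutoModelForSeq2SeqLM instead of a text-classification pipeline, but this diff does not show where model is invoked. The sketch below is illustrative only, not code from the commit: it uses the standard transformers generate workflow for a seq2seq model, and the 16384/512 token limits and beam size are assumed values, not settings from app.py.

def led_summarize(text):
    # Tokenize the document, truncating to LED's long-context window (assumed limit)
    inputs = led_tokenizer(text, return_tensors="pt", truncation=True, max_length=16384)
    # Keep the input tensors on the same device as the model (CPU or CUDA)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    # Generate a summary and decode it back to text
    summary_ids = model.generate(**inputs, max_length=512, num_beams=4)
    return led_tokenizer.decode(summary_ids[0], skip_special_tokens=True)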
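
The second hunk moves data = [] from module scope into process_files. At module scope the list lives for the lifetime of the Space, so rows from earlier uploads keep accumulating across requests; re-initializing it per call gives each upload batch a fresh table. A sketch of the resulting pattern (the row fields and the DataFrame return are assumptions, not shown in this hunk):

@spaces.GPU(duration=120)
def process_files(pdf_files):
    data = []  # fresh list per request, so earlier uploads don't leak into this batch
    for pdf_file in pdf_files:
        text = extract_text(pdf_file)
        # Hypothetical row; the real columns built by app.py are not visible in this diff
        data.append({"file": getattr(pdf_file, "name", str(pdf_file)), "text": clean_text(text)})
    return pd.DataFrame(data)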