Spaces: Build error
Update app.py
app.py CHANGED
@@ -1,4 +1,11 @@
-import os
+import networkx as nx
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import nltk
+nltk.download('punkt')
+from nltk.tokenize import sent_tokenize
+
+
 import gradio as gr
 import warnings
 import torch
@@ -6,31 +13,26 @@ from transformers import WhisperTokenizer, WhisperForConditionalGeneration, Whis
 from pydub import AudioSegment
 import soundfile as sf
 import numpy as np
-import nltk
 from fpdf import FPDF
 import time
+import os
 
 warnings.filterwarnings("ignore")
-nltk.download('punkt')
 
-# Load environment variable
 HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
 
-
+
 tokenizer = WhisperTokenizer.from_pretrained("NbAiLabBeta/nb-whisper-medium")
 model = WhisperForConditionalGeneration.from_pretrained("NbAiLabBeta/nb-whisper-medium")
 processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")
 
-
+
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-torch_dtype = torch.float32
 model.to(device)
 
-# Initialize pipeline
 asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch.float32)
 
 def transcribe_audio(audio_file):
-    # Perform transcription
     with torch.no_grad():
         output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
     return output["text"]
@@ -95,11 +97,24 @@ def transcribe_audio(audio_file, batch_size=4):
 
     return transcription.strip(), result
 
+# Graph-based summarization (TextRank)
 def summarize_text(text):
-
-
-
-
+    sentences = sent_tokenize(text)
+    if len(sentences) == 0:
+        return ""
+
+    tfidf_vectorizer = TfidfVectorizer()
+    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
+    similarity_matrix = cosine_similarity(tfidf_matrix)
+
+    nx_graph = nx.from_numpy_array(similarity_matrix)
+    scores = nx.pagerank(nx_graph)
+
+    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
+
+    # Select top N sentences (e.g., 3 sentences for the summary)
+    top_n = 3
+    summary = " ".join([s for _, s in ranked_sentences[:top_n]])
     return summary
 
 # HTML syntax for imagery
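The new summarize_text is a TextRank-style extractive summarizer: each sentence becomes a graph node, edges are weighted by TF-IDF cosine similarity between sentence pairs, PageRank scores each sentence by its centrality, and the three top-ranked sentences are joined into the summary. The sketch below walks the same steps standalone, without loading the Whisper model; the sample text is made up for illustration.

# Minimal standalone sketch of the TextRank steps added in this commit
# (same library calls as summarize_text; sample text is illustrative only).
import networkx as nx
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')

text = (
    "TextRank treats each sentence as a node in a graph. "
    "Edges are weighted by the TF-IDF cosine similarity between sentence pairs. "
    "PageRank then scores each sentence by its centrality in that graph. "
    "The three highest-scoring sentences are joined to form the summary. "
    "Sentences that share little vocabulary with the rest rank low."
)
sentences = sent_tokenize(text)
# Sentence-by-sentence similarity matrix from TF-IDF vectors
similarity_matrix = cosine_similarity(TfidfVectorizer().fit_transform(sentences))
# PageRank over the similarity graph; node i corresponds to sentences[i]
scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))
ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
print(" ".join(s for _, s in ranked[:3]))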
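The Space status still reads "Build error". The diff itself does not show the cause, but a common one when a commit introduces new imports (networkx, scikit-learn, nltk here) is that they are missing from requirements.txt. A hypothetical requirements list covering every import visible in this app.py would be as follows; the Space's real requirements file is not part of this diff.

# Hypothetical requirements.txt for the imports visible in app.py;
# the actual dependency file for this Space is not shown.
torch
transformers
gradio
pydub
soundfile
numpy
fpdf
nltk
networkx
scikit-learn

Note also that on newer nltk releases sent_tokenize additionally requires the punkt_tab resource, so nltk.download('punkt_tab') may be needed alongside 'punkt'.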