Spaces:
Build error
Build error
### ----------------------------------------------------------------------- | |
### (FULL, Revised) version_1.07ALPHA_app.py | |
### ----------------------------------------------------------------------- | |
# ------------------------------------------------------------------------- | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ------------------------------------------------------------------------- | |
import spaces | |
import gradio as gr | |
from PIL import Image | |
#from pydub import AudioSegment | |
#from scipy.io import wavfile | |
import os | |
import re | |
import time | |
import warnings | |
#import datetime | |
#import pandas as pd | |
#import csv | |
import subprocess | |
from pathlib import Path | |
import tempfile | |
from fpdf import FPDF | |
import psutil | |
from gpuinfo import GPUInfo | |
#import numpy as np | |
import torch | |
#import torchaudio | |
#import torchaudio.transforms as transforms | |
from transformers import pipeline #AutoModel | |
#import spacy | |
#import networkx as nx | |
#from sklearn.feature_extraction.text import TfidfVectorizer | |
#from sklearn.metrics.pairwise import cosine_similarity | |
warnings.filterwarnings("ignore") | |
# ------------header section------------
# Markdown title shown at the top of the page (user-facing Norwegian copy).
HEADER_INFO = "\n".join(
    (
        "# SWITCHVOX ✨|🇳🇴 *Switch Work Web app*",
        "**Transkribering av lydfiler til norsk skrift**",
    )
)

# Banner image URL.
# NOTE(review): the query string carries `Expires=1725145079` (looks like a
# Unix timestamp on a signed CDN link) -- confirm the image still loads, or
# swap in a permanent asset URL.
LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"

# HTML snippet that centres the banner image and stretches it to full width.
SIDEBAR_INFO = (
    '\n<div align="center">\n'
    f'<img src="{LOGO}" style="width: 100%; height: auto;"/>\n'
    "</div>\n"
)
# Hugging Face model id of the speech-recognition checkpoint.
_ASR_MODEL_ID = "NbAiLab/nb-whisper-large"

# ASR pipeline, created on first use and then reused so the model is not
# re-instantiated on every request.
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared ASR pipeline, building it on the first call."""
    global _asr_pipeline
    if _asr_pipeline is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=_ASR_MODEL_ID,
            device=device,
        )
        # Options the author left disabled:
        # chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'}
    return _asr_pipeline


def transcribe(microphone, file_upload):
    """Transcribe one audio input and report timing and resource metrics.

    Args:
        microphone: Path of the microphone recording, or ``None`` if the
            user did not record anything.
        file_upload: Path of the uploaded audio file, or ``None`` if nothing
            was uploaded.

    Returns:
        A ``(text, system_info)`` tuple. ``text`` is the transcription,
        prefixed with a warning when both inputs were supplied (the
        microphone recording takes precedence). ``system_info`` is a
        multi-line summary of memory, processing time, word count, GPU and
        CPU usage. When neither input is supplied, ``text`` is an error
        message and ``system_info`` is empty.
    """
    if microphone is None and file_upload is None:
        return (
            "ERROR: You have to either use the microphone or upload an audio file",
            "",
        )

    # Previously `warn_output` was referenced without ever being assigned,
    # so every call ended in a NameError.
    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the "
            "uploaded audio will be discarded.\n"
        )

    file = microphone if microphone is not None else file_upload

    start_time = time.time()
    pipe = _get_asr_pipeline()
    text = pipe(file)["text"]
    output_time = time.time() - start_time

    word_count = len(text.split())

    # --GPU metrics (first reported GPU only; 0 when none is reported)
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0

    # --CPU metric (samples over a one-second interval)
    cpu_usage = psutil.cpu_percent(interval=1)

    # --system info string
    gib = 1024 * 1024 * 1024
    system_info = (
        "\n"
        f"*Memory: {memory.total / gib:.2f}GB, used: {memory.percent}%, "
        f"available: {memory.available / gib:.2f}GB.*\n"
        f"*Processing time: {output_time:.2f} seconds.*\n"
        f"*Number of words: {word_count}*\n"
        f"*GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*\n"
        f"*CPU Usage: {cpu_usage}%*\n"
    )
    return warn_output + text, system_info
def _latin1_safe(value):
    """Return *value* with characters outside Latin-1 replaced by ``?``."""
    return value.encode("latin-1", "replace").decode("latin-1")


def save_to_pdf(text, summary):
    """Write the transcription (and optional summary) to a one-page-flow PDF.

    Args:
        text: Transcribed text; the "Text:" section is skipped when empty.
        summary: Summary text; the "Summary:" section is skipped when empty.

    Returns:
        Path of the written PDF file (``"transcription_.pdf"``).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # TODO (author's note): add same if/elif logic as in transcribe() here.

    # The built-in Arial font only covers Latin-1, so anything outside that
    # range (emoji, typographic quotes, ...) is replaced up front instead of
    # letting PDF generation fail with an encoding error. Norwegian letters
    # such as æ/ø/å are within Latin-1 and are kept as-is.
    if text:
        pdf.multi_cell(0, 10, "Text:\n" + _latin1_safe(text))
        pdf.ln(10)  # vertical gap before the next section
    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + _latin1_safe(summary))

    pdf_output_path = "transcription_.pdf"
    pdf.output(pdf_output_path)
    return pdf_output_path
# Build the page: banner, instructions + audio inputs, results, PDF export.
# NOTE(review): the nesting below was reconstructed from flattened text --
# confirm which widgets belong to which Row/Column before relying on it.
with gr.Blocks() as iface:
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)

    # Usage instructions next to the two audio inputs and the start button.
    with gr.Row():
        gr.Markdown(
            "\n"
            "##### 1. Last opp lydfila 🔊\n"
            '2. Trykk på "Transkriber" knappen og vent på svar ☕️\n'
            "3. Går rimelig bra kjapt med Norwegian NB-Whisper Large⚡️\n"
            "4. Planlegger tilleggs-funksjoner senere😅\n"
            "##### "
        )
        microphone = gr.Audio(sources="microphone", type="filepath")
        upload = gr.Audio(sources="upload", type="filepath")
        transcribe_btn = gr.Button("Transcribe Interview")

    # Transcription result and the metrics string returned by transcribe().
    with gr.Row():
        text_output = gr.Textbox(label="Transkribert Tekst")
        with gr.Column():
            system_info = gr.Textbox(label="System Info")

    # PDF export of the transcribed text (no summary is passed).
    with gr.Tabs():
        with gr.TabItem("Download PDF"):
            pdf_text_only = gr.Button("Download PDF with Transcribed Text")
            pdf_output = gr.File(label="Download PDF")
            pdf_text_only.click(
                fn=lambda text: save_to_pdf(text, ""),
                inputs=[text_output],
                outputs=[pdf_output],
            )

    transcribe_btn.click(
        fn=transcribe,
        inputs=[microphone, upload],
        outputs=[text_output, system_info],
    )

iface.launch(debug=True)