Spaces:

camparchimedes
/

nb

Build error

App Files Files

nb / app.py

camparchimedes

Update app.py

638acc9 verified 9 months ago

raw

history blame

6.1 kB

	### -----------------------------------------------------------------------
	### (FULL, Revised) version_1.07ALPHA_app.py
	### -----------------------------------------------------------------------

	# -------------------------------------------------------------------------
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# -------------------------------------------------------------------------

	import spaces
	import gradio as gr
	from PIL import Image
	#from pydub import AudioSegment
	#from scipy.io import wavfile

	import os
	import re
	import time
	import warnings
	#import datetime
	#import pandas as pd
	#import csv
	import subprocess
	from pathlib import Path
	import tempfile
	from fpdf import FPDF

	import psutil
	from gpuinfo import GPUInfo

	#import numpy as np
	import torch
	#import torchaudio
	#import torchaudio.transforms as transforms

	from transformers import pipeline #AutoModel

	#import spacy
	#import networkx as nx
	#from sklearn.feature_extraction.text import TfidfVectorizer
	#from sklearn.metrics.pairwise import cosine_similarity

	# Install a catch-all "ignore" filter: no message, category or module is
	# given, so the rule applies to every warning raised after this point.
	warnings.filterwarnings(action="ignore")

	# ------------header section------------
	# Markdown shown at the top of the page (passed to gr.Markdown).
	# The backslash before "|" is written as "\\" so the literal keeps the same
	# backslash-pipe characters without relying on the invalid escape "\|",
	# which newer Python versions report as a SyntaxWarning.
	HEADER_INFO = """
	# SWITCHVOX ✨\\|🇳🇴 Switch Work Web app
	Transkribering av lydfiler til norsk skrift
	""".strip()
	# URL of the banner image that SIDEBAR_INFO embeds.
	# NOTE(review): this is a signed link whose query string carries
	# "Expires=1725145079"; presumably it stops resolving after that Unix time --
	# confirm, and consider pointing at a permanent copy of banner_trans.png.
	LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"
	# HTML snippet (rendered through gr.HTML) that centres the banner image and
	# lets it span the full width of its container.
	SIDEBAR_INFO = f"""
	<div align="center">
	<img src="{LOGO}" style="width: 100%; height: auto;"/>
	</div>
	"""

	@spaces.GPU()
	def transcribe(microphone, file_upload):
	    """Transcribe an audio file with NB-Whisper and report resource usage.

	    Args:
	        microphone: Path of a microphone recording, or ``None``. It takes
	            precedence over ``file_upload`` when both are given.
	        file_upload: Path of an uploaded audio file, or ``None``.

	    Returns:
	        A ``(text, system_info)`` tuple: the transcribed text, and a
	        multi-line string with memory, timing, word-count, GPU and CPU
	        figures. The reported processing time covers both building the
	        pipeline and running it.

	    Raises:
	        gr.Error: If neither a recording nor an upload was supplied.
	    """
	    file = microphone if microphone is not None else file_upload
	    if file is None:
	        # Fail with a readable message instead of handing None to the pipeline.
	        raise gr.Error("No audio provided: record with the microphone or upload a file.")

	    start_time = time.time()

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    # NOTE(review): the pipeline is constructed on every call; presumably this
	    # reloads the model each time -- consider building it once if the hosting
	    # environment allows it.
	    pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
	    # Options left disabled by the author:
	    # chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'}
	    text = pipe(file)["text"]

	    output_time = time.time() - start_time
	    word_count = len(text.split())

	    # Host RAM figures.
	    memory = psutil.virtual_memory()
	    bytes_per_gib = 1024 * 1024 * 1024

	    # GPU figures: report the first entry of each list, or 0 when the list is empty.
	    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
	    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
	    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0

	    # CPU load measured over a one-second interval.
	    cpu_usage = psutil.cpu_percent(interval=1)

	    # The report lines carry no extra indentation so the displayed text stays
	    # exactly as laid out in the literal.
	    system_info = f"""
	Memory: {memory.total / bytes_per_gib:.2f}GB, used: {memory.percent}%, available: {memory.available / bytes_per_gib:.2f}GB.*
	Processing time: {output_time:.2f} seconds.
	Number of words: {word_count}
	GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}
	CPU Usage: {cpu_usage}%
	"""

	    return text, system_info

	def save_to_pdf(text, summary):
	    """Write the transcription, and optionally a summary, to a PDF file.

	    Args:
	        text: Transcribed text. The "Text:" section is skipped when empty.
	        summary: Summary text. The "Summary:" section is skipped when empty.

	    Returns:
	        Path of the generated PDF. Each call writes to its own temporary
	        file, so simultaneous requests do not overwrite one another.
	    """
	    def _latin1_safe(value):
	        # The built-in font selected below is limited to Latin-1 (see the FPDF
	        # documentation); swap anything outside that range for "?" so PDF
	        # generation does not abort on such characters.
	        return value.encode("latin-1", "replace").decode("latin-1")

	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_font("Arial", size=12)

	    if text:
	        pdf.multi_cell(0, 10, "Text:\n" + _latin1_safe(text))

	    pdf.ln(10)  # vertical gap between the two sections

	    if summary:
	        pdf.multi_cell(0, 10, "Summary:\n" + _latin1_safe(summary))

	    # Reserve a unique file name, close the handle, then let FPDF write to it.
	    with tempfile.NamedTemporaryFile(prefix="transcription_", suffix=".pdf", delete=False) as handle:
	        pdf_output_path = handle.name
	    pdf.output(pdf_output_path)
	    return pdf_output_path


	# Gradio page: banner and heading, an input row, an output row, and a PDF
	# export tab.
	# NOTE(review): the nesting below is reconstructed from the blank-line
	# grouping of the statements -- confirm it matches the intended layout.
	iface = gr.Blocks()

	with iface:

	    gr.HTML(SIDEBAR_INFO)
	    gr.Markdown(HEADER_INFO)

	    # Instructions, the two audio inputs (both deliver file paths) and the
	    # button that starts transcription.
	    with gr.Row():
	        gr.Markdown('''
	##### 1. Last opp lydfila 🔊
	2. Trykk på "Transkriber" knappen og vent på svar ☕️
	3. Går rimelig bra kjapt med Norwegian NB-Whisper Large⚡️
	4. Planlegger tilleggs-funksjoner senere😅
	##### ''')
	        microphone = gr.Audio(sources="microphone", type="filepath")
	        upload = gr.Audio(sources="upload", type="filepath")
	        transcribe_btn = gr.Button("Transcribe Interview")

	    # Transcription result next to the resource report.
	    with gr.Row():
	        text_output = gr.Textbox(label="Transkribert Tekst")
	        with gr.Column():
	            system_info = gr.Textbox(label="System Info")

	    with gr.Tabs():
	        with gr.TabItem("Download PDF"):
	            pdf_text_only = gr.Button("Download PDF with Transcribed Text")
	            pdf_output = gr.File(label="Download PDF")

	            # Export only the transcription; the summary argument is left empty.
	            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])

	    # Event handlers are registered while the Blocks context is still open.
	    transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])

	iface.launch(debug=True)