Pontonkid's picture
Update app.py
f16a2a2 verified
raw
history blame contribute delete
2.35 kB
import os

import gradio as gr
import pandas as pd
import torch
from faster_whisper import WhisperModel
# Whisper checkpoint to load.
model_size = "large-v2"
# Prefer the first CUDA device when one is available; otherwise stay on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# GPU runs use FP16 weights; CPU runs use INT8 quantization.
model_whisper = (
    WhisperModel(model_size, device="cuda", compute_type="float16")
    if device == "cuda:0"
    else WhisperModel(model_size, device="cpu", compute_type="int8")
)
# Function to get filename from file object
def get_filename(file_obj):
    """Return the base file name of an uploaded file.

    Args:
        file_obj: Either an object that exposes the file's path through a
            ``name`` attribute (such as a temporary-file wrapper), or the
            path itself as a ``str`` / ``os.PathLike``.

    Returns:
        The final path component, e.g. ``"talk.mp3"`` for
        ``"/tmp/upload/talk.mp3"``.
    """
    # Plain paths have no usable ``name`` holding a full path, so fall back
    # to the object itself when the attribute is missing.
    path = os.fspath(getattr(file_obj, "name", file_obj))
    # basename honours the platform's path separators, unlike split("/").
    return os.path.basename(path)
# Function to transcribe audio to text
def audio_to_transcript(file_obj):
    """Transcribe an uploaded audio file and save the segments as a CSV.

    Args:
        file_obj: The uploaded audio, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds that path.

    Returns:
        A 3-tuple ``(filename, path_to_csv, df)``:
        the base name of the audio file, a Gradio update that points the
        file output at the CSV written to the working directory, and a
        DataFrame with ``start``, ``end`` and ``text`` columns holding one
        row per transcribed segment.

    Raises:
        gr.Error: If no file was supplied.
    """
    if file_obj is None:
        # Surface a readable message instead of an AttributeError on None.
        raise gr.Error("Please upload an audio file.")

    # Pick the path by inspecting the input type rather than with a blanket
    # try/except, so genuine transcription failures are not masked and
    # retried with the wrong argument.
    if isinstance(file_obj, (str, os.PathLike)):
        audio_path = os.fspath(file_obj)
        filename = os.path.basename(audio_path)
    else:
        audio_path = file_obj.name
        filename = get_filename(file_obj)

    segments, _ = model_whisper.transcribe(audio_path, beam_size=5, vad_filter=True)

    # One pass over the segments collects start time, end time and text.
    rows = [(segment.start, segment.end, segment.text) for segment in segments]
    df = pd.DataFrame(rows, columns=["start", "end", "text"])

    # splitext drops only the last extension, so "talk.part1.mp3" becomes
    # "talk.part1.csv" instead of being truncated at the first dot.
    csv_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)

    # gr.update is the component-agnostic updater; the per-component
    # gr.File.update classmethod is not available in newer Gradio releases.
    path_to_csv = gr.update(value=csv_file, visible=True)
    return filename, path_to_csv, df
## Gradio Interface Setup
# Column names shown in the transcript table.
headers = ["start", "end", "text"]
# One file input; three outputs matching audio_to_transcript's return tuple
# (file name, CSV download, transcript table).
iface = gr.Interface(
    audio_to_transcript,
    gr.File(label="Upload an Audio File", type="filepath"),
    [
        gr.Textbox(label="Audio file name"),
        gr.File(label="Transcript CSV file"),
        gr.DataFrame(label="Transcript", headers=headers),
    ],
    title="Audio to Transcript",
    description="Upload an audio file, and this tool will return a transcript with time-stamped segments.",
    theme="compact",  # Enhanced UI theme for simplicity
    allow_flagging="never",
)
# Start the web server with debug output enabled.
iface.launch(debug=True)