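"""Gradio Space: transcribe an uploaded audio file with faster-whisper.

Returns the audio file name, a time-stamped CSV transcript, and a preview table.
"""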
import os

import gradio as gr
import torch
from faster_whisper import WhisperModel
import pandas as pd
# Model size selection
model_size = "large-v2"
# Use the first GPU if available, otherwise fall back to the CPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Initialize model based on device
if device == "cuda:0":
    # Run on GPU with FP16
    model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
else:
    # Run on CPU with INT8
    model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")

# Function to get the base filename from a Gradio file object or a plain path string
def get_filename(file_obj):
    path = file_obj.name if hasattr(file_obj, "name") else file_obj
    return os.path.basename(path)


# Function to transcribe audio to text and build a time-stamped transcript
def audio_to_transcript(file_obj):
    filename = get_filename(file_obj)
    # gr.File(type="filepath") passes a path string; older Gradio versions passed a file object
    audio_path = file_obj.name if hasattr(file_obj, "name") else file_obj
    segments, _ = model_whisper.transcribe(audio_path, beam_size=5, vad_filter=True)

    # Lists to store the transcription data
    start_segments, end_segments, text_segments = [], [], []
    # Process each segment for start time, end time, and text
    for segment in segments:
        start, end, text = segment.start, segment.end, segment.text
        start_segments.append(start)
        end_segments.append(end)
        text_segments.append(text)

    # Save the transcript to a CSV file
    df = pd.DataFrame()
    df["start"] = start_segments
    df["end"] = end_segments
    df["text"] = text_segments

    # Name the output CSV after the audio file
    csv_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)

    # Return the CSV path directly; gr.File.update() was removed in current Gradio releases
    return filename, csv_file, df
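
# Example of calling the function directly (hypothetical local test, assuming an
# audio file "sample.wav" exists in the working directory):
#   name, csv_path, table = audio_to_transcript("sample.wav")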

# Gradio interface setup
headers = ["start", "end", "text"]
iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=gr.File(label="Upload an Audio File", type="filepath"),
    outputs=[
        gr.Textbox(label="Audio file name"),
        gr.File(label="Transcript CSV file"),
        gr.DataFrame(label="Transcript", headers=headers),
    ],
    allow_flagging="never",
    title="Audio to Transcript",
    description="Upload an audio file, and this tool will return a transcript with time-stamped segments.",
    theme="soft",  # "compact" is not a built-in Gradio theme; "soft" is a valid lightweight choice
)

iface.launch(debug=True)
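
# To run locally, install the imported packages (gradio, torch, faster-whisper,
# pandas) and run this script with Python; launch() starts a local web UI.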