"""Gradio app that transcribes an uploaded audio file with faster-whisper.

The transcript is shown as a table and also written to a CSV file (one row
per segment with its start time, end time, and text) offered for download.
"""

import os

import gradio as gr
import pandas as pd
import torch
from faster_whisper import WhisperModel

# Model size selection
model_size = "large-v2"

# Get device
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialize model based on device
if device == "cuda:0":
    # Run on GPU with FP16
    model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
else:
    # Run on CPU with INT8
    model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")

# Column names shared by the CSV file, the returned DataFrame, and the UI table.
headers = ["start", "end", "text"]


def _upload_path(file_obj):
    """Return the filesystem path behind an uploaded file.

    Args:
        file_obj: Either a path (``str`` / ``os.PathLike``) or an object that
            exposes the path through a ``name`` attribute.

    Raises:
        AttributeError: If ``file_obj`` is neither a path nor has ``name``.
    """
    # Check for path-likes first: pathlib objects also have a ``name``
    # attribute, but it holds only the final component, not the full path.
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return file_obj.name


def get_filename(file_obj):
    """Return the base file name (without directories) of an uploaded file.

    Args:
        file_obj: A path or an object with a ``name`` attribute holding one.
    """
    return os.path.basename(_upload_path(file_obj))


def audio_to_transcript(file_obj):
    """Transcribe an audio file and save the segments to a CSV file.

    Args:
        file_obj: The uploaded audio, given as a file path or as an object
            whose ``name`` attribute is the file path.

    Returns:
        A 3-tuple ``(filename, csv_file, df)``: the audio's base file name,
        the path of the CSV written to the current working directory, and a
        DataFrame with the columns ``start``, ``end`` and ``text``.

    Raises:
        gr.Error: If no file was provided.
    """
    if file_obj is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an audio file before submitting.")

    # Resolve the path explicitly rather than via try/except, so that a real
    # transcription failure is not hidden behind a retry on the wrong type.
    audio_path = _upload_path(file_obj)
    filename = get_filename(file_obj)

    segments, _ = model_whisper.transcribe(audio_path, beam_size=5, vad_filter=True)

    # One row per segment: start time, end time, and text
    rows = [(segment.start, segment.end, segment.text) for segment in segments]
    df = pd.DataFrame(rows, columns=headers)

    # Strip only the final extension so dotted names such as "a.b.mp3" keep
    # their full stem ("a.b.csv").
    csv_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)

    # A plain path is a valid value for the gr.File output; this avoids
    # gr.File.update(), which is not available in newer Gradio releases.
    return filename, csv_file, df


## Gradio Interface Setup
iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=gr.File(label="Upload an Audio File", type="filepath"),
    outputs=[
        gr.Textbox(label="Audio file name"),
        gr.File(label="Transcript CSV file"),
        gr.DataFrame(label="Transcript", headers=headers),
    ],
    allow_flagging="never",
    title="Audio to Transcript",
    description="Upload an audio file, and this tool will return a transcript with time-stamped segments.",
    theme="compact",  # Enhanced UI theme for simplicity
)

iface.launch(debug=True)