whisper-gradio / app.py
AbdullahAdeeb's picture
0.0.1
973bb27
raw
history blame contribute delete
3.08 kB
import gradio as gr
import whisper
import torch
import json
import spaces
from datetime import timedelta
import os
import zipfile
from pathlib import Path
def format_timestamp(seconds):
    """Convert a duration in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Elapsed time in seconds (int or float), as produced by
            Whisper segment ``start``/``end`` values.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``.

    Note: computed from total milliseconds rather than ``timedelta.seconds``,
    because ``timedelta.seconds`` excludes whole days — the old approach
    silently capped hours at 23 for recordings of 24 hours or more.
    """
    total_ms = int(round(seconds * 1000))
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def save_files(text, srt, json_data, base_name):
    """Save a transcription as .txt/.srt/.json plus a ZIP bundling all three.

    Args:
        text: Plain-text transcription.
        srt: SubRip-formatted transcription.
        json_data: JSON string of the full Whisper result.
        base_name: Original audio filename; its stem names the output files.

    Returns:
        Tuple of (txt_path, srt_path, json_path, zip_path) as strings.
    """
    # Create output directory if it doesn't exist
    output_dir = Path("transcriptions")
    output_dir.mkdir(exist_ok=True)

    # Generate filenames from the audio file's stem
    base_name = Path(base_name).stem
    txt_path = output_dir / f"{base_name}.txt"
    srt_path = output_dir / f"{base_name}.srt"
    json_path = output_dir / f"{base_name}.json"
    zip_path = output_dir / f"{base_name}_all.zip"

    # Explicit UTF-8: the default locale encoding can fail on non-ASCII
    # transcriptions (e.g. cp1252 on Windows).
    txt_path.write_text(text, encoding="utf-8")
    srt_path.write_text(srt, encoding="utf-8")
    json_path.write_text(json_data, encoding="utf-8")

    # ZIP_DEFLATED: actually compress; the default stores files uncompressed.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for path in (txt_path, srt_path, json_path):
            zipf.write(path, path.name)

    return str(txt_path), str(srt_path), str(json_path), str(zip_path)
@spaces.GPU
def transcribe(audio_file):
    """Transcribe an audio file with Whisper into text, SRT, and JSON.

    Args:
        audio_file: Path to the uploaded audio file (Gradio filepath input).

    Returns:
        7-tuple: paths to the saved txt/srt/json/zip files, followed by the
        raw text, SRT string, and JSON string for inline display.
    """
    # Load model per call — required under the ZeroGPU decorator.
    model = whisper.load_model("large-v3-turbo")
    result = model.transcribe(audio_file)

    plain_text = result["text"]
    as_json = json.dumps(result, indent=2)

    # Build the SRT document one cue at a time, then join once.
    cues = []
    for index, seg in enumerate(result["segments"], start=1):
        start = format_timestamp(seg["start"])
        end = format_timestamp(seg["end"])
        body = seg["text"].strip()
        cues.append(f"{index}\n{start} --> {end}\n{body}\n\n")
    as_srt = "".join(cues)

    # Persist every format and collect the download paths.
    txt_file, srt_file, json_file, zip_file = save_files(
        plain_text, as_srt, as_json, os.path.basename(audio_file)
    )

    return (
        txt_file, srt_file, json_file, zip_file, plain_text, as_srt, as_json
    )
# Create the Gradio interface
# Single-function UI: one audio input, four downloadable files plus three
# inline views of the same transcription.
demo = gr.Interface(
    fn=transcribe,
    # type="filepath" hands transcribe() a path string, not raw samples.
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    # Order must match the 7-tuple returned by transcribe().
    outputs=[
        gr.File(label="Download TXT"),
        gr.File(label="Download SRT"),
        gr.File(label="Download JSON"),
        gr.File(label="Download All (ZIP)"),
        gr.Textbox(label="Transcription", lines=5),
        gr.Textbox(label="SRT Format"),
        gr.JSON(label="JSON Output")
    ],
    title="Audio Transcription with Whisper",
    description="Upload an audio file to transcribe it into text, SRT, and JSON formats using OpenAI's Whisper model. You can download the results in different formats or get everything in a ZIP file."
)
# Launch only when run as a script (HF Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()