Spaces:

tdurzynski
/

automatic-speech-recognition

Sleeping

File size: 1,501 Bytes

202afe1
fe7eb34
f28ce48
202afe1
92a0faf
 
202afe1
f28ce48
fe7eb34
 
f28ce48
 
 
fe7eb34
92a0faf
 
 
 
 
 
f28ce48
92a0faf
 
 
f28ce48
 
92a0faf
f28ce48
92a0faf
fe7eb34
f28ce48
 
 
 
 
fe7eb34
f28ce48

from transformers import pipeline
import gradio as gr
import os

# Build the speech-recognition pipeline once, at import time, from the
# "openai/whisper-small" checkpoint; the transcription handler reuses it.
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

# Define the transcription function
def transcribe_long_form(filepath):
    """Transcribe an audio file with the module-level ``asr`` pipeline.

    Args:
        filepath: Path of the audio file to transcribe, or ``None`` when no
            audio was supplied.

    Returns:
        The transcribed text, or a short message asking for audio when
        ``filepath`` is ``None``.
    """
    if filepath is None:
        return "No audio file provided, please upload a file or record one."
    # Whisper processes a 30-second window per pass. Without chunking, longer
    # recordings are cut off (or rejected, depending on the transformers
    # version). chunk_length_s lets the pipeline split the audio into 30 s
    # pieces and merge the per-chunk text into one transcript.
    output = asr(filepath, chunk_length_s=30)
    return output["text"]

# Stylesheet passed to the Gradio app: page font plus button colours,
# border and padding. One literal per CSS rule; the leading and trailing
# newlines are kept so the resulting string is unchanged.
css = (
    "\n"
    "body { font-family: Arial, sans-serif; }\n"
    "button { background-color: #4CAF50; color: white; border: none; padding: 10px 20px; }\n"
)

# Set up the Gradio interface
# Layout: a heading and instructions, then a single tab holding the audio
# input, the trigger button and the output textbox.
with gr.Blocks(css=css) as demo:
    gr.Markdown("### Audio Transcription Service")
    gr.Markdown(
        "Upload an audio file or use your microphone to record one. "
        "Then press the 'Transcribe' button to see the transcription."
    )
    with gr.Tab("Transcribe Audio"):
        # The audio widget and the button share one row; the result box
        # sits underneath.
        with gr.Row():
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Upload or Record Audio",
            )
            submit_button = gr.Button("Transcribe")
        transcription_output = gr.Textbox(
            label="Transcription",
            lines=10,
            placeholder="Your transcription will appear here...",
        )

        # Clicking the button passes the audio input to the transcription
        # function and writes its return value into the textbox.
        submit_button.click(
            fn=transcribe_long_form,
            inputs=[audio_input],
            outputs=[transcription_output],
        )

# Launch the Gradio app
# The port comes from the PORT1 environment variable; 7860 is used when
# PORT1 is not set.
# NOTE(review): share=True asks Gradio for a public share link; confirm
# that is wanted in the environment this runs in.
server_port = int(os.getenv("PORT1", 7860))
demo.launch(share=True, server_port=server_port)