# Hugging Face Space: speech transcription + grammar-correction demo
import gradio as gr
import whisper
from transformers import pipeline

# Load the Whisper speech-to-text model.
# "small" trades some accuracy for speed/memory; device="cpu" avoids requiring a GPU.
whisper_model = whisper.load_model("small", device="cpu")

# Load the instruction-tuned model used for grammar/punctuation correction.
# device=-1 forces CPU inference in the transformers pipeline API.
correction_pipeline = pipeline(
    "text2text-generation",
    model="tiiuae/falcon3-1b-instruct",
    device=-1,
)
def transcribe_audio(audio_file):
    """Transcribe an audio file to text using the module-level Whisper model.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the audio file (Gradio passes type="filepath").

    Returns
    -------
    str
        The raw transcription text produced by Whisper.
    """
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]
def _chunk_text(text, max_tokens=2000):
    """Split *text* into chunks of at most *max_tokens* whitespace-delimited words.

    Whisper transcripts can exceed the correction model's context window, so the
    text is split on word boundaries before prompting. NOTE(review): "tokens" are
    approximated by words here, not the model's real tokenizer — confirm the
    limit against the tokenizer if long inputs misbehave.
    """
    words = text.split()
    if not words:
        # Preserve a single (possibly empty) chunk so empty input still round-trips.
        return [text]
    return [
        " ".join(words[i:i + max_tokens])
        for i in range(0, len(words), max_tokens)
    ]


def correct_text(raw_text):
    """Correct grammar and punctuation in *raw_text* via the correction pipeline.

    The text is chunked to respect the model's token limit, each chunk is
    corrected independently, and the results are rejoined.

    Returns
    -------
    str
        The corrected text, or an "Error in correction: ..." message if the
        pipeline raises (errors are surfaced in the UI rather than crashing).
    """
    try:
        # Bug fix: the original called an undefined `chunk_text` (NameError at
        # runtime); the helper is now defined above as `_chunk_text`.
        text_chunks = _chunk_text(raw_text, max_tokens=2000)
        corrected_chunks = []
        for chunk in text_chunks:
            # An explicit instruction keeps the model from paraphrasing.
            prompt = f"Correct the following text for grammar and punctuation without changing its meaning: {chunk}"
            corrected = correction_pipeline(prompt, max_length=2048, num_return_sequences=1)[0]["generated_text"]
            # Keep only the trimmed corrected output for this chunk.
            corrected_chunks.append(corrected.strip())
        # Combine corrected chunks back into a single string.
        final_corrected_text = " ".join(corrected_chunks).strip()
        return final_corrected_text
    except Exception as e:
        return f"Error in correction: {str(e)}"
def process_pipeline(audio_file):
    """Run the full pipeline: audio -> raw transcript -> corrected transcript.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio file.

    Returns
    -------
    tuple[str, str]
        (raw_transcription, corrected_transcription) — one value per Gradio
        output textbox, in declaration order.
    """
    raw_transcription = transcribe_audio(audio_file)
    corrected_transcription = correct_text(raw_transcription)
    return raw_transcription, corrected_transcription
# Gradio interface: one audio input, two text outputs matching the tuple
# returned by process_pipeline.
interface = gr.Interface(
    fn=process_pipeline,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=[
        gr.Textbox(label="Raw Transcription"),
        gr.Textbox(label="Corrected Transcription"),
    ],
    title="Speech Correction Demo",
    description="Upload an audio file to see raw transcription and grammar-corrected output.",
)

# Launch the app. share=True exposes a temporary public URL in addition to
# the local server.
interface.launch(share=True)