File size: 1,471 Bytes
b0a50ec
0105482
 
b0a50ec
0105482
 
b0a50ec
 
 
 
0105482
 
aba0e7d
 
 
 
 
 
 
0105482
aba0e7d
 
b0a50ec
 
 
 
 
 
0105482
aba0e7d
 
 
 
0105482
 
 
 
 
b0a50ec
 
 
0105482
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import subprocess
import tempfile

import gradio as gr
import whisper
from autocorrect import Speller

# Load the Whisper "large" checkpoint once at import time so that every
# transcription request reuses the same in-memory model.
model = whisper.load_model("large")

# Spell-checker applied to the raw transcript before it is shown to the user.
# NOTE(review): the autocorrect package may not bundle an Urdu ('ur')
# dictionary by default -- confirm that this constructor succeeds in the
# deployment environment (a custom language pack may have to be installed).
spell = Speller(lang='ur')  # 'ur' selects Urdu

def transcribe_video(video_path):
    """Transcribe the Urdu speech in a video and return spell-corrected text.

    The audio track is extracted with ffmpeg into a temporary 16 kHz mono
    16-bit PCM WAV file, transcribed with the module-level Whisper ``model``
    and then passed through the module-level ``spell`` corrector.

    Args:
        video_path: Filesystem path of the uploaded video. ``None`` or an
            empty string (no upload) yields an error message.

    Returns:
        The corrected transcript on success, otherwise a human-readable
        error message. Exceptions are never propagated, so the UI always
        receives a string to display.
    """
    if not video_path:
        # Nothing was uploaded; report it directly instead of letting ffmpeg
        # fail with an opaque argument error.
        return "Error: no video file was provided."

    audio_path = None
    try:
        # Use a unique temporary file per call so that concurrent requests
        # do not overwrite each other's audio.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        # Extract audio from the uploaded video. "-y" is required because
        # mkstemp has already created the (empty) output file; without it
        # ffmpeg would ask for confirmation or refuse to overwrite.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", video_path,
                "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
                audio_path,
            ],
            check=True,
        )

        # Transcribe the audio in Urdu
        result = model.transcribe(audio_path, task="transcribe", language="ur")
        transcribed_text = result["text"]

        # Correct the transcribed text using autocorrect
        return spell(transcribed_text)

    except FileNotFoundError:
        return "Error: ffmpeg is not installed or not found in the environment."
    except Exception as e:
        # Top-level UI boundary: turn any failure into a displayable message.
        return f"An error occurred: {e}"
    finally:
        # Best-effort cleanup of the temporary audio file.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass

# Build the Gradio UI: one video upload in, one text box out.
video_input = gr.Video(label="Upload your Urdu-speaking video")
text_output = gr.Textbox(label="Corrected Transcribed Text")

interface = gr.Interface(
    fn=transcribe_video,
    inputs=video_input,
    outputs=text_output,
    title="Urdu Video Transcription with Correction",
    description=(
        "Upload a video file in Urdu, and this app will transcribe the speech "
        "and correct the text using Whisper and autocorrect."
    ),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()