Athspi committed on
Commit
dbe8a71
·
verified ·
1 Parent(s): 1258dc7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from faster_whisper import WhisperModel
4
+ import google.generativeai as genai
5
+ from gtts import gTTS, lang
6
+ import tempfile
7
+
8
+ # Configure Gemini API (replace with your API key or use environment variable)
9
# Read the Gemini key from the environment. The placeholder default only keeps
# genai.configure() from receiving None; real API calls need a valid key.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY_HERE")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize the faster-whisper model
model_size = "Systran/faster-whisper-large-v3"
# compute_type="auto" lets the backend pick the fastest computation type the
# resolved device supports. A hard-coded "float16" is rejected at load time
# when device="auto" falls back to a CPU without efficient float16 support.
whisper_model = WhisperModel(model_size, device="auto", compute_type="auto")
15
+
16
+ # Function to transcribe audio using faster-whisper
17
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level faster-whisper model.

    Args:
        audio_file: Path of the audio file to transcribe. It is passed
            unchanged to ``whisper_model.transcribe``.

    Returns:
        A ``(transcription, detected_language, error)`` tuple. On success
        ``error`` is ``None``; on failure the first two items are ``None``
        and ``error`` is a message prefixed with ``"Transcription error: "``.
    """
    # Nothing was uploaded/recorded: report it clearly instead of letting the
    # model fail on a None/empty path with an opaque message.
    if not audio_file:
        return None, None, "Transcription error: no audio file provided"

    try:
        segments, info = whisper_model.transcribe(audio_file, beam_size=5)
        # Consume the segments inside the try block so that errors raised
        # while iterating them are reported through the error slot as well.
        # Each piece is stripped so joining with a single space cannot yield
        # doubled spaces when a segment carries its own surrounding whitespace.
        pieces = (segment.text.strip() for segment in segments)
        transcription = " ".join(piece for piece in pieces if piece)
        return transcription, info.language, None
    except Exception as e:
        return None, None, f"Transcription error: {e}"
25
+
26
+ # Function to translate text using Gemini API with a magic prompt
27
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` using the Gemini API.

    Args:
        text: The text to translate.
        target_language: Name of the language to translate into; it is
            interpolated directly into the prompt.

    Returns:
        A ``(translated_text, error)`` tuple. On success ``error`` is
        ``None``; on failure ``translated_text`` is ``None`` and ``error`` is
        a message prefixed with ``"Translation error: "``.
    """
    # Guard clauses: do not spend an API call on input that cannot produce a
    # meaningful translation (empty transcription or no language chosen).
    if not text or not text.strip():
        return None, "Translation error: no text to translate"
    if not target_language:
        return None, "Translation error: no target language selected"

    try:
        model = genai.GenerativeModel("gemini-1.5-flash")
        # Magic prompt to ensure only translated text is returned
        prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{text}"
        response = model.generate_content(prompt)
        return response.text.strip(), None
    except Exception as e:
        return None, f"Translation error: {e}"
37
+
38
+ # Function to convert text to speech using gTTS with full language support
39
def text_to_speech(text, language):
    """Synthesize ``text`` to an MP3 file with gTTS.

    Args:
        text: The text to speak.
        language: A gTTS language code. Matching is case-insensitive; a
            missing or unsupported code falls back to ``"en"``.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the name of a
        temporary ``.mp3`` file (not deleted automatically) and ``error`` is
        ``None``; on failure ``path`` is ``None`` and ``error`` is a message
        prefixed with ``"TTS error: "``.
    """
    if not text or not text.strip():
        return None, "TTS error: no text to speak"

    out_path = None
    try:
        # Get all supported languages from gTTS
        lang_map = lang.tts_langs()
        # Use the language code directly if supported, otherwise default to 'en'
        code = (language or "").lower()
        tts_lang = code if code in lang_map else "en"
        tts = gTTS(text=text, lang=tts_lang, slow=False)

        # Reserve a unique file name, then close the handle BEFORE saving:
        # re-opening a still-open NamedTemporaryFile by name fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            out_path = fp.name
        tts.save(out_path)
        return out_path, None
    except Exception as e:
        # Do not leave an empty/partial temp file behind when synthesis fails.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError:
                # Best-effort cleanup; the original failure is what matters.
                pass
        return None, f"TTS error: {e}"
51
+
52
+ # Main function to process audio input and return outputs
53
def process_audio(audio_file, target_language):
    """Run the transcribe -> translate -> speak pipeline for one audio input.

    Args:
        audio_file: Path of the input audio file.
        target_language: Language name as listed by ``lang.tts_langs()``
            values (for example ``"Spanish"``).

    Returns:
        A 4-tuple ``(error, transcription, translated_text, audio_path)``.
        ``error`` is ``None`` on success. When a step fails, ``error`` holds
        its message and the results of steps that did not complete are
        ``None``.
    """
    # Fail fast: without a target language the later ``.lower()`` lookup
    # would raise an uncaught AttributeError after the expensive
    # transcription and translation had already run.
    if not target_language:
        return "Error: no target language selected", None, None, None

    # Step 1: Transcribe audio
    transcription, _detected_language, error = transcribe_audio(audio_file)
    if error:
        return error, None, None, None

    # Step 2: Translate transcription
    translated_text, error = translate_text(transcription, target_language)
    if error:
        return error, transcription, None, None

    # Step 3: Convert translated text to speech
    # Map the target language name to its gTTS language code ("en" if no
    # language name matches).
    wanted = target_language.lower()
    lang_key = next(
        (code for code, name in lang.tts_langs().items() if name.lower() == wanted),
        "en",
    )
    audio_output, error = text_to_speech(translated_text, lang_key)
    if error:
        return error, transcription, translated_text, None

    return None, transcription, translated_text, audio_output
74
+
75
+ # Gradio interface
76
with gr.Blocks(title="AI Audio Translator") as demo:
    gr.Markdown("# AI Audio Translator")
    gr.Markdown("Upload an audio file, select a target language, and get the transcription, translation, and translated audio!")

    # Get all supported languages from gTTS
    supported_langs = {v: k for k, v in lang.tts_langs().items()}  # {name: code}
    language_choices = sorted(supported_langs)  # Sorted list of language names
    # Only preselect "Spanish" when gTTS actually lists it; otherwise fall
    # back to the first available choice so the default is always valid.
    if "Spanish" in supported_langs:
        default_language = "Spanish"
    else:
        default_language = language_choices[0] if language_choices else None

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio")
        target_lang = gr.Dropdown(
            choices=language_choices,
            value=default_language,
            label="Target Language",
        )

    submit_btn = gr.Button("Translate")

    with gr.Row():
        error_output = gr.Textbox(label="Error", visible=True)
        transcription_output = gr.Textbox(label="Transcription")
        translation_output = gr.Textbox(label="Translated Text")
        audio_output = gr.Audio(label="Translated Audio")

    # The output order must match the 4-tuple returned by process_audio:
    # (error, transcription, translated_text, audio_path).
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, target_lang],
        outputs=[error_output, transcription_output, translation_output, audio_output],
    )

# Launch the app only when executed as a script, so importing this module
# (for tests or tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()