Spaces:

Athspi-ai
/

Audio-translation

Running

Athspi committed on Mar 9

Commit

965bd2d

verified ·

1 Parent(s): f003c90

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -53,23 +53,28 @@ def translate_audio():
         if not audio_file or audio_file.filename == '':
             return jsonify({'error': 'Invalid audio file'}), 400
-        # Save temporary audio file
-        filename = secure_filename(audio_file.filename)
-        temp_input_path = os.path.join(tempfile.gettempdir(), filename)
-        audio_file.save(temp_input_path)
         # Transcribe audio using Gemini
         model = genai.GenerativeModel("gemini-2.0-flash")
-        with open(temp_input_path, "rb") as f:
-            audio_data = f.read()
-        # Use Gemini to transcribe the audio
-        prompt = "Transcribe the following audio file into text. Return only the transcribed text with no additional commentary or explanations."
-        response = model.generate_content([prompt, audio_data])
         transcription = response.text.strip()
         # Translate text using Gemini
-        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
@@ -111,4 +116,4 @@ def download_file(filename):
         return jsonify({'error': 'File not found'}), 404
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860)

         if not audio_file or audio_file.filename == '':
             return jsonify({'error': 'Invalid audio file'}), 400
+        # Validate MIME type
+        allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
+        if audio_file.mimetype not in allowed_mime_types:
+            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
         # Transcribe audio using Gemini
         model = genai.GenerativeModel("gemini-2.0-flash")
+        # Create proper audio blob
+        audio_blob = {
+            'mime_type': audio_file.mimetype,
+            'data': audio_file.read()
+        }
+        # Get transcription
+        convo = model.start_chat()
+        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
+        response = convo.send_message(audio_blob)
         transcription = response.text.strip()
         # Translate text using Gemini
+        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
         return jsonify({'error': 'File not found'}), 404
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)