Scribe_Transcription_Demo_2

Running

App Files Files Community

MrSimple07 committed on Mar 25

Commit

74211fa

verified ·

1 Parent(s): 73f6d14

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -6

app.py CHANGED Viewed

@@ -69,29 +69,77 @@ def process_video_url(video_url, output_format, api_key, model_id):
     else:
         return None, message, None, "Audio extraction failed, cannot transcribe"
def transcribe_audio(audio_file, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header.
        model_id: Model identifier sent as the ``model_id`` form field.

    Returns:
        The decoded JSON response on success, otherwise a dict with a single
        ``"error"`` key describing what went wrong. Failures are reported
        through the return value; the function does not raise for a missing
        key, an unreadable file, an HTTP error or an undecodable body.
    """
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    # Open the file outside the request's try block: request exceptions are
    # themselves OSError subclasses, so the two failure modes must be
    # separated to give each its own message.
    try:
        audio_stream = open(audio_file, "rb")
    except OSError as e:
        return {"error": f"Could not read audio file: {e}"}

    try:
        with audio_stream:
            files = {
                "file": audio_stream,
                "model_id": (None, model_id),
            }
            response = requests.post(url, headers=headers, files=files)
            response.raise_for_status()
            return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {str(e)}"}
    except ValueError:
        # json.JSONDecodeError (and other JSON decoders' errors) derive from
        # ValueError, so this covers every "body is not JSON" case.
        return {"error": "Failed to parse API response"}
 with gr.Blocks(title="Video to Audio to Transcription") as app:
     gr.Markdown("# Video => Audio => Transcription")

     else:
         return None, message, None, "Audio extraction failed, cannot transcribe"
def _probe_audio_duration(audio_path):
    """Return the duration of *audio_path* in seconds, or 0 if it is unknown.

    The duration only feeds the optional speed metrics, so this is strictly
    best-effort: ``sf.read`` is tried first, then ``librosa``, and any
    failure in both yields 0 instead of an exception.
    """
    try:
        audio_data, sample_rate = sf.read(audio_path)
        return len(audio_data) / sample_rate
    except Exception:
        # Unsupported container/codec, zero sample rate, etc. -- fall through
        # to the second decoder. KeyboardInterrupt/SystemExit still propagate.
        pass

    try:
        import librosa

        try:
            return librosa.get_duration(path=audio_path)
        except TypeError:
            # Older librosa releases only accept the ``filename`` keyword.
            return librosa.get_duration(filename=audio_path)
    except Exception:
        return 0


def transcribe_audio(audio_path, api_key, model_id="elevenlabs_1"):
    """Transcribe an audio file with ElevenLabs and report timing metrics.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header.
        model_id: Model identifier sent as the ``model_id`` form field.
            NOTE(review): confirm that the default is a model id the
            speech-to-text endpoint actually accepts.

    Returns:
        On success, a dict with the keys ``service``, ``text``,
        ``processing_time`` (seconds), ``file_size_mb``, ``audio_duration``
        (seconds, 0 when it could not be determined), ``real_time_factor``,
        ``processing_speed`` (both ``None`` when they cannot be computed),
        ``raw_response``, ``language_code`` and ``language_probability``.
        On failure, a dict with a single ``"error"`` key. Failures are
        reported through the return value rather than raised.
    """
    import mimetypes

    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {
        "xi-api-key": api_key,
        "Accept": "application/json",
    }

    # Monotonic clock: immune to wall-clock adjustments during the request.
    start_time = time.perf_counter()

    # Open the file outside the request's try block: request exceptions are
    # themselves OSError subclasses, so the two failure modes must be
    # separated to give each its own message.
    try:
        audio_file = open(audio_path, "rb")
    except OSError as e:
        return {"error": f"Could not read audio file: {e}"}

    try:
        with audio_file:
            # Size is taken from the open handle so it cannot fail later if
            # the file is moved or deleted while the request is in flight.
            file_size = os.fstat(audio_file.fileno()).st_size / (1024 * 1024)
            # Advertise the real media type; keep the previous hard-coded
            # value as the fallback for unknown extensions.
            content_type = mimetypes.guess_type(audio_path)[0] or "audio/mpeg"
            files = {
                "file": (os.path.basename(audio_path), audio_file, content_type),
                "model_id": (None, model_id),
            }
            response = requests.post(url, headers=headers, files=files)
            # Give the two most common failures a friendlier message than the
            # generic raise_for_status() text.
            if response.status_code == 401:
                return {"error": "Unauthorized. Please check your API key."}
            if response.status_code == 422:
                return {"error": "Unprocessable Entity. Check file format or API usage."}
            response.raise_for_status()
            result = response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {str(e)}"}
    except ValueError:
        # json.JSONDecodeError (and other JSON decoders' errors) derive from
        # ValueError, so this covers every "body is not JSON" case.
        return {"error": "Failed to parse API response"}

    processing_time = time.perf_counter() - start_time

    if not isinstance(result, dict):
        # Valid JSON but not an object: there is nothing to read fields from.
        return {"error": "Failed to parse API response"}

    audio_duration = _probe_audio_duration(audio_path)
    has_duration = audio_duration > 0

    return {
        "service": "ElevenLabs",
        "text": result.get("text", ""),
        "processing_time": processing_time,
        "file_size_mb": file_size,
        "audio_duration": audio_duration,
        "real_time_factor": processing_time / audio_duration if has_duration else None,
        # Guard the divisor as well so a zero elapsed time cannot raise.
        "processing_speed": (
            audio_duration / processing_time
            if has_duration and processing_time > 0
            else None
        ),
        "raw_response": result,
        "language_code": result.get("language_code"),
        "language_probability": result.get("language_probability"),
    }
 with gr.Blocks(title="Video to Audio to Transcription") as app:
     gr.Markdown("# Video => Audio => Transcription")