MrSimple07 committed on
Commit
74211fa
·
verified ·
1 Parent(s): 73f6d14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -6
app.py CHANGED
@@ -69,29 +69,77 @@ def process_video_url(video_url, output_format, api_key, model_id):
69
  else:
70
  return None, message, None, "Audio extraction failed, cannot transcribe"
71
 
72
- def transcribe_audio(audio_file, api_key, model_id="scribe_v1"):
 
 
 
 
73
  if not api_key:
74
  return {"error": "Please provide an API key"}
75
 
76
  url = "https://api.elevenlabs.io/v1/speech-to-text"
77
  headers = {
78
- "xi-api-key": api_key
 
79
  }
80
 
81
  try:
82
- with open(audio_file, "rb") as f:
83
  files = {
84
- "file": f,
85
  "model_id": (None, model_id)
86
  }
87
- response = requests.post(url, headers=headers, files=files)
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  response.raise_for_status()
89
  result = response.json()
90
- return result
91
  except requests.exceptions.RequestException as e:
92
  return {"error": f"API request failed: {str(e)}"}
93
  except json.JSONDecodeError:
94
  return {"error": "Failed to parse API response"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  with gr.Blocks(title="Video to Audio to Transcription") as app:
97
  gr.Markdown("# Video => Audio => Transcription")
 
69
  else:
70
  return None, message, None, "Audio extraction failed, cannot transcribe"
71
 
72
+
73
def _audio_duration_seconds(audio_path):
    """Return the duration of *audio_path* in seconds, or 0 if it cannot be read.

    Tries the ``sf`` module first and falls back to ``librosa``. This is a
    best-effort probe: any failure (unsupported format, missing library)
    yields 0 instead of propagating.
    """
    try:
        # Reads only the header instead of decoding every sample just to
        # count frames.
        return sf.info(audio_path).duration
    except Exception:
        pass

    try:
        import librosa

        # NOTE(review): newer librosa releases prefer `path=` over
        # `filename=` - confirm against the pinned librosa version.
        return librosa.get_duration(filename=audio_path)
    except Exception:
        return 0


def transcribe_audio(audio_path, api_key, model_id="elevenlabs_1", timeout=None):
    """Transcribe an audio file with the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: API key sent in the ``xi-api-key`` header.
        model_id: Value of the ``model_id`` form field.
            NOTE(review): the previous revision defaulted to "scribe_v1";
            confirm "elevenlabs_1" is an id the endpoint accepts.
        timeout: Passed straight to ``requests.post``. ``None`` (the default)
            waits indefinitely, which is the historical behaviour.

    Returns:
        ``{"error": <message>}`` on any failure. Otherwise a dict with the
        keys ``service``, ``text``, ``processing_time``, ``file_size_mb``,
        ``audio_duration``, ``real_time_factor``, ``processing_speed``,
        ``raw_response``, ``language_code`` and ``language_probability``.
    """
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {
        "xi-api-key": api_key,
        "Accept": "application/json",
    }

    # Open the file separately so a bad path is reported through the same
    # error-dict convention as every other failure, not as an exception.
    try:
        audio_file = open(audio_path, "rb")
    except (OSError, TypeError, ValueError) as e:
        return {"error": f"Could not open audio file: {e}"}

    # Monotonic clock: immune to wall-clock adjustments while waiting.
    start_time = time.perf_counter()

    try:
        with audio_file:
            files = {
                "file": (os.path.basename(audio_path), audio_file, "audio/mpeg"),
                "model_id": (None, model_id),
            }
            response = requests.post(
                url,
                headers=headers,
                files=files,
                timeout=timeout,
            )

        # Friendlier messages for the two most common client errors.
        if response.status_code == 401:
            return {"error": "Unauthorized. Please check your API key."}
        if response.status_code == 422:
            return {"error": "Unprocessable Entity. Check file format or API usage."}

        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {str(e)}"}

    # Parsed in its own try: the JSON error raised by requests is itself a
    # RequestException, so a shared try would report it as a request failure.
    try:
        result = response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}

    if not isinstance(result, dict):
        return {"error": "Unexpected API response format"}

    processing_time = time.perf_counter() - start_time
    file_size = os.path.getsize(audio_path) / (1024 * 1024)
    audio_duration = _audio_duration_seconds(audio_path)

    has_duration = audio_duration > 0
    return {
        "service": "ElevenLabs",
        "text": result.get("text", ""),
        "processing_time": processing_time,
        "file_size_mb": file_size,
        "audio_duration": audio_duration,
        "real_time_factor": processing_time / audio_duration if has_duration else None,
        "processing_speed": (
            audio_duration / processing_time
            if has_duration and processing_time > 0
            else None
        ),
        "raw_response": result,
        "language_code": result.get("language_code"),
        "language_probability": result.get("language_probability"),
    }
143
 
144
  with gr.Blocks(title="Video to Audio to Transcription") as app:
145
  gr.Markdown("# Video => Audio => Transcription")