import os
import subprocess
import time
import uuid

import requests

# ffmpeg encoder names for output formats whose file extension is not itself
# a valid encoder name. Formats not listed here are handed to ffmpeg
# unchanged as the codec name.
_CODEC_BY_FORMAT = {
    "mp3": "libmp3lame",
    "wav": "pcm_s16le",
    "m4a": "aac",
    "ogg": "libvorbis",
}


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        pass


def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file using ffmpeg.

    The output is written to the current working directory as
    ``audio_<6 hex chars>.<output_format>``, downmixed to mono at 12 kHz
    with ``-q:a 9``.

    Args:
        video_path: Path of the input video. A falsy value short-circuits.
        output_format: File extension of the output. Common formats are
            mapped to an ffmpeg encoder; unknown ones are used as the
            encoder name directly.

    Returns:
        A ``(path, message)`` tuple. ``path`` is ``None`` on any failure and
        ``message`` describes the outcome. This function does not raise.
    """
    if not video_path:
        return None, "No video provided"

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"
    codec = _CODEC_BY_FORMAT.get(output_format, output_format)
    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",              # drop the video stream
        "-c:a", codec,
        "-q:a", "9",
        "-ac", "1",         # mono
        "-ar", "12000",     # 12 kHz sample rate
        "-y",               # overwrite without prompting
        output_path,
    ]

    try:
        # Output is captured only to keep ffmpeg's logging off the console.
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except Exception as e:  # boundary: callers expect a tuple, never a raise
        return None, f"Error: {str(e)}"

    if result.returncode != 0:
        # ffmpeg can leave a truncated file behind when it fails part-way;
        # do not report that as a successful extraction.
        _remove_quietly(output_path)
        return None, "Audio extraction failed"

    if os.path.exists(output_path):
        return output_path, "Audio extracted"
    return None, "Audio extraction failed"


def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header. Required.
        model_id: Value of the ``model_id`` form field.

    Returns:
        The decoded JSON response on HTTP 200. On any failure, a dict with
        an ``"error"`` message and an empty ``"text"``. This function does
        not raise.
    """
    if not api_key:
        return {"error": "API key required", "text": ""}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": audio_file},
                data={"model_id": model_id},
                timeout=120,
            )
        if response.status_code == 200:
            return response.json()
        return {"error": f"API error: {response.status_code}", "text": ""}
    except Exception as e:  # boundary: report I/O, network and decode errors
        return {"error": f"Request failed: {str(e)}", "text": ""}


def process_video_file(video_file, output_format, elevenlabs_api_key,
                       model_id="scribe_v1"):
    """Extract audio from a video, transcribe it, and save the transcript.

    Args:
        video_file: Path of the uploaded video, or ``None``.
        output_format: Audio format passed to :func:`extract_audio`.
        elevenlabs_api_key: API key passed to :func:`transcribe_audio`.
        model_id: Transcription model passed to :func:`transcribe_audio`.

    Returns:
        A 5-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text)``. Elements belonging to a step
        that failed or was not reached are ``None``.
    """
    print("Starting fast video processing...")
    start = time.perf_counter()

    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None

    audio_path, message = extract_audio(video_file, output_format)
    if not audio_path:
        print(f"Audio extraction failed: {message}")
        return None, message, None, "Audio extraction failed", None

    print(f"Audio extracted in {time.perf_counter() - start:.2f}s. Transcribing...")
    transcription = transcribe_audio(
        audio_path, elevenlabs_api_key, model_id or "scribe_v1"
    )
    if "error" in transcription:
        print(f"Transcription error: {transcription['error']}")
        # The audio was still extracted, so hand it back with the error.
        return audio_path, message, None, transcription["error"], None

    transcript_text = transcription.get("text", "")
    transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
    try:
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(transcript_text)
        transcript_message = "Transcription saved successfully"
    except Exception as e:
        # The text is still returned even if it could not be written to disk.
        transcript_file = None
        transcript_message = f"Error saving transcription: {str(e)}"

    print(f"Total video processing time: {time.perf_counter() - start:.2f}s")
    return audio_path, message, transcript_file, transcript_message, transcript_text