Spaces:
Sleeping
Sleeping
File size: 3,049 Bytes
b0ba8c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
import requests
import uuid
import subprocess
import time
def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a small mono file using ffmpeg.

    The output is written to the current working directory as
    ``audio_<6 hex chars>.<output_format>``.

    Args:
        video_path: Path of the input video. A falsy value short-circuits.
        output_format: Extension of the output file. Known extensions are
            mapped to the matching ffmpeg encoder; any other value is passed
            to ffmpeg unchanged as the encoder name.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the output file on success
        and ``None`` on any failure; ``message`` describes the outcome.
        This function never raises.
    """
    if not video_path:
        return None, "No video provided"

    # File extensions are not always valid ffmpeg encoder names (e.g. "wav"
    # is a container, not a codec), so translate the common ones explicitly.
    encoders = {
        "mp3": "libmp3lame",
        "wav": "pcm_s16le",
        "ogg": "libvorbis",
        "m4a": "aac",
    }
    codec = encoders.get(str(output_format).lower(), output_format)
    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",             # drop the video stream
        "-c:a", codec,
        "-q:a", "9",       # encoder quality setting (kept from the original)
        "-ac", "1",        # mono
        "-ar", "12000",    # 12 kHz sample rate
        "-y",              # overwrite the output without prompting
        output_path,
    ]

    try:
        # The output of ffmpeg is not used, so discard it instead of
        # buffering it in memory.
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as e:
        # Callers rely on a (None, message) result rather than an exception,
        # e.g. when the ffmpeg binary is not installed.
        return None, f"Error: {str(e)}"

    if result.returncode == 0 and os.path.exists(output_path):
        return output_path, "Audio extracted"

    # ffmpeg may leave a truncated file behind when it fails part-way;
    # remove it so a broken file is never mistaken for a valid result.
    if os.path.exists(output_path):
        try:
            os.remove(output_path)
        except OSError:
            # Best-effort cleanup: the failure is already being reported.
            pass
    return None, "Audio extraction failed"
def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Value of the ``model_id`` form field sent with the upload.

    Returns:
        The decoded JSON object on an HTTP 200 response. On any failure a
        dict of the form ``{"error": <message>, "text": ""}`` is returned
        instead; this function never raises.
    """
    if not api_key:
        return {"error": "API key required", "text": ""}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}
    try:
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                data={"model_id": model_id},
                files={"file": audio_file},
                timeout=120,
            )
        if response.status_code != 200:
            return {"error": f"API error: {response.status_code}", "text": ""}
        payload = response.json()
    except Exception as e:
        # Covers unreadable files, network failures and undecodable bodies;
        # callers expect an error dict rather than an exception.
        return {"error": f"Request failed: {str(e)}", "text": ""}

    # Callers test `"error" in result` and call `.get`, so anything that is
    # not a JSON object is reported as an error instead of being returned.
    if not isinstance(payload, dict):
        return {"error": "API error: unexpected response format", "text": ""}
    return payload


def process_video_file(video_file, output_format, elevenlabs_api_key, model_id="scribe_v1"):
    """Extract the audio of a video, transcribe it, and save the transcript.

    Args:
        video_file: Path of the uploaded video, or ``None`` if none was given.
        output_format: Audio format passed to ``extract_audio``.
        elevenlabs_api_key: API key passed to ``transcribe_audio``.
        model_id: Speech-to-text model passed to ``transcribe_audio``.

    Returns:
        A 5-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text)``. Entries belonging to a step
        that failed or was not reached are ``None``.
    """
    print("Starting fast video processing...")
    start = time.perf_counter()

    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None

    audio_path, message = extract_audio(video_file, output_format)
    if not audio_path:
        print(f"Audio extraction failed: {message}")
        return None, message, None, "Audio extraction failed", None

    print(f"Audio extracted in {time.perf_counter() - start:.2f}s. Transcribing...")
    transcription = transcribe_audio(audio_path, elevenlabs_api_key, model_id)
    if "error" in transcription:
        print(f"Transcription error: {transcription['error']}")
        # The extracted audio is still returned so the caller can use it.
        return audio_path, message, None, transcription["error"], None

    # A missing or null "text" field becomes an empty transcript, and any
    # non-string value is stringified so the file write cannot fail on type.
    transcript_text = transcription.get("text") or ""
    if not isinstance(transcript_text, str):
        transcript_text = str(transcript_text)

    transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
    try:
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(transcript_text)
        transcript_message = "Transcription saved successfully"
    except (OSError, UnicodeError) as e:
        transcript_file = None
        transcript_message = f"Error saving transcription: {str(e)}"

    print(f"Total video processing time: {time.perf_counter() - start:.2f}s")
    return audio_path, message, transcript_file, transcript_message, transcript_text