Spaces:
Sleeping
Sleeping
import os | |
import requests | |
import uuid | |
import subprocess | |
import time | |
def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a small audio file via ffmpeg.

    The result is written to the current working directory as
    ``audio_<6 hex chars>.<output_format>``. ffmpeg is asked to drop the video
    stream (``-vn``), downmix to one channel (``-ac 1``) and resample to
    12000 Hz (``-ar 12000``).

    Args:
        video_path: Path of the input video. Falsy values are rejected.
        output_format: Extension of the audio file to produce. For ``"mp3"``
            the ``libmp3lame`` encoder is requested explicitly; for any other
            value ffmpeg selects its default encoder for that extension.

    Returns:
        ``(output_path, "Audio extracted")`` on success, otherwise
        ``(None, <reason>)``. Ordinary exceptions (for example ffmpeg not being
        installed) are caught and reported through the message.
    """
    if not video_path:
        return None, "No video provided"

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    cmd = ["ffmpeg", "-i", video_path, "-vn"]
    if output_format == "mp3":
        cmd += ["-c:a", "libmp3lame"]
    # For other formats no encoder is forced: a bare extension such as "wav"
    # or "ogg" is not a valid ffmpeg encoder name, so passing it to -c:a makes
    # the whole run fail. ffmpeg picks a suitable encoder from the extension.
    cmd += ["-q:a", "9", "-ac", "1", "-ar", "12000", "-y", output_path]

    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode == 0 and os.path.exists(output_path):
            return output_path, "Audio extracted"

        # A failed run can still leave a truncated or empty file behind; do not
        # hand it to the caller as if extraction had succeeded.
        if os.path.exists(output_path):
            os.remove(output_path)
        return None, "Audio extraction failed"
    except Exception as e:
        return None, f"Error: {str(e)}"
def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Upload an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload (opened in binary mode).
        api_key: API key sent in the ``xi-api-key`` request header.
        model_id: Value sent in the ``model_id`` form field. Defaults to
            ``"scribe_v1"``.

    Returns:
        The decoded JSON body when the service answers with HTTP 200.
        On every failure path (missing key, unreadable file, request error,
        non-200 status) a dict with an ``"error"`` message and an empty
        ``"text"`` value, so callers can rely on both keys being present.
    """
    if not api_key:
        # Same shape as the other error results below.
        return {"error": "API key required", "text": ""}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": audio_file},
                # Plain form fields go in `data`; requests merges them into
                # the same multipart body as the uploaded file.
                data={"model_id": model_id},
                timeout=120,
            )
        if response.status_code == 200:
            return response.json()
        return {"error": f"API error: {response.status_code}", "text": ""}
    except Exception as e:
        return {"error": f"Request failed: {str(e)}", "text": ""}
def process_video_file(video_file, output_format, elevenlabs_api_key, model_id="scribe_v1"):
    """Run the video -> audio -> transcript pipeline and report each stage.

    Calls ``extract_audio`` on the video, passes the resulting audio file to
    ``transcribe_audio``, and writes the returned text to
    ``transcript_<6 hex chars>.txt`` in the current working directory.
    Progress and timing are printed to stdout.

    Args:
        video_file: Path of the uploaded video, or ``None`` if nothing was
            uploaded.
        output_format: Audio format handed to ``extract_audio``.
        elevenlabs_api_key: API key handed to ``transcribe_audio``.
        model_id: Accepted by the signature but not referenced in this body.

    Returns:
        A 5-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text)``. Entries for stages that did
        not complete are ``None``.
    """
    print("Starting fast video processing...")
    started_at = time.time()

    # Guard: nothing uploaded.
    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None

    # Stage 1: pull the audio track out of the video.
    audio_path, audio_status = extract_audio(video_file, output_format)
    if not audio_path:
        print(f"Audio extraction failed: {audio_status}")
        return None, audio_status, None, "Audio extraction failed", None

    extraction_seconds = time.time() - started_at
    print(f"Audio extracted in {extraction_seconds:.2f}s. Transcribing...")

    # Stage 2: speech-to-text. The audio file is still returned on failure.
    result = transcribe_audio(audio_path, elevenlabs_api_key)
    if "error" in result:
        failure = result["error"]
        print(f"Transcription error: {failure}")
        return audio_path, audio_status, None, failure, None

    # Stage 3: persist the transcript next to the audio file.
    text = result.get("text", "")
    saved_path = f"transcript_{uuid.uuid4().hex[:6]}.txt"
    try:
        with open(saved_path, "w", encoding="utf-8") as out:
            out.write(text)
    except Exception as exc:
        saved_path = None
        save_status = f"Error saving transcription: {exc!s}"
    else:
        save_status = "Transcription saved successfully"

    total_seconds = time.time() - started_at
    print(f"Total video processing time: {total_seconds:.2f}s")
    return audio_path, audio_status, saved_path, save_status, text