# Hugging Face Space: MrSimple01/1_full_demo_webinarium (status: Sleeping)
# File size: 3,049 bytes; revision b0ba8c2

import os
import requests
import uuid
import subprocess
import time

def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file using ffmpeg.

    The audio is written to the current working directory as
    ``audio_<6 hex chars>.<output_format>``, downmixed to mono (``-ac 1``)
    and resampled to 12 kHz (``-ar 12000``) with ``-q:a 9``.

    Args:
        video_path: Path to the input video. A falsy value short-circuits.
        output_format: File extension of the output. Extensions that are not
            themselves ffmpeg encoder names (``mp3``, ``wav``, ``m4a``) are
            mapped to a suitable encoder; any other value is passed to
            ffmpeg as the encoder name unchanged.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the output file path on
        success and ``None`` on failure; ``message`` describes the outcome.
    """
    if not video_path:
        return None, "No video provided"

    # Container/extension names that ffmpeg does not accept as "-c:a" values.
    encoders = {
        "mp3": "libmp3lame",
        "wav": "pcm_s16le",
        "m4a": "aac",
    }
    encoder = encoders.get(output_format, output_format)
    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",  # drop the video stream
        "-c:a", encoder,
        "-q:a", "9",
        "-ac", "1",
        "-ar", "12000",
        "-y",  # overwrite without prompting
        output_path,
    ]

    try:
        # ffmpeg's console output is not used, so discard it instead of
        # buffering it in memory through pipes.
        result = subprocess.run(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
    except Exception as e:
        # Typically ffmpeg is not installed or not on PATH.
        return None, f"Error: {str(e)}"

    # ffmpeg may create the output file before failing, so the file's mere
    # existence is not proof of success: require a zero exit status and a
    # non-empty file as well.
    try:
        succeeded = (
            result.returncode == 0
            and os.path.isfile(output_path)
            and os.path.getsize(output_path) > 0
        )
    except OSError:
        succeeded = False

    if succeeded:
        return output_path, "Audio extracted"

    # Best-effort removal of a partial output left by a failed run.
    try:
        os.remove(output_path)
    except OSError:
        pass
    return None, "Audio extraction failed"

def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Value of the ``model_id`` form field sent with the upload.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. On any
        failure, a dict with an ``"error"`` message and an empty ``"text"``
        so that every error result has the same shape.
    """
    if not api_key:
        return {"error": "API key required", "text": ""}
    if not audio_path:
        return {"error": "No audio file provided", "text": ""}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        # Keep the file open only for the duration of the upload.
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": audio_file},
                data={"model_id": model_id},
                timeout=120,
            )

        if response.status_code == 200:
            return response.json()
        return {"error": f"API error: {response.status_code}", "text": ""}
    except Exception as e:
        # Covers unreadable files, network failures and undecodable bodies;
        # callers expect an error dict rather than an exception.
        return {"error": f"Request failed: {str(e)}", "text": ""}

def process_video_file(video_file, output_format, elevenlabs_api_key, model_id="scribe_v1"):
    """Extract audio from a video, transcribe it and save the transcript.

    Args:
        video_file: Path of the video to process; ``None`` aborts early.
        output_format: Audio format handed to ``extract_audio``.
        elevenlabs_api_key: API key handed to ``transcribe_audio``.
        model_id: Accepted for interface compatibility; this function does
            not forward it to ``transcribe_audio``.

    Returns:
        A 5-tuple ``(audio_path, audio_message, transcript_file,
        transcript_message, transcript_text)``. Items that could not be
        produced are ``None``.
    """
    print("Starting fast video processing...")
    began = time.time()

    # Guard: nothing to do without an input video.
    if video_file is None:
        return None, "Please upload a video file", None, "No video provided", None

    # Step 1: pull the audio track out of the video.
    audio_path, message = extract_audio(video_file, output_format)
    if not audio_path:
        print(f"Audio extraction failed: {message}")
        return None, message, None, "Audio extraction failed", None

    # Step 2: send the audio off for transcription.
    print(f"Audio extracted in {time.time() - began:.2f}s. Transcribing...")
    result = transcribe_audio(audio_path, elevenlabs_api_key)
    if "error" in result:
        print(f"Transcription error: {result['error']}")
        return audio_path, message, None, result["error"], None

    # Step 3: persist the transcript text to a uniquely named file.
    transcript_text = result.get("text", "")
    transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
    try:
        with open(transcript_file, "w", encoding="utf-8") as handle:
            handle.write(transcript_text)
    except Exception as exc:
        transcript_file = None
        transcript_message = f"Error saving transcription: {str(exc)}"
    else:
        transcript_message = "Transcription saved successfully"

    print(f"Total video processing time: {time.time() - began:.2f}s")
    return audio_path, message, transcript_file, transcript_message, transcript_text