JaganathC committed
Commit 236b4e0 · verified · 1 Parent(s): a3c5aa1

Update app.py

Files changed (1)
  1. app.py +74 -114
app.py CHANGED
@@ -4,169 +4,129 @@ import yt_dlp
  import os
  import subprocess
  import json
  import time
  import langdetect
  import uuid
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

- # Load Hugging Face Token
- HF_TOKEN = os.getenv("HF_TOKEN")
-
- print("Starting the program...")
  model_path = "Qwen/Qwen2.5-7B-Instruct"
-
- # **Efficient Model Loading**
- bnb_config = BitsAndBytesConfig(load_in_8bit=True) # Use 8-bit precision to reduce memory usage
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- print(f"Using device: {device}")
-
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(
-     model_path,
-     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-     quantization_config=bnb_config, # Load in 8-bit to save memory
-     trust_remote_code=True
- ).to(device).eval()
  print("Model successfully loaded.")

  def generate_unique_filename(extension):
      return f"{uuid.uuid4()}{extension}"

  def cleanup_files(*files):
      for file in files:
          if file and os.path.exists(file):
              os.remove(file)
              print(f"Removed file: {file}")

  def download_youtube_audio(url):
-     """Downloads audio from a YouTube video and converts it to WAV format."""
-     print(f"Downloading audio from YouTube: {url}")
      output_path = generate_unique_filename(".wav")
-
      ydl_opts = {
          'format': 'bestaudio/best',
-         'postprocessors': [{
-             'key': 'FFmpegExtractAudio',
-             'preferredcodec': 'wav',
-             'preferredquality': '192',
-         }],
-         'outtmpl': output_path[:-4] # Remove .wav to prevent duplication
      }

-     try:
-         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-             ydl.download([url])
-         return output_path if os.path.exists(output_path) else "Download Failed"
-     except Exception as e:
-         return f"Error downloading audio: {str(e)}"
-
  def transcribe_audio(file_path):
-     """Transcribes audio using `insanely-fast-whisper` and handles large files efficiently."""
-     print(f"Starting transcription of file: {file_path}")
-     temp_audio = None
-
      if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
-         print("Video file detected. Extracting audio using ffmpeg...")
-         temp_audio = generate_unique_filename(".wav")
-         command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
-         subprocess.run(command, check=True)
-         file_path = temp_audio # Use extracted audio file
-
      output_file = generate_unique_filename(".json")
      command = [
-         "insanely-fast-whisper",
-         "--file-name", file_path,
-         "--device-id", "0",
-         "--model-name", "openai/whisper-large-v3",
-         "--task", "transcribe",
-         "--timestamp", "chunk",
          "--transcript-path", output_file
      ]
-
-     try:
-         subprocess.run(command, check=True)
-     except Exception as e:
-         return f"Error in transcription: {str(e)}"

-     # Process the JSON file in chunks to avoid memory overflow
-     result = []
-     try:
-         with open(output_file, "r") as f:
-             data = json.load(f) # Load full JSON safely
-             result = [chunk.get("text", "") for chunk in data]
-     except Exception as e:
-         return f"Error reading transcription file: {str(e)}"
-
-     cleanup_files(output_file)
-     if temp_audio:
-         cleanup_files(temp_audio)

-     return " ".join(result)[:500000] # Limit transcription size
-
- def generate_summary_stream(transcription):
-     """Summarizes the transcription efficiently to avoid memory overflow."""
-     if not transcription:
-         return "No transcription available."
-
-     detected_language = langdetect.detect(transcription[:1000]) # Detect using a smaller portion
-
-     # Use smaller chunks for processing
-     chunk_size = 1000 # Reduce chunk size
-     transcript_chunks = [transcription[i:i+chunk_size] for i in range(0, len(transcription), chunk_size)]
-     summary_result = []
-
-     for chunk in transcript_chunks[:5]: # Process only the first 5 chunks
-         prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
-         try:
-             input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-             output_ids = model.generate(input_ids, max_length=300) # Limit output size
-             response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-         except Exception as e:
-             response = f"Error generating summary: {str(e)}"
-         summary_result.append(response)
-
-     return "\n\n".join(summary_result)

  def process_youtube(url):
-     """Handles YouTube video processing: downloads audio, transcribes it, and cleans up."""
      if not url:
-         return "Please enter a YouTube URL.", None
-
      audio_file = download_youtube_audio(url)
-     if "Error" in audio_file or audio_file == "Download Failed":
-         return audio_file, None
-
-     transcription = transcribe_audio(audio_file)
-     cleanup_files(audio_file) # Clean up the downloaded file
-     return transcription, None

  def process_uploaded_video(video_path):
-     """Processes uploaded video file for transcription."""
-     transcription = transcribe_audio(video_path)
-     return transcription, None

- with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("""
-     # 🎥 Video Transcription and Smart Summary
-     Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
      """)
-
      with gr.Tabs():
          with gr.TabItem("📤 Video Upload"):
-             video_input = gr.Video()
              video_button = gr.Button("🚀 Process Video")

          with gr.TabItem("🔗 YouTube Link"):
-             url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
              url_button = gr.Button("🚀 Process URL")
-
      transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
      summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
      summary_button = gr.Button("📝 Generate Summary")
-
      video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
      url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
-     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])

- demo.launch(share=True, debug=True, queue=True)
 
  import os
  import subprocess
  import json
+ import moviepy.editor as mp
  import time
  import langdetect
  import uuid
+ from transformers import AutoTokenizer, AutoModelForCausalLM

+ # Load Hugging Face Model
+ HF_TOKEN = os.environ.get("HF_TOKEN")
  model_path = "Qwen/Qwen2.5-7B-Instruct"
+ print(f"Loading model {model_path}...")
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
+ model = model.eval()
  print("Model successfully loaded.")

+ # Generate unique filenames
  def generate_unique_filename(extension):
      return f"{uuid.uuid4()}{extension}"

+ # Cleanup temporary files
  def cleanup_files(*files):
      for file in files:
          if file and os.path.exists(file):
              os.remove(file)
              print(f"Removed file: {file}")

+ # Extract audio from video
+ def extract_audio(video_path):
+     audio_path = generate_unique_filename(".wav")
+     try:
+         video = mp.VideoFileClip(video_path)
+         video.audio.write_audiofile(audio_path)
+         return audio_path
+     except Exception as e:
+         print(f"Error extracting audio: {e}")
+         return None
+
+ # Download YouTube audio
  def download_youtube_audio(url):
      output_path = generate_unique_filename(".wav")
      ydl_opts = {
          'format': 'bestaudio/best',
+         'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
+         'outtmpl': output_path,
+         'keepvideo': True,
      }
+     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+         ydl.download([url])
+     return output_path if os.path.exists(output_path) else None

+ # Transcribe audio using Whisper
  def transcribe_audio(file_path):
      if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
+         file_path = extract_audio(file_path)
+         if not file_path:
+             return "Audio extraction failed.", None
+
      output_file = generate_unique_filename(".json")
      command = [
+         "insanely-fast-whisper", "--file-name", file_path,
+         "--device-id", "cpu", "--model-name", "openai/whisper-large-v3",
+         "--task", "transcribe", "--timestamp", "chunk",
          "--transcript-path", output_file
      ]

+     result = subprocess.run(command, capture_output=True, text=True)
+     if result.returncode != 0:
+         return f"Transcription failed: {result.stderr}", None

+     if not os.path.exists(output_file):
+         return "Transcription file missing.", None
+
+     with open(output_file, "r") as f:
+         transcription = json.load(f)
+
+     text = transcription.get("text", " ".join([chunk["text"] for chunk in transcription.get("chunks", [])]))
+     cleanup_files(output_file, file_path)
+     return text, None
+
+ # Generate summary using Qwen Model
+ def generate_summary(transcription):
+     detected_language = langdetect.detect(transcription)
+     prompt = f"""Summarize the following transcription in 150-300 words:
+     Language: {detected_language}
+     {transcription[:100000]}"""
+
+     response, _ = model.chat(tokenizer, prompt, history=[])
+     return response

+ # Process YouTube video
  def process_youtube(url):
      if not url:
+         return "Please enter a valid YouTube URL.", None
      audio_file = download_youtube_audio(url)
+     return transcribe_audio(audio_file) if audio_file else ("Download failed.", None)

+ # Process uploaded video
  def process_uploaded_video(video_path):
+     return transcribe_audio(video_path)

+ # Gradio Interface
+ demo = gr.Blocks()
+ with demo:
      gr.Markdown("""
+     # 🎥 AI Video Transcription & Summary
+     Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
      """)
+
      with gr.Tabs():
          with gr.TabItem("📤 Video Upload"):
+             video_input = gr.File(label="Upload a video file")
              video_button = gr.Button("🚀 Process Video")

          with gr.TabItem("🔗 YouTube Link"):
+             url_input = gr.Textbox(label="Paste YouTube URL")
              url_button = gr.Button("🚀 Process URL")
+
      transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
      summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
      summary_button = gr.Button("📝 Generate Summary")
+
      video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
      url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
+     summary_button.click(generate_summary, inputs=[transcription_output], outputs=[summary_output])

+ demo.launch()
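
Note on the new generate_summary(): Qwen2.5-Instruct checkpoints loaded via AutoModelForCausalLM generally do not expose a chat() helper (that interface belonged to the older remote-code Qwen releases), so model.chat(tokenizer, prompt, history=[]) is likely to raise an AttributeError. Below is a minimal sketch, not part of this commit, of the chat-template plus generate() path; it reuses the model and tokenizer loaded at the top of app.py, and the function name is hypothetical.

# Sketch only (not in this commit): summarization via the tokenizer's chat template.
# Assumes `model`, `tokenizer`, and `langdetect` from app.py are already in scope.
def generate_summary_chat_template(transcription):
    detected_language = langdetect.detect(transcription)
    prompt = f"Summarize the following transcription in 150-300 words in {detected_language}:\n{transcription[:100000]}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that summarizes video transcripts."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    return tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

Using max_new_tokens rather than max_length also avoids the issue in the pre-change code, where max_length=300 counted the prompt tokens as well and could leave little or no room for the generated summary.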