MrSimple01 committed on
Commit
7f92889
·
verified ·
1 Parent(s): 514a813

Update src/video_processing.py

Browse files
Files changed (1) hide show
  1. src/video_processing.py +68 -25
src/video_processing.py CHANGED
@@ -3,6 +3,14 @@ import requests
3
  import uuid
4
  import subprocess
5
  import time
 
 
 
 
 
 
 
 
6
 
7
  def extract_audio_from_video(video_path, output_format="mp3"):
8
  if not video_path:
@@ -31,44 +39,44 @@ def extract_audio_from_video(video_path, output_format="mp3"):
31
  except Exception as e:
32
  raise Exception(f"Error extracting audio: {str(e)}")
33
 
34
- def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
35
- if not api_key:
36
- raise Exception("API key required")
37
-
38
- url = "https://api.elevenlabs.io/v1/speech-to-text"
39
- headers = {"xi-api-key": api_key}
40
 
41
  try:
 
 
 
42
  with open(audio_path, "rb") as file:
 
 
 
43
  response = requests.post(
44
- url,
45
  headers=headers,
46
- files={"file": file, "model_id": (None, model_id)},
 
47
  timeout=120
48
  )
49
 
50
  if response.status_code == 200:
51
  result = response.json()
52
- transcript_text = result.get("text", "")
53
 
54
- # Save transcript to file
55
- transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
56
- with open(transcript_file, "w", encoding="utf-8") as f:
57
- f.write(transcript_text)
58
-
59
- return transcript_text, transcript_file, "Transcription completed successfully"
60
  else:
61
- raise Exception(f"API error: {response.status_code}")
62
  except Exception as e:
63
- raise Exception(f"Transcription failed: {str(e)}")
64
 
65
  def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
66
  try:
67
- print("Starting video processing...")
68
- start = time.time()
69
-
70
  audio_path = extract_audio_from_video(video_path, audio_format)
71
- print(f"Audio extracted in {time.time() - start:.2f}s. Transcribing...")
72
 
73
  transcription, transcript_path, transcription_status = transcribe_audio(
74
  audio_path,
@@ -79,8 +87,6 @@ def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, g
79
  if not transcription:
80
  return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None
81
 
82
- print(f"Transcription completed in {time.time() - start:.2f}s. Analyzing content...")
83
-
84
  # Generate summary or quiz from transcription
85
  formatted_output, json_path, txt_path = analyze_document(
86
  transcription,
@@ -89,8 +95,45 @@ def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, g
89
  content_type
90
  )
91
 
92
- print(f"Total processing time: {time.time() - start:.2f}s")
93
  return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
94
  except Exception as e:
95
  error_message = f"Error processing video: {str(e)}"
96
- return None, error_message, None, error_message, error_message, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import uuid
4
  import subprocess
5
  import time
6
+ import os
7
+ import tempfile
8
+ import subprocess
9
+ from typing import Optional, Tuple, List
10
+ import pytube
11
+ import docx
12
+ import PyPDF2
13
+ import re
14
 
15
  def extract_audio_from_video(video_path, output_format="mp3"):
16
  if not video_path:
 
39
  except Exception as e:
40
  raise Exception(f"Error extracting audio: {str(e)}")
41
 
42
def transcribe_audio(audio_path, elevenlabs_api_key, model_id="scribe_v1"):
    """Transcribe an audio file via the ElevenLabs speech-to-text endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        elevenlabs_api_key: Key sent in the ``xi-api-key`` request header.
        model_id: Value sent as the ``model_id`` form field.

    Returns:
        A ``(transcription, transcript_path, status)`` tuple. On success,
        ``transcription`` is the ``text`` field of the JSON response,
        ``transcript_path`` is a UTF-8 text file holding that same text, and
        ``status`` is a success message. On failure the first two items are
        ``None`` and ``status`` describes the problem; file, request and
        response-parsing errors are reported this way rather than raised.
    """
    # Fail fast on missing inputs so no HTTP call is attempted without them.
    if not audio_path:
        return None, None, "Transcription error: audio path required"
    if not elevenlabs_api_key:
        return None, None, "Transcription error: API key required"

    import requests
    import tempfile

    try:
        url = "https://api.elevenlabs.io/v1/speech-to-text"
        headers = {"xi-api-key": elevenlabs_api_key}

        with open(audio_path, "rb") as file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": file},
                data={"model_id": model_id},
                timeout=120,
            )

        if response.status_code != 200:
            return None, None, f"Transcription failed: {response.text}"

        transcription = response.json().get('text', '')

        # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
        # returns a name, which is deprecated and open to a race.
        # delete=False keeps the file so the caller can use the returned path.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".txt", encoding="utf-8", delete=False
        ) as transcript_file:
            transcript_file.write(transcription)
            transcript_path = transcript_file.name

        return transcription, transcript_path, "Transcription completed successfully"
    except Exception as e:
        return None, None, f"Transcription error: {str(e)}"
76
 
77
  def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
78
  try:
 
 
 
79
  audio_path = extract_audio_from_video(video_path, audio_format)
 
80
 
81
  transcription, transcript_path, transcription_status = transcribe_audio(
82
  audio_path,
 
87
  if not transcription:
88
  return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None
89
 
 
 
90
  # Generate summary or quiz from transcription
91
  formatted_output, json_path, txt_path = analyze_document(
92
  transcription,
 
95
  content_type
96
  )
97
 
 
98
  return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
99
  except Exception as e:
100
  error_message = f"Error processing video: {str(e)}"
101
+ return None, error_message, None, error_message, error_message, None, None
102
+
103
+
104
+
105
def process_youtube_video(youtube_url, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
    """Download a YouTube video, transcribe its audio and analyze the text.

    Args:
        youtube_url: URL handed to ``pytube.YouTube``.
        audio_format: Output format passed to ``extract_audio_from_video``.
        elevenlabs_api_key: API key passed to ``transcribe_audio``.
        model_id: Speech-to-text model id passed to ``transcribe_audio``.
        gemini_api_key: API key passed to ``analyze_document``.
        language: Language argument passed to ``analyze_document``.
        content_type: Content-type argument passed to ``analyze_document``.

    Returns:
        A 7-tuple ``(audio_path, status, transcript_path,
        transcription_status, formatted_output, txt_path, json_path)``.
        If transcription yields nothing, only ``audio_path``, ``status`` and
        ``transcription_status`` are set. If any step raises, ``audio_path``
        is ``None`` and the error message fills the three text slots.
    """
    download_dir = None
    video_path = None
    audio_path = None
    try:
        if not youtube_url:
            raise ValueError("YouTube URL is required")

        yt = pytube.YouTube(youtube_url)
        # Highest-resolution progressive (audio + video in one file) MP4.
        stream = (
            yt.streams
            .filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )

        if not stream:
            raise Exception("No suitable video stream found")

        # Download into a freshly created private directory; tempfile.mktemp
        # only returns a name, which is deprecated and open to a race.
        download_dir = tempfile.mkdtemp(prefix="yt_video_")
        video_path = stream.download(output_path=download_dir, filename="video.mp4")

        audio_path = extract_audio_from_video(video_path, audio_format)

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path,
            elevenlabs_api_key,
            model_id
        )

        if not transcription:
            return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None

        # Generate summary or quiz from transcription
        formatted_output, json_path, txt_path = analyze_document(
            transcription,
            gemini_api_key,
            language,
            content_type
        )

        return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
    except Exception as e:
        error_message = f"Error processing YouTube video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None
    finally:
        # The downloaded video is never returned to the caller, so delete it.
        # Cleanup is best-effort and must not mask the result or error.
        if video_path and video_path != audio_path and os.path.exists(video_path):
            try:
                os.remove(video_path)
            except OSError:
                pass
        if download_dir:
            try:
                # rmdir only succeeds on an empty directory, so any audio
                # file written next to the video is left in place.
                os.rmdir(download_dir)
            except OSError:
                pass
139
+