VPCSinfo committed on
Commit
41a98e7
·
1 Parent(s): f1018d4

Add language-aware summarization and DOCX generation

Browse files
Files changed (3) hide show
  1. .gitignore +65 -12
  2. app.py +104 -22
  3. tool.py +244 -52
.gitignore CHANGED
@@ -1,18 +1,71 @@
1
- .ipynb_checkpoints/
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
 
 
5
  .env
 
 
 
 
6
  venv/
 
 
 
 
7
  __pycache__/
8
- *.pyc
9
- *.pyo
10
  *$py.class
11
- *~
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  *.swp
 
 
 
 
 
 
13
  *.log
14
- /instance
15
- # Byte-compiled / optimized / DLL files
16
- __pycache__/
17
- *.py[cod]
18
- *$py.class
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project-specific directories
2
+ Documents/
3
+ Image/
4
+ Images/
5
+
6
+ # Environment variables
7
  .env
8
+ .env.*
9
+
10
+ # Python virtual environments
11
+ .venv/
12
  venv/
13
+ ENV/
14
+ env/
15
+
16
+ # Python bytecode
17
  __pycache__/
18
+ *.py[cod]
 
19
  *$py.class
20
+ *.so
21
+ .Python
22
+
23
+ # Distribution / packaging
24
+ dist/
25
+ build/
26
+ *.egg-info/
27
+ *.egg
28
+
29
+ # Unit test / coverage reports
30
+ htmlcov/
31
+ .tox/
32
+ .coverage
33
+ .coverage.*
34
+ .cache
35
+ nosetests.xml
36
+ coverage.xml
37
+ *.cover
38
+ .hypothesis/
39
+ .pytest_cache/
40
+
41
+ # Jupyter Notebook
42
+ .ipynb_checkpoints
43
+
44
+ # IDE specific files
45
+ .idea/
46
+ .vscode/
47
  *.swp
48
+ *.swo
49
+ *~
50
+ .DS_Store
51
+
52
+ # Logs
53
+ logs/
54
  *.log
55
+
56
+ # Local development settings
57
+ instance/
58
+ .webassets-cache
59
+
60
+ # Dependency directories
61
+ node_modules/
62
+
63
+ # Compiled Python modules
64
+ *.pyc
65
+ *.pyo
66
+ *.pyd
67
+
68
+ # Temporary files
69
+ *.bak
70
+ *.tmp
71
+ *.temp
app.py CHANGED
@@ -1,33 +1,115 @@
1
  import gradio as gr
2
- from tool import YouTubeTranscriptExtractor, TranscriptSummarizer
 
 
 
 
 
3
 
4
  youtube_tool = YouTubeTranscriptExtractor()
 
5
  #summarizer_tool = TranscriptSummarizer()
6
 
7
- def process_youtube_video(video_url, hf_api_key):
 
8
  summarizer_tool = TranscriptSummarizer(hf_api_key=hf_api_key)
9
- transcript = youtube_tool.forward(video_url=video_url)
10
- summary_and_blog = summarizer_tool.forward(transcript=transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  try:
12
- summary, image_url = summary_and_blog.split("\n\nImage URL: ")
13
- except ValueError:
 
 
 
 
14
  summary = summary_and_blog
15
  image_url = None
16
- return transcript, summary, image_url
17
-
18
- iface = gr.Interface(
19
- fn=process_youtube_video,
20
- inputs=[
21
- gr.Textbox(label="YouTube Video URL"),
22
- gr.Textbox(label="Hugging Face API Key", type="password")
23
- ],
24
- outputs=[
25
- gr.Textbox(label="Transcript"),
26
- gr.Textbox(label="Summary and Blog Content"),
27
- gr.Image(label="Generated Image", image_mode="RGBA")
28
- ],
29
- title="YouTube Transcript Summarizer and Blog Content Generator",
30
- description="Enter a YouTube video URL and Hugging Face API Key to extract the transcript, summarize it, and generate blog content with an image."
31
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  iface.launch()
 
1
  import gradio as gr
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from tool import YouTubeTranscriptExtractor, TranscriptSummarizer, TranscriptToDocx
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
 
9
  youtube_tool = YouTubeTranscriptExtractor()
10
+ docx_tool = TranscriptToDocx()
11
  #summarizer_tool = TranscriptSummarizer()
12
 
13
+ def process_youtube_video(video_url, hf_api_key, existing_docx_path=None):
14
+ # Initialize tools
15
  summarizer_tool = TranscriptSummarizer(hf_api_key=hf_api_key)
16
+
17
+ # Get video title
18
+ from pytubefix import YouTube
19
+ try:
20
+ yt = YouTube(video_url)
21
+ video_title = yt.title
22
+ except Exception:
23
+ video_title = "YouTube Video"
24
+
25
+ # Extract transcript and detect language
26
+ transcript_result = youtube_tool.forward(video_url=video_url)
27
+
28
+ # Parse the formatted string response
29
+ # Format: "LANGUAGE:lang||transcript_text"
30
+ try:
31
+ if "LANGUAGE:" in transcript_result and "||" in transcript_result:
32
+ parts = transcript_result.split("||", 1)
33
+ language = parts[0].replace("LANGUAGE:", "").strip()
34
+ transcript = parts[1]
35
+ print(f"Detected language: {language}")
36
+ else:
37
+ # Fallback if we didn't get the expected format
38
+ transcript = transcript_result
39
+ language = "en"
40
+ print("Warning: Could not detect language, using English as default")
41
+ except Exception as e:
42
+ transcript = transcript_result if isinstance(transcript_result, str) else "Error extracting transcript"
43
+ language = "en"
44
+ print(f"Warning: Error parsing transcript data: {str(e)}, using English as default")
45
+
46
+ # Generate summary and get image URL
47
+ summary_and_blog = summarizer_tool.forward(transcript=transcript, language=language)
48
  try:
49
+ if "\n\nImage URL: " in summary_and_blog:
50
+ summary, image_url = summary_and_blog.split("\n\nImage URL: ")
51
+ else:
52
+ summary = summary_and_blog
53
+ image_url = None
54
+ except Exception:
55
  summary = summary_and_blog
56
  image_url = None
57
+
58
+ # Generate or update DOCX file
59
+ # Handle the file path from Gradio
60
+ docx_file_path = None
61
+ if existing_docx_path is not None and existing_docx_path != "" and existing_docx_path != []:
62
+ # If it's a temporary file path from Gradio
63
+ if isinstance(existing_docx_path, str) and os.path.exists(existing_docx_path):
64
+ docx_file_path = existing_docx_path
65
+ # If it's a file object from Gradio
66
+ elif hasattr(existing_docx_path, 'name') and os.path.exists(existing_docx_path.name):
67
+ docx_file_path = existing_docx_path.name
68
+ # If it's a list (Gradio sometimes returns a list for file components)
69
+ elif isinstance(existing_docx_path, list) and len(existing_docx_path) > 0 and existing_docx_path[0] is not None:
70
+ if isinstance(existing_docx_path[0], str) and os.path.exists(existing_docx_path[0]):
71
+ docx_file_path = existing_docx_path[0]
72
+ elif hasattr(existing_docx_path[0], 'name') and os.path.exists(existing_docx_path[0].name):
73
+ docx_file_path = existing_docx_path[0].name
74
+
75
+ docx_path = docx_tool.forward(
76
+ transcript=transcript,
77
+ summary=summary,
78
+ video_title=video_title,
79
+ image_path=image_url,
80
+ existing_docx_path=docx_file_path
81
+ )
82
+
83
+ return transcript, summary, image_url, docx_path
84
+
85
+ with gr.Blocks() as demo:
86
+ gr.Markdown("# YouTube Transcript Summarizer and Blog Content Generator")
87
+ gr.Markdown("Enter a YouTube video URL and Hugging Face API Key to extract the transcript, summarize it, and generate blog content with an image and DOCX file. Optionally, you can provide an existing DOCX file to update.")
88
+
89
+ # Check if Gemini API key is set
90
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
91
+ if not gemini_api_key or gemini_api_key == "your_gemini_api_key_here":
92
+ gr.Markdown("⚠️ **Warning**: Gemini API key is not set in the .env file. Please add your Gemini API key to the .env file to use the summarization feature.")
93
+
94
+ with gr.Row():
95
+ with gr.Column():
96
+ video_url = gr.Textbox(label="YouTube Video URL")
97
+ hf_api_key = gr.Textbox(label="Hugging Face API Key", type="password")
98
+ existing_docx = gr.File(label="Existing DOCX file (optional)", file_types=[".docx"])
99
+ submit_btn = gr.Button("Process Video")
100
+
101
+ with gr.Column():
102
+ transcript_output = gr.Textbox(label="Transcript")
103
+ summary_output = gr.Textbox(label="Summary and Blog Content")
104
+ image_output = gr.Image(label="Generated Image", image_mode="RGBA")
105
+ docx_output = gr.File(label="Generated DOCX File")
106
+
107
+ submit_btn.click(
108
+ fn=process_youtube_video,
109
+ inputs=[video_url, hf_api_key, existing_docx],
110
+ outputs=[transcript_output, summary_output, image_output, docx_output]
111
+ )
112
+
113
+ iface = demo
114
 
115
  iface.launch()
tool.py CHANGED
@@ -1,71 +1,171 @@
1
  from smolagents.tools import Tool
2
- from typing import Optional
3
  import os
4
- from transformers import pipeline
5
  import requests
6
  import io
7
  from PIL import Image
8
  from pytubefix import YouTube
9
- #from dotenv import load_dotenv
 
 
 
 
10
 
11
- #load_dotenv()
 
12
 
13
  class TranscriptSummarizer(Tool):
14
- description = "Summarizes a transcript and generates blog content using the transformers library and Hugging Face API for image generation."
15
  name = "transcript_summarizer"
16
- inputs = {'transcript': {'type': 'string', 'description': 'The transcript to summarize.'}}
 
 
 
17
  output_type = "string"
18
 
19
  def __init__(self, *args, hf_api_key: str = None, **kwargs):
20
  super().__init__(*args, **kwargs)
21
- self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
22
  self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
23
  self.hf_api_key = hf_api_key
24
  self.headers = {"Authorization": f"Bearer {self.hf_api_key}"}
25
 
26
- def query(self, payload):
27
  response = requests.post(self.api_url, headers=self.headers, json=payload)
28
  return response.content
29
 
30
- def forward(self, transcript: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
  if not self.hf_api_key:
33
- return "Hugging Face API key is required. Please provide it in the input field."
34
 
35
- transcript_length = len(transcript)
 
36
 
37
- def get_summary_lengths(length):
38
- # set the short maths formula
39
- max_length = int(length * 0.8)
40
- min_length = int(length * 0.2)
41
- return max_length, min_length
42
 
43
- # Split the transcript into chunks of 500 characters make it dynamic according to the length of the transcript
44
- if transcript_length < 500:
45
  return "Transcript is too short to summarize."
46
- chunk_size = 500
47
- transcript_chunks = [transcript[i:i+chunk_size] for i in range(0, len(transcript), chunk_size)]
48
-
49
- # Summarize each chunk of the transcript
50
- summaries = []
51
- for chunk in transcript_chunks:
52
- max_length, min_length = get_summary_lengths(len(chunk))
53
- summary = self.summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
54
- summaries.append(summary)
55
-
56
- # Concatenate the summaries
57
- full_summary = "\n".join(summaries)
58
-
59
- key_entities = full_summary.split()[:15] # Extract first 15 words as key entities
60
- image_prompt = f"Generate an image related to: {' '.join(key_entities)}, cartoon style"
61
- image_bytes = self.query({"inputs": image_prompt})
62
- image = Image.open(io.BytesIO(image_bytes))
63
- image_folder = "Image"
64
- if not os.path.exists(image_folder):
65
- os.makedirs(image_folder)
66
- image_url = os.path.join(image_folder, "image.jpg") # Specify the folder path
67
- image.save(image_url) # Save the image to a file
68
- return f"{full_summary}\n\nImage URL: {image_url}" # Return the file path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  except Exception as e:
70
  return f"An unexpected error occurred: {str(e)}"
71
 
@@ -73,24 +173,31 @@ class YouTubeTranscriptExtractor(Tool):
73
  description = "Extracts the transcript from a YouTube video."
74
  name = "youtube_transcript_extractor"
75
  inputs = {'video_url': {'type': 'string', 'description': 'The URL of the YouTube video.'}}
76
- output_type = "string"
77
 
78
  def forward(self, video_url: str) -> str:
79
  try:
80
  # Create a YouTube object
81
  yt = YouTube(video_url)
82
- lang='en'
83
- # Get the video transcript
 
84
  try:
85
- if lang in yt.captions:
86
  transcript = yt.captions['en'].generate_srt_captions()
 
87
  else:
88
- transcript = yt.captions.all()[0].generate_srt_captions()
89
- lang = yt.captions.all()[0].code
 
 
 
 
 
90
  except StopIteration:
91
- return "No transcript available for this video."
92
  except Exception as e:
93
- return f"An unexpected error occurred while accessing captions: {str(e)}"
94
 
95
  # Clean up the transcript by removing timestamps and line numbers
96
  cleaned_transcript = ""
@@ -98,10 +205,95 @@ class YouTubeTranscriptExtractor(Tool):
98
  if not line.strip().isdigit() and "-->" not in line:
99
  cleaned_transcript += line + "\n"
100
 
101
- print("transcript : ", cleaned_transcript)
102
- return cleaned_transcript
 
 
 
 
103
  except Exception as e:
104
- return f"An unexpected error occurred: {str(e)}"
105
 
106
  def __init__(self, *args, **kwargs):
107
  self.is_initialized = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from smolagents.tools import Tool
2
+ from typing import Optional, Union, Dict, Any
3
  import os
4
+ import time
5
  import requests
6
  import io
7
  from PIL import Image
8
  from pytubefix import YouTube
9
+ import docx
10
+ from docx.shared import Pt, RGBColor, Inches
11
+ from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
12
+ import google.generativeai as genai
13
+ from dotenv import load_dotenv
14
 
15
+ # Load environment variables
16
+ load_dotenv()
17
 
18
class TranscriptSummarizer(Tool):
    """Summarize a transcript with Gemini and generate a matching image.

    Summarization uses Google's Gemini model (API key from the environment);
    image generation uses a Hugging Face inference endpoint (key supplied by
    the caller).  All errors are reported as returned strings rather than
    raised, matching the Tool convention used in this file.
    """
    description = "Summarizes a transcript and generates blog content using Google's Gemini model for summarization and Hugging Face API for image generation."
    name = "transcript_summarizer"
    inputs = {
        'transcript': {'type': 'string', 'description': 'The transcript to summarize.'},
        'language': {'type': 'string', 'description': 'The language of the transcript.', 'nullable': True}
    }
    output_type = "string"

    # ISO 639-1 code -> language name, used to build clearer Gemini prompts.
    # Class-level constant so the dict is built once, not on every call.
    LANGUAGE_NAMES = {
        'en': 'English',
        'hi': 'Hindi',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'it': 'Italian',
        'ja': 'Japanese',
        'ko': 'Korean',
        'pt': 'Portuguese',
        'ru': 'Russian',
        'zh': 'Chinese',
        'ar': 'Arabic',
        'bn': 'Bengali',
        'ta': 'Tamil',
        'te': 'Telugu',
        'mr': 'Marathi',
        'gu': 'Gujarati',
        'kn': 'Kannada',
        'ml': 'Malayalam',
        'pa': 'Punjabi',
        'ur': 'Urdu'
        # Add more languages as needed
    }

    def __init__(self, *args, hf_api_key: str = None, **kwargs):
        """Configure Gemini (from GEMINI_API_KEY env var) and the HF endpoint.

        Args:
            hf_api_key: Hugging Face API key used only for image generation.
        """
        super().__init__(*args, **kwargs)
        gemini_api_key = os.getenv("GEMINI_API_KEY")
        if gemini_api_key:
            genai.configure(api_key=gemini_api_key)
            self.gemini_model = genai.GenerativeModel('gemini-2.0-flash')
        else:
            # forward() checks for None and returns a helpful message
            # instead of crashing at construction time.
            self.gemini_model = None

        # Hugging Face text-to-image endpoint.
        self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
        self.hf_api_key = hf_api_key
        self.headers = {"Authorization": f"Bearer {self.hf_api_key}"}

    def query_image_api(self, payload):
        """POST `payload` to the HF image endpoint; return raw response bytes."""
        response = requests.post(self.api_url, headers=self.headers, json=payload)
        return response.content

    def summarize_with_gemini(self, text, language='en', max_tokens=1000):
        """Use Gemini to summarize text in the specified language."""
        # Unknown codes fall back to the raw code so the prompt still works.
        language_name = self.LANGUAGE_NAMES.get(language, language)

        prompt = f"""
        Please summarize the following transcript in a concise but comprehensive way.
        Focus on the main points and key information.

        IMPORTANT: The transcript is in {language_name}. Please provide the summary in the SAME LANGUAGE ({language_name}).
        Do not translate to any other language. Keep the summary in the original language of the transcript.

        Transcript:
        {text}
        """

        generation_config = {
            "temperature": 0.4,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": max_tokens,
        }

        response = self.gemini_model.generate_content(
            prompt,
            generation_config=generation_config
        )

        return response.text

    def forward(self, transcript: str, language: str = 'en') -> str:
        """Summarize `transcript` and attach a generated image path.

        Returns the summary text, with "\n\nImage URL: <path>" appended when
        image generation succeeds; otherwise just the summary, or an error
        message string.
        """
        try:
            if not self.hf_api_key:
                return "Hugging Face API key is required for image generation. Please provide it in the input field."

            if not self.gemini_model:
                return "Gemini API key is required for summarization. Please add it to your .env file."

            transcript_length = len(transcript)

            if transcript_length < 100:
                return "Transcript is too short to summarize."

            # Long transcripts are chunked to stay within Gemini's context
            # window, then the chunk summaries are merged (and re-summarized
            # if the merge is itself too long).
            if transcript_length > 30000:
                chunk_size = 25000
                transcript_chunks = [transcript[i:i+chunk_size] for i in range(0, len(transcript), chunk_size)]

                chunk_summaries = []
                for chunk in transcript_chunks:
                    chunk_summary = self.summarize_with_gemini(chunk, language=language, max_tokens=1000)
                    chunk_summaries.append(chunk_summary)

                combined_summary = "\n\n".join(chunk_summaries)
                if len(combined_summary) > 25000:
                    full_summary = self.summarize_with_gemini(combined_summary, language=language, max_tokens=2000)
                else:
                    full_summary = combined_summary
            else:
                full_summary = self.summarize_with_gemini(transcript, language=language, max_tokens=2000)

            # Image generation is best-effort: any failure below returns the
            # summary alone rather than an error.
            try:
                key_entities = full_summary.split()[:15]  # first 15 words seed the prompt
                image_prompt = f"Generate an image related to: {' '.join(key_entities)}, cartoon style"
                image_bytes = self.query_image_api({"inputs": image_prompt})

                # Tiny/empty payloads are almost certainly an API error body.
                if not image_bytes or len(image_bytes) < 100:
                    print("Warning: Received invalid or empty image response")
                    return full_summary

                try:
                    image = Image.open(io.BytesIO(image_bytes))

                    image_folder = "Image"
                    if not os.path.exists(image_folder):
                        os.makedirs(image_folder)
                    # Timestamped filename avoids clobbering earlier images.
                    image_url = os.path.join(image_folder, f"image_{int(time.time())}.jpg")
                    image.save(image_url)

                    return f"{full_summary}\n\nImage URL: {image_url}"
                except Exception as img_error:
                    print(f"Error processing image: {str(img_error)}")
                    return full_summary
            except Exception as img_gen_error:
                print(f"Error generating image: {str(img_gen_error)}")
                return full_summary
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"
171
 
 
173
  description = "Extracts the transcript from a YouTube video."
174
  name = "youtube_transcript_extractor"
175
  inputs = {'video_url': {'type': 'string', 'description': 'The URL of the YouTube video.'}}
176
+ output_type = "string" # Keep as string for compatibility with smolagents
177
 
178
  def forward(self, video_url: str) -> str:
179
  try:
180
  # Create a YouTube object
181
  yt = YouTube(video_url)
182
+ lang = 'en' # Default language
183
+
184
+ # Get the video transcript
185
  try:
186
+ if 'en' in yt.captions:
187
  transcript = yt.captions['en'].generate_srt_captions()
188
+ lang = 'en'
189
  else:
190
+ # Get the first available caption
191
+ if len(yt.captions.all()) > 0:
192
+ caption = yt.captions.all()[0]
193
+ transcript = caption.generate_srt_captions()
194
+ lang = caption.code
195
+ else:
196
+ return f"LANGUAGE:{lang}||No transcript available for this video."
197
  except StopIteration:
198
+ return f"LANGUAGE:{lang}||No transcript available for this video."
199
  except Exception as e:
200
+ return f"LANGUAGE:{lang}||An unexpected error occurred while accessing captions: {str(e)}"
201
 
202
  # Clean up the transcript by removing timestamps and line numbers
203
  cleaned_transcript = ""
 
205
  if not line.strip().isdigit() and "-->" not in line:
206
  cleaned_transcript += line + "\n"
207
 
208
+ print(f"Transcript language detected: {lang}")
209
+ print("Transcript sample: ", cleaned_transcript[:200] + "..." if len(cleaned_transcript) > 200 else cleaned_transcript)
210
+
211
+ # Return both the transcript and the language as a formatted string
212
+ # Format: "LANGUAGE:lang||transcript_text"
213
+ return f"LANGUAGE:{lang}||{cleaned_transcript}"
214
  except Exception as e:
215
+ return f"LANGUAGE:en||An unexpected error occurred: {str(e)}"
216
 
217
  def __init__(self, *args, **kwargs):
218
  self.is_initialized = False
219
+
220
class TranscriptToDocx(Tool):
    """Render a transcript + summary (and optional image) into a DOCX file.

    New documents get title/author core properties; when an existing DOCX
    path is supplied, the new content is appended after a page break.
    """
    description = "Creates or updates a DOCX file with YouTube transcript and summary."
    name = "transcript_to_docx"
    inputs = {
        'transcript': {'type': 'string', 'description': 'The transcript to include in the document.'},
        'summary': {'type': 'string', 'description': 'The summary to include in the document.'},
        'video_title': {'type': 'string', 'description': 'The title of the YouTube video.'},
        'image_path': {'type': 'string', 'description': 'Path to the image to include in the document.', 'nullable': True},
        'existing_docx_path': {'type': 'string', 'description': 'Path to an existing DOCX file to update.', 'nullable': True}
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Ensure the output folder for generated documents exists."""
        super().__init__(*args, **kwargs)
        self.docx_folder = "Documents"
        # exist_ok avoids a race if two tools are constructed concurrently.
        os.makedirs(self.docx_folder, exist_ok=True)

    def forward(self, transcript: str, summary: str, video_title: str, image_path: Optional[str] = None, existing_docx_path: Optional[str] = None) -> str:
        """Create or append to a DOCX report and return its saved path.

        On failure, returns an error-message string instead of raising,
        matching the Tool convention used in this file.
        """
        try:
            # Update an existing document when a valid path was supplied,
            # otherwise start a new one with document properties set.
            if existing_docx_path and os.path.exists(existing_docx_path):
                doc = docx.Document(existing_docx_path)
                # Separate the new content from what is already in the file.
                doc.add_paragraph().add_run().add_break(docx.enum.text.WD_BREAK.PAGE)
            else:
                doc = docx.Document()
                doc.core_properties.title = f"YouTube Transcript: {video_title}"
                doc.core_properties.author = "YouTube Transcript Tool"

            title = doc.add_heading(video_title, level=1)
            title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

            doc.add_heading("Summary", level=2)
            doc.add_paragraph(summary)

            # Image is best-effort: a missing or corrupt file must not abort
            # document creation.
            if image_path and os.path.exists(image_path):
                try:
                    doc.add_picture(image_path, width=Inches(6))
                    caption = doc.add_paragraph("Generated image based on transcript content")
                    caption.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
                    caption.runs[0].italic = True
                except Exception as img_error:
                    print(f"Error adding image to document: {str(img_error)}")

            doc.add_heading("Full Transcript", level=2)
            doc.add_paragraph(transcript)

            # Build a filesystem-safe filename from the video title.
            safe_title = ''.join(c for c in video_title if c.isalnum() or c in ' _-')
            safe_title = safe_title.replace(' ', '_')
            if not safe_title:
                # Title was all punctuation/emoji; avoid saving as ".docx".
                safe_title = f"youtube_transcript_{int(time.time())}"

            output_filename = f"{safe_title}.docx"
            output_path = os.path.join(self.docx_folder, output_filename)

            try:
                doc.save(output_path)
                print(f"Document saved successfully at: {output_path}")
                return output_path
            except Exception as save_error:
                error_msg = f"Error saving document: {str(save_error)}"
                print(error_msg)
                # Fallback: a timestamped ASCII name sidesteps odd characters
                # or a locked target file.
                try:
                    fallback_path = os.path.join(self.docx_folder, f"youtube_transcript_{int(time.time())}.docx")
                    doc.save(fallback_path)
                    print(f"Document saved with fallback name at: {fallback_path}")
                    return fallback_path
                except Exception:  # was a bare `except:` — would swallow KeyboardInterrupt
                    return error_msg
        except Exception as e:
            return f"An error occurred while creating the DOCX file: {str(e)}"