Add language-aware summarization and DOCX generation
- .gitignore +65 -12
- app.py +104 -22
- tool.py +244 -52
.gitignore
CHANGED
@@ -1,18 +1,71 @@
+# Project-specific directories
+Documents/
+Image/
+Images/
+
+# Environment variables
 .env
+.env.*
+
+# Python virtual environments
+.venv/
 venv/
+ENV/
+env/
+
+# Python bytecode
 __pycache__/
+*.py[cod]
 *$py.class
+*.so
+.Python
+
+# Distribution / packaging
+dist/
+build/
+*.egg-info/
+*.egg
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IDE specific files
+.idea/
+.vscode/
 *.swp
+*.swo
+*~
+.DS_Store
+
+# Logs
+logs/
 *.log
+
+# Local development settings
+instance/
+.webassets-cache
+
+# Dependency directories
+node_modules/
+
+# Compiled Python modules
+*.pyc
+*.pyo
+*.pyd
+
+# Temporary files
+*.bak
+*.tmp
+*.temp

app.py
CHANGED
@@ -1,33 +1,115 @@
 import gradio as gr
+import os
+from dotenv import load_dotenv
+from tool import YouTubeTranscriptExtractor, TranscriptSummarizer, TranscriptToDocx
+
+# Load environment variables
+load_dotenv()
 
 youtube_tool = YouTubeTranscriptExtractor()
+docx_tool = TranscriptToDocx()
 #summarizer_tool = TranscriptSummarizer()
 
+def process_youtube_video(video_url, hf_api_key, existing_docx_path=None):
+    # Initialize tools
     summarizer_tool = TranscriptSummarizer(hf_api_key=hf_api_key)
+
+    # Get video title
+    from pytubefix import YouTube
+    try:
+        yt = YouTube(video_url)
+        video_title = yt.title
+    except Exception:
+        video_title = "YouTube Video"
+
+    # Extract transcript and detect language
+    transcript_result = youtube_tool.forward(video_url=video_url)
+
+    # Parse the formatted string response
+    # Format: "LANGUAGE:lang||transcript_text"
+    try:
+        if "LANGUAGE:" in transcript_result and "||" in transcript_result:
+            parts = transcript_result.split("||", 1)
+            language = parts[0].replace("LANGUAGE:", "").strip()
+            transcript = parts[1]
+            print(f"Detected language: {language}")
+        else:
+            # Fallback if we didn't get the expected format
+            transcript = transcript_result
+            language = "en"
+            print("Warning: Could not detect language, using English as default")
+    except Exception as e:
+        transcript = transcript_result if isinstance(transcript_result, str) else "Error extracting transcript"
+        language = "en"
+        print(f"Warning: Error parsing transcript data: {str(e)}, using English as default")
+
+    # Generate summary and get image URL
+    summary_and_blog = summarizer_tool.forward(transcript=transcript, language=language)
     try:
+        if "\n\nImage URL: " in summary_and_blog:
+            summary, image_url = summary_and_blog.split("\n\nImage URL: ")
+        else:
+            summary = summary_and_blog
+            image_url = None
+    except Exception:
         summary = summary_and_blog
         image_url = None
+
+    # Generate or update DOCX file
+    # Handle the file path from Gradio
+    docx_file_path = None
+    if existing_docx_path is not None and existing_docx_path != "" and existing_docx_path != []:
+        # If it's a temporary file path from Gradio
+        if isinstance(existing_docx_path, str) and os.path.exists(existing_docx_path):
+            docx_file_path = existing_docx_path
+        # If it's a file object from Gradio
+        elif hasattr(existing_docx_path, 'name') and os.path.exists(existing_docx_path.name):
+            docx_file_path = existing_docx_path.name
+        # If it's a list (Gradio sometimes returns a list for file components)
+        elif isinstance(existing_docx_path, list) and len(existing_docx_path) > 0 and existing_docx_path[0] is not None:
+            if isinstance(existing_docx_path[0], str) and os.path.exists(existing_docx_path[0]):
+                docx_file_path = existing_docx_path[0]
+            elif hasattr(existing_docx_path[0], 'name') and os.path.exists(existing_docx_path[0].name):
+                docx_file_path = existing_docx_path[0].name
+
+    docx_path = docx_tool.forward(
+        transcript=transcript,
+        summary=summary,
+        video_title=video_title,
+        image_path=image_url,
+        existing_docx_path=docx_file_path
+    )
+
+    return transcript, summary, image_url, docx_path
+
+with gr.Blocks() as demo:
+    gr.Markdown("# YouTube Transcript Summarizer and Blog Content Generator")
+    gr.Markdown("Enter a YouTube video URL and Hugging Face API Key to extract the transcript, summarize it, and generate blog content with an image and DOCX file. Optionally, you can provide an existing DOCX file to update.")
+
+    # Check if Gemini API key is set
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key or gemini_api_key == "your_gemini_api_key_here":
+        gr.Markdown("⚠️ **Warning**: Gemini API key is not set in the .env file. Please add your Gemini API key to the .env file to use the summarization feature.")
+
+    with gr.Row():
+        with gr.Column():
+            video_url = gr.Textbox(label="YouTube Video URL")
+            hf_api_key = gr.Textbox(label="Hugging Face API Key", type="password")
+            existing_docx = gr.File(label="Existing DOCX file (optional)", file_types=[".docx"])
+            submit_btn = gr.Button("Process Video")
+
+        with gr.Column():
+            transcript_output = gr.Textbox(label="Transcript")
+            summary_output = gr.Textbox(label="Summary and Blog Content")
+            image_output = gr.Image(label="Generated Image", image_mode="RGBA")
+            docx_output = gr.File(label="Generated DOCX File")
+
+    submit_btn.click(
+        fn=process_youtube_video,
+        inputs=[video_url, hf_api_key, existing_docx],
+        outputs=[transcript_output, summary_output, image_output, docx_output]
+    )
+
+iface = demo
 
 iface.launch()

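Note: tool.py and app.py communicate through a plain-string convention, "LANGUAGE:<code>||<transcript>": the extractor builds the string and the Gradio callback splits it back apart. The helpers below are a hypothetical sketch of that round trip; pack_transcript and unpack_transcript are illustrative names only and do not exist in the repository.

# Hypothetical helpers illustrating the "LANGUAGE:<code>||<transcript>" convention
# used between tool.py and app.py; these names are not part of the codebase.

def pack_transcript(lang: str, transcript: str) -> str:
    """Encode a transcript and its language code into the wire format."""
    return f"LANGUAGE:{lang}||{transcript}"

def unpack_transcript(result: str) -> tuple[str, str]:
    """Decode the wire format; fall back to English when the prefix is missing."""
    if "LANGUAGE:" in result and "||" in result:
        prefix, text = result.split("||", 1)
        return prefix.replace("LANGUAGE:", "").strip(), text
    return "en", result

# Round trip: the language code and transcript text survive unchanged.
lang, text = unpack_transcript(pack_transcript("hi", "namaste"))
assert (lang, text) == ("hi", "namaste")
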
tool.py
CHANGED
@@ -1,71 +1,171 @@
 from smolagents.tools import Tool
+from typing import Optional, Union, Dict, Any
 import os
+import time
 import requests
 import io
 from PIL import Image
 from pytubefix import YouTube
+import docx
+from docx.shared import Pt, RGBColor, Inches
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+import google.generativeai as genai
+from dotenv import load_dotenv
 
+# Load environment variables
+load_dotenv()
 
 class TranscriptSummarizer(Tool):
+    description = "Summarizes a transcript and generates blog content using Google's Gemini model for summarization and Hugging Face API for image generation."
     name = "transcript_summarizer"
+    inputs = {
+        'transcript': {'type': 'string', 'description': 'The transcript to summarize.'},
+        'language': {'type': 'string', 'description': 'The language of the transcript.', 'nullable': True}
+    }
     output_type = "string"
 
     def __init__(self, *args, hf_api_key: str = None, **kwargs):
         super().__init__(*args, **kwargs)
+        # Get Gemini API key from environment variables
+        gemini_api_key = os.getenv("GEMINI_API_KEY")
+        if gemini_api_key:
+            # Configure the Gemini API
+            genai.configure(api_key=gemini_api_key)
+            # Set up the model
+            self.gemini_model = genai.GenerativeModel('gemini-2.0-flash')
+        else:
+            self.gemini_model = None
+
+        # Set up Hugging Face for image generation
         self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
         self.hf_api_key = hf_api_key
         self.headers = {"Authorization": f"Bearer {self.hf_api_key}"}
 
+    def query_image_api(self, payload):
         response = requests.post(self.api_url, headers=self.headers, json=payload)
         return response.content
 
+    def summarize_with_gemini(self, text, language='en', max_tokens=1000):
+        """Use Gemini to summarize text in the specified language"""
+        # Map language codes to full language names for better prompting
+        language_map = {
+            'en': 'English',
+            'hi': 'Hindi',
+            'es': 'Spanish',
+            'fr': 'French',
+            'de': 'German',
+            'it': 'Italian',
+            'ja': 'Japanese',
+            'ko': 'Korean',
+            'pt': 'Portuguese',
+            'ru': 'Russian',
+            'zh': 'Chinese',
+            'ar': 'Arabic',
+            'bn': 'Bengali',
+            'ta': 'Tamil',
+            'te': 'Telugu',
+            'mr': 'Marathi',
+            'gu': 'Gujarati',
+            'kn': 'Kannada',
+            'ml': 'Malayalam',
+            'pa': 'Punjabi',
+            'ur': 'Urdu'
+            # Add more languages as needed
+        }
+
+        language_name = language_map.get(language, language)
+
+        prompt = f"""
+        Please summarize the following transcript in a concise but comprehensive way.
+        Focus on the main points and key information.
+
+        IMPORTANT: The transcript is in {language_name}. Please provide the summary in the SAME LANGUAGE ({language_name}).
+        Do not translate to any other language. Keep the summary in the original language of the transcript.
+
+        Transcript:
+        {text}
+        """
+
+        generation_config = {
+            "temperature": 0.4,
+            "top_p": 0.95,
+            "top_k": 40,
+            "max_output_tokens": max_tokens,
+        }
+
+        response = self.gemini_model.generate_content(
+            prompt,
+            generation_config=generation_config
+        )
+
+        return response.text
+
+    def forward(self, transcript: str, language: str = 'en') -> str:
         try:
             if not self.hf_api_key:
+                return "Hugging Face API key is required for image generation. Please provide it in the input field."
 
+            if not self.gemini_model:
+                return "Gemini API key is required for summarization. Please add it to your .env file."
 
+            transcript_length = len(transcript)
 
+            # Check if transcript is too short
+            if transcript_length < 100:
                 return "Transcript is too short to summarize."
+
+            # For longer transcripts, split into chunks to handle context window limitations
+            if transcript_length > 30000:  # Gemini has a context window limit
+                chunk_size = 25000
+                transcript_chunks = [transcript[i:i+chunk_size] for i in range(0, len(transcript), chunk_size)]
+
+                # Summarize each chunk
+                chunk_summaries = []
+                for chunk in transcript_chunks:
+                    chunk_summary = self.summarize_with_gemini(chunk, language=language, max_tokens=1000)
+                    chunk_summaries.append(chunk_summary)
+
+                # Combine chunk summaries and create a final summary
+                combined_summary = "\n\n".join(chunk_summaries)
+                if len(combined_summary) > 25000:
+                    full_summary = self.summarize_with_gemini(combined_summary, language=language, max_tokens=2000)
+                else:
+                    full_summary = combined_summary
+            else:
+                # For shorter transcripts, summarize directly
+                full_summary = self.summarize_with_gemini(transcript, language=language, max_tokens=2000)
+
+            # Generate image based on summary
+            try:
+                key_entities = full_summary.split()[:15]  # Extract first 15 words as key entities
+                image_prompt = f"Generate an image related to: {' '.join(key_entities)}, cartoon style"
+                image_bytes = self.query_image_api({"inputs": image_prompt})
+
+                # Check if the response is valid
+                if not image_bytes or len(image_bytes) < 100:
+                    print("Warning: Received invalid or empty image response")
+                    return full_summary  # Return just the summary without image
+
+                try:
+                    # Try to open the image
+                    image = Image.open(io.BytesIO(image_bytes))
+
+                    # Save the image
+                    image_folder = "Image"
+                    if not os.path.exists(image_folder):
+                        os.makedirs(image_folder)
+                    image_url = os.path.join(image_folder, f"image_{int(time.time())}.jpg")  # Use timestamp for unique filename
+                    image.save(image_url)
+
+                    return f"{full_summary}\n\nImage URL: {image_url}"  # Return the file path with summary
+                except Exception as img_error:
+                    print(f"Error processing image: {str(img_error)}")
+                    # Return just the summary if image processing fails
+                    return full_summary
+            except Exception as img_gen_error:
+                print(f"Error generating image: {str(img_gen_error)}")
+                # Return just the summary if image generation fails
+                return full_summary
         except Exception as e:
             return f"An unexpected error occurred: {str(e)}"
 
@@ -73,24 +173,31 @@ class YouTubeTranscriptExtractor(Tool):
     description = "Extracts the transcript from a YouTube video."
     name = "youtube_transcript_extractor"
     inputs = {'video_url': {'type': 'string', 'description': 'The URL of the YouTube video.'}}
+    output_type = "string"  # Keep as string for compatibility with smolagents
 
     def forward(self, video_url: str) -> str:
         try:
             # Create a YouTube object
             yt = YouTube(video_url)
+            lang = 'en'  # Default language
+
+            # Get the video transcript
             try:
+                if 'en' in yt.captions:
                     transcript = yt.captions['en'].generate_srt_captions()
+                    lang = 'en'
                 else:
+                    # Get the first available caption
+                    if len(yt.captions.all()) > 0:
+                        caption = yt.captions.all()[0]
+                        transcript = caption.generate_srt_captions()
+                        lang = caption.code
+                    else:
+                        return f"LANGUAGE:{lang}||No transcript available for this video."
             except StopIteration:
+                return f"LANGUAGE:{lang}||No transcript available for this video."
             except Exception as e:
+                return f"LANGUAGE:{lang}||An unexpected error occurred while accessing captions: {str(e)}"
 
             # Clean up the transcript by removing timestamps and line numbers
             cleaned_transcript = ""
@@ -98,10 +205,95 @@ class YouTubeTranscriptExtractor(Tool):
                 if not line.strip().isdigit() and "-->" not in line:
                     cleaned_transcript += line + "\n"
 
+            print(f"Transcript language detected: {lang}")
+            print("Transcript sample: ", cleaned_transcript[:200] + "..." if len(cleaned_transcript) > 200 else cleaned_transcript)
+
+            # Return both the transcript and the language as a formatted string
+            # Format: "LANGUAGE:lang||transcript_text"
+            return f"LANGUAGE:{lang}||{cleaned_transcript}"
         except Exception as e:
+            return f"LANGUAGE:en||An unexpected error occurred: {str(e)}"
 
     def __init__(self, *args, **kwargs):
         self.is_initialized = False
+
+class TranscriptToDocx(Tool):
+    description = "Creates or updates a DOCX file with YouTube transcript and summary."
+    name = "transcript_to_docx"
+    inputs = {
+        'transcript': {'type': 'string', 'description': 'The transcript to include in the document.'},
+        'summary': {'type': 'string', 'description': 'The summary to include in the document.'},
+        'video_title': {'type': 'string', 'description': 'The title of the YouTube video.'},
+        'image_path': {'type': 'string', 'description': 'Path to the image to include in the document.', 'nullable': True},
+        'existing_docx_path': {'type': 'string', 'description': 'Path to an existing DOCX file to update.', 'nullable': True}
+    }
+    output_type = "string"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.docx_folder = "Documents"
+        if not os.path.exists(self.docx_folder):
+            os.makedirs(self.docx_folder)
+
+    def forward(self, transcript: str, summary: str, video_title: str, image_path: Optional[str] = None, existing_docx_path: Optional[str] = None) -> str:
+        try:
+            # Determine if we're creating a new document or updating an existing one
+            if existing_docx_path and os.path.exists(existing_docx_path):
+                doc = docx.Document(existing_docx_path)
+                # Add a page break before adding new content
+                doc.add_paragraph().add_run().add_break(docx.enum.text.WD_BREAK.PAGE)
+            else:
+                doc = docx.Document()
+                # Set document properties
+                doc.core_properties.title = f"YouTube Transcript: {video_title}"
+                doc.core_properties.author = "YouTube Transcript Tool"
+
+            # Add title
+            title = doc.add_heading(video_title, level=1)
+            title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+
+            # Add summary section
+            doc.add_heading("Summary", level=2)
+            summary_para = doc.add_paragraph(summary)
+
+            # Add image if provided
+            if image_path and os.path.exists(image_path):
+                try:
+                    doc.add_picture(image_path, width=Inches(6))
+                    # Add caption for the image
+                    caption = doc.add_paragraph("Generated image based on transcript content")
+                    caption.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+                    caption.runs[0].italic = True
+                except Exception as img_error:
+                    # If there's an error adding the image, just log it and continue
+                    print(f"Error adding image to document: {str(img_error)}")
+
+            # Add transcript section
+            doc.add_heading("Full Transcript", level=2)
+            transcript_para = doc.add_paragraph(transcript)
+
+            # Clean the video title for filename
+            safe_title = ''.join(c for c in video_title if c.isalnum() or c in ' _-')
+            safe_title = safe_title.replace(' ', '_')
+
+            # Save the document
+            output_filename = f"{safe_title}.docx"
+            output_path = os.path.join(self.docx_folder, output_filename)
+
+            try:
+                doc.save(output_path)
+                print(f"Document saved successfully at: {output_path}")
+                return output_path
+            except Exception as save_error:
+                error_msg = f"Error saving document: {str(save_error)}"
+                print(error_msg)
+                # Try with a simpler filename as fallback
+                try:
+                    fallback_path = os.path.join(self.docx_folder, f"youtube_transcript_{int(time.time())}.docx")
+                    doc.save(fallback_path)
+                    print(f"Document saved with fallback name at: {fallback_path}")
+                    return fallback_path
+                except:
+                    return error_msg
+        except Exception as e:
+            return f"An error occurred while creating the DOCX file: {str(e)}"
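For reference, the three tools can also be chained directly, without the Gradio UI. The snippet below is only a sketch under a few assumptions: GEMINI_API_KEY is set in .env, the Hugging Face token and video URL are placeholders you would replace, and the video has at least one caption track.

# Minimal sketch of driving the tools outside the Gradio app.
# The token and URL below are placeholders, not working values.
from tool import YouTubeTranscriptExtractor, TranscriptSummarizer, TranscriptToDocx

extractor = YouTubeTranscriptExtractor()
summarizer = TranscriptSummarizer(hf_api_key="hf_xxx")   # placeholder token
docx_tool = TranscriptToDocx()

# Extract, then split the "LANGUAGE:<code>||<transcript>" string.
raw = extractor.forward(video_url="https://www.youtube.com/watch?v=VIDEO_ID")  # placeholder URL
lang, transcript = raw.split("||", 1)
lang = lang.replace("LANGUAGE:", "").strip()

# Summarize in the detected language; drop the appended image path, if any.
summary_and_image = summarizer.forward(transcript=transcript, language=lang)
summary = summary_and_image.split("\n\nImage URL: ")[0]

# Write everything into a DOCX under Documents/.
docx_path = docx_tool.forward(
    transcript=transcript,
    summary=summary,
    video_title="Demo video",   # app.py normally reads this from pytubefix
)
print("DOCX written to:", docx_path)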