File size: 3,809 Bytes
583462f 56ac5db 74d45f5 56ac5db 8895970 56ac5db 8895970 56ac5db 583462f 56ac5db 583462f 56ac5db 583462f 56ac5db 583462f 56ac5db 583462f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
from smolagents.tools import Tool
from typing import Optional
import os
from transformers import pipeline
import requests
import io
from PIL import Image
from dotenv import load_dotenv
load_dotenv()
class TranscriptSummarizer(Tool):
    """Summarize a transcript and generate an accompanying image.

    The summary is produced by a ``transformers`` summarization pipeline
    (``google/pegasus-xsum``). An image prompt is built from the first three
    words of that summary and posted to the Hugging Face Inference API
    endpoint stored in ``self.api_url``; the returned image is written to
    ``Image/image.jpg`` (relative to the current working directory).

    The API token is read from the ``HF_API_KEY`` environment variable.
    """

    description = "Summarizes a transcript and generates blog content using the transformers library and Hugging Face API for image generation."
    name = "transcript_summarizer"
    inputs = {'transcript': {'type': 'string', 'description': 'The transcript to summarize.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Load the summarization pipeline and prepare the image API settings."""
        super().__init__(*args, **kwargs)
        self.summarizer = pipeline("summarization", model="google/pegasus-xsum")
        self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
        # NOTE: if HF_API_KEY is unset the header value becomes "Bearer None".
        self.headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}

    def query(self, payload):
        """POST ``payload`` as JSON to the image endpoint and return the raw body.

        Args:
            payload: JSON-serializable request body (``forward`` sends
                ``{"inputs": <prompt>}``).

        Returns:
            The response body as bytes.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status.
                Without this check an error body would be handed to the image
                decoder and fail with an unrelated message.
            requests.Timeout: if the endpoint does not answer in time.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=120,  # never hang forever on an unresponsive endpoint
        )
        response.raise_for_status()
        return response.content

    def _summary_lengths(self, length):
        """Return ``(max_length, min_length)`` for a transcript of ``length`` characters."""
        if length <= 1000:
            return 300, 100
        if length <= 3000:
            return 750, 250
        return 1500, 500

    def forward(self, transcript: str) -> str:
        """Summarize ``transcript`` and save a generated image next to it.

        Args:
            transcript: The text to summarize.

        Returns:
            The summary, a blank line, and ``Image URL: <path of saved image>``.
            On any failure, a string starting with
            ``An unexpected error occurred:`` followed by the error text
            (errors are reported as text rather than raised).
        """
        try:
            if not transcript or not transcript.strip():
                raise ValueError("transcript is empty")

            max_length, min_length = self._summary_lengths(len(transcript))
            # truncation=True lets inputs longer than the model's maximum
            # input length be summarized instead of raising.
            summary = self.summarizer(
                transcript,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']

            # The first three words of the summary seed the image prompt.
            key_entities = summary.split()[:3]
            image_prompt = f"Generate an image related to: {' '.join(key_entities)}, cartoon style"

            image_bytes = self.query({"inputs": image_prompt})
            image = Image.open(io.BytesIO(image_bytes))
            # JPEG cannot store alpha/palette modes; normalise before saving.
            if image.mode not in ("RGB", "L", "CMYK"):
                image = image.convert("RGB")

            image_folder = "Image"
            os.makedirs(image_folder, exist_ok=True)
            image_url = os.path.join(image_folder, "image.jpg")
            image.save(image_url)

            return f"{summary}\n\nImage URL: {image_url}"
        except Exception as e:
            # Tool boundary: report the failure as text to the caller.
            return f"An unexpected error occurred: {str(e)}"
class YouTubeTranscriptExtractor(Tool):
    """Extract the caption text of a YouTube video as a plain transcript.

    English captions (code ``en``) are preferred; otherwise the first
    available caption track is used. The SRT output is stripped of sequence
    numbers and timestamp lines before being returned.
    """

    description = "Extracts the transcript from a YouTube video."
    name = "youtube_transcript_extractor"
    inputs = {'video_url': {'type': 'string', 'description': 'The URL of the YouTube video.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; arguments are ignored."""
        # NOTE(review): super().__init__() is not called here - confirm the
        # base Tool needs no further setup.
        self.is_initialized = False

    def forward(self, video_url: str) -> str:
        """Return the cleaned transcript for ``video_url``.

        Args:
            video_url: The URL of the YouTube video.

        Returns:
            The caption text, one caption line per line (blank separator
            lines from the SRT are kept). On any failure, a string starting
            with ``An unexpected error occurred:`` followed by the error text.
        """
        try:
            from pytubefix import YouTube

            yt = YouTube(video_url)
            captions = yt.captions
            lang = 'en'

            if lang in captions:
                track = captions[lang]
            else:
                available = captions.all()
                if not available:
                    # Previously surfaced as "list index out of range".
                    raise ValueError("no captions are available for this video")
                track = available[0]
            transcript = track.generate_srt_captions()

            # Drop SRT timestamp lines and the sequence number directly above
            # each one. A digit-only line NOT followed by a timestamp is real
            # caption text (e.g. "2024") and is kept.
            lines = transcript.splitlines()
            kept = []
            for idx, line in enumerate(lines):
                if "-->" in line:
                    continue
                is_sequence_number = (
                    line.strip().isdigit()
                    and idx + 1 < len(lines)
                    and "-->" in lines[idx + 1]
                )
                if is_sequence_number:
                    continue
                kept.append(line + "\n")
            cleaned_transcript = "".join(kept)

            print("transcript : ", cleaned_transcript)
            return cleaned_transcript
        except Exception as e:
            # Tool boundary: report the failure as text to the caller.
            return f"An unexpected error occurred: {str(e)}"
|