# tools/youtube_tool.py
import re

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound

# Compiled once at import time. Order matters: the first pattern that matches
# decides the result, so the two original URL shapes are tried first.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Standard watch URL. The lazy group skips any query parameters that
        # precede `v=` (e.g. `watch?feature=share&v=<id>`); with zero
        # repetitions it matches the plain `watch?v=<id>` form.
        r"youtube\.com/watch\?(?:[^&#\s]*&)*?v=([a-zA-Z0-9_-]{11})",
        # Short share links.
        r"youtu\.be/([a-zA-Z0-9_-]{11})",
        # Path-based forms: embeds, Shorts, live streams, legacy /v/ links.
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([a-zA-Z0-9_-]{11})",
    )
)


def extract_video_id(url: str) -> str:
    """
    Extracts the 11-character video ID from a YouTube URL.

    Recognised shapes are `youtube.com/watch?...v=<id>` (with `v` anywhere in
    the query string), `youtu.be/<id>`, and
    `youtube.com/{embed,shorts,live,v}/<id>` (including the
    `youtube-nocookie.com` host). The patterns are searched anywhere in the
    string, so a scheme and subdomain are optional.

    Args:
        url (str): The full YouTube video URL.

    Returns:
        str: The extracted video ID.

    Raises:
        ValueError: If `url` is not a string or no known URL shape matches.
    """
    # Non-string input would otherwise surface as a TypeError from `re`,
    # contradicting the documented ValueError contract.
    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL or unable to extract video ID.")

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)

    raise ValueError("Invalid YouTube URL or unable to extract video ID.")


def get_youtube_transcript(url: str, max_chars: int = 2000) -> str:
    """
    Fetches the transcript text for a given YouTube video.

    This function does not raise for transcript or URL problems: every failure
    is reported as a human-readable message in the returned string.

    Args:
        url (str): The YouTube video URL.
        max_chars (int): Maximum number of characters of transcript text to
            return. Defaults to 2000 to prevent token overflow downstream.

    Returns:
        str: The transcript segments joined with single spaces, stripped and
            truncated to `max_chars`, or an error message.
    """
    try:
        video_id = extract_video_id(url)

        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older releases of youtube-transcript-api: static helper that
            # returns a list of dicts with a "text" key.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = (entry["text"] for entry in entries)
        else:
            # Releases without the static helper: instance-based fetch() whose
            # result iterates over snippet objects exposing `.text`.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = (snippet.text for snippet in fetched)

        return " ".join(texts).strip()[:max_chars]
    except TranscriptsDisabled:
        return "This video has transcripts disabled."
    except NoTranscriptFound:
        return "No transcript was found for this video."
    except Exception as e:
        # Deliberate catch-all: covers invalid URLs (ValueError) as well as
        # network/library failures, reported as text rather than raised.
        return f"Transcript error: {str(e)}"