Spaces:
Running
Running
File size: 1,500 Bytes
7bfec74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# tools/youtube_tool.py
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
import re
def extract_video_id(url: str) -> str:
    """
    Extracts the 11-character video ID from a YouTube URL.

    Supported URL shapes:
        - youtube.com/watch?v=<id>            (``v`` as first query parameter)
        - youtu.be/<id>
        - youtube.com/watch?...&v=<id>        (``v`` after other parameters)
        - youtube.com/embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        - youtube-nocookie.com/embed/<id>

    Args:
        url (str): The full YouTube video URL.

    Returns:
        str: The extracted video ID.

    Raises:
        ValueError: If no supported pattern matches the URL.
    """
    video_id_re = r"([a-zA-Z0-9_-]{11})"
    # Order matters: the two original patterns are tried first so that any
    # URL that was accepted before still yields exactly the same ID.
    patterns = (
        r"youtube\.com/watch\?v=" + video_id_re,
        r"youtu\.be/" + video_id_re,
        # ``v`` preceded by other query parameters, e.g. ?feature=share&v=...
        r"youtube\.com/watch\?[^#\s]*?&v=" + video_id_re,
        # Path-based forms used by embeds, Shorts and live streams.
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/" + video_id_re,
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError("Invalid YouTube URL or unable to extract video ID.")
def get_youtube_transcript(url: str, max_chars: int = 2000) -> str:
    """
    Fetches the transcript text for a given YouTube video.

    All failures are reported as a human-readable string rather than raised,
    so callers always receive a ``str``.

    Args:
        url (str): The YouTube video URL.
        max_chars (int): Maximum number of characters of transcript text to
            return. Defaults to 2000 to prevent token overflow downstream.

    Returns:
        str: Combined transcript text (truncated to ``max_chars``) or an
        error message.
    """
    try:
        video_id = extract_video_id(url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API: yields dict entries carrying a "text" key.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry["text"] for entry in entries]
        else:
            # NOTE(review): newer youtube-transcript-api releases replace the
            # static helper with an instance-level ``fetch`` whose snippets
            # expose ``.text`` -- confirm against the installed version.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in fetched]
        full_text = " ".join(pieces)
        # Strip first, then truncate, so leading whitespace never eats into
        # the character budget.
        return full_text.strip()[:max_chars]
    except TranscriptsDisabled:
        return "This video has transcripts disabled."
    except NoTranscriptFound:
        return "No transcript was found for this video."
    except Exception as e:
        # Deliberate catch-all: this function reports every failure
        # (including an invalid URL) as text instead of raising.
        return f"Transcript error: {str(e)}"
|