Spaces:

MartinHummel
/

FinalAssignment

Running

File size: 1,500 Bytes

7bfec74

# tools/youtube_tool.py

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
import re

def extract_video_id(url: str) -> str:
    """
    Extracts the 11-character video ID from a YouTube URL.

    Supported URL shapes (any sub-domain such as ``www.`` or ``m.`` is fine
    because the patterns are searched anywhere in the string):
        - ``youtube.com/watch?v=<id>`` -- ``v`` may appear at any position
          in the query string, e.g. ``watch?feature=share&v=<id>``
        - ``youtu.be/<id>``
        - ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``,
          ``/v/<id>`` (also on ``youtube-nocookie.com``)

    Args:
        url (str): The full YouTube video URL.

    Returns:
        str: The extracted video ID.

    Raises:
        ValueError: If none of the supported patterns matches ``url``.
    """
    video_id = r"([a-zA-Z0-9_-]{11})"
    patterns = [
        # Lazily skip whole `name=value&` parameters in front of `v=`.
        # The lazy `*?` tries zero skipped parameters first, so when `v`
        # occurs more than once the first occurrence still wins.
        r"youtube\.com/watch\?(?:[^#\s&]*&)*?v=" + video_id,
        r"youtu\.be/" + video_id,
        # Path-style links where the ID is a path segment, not a parameter.
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/" + video_id,
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError("Invalid YouTube URL or unable to extract video ID.")

def get_youtube_transcript(url: str) -> str:
    """
    Returns the transcript of a YouTube video as a single string.

    The video ID is taken from ``url`` via ``extract_video_id``, the
    transcript entries are requested through ``YouTubeTranscriptApi``, and
    the ``"text"`` field of every entry is joined with single spaces.

    Args:
        url (str): The YouTube video URL.

    Returns:
        str: The joined transcript, stripped and cut to at most 2000
        characters, or a human-readable error message when the transcript
        cannot be produced. Errors are reported as text, not raised.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(extract_video_id(url))
        joined = " ".join(segment["text"] for segment in segments)
        trimmed = joined.strip()
        # Cap the result at 2000 characters to prevent token overflow.
        return trimmed[:2000]
    except TranscriptsDisabled:
        message = "This video has transcripts disabled."
    except NoTranscriptFound:
        message = "No transcript was found for this video."
    except Exception as e:
        # Any other failure (including an unparseable URL) is reported
        # as text instead of propagating to the caller.
        message = f"Transcript error: {str(e)}"
    return message