marigen_api / loader.py
jameszokah's picture
Synced repo using 'sync_with_huggingface' Github Action
1897f56 verified
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.documents import Document
from typing import List
def load_youtube_content(youtube_url: str) -> List[Document]:
    """Load the content of a YouTube video as LangChain documents.

    The loader is built with ``add_video_info=True`` so that video
    information is requested along with the content.

    Args:
        youtube_url: URL of the YouTube video to load.

    Returns:
        The list of documents produced by ``YoutubeLoader.load()``.
    """
    youtube_loader = YoutubeLoader.from_youtube_url(
        youtube_url,
        add_video_info=True,
    )
    return youtube_loader.load()
def load_web_content(url: str) -> List[Document]:
    """Load the content of a web page as LangChain documents.

    Args:
        url: URL of the web page to load.

    Returns:
        The list of documents produced by ``WebBaseLoader.load()``.
    """
    page_loader = WebBaseLoader(url)
    return page_loader.load()