Spaces:
Runtime error
Runtime error
File size: 1,540 Bytes
ed4d993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from typing import Iterator, Optional, Sequence
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
class BrowserbaseLoader(BaseLoader):
    """Load pre-rendered web pages using a headless browser hosted on Browserbase.

    Depends on `browserbase` package.
    Get your API key from https://browserbase.com
    """

    def __init__(
        self,
        urls: Sequence[str],
        text_content: bool = False,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        session_id: Optional[str] = None,
        proxy: Optional[bool] = None,
    ):
        """Store the load options and create the Browserbase client.

        Args:
            urls: URLs to load. `lazy_load` also indexes into this sequence
                to fill in each document's ``"url"`` metadata.
            text_content: Forwarded unchanged to `Browserbase.load_urls`.
            api_key: Passed as the first positional argument to the
                `Browserbase` constructor.
            project_id: Passed as the second positional argument to the
                `Browserbase` constructor.
            session_id: Forwarded unchanged to `Browserbase.load_urls`.
            proxy: Forwarded unchanged to `Browserbase.load_urls`.

        Raises:
            ImportError: If the `browserbase` package cannot be imported.
        """
        self.urls = urls
        self.text_content = text_content
        self.session_id = session_id
        self.proxy = proxy

        # Imported lazily so that the optional dependency is only required
        # when a loader instance is actually created.
        try:
            from browserbase import Browserbase
        except ImportError as exc:
            # Chain the original failure so the traceback shows it as the
            # direct cause of this more actionable error.
            raise ImportError(
                "You must run "
                "`pip install --upgrade "
                "browserbase` "
                "to use the Browserbase loader."
            ) from exc

        self.browserbase = Browserbase(api_key, project_id)

    def lazy_load(self) -> Iterator[Document]:
        """Load pages from URLs.

        Yields:
            One `Document` per item produced by `Browserbase.load_urls`,
            with the item as ``page_content`` and the URL at the same
            position in ``self.urls`` stored under the ``"url"`` metadata key.
        """
        pages = self.browserbase.load_urls(
            self.urls, self.text_content, self.session_id, self.proxy
        )

        # Pages are paired with URLs by position; this assumes `load_urls`
        # returns results in input order -- TODO confirm against the SDK.
        for i, page in enumerate(pages):
            yield Document(
                page_content=page,
                metadata={
                    "url": self.urls[i],
                },
            )
|