Spaces:

rote1
/

IAGO

Sleeping

File size: 9,506 Bytes

62da328

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import os
from typing import List, Optional, cast

from camel.toolkits import FunctionTool
from camel.toolkits.base import BaseToolkit


def get_plain_text_from_rich_text(rich_text: List[dict]) -> str:
    r"""Concatenates the plain text of a sequence of rich text elements.

    Args:
        rich_text: A list of dictionaries representing rich text elements.
            The value stored under each element's "plain_text" key is
            used; an element without that key contributes an empty
            string.

    Returns:
        str: The plain text of every element, concatenated in order with
            no separator.
    """
    pieces = []
    for segment in rich_text:
        # Elements lacking "plain_text" add nothing to the result.
        pieces.append(segment.get("plain_text", ""))
    return "".join(pieces)


def get_media_source_text(block: dict) -> str:
    r"""Builds a textual description of a Notion media block from its
    source URL and, when present, its caption.

    Args:
        block: A dictionary representing a Notion media block. The media
            payload is looked up under the key named by the block's
            "type" value.

    Returns:
        The source URL on its own when the block has no caption,
            otherwise the caption followed by a colon and the source URL.
            When no URL can be found, a placeholder naming the block type
            is used in place of the URL.
    """
    kind = block.get("type", "Unknown Type")
    payload = block.get(kind, {})

    # Resolve the source: externally hosted URL first, then a hosted
    # file URL, then a direct "url" entry (with a placeholder fallback).
    source = payload.get("external", {}).get("url")
    if not source:
        source = payload.get("file", {}).get("url")
    if not source:
        source = payload.get(
            "url", "[Missing case for media block types]: " + kind
        )

    # Without a caption the source alone is the result.
    caption_parts = payload.get("caption", [])
    if not caption_parts:
        return source

    caption_text = get_plain_text_from_rich_text(caption_parts)
    return f"{caption_text}: {source}"


class NotionToolkit(BaseToolkit):
    r"""A toolkit for retrieving information from the user's notion pages.

    Attributes:
        notion_token (Optional[str], optional): The notion_token used to
            interact with notion APIs. When not given, the value of the
            ``NOTION_TOKEN`` environment variable is used.
            (default: :obj:`None`)
        notion_client (Client): The ``notion_client.Client`` instance used
            for interacting with the notion APIs.
    """

    def __init__(
        self,
        notion_token: Optional[str] = None,
    ) -> None:
        r"""Initializes the NotionToolkit.

        Args:
            notion_token (Optional[str], optional): The optional notion_token
                used to interact with notion APIs. Falls back to the
                ``NOTION_TOKEN`` environment variable.
                (default: :obj:`None`)
        """
        # Imported lazily so the dependency is only needed when the
        # toolkit is actually instantiated.
        from notion_client import Client

        self.notion_token = notion_token or os.environ.get("NOTION_TOKEN")
        self.notion_client = Client(auth=self.notion_token)

    def list_all_users(self) -> List[dict]:
        r"""Lists all users via the Notion integration.

        Returns:
            List[dict]: A list of user objects with type, name, and workspace.
                "type" and "name" are :obj:`None` when the user object
                does not carry them; "workspace" is an empty string when
                no workspace name is available.
        """
        all_users_info: List[dict] = []
        cursor = None

        # Follow the pagination cursor until the API reports no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.users.list(start_cursor=cursor),
            )
            all_users_info.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        formatted_users = []
        for user in all_users_info:
            user_type = user.get("type")
            # The workspace name, when present, lives in the sub-object
            # keyed by the user's type.
            type_details = user.get(user_type) or {}
            formatted_users.append(
                {
                    # Use .get(): not every user object is guaranteed to
                    # carry these optional fields.
                    "type": user_type,
                    "name": user.get("name"),
                    "workspace": type_details.get("workspace_name", ""),
                }
            )

        return formatted_users

    def list_all_pages(self) -> List[dict]:
        r"""Lists all pages in the Notion workspace.

        Returns:
            List[dict]: A list of page objects with title and id. The title
                is :obj:`None` when the page has no title text.
        """
        all_pages_info: List[dict] = []
        cursor = None

        # Follow the pagination cursor until the API reports no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.search(
                    filter={"property": "object", "value": "page"},
                    start_cursor=cursor,
                ),
            )
            all_pages_info.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        formatted_pages = [
            {
                "id": page.get("id"),
                "title": self._get_page_title(page),
            }
            for page in all_pages_info
        ]

        return formatted_pages

    @staticmethod
    def _get_page_title(page: dict) -> Optional[str]:
        r"""Extracts the title text of a Notion page object.

        Args:
            page (dict): A dictionary representing a Notion page.

        Returns:
            Optional[str]: The plain text of the page title, or :obj:`None`
                if the page has no title text.
        """
        properties = page.get("properties") or {}

        # Pages that live in a database expose their title under a
        # user-chosen property name, so locate it by its "title" type
        # rather than by key.
        title_property = next(
            (
                prop
                for prop in properties.values()
                if isinstance(prop, dict) and prop.get("type") == "title"
            ),
            None,
        )
        if title_property is None:
            # Fall back to a property literally named "title".
            named_title = properties.get("title")
            title_property = (
                named_title if isinstance(named_title, dict) else {}
            )

        # Join every rich text segment so formatted titles are not
        # truncated to their first segment.
        title = get_plain_text_from_rich_text(
            title_property.get("title") or []
        )
        return title or None

    def get_notion_block_text_content(self, block_id: str) -> str:
        r"""Retrieves the text content of a Notion block.

        Args:
            block_id (str): The ID of the Notion block to retrieve.

        Returns:
            str: The text content of a Notion block, containing all
                the sub blocks, separated by single spaces.
        """
        blocks: List[dict] = []
        cursor = None

        # Follow the pagination cursor until the API reports no more pages.
        while True:
            response = cast(
                dict,
                self.notion_client.blocks.children.list(
                    block_id=block_id, start_cursor=cursor
                ),
            )
            blocks.extend(response["results"])

            if not response["has_more"]:
                break

            cursor = response["next_cursor"]

        block_text_content = " ".join(
            [self.get_text_from_block(sub_block) for sub_block in blocks]
        )

        return block_text_content

    def get_text_from_block(self, block: dict) -> str:
        r"""Extracts plain text from a Notion block based on its type.

        Args:
            block (dict): A dictionary representing a Notion block.

        Returns:
            str: A string containing the extracted plain text of the block,
                followed by the text of its children when it has any.
        """
        block_type = block.get("type")
        type_payload = block.get(block_type)
        rich_text = (
            type_payload.get("rich_text")
            if isinstance(type_payload, dict)
            else None
        )

        # Get rich text for supported block types. Test for presence, not
        # truthiness: an empty list is an empty line and must yield an
        # empty string rather than fall through to the per-type cases.
        if rich_text is not None:
            text = get_plain_text_from_rich_text(rich_text)
        # Handle the remaining block types by case
        elif block_type == "unsupported":
            text = "[Unsupported block type]"
        elif block_type == "bookmark":
            text = block["bookmark"]["url"]
        elif block_type == "child_database":
            text = block["child_database"]["title"]
            # Use other API endpoints for full database data
        elif block_type == "child_page":
            text = block["child_page"]["title"]
        elif block_type in ("embed", "video", "file", "image", "pdf"):
            text = get_media_source_text(block)
        elif block_type == "equation":
            text = block["equation"]["expression"]
        elif block_type == "link_preview":
            text = block["link_preview"]["url"]
        elif block_type == "synced_block":
            synced_from = block["synced_block"].get("synced_from")
            if synced_from:
                # The referenced ID is stored under the key named by
                # the reference's own "type" field.
                source_id = synced_from[synced_from["type"]]
                text = (
                    f"This block is synced with a block with ID: {source_id}"
                )
            else:
                text = "Source sync block that another block is synced with."
        elif block_type == "table":
            text = f"Table width: {block['table']['table_width']}"
            # Fetch children for full table data
        elif block_type == "table_of_contents":
            text = f"ToC color: {block['table_of_contents']['color']}"
        elif block_type in ("breadcrumb", "column_list", "divider"):
            text = "No text available"
        else:
            text = "[Needs case added]"

        # Query children for blocks with children
        if block.get("has_children"):
            children_text = self.get_notion_block_text_content(block["id"])
            if children_text:
                # Separate parent and child text so words do not run
                # together.
                text = f"{text} {children_text}" if text else children_text

        return text

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects
                representing the functions in the toolkit.
        """
        return [
            FunctionTool(self.list_all_pages),
            FunctionTool(self.list_all_users),
            FunctionTool(self.get_notion_block_text_content),
        ]