|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
from typing import TYPE_CHECKING, List, Optional |
|
|
|
if TYPE_CHECKING: |
|
from apify_client.clients import DatasetClient |
|
|
|
from camel.utils import api_keys_required |
|
|
|
|
|
class Apify: |
|
r"""Apify is a platform that allows you to automate any web workflow. |
|
|
|
Args: |
|
api_key (Optional[str]): API key for authenticating with the Apify API. |
|
""" |
|
|
|
@api_keys_required("APIFY_API_KEY") |
|
def __init__( |
|
self, |
|
api_key: Optional[str] = None, |
|
) -> None: |
|
from apify_client import ApifyClient |
|
|
|
self._api_key = api_key or os.environ.get("APIFY_API_KEY") |
|
self.client = ApifyClient(token=self._api_key) |
|
|
|
def run_actor( |
|
self, |
|
actor_id: str, |
|
run_input: Optional[dict] = None, |
|
content_type: Optional[str] = None, |
|
build: Optional[str] = None, |
|
max_items: Optional[int] = None, |
|
memory_mbytes: Optional[int] = None, |
|
timeout_secs: Optional[int] = None, |
|
webhooks: Optional[list] = None, |
|
wait_secs: Optional[int] = None, |
|
) -> Optional[dict]: |
|
r"""Run an actor on the Apify platform. |
|
|
|
Args: |
|
actor_id (str): The ID of the actor to run. |
|
run_input (Optional[dict]): The input data for the actor. Defaults |
|
to `None`. |
|
content_type (str, optional): The content type of the input. |
|
build (str, optional): Specifies the Actor build to run. It can be |
|
either a build tag or build number. By default, the run uses |
|
the build specified in the default run configuration for the |
|
Actor (typically latest). |
|
max_items (int, optional): Maximum number of results that will be |
|
returned by this run. If the Actor is charged per result, you |
|
will not be charged for more results than the given limit. |
|
memory_mbytes (int, optional): Memory limit for the run, in |
|
megabytes. By default, the run uses a memory limit specified in |
|
the default run configuration for the Actor. |
|
timeout_secs (int, optional): Optional timeout for the run, in |
|
seconds. By default, the run uses timeout specified in the |
|
default run configuration for the Actor. |
|
webhooks (list, optional): Optional webhooks |
|
(https://docs.apify.com/webhooks) associated with the Actor |
|
run, which can be used to receive a notification, e.g. when the |
|
Actor finished or failed. If you already have a webhook set up |
|
for the Actor, you do not have to add it again here. |
|
wait_secs (int, optional): The maximum number of seconds the server |
|
waits for finish. If not provided, waits indefinitely. |
|
|
|
Returns: |
|
Optional[dict]: The output data from the actor if successful. |
|
# please use the 'defaultDatasetId' to get the dataset |
|
|
|
Raises: |
|
RuntimeError: If the actor fails to run. |
|
""" |
|
try: |
|
return self.client.actor(actor_id).call( |
|
run_input=run_input, |
|
content_type=content_type, |
|
build=build, |
|
max_items=max_items, |
|
memory_mbytes=memory_mbytes, |
|
timeout_secs=timeout_secs, |
|
webhooks=webhooks, |
|
wait_secs=wait_secs, |
|
) |
|
except Exception as e: |
|
raise RuntimeError(f"Failed to run actor {actor_id}: {e}") from e |
|
|
|
def get_dataset_client( |
|
self, |
|
dataset_id: str, |
|
) -> "DatasetClient": |
|
r"""Get a dataset client from the Apify platform. |
|
|
|
Args: |
|
dataset_id (str): The ID of the dataset to get the client for. |
|
|
|
Returns: |
|
DatasetClient: The dataset client. |
|
|
|
Raises: |
|
RuntimeError: If the dataset client fails to be retrieved. |
|
""" |
|
try: |
|
return self.client.dataset(dataset_id) |
|
except Exception as e: |
|
raise RuntimeError( |
|
f"Failed to get dataset {dataset_id}: {e}" |
|
) from e |
|
|
|
def get_dataset( |
|
self, |
|
dataset_id: str, |
|
) -> Optional[dict]: |
|
r"""Get a dataset from the Apify platform. |
|
|
|
Args: |
|
dataset_id (str): The ID of the dataset to get. |
|
|
|
Returns: |
|
dict: The dataset. |
|
|
|
Raises: |
|
RuntimeError: If the dataset fails to be retrieved. |
|
""" |
|
try: |
|
return self.get_dataset_client(dataset_id).get() |
|
except Exception as e: |
|
raise RuntimeError( |
|
f"Failed to get dataset {dataset_id}: {e}" |
|
) from e |
|
|
|
def update_dataset( |
|
self, |
|
dataset_id: str, |
|
name: str, |
|
) -> dict: |
|
r"""Update a dataset on the Apify platform. |
|
|
|
Args: |
|
dataset_id (str): The ID of the dataset to update. |
|
name (str): The new name for the dataset. |
|
|
|
Returns: |
|
dict: The updated dataset. |
|
|
|
Raises: |
|
RuntimeError: If the dataset fails to be updated. |
|
""" |
|
try: |
|
return self.get_dataset_client(dataset_id).update(name=name) |
|
except Exception as e: |
|
raise RuntimeError( |
|
f"Failed to update dataset {dataset_id}: {e}" |
|
) from e |
|
|
|
def get_dataset_items( |
|
self, |
|
dataset_id: str, |
|
) -> List: |
|
r"""Get items from a dataset on the Apify platform. |
|
|
|
Args: |
|
dataset_id (str): The ID of the dataset to get items from. |
|
|
|
Returns: |
|
list: The items in the dataset. |
|
|
|
Raises: |
|
RuntimeError: If the items fail to be retrieved. |
|
""" |
|
try: |
|
items = self.get_dataset_client(dataset_id).list_items().items |
|
return items |
|
except Exception as e: |
|
raise RuntimeError( |
|
f"Failed to get dataset items {dataset_id}: {e}" |
|
) from e |
|
|
|
def get_datasets( |
|
self, |
|
unnamed: Optional[bool] = None, |
|
limit: Optional[int] = None, |
|
offset: Optional[int] = None, |
|
desc: Optional[bool] = None, |
|
) -> List[dict]: |
|
r"""Get all named datasets from the Apify platform. |
|
|
|
Args: |
|
unnamed (bool, optional): Whether to include unnamed key-value |
|
stores in the list |
|
limit (int, optional): How many key-value stores to retrieve |
|
offset (int, optional): What key-value store to include as first |
|
when retrieving the list |
|
desc (bool, optional): Whether to sort the key-value stores in |
|
descending order based on their modification date |
|
|
|
Returns: |
|
List[dict]: The datasets. |
|
|
|
Raises: |
|
RuntimeError: If the datasets fail to be retrieved. |
|
""" |
|
try: |
|
return ( |
|
self.client.datasets() |
|
.list(unnamed=unnamed, limit=limit, offset=offset, desc=desc) |
|
.items |
|
) |
|
except Exception as e: |
|
raise RuntimeError(f"Failed to get datasets: {e}") from e |
|
|