Spaces:
Runtime error
Runtime error
File size: 6,905 Bytes
ed4d993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import os
from math import ceil
from typing import Any, Dict, List, Optional
class ArangoGraph:
"""ArangoDB wrapper for graph operations.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
code may attempt commands that would result in deletion, mutation
of data if appropriately prompted or reading sensitive data if such
data is present in the database.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this tool.
See https://python.langchain.com/docs/security for more information.
"""
def __init__(self, db: Any) -> None:
"""Create a new ArangoDB graph wrapper instance."""
self.set_db(db)
self.set_schema()
@property
def db(self) -> Any:
return self.__db
@property
def schema(self) -> Dict[str, Any]:
return self.__schema
def set_db(self, db: Any) -> None:
from arango.database import Database
if not isinstance(db, Database):
msg = "**db** parameter must inherit from arango.database.Database"
raise TypeError(msg)
self.__db: Database = db
self.set_schema()
def set_schema(self, schema: Optional[Dict[str, Any]] = None) -> None:
"""
Set the schema of the ArangoDB Database.
Auto-generates Schema if **schema** is None.
"""
self.__schema = self.generate_schema() if schema is None else schema
def generate_schema(
self, sample_ratio: float = 0
) -> Dict[str, List[Dict[str, Any]]]:
"""
Generates the schema of the ArangoDB Database and returns it
User can specify a **sample_ratio** (0 to 1) to determine the
ratio of documents/edges used (in relation to the Collection size)
to render each Collection Schema.
"""
if not 0 <= sample_ratio <= 1:
raise ValueError("**sample_ratio** value must be in between 0 to 1")
# Stores the Edge Relationships between each ArangoDB Document Collection
graph_schema: List[Dict[str, Any]] = [
{"graph_name": g["name"], "edge_definitions": g["edge_definitions"]}
for g in self.db.graphs()
]
# Stores the schema of every ArangoDB Document/Edge collection
collection_schema: List[Dict[str, Any]] = []
for collection in self.db.collections():
if collection["system"]:
continue
# Extract collection name, type, and size
col_name: str = collection["name"]
col_type: str = collection["type"]
col_size: int = self.db.collection(col_name).count()
# Skip collection if empty
if col_size == 0:
continue
# Set number of ArangoDB documents/edges to retrieve
limit_amount = ceil(sample_ratio * col_size) or 1
aql = f"""
FOR doc in {col_name}
LIMIT {limit_amount}
RETURN doc
"""
doc: Dict[str, Any]
properties: List[Dict[str, str]] = []
for doc in self.__db.aql.execute(aql):
for key, value in doc.items():
properties.append({"name": key, "type": type(value).__name__})
collection_schema.append(
{
"collection_name": col_name,
"collection_type": col_type,
f"{col_type}_properties": properties,
f"example_{col_type}": doc,
}
)
return {"Graph Schema": graph_schema, "Collection Schema": collection_schema}
def query(
self, query: str, top_k: Optional[int] = None, **kwargs: Any
) -> List[Dict[str, Any]]:
"""Query the ArangoDB database."""
import itertools
cursor = self.__db.aql.execute(query, **kwargs)
return [doc for doc in itertools.islice(cursor, top_k)]
@classmethod
def from_db_credentials(
cls,
url: Optional[str] = None,
dbname: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
) -> Any:
"""Convenience constructor that builds Arango DB from credentials.
Args:
url: Arango DB url. Can be passed in as named arg or set as environment
var ``ARANGODB_URL``. Defaults to "http://localhost:8529".
dbname: Arango DB name. Can be passed in as named arg or set as
environment var ``ARANGODB_DBNAME``. Defaults to "_system".
username: Can be passed in as named arg or set as environment var
``ARANGODB_USERNAME``. Defaults to "root".
password: Can be passed ni as named arg or set as environment var
``ARANGODB_PASSWORD``. Defaults to "".
Returns:
An arango.database.StandardDatabase.
"""
db = get_arangodb_client(
url=url, dbname=dbname, username=username, password=password
)
return cls(db)
def get_arangodb_client(
url: Optional[str] = None,
dbname: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
) -> Any:
"""Get the Arango DB client from credentials.
Args:
url: Arango DB url. Can be passed in as named arg or set as environment
var ``ARANGODB_URL``. Defaults to "http://localhost:8529".
dbname: Arango DB name. Can be passed in as named arg or set as
environment var ``ARANGODB_DBNAME``. Defaults to "_system".
username: Can be passed in as named arg or set as environment var
``ARANGODB_USERNAME``. Defaults to "root".
password: Can be passed ni as named arg or set as environment var
``ARANGODB_PASSWORD``. Defaults to "".
Returns:
An arango.database.StandardDatabase.
"""
try:
from arango import ArangoClient
except ImportError as e:
raise ImportError(
"Unable to import arango, please install with `pip install python-arango`."
) from e
_url: str = url or os.environ.get("ARANGODB_URL", "http://localhost:8529") # type: ignore[assignment]
_dbname: str = dbname or os.environ.get("ARANGODB_DBNAME", "_system") # type: ignore[assignment]
_username: str = username or os.environ.get("ARANGODB_USERNAME", "root") # type: ignore[assignment]
_password: str = password or os.environ.get("ARANGODB_PASSWORD", "") # type: ignore[assignment]
return ArangoClient(_url).db(_dbname, _username, _password, verify=True)
|