Spaces:
Runtime error
Runtime error
from __future__ import annotations | |
from typing import Dict, List, Optional, cast | |
import requests | |
from langchain_core.embeddings import Embeddings | |
from langchain_core.pydantic_v1 import BaseModel, Extra, SecretStr, root_validator | |
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env | |
class ClovaEmbeddings(BaseModel, Embeddings):
    """
    Clova's embedding service.

    To use this service,
    you should have the following environment variables
    set with your API tokens and application ID,
    or pass them as named parameters to the constructor:

    - ``CLOVA_EMB_API_KEY``: API key for accessing Clova's embedding service.
    - ``CLOVA_EMB_APIGW_API_KEY``: API gateway key for enhanced security.
    - ``CLOVA_EMB_APP_ID``: Application ID for identifying your application.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import ClovaEmbeddings

            embeddings = ClovaEmbeddings(
                clova_emb_api_key='your_clova_emb_api_key',
                clova_emb_apigw_api_key='your_clova_emb_apigw_api_key',
                app_id='your_app_id'
            )

            query_text = "This is a test query."
            query_result = embeddings.embed_query(query_text)

            document_text = "This is a test document."
            document_result = embeddings.embed_documents([document_text])
    """

    endpoint_url: str = (
        "https://clovastudio.apigw.ntruss.com/testapp/v1/api-tools/embedding"
    )
    """Endpoint URL to use."""

    model: str = "clir-emb-dolphin"
    """Embedding model name to use."""

    clova_emb_api_key: Optional[SecretStr] = None
    """API key for accessing Clova's embedding service."""

    clova_emb_apigw_api_key: Optional[SecretStr] = None
    """API gateway key for enhanced security."""

    app_id: Optional[SecretStr] = None
    """Application ID for identifying your application."""

    class Config:
        # Reject constructor keyword arguments that are not declared fields.
        extra = Extra.forbid

    # Registered as a pre-validator so it runs on the raw constructor values:
    # without this decorator the method is never invoked, the environment
    # fallback never happens, and the credentials silently stay ``None``.
    @root_validator(pre=True, allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the credentials from ``values`` or the environment.

        Each of ``clova_emb_api_key``, ``clova_emb_apigw_api_key`` and
        ``app_id`` is looked up in ``values`` first and then in the matching
        environment variable (``CLOVA_EMB_API_KEY``,
        ``CLOVA_EMB_APIGW_API_KEY``, ``CLOVA_EMB_APP_ID``), and the result is
        stored back wrapped by ``convert_to_secret_str``.

        Args:
            values: The raw field values passed to the constructor.

        Returns:
            The same ``values`` dict with the three credentials filled in.
        """
        # NOTE(review): get_from_dict_or_env is expected to raise when a key
        # is found in neither place -- confirm against langchain_core.
        values["clova_emb_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "clova_emb_api_key", "CLOVA_EMB_API_KEY")
        )
        values["clova_emb_apigw_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(
                values, "clova_emb_apigw_api_key", "CLOVA_EMB_APIGW_API_KEY"
            )
        )
        values["app_id"] = convert_to_secret_str(
            get_from_dict_or_env(values, "app_id", "CLOVA_EMB_APP_ID")
        )
        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Embed a list of texts and return their embeddings.

        One API request is issued per text, in order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return [self._embed_text(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query text and return its embedding.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self._embed_text(text)

    def _embed_text(self, text: str) -> List[float]:
        """
        Internal method to call the embedding API and handle the response.

        Args:
            text: The text to embed.

        Returns:
            The value found at ``result.embedding`` in the JSON response.

        Raises:
            ValueError: If the status code is not 200, or the response body
                does not contain ``result.embedding``.
        """
        payload = {"text": text}

        # HTTP headers for authorization. The fields are typed Optional, so
        # ``cast`` only narrows the type for the checker; the validator above
        # is what actually guarantees they are populated.
        headers = {
            "X-NCP-CLOVASTUDIO-API-KEY": cast(
                SecretStr, self.clova_emb_api_key
            ).get_secret_value(),
            "X-NCP-APIGW-API-KEY": cast(
                SecretStr, self.clova_emb_apigw_api_key
            ).get_secret_value(),
            "Content-Type": "application/json",
        }

        # The request URL is <endpoint_url>/<model>/<app_id>.
        app_id = cast(SecretStr, self.app_id).get_secret_value()
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely -- consider exposing one.
        response = requests.post(
            f"{self.endpoint_url}/{self.model}/{app_id}",
            headers=headers,
            json=payload,
        )

        # A 200 response without the expected payload is reported through the
        # same error as a non-200 response, with the raw body attached.
        if response.status_code == 200:
            response_data = response.json()
            if "result" in response_data and "embedding" in response_data["result"]:
                return response_data["result"]["embedding"]

        raise ValueError(
            f"API request failed with status {response.status_code}: {response.text}"
        )