Spaces:
Runtime error
Runtime error
File size: 3,950 Bytes
ed4d993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import logging
from typing import Any, List, Mapping, Optional
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_community.llms.utils import enforce_stop_tokens
logger = logging.getLogger(__name__)
class ChatGLM(LLM):
    """ChatGLM LLM service.

    Posts the prompt and sampling parameters as JSON to ``endpoint_url`` and
    returns the ``response`` field of the JSON reply.

    Example:
        .. code-block:: python

            from langchain_community.llms import ChatGLM
            endpoint_url = (
                "http://127.0.0.1:8000"
            )
            ChatGLM_llm = ChatGLM(
                endpoint_url=endpoint_url
            )
    """

    endpoint_url: str = "http://127.0.0.1:8000/"
    """Endpoint URL to use."""
    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""
    max_token: int = 20000
    """Max token allowed to pass to the model."""
    temperature: float = 0.1
    """LLM model temperature from 0 to 10."""
    history: List[List] = []
    """History of the conversation"""
    top_p: float = 0.7
    """Top P for nucleus sampling from 0 to 1"""
    with_history: bool = False
    """Whether to use history or not"""
    timeout: Optional[float] = None
    """Seconds to wait for the endpoint; ``None`` (default) waits indefinitely."""

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM type."""
        return "chat_glm"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "endpoint_url": self.endpoint_url,
            "model_kwargs": self.model_kwargs or {},
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to a ChatGLM LLM inference endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra payload entries; they override both the built-in
                payload fields and ``model_kwargs``.

        Returns:
            The string generated by the model.

        Raises:
            ValueError: If the request fails, the status code is not 200, the
                body is not valid JSON, or the JSON is not a dict containing
                a ``"response"`` key.

        Example:
            .. code-block:: python

                response = chatglm_llm.invoke("Who are you?")
        """
        # The endpoint expects a JSON body; no authorization header is sent.
        headers = {"Content-Type": "application/json"}

        payload = {
            "prompt": prompt,
            "temperature": self.temperature,
            "history": self.history,
            "max_length": self.max_token,
            "top_p": self.top_p,
        }
        # Later updates win: per-call kwargs override model_kwargs, which
        # override the defaults above.
        payload.update(self.model_kwargs or {})
        payload.update(kwargs)

        logger.debug("ChatGLM payload: %s", payload)

        # call api
        try:
            response = requests.post(
                self.endpoint_url,
                headers=headers,
                json=payload,
                timeout=self.timeout,
            )
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error raised by inference endpoint: {e}") from e

        logger.debug("ChatGLM response: %s", response)

        if response.status_code != 200:
            raise ValueError(f"Failed with response: {response}")

        # Only the decode step can raise JSONDecodeError, so keep the try
        # body limited to it.
        try:
            parsed_response = response.json()
        except requests.exceptions.JSONDecodeError as e:
            raise ValueError(
                f"Error raised during decoding response from inference endpoint: {e}."
                f"\nResponse: {response.text}"
            ) from e

        # Check that the response content exists.
        if not isinstance(parsed_response, dict):
            raise ValueError(f"Unexpected response type: {parsed_response}")
        if "response" not in parsed_response:
            raise ValueError(f"No content in response : {parsed_response}")
        text = parsed_response["response"]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        if self.with_history:
            if "history" in parsed_response:
                self.history = parsed_response["history"]
            else:
                # The text was generated successfully; do not discard it with
                # a KeyError just because the server omitted the history.
                logger.warning(
                    "ChatGLM response has no 'history' field; "
                    "keeping the existing history."
                )

        return text
|