File size: 14,199 Bytes
ed4d993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Mapping, Optional, Sequence, TypedDict

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import Generation, LLMResult
from langchain_core.pydantic_v1 import Extra, Field, root_validator
from langchain_core.utils import get_from_dict_or_env

from langchain_community.llms.utils import enforce_stop_tokens


class TrainResult(TypedDict):
    """Result of a fine-tuning call against the Gradient API.

    Returned by ``GradientLLM.train_unsupervised`` and
    ``GradientLLM.atrain_unsupervised``.
    """

    # Mean loss: the API's "sumLoss" divided by "numberOfTrainableTokens".
    loss: float


class GradientLLM(BaseLLM):
    """Gradient.ai LLM Endpoints.

    GradientLLM is a class to interact with LLMs on gradient.ai

    To use, set the environment variable ``GRADIENT_ACCESS_TOKEN`` with your
    API token and ``GRADIENT_WORKSPACE_ID`` for your gradient workspace,
    or alternatively provide them as keywords to the constructor of this class.

    Example:
        .. code-block:: python

            from langchain_community.llms import GradientLLM
            GradientLLM(
                model="99148c6d-c2a0-4fbe-a4a7-e7c05bdb8a09_base_ml_model",
                model_kwargs={
                    "max_generated_token_count": 128,
                    "temperature": 0.75,
                    "top_p": 0.95,
                    "top_k": 20,
                    "stop": [],
                },
                gradient_workspace_id="12345614fc0_workspace",
                gradient_access_token="gradientai-access_token",
            )

    """

    model_id: str = Field(alias="model", min_length=2)
    "Underlying gradient.ai model id (base or fine-tuned)."

    gradient_workspace_id: Optional[str] = None
    "Underlying gradient.ai workspace_id."

    gradient_access_token: Optional[str] = None
    """gradient.ai API Token, which can be generated by going to
        https://auth.gradient.ai/select-workspace
        and selecting "Access tokens" under the profile drop-down.
    """

    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""

    gradient_api_url: str = "https://api.gradient.ai/api"
    """Endpoint URL to use."""

    aiosession: Optional[aiohttp.ClientSession] = None  #: :meta private:
    """ClientSession, private, subject to change in upcoming releases."""

    # LLM call kwargs
    class Config:
        """Configuration for this pydantic object."""

        allow_population_by_field_name = True
        extra = Extra.forbid

    @root_validator(allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""

        values["gradient_access_token"] = get_from_dict_or_env(
            values, "gradient_access_token", "GRADIENT_ACCESS_TOKEN"
        )
        values["gradient_workspace_id"] = get_from_dict_or_env(
            values, "gradient_workspace_id", "GRADIENT_WORKSPACE_ID"
        )

        if (
            values["gradient_access_token"] is None
            or len(values["gradient_access_token"]) < 10
        ):
            raise ValueError("env variable `GRADIENT_ACCESS_TOKEN` must be set")

        # Bugfix: previously this branch re-checked the *access token* length,
        # so an empty/short workspace id slipped through validation.
        if (
            values["gradient_workspace_id"] is None
            or len(values["gradient_workspace_id"]) < 3
        ):
            raise ValueError("env variable `GRADIENT_WORKSPACE_ID` must be set")

        if values["model_kwargs"]:
            kw = values["model_kwargs"]
            if not 0 <= kw.get("temperature", 0.5) <= 1:
                raise ValueError("`temperature` must be in the range [0.0, 1.0]")

            if not 0 <= kw.get("top_p", 0.5) <= 1:
                raise ValueError("`top_p` must be in the range [0.0, 1.0]")

            if 0 >= kw.get("top_k", 0.5):
                raise ValueError("`top_k` must be positive")

            if 0 >= kw.get("max_generated_token_count", 1):
                raise ValueError("`max_generated_token_count` must be positive")

        values["gradient_api_url"] = get_from_dict_or_env(
            values, "gradient_api_url", "GRADIENT_API_URL"
        )

        # Soft deprecation notice: warn (don't fail) when the `gradientai`
        # package is missing, since this class still talks to the raw REST API.
        try:
            import gradientai  # noqa
        except ImportError:
            logging.warning(
                "DeprecationWarning: `GradientLLM` will use "
                "`pip install gradientai` in future releases of langchain."
            )
        except Exception:
            pass

        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        _model_kwargs = self.model_kwargs or {}
        return {
            **{"gradient_api_url": self.gradient_api_url},
            **{"model_kwargs": _model_kwargs},
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "gradient"

    def _kwargs_post_fine_tune_request(
        self, inputs: Sequence[str], kwargs: Mapping[str, Any]
    ) -> Mapping[str, Any]:
        """Build the kwargs for the POST request to the fine-tune endpoint.

        Args:
            inputs: training samples to send as the request payload.
            kwargs: model kwargs merged over ``self.model_kwargs``; may contain
                ``multipliers``, a per-sample weighting sequence zipped with
                ``inputs``.

        Returns:
            Mapping suitable for ``requests.post(**...)`` /
            ``aiohttp session.post(**...)`` (``url``, ``headers``, ``json``).
        """
        _model_kwargs = self.model_kwargs or {}
        _params = {**_model_kwargs, **kwargs}

        multipliers = _params.get("multipliers", None)

        return dict(
            url=f"{self.gradient_api_url}/models/{self.model_id}/fine-tune",
            headers={
                "authorization": f"Bearer {self.gradient_access_token}",
                "x-gradient-workspace-id": f"{self.gradient_workspace_id}",
                "accept": "application/json",
                "content-type": "application/json",
            },
            json=dict(
                samples=tuple(
                    {
                        "inputs": input,
                    }
                    for input in inputs
                )
                if multipliers is None
                else tuple(
                    {
                        "inputs": input,
                        "fineTuningParameters": {
                            "multiplier": multiplier,
                        },
                    }
                    for input, multiplier in zip(inputs, multipliers)
                ),
            ),
        )

    def _kwargs_post_request(
        self, prompt: str, kwargs: Mapping[str, Any]
    ) -> Mapping[str, Any]:
        """Build the kwargs for the POST request to the completion endpoint.

        Args:
            prompt: prompt used in query.
            kwargs: model kwargs merged over ``self.model_kwargs``.

        Returns:
            Mapping suitable for ``requests.post(**...)`` /
            ``aiohttp session.post(**...)`` (``url``, ``headers``, ``json``).
        """
        _model_kwargs = self.model_kwargs or {}
        _params = {**_model_kwargs, **kwargs}

        return dict(
            url=f"{self.gradient_api_url}/models/{self.model_id}/complete",
            headers={
                "authorization": f"Bearer {self.gradient_access_token}",
                "x-gradient-workspace-id": f"{self.gradient_workspace_id}",
                "accept": "application/json",
                "content-type": "application/json",
            },
            json=dict(
                query=prompt,
                maxGeneratedTokenCount=_params.get("max_generated_token_count", None),
                temperature=_params.get("temperature", None),
                topK=_params.get("top_k", None),
                topP=_params.get("top_p", None),
            ),
        )

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call to Gradients API `model/{id}/complete`.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            The string generated by the model.
        """
        try:
            response = requests.post(**self._kwargs_post_request(prompt, kwargs))
            if response.status_code != 200:
                raise Exception(
                    f"Gradient returned an unexpected response with status "
                    f"{response.status_code}: {response.text}"
                )
        except requests.exceptions.RequestException as e:
            raise Exception(f"RequestException while calling Gradient Endpoint: {e}")

        text = response.json()["generatedOutput"]

        if stop is not None:
            # Apply stop tokens when making calls to Gradient
            text = enforce_stop_tokens(text, stop)

        return text

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Async Call to Gradients API `model/{id}/complete`.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            The string generated by the model.
        """
        # Use a throwaway session unless the caller supplied a shared one.
        if not self.aiosession:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    **self._kwargs_post_request(prompt=prompt, kwargs=kwargs)
                ) as response:
                    if response.status != 200:
                        # aiohttp's `.text` is a coroutine method, so it must be
                        # awaited (previously this interpolated a method repr).
                        raise Exception(
                            f"Gradient returned an unexpected response with status "
                            f"{response.status}: {await response.text()}"
                        )
                    text = (await response.json())["generatedOutput"]
        else:
            async with self.aiosession.post(
                **self._kwargs_post_request(prompt=prompt, kwargs=kwargs)
            ) as response:
                if response.status != 200:
                    raise Exception(
                        f"Gradient returned an unexpected response with status "
                        f"{response.status}: {await response.text()}"
                    )
                text = (await response.json())["generatedOutput"]

        if stop is not None:
            # Apply stop tokens when making calls to Gradient
            text = enforce_stop_tokens(text, stop)

        return text

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts, fanning out over threads."""

        def _inner_generate(prompt: str) -> List[Generation]:
            return [
                Generation(
                    text=self._call(
                        prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
                    )
                )
            ]

        if len(prompts) <= 1:
            # No need to spin up a thread pool for zero or one prompt.
            generations = list(map(_inner_generate, prompts))
        else:
            with ThreadPoolExecutor(min(8, len(prompts))) as p:
                generations = list(p.map(_inner_generate, prompts))

        return LLMResult(generations=generations)

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts concurrently."""
        # Bugfix: `asyncio.gather` must be star-unpacked with the coroutines
        # and awaited. The previous code iterated over an un-awaited
        # `gather(...)` call fed a generator of lists, which fails at runtime.
        texts = await asyncio.gather(
            *(
                self._acall(prompt, stop=stop, run_manager=run_manager, **kwargs)
                for prompt in prompts
            )
        )
        return LLMResult(generations=[[Generation(text=text)] for text in texts])

    def train_unsupervised(
        self,
        inputs: Sequence[str],
        **kwargs: Any,
    ) -> TrainResult:
        """Fine-tune the model on `inputs`; returns the mean training loss."""
        try:
            response = requests.post(
                **self._kwargs_post_fine_tune_request(inputs, kwargs)
            )
            if response.status_code != 200:
                raise Exception(
                    f"Gradient returned an unexpected response with status "
                    f"{response.status_code}: {response.text}"
                )
        except requests.exceptions.RequestException as e:
            raise Exception(f"RequestException while calling Gradient Endpoint: {e}")

        response_json = response.json()
        # Per-token mean loss, as reported by the fine-tune endpoint.
        loss = response_json["sumLoss"] / response_json["numberOfTrainableTokens"]
        return TrainResult(loss=loss)

    async def atrain_unsupervised(
        self,
        inputs: Sequence[str],
        **kwargs: Any,
    ) -> TrainResult:
        """Async fine-tune on `inputs`; returns the mean training loss."""
        if not self.aiosession:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    **self._kwargs_post_fine_tune_request(inputs, kwargs)
                ) as response:
                    if response.status != 200:
                        # `.text` must be awaited under aiohttp (see _acall).
                        raise Exception(
                            f"Gradient returned an unexpected response with status "
                            f"{response.status}: {await response.text()}"
                        )
                    response_json = await response.json()
                    loss = (
                        response_json["sumLoss"]
                        / response_json["numberOfTrainableTokens"]
                    )
        else:
            async with self.aiosession.post(
                **self._kwargs_post_fine_tune_request(inputs, kwargs)
            ) as response:
                if response.status != 200:
                    raise Exception(
                        f"Gradient returned an unexpected response with status "
                        f"{response.status}: {await response.text()}"
                    )
                response_json = await response.json()
                loss = (
                    response_json["sumLoss"] / response_json["numberOfTrainableTokens"]
                )

        return TrainResult(loss=loss)