peterpeter8585 committed on
Commit d7a53c8 · verified · 1 Parent(s): ac66021

Update app.py

Files changed (1)
  1. app.py +7 -403
app.py CHANGED
@@ -1,4 +1,6 @@
- from __future__ import annotations # type: ignore[import-not-found]
+ import inspect
+ from langchain_huggingface import HuggingFacePipeline as HFP
+ path_hf=inspect.getfile(HFP)
  from subprocess import Popen, PIPE as P
  from langchain_experimental.tools.python.tool import PythonREPLTool as PYT
  from langchain.agents import load_tools, create_structured_chat_agent as Agent,AgentExecutor as Ex, AgentType as Type
@@ -13,408 +15,10 @@ import torch
  import importlib.util
  import logging
  from typing import Any, Dict, Iterator, List, Mapping, Optional
-
- from langchain_core.callbacks import CallbackManagerForLLMRun
- from langchain_core.language_models.llms import BaseLLM
- from langchain_core.outputs import Generation, GenerationChunk, LLMResult
- from pydantic import ConfigDict, model_validator
-
- from import_utils import (
-     IMPORT_ERROR,
-     is_ipex_available,
-     is_openvino_available,
-     is_optimum_intel_available,
-     is_optimum_intel_version,
- )
-
- DEFAULT_MODEL_ID = "gpt2"
- DEFAULT_TASK = "text-generation"
- VALID_TASKS = (
-     "text2text-generation",
-     "text-generation",
-     "summarization",
-     "translation",
- )
- DEFAULT_BATCH_SIZE = 4
- _MIN_OPTIMUM_VERSION = "1.21"
-
-
- logger = logging.getLogger(__name__)
-
-
- class HuggingFacePipeline(BaseLLM):
-     global torch
-     """HuggingFace Pipeline API.
-
-     To use, you should have the ``transformers`` python package installed.
-
-     Only supports `text-generation`, `text2text-generation`, `summarization` and
-     `translation` for now.
-
-     Example using from_model_id:
-         .. code-block:: python
-
-             from langchain_huggingface import HuggingFacePipeline
-             hf = HuggingFacePipeline.from_model_id(
-                 model_id="gpt2",
-                 task="text-generation",
-                 pipeline_kwargs={"max_new_tokens": 10},
-             )
-     Example passing pipeline in directly:
-         .. code-block:: python
-
-             from langchain_huggingface import HuggingFacePipeline
-             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-             model_id = "gpt2"
-             tokenizer = AutoTokenizer.from_pretrained(model_id)
-             model = AutoModelForCausalLM.from_pretrained(model_id)
-             pipe = pipeline(
-                 "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10
-             )
-             hf = HuggingFacePipeline(pipeline=pipe)
-     """
-
-     pipeline: Any = None #: :meta private:
-     model_id: Optional[str] = None
-     """The model name. If not set explicitly by the user,
-     it will be inferred from the provided pipeline (if available).
-     If neither is provided, the DEFAULT_MODEL_ID will be used."""
-     model_kwargs: Optional[dict] = None
-     """Keyword arguments passed to the model."""
-     pipeline_kwargs: Optional[dict] = None
-     """Keyword arguments passed to the pipeline."""
-     batch_size: int = DEFAULT_BATCH_SIZE
-     """Batch size to use when passing multiple documents to generate."""
-
-     model_config = ConfigDict(
-         extra="forbid",
-     )
-
-     @model_validator(mode="before")
-     @classmethod
-     def pre_init_validator(cls, values: Dict[str, Any]) -> Dict[str, Any]:
-         """Ensure model_id is set either by pipeline or user input."""
-         if "model_id" not in values:
-             if "pipeline" in values and values["pipeline"]:
-                 values["model_id"] = values["pipeline"].model.name_or_path
-             else:
-                 values["model_id"] = DEFAULT_MODEL_ID
-         return values
-
-     @classmethod
-     def from_model_id(
-         cls,
-         model_id: str,
-         task: str,
-         backend: str = "default",
-         device: Optional[int] = None,
-         device_map: Optional[str] = None,
-         model_kwargs: Optional[dict] = None,
-         pipeline_kwargs: Optional[dict] = None,
-         batch_size: int = DEFAULT_BATCH_SIZE,
-         **kwargs: Any,
-     ) -> HuggingFacePipeline:
-         """Construct the pipeline object from model_id and task."""
-         try:
-             from transformers import ( # type: ignore[import]
-                 AutoModelForCausalLM,
-                 AutoModelForSeq2SeqLM,
-                 AutoTokenizer,
-             )
-             from transformers import pipeline as hf_pipeline # type: ignore[import]
-
-         except ImportError:
-             raise ValueError(
-                 "Could not import transformers python package. "
-                 "Please install it with `pip install transformers`."
-             )
-
-         _model_kwargs = model_kwargs.copy() if model_kwargs else {}
-         if device_map is not None:
-             if device is not None:
-                 raise ValueError(
-                     "Both `device` and `device_map` are specified. "
-                     "`device` will override `device_map`. "
-                     "You will most likely encounter unexpected behavior."
-                     "Please remove `device` and keep "
-                     "`device_map`."
-                 )
-
-             if "device_map" in _model_kwargs:
-                 raise ValueError("`device_map` is already specified in `model_kwargs`.")
-
-             _model_kwargs["device_map"] = device_map
-         tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
-
-         if backend in {"openvino", "ipex"}:
-             if task not in VALID_TASKS:
-                 raise ValueError(
-                     f"Got invalid task {task}, "
-                     f"currently only {VALID_TASKS} are supported"
-                 )
-
-             err_msg = f'Backend: {backend} {IMPORT_ERROR.format(f"optimum[{backend}]")}'
-             if not is_optimum_intel_available():
-                 raise ImportError(err_msg)
-
-             # TODO: upgrade _MIN_OPTIMUM_VERSION to 1.22 after release
-             min_optimum_version = (
-                 "1.22"
-                 if backend == "ipex" and task != "text-generation"
-                 else _MIN_OPTIMUM_VERSION
-             )
-             if is_optimum_intel_version("<", min_optimum_version):
-                 raise ImportError(
-                     f"Backend: {backend} requires optimum-intel>="
-                     f"{min_optimum_version}. You can install it with pip: "
-                     "`pip install --upgrade --upgrade-strategy eager "
-                     f"`optimum[{backend}]`."
-                 )
-
-             if backend == "openvino":
-                 if not is_openvino_available():
-                     raise ImportError(err_msg)
-
-                 from optimum.intel import ( # type: ignore[import]
-                     OVModelForCausalLM,
-                     OVModelForSeq2SeqLM,
-                 )
-
-                 model_cls = (
-                     OVModelForCausalLM
-                     if task == "text-generation"
-                     else OVModelForSeq2SeqLM
-                 )
-             else:
-                 if not is_ipex_available():
-                     raise ImportError(err_msg)
-
-                 if task == "text-generation":
-                     from optimum.intel import (
-                         IPEXModelForCausalLM, # type: ignore[import]
-                     )
-
-                     model_cls = IPEXModelForCausalLM
-                 else:
-                     from optimum.intel import (
-                         IPEXModelForSeq2SeqLM, # type: ignore[import]
-                     )
-
-                     model_cls = IPEXModelForSeq2SeqLM
-
-         else:
-             model_cls = (
-                 AutoModelForCausalLM
-                 if task == "text-generation"
-                 else AutoModelForSeq2SeqLM
-             )
-
-         model = model_cls.from_pretrained(model_id, **_model_kwargs)
-         model=torch.compile(model,mode="max-autotune")
-
-         if tokenizer.pad_token is None:
-             if model.config.pad_token_id is not None:
-                 tokenizer.pad_token_id = model.config.pad_token_id
-             elif model.config.eos_token_id is not None and isinstance(
-                 model.config.eos_token_id, int
-             ):
-                 tokenizer.pad_token_id = model.config.eos_token_id
-             elif tokenizer.eos_token_id is not None:
-                 tokenizer.pad_token_id = tokenizer.eos_token_id
-             else:
-                 tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-
-         if (
-             (
-                 getattr(model, "is_loaded_in_4bit", False)
-                 or getattr(model, "is_loaded_in_8bit", False)
-             )
-             and device is not None
-             and backend == "default"
-         ):
-             logger.warning(
-                 f"Setting the `device` argument to None from {device} to avoid "
-                 "the error caused by attempting to move the model that was already "
-                 "loaded on the GPU using the Accelerate module to the same or "
-                 "another device."
-             )
-             device = None
-
-         if (
-             device is not None
-             and importlib.util.find_spec("torch") is not None
-             and backend == "default"
-         ):
-             import torch
-
-             cuda_device_count = torch.cuda.device_count()
-             if device < -1 or (device >= cuda_device_count):
-                 raise ValueError(
-                     f"Got device=={device}, "
-                     f"device is required to be within [-1, {cuda_device_count})"
-                 )
-             if device_map is not None and device < 0:
-                 device = None
-             if device is not None and device < 0 and cuda_device_count > 0:
-                 logger.warning(
-                     "Device has %d GPUs available. "
-                     "Provide device={deviceId} to `from_model_id` to use available"
-                     "GPUs for execution. deviceId is -1 (default) for CPU and "
-                     "can be a positive integer associated with CUDA device id.",
-                     cuda_device_count,
-                 )
-         if device is not None and device_map is not None and backend == "openvino":
-             logger.warning("Please set device for OpenVINO through: `model_kwargs`")
-         if "trust_remote_code" in _model_kwargs:
-             _model_kwargs = {
-                 k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
-             }
-         _pipeline_kwargs = pipeline_kwargs or {}
-         pipeline = hf_pipeline(
-             task=task,
-             model=model,
-             tokenizer=tokenizer,
-             device=device,
-             batch_size=batch_size,
-             model_kwargs=_model_kwargs,
-             **_pipeline_kwargs,
-         )
-         if pipeline.task not in VALID_TASKS:
-             raise ValueError(
-                 f"Got invalid task {pipeline.task}, "
-                 f"currently only {VALID_TASKS} are supported"
-             )
-         return cls(
-             pipeline=pipeline,
-             model_id=model_id,
-             model_kwargs=_model_kwargs,
-             pipeline_kwargs=_pipeline_kwargs,
-             batch_size=batch_size,
-             **kwargs,
-         )
-
-     @property
-     def _identifying_params(self) -> Mapping[str, Any]:
-         """Get the identifying parameters."""
-         return {
-             "model_id": self.model_id,
-             "model_kwargs": self.model_kwargs,
-             "pipeline_kwargs": self.pipeline_kwargs,
-         }
-
-     @property
-     def _llm_type(self) -> str:
-         return "huggingface_pipeline"
-
-     def _generate(
-         self,
-         prompts: List[str],
-         stop: Optional[List[str]] = None,
-         run_manager: Optional[CallbackManagerForLLMRun] = None,
-         **kwargs: Any,
-     ) -> LLMResult:
-         # List to hold all results
-         text_generations: List[str] = []
-         pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
-         skip_prompt = kwargs.get("skip_prompt", False)
-
-         for i in range(0, len(prompts), self.batch_size):
-             batch_prompts = prompts[i : i + self.batch_size]
-
-             # Process batch of prompts
-             responses = self.pipeline(
-                 batch_prompts,
-                 **pipeline_kwargs,
-             )
-
-             # Process each response in the batch
-             for j, response in enumerate(responses):
-                 if isinstance(response, list):
-                     # if model returns multiple generations, pick the top one
-                     response = response[0]
-
-                 if self.pipeline.task == "text-generation":
-                     text = response["generated_text"]
-                 elif self.pipeline.task == "text2text-generation":
-                     text = response["generated_text"]
-                 elif self.pipeline.task == "summarization":
-                     text = response["summary_text"]
-                 elif self.pipeline.task in "translation":
-                     text = response["translation_text"]
-                 else:
-                     raise ValueError(
-                         f"Got invalid task {self.pipeline.task}, "
-                         f"currently only {VALID_TASKS} are supported"
-                     )
-                 if skip_prompt:
-                     text = text[len(batch_prompts[j]) :]
-                 # Append the processed text to results
-                 text_generations.append(text)
-
-         return LLMResult(
-             generations=[[Generation(text=text)] for text in text_generations]
-         )
-
-     def _stream(
-         self,
-         prompt: str,
-         stop: Optional[List[str]] = None,
-         run_manager: Optional[CallbackManagerForLLMRun] = None,
-         **kwargs: Any,
-     ) -> Iterator[GenerationChunk]:
-         from threading import Thread
-
-         import torch
-         from transformers import (
-             StoppingCriteria,
-             StoppingCriteriaList,
-             TextIteratorStreamer,
-         )
-
-         pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
-         skip_prompt = kwargs.get("skip_prompt", True)
-
-         if stop is not None:
-             stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)
-         stopping_ids_list = stop or []
-
-         class StopOnTokens(StoppingCriteria):
-             def __call__(
-                 self,
-                 input_ids: torch.LongTensor,
-                 scores: torch.FloatTensor,
-                 **kwargs: Any,
-             ) -> bool:
-                 for stop_id in stopping_ids_list:
-                     if input_ids[0][-1] == stop_id:
-                         return True
-                 return False
-
-         stopping_criteria = StoppingCriteriaList([StopOnTokens()])
-
-         streamer = TextIteratorStreamer(
-             self.pipeline.tokenizer,
-             timeout=60.0,
-             skip_prompt=skip_prompt,
-             skip_special_tokens=True,
-         )
-         generation_kwargs = dict(
-             text_inputs=prompt,
-             streamer=streamer,
-             stopping_criteria=stopping_criteria,
-             **pipeline_kwargs,
-         )
-         t1 = Thread(target=self.pipeline, kwargs=generation_kwargs)
-         t1.start()
-
-         for char in streamer:
-             chunk = GenerationChunk(text=char)
-             if run_manager:
-                 run_manager.on_llm_new_token(chunk.text, chunk=chunk)
-
-             yield chunk
-
+ with open(path_hf,"r") as f:
+     s=f.read()
+ with open(path_hf,"w") as f:
+     f.write(s.replace(" model = model_cls.from_pretrained(model_id, **_model_kwargs)"," model = torch.compile(model_cls.from_pretrained(model_id, **_model_kwargs),mode='max-autotune')"))
  from langchain_core.prompts.chat import ChatPromptTemplate, MessagesPlaceholder
  system = '''Respond to the human as helpfully and accurately as possible. You have access to the following tools:

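In effect, the new startup lines drop the vendored HuggingFacePipeline subclass from app.py and instead rewrite the installed langchain_huggingface source file on disk, so that HuggingFacePipeline.from_model_id() wraps the model it loads in torch.compile. A rough, self-contained sketch of that idea follows; the 8-space indentation in the search/replace strings and the `if old in source` guard are assumptions added here (the commit writes unconditionally), and the patched line still relies on `torch` being available in that module's namespace.

# Sketch: patch the installed langchain_huggingface source so that
# from_model_id() compiles the model it loads, mirroring the committed code.
import inspect

from langchain_huggingface import HuggingFacePipeline as HFP

# File that defines HuggingFacePipeline (and its from_model_id classmethod).
path_hf = inspect.getfile(HFP)

# Assumed to match the installed file verbatim, including its indentation.
old = "        model = model_cls.from_pretrained(model_id, **_model_kwargs)"
new = (
    "        model = torch.compile("
    "model_cls.from_pretrained(model_id, **_model_kwargs), mode='max-autotune')"
)

with open(path_hf, "r") as f:
    source = f.read()

if old in source:  # guard added for the sketch; skip rewriting if nothing matches
    with open(path_hf, "w") as f:
        f.write(source.replace(old, new))

As written, the module is already imported when the file is rewritten, so the in-memory copy is unchanged; the compiled-model behavior only takes effect once the module is re-imported (for example, in a freshly started process).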