IST199655 committed
Commit · 12b9045
1 Parent(s): f63e352
app.py
CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
 Copied from inference in colab notebook
 """
 
-from transformers import AutoTokenizer , AutoModelForCausalLM ,
+from transformers import AutoTokenizer , AutoModelForCausalLM , TextIteratorStreamer
 import torch
 from threading import Thread
 
@@ -101,7 +101,7 @@ def respond(
         return_tensors = "pt",
     )
     # Generate tokens incrementally
-    streamer =
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
     generation_kwargs = {
         "input_ids": inputs,
         "max_new_tokens": max_tokens,
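
The commit completes the dangling streamer assignment by wiring transformers' TextIteratorStreamer into the generation path. For context, here is a minimal self-contained sketch of the streaming pattern the diff implies; the model id, message list, and max_new_tokens value are illustrative assumptions, since the commit only shows the import, the streamer construction, and the start of generation_kwargs:

from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hypothetical checkpoint for illustration; the diff does not show which model app.py loads.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

messages = [{"role": "user", "content": "Explain token streaming in one sentence."}]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

# skip_prompt=True makes the streamer emit only newly generated text,
# not the echoed input prompt, matching the commit's call.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
generation_kwargs = {
    "input_ids": inputs,
    "max_new_tokens": 256,  # the real code passes max_tokens from respond()
    "streamer": streamer,
}

# model.generate() blocks until generation finishes, so it runs in a
# background thread while the main thread consumes partial text chunks.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()

This is the standard reason the file already imports Thread: generate() is blocking, so the background thread produces tokens while the caller iterates the streamer. In a Spaces respond() callback this loop would typically yield an accumulating string to the UI rather than print, but that part of app.py is outside the diff.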