placed timeout on streamer
app.py CHANGED
@@ -53,7 +53,7 @@ def llama_generation(input_text: str,
     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
 
     # Skip_prompt, ignores the prompt in the chatbot
-    streamer = TextIteratorStreamer(llama_tokenizer, skip_prompt=True, skip_special_tokens=True)
+    streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
 
     # generation arguments to pass in llm generate() eventually
     generate_kwargs = dict(
@@ -78,7 +78,7 @@ def llama_generation(input_text: str,
     for text in streamer:
         outputs.append(text)
         print(outputs)
-
+        yield "".join(outputs)
 
     # Let's just make sure the llama is returning as it should and than place that return output into a function making it fit into a base
     # Prompt for gpt-4o