Spaces: Paused
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
|
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
5 |
from threading import Thread
|
6 |
from transformers import TextIteratorStreamer
|
|
|
7 |
|
8 |
model_name = "numfa/numfa_v2-3b"
|
9 |
model = AutoModelForCausalLM.from_pretrained(model_name,torch_dtype=torch.float16, device_map="auto")
|
@@ -13,6 +14,7 @@ if tokenizer.pad_token_id is None:
|
|
13 |
|
14 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
|
15 |
|
|
|
16 |
def generate_text(prompt, max_length, top_p, top_k):
|
17 |
inputs = tokenizer([prompt], return_tensors="pt")
|
18 |
|
@@ -43,4 +45,4 @@ outputs = [gr.Textbox(label="Generated Text")]
|
|
43 |
|
44 |
demo = gr.Interface(fn=generate_text, inputs=inputs, outputs=outputs, allow_flagging=False, description=description)
|
45 |
|
46 |
- demo.launch()
|
|
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
5 |
from threading import Thread
|
6 |
from transformers import TextIteratorStreamer
|
7 |
+ import spaces
|
8 |
|
9 |
model_name = "numfa/numfa_v2-3b"
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_name,torch_dtype=torch.float16, device_map="auto")
|
|
|
14 |
|
15 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
|
16 |
|
17 |
+ @spaces.GPU
|
18 |
def generate_text(prompt, max_length, top_p, top_k):
|
19 |
inputs = tokenizer([prompt], return_tensors="pt")
|
20 |
|
|
|
45 |
|
46 |
demo = gr.Interface(fn=generate_text, inputs=inputs, outputs=outputs, allow_flagging=False, description=description)
|
47 |
|
48 |
+ demo.queue(max_size=20).launch()
|