WillHeld committed on
Commit
b29974e
·
1 Parent(s): a4b631b

Chat interface

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -1,7 +1,21 @@
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import spaces  # required: the @spaces.GPU decorator below references this name
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
3
 
4
+ checkpoint = "WillHeld/soft-raccoon"
5
+ device = "cuda" # "cuda" or "cpu"
6
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
7
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
8
 
9
+ @spaces.GPU(duration=120)
10
+ def predict(message, history):
11
+ history.append({"role": "user", "content": message})
12
+ input_text = tokenizer.apply_chat_template(history, tokenize=False)
13
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
14
+ outputs = model.generate(inputs, max_new_tokens=100, temperature=0.2, top_p=0.9, do_sample=True)
15
+ decoded = tokenizer.decode(outputs[0])
16
+ response = decoded.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
17
+ return response
18
+
19
+ demo = gr.ChatInterface(predict, type="messages")
20
+
21
+ demo.launch()