Panyun commited on
Commit
ddacb49
·
verified ·
1 Parent(s): 3015010

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model to serve: a GPTQ Int8 quantized build of Qwen2.5-7B-Instruct,
# chosen to reduce GPU memory usage.
model_name = "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"

# Tokenizer and weights are fetched once at startup. device_map="auto"
# spreads layers across available GPU(s) with CPU fallback; torch_dtype="auto"
# lets transformers pick the dtype recorded in the checkpoint config.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)
12
+
13
+ # 定义生成函数
14
+ def generate_response(message, history):
15
+ # 格式化对话历史
16
+ messages = [{"role": "user", "content": message}]
17
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
18
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
19
+
20
+ # 生成回复
21
+ outputs = model.generate(
22
+ **inputs,
23
+ max_new_tokens=512,
24
+ temperature=0.7
25
+ )
26
+ response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
27
+ return response
# Wire the generation function into a chat-style web UI and start serving.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Qwen2.5-7B大模型在线演示",
    description="输入问题后按回车开始对话",
)
demo.launch()