Commit d813b23 (parent: 6c7be96): update
app.py
CHANGED
@@ -1,72 +1,48 @@
 import os
-import sys
 import gradio as gr
 from subprocess import Popen, PIPE
 import subprocess
 import logging

-# Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)

-# Detect whether a GPU is available - a more reliable approach in the Hugging Face Spaces environment
 def check_gpu_available():
     try:
-        # Method 1: detect via the nvidia-smi command
         nvidia_smi = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
         logger.info(f"nvidia-smi 输出: {nvidia_smi.stdout}")
         if nvidia_smi.returncode == 0:
             logger.info("通过nvidia-smi命令检测到GPU")
             return True
-
-        # Method 2: check the Hugging Face Space environment variable
         if os.environ.get("SPACE_RUNTIME_ARCH", "") == "gpu":
             logger.info("通过环境变量检测到GPU")
             return True
-
-        # Method 3: if the above fail, try PyTorch
         import torch
         has_gpu = torch.cuda.is_available()
-        if has_gpu:
-            logger.info(f"通过PyTorch检测到GPU,设备数量: {torch.cuda.device_count()}")
-        else:
-            logger.info("PyTorch未检测到GPU")
         return has_gpu
     except Exception as e:
         logger.error(f"GPU检测失败: {str(e)}")
         return False

-
-MODEL_NAME = os.environ.get("MODEL_NAME", "zhangchenxu/TinyV-1.5B")  # use a smaller model by default
 API_PORT = int(os.environ.get("API_PORT", 8000))
 GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
-
-# Other configuration options
 USE_TRANSFORMERS_IMPL = os.environ.get("USE_TRANSFORMERS_IMPL", "true").lower() == "true"
 ENFORCE_EAGER = os.environ.get("ENFORCE_EAGER", "true").lower() == "true"
-
-# Detect GPU
 HAS_GPU = check_gpu_available()
-logger.info(f"GPU检测结果: {'有GPU' if HAS_GPU else '无GPU'}")
-
-# Optionally force GPU mode - if you are sure the environment has a GPU
 FORCE_GPU = os.environ.get("FORCE_GPU", "false").lower() == "true"
 if FORCE_GPU:
-    logger.info("强制启用GPU模式")
     HAS_GPU = True

-# vLLM server process
 vllm_process = None

 def start_vllm_server():
     global vllm_process
     if vllm_process is not None:
         return "vLLM 服务已经在运行"
-
-    # Set an environment variable to help with debugging
     os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
-
-    # Build the launch command
     cmd = [
         "vllm",
         "serve",
@@ -74,78 +50,33 @@ def start_vllm_server():
         "--host", "0.0.0.0",
         "--port", str(API_PORT),
         "--dtype", "auto",
-        "--trust-remote-code",
-        "--disable-async-output-proc",
     ]
-
-    # Add flags based on the configuration options
     if ENFORCE_EAGER:
         cmd.append("--enforce-eager")
-
     if USE_TRANSFORMERS_IMPL:
         cmd.extend(["--model-impl", "transformers"])
-
-    # Add different flags depending on whether a GPU is available
     if HAS_GPU:
         logger.info("使用GPU模式启动vLLM")
-        cmd.extend([
-            "--device", "cuda",
-            "--max-model-len", "2048",
-            "--gpu-memory-utilization", "0.9"
-        ])
     else:
         logger.info("使用CPU模式启动vLLM")
-        cmd.extend([
-
-
-        ])
-
-    # Print the launch command
-    cmd_str = " ".join(cmd)
-    logger.info(f"启动命令: {cmd_str}")
-
-    # Start the vLLM service
-    try:
-        vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
-        return "vLLM 服务器已启动!请等待模型加载完成... (可能需要几分钟)"
-    except Exception as e:
-        error_msg = f"启动vLLM服务器时出错: {str(e)}"
-        logger.error(error_msg)
-        return error_msg
-
-    # Print the launch command
-    cmd_str = " ".join(cmd)
-    logger.info(f"启动命令: {cmd_str}")
-
-    # Start the vLLM service
-    try:
-        vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
-        return "vLLM 服务器已启动!请等待模型加载完成... (可能需要几分钟)"
-    except Exception as e:
-        error_msg = f"启动vLLM服务器时出错: {str(e)}"
-        logger.error(error_msg)
-        return error_msg
-
-    # Whether to enable an API key
-    api_key = os.environ.get("API_KEY", "")
-    if api_key:
-        cmd.extend(["--api-key", api_key])
-
-    # Print the launch command
-    print(f"启动命令: {' '.join(cmd)}")
-
-    # Start the vLLM service
     try:
         vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
         return "vLLM 服务器已启动!请等待模型加载完成..."
     except Exception as e:
         return f"启动vLLM服务器时出错: {str(e)}"

 def stop_vllm_server():
     global vllm_process
     if vllm_process is None:
         return "vLLM 服务未运行"
-
     vllm_process.terminate()
     vllm_process = None
     return "vLLM 服务已停止"
@@ -153,232 +84,51 @@ def stop_vllm_server():
 def check_server_status():
     if vllm_process is None:
         return "未运行"
-
     return_code = vllm_process.poll()
-    if return_code is None:
-        return "运行中"
-    else:
-        return f"已停止 (返回码: {return_code})"

 def get_server_logs():
     if vllm_process is None:
         return "服务未运行,无日志可显示"
-
-    # Check whether the process is still running
     if vllm_process.poll() is not None:
         return f"服务已停止,返回码: {vllm_process.poll()}"
-
     try:
-        # Try to read output from the process without blocking
-        output_lines = []
-
-        # Read stderr (error log)
         while True:
             line = vllm_process.stderr.readline()
             if not line:
                 break
             output_lines.append(f"[ERROR] {line.strip()}")
-
-        # Read stdout (standard output)
         while True:
             line = vllm_process.stdout.readline()
             if not line:
                 break
             output_lines.append(line.strip())
-
-        if output_lines:
-            return "\n".join(output_lines)
-        else:
-            return "暂无新日志 (服务正在运行)"
     except Exception as e:
         return f"读取日志时出错: {str(e)}"

 def serve_test_ui():
-    ""
-    with gr.Blocks(title="vLLM OpenAI兼容API服务") as demo:
         with gr.Row():
             with gr.Column():
-                gr.Markdown("# vLLM
-
-                # System info
                 gpu_info = "已检测到" if HAS_GPU else "未检测到"
-
-
-
-                - 运行环境: {'Hugging Face Space' if 'SPACE_ID' in os.environ else '本地环境'}
-                - 当前加载模型: `{MODEL_NAME}`
-                - API密钥: `{os.environ.get("API_KEY", "未设置")}`
-                """
-                gr.Markdown(system_info)
-
-                with gr.Row():
-                    start_btn = gr.Button("启动服务", variant="primary")
-                    stop_btn = gr.Button("停止服务", variant="stop")
-
                 status_text = gr.Textbox(label="服务状态", value="未运行", interactive=False)
                 refresh_btn = gr.Button("刷新状态")
-
-                logs_text = gr.Textbox(label="服务日志", interactive=False, lines=15)
                 logs_refresh_btn = gr.Button("刷新日志")
-
-
-
-
-
-
-                with gr.Row():
-                    force_gpu = gr.Checkbox(label="强制使用GPU模式", value=FORCE_GPU,
-                                            info="如果自动检测失败但您确定有GPU,请选中此项")
-                    use_transformers = gr.Checkbox(label="使用Transformers实现", value=USE_TRANSFORMERS_IMPL,
-                                            info="使用Transformers实现而不是vLLM原生实现,可能更稳定但性能略低")
-                    enforce_eager = gr.Checkbox(label="强制Eager模式", value=ENFORCE_EAGER,
-                                            info="强制使用PyTorch的Eager模式,避免CUDA图形相关问题")
-
-                apply_btn = gr.Button("应用设置", variant="primary")
-
-                # API test section
-                gr.Markdown("## API 信息")
-                api_key = os.environ.get("API_KEY", "未设置")
-                api_info = gr.Markdown(f"""
-                API地址: `http://localhost:{API_PORT}/v1/...`
-
-                Hugging Face Space公开URL: 部署后查看Space详情获取
-
-                ## 测试命令
-                ```python
-                from openai import OpenAI
-
-                client = OpenAI(
-                    base_url="http://你的HF_SPACE_URL/v1",
-                    api_key="{api_key}",
-                )
-
-                completion = client.chat.completions.create(
-                    model="{MODEL_NAME}",
-                    messages=[
-                        {{"role": "user", "content": "Hello!"}}
-                    ]
-                )
-
-                print(completion.choices[0].message)
-                ```
-                """)
-
-        # Wire up event handlers
-        start_btn.click(start_vllm_server, inputs=[], outputs=status_text)
-        stop_btn.click(stop_vllm_server, inputs=[], outputs=status_text)
-        refresh_btn.click(check_server_status, inputs=[], outputs=status_text)
-        logs_refresh_btn.click(get_server_logs, inputs=[], outputs=logs_text)
-
-        # Event handlers for the advanced options
-        def apply_settings(model_name, force_gpu_mode, use_transformers_impl, enforce_eager_mode):
-            global MODEL_NAME, FORCE_GPU, HAS_GPU, USE_TRANSFORMERS_IMPL, ENFORCE_EAGER
-
-            changed = []
-
-            if model_name.strip() and model_name != MODEL_NAME:
-                MODEL_NAME = model_name.strip()
-                changed.append(f"模型已更改为: {MODEL_NAME}")
-
-            if force_gpu_mode != FORCE_GPU:
-                FORCE_GPU = force_gpu_mode
-                if FORCE_GPU:
-                    HAS_GPU = True
-                    changed.append("已强制启用GPU模式")
-                else:
-                    HAS_GPU = check_gpu_available()
-                    changed.append(f"已恢复自动检测,GPU状态: {'已检测到' if HAS_GPU else '未检测到'}")
-
-            if use_transformers_impl != USE_TRANSFORMERS_IMPL:
-                USE_TRANSFORMERS_IMPL = use_transformers_impl
-                changed.append(f"Transformers实现: {'已启用' if USE_TRANSFORMERS_IMPL else '已禁用'}")
-
-            if enforce_eager_mode != ENFORCE_EAGER:
-                ENFORCE_EAGER = enforce_eager_mode
-                changed.append(f"Eager模式: {'已启用' if ENFORCE_EAGER else '已禁用'}")
-
-            if changed:
-                return "\n".join(changed) + "\n\n设置已应用。如果服务正在运行,需要重启服务以使更改生效。"
-            else:
-                return "没有设置被更改"
-
-        apply_btn.click(
-            apply_settings,
-            inputs=[model_input, force_gpu, use_transformers, enforce_eager],
-            outputs=status_text
-        )
-
-        # Do not auto-start the service on page load; just show the system status
-        demo.load(lambda: f"系统就绪。GPU状态: {'已检测到' if HAS_GPU else '未检测到'}", inputs=[], outputs=status_text)
-
-        return demoinyLlama-1.1B-Chat-v1.0")
-        change_model_btn = gr.Button("更改模型")
-
-        force_gpu = gr.Checkbox(label="强制使用GPU模式", value=FORCE_GPU,
-                            info="如果自动检测失败但您确定有GPU,请选中此项")
-
-        # API test section
-        gr.Markdown("## API 信息")
-        api_key = os.environ.get("API_KEY", "未设置")
-        api_info = gr.Markdown(f"""
-        API地址: `http://localhost:{API_PORT}/v1/...`
-
-        Hugging Face Space公开URL: 部署后查看Space详情获取
-
-        ## 测试命令
-        ```python
-        from openai import OpenAI
-
-        client = OpenAI(
-            base_url="http://你的HF_SPACE_URL/v1",
-            api_key="{api_key}",
-        )
-
-        completion = client.chat.completions.create(
-            model="{MODEL_NAME}",
-            messages=[
-                {{"role": "user", "content": "Hello!"}}
-            ]
-        )
-
-        print(completion.choices[0].message)
-        ```
-        """)
-
-        # Wire up event handlers
-        start_btn.click(start_vllm_server, inputs=[], outputs=status_text)
-        stop_btn.click(stop_vllm_server, inputs=[], outputs=status_text)
-        refresh_btn.click(check_server_status, inputs=[], outputs=status_text)
-        logs_refresh_btn.click(get_server_logs, inputs=[], outputs=logs_text)
-
-        # Event handlers for the advanced options
-        def change_model(model_name):
-            global MODEL_NAME
-            if model_name.strip():
-                MODEL_NAME = model_name.strip()
-                return f"模型已更改为: {MODEL_NAME}"
-            return "模型名称不能为空"
-
-        def toggle_gpu_mode(force):
-            global HAS_GPU, FORCE_GPU
-            FORCE_GPU = force
-            if FORCE_GPU:
-                HAS_GPU = True
-                return "已强制启用GPU模式"
-            else:
-                HAS_GPU = check_gpu_available()
-                return f"已恢复自动检测,GPU检测结果: {'已检测到' if HAS_GPU else '未检测到'}"
-
-        change_model_btn.click(change_model, inputs=[model_input], outputs=status_text)
-        force_gpu.change(toggle_gpu_mode, inputs=[force_gpu], outputs=status_text)
-
-        # Auto-start the service on page load
-        demo.load(lambda: f"系统就绪。GPU状态: {'已检测到' if HAS_GPU else '未检测到'}", inputs=[], outputs=status_text)
-
     return demo

-# Launch the test UI
 if __name__ == "__main__":
-    # Create and launch the UI
     demo = serve_test_ui()
-    demo.queue().launch(server_name="0.0.0.0", server_port=GRADIO_PORT, share=True)
app.py after this commit (added lines are prefixed with +):

@@ -1,72 +1,48 @@
 import os
 import gradio as gr
 from subprocess import Popen, PIPE
 import subprocess
 import logging

 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)

 def check_gpu_available():
     try:
         nvidia_smi = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
         logger.info(f"nvidia-smi 输出: {nvidia_smi.stdout}")
         if nvidia_smi.returncode == 0:
             logger.info("通过nvidia-smi命令检测到GPU")
             return True
         if os.environ.get("SPACE_RUNTIME_ARCH", "") == "gpu":
             logger.info("通过环境变量检测到GPU")
             return True
         import torch
         has_gpu = torch.cuda.is_available()
+        logger.info(f"通过PyTorch检测到GPU,设备数量: {torch.cuda.device_count()}" if has_gpu else "PyTorch未检测到GPU")
         return has_gpu
     except Exception as e:
         logger.error(f"GPU检测失败: {str(e)}")
         return False

+MODEL_NAME = os.environ.get("MODEL_NAME", "zhangchenxu/TinyV-1.5B")
 API_PORT = int(os.environ.get("API_PORT", 8000))
 GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
 USE_TRANSFORMERS_IMPL = os.environ.get("USE_TRANSFORMERS_IMPL", "true").lower() == "true"
 ENFORCE_EAGER = os.environ.get("ENFORCE_EAGER", "true").lower() == "true"
 HAS_GPU = check_gpu_available()
 FORCE_GPU = os.environ.get("FORCE_GPU", "false").lower() == "true"
 if FORCE_GPU:
     HAS_GPU = True

 vllm_process = None

 def start_vllm_server():
     global vllm_process
     if vllm_process is not None:
         return "vLLM 服务已经在运行"
+
     os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
     cmd = [
         "vllm",
         "serve",
@@ -74,78 +50,33 @@ def start_vllm_server():
         "--host", "0.0.0.0",
         "--port", str(API_PORT),
         "--dtype", "auto",
+        "--trust-remote-code",
+        "--disable-async-output-proc",
     ]
+
     if ENFORCE_EAGER:
         cmd.append("--enforce-eager")
     if USE_TRANSFORMERS_IMPL:
         cmd.extend(["--model-impl", "transformers"])
     if HAS_GPU:
         logger.info("使用GPU模式启动vLLM")
+        cmd.extend(["--device", "cuda", "--max-model-len", "2048", "--gpu-memory-utilization", "0.9"])
     else:
         logger.info("使用CPU模式启动vLLM")
+        cmd.extend(["--device", "cpu", "--max-model-len", "1024"])
+
+    logger.info(f"启动命令: {' '.join(cmd)}")
     try:
         vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
         return "vLLM 服务器已启动!请等待模型加载完成..."
     except Exception as e:
+        logger.error(f"启动vLLM服务器时出错: {str(e)}")
         return f"启动vLLM服务器时出错: {str(e)}"

 def stop_vllm_server():
     global vllm_process
     if vllm_process is None:
         return "vLLM 服务未运行"
     vllm_process.terminate()
     vllm_process = None
     return "vLLM 服务已停止"
@@ -153,232 +84,51 @@ def stop_vllm_server():
 def check_server_status():
     if vllm_process is None:
         return "未运行"
     return_code = vllm_process.poll()
+    return "运行中" if return_code is None else f"已停止 (返回码: {return_code})"

 def get_server_logs():
     if vllm_process is None:
         return "服务未运行,无日志可显示"
     if vllm_process.poll() is not None:
         return f"服务已停止,返回码: {vllm_process.poll()}"
+    output_lines = []
     try:
         while True:
             line = vllm_process.stderr.readline()
             if not line:
                 break
             output_lines.append(f"[ERROR] {line.strip()}")
         while True:
             line = vllm_process.stdout.readline()
             if not line:
                 break
             output_lines.append(line.strip())
+        return "\n".join(output_lines) if output_lines else "暂无新日志"
     except Exception as e:
         return f"读取日志时出错: {str(e)}"

 def serve_test_ui():
+    with gr.Blocks(title="vLLM 控制面板") as demo:
         with gr.Row():
             with gr.Column():
+                gr.Markdown("# vLLM 控制面板")
                 gpu_info = "已检测到" if HAS_GPU else "未检测到"
+                gr.Markdown(f"**GPU:** {gpu_info} \n**模型:** `{MODEL_NAME}`")
+                start_btn = gr.Button("启动服务")
+                stop_btn = gr.Button("停止服务")
                 status_text = gr.Textbox(label="服务状态", value="未运行", interactive=False)
                 refresh_btn = gr.Button("刷新状态")
+                logs_text = gr.Textbox(label="服务日志", interactive=False, lines=10)
                 logs_refresh_btn = gr.Button("刷新日志")
+
+        start_btn.click(start_vllm_server, outputs=status_text)
+        stop_btn.click(stop_vllm_server, outputs=status_text)
+        refresh_btn.click(check_server_status, outputs=status_text)
+        logs_refresh_btn.click(get_server_logs, outputs=logs_text)
+        demo.load(lambda: f"系统就绪。GPU状态: {'已检测到' if HAS_GPU else '未检测到'}", outputs=status_text)
     return demo

 if __name__ == "__main__":
     demo = serve_test_ui()
+    demo.queue().launch(server_name="0.0.0.0", server_port=GRADIO_PORT, share=True)
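The API usage snippet that the removed UI embedded still applies to the simplified app: once the vLLM subprocess is up and the model has finished loading, the Space serves an OpenAI-compatible API on API_PORT. A minimal client call, adapted from the removed Markdown, might look like the sketch below; the base URL is a placeholder for your Space's public URL, and since the new code no longer passes --api-key to vLLM, any non-empty api_key string should be accepted.

```python
from openai import OpenAI

# Placeholder base URL: substitute the public URL of your Space.
client = OpenAI(
    base_url="https://YOUR-SPACE-URL/v1",
    api_key="EMPTY",  # no --api-key is configured in the new app.py, so any value works
)

completion = client.chat.completions.create(
    model="zhangchenxu/TinyV-1.5B",  # the default MODEL_NAME
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message)
```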
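Note that start_vllm_server returns as soon as the subprocess is spawned, while the model can take minutes to load. One way to tell when the server is actually serving is to poll the OpenAI-compatible /v1/models endpoint. This is a sketch under the assumption that the requests package is available in the Space; wait_until_ready is a hypothetical helper, not part of app.py.

```python
import time
import requests

def wait_until_ready(port: int = 8000, timeout: float = 600.0) -> bool:
    """Poll the OpenAI-compatible /v1/models endpoint until the server responds."""
    url = f"http://localhost:{port}/v1/models"
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=5).status_code == 200:
                return True  # server is up and serving the model list
        except requests.RequestException:
            pass  # not accepting connections yet; keep waiting
        time.sleep(5)
    return False
```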