Spaces:
Runtime error
Runtime error
import os | |
import sys | |
import gradio as gr | |
from subprocess import Popen, PIPE | |
# Runtime configuration, each value overridable through an environment variable.
MODEL_NAME = os.getenv("MODEL_NAME", "NousResearch/Nous-Hermes-2-Yi-9B")
API_PORT = int(os.getenv("API_PORT", 8000))
GRADIO_PORT = int(os.getenv("GRADIO_PORT", 7860))

# Handle to the vLLM server subprocess; None while no server is tracked.
vllm_process = None
def start_vllm_server():
    """Launch the vLLM OpenAI-compatible server as a child process.

    The server is started with ``vllm serve MODEL_NAME`` on ``API_PORT``. If
    the ``API_KEY`` environment variable is non-empty it is forwarded via
    ``--api-key``.

    Returns:
        str: A user-facing status message: already running, started, or the
        text of the launch error.
    """
    global vllm_process

    if vllm_process is not None:
        # Only a process that is still alive counts as "running".
        if vllm_process.poll() is None:
            return "vLLM 服务已经在运行"
        # The previous server exited on its own; release its pipes and fall
        # through so the service can be started again.
        for pipe in (vllm_process.stdout, vllm_process.stderr):
            if pipe is not None:
                pipe.close()
        vllm_process = None

    # Build the launch command.
    cmd = [
        "vllm",
        "serve",
        MODEL_NAME,
        "--host", "0.0.0.0",
        "--port", str(API_PORT),
        "--dtype", "auto",
        "--max-model-len", "2048",  # maximum model context length
        "--gpu-memory-utilization", "0.9",  # use 90% of GPU memory
    ]
    # Same command for logging; the secret is masked below.
    display_cmd = list(cmd)

    # Optionally protect the API with a key.
    api_key = os.environ.get("API_KEY", "")
    if api_key:
        cmd.extend(["--api-key", api_key])
        display_cmd.extend(["--api-key", "***"])

    # Log the command without leaking the API key.
    print(f"启动命令: {' '.join(display_cmd)}")

    # NOTE: stdout/stderr are captured through pipes. They have to be drained
    # (get_server_logs reads them), otherwise the child can block once the OS
    # pipe buffer is full.
    try:
        vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
    except Exception as e:  # UI boundary: report the failure instead of raising
        return f"启动vLLM服务器时出错: {str(e)}"
    return "vLLM 服务器已启动!请等待模型加载完成..."
def stop_vllm_server():
    """Stop the tracked vLLM server process and release its resources.

    Sends a terminate request, waits for the process to exit (escalating to a
    kill if it does not exit within the grace period), then closes the
    stdout/stderr pipes.

    Returns:
        str: A user-facing status message: not running, or stopped.
    """
    # Local import: only Popen and PIPE are imported at file level.
    from subprocess import TimeoutExpired

    global vllm_process
    if vllm_process is None:
        return "vLLM 服务未运行"

    # Detach the handle first so the module state is consistent even if the
    # cleanup below raises.
    proc, vllm_process = vllm_process, None

    if proc.poll() is None:
        proc.terminate()
        try:
            # Reap the child so it does not linger as a zombie.
            proc.wait(timeout=10)
        except TimeoutExpired:
            # The server ignored the terminate request; force it down.
            proc.kill()
            proc.wait()

    # Close the pipe ends held by this process.
    for pipe in (proc.stdout, proc.stderr):
        if pipe is not None:
            pipe.close()

    return "vLLM 服务已停止"
def check_server_status():
    """Describe the state of the tracked vLLM process for the status box.

    Returns:
        str: "未运行" when no process is tracked, "运行中" while the process
        is alive, otherwise a message containing its exit code.
    """
    process = vllm_process
    if process is None:
        return "未运行"

    # poll() yields None while the child is still alive.
    exit_code = process.poll()
    return "运行中" if exit_code is None else f"已停止 (返回码: {exit_code})"
def get_server_logs():
    """Return whatever the vLLM process has written since the last call.

    Reads stdout and stderr without blocking: only data that is already
    available in the pipes is consumed, so the call returns immediately even
    while the server is alive and silent. Lines from stderr are prefixed with
    "[ERROR] ". Stdout lines are listed before stderr lines.

    Returns:
        str: The new log text, or a placeholder message when no process is
        tracked or nothing new is available.
    """
    # Local import: the module is not imported at file level.
    import select

    if vllm_process is None:
        return "服务未运行,无日志可显示"

    # fd -> (line prefix, text encoding, collected bytes); insertion order
    # keeps stdout ahead of stderr in the result.
    streams = {}
    for pipe, prefix in ((vllm_process.stdout, ""), (vllm_process.stderr, "[ERROR] ")):
        if pipe is not None and not pipe.closed:
            encoding = getattr(pipe, "encoding", None) or "utf-8"
            streams[pipe.fileno()] = (prefix, encoding, bytearray())

    # Cap the amount returned per call so a very chatty server cannot keep
    # this handler busy indefinitely; the rest is picked up by the next call.
    max_bytes = 1 << 20
    total = 0
    open_fds = set(streams)
    while open_fds and total < max_bytes:
        # Zero timeout: ask which pipes have data right now, never wait.
        readable, _, _ = select.select(list(open_fds), [], [], 0)
        if not readable:
            break
        for fd in readable:
            # Read the raw descriptor; the buffered text wrapper would block.
            data = os.read(fd, 65536)
            if not data:
                open_fds.discard(fd)  # EOF: the child closed this pipe
                continue
            streams[fd][2].extend(data)
            total += len(data)

    lines = []
    for prefix, encoding, raw in streams.values():
        text = raw.decode(encoding, errors="replace")
        # splitlines() drops the line terminators, so joining with "\n" below
        # does not double them.
        lines.extend(prefix + line for line in text.splitlines())

    output = "\n".join(lines)
    return output if output else "暂无新日志"
def _build_api_info_markdown():
    """Return the Markdown text shown in the "API info" section of the panel."""
    # Never render the secret itself: the panel may be publicly reachable.
    api_key_configured = bool(os.environ.get("API_KEY"))
    key_status = "已设置(已隐藏)" if api_key_configured else "未设置"
    sample_key = "你的API_KEY" if api_key_configured else "None"
    # Literal braces in the sample code are doubled ({{ }}) so the f-string
    # does not try to evaluate them as replacement fields.
    return f"""
API地址: `http://localhost:{API_PORT}/v1/...`
Hugging Face Space公开URL: 部署后查看Space详情获取
当前加载模型: `{MODEL_NAME}`
API密钥: `{key_status}`
## 测试命令
```python
from openai import OpenAI
client = OpenAI(
base_url="http://你的HF_SPACE_URL/v1",
api_key="{sample_key}",
)
completion = client.chat.completions.create(
model="{MODEL_NAME}",
messages=[
{{"role": "user", "content": "Hello!"}}
]
)
print(completion.choices[0].message)
```
"""


def serve_test_ui():
    """Build the Gradio control panel for the vLLM server.

    The panel offers start/stop buttons, a status box, a log viewer and a
    section with connection details plus a sample client snippet. Loading the
    page triggers ``start_vllm_server``.

    Returns:
        The ``gr.Blocks`` application, not yet launched.
    """
    with gr.Blocks(title="vLLM OpenAI兼容API服务") as demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown("# vLLM OpenAI 兼容API服务控制面板")
                with gr.Row():
                    start_btn = gr.Button("启动服务", variant="primary")
                    stop_btn = gr.Button("停止服务", variant="stop")
                status_text = gr.Textbox(label="服务状态", value="未运行", interactive=False)
                refresh_btn = gr.Button("刷新状态")
                logs_text = gr.Textbox(label="服务日志", interactive=False, lines=15)
                logs_refresh_btn = gr.Button("刷新日志")
                # API information section.
                gr.Markdown("## API 信息")
                api_info = gr.Markdown(_build_api_info_markdown())

        # Wire the buttons to their handlers.
        start_btn.click(start_vllm_server, inputs=[], outputs=status_text)
        stop_btn.click(stop_vllm_server, inputs=[], outputs=status_text)
        refresh_btn.click(check_server_status, inputs=[], outputs=status_text)
        logs_refresh_btn.click(get_server_logs, inputs=[], outputs=logs_text)

        # Start the server automatically when the page is loaded.
        demo.load(start_vllm_server, inputs=[], outputs=status_text)
    return demo
# Script entry point: build the control panel and serve it.
if __name__ == "__main__":
    demo = serve_test_ui()
    # Listen on all interfaces on the configured port, with a share link requested.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": GRADIO_PORT,
        "share": True,
    }
    demo.queue().launch(**launch_options)