# Hugging Face Space metadata (scraped page header): status "Runtime error",
# file size 4,837 bytes. Kept as a comment so the file remains valid Python.
import os
import sys
from subprocess import PIPE, Popen, TimeoutExpired

import gradio as gr
# Configuration pulled from environment variables (with defaults).
MODEL_NAME = os.environ.get("MODEL_NAME", "NousResearch/Nous-Hermes-2-Yi-9B")
API_PORT = int(os.environ.get("API_PORT", 8000))
GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
# Handle to the vLLM server subprocess; None while no server is running.
vllm_process = None
def start_vllm_server():
    """Launch the vLLM OpenAI-compatible API server as a subprocess.

    Builds the ``vllm serve`` command from module configuration, optionally
    adds an API key from the ``API_KEY`` environment variable, and stores the
    process handle in the module-level ``vllm_process``.

    Returns:
        str: a human-readable status message for the UI.
    """
    global vllm_process
    # BUG FIX: the original only checked "is not None", so after the server
    # crashed it could never be restarted. Treat an exited process (poll()
    # returns an exit code) as "not running".
    if vllm_process is not None and vllm_process.poll() is None:
        return "vLLM 服务已经在运行"
    cmd = [
        "vllm",
        "serve",
        MODEL_NAME,
        "--host", "0.0.0.0",
        "--port", str(API_PORT),
        "--dtype", "auto",
        "--max-model-len", "2048",  # maximum model context length
        "--gpu-memory-utilization", "0.9",  # use 90% of GPU memory
    ]
    # Optionally protect the API with a key.
    api_key = os.environ.get("API_KEY", "")
    if api_key:
        cmd.extend(["--api-key", api_key])
    print(f"启动命令: {' '.join(cmd)}")
    try:
        # NOTE(review): stdout/stderr are PIPEd and only drained on demand by
        # get_server_logs(); a very chatty server could fill the OS pipe
        # buffer and stall. Kept as-is because the log viewer relies on it.
        vllm_process = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
        return "vLLM 服务器已启动!请等待模型加载完成..."
    except Exception as e:
        vllm_process = None  # don't leave a stale handle after a failed start
        return f"启动vLLM服务器时出错: {str(e)}"
def stop_vllm_server():
    """Terminate the vLLM server subprocess, if running, and clear the handle.

    Returns:
        str: a human-readable status message for the UI.
    """
    global vllm_process
    if vllm_process is None:
        return "vLLM 服务未运行"
    vllm_process.terminate()
    # BUG FIX: the original never wait()ed, leaving a zombie process and
    # giving the child no deadline. Reap it, escalating to SIGKILL if it
    # ignores SIGTERM.
    try:
        vllm_process.wait(timeout=10)
    except TimeoutExpired:
        vllm_process.kill()
        vllm_process.wait()
    vllm_process = None
    return "vLLM 服务已停止"
def check_server_status():
    """Return a short status string describing the vLLM server process."""
    if vllm_process is None:
        return "未运行"
    exit_code = vllm_process.poll()
    # poll() yields None while the child is still alive.
    if exit_code is not None:
        return f"已停止 (返回码: {exit_code})"
    return "运行中"
def get_server_logs():
    """Drain any already-buffered stdout/stderr from the vLLM server.

    Returns:
        str: the collected log text (stderr lines prefixed with ``[ERROR]``),
        or a placeholder message when the server is not running or there is
        no new output.
    """
    # Local import: select() on pipes is POSIX-only, which matches the
    # Linux-based Spaces deployment this script targets.
    import select

    if vllm_process is None:
        return "服务未运行,无日志可显示"
    chunks = []
    streams = [s for s in (vllm_process.stdout, vllm_process.stderr) if s is not None]
    while streams:
        # BUG FIX: the original called blocking readline() on a live child,
        # which hangs the UI until the server exits. A zero-timeout select
        # only reads data that is already available.
        ready, _, _ = select.select(streams, [], [], 0)
        if not ready:
            break
        for stream in ready:
            line = stream.readline()
            if not line:
                # EOF on this stream; stop polling it.
                streams.remove(stream)
                continue
            # readline() keeps the trailing newline — normalize to exactly one
            # (the original appended a second "\n", doubling every line break).
            text = line.rstrip("\n") + "\n"
            if stream is vllm_process.stderr:
                chunks.append("[ERROR] " + text)
            else:
                chunks.append(text)
    output = "".join(chunks)
    return output if output else "暂无新日志"
def serve_test_ui():
    """Build the Gradio control-panel UI for the vLLM server.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """
    with gr.Blocks(title="vLLM OpenAI兼容API服务") as demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown("# vLLM OpenAI 兼容API服务控制面板")
                with gr.Row():
                    start_btn = gr.Button("启动服务", variant="primary")
                    stop_btn = gr.Button("停止服务", variant="stop")
                status_text = gr.Textbox(label="服务状态", value="未运行", interactive=False)
                refresh_btn = gr.Button("刷新状态")
                logs_text = gr.Textbox(label="服务日志", interactive=False, lines=15)
                logs_refresh_btn = gr.Button("刷新日志")
                gr.Markdown("## API 信息")
                # BUG FIX: the literal braces of the example message dict must
                # be doubled ({{...}}) inside an f-string; the original
                # {"role": ...} was parsed as a replacement field with an
                # invalid format spec and raised ValueError at build time.
                gr.Markdown(f"""
API地址: `http://localhost:{API_PORT}/v1/...`
Hugging Face Space公开URL: 部署后查看Space详情获取
当前加载模型: `{MODEL_NAME}`
API密钥: `{os.environ.get("API_KEY", "未设置")}`
## 测试命令
```python
from openai import OpenAI
client = OpenAI(
    base_url="http://你的HF_SPACE_URL/v1",
    api_key="{os.environ.get("API_KEY", "None")}",
)
completion = client.chat.completions.create(
    model="{MODEL_NAME}",
    messages=[
        {{"role": "user", "content": "Hello!"}}
    ]
)
print(completion.choices[0].message)
```
""")
        # Wire the controls to the server-management helpers.
        start_btn.click(start_vllm_server, inputs=[], outputs=status_text)
        stop_btn.click(stop_vllm_server, inputs=[], outputs=status_text)
        refresh_btn.click(check_server_status, inputs=[], outputs=status_text)
        logs_refresh_btn.click(get_server_logs, inputs=[], outputs=logs_text)
        # Auto-start the vLLM server as soon as the page loads.
        demo.load(start_vllm_server, inputs=[], outputs=status_text)
    return demo
# Script entry point: build the control panel and serve it over HTTP.
if __name__ == "__main__":
    app = serve_test_ui()
    app.queue().launch(server_name="0.0.0.0", server_port=GRADIO_PORT, share=True)