Spaces:
Runtime error
Runtime error
Commit
·
a1dddfb
1
Parent(s):
c579716
update
Browse files
app.py
CHANGED
@@ -3,11 +3,22 @@ import sys
|
|
3 |
import gradio as gr
|
4 |
from subprocess import Popen, PIPE
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# 设置环境变量
|
7 |
-
MODEL_NAME = os.environ.get("MODEL_NAME", "
|
8 |
API_PORT = int(os.environ.get("API_PORT", 8000))
|
9 |
GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
|
10 |
|
|
|
|
|
|
|
11 |
# vLLM server进程
|
12 |
vllm_process = None
|
13 |
|
@@ -16,6 +27,9 @@ def start_vllm_server():
|
|
16 |
if vllm_process is not None:
|
17 |
return "vLLM 服务已经在运行"
|
18 |
|
|
|
|
|
|
|
19 |
# 构建启动命令
|
20 |
cmd = [
|
21 |
"vllm",
|
@@ -24,10 +38,22 @@ def start_vllm_server():
|
|
24 |
"--host", "0.0.0.0",
|
25 |
"--port", str(API_PORT),
|
26 |
"--dtype", "auto",
|
27 |
-
"--
|
28 |
-
"--gpu-memory-utilization", "0.9" # 使用90%的GPU内存
|
29 |
]
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
# 是否启用API密钥
|
32 |
api_key = os.environ.get("API_KEY", "")
|
33 |
if api_key:
|
|
|
3 |
import gradio as gr
|
4 |
from subprocess import Popen, PIPE
|
5 |
|
6 |
+
# 检测是否有 GPU
|
7 |
+
def check_gpu_available():
|
8 |
+
try:
|
9 |
+
import torch
|
10 |
+
return torch.cuda.is_available()
|
11 |
+
except ImportError:
|
12 |
+
return False
|
13 |
+
|
14 |
# 设置环境变量
|
15 |
+
MODEL_NAME = os.environ.get("MODEL_NAME", "zhangchenxu/TinyV-1.5B") # 默认使用更小的模型
|
16 |
API_PORT = int(os.environ.get("API_PORT", 8000))
|
17 |
GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
|
18 |
|
19 |
+
# 检测 GPU
|
20 |
+
HAS_GPU = check_gpu_available()
|
21 |
+
|
22 |
# vLLM server进程
|
23 |
vllm_process = None
|
24 |
|
|
|
27 |
if vllm_process is not None:
|
28 |
return "vLLM 服务已经在运行"
|
29 |
|
30 |
+
# 设置环境变量以帮助调试
|
31 |
+
os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
|
32 |
+
|
33 |
# 构建启动命令
|
34 |
cmd = [
|
35 |
"vllm",
|
|
|
38 |
"--host", "0.0.0.0",
|
39 |
"--port", str(API_PORT),
|
40 |
"--dtype", "auto",
|
41 |
+
"--trust-remote-code" # 信任远程代码,许多模型需要这个
|
|
|
42 |
]
|
43 |
|
44 |
+
# 根据是否有 GPU 添加不同的参数
|
45 |
+
if HAS_GPU:
|
46 |
+
cmd.extend([
|
47 |
+
"--device", "cuda",
|
48 |
+
"--max-model-len", "2048",
|
49 |
+
"--gpu-memory-utilization", "0.9"
|
50 |
+
])
|
51 |
+
else:
|
52 |
+
cmd.extend([
|
53 |
+
"--device", "cpu",
|
54 |
+
"--max-model-len", "1024" # CPU 模式使用更小的上下文长度以节省内存
|
55 |
+
])
|
56 |
+
|
57 |
# 是否启用API密钥
|
58 |
api_key = os.environ.get("API_KEY", "")
|
59 |
if api_key:
|