zhangchenxu committed
Commit a1dddfb · 1 Parent(s): c579716
Files changed (1)
  1. app.py +29 -3
app.py CHANGED
@@ -3,11 +3,22 @@ import sys
 import gradio as gr
 from subprocess import Popen, PIPE
 
+# Check whether a GPU is available
+def check_gpu_available():
+    try:
+        import torch
+        return torch.cuda.is_available()
+    except ImportError:
+        return False
+
 # Set environment variables
-MODEL_NAME = os.environ.get("MODEL_NAME", "NousResearch/Nous-Hermes-2-Yi-9B")
+MODEL_NAME = os.environ.get("MODEL_NAME", "zhangchenxu/TinyV-1.5B")  # default to a smaller model
 API_PORT = int(os.environ.get("API_PORT", 8000))
 GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
 
+# Detect GPU
+HAS_GPU = check_gpu_available()
+
 # vLLM server process
 vllm_process = None
 
@@ -16,6 +27,9 @@ def start_vllm_server():
     if vllm_process is not None:
         return "vLLM 服务已经在运行"
 
+    # Set a logging environment variable to aid debugging
+    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
+
     # Build the launch command
     cmd = [
         "vllm",
@@ -24,10 +38,22 @@ def start_vllm_server():
         "--host", "0.0.0.0",
         "--port", str(API_PORT),
         "--dtype", "auto",
-        "--max-model-len", "2048",  # maximum model length
-        "--gpu-memory-utilization", "0.9"  # use 90% of GPU memory
+        "--trust-remote-code"  # trust remote code; many models require it
     ]
 
+    # Add different arguments depending on whether a GPU is available
+    if HAS_GPU:
+        cmd.extend([
+            "--device", "cuda",
+            "--max-model-len", "2048",
+            "--gpu-memory-utilization", "0.9"
+        ])
+    else:
+        cmd.extend([
+            "--device", "cpu",
+            "--max-model-len", "1024"  # smaller context length in CPU mode to save memory
+        ])
+
     # Whether to enable an API key
     api_key = os.environ.get("API_KEY", "")
     if api_key:
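
For reference, here is a minimal sketch of the launch command this branching logic produces. The arguments that sit between the hunks (most likely the serve subcommand and the model name) are not visible in this diff, so they are marked as assumptions; everything else mirrors the code above.

# Hypothetical reconstruction of the effective launch command; the "serve"
# subcommand and the model argument are assumptions, hidden by the diff context.
import os

MODEL_NAME = os.environ.get("MODEL_NAME", "zhangchenxu/TinyV-1.5B")
API_PORT = int(os.environ.get("API_PORT", 8000))
HAS_GPU = False  # e.g. a CPU-only Space

cmd = [
    "vllm", "serve", MODEL_NAME,  # assumption: not shown in the hunks above
    "--host", "0.0.0.0",
    "--port", str(API_PORT),
    "--dtype", "auto",
    "--trust-remote-code",
]
if HAS_GPU:
    cmd.extend(["--device", "cuda", "--max-model-len", "2048",
                "--gpu-memory-utilization", "0.9"])
else:
    cmd.extend(["--device", "cpu", "--max-model-len", "1024"])

print(" ".join(cmd))
# CPU case: vllm serve zhangchenxu/TinyV-1.5B --host 0.0.0.0 --port 8000
#           --dtype auto --trust-remote-code --device cpu --max-model-len 1024

Keeping the GPU-only flags out of the CPU branch and lowering --max-model-len there matches the diff's own comment: a smaller context length reduces memory pressure when no GPU is available.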