malt666 commited on
Commit
49a5193
·
verified ·
1 Parent(s): ad9a66f

Upload 5 files

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -60
  2. app.py +54 -26
  3. requirements.txt +0 -0
  4. templates/dashboard.html +4 -4
Dockerfile CHANGED
@@ -1,71 +1,24 @@
1
- # 这是一个临时的Dockerfile,用于在Hugging Face上部署占位符服务
2
- # TODO: 解决完整版本的依赖问题后替换此文件
3
 
4
- # 第一阶段:构建Go服务
5
- FROM golang:1.20-alpine AS go-builder
6
 
7
- # 安装基本依赖
8
- RUN apk add --no-cache git build-base ca-certificates
9
-
10
- # 设置工作目录
11
  WORKDIR /app
12
 
13
- # 初始化go.mod
14
- RUN go mod init tokenizer
15
-
16
- # 预先下载依赖
17
- RUN go get github.com/google/generative-ai-go/[email protected]
18
- RUN go get google.golang.org/[email protected]
19
- RUN go get github.com/gin-gonic/[email protected]
20
- RUN go get github.com/go-playground/validator/[email protected]
21
- RUN go get github.com/gabriel-vasile/[email protected]
22
-
23
- # 复制源代码
24
- COPY main.go .
25
-
26
- # 确保依赖关系
27
- RUN go mod tidy
28
- RUN go mod download
29
-
30
- # 构建
31
- RUN CGO_ENABLED=0 GOOS=linux go build -o tokenizer -a -installsuffix cgo -ldflags="-w -s" .
32
-
33
- # 第二阶段:构建Python环境
34
- FROM python:3.9-slim
35
-
36
- # 安装基本依赖
37
- RUN apt-get update && apt-get install -y --no-install-recommends \
38
- ca-certificates \
39
- && rm -rf /var/lib/apt/lists/*
40
-
41
- # 设置工作目录
42
- WORKDIR /app
43
-
44
- # 复制Go二进制文件
45
- COPY --from=go-builder /app/tokenizer .
46
-
47
- # 复制Python服务文件和tokenizer文件
48
- COPY deepseek_v3_tokenizer /app/deepseek_v3_tokenizer
49
- COPY openai_service.py /app/
50
 
51
- # 安装Python依赖
52
- RUN pip install --no-cache-dir flask transformers tiktoken
53
 
54
  # 设置环境变量
55
- ENV GIN_MODE=release
56
  ENV PORT=7860
57
- ENV DEEPSEEK_URL=http://127.0.0.1:7861
58
- ENV OPENAI_URL=http://127.0.0.1:7862
59
 
60
- # 创建启动脚本
61
- RUN echo '#!/bin/sh\n\
62
- python /app/deepseek_v3_tokenizer/deepseek_service.py & \n\
63
- python /app/openai_service.py & \n\
64
- sleep 5\n\
65
- ./tokenizer' > /app/start.sh && chmod +x /app/start.sh
66
 
67
- # 暴露端口
68
- EXPOSE 7860 7861 7862
69
 
70
- # 启动服务
71
- CMD ["/app/start.sh"]
 
1
# Placeholder deployment image for Hugging Face Spaces.
FROM python:3.11-slim

# Run as root explicitly (some Spaces base setups switch to a non-root user).
USER root

WORKDIR /app

# Install Python dependencies first so this layer is cached
# independently of source-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source.
COPY . .

# Runtime configuration read by app.py.
ENV HOST=0.0.0.0
ENV PORT=7860

# Remove sensitive files pulled in by `COPY . .`.
# NOTE(review): deleting in a later layer does NOT purge these files from the
# earlier image layer — prefer excluding them via .dockerignore; confirm they
# are not present in the published image.
RUN rm -f config.json password.txt

# Expose the port (Hugging Face Spaces routes traffic to 7860 by default).
EXPOSE 7860

# Start the application.
CMD ["python", "app.py"]
app.py CHANGED
@@ -12,12 +12,13 @@ import jwt
12
  import os
13
  import threading
14
  from datetime import datetime, timedelta
15
- import tiktoken # 导入tiktoken来计算token数量
16
 
17
  app = Flask(__name__, template_folder='templates')
18
  app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
19
  app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
20
 
 
 
21
 
22
  API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
23
  MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
@@ -679,7 +680,7 @@ def send_message(message, model, think=False):
679
  trace_id, sentry_trace = generate_trace_id()
680
 
681
  # 计算输入token
682
- prompt_tokens = num_tokens_from_string(message)
683
  completion_buffer = io.StringIO() # 收集所有输出用于计算token
684
 
685
  headers = {
@@ -787,8 +788,8 @@ def send_message(message, model, think=False):
787
  yield "data: [DONE]\n\n"
788
 
789
  # 在流式传输完成后计算token并更新统计
790
- completion_tokens = num_tokens_from_string(completion_buffer.getvalue())
791
- update_model_stats(model, prompt_tokens, completion_tokens)
792
 
793
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
794
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -818,7 +819,7 @@ def send_message_non_stream(message, model, think=False):
818
  trace_id, sentry_trace = generate_trace_id()
819
 
820
  # 计算输入token
821
- prompt_tokens = num_tokens_from_string(message)
822
 
823
  headers = {
824
  "accept": "text/event-stream",
@@ -916,8 +917,8 @@ def send_message_non_stream(message, model, think=False):
916
  response_content = content_buffer.getvalue()
917
 
918
  # 计算输出token并更新统计信息
919
- completion_tokens = num_tokens_from_string(think_content + response_content)
920
- update_model_stats(model, prompt_tokens, completion_tokens)
921
 
922
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
923
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -938,8 +939,8 @@ def send_message_non_stream(message, model, think=False):
938
  }],
939
  "usage": {
940
  "prompt_tokens": prompt_tokens,
941
- "completion_tokens": completion_tokens,
942
- "total_tokens": prompt_tokens + completion_tokens
943
  }
944
  })
945
  else:
@@ -953,8 +954,8 @@ def send_message_non_stream(message, model, think=False):
953
  response_content = buffer.getvalue()
954
 
955
  # 计算输出token并更新统计信息
956
- completion_tokens = num_tokens_from_string(response_content)
957
- update_model_stats(model, prompt_tokens, completion_tokens)
958
 
959
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
960
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -975,8 +976,8 @@ def send_message_non_stream(message, model, think=False):
975
  }],
976
  "usage": {
977
  "prompt_tokens": prompt_tokens,
978
- "completion_tokens": completion_tokens,
979
- "total_tokens": prompt_tokens + completion_tokens
980
  }
981
  })
982
  except requests.exceptions.RequestException as e:
@@ -1068,22 +1069,49 @@ def index():
1068
  return redirect(url_for('dashboard'))
1069
 
1070
 
1071
- # 获取OpenAI的tokenizer来计算token数
1072
- def num_tokens_from_string(string, model="gpt-3.5-turbo"):
1073
- """计算文本的token数量"""
1074
  try:
1075
- encoding = tiktoken.encoding_for_model(model)
1076
- num_tokens = len(encoding.encode(string))
1077
- print(f"使用tiktoken计算token数: {num_tokens}")
1078
- return num_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1079
  except Exception as e:
1080
- # 如果tiktoken不支持模型或者出错,使用简单的估算
1081
- estimated_tokens = len(string) // 4 # 粗略估计每个token约4个字符
1082
- print(f"使用估算方法计算token数: {estimated_tokens} (原因: {str(e)})")
1083
- return estimated_tokens
1084
 
1085
  # 更新模型使用统计
1086
- def update_model_stats(model, prompt_tokens, completion_tokens):
1087
  global model_usage_stats, total_tokens, model_usage_records
1088
 
1089
  # 添加调用记录
@@ -1098,7 +1126,7 @@ def update_model_stats(model, prompt_tokens, completion_tokens):
1098
  "call_time": call_time,
1099
  "prompt_tokens": prompt_tokens,
1100
  "completion_tokens": completion_tokens,
1101
- "calculation_method": "tiktoken" if any(x in model.lower() for x in ["gpt", "claude"]) or model in ["llama-3", "mistral", "gemma"] else "estimate"
1102
  }
1103
  model_usage_records.append(record)
1104
 
 
12
  import os
13
  import threading
14
  from datetime import datetime, timedelta
 
15
 
16
  app = Flask(__name__, template_folder='templates')
17
  app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
18
  app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
19
 
20
+ # 添加tokenizer服务URL
21
+ TOKENIZER_SERVICE_URL = "https://esotlam-tokenizer.hf.space/count_tokens"
22
 
23
  API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
24
  MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
 
680
  trace_id, sentry_trace = generate_trace_id()
681
 
682
  # 计算输入token
683
+ prompt_tokens, calculation_method = num_tokens_from_string(message, model)
684
  completion_buffer = io.StringIO() # 收集所有输出用于计算token
685
 
686
  headers = {
 
788
  yield "data: [DONE]\n\n"
789
 
790
  # 在流式传输完成后计算token并更新统计
791
+ completion_result, _ = num_tokens_from_string(completion_buffer.getvalue(), model)
792
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
793
 
794
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
795
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
819
  trace_id, sentry_trace = generate_trace_id()
820
 
821
  # 计算输入token
822
+ prompt_tokens, calculation_method = num_tokens_from_string(message, model)
823
 
824
  headers = {
825
  "accept": "text/event-stream",
 
917
  response_content = content_buffer.getvalue()
918
 
919
  # 计算输出token并更新统计信息
920
+ completion_result, _ = num_tokens_from_string(think_content + response_content, model)
921
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
922
 
923
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
924
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
939
  }],
940
  "usage": {
941
  "prompt_tokens": prompt_tokens,
942
+ "completion_tokens": completion_result,
943
+ "total_tokens": prompt_tokens + completion_result
944
  }
945
  })
946
  else:
 
954
  response_content = buffer.getvalue()
955
 
956
  # 计算输出token并更新统计信息
957
+ completion_result, _ = num_tokens_from_string(response_content, model)
958
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
959
 
960
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
961
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
976
  }],
977
  "usage": {
978
  "prompt_tokens": prompt_tokens,
979
+ "completion_tokens": completion_result,
980
+ "total_tokens": prompt_tokens + completion_result
981
  }
982
  })
983
  except requests.exceptions.RequestException as e:
 
1069
  return redirect(url_for('dashboard'))
1070
 
1071
 
1072
def num_tokens_from_string(string, model=""):
    """Count the tokens in *string* via the remote tokenizer service.

    The text is sent to ``TOKENIZER_SERVICE_URL`` as a single user message
    for the given *model*.

    Returns:
        tuple[int, str]: ``(token_count, calculation_method)`` where the
        method is ``"api"`` for an exact count from the service and
        ``"estimate"`` when the service flagged the count as approximate
        (a ``warning`` field in the response) or a local heuristic was used.

    Never raises: any network, HTTP, or parsing failure falls back to a
    rough chars/4 estimate.
    """
    # Local fallback: roughly 4 characters per token.
    fallback = (len(string) // 4, "estimate")
    try:
        request_data = {
            "model": model,
            "messages": [{"role": "user", "content": string}],
        }

        # POST to the token-counting service.
        response = requests.post(
            TOKENIZER_SERVICE_URL,
            json=request_data,
            timeout=10,
        )

        if response.status_code == 200:
            result = response.json()
            # A "warning" field marks the count as an approximation.
            calculation_method = "estimate" if "warning" in result else "api"
            if "input_tokens" in result:
                return result["input_tokens"], calculation_method
            # A 200 without a count is malformed — fall back to the local
            # estimate instead of reporting 0 tokens as "api"-accurate.
            print(f"Tokenizer服务错误: 200 - {response.text}")
            return fallback
        elif response.status_code == 400:
            # The service may still include its own estimated count.
            result = response.json()
            if "input_tokens" in result:
                print(f"使用估算token值: {result['input_tokens']}")
                return result["input_tokens"], "estimate"
            # No estimate supplied — use the local heuristic.
            return fallback
        else:
            # Any other status: log and estimate locally.
            print(f"Tokenizer服务错误: {response.status_code} - {response.text}")
            return fallback
    except Exception as e:
        # Network errors, timeouts, or invalid JSON all land here.
        print(f"计算token错误: {e}")
        return fallback
1112
 
1113
  # 更新模型使用统计
1114
+ def update_model_stats(model, prompt_tokens, completion_tokens, calculation_method="estimate"):
1115
  global model_usage_stats, total_tokens, model_usage_records
1116
 
1117
  # 添加调用记录
 
1126
  "call_time": call_time,
1127
  "prompt_tokens": prompt_tokens,
1128
  "completion_tokens": completion_tokens,
1129
+ "calculation_method": calculation_method
1130
  }
1131
  model_usage_records.append(record)
1132
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
templates/dashboard.html CHANGED
@@ -467,7 +467,7 @@
467
  font-weight: 500;
468
  }
469
 
470
- .token-method.tiktoken {
471
  background: rgba(54, 211, 153, 0.2);
472
  color: var(--success-color);
473
  border: 1px solid rgba(54, 211, 153, 0.3);
@@ -879,8 +879,8 @@
879
  <td class="token-count">{{ record.prompt_tokens|int }}</td>
880
  <td class="token-count">{{ record.completion_tokens|int }}</td>
881
  <td>
882
- {% if record.calculation_method == "tiktoken" %}
883
- <span class="token-method tiktoken">精确</span>
884
  {% else %}
885
  <span class="token-method estimate">估算</span>
886
  {% endif %}
@@ -890,7 +890,7 @@
890
  </tbody>
891
  </table>
892
  <div class="token-note">
893
- <small>* Token计算方式:<span class="token-method tiktoken">精确</span> 表示使用tiktoken准确计算,<span class="token-method estimate">估算</span> 表示使用估算方法(约4字符=1token)。所有统计数据仅供参考,不代表实际计费标准。</small>
894
  </div>
895
  </div>
896
  </div>
 
467
  font-weight: 500;
468
  }
469
 
470
+ .token-method.api {
471
  background: rgba(54, 211, 153, 0.2);
472
  color: var(--success-color);
473
  border: 1px solid rgba(54, 211, 153, 0.3);
 
879
  <td class="token-count">{{ record.prompt_tokens|int }}</td>
880
  <td class="token-count">{{ record.completion_tokens|int }}</td>
881
  <td>
882
+ {% if record.calculation_method in ["api"] %}
883
+ <span class="token-method api">精确</span>
884
  {% else %}
885
  <span class="token-method estimate">估算</span>
886
  {% endif %}
 
890
  </tbody>
891
  </table>
892
  <div class="token-note">
893
+ <small>* Token计算方式:<span class="token-method api">精确</span> 表示调用官方API精确计算,<span class="token-method estimate">估算</span> 表示使用gpt-4o模型估算。所有统计数据仅供参考,不代表实际计费标准。</small>
894
  </div>
895
  </div>
896
  </div>