Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- Dockerfile +13 -60
- app.py +54 -26
- requirements.txt +0 -0
- templates/dashboard.html +4 -4
Dockerfile
CHANGED
@@ -1,71 +1,24 @@
|
|
1 |
-
|
2 |
-
# TODO: 解决完整版本的依赖问题后替换此文件
|
3 |
|
4 |
-
#
|
5 |
-
|
6 |
|
7 |
-
# 安装基本依赖
|
8 |
-
RUN apk add --no-cache git build-base ca-certificates
|
9 |
-
|
10 |
-
# 设置工作目录
|
11 |
WORKDIR /app
|
12 |
|
13 |
-
|
14 |
-
RUN
|
15 |
-
|
16 |
-
# 预先下载依赖
|
17 |
-
RUN go get github.com/google/generative-ai-go/[email protected]
|
18 |
-
RUN go get google.golang.org/[email protected]
|
19 |
-
RUN go get github.com/gin-gonic/[email protected]
|
20 |
-
RUN go get github.com/go-playground/validator/[email protected]
|
21 |
-
RUN go get github.com/gabriel-vasile/[email protected]
|
22 |
-
|
23 |
-
# 复制源代码
|
24 |
-
COPY main.go .
|
25 |
-
|
26 |
-
# 确保依赖关系
|
27 |
-
RUN go mod tidy
|
28 |
-
RUN go mod download
|
29 |
-
|
30 |
-
# 构建
|
31 |
-
RUN CGO_ENABLED=0 GOOS=linux go build -o tokenizer -a -installsuffix cgo -ldflags="-w -s" .
|
32 |
-
|
33 |
-
# 第二阶段:构建Python环境
|
34 |
-
FROM python:3.9-slim
|
35 |
-
|
36 |
-
# 安装基本依赖
|
37 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
38 |
-
ca-certificates \
|
39 |
-
&& rm -rf /var/lib/apt/lists/*
|
40 |
-
|
41 |
-
# 设置工作目录
|
42 |
-
WORKDIR /app
|
43 |
-
|
44 |
-
# 复制Go二进制文件
|
45 |
-
COPY --from=go-builder /app/tokenizer .
|
46 |
-
|
47 |
-
# 复制Python服务文件和tokenizer文件
|
48 |
-
COPY deepseek_v3_tokenizer /app/deepseek_v3_tokenizer
|
49 |
-
COPY openai_service.py /app/
|
50 |
|
51 |
-
|
52 |
-
RUN pip install --no-cache-dir flask transformers tiktoken
|
53 |
|
54 |
# 设置环境变量
|
55 |
-
ENV
|
56 |
ENV PORT=7860
|
57 |
-
ENV DEEPSEEK_URL=http://127.0.0.1:7861
|
58 |
-
ENV OPENAI_URL=http://127.0.0.1:7862
|
59 |
|
60 |
-
#
|
61 |
-
RUN
|
62 |
-
python /app/deepseek_v3_tokenizer/deepseek_service.py & \n\
|
63 |
-
python /app/openai_service.py & \n\
|
64 |
-
sleep 5\n\
|
65 |
-
./tokenizer' > /app/start.sh && chmod +x /app/start.sh
|
66 |
|
67 |
-
#
|
68 |
-
EXPOSE 7860
|
69 |
|
70 |
-
#
|
71 |
-
CMD ["
|
|
|
1 |
+
FROM python:3.11-slim
|
|
|
2 |
|
3 |
+
# 设置用户为root
|
4 |
+
USER root
|
5 |
|
|
|
|
|
|
|
|
|
6 |
WORKDIR /app
|
7 |
|
8 |
+
COPY requirements.txt .
|
9 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
COPY . .
|
|
|
12 |
|
13 |
# 设置环境变量
|
14 |
+
ENV HOST=0.0.0.0
|
15 |
ENV PORT=7860
|
|
|
|
|
16 |
|
17 |
+
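# Remove sensitive files from the final filesystem.
# NOTE(review): the preceding `COPY . .` has already stored these files in an
# earlier image layer, so they stay recoverable from the image; exclude them
# with a .dockerignore file instead of deleting them afterwards.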
|
18 |
+
RUN rm -f config.json password.txt
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
# Expose the port (Hugging Face Spaces uses port 7860 by default)
|
21 |
+
EXPOSE 7860
|
22 |
|
23 |
+
# 启动命令
|
24 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -12,12 +12,13 @@ import jwt
|
|
12 |
import os
|
13 |
import threading
|
14 |
from datetime import datetime, timedelta
|
15 |
-
import tiktoken # 导入tiktoken来计算token数量
|
16 |
|
17 |
app = Flask(__name__, template_folder='templates')
|
18 |
app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
|
19 |
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
|
20 |
|
|
|
|
|
21 |
|
22 |
API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
|
23 |
MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
|
@@ -679,7 +680,7 @@ def send_message(message, model, think=False):
|
|
679 |
trace_id, sentry_trace = generate_trace_id()
|
680 |
|
681 |
# 计算输入token
|
682 |
-
prompt_tokens = num_tokens_from_string(message)
|
683 |
completion_buffer = io.StringIO() # 收集所有输出用于计算token
|
684 |
|
685 |
headers = {
|
@@ -787,8 +788,8 @@ def send_message(message, model, think=False):
|
|
787 |
yield "data: [DONE]\n\n"
|
788 |
|
789 |
# 在流式传输完成后计算token并更新统计
|
790 |
-
|
791 |
-
update_model_stats(model, prompt_tokens,
|
792 |
|
793 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
794 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
@@ -818,7 +819,7 @@ def send_message_non_stream(message, model, think=False):
|
|
818 |
trace_id, sentry_trace = generate_trace_id()
|
819 |
|
820 |
# 计算输入token
|
821 |
-
prompt_tokens = num_tokens_from_string(message)
|
822 |
|
823 |
headers = {
|
824 |
"accept": "text/event-stream",
|
@@ -916,8 +917,8 @@ def send_message_non_stream(message, model, think=False):
|
|
916 |
response_content = content_buffer.getvalue()
|
917 |
|
918 |
# 计算输出token并更新统计信息
|
919 |
-
|
920 |
-
update_model_stats(model, prompt_tokens,
|
921 |
|
922 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
923 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
@@ -938,8 +939,8 @@ def send_message_non_stream(message, model, think=False):
|
|
938 |
}],
|
939 |
"usage": {
|
940 |
"prompt_tokens": prompt_tokens,
|
941 |
-
"completion_tokens":
|
942 |
-
"total_tokens": prompt_tokens +
|
943 |
}
|
944 |
})
|
945 |
else:
|
@@ -953,8 +954,8 @@ def send_message_non_stream(message, model, think=False):
|
|
953 |
response_content = buffer.getvalue()
|
954 |
|
955 |
# 计算输出token并更新统计信息
|
956 |
-
|
957 |
-
update_model_stats(model, prompt_tokens,
|
958 |
|
959 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
960 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
@@ -975,8 +976,8 @@ def send_message_non_stream(message, model, think=False):
|
|
975 |
}],
|
976 |
"usage": {
|
977 |
"prompt_tokens": prompt_tokens,
|
978 |
-
"completion_tokens":
|
979 |
-
"total_tokens": prompt_tokens +
|
980 |
}
|
981 |
})
|
982 |
except requests.exceptions.RequestException as e:
|
@@ -1068,22 +1069,49 @@ def index():
|
|
1068 |
return redirect(url_for('dashboard'))
|
1069 |
|
1070 |
|
1071 |
-
|
1072 |
-
|
1073 |
-
"""计算文本的token数量"""
|
1074 |
try:
|
1075 |
-
|
1076 |
-
|
1077 |
-
|
1078 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1079 |
except Exception as e:
|
1080 |
-
#
|
1081 |
-
|
1082 |
-
|
1083 |
-
|
1084 |
|
1085 |
# 更新模型使用统计
|
1086 |
-
def update_model_stats(model, prompt_tokens, completion_tokens):
|
1087 |
global model_usage_stats, total_tokens, model_usage_records
|
1088 |
|
1089 |
# 添加调用记录
|
@@ -1098,7 +1126,7 @@ def update_model_stats(model, prompt_tokens, completion_tokens):
|
|
1098 |
"call_time": call_time,
|
1099 |
"prompt_tokens": prompt_tokens,
|
1100 |
"completion_tokens": completion_tokens,
|
1101 |
-
"calculation_method":
|
1102 |
}
|
1103 |
model_usage_records.append(record)
|
1104 |
|
|
|
12 |
import os
|
13 |
import threading
|
14 |
from datetime import datetime, timedelta
|
|
|
15 |
|
16 |
app = Flask(__name__, template_folder='templates')
|
17 |
app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
|
18 |
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
|
19 |
|
20 |
+
# URL of the external tokenizer service used to count tokens
|
21 |
+
TOKENIZER_SERVICE_URL = "https://esotlam-tokenizer.hf.space/count_tokens"
|
22 |
|
23 |
API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
|
24 |
MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
|
|
|
680 |
trace_id, sentry_trace = generate_trace_id()
|
681 |
|
682 |
# 计算输入token
|
683 |
+
prompt_tokens, calculation_method = num_tokens_from_string(message, model)
|
684 |
completion_buffer = io.StringIO() # 收集所有输出用于计算token
|
685 |
|
686 |
headers = {
|
|
|
788 |
yield "data: [DONE]\n\n"
|
789 |
|
790 |
# 在流式传输完成后计算token并更新统计
|
791 |
+
completion_result, _ = num_tokens_from_string(completion_buffer.getvalue(), model)
|
792 |
+
update_model_stats(model, prompt_tokens, completion_result, calculation_method)
|
793 |
|
794 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
795 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
|
|
819 |
trace_id, sentry_trace = generate_trace_id()
|
820 |
|
821 |
# 计算输入token
|
822 |
+
prompt_tokens, calculation_method = num_tokens_from_string(message, model)
|
823 |
|
824 |
headers = {
|
825 |
"accept": "text/event-stream",
|
|
|
917 |
response_content = content_buffer.getvalue()
|
918 |
|
919 |
# 计算输出token并更新统计信息
|
920 |
+
completion_result, _ = num_tokens_from_string(think_content + response_content, model)
|
921 |
+
update_model_stats(model, prompt_tokens, completion_result, calculation_method)
|
922 |
|
923 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
924 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
|
|
939 |
}],
|
940 |
"usage": {
|
941 |
"prompt_tokens": prompt_tokens,
|
942 |
+
"completion_tokens": completion_result,
|
943 |
+
"total_tokens": prompt_tokens + completion_result
|
944 |
}
|
945 |
})
|
946 |
else:
|
|
|
954 |
response_content = buffer.getvalue()
|
955 |
|
956 |
# 计算输出token并更新统计信息
|
957 |
+
completion_result, _ = num_tokens_from_string(response_content, model)
|
958 |
+
update_model_stats(model, prompt_tokens, completion_result, calculation_method)
|
959 |
|
960 |
# 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
|
961 |
if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
|
|
|
976 |
}],
|
977 |
"usage": {
|
978 |
"prompt_tokens": prompt_tokens,
|
979 |
+
"completion_tokens": completion_result,
|
980 |
+
"total_tokens": prompt_tokens + completion_result
|
981 |
}
|
982 |
})
|
983 |
except requests.exceptions.RequestException as e:
|
|
|
1069 |
return redirect(url_for('dashboard'))
|
1070 |
|
1071 |
|
1072 |
+
def num_tokens_from_string(string, model=""):
    """Count the tokens in ``string`` using the remote tokenizer service.

    The text is posted as a single user message to ``TOKENIZER_SERVICE_URL``
    and the count is read from the ``input_tokens`` field of the JSON reply.
    This function is deliberately best-effort: it never raises because of a
    service problem and always returns a usable integer.

    Args:
        string: Text to measure. ``None`` is treated as an empty string.
        model: Model name forwarded to the service in the ``model`` field.

    Returns:
        A ``(token_count, calculation_method)`` tuple. ``calculation_method``
        is ``"api"`` when the service answers 200 with no ``warning`` field
        and ``"estimate"`` in every other case. When the service is
        unreachable, returns an unexpected status, or does not supply a
        non-negative integer ``input_tokens``, the count falls back to
        ``len(string) // 4``.
    """
    text = "" if string is None else string
    # Local fallback used whenever the service cannot give a usable count:
    # roughly four characters per token.
    fallback = (len(text) // 4, "estimate")

    try:
        response = requests.post(
            TOKENIZER_SERVICE_URL,
            json={
                "model": model,
                "messages": [{"role": "user", "content": text}],
            },
            timeout=10,
        )

        # Only 200 and 400 replies may carry a token count; anything else is
        # logged and answered with the local estimate.
        if response.status_code not in (200, 400):
            print(f"Tokenizer服务错误: {response.status_code} - {response.text}")
            return fallback

        result = response.json()
        tokens = result.get("input_tokens") if isinstance(result, dict) else None
        # Reject missing, null, boolean, negative or non-integer counts so
        # callers can always do arithmetic on the returned value.
        if not isinstance(tokens, int) or isinstance(tokens, bool) or tokens < 0:
            return fallback

        if response.status_code == 400:
            # The service rejected the request but still supplied an estimate.
            print(f"使用估算token值: {tokens}")
            return tokens, "estimate"

        # A "warning" field in a 200 reply marks the count as an estimate.
        return tokens, ("estimate" if "warning" in result else "api")
    except Exception as e:
        # Deliberate catch-all: token counting must never break a chat request.
        print(f"计算token错误: {e}")
        return fallback
|
1111 |
+
|
1112 |
|
1113 |
# 更新模型使用统计
|
1114 |
+
def update_model_stats(model, prompt_tokens, completion_tokens, calculation_method="estimate"):
|
1115 |
global model_usage_stats, total_tokens, model_usage_records
|
1116 |
|
1117 |
# 添加调用记录
|
|
|
1126 |
"call_time": call_time,
|
1127 |
"prompt_tokens": prompt_tokens,
|
1128 |
"completion_tokens": completion_tokens,
|
1129 |
+
"calculation_method": calculation_method
|
1130 |
}
|
1131 |
model_usage_records.append(record)
|
1132 |
|
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
|
|
templates/dashboard.html
CHANGED
@@ -467,7 +467,7 @@
|
|
467 |
font-weight: 500;
|
468 |
}
|
469 |
|
470 |
-
.token-method.
|
471 |
background: rgba(54, 211, 153, 0.2);
|
472 |
color: var(--success-color);
|
473 |
border: 1px solid rgba(54, 211, 153, 0.3);
|
@@ -879,8 +879,8 @@
|
|
879 |
<td class="token-count">{{ record.prompt_tokens|int }}</td>
|
880 |
<td class="token-count">{{ record.completion_tokens|int }}</td>
|
881 |
<td>
|
882 |
-
{% if record.calculation_method
|
883 |
-
<span class="token-method
|
884 |
{% else %}
|
885 |
<span class="token-method estimate">估算</span>
|
886 |
{% endif %}
|
@@ -890,7 +890,7 @@
|
|
890 |
</tbody>
|
891 |
</table>
|
892 |
<div class="token-note">
|
893 |
-
<small>* Token计算方式:<span class="token-method
|
894 |
</div>
|
895 |
</div>
|
896 |
</div>
|
|
|
467 |
font-weight: 500;
|
468 |
}
|
469 |
|
470 |
+
.token-method.api {
|
471 |
background: rgba(54, 211, 153, 0.2);
|
472 |
color: var(--success-color);
|
473 |
border: 1px solid rgba(54, 211, 153, 0.3);
|
|
|
879 |
<td class="token-count">{{ record.prompt_tokens|int }}</td>
|
880 |
<td class="token-count">{{ record.completion_tokens|int }}</td>
|
881 |
<td>
|
882 |
+
{% if record.calculation_method in ["api"] %}
|
883 |
+
<span class="token-method api">精确</span>
|
884 |
{% else %}
|
885 |
<span class="token-method estimate">估算</span>
|
886 |
{% endif %}
|
|
|
890 |
</tbody>
|
891 |
</table>
|
892 |
<div class="token-note">
|
893 |
+
<small>* Token计算方式:<span class="token-method api">精确</span> 表示调用官方API精确计算,<span class="token-method estimate">估算</span> 表示使用gpt-4o模型估算。所有统计数据仅供参考,不代表实际计费标准。</small>
|
894 |
</div>
|
895 |
</div>
|
896 |
</div>
|