malt666 commited on
Commit
49a5193
·
verified ·
1 Parent(s): ad9a66f

Upload 5 files

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -60
  2. app.py +54 -26
  3. requirements.txt +0 -0
  4. templates/dashboard.html +4 -4
Dockerfile CHANGED
@@ -1,71 +1,24 @@
1
- # 这是一个临时的Dockerfile,用于在Hugging Face上部署占位符服务
2
- # TODO: 解决完整版本的依赖问题后替换此文件
3
 
4
- # 第一阶段:构建Go服务
5
- FROM golang:1.20-alpine AS go-builder
6
 
7
- # 安装基本依赖
8
- RUN apk add --no-cache git build-base ca-certificates
9
-
10
- # 设置工作目录
11
  WORKDIR /app
12
 
13
- # 初始化go.mod
14
- RUN go mod init tokenizer
15
-
16
- # 预先下载依赖
17
- RUN go get github.com/google/generative-ai-go/[email protected]
18
- RUN go get google.golang.org/[email protected]
19
- RUN go get github.com/gin-gonic/[email protected]
20
- RUN go get github.com/go-playground/validator/[email protected]
21
- RUN go get github.com/gabriel-vasile/[email protected]
22
-
23
- # 复制源代码
24
- COPY main.go .
25
-
26
- # 确保依赖关系
27
- RUN go mod tidy
28
- RUN go mod download
29
-
30
- # 构建
31
- RUN CGO_ENABLED=0 GOOS=linux go build -o tokenizer -a -installsuffix cgo -ldflags="-w -s" .
32
-
33
- # 第二阶段:构建Python环境
34
- FROM python:3.9-slim
35
-
36
- # 安装基本依赖
37
- RUN apt-get update && apt-get install -y --no-install-recommends \
38
- ca-certificates \
39
- && rm -rf /var/lib/apt/lists/*
40
-
41
- # 设置工作目录
42
- WORKDIR /app
43
-
44
- # 复制Go二进制文件
45
- COPY --from=go-builder /app/tokenizer .
46
-
47
- # 复制Python服务文件和tokenizer文件
48
- COPY deepseek_v3_tokenizer /app/deepseek_v3_tokenizer
49
- COPY openai_service.py /app/
50
 
51
- # 安装Python依赖
52
- RUN pip install --no-cache-dir flask transformers tiktoken
53
 
54
  # 设置环境变量
55
- ENV GIN_MODE=release
56
  ENV PORT=7860
57
- ENV DEEPSEEK_URL=http://127.0.0.1:7861
58
- ENV OPENAI_URL=http://127.0.0.1:7862
59
 
60
- # 创建启动脚本
61
- RUN echo '#!/bin/sh\n\
62
- python /app/deepseek_v3_tokenizer/deepseek_service.py & \n\
63
- python /app/openai_service.py & \n\
64
- sleep 5\n\
65
- ./tokenizer' > /app/start.sh && chmod +x /app/start.sh
66
 
67
- # 暴露端口
68
- EXPOSE 7860 7861 7862
69
 
70
- # 启动服务
71
- CMD ["/app/start.sh"]
 
1
# Placeholder deployment image for Hugging Face Spaces.
FROM python:3.11-slim

# Run as root explicitly (some Spaces base setups switch to a non-root user).
USER root

WORKDIR /app

# Install Python dependencies first so this layer is cached
# independently of source-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source.
COPY . .

# Runtime configuration read by app.py.
ENV HOST=0.0.0.0
ENV PORT=7860

# Remove sensitive files pulled in by `COPY . .`.
# NOTE(review): deleting in a later layer does NOT purge these files from the
# earlier image layer — prefer excluding them via .dockerignore; confirm they
# are not present in the published image.
RUN rm -f config.json password.txt

# Expose the port (Hugging Face Spaces routes traffic to 7860 by default).
EXPOSE 7860

# Start the application.
CMD ["python", "app.py"]
app.py CHANGED
@@ -12,12 +12,13 @@ import jwt
12
  import os
13
  import threading
14
  from datetime import datetime, timedelta
15
- import tiktoken # 导入tiktoken来计算token数量
16
 
17
  app = Flask(__name__, template_folder='templates')
18
  app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
19
  app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
20
 
 
 
21
 
22
  API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
23
  MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
@@ -679,7 +680,7 @@ def send_message(message, model, think=False):
679
  trace_id, sentry_trace = generate_trace_id()
680
 
681
  # 计算输入token
682
- prompt_tokens = num_tokens_from_string(message)
683
  completion_buffer = io.StringIO() # 收集所有输出用于计算token
684
 
685
  headers = {
@@ -787,8 +788,8 @@ def send_message(message, model, think=False):
787
  yield "data: [DONE]\n\n"
788
 
789
  # 在流式传输完成后计算token并更新统计
790
- completion_tokens = num_tokens_from_string(completion_buffer.getvalue())
791
- update_model_stats(model, prompt_tokens, completion_tokens)
792
 
793
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
794
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -818,7 +819,7 @@ def send_message_non_stream(message, model, think=False):
818
  trace_id, sentry_trace = generate_trace_id()
819
 
820
  # 计算输入token
821
- prompt_tokens = num_tokens_from_string(message)
822
 
823
  headers = {
824
  "accept": "text/event-stream",
@@ -916,8 +917,8 @@ def send_message_non_stream(message, model, think=False):
916
  response_content = content_buffer.getvalue()
917
 
918
  # 计算输出token并更新统计信息
919
- completion_tokens = num_tokens_from_string(think_content + response_content)
920
- update_model_stats(model, prompt_tokens, completion_tokens)
921
 
922
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
923
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -938,8 +939,8 @@ def send_message_non_stream(message, model, think=False):
938
  }],
939
  "usage": {
940
  "prompt_tokens": prompt_tokens,
941
- "completion_tokens": completion_tokens,
942
- "total_tokens": prompt_tokens + completion_tokens
943
  }
944
  })
945
  else:
@@ -953,8 +954,8 @@ def send_message_non_stream(message, model, think=False):
953
  response_content = buffer.getvalue()
954
 
955
  # 计算输出token并更新统计信息
956
- completion_tokens = num_tokens_from_string(response_content)
957
- update_model_stats(model, prompt_tokens, completion_tokens)
958
 
959
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
960
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
@@ -975,8 +976,8 @@ def send_message_non_stream(message, model, think=False):
975
  }],
976
  "usage": {
977
  "prompt_tokens": prompt_tokens,
978
- "completion_tokens": completion_tokens,
979
- "total_tokens": prompt_tokens + completion_tokens
980
  }
981
  })
982
  except requests.exceptions.RequestException as e:
@@ -1068,22 +1069,49 @@ def index():
1068
  return redirect(url_for('dashboard'))
1069
 
1070
 
1071
- # 获取OpenAI的tokenizer来计算token数
1072
- def num_tokens_from_string(string, model="gpt-3.5-turbo"):
1073
- """计算文本的token数量"""
1074
  try:
1075
- encoding = tiktoken.encoding_for_model(model)
1076
- num_tokens = len(encoding.encode(string))
1077
- print(f"使用tiktoken计算token数: {num_tokens}")
1078
- return num_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1079
  except Exception as e:
1080
- # 如果tiktoken不支持模型或者出错,使用简单的估算
1081
- estimated_tokens = len(string) // 4 # 粗略估计每个token约4个字符
1082
- print(f"使用估算方法计算token数: {estimated_tokens} (原因: {str(e)})")
1083
- return estimated_tokens
1084
 
1085
  # 更新模型使用统计
1086
- def update_model_stats(model, prompt_tokens, completion_tokens):
1087
  global model_usage_stats, total_tokens, model_usage_records
1088
 
1089
  # 添加调用记录
@@ -1098,7 +1126,7 @@ def update_model_stats(model, prompt_tokens, completion_tokens):
1098
  "call_time": call_time,
1099
  "prompt_tokens": prompt_tokens,
1100
  "completion_tokens": completion_tokens,
1101
- "calculation_method": "tiktoken" if any(x in model.lower() for x in ["gpt", "claude"]) or model in ["llama-3", "mistral", "gemma"] else "estimate"
1102
  }
1103
  model_usage_records.append(record)
1104
 
 
12
  import os
13
  import threading
14
  from datetime import datetime, timedelta
 
15
 
16
  app = Flask(__name__, template_folder='templates')
17
  app.secret_key = os.environ.get("SECRET_KEY", "abacus_chat_proxy_secret_key")
18
  app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)
19
 
20
+ # 添加tokenizer服务URL
21
+ TOKENIZER_SERVICE_URL = "https://esotlam-tokenizer.hf.space/count_tokens"
22
 
23
  API_ENDPOINT_URL = "https://abacus.ai/api/v0/describeDeployment"
24
  MODEL_LIST_URL = "https://abacus.ai/api/v0/listExternalApplications"
 
680
  trace_id, sentry_trace = generate_trace_id()
681
 
682
  # 计算输入token
683
+ prompt_tokens, calculation_method = num_tokens_from_string(message, model)
684
  completion_buffer = io.StringIO() # 收集所有输出用于计算token
685
 
686
  headers = {
 
788
  yield "data: [DONE]\n\n"
789
 
790
  # 在流式传输完成后计算token并更新统计
791
+ completion_result, _ = num_tokens_from_string(completion_buffer.getvalue(), model)
792
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
793
 
794
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
795
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
819
  trace_id, sentry_trace = generate_trace_id()
820
 
821
  # 计算输入token
822
+ prompt_tokens, calculation_method = num_tokens_from_string(message, model)
823
 
824
  headers = {
825
  "accept": "text/event-stream",
 
917
  response_content = content_buffer.getvalue()
918
 
919
  # 计算输出token并更新统计信息
920
+ completion_result, _ = num_tokens_from_string(think_content + response_content, model)
921
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
922
 
923
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
924
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
939
  }],
940
  "usage": {
941
  "prompt_tokens": prompt_tokens,
942
+ "completion_tokens": completion_result,
943
+ "total_tokens": prompt_tokens + completion_result
944
  }
945
  })
946
  else:
 
954
  response_content = buffer.getvalue()
955
 
956
  # 计算输出token并更新统计信息
957
+ completion_result, _ = num_tokens_from_string(response_content, model)
958
+ update_model_stats(model, prompt_tokens, completion_result, calculation_method)
959
 
960
  # 如果需要删除上一个对话且上一个对话ID不为空且与当前不同
961
  if DELETE_CHAT and last_conversation_id and last_conversation_id != conversation_id:
 
976
  }],
977
  "usage": {
978
  "prompt_tokens": prompt_tokens,
979
+ "completion_tokens": completion_result,
980
+ "total_tokens": prompt_tokens + completion_result
981
  }
982
  })
983
  except requests.exceptions.RequestException as e:
 
1069
  return redirect(url_for('dashboard'))
1070
 
1071
 
1072
def num_tokens_from_string(string, model=""):
    """Count the tokens in *string* via the remote tokenizer service.

    The text is sent to ``TOKENIZER_SERVICE_URL`` as a single user message
    for the given *model*.

    Returns:
        tuple[int, str]: ``(token_count, calculation_method)`` where the
        method is ``"api"`` for an exact count from the service and
        ``"estimate"`` when the service flagged the count as approximate
        (a ``warning`` field in the response) or a local heuristic was used.

    Never raises: any network, HTTP, or parsing failure falls back to a
    rough chars/4 estimate.
    """
    # Local fallback: roughly 4 characters per token.
    fallback = (len(string) // 4, "estimate")
    try:
        request_data = {
            "model": model,
            "messages": [{"role": "user", "content": string}],
        }

        # POST to the token-counting service.
        response = requests.post(
            TOKENIZER_SERVICE_URL,
            json=request_data,
            timeout=10,
        )

        if response.status_code == 200:
            result = response.json()
            # A "warning" field marks the count as an approximation.
            calculation_method = "estimate" if "warning" in result else "api"
            if "input_tokens" in result:
                return result["input_tokens"], calculation_method
            # A 200 without a count is malformed — fall back to the local
            # estimate instead of reporting 0 tokens as "api"-accurate.
            print(f"Tokenizer服务错误: 200 - {response.text}")
            return fallback
        elif response.status_code == 400:
            # The service may still include its own estimated count.
            result = response.json()
            if "input_tokens" in result:
                print(f"使用估算token值: {result['input_tokens']}")
                return result["input_tokens"], "estimate"
            # No estimate supplied — use the local heuristic.
            return fallback
        else:
            # Any other status: log and estimate locally.
            print(f"Tokenizer服务错误: {response.status_code} - {response.text}")
            return fallback
    except Exception as e:
        # Network errors, timeouts, or invalid JSON all land here.
        print(f"计算token错误: {e}")
        return fallback
1112
 
1113
  # 更新模型使用统计
1114
+ def update_model_stats(model, prompt_tokens, completion_tokens, calculation_method="estimate"):
1115
  global model_usage_stats, total_tokens, model_usage_records
1116
 
1117
  # 添加调用记录
 
1126
  "call_time": call_time,
1127
  "prompt_tokens": prompt_tokens,
1128
  "completion_tokens": completion_tokens,
1129
+ "calculation_method": calculation_method
1130
  }
1131
  model_usage_records.append(record)
1132
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
templates/dashboard.html CHANGED
@@ -467,7 +467,7 @@
467
  font-weight: 500;
468
  }
469
 
470
- .token-method.tiktoken {
471
  background: rgba(54, 211, 153, 0.2);
472
  color: var(--success-color);
473
  border: 1px solid rgba(54, 211, 153, 0.3);
@@ -879,8 +879,8 @@
879
  <td class="token-count">{{ record.prompt_tokens|int }}</td>
880
  <td class="token-count">{{ record.completion_tokens|int }}</td>
881
  <td>
882
- {% if record.calculation_method == "tiktoken" %}
883
- <span class="token-method tiktoken">精确</span>
884
  {% else %}
885
  <span class="token-method estimate">估算</span>
886
  {% endif %}
@@ -890,7 +890,7 @@
890
  </tbody>
891
  </table>
892
  <div class="token-note">
893
- <small>* Token计算方式:<span class="token-method tiktoken">精确</span> 表示使用tiktoken准确计算,<span class="token-method estimate">估算</span> 表示使用估算方法(约4字符=1token)。所有统计数据仅供参考,不代表实际计费标准。</small>
894
  </div>
895
  </div>
896
  </div>
 
467
  font-weight: 500;
468
  }
469
 
470
+ .token-method.api {
471
  background: rgba(54, 211, 153, 0.2);
472
  color: var(--success-color);
473
  border: 1px solid rgba(54, 211, 153, 0.3);
 
879
  <td class="token-count">{{ record.prompt_tokens|int }}</td>
880
  <td class="token-count">{{ record.completion_tokens|int }}</td>
881
  <td>
882
+ {% if record.calculation_method in ["api"] %}
883
+ <span class="token-method api">精确</span>
884
  {% else %}
885
  <span class="token-method estimate">估算</span>
886
  {% endif %}
 
890
  </tbody>
891
  </table>
892
  <div class="token-note">
893
+ <small>* Token计算方式:<span class="token-method api">精确</span> 表示调用官方API精确计算,<span class="token-method estimate">估算</span> 表示使用gpt-4o模型估算。所有统计数据仅供参考,不代表实际计费标准。</small>
894
  </div>
895
  </div>
896
  </div>