chenjianfei committed on
Commit ee3c192 · 1 Parent(s): a0189a2
Files changed (2)
  1. app.py +6 -14
  2. requirements.txt +1 -1
app.py CHANGED
@@ -39,7 +39,6 @@ def handle_retry(history, thinking_history, config, section_state, retry_data: g
 
 def predict(message, chat_history, thinking_history, config, section_state):
     global local_rag, TTS_LOADED, LLM_LOADED, synthesiser, core_llm, core_tokenizer
-    print(config)
     print(f"Current mode: {config['mode_selected']}")
     print(f'Role-play description: {config["character_description"]}')
     print(f"Character-setting injection mode: {config['character_setting_mode']}")
@@ -100,17 +99,8 @@ def predict(message, chat_history, thinking_history, config, section_state):
     # Append the user message to the history
     section_state["chat_history"].append({"role": "user", "content": message})
 
-    # Count the current context length and adjust the context window dynamically to work around ollama's limit
-    try:
-        tokenizer = load_tokenizer(config['llm_model'])
-    except Exception as e:
-        if config['llm_model'] in BASE_MODEL_TABLE:
-            tokenizer = load_tokenizer(BASE_MODEL_TABLE[config['llm_model']])
-        else:
-            raise e
-    token_cnt = count_tokens_local(input_message, tokenizer)
-    if token_cnt >= MAX_MODEL_CTX:
-        gr.Warning("The conversation has exceeded the model's context length; please start a new session...")
+
+
     try:
         # Invoke the model
        if not LLM_LOADED:
@@ -121,7 +111,9 @@ def predict(message, chat_history, thinking_history, config, section_state):
            )
            core_tokenizer = AutoTokenizer.from_pretrained(config['llm_model'])
            LLM_LOADED = True
-
+        token_cnt = count_tokens_local(input_message, core_tokenizer)
+        if token_cnt >= MAX_MODEL_CTX:
+            gr.Warning("The conversation has exceeded the model's context length; please start a new session...")
         text = core_tokenizer.apply_chat_template(
             input_message,
             tokenize=False,
@@ -145,7 +137,7 @@ def predict(message, chat_history, thinking_history, config, section_state):
         index = 0
         # thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
         thinking = None
-        response_content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+        response_content = core_tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
         print('Response:', response_content)
         # Update the chat history
         chat_history.append({'role': 'user', 'content': message})
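
The commit moves the context-length check to after the local model load, so it counts tokens with the already-loaded core_tokenizer instead of a separately loaded tokenizer; this also drops the load_tokenizer/BASE_MODEL_TABLE fallback and fixes the stale tokenizer reference in the decode call, which would have raised a NameError once that fallback block was gone. A minimal sketch of the guard as it reads after this change — count_tokens_local, MAX_MODEL_CTX, and input_message are the repo's own names, but the helper body, the limit value, and the model id below are assumptions:

# Sketch of the post-commit context-length guard, assuming count_tokens_local
# renders the chat template and counts the resulting tokens. The repo defines
# count_tokens_local and MAX_MODEL_CTX; these bodies are assumptions.
from transformers import AutoTokenizer

MAX_MODEL_CTX = 32768  # assumed limit; the repo sets its own value

def count_tokens_local(messages, tokenizer):
    # Render the messages with the model's chat template, then count tokens.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return len(tokenizer(text).input_ids)

core_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # hypothetical model id
input_message = [{"role": "user", "content": "hello"}]
if count_tokens_local(input_message, core_tokenizer) >= MAX_MODEL_CTX:
    print("Conversation exceeds the model context window; start a new session.")

Counting against the same tokenizer that will serve generation keeps the estimate consistent with what apply_chat_template actually produces, at the cost of only warning after the model has been loaded.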
requirements.txt CHANGED
@@ -17,4 +17,4 @@ xcodec2==0.1.5
 vocos==0.1.0
 jq
 sentence-transformers==3.4.1
-faiss-cpu==1.7.4
+faiss-cpu
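
requirements.txt unpins faiss-cpu, letting pip resolve whatever wheel matches the runtime; 1.7.4 ships no wheels for recent Python releases, a likely motivation, though the commit does not say. A quick smoke test for whichever version resolves — the dimension 384 is an assumption matching common sentence-transformers embedding widths:

# Smoke test for an unpinned faiss-cpu install (assumed usage; d=384 is an
# assumption matching common sentence-transformers models).
import numpy as np
import faiss

d = 384
index = faiss.IndexFlatL2(d)                        # exact L2 index
index.add(np.random.rand(10, d).astype("float32"))  # faiss expects float32
distances, ids = index.search(np.random.rand(1, d).astype("float32"), 3)
print(ids)  # indices of the 3 nearest stored vectors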