chenjianfei committed on
Commit ee3c192 · 1 Parent(s): a0189a2
Files changed (2)
  1. app.py +6 -14
  2. requirements.txt +1 -1
app.py CHANGED
@@ -39,7 +39,6 @@ def handle_retry(history, thinking_history, config, section_state, retry_data: g
 
 def predict(message, chat_history, thinking_history, config, section_state):
     global local_rag, TTS_LOADED, LLM_LOADED, synthesiser, core_llm, core_tokenizer
-    print(config)
     print(f"Current mode: {config['mode_selected']}")
     print(f'Role-play description: {config["character_description"]}')
     print(f"Character-setting injection mode: {config['character_setting_mode']}")
@@ -100,17 +99,8 @@ def predict(message, chat_history, thinking_history, config, section_state):
     # Append the user message to the history
     section_state["chat_history"].append({"role": "user", "content": message})
 
-    # Count the current context length and adjust the context window dynamically to work around ollama's limit
-    try:
-        tokenizer = load_tokenizer(config['llm_model'])
-    except Exception as e:
-        if config['llm_model'] in BASE_MODEL_TABLE:
-            tokenizer = load_tokenizer(BASE_MODEL_TABLE[config['llm_model']])
-        else:
-            raise e
-    token_cnt = count_tokens_local(input_message, tokenizer)
-    if token_cnt >= MAX_MODEL_CTX:
-        gr.Warning("The conversation has exceeded the model's context length; please start a new session...")
+
+
     try:
         # Invoke the model
        if not LLM_LOADED:
@@ -121,7 +111,9 @@ def predict(message, chat_history, thinking_history, config, section_state):
            )
            core_tokenizer = AutoTokenizer.from_pretrained(config['llm_model'])
            LLM_LOADED = True
-
+        token_cnt = count_tokens_local(input_message, core_tokenizer)
+        if token_cnt >= MAX_MODEL_CTX:
+            gr.Warning("The conversation has exceeded the model's context length; please start a new session...")
         text = core_tokenizer.apply_chat_template(
             input_message,
             tokenize=False,
@@ -145,7 +137,7 @@ def predict(message, chat_history, thinking_history, config, section_state):
         index = 0
         # thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
         thinking = None
-        response_content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+        response_content = core_tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
         print('Response:', response_content)
         # Update the chat history
         chat_history.append({'role': 'user', 'content': message})
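
The commit moves the context-length check to after the local model load, so it counts tokens with the already-loaded core_tokenizer instead of a separately loaded tokenizer; this also drops the load_tokenizer/BASE_MODEL_TABLE fallback and fixes the stale tokenizer reference in the decode call, which would have raised a NameError once that fallback block was gone. A minimal sketch of the guard as it reads after this change — count_tokens_local, MAX_MODEL_CTX, and input_message are the repo's own names, but the helper body, the limit value, and the model id below are assumptions:

# Sketch of the post-commit context-length guard, assuming count_tokens_local
# renders the chat template and counts the resulting tokens. The repo defines
# count_tokens_local and MAX_MODEL_CTX; these bodies are assumptions.
from transformers import AutoTokenizer

MAX_MODEL_CTX = 32768  # assumed limit; the repo sets its own value

def count_tokens_local(messages, tokenizer):
    # Render the messages with the model's chat template, then count tokens.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return len(tokenizer(text).input_ids)

core_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # hypothetical model id
input_message = [{"role": "user", "content": "hello"}]
if count_tokens_local(input_message, core_tokenizer) >= MAX_MODEL_CTX:
    print("Conversation exceeds the model context window; start a new session.")

Counting against the same tokenizer that will serve generation keeps the estimate consistent with what apply_chat_template actually produces, at the cost of only warning after the model has been loaded.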
requirements.txt CHANGED
@@ -17,4 +17,4 @@ xcodec2==0.1.5
 vocos==0.1.0
 jq
 sentence-transformers==3.4.1
-faiss-cpu==1.7.4
+faiss-cpu
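
requirements.txt unpins faiss-cpu, letting pip resolve whatever wheel matches the runtime; 1.7.4 ships no wheels for recent Python releases, a likely motivation, though the commit does not say. A quick smoke test for whichever version resolves — the dimension 384 is an assumption matching common sentence-transformers embedding widths:

# Smoke test for an unpinned faiss-cpu install (assumed usage; d=384 is an
# assumption matching common sentence-transformers models).
import numpy as np
import faiss

d = 384
index = faiss.IndexFlatL2(d)                        # exact L2 index
index.add(np.random.rand(10, d).astype("float32"))  # faiss expects float32
distances, ids = index.search(np.random.rand(1, d).astype("float32"), 3)
print(ids)  # indices of the 3 nearest stored vectors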