chenjianfei committed
Commit · ee3c192 · 1 Parent(s): a0189a2

- app.py +6 -14
- requirements.txt +1 -1
app.py
CHANGED
@@ -39,7 +39,6 @@ def handle_retry(history, thinking_history, config, section_state, retry_data: g
 
 def predict(message, chat_history, thinking_history, config, section_state):
     global local_rag, TTS_LOADED, LLM_LOADED, synthesiser, core_llm, core_tokenizer
-    print(config)
     print(f"当前模式:{config['mode_selected']}")
     print(f'角色扮演描述:{config["character_description"]}')
     print(f"写入角色设定方式:{config['character_setting_mode']}")
@@ -100,17 +99,8 @@ def predict(message, chat_history, thinking_history, config, section_state):
     # 添加用户消息到历史
     section_state["chat_history"].append({"role": "user", "content": message})
 
-
-    try:
-        tokenizer = load_tokenizer(config['llm_model'])
-    except Exception as e:
-        if config['llm_model'] in BASE_MODEL_TABLE:
-            tokenizer = load_tokenizer(BASE_MODEL_TABLE[config['llm_model']])
-        else:
-            raise e
-    token_cnt = count_tokens_local(input_message, tokenizer)
-    if token_cnt >= MAX_MODEL_CTX:
-        gr.Warning("当前对话已经超出模型上下文长度,请开启新会话...")
+
+
     try:
         # 调用模型
         if not LLM_LOADED:
@@ -121,7 +111,9 @@ def predict(message, chat_history, thinking_history, config, section_state):
             )
             core_tokenizer = AutoTokenizer.from_pretrained(config['llm_model'])
             LLM_LOADED = True
-
+        token_cnt = count_tokens_local(input_message, core_tokenizer)
+        if token_cnt >= MAX_MODEL_CTX:
+            gr.Warning("当前对话已经超出模型上下文长度,请开启新会话...")
         text = core_tokenizer.apply_chat_template(
             input_message,
             tokenize=False,
@@ -145,7 +137,7 @@ def predict(message, chat_history, thinking_history, config, section_state):
         index = 0
         # thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
         thinking = None
-        response_content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+        response_content = core_tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
         print('回复:', response_content)
         # 更新对话历史
         chat_history.append({'role': 'user', 'content': message})
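Net effect of the app.py change: the context-length check now runs after the shared core_tokenizer has been loaded, instead of loading a throwaway tokenizer (with a BASE_MODEL_TABLE fallback) just to count tokens, and decoding now uses the same core_tokenizer. The helper count_tokens_local is not shown in this commit; the sketch below is only a guess at how such a guard can work, assuming it renders the chat history through the tokenizer's chat template and counts the resulting tokens. The model name and the MAX_MODEL_CTX value are placeholders, not values from the repo.

# Hypothetical sketch of the post-commit token guard; count_tokens_local's
# real implementation is not part of this diff.
from transformers import AutoTokenizer

MAX_MODEL_CTX = 32768  # placeholder; the actual constant is defined elsewhere in app.py

def count_tokens_local(messages, tokenizer):
    # Render the OpenAI-style message list through the model's chat template,
    # then count the tokens of the resulting prompt string.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return len(tokenizer(text).input_ids)

core_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # placeholder model
input_message = [{"role": "user", "content": "hello"}]
if count_tokens_local(input_message, core_tokenizer) >= MAX_MODEL_CTX:
    print("Conversation exceeds the model context window; start a new session.")

Counting with the already-loaded core_tokenizer avoids loading a second tokenizer and keeps the count consistent with the tokenizer actually used for generation.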
requirements.txt
CHANGED
@@ -17,4 +17,4 @@ xcodec2==0.1.5
 vocos==0.1.0
 jq
 sentence-transformers==3.4.1
-faiss-cpu
+faiss-cpu
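requirements.txt keeps faiss-cpu alongside sentence-transformers, which the local_rag referenced in app.py presumably uses for retrieval. As an illustration only (the repo's local_rag internals are not shown in this commit, and the embedding model below is an arbitrary example), a minimal FAISS + sentence-transformers lookup looks like:

# Illustrative only: how faiss-cpu and sentence-transformers typically pair
# for local retrieval; not the repo's actual local_rag code.
import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("BAAI/bge-small-zh-v1.5")  # example embedding model
docs = ["Character profile: a gentle librarian", "Setting: a steampunk city"]

# Embed and L2-normalize so inner product equals cosine similarity.
emb = model.encode(docs, normalize_embeddings=True)
index = faiss.IndexFlatIP(emb.shape[1])
index.add(emb)

# Retrieve the most similar document for a query.
query = model.encode(["What is the protagonist's job?"], normalize_embeddings=True)
scores, ids = index.search(query, 1)
print(docs[ids[0][0]], float(scores[0][0]))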