import gradio as gr
from zhipuai import ZhipuAI
import json
import os

api_key = os.environ['ZHIPUAI_API_KEY']


def convert_to_openai_format(nested_chat):
    """Convert Gradio's [[user, assistant], ...] history into OpenAI-style message dicts."""
    openai_format = []
    for dialogue in nested_chat:
        user_dialogue = {"role": "user", "content": dialogue[0]}
        assistant_dialogue = {"role": "assistant", "content": dialogue[1]}
        openai_format.extend([user_dialogue, assistant_dialogue])
    return openai_format


def master_llm(user_prompt, history):
    # Generate the system prompt for the expert LLM.
    # Example: build a simple system prompt from the user's question and the last exchange.
    if history != []:
        last_round = history[-1]
        last_record_text = f"'''\n# User:\n{last_round[0]}\n\n\n# AI:\n{last_round[1]}\n\n\n# User:\n{user_prompt}\n'''"
    else:
        last_record_text = f"'''\n# User:\n{user_prompt}\n'''"
    syst_prompt = """Based on the conversation or question between the user and the AI, decide which domain expert the upcoming conversation needs, and write the system prompt for an AI expert in that domain.
Return the result in the following JSON format, strictly keeping `{}` and `""` balanced (note: every field is a string):
```
{
    "expert_system_prompt": "You are a ... AI with experience in ..., and your way of thinking is ... Your task now is ...",
    "temperature": "0.01",
    "top_p": "0.9"
}
```
Parameter notes:
temperature controls how random the AI's replies are; higher values make the replies more divergent. The value must lie in (0, 1), exclusive of 0 and 1.
top_p is the candidate sampling range the AI considers; for example, 0.1 means only the top 10% most likely candidate tokens are sampled. The value must lie in (0, 1), exclusive of 0 and 1.
As a rule of thumb, creative assistants use higher values for both parameters, while strictly obedient assistants should keep temperature as low as possible.
Note: do not force an expert. If you cannot tell which domain expert is needed (for example, there is no context), reply with this default setting instead:
```
{
    "expert_system_prompt": "Based on the user's question and needs (and the context, if any), return an appropriate response.",
    "temperature": "0.5",
    "top_p": "0.5"
}
```
"""
    messages = [
        {"role": "system", "content": syst_prompt},
        {"role": "user", "content": last_record_text}
    ]
    client = ZhipuAI(api_key=api_key)
    response = client.chat.completions.create(
        model="glm-4",
        messages=messages,
        temperature=0.01,
        top_p=0.01,
        do_sample=True
    )
    response_text = response.choices[0].message.content
    # Extract the JSON object from the reply (the model may wrap it in extra text or code fences).
    response_json = json.loads(response_text[response_text.find('{'):response_text.rfind('}') + 1])
    expert_system_prompt = response_json['expert_system_prompt']
    temperature = response_json['temperature']
    top_p = response_json['top_p']
    print(response_text)
    return expert_system_prompt, temperature, top_p


def expert_llm(user_prompt, history, expert_system_prompt, temperature, top_p):
    # Answer the user with the system prompt and sampling parameters chosen by the master LLM.
    client = ZhipuAI(api_key=api_key)
    if history != []:
        prompt_records = convert_to_openai_format(history)
        messages = [{"role": "system", "content": expert_system_prompt}] + prompt_records + [{"role": "user", "content": user_prompt}]
    else:
        messages = [{"role": "system", "content": expert_system_prompt}, {"role": "user", "content": user_prompt}]
    response = client.chat.completions.create(
        model="glm-4",
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True
    )
    return response.choices[0].message.content


def gradio_fn(message, history):
    expert_system_prompt, temperature, top_p = master_llm(message, history)
    expert_response = expert_llm(message, history, expert_system_prompt, temperature, top_p)
    return expert_response


demo = gr.ChatInterface(fn=gradio_fn)

if __name__ == "__main__":
    demo.launch()