Update app.py
app.py CHANGED
@@ -82,7 +82,7 @@ async def chat(request: ChatRequest):
         })
 
     # summarize chat history
-    summary_thresh =
+    summary_thresh = 10
     if len(request.chat_history) > summary_thresh:
         summarize_prompt = f"""Please summarize the following chat history concisely, focusing on the key points and main topics discussed. Avoid
         unnecessary details and provide a clear, straightforward summary. {request.chat_history[:-summary_thresh]}"""  # summarize everything except last k items
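A note on this hunk: `summary_thresh` now has an explicit value, and `request.chat_history[:-summary_thresh]` selects everything except the last `summary_thresh` entries, so only the oldest messages are folded into the summary while the 10 most recent stay verbatim. A minimal standalone sketch of that slicing, with hypothetical history items (not from app.py):

    # Hypothetical 14-message history in the same shape app.py uses
    chat_history = [{"role": "user", "parts": [{"text": f"msg {i}"}]} for i in range(14)]
    summary_thresh = 10

    if len(chat_history) > summary_thresh:
        older = chat_history[:-summary_thresh]    # the 4 oldest messages -> summarized
        recent = chat_history[-summary_thresh:]   # the 10 newest messages -> kept verbatim
        assert len(older) + len(recent) == len(chat_history)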
@@ -114,9 +114,9 @@ async def chat(request: ChatRequest):
 
         # remove the unfformatted user message
         del request.chat_history[-1]
-
         # add the user message with RAG data
-
+        rag_prompt = textwrap.dedent(rag_prompt)
+        request.chat_history.append({"role": "user", "parts": [{"text": rag_prompt}]})
 
         response = client.models.generate_content(
             model="gemini-2.0-flash",
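On this hunk: the prompt is now passed through `textwrap.dedent` before being appended to the history. `dedent` strips the leading whitespace shared by every line, which is exactly the padding a triple-quoted string picks up when it is built inside an indented function body; this assumes `textwrap` is imported at the top of app.py (the import is not visible in this diff). A quick standalone illustration with hypothetical prompt text:

    import textwrap

    # Hypothetical prompt; in app.py, rag_prompt is built from the retrieved chunks.
    rag_prompt = """
        Answer using only the provided context.
        Context: ...
    """
    clean = textwrap.dedent(rag_prompt)  # shared leading whitespace stripped from every line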
@@ -130,7 +130,7 @@ async def chat(request: ChatRequest):
         del request.chat_history[-1]
         request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
 
-        return {"response": response.text, "dataset_str": concise_text_string, "history": request.chat_history, "RAG_prompt": rag_prompt, "chunks": chunks, "results": results}
+        return {"response": response.text, "dataset_str": concise_text_string[:150], "history": request.chat_history, "RAG_prompt": rag_prompt, "chunks": chunks, "results": results}
 
     if request.model_choice == "HF":
         if hf_token:
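The last hunk only trims the response payload: `concise_text_string[:150]` caps the echoed dataset text at its first 150 characters. Python slicing never raises on shorter strings, so no length check is needed; a tiny sketch with a hypothetical value:

    concise_text_string = "x" * 1000        # hypothetical dataset text
    preview = concise_text_string[:150]     # first 150 characters only
    assert len("short"[:150]) == 5          # shorter strings pass through unchanged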