Update app.py
app.py
CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import InferenceClient
 import os
+import textwrap
 from google import genai
 from google.genai.types import GenerateContentConfig
 from datasets import load_dataset
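The `textwrap` import added here is used later in this commit to dedent triple-quoted prompt strings. As a quick standalone sketch (not part of the app): `dedent` strips the longest whitespace prefix shared by every non-blank line, so a prompt defined inside an indented function reaches the model without leading spaces.

```python
import textwrap

def build_prompt() -> str:
    # A triple-quoted literal inside a function keeps the source indentation;
    # dedent() removes the whitespace prefix common to all non-blank lines.
    return textwrap.dedent("""\
        Query: How do big bucks use clear cuts for bedding?
        Response: They bed the edges of the cut, not the middle.
        """)

print(build_prompt())  # both lines now start at column 0
```

One caveat: text placed directly after the opening quotes forms an unindented first line, which empties the common prefix and makes `dedent` a no-op for the whole block.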
@@ -45,7 +46,8 @@ class ChatRequest(BaseModel):
     You focus on buck bedding, terrain reading, and aggressive yet calculated mobile tactics. Your blue-collar, no-nonsense approach
     emphasizes deep scouting, strategic access, and minimalist setups. Through The Hunting Beast, you teach hunters how to kill big bucks
     using terrain, wind, and thermals. You speak from firsthand experience, keeping your advice practical and to the point. Provide detailed
-    yet concise responses, with a maximum of 150 words
+    yet concise responses, with a maximum of 150 words.
+    """
     temperature: float = 0.7
     chat_history: List[Dict[str, Any]] = []
     model_choice: str = "google"
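A side note on the fields at the bottom of this hunk: `chat_history: List[Dict[str, Any]] = []` would be a classic shared-mutable-default bug on a plain Python function, but it is safe on a Pydantic model because Pydantic copies field defaults for each instance. A reduced, hypothetical version of the model for illustration (only diff-visible fields, plus `message`, which is assumed from its use in the handler):

```python
from typing import Any, Dict, List
from pydantic import BaseModel

class ChatRequest(BaseModel):
    # Reduced sketch; the real model also carries the persona prompt text.
    message: str
    temperature: float = 0.7
    chat_history: List[Dict[str, Any]] = []  # copied per instance by Pydantic
    model_choice: str = "google"

a = ChatRequest(message="first")
a.chat_history.append({"role": "user", "parts": [{"text": "first"}]})
b = ChatRequest(message="second")
print(b.chat_history)  # [] -- the default list is not shared across requests
```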
@@ -60,42 +62,59 @@ chunks = chunk_text(concise_text_string, chunk_size=450)
 # Build the vectorstore
 vectorstore = build_faiss_vectorstore(chunks)
 
+one_shot_example = textwrap.dedent("""Here is an example of the style and tone of a response:
+    Query: How do big bucks use clear cuts for bedding?
+    Response: Yeah, a lot of guys think big bucks just bed right in the middle of a clear cut because it's thick, but that's not really how they use it. The
+    thick regrowth is great for food and cover, but those bucks still want an advantage. Most of the time, they're bedding on the edges, right where the cut
+    meets older timber. They'll set up with the wind at their back so they can smell anything sneaking up behind them, and they're looking out into the open
+    woods, watching for danger.""")
+
 @app.post("/chat")
 async def chat(request: ChatRequest):
     try:
         if request.model_choice == "google":
             client = genai.Client(api_key=google_api_key)
 
+            # insert the one-shot example at the beginning of the chat
+            request.chat_history.insert(0, {
+                "role": "user",
+                "parts": [{"text": one_shot_example}]
+            })
+
+            # summarize chat history
+            summary_thresh = 2
+            if len(request.chat_history) > summary_thresh:
+                summarize_prompt = f"""Please summarize the following chat history concisely, focusing on the key points and main topics discussed. Avoid
+                unnecessary details and provide a clear, straightforward summary. {request.chat_history[:-summary_thresh]}"""  # summarize everything except the last k items
+                summary_response = client.models.generate_content(
+                    model="gemini-2.0-flash",
+                    contents=summarize_prompt,
+                    config=GenerateContentConfig(
+                        system_instruction=["You are a helpful assistant who is an expert at summarization."]
+                    ),
+                )
+                request.chat_history = request.chat_history[-(summary_thresh+1):]  # keep the last k items
+                request.chat_history.insert(1,
+                    {"role": "user",
+                     "parts": [{"text": f"Here is a summary of this conversation so far: {summary_response.text}"}]})
+
+
             # Retrieve relevant text
             results = retrieve(request.message, vectorstore, top_k=5)
             formatted_results = "\n\n".join(results)
 
-            rag_prompt = f"""You have access to the following relevant information retrieved based on the user's query:
+            rag_prompt = textwrap.dedent(f"""You have access to the following relevant information retrieved based on the user's query:
 
             {formatted_results}
 
             Using the information above, answer the user's query as accurately as possible:
 
             User's Query: {request.message}
-            """
-
-            # # summarize chat history
-            # summary_thresh = 2
-            # if len(request.chat_history) > summary_thresh:
-            #     summarize_prompt = f"""Please summarize the following chat history concisely, focusing on the key points and main topics discussed. Avoid
-            #     unnecessary details and provide a clear, straightforward summary. {request.chat_history[:-summary_thresh]}""" # summarize everything except last k items
-            #     summary_response = client.models.generate_content(
-            #         model="gemini-2.0-flash",
-            #         contents=summarize_prompt,
-            #         config=GenerateContentConfig(
-            #             system_instruction=["You are a helpful assistant who is an expert at summarization."]
-            #         ),
-            #     )
-            #     request.chat_history = request.chat_history[-(summary_thresh+1):] # keep last k items
-            #     request.chat_history.insert(0, {"role": "user", "parts": [{"text": f"Here is a summary of this conversation so far: {summary_response.text}"}]})
+            """)
 
             # remove the unformatted user message
             del request.chat_history[-1]
+
             # add the user message with RAG data
             request.chat_history.append({"role": "user", "parts": [{"text": rag_prompt}]})
 
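This hunk prepends, slices, and rewrites `request.chat_history` in several steps, which is hard to follow in diff form. The sketch below replays the same list operations with the Gemini call stubbed out; the conversation data and the summary text are invented for illustration.

```python
from typing import Any, Dict, List

def shape_history(chat_history: List[Dict[str, Any]],
                  one_shot_example: str,
                  rag_prompt: str) -> List[Dict[str, Any]]:
    # Mirrors the handler's order of operations, with the model call stubbed.
    summary_thresh = 2

    # 1. The one-shot example is prepended on every request.
    chat_history.insert(0, {"role": "user", "parts": [{"text": one_shot_example}]})

    # 2. Past the threshold, older turns are summarized (stubbed here) and
    #    only the last summary_thresh + 1 items survive the slice.
    if len(chat_history) > summary_thresh:
        summary_text = "stub summary"  # stands in for the gemini-2.0-flash call
        chat_history = chat_history[-(summary_thresh + 1):]
        chat_history.insert(1, {
            "role": "user",
            "parts": [{"text": f"Here is a summary of this conversation so far: {summary_text}"}],
        })

    # 3. The raw user turn is dropped and re-appended in RAG-formatted form.
    del chat_history[-1]
    chat_history.append({"role": "user", "parts": [{"text": rag_prompt}]})
    return chat_history

history = [
    {"role": "user", "parts": [{"text": "turn 1"}]},
    {"role": "model", "parts": [{"text": "reply 1"}]},
    {"role": "user", "parts": [{"text": "raw new question"}]},
]
for item in shape_history(history, "ONE-SHOT", "RAG PROMPT"):
    print(item["role"], "-", item["parts"][0]["text"])
```

Running this trace surfaces one consequence worth double-checking: whenever summarization fires, the slice in step 2 drops the one-shot example that step 1 just inserted at index 0, because only the tail of the list is kept.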
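Finally, a hypothetical smoke test for the endpoint after this change. It assumes the app is served locally on port 8000 and that the request fields match the `ChatRequest` model shown above; neither detail comes from this diff.

```python
import requests

# Hypothetical local call to the /chat endpoint defined in app.py.
resp = requests.post(
    "http://localhost:8000/chat",
    json={
        "message": "How do big bucks use clear cuts for bedding?",
        "temperature": 0.7,
        "chat_history": [],
        "model_choice": "google",
    },
)
resp.raise_for_status()
print(resp.json())
```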