Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -25,12 +25,18 @@ WRITE_ACCESS_TOKEN = st.secrets["Llama_3_1"]
|
|
25 |
def get_rag_answer(query, top_results):
|
26 |
"""
|
27 |
Constructs a prompt from the query and the page contexts of the top results,
|
28 |
-
|
|
|
29 |
"""
|
30 |
-
# Combine the context from the top results (
|
31 |
context = "\n\n".join([res.payload["page_content"] for res in top_results])
|
32 |
|
33 |
-
#
|
|
|
|
|
|
|
|
|
|
|
34 |
prompt = (
|
35 |
"Using the following context, answer the question concisely. "
|
36 |
"Only output the final answer below, without repeating the context or question.\n\n"
|
@@ -43,7 +49,7 @@ def get_rag_answer(query, top_results):
|
|
43 |
payload = {
|
44 |
"inputs": prompt,
|
45 |
"parameters": {
|
46 |
-
"max_new_tokens":
|
47 |
}
|
48 |
}
|
49 |
|
|
|
25 |
def get_rag_answer(query, top_results):
|
26 |
"""
|
27 |
Constructs a prompt from the query and the page contexts of the top results,
|
28 |
+
truncates the context to avoid exceeding the token limit, then sends it to the
|
29 |
+
dedicated endpoint and returns only the generated answer.
|
30 |
"""
|
31 |
+
# Combine the context from the top results (adjust the separator as needed)
|
32 |
context = "\n\n".join([res.payload["page_content"] for res in top_results])
|
33 |
|
34 |
+
# Truncate the context to a maximum number of characters (e.g., 15000 characters)
|
35 |
+
max_context_chars = 15000
|
36 |
+
if len(context) > max_context_chars:
|
37 |
+
context = context[:max_context_chars]
|
38 |
+
|
39 |
+
# Build the prompt, instructing the model to only output the final answer.
|
40 |
prompt = (
|
41 |
"Using the following context, answer the question concisely. "
|
42 |
"Only output the final answer below, without repeating the context or question.\n\n"
|
|
|
49 |
payload = {
|
50 |
"inputs": prompt,
|
51 |
"parameters": {
|
52 |
+
"max_new_tokens": 150 # Adjust max tokens as needed
|
53 |
}
|
54 |
}
|
55 |
|