annikwag commited on
Commit
d237e1f
·
verified ·
1 Parent(s): 6e6ca50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -25,12 +25,18 @@ WRITE_ACCESS_TOKEN = st.secrets["Llama_3_1"]
25
  def get_rag_answer(query, top_results):
26
  """
27
  Constructs a prompt from the query and the page contexts of the top results,
28
- then sends it to the dedicated endpoint and returns only the generated answer.
 
29
  """
30
- # Combine the context from the top results (you may adjust the separator as needed)
31
  context = "\n\n".join([res.payload["page_content"] for res in top_results])
32
 
33
- # Create a prompt that instructs the model to output only the answer.
 
 
 
 
 
34
  prompt = (
35
  "Using the following context, answer the question concisely. "
36
  "Only output the final answer below, without repeating the context or question.\n\n"
@@ -43,7 +49,7 @@ def get_rag_answer(query, top_results):
43
  payload = {
44
  "inputs": prompt,
45
  "parameters": {
46
- "max_new_tokens": 50
47
  }
48
  }
49
 
 
25
  def get_rag_answer(query, top_results):
26
  """
27
  Constructs a prompt from the query and the page contexts of the top results,
28
+ truncates the context to avoid exceeding the token limit, then sends it to the
29
+ dedicated endpoint and returns only the generated answer.
30
  """
31
+ # Combine the context from the top results (adjust the separator as needed)
32
  context = "\n\n".join([res.payload["page_content"] for res in top_results])
33
 
34
+ # Truncate the context to a maximum number of characters (e.g., 15000 characters)
35
+ max_context_chars = 15000
36
+ if len(context) > max_context_chars:
37
+ context = context[:max_context_chars]
38
+
39
+ # Build the prompt, instructing the model to only output the final answer.
40
  prompt = (
41
  "Using the following context, answer the question concisely. "
42
  "Only output the final answer below, without repeating the context or question.\n\n"
 
49
  payload = {
50
  "inputs": prompt,
51
  "parameters": {
52
+ "max_new_tokens": 150 # Adjust max tokens as needed
53
  }
54
  }
55