Tamil Eniyan committed on
Commit
bebbe8f
·
1 Parent(s): 0282eea

Add application file

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -52,17 +52,18 @@ curated_qa_pairs = [
52
  }
53
  ]
54
 
55
- def get_curated_context(query, curated_qa, embed_model):
56
  """
57
  Retrieve the most relevant curated Q/A pair based on the user's query.
58
- Returns a formatted string if the similarity (using L2 distance) is below a threshold.
 
59
  """
60
  curated_questions = [qa["question"] for qa in curated_qa]
61
  query_embedding = embed_model.encode([query]).astype('float32')
62
  curated_embeddings = embed_model.encode(curated_questions, show_progress_bar=False)
63
  curated_embeddings = np.array(curated_embeddings).astype('float32')
64
 
65
- # Build a temporary FAISS index for curated questions
66
  dimension = curated_embeddings.shape[1]
67
  curated_index = faiss.IndexFlatL2(dimension)
68
  curated_index.add(curated_embeddings)
@@ -70,15 +71,11 @@ def get_curated_context(query, curated_qa, embed_model):
70
  k = 1
71
  distances, indices = curated_index.search(query_embedding, k)
72
 
73
- # Define a threshold for relevance (tune this value as needed)
74
- threshold = 1.0
75
  if distances[0][0] < threshold:
76
  idx = indices[0][0]
77
- qa_pair = curated_qa[idx]
78
- # Return a formatted string with the curated question and answer.
79
- return f"Curated Q/A Pair:\nQuestion: {qa_pair['question']}\nAnswer: {qa_pair['answer']}\n"
80
  else:
81
- return ""
82
 
83
  def main():
84
  st.title("PDF Question-Answering App")
@@ -108,23 +105,27 @@ def main():
108
  for idx in indices[0]:
109
  pdf_context += chunks[idx] + "\n"
110
 
111
- # Get curated Q/A context if available
112
- curated_context = get_curated_context(query, curated_qa_pairs, embed_model)
113
-
114
  base_context = st.session_state.conversation_history + "\n"
115
 
116
- if curated_context:
 
 
 
117
  st.write("A curated Q/A pair was found and will be used for the answer by default.")
118
- # Provide an option to override with full PDF context
119
  use_full_data = st.checkbox("Check Full Data", value=False)
120
- if use_full_data:
121
- context_to_use = base_context + pdf_context
 
 
 
 
122
  else:
123
- context_to_use = base_context + curated_context
124
  else:
125
  context_to_use = base_context + pdf_context
126
 
127
- # Optionally, you can also provide an expander to show the full PDF context.
128
  with st.expander("Show Full PDF Context"):
129
  st.write(pdf_context)
130
 
 
52
  }
53
  ]
54
 
55
def get_curated_pair(query, curated_qa, embed_model, threshold=1.0):
    """
    Retrieve the curated Q/A pair whose question is closest to the user's query.

    Args:
        query: The user's question text.
        curated_qa: Sequence of dicts, each providing at least a "question" key.
        embed_model: Object whose ``encode`` method turns a list of strings
            into a 2-D array of embeddings (presumably a SentenceTransformer;
            confirm against the caller).
        threshold: Exclusive upper bound on the distance reported by the
            FAISS index for the nearest curated question to count as a match.

    Returns:
        The matching QA dictionary if its distance to the query is below
        ``threshold``; otherwise None. Also None when ``curated_qa`` is empty.

    Note:
        ``faiss.IndexFlatL2`` reports *squared* L2 distances, so ``threshold``
        is compared against a squared distance, not a plain Euclidean one.
    """
    # Nothing to match against: bail out before encoding, because an empty
    # embedding array has no second dimension to size the index with.
    if not curated_qa:
        return None

    curated_questions = [qa["question"] for qa in curated_qa]
    query_embedding = np.asarray(embed_model.encode([query]), dtype="float32")
    curated_embeddings = np.asarray(
        embed_model.encode(curated_questions, show_progress_bar=False),
        dtype="float32",
    )

    # Build a temporary FAISS index for the curated questions
    dimension = curated_embeddings.shape[1]
    curated_index = faiss.IndexFlatL2(dimension)
    curated_index.add(curated_embeddings)

    # Only the single nearest curated question is of interest.
    distances, indices = curated_index.search(query_embedding, 1)

    if distances[0][0] < threshold:
        return curated_qa[int(indices[0][0])]
    return None
79
 
80
  def main():
81
  st.title("PDF Question-Answering App")
 
105
  for idx in indices[0]:
106
  pdf_context += chunks[idx] + "\n"
107
 
 
 
 
108
  base_context = st.session_state.conversation_history + "\n"
109
 
110
+ # Check for a curated Q/A pair
111
+ curated_pair = get_curated_pair(query, curated_qa_pairs, embed_model)
112
+
113
+ if curated_pair:
114
  st.write("A curated Q/A pair was found and will be used for the answer by default.")
115
+ # Option to override with full PDF context
116
  use_full_data = st.checkbox("Check Full Data", value=False)
117
+ if not use_full_data:
118
+ # Directly display the curated answer without running the QA pipeline
119
+ answer = curated_pair["answer"]
120
+ st.write(answer)
121
+ st.session_state.conversation_history += f"AI: {answer}\n"
122
+ return # Exit the function after displaying the curated answer
123
  else:
124
+ context_to_use = base_context + pdf_context
125
  else:
126
  context_to_use = base_context + pdf_context
127
 
128
+ # Provide an expander to show the full PDF context if desired
129
  with st.expander("Show Full PDF Context"):
130
  st.write(pdf_context)
131