amasood committed
Commit 8d7ab91 · verified · 1 Parent(s): 319855f

Update app.py

Files changed (1): app.py (+9 −9)
app.py CHANGED
@@ -11,13 +11,13 @@ from groq import Groq
 def load_data():
     dataset = load_dataset("FreedomIntelligence/RAG-Instruct", split="train")
     df = pd.DataFrame(dataset)
-    return df[["instruction", "response"]]
+    return df[["question", "answer"]]

 # Generate embeddings and index
 @st.cache_resource
 def setup_faiss(data):
     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-    embeddings = model.encode(data["instruction"].tolist())
+    embeddings = model.encode(data["question"].tolist())
     index = faiss.IndexFlatL2(embeddings.shape[1])
     index.add(np.array(embeddings))
     return model, index, embeddings
@@ -26,7 +26,7 @@ def setup_faiss(data):
 def retrieve_context(query, model, index, data, top_k=1):
     query_vec = model.encode([query])
     distances, indices = index.search(np.array(query_vec), top_k)
-    results = [data.iloc[i]["instruction"] + "\n\n" + data.iloc[i]["response"] for i in indices[0]]
+    results = [data.iloc[i]["question"] + "\n\n" + data.iloc[i]["answer"] for i in indices[0]]
     return "\n\n".join(results)

 # Call Groq LLM
@@ -46,15 +46,15 @@ st.title("🧠 RAG App using Groq API + RAG-Instruct Dataset")
 data = load_data()
 model, index, _ = setup_faiss(data)

-st.markdown("Ask a question based on the instruction-response knowledge base.")
+st.markdown("Ask a question based on the QA knowledge base.")

 # Optional queries
 optional_queries = [
-    "How to use a specific API function?",
-    "Explain how to fine-tune a model.",
-    "What is the difference between pretraining and finetuning?",
-    "How does retrieval-augmented generation work?",
-    "Explain self-supervised learning."
+    "What is retrieval-augmented generation?",
+    "How can I fine-tune a language model?",
+    "What are the components of a RAG system?",
+    "Explain prompt engineering basics.",
+    "How does FAISS indexing help in RAG?"
 ]

 query = st.text_input("Enter your question:", value=optional_queries[0])
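
The whole commit is a column rename: every reference to the dataset's "instruction"/"response" fields now reads "question"/"answer", in load_data, the embedding step in setup_faiss, and the string join in retrieve_context. A minimal sketch for checking this locally before redeploying the Space, assuming the FreedomIntelligence/RAG-Instruct split really exposes the renamed columns; the schema check and the `missing` variable are illustrative additions, not part of app.py:

    # Sketch: confirm the RAG-Instruct schema matches what app.py now expects.
    # "question"/"answer" come from this diff; whether the dataset actually
    # exposes those names is an assumption worth verifying before deploying.
    import pandas as pd
    from datasets import load_dataset

    ds = load_dataset("FreedomIntelligence/RAG-Instruct", split="train")
    print(ds.column_names)  # list the real column names in the split

    df = pd.DataFrame(ds)
    missing = {"question", "answer"} - set(df.columns)  # columns app.py indexes
    if missing:
        raise KeyError(f"app.py references missing columns: {missing}")
    print(df[["question", "answer"]].head(1))  # spot-check one row

If the set difference is non-empty, the app would fail with the same KeyError at startup, since load_data, setup_faiss, and retrieve_context all index the renamed columns.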