shreetishresthanp committed on
Commit
ae5bb50
·
verified ·
1 Parent(s): 07b98cf

Upload questions_dataset.py

Browse files
Files changed (1) hide show
  1. utils/questions_dataset.py +69 -0
utils/questions_dataset.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import chromadb
3
+ from google.genai import types
4
+
5
def preprocess_questions():
    """Return the "lsat-lr" Chroma collection, populating it on first use.

    The collection holds one document per row of the AGIEval LSAT logical
    reasoning test split, formatted as
    ``(question: "<query>", choices: <choices>, gold: <gold>)``, with the
    question text, newline-joined choices and the first gold index stored as
    metadata.

    Returns:
        The Chroma collection named "lsat-lr".
    """
    chroma_client = chromadb.Client()
    # create_collection() raises when the name is already taken, and this
    # function runs on every tool query, so use get_or_create_collection()
    # to stay safe on repeated calls.
    collection = chroma_client.get_or_create_collection(name="lsat-lr")
    if collection.count() > 0:
        # Already populated by an earlier call: skip the download and the
        # re-embedding, which would also collide on the existing ids.
        return collection

    df = pd.read_parquet("hf://datasets/hails/agieval-lsat-lr/data/test-00000-of-00001.parquet")
    print(df.head())

    # Read the columns by name so the result does not depend on the parquet
    # file's column order or on it having exactly three columns.
    rows = list(zip(df["query"], df["choices"], df["gold"]))
    lrq_docs = [
        f'(question: "{q}", choices: {c}, gold: {g})'
        for q, c, g in rows
    ]
    metadatas = [
        # gold is indexed with [0]: only the first gold entry is kept.
        {"question": q, "choices": "\n".join(c), "gold": int(g[0])}
        for q, c, g in rows
    ]

    collection.add(
        documents=lrq_docs,
        metadatas=metadatas,
        ids=[str(i) for i in range(len(lrq_docs))],
    )
    return collection
23
+
24
def get_logical_reasoning_practice_questions(query: str) -> str:
    """
    Use to get logical reasoning practice questions from database after user has studied.
    Uses query to search the database.
    Returns top 5 results in the format:
    (question: "question", choices: [choices], gold: [gold]).
    """
    # NOTE: the docstring above is read at runtime through __doc__ and used as
    # the tool description in get_model_tools, so its wording is left as is.
    question_store = preprocess_questions()
    response = question_store.query(query_texts=[query], n_results=5)
    # 'documents' holds one list per submitted query text; only one was sent.
    top_documents = response['documents'][0]
    return '\n\n'.join(top_documents)
35
+
36
def get_model_tools():
    """Build the google.genai ``types.Tool`` that declares the practice-question function.

    The declaration is named "get_practice_questions", takes one required
    string parameter ``query``, and uses the docstring of
    ``get_logical_reasoning_practice_questions`` as its description.

    Returns:
        A ``types.Tool`` wrapping that single function declaration.
    """
    query_property = {
        "type": "string",
        "description": "string to query the database with",
    }
    parameters_schema = {
        "type": "object",
        "properties": {"query": query_property},
        "required": ["query"],
    }
    declaration = {
        "name": "get_practice_questions",
        "description": get_logical_reasoning_practice_questions.__doc__,
        "parameters": parameters_schema,
    }
    return types.Tool(function_declarations=[declaration])
53
+
54
# System prompt for the tutoring model. It embeds a fixed per-question-type
# quiz score table and tells the model to emit <DONE> once every topic has
# been covered.
system_instruction = """You are an AI tutor that teaches users LSAT Logical Reasoning.
Here is how your student performed on the practice quiz grouped by question type (num correct/num questions):
Assumption: (1/1)
Find the flaw in the argument: (1/2)
Inference: (1/1)
Justify the conclusion: (0/1)
Method of reasoning: (1/1)
Point at issue: (0/1)
Role Play: (1/1)
Strengthen: (0/1)
Weaken the argument: (1/1)
Based on this, classify them as Beginner / Intermediate / Advanced. Walk through the student on all topics, but focus on the ones they struggle with.
Question the user to ensure that they understand the material.
Use practice questions from the tool to ensure they understand the material.
Never give a one word answer. Always keep the conversation moving.
Once the user has studied all the topics, return <DONE>. """