Spaces:
Running
Running
Upload questions_dataset.py
Browse files- utils/questions_dataset.py +69 -0
utils/questions_dataset.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import chromadb
|
3 |
+
from google.genai import types
|
4 |
+
|
5 |
+
def preprocess_questions():
    """Load the AGIEval LSAT-LR test split and index it in a Chroma collection.

    Downloads the parquet file from the Hugging Face hub, formats each row as a
    '(question: "...", choices: [...], gold: [...])' document string, and
    stores the documents plus per-question metadata in an in-memory Chroma
    collection named "lsat-lr".

    Returns:
        The chromadb collection holding one document per practice question.
    """
    df = pd.read_parquet(
        "hf://datasets/hails/agieval-lsat-lr/data/test-00000-of-00001.parquet"
    )

    # Build documents and metadata in a single pass over the named columns.
    # The original positionally unpacked each row (q, c, g = row), which
    # silently breaks if the column order or count ever changes.
    lrq_docs = []
    metadatas = []
    for q, c, g in zip(df["query"], df["choices"], df["gold"]):
        lrq_docs.append(f'(question: "{q}", choices: {c}, gold: {g})')
        metadatas.append(
            {"question": q, "choices": "\n".join(c), "gold": int(g[0])}
        )

    chroma_client = chromadb.Client()
    # get_or_create_collection makes repeated calls safe: create_collection
    # raises if a collection named "lsat-lr" already exists in this client.
    collection = chroma_client.get_or_create_collection(name="lsat-lr")
    collection.add(
        documents=lrq_docs,
        metadatas=metadatas,
        ids=[str(i) for i in range(len(lrq_docs))],
    )
    return collection
|
23 |
+
|
24 |
+
def get_logical_reasoning_practice_questions(query: str) -> str:
    """
    Use to get logical reasoning practice questions from database after user has studied.
    Uses query to search the database.
    Returns top 5 results in the format:
    (question: "question", choices: [choices], gold: [gold]).
    """
    # Memoize the collection on the function object: preprocess_questions()
    # re-downloads the dataset and re-creates the Chroma collection, so
    # calling it on every query is slow and (with create_collection) would
    # raise on the second call due to the duplicate collection name.
    collection = getattr(
        get_logical_reasoning_practice_questions, "_collection", None
    )
    if collection is None:
        collection = preprocess_questions()
        get_logical_reasoning_practice_questions._collection = collection
    results = collection.query(query_texts=[query], n_results=5)["documents"][0]
    return "\n\n".join(results)
|
35 |
+
|
36 |
+
def get_model_tools():
    """Build the Gemini tool declaration for the practice-question lookup.

    Registers a single tool named "get_practice_questions" with one required
    string parameter, "query". The tool description is reused from the
    docstring of get_logical_reasoning_practice_questions so the two never
    drift apart.
    """
    query_param = {
        "type": "string",
        "description": "string to query the database with",
    }
    declaration = {
        "name": "get_practice_questions",
        "description": get_logical_reasoning_practice_questions.__doc__,
        "parameters": {
            "type": "object",
            "properties": {"query": query_param},
            "required": ["query"],
        },
    }
    return types.Tool(function_declarations=[declaration])
|
53 |
+
|
54 |
+
# System prompt for the tutoring model. The per-topic scores summarize the
# student's quiz performance and steer the tutor toward weaker areas; the
# <DONE> sentinel lets the calling loop detect the end of the session.
# Fixed prompt typos ("Assumtion" -> "Assumption", "Inferece" -> "Inference")
# so the model sees the correct LSAT question-type names.
system_instruction = """You are an AI tutor that teaches users LSAT Logical Reasoning.
Here is how your student performed on the practice quiz grouped by question type (num correct/num questions):
Assumption: (1/1)
Find the flaw in the argument: (1/2)
Inference: (1/1)
Justify the conclusion: (0/1)
Method of reasoning: (1/1)
Point at issue: (0/1)
Role Play: (1/1)
Strengthen: (0/1)
Weaken the argument: (1/1)
Based on this, classify them as Beginner / Intermediate / Advanced. Walk through the student on all topics, but focus on the ones they struggle with.
Question the user to ensure that they understand the material.
Use practice questions from the tool to ensure they understand the material.
Never give a one word answer. Always keep the conversation moving.
Once the user has studied all the topics, return <DONE>. """
|