Files changed (1) hide show
  1. app.py +147 -134
app.py CHANGED
@@ -1,134 +1,147 @@
1
-
2
- import os
3
- import json
4
- from dotenv import load_dotenv
5
- import streamlit as st
6
- from huggingface_hub import login
7
- import google.generativeai as genai
8
- from sentence_transformers import SentenceTransformer
9
- from langchain_community.vectorstores import FAISS
10
- from langchain.embeddings.base import Embeddings
11
- from google.adk.agents import Agent
12
- from google.adk.sessions import InMemorySessionService
13
- from google.adk.runners import Runner
14
- from google.adk.tools import FunctionTool
15
- from google.genai import types
16
- from langchain_tavily import TavilySearch
17
-
18
# === CONFIGURE ENV AND AUTH ===
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()


def _require_env(name: str, hint: str) -> str:
    """Return the value of environment variable *name*, or fail with *hint*.

    An explicit exception is raised instead of using ``assert`` because
    assertions are stripped when Python runs with ``-O``, which would let the
    app start without its credentials.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(hint)
    return value


hf_token = _require_env("HUGGINGFACE_TOKEN", "Please set HUGGINGFACE_TOKEN in your .env")
login(token=hf_token)
_google_api_key = _require_env("GOOGLE_API_KEY", "Set GOOGLE_API_KEY in .env")
# Only checked for presence here; presumably consumed by TavilySearch later.
_require_env("TAVILY_API_KEY", "Set TAVILY_API_KEY in .env")
genai.configure(api_key=_google_api_key)
26
-
27
def flatten_json(obj: dict) -> str:
    """Flatten a parsed JSON value into newline-separated ``path > value`` lines.

    Each leaf value becomes one line, prefixed by the chain of dictionary keys
    that leads to it (``"a > b > 1"``). List items are visited one by one and
    share the prefix of the list itself, so arrays are emitted as readable
    lines instead of a single Python ``repr``. ``None`` leaves are skipped.

    Args:
        obj: Parsed JSON document; normally a dict, but a top-level list is
            handled as well.

    Returns:
        The flattened text, or an empty string when there are no leaves.
    """
    pieces = []

    def recurse(prefix, value):
        if isinstance(value, dict):
            for key, child in value.items():
                recurse(f"{prefix}{key} > ", child)
        elif isinstance(value, (list, tuple)):
            # Items of an array carry no key of their own; reuse the prefix.
            for child in value:
                recurse(prefix, child)
        elif value is not None:
            pieces.append(f"{prefix}{value}")

    recurse("", obj)
    return "\n".join(pieces)
36
-
37
# === LOAD AND INDEX LOCAL COLLEGE JSONS ===
@st.cache_resource
def load_vector_store(data_dir: str):
    """Build a FAISS vector store from every ``.json`` file in *data_dir*.

    Each file is parsed, flattened to text with ``flatten_json`` and embedded
    with the ``all-MiniLM-L6-v2`` sentence-transformers model. The function is
    wrapped in ``st.cache_resource``.

    Args:
        data_dir: Directory containing the college JSON files.

    Returns:
        The FAISS store built from the flattened documents.

    Raises:
        ValueError: if no document could be loaded from *data_dir*.
    """
    texts = []
    # os.listdir() returns names in arbitrary order; sort so the index is
    # built in the same order on every run.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.lower().endswith('.json'):
            continue
        path = os.path.join(data_dir, fname)
        try:
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except UnicodeDecodeError:
                # Fall back for files that are not valid UTF-8.
                with open(path, 'r', encoding='latin-1') as f:
                    data = json.load(f)
        except json.JSONDecodeError as exc:
            # One malformed file should not prevent indexing the others.
            st.warning(f"Skipping {fname}: invalid JSON ({exc})")
            continue
        texts.append(flatten_json(data))

    if not texts:
        # Fail with a clear message rather than an obscure error from FAISS.
        raise ValueError(f"No JSON documents could be loaded from {data_dir!r}")
    st.info(f"Loaded {len(texts)} documents.")

    st_model = SentenceTransformer('all-MiniLM-L6-v2')

    class LocalEmbeddings(Embeddings):
        """Expose the local sentence-transformers model as LangChain Embeddings."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())


vector_store = load_vector_store('Jsons-Colleges/Jsons')
59
-
60
# === TOOLS ===
def db_search(query: str) -> dict:
    """Search the local college database for passages relevant to a query.

    Args:
        query: Natural-language question or keywords about a college.

    Returns:
        A dict with a single key ``"results"`` holding a list of up to six
        matching text passages; the list is empty when nothing matches.
    """
    docs = vector_store.similarity_search(query, k=6)
    # An empty docs list already produces {"results": []}; no special case needed.
    return {"results": [d.page_content for d in docs]}
65
-
66
-
67
def tavily_search(query: str) -> dict:
    """Search the web with Tavily for information relevant to a query.

    Args:
        query: Natural-language question or keywords to look up.

    Returns:
        A dict with a single key ``"results"`` holding a list of text
        snippets; the list is empty when the search returns nothing usable.
    """
    tool = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    result = tool.invoke({"query": query})
    # Drop hits without a usable "content" value so the list never holds None.
    snippets = [
        item['content']
        for item in result.get('results', [])
        if item.get('content')
    ]
    return {"results": snippets}
72
-
73
# Expose the two search functions to the agent as ADK FunctionTools
from google.adk.tools import FunctionTool

db_tool, tavily_tool = FunctionTool(db_search), FunctionTool(tavily_search)
78
-
79
# === AGENT SETUP ===
@st.cache_resource
def create_agent():
    """Create the college-information agent and a runner bound to one session.

    The agent uses the ``gemini-2.0-flash`` model with the ``db_tool`` and
    ``tavily_tool`` tools. The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(runner, session)`` tuple: the ADK ``Runner`` for the agent and
        the session object returned by ``InMemorySessionService``.
    """
    agent = Agent(
        name="college_info_agent",
        model="gemini-2.0-flash",
        # Every numbered step ends with "\n" so adjacent string literals do
        # not run together into a single line of the prompt.
        instruction=(
            "You are a college information specialist. For every user query about colleges or universities, "
            "follow this exact workflow before replying:\n"
            "1. Call `db_search` with the user’s query.\n"
            "2. If `db_search` returns an empty `results` list, immediately call `tavily_search`.\n"
            "3. Do not produce any output until one of those calls returns data.\n"
            "4. As soon as you have non‑empty results, stop further searches and craft your answer using only that source.\n"
            "5. Structure your response with key details: name, location, major/program offerings, rankings, tuition, "
            "admissions criteria, campus highlights, and any notable facts.\n"
            "6. Use a clear, conversational tone and include examples or comparable institutions when helpful.\n"
            "7. If something is not present in the database or you don't know about it automatically do web search and find the answer for it without asking the user.\n"
            "8. Always try to give complete answer in one go and let user ask follow up questions on the complete answer."
        ),
        tools=[db_tool, tavily_tool],
        generate_content_config=types.GenerateContentConfig(
            max_output_tokens=1500,
            temperature=0,
        ),
    )
    session_svc = InMemorySessionService()
    # NOTE(review): user_id/session_id are fixed and the result is cached, so
    # all visitors appear to share this one ADK session - confirm intended.
    session = session_svc.create_session(app_name="college_agent_app", user_id="user1", session_id="session1")
    runner = Runner(agent=agent, app_name="college_agent_app", session_service=session_svc)
    return runner, session


runner, session = create_agent()
110
-
111
# === STREAMLIT UI ===
st.title("🎓 CollegeGPT")

# Chat transcript as (role, text) tuples, kept in Streamlit session state.
if "history" not in st.session_state:
    st.session_state.history = []

# Display chat history
for role, msg in st.session_state.history:
    # Anything that is not a user turn is rendered as the assistant.
    st.chat_message("user" if role == "user" else "assistant").write(msg)

# Input
query = st.chat_input("Ask me about any college…")
if query:
    st.session_state.history.append(("user", query))
    # Run agent
    user_msg = types.Content(role="user", parts=[types.Part(text=query)])
    events = runner.run(user_id="user1", session_id=session.id, new_message=user_msg)
    # Collect final response text
    reply = ""
    for ev in events:
        if not ev.is_final_response():
            continue
        # Guard the attribute chain: a final event without content, parts or
        # text would otherwise raise here or store None as the reply.
        parts = ev.content.parts if ev.content else None
        if parts and parts[0].text:
            reply = parts[0].text
    if not reply:
        # Avoid storing a blank assistant message in the transcript.
        reply = "Sorry, I couldn't generate a response. Please try again."
    st.session_state.history.append(("assistant", reply))
    st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ from dotenv import load_dotenv
5
+ import streamlit as st
6
+ from huggingface_hub import login
7
+ import google.generativeai as genai
8
+ from sentence_transformers import SentenceTransformer
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain.embeddings.base import Embeddings
11
+ from google.adk.agents import Agent
12
+ from google.adk.sessions import InMemorySessionService
13
+ from google.adk.runners import Runner
14
+ from google.adk.tools import FunctionTool
15
+ from google.genai import types
16
+ from langchain_tavily import TavilySearch
17
+
18
# === CONFIGURE ENV AND AUTH ===
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()


def _require_env(name: str, hint: str) -> str:
    """Return the value of environment variable *name*, or fail with *hint*.

    An explicit exception is raised instead of using ``assert`` because
    assertions are stripped when Python runs with ``-O``, which would let the
    app start without its credentials.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(hint)
    return value


hf_token = _require_env("HUGGINGFACE_TOKEN", "Please set HUGGINGFACE_TOKEN in your .env")
login(token=hf_token)
_google_api_key = _require_env("GOOGLE_API_KEY", "Set GOOGLE_API_KEY in .env")
# Only checked for presence here; presumably consumed by TavilySearch later.
_require_env("TAVILY_API_KEY", "Set TAVILY_API_KEY in .env")
genai.configure(api_key=_google_api_key)
26
+
27
def flatten_json(obj: dict) -> str:
    """Flatten a parsed JSON value into newline-separated ``path > value`` lines.

    Each leaf value becomes one line, prefixed by the chain of dictionary keys
    that leads to it (``"a > b > 1"``). List items are visited one by one and
    share the prefix of the list itself, so arrays are emitted as readable
    lines instead of a single Python ``repr``. ``None`` leaves are skipped.

    Args:
        obj: Parsed JSON document; normally a dict, but a top-level list is
            handled as well.

    Returns:
        The flattened text, or an empty string when there are no leaves.
    """
    pieces = []

    def recurse(prefix, value):
        if isinstance(value, dict):
            for key, child in value.items():
                recurse(f"{prefix}{key} > ", child)
        elif isinstance(value, (list, tuple)):
            # Items of an array carry no key of their own; reuse the prefix.
            for child in value:
                recurse(prefix, child)
        elif value is not None:
            pieces.append(f"{prefix}{value}")

    recurse("", obj)
    return "\n".join(pieces)
36
+
37
# === LOAD AND INDEX LOCAL COLLEGE JSONS ===
@st.cache_resource
def load_vector_store(data_dir: str):
    """Build a FAISS vector store from every ``.json`` file in *data_dir*.

    Each file is parsed, flattened to text with ``flatten_json`` and embedded
    with the ``all-MiniLM-L6-v2`` sentence-transformers model. The function is
    wrapped in ``st.cache_resource``.

    Args:
        data_dir: Directory containing the college JSON files.

    Returns:
        The FAISS store built from the flattened documents.

    Raises:
        ValueError: if no document could be loaded from *data_dir*.
    """
    texts = []
    # os.listdir() returns names in arbitrary order; sort so the index is
    # built in the same order on every run.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.lower().endswith('.json'):
            continue
        path = os.path.join(data_dir, fname)
        try:
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except UnicodeDecodeError:
                # Fall back for files that are not valid UTF-8.
                with open(path, 'r', encoding='latin-1') as f:
                    data = json.load(f)
        except json.JSONDecodeError as exc:
            # One malformed file should not prevent indexing the others.
            st.warning(f"Skipping {fname}: invalid JSON ({exc})")
            continue
        texts.append(flatten_json(data))

    if not texts:
        # Fail with a clear message rather than an obscure error from FAISS.
        raise ValueError(f"No JSON documents could be loaded from {data_dir!r}")
    st.info(f"Loaded {len(texts)} documents.")

    st_model = SentenceTransformer('all-MiniLM-L6-v2')

    class LocalEmbeddings(Embeddings):
        """Expose the local sentence-transformers model as LangChain Embeddings."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())


vector_store = load_vector_store('Jsons-Colleges/Jsons')
59
+
60
# === TOOLS ===
def db_search(query: str) -> dict:
    """Search the local college database for passages relevant to a query.

    Args:
        query: Natural-language question or keywords about a college.

    Returns:
        A dict with a single key ``"results"`` holding a list of up to six
        matching text passages; the list is empty when nothing matches.
    """
    docs = vector_store.similarity_search(query, k=6)
    # An empty docs list already produces {"results": []}; no special case needed.
    return {"results": [d.page_content for d in docs]}
65
+
66
+
67
def tavily_search(query: str) -> dict:
    """Search the web with Tavily for information relevant to a query.

    Args:
        query: Natural-language question or keywords to look up.

    Returns:
        A dict with a single key ``"results"`` holding a list of text
        snippets; the list is empty when the search returns nothing usable.
    """
    tool = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    result = tool.invoke({"query": query})
    # Drop hits without a usable "content" value so the list never holds None.
    snippets = [
        item['content']
        for item in result.get('results', [])
        if item.get('content')
    ]
    return {"results": snippets}
72
+
73
# Expose the two search functions to the agent as ADK FunctionTools
from google.adk.tools import FunctionTool

db_tool, tavily_tool = FunctionTool(db_search), FunctionTool(tavily_search)
78
+
79
# === AGENT SETUP ===
@st.cache_resource
def create_agent():
    """Create the college-counselor agent and a runner bound to one session.

    The agent uses the ``gemini-2.0-flash`` model with the ``db_tool`` and
    ``tavily_tool`` tools. The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(runner, session)`` tuple: the ADK ``Runner`` for the agent and
        the session object returned by ``InMemorySessionService``.
    """
    app_name = "college_agent_app"

    # System prompt, spelled out line by line. Steps 1-4 end in two newlines
    # because the source text has an explicit "\n" escape before each real
    # line break.
    counselor_prompt = (
        "\n"
        "You are a highly experienced college counselor, specializing in helping high school students choose the right engineering colleges. Please follow these guidelines while assisting students:\n"
        "1. Call `db_search` with the user’s query.\n\n"
        "2. If `db_search` returns an empty `results` list, immediately call `tavily_search`.\n\n"
        "3. Do not produce any output until one of those calls returns data.\n\n"
        "4. As soon as you have non‑empty results, stop further searches and craft your answer using only that source.\n\n"
        "5. When suggesting a list of colleges based on student's rank, always consider the following:\n"
        "a. Rank of the student (mandatory)\n"
        "b. Category (if not provided, assume General)\n"
        "c. State (optional)\n"
        "d. Gender (optional)\n"
        "\n"
        "Note: In your final response, only include colleges where the student is eligible based on their rank. Do not list colleges where the cutoff rank is lower than the student's rank. Ask them to check out Precollege Predictor for more personalized answers, the link of college predictor https://www.precollege.in/college-predictor\n"
        "\n"
        "6. Rank Eligibility Rule: A student is eligible for a college if their rank is less than or equal to the college’s closing cutoff. (e.g., Rank 2000 is eligible if the cutoff is 2500.)\n"
        "\n"
        "7. If the user wants to compare colleges, present the comparison in a table format for clarity (Mention the opening and closing ranks for the General category.).\n"
        "\n"
        "8. If you cannot find sufficient or specific information, politely direct the student to connect with a verified mentor at: https://precollege.in\n"
        "\n"
        "9. If the user asks a question unrelated to college counseling, respond with:\n"
        '"Sorry, this is beyond the scope of this application"\n'
    )

    generation_cfg = types.GenerateContentConfig(
        max_output_tokens=1500,
        temperature=0,
    )
    counselor = Agent(
        name="college_info_agent",
        model="gemini-2.0-flash",
        instruction=counselor_prompt,
        tools=[db_tool, tavily_tool],
        generate_content_config=generation_cfg,
    )

    session_svc = InMemorySessionService()
    # NOTE(review): user_id/session_id are fixed and the result is cached, so
    # all visitors appear to share this one ADK session - confirm intended.
    session = session_svc.create_session(
        app_name=app_name,
        user_id="user1",
        session_id="session1",
    )
    runner = Runner(agent=counselor, app_name=app_name, session_service=session_svc)
    return runner, session


runner, session = create_agent()
123
+
124
# === STREAMLIT UI ===
st.title("🎓 CollegeGPT")

# Chat transcript as (role, text) tuples, kept in Streamlit session state.
if "history" not in st.session_state:
    st.session_state.history = []

# Display chat history
for role, msg in st.session_state.history:
    # Anything that is not a user turn is rendered as the assistant.
    st.chat_message("user" if role == "user" else "assistant").write(msg)

# Input
query = st.chat_input("Ask me about any college…")
if query:
    st.session_state.history.append(("user", query))
    # Run agent
    user_msg = types.Content(role="user", parts=[types.Part(text=query)])
    events = runner.run(user_id="user1", session_id=session.id, new_message=user_msg)
    # Collect final response text
    reply = ""
    for ev in events:
        if not ev.is_final_response():
            continue
        # Guard the attribute chain: a final event without content, parts or
        # text would otherwise raise here or store None as the reply.
        parts = ev.content.parts if ev.content else None
        if parts and parts[0].text:
            reply = parts[0].text
    if not reply:
        # Avoid storing a blank assistant message in the transcript.
        reply = "Sorry, I couldn't generate a response. Please try again."
    st.session_state.history.append(("assistant", reply))
    st.rerun()