ak0601 committed on
Commit
3351c45
·
verified ·
1 Parent(s): 9c42663

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +134 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ from dotenv import load_dotenv
5
+ import streamlit as st
6
+ from huggingface_hub import login
7
+ import google.generativeai as genai
8
+ from sentence_transformers import SentenceTransformer
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain.embeddings.base import Embeddings
11
+ from google.adk.agents import Agent
12
+ from google.adk.sessions import InMemorySessionService
13
+ from google.adk.runners import Runner
14
+ from google.adk.tools import FunctionTool
15
+ from google.genai import types
16
+ from langchain_tavily import TavilySearch
17
+
18
# === CONFIGURE ENV AND AUTH ===
def _require_env(name: str, message: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: with *message* when the variable is unset or empty.
            An explicit raise is used instead of ``assert`` because asserts
            are removed when Python runs with ``-O``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(message)
    return value


# Pull variables from a local .env file into the process environment.
load_dotenv()

hf_token = _require_env("HUGGINGFACE_TOKEN", "Please set HUGGINGFACE_TOKEN in your .env")
login(token=hf_token)

google_api_key = _require_env("GOOGLE_API_KEY", "Set GOOGLE_API_KEY in .env")
# The Tavily key is only checked here; it is not passed to anything in this block.
_require_env("TAVILY_API_KEY", "Set TAVILY_API_KEY in .env")
genai.configure(api_key=google_api_key)
26
+
27
def flatten_json(obj: dict) -> str:
    """Flatten a nested JSON-like object into newline-separated text lines.

    Each leaf value becomes one line of the form ``key > subkey > value``,
    where the keys are the dictionary path leading to that leaf.

    Args:
        obj: Parsed JSON data (normally a dict; lists and scalars also work).

    Returns:
        All leaf lines joined with ``"\\n"``; an empty string when there are
        no non-null leaves.
    """
    pieces = []

    def recurse(prefix, value):
        if isinstance(value, dict):
            for key, child in value.items():
                recurse(f"{prefix}{key} > ", child)
        elif isinstance(value, (list, tuple)):
            # Walk sequences element by element so nested dicts inside a list
            # are flattened too, instead of being dumped as one repr() string.
            for item in value:
                recurse(prefix, item)
        elif value is not None:
            # None leaves carry no information and are skipped.
            pieces.append(f"{prefix}{value}")

    recurse("", obj)
    return "\n".join(pieces)
36
+
37
+ # === LOAD AND INDEX LOCAL COLLEGE JSONS ===
38
@st.cache_resource
def load_vector_store(data_dir: str):
    """Build a FAISS vector store from every ``.json`` file in *data_dir*.

    Each file is parsed, flattened to text with ``flatten_json`` and embedded
    with the ``all-MiniLM-L6-v2`` SentenceTransformer model.

    Args:
        data_dir: Directory containing the JSON documents.

    Returns:
        The FAISS store created by ``FAISS.from_texts``.

    Raises:
        ValueError: if *data_dir* contains no ``.json`` files.
    """
    texts = []
    # Sort the listing: os.listdir returns entries in arbitrary order, which
    # would otherwise make the index order differ between runs.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.lower().endswith('.json'):
            continue
        path = os.path.join(data_dir, fname)
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except UnicodeDecodeError:
            # Fall back to latin-1 for files that are not valid UTF-8.
            with open(path, 'r', encoding='latin-1') as f:
                data = json.load(f)
        texts.append(flatten_json(data))
    st.info(f"Loaded {len(texts)} documents.")

    if not texts:
        # Fail with a clear message instead of handing FAISS an empty corpus.
        raise ValueError(
            f"No .json documents found in {data_dir!r}; cannot build the vector store."
        )

    st_model = SentenceTransformer('all-MiniLM-L6-v2')

    class LocalEmbeddings(Embeddings):
        """LangChain embeddings adapter around the local SentenceTransformer."""

        def embed_documents(self, docs):
            return st_model.encode(docs).tolist()

        def embed_query(self, q):
            return st_model.encode([q])[0].tolist()

    return FAISS.from_texts(texts, LocalEmbeddings())


vector_store = load_vector_store('Jsons-Colleges/Jsons')
59
+
60
+ # === TOOLS ===
61
def db_search(query: str) -> dict:
    """Look up *query* in the local vector store.

    Returns:
        ``{"results": [...]}`` holding the page content of up to six matching
        documents; the list is empty when nothing matches.
    """
    matches = vector_store.similarity_search(query, k=6)
    return {"results": [doc.page_content for doc in matches]}
65
+
66
+
67
def tavily_search(query: str) -> dict:
    """Run a Tavily web search for *query* and return the result snippets.

    Returns:
        ``{"results": [...]}`` with the non-empty ``content`` field of each
        search hit; the list is empty when the search yields nothing usable.
    """
    tool = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    result = tool.invoke({"query": query})

    # NOTE(review): assumes the tool returns a dict with a "results" list of
    # dicts; anything else is treated as "no results" rather than crashing.
    hits = result.get('results', []) if isinstance(result, dict) else []

    # Drop hits with missing/empty content so the returned list is only
    # non-empty when there is real text for the agent to use.
    snippets = [
        item['content']
        for item in hits
        if isinstance(item, dict) and item.get('content')
    ]
    return {"results": snippets}
72
+
73
# Wrap the two search functions as ADK FunctionTool objects so they can be
# passed to the agent's tool list.
from google.adk.tools import FunctionTool

db_tool = FunctionTool(db_search)          # local vector-store lookup
tavily_tool = FunctionTool(tavily_search)  # web search
78
+
79
+ # === AGENT SETUP ===
80
@st.cache_resource
def create_agent():
    """Create the college-information agent, its session and its runner.

    The agent is configured with the ``db_tool`` and ``tavily_tool`` tools and
    an instruction describing the search workflow. A single in-memory session
    (``user1`` / ``session1``) is created for it.

    Returns:
        A ``(runner, session)`` tuple.
    """
    # Local imports: only needed for the awaitable check below.
    import asyncio
    import inspect

    agent = Agent(
        name="college_info_agent",
        model="gemini-2.0-flash",
        instruction=(
            "You are a college information specialist. For every user query about colleges or universities, "
            "follow this exact workflow before replying:\n"
            "1. Call `db_search` with the user’s query.\n"
            "2. If `db_search` returns an empty `results` list, immediately call `tavily_search`.\n"
            "3. Do not produce any output until one of those calls returns data.\n"
            "4. As soon as you have non‑empty results, stop further searches and craft your answer using only that source.\n"
            "5. Structure your response with key details: name, location, major/program offerings, rankings, tuition, "
            "admissions criteria, campus highlights, and any notable facts.\n"
            # Items 6 and 7 previously lacked a trailing newline, which fused
            # items 6-8 into one run-on line in the prompt.
            "6. Use a clear, conversational tone and include examples or comparable institutions when helpful.\n"
            "7. If something is not present in the database or you don't know about it automatically do web search and find the answer for it without asking the user.\n"
            "8. Always try to give complete answer in one go and let user ask follow up questions on the complete answer."
        ),
        tools=[db_tool, tavily_tool],
        generate_content_config=types.GenerateContentConfig(
            max_output_tokens=1500,
            temperature=0,
        ),
    )

    session_svc = InMemorySessionService()
    session = session_svc.create_session(
        app_name="college_agent_app",
        user_id="user1",
        session_id="session1",
    )
    # NOTE(review): newer google-adk releases appear to make create_session a
    # coroutine; resolve it here so `session.id` works with either API.
    # Confirm against the installed google-adk version.
    if inspect.isawaitable(session):
        session = asyncio.run(session)

    runner = Runner(agent=agent, app_name="college_agent_app", session_service=session_svc)
    return runner, session


runner, session = create_agent()
110
+
111
+ # === STREAMLIT UI ===
112
st.title("🎓 CollegeGPT")

# Conversation history is kept in the Streamlit session as (role, text) pairs.
if "history" not in st.session_state:
    st.session_state.history = []

# Display chat history
for role, msg in st.session_state.history:
    st.chat_message("user" if role == "user" else "assistant").write(msg)

# Input
query = st.chat_input("Ask me about any college…")
if query:
    st.session_state.history.append(("user", query))

    # Run agent
    user_msg = types.Content(role="user", parts=[types.Part(text=query)])
    events = runner.run(user_id="user1", session_id=session.id, new_message=user_msg)

    # Collect final response text
    reply = ""
    for ev in events:
        if not ev.is_final_response():
            continue
        # A final event may carry no content or no text parts; guard against
        # that instead of indexing parts[0] unconditionally.
        parts = ev.content.parts if ev.content and ev.content.parts else []
        text = "".join(part.text for part in parts if getattr(part, "text", None))
        if text:
            reply = text

    if not reply:
        # Avoid rendering an empty assistant bubble when no text came back.
        reply = "Sorry, I couldn't produce an answer for that. Please try rephrasing your question."

    st.session_state.history.append(("assistant", reply))
    # Rerun so the history loop above renders the new messages.
    st.rerun()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ google-adk
2
+ google-generativeai
3
+ sentence-transformers
4
+ faiss-cpu
5
+ tavily-python
6
+ python-dotenv
7
+ langchain-community
8
+ langchain_tavily