import gspread
import os
import json
from dotenv import load_dotenv
import streamlit as st
from huggingface_hub import login
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.adk.tools import FunctionTool
from google.genai import types
from langchain_tavily import TavilySearch
import uuid
import datetime

# === CONFIGURE ENV AND AUTH ===
# Load variables from a local .env file into the process environment.
load_dotenv()

def _require_env(name: str, message: str) -> str:
    """Return the value of environment variable *name* or raise RuntimeError.

    Explicit raise instead of `assert`: asserts are stripped under
    `python -O`, which would silently skip these credential checks.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(message)
    return value

hf_token = _require_env("HUGGINGFACE_TOKEN", "Please set HUGGINGFACE_TOKEN in your .env")
login(token=hf_token)
_require_env("GOOGLE_API_KEY", "Set GOOGLE_API_KEY in .env")
_require_env("TAVILY_API_KEY", "Set TAVILY_API_KEY in .env")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
service_account_info = _require_env("GOOGLE_SERVICE_ACCOUNT_JSON", "Missing GOOGLE_SERVICE_ACCOUNT_JSON secret!")

# Parsed service-account credentials, used later by gspread.
service_account_dict = json.loads(service_account_info)
def flatten_json(obj: dict) -> str:
    """Flatten a nested dict into newline-separated "a > b > value" lines.

    Keys along each path are joined with " > "; `None` values are skipped,
    and any non-dict, non-None value terminates a path.
    """
    lines: list = []

    def _walk(path: str, node) -> None:
        if isinstance(node, dict):
            for key, child in node.items():
                _walk(f"{path}{key} > ", child)
        elif node is not None:
            lines.append(f"{path}{node}")

    _walk("", obj)
    return "\n".join(lines)



# === LOAD AND INDEX LOCAL COLLEGE MARKDOWN FILES ===
@st.cache_resource
def load_vector_store(data_dir: str):
    """Read every .md file in *data_dir* and build a FAISS vector index.

    Files are decoded as UTF-8, falling back to latin-1 when that fails.
    Cached by Streamlit so the embedding model and index are built once
    per process, not on every rerun.
    """
    documents = []
    for entry in os.listdir(data_dir):
        if not entry.lower().endswith('.md'):
            continue
        full_path = os.path.join(data_dir, entry)
        try:
            with open(full_path, 'r', encoding='utf-8') as handle:
                documents.append(handle.read())
        except UnicodeDecodeError:
            with open(full_path, 'r', encoding='latin-1') as handle:
                documents.append(handle.read())

    encoder = SentenceTransformer('all-MiniLM-L6-v2')

    class LocalEmbeddings(Embeddings):
        # Adapter: exposes sentence-transformers through LangChain's
        # Embeddings interface so FAISS can call it.
        def embed_documents(self, docs):
            return encoder.encode(docs).tolist()

        def embed_query(self, q):
            return encoder.encode([q])[0].tolist()

    return FAISS.from_texts(documents, LocalEmbeddings())

vector_store = load_vector_store('College_markdown')

# Key of the Google Sheet used to log every query/response pair.
SHEET_KEY = os.getenv("SHEET_KEY")

def add_query_to_sheet(user_id, query, response):
    """Append one (user_id, timestamp, query, response) row to "Sheet1".

    A fresh gspread client is created on each call.
    NOTE(review): the timestamp is naive local time — confirm whether UTC
    is expected by whatever consumes the sheet.
    """
    client = gspread.service_account_from_dict(service_account_dict)
    spreadsheet = client.open_by_key(SHEET_KEY)
    log_sheet = spreadsheet.worksheet("Sheet1")
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_sheet.append_row([user_id, stamp, query, response])


    
# === TOOLS ===
def db_search(query: str) -> dict:
    """Search the local FAISS index for *query*.

    Returns:
        {"results": [...]} with up to 6 matching chunk texts; the list is
        empty when nothing matches.
    """
    hits = vector_store.similarity_search(query, k=6)
    snippets = [hit.page_content for hit in hits]
    return {"results": snippets}


def tavily_search(query: str) -> dict:
    """Query the Tavily web-search API and return up to 6 content snippets.

    Returns:
        {"results": [...]} — snippet strings, or an empty list when the
        search yields nothing.
    """
    searcher = TavilySearch(max_results=6, topic="general", include_raw_content=True)
    response = searcher.invoke({"query": query})
    contents = [hit.get('content') for hit in response.get('results', [])]
    return {"results": contents or []}

# Wrap the plain functions as ADK FunctionTools so the agent can invoke them.
# (The redundant re-import of FunctionTool was removed; it is already
# imported at the top of the file.)
db_tool = FunctionTool(db_search)
tavily_tool = FunctionTool(tavily_search)

# === AGENT SETUP ===
@st.cache_resource
def create_agent(user_id, session_id):
    """Build the counseling agent plus an in-memory session and runner.

    Cached by Streamlit per (user_id, session_id), so script reruns reuse
    the same Runner/session pair instead of rebuilding them.

    Returns:
        (runner, session): the ADK Runner and the session it serves.
    """
    agent = Agent(
        name="college_info_agent",
        model="gemini-2.0-flash",
        # System prompt: tool-use order (local index first, web fallback)
        # plus domain rules for rank-based college eligibility.
        instruction=(
            """
You are a highly experienced college counselor, specializing in helping high school students choose the right engineering colleges. Please follow these guidelines while assisting students:
    1. Call `db_search` with the user’s query.\n
    2. If `db_search` returns an empty `results` list, immediately call `tavily_search`.\n
    3. Do not produce any output until one of those calls returns data.\n
    4. As soon as you have non‑empty results, stop further searches and craft your answer using only that source.\n
    5. If a user asks about a college, use the available source to provide the most detailed response possible, preferably structured in bullet points for clarity.
    6. If a user enters a rank and asks a list of colleges based on their's rank, always consider the following:
        a. Rank of the student (mandatory)
        b. Category (if not provided, assume General)
        c. State (optional)
        d. Gender (optional)
    
        Note: In your final response, only include colleges where the student is eligible based on their rank. Do not list colleges where the cutoff rank is lower than the student's rank. Ask them to check out Precollege Predictor for more personalized answers, the link of college predictor https://www.precollege.in/college-predictor
        
    7. Rank Eligibility Rule: A student is eligible for a college if their rank is less than or equal to the college’s closing cutoff. (e.g., Rank 2000 is eligible if the cutoff is 2500.)
    
    8. If the user wants to compare colleges, present the comparison in a table format for clarity (Mention the opening and closing ranks for the General category.).

    9. Ensure that your response is always complete and fully addresses the user's question, without leaving any sentence or thought unfinished.
    
    10. If you cannot find sufficient or specific information, politely direct the student to connect with a verified mentor at: https://precollege.in
    
    11. If the user asks a question unrelated to college counseling, respond with:
    "Sorry, this is beyond the scope of this application"
"""
        ),
        tools=[db_tool, tavily_tool],
        generate_content_config=types.GenerateContentConfig(
            max_output_tokens=1500,
            temperature=0  # deterministic responses
        )
    )
    # In-memory session store: conversation state lives only for the
    # lifetime of this process.
    session_svc = InMemorySessionService()
    session = session_svc.create_session(app_name="college_agent_app",user_id=user_id, session_id=session_id)
    runner = Runner(agent=agent, app_name="college_agent_app", session_service=session_svc)
    return runner, session



# === STREAMLIT UI ===
st.title("🎓 Jossa-ChatBot")

# Per-browser-session identifiers, generated once and kept in
# st.session_state so they survive Streamlit's script reruns.
if "user_id" not in st.session_state:
    st.session_state.user_id = str(uuid.uuid4())   
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())  

user_id = st.session_state.user_id

# Cached factory: the same id pair returns the same runner/session
# across reruns (see @st.cache_resource on create_agent).
runner, session = create_agent(st.session_state.user_id, st.session_state.session_id)

# Chat transcript as (role, message) pairs, persisted across reruns.
if "history" not in st.session_state:
    st.session_state.history = []

# Display chat history
for role, msg in st.session_state.history:
    if role == "user": st.chat_message("user").write(msg)
    else: st.chat_message("assistant").write(msg)

# Input
query = st.chat_input("Ask me about any college…")
if query:
    st.session_state.history.append(("user", query))
    # Run agent
    user_msg = types.Content(role="user", parts=[types.Part(text=query)])
    events = runner.run(user_id=user_id, session_id=session.id, new_message=user_msg)
    # Collect final response text; the last final-response event wins.
    reply = ""
    for ev in events:
        if ev.is_final_response(): reply = ev.content.parts[0].text
    # Fallback message when the agent produced no final response.
    if reply=="":
        reply="Please provide complete context."
    st.session_state.history.append(("assistant", reply))
    # Log the exchange to the Google Sheet before redrawing the chat.
    add_query_to_sheet(user_id=user_id, query=query, response=reply)
    st.rerun()