# Comsatsbot — COMSATS Attock campus assistant (FAISS RAG + Groq LLM + MongoDB chat history).
import json
import os
import time

from deep_translator import GoogleTranslator
from groq import Groq
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
class Comsatsbot:
    """Campus assistant chatbot for COMSATS Attock.

    Combines FAISS retrieval over CSV knowledge files, Groq-hosted LLMs for
    answer generation and language detection, Google Translate for Urdu
    queries, and a MongoDB collection for per-chat history.
    """

    def __init__(self, hf, llm, api_keys, chats_collection, paths, index_path='faiss_kb'):
        """Initialize the bot and load (or build) the FAISS index.

        Args:
            hf: Embedding model handed to FAISS.
            llm: LangChain chat model backing the conversation memory.
            api_keys: Groq API keys, tried in order when a call fails.
            chats_collection: MongoDB collection holding chat histories.
            paths: CSV knowledge-base file paths (used only when the FAISS
                index must be built from scratch).
            index_path: Directory where the FAISS index is persisted.
        """
        self.llm = llm
        self.api_keys = api_keys
        self.client = None  # Groq client; (re)created per API key on demand
        self.models = ["llama3-70b-8192"]  # use a single known working model; list allows fallbacks
        # NOTE(review): memory is populated in response() but never consulted
        # when building the prompt — confirm whether it is still needed.
        self.memory = ConversationBufferMemory(llm=self.llm, max_token_limit=3000)
        self.chats_collection = chats_collection
        self.index_path = index_path
        self.hf = hf
        self.faiss_index = None
        self.faiss_retriever = None
        self.paths = paths
        self.initialize_faiss_index()

    def load_data(self, paths):
        """Load every CSV in *paths* into one flat list of LangChain documents."""
        documents = []
        for path in paths:
            loader = CSVLoader(file_path=path)
            documents.extend(loader.load())
        return documents

    def initialize_faiss_index(self):
        """Load the persisted FAISS index, or build and save it from the CSVs.

        Always leaves ``self.faiss_retriever`` configured for top-5 retrieval.
        """
        if os.path.exists(self.index_path):
            # allow_dangerous_deserialization trusts our own previously saved
            # index; never point index_path at untrusted data.
            self.faiss_index = FAISS.load_local(
                self.index_path, self.hf, allow_dangerous_deserialization=True
            )
        else:
            documents = self.load_data(self.paths)
            self.faiss_index = FAISS.from_documents(documents, self.hf)
            self.faiss_index.save_local(self.index_path)
        self.faiss_retriever = self.faiss_index.as_retriever(search_kwargs={"k": 5})

    def retrieve_answer(self, query):
        """Return the retrieved documents for *query*, or None if no retriever exists."""
        if self.faiss_retriever:
            return self.faiss_retriever.invoke(query)
        return None

    def create_chat_record(self, chat_id):
        """Insert an empty history document for *chat_id*."""
        self.chats_collection.insert_one({"_id": chat_id, "history": []})

    def update_chat(self, chat_id, question, answer):
        """Append one question/answer pair to the chat's stored history."""
        self.chats_collection.update_one(
            {"_id": chat_id},
            {"$push": {"history": {"question": question, "answer": answer}}},
        )

    def load_chat(self, chat_id):
        """Return the history list for *chat_id*.

        Raises:
            KeyError: If no chat with this id exists.
        """
        chat_record = self.chats_collection.find_one({"_id": chat_id})
        if not chat_record:
            raise KeyError(f"Chat ID {chat_id} does not exist.")
        return chat_record.get('history', [])

    def new_chat(self, chat_id):
        """Create an empty chat record; raise KeyError if the id already exists."""
        if self.chats_collection.find_one({"_id": chat_id}):
            raise KeyError(f"Chat ID {chat_id} exists already.")
        self.create_chat_record(chat_id)
        return "success"

    def delete_chat(self, chat_id):
        """Delete the chat record; raise KeyError if the id is unknown."""
        if not self.chats_collection.find_one({"_id": chat_id}):
            raise KeyError(f"Chat ID {chat_id} does not exist.")
        self.chats_collection.delete_one({"_id": chat_id})
        return "success"

    def generate_response(self, question, history, context):
        """Ask the Groq LLM to answer *question* given history and retrieved context.

        Rotates through every API key and model on failure; returns an
        apology string if all attempts fail.
        """
        formatted_history = "\n".join(
            f"User: {h['question']}\nBot: {h['answer']}" for h in history
        )
        context_text = "\n".join(doc.page_content for doc in context)
        prompt = f'''
You are a comsats assistant. Answer concisely and with emojis only where appropriate.
Don't mention context/history explicitly. Use friendly tone and only say "I don’t know" if no relevant answer is found.
University Info:
Comsats Attock Campus offers CS, SE, AI, English, Math, EE, CE, BBA.
Departments: CS, Math, EE. Facilities: grounds, canteens, mosque, LT rooms, labs.
Admissions via NTS. CGPA: 4.0 (85%), 3.66 (79-84%), etc.
Context:
{context_text}
Chat History:
{formatted_history}
Question: {question}
'''
        for api_key in self.api_keys:
            self.client = Groq(api_key=api_key)
            for model in self.models:
                try:
                    chat_completion = self.client.chat.completions.create(
                        messages=[
                            {"role": "system", "content": prompt},
                            {"role": "user", "content": f"Answer the following question: {question}"},
                        ],
                        model=model,
                        max_tokens=1024,
                    )
                    result = chat_completion.choices[0].message.content
                    print("Returning answer:", result)
                    return result
                except Exception as e:
                    # Rate limits / transient API errors: back off briefly and
                    # try the next model or key.
                    print(f"Error with {model}: {e}")
                    time.sleep(2)
                    continue
        return "Sorry 😔 I'm currently facing technical issues. Please try again later."

    def detect_language(self, question):
        """Classify *question* as 'urdu' or 'english' via a Groq JSON call.

        Falls back to 'english' if every key/model attempt fails.
        """
        for api_key in self.api_keys:
            self.client = Groq(api_key=api_key)
            for model in self.models:
                try:
                    chat_completion = self.client.chat.completions.create(
                        messages=[
                            {
                                "role": "system",
                                "content": """
                                You are an expert agent, and your task is to detect the language.
                                Return a JSON: {'detected_language': 'urdu' or 'english'}
                                """
                            },
                            {
                                "role": "user",
                                "content": f"Detect the language for: {question}"
                            },
                        ],
                        model=model,
                        max_tokens=256,
                        response_format={"type": "json_object"},
                    )
                    response = json.loads(chat_completion.choices[0].message.content)
                    # KeyError/JSONDecodeError on a malformed reply is caught
                    # below and treated as a retry.
                    return response['detected_language'].lower()
                except Exception as e:
                    print(f"Language detection error: {e}")
                    time.sleep(2)
                    continue
        return "english"

    def translate_urdu(self, text):
        """Translate Urdu *text* to English; return it unchanged on failure."""
        try:
            return GoogleTranslator(source='ur', target='en').translate(text)
        except Exception:
            # Best-effort: a failed translation should not break the chat,
            # but a bare except would also trap KeyboardInterrupt/SystemExit.
            return text

    def response(self, question, chat_id):
        """Answer *question* within chat *chat_id* and persist the exchange.

        Urdu questions are translated to English for retrieval only; the
        LLM still sees the original question.

        Raises:
            KeyError: If *chat_id* does not exist.
        """
        chat_history = self.load_chat(chat_id)
        for entry in chat_history:
            self.memory.save_context({"input": entry["question"]}, {"output": entry["answer"]})
        language = self.detect_language(question)
        if language == 'urdu':
            question_translation = self.translate_urdu(question)
            context = self.faiss_retriever.invoke(question_translation)
        else:
            context = self.faiss_retriever.invoke(question)
        answer = self.generate_response(question, chat_history, context)
        self.update_chat(chat_id, question, answer)
        return answer