Sadiaa committed on
Commit
57fce4e
·
verified ·
1 Parent(s): ec99e1c

Update chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +23 -121
chatbot.py CHANGED
@@ -1,7 +1,9 @@
1
  import os
2
- import time
3
  import json
 
4
  import logging
 
 
5
  from groq import Groq
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain_openai import ChatOpenAI
@@ -14,44 +16,49 @@ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %
14
  logger = logging.getLogger(__name__)
15
 
16
  class Comsatsbot:
17
- def __init__(self, hf, llm, api_keys, chats_collection, paths, index_path='faiss_kb'):
18
  logger.info("Initializing Comsatsbot...")
19
  self.llm = llm
20
  self.api_keys = api_keys
21
  self.client = None
22
  self.models = [
23
- # "llama3-groq-70b-8192-tool-use-preview",
24
  "llama-3.3-70b-versatile",
25
  "llama3-70b-8192"
26
  ]
27
  self.memory = ConversationBufferMemory(llm=self.llm, max_token_limit=3000)
28
  self.chats_collection = chats_collection
29
  self.index_path = index_path
30
- self.hf = hf
31
  self.faiss_index = None
32
  self.faiss_retriever = None
33
- self.paths = paths
34
  self.initialize_faiss_index()
35
 
36
- def load_data(self, paths):
37
- logger.info(f"Loading data from paths: {paths}")
38
- documents = []
39
- for path in paths:
40
- loader = CSVLoader(file_path=path)
41
- data = loader.load()
42
- documents.extend(data)
43
- logger.debug(f"Loaded {len(documents)} documents.")
 
 
 
 
 
 
 
44
  return documents
45
 
46
  def initialize_faiss_index(self):
47
  logger.info("Initializing FAISS index...")
48
  if os.path.exists(self.index_path):
49
  logger.info(f"FAISS index found at {self.index_path}. Loading...")
50
- self.faiss_index = FAISS.load_local(self.index_path, self.hf, allow_dangerous_deserialization=True)
51
  else:
52
  logger.info(f"FAISS index not found. Creating a new one...")
53
- documents = self.load_data(self.paths)
54
- self.faiss_index = FAISS.from_documents(documents, self.hf)
55
  self.faiss_index.save_local(self.index_path)
56
  self.faiss_retriever = self.faiss_index.as_retriever(search_kwargs={"k": 5})
57
  logger.info("FAISS index initialized successfully.")
@@ -87,22 +94,6 @@ class Comsatsbot:
87
  raise KeyError(f"Chat ID {chat_id} does not exist.")
88
  return chat_record.get('history', [])
89
 
90
- def new_chat(self, chat_id):
91
- logger.info(f"Creating new chat with ID: {chat_id}")
92
- if self.chats_collection.find_one({"_id": chat_id}):
93
- logger.error(f"Chat ID {chat_id} already exists.")
94
- raise KeyError(f"Chat ID {chat_id} exists already.")
95
- self.create_chat_record(chat_id)
96
- return "success"
97
-
98
- def delete_chat(self, chat_id):
99
- logger.info(f"Deleting chat record for chat_id: {chat_id}")
100
- if not self.chats_collection.find_one({"_id": chat_id}):
101
- logger.error(f"Chat ID {chat_id} does not exist.")
102
- raise KeyError(f"Chat ID {chat_id} does not exist.")
103
- self.chats_collection.delete_one({"_id": chat_id})
104
- return "success"
105
-
106
  def get_system_prompt(self):
107
  return """
108
  You are a comsats assistant to help the user with comsats university-related queries. Your response should be concise, direct, and to the point. Avoid any unnecessary explanations. Always consider the provided context and chat history to generate the answer.
@@ -112,12 +103,10 @@ Use emojis only when required based on the user's tone and emotions. Do not over
112
  - **Surprise**: Use 😯 when the user expresses surprise.
113
  - **Anger or frustration**: Use 😡 when the user expresses frustration or dissatisfaction.
114
  If the user asks the same question repeatedly or asks an illogical question, feel free to use emojis to subtly convey frustration, confusion, or amusement.
115
-
116
  If the user writes question in urdu, give answer in urdu.
117
  If the user writes question in English, give answer in English .
118
  please provide the personalized answer and provide answer quickly
119
  please answer from the dataset i provided to you in csv files. And donot write in every answer that i donot know the exact answer.and refer website only where it is necessary.
120
-
121
  Do not include the phrase "According to the provided context" or "Based on the chat history". Simply generate the answer like a human would, without referencing where the information comes from.
122
  If the question requires a URL, format it like this:
123
  [Click here to visit COMSATS](https://comsats.edu.pk).
@@ -164,91 +153,4 @@ Context ends here. Now, answer the following question:
164
  logger.warning("Unable to generate a response.")
165
  return "Sorry, unable to provide an answer at this time."
166
 
167
- def detect_language(self, question):
168
- logger.info(f"Detecting language for question: {question}")
169
- for api_key in self.api_keys:
170
- self.client = Groq(api_key=api_key)
171
- for model in self.models:
172
- try:
173
- chat_completion = self.client.chat.completions.create(
174
- messages=[
175
- {
176
- "role": "system",
177
- "content": """
178
- You are an expert agent, and your task is to detect the language.
179
- Return a JSON: {'detected_language': 'urdu' or 'english'}
180
- """
181
- },
182
- {
183
- "role": "user",
184
- "content": f"Detect the language for: {question}"
185
- }
186
- ],
187
- model=model,
188
- max_tokens=256,
189
- response_format={"type": "json_object"},
190
- )
191
- response = json.loads(chat_completion.choices[0].message.content)
192
- detected_language = response['detected_language'].lower()
193
- logger.debug(f"Detected language: {detected_language}")
194
- return detected_language
195
- except Exception as e:
196
- logger.error(f"Error detecting language: {e}")
197
- time.sleep(2)
198
- continue
199
- logger.warning("Unable to detect language.")
200
- return "english"
201
-
202
- def translate_urdu(self, text):
203
- logger.info(f"Translating text to Urdu: {text}")
204
- for api_key in self.api_keys:
205
- self.client = Groq(api_key=api_key)
206
- for model in self.models:
207
- try:
208
- chat_completion = self.client.chat.completions.create(
209
- messages=[
210
- {
211
- "role": "system",
212
- "content": """
213
- Translate the following text into proper Urdu. Return a JSON:
214
- {'text': 'translated urdu text'}
215
- """
216
- },
217
- {
218
- "role": "user",
219
- "content": f"Translate this: {text}"
220
- }
221
- ],
222
- model=model,
223
- max_tokens=512,
224
- response_format={"type": "json_object"},
225
- )
226
- response = json.loads(chat_completion.choices[0].message.content)
227
- translated_text = response['text']
228
- logger.debug(f"Translated text: {translated_text}")
229
- return translated_text
230
- except Exception as e:
231
- logger.error(f"Error translating text: {e}")
232
- time.sleep(2)
233
- continue
234
- return text
235
-
236
- def response(self, question, chat_id):
237
- logger.info(f"Processing response for question: {question} (chat_id: {chat_id})")
238
- chat_history = self.load_chat(chat_id)
239
-
240
- for entry in chat_history:
241
- self.memory.save_context({"input": entry["question"]}, {"output": entry["answer"]})
242
-
243
- language = self.detect_language(question)
244
-
245
- if language == 'urdu':
246
- question_translation = GoogleTranslator(source='ur', target='en').translate(question)
247
- context = self.faiss_retriever.invoke(question_translation)
248
- else:
249
- context = self.faiss_retriever.invoke(question)
250
-
251
- answer = self.generate_response(question, chat_history, context)
252
 
253
- self.update_chat(chat_id, question, answer)
254
- return answer
 
1
  import os
 
2
  import json
3
+ import time
4
  import logging
5
+ from huggingface_hub import hf_hub_download
6
+ import pandas as pd
7
  from groq import Groq
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain_openai import ChatOpenAI
 
16
  logger = logging.getLogger(__name__)
17
 
18
  class Comsatsbot:
19
def __init__(self, hf_space_repo, llm, api_keys, chats_collection, index_path='faiss_kb', embeddings=None):
    """Store the bot's collaborators and build or load the FAISS knowledge base.

    Args:
        hf_space_repo: Repo id of the Hugging Face Space that hosts the CSV datasets.
        llm: Language model object handed to the conversation memory.
        api_keys: Groq API keys kept on the instance for later requests.
        chats_collection: Collection object in which chat records are stored.
        index_path: Local directory where the FAISS index is persisted.
        embeddings: Embedding model used to build and query the FAISS index.
            Keyword with a ``None`` default so existing positional callers keep
            working, but the index cannot be initialised without it.

    Raises:
        ValueError: If no embedding model is supplied (raised by
            ``initialize_faiss_index``).
    """
    logger.info("Initializing Comsatsbot...")
    self.llm = llm
    self.api_keys = api_keys
    self.client = None
    self.models = [
        "llama-3.3-70b-versatile",
        "llama3-70b-8192"
    ]
    self.memory = ConversationBufferMemory(llm=self.llm, max_token_limit=3000)
    self.chats_collection = chats_collection
    self.index_path = index_path
    self.hf_space_repo = hf_space_repo
    # FAISS needs an embedding model both to build a new index and to embed
    # queries against a loaded one, so it has to live on the instance.
    self.embeddings = embeddings
    self.faiss_index = None
    self.faiss_retriever = None
    self.initialize_faiss_index()

def load_data_from_hf_space(self):
    """Download the dataset CSVs from the Hugging Face Space and return their rows.

    Each CSV is fetched individually because ``hf_hub_download`` accepts a
    single ``filename`` per call.

    Returns:
        list[dict]: One dict per CSV row (column name -> cell value), with the
        rows of all files concatenated in download order.
    """
    logger.info("Loading data from Hugging Face Space repository...")

    csv_filenames = (
        "english_data.csv",
        "urdu_data.csv",
        "FYP Supervisor Feedback.csv",
    )

    documents = []
    for csv_filename in csv_filenames:
        # repo_type must be given explicitly: hf_hub_download defaults to
        # model repositories, while this repo id names a Space.
        local_csv_path = hf_hub_download(
            repo_id=self.hf_space_repo,
            filename=csv_filename,
            repo_type="space",
        )
        logger.info(f"Downloaded CSV file to {local_csv_path}")

        # Load the CSV file into a DataFrame and flatten it to row records.
        data = pd.read_csv(local_csv_path)
        documents.extend(data.to_dict(orient="records"))

    logger.debug(f"Loaded {len(documents)} records from the dataset.")
    return documents

def initialize_faiss_index(self):
    """Load the FAISS index from ``self.index_path`` or build and save a new one.

    On success ``self.faiss_index`` holds the vector store and
    ``self.faiss_retriever`` a retriever returning the 5 nearest entries.

    Raises:
        ValueError: If no embedding model was supplied, or if the dataset
            yields no rows to index.
    """
    logger.info("Initializing FAISS index...")
    if self.embeddings is None:
        raise ValueError(
            "An embeddings model is required to load or build the FAISS index; "
            "pass `embeddings=` when constructing Comsatsbot."
        )

    if os.path.exists(self.index_path):
        logger.info(f"FAISS index found at {self.index_path}. Loading...")
        # SECURITY: load_local unpickles the stored docstore, so the flag below
        # is required; only point index_path at an index this app wrote itself.
        self.faiss_index = FAISS.load_local(
            self.index_path,
            self.embeddings,
            allow_dangerous_deserialization=True,
        )
    else:
        logger.info("FAISS index not found. Creating a new one...")
        records = self.load_data_from_hf_space()
        if not records:
            raise ValueError("No dataset rows were loaded; cannot build the FAISS index.")
        # The loader returns plain row dicts, not Document objects, so render
        # each row as "column: value" lines and index the resulting texts.
        texts = [
            "\n".join(f"{column}: {value}" for column, value in record.items())
            for record in records
        ]
        self.faiss_index = FAISS.from_texts(texts, self.embeddings)
        self.faiss_index.save_local(self.index_path)

    self.faiss_retriever = self.faiss_index.as_retriever(search_kwargs={"k": 5})
    logger.info("FAISS index initialized successfully.")
 
94
  raise KeyError(f"Chat ID {chat_id} does not exist.")
95
  return chat_record.get('history', [])
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def get_system_prompt(self):
98
  return """
99
  You are a comsats assistant to help the user with comsats university-related queries. Your response should be concise, direct, and to the point. Avoid any unnecessary explanations. Always consider the provided context and chat history to generate the answer.
 
103
  - **Surprise**: Use 😯 when the user expresses surprise.
104
  - **Anger or frustration**: Use 😡 when the user expresses frustration or dissatisfaction.
105
  If the user asks the same question repeatedly or asks an illogical question, feel free to use emojis to subtly convey frustration, confusion, or amusement.
 
106
  If the user writes question in urdu, give answer in urdu.
107
  If the user writes question in English, give answer in English .
108
  please provide the personalized answer and provide answer quickly
109
  please answer from the dataset i provided to you in csv files. And donot write in every answer that i donot know the exact answer.and refer website only where it is necessary.
 
110
  Do not include the phrase "According to the provided context" or "Based on the chat history". Simply generate the answer like a human would, without referencing where the information comes from.
111
  If the question requires a URL, format it like this:
112
  [Click here to visit COMSATS](https://comsats.edu.pk).
 
153
  logger.warning("Unable to generate a response.")
154
  return "Sorry, unable to provide an answer at this time."
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156