Create retriever/chat_manager.py
retriever/chat_manager.py  ADDED  (+114 -0)
@@ -0,0 +1,114 @@
from datetime import datetime
import logging
from typing import Any, List


class ChatManager:
    def __init__(self, documentManager, llmManager):
        """
        Initialize the ChatManager.
        """
        self.doc_manager = documentManager
        self.llm_manager = llmManager

        logging.info("ChatManager initialized")

    def generate_chat_response(self, query: str, selected_docs: List[str], history: List[dict]) -> List[dict]:
        """
        Generate a chat response based on the user's query and selected documents.

        Args:
            query (str): The user's query.
            selected_docs (List[str]): List of selected document filenames from the dropdown.
            history (List[dict]): The chat history as a list of {'role': str, 'content': str} dictionaries.

        Returns:
            List[dict]: Updated chat history with the new response in 'messages' format.
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        logging.info(f"Generating chat response for query: {query} at {timestamp}")

        # Handle empty query
        if not query:
            logging.warning("Empty query received")
            return history + [{"role": "assistant", "content": "Please enter a query."}]

        # Handle no selected documents
        if not selected_docs:
            logging.warning("No documents selected")
            return history + [{"role": "assistant", "content": "Please select at least one document."}]

        # Retrieve the top 5 chunks based on the query and selected documents
        try:
            top_k_results = self.doc_manager.retrieve_top_k(query, selected_docs, k=5)
        except Exception as e:
            logging.error(f"Error retrieving chunks: {str(e)}")
            return history + [
                {"role": "user", "content": query},
                {"role": "assistant", "content": f"Error retrieving chunks: {str(e)}"}
            ]

        if not top_k_results:
            logging.info("No relevant chunks found")
            return history + [
                {"role": "user", "content": query},
                {"role": "assistant", "content": "No relevant information found in the selected documents."}
            ]

        # Send the top-k results to the LLM to generate a response
        try:
            llm_response, source_docs = self.llm_manager.generate_response(query, top_k_results)
        except Exception as e:
            logging.error(f"Error generating LLM response: {str(e)}")
            return history + [
                {"role": "user", "content": query},
                {"role": "assistant", "content": f"Error generating response: {str(e)}"}
            ]

        # Format the response
        response = llm_response
        # Uncomment to append source-document snippets to the response (optional)
        # for i, doc in enumerate(source_docs, 1):
        #     doc_id = doc.metadata.get('doc_id', 'Unknown')
        #     filename = next((name for name, d_id in self.doc_manager.document_ids.items() if d_id == doc_id), 'Unknown')
        #     response += f"\n{i}. {filename}: {doc.page_content[:100]}..."

        logging.info("Chat response generated successfully")
        # Return updated history with the new user query and LLM response
        return history + [
            {"role": "user", "content": query},
            {"role": "assistant", "content": response}
        ]

    def generate_summary(self, chunks: Any, summary_type: str = "medium") -> str:
        """
        Generate a summary of the given document chunks.

        Args:
            chunks (Any): Retrieved chunks from the selected documents.
            summary_type (str): Type of summary ("small", "medium", "detailed").

        Returns:
            str: Generated summary.

        Raises:
            ValueError: If summary_type is invalid.
        """
        if summary_type not in ["small", "medium", "detailed"]:
            raise ValueError("summary_type must be 'small', 'medium', or 'detailed'")

        if not chunks:
            logging.warning("No chunks provided for summarization")
            return "Please select at least one document."

        llm_summary_response = self.llm_manager.generate_summary_v0(chunks=chunks)
        # logging.info(f"Summary response: {llm_summary_response}")

        return llm_summary_response

    def generate_sample_questions(self, chunks: Any):
        """Generate sample questions for the given chunks via the LLMManager."""
        questions = self.llm_manager.generate_questions(chunks=chunks)
        return questions
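
For reference, a minimal sketch of how ChatManager might be driven end to end. The stub managers below are hypothetical stand-ins for the repo's real DocumentManager and LLMManager; they only mirror the call signatures used above (retrieve_top_k, generate_response, generate_summary_v0, generate_questions), and the history list follows the {'role': str, 'content': str} 'messages' format the docstrings describe.

import logging
from retriever.chat_manager import ChatManager

logging.basicConfig(level=logging.INFO)


class StubDocumentManager:
    # Hypothetical stand-in; only what ChatManager touches.
    document_ids = {"report.pdf": "doc-1"}

    def retrieve_top_k(self, query, selected_docs, k=5):
        # A real implementation would run a vector search over the selected docs.
        return [("Example chunk relevant to the query.", 0.42)]


class StubLLMManager:
    # Hypothetical stand-in matching the (response, source_docs) contract.
    def generate_response(self, query, top_k_results):
        return f"Stub answer to: {query}", []

    def generate_summary_v0(self, chunks):
        return "Stub summary."

    def generate_questions(self, chunks):
        return ["Stub question?"]


chat = ChatManager(StubDocumentManager(), StubLLMManager())

# Chat turn: history is a list of {'role', 'content'} dicts, e.g. what a
# Gradio Chatbot in 'messages' mode expects.
history = chat.generate_chat_response("What does the report say?", ["report.pdf"], [])
for msg in history:
    print(f"{msg['role']}: {msg['content']}")

print(chat.generate_summary(chunks=["Example chunk"], summary_type="small"))
print(chat.generate_sample_questions(chunks=["Example chunk"]))

Note that both failure paths in generate_chat_response return the error text as assistant messages rather than raising, so a UI wired to this method keeps the conversation alive even when retrieval or generation fails.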