T-K-O-H committed on
Commit
699a68e
·
0 Parent(s):

Initial commit

Browse files
Files changed (7) hide show
  1. .gitignore +50 -0
  2. README.md +52 -0
  3. app.py +188 -0
  4. quantum_computing.txt +129 -0
  5. rag_graph.py +283 -0
  6. requirements.txt +8 -0
  7. test_document.txt +26 -0
.gitignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
2
+ .env
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+ *.so
9
+ .Python
10
+ env/
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+
26
+ # Virtual Environment
27
+ venv/
28
+ ENV/
29
+
30
+ # IDE
31
+ .idea/
32
+ .vscode/
33
+ *.swp
34
+ *.swo
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Project specific
41
+ # Vector store directory
+ chroma_db/
42
+ # Log files
+ *.log
43
+ .pytest_cache/
44
+ .coverage
45
+ htmlcov/
46
+ .mypy_cache/
47
+ .ipynb_checkpoints/
48
+ *.ipynb
49
+ # Uploaded PDFs
+ *.pdf
50
+ *.txt # Uploaded text files
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LangGraph RAG + RAGAS
2
+
3
+ Built this RAG system to experiment with LangGraph's workflow capabilities and RAGAS metrics. It's a straightforward implementation that lets you upload docs, ask questions, and get quality metrics for each response.
4
+
5
+ ## What it does
6
+
7
+ - Takes your docs and chunks them semantically (sentence-level similarity with greedy paragraph grouping)
8
+ - Uses ChromaDB to store and retrieve relevant context
9
+ - Spits out responses with RAGAS metrics:
10
+ - Faithfulness: How well the response sticks to the context
11
+ - Answer Relevancy: How relevant the answer is to the question
12
+ - Context Precision: How precise the retrieved context is
13
+ - Context Recall: How much relevant context was retrieved
14
+ - Answer Correctness: How accurate the answer is
15
+ - Simple Streamlit UI to interact with it all
16
+
17
+ ## Getting it running
18
+
19
+ 1. Clone the repo
20
+ 2. Install the deps:
21
+ ```bash
22
+ pip install -r requirements.txt
23
+ ```
24
+ 3. Toss your OpenAI key in `.env`:
25
+ ```
26
+ OPENAI_API_KEY=your_key_here
27
+ ```
28
+
29
+ ## Using it
30
+
31
+ 1. Run the Streamlit app:
32
+ ```bash
33
+ streamlit run app.py
34
+ ```
35
+ 2. Upload your docs in the sidebar
36
+ 3. Fire away with questions
37
+ 4. Check the metrics to see how well it's doing
38
+
39
+ ## Under the hood
40
+
41
+ - LangGraph handles the RAG pipeline (retrieve -> generate -> evaluate)
42
+ - ChromaDB stores the vectors with cosine similarity
43
+ - GPT-3.5-turbo generates responses
44
+ - RAGAS evaluates response quality
45
+ - Streamlit for the UI
46
+
47
+ ## Heads up
48
+
49
+ - Vectors get stored in `chroma_db`
50
+ - Using semantic chunking with sentence-level similarity and paragraph grouping
51
+ - Each response comes with its RAGAS metrics
52
+ - Minimum chunk size is a single sentence, max is 1000 chars
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from rag_graph import rag_graph, vectorstore
3
+ from langchain_core.messages import HumanMessage
4
+ import pandas as pd
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from langchain_experimental.text_splitter import SemanticChunker
8
+ import chardet
9
+ from PyPDF2 import PdfReader
10
+ import io
11
+ import re
12
+ import logging
13
+ from langchain_openai import OpenAIEmbeddings
14
+
15
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load settings such as OPENAI_API_KEY from a local .env file.
load_dotenv()

# Set page config (this is the first Streamlit call in the script).
st.set_page_config(
    page_title="RAG Application with RAGAS Metrics",
    page_icon="🤖",
    layout="wide",
)

# Initialize session state: chat history survives Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Embeddings used by the semantic chunker. The model is pinned to the same
# one rag_graph.py uses for the vector store so chunk boundaries and
# retrieval are computed in the same embedding space.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Title
st.title("🤖 RAG Application with RAGAS Metrics")
37
+
38
def clean_text(text):
    """Normalize *text* for chunking.

    Drops non-printable characters (control codes, zero-width marks, ...),
    collapses every run of whitespace into a single space and trims the ends.

    Non-printable characters are removed *before* whitespace is collapsed.
    Doing it the other way round leaves double spaces behind whenever a
    control character sat between two spaces.

    Args:
        text: Raw text extracted from an uploaded document.

    Returns:
        The cleaned, single-line string (possibly empty).
    """
    # Whitespace such as "\n" and "\t" is not "printable" according to
    # str.isprintable(), so keep it explicitly; it is collapsed below.
    text = ''.join(
        char for char in text if char.isprintable() or char.isspace()
    )
    # Collapse whitespace runs (including newlines and tabs) to one space.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
44
+
45
def split_into_sentences(text):
    """Split *text* into sentences.

    A sentence boundary is any whitespace run that directly follows a
    '.', '!' or '?'. The terminator stays attached to its sentence.

    Returns:
        A list of stripped, non-empty sentence strings.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text)
    result = []
    for piece in pieces:
        trimmed = piece.strip()
        if trimmed:
            result.append(trimmed)
    return result
49
+
50
def process_text(text, max_chunk_size=1000):
    """Turn raw document text into chunks ready for the vector store.

    Steps:
      1. Clean the text and split it into sentences.
      2. Let ``SemanticChunker`` group the sentences by embedding similarity.
      3. Greedily merge consecutive semantic chunks into paragraphs of at
         most ``max_chunk_size`` characters.

    Args:
        text: Raw document text.
        max_chunk_size: Upper bound, in characters, for a merged paragraph
            (including the newline separators between merged chunks).
            A single semantic chunk that is already longer than this is
            kept intact as its own paragraph rather than being cut
            mid-sentence.

    Returns:
        A list of paragraph strings; empty when the text has no content.
    """
    # Clean the text
    text = clean_text(text)

    # Split into sentences
    sentences = split_into_sentences(text)
    if not sentences:
        # Nothing to chunk; avoid a pointless embedding call.
        return []

    # Initialize text splitter with semantic chunking
    text_splitter = SemanticChunker(
        embeddings=embeddings,
        breakpoint_threshold_type="percentile",
        breakpoint_threshold_amount=25,  # Lower threshold for more semantic grouping
    )

    # First chunk sentences semantically
    sentence_chunks = text_splitter.split_text("\n".join(sentences))

    # Then combine into paragraphs greedily
    separator = "\n"
    paragraphs = []
    current_paragraph = []
    current_size = 0

    for chunk in sentence_chunks:
        # Size this chunk adds to the paragraph, counting the separator
        # that the final join will insert before it.
        added_size = len(chunk) + (len(separator) if current_paragraph else 0)
        if current_paragraph and current_size + added_size > max_chunk_size:
            paragraphs.append(separator.join(current_paragraph))
            current_paragraph = [chunk]
            current_size = len(chunk)
        else:
            current_paragraph.append(chunk)
            current_size += added_size

    if current_paragraph:
        paragraphs.append(separator.join(current_paragraph))

    return paragraphs
88
+
89
def extract_text_from_pdf(pdf_file):
    """Extract and clean the text of every page of a PDF.

    Args:
        pdf_file: A path or binary file-like object accepted by ``PdfReader``
            (the sidebar passes the Streamlit upload directly).

    Returns:
        The cleaned text of all pages that yielded any text.

    Raises:
        ValueError: If the PDF cannot be read, or if no page yields text
            (for example a scanned, image-only document).
    """
    try:
        pdf_reader = PdfReader(pdf_file)
        page_texts = [page.extract_text() for page in pdf_reader.pages]
    except Exception as e:
        # Keep the original exception as the cause for debugging.
        raise ValueError(f"Error extracting text from PDF: {str(e)}") from e

    # Only keep pages where text was actually extracted.
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    if not text.strip():
        # Raised outside the try block so it is not re-wrapped in a second
        # "Error extracting text from PDF" message.
        raise ValueError("No text could be extracted from the PDF")

    return clean_text(text)
104
+
105
# Sidebar for document upload
with st.sidebar:
    st.header("Document Management")
    uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
    if uploaded_file:
        # Streamlit re-runs this script on every interaction while the
        # uploader keeps returning the same file. Remember what has already
        # been ingested so each upload is chunked and stored only once.
        if "processed_files" not in st.session_state:
            st.session_state.processed_files = set()
        file_key = (uploaded_file.name, uploaded_file.size)

        if file_key in st.session_state.processed_files:
            st.info("Document already added to the knowledge base")
        else:
            try:
                logger.info(f"Processing uploaded file: {uploaded_file.name}")
                # Process the document based on file type
                if uploaded_file.type == "application/pdf":
                    text = extract_text_from_pdf(uploaded_file)
                else:
                    # For text files, detect encoding
                    raw_data = uploaded_file.getvalue()
                    result = chardet.detect(raw_data)
                    # chardet reports None when it cannot guess (e.g. an
                    # empty file); fall back to UTF-8 instead of crashing.
                    encoding = result['encoding'] or "utf-8"
                    text = raw_data.decode(encoding, errors="replace")

                if not text.strip():
                    raise ValueError("No text content found in the document")

                # Process text into semantic chunks
                chunks = process_text(text)

                if not chunks:
                    raise ValueError("No valid text chunks could be created from the document")

                # Add to vectorstore
                logger.info(f"Adding {len(chunks)} chunks to vectorstore")
                vectorstore.add_texts(chunks)
                st.session_state.processed_files.add(file_key)

                st.success("Document processed and added to the knowledge base!")
                st.info(f"Processed {len(chunks)} chunks of text")
            except Exception as e:
                # Top-level UI boundary: report the failure instead of
                # letting the whole page crash.
                logger.error(f"Error processing document: {str(e)}")
                st.error(f"Error processing document: {str(e)}")
140
+
141
# Main chat interface
st.header("Chat with the RAG System")

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
default_question = "What is quantum computing?"
question = st.text_input("Ask a question", value=default_question)
submitted = st.button("Submit")

# Streamlit re-runs the whole script on every interaction (including file
# uploads). Comparing only against the default would re-send an already
# answered question on each rerun, so also compare with the last question
# that was processed.
is_new_question = (
    question != default_question
    and question != st.session_state.get("last_question")
)

if (submitted or is_new_question) and question.strip():
    st.session_state["last_question"] = question

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": question})

    # Display user message
    with st.chat_message("user"):
        st.markdown(question)

    # Prepare the state for the RAG graph
    state = {
        "messages": [HumanMessage(content=question)],
        "context": "",  # Initialize empty context
        "response": "",  # Initialize empty response
        "next": "retrieve",  # Start with retrieval
    }

    # Run the RAG graph
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                logger.info("Starting RAG process")
                result = rag_graph.invoke(state)
                logger.info("RAG process completed")

                # Display the response and metrics
                st.markdown(result["response"])
                st.write("Full Response Data:")
                st.json(result)

                # Add assistant response to chat history
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": result["response"],
                })
            except Exception as e:
                # Top-level UI boundary: show the error in the chat pane.
                logger.error(f"Error in RAG process: {str(e)}")
                st.error(f"Error generating response: {str(e)}")
quantum_computing.txt ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Introduction to Quantum Computing
2
+
3
+ Quantum Computing is a revolutionary field of computer science that leverages the principles of quantum mechanics to perform computations. Unlike classical computers that use bits (0s and 1s), quantum computers use quantum bits or qubits, which can exist in multiple states simultaneously. This fundamental difference allows quantum computers to solve certain problems exponentially faster than classical computers.
4
+
5
+ The field emerged in the 1980s when physicist Richard Feynman proposed that quantum systems could be simulated more efficiently using quantum computers. Since then, significant progress has been made in both theoretical understanding and practical implementation of quantum computing systems.
6
+
7
+ ## Key Concepts in Quantum Computing
8
+
9
+ ### Qubits
10
+ - Qubits are the fundamental units of quantum information
11
+ - They can exist in a superposition of states (both 0 and 1 simultaneously)
12
+ - This property enables quantum computers to process multiple possibilities at once
13
+ - Qubits can be implemented using various physical systems:
14
+ - Electron spin in quantum dots
15
+ - Nuclear spin in atoms
16
+ - Photon polarization
17
+ - Superconducting circuits
18
+ - The state of a qubit is described by a complex probability amplitude
19
+ - Measurement of a qubit collapses its state to either 0 or 1
20
+
21
+ ### Quantum Gates
22
+ - Quantum gates are the basic building blocks of quantum circuits
23
+ - Common quantum gates include:
24
+ - Hadamard gate (creates superposition)
25
+ - CNOT gate (entangles qubits)
26
+ - Pauli gates (X, Y, Z for basic operations)
27
+ - Phase shift gates (R, S, T gates)
28
+ - Toffoli gate (quantum AND operation)
29
+ - Quantum gates must be reversible, unlike classical gates
30
+ - Gates can be combined to create quantum circuits
31
+ - Error rates in quantum gates are a major challenge
32
+
33
+ ### Quantum Algorithms
34
+ Several important quantum algorithms have been developed:
35
+ 1. Shor's Algorithm: Efficiently factors large numbers
36
+ - Can break current RSA encryption
37
+ - Runs exponentially faster than classical algorithms
38
+ 2. Grover's Algorithm: Speeds up unstructured search problems
39
+ - Provides quadratic speedup
40
+ - Useful for database searching
41
+ 3. Quantum Fourier Transform: Basis for many quantum algorithms
42
+ - Enables phase estimation
43
+ - Used in Shor's algorithm
44
+ 4. Quantum Machine Learning Algorithms:
45
+ - Quantum support vector machines
46
+ - Quantum neural networks
47
+ - Quantum principal component analysis
48
+
49
+ ## Current State of Quantum Computing
50
+
51
+ ### Hardware Implementations
52
+ Major approaches to building quantum computers include:
53
+ - Superconducting qubits (used by IBM and Google)
54
+ - Operate at extremely low temperatures
55
+ - Use Josephson junctions
56
+ - Currently leading in qubit count
57
+ - Trapped ions (used by IonQ)
58
+ - High coherence times
59
+ - Excellent gate fidelities
60
+ - More complex to scale
61
+ - Topological qubits (pursued by Microsoft)
62
+ - More resistant to errors
63
+ - Still in research phase
64
+ - Based on anyons
65
+ - Photonic quantum computing (used by Xanadu)
66
+ - Operate at room temperature
67
+ - Good for quantum communication
68
+ - Challenging for computation
69
+
70
+ ### Challenges
71
+ Key challenges in quantum computing development:
72
+ 1. Quantum Decoherence: Maintaining qubit states
73
+ - Caused by environmental interactions
74
+ - Limits computation time
75
+ - Requires error correction
76
+ 2. Error Correction: Dealing with quantum errors
77
+ - Surface codes most promising
78
+ - Requires many physical qubits
79
+ - Complex to implement
80
+ 3. Scalability: Building larger quantum systems
81
+ - Current systems have 50-100 qubits
82
+ - Need thousands for useful applications
83
+ - Engineering challenges
84
+ 4. Temperature Control: Operating at near absolute zero
85
+ - Expensive cooling systems
86
+ - Power consumption issues
87
+ - Maintenance challenges
88
+
89
+ ## Applications
90
+
91
+ Quantum computing has potential applications in:
92
+ - Cryptography and cybersecurity
93
+ - Breaking current encryption
94
+ - Quantum key distribution
95
+ - Post-quantum cryptography
96
+ - Drug discovery and material science
97
+ - Molecular simulation
98
+ - Protein folding
99
+ - Material properties prediction
100
+ - Financial modeling and optimization
101
+ - Portfolio optimization
102
+ - Risk analysis
103
+ - Option pricing
104
+ - Artificial intelligence and machine learning
105
+ - Quantum neural networks
106
+ - Pattern recognition
107
+ - Optimization problems
108
+ - Climate modeling and weather prediction
109
+ - Complex system simulation
110
+ - Climate pattern analysis
111
+ - Weather forecasting
112
+
113
+ ## Future Outlook
114
+
115
+ The field of quantum computing is rapidly evolving:
116
+ - Current quantum computers have 50-100 qubits
117
+ - Goal is to build fault-tolerant quantum computers
118
+ - Hybrid quantum-classical systems are being developed
119
+ - Quantum cloud computing services are becoming available
120
+ - Major milestones expected in next decade:
121
+ - 1000+ qubit systems
122
+ - Error-corrected qubits
123
+ - Practical quantum algorithms
124
+ - Commercial applications
125
+ - Research areas:
126
+ - New qubit technologies
127
+ - Better error correction
128
+ - Quantum software development
129
+ - Quantum networking
rag_graph.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, TypedDict, Annotated
2
+ from langchain_core.messages import HumanMessage, AIMessage
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5
+ from langchain_chroma import Chroma
6
+ from langchain_core.output_parsers import StrOutputParser
7
+ from langgraph.graph import Graph, StateGraph, END
8
+ from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, answer_correctness
9
+ from ragas import evaluate
10
+ from datasets import Dataset
11
+ import os
12
+ from dotenv import load_dotenv
13
+ import numpy as np
14
+ import logging
15
+
16
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load settings such as OPENAI_API_KEY from a local .env file.
load_dotenv()


# Define the state
class AgentState(TypedDict):
    """State passed between the retrieve, generate and evaluate nodes."""

    # Conversation so far; the nodes read the last message as the question.
    messages: Annotated[List[HumanMessage | AIMessage], "The messages in the conversation"]
    # Text assembled by the retrieve node from the matching documents.
    context: Annotated[str, "The retrieved context"]
    # Answer produced by the generate node.
    response: Annotated[str, "The generated response"]
    # RAGAS scores keyed by metric name. Declared here because the nodes
    # return a "metrics" key; NOTE(review): LangGraph appears to drop node
    # output keys that are not part of the state schema - confirm against
    # the installed version.
    metrics: Annotated[Dict[str, float], "The RAGAS metrics for the response"]
    # Name of the step the node intends to run next. The graph edges are
    # static, so this is informational only.
    next: str


# Initialize components
llm = ChatOpenAI(model="gpt-3.5-turbo")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Initialize Chroma with minimal configuration. The collection is persisted
# under ./chroma_db and configured to use cosine distance.
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings,
    collection_name="rag_collection",
    collection_metadata={"hnsw:space": "cosine"},
)
40
+
41
+ # Define the retrieval function
42
def retrieve(state: AgentState) -> Dict:
    """Retrieve context for the latest message from the vector store.

    The ten nearest documents are fetched, those whose similarity exceeds a
    small threshold are kept (or all of them when none qualifies), and their
    contents are joined most-similar-first into one context string.

    Args:
        state: Graph state; the content of ``state["messages"][-1]`` is the
            query.

    Returns:
        ``{"context": <text>, "next": "generate"}``. On any failure
        (including an empty knowledge base) the context is ``""`` so the
        generate node can answer with its fallback message.
    """
    top_k = 10  # Increased number of documents
    min_similarity = 0.2  # Lower threshold for more context

    try:
        messages = state["messages"]
        last_message = messages[-1]
        logger.info(f"Retrieving context for query: {last_message.content}")

        # Get relevant documents
        docs = vectorstore.similarity_search_with_score(
            last_message.content,
            k=top_k,
        )
        if not docs:
            logger.warning("No relevant documents found in the knowledge base")
            raise ValueError("No relevant documents found in the knowledge base")

        # Chroma returns a *distance* (lower means more similar). The
        # collection uses cosine distance, so 1 - distance is the cosine
        # similarity. Each document stays paired with its own score so that
        # filtering and sorting can never get out of step.
        ranked = sorted(
            ((doc, 1.0 - distance) for doc, distance in docs),
            key=lambda pair: pair[1],
            reverse=True,
        )

        filtered_docs = [
            doc for doc, similarity in ranked if similarity > min_similarity
        ]
        if not filtered_docs:
            logger.warning("No documents met the similarity threshold, using all retrieved documents")
            # Use all documents (still most similar first) if none meet
            # the threshold.
            filtered_docs = [doc for doc, _ in ranked]

        context = "\n\n".join(doc.page_content for doc in filtered_docs)
        logger.info(f"Using {len(filtered_docs)} documents for context")

        # Validate context
        if not context.strip():
            logger.warning("No valid context could be retrieved")
            raise ValueError("No valid context could be retrieved")

        logger.info(f"Retrieved context length: {len(context)} characters")
        return {"context": context, "next": "generate"}
    except Exception as e:
        # Best effort: an empty context lets the pipeline finish with a
        # "no relevant information" answer instead of failing the request.
        logger.error(f"Error in retrieval: {str(e)}")
        return {"context": "", "next": "generate"}
83
+
84
+ # Define the generation function
85
def generate(state: AgentState) -> Dict:
    """Answer the latest question using the retrieved context.

    RAGAS scoring is intentionally not done here. The evaluate node runs
    right after this one and always writes ``metrics``, so scoring in both
    places only doubled the number of evaluation calls. An empty ``metrics``
    dict is still returned to keep the output shape unchanged.

    Args:
        state: Graph state; reads ``messages`` (the last one is the question)
            and ``context``.

    Returns:
        ``{"response": <answer>, "metrics": {}, "next": "evaluate"}``.
        When the context is empty or generation fails, ``response`` holds an
        apology message instead of raising.
    """
    try:
        messages = state["messages"]
        context = state["context"]

        if not context.strip():
            logger.warning("Empty context in generation step")
            return {
                "response": "I apologize, but I couldn't find any relevant information in the knowledge base to answer your question. Please try rephrasing your question or upload more relevant documents.",
                "metrics": {},
                "next": "evaluate",
            }

        logger.info("Generating response with context")
        # Create prompt with context
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful assistant specialized in quantum computing.
            Use the following context to answer the question.
            Guidelines:
            1. Base your answer strictly on the provided context
            2. If the context doesn't contain relevant information, say so clearly
            3. Be specific and technical in your explanations
            4. Use bullet points or numbered lists when appropriate
            5. Include relevant examples from the context
            6. If discussing technical concepts, explain them clearly

            Context:
            {context}"""),
            ("human", "{question}"),
        ])

        chain = prompt | llm | StrOutputParser()
        response = chain.invoke({
            "context": context,
            "question": messages[-1].content,
        })

        logger.info(f"Generated response length: {len(response)} characters")

        return {
            "response": response,
            "metrics": {},  # Filled in by the evaluate node
            "next": "evaluate",
        }
    except Exception as e:
        # Node boundary: degrade to an apology so the graph still completes.
        logger.error(f"Error in generation: {str(e)}")
        return {
            "response": "I apologize, but I encountered an error while generating a response. Please try again.",
            "metrics": {},
            "next": "evaluate",
        }
159
+
160
+ # Define the RAGAS evaluation function
161
def evaluate_rag(state: AgentState) -> Dict:
    """Score the generated response with RAGAS.

    Each metric is evaluated on its own so that one failing metric does not
    discard the others; a metric that fails is recorded as ``0.0``.
    Evaluation is skipped (empty ``metrics``) when the context or response
    is empty or too short to score meaningfully.

    Args:
        state: Graph state; reads ``messages``, ``context`` and ``response``.

    Returns:
        ``{"context", "response", "metrics", "next": END}``. This function
        never raises: on any error ``metrics`` is ``{}``.
    """
    # Read these before the try block so the error handlers below can always
    # build a result, even if the state is malformed.
    context = state.get("context") or ""
    response = state.get("response") or ""

    def _result(metrics):
        # Single place that builds the node output.
        return {"context": context, "response": response, "metrics": metrics, "next": END}

    # (result key, RAGAS metric, label used in the success log line)
    metric_specs = (
        ("faithfulness", faithfulness, "Faithfulness"),
        ("answer_relevancy", answer_relevancy, "Answer relevancy"),
        ("context_precision", context_precision, "Context precision"),
        ("context_recall", context_recall, "Context recall"),
        ("answer_correctness", answer_correctness, "Answer correctness"),
    )

    try:
        messages = state["messages"]

        # Detailed logging of input data
        logger.info("=== RAGAS Evaluation Debug Info ===")
        logger.info(f"Question: {messages[-1].content}")
        logger.info(f"Context length: {len(context)}")
        logger.info(f"Response length: {len(response)}")
        logger.info(f"Context preview: {context[:200]}...")
        logger.info(f"Response preview: {response[:200]}...")

        # Validate inputs
        if not context.strip():
            logger.error("Empty context detected")
            return _result({})

        if not response.strip():
            logger.error("Empty response detected")
            return _result({})

        # Check for minimum content requirements
        if len(context) < 50:
            logger.error(f"Context too short: {len(context)} characters")
            return _result({})

        if len(response) < 20:
            logger.error(f"Response too short: {len(response)} characters")
            return _result({})

        logger.info("Creating evaluation dataset...")
        try:
            # Create dataset for evaluation. No human-written reference
            # answer exists, so the retrieved context doubles as the ground
            # truth; metrics that compare against ground truth should be
            # read with that in mind.
            dataset = Dataset.from_dict({
                "question": [messages[-1].content],
                "contexts": [[context]],
                "answer": [response],
                "ground_truth": [context],
            })
            logger.info("Dataset created successfully")

            # Evaluate each metric separately
            metrics_dict = {}
            for metric_name, metric, label in metric_specs:
                try:
                    result = evaluate(dataset, metrics=[metric])
                    metrics_dict[metric_name] = float(np.mean(result[metric_name]))
                    logger.info(f"{label} calculated: {metrics_dict[metric_name]}")
                except Exception as e:
                    logger.error(f"Error calculating {metric_name}: {str(e)}")
                    metrics_dict[metric_name] = 0.0

            logger.info(f"RAGAS metrics calculated: {metrics_dict}")
            return _result(metrics_dict)

        except Exception as eval_error:
            logger.error(f"Error during RAGAS evaluation: {str(eval_error)}")
            logger.error(f"Error type: {type(eval_error)}")
            return _result({})

    except Exception as e:
        logger.error(f"Error in RAGAS evaluation: {str(e)}")
        logger.error(f"Error type: {type(e)}")
        return _result({})
260
+
261
+ # Create the workflow
262
def create_rag_graph():
    """Build and compile the linear RAG workflow.

    The graph runs retrieve -> generate -> evaluate -> END, starting at
    "retrieve".

    Returns:
        The compiled LangGraph application.
    """
    graph_builder = StateGraph(AgentState)

    # Register one node per pipeline step.
    node_table = (
        ("retrieve", retrieve),
        ("generate", generate),
        ("evaluate", evaluate_rag),
    )
    for node_name, node_fn in node_table:
        graph_builder.add_node(node_name, node_fn)

    # Wire the steps together in a fixed order.
    edge_table = (
        ("retrieve", "generate"),
        ("generate", "evaluate"),
        ("evaluate", END),
    )
    for source, target in edge_table:
        graph_builder.add_edge(source, target)

    # Retrieval is always the first step.
    graph_builder.set_entry_point("retrieve")

    return graph_builder.compile()


# Create the graph
rag_graph = create_rag_graph()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain>=0.1.0
2
+ langgraph>=0.0.15
3
+ ragas>=0.1.0
4
+ streamlit>=1.32.0
5
+ python-dotenv>=1.0.0
6
+ chromadb>=0.4.22
7
+ openai>=1.12.0
8
+ tiktoken>=0.6.0
test_document.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The Road Not Taken By Robert Frost
2
+
3
+ Two roads diverged in a yellow wood,
4
+ And sorry I could not travel both
5
+ And be one traveler, long I stood
6
+ And looked down one as far as I could
7
+ To where it bent in the undergrowth;
8
+
9
+ Then took the other, as just as fair,
10
+ And having perhaps the better claim,
11
+ Because it was grassy and wanted wear;
12
+ Though as for that the passing there
13
+ Had worn them really about the same,
14
+
15
+ And both that morning equally lay
16
+ In leaves no step had trodden black.
17
+ Oh, I kept the first for another day!
18
+ Yet knowing how way leads on to way,
19
+ I doubted if I should ever come back.
20
+
21
+ I shall be telling this with a sigh
22
+ Somewhere ages and ages hence:
23
+ Two roads diverged in a wood, and I—
24
+ I took the one less traveled by,
25
+ And that has made all the difference.
26
+