# Paths to uploaded PDFs
pdf_paths = ["/content/sex_education/hhs_gov_teen_pregnancy.pdf", "/content/sex_education/use_of_contraceptives.pdf"]

# Load & split PDFs into pages
from langchain_community.document_loaders import PyPDFLoader

text = ""
for pdf_path in pdf_paths:
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()
    for page in pages:
        text += page.page_content
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Store the combined text in a single Document
document = [Document(page_content=text)]

# Split the document into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(document)
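# Optional sanity check: confirm the split produced a reasonable number of
# chunks before paying to embed them.
print(f"Split corpus into {len(docs)} chunks")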
# Creating Embeddings
from langchain_openai import OpenAIEmbeddings
from google.colab import userdata
import os

os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

# OpenAI embedding model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
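# Optional smoke test (requires a valid OPENAI_API_KEY): embed a short string
# and inspect the vector dimensionality, which should be 1536 for
# text-embedding-3-small.
sample_vector = embeddings.embed_query("recommendations for contraceptive use")
print(len(sample_vector))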
from langchain_community.vectorstores import FAISS

# Create a vector store with similarity search functionality
vector_store = FAISS.from_documents(docs, embeddings)
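# Optional: eyeball retrieval quality with a direct similarity search before
# wiring the store into the agent (the query below is just an illustration).
for hit in vector_store.similarity_search("approaches to preventing teen pregnancy", k=2):
    print(hit.page_content[:200], "\n---")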
# Connect to OpenAI models via API & define the llm object
import os
from langchain_openai import ChatOpenAI
from google.colab import userdata

os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

# Define llm object
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
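# Optional smoke test to confirm the key and model work before building on top
# (uncomment to run):
# print(llm.invoke("Reply with OK.").content)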
from langchain_core.tools import tool
import os
import requests
import json
from typing import List, Dict, Any
from google.colab import userdata
@tool
def get_location(query: str) -> str:
    """
    Find locations worldwide using Google Places API v1 and generate a human-friendly response.

    Args:
        query (str): Search query for any type of location or place

    Returns:
        str: A conversational, informative response about the locations
    """
    # Initialize an LLM for generating responses
    llm = ChatOpenAI(temperature=0.2, model_name="gpt-4o-mini")

    # Validate the query
    if not query or len(query.strip()) < 3:
        return "I'm sorry, but the search query is too short. Could you provide more details about the location you're looking for?"
    # Google Places API v1 endpoint
    base_url = "https://places.googleapis.com/v1/places:searchText"

    # Read the Google Maps API key from Colab secrets
    api_key = userdata.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        return "I apologize, but there's a configuration issue with the location search. Our team has been notified."

    # Headers for the API request
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": api_key,
        "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.rating,places.userRatingCount,places.types"
    }

    # Request payload
    payload = {
        "textQuery": query
    }
    try:
        # Make the API request (the v1 API expects a POST)
        response = requests.post(base_url, headers=headers, json=payload)
        results = response.json()

        # Process and return location results
        if results.get('places'):
            locations = []
            for place in results.get('places', [])[:5]:  # Limit to 5 results
                location_info = {
                    "name": place.get('displayName', {}).get('text', 'Unnamed Location'),
                    "address": place.get('formattedAddress', 'Address not available'),
                    "rating": place.get('rating', 'No rating'),
                    "total_ratings": place.get('userRatingCount', 'No ratings'),
                    "types": place.get('types', []),
                }
                locations.append(location_info)

            # Prepare locations for LLM interpretation
            locations_str = json.dumps(locations, indent=2)
            # Generate a conversational response about the locations
            response_prompt = f"""
            Given the following location search results for the query "{query}":
            {locations_str}

            Please generate a friendly, informative response that:
            1. Highlights the top locations
            2. Provides context about the search results
            3. Offers helpful insights or recommendations
            4. Keeps the tone conversational and helpful
            5. Summarizes key differences if multiple locations are found

            If no locations are found, explain that politely.
            """

            # Generate the response using the LLM
            llm_response = llm.invoke(response_prompt).content
            return llm_response
        else:
            # Handle API errors with a friendly message
            error_message = results.get('error', {}).get('message', 'Unknown error')
            return f"I'm sorry, but I couldn't find any locations matching '{query}'. Error: {error_message}. Could you try a different search?"
    except Exception as e:
        # Provide a user-friendly error message
        error_prompt = f"""
        An error occurred while searching for locations with the query "{query}".
        Error details: {str(e)}

        Please generate a friendly, apologetic message that:
        1. Acknowledges the search didn't work
        2. Suggests how the user might modify their search
        3. Maintains a helpful tone
        """
        error_response = llm.invoke(error_prompt).content
        return error_response
# Tool metadata
get_location.description = (
    "Searches for locations worldwide using Google Places API v1 and LLM-powered insights. "
    "Provides conversational, context-rich location information. "
    "Requires a valid Google Maps API key."
)
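# Example invocation (assumes valid GOOGLE_MAPS_API_KEY and OPENAI_API_KEY
# secrets; uncomment to try the tool on its own):
# print(get_location.invoke({"query": "family planning clinics in Kampala"}))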
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.messages import SystemMessage, HumanMessage

@tool
def get_pdf(query: str) -> str:
    """
    Search and retrieve information from PDF documents based on a query,
    then present it in a conversational and meaningful way.

    Args:
        query: The search query or question to look up in the PDF documents.

    Returns:
        A conversational response based on the information found in the PDFs.
    """
    # Use a smaller/faster model for query generation
    query_llm = ChatOpenAI(model_name="gpt-4o-mini")

    # Create a MultiQueryRetriever on top of the existing vector store
    retriever = MultiQueryRetriever.from_llm(
        retriever=vector_store.as_retriever(search_kwargs={"k": 4}),
        llm=query_llm
    )

    # Get relevant documents (invoke replaces the deprecated get_relevant_documents)
    docs = retriever.invoke(query)
    if not docs:
        return "I couldn't find specific information about that topic in the available documents. I'd recommend consulting a healthcare provider who can give personalized advice based on your specific situation."

    # Extract document content and metadata (note: because the pages were merged
    # into a single Document before splitting, source/page metadata may be absent)
    content_with_metadata = []
    for doc in docs:
        source = doc.metadata.get('source', 'Unknown source')
        page = doc.metadata.get('page', 'Unknown page')
        content_with_metadata.append({
            "content": doc.page_content,
            "source": source,
            "page": page
        })

    # Combine all document content
    all_content = "\n\n".join([f"Document: {i+1}\nSource: {doc['source']}, Page: {doc['page']}\n{doc['content']}"
                               for i, doc in enumerate(content_with_metadata)])
    # Format the prompt for GPT-4o-mini
    conversation_llm = ChatOpenAI(model_name="gpt-4o-mini")

    # Note the f-string: without it, {query} would be sent to the model literally
    system_prompt = f"""
    You are a helpful and compassionate assistant providing information about approaches to preventing teen pregnancy,
    recommendations for contraceptive use, and reproductive health services worldwide.

    Based on the information in the provided documents, craft a warm, informative response
    to someone asking about the query: {query}.

    Ensure your response is:
    1. Conversational and empathetic
    2. Factually accurate based on the retrieved information
    3. Acknowledging the diversity of disabilities and individual needs
    4. Clear about the importance of consulting healthcare providers for personalized advice
    5. Organized and easy to understand

    If the documents don't provide sufficient information, acknowledge the limitations and
    suggest seeking professional medical advice.
    """
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=f"Original query: {query}\n\nRetrieved information:\n{all_content}")
    ]

    # Generate the conversational response
    response = conversation_llm.invoke(messages)
    return response.content
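# Example invocation (uncomment to test retrieval end-to-end; the question is
# illustrative and assumes the PDFs loaded above):
# print(get_pdf.invoke({"query": "What contraceptive methods are recommended for teens?"}))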
# Create the Agentic Application
from typing import Annotated
from langchain_openai import ChatOpenAI
from langchain_core.messages import BaseMessage
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

class State(TypedDict):
    messages: Annotated[list, add_messages]

# Create the graph
graph_builder = StateGraph(State)

# Define the tools list
tools = [get_pdf, get_location]

# Create the LLM and bind the tools to it
llm = ChatOpenAI(model_name="gpt-4o-mini")
llm_with_tools = llm.bind_tools(tools)

def chatbot(state: State):
    return {"messages": [llm_with_tools.invoke(state["messages"])]}
# Add nodes
graph_builder.add_node("chatbot", chatbot)

# Create the ToolNode
tool_node = ToolNode(tools=tools)
graph_builder.add_node("tools", tool_node)

# Add edges: route to "tools" whenever the chatbot emits tool calls
graph_builder.add_conditional_edges(
    "chatbot",
    tools_condition,
)
# Any time a tool is called, we return to the chatbot to decide the next step
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge(START, "chatbot")
from langgraph.checkpoint.memory import MemorySaver

# Instantiate the MemorySaver class
memory = MemorySaver()

# Compile the graph with memory
graph = graph_builder.compile(checkpointer=memory)
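# Optional smoke test of the compiled graph; the thread_id is arbitrary, and the
# MemorySaver checkpointer keeps per-thread conversation state between calls.
# config = {"configurable": {"thread_id": "smoke-test"}}
# result = graph.invoke({"messages": [{"role": "user", "content": "Hello"}]}, config)
# print(result["messages"][-1].content)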
# Import required libraries
import gradio as gr
from typing import Dict, List, Any
import uuid
from collections import deque
from langchain_core.messages import HumanMessage, AIMessage

# The graph defined earlier must already be compiled, with both tools defined

# Session tracking (global)
current_thread_id = "1"  # Default thread ID
sessions = {}
def process_message(message, history):
    """Process a user message through the LangGraph agent with streaming."""
    global current_thread_id
    try:
        # Configure the thread_id
        config = {"configurable": {"thread_id": current_thread_id}}

        # Prepare the message in the format expected by the graph
        input_message = {"role": "user", "content": message}

        # Stream from the graph
        ai_response = ""
        events = graph.stream(
            {"messages": [input_message]},
            config,
            stream_mode="values",
        )

        # Process all events to find the AI response
        for event in events:
            if "messages" in event:
                if isinstance(event["messages"], deque):
                    event["messages"] = list(event["messages"])
                # Look for AI messages
                if event["messages"]:
                    last_message = event["messages"][-1]
                    if not isinstance(last_message, HumanMessage):
                        # This is an AI message
                        ai_response = last_message.content

        # If we couldn't get a response, provide a fallback
        if not ai_response:
            ai_response = "I'm sorry, I couldn't process that request. Could you try again?"

        # Return the final conversation state
        return history + [[message, ai_response]]
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(f"Error details: {e}")  # Print the full error for debugging
        return history + [[message, error_message]]
def reset_conversation():
    """Reset the conversation."""
    global current_thread_id
    # Clear any session data for the old thread before replacing it
    sessions.pop(current_thread_id, None)
    # Generate a new thread ID so the checkpointer starts a fresh history
    current_thread_id = str(uuid.uuid4())
    return []

# Add a welcome message to appear when someone connects
def add_welcome_message():
    return [[None, "Hi! How may I assist you today?"]]
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 1rem">
        <h1>Sex Education AI Assistant</h1>
        <p>Ask me anything about approaches to preventing teen pregnancy, recommendations for contraceptive use, or reproductive health services worldwide!</p>
        <p>Try asking:</p>
        <ul style="list-style-type: none; padding: 0;">
            <li>"Find Family Planning services in Kampala"</li>
            <li>"I'm 13 years old and want to avoid pregnancy as a teenager. What are the best ways to protect myself and stay safe?"</li>
            <li>"I have a disability and am considering using contraceptives. Are they safe for me, and are there any specific factors I should be aware of?"</li>
        </ul>
    </div>
    """)

    chatbot = gr.Chatbot(height=500, label="Conversation")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask about any reproductive health services (e.g., 'Find family planning services in New York')",
            label="Your search",
            container=True
        )

    with gr.Row():
        submit_btn = gr.Button("Search")
        clear_btn = gr.Button("New Search")
    # Set up event handlers
    submit_btn.click(
        fn=process_message,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]  # Clear the textbox after sending
    )

    msg.submit(
        fn=process_message,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]  # Clear the textbox after sending
    )

    clear_btn.click(
        fn=reset_conversation,
        inputs=None,
        outputs=[chatbot]
    )

    # Set the initial message
    demo.load(fn=add_welcome_message, inputs=None, outputs=[chatbot])

# Launch the app with share=True to get a public URL
demo.launch(debug=True, share=True, inline=False)