Spaces:
Build error
Build error
File size: 4,377 Bytes
b22038d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import html
import os
import time

import streamlit as st

from rag_app import WebRAG
# Set page configuration
# Browser-tab title, tab icon and wide layout are collected in one mapping and
# passed to Streamlit as keyword arguments.
# NOTE(review): the icon string looks like a mis-decoded emoji — confirm the intended character.
_PAGE_SETTINGS = {
    "page_title": "Web RAG Assistant",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS
# Injects a <style> element (raw HTML, hence unsafe_allow_html=True) defining
# the page width limit and the .chat-container / .user-message /
# .assistant-message classes used by the chat bubbles rendered further down.
#
# The assistant bubble previously declared `background-color: #white;`, which
# is not a valid CSS colour (a hex colour needs hex digits), so browsers
# discarded the declaration. It is now a real white, with an explicit dark
# text colour so the bubble remains readable if the active theme uses light text.
st.markdown("""
<style>
.stApp {
max-width: 1200px;
margin: 0 auto;
}
.chat-container {
border-radius: 10px;
padding: 20px;
background-color: #f0f2f6;
margin: 10px 0;
}
.user-message {
background-color: #2e7bf3;
color: white;
padding: 15px;
border-radius: 15px;
margin: 5px 0;
}
.assistant-message {
background-color: #ffffff;
color: #1a1a1a;
padding: 15px;
border-radius: 15px;
margin: 5px 0;
border: 1px solid #e0e0e0;
}
</style>
""", unsafe_allow_html=True)
# Initialize session state
# Each entry pairs a session key with a zero-argument factory for its initial
# value. The factory is only invoked when the key is absent, so an existing
# WebRAG instance (and any existing chat state) is left untouched.
_SESSION_DEFAULTS = (
    ("rag", WebRAG),          # RAG backend object
    ("chat_history", list),   # -> []
    ("url_processed", bool),  # -> False
    ("current_url", str),     # -> ""
)
for _state_key, _make_default in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Function to reset chat history
def reset_chat_history():
    """Empty the stored conversation and clear the remembered URL.

    Writes two session-state entries: ``chat_history`` becomes an empty list
    and ``current_url`` becomes an empty string. Returns nothing.
    """
    st.session_state["current_url"] = ""
    st.session_state["chat_history"] = []
# Header
# Page title followed by a level-3 markdown tagline.
_APP_TITLE = "π Web RAG Assistant"
_APP_TAGLINE = "### Ask questions about any webpage"
st.title(_APP_TITLE)
st.markdown(_APP_TAGLINE)
# Sidebar
# Collects the target URL and scraping method, runs the crawl when the button
# is pressed, and shows short usage instructions.
with st.sidebar:
    st.header("Settings")
    # Strip surrounding whitespace so a blank/padded entry is not mistaken for a URL.
    url = st.text_input("Enter webpage URL:").strip()
    # Add scraping method selection
    scraping_method = st.selectbox(
        "Select Scraping Method",
        ["beautifulsoup", "scrapegraph", "crawl4ai"],
        help=(
            "\n"
            "BeautifulSoup: Basic HTML parsing, faster but less sophisticated\n"
            "ScrapeGraph: AI-powered scraping, better at understanding content but slower\n"
            "Crawl4ai: Advanced async crawler with good JavaScript support\n"
        ),
    )
    if st.button("Process URL", type="primary"):
        if url:
            with st.spinner("Processing URL... This may take a moment."):
                try:
                    st.session_state.rag.crawl_and_process(url, scraping_method)
                except Exception as e:
                    # Processing failed: leave chat history, current_url and
                    # url_processed exactly as they were before the attempt.
                    st.error(f"Error processing URL: {str(e)}")
                else:
                    # Commit state only after the crawl succeeded. Previously
                    # the history was wiped and current_url overwritten before
                    # the crawl ran, so a failure left the session pointing at
                    # a URL that had never been processed.
                    if url != st.session_state.current_url:
                        reset_chat_history()
                        st.session_state.current_url = url
                    st.session_state.url_processed = True
                    st.success("URL processed successfully!")
                    # Rerun the app to refresh the chat interface; kept outside
                    # the try body so the broad handler above only guards the crawl.
                    st.rerun()
        else:
            st.warning("Please enter a URL")
    st.divider()
    st.markdown("### How to use")
    st.markdown(
        "\n"
        "1. Enter a webpage URL in the input field\n"
        "2. Click 'Process URL' to analyze the content\n"
        "3. Ask questions about the webpage content\n"
        "4. Get AI-powered answers based on the content\n"
    )
# Main chat interface
st.divider()
# Display chat messages
# Every stored message is drawn as a styled <div>; the role decides which CSS
# class (and therefore which bubble colour) is used.
for message in st.session_state.chat_history:
    bubble_class = "user-message" if message["role"] == "user" else "assistant-message"
    # The markup is emitted with unsafe_allow_html=True, so the message text
    # (typed by the user, or produced from scraped page content) must be
    # HTML-escaped; otherwise any tags it contains would be injected into the page.
    safe_content = html.escape(str(message["content"]))
    st.markdown(f"""
<div class="{bubble_class}">
{safe_content}
</div>
""", unsafe_allow_html=True)
# Chat input
# Once a URL has been processed, accept a question, send it to the RAG backend
# together with the earlier conversation, and store the answer. Until then,
# only a hint pointing at the sidebar is shown.
if st.session_state.url_processed:
    question = st.chat_input("Ask a question about the webpage...")
    if question:
        # Build the conversation context BEFORE recording the new question, as
        # (question, answer) pairs. The previous code sent (answer, answer)
        # pairs built from assistant messages only, so earlier questions never
        # reached the backend.
        # NOTE(review): assumes ask_question expects (question, answer)
        # tuples — confirm against rag_app.WebRAG.
        prior_turns = []
        pending_question = None
        for msg in st.session_state.chat_history:
            if msg["role"] == "user":
                # A question whose request failed has no answer; a later
                # question simply replaces it here.
                pending_question = msg["content"]
            elif pending_question is not None:
                prior_turns.append((pending_question, msg["content"]))
                pending_question = None
        # Add user message to chat history
        st.session_state.chat_history.append({"role": "user", "content": question})
        # Get answer from RAG
        with st.spinner("Thinking..."):
            try:
                answer = st.session_state.rag.ask_question(question, prior_turns)
            except Exception as e:
                st.error(f"Error: {str(e)}")
            else:
                # Add assistant message to chat history
                st.session_state.chat_history.append({"role": "assistant", "content": answer})
                # Rerun so the loop that draws the history picks up the new messages.
                st.rerun()
else:
    st.info("π Please process a URL first using the sidebar")
|