# iver/src/web/app.py
# hevold's picture · Upload 29 files · b34efa5 (verified)
"""
Gradio app for Norwegian RAG chatbot.
Provides a web interface for interacting with the chatbot.
"""
import os
import gradio as gr
import tempfile
from typing import List, Dict, Any, Tuple, Optional
from ..api.huggingface_api import HuggingFaceAPI
from ..document_processing.processor import DocumentProcessor
from ..rag.retriever import Retriever
from ..rag.generator import Generator
class ChatbotApp:
    """
    Gradio app for Norwegian RAG chatbot.

    Combines a retriever, a generator and a document processor behind a
    Gradio Blocks UI with three tabs: chat, document upload and embed
    snippets.
    """

    def __init__(
        self,
        api_client: Optional["HuggingFaceAPI"] = None,
        document_processor: Optional["DocumentProcessor"] = None,
        retriever: Optional["Retriever"] = None,
        generator: Optional["Generator"] = None,
        title: str = "Norwegian RAG Chatbot",
        description: str = "En chatbot basert på Retrieval-Augmented Generation (RAG) for norsk språk."
    ):
        """
        Initialize the chatbot app.

        Any component left as None is created here, sharing the same API
        client with the other components.

        Args:
            api_client: HuggingFaceAPI client
            document_processor: Document processor
            retriever: Retriever for finding relevant chunks
            generator: Generator for creating responses
            title: App title
            description: App description
        """
        # Compare against None instead of relying on truthiness: an injected
        # component that happens to be falsy (e.g. it defines __len__ and is
        # currently empty) must not be silently replaced by a default one.
        if api_client is None:
            api_client = HuggingFaceAPI()
        if document_processor is None:
            document_processor = DocumentProcessor(api_client=api_client)
        if retriever is None:
            retriever = Retriever(api_client=api_client)
        if generator is None:
            generator = Generator(api_client=api_client)

        # Initialize components
        self.api_client = api_client
        self.document_processor = document_processor
        self.retriever = retriever
        self.generator = generator

        # App settings
        self.title = title
        self.description = description

        # Initialize Gradio app (must come last: the UI callbacks are bound
        # methods that use the components assigned above)
        self.app = self._build_interface()

    def _build_interface(self) -> "gr.Blocks":
        """
        Build the Gradio interface.

        NOTE(review): the nesting of the layout containers was reconstructed
        from a source whose indentation had been lost - confirm the visual
        layout against the deployed app.

        Returns:
            Gradio Blocks interface
        """
        with gr.Blocks(title=self.title) as app:
            gr.Markdown(f"# {self.title}")
            gr.Markdown(self.description)

            with gr.Tabs():
                with gr.Tab("Chat"):
                    self._build_chat_tab()
                with gr.Tab("Last opp dokumenter"):
                    self._build_upload_tab()
                with gr.Tab("Integrer"):
                    self._build_embed_tab()

            gr.Markdown("---")
            gr.Markdown("Bygget med [Hugging Face](https://huggingface.co/) og [Gradio](https://gradio.app/)")

        return app

    def _build_chat_tab(self) -> None:
        """Create the chat widgets and wire them to ``_respond``."""
        chatbot = gr.Chatbot(height=500)

        with gr.Row():
            msg = gr.Textbox(
                placeholder="Skriv din melding her...",
                show_label=False,
                scale=9
            )
            submit_btn = gr.Button("Send", scale=1)

        with gr.Accordion("Avanserte innstillinger", open=False):
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperatur"
            )

        clear_btn = gr.Button("Tøm chat")

        # Clicking "Send" and pressing Enter in the textbox do the same thing.
        for trigger in (submit_btn.click, msg.submit):
            trigger(
                fn=self._respond,
                inputs=[msg, chatbot, temperature],
                outputs=[msg, chatbot]
            )

        # Returning None resets the chatbot component.
        clear_btn.click(
            fn=lambda: None,
            inputs=None,
            outputs=chatbot,
            queue=False
        )

    def _build_upload_tab(self) -> None:
        """Create the upload widgets and wire them to the document processor."""
        with gr.Row():
            with gr.Column(scale=2):
                gr.File(label="Opplastede dokumenter")
                upload_button = gr.UploadButton(
                    "Klikk for å laste opp dokument",
                    # Gradio treats entries without a leading dot as media
                    # categories ("image", "audio", ...); file extensions
                    # must be written with the dot.
                    file_types=[".pdf", ".txt", ".html"],
                    file_count="multiple"
                )
            with gr.Column(scale=3):
                documents_list = gr.Dataframe(
                    headers=["Dokument ID", "Filnavn", "Dato", "Chunks"],
                    label="Dokumentliste",
                    interactive=False
                )

        process_status = gr.Textbox(label="Status", interactive=False)
        refresh_btn = gr.Button("Oppdater dokumentliste")

        upload_button.upload(
            fn=self._process_uploaded_files,
            inputs=[upload_button],
            outputs=[process_status, documents_list]
        )
        refresh_btn.click(
            fn=self._get_documents_list,
            inputs=None,
            outputs=[documents_list]
        )

    def _build_embed_tab(self) -> None:
        """Create the static tab showing iframe/JavaScript embed snippets."""
        gr.Markdown("## Integrer chatboten på din nettside")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### iFrame-kode")
                gr.Code(
                    label="iFrame",
                    language="html",
                    value='<iframe src="https://huggingface.co/spaces/username/norwegian-rag-chatbot" width="100%" height="500px"></iframe>'
                )
            with gr.Column():
                gr.Markdown("### JavaScript Widget")
                gr.Code(
                    label="JavaScript",
                    language="html",
                    value='<script src="https://huggingface.co/spaces/username/norwegian-rag-chatbot/widget.js"></script>'
                )

        gr.Markdown("### Forhåndsvisning")
        gr.Markdown("*Forhåndsvisning vil være tilgjengelig etter at chatboten er distribuert til Hugging Face Spaces.*")

    def _respond(
        self,
        message: str,
        chat_history: Optional[List[Tuple[str, Optional[str]]]],
        temperature: float
    ) -> Tuple[str, List[Tuple[str, Optional[str]]]]:
        """
        Generate a response to the user message.

        The incoming history is copied, never modified in place, and a
        missing (None) history is treated as empty. Empty or
        whitespace-only messages are ignored.

        Args:
            message: User message
            chat_history: Chat history as (user, bot) pairs, or None
            temperature: Temperature for text generation

        Returns:
            Empty message (clears the input box) and updated chat history
        """
        history = list(chat_history or [])

        if not message or not message.strip():
            return "", history

        try:
            # Retrieve relevant chunks, then generate an answer from them
            retrieved_chunks = self.retriever.retrieve(message)
            reply = self.generator.generate(
                query=message,
                retrieved_chunks=retrieved_chunks,
                temperature=temperature
            )
        except Exception as e:
            # UI boundary: show the failure in the chat instead of letting
            # the callback raise.
            reply = f"Beklager, det oppstod en feil: {e}"

        history.append((message, reply))
        return "", history

    def _process_uploaded_files(
        self,
        files: Optional[List[Any]]
    ) -> Tuple[str, List[List[Any]]]:
        """
        Process uploaded files.

        Every file is attempted; a failure on one file does not stop the
        remaining files from being processed. The status message lists the
        files that were processed followed by one error entry per failed
        file.

        Args:
            files: Uploaded files, each either a path string or an object
                exposing the path as ``.name``; may be None or empty

        Returns:
            Status message and updated documents list
        """
        if not files:
            return "Ingen filer lastet opp.", self._get_documents_list()

        # Tolerate a single upload that is not wrapped in a list.
        if not isinstance(files, (list, tuple)):
            files = [files]

        processed_files: List[str] = []
        failures: List[str] = []

        for file in files:
            # Temp-file wrappers carry the path in .name; plain strings are
            # the path themselves.
            path = getattr(file, "name", file)
            filename = os.path.basename(str(path))
            try:
                self.document_processor.process_document(path)
            except Exception as e:
                failures.append(f"Feil ved behandling av {filename}: {e}")
            else:
                processed_files.append(filename)

        parts: List[str] = []
        if len(processed_files) == 1:
            parts.append(f"Fil behandlet: {processed_files[0]}")
        elif processed_files:
            parts.append(f"{len(processed_files)} filer behandlet: {', '.join(processed_files)}")
        parts.extend(failures)

        return "; ".join(parts), self._get_documents_list()

    def _get_documents_list(self) -> List[List[Any]]:
        """
        Get list of processed documents.

        Missing metadata fields fall back to "N/A" (filename, date) or 0
        (chunk count). An empty or None document store yields an empty list.

        Returns:
            One row per document: [document id, filename, processed date,
            chunk count]
        """
        documents = self.document_processor.get_all_documents() or {}

        # Format for dataframe
        rows: List[List[Any]] = []
        for doc_id, metadata in documents.items():
            meta = metadata or {}
            rows.append([
                doc_id,
                meta.get("filename", "N/A"),
                meta.get("processed_date", "N/A"),
                meta.get("chunk_count", 0),
            ])
        return rows

    def launch(self, **kwargs: Any) -> Any:
        """
        Launch the Gradio app.

        Args:
            **kwargs: Additional arguments forwarded to gr.Blocks.launch()

        Returns:
            Whatever gr.Blocks.launch() returns
        """
        return self.app.launch(**kwargs)
def create_app():
    """
    Create and configure the chatbot app.

    A single HuggingFaceAPI client is created and handed to the document
    processor, the retriever and the generator, which are then passed to
    ChatbotApp together with the client.

    Returns:
        Configured ChatbotApp instance
    """
    # One shared client for every component
    client = HuggingFaceAPI()

    # Keyword arguments are evaluated top to bottom, so the components are
    # constructed in the order: processor, retriever, generator.
    return ChatbotApp(
        api_client=client,
        document_processor=DocumentProcessor(api_client=client),
        retriever=Retriever(api_client=client),
        generator=Generator(api_client=client),
    )