|
""" |
|
Gradio app for Norwegian RAG chatbot. |
|
Provides a web interface for interacting with the chatbot. |
|
""" |
|
|
|
import os |
|
import gradio as gr |
|
import tempfile |
|
from typing import List, Dict, Any, Tuple, Optional |
|
|
|
from ..api.huggingface_api import HuggingFaceAPI |
|
from ..document_processing.processor import DocumentProcessor |
|
from ..rag.retriever import Retriever |
|
from ..rag.generator import Generator |
|
|
|
class ChatbotApp:
    """
    Gradio app for Norwegian RAG chatbot.

    Builds a three-tab Gradio Blocks UI: a chat tab that answers through the
    retriever and generator, a document upload tab backed by the document
    processor, and a tab showing static embed snippets.
    """

    def __init__(
        self,
        api_client: Optional["HuggingFaceAPI"] = None,
        document_processor: Optional["DocumentProcessor"] = None,
        retriever: Optional["Retriever"] = None,
        generator: Optional["Generator"] = None,
        title: str = "Norwegian RAG Chatbot",
        description: str = "En chatbot basert på Retrieval-Augmented Generation (RAG) for norsk språk."
    ):
        """
        Initialize the chatbot app.

        Any component left as ``None`` is constructed here; the default
        document processor, retriever and generator all share this app's
        API client.

        Args:
            api_client: HuggingFaceAPI client
            document_processor: Document processor
            retriever: Retriever for finding relevant chunks
            generator: Generator for creating responses
            title: App title
            description: App description
        """
        # Compare against None explicitly: a supplied component that happens
        # to be falsy (for example one that defines __len__ and is currently
        # empty) must not be silently swapped for a fresh default.
        self.api_client = HuggingFaceAPI() if api_client is None else api_client
        self.document_processor = (
            DocumentProcessor(api_client=self.api_client)
            if document_processor is None
            else document_processor
        )
        self.retriever = (
            Retriever(api_client=self.api_client) if retriever is None else retriever
        )
        self.generator = (
            Generator(api_client=self.api_client) if generator is None else generator
        )

        self.title = title
        self.description = description

        # Built last: the event handlers registered by the interface are
        # bound methods that use the components assigned above.
        self.app = self._build_interface()

    def _build_interface(self) -> "gr.Blocks":
        """
        Build the Gradio interface.

        Returns:
            Gradio Blocks interface
        """
        with gr.Blocks(title=self.title) as app:
            gr.Markdown(f"# {self.title}")
            gr.Markdown(self.description)

            with gr.Tabs():

                with gr.Tab("Chat"):
                    chatbot = gr.Chatbot(height=500)

                    with gr.Row():
                        msg = gr.Textbox(
                            placeholder="Skriv din melding her...",
                            show_label=False,
                            scale=9
                        )
                        submit_btn = gr.Button("Send", scale=1)

                    with gr.Accordion("Avanserte innstillinger", open=False):
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperatur"
                        )

                    clear_btn = gr.Button("Tøm chat")

                    # The Send button and pressing Enter in the textbox run
                    # the same handler with the same inputs and outputs.
                    respond_wiring = dict(
                        fn=self._respond,
                        inputs=[msg, chatbot, temperature],
                        outputs=[msg, chatbot]
                    )
                    submit_btn.click(**respond_wiring)
                    msg.submit(**respond_wiring)

                    # Clearing only resets the chatbot component.
                    clear_btn.click(
                        fn=lambda: None,
                        inputs=None,
                        outputs=chatbot,
                        queue=False
                    )

                with gr.Tab("Last opp dokumenter"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            # NOTE(review): file_output is displayed but is not
                            # an output of any event registered below.
                            file_output = gr.File(label="Opplastede dokumenter")
                            upload_button = gr.UploadButton(
                                "Klikk for å laste opp dokument",
                                # Extensions need the leading dot; bare words
                                # are read as file categories such as "image".
                                file_types=[".pdf", ".txt", ".html"],
                                file_count="multiple"
                            )

                        with gr.Column(scale=3):
                            documents_list = gr.Dataframe(
                                headers=["Dokument ID", "Filnavn", "Dato", "Chunks"],
                                label="Dokumentliste",
                                interactive=False
                            )

                    process_status = gr.Textbox(label="Status", interactive=False)
                    refresh_btn = gr.Button("Oppdater dokumentliste")

                    upload_button.upload(
                        fn=self._process_uploaded_files,
                        inputs=[upload_button],
                        outputs=[process_status, documents_list]
                    )

                    refresh_btn.click(
                        fn=self._get_documents_list,
                        inputs=None,
                        outputs=[documents_list]
                    )

                with gr.Tab("Integrer"):
                    gr.Markdown("## Integrer chatboten på din nettside")

                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("### iFrame-kode")
                            iframe_code = gr.Code(
                                label="iFrame",
                                language="html",
                                value='<iframe src="https://huggingface.co/spaces/username/norwegian-rag-chatbot" width="100%" height="500px"></iframe>'
                            )

                        with gr.Column():
                            gr.Markdown("### JavaScript Widget")
                            js_code = gr.Code(
                                label="JavaScript",
                                language="html",
                                value='<script src="https://huggingface.co/spaces/username/norwegian-rag-chatbot/widget.js"></script>'
                            )

                    gr.Markdown("### Forhåndsvisning")
                    gr.Markdown("*Forhåndsvisning vil være tilgjengelig etter at chatboten er distribuert til Hugging Face Spaces.*")

            gr.Markdown("---")
            gr.Markdown("Bygget med [Hugging Face](https://huggingface.co/) og [Gradio](https://gradio.app/)")

        return app

    def _respond(
        self,
        message: str,
        chat_history: Optional[List[Tuple[str, str]]],
        temperature: float
    ) -> Tuple[str, List[Tuple[str, str]]]:
        """
        Generate a response to the user message.

        The incoming history is never mutated; a new list is returned. Any
        exception raised while retrieving or generating is shown to the user
        as the bot's reply instead of propagating.

        Args:
            message: User message
            chat_history: Chat history; ``None`` is treated as empty
            temperature: Temperature for text generation

        Returns:
            Empty message (clears the input box) and updated chat history
        """
        # Work on a copy so the caller's list is left untouched, and accept
        # None so a missing history cannot raise on append.
        history = list(chat_history) if chat_history else []

        # Nothing to answer for an empty or whitespace-only message.
        if not message or not message.strip():
            return "", history

        try:
            retrieved_chunks = self.retriever.retrieve(message)
            reply = self.generator.generate(
                query=message,
                retrieved_chunks=retrieved_chunks,
                temperature=temperature
            )
        except Exception as e:
            # UI boundary: surface the failure in the chat rather than
            # letting the event handler raise.
            reply = f"Beklager, det oppstod en feil: {str(e)}"

        history.append((message, reply))
        return "", history

    @staticmethod
    def _uploaded_path(file: Any) -> str:
        """Return the filesystem path for an uploaded item (path or object with ``.name``)."""
        # Plain paths are checked first: pathlib objects also expose ``.name``,
        # but there it is only the final component, not the full path.
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return file.name

    def _process_uploaded_files(
        self,
        files: Optional[List[Any]]
    ) -> Tuple[str, List[List[Any]]]:
        """
        Process uploaded files.

        Every file is attempted; a failure on one file does not stop the
        remaining files from being processed.

        Args:
            files: Uploaded files, each either a path or an object whose
                ``name`` attribute is the path

        Returns:
            Status message (successes first, then one line per failure) and
            updated documents list
        """
        if not files:
            return "Ingen filer lastet opp.", self._get_documents_list()

        processed_files = []
        failures = []

        for file in files:
            path = self._uploaded_path(file)
            filename = os.path.basename(path)
            try:
                self.document_processor.process_document(path)
            except Exception as e:
                # Record the failure and keep going with the other files.
                failures.append(f"Feil ved behandling av {filename}: {str(e)}")
            else:
                processed_files.append(filename)

        status_lines = []
        if len(processed_files) == 1:
            status_lines.append(f"Fil behandlet: {processed_files[0]}")
        elif processed_files:
            status_lines.append(
                f"{len(processed_files)} filer behandlet: {', '.join(processed_files)}"
            )
        status_lines.extend(failures)

        return "\n".join(status_lines), self._get_documents_list()

    def _get_documents_list(self) -> List[List[Any]]:
        """
        Get list of processed documents.

        Returns:
            One row per document: ID, filename, processed date, chunk count.
            Missing metadata falls back to "N/A" (filename, date) or 0
            (chunk count).
        """
        documents = self.document_processor.get_all_documents()

        return [
            [
                doc_id,
                metadata.get("filename", "N/A"),
                metadata.get("processed_date", "N/A"),
                metadata.get("chunk_count", 0),
            ]
            for doc_id, metadata in documents.items()
        ]

    def launch(self, **kwargs):
        """
        Launch the Gradio app.

        Args:
            **kwargs: Additional arguments forwarded to the Blocks ``launch()``

        Returns:
            Whatever the underlying ``launch()`` call returns
        """
        return self.app.launch(**kwargs)
|
|
|
|
|
def create_app():
    """
    Create and configure the chatbot app.

    A single HuggingFaceAPI client is created and handed to the document
    processor, the retriever and the generator, so all three share it.

    Returns:
        Configured ChatbotApp instance
    """
    shared_client = HuggingFaceAPI()

    # Keyword arguments are evaluated left to right, so the components are
    # constructed in the order: document processor, retriever, generator.
    return ChatbotApp(
        api_client=shared_client,
        document_processor=DocumentProcessor(api_client=shared_client),
        retriever=Retriever(api_client=shared_client),
        generator=Generator(api_client=shared_client)
    )
|
|