"""Norwegian RAG chatbot app: a Gradio chat UI backed by GPT-4o, with an
optional Retrieval-Augmented Generation path over user-uploaded documents."""

import os

import gradio as gr
from openai import OpenAI

from src.document_processing.processor import DocumentProcessor
from src.rag.retriever import Retriever
from src.rag.generator import Generator
from src.api.openai_api import OpenAIAPI

# Initialize OpenAI clients once at import time.
# `openai_api` is the project wrapper used by the RAG components;
# `client` is the raw SDK client used for the plain (non-RAG) chat path.
# Hoisted to module level so `respond` does not rebuild a client per message.
api_key = os.environ.get("OPENAI_API_KEY", "")
openai_api = OpenAIAPI(api_key=api_key)
client = OpenAI(api_key=api_key)

# Initialize RAG components with the OpenAI API wrapper.
document_processor = DocumentProcessor(api_client=openai_api)
retriever = Retriever(api_client=openai_api)
generator = Generator(api_client=openai_api)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Yield assistant replies for the Gradio ChatInterface.

    If the user opts in by writing 'bruk dokumenter' or 'bruk rag'
    (Norwegian for "use documents" / "use RAG") anywhere in the message,
    answer via the RAG pipeline (retrieve chunks, then generate). On any
    RAG failure — or when RAG is not requested — fall back to a plain
    streaming GPT-4o chat completion built from `history`.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs from the chat UI.
        system_message: System prompt text.
        max_tokens: Token cap for the non-RAG completion.
        temperature: Sampling temperature (used by both paths).
        top_p: Nucleus-sampling parameter for the non-RAG path.

    Yields:
        str: The accumulated response text (streamed incrementally on the
        non-RAG path; a single full string on the RAG path).
    """
    # Opt-in RAG trigger phrases (case-insensitive).
    use_rag = "bruk dokumenter" in message.lower() or "bruk rag" in message.lower()

    if use_rag:
        try:
            # Retrieve relevant chunks, then generate a grounded answer.
            retrieved_chunks = retriever.retrieve(message)
            response = generator.generate(
                query=message,
                retrieved_chunks=retrieved_chunks,
                temperature=temperature,
            )
            yield response
            return
        except Exception as e:
            # Deliberate best-effort: if RAG fails for any reason, log and
            # fall through to the standard GPT-4o path below.
            print(f"RAG failed: {str(e)}, falling back to standard GPT-4o")

    # Standard GPT-4o path: rebuild the message list from the chat history.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the growing response for live updates.
    response = ""
    for chunk in client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        content = chunk.choices[0].delta.content
        if content:
            response += content
            yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="Du er en hjelpsom assistent som svarer på norsk. Bruk kunnskapen din til å svare på spørsmål. Hvis brukeren skriver 'bruk dokumenter' eller 'bruk RAG', vil du bruke Retrieval-Augmented Generation for å svare basert på opplastede dokumenter.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="Norwegian RAG Chatbot with GPT-4o",
    description="En chatbot basert på Retrieval-Augmented Generation (RAG) for norsk språk med GPT-4o. Skriv 'bruk dokumenter' eller 'bruk RAG' i meldingen din for å aktivere RAG-funksjonalitet.",
)

# Document upload interface: lets the user upload PDF/TXT/HTML files and see
# the processed-document list maintained by `document_processor`.
with gr.Blocks() as document_upload:
    with gr.Tab("Last opp dokumenter"):
        with gr.Row():
            with gr.Column(scale=2):
                file_output = gr.File(label="Opplastede dokumenter")
                upload_button = gr.UploadButton(
                    "Klikk for å laste opp dokument",
                    # Gradio matches exact extensions that start with a dot;
                    # bare "pdf"/"txt"/"html" would never match any file.
                    file_types=[".pdf", ".txt", ".html"],
                    file_count="multiple",
                )
            with gr.Column(scale=3):
                documents_list = gr.Dataframe(
                    headers=["Dokument ID", "Filnavn", "Dato", "Chunks"],
                    label="Dokumentliste",
                    interactive=False,
                )
                process_status = gr.Textbox(label="Status", interactive=False)
                refresh_btn = gr.Button("Oppdater dokumentliste")

        # Wire uploads to the document processor; it is expected to return
        # (status_text, documents_rows) — TODO confirm against processor API.
        upload_button.upload(
            fn=document_processor.process_document,
            inputs=[upload_button],
            outputs=[process_status, documents_list],
        )

        # Rebuild the document table rows from the processor's metadata store.
        refresh_btn.click(
            fn=lambda: [
                [
                    doc_id,
                    meta.get("filename", "N/A"),
                    meta.get("processed_date", "N/A"),
                    meta.get("chunk_count", 0),
                ]
                for doc_id, meta in document_processor.get_all_documents().items()
            ],
            inputs=None,
            outputs=[documents_list],
        )

# Combine the chat and document interfaces into one tabbed app.
app = gr.TabbedInterface([demo, document_upload], ["Chat", "Dokumenter"])

if __name__ == "__main__":
    app.launch()