"""Gradio question-answering front end backed by a RAG engine.

On start-up the script loads the crawled content chunks and their embeddings
from pickle caches under ``./data``; whichever cache is missing is rebuilt
(by crawling the documentation site, or by indexing the chunks) and saved.
It then serves a single-textbox Gradio interface.
"""

import os
import pickle

import gradio as gr

from crawler import ContentCrawler
from rag import RAGEngine

# File paths for the pickled chunks and embeddings caches.
chunks = "./data/chunks.pkl"
embeddings = "./data/embeddings.pkl"


def _save_pickle(obj, path):
    """Pickle *obj* to *path*, creating the parent directory if needed.

    Without the directory creation, a fresh checkout (no ``./data`` folder)
    would finish the expensive crawl/indexing step and then fail with
    ``FileNotFoundError`` when opening the cache file for writing.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(obj, f)


# NOTE(review): pickle.load can execute arbitrary code from the file it reads.
# These caches are written by this script itself; never point the paths above
# at files obtained from an untrusted source.

# Load the chunks from the cache if present; otherwise crawl the website and
# cache the result.
if os.path.exists(chunks):
    print("Loading chunks")
    with open(chunks, "rb") as f:
        results = pickle.load(f)
else:
    print("Chunks file not found. Crawling the website...")
    # Base URL of the site to crawl.
    base_url = "https://doc-publik.entrouvert.com/"
    crawler = ContentCrawler(base_url)
    results = crawler.crawl()
    # Cache the crawled chunks for the next start-up.
    _save_pickle(results, chunks)

# Initialize the RAG engine with the loaded chunks.
rag_engine = RAGEngine(results)

# Load the embeddings from the cache if present; otherwise compute them via
# index_documents() and cache them.
if os.path.exists(embeddings):
    print("Loading embeddings")
    with open(embeddings, "rb") as f:
        rag_engine.embeddings = pickle.load(f)
else:
    print("Creating embeddings")
    rag_engine.index_documents()
    _save_pickle(rag_engine.embeddings, embeddings)


def answer_question(question):
    """Answer *question* with the RAG engine, yielding Markdown for the UI.

    This is a generator: it first yields a loading placeholder, then yields
    the final output once ``rag_engine.rag`` has returned.

    Args:
        question: The text entered by the user.

    Yields:
        The loading message, followed by either the response (with a
        "Sources" list of clickable Markdown links when the result carries
        URLs) or an error message if anything raised while answering.
    """
    # Show a loading message immediately, before the engine is queried.
    yield "Chargement en cours..."

    try:
        result = rag_engine.rag(question, top_k=5)

        # The result is read through .get(), so missing keys fall back to an
        # empty response / no sources instead of raising.
        response = result.get("response", "")
        urls = result.get("urls", [])

        # Append the sources as Markdown links when URLs are present.
        if urls:
            links_md = "\n".join(f"- [{url}]({url})" for url in urls)
            markdown_output = f"{response}\n\n**Sources:**\n{links_md}"
        else:
            markdown_output = response

        # Emit the final answer, replacing the loading message.
        yield markdown_output
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crashing
        # the request.
        yield f"Une erreur est survenue: {e}"


# Gradio interface for the Q&A, with Markdown-formatted output and flagging
# disabled.
iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Votre question"),
    outputs=gr.Markdown(label="Réponse"),
    title="Publik Q&A",
    flagging_mode="never",
    description="Poser des questions sur Publik",
)

# Launch the Gradio interface.
iface.launch()