"""app.py Smolagents agent given an SQL tool over a SQLite database built with data files from the Internation Consortium of Investigative Journalism (ICIJ.org). Agentic framework: - smolagents Database: - SQLite Generation: - Mistral :author: Didier Guillevic :date: 2025-01-12 """ import gradio as gr import icij_utils import smolagents import os # # Init a SQLite database with the data files from ICIJ.org # ICIJ_LEAKS_DB_NAME = 'icij_leaks.db' ICIJ_LEAKS_DATA_DIR = './icij_data' # Remove existing database (if present), since we will recreate it below. icij_db_path = pathlib.Path(ICIJ_LEAKS_DB_NAME) icij_db_path.unlink(missing_ok=True) # Load ICIJ data files into an SQLite database loader = icij_utils.ICIJDataLoader(ICIJ_LEAKS_DB_NAME) loader.load_all_files(ICIJ_LEAKS_DATA_DIR) # # Init an SQLAchemy instane (over the SQLite database) # db = icij_utils.ICIJDatabaseConnector(ICIJ_LEAKS_DB_NAME) schema = db.get_full_database_schema() # # Build an SQL tool # schema = db.get_full_database_schema() metadata = icij_utils.ICIJDatabaseMetadata() tool_description = ( "Tool for querying the ICIJ offshore database containing financial data leaks. " "This tool can execute SQL queries and return the results. " "Beware that this tool's output is a string representation of the execution output.\n" "It can use the following tables:" ) # Add table documentation for table, doc in metadata.TABLE_DOCS.items(): tool_description += f"\n\nTable: {table}\n" tool_description += f"Description: {doc.strip()}\n" tool_description += "Columns:\n" # Add column documentation and types if table in schema: for col_name, col_type in schema[table].items(): col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available") #tool_description += f" - {col_name}: {col_type}: {col_doc}\n" tool_description += f" - {col_name}: {col_type}\n" # Add source documentation #tool_description += "\n\nSource IDs:\n" #for source_id, descrip in metadata.SOURCE_IDS.items(): # tool_description += f"- {source_id}: {descrip}\n" @smolagents.tool def sql_tool(query: str) -> str: """Description to be set beloiw... Args: query: The query to perform. This should be correct SQL. """ output = "" with db.get_engine().connect() as con: rows = con.execute(sqlalchemy.text(query)) for row in rows: output += "\n" + str(row) return output sql_tool.description = tool_description # # language models # default_model = smolagents.HfApiModel() mistral_api_key = os.environ["MISTRAL_API_KEY"] mistral_model_id = "mistral/codestral-latest" mistral_model = smolagents.LiteLLMModel( model_id=mistral_model_id, api_key=mistral_api_key) # # Define the agent # agent = smolagents.CodeAgent( tools=[sql_engine], model=mistral_model ) def generate_response(query: str) -> str: """Generate a response given query. Args: Returns: - the response from the agent having access to a database over the ICIJ data and a large language model. """ agent_output = agent.run(query) return agent_output # # User interface # with gr.Blocks() as demo: gr.Markdown(""" # SQL agent Database: ICIJ data on offshore financial data leaks. """) # Inputs: question question = gr.Textbox( label="Question to answer", placeholder="" ) # Response response = gr.Textbox( label="Response", placeholder="" ) # Button with gr.Row(): response_button = gr.Button("Submit", variant='primary') clear_button = gr.Button("Clear", variant='secondary') # Example questions given default provided PDF file with gr.Accordion("Sample questions", open=False): gr.Examples( [ ["",], ["",], ], inputs=[question,], outputs=[response,], fn=generate_response, cache_examples=False, label="Sample questions" ) # Documentation with gr.Accordion("Documentation", open=False): gr.Markdown(""" - Agentic framework: smolagents - Data: icij.org - Database: SQLite, SQLAlchemy - Generation: Mistral - Examples: Generated using Claude.ai """) # Click actions response_button.click( fn=generate_response, inputs=[question,], outputs=[response,] ) clear_button.click( fn=lambda: ('', ''), inputs=[], outputs=[question, response] ) demo.launch(show_api=False)