"""Gradio simulator for the "BQ knowledge engine".

Given a dataset name, the app asks a Gemini model to invent a plausible
schema and then derives, step by step, a description, table relationships,
an ASCII knowledge graph, a business glossary, exploration queries, a LookML
model and a data-preparation pipeline.  Results are streamed to the UI as
each step completes.
"""

import os

import gradio as gr
from google import genai

# SECURITY: the API key must never live in source control.  It is read from
# the environment instead; any key that was previously committed in this file
# should be considered leaked and rotated.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
)
MODEL_ID = "gemini-2.0-flash-exp"


def model_response(text):
    """Send *text* to the configured Gemini model and return its text reply.

    Args:
        text: The prompt to send.

    Returns:
        The text of the model's response.  An empty string is returned when
        the response carries no text, so that callers never interpolate the
        literal string "None" into follow-up prompts.
    """
    response = client.models.generate_content(
        model=MODEL_ID,
        contents=text,
    )
    return response.text or ""


def generate_dataset_queries(dataset_id, generated_glossary, schema_relationships):
    """Ask the model for data-exploration queries over the dataset.

    Args:
        dataset_id: Text identifying the dataset.  ``run`` passes the full
            generated schema here rather than a bare identifier.
        generated_glossary: Business glossary text for the dataset.
        schema_relationships: Text describing how the tables relate.

    Returns:
        The model's reply as a string.
    """
    prompt = (
        f"You are an expert in drafting BQ queries. Look at the dataset: {dataset_id}, "
        f"look at the glossary: {generated_glossary} and {schema_relationships} "
        "and recommend interesting data exploration queries. "
        "Format: Query description in the form of a single line question Actual query"
    )
    return model_response(prompt)


def generate_lookml(dataset_id, generated_glossary, schema_relationships):
    """Ask the model for a LookML semantic model matching the dataset.

    Args:
        dataset_id: Text identifying the dataset.  ``run`` passes the full
            generated schema here rather than a bare identifier.
        generated_glossary: Business glossary text for the dataset.
        schema_relationships: Text describing how the tables relate.

    Returns:
        The model's reply as a string.
    """
    prompt = (
        f"You are an expert in drafting LookML models. Look at the dataset: {dataset_id}, "
        f"look at the glossary: {generated_glossary} and {schema_relationships} "
        "and recommend the LookML semantic model corresponding to the dataset. "
    )
    return model_response(prompt)


def run(DATASET_ID):
    """Generate every artefact for *DATASET_ID*, yielding progress as it goes.

    Each step is one model call.  After each step the generator yields a
    7-tuple holding everything produced so far, with ``None`` in the slots
    that are still pending:

        (description, relationships, knowledge_graph, glossary,
         queries, lookml, data_prep_pipeline)

    Args:
        DATASET_ID: Name of the dataset to simulate.

    Yields:
        The 7-tuple described above, once per completed step.

    Returns:
        The final, fully populated 7-tuple (available to ``yield from``
        callers; a plain ``for`` loop ignores it).
    """
    relationships = None
    knowledge_graph = None
    glossary = None
    queries = None
    lookml = None
    data_prep_pipeline = None

    dataset = model_response(
        "You are an expert in BQ public datasets. "
        f"Generate a dataset schema related to {DATASET_ID}. "
        "You need to come up with atleast 5 tables with each table containing "
        "atleast 10 columns along with their descriptions."
        "Ensure that these tables have columns that talk about data quality issues."
    )
    description = model_response(
        f"Generate a succinct 3-4 line description of the dataset: {dataset}."
    )

    def snapshot():
        # Always the same 7-slot shape so consumers can index positionally.
        return (
            description,
            relationships,
            knowledge_graph,
            glossary,
            queries,
            lookml,
            data_prep_pipeline,
        )

    yield snapshot()

    relationships = model_response(
        f"Based on the dataset provided: {dataset}, identify all the possible "
        "relationships that exist between the tables in the dataset. "
        "Discover these relationships from the point of view of data exploration. \n"
        "Output: List of relationships along with the description which is the "
        "business value of the relationship and a query with description that "
        "validates the relationship. "
        "Ensure that the column names and table names are accurate."
    )
    yield snapshot()

    knowledge_graph = model_response(
        f"Based on the context: {relationships}, generate a knowledge graph "
        "represented using ASCII art. Also generate a brief description of the graph. "
        "Output: Description of the graph listing all the relationships in markdown "
        "format ASCII version of the knowledge graph with nodes represented by tables "
        "and edges represented by the relationships. "
        "Edges should be annotated with the type of relationships identified - "
        "many-to-one, many-to-many, one-to-one, primary key, self joins, foreign keys etc"
    )
    yield snapshot()

    glossary = model_response(
        f"Based on the relationships identified: {knowledge_graph} and the dataset: "
        f"{description}, generate glossary terms that will help business users "
        "easily find the tables in the dataset. "
        "## Task "
        "- Your goal is to create a business glossary for the data in this dataset, "
        "aligned with the definition of business glossary specified above. "
        "- Provide each business term in a newline, along with the definition. "
        "- Include examples in the term definitions, wherever suitable. "
        "- Make sure the business terms are relevant as per the table and column "
        "names and descriptions, and relevant to the domain to which the data belongs. "
        "- Also include a few business terms around the users/clients and around "
        "5 key metrics in the domain of the data. "
        "- After defining the terms, identify the relationships between the "
        "business terms identified previously. \n"
        "## Output format "
        "Ensure that the output is in markdown format with proper indentation "
        "- Output each business glossary term definition in a newline in the "
        "folowing format: term: definition "
        "- For the business terms which are the key metrics in the business domain, "
        'mark such terms by adding "[METRIC]" in the beginning of the line, '
        "in the following format: [METRIC] term: definition "
        "- Then print a header to indicate the end of this section and start of "
        "the relationships section. "
        "- Then output the relationships between the business terms as follows: "
        "term -> [related_term1, related_term2] "
        "Show the relationship between the glossary term and the column broken "
        "down by each table. "
    )
    yield snapshot()

    queries = generate_dataset_queries(dataset, glossary, knowledge_graph)
    yield snapshot()

    lookml = generate_lookml(dataset, glossary, knowledge_graph)
    yield snapshot()

    data_prep_pipeline = model_response(
        f"Given the dataset: {dataset}, schema relationships: {relationships} "
        f"and graph:{knowledge_graph}, generate a data preparation pipeline that "
        "fixes the possible data quality issues across the tables in the dataset."
    )
    yield snapshot()

    return snapshot()


def wrapper(dataset_id):
    """Adapt ``run`` to the six output components of the Gradio interface.

    ``run`` yields seven values per step; the UI only shows the first six
    (description, relationships, knowledge graph, glossary, queries, LookML),
    so the trailing data-preparation pipeline is dropped here.
    """
    for outputs in run(dataset_id):
        yield outputs[:6]


iface = gr.Interface(
    fn=wrapper,
    inputs=gr.Textbox(label="Dataset ID"),
    # Order must match the tuple positions yielded by ``wrapper``: position 1
    # is the relationship list and position 2 is the ASCII knowledge graph.
    outputs=[
        gr.Markdown(label="Dataset description"),
        gr.Markdown(label="Schema Relationships"),
        gr.Markdown(label="Knowledge Graph"),
        gr.Markdown(label="Generated Glossary"),
        gr.Textbox(label="Queries"),
        gr.Markdown(label="LookML Model"),
    ],
    live=False,
    theme=gr.themes.Ocean(),
    title="BQ knowledge engine ⚙️💡📊 (Simulator)",
    description="Provide a dataset ID to generate LookML, schema relationships, glossary, and more...",
    examples=[
        'ncaa_basketball2',
        'thelook_ecommerce',
        'geo_openstreetmap',
        'google_political_ads',
        'noaa_historic_severe_storms',
        'stackoverflow',
    ],
    article="This is a simulator that provides a sneak-peek into how BQ knowledge engine works.",
)

# Launch the app
iface.launch(share=True, debug=True)