Spaces:
Runtime error
Runtime error
File size: 1,798 Bytes
a00be78 033af05 a00be78 033af05 a00be78 033af05 a00be78 033af05 a00be78 44cb622 a00be78 44cb622 a00be78 44cb622 a00be78 44cb622 a00be78 44cb622 a00be78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfApi
import pandas as pd
import gradio as gr
import duckdb
import requests
# Base URL of the datasets server; get_dataset_ddl appends "/parquet" to it
# to list a dataset's parquet files.
BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
# Hub API client. NOTE(review): not referenced anywhere in the visible code.
hf_api = HfApi()
# Single module-level DuckDB connection, reused by every get_dataset_ddl call.
conn = duckdb.connect()
def get_dataset_ddl(dataset_id: str) -> str:
    """Build a ``CREATE TABLE`` statement describing a Hub dataset's columns.

    Asks the datasets server for the dataset's parquet files, exposes the
    first one through the DuckDB view ``dataset_view`` and reads the column
    names and types back with ``PRAGMA table_info``.

    Args:
        dataset_id: Hub dataset identifier, e.g. ``"user/dataset"``.

    Returns:
        The DDL text, one ``name type`` pair per column.

    Raises:
        requests.HTTPError: If the datasets server answers with an error status.
        requests.Timeout: If the datasets server does not answer in time.
        ValueError: If the dataset has no parquet files, or the first file
            entry carries no URL.
    """
    view_name = "dataset_view"

    # `params=` URL-encodes the dataset id; the timeout keeps a stalled
    # server from blocking the UI callback forever.
    response = requests.get(
        f"{BASE_DATASETS_SERVER_URL}/parquet",
        params={"dataset": dataset_id},
        timeout=30,
    )
    response.raise_for_status()

    # `or []` also covers a response where the key is present but null.
    parquet_files = response.json().get("parquet_files") or []
    if not parquet_files:
        raise ValueError(f"No parquet files found for dataset '{dataset_id}'.")

    first_parquet_url = parquet_files[0].get("url")
    if not first_parquet_url:
        raise ValueError("No valid URL found for the first parquet file.")

    # The URL comes from a remote service and is embedded in a SQL string
    # literal, so double any single quote to keep it inside the literal.
    escaped_url = first_parquet_url.replace("'", "''")
    conn.execute(
        f"CREATE OR REPLACE VIEW {view_name} as SELECT * FROM read_parquet('{escaped_url}');"
    )

    # Each table_info row is indexed as before: [1] is used as the column
    # name and [2] as its type.
    dataset_ddl = conn.execute(f"PRAGMA table_info('{view_name}');").fetchall()
    column_data_types = ",\n\t".join(
        f"{column_name} {column_type}" for _, column_name, column_type, *_ in dataset_ddl
    )

    sql_ddl = """
CREATE TABLE {} (
{}
);
""".format(
        view_name, column_data_types
    )
    return sql_ddl
# Assemble the UI. Components are created in the same order as before, since
# creation order inside the Blocks context determines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# Query your HF Datasets with Natural Language ππ")

    # Dataset picker, pre-filled with a sample dataset id.
    dataset_name = HuggingfaceHubSearch(
        search_type="dataset",
        value="jamescalam/world-cities-geo",
        label="Hub Dataset ID",
        placeholder="Find your favorite dataset...",
    )

    # Question box and results table: created here but not wired to any
    # event in this file yet.
    query_input = gr.Textbox(value="", label="Ask anything...")
    btn = gr.Button(value="Ask πͺ")
    df = gr.DataFrame(datatype="markdown")

    # Text box that receives the DDL produced by get_dataset_ddl.
    ddl = gr.Text(value="")

    # Clicking the button sends the selected dataset id to get_dataset_ddl
    # and shows the returned text in `ddl`.
    btn.click(fn=get_dataset_ddl, inputs=[dataset_name], outputs=[ddl])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|