Spaces:
Runtime error
Runtime error
Adding parameters
Browse files
app.py
CHANGED
@@ -4,19 +4,25 @@ from huggingface_hub import HfFileSystem
|
|
4 |
from huggingface_hub.hf_file_system import safe_quote
|
5 |
import pandas as pd
|
6 |
|
7 |
-
fs = HfFileSystem()
|
8 |
-
duckdb.register_filesystem(fs)
|
9 |
-
dataset="glue"
|
10 |
PARQUET_REVISION="refs/convert/parquet"
|
11 |
-
|
12 |
-
# path="mnli/*.parquet" # To read all parquets but it should be grouped by split getting from datasets server
|
13 |
-
location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
|
14 |
-
print(location)
|
15 |
|
|
|
|
|
16 |
|
17 |
def greet(dataset, config, split, sql):
|
18 |
try:
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
print("QUERY SUCCESSED")
|
21 |
except Exception as error:
|
22 |
print(f"Error: {str(error)}")
|
@@ -28,7 +34,7 @@ with gr.Blocks() as demo:
|
|
28 |
dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
|
29 |
config = gr.Textbox(label="config", placeholder="iris")
|
30 |
split = gr.Textbox(label="split", placeholder="train")
|
31 |
-
sql = gr.Textbox(label="sql", placeholder="SELECT
|
32 |
run_button = gr.Button("Run")
|
33 |
gr.Markdown("### Result")
|
34 |
cached_responses_table = gr.DataFrame()
|
|
|
4 |
from huggingface_hub.hf_file_system import safe_quote
|
5 |
import pandas as pd
|
6 |
|
|
|
|
|
|
|
7 |
PARQUET_REVISION="refs/convert/parquet"
|
8 |
+
TABLE_WILDCARD="{table}"
|
|
|
|
|
|
|
9 |
|
10 |
+
fs = HfFileSystem()
|
11 |
+
duckdb.register_filesystem(fs)
|
12 |
|
13 |
def greet(dataset, config, split, sql):
|
14 |
try:
|
15 |
+
if TABLE_WILDCARD not in sql:
|
16 |
+
raise Exception(f"Query must contains {TABLE_WILDCARD} wildcard.")
|
17 |
+
# dataset="glue"
|
18 |
+
# config="mnli"
|
19 |
+
path=f"{config}/{dataset}-{split}.parquet" # Only from one split
|
20 |
+
location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
|
21 |
+
print(location)
|
22 |
+
sql = sql.replace(TABLE_WILDCARD, f"'{location}'")
|
23 |
+
|
24 |
+
# result = duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").to_df()
|
25 |
+
result = duckdb.query(sql).to_df()
|
26 |
print("QUERY SUCCESSED")
|
27 |
except Exception as error:
|
28 |
print(f"Error: {str(error)}")
|
|
|
34 |
dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
|
35 |
config = gr.Textbox(label="config", placeholder="iris")
|
36 |
split = gr.Textbox(label="split", placeholder="train")
|
37 |
+
sql = gr.Textbox(label="sql", placeholder=f"SELECT sepal_length FROM {TABLE_WILDCARD} LIMIT 3")
|
38 |
run_button = gr.Button("Run")
|
39 |
gr.Markdown("### Result")
|
40 |
cached_responses_table = gr.DataFrame()
|