Spaces:
Runtime error
Runtime error
Trying to run query
Browse files
app.py
CHANGED
@@ -2,23 +2,43 @@ import gradio as gr
|
|
2 |
import duckdb
|
3 |
from huggingface_hub import HfFileSystem
|
4 |
from huggingface_hub.hf_file_system import safe_quote
|
|
|
5 |
|
6 |
fs = HfFileSystem()
|
7 |
duckdb.register_filesystem(fs)
|
8 |
dataset="glue"
|
9 |
PARQUET_REVISION="refs/convert/parquet"
|
10 |
-
|
11 |
-
path="mnli/*.parquet" # To read all parquets
|
12 |
location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
|
13 |
print(location)
|
14 |
|
15 |
|
16 |
-
def greet(
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
21 |
-
iface.launch()
|
22 |
|
23 |
|
24 |
|
|
|
2 |
import duckdb
|
3 |
from huggingface_hub import HfFileSystem
|
4 |
from huggingface_hub.hf_file_system import safe_quote
|
5 |
+
import pandas as pd
|
6 |
|
7 |
# Register the Hugging Face Hub filesystem with DuckDB so the "hf://" URL built
# below can be referenced directly from SQL.
fs = HfFileSystem()
duckdb.register_filesystem(fs)

dataset = "glue"
# Revision segment placed after "@" in the hf:// URL (quoted with safe_quote).
PARQUET_REVISION = "refs/convert/parquet"

# Only one split is read for now.
# NOTE: "mnli/*.parquet" would read all Parquet files, but they should then be
# grouped by split, with the split names obtained from the datasets server.
path = "mnli/glue-train.parquet"
location = f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
print(location)
|
15 |
|
16 |
|
17 |
+
def greet(dataset, config, split, sql):
    """Run the fixed sample query and return its result as a DataFrame.

    The query selects ``idx`` (as ``id``) and ``premise`` (as ``p``) from the
    Parquet file at the module-level ``location``, limited to 2 rows.

    Args:
        dataset: Value of the "dataset" textbox (not used by the body yet).
        config: Value of the "config" textbox (not used by the body yet).
        split: Value of the "split" textbox (not used by the body yet).
        sql: Value of the "sql" textbox (not used by the body yet).

    Returns:
        The query result as a pandas DataFrame. If anything raises, a one-row
        DataFrame with a single ``Error`` column holding the message instead.
    """
    try:
        relation = duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2")
        frame = relation.to_df()
        print("QUERY SUCCESSED")
        return frame
    except Exception as error:
        # Surface the failure in the result table rather than crashing the UI.
        message = str(error)
        print(f"Error: {message}")
        return pd.DataFrame({"Error": [f"❌ {message}"]})
|
25 |
+
|
26 |
+
# Page layout: four text inputs, a "Run" button and a table for the result.
with gr.Blocks() as demo:
    gr.Markdown(" ## DuckDB demo using parquet revision")

    # Input fields; their values are handed to greet() in this order.
    dataset = gr.Textbox(placeholder="mstz/iris", label="dataset")
    config = gr.Textbox(placeholder="iris", label="config")
    split = gr.Textbox(placeholder="train", label="split")
    sql = gr.Textbox(placeholder="SELECT 1", label="sql")
    run_button = gr.Button("Run")

    gr.Markdown("### Result")
    cached_responses_table = gr.DataFrame()

    # Clicking "Run" calls greet() and shows the returned DataFrame.
    run_button.click(
        fn=greet,
        inputs=[dataset, config, split, sql],
        outputs=cached_responses_table,
    )


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
41 |
|
|
|
|
|
42 |
|
43 |
|
44 |
|