asoria HF Staff committed on
Commit
9b95e7f
·
1 Parent(s): 5822bba

Trying to run query

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -2,23 +2,43 @@ import gradio as gr
2
  import duckdb
3
  from huggingface_hub import HfFileSystem
4
  from huggingface_hub.hf_file_system import safe_quote
 
5
 
6
  fs = HfFileSystem()
7
  duckdb.register_filesystem(fs)
8
  dataset="glue"
9
  PARQUET_REVISION="refs/convert/parquet"
10
- # path=f"mnli/glue-train.parquet"
11
- path="mnli/*.parquet" # To read all parquets
12
  location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
13
  print(location)
14
 
15
 
16
- def greet(name):
17
- duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2")
18
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
21
- iface.launch()
22
 
23
 
24
 
 
2
  import duckdb
3
  from huggingface_hub import HfFileSystem
4
  from huggingface_hub.hf_file_system import safe_quote
5
+ import pandas as pd
6
 
7
  fs = HfFileSystem()
8
  duckdb.register_filesystem(fs)
9
  dataset="glue"
10
  PARQUET_REVISION="refs/convert/parquet"
11
+ path=f"mnli/glue-train.parquet" # Only from one split
12
+ # path="mnli/*.parquet" # To read all parquets but it should be grouped by split getting from datasets server
13
  location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
14
  print(location)
15
 
16
 
17
+ def greet(dataset, config, split, sql):
18
+ try:
19
+ result = duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").to_df()
20
+ print("QUERY SUCCESSED")
21
+ except Exception as error:
22
+ print(f"Error: {str(error)}")
23
+ return pd.DataFrame({"Error": [f"❌ {str(error)}"]})
24
+ return result
25
+
26
+ with gr.Blocks() as demo:
27
+ gr.Markdown(" ## DuckDB demo using parquet revision")
28
+ dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
29
+ config = gr.Textbox(label="config", placeholder="iris")
30
+ split = gr.Textbox(label="split", placeholder="train")
31
+ sql = gr.Textbox(label="sql", placeholder="SELECT 1")
32
+ run_button = gr.Button("Run")
33
+ gr.Markdown("### Result")
34
+ cached_responses_table = gr.DataFrame()
35
+ run_button.click(greet, inputs=[dataset, config, split, sql], outputs=cached_responses_table)
36
+
37
+
38
+
39
+ if __name__ == "__main__":
40
+ demo.launch()
41
 
 
 
42
 
43
 
44