asoria HF Staff committed on
Commit
554bcd2
·
1 Parent(s): 9b95e7f

Adding parameters

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -4,19 +4,25 @@ from huggingface_hub import HfFileSystem
4
  from huggingface_hub.hf_file_system import safe_quote
5
  import pandas as pd
6
 
7
- fs = HfFileSystem()
8
- duckdb.register_filesystem(fs)
9
- dataset="glue"
10
  PARQUET_REVISION="refs/convert/parquet"
11
- path=f"mnli/glue-train.parquet" # Only from one split
12
- # path="mnli/*.parquet" # To read all parquets but it should be grouped by split getting from datasets server
13
- location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
14
- print(location)
15
 
 
 
16
 
17
  def greet(dataset, config, split, sql):
18
  try:
19
- result = duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").to_df()
 
 
 
 
 
 
 
 
 
 
20
  print("QUERY SUCCESSED")
21
  except Exception as error:
22
  print(f"Error: {str(error)}")
@@ -28,7 +34,7 @@ with gr.Blocks() as demo:
28
  dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
29
  config = gr.Textbox(label="config", placeholder="iris")
30
  split = gr.Textbox(label="split", placeholder="train")
31
- sql = gr.Textbox(label="sql", placeholder="SELECT 1")
32
  run_button = gr.Button("Run")
33
  gr.Markdown("### Result")
34
  cached_responses_table = gr.DataFrame()
 
4
  from huggingface_hub.hf_file_system import safe_quote
5
  import pandas as pd
6
 
 
 
 
7
  PARQUET_REVISION="refs/convert/parquet"
8
+ TABLE_WILDCARD="{table}"
 
 
 
9
 
10
+ fs = HfFileSystem()
11
+ duckdb.register_filesystem(fs)
12
 
13
  def greet(dataset, config, split, sql):
14
  try:
15
+ if TABLE_WILDCARD not in sql:
16
+ raise Exception(f"Query must contains {TABLE_WILDCARD} wildcard.")
17
+ # dataset="glue"
18
+ # config="mnli"
19
+ path=f"{config}/{dataset}-{split}.parquet" # Only from one split
20
+ location=f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
21
+ print(location)
22
+ sql = sql.replace(TABLE_WILDCARD, f"'{location}'")
23
+
24
+ # result = duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").to_df()
25
+ result = duckdb.query(sql).to_df()
26
  print("QUERY SUCCESSED")
27
  except Exception as error:
28
  print(f"Error: {str(error)}")
 
34
  dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
35
  config = gr.Textbox(label="config", placeholder="iris")
36
  split = gr.Textbox(label="split", placeholder="train")
37
+ sql = gr.Textbox(label="sql", placeholder=f"SELECT sepal_length FROM {TABLE_WILDCARD} LIMIT 3")
38
  run_button = gr.Button("Run")
39
  gr.Markdown("### Result")
40
  cached_responses_table = gr.DataFrame()