Modify path to relative path
Browse files
- test_pretrained.ipynb +15 -15
test_pretrained.ipynb
CHANGED
@@ -48,7 +48,7 @@
|
|
48 |
" hugging_face_path = snapshot_download(\n",
|
49 |
" repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
|
50 |
" repo_type=\"model\", \n",
|
51 |
-
" allow_patterns=[\"src/*\", \"train-data/*\", \"deepseek-coder-1.3b-instruct/*\"], \n",
|
52 |
" )\n",
|
53 |
" sys.path.append(hugging_face_path)\n",
|
54 |
" current_path = hugging_face_path"
|
@@ -166,7 +166,7 @@
|
|
166 |
},
|
167 |
{
|
168 |
"cell_type": "code",
|
169 |
-
"execution_count":
|
170 |
"metadata": {},
|
171 |
"outputs": [
|
172 |
{
|
@@ -179,7 +179,7 @@
|
|
179 |
],
|
180 |
"source": [
|
181 |
"# Create connection to sqlite3 database\n",
|
182 |
-
"connection = sql.connect('
|
183 |
"cursor = connection.cursor()\n",
|
184 |
"\n",
|
185 |
"# Execute query from model output and print result\n",
|
@@ -304,7 +304,7 @@
|
|
304 |
},
|
305 |
{
|
306 |
"cell_type": "code",
|
307 |
-
"execution_count":
|
308 |
"metadata": {},
|
309 |
"outputs": [
|
310 |
{
|
@@ -325,7 +325,7 @@
|
|
325 |
}
|
326 |
],
|
327 |
"source": [
|
328 |
-
"less_than_90_df = pd.read_csv(\"
|
329 |
"run_evaluation(less_than_90_df, \"Less than 90\")\n",
|
330 |
"print(\"Dataset length: \" + str(len(less_than_90_df)))"
|
331 |
]
|
@@ -339,7 +339,7 @@
|
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
-
"execution_count":
|
343 |
"metadata": {},
|
344 |
"outputs": [
|
345 |
{
|
@@ -372,7 +372,7 @@
|
|
372 |
}
|
373 |
],
|
374 |
"source": [
|
375 |
-
"game_queries = pd.read_csv(\"
|
376 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|
377 |
"print(\"Dataset length: \" + str(len(game_queries)))"
|
378 |
]
|
@@ -386,7 +386,7 @@
|
|
386 |
},
|
387 |
{
|
388 |
"cell_type": "code",
|
389 |
-
"execution_count":
|
390 |
"metadata": {},
|
391 |
"outputs": [
|
392 |
{
|
@@ -406,7 +406,7 @@
|
|
406 |
}
|
407 |
],
|
408 |
"source": [
|
409 |
-
"other_stats_queries = pd.read_csv(\"
|
410 |
"run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
|
411 |
"print(\"Dataset length: \" + str(len(other_stats_queries)))"
|
412 |
]
|
@@ -420,7 +420,7 @@
|
|
420 |
},
|
421 |
{
|
422 |
"cell_type": "code",
|
423 |
-
"execution_count":
|
424 |
"metadata": {},
|
425 |
"outputs": [
|
426 |
{
|
@@ -438,7 +438,7 @@
|
|
438 |
}
|
439 |
],
|
440 |
"source": [
|
441 |
-
"team_queries = pd.read_csv(\"
|
442 |
"run_evaluation(team_queries, \"Queries from team\")\n",
|
443 |
"print(\"Dataset length: \" + str(len(team_queries)))"
|
444 |
]
|
@@ -452,7 +452,7 @@
|
|
452 |
},
|
453 |
{
|
454 |
"cell_type": "code",
|
455 |
-
"execution_count":
|
456 |
"metadata": {},
|
457 |
"outputs": [
|
458 |
{
|
@@ -472,7 +472,7 @@
|
|
472 |
}
|
473 |
],
|
474 |
"source": [
|
475 |
-
"join_queries = pd.read_csv(\"
|
476 |
"run_evaluation(join_queries, \"Queries with join\")\n",
|
477 |
"print(\"Dataset length: \" + str(len(join_queries)))"
|
478 |
]
|
@@ -486,7 +486,7 @@
|
|
486 |
},
|
487 |
{
|
488 |
"cell_type": "code",
|
489 |
-
"execution_count":
|
490 |
"metadata": {},
|
491 |
"outputs": [
|
492 |
{
|
@@ -520,7 +520,7 @@
|
|
520 |
}
|
521 |
],
|
522 |
"source": [
|
523 |
-
"no_join_queries = pd.read_csv(\"
|
524 |
"run_evaluation(no_join_queries, \"Queries without join\")\n",
|
525 |
"print(\"Dataset length: \" + str(len(no_join_queries)))"
|
526 |
]
|
|
|
48 |
" hugging_face_path = snapshot_download(\n",
|
49 |
" repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
|
50 |
" repo_type=\"model\", \n",
|
51 |
+
" allow_patterns=[\"src/*\", \"train-data/*\", \"deepseek-coder-1.3b-instruct/*\", \"nba-data/*\"], \n",
|
52 |
" )\n",
|
53 |
" sys.path.append(hugging_face_path)\n",
|
54 |
" current_path = hugging_face_path"
|
|
|
166 |
},
|
167 |
{
|
168 |
"cell_type": "code",
|
169 |
+
"execution_count": null,
|
170 |
"metadata": {},
|
171 |
"outputs": [
|
172 |
{
|
|
|
179 |
],
|
180 |
"source": [
|
181 |
"# Create connection to sqlite3 database\n",
|
182 |
+
"connection = sql.connect(get_path('nba-data/nba.sqlite'))\n",
|
183 |
"cursor = connection.cursor()\n",
|
184 |
"\n",
|
185 |
"# Execute query from model output and print result\n",
|
|
|
304 |
},
|
305 |
{
|
306 |
"cell_type": "code",
|
307 |
+
"execution_count": null,
|
308 |
"metadata": {},
|
309 |
"outputs": [
|
310 |
{
|
|
|
325 |
}
|
326 |
],
|
327 |
"source": [
|
328 |
+
"less_than_90_df = pd.read_csv(get_path(\"train-data/less_than_90.tsv\"), sep='\\t')\n",
|
329 |
"run_evaluation(less_than_90_df, \"Less than 90\")\n",
|
330 |
"print(\"Dataset length: \" + str(len(less_than_90_df)))"
|
331 |
]
|
|
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
+
"execution_count": null,
|
343 |
"metadata": {},
|
344 |
"outputs": [
|
345 |
{
|
|
|
372 |
}
|
373 |
],
|
374 |
"source": [
|
375 |
+
"game_queries = pd.read_csv(get_path(\"train-data/queries_from_game.tsv\"), sep='\\t')\n",
|
376 |
"run_evaluation(game_queries, \"Queries from game\")\n",
|
377 |
"print(\"Dataset length: \" + str(len(game_queries)))"
|
378 |
]
|
|
|
386 |
},
|
387 |
{
|
388 |
"cell_type": "code",
|
389 |
+
"execution_count": null,
|
390 |
"metadata": {},
|
391 |
"outputs": [
|
392 |
{
|
|
|
406 |
}
|
407 |
],
|
408 |
"source": [
|
409 |
+
"other_stats_queries = pd.read_csv(get_path(\"train-data/queries_from_other_stats.tsv\"), sep='\\t')\n",
|
410 |
"run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
|
411 |
"print(\"Dataset length: \" + str(len(other_stats_queries)))"
|
412 |
]
|
|
|
420 |
},
|
421 |
{
|
422 |
"cell_type": "code",
|
423 |
+
"execution_count": null,
|
424 |
"metadata": {},
|
425 |
"outputs": [
|
426 |
{
|
|
|
438 |
}
|
439 |
],
|
440 |
"source": [
|
441 |
+
"team_queries = pd.read_csv(get_path(\"train-data/queries_from_team.tsv\"), sep='\\t')\n",
|
442 |
"run_evaluation(team_queries, \"Queries from team\")\n",
|
443 |
"print(\"Dataset length: \" + str(len(team_queries)))"
|
444 |
]
|
|
|
452 |
},
|
453 |
{
|
454 |
"cell_type": "code",
|
455 |
+
"execution_count": null,
|
456 |
"metadata": {},
|
457 |
"outputs": [
|
458 |
{
|
|
|
472 |
}
|
473 |
],
|
474 |
"source": [
|
475 |
+
"join_queries = pd.read_csv(get_path(\"train-data/with_join.tsv\"), sep='\\t')\n",
|
476 |
"run_evaluation(join_queries, \"Queries with join\")\n",
|
477 |
"print(\"Dataset length: \" + str(len(join_queries)))"
|
478 |
]
|
|
|
486 |
},
|
487 |
{
|
488 |
"cell_type": "code",
|
489 |
+
"execution_count": null,
|
490 |
"metadata": {},
|
491 |
"outputs": [
|
492 |
{
|
|
|
520 |
}
|
521 |
],
|
522 |
"source": [
|
523 |
+
"no_join_queries = pd.read_csv(get_path(\"train-data/without_join.tsv\"), sep='\\t')\n",
|
524 |
"run_evaluation(no_join_queries, \"Queries without join\")\n",
|
525 |
"print(\"Dataset length: \" + str(len(no_join_queries)))"
|
526 |
]
|