to_delete / dreamcoder /domains /regex /groundtruthRegexes.py
Fraser-Greenlee
add dreamcoder codebase
e1c1753
raw
history blame contribute delete
4.25 kB
# Reference list of `pre.create(...)` calls, kept as a string literal.
# The literal is raw so that \d, \w, \s, \l and \u are stored verbatim instead
# of being parsed as (invalid) string escapes; its text value is the same as
# before, with a single backslash in front of each token.
r"""
pre.create(".+"),
pre.create("\d+"),
pre.create("\w+"),
pre.create("\s+"),
pre.create("\u+"),
pre.create("\l+")
"""
# Dict of ground-truth (gt) regexes, keyed by integer column number
# (cf. the "Data column no. N" names in badRegexTasks).
# Regex pattern strings keyed by integer ID, listed in their original order.
# Written as raw literals so each backslash appears once; the resulting values
# are the same as the doubled-backslash spelling.
# NOTE(review): tokens such as \u and \l are not Python `re` escapes; the
# patterns presumably target the `pre.create(...)` parser mentioned at the top
# of this file -- confirm against the consumer.
gt_dict = {
    776: r"JPC\u\u\d+\.png",
    922: r"WHS\d_\d+",
    354: r"\u+",
    523: r"(\u)+|\.",
    184: r"\.\d+",
    501: r"u\d\d",
    760: r"\u\u",
    49: r"(\u)+\u\d?",
    732: r"\uR5\d\d",
    450: r"-\d(\.(\d)+)?",
    350: r"\u\u",
    467: r"hu\d(\d|\u)+",
    622: r"A(\d|\u)**",
    476: r"\u+",
    554: r"\u\u",
    940: r"\u\u?",
    496: r"\u\u",
    369: r"\u\u\u",
    596: r"\u+",
    720: r"\(\d\d\d\) \d\d\d-\d\d\d\d",
    53: r"rec-\d\d\d?-(org)|(dup-0)",
    150: r"N\d\d",
    741: r"#\d\d\d",
    18: r"A|C-\d+-\d+",
    589: r"A(\u|\d)++",
    666: r"\(\d\d\d\) \d\d\d-\d\d\d\d",
    581: r"us13\u\d\d",
    299: r"E07000\d\d\d",
    638: r"\l+\d+\l+\d+",
    364: r"\u\u",
    334: r"-00:\d\d:\d\d.\d",
    38: r"SRX89\d+",
    247: r"'\d\d:\d\d:00'",
    506: r"(S|H)\d+",
    891: r"(r|v)\d?",
    911: r"KW-\d+",
    792: r"\d*\u*",
    508: r"N000\d+",
    842: r"-?\d?\d\.\d\d%",
    200: r"\u\u",
    694: r"\(\d+\)",
    210: r"(\d(\.\d)?)|(--)",
    298: r"DS_25(\u|\d)+",
    668: r"\u+",
    939: r"ms0\d+",
    944: r"\u+\d?",
    731: r"ManH.0\d\d",
    229: r"\u+(-\u+)?",
    28: r"Y201\d/\d\d\d\d",
    374: r"q000\d(_000\d)?",
    819: r"\d*\l*\d*",
    516: r"-122.3\d+",
    417: r"\u\uT\uB",
    660: r"ENGL?\d\d\d",
    585: r"M?\u+",
    325: r"BUS M \d\d\d.*",
    823: r"\u\u\u",
    515: r"L|\u - (\?\?)|(\d?\d\.\d lbs\.)",
    864: r"\u+",
    359: r"MAM\.OSBS\.201\d\.\d\d",
    594: r"(\u|\d)+( (\u|\d)+)*",
    788: r"-\d(,\d+)?",
    188: r"cat\. \d\d",
    355: r".+",
    799: r"\u\d\d",
    902: r"\u\d\d",
    920: r"A\.\d\d",
    330: r"Resp\d\d",
    396: r"\u+(( |/)\u+)?",
    393: r"US $ \d\.\d\d",
    680: r"Z:-?0\.\d\d",
    744: r"t1_cv(\l|\d)+",
    461: r"(\u|\l)+\d+",
    631: r"$\d+\.\d+",
    195: r"(OLE)?\d+",
    693: r"\u",
    577: r"EFO_000\d+",
    392: r"$\d+(,\d\d\d)*\.00",
    688: r"\u+( \u+)*",
    816: r"\u\u\u",
    489: r"UK\u\d",
    251: r"\l\l\l",
    653: r"C\d+",
    769: r"(\u|\l|\d|-)+\d+",
    991: r"Q\d-201\d",
    342: r"\u\u\d\d\d\d",
    308: r"\u\u\u\u",
    136: r"IMPC_\u\u\u_\d\d\d_\d\d\d",
    327: r"#\d+((/|-)\d+)*",
    981: r"\u\u\u",
    892: r"(.|\l)*",
    375: r"P\u\.\d\d\d\d\.\d\d\d",
    499: r"A000\d+",
    474: r"\u+",
    50: r"V06\d+",
    381: r"F?\d+",
    883: r"-79.\d+",
    173: r"(\u|\l)+\d+",
    147: r"\u\u\u-\u\u\u",
    419: r"\u\u",
    961: r"-?\d\.\d*",
    148: r"Q\d\d",
    975: r"(\d|\u)+",
    79: r"\d+(,\d\d\d)+",
    775: r"\u\l\l \d+ \d\d\d\d",
    774: r"FOS\d\d+",
    561: r".+",
    509: r"S000\d+",
    494: r"S1900\d+",
    119: r"$\d\d(,\d\d\d)+",
    29: r"(\u|\l|\d)+",
    121: r"(\d|\u|\.|/|\(|\))+",
    61: r"R \d\d\d.\d\d",
    871: r"-0.7\d+",
    639: r"\u+?\d+",
    729: r"COMISARIA \d\d",
    193: r"\u\d\d",
    752: r"(.*|\u\.?)+",
    17: r"$\d.\d\d",
    914: r"R\d\d\d\d",
    510: r"P\d000\d\d\d\d",
    443: r"(W|L) \d-\d+",
    20: r"MDEL\d\d?\.\d\l",
    64: r"c04p0100(\l|\d)",
    301: r"(\u|\d)+(-(\u|\d)+)*",
    664: r"N\d",
    493: r"[0\.0\d+]",
    765: r"-?\d\.\d+( \(0\.\d+\))?",
}
# Set of task names of the form "Data column no. <N>" that are flagged as
# "bad" (per the variable name; the criterion is not visible in this file).
# Every number listed here also appears as a key in gt_dict.
badRegexTasks = {
    f"Data column no. {column}"
    for column in (
        922, 184, 467, 476, 150, 299, 334, 493, 891,
        792, 765, 944, 374, 660, 188, 920, 330, 396,
        680, 769, 308, 375, 474, 79, 871, 729, 664,
    )
}