om4r932 committed on
Commit
cec5f67
·
1 Parent(s): f0a2998

Changed regex to accept all numbers ([0-9] -> [0-9]+)

Browse files
Files changed (1) hide show
  1. app.py +28 -12
app.py CHANGED
@@ -264,15 +264,15 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict[str, str]:
264
  chapters = []
265
  for line in toc_brut:
266
  x = line.split("\t")
267
- if re.fullmatch(r"^\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
268
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
269
- if re.fullmatch(r"^\d\.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
270
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
271
- if re.fullmatch(r"^\d\.\d\.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
272
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
273
- if re.fullmatch(r"^\d\.\d\.\d.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
274
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
275
- if re.fullmatch(r"^\d\.\d\.\d.\d.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
276
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
277
 
278
  real_toc_indexes = {}
@@ -282,7 +282,15 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict[str, str]:
282
  x = text.index(chapter)
283
  real_toc_indexes[chapter] = x
284
  except ValueError as e:
285
- real_toc_indexes[chapter] = -float("inf")
 
 
 
 
 
 
 
 
286
 
287
  document = {}
288
  toc = list(real_toc_indexes.keys())
@@ -315,15 +323,15 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict:
315
  chapters = []
316
  for line in toc_brut:
317
  x = line.split("\t")
318
- if re.fullmatch(r"^\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
319
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
320
- if re.fullmatch(r"^\d\.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
321
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
322
- if re.fullmatch(r"^\d\.\d\.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
323
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
324
- if re.fullmatch(r"^\d\.\d\.\d.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
325
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
326
- if re.fullmatch(r"^\d\.\d\.\d.\d.\d\t[A-Z][a-zA-Z0-9\s,;!?'.-]*$", line):
327
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
328
 
329
  real_toc_indexes = {}
@@ -333,6 +341,14 @@ def get_file_from_spec_id_version(req: SpecRequest) -> Dict:
333
  x = text.index(chapter)
334
  real_toc_indexes[chapter] = x
335
  except ValueError as e:
336
- real_toc_indexes[chapter] = -float("inf")
 
 
 
 
 
 
 
 
337
 
338
  return create_nested_structure(chapters, text, real_toc_indexes)
 
264
  chapters = []
265
  for line in toc_brut:
266
  x = line.split("\t")
267
+ if re.search(r"^\d+\t[\ \S]+", line):
268
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
269
+ if re.search(r"^\d+\.\d+\t[\ \S]+", line):
270
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
271
+ if re.search(r"^\d+\.\d+\.\d+\t[\ \S]+", line):
272
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
273
+ if re.search(r"^\d+\.\d+\.\d+.\d+\t[\ \S]+", line):
274
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
275
+ if re.search(r"^\d+\.\d+\.\d+.\d+.\d+\t[\ \S]+", line):
276
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
277
 
278
  real_toc_indexes = {}
 
282
  x = text.index(chapter)
283
  real_toc_indexes[chapter] = x
284
  except ValueError as e:
285
+ try:
286
+ number = chapter.split("\t")[0] + "\t"
287
+ for line in text[forewords[1]:]:
288
+ if number in line:
289
+ x = text.index(line)
290
+ real_toc_indexes[line] = x
291
+ break
292
+ except:
293
+ real_toc_indexes[chapter] = -float("inf")
294
 
295
  document = {}
296
  toc = list(real_toc_indexes.keys())
 
323
  chapters = []
324
  for line in toc_brut:
325
  x = line.split("\t")
326
+ if re.search(r"^\d+\t[\ \S]+", line):
327
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
328
+ if re.search(r"^\d+\.\d+\t[\ \S]+", line):
329
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
330
+ if re.search(r"^\d+\.\d+\.\d+\t[\ \S]+", line):
331
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
332
+ if re.search(r"^\d+\.\d+\.\d+.\d+\t[\ \S]+", line):
333
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
334
+ if re.search(r"^\d+\.\d+\.\d+.\d+.\d+\t[\ \S]+", line):
335
  chapters.append(x[0] if len(x) == 1 else "\t".join(x[:2]))
336
 
337
  real_toc_indexes = {}
 
341
  x = text.index(chapter)
342
  real_toc_indexes[chapter] = x
343
  except ValueError as e:
344
+ try:
345
+ number = chapter.split("\t")[0] + "\t"
346
+ for line in text[forewords[1]:]:
347
+ if number in line:
348
+ x = text.index(line)
349
+ real_toc_indexes[line] = x
350
+ break
351
+ except:
352
+ real_toc_indexes[chapter] = -float("inf")
353
 
354
  return create_nested_structure(chapters, text, real_toc_indexes)