MikeMai committed on
Commit
1cc3a10
·
verified ·
1 Parent(s): f22f766

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -34,9 +34,9 @@ def get_namespace(root):
34
  """Extracts the primary namespace from the XML root element while keeping the default."""
35
  global NS
36
 
37
- if NS is None:
38
- ns = root.tag.split('}')[0].strip('{')
39
- NS = {'w': ns} if ns else DEFAULT_NS
40
  return NS
41
 
42
  # --- Helper Functions for DOCX Processing ---
@@ -307,7 +307,6 @@ def extract_text_outside_tables(root, table_paragraphs):
307
  """Extracts text from paragraphs outside tables in the document."""
308
  extracted_text = []
309
 
310
- # print(ET.tostring(root, encoding='unicode'))
311
  for paragraph in root.findall('.//w:p', NS):
312
  if paragraph in table_paragraphs:
313
  continue # Skip paragraphs inside tables
@@ -422,8 +421,6 @@ def deepseek_extract_price_list(json_data):
422
  # Step 3: Convert back to JSON string (if needed)
423
  json_output = json.dumps(filtered_contract_data, ensure_ascii=False, indent=4)
424
 
425
- print(json_output)
426
-
427
  prompt = """You are given a price list in JSON format. Extract the following information in CSV format:
428
 
429
  # Response Format
@@ -455,7 +452,6 @@ JSON data:""" + f"""
455
 
456
  price_list = re.sub(r"^```json\n|```$", "", price_list, flags=re.DOTALL)
457
 
458
- print(price_list)
459
 
460
  def json_to_excel(contract_summary, json_data, excel_path):
461
  """Converts extracted JSON tables to an Excel file."""
@@ -463,8 +459,6 @@ def json_to_excel(contract_summary, json_data, excel_path):
463
  # Correctly parse the JSON string
464
  contract_summary_json = json.loads(json.loads(contract_summary))
465
 
466
- print(contract_summary_json)
467
-
468
  contract_summary_df = pd.DataFrame([contract_summary_json])
469
 
470
  # Ensure json_data is a dictionary
@@ -547,4 +541,4 @@ interface = gr.Interface(
547
  theme=Base()
548
  )
549
 
550
- interface.launch()
 
34
  """Extracts the primary namespace from the XML root element while keeping the default."""
35
  global NS
36
 
37
+ ns = root.tag.split('}')[0].strip('{')
38
+ NS = {'w': ns} if ns else DEFAULT_NS
39
+
40
  return NS
41
 
42
  # --- Helper Functions for DOCX Processing ---
 
307
  """Extracts text from paragraphs outside tables in the document."""
308
  extracted_text = []
309
 
 
310
  for paragraph in root.findall('.//w:p', NS):
311
  if paragraph in table_paragraphs:
312
  continue # Skip paragraphs inside tables
 
421
  # Step 3: Convert back to JSON string (if needed)
422
  json_output = json.dumps(filtered_contract_data, ensure_ascii=False, indent=4)
423
 
 
 
424
  prompt = """You are given a price list in JSON format. Extract the following information in CSV format:
425
 
426
  # Response Format
 
452
 
453
  price_list = re.sub(r"^```json\n|```$", "", price_list, flags=re.DOTALL)
454
 
 
455
 
456
  def json_to_excel(contract_summary, json_data, excel_path):
457
  """Converts extracted JSON tables to an Excel file."""
 
459
  # Correctly parse the JSON string
460
  contract_summary_json = json.loads(json.loads(contract_summary))
461
 
 
 
462
  contract_summary_df = pd.DataFrame([contract_summary_json])
463
 
464
  # Ensure json_data is a dictionary
 
541
  theme=Base()
542
  )
543
 
544
+ interface.launch()