Spaces:

MikeMai
/

PO_Extractor

Sleeping

App Files Community

MikeMai committed on Mar 7

Commit

1cc3a10

verified ·

1 Parent(s): f22f766

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -10

app.py CHANGED Viewed

@@ -34,9 +34,9 @@ def get_namespace(root):
     """Extracts the primary namespace from the XML root element while keeping the default."""
     global NS
-    if NS is None:
-        ns = root.tag.split('}')[0].strip('{')
-        NS = {'w': ns} if ns else DEFAULT_NS
     return NS
 # --- Helper Functions for DOCX Processing ---
@@ -307,7 +307,6 @@ def extract_text_outside_tables(root, table_paragraphs):
     """Extracts text from paragraphs outside tables in the document."""
     extracted_text = []
-    # print(ET.tostring(root, encoding='unicode'))
     for paragraph in root.findall('.//w:p', NS):
         if paragraph in table_paragraphs:
             continue  # Skip paragraphs inside tables
@@ -422,8 +421,6 @@ def deepseek_extract_price_list(json_data):
     # Step 3: Convert back to JSON string (if needed)
     json_output = json.dumps(filtered_contract_data, ensure_ascii=False, indent=4)
-    print(json_output)
     prompt = """You are given a price list in JSON format. Extract the following information in CSV format:
 # Response Format
@@ -455,7 +452,6 @@ JSON data:""" + f"""
     price_list = re.sub(r"^```json\n|```$", "", price_list, flags=re.DOTALL)
-    print(price_list)
 def json_to_excel(contract_summary, json_data, excel_path):
     """Converts extracted JSON tables to an Excel file."""
@@ -463,8 +459,6 @@ def json_to_excel(contract_summary, json_data, excel_path):
     # Correctly parse the JSON string
     contract_summary_json = json.loads(json.loads(contract_summary))
-    print(contract_summary_json)
     contract_summary_df = pd.DataFrame([contract_summary_json])
     # Ensure json_data is a dictionary
@@ -547,4 +541,4 @@ interface = gr.Interface(
     theme=Base()
 )
-interface.launch()

     """Extracts the primary namespace from the XML root element while keeping the default."""
     global NS
+    ns = root.tag.split('}')[0].strip('{')
+    NS = {'w': ns} if ns else DEFAULT_NS
     return NS
 # --- Helper Functions for DOCX Processing ---
     """Extracts text from paragraphs outside tables in the document."""
     extracted_text = []
     for paragraph in root.findall('.//w:p', NS):
         if paragraph in table_paragraphs:
             continue  # Skip paragraphs inside tables
     # Step 3: Convert back to JSON string (if needed)
     json_output = json.dumps(filtered_contract_data, ensure_ascii=False, indent=4)
     prompt = """You are given a price list in JSON format. Extract the following information in CSV format:
 # Response Format
     price_list = re.sub(r"^```json\n|```$", "", price_list, flags=re.DOTALL)
 def json_to_excel(contract_summary, json_data, excel_path):
     """Converts extracted JSON tables to an Excel file."""
     # Correctly parse the JSON string
     contract_summary_json = json.loads(json.loads(contract_summary))
     contract_summary_df = pd.DataFrame([contract_summary_json])
     # Ensure json_data is a dictionary
     theme=Base()
 )
+interface.launch()