Spaces:

milwright
/

historical-ocr

Running

App Files Community

milwright committed 28 days ago

Commit

3030658

1 Parent(s): af6265b

Fix metadata field ordering and tag classification issues

Browse files

Files changed (1) hide show

utils/ui_utils.py +13 -7

utils/ui_utils.py CHANGED Viewed

@@ -21,17 +21,13 @@ def display_results(result, container, custom_prompt=""):
         meta = {k: v for k, v in result.items()
                 if k not in ['pages_data', 'illustrations', 'ocr_contents', 'raw_response_data']}
-        # Create a compact metadata section
         meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
         # Document type
         if 'detected_document_type' in meta:
             meta_html += f'<div><strong>Type:</strong> {meta["detected_document_type"]}</div>'
-        # Processing time
-        if 'processing_time' in meta:
-            meta_html += f'<div><strong>Time:</strong> {meta["processing_time"]:.1f}s</div>'
         # Page information
         if 'limited_pages' in meta:
             meta_html += f'<div><strong>Pages:</strong> {meta["limited_pages"]["processed"]}/{meta["limited_pages"]["total"]}</div>'
@@ -39,6 +35,14 @@ def display_results(result, container, custom_prompt=""):
         meta_html += '</div>'
         st.markdown(meta_html, unsafe_allow_html=True)
         # Language metadata on a separate line, Subject Tags below
         # First show languages if available
@@ -100,7 +104,7 @@ def display_results(result, container, custom_prompt=""):
         # Create a separate line for Time if we have time-related tags
         if 'topics' in result and result['topics']:
             time_tags = [topic for topic in result['topics']
-                       if any(term in topic.lower() for term in ["century", "pre-", "era", "historical"])]
             if time_tags:
                 time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
                 time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
@@ -113,7 +117,7 @@ def display_results(result, container, custom_prompt=""):
         if 'topics' in result and result['topics']:
             # Filter out time-related tags which are already displayed
             subject_tags = [topic for topic in result['topics']
-                         if not any(term in topic.lower() for term in ["century", "pre-", "era", "historical"])]
             if subject_tags:
                 # Create a separate line for Subject Tags
@@ -133,6 +137,8 @@ def display_results(result, container, custom_prompt=""):
                         tag_class += " tag-document-type"  # Document types
                     elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
                         tag_class += " tag-subject"  # Subject domains
                     # Add each tag as an inline span
                     tags_html += f'<span class="{tag_class}">{topic}</span>'

         meta = {k: v for k, v in result.items()
                 if k not in ['pages_data', 'illustrations', 'ocr_contents', 'raw_response_data']}
+        # Create a compact metadata section for primary metadata
         meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
         # Document type
         if 'detected_document_type' in meta:
             meta_html += f'<div><strong>Type:</strong> {meta["detected_document_type"]}</div>'
         # Page information
         if 'limited_pages' in meta:
             meta_html += f'<div><strong>Pages:</strong> {meta["limited_pages"]["processed"]}/{meta["limited_pages"]["total"]}</div>'
         meta_html += '</div>'
         st.markdown(meta_html, unsafe_allow_html=True)
+        # Processing time - separate section for proper ordering of all metadata fields
+        if 'processing_time' in meta:
+            time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
+            time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
+            time_html += f'<div>{meta["processing_time"]:.1f}s</div>'
+            time_html += '</div>'
+            st.markdown(time_html, unsafe_allow_html=True)
         # Language metadata on a separate line, Subject Tags below
         # First show languages if available
         # Create a separate line for Time if we have time-related tags
         if 'topics' in result and result['topics']:
             time_tags = [topic for topic in result['topics']
+                       if any(term in topic.lower() for term in ["century", "pre-", "era"])]
             if time_tags:
                 time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
                 time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
         if 'topics' in result and result['topics']:
             # Filter out time-related tags which are already displayed
             subject_tags = [topic for topic in result['topics']
+                         if not any(term in topic.lower() for term in ["century", "pre-", "era"])]
             if subject_tags:
                 # Create a separate line for Subject Tags
                         tag_class += " tag-document-type"  # Document types
                     elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
                         tag_class += " tag-subject"  # Subject domains
+                    elif "historical" in topic.lower() and "document" in topic.lower():
+                        tag_class += " tag-document-type"  # "Historical Document Analysis" should be a document type
                     # Add each tag as an inline span
                     tags_html += f'<span class="{tag_class}">{topic}</span>'