milwright committed on
Commit
3030658
·
1 Parent(s): af6265b

Fix metadata field ordering and tag classification issues

Browse files
Files changed (1) hide show
  1. utils/ui_utils.py +13 -7
utils/ui_utils.py CHANGED
@@ -21,17 +21,13 @@ def display_results(result, container, custom_prompt=""):
21
  meta = {k: v for k, v in result.items()
22
  if k not in ['pages_data', 'illustrations', 'ocr_contents', 'raw_response_data']}
23
 
24
- # Create a compact metadata section
25
  meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
26
 
27
  # Document type
28
  if 'detected_document_type' in meta:
29
  meta_html += f'<div><strong>Type:</strong> {meta["detected_document_type"]}</div>'
30
 
31
- # Processing time
32
- if 'processing_time' in meta:
33
- meta_html += f'<div><strong>Time:</strong> {meta["processing_time"]:.1f}s</div>'
34
-
35
  # Page information
36
  if 'limited_pages' in meta:
37
  meta_html += f'<div><strong>Pages:</strong> {meta["limited_pages"]["processed"]}/{meta["limited_pages"]["total"]}</div>'
@@ -39,6 +35,14 @@ def display_results(result, container, custom_prompt=""):
39
  meta_html += '</div>'
40
  st.markdown(meta_html, unsafe_allow_html=True)
41
 
 
 
 
 
 
 
 
 
42
  # Language metadata on a separate line, Subject Tags below
43
 
44
  # First show languages if available
@@ -100,7 +104,7 @@ def display_results(result, container, custom_prompt=""):
100
  # Create a separate line for Time if we have time-related tags
101
  if 'topics' in result and result['topics']:
102
  time_tags = [topic for topic in result['topics']
103
- if any(term in topic.lower() for term in ["century", "pre-", "era", "historical"])]
104
  if time_tags:
105
  time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
106
  time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
@@ -113,7 +117,7 @@ def display_results(result, container, custom_prompt=""):
113
  if 'topics' in result and result['topics']:
114
  # Filter out time-related tags which are already displayed
115
  subject_tags = [topic for topic in result['topics']
116
- if not any(term in topic.lower() for term in ["century", "pre-", "era", "historical"])]
117
 
118
  if subject_tags:
119
  # Create a separate line for Subject Tags
@@ -133,6 +137,8 @@ def display_results(result, container, custom_prompt=""):
133
  tag_class += " tag-document-type" # Document types
134
  elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
135
  tag_class += " tag-subject" # Subject domains
 
 
136
 
137
  # Add each tag as an inline span
138
  tags_html += f'<span class="{tag_class}">{topic}</span>'
 
21
  meta = {k: v for k, v in result.items()
22
  if k not in ['pages_data', 'illustrations', 'ocr_contents', 'raw_response_data']}
23
 
24
+ # Create a compact metadata section for primary metadata
25
  meta_html = '<div style="display: flex; flex-wrap: wrap; gap: 0.3rem; margin-bottom: 0.3rem;">'
26
 
27
  # Document type
28
  if 'detected_document_type' in meta:
29
  meta_html += f'<div><strong>Type:</strong> {meta["detected_document_type"]}</div>'
30
 
 
 
 
 
31
  # Page information
32
  if 'limited_pages' in meta:
33
  meta_html += f'<div><strong>Pages:</strong> {meta["limited_pages"]["processed"]}/{meta["limited_pages"]["total"]}</div>'
 
35
  meta_html += '</div>'
36
  st.markdown(meta_html, unsafe_allow_html=True)
37
 
38
+ # Processing time - separate section for proper ordering of all metadata fields
39
+ if 'processing_time' in meta:
40
+ time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
41
+ time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
42
+ time_html += f'<div>{meta["processing_time"]:.1f}s</div>'
43
+ time_html += '</div>'
44
+ st.markdown(time_html, unsafe_allow_html=True)
45
+
46
  # Language metadata on a separate line, Subject Tags below
47
 
48
  # First show languages if available
 
104
  # Create a separate line for Time if we have time-related tags
105
  if 'topics' in result and result['topics']:
106
  time_tags = [topic for topic in result['topics']
107
+ if any(term in topic.lower() for term in ["century", "pre-", "era"])]
108
  if time_tags:
109
  time_html = '<div style="display: flex; align-items: center; margin: 0.2rem 0; flex-wrap: wrap;">'
110
  time_html += '<div style="margin-right: 0.3rem; font-weight: bold;">Time:</div>'
 
117
  if 'topics' in result and result['topics']:
118
  # Filter out time-related tags which are already displayed
119
  subject_tags = [topic for topic in result['topics']
120
+ if not any(term in topic.lower() for term in ["century", "pre-", "era"])]
121
 
122
  if subject_tags:
123
  # Create a separate line for Subject Tags
 
137
  tag_class += " tag-document-type" # Document types
138
  elif any(term in topic.lower() for term in ["travel", "military", "science", "medicine", "education", "art", "literature"]):
139
  tag_class += " tag-subject" # Subject domains
140
+ elif "historical" in topic.lower() and "document" in topic.lower():
141
+ tag_class += " tag-document-type" # "Historical Document Analysis" should be a document type
142
 
143
  # Add each tag as an inline span
144
  tags_html += f'<span class="{tag_class}">{topic}</span>'