luisoala committed on
Commit
5368667
·
1 Parent(s): c0d4205

cosmetic fixes

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. app.py +270 -0
  3. requirements.txt +4 -0
  4. validation.py +63 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Cc V2
3
- emoji: 🔥
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.20.0
8
  app_file: app.py
 
1
  ---
2
+ title: Croissant Checker
3
+ emoji: 😻
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.20.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import time
4
+ import traceback
5
+ from validation import validate_json, validate_croissant, validate_records
6
+ import requests
7
+
8
def process_file(file):
    """Run the validation pipeline on an uploaded file.

    The checks run in order (JSON parsing, Croissant schema, record
    generation) and the pipeline stops after the first check that fails.

    Args:
        file: Uploaded-file object; its ``name`` attribute is passed to
            ``validate_json`` as the path to read.

    Returns:
        A list of ``(check_title, passed, message)`` tuples, one per check
        that was actually executed.
    """
    json_ok, json_msg, payload = validate_json(file.name)
    report = [("JSON Format Validation", json_ok, json_msg)]

    if json_ok:
        schema_ok, schema_msg = validate_croissant(payload)
        report.append(("Croissant Schema Validation", schema_ok, schema_msg))

        # Record generation is only attempted on schema-valid metadata.
        if schema_ok:
            records_ok, records_msg = validate_records(payload)
            report.append(("Records Generation Test", records_ok, records_msg))

    return report
30
+
31
def create_ui():
    """Build the Gradio Blocks interface for the Croissant validator.

    The page offers two tabs (file upload and URL input), a one-line status
    area, and an HTML panel that lists one collapsible entry per validation
    step that was run.

    Returns:
        The ``gr.Blocks`` application. It is not launched here.
    """
    # Imported at function scope so the block is self-contained; aliased so it
    # cannot be confused with local variables holding HTML markup.
    from html import escape as escape_html

    def progress_html(status):
        """Wrap a status message in the progress-container markup.

        The message is HTML-escaped because it may contain exception text.
        """
        return (
            '<div class="progress-container">'
            f'<div class="progress-status">{escape_html(status)}</div>'
            '</div>'
        )

    def hidden_results():
        """Return an update that hides the validation results panel."""
        return gr.update(visible=False)

    def build_results_html(results):
        """Render ``(test_name, passed, message)`` tuples as collapsible steps.

        Returns a ``gr.update`` that sets the markup and makes the panel
        visible.
        """
        steps = []
        for i, (test_name, passed, message) in enumerate(results):
            status_class = "status-success" if passed else "status-error"
            status_icon = "✓" if passed else "✗"
            # Messages can contain tracebacks and text derived from the
            # validated document, so they must never be injected as raw HTML.
            safe_name = escape_html(str(test_name))
            safe_message = escape_html(str(message))

            steps.append(f'''
            <div class="validation-step" id="step-{i}">
                <div class="step-header" onclick="
                    var details = document.getElementById('details-{i}');
                    var arrow = document.getElementById('arrow-{i}');
                    if(details.style.display === 'none') {{
                        details.style.display = 'block';
                        arrow.classList.add('arrow-down');
                    }} else {{
                        details.style.display = 'none';
                        arrow.classList.remove('arrow-down');
                    }}">
                    <div class="step-left">
                        <div class="step-status {status_class}">{status_icon}</div>
                        <div class="step-title">{safe_name}</div>
                        <div class="arrow-indicator" id="arrow-{i}">▶</div>
                    </div>
                </div>
                <div class="step-details" id="details-{i}" style="display: none;">{safe_message}</div>
            </div>
            ''')

        markup = '<div class="validation-results">' + "".join(steps) + '</div>'
        return gr.update(value=markup, visible=True)

    def fetch_json(url):
        """Download ``url`` and parse the body as JSON.

        Returns:
            ``(json_data, None)`` on success, or ``(None, error_message)``
            when the request or the JSON decoding fails.
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return None, f"❌ Error fetching URL: {e}"

        # Decoding is handled separately: the decode error raised by
        # ``response.json()`` is itself a RequestException in current
        # versions of requests, so a shared try block would report bad JSON
        # as a fetch failure.
        try:
            return response.json(), None
        except ValueError as e:
            return None, f"❌ URL did not return valid JSON: {e}"

    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.Markdown("# Croissant JSON-LD Validator for NeurIPS")
        gr.Markdown("""
        Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission.
        The validator will check:
        1. If the file is valid JSON
        2. If it passes Croissant schema validation
        3. If records can be generated within a reasonable time
        """)

        # Containers for the status line and the validation results.
        validation_results = gr.HTML(visible=False)
        upload_progress = gr.HTML(progress_html("Ready for validation"), visible=True)

        # Track the active tab for conditional UI updates.
        active_tab = gr.State("upload")  # Default to upload tab

        with gr.Tabs() as tabs:
            with gr.TabItem("Upload File", id="upload_tab"):
                file_input = gr.File(label="Upload Croissant JSON-LD File", file_types=[".json", ".jsonld"])
                validate_btn = gr.Button("Validate Uploaded File", variant="primary")

            with gr.TabItem("URL Input", id="url_tab"):
                url_input = gr.Textbox(
                    label="Enter Croissant JSON-LD URL",
                    placeholder="e.g. https://huggingface.co/api/datasets/facebook/natural_reasoning/croissant"
                )
                fetch_btn = gr.Button("Fetch and Validate", variant="primary")

        # CSS for the validation UI, with fallbacks for light and dark themes.
        gr.HTML("""
        <style>
        .gradio-container {
            max-width: 800px;
            margin: 0 auto;
        }
        .validation-step {
            margin-bottom: 15px;
            border: 1px solid var(--border-color-primary, #e0e0e0);
            border-radius: 8px;
            overflow: hidden;
        }
        .step-header {
            padding: 10px 15px;
            background-color: var(--background-fill-secondary, #f5f5f5);
            display: flex;
            align-items: center;
            cursor: pointer;
        }
        .step-left {
            display: flex;
            align-items: center;
            flex-grow: 1;
        }
        .step-status {
            margin-right: 10px;
            width: 24px;
            height: 24px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-weight: bold;
            color: white !important;
            font-size: 16px;
            text-shadow: 0px 0px 1px rgba(0,0,0,0.5);
        }
        .arrow-indicator {
            margin-left: 10px;
            font-size: 16px;
            transition: transform 0.3s ease;
        }
        .arrow-down {
            transform: rotate(90deg);
        }
        .status-success {
            background-color: #4caf50;
        }
        .status-error {
            background-color: #f44336;
        }
        .status-waiting {
            background-color: #9e9e9e;
        }
        /* Dark mode specific styles */
        .dark .step-header {
            background-color: var(--background-fill-secondary, #2e2e2e);
            color: var(--body-text-color, #ffffff);
        }
        .dark .step-title {
            color: var(--body-text-color, #ffffff);
        }
        .dark .step-details {
            color: var(--body-text-color, #ffffff);
            background-color: var(--background-fill-primary, #1f1f1f);
            padding: 10px 15px;
        }
        /* Light mode details; pre-wrap keeps multi-line messages readable */
        .step-details {
            padding: 10px 15px;
            background-color: var(--background-fill-primary, #ffffff);
            white-space: pre-wrap;
        }
        </style>
        """)

        def on_tab_change(evt: gr.SelectData):
            """Reset the status line and hide old results when the tab changes."""
            if evt.value == "Upload File":
                return "upload", progress_html("Ready for upload"), hidden_results()
            return "url", progress_html("Enter a URL to fetch"), hidden_results()

        def on_file_upload(file):
            """Update the status line when a file is selected or cleared."""
            if file is None:
                return progress_html("Ready for upload"), hidden_results()
            return progress_html("✅ File uploaded successfully"), hidden_results()

        def fetch_from_url(url):
            """Fetch JSON from ``url`` and run the Croissant checks on it.

            Returns a ``(status_html, results_update)`` pair for the two
            output components.
            """
            url = (url or "").strip()
            if not url:
                return progress_html("Please enter a URL"), hidden_results()

            # NOTE(review): the URL is user-supplied and fetched server-side;
            # consider restricting target hosts if this is deployed where
            # internal services are reachable.
            try:
                json_data, fetch_error = fetch_json(url)
                if fetch_error is not None:
                    return progress_html(fetch_error), hidden_results()

                status = progress_html("✅ JSON fetched successfully from URL")

                results = [("JSON Format Validation", True, "✅ The URL returned valid JSON.")]

                croissant_valid, croissant_message = validate_croissant(json_data)
                results.append(("Croissant Schema Validation", croissant_valid, croissant_message))

                # Record generation is only attempted on schema-valid metadata.
                if croissant_valid:
                    records_valid, records_message = validate_records(json_data)
                    results.append(("Records Generation Test", records_valid, records_message))

                return status, build_results_html(results)
            except Exception as e:
                # Last-resort boundary so the UI shows a message instead of
                # a failed event.
                return progress_html(f"❌ Unexpected error: {e}"), hidden_results()

        def on_validate(file):
            """Validate the uploaded file and render the results panel."""
            if file is None:
                return hidden_results()
            return build_results_html(process_file(file))

        # Connect UI events to functions
        tabs.select(on_tab_change, None, [active_tab, upload_progress, validation_results])
        file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
        validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)
        fetch_btn.click(fetch_from_url, inputs=url_input, outputs=[upload_progress, validation_results])

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 20px;">
            <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant format</a>.</p>
        </div>
        """)

    return app
267
+
268
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start the Gradio server.
    app = create_ui()
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=3.50.2
2
+ mlcroissant
3
+ func_timeout
4
+ requests
validation.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import traceback
3
+ import mlcroissant as mlc
4
+ import func_timeout
5
+
6
+ ONE_MINUTE = 60 # seconds
7
+
8
def validate_json(file_path):
    """Check that the file at ``file_path`` contains well-formed JSON.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(passed, message, json_data)`` tuple. ``json_data`` is the parsed
        document on success and ``None`` on any failure.
    """
    try:
        # Decode explicitly as UTF-8 rather than with the platform default
        # encoding, which can mis-decode or reject non-ASCII content.
        with open(file_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
    except json.JSONDecodeError as e:
        return False, f"❌ Invalid JSON format: {e}", None
    except Exception as e:
        # Covers unreadable/missing files and undecodable bytes.
        return False, f"❌ Error reading file: {e}", None
    return True, "✅ The file is valid JSON.", json_data
20
+
21
def validate_croissant(json_data):
    """Check that ``json_data`` can be loaded as a Croissant dataset.

    Args:
        json_data: Parsed JSON-LD document, passed to
            ``mlc.Dataset(jsonld=...)``.

    Returns:
        A ``(passed, message)`` tuple. On failure the message holds the
        exception text followed by the formatted traceback.
    """
    try:
        # Only the absence of an exception matters here; the constructed
        # dataset itself is not used.
        mlc.Dataset(jsonld=json_data)
    except mlc.ValidationError as e:
        error_details = traceback.format_exc()
        return False, f"❌ Validation failed: {e}\n\n{error_details}"
    except Exception as e:
        error_details = traceback.format_exc()
        return False, f"❌ Unexpected error during validation: {e}\n\n{error_details}"
    return True, "✅ The dataset passes Croissant validation."
34
+
35
def validate_records(json_data):
    """Check that each record set can yield a first record in time.

    For every record set in the dataset metadata, the first record is
    requested under a timeout of ``ONE_MINUTE`` seconds. Validation stops at
    the first record set that fails.

    Args:
        json_data: Parsed JSON-LD document, passed to
            ``mlc.Dataset(jsonld=...)``.

    Returns:
        A ``(passed, message)`` tuple. On success the message has one line
        per record set; on failure it describes the first failing record set.
    """
    try:
        dataset = mlc.Dataset(jsonld=json_data)
        record_sets = dataset.metadata.record_sets

        if not record_sets:
            return True, "✅ No record sets found to validate."

        results = []

        for record_set in record_sets:
            name = record_set.name
            try:
                records = dataset.records(record_set=name)
                # Fetching one record is enough to prove generation works.
                func_timeout.func_timeout(ONE_MINUTE, lambda: next(iter(records)))
            except func_timeout.exceptions.FunctionTimedOut:
                # Report the limit from the constant so the text cannot drift.
                return False, f"❌ Record set '{name}' generation took too long (>{ONE_MINUTE}s)"
            except StopIteration:
                # An exhausted iterator carries no message of its own, so
                # give the user an explicit reason.
                return False, f"❌ Record set '{name}' failed: no records were generated."
            except Exception as e:
                error_details = traceback.format_exc()
                return False, f"❌ Record set '{name}' failed: {e}\n\n{error_details}"

            results.append(f"✅ Record set '{name}' passed validation.")

        return True, "\n".join(results)
    except Exception as e:
        error_details = traceback.format_exc()
        error_message = f"❌ Unexpected error during records validation: {e}\n\n{error_details}"
        return False, error_message