"""Gradio front end for validating Croissant JSON-LD files (NeurIPS D&B).

The app accepts either an uploaded file or a URL, runs three checks from the
local ``validation`` module (JSON syntax, Croissant schema, record
generation) and offers a Markdown report when every stage that ran produced
one.
"""

import html
import json
import os
import re
import time
import traceback

import gradio as gr
import requests

from validation import (
    generate_validation_report,
    validate_croissant,
    validate_json,
    validate_records,
)

# Validator messages can contain a check mark alone on its own line; those
# lines carry no information and are collapsed before display.
_EMPTY_CHECKMARK_LINE = "\n✓\n"

_INTRO_MARKDOWN = """
Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission.

The validator will check:
1. If the file is valid JSON
2. If it passes Croissant schema validation
3. If records can be generated within a reasonable time
"""


def _clean_message(message):
    """Return *message* with lone check-mark lines removed."""
    return message.replace(_EMPTY_CHECKMARK_LINE, "\n")


def _status_html(message):
    """Return the HTML snippet shown in the status area for *message*.

    The text is escaped because error messages embed exception text, which in
    turn can embed the user-supplied URL.
    """
    return f"\n{html.escape(message)}\n"


def _report_filename(json_data):
    """Build a safe report filename from the dataset's ``name`` field.

    The name comes from user-controlled JSON, so anything other than word
    characters, dots and dashes is replaced and leading dots are dropped; this
    keeps path separators and ``..`` out of the filename. Falls back to
    ``unnamed`` when the document is not a mapping or the name is empty.
    """
    name = json_data.get("name", "unnamed") if isinstance(json_data, dict) else "unnamed"
    safe_name = re.sub(r"[^\w.-]+", "_", str(name)).lstrip(".")
    return f"report_croissant-validation_{safe_name or 'unnamed'}.md"


def _write_report(report, json_data):
    """Write *report* to disk in the working directory and return its filename."""
    report_filename = _report_filename(json_data)
    # Reports contain emoji, so do not rely on the platform default encoding.
    with open(report_filename, "w", encoding="utf-8") as f:
        f.write(report)
    return report_filename


def _hidden_results():
    """Return updates that hide the results panel and clear the report widgets.

    Order matches ``[validation_results, report_group, report_text, report_md]``.
    """
    return [gr.update(visible=False), gr.update(visible=False), None, None]


def build_results_html(results):
    """Render validation results as an HTML update for the results panel.

    Args:
        results: iterable of ``(test_name, passed, message)`` tuples.

    Returns:
        A ``gr.update`` that makes the panel visible with one entry per test,
        showing its status icon, its name and the validator's message.
    """
    parts = ['<div class="validation-results">']
    for test_name, passed, message in results:
        status_class = "status-success" if passed else "status-error"
        status_icon = "✓" if passed else "✗"
        message_with_emoji = ("✅ " if passed else "❌ ") + message
        # Validator messages are treated as plain text: escape them and keep
        # their line breaks with pre-wrap.
        parts.append(
            '<div class="validation-step">\n'
            f'<div class="step-status {status_class}">{status_icon}</div>\n'
            f'<div class="step-title">{html.escape(test_name)} ▶</div>\n'
            '<div class="step-details" style="white-space: pre-wrap;">'
            f"{html.escape(message_with_emoji)}</div>\n"
            "</div>"
        )
    parts.append("</div>")
    return gr.update(value="\n".join(parts), visible=True)


def process_file(file):
    """Run the validation pipeline on an uploaded file.

    Stages run in order and the pipeline stops at the first failure of the
    JSON or Croissant stage; the records stage never short-circuits.

    Args:
        file: uploaded-file object exposing the on-disk path as ``file.name``.

    Returns:
        ``(results, report)`` where *results* is a list of
        ``(test_name, passed, message)`` tuples and *report* is the generated
        report, or ``None`` when validation stopped early.
    """
    # Use just the filename instead of the full path in the report.
    filename = os.path.basename(file.name)
    results = []

    # Check 1: JSON validation
    json_valid, json_message, json_data = validate_json(file.name)
    results.append(("JSON Format Validation", json_valid, _clean_message(json_message)))
    if not json_valid:
        return results, None

    # Check 2: Croissant validation
    croissant_valid, croissant_message = validate_croissant(json_data)
    results.append(
        ("Croissant Schema Validation", croissant_valid, _clean_message(croissant_message))
    )
    if not croissant_valid:
        return results, None

    # Check 3: Records validation
    records_valid, records_message = validate_records(json_data)
    results.append(("Records Generation Test", records_valid, _clean_message(records_message)))

    report = generate_validation_report(filename, json_data, results)
    return results, report


def create_ui():
    """Build the Gradio Blocks application and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` app, ready for ``launch()``.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🔎🥐 Croissant Validator for NeurIPS D&B")
        gr.Markdown(_INTRO_MARKDOWN)

        # Track the active tab for conditional UI updates (defaults to upload).
        active_tab = gr.State("upload")

        # Input section
        with gr.Group():
            with gr.Tabs() as tabs:
                with gr.TabItem("Upload File", id="upload_tab"):
                    file_input = gr.File(
                        label="Upload Croissant JSON-LD File",
                        file_types=[".json", ".jsonld"],
                    )
                    validate_btn = gr.Button("Validate Uploaded File", variant="primary")

                with gr.TabItem("URL Input", id="url_tab"):
                    url_input = gr.Textbox(
                        label="Enter Croissant JSON-LD URL",
                        placeholder="e.g. https://huggingface.co/api/datasets/facebook/natural_reasoning/croissant",
                    )
                    fetch_btn = gr.Button("Fetch and Validate", variant="primary")

            # Initial message matches the default (upload) tab.
            upload_progress = gr.HTML(_status_html("Ready for upload"), visible=True)

        # Validation results section
        with gr.Group():
            validation_results = gr.HTML(visible=False)
            validation_progress = gr.HTML(visible=False)

            # Collapsible report section
            with gr.Accordion(
                "Download full validation report", visible=False, open=False
            ) as report_group:
                with gr.Column():
                    report_md = gr.File(
                        label="Download Report",
                        visible=True,
                        file_types=[".md"],
                    )
                    report_text = gr.Textbox(
                        label="Report Content",
                        visible=True,
                        show_copy_button=True,
                        lines=10,
                    )

        # Placeholder for the validation UI's CSS.
        gr.HTML(""" """)

        def on_tab_change(evt: gr.SelectData):
            """Reset status text, results and both inputs when the tab changes."""
            if evt.value == "Upload File":
                tab_key, status = "upload", "Ready for upload"
            else:
                tab_key, status = "url", "Enter a URL to fetch"
            return [
                tab_key,
                _status_html(status),
                *_hidden_results(),
                None,  # Clear file input
                gr.update(value=""),  # Clear URL input
            ]

        def on_file_upload(file):
            """Update the status line and clear stale results after an upload change."""
            status = "Ready for upload" if file is None else "✅ File uploaded successfully"
            return [_status_html(status), *_hidden_results()]

        def fetch_from_url(url):
            """Download a Croissant document from *url* and validate it.

            Returns updates for ``[upload_progress, validation_results,
            report_group, report_text, report_md]``.
            """

            def failure(message):
                return [_status_html(message), *_hidden_results()]

            url = (url or "").strip()
            if not url:
                return failure("Please enter a URL")

            # Stage 1: network fetch.
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                return failure(f"Error fetching URL: {str(e)}")

            # Stage 2: JSON decoding. This is handled separately from the
            # fetch because the decode error raised by requests is itself a
            # RequestException, and would otherwise be reported as a fetch
            # failure. Every decode error requests raises is a ValueError.
            try:
                json_data = response.json()
            except ValueError as e:
                return failure(f"URL did not return valid JSON: {str(e)}")

            # Stage 3: validation. This is the UI boundary, so unexpected
            # errors are logged and shown to the user rather than propagated.
            try:
                fetched_status = _status_html("✅ JSON fetched successfully from URL")
                results = [("JSON Format Validation", True, "The URL returned valid JSON.")]

                croissant_valid, croissant_message = validate_croissant(json_data)
                results.append(
                    (
                        "Croissant Schema Validation",
                        croissant_valid,
                        _clean_message(croissant_message),
                    )
                )
                if not croissant_valid:
                    return [
                        fetched_status,
                        build_results_html(results),
                        gr.update(visible=False),
                        None,
                        None,
                    ]

                records_valid, records_message = validate_records(json_data)
                results.append(
                    ("Records Generation Test", records_valid, _clean_message(records_message))
                )

                report = generate_validation_report(url.split("/")[-1], json_data, results)
                # Only advertise a report file that was actually written.
                report_filename = _write_report(report, json_data) if report else None
                return [
                    fetched_status,
                    build_results_html(results),
                    gr.update(visible=bool(report)),
                    report if report else None,
                    report_filename,
                ]
            except Exception as e:
                traceback.print_exc()
                return failure(f"Unexpected error: {str(e)}")

        def on_validate(file):
            """Validate the uploaded file and publish results and report.

            Returns updates for ``[validation_results, validation_progress,
            report_group, report_text, report_md]``.
            """
            if file is None:
                return [
                    gr.update(visible=False),  # validation_results
                    gr.update(visible=False),  # validation_progress
                    gr.update(visible=False),  # report_group
                    None,  # report_text
                    None,  # report_md
                ]

            results, report = process_file(file)

            report_filename = None
            if report:
                # Re-read the file to get the dataset name for the report
                # filename; an unreadable or undecodable file falls back to
                # the default name.
                try:
                    with open(file.name, "r", encoding="utf-8") as f:
                        json_data = json.load(f)
                except (OSError, ValueError):
                    json_data = None
                report_filename = _write_report(report, json_data)

            return [
                build_results_html(results),  # validation_results
                gr.update(visible=False),  # validation_progress
                gr.update(visible=bool(report)),  # report_group
                report if report else None,  # report_text
                report_filename,  # report_md
            ]

        def show_progress():
            """Show the progress message and hide any previous results."""
            return [
                gr.update(visible=False),  # validation_results
                gr.update(visible=True, value=_status_html("Validating file...")),
                gr.update(visible=False),  # report_group
                None,  # report_text
                None,  # report_md
            ]

        # Connect UI events to handlers.
        tabs.select(
            on_tab_change,
            None,
            [
                active_tab,
                upload_progress,
                validation_results,
                report_group,
                report_text,
                report_md,
                file_input,
                url_input,
            ],
        )

        file_input.change(
            on_file_upload,
            inputs=file_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )

        result_outputs = [
            validation_results,
            validation_progress,
            report_group,
            report_text,
            report_md,
        ]
        # Show the progress message immediately (unqueued), then validate.
        validate_btn.click(
            fn=show_progress,
            inputs=None,
            outputs=result_outputs,
            queue=False,
        ).then(
            fn=on_validate,
            inputs=file_input,
            outputs=result_outputs,
        )

        fetch_btn.click(
            fetch_from_url,
            inputs=url_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )

        # Footer
        gr.HTML("\n\nLearn more about Croissant format.\n\n")

    return app


if __name__ == "__main__":
    app = create_ui()
    app.launch()