import html
import json
import os
import re
import tempfile
import time
import traceback

import gradio as gr
import requests

from validation import validate_json, validate_croissant, validate_records, generate_validation_report
def process_file(file):
    """Run the validation stages on an uploaded Croissant file.

    The stages run in order (JSON format, Croissant schema, records
    generation). A failure in either of the first two stages stops the
    pipeline; a failure in the records stage still produces a report.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the path of
            the file on disk.

    Returns:
        A ``(results, report)`` tuple. ``results`` is a list of
        ``(check_name, passed, message)`` tuples, one per stage that ran.
        ``report`` is whatever ``generate_validation_report`` returns, or
        ``None`` when JSON or schema validation failed.
    """

    def strip_empty_checkmarks(message):
        # Validator messages can contain a check mark alone on its own line;
        # collapse those so the rendered message has no empty bullet rows.
        return message.replace("\n✓\n", "\n")

    # Report only the bare filename, not the full path. basename() honours
    # the platform's path separator, unlike splitting on "/".
    filename = os.path.basename(file.name)
    results = []

    # Check 1: JSON validation
    json_valid, json_message, json_data = validate_json(file.name)
    results.append(
        ("JSON Format Validation", json_valid, strip_empty_checkmarks(json_message))
    )
    if not json_valid:
        return results, None

    # Check 2: Croissant validation
    croissant_valid, croissant_message = validate_croissant(json_data)
    results.append(
        (
            "Croissant Schema Validation",
            croissant_valid,
            strip_empty_checkmarks(croissant_message),
        )
    )
    if not croissant_valid:
        return results, None

    # Check 3: Records validation
    records_valid, records_message = validate_records(json_data)
    results.append(
        ("Records Generation Test", records_valid, strip_empty_checkmarks(records_message))
    )

    # The detailed report is generated even when the records check failed.
    report = generate_validation_report(filename, json_data, results)
    return results, report
def _clean_validator_message(message):
    """Drop check marks that sit alone on a line of a validator message."""
    return message.replace("\n✓\n", "\n")


def _dataset_name(json_data):
    """Return the ``name`` field of parsed Croissant JSON, or ``'unnamed'``."""
    # The document may be any JSON value (list, string, ...), not only an object.
    if isinstance(json_data, dict):
        return json_data.get("name", "unnamed")
    return "unnamed"


def _save_report(report, dataset_name):
    """Write *report* to a new temporary directory and return the file path.

    The dataset name comes from user-supplied JSON, so it is reduced to a
    conservative character set before being used in a filename. A fresh
    directory per report keeps concurrent validations from overwriting each
    other's files.
    """
    slug = re.sub(r"[^A-Za-z0-9._-]+", "_", str(dataset_name)).strip("._") or "unnamed"
    folder = tempfile.mkdtemp(prefix="croissant-report-")
    path = os.path.join(folder, f"report_croissant-validation_{slug}.md")
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(report)
    return path


def create_ui():
    """Build the Gradio Blocks application for the Croissant validator.

    The UI offers two input tabs (file upload and URL), a status line, a
    results panel, and a collapsible report section. All event handlers are
    defined and wired inside the Blocks context.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🔎🥐 Croissant Validator for NeurIPS D&B")
        gr.Markdown(
            "\n"
            "Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission.\n"
            "The validator will check:\n"
            "1. If the file is valid JSON\n"
            "2. If it passes Croissant schema validation\n"
            "3. If records can be generated within a reasonable time\n"
        )

        # Track the active tab for conditional UI updates
        active_tab = gr.State("upload")  # Default to upload tab

        # Container for the entire input section
        with gr.Group():
            with gr.Tabs() as tabs:
                with gr.TabItem("Upload File", id="upload_tab"):
                    file_input = gr.File(
                        label="Upload Croissant JSON-LD File",
                        file_types=[".json", ".jsonld"],
                    )
                    validate_btn = gr.Button("Validate Uploaded File", variant="primary")
                with gr.TabItem("URL Input", id="url_tab"):
                    url_input = gr.Textbox(
                        label="Enter Croissant JSON-LD URL",
                        placeholder="e.g. https://huggingface.co/api/datasets/facebook/natural_reasoning/croissant",
                    )
                    fetch_btn = gr.Button("Fetch and Validate", variant="primary")

            # Initial message matches the default (upload) tab
            upload_progress = gr.HTML("\nReady for upload\n", visible=True)

        # Validation results live in a separate group
        with gr.Group():
            validation_results = gr.HTML(visible=False)
            validation_progress = gr.HTML(visible=False)

            # Collapsible report section
            with gr.Accordion(
                "Download full validation report", visible=False, open=False
            ) as report_group:
                with gr.Column():
                    report_md = gr.File(
                        label="Download Report",
                        visible=True,
                        file_types=[".md"],
                    )
                    report_text = gr.Textbox(
                        label="Report Content",
                        visible=True,
                        show_copy_button=True,
                        lines=10,
                    )

        # Placeholder component for the validation UI's CSS
        gr.HTML("""
""")

        def status_only(message):
            """Outputs that show *message* and hide the results and report.

            Order matches ``[upload_progress, validation_results,
            report_group, report_text, report_md]``.
            """
            return [
                message,
                gr.update(visible=False),
                gr.update(visible=False),
                None,
                None,
            ]

        def on_tab_change(evt: gr.SelectData):
            """Reset status, results, report and both inputs on a tab switch."""
            if evt.value == "Upload File":
                mode, hint = "upload", "Ready for upload\n"
            else:
                mode, hint = "url", "Enter a URL to fetch\n"
            return [
                mode,  # active_tab
                hint,  # upload_progress
                gr.update(visible=False),  # validation_results
                gr.update(visible=False),  # Hide report group
                None,  # Clear report text
                None,  # Clear report file
                None,  # Clear file input
                gr.update(value=""),  # Clear URL input
            ]

        # on_copy_click and on_download_click are not wired to any event
        # below; they are kept so the set of handlers is unchanged.
        def on_copy_click(report):
            """Return the report text unchanged."""
            return report

        def on_download_click(report, file_name):
            """Write *report* to ``report_<file_name>.md`` and return that path."""
            report_file = f"report_{file_name}.md"
            with open(report_file, "w", encoding="utf-8") as f:
                f.write(report)
            return report_file

        def on_file_upload(file):
            """Update the status line when a file is chosen or cleared."""
            if file is None:
                return status_only("Ready for upload\n")
            return status_only("✅ File uploaded successfully\n")

        def build_results_html(results):
            """Render ``(test_name, passed, message)`` tuples as an HTML update.

            Validator messages may echo content from the user's file or a
            remote server, so every interpolated value is HTML-escaped.
            """
            markup = ['<div class="validation-results">']
            for i, (test_name, passed, message) in enumerate(results):
                status_class = "status-success" if passed else "status-error"
                status_icon = "✓" if passed else "✗"
                message_with_emoji = ("✅ " if passed else "❌ ") + str(message)
                markup.append(
                    f'<details class="validation-step" id="step-{i}">'
                    f'<summary><span class="step-status {status_class}">{status_icon}</span> '
                    f"{html.escape(str(test_name))}</summary>"
                    f'<pre class="step-details">{html.escape(message_with_emoji)}</pre>'
                    "</details>"
                )
            markup.append("</div>")
            return gr.update(value="\n".join(markup), visible=True)

        def fetch_from_url(url):
            """Download JSON from *url*, validate it, and build the outputs.

            Returns a list matching ``[upload_progress, validation_results,
            report_group, report_text, report_md]``. Any failure is reported
            in the status line instead of being raised.
            """
            url = (url or "").strip()
            if not url:
                return status_only("Please enter a URL\n")

            fetched = "✅ JSON fetched successfully from URL\n"
            try:
                # NOTE(review): this fetches an arbitrary user-supplied URL
                # from the server; restrict targets if the host can reach
                # private network addresses.
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                json_data = response.json()

                results = [
                    ("JSON Format Validation", True, "The URL returned valid JSON.")
                ]
                croissant_valid, croissant_message = validate_croissant(json_data)
                results.append(
                    (
                        "Croissant Schema Validation",
                        croissant_valid,
                        _clean_validator_message(croissant_message),
                    )
                )
                if not croissant_valid:
                    return [
                        fetched,
                        build_results_html(results),
                        gr.update(visible=False),
                        None,
                        None,
                    ]

                records_valid, records_message = validate_records(json_data)
                results.append(
                    (
                        "Records Generation Test",
                        records_valid,
                        _clean_validator_message(records_message),
                    )
                )

                # Use the last path segment of the URL as the report's source
                # name; ignore a trailing slash so it is never empty.
                source_name = url.rstrip("/").split("/")[-1]
                report = generate_validation_report(source_name, json_data, results)
                report_path = (
                    _save_report(report, _dataset_name(json_data)) if report else None
                )
                return [
                    fetched,
                    build_results_html(results),
                    gr.update(visible=bool(report)),
                    report if report else None,
                    report_path,
                ]
            except json.JSONDecodeError as e:
                # Must come before RequestException: the error raised by
                # response.json() in current requests releases derives from
                # both, and would otherwise be reported as a fetch failure.
                return status_only(
                    html.escape(f"URL did not return valid JSON: {str(e)}") + "\n"
                )
            except requests.exceptions.RequestException as e:
                return status_only(html.escape(f"Error fetching URL: {str(e)}") + "\n")
            except Exception as e:
                # Top-level UI boundary: keep the app responsive but leave a
                # trace in the server log for debugging.
                traceback.print_exc()
                return status_only(html.escape(f"Unexpected error: {str(e)}") + "\n")

        def on_validate(file):
            """Validate the uploaded file and build the result outputs.

            Returns a list matching ``[validation_results,
            validation_progress, report_group, report_text, report_md]``.
            """
            if file is None:
                return [
                    gr.update(visible=False),  # validation_results
                    gr.update(visible=False),  # validation_progress
                    gr.update(visible=False),  # report_group
                    None,  # report_text
                    None,  # report_md
                ]

            # Process the file and get results
            results, report = process_file(file)

            report_path = None
            if report:
                # process_file does not hand back the parsed JSON, so re-read
                # the file to pick up the dataset name for the report filename.
                try:
                    with open(file.name, "r", encoding="utf-8") as f:
                        dataset_name = _dataset_name(json.load(f))
                except (OSError, ValueError):
                    dataset_name = "unnamed"
                report_path = _save_report(report, dataset_name)

            # Return final state
            return [
                build_results_html(results),  # validation_results
                gr.update(visible=False),  # validation_progress
                gr.update(visible=bool(report)),  # report_group
                report if report else None,  # report_text
                report_path,  # report_md
            ]

        # Connect UI events to functions
        tabs.select(
            on_tab_change,
            None,
            [
                active_tab,
                upload_progress,
                validation_results,
                report_group,
                report_text,
                report_md,
                file_input,
                url_input,
            ],
        )
        file_input.change(
            on_file_upload,
            inputs=file_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )

        def show_progress():
            """Show the progress panel and hide any previous results/report."""
            progress_html = """
"""
            return [
                gr.update(visible=False),  # validation_results
                gr.update(visible=True, value=progress_html),  # validation_progress
                gr.update(visible=False),  # report_group
                None,  # report_text
                None,  # report_md
            ]

        # Show the progress panel immediately (unqueued), then run validation.
        validate_btn.click(
            fn=show_progress,
            inputs=None,
            outputs=[validation_results, validation_progress, report_group, report_text, report_md],
            queue=False,
        ).then(
            fn=on_validate,
            inputs=file_input,
            outputs=[validation_results, validation_progress, report_group, report_text, report_md],
        )
        fetch_btn.click(
            fetch_from_url,
            inputs=url_input,
            outputs=[upload_progress, validation_results, report_group, report_text, report_md],
        )

        # Footer
        gr.HTML("""
""")
    return app
# Script entry point: build the Blocks app and start the Gradio server.
if __name__ == "__main__":
    app = create_ui()
    app.launch()