Spaces:

HumanEval-V
/

HumanEval-V-Benchmark-Viewer

Running

App Files Files Community

HumanEval-V-Benchmark-Viewer / app.py

anonymous-researcher912

update new data

968f8be 3 months ago

raw

history blame contribute delete

2.64 kB

	import streamlit as st
	import datasets
	import json

	def load_json(file_path):
	    """Read the JSON document stored at *file_path* and return the parsed value.

	    Args:
	        file_path: Path of the JSON file to read.

	    Returns:
	        Whatever ``json.load`` produces for the file's contents.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the contents are not valid JSON.
	    """
	    # Pin the encoding: without it open() uses the platform default, which
	    # can mis-decode non-ASCII JSON text on non-UTF-8 locales.
	    with open(file_path, "r", encoding="utf-8") as f:
	        return json.load(f)

	# Configure the page before any other Streamlit call is made.
	st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")


	@st.cache_resource(show_spinner=False)
	def _load_benchmark():
	    """Load the benchmark dataset and the index mapping once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    keeps the on-disk dataset and the JSON mapping from being re-read each
	    time. Both objects are only read by the rest of the script, so sharing
	    the cached instances is safe.

	    Returns:
	        A ``(dataset, idx_mapping)`` tuple: the result of
	        ``datasets.load_from_disk("humaneval_v_test_hf")`` and the parsed
	        contents of ``idx_mapping.json``.
	    """
	    return (
	        datasets.load_from_disk("humaneval_v_test_hf"),
	        load_json("idx_mapping.json"),
	    )


	humaneval_v_data, idx_mapping = _load_benchmark()

	# Introductory blurb followed by a horizontal rule.
	st.markdown("> <i>This is a viewer for the HumanEval-V benchmark, which includes 253 coding tasks. Use the navigation buttons or enter an index to browse through the tasks. Please note that image loading may take a moment after switching to the next task. If you encounter any issues or have questions, feel free to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
	st.markdown("---")

	# Largest 1-based task index the navigation input accepts.
	max_index = 253

	# Seed the per-session index the first time the script runs for a session.
	if "index" not in st.session_state:
	    st.session_state["index"] = 1

	# Header row: page title, the index selector, and an unused spacer column.
	buttons = st.columns([2, 1.1, 5.9])
	title_column, selector_column, _spacer_column = buttons

	with title_column:
	    st.markdown("# HumanEval-V Viewer")

	with selector_column:
	    # Number input used to jump straight to a task; bounded to 1..max_index.
	    index_input = st.number_input(
	        label=f"Go to index (1-{max_index}):",
	        value=st.session_state.index,
	        min_value=1,
	        max_value=max_index,
	        step=1,
	        key="index_input",
	        help="Enter an index and jump to that index.",
	    )

	# The selector is 1-based, while idx_mapping is looked up with the 0-based
	# position rendered as a string; the mapped value indexes the dataset.
	mapping_key = str(index_input - 1)
	coding_task = humaneval_v_data[idx_mapping[mapping_key]]

	# Pull out every field of the selected task that the page displays.
	(
	    qid,
	    diagram,
	    diagram_description,
	    function_signature,
	    ground_truth,
	    test_script,
	    task_type,
	    capability_aspects,
	) = (
	    coding_task[field_name]
	    for field_name in (
	        "qid",
	        "diagram",
	        "ground_truth_diagram_description",
	        "function_signature",
	        "ground_truth_solution",
	        "test_script",
	        "task_type",
	        "capability_aspects",
	    )
	)

	# Two-column body: narrow column for the diagram and metadata, wide column
	# for the textual parts of the task.
	upper_columns = st.columns([2, 7])
	diagram_column, details_column = upper_columns

	with diagram_column:
	    st.markdown(f"### Question ID: {qid}")
	    # NOTE(review): recent Streamlit releases deprecate use_column_width;
	    # confirm the installed version before switching parameters.
	    st.image(diagram, use_column_width=True)
	    st.markdown("")
	    st.markdown(f"### Task Type: {task_type}")
	    st.markdown("")
	    st.markdown("### Capability Aspects:")
	    st.markdown("")
	    st.json(capability_aspects)

	# (heading, fence language, text) for each fenced section, in display order.
	fenced_sections = (
	    ("Function Signature", "python", function_signature),
	    ("Ground Truth Diagram Description", "markdown", diagram_description),
	    ("Ground Truth Solution", "python", ground_truth),
	    ("Test Script", "python", test_script),
	)

	with details_column:
	    for section_title, fence_language, section_text in fenced_sections:
	        st.markdown(f"### {section_title}:")
	        st.markdown("")
	        # Rendered as a Markdown fenced block tagged with the fence language.
	        st.markdown(f"""```{fence_language}
	{section_text}
	```""")