import streamlit as st
import datasets
import json

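# Small helper for reading a JSON file from disk.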
def load_json(file_path):
    with open(file_path, "r") as f:
        return json.load(f)

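# Load the locally saved HumanEval-V dataset and the mapping from viewer index to dataset row.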
humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
idx_mapping = load_json("idx_mapping.json")

st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")

st.markdown("> <i>This is a viewer for the **HumanEval-V** benchmark, which includes 253 coding tasks. Use the navigation buttons or enter an index to browse through the tasks. Please note that image loading may take a moment after switching to the next task. If you encounter any issues or have questions, feel free to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
st.markdown("---")

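# Total number of tasks in the benchmark.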
max_index = 253

# Initialize session state for index if not present
if 'index' not in st.session_state:
    st.session_state.index = 1

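# Header row: app title on the left, index selector next to it (the third column is left empty for spacing).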
buttons = st.columns([2, 1.1, 5.9])

with buttons[0]:
    st.markdown("# HumanEval-V Viewer")

with buttons[1]:
    # Number input for navigation
    index_input = st.number_input(
        f"Go to index (1-{max_index}):", 
        min_value=1, 
        max_value=max_index, 
        value=st.session_state.index, 
        key="index_input", 
        help="Enter an index and jump to that index.",
        step=1
    )

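# Fetch the selected task: the 1-based UI index is mapped to a dataset row via idx_mapping.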
coding_task = humaneval_v_data[idx_mapping[str(index_input-1)]]
qid = coding_task["qid"]
diagram = coding_task["diagram"]
diagram_description = coding_task["ground_truth_diagram_description"]
function_signature = coding_task["function_signature"]
ground_truth = coding_task["ground_truth_solution"]
test_script = coding_task["test_script"]
task_type = coding_task["task_type"]
capability_aspects = coding_task["capability_aspects"]

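# Two-column layout: diagram and metadata on the left, descriptions and code on the right.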
upper_columns = st.columns([2, 7])
with upper_columns[0]:
    st.markdown(f"### Question ID: {qid}")
    st.image(diagram, use_column_width=True)
    st.markdown("")
    st.markdown(f"### Task Type: {task_type}")
    st.markdown("")
    st.markdown("### Capability Aspects:")
    st.markdown("")
    st.json(capability_aspects)
with upper_columns[1]:
    st.markdown("### Function Signature:")
    st.markdown("")
    st.markdown(f"""```python
{function_signature}
```""")
    st.markdown("### Ground Truth Diagram Description:")
    st.markdown("")
    st.markdown(f"""```markdown
{diagram_description}
```""")
    st.markdown("### Ground Truth Solution:")
    st.markdown("")
    st.markdown(f"""```python
{ground_truth}
```""")
    st.markdown("### Test Script:")
    st.markdown("")
    st.markdown(f"""```python
{test_script}
```""")