|
import streamlit as st |
|
import datasets |
|
import json |
|
|
|
def load_json(file_path):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # JSON text is UTF-8 by specification; being explicit avoids depending on
    # the platform's locale encoding when the file holds non-ASCII characters.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
# On-disk inputs of the viewer: the benchmark saved in the Hugging Face
# `datasets` format, and a JSON mapping that is later indexed with the
# stringified 0-based position to obtain the dataset row to show.
DATASET_DIR = "humaneval_v_test_hf"
IDX_MAPPING_FILE = "idx_mapping.json"

humaneval_v_data = datasets.load_from_disk(DATASET_DIR)
idx_mapping = load_json(IDX_MAPPING_FILE)
|
|
|
# Configure the page before anything is rendered.
st.set_page_config(page_title="HumanEval-V Viewer", layout="wide")

# Introductory blurb, written as a Markdown block quote wrapped in an <i> tag
# (hence unsafe_allow_html), followed by a horizontal rule.
INTRO_MARKDOWN = (
    "> <i>This is a viewer for the **HumanEval-V** benchmark, which includes 253 coding tasks. "
    "Use the navigation buttons or enter an index to browse through the tasks. "
    "Please note that image loading may take a moment after switching to the next task. "
    "If you encounter any issues or have questions, feel free to start a discussion "
    "[here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>."
)
st.markdown(INTRO_MARKDOWN, unsafe_allow_html=True)
st.markdown("---")
|
|
|
# Largest index the selector accepts; indices shown to the user start at 1.
max_index = 253

# Seed the session's "index" entry the first time the script runs for a
# session; later reruns keep whatever value is already stored.
if "index" not in st.session_state:
    st.session_state["index"] = 1
|
|
|
# Header row: page title in the first column, index selector in the second.
# Nothing is written to the third (widest) column.
buttons = st.columns([2, 1.1, 5.9])

buttons[0].markdown("# HumanEval-V Viewer")

# Integer selector limited to 1..max_index; its value decides which task the
# rest of the page displays.
index_input = buttons[1].number_input(
    label=f"Go to index (1-{max_index}):",
    key="index_input",
    value=st.session_state.index,
    min_value=1,
    max_value=max_index,
    step=1,
    help="Enter an index and jump to that index.",
)
|
|
|
# The selector is 1-based while idx_mapping is keyed by the stringified
# 0-based position; the mapped value is used to index the dataset.
mapping_key = str(index_input - 1)
coding_task = humaneval_v_data[idx_mapping[mapping_key]]

# Pull every field the page displays out of the task record in one step.
(
    qid,
    diagram,
    diagram_description,
    function_signature,
    ground_truth,
    test_script,
    task_type,
    capability_aspects,
) = [
    coding_task[field_name]
    for field_name in (
        "qid",
        "diagram",
        "ground_truth_diagram_description",
        "function_signature",
        "ground_truth_solution",
        "test_script",
        "task_type",
        "capability_aspects",
    )
]
|
|
|
def _fenced(language, text):
    """Wrap *text* in a Markdown code fence tagged with *language*."""
    return f"```{language}\n{text}\n```"


# Two-column body: the narrow column holds the diagram and task metadata, the
# wide column holds the textual parts of the task.
upper_columns = st.columns([2, 7])
meta_col, text_col = upper_columns

# Narrow column. The empty markdown calls are kept from the original layout,
# where they sit between the headings and the content.
meta_col.markdown(f"### Question ID: {qid}")
meta_col.image(diagram, use_column_width=True)
meta_col.markdown("")
meta_col.markdown(f"### Task Type: {task_type}")
meta_col.markdown("")
meta_col.markdown("### Capability Aspects:")
meta_col.markdown("")
meta_col.json(capability_aspects)

# Wide column: each section is a heading, an empty markdown element, and the
# section text rendered as a fenced code block in the given language.
for heading, language, content in (
    ("### Function Signature:", "python", function_signature),
    ("### Ground Truth Diagram Description:", "markdown", diagram_description),
    ("### Ground Truth Solution:", "python", ground_truth),
    ("### Test Script:", "python", test_script),
):
    text_col.markdown(heading)
    text_col.markdown("")
    text_col.markdown(_fenced(language, content))