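# Streamlit app for browsing the 253 coding tasks in the HumanEval-V benchmark.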
import streamlit as st
import datasets
import json
def load_json(file_path):
    with open(file_path, "r") as f:
        return json.load(f)
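# Load the benchmark from a local Hugging Face dataset directory, plus the JSON
# file that maps display indices to dataset rows.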
humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
idx_mapping = load_json("idx_mapping.json")
st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")
st.markdown("> <i>This is a viewer for the **HumanEval-V** benchmark, which includes 253 coding tasks. Use the navigation buttons or enter an index to browse through the tasks. Please note that image loading may take a moment after switching to the next task. If you encounter any issues or have questions, feel free to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
st.markdown("---")
max_index = 253
# Initialize session state for index if not present
if 'index' not in st.session_state:
    st.session_state.index = 1
buttons = st.columns([2, 1.1, 5.9])
with buttons[0]:
st.markdown("# HumanEval-V Viewer")
with buttons[1]:
# Number input for navigation
index_input = st.number_input(
f"Go to index (1-{max_index}):",
min_value=1,
max_value=max_index,
value=st.session_state.index,
key="index_input",
help="Enter an index and jump to that index.",
step=1
)
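# Resolve the selected 1-based index to a dataset row and unpack its fields.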
coding_task = humaneval_v_data[idx_mapping[str(index_input-1)]]
qid = coding_task["qid"]
diagram = coding_task["diagram"]
diagram_description = coding_task["ground_truth_diagram_description"]
function_signature = coding_task["function_signature"]
ground_truth = coding_task["ground_truth_solution"]
test_script = coding_task["test_script"]
task_type = coding_task["task_type"]
capability_aspects = coding_task["capability_aspects"]
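# Two-column layout: diagram and task metadata on the left, code and descriptions on the right.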
upper_columns = st.columns([2, 7])
with upper_columns[0]:
st.markdown(f"### Question ID: {qid}")
st.image(diagram, use_column_width=True)
st.markdown(f"")
st.markdown(f"### Task Type: {task_type}")
st.markdown(f"")
st.markdown(f"### Capability Aspects:")
st.markdown(f"")
st.json(capability_aspects)
with upper_columns[1]:
st.markdown(f"### Function Signature:")
st.markdown(f"")
st.markdown(f"""```python
{function_signature}
```""")
st.markdown(f"### Ground Truth Diagram Description:")
st.markdown(f"")
st.markdown(f"""```markdown
{diagram_description}
```""")
st.markdown(f"### Ground Truth Solution:")
st.markdown(f"")
st.markdown(f"""```python
{ground_truth}
```""")
st.markdown(f"### Test Script:")
st.markdown(f"")
st.markdown(f"""```python
{test_script}
```""") |