import gradio as gr
from datasets import load_dataset

# Load the test split of the WHOOPS! dataset.
whoops = load_dataset("nlphuji/whoops")['test']
print("Loaded WHOOPS!, first example:")
print(whoops[0])
dataset_size = len(whoops)
print(f"Dataset size: {dataset_size}")

# Column names of the dataset.
IMAGE = 'image'
IMAGE_DESIGNER = 'image_designer'
DESIGNER_EXPLANATION = 'designer_explanation'
CROWD_CAPTIONS = 'crowd_captions'
CROWD_EXPLANATIONS = 'crowd_explanations'
CROWD_UNDERSPECIFIED_CAPTIONS = 'crowd_underspecified_captions'
SELECTED_CAPTION = 'selected_caption'
COMMONSENSE_CATEGORY = 'commonsense_category'
QA = 'question_answering_pairs'
IMAGE_ID = 'image_id'

# The image is shown on top; every other column goes into the details accordion.
left_side_columns = [IMAGE]
# right_side_columns = [x for x in whoops.features.keys() if x not in left_side_columns and x not in [QA]]
right_side_columns = [x for x in whoops.features.keys() if x not in left_side_columns]
# Columns whose values are lists and should be rendered as numbered lines.
enumerate_cols = [CROWD_CAPTIONS, CROWD_EXPLANATIONS, CROWD_UNDERSPECIFIED_CAPTIONS]
emoji_to_label = {IMAGE_DESIGNER: '🎨, 🧑‍🎨, 💻', DESIGNER_EXPLANATION: '💡, 🤔, 🧑‍🎨',
                  CROWD_CAPTIONS: '👥, 💬, 📝', CROWD_EXPLANATIONS: '👥, 💡, 🤔',
                  CROWD_UNDERSPECIFIED_CAPTIONS: '👥, 💬, 👎', QA: '❓, 🤔, 💡',
                  IMAGE_ID: '🔍, 📄, 💾', COMMONSENSE_CATEGORY: '🤔, 📚, 💡',
                  SELECTED_CAPTION: '📝, 👌, 💬'}
target_size = (1024, 1024)
MAX_LINES = 30


def get_instance_values(example):
    """Collect the displayable value for every column of a single example."""
    values = []
    for k in left_side_columns + right_side_columns:
        if k in enumerate_cols:
            value = list_to_string(example[k])
        elif k == QA:
            qa_list = [f"Q: {x[0]} A: {x[1]}" for x in example[k]]
            value = list_to_string(qa_list)
        else:
            value = example[k]
        values.append(value)
    return values


def list_to_string(lst):
    """Render a list as numbered lines: '1. ...', '2. ...', and so on."""
    return '\n'.join('{}. {}'.format(i + 1, item) for i, item in enumerate(lst))


def plot_image(index):
    """Build the Gradio components (image plus detail textboxes) for one example."""
    example = whoops_sample[index]
    instance_values = get_instance_values(example)
    assert len(left_side_columns) == len(instance_values[:len(left_side_columns)])
    for key, value in zip(left_side_columns, instance_values[:len(left_side_columns)]):
        if key == IMAGE:
            img = whoops_sample[index]["image"]
            img_resized = img.resize(target_size)
            gr.Image(value=img_resized, label=whoops_sample[index]['commonsense_category'])
        else:
            label = key.capitalize().replace("_", " ")
            gr.Textbox(value=value, label=f"{label} {emoji_to_label[key]}")
    # Everything that is not the image goes into a collapsed accordion.
    with gr.Accordion("Click for details", open=False):
        assert len(right_side_columns) == len(instance_values[len(left_side_columns):])
        for key, value in zip(right_side_columns, instance_values[len(left_side_columns):]):
            label = key.capitalize().replace("_", " ")
            gr.Textbox(value=value, label=f"{label} {emoji_to_label[key]}", max_lines=MAX_LINES)


columns_number = 4
# rows_number = int(dataset_size / columns_number)
# rows_number = 25
rows_number = 20
whoops_sample = whoops.shuffle().select(range(0, columns_number * rows_number))
index = 0

LINES_NUMBER = 20


def display_df():
    """Return the first LINES_NUMBER rows of the global dataframe."""
    df_images = df.head(LINES_NUMBER)
    return df_images


def display_next(dataframe, end):
    """Return the next LINES_NUMBER rows, reshuffling once the end of the dataframe is reached."""
    start = int(end or len(dataframe))
    end = int(start) + int(LINES_NUMBER)
    global df
    if end >= len(df) - 1:
        start = 0
        end = LINES_NUMBER
        df = df.sample(frac=1)
        print("Shuffle")
    # print(f"end: {end}, start: {start}")
    df_images = df.iloc[start:end]
    assert len(df_images) == LINES_NUMBER
    return df_images, end
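
# The paging callbacks above read and write a module-level pandas DataFrame `df`,
# which the original script never defines. The line below is a best-guess fix
# (an assumption, not part of the original): a tabular view of the dataset with
# the PIL image column dropped so it can be shown in a gr.Dataframe component.
df = whoops.remove_columns([IMAGE]).to_pandas()
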
with gr.Blocks() as demo:
    gr.Markdown("# WHOOPS! Dataset Explorer")
    with gr.Row():
        num_end = gr.Number(visible=False)
        b1 = gr.Button("Get initial dataframe")
        b2 = gr.Button("Next rows")

    # Tabular view driven by the two buttons above. The original click handlers
    # referenced an `out_dataframe` component that was never created; it is
    # defined here (an assumed fix) so the app can launch.
    out_dataframe = gr.Dataframe(interactive=False)

    # Image grid: LINES_NUMBER examples laid out in rows of `columns_number`.
    for row_num in range(0, int(LINES_NUMBER / columns_number)):
        with gr.Row():
            for col_num in range(0, columns_number):
                with gr.Column():
                    plot_image(index)
                    index += 1

    b1.click(fn=display_df, outputs=out_dataframe, api_name="initial_dataframe")
    b2.click(fn=display_next, inputs=[out_dataframe, num_end], outputs=[out_dataframe, num_end],
             api_name="next_rows")

demo.launch()
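
# Minimal client-side sketch (an illustration, not part of the original app):
# the named endpoint registered above can be called with `gradio_client`,
# assuming the app is running locally on Gradio's default port (7860).
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(api_name="/initial_dataframe")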