Spaces:

GEM
/

DatasetCardForm

Runtime error

File size: 5,386 Bytes

ac6c40f
 
bac5a97
 
 
 
 
 
 
ac6c40f
969e2c4
 
bac5a97
ac6c40f
969e2c4
57616af
ac6c40f
bac5a97
 
 
969e2c4
 
 
bac5a97
969e2c4
bac5a97
969e2c4
 
 
 
4081c39
 
969e2c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4081c39
bac5a97
 
 
 
 
969e2c4
 
 
 
 
 
 
 
 
 
 
 
 
bac5a97
 
 
 
 
 
ac6c40f
4081c39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57616af
ac6c40f
bac5a97
 
 
 
3578aa2
bac5a97
 
 
969e2c4
bac5a97
969e2c4
bac5a97

import streamlit as st

from .streamlit_utils import (
    make_multiselect,
    make_selectbox,
    make_text_area,
    make_text_input,
    make_radio,
)

# Number of card fields stored by each sub-section of the "context" page.
# context_summary() reports completion against these counts.
N_FIELDS_PREVIOUS = 3
N_FIELDS_UNDERSERVED_COMMUNITIES = 2
# The "Discussion of Biases" section renders four text areas.
N_FIELDS_BIASES = 4

N_FIELDS = N_FIELDS_PREVIOUS + N_FIELDS_UNDERSERVED_COMMUNITIES + N_FIELDS_BIASES

# Placeholder stored for follow-up fields whose question is not displayed.
_NOT_APPLICABLE = "N/A"


def _ensure_context_section(name):
    """Create ``card_dict["context"][name]`` if missing, keeping existing content."""
    context = st.session_state.card_dict["context"]
    context[name] = context.get(name, {})


def _previous_work_section():
    """Render the questions about previous work on the dataset's social impact."""
    key_pref = ["context", "previous"]
    _ensure_context_section("previous")

    make_radio(
        label="Are you aware of cases where models trained on the task featured in this dataset or related tasks have been used in automated systems?",
        options=[
            "no",
            "yes - related tasks",
            "yes - other datasets featuring the same task",
            "yes - models trained on this dataset",
        ],
        key_list=key_pref + ["is-deployed"],
        help="",
    )
    # NOTE(review): relies on make_radio having stored the selection under
    # card_dict["context"]["previous"]["is-deployed"] -- confirm in streamlit_utils.
    is_deployed = st.session_state.card_dict["context"]["previous"]["is-deployed"]

    if "yes" in is_deployed:
        make_text_area(
            label="Did any of these previous uses result in observations about the social impact of the systems? "
            "In particular, has there been work outlining the risks and limitations of the system? Provide links and descriptions here:",
            key_list=key_pref + ["described-risks"],
            help="",
        )
        # Changes to the dataset are only asked about when models were trained
        # on this very dataset.
        if is_deployed == "yes - models trained on this dataset":
            make_text_area(
                label="Have any changes been made to the dataset as a result of these observations?",
                key_list=key_pref + ["changes-from-observation"],
                help="",
            )
        else:
            st.session_state.card_dict["context"]["previous"][
                "changes-from-observation"
            ] = _NOT_APPLICABLE
    else:
        st.session_state.card_dict["context"]["previous"][
            "described-risks"
        ] = _NOT_APPLICABLE
        st.session_state.card_dict["context"]["previous"][
            "changes-from-observation"
        ] = _NOT_APPLICABLE


def _underserved_section():
    """Render the questions about the dataset's impact on under-served communities."""
    key_pref = ["context", "underserved"]
    _ensure_context_section("underserved")

    make_radio(
        # The trailing space keeps the two sentences from running together.
        label="Does this dataset address the needs of communities that are traditionally underserved in language technology, and particularly language generation technology? "
        "Communities may be underserved for example because their language, language variety, or social or geographical context is underrepresented in NLP and NLG resources (datasets and models).",
        options=["no", "yes"],
        key_list=key_pref + ["helps-underserved"],
    )
    # NOTE(review): relies on make_radio having stored the selection -- confirm.
    if st.session_state.card_dict["context"]["underserved"]["helps-underserved"] == "yes":
        make_text_area(
            label="Describe how this dataset addresses the needs of underserved communities",
            key_list=key_pref + ["underserved-description"],
        )
    else:
        st.session_state.card_dict["context"]["underserved"][
            "underserved-description"
        ] = _NOT_APPLICABLE


def _biases_section():
    """Render the free-text questions about biases in the data."""
    key_pref = ["context", "biases"]
    _ensure_context_section("biases")

    # Each question gets its own key so the four fields do not share one slot.
    # NOTE(review): the key names below were chosen during review -- confirm
    # they match the intended card schema.
    make_text_area(
        label="Are there documented biases in the data?",
        key_list=key_pref + ["documented-biases"],
        help="",
    )

    make_text_area(
        label="Link to analyses",
        key_list=key_pref + ["analyses-links"],
        help="",
    )

    make_text_area(
        label="How does the distribution of language producers differ from a base distribution?",
        key_list=key_pref + ["producer-distribution"],
        help="",
    )

    make_text_area(
        label="Topic coverage?",
        key_list=key_pref + ["topic-coverage"],
        help="",
    )


def context_page():
    """Render the "context" page of the dataset card form.

    Ensures ``st.session_state.card_dict["context"]`` exists, then draws three
    collapsed expanders: previous work on social impact, impact on
    under-served communities, and discussion of biases. Follow-up fields whose
    question is not shown are set to ``"N/A"`` in the card dictionary.
    """
    st.session_state.card_dict["context"] = st.session_state.card_dict.get(
        "context", {}
    )
    with st.expander("Previous Work on the Social Impact of the Dataset", expanded=False):
        _previous_work_section()

    with st.expander("Impact on Under-Served Communities", expanded=False):
        _underserved_section()

    with st.expander("Discussion of Biases", expanded=False):
        _biases_section()


def context_summary():
    """Display how many "context" card fields are stored, overall and per sub-section.

    Sums the number of entries in every sub-dictionary found under
    ``st.session_state.card_dict["context"]`` and renders the totals as a
    markdown list inside a collapsed expander.
    """
    context = st.session_state.card_dict.get("context", {})
    total_filled = sum(len(section) for section in context.values())

    # (displayed title, key under "context", expected number of fields)
    sub_sections = (
        ("Social Impact of the Dataset", "previous", N_FIELDS_PREVIOUS),
        (
            "Impact on Under-Served Communities",
            "underserved",
            N_FIELDS_UNDERSERVED_COMMUNITIES,
        ),
        ("Discussion of Biases", "biases", N_FIELDS_BIASES),
    )

    pieces = [f"- **Overall completion:**\n  - {total_filled} of {N_FIELDS} fields\n"]
    for title, section_key, expected in sub_sections:
        filled = len(context.get(section_key, {}))
        pieces.append(
            f"- **Sub-section - {title}:**\n  - {filled} of {expected} fields\n"
        )

    header = f"Broader Social Context Completion - {total_filled} of {N_FIELDS}"
    with st.expander(header, expanded=False):
        st.markdown("".join(pieces))