# NOTE: Hugging Face Spaces page chrome (Space status, file size, git blame
# hashes, and the line-number gutter) was captured with this file during
# extraction; it is not part of the Python source and has been removed.
import gradio as gr
import os
# PERSISTENT DATA STORAGE: these are used to upload user responses to a dataset
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4
from huggingface_hub import CommitScheduler
# Local staging directory for user responses before they are committed to the Hub.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
# One uniquely-named file per app session (uuid4) so concurrent sessions/replicas
# never append to the same file. Content is JSON Lines (one record per line).
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"
# Background scheduler that periodically commits everything under
# JSON_DATASET_DIR into the "data/" folder of the ebrowne/test-data dataset.
# NOTE(review): requires the HF_TOKEN secret to be configured — verify in the Space settings.
scheduler = CommitScheduler(
repo_id="ebrowne/test-data",
repo_type="dataset",
folder_path=JSON_DATASET_DIR,
path_in_repo="data",
token = os.getenv("HF_TOKEN")
)
def save_json(score1, score2):
    """Append one evaluation record to the session's JSON-lines file.

    Holds the commit scheduler's lock while writing so a background commit
    never uploads a half-written line.
    """
    record = {
        "relevance": score1,
        "novelty": score2,
        "datetime": datetime.now().isoformat(),
    }
    with scheduler.lock, JSON_DATASET_PATH.open("a") as out:
        out.write(json.dumps(record))
        out.write("\n")
# READING EXISTING DATA: this is used to read questions
from datasets import load_dataset
# Load only test.json from the Hub dataset; with explicit data_files it is
# exposed as the default "train" split.
qa_data = load_dataset("ebrowne/test-data", data_files = "test.json")
q = qa_data["train"][0] # loaded question data — NOTE(review): currently unused; hard-coded sample below takes precedence
# VARIABLES: will eventually be loaded with JSON from a dataset
# NOTE(review): the triple-quoted block below is the intended dataset-driven
# path, kept as an inert string literal (dead code) while the hard-coded
# sample question is used for development.
"""
question_text = q["prompt"] + " " + q["question"]
answers_text = [q["a"], q["b"], q["c"], q["d"]]
"""
# Hard-coded sample question (bar-exam style) and its four answer options.
question_text = "An act of Congress provides that \"no federal court shall order the implementation of a public school desegregation plan that would require the transportation of any student to a school other than the school closest or next closest to his place of residence.\" Which of the following is the strongest argument for the constitutionality of the act?"
answers_text = ["The Fourteenth Amendment authorizes Congress to define governmental conduct which violates the equal protection clause.", "Under Article III, Congress may restrict the jurisdiction of the federal courts.", "Transportation of students is subject to regulation by Congress because commerce is involved.", "Congress provides partial support for public education and is therefore entitled to establish conditions upon the expenditure of federal grants."]
# Index into answers_text of the correct answer (it gets bolded in the recap UI).
answer_id = 1
# BLOCKS: main user interface
# Flow: the app opens on the question row; clicking any answer button hides
# that row and reveals the (initially invisible) evaluation row, which shows
# the passage recap plus three scoring sliders.
with gr.Blocks() as user_eval:
    # Title text introducing study
    gr.Markdown("""
# Legal Retriever Evaluation Study
Thank you for your participation! Here are some basic instructions on how to complete the legal study.
""")
    # Passages and user evaluations thereof (hidden until the user answers)
    with gr.Row(equal_height = False, visible = False) as evals:
        # Passage text
        with gr.Column(scale = 2) as passages:
            # Bold the correct answer so it stands out in the recap below
            # (this also bolds it on the answer buttons created later).
            answers_text[answer_id] = "**" + answers_text[answer_id] + "**"
            passage_display = gr.Markdown("""
### Question and Answer
""" + question_text +
""" \n
""" + answers_text[0] +
""" \n
""" + answers_text[1] +
""" \n
""" + answers_text[2] +
""" \n
""" + answers_text[3] +
"""
### Relevant Passages
- Dataset 1
- Dataset 2
- More text
- More text
- More text
- More text
### Auto-Generated Summary
This is a summary of the above legal passages, which imitates how a RAG system might \
incorporate retrieved data into its context to give a better response to a certain query.
""")
        # Scoring box
        with gr.Column(scale = 1) as scores:
            desc_1 = gr.Markdown("How **relevant** is this passage to the question?")
            eval_1 = gr.Slider(1, 5, step = 0.5)
            desc_2 = gr.Markdown("How would you rate the passage's **quality** in terms of detail, clarity, and focus?")
            eval_2 = gr.Slider(1, 5, step = 0.5)
            desc_3 = gr.Markdown("How effectively does the passage **lead you to the correct answer?**")
            eval_3 = gr.Slider(1, 5, step = 0.5)
            btn = gr.Button("Next")

            # Renamed from `next` to avoid shadowing the builtin of that name.
            def next_passage(eval_1, eval_2, eval_3):
                # Placeholder handler: logs the summed slider scores to stdout.
                # TODO(review): presumably this should persist scores via
                # save_json — confirm, since save_json is never called.
                print(eval_1 + eval_2 + eval_3)

            btn.click(fn = next_passage, inputs = [eval_1, eval_2, eval_3])

    # Question and answering dynamics
    with gr.Row() as question:
        with gr.Column():
            gr.Markdown("---")
            gr.Markdown("**Question**")
            gr.Markdown(question_text)
            a = gr.Button(answers_text[0])
            b = gr.Button(answers_text[1])
            c = gr.Button(answers_text[2])
            d = gr.Button(answers_text[3])

            def answer():
                # Hide the question row and reveal the evaluation row.
                return {
                    question: gr.Row(visible = False),
                    evals: gr.Row(visible = True),
                }

            # Every answer button triggers the same visibility swap.
            for choice in (a, b, c, d):
                choice.click(fn = answer, outputs = [question, evals])

# Starts on question, switches to evaluation after the user answers
user_eval.launch()