Spaces:

davanstrien
/

would-you-read-it

Sleeping

App Files Files Community

davanstrien HF Staff committed on Jul 18, 2024

Commit

7fbe657

1 Parent(s): 4ddb8df

Refactor code for improved UI layout and readability

Browse files

Files changed (1) hide show

app.py +30 -18

app.py CHANGED Viewed

@@ -33,8 +33,7 @@ def get_latest_dataset_file():
 # Check for existing dataset and create or append to it
-latest_file = get_latest_dataset_file()
-if latest_file:
     dataset_file = latest_file
     print(f"Appending to existing dataset file: {dataset_file}")
 else:
@@ -50,7 +49,7 @@ scheduler = CommitScheduler(
     repo_type="dataset",
     folder_path=dataset_folder,
     path_in_repo="data",
-    every=1,  # Upload every 5 minutes
 )
@@ -58,7 +57,9 @@ scheduler = CommitScheduler(
 def download_existing_dataset():
     try:
         files = hf_hub_download(
-            repo_id=repo_id, filename="data", repo_type="dataset", recursive=True
         )
         for file in Path(files).glob("*.jsonl"):
             dest_file = dataset_folder / file.name
@@ -139,30 +140,41 @@ def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *
         with dataset_file.open("a") as f:
             f.write(json.dumps(log_entry) + "\n")
     gr.Info("Thank you for voting!")
-    return f"Logged: {vote} by user {user_id}"
 # Create custom theme
 tufte_theme = TufteInspired()
-# Create Gradio interface
 with gr.Blocks(theme=tufte_theme) as demo:
     gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
-    gr.Markdown(
-        """<p style='text-align: center;'>Looking for your next summer read?
-    Would you read a book based on this LLM generated blurb? <br> Your vote will be added to <a href="https://huggingface.co/datasets/your-username/your-dataset-repo">this</a> Hugging Face dataset</p>"""
-    )
-    # Add the login button
-    login_btn = gr.LoginButton()
     with gr.Row():
         generate_btn = gr.Button("Create a book", variant="primary")
     prompt_state = gr.State()
     blurb_output = gr.Markdown(label="Book blurb")
-    with gr.Row(visible=False) as voting_row:
         upvote_btn = gr.Button("👍 would read")
         downvote_btn = gr.Button("👎 wouldn't read")
@@ -188,7 +200,7 @@ with gr.Blocks(theme=tufte_theme) as demo:
             gr.Textbox(value="upvote", visible=False),
             login_btn,
         ],
-        outputs=vote_output,
     )
     downvote_btn.click(
         log_blurb_and_vote,
@@ -198,7 +210,7 @@ with gr.Blocks(theme=tufte_theme) as demo:
             gr.Textbox(value="downvote", visible=False),
             login_btn,
         ],
-        outputs=vote_output,
     )
 if __name__ == "__main__":

 # Check for existing dataset and create or append to it
+if latest_file := get_latest_dataset_file():
     dataset_file = latest_file
     print(f"Appending to existing dataset file: {dataset_file}")
 else:
     repo_type="dataset",
     folder_path=dataset_folder,
     path_in_repo="data",
+    every=5,  # Upload every 5 minutes
 )
 def download_existing_dataset():
     try:
         files = hf_hub_download(
+            repo_id=repo_id,
+            filename="data",
+            repo_type="dataset",
         )
         for file in Path(files).glob("*.jsonl"):
             dest_file = dataset_folder / file.name
         with dataset_file.open("a") as f:
             f.write(json.dumps(log_entry) + "\n")
     gr.Info("Thank you for voting!")
+    return f"Logged: {vote} by user {user_id}", gr.Row.update(visible=False)
+short_description = """Vote on book blurbs generated by large language models. Would you read the book the LLM generated? <br> Every five minutes, the dataset of votes created in this will be uploaded to the <a href="https://huggingface.co/davanstrien/summer-reading-preference">davanstrien/summer-reading-preference</a> dataset.
+"""
+full_description = """
+Large Language Models are already strong assistants for technical tasks like coding. Increasingly they are also being used to help with tasks like copywriting. The jury is out on whether the texts produced by language models in these applications is very appealing; "let's delve into" is a common example of the clunky cliche ridden text that LLMs often produce.  <br>
+However, there is also growing interest in using LLMs to help with more creative tasks. Outside of larger companies, there is a growing community of people fine-tuning LLMs for all sorts of creative tasks. Some writers want to use LLMs not as a replacement but as a companion in their writing process. <br>
+One of the requirements for building models which are better able to generate responses people like is having preference data. Preference datasets come in many forms but essentially boil to a dataset which contains some kind of signal for whether people like or dislike some LLM generated text. <br>
+This Space is a small experiment to see if we can generate preference data for LLM generated book blurbs. Whilst writing a blurb is very different from writing a whole book, it could be a neat experiment to see whether we can improve the ability of LLMs to generate book blurbs that people like. <br>
+"""
 # Create custom theme
 tufte_theme = TufteInspired()
 with gr.Blocks(theme=tufte_theme) as demo:
     gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
+    gr.Markdown(f"""<p style='text-align: center;'>{short_description}</p>""")
+    with gr.Accordion("More information", open=False):
+        gr.Markdown(full_description)
+    with gr.Row():
+        login_btn = gr.LoginButton(size="sm")
+        gr.Markdown(
+            "Login with your Hugging Face account to assign your Hub username to your votes. This will allow you to extract your preferences from the dataset generated by this Space! If you don't have a Hugging Face account, you can still vote but your votes will be anonymous."
+        )
     with gr.Row():
         generate_btn = gr.Button("Create a book", variant="primary")
     prompt_state = gr.State()
     blurb_output = gr.Markdown(label="Book blurb")
+    with gr.Row(visible=True) as voting_row:
         upvote_btn = gr.Button("👍 would read")
         downvote_btn = gr.Button("👎 wouldn't read")
             gr.Textbox(value="upvote", visible=False),
             login_btn,
         ],
+        outputs=[vote_output, voting_row],
     )
     downvote_btn.click(
         log_blurb_and_vote,
             gr.Textbox(value="downvote", visible=False),
             login_btn,
         ],
+        outputs=[vote_output, voting_row],
     )
 if __name__ == "__main__":