Spaces:
Sleeping
Sleeping
Commit
·
7fbe657
1
Parent(s):
4ddb8df
Refactor code for improved UI layout and readability
Browse files
app.py
CHANGED
@@ -33,8 +33,7 @@ def get_latest_dataset_file():
|
|
33 |
|
34 |
|
35 |
# Check for existing dataset and create or append to it
|
36 |
-
latest_file
|
37 |
-
if latest_file:
|
38 |
dataset_file = latest_file
|
39 |
print(f"Appending to existing dataset file: {dataset_file}")
|
40 |
else:
|
@@ -50,7 +49,7 @@ scheduler = CommitScheduler(
|
|
50 |
repo_type="dataset",
|
51 |
folder_path=dataset_folder,
|
52 |
path_in_repo="data",
|
53 |
-
every=
|
54 |
)
|
55 |
|
56 |
|
@@ -58,7 +57,9 @@ scheduler = CommitScheduler(
|
|
58 |
def download_existing_dataset():
|
59 |
try:
|
60 |
files = hf_hub_download(
|
61 |
-
repo_id=repo_id,
|
|
|
|
|
62 |
)
|
63 |
for file in Path(files).glob("*.jsonl"):
|
64 |
dest_file = dataset_folder / file.name
|
@@ -139,30 +140,41 @@ def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *
|
|
139 |
with dataset_file.open("a") as f:
|
140 |
f.write(json.dumps(log_entry) + "\n")
|
141 |
gr.Info("Thank you for voting!")
|
142 |
-
return f"Logged: {vote} by user {user_id}"
|
143 |
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
# Create custom theme
|
146 |
tufte_theme = TufteInspired()
|
147 |
-
|
148 |
-
# Create Gradio interface
|
149 |
with gr.Blocks(theme=tufte_theme) as demo:
|
150 |
gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
|
151 |
-
gr.Markdown(
|
152 |
-
|
153 |
-
|
154 |
-
)
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
with gr.Row():
|
160 |
generate_btn = gr.Button("Create a book", variant="primary")
|
161 |
|
162 |
prompt_state = gr.State()
|
163 |
blurb_output = gr.Markdown(label="Book blurb")
|
164 |
|
165 |
-
with gr.Row(visible=
|
166 |
upvote_btn = gr.Button("👍 would read")
|
167 |
downvote_btn = gr.Button("👎 wouldn't read")
|
168 |
|
@@ -188,7 +200,7 @@ with gr.Blocks(theme=tufte_theme) as demo:
|
|
188 |
gr.Textbox(value="upvote", visible=False),
|
189 |
login_btn,
|
190 |
],
|
191 |
-
outputs=vote_output,
|
192 |
)
|
193 |
downvote_btn.click(
|
194 |
log_blurb_and_vote,
|
@@ -198,7 +210,7 @@ with gr.Blocks(theme=tufte_theme) as demo:
|
|
198 |
gr.Textbox(value="downvote", visible=False),
|
199 |
login_btn,
|
200 |
],
|
201 |
-
outputs=vote_output,
|
202 |
)
|
203 |
|
204 |
if __name__ == "__main__":
|
|
|
33 |
|
34 |
|
35 |
# Check for existing dataset and create or append to it
|
36 |
+
if latest_file := get_latest_dataset_file():
|
|
|
37 |
dataset_file = latest_file
|
38 |
print(f"Appending to existing dataset file: {dataset_file}")
|
39 |
else:
|
|
|
49 |
repo_type="dataset",
|
50 |
folder_path=dataset_folder,
|
51 |
path_in_repo="data",
|
52 |
+
every=5, # Upload every 5 minutes
|
53 |
)
|
54 |
|
55 |
|
|
|
57 |
def download_existing_dataset():
|
58 |
try:
|
59 |
files = hf_hub_download(
|
60 |
+
repo_id=repo_id,
|
61 |
+
filename="data",
|
62 |
+
repo_type="dataset",
|
63 |
)
|
64 |
for file in Path(files).glob("*.jsonl"):
|
65 |
dest_file = dataset_folder / file.name
|
|
|
140 |
with dataset_file.open("a") as f:
|
141 |
f.write(json.dumps(log_entry) + "\n")
|
142 |
gr.Info("Thank you for voting!")
|
143 |
+
return f"Logged: {vote} by user {user_id}", gr.Row.update(visible=False)
|
144 |
|
145 |
|
146 |
+
short_description = """Vote on book blurbs generated by large language models. Would you read the book the LLM generated? <br> Every five minutes, the dataset of votes created in this Space will be uploaded to the <a href="https://huggingface.co/davanstrien/summer-reading-preference">davanstrien/summer-reading-preference</a> dataset.
|
147 |
+
"""
|
148 |
+
|
149 |
+
full_description = """
|
150 |
+
Large Language Models are already strong assistants for technical tasks like coding. Increasingly they are also being used to help with tasks like copywriting. The jury is out on whether the text produced by language models in these applications is very appealing; "let's delve into" is a common example of the clunky, cliche-ridden text that LLMs often produce. <br>
|
151 |
+
|
152 |
+
However, there is also growing interest in using LLMs to help with more creative tasks. Outside of larger companies, there is a growing community of people fine-tuning LLMs for all sorts of creative tasks. Some writers want to use LLMs not as a replacement but as a companion in their writing process. <br>
|
153 |
+
|
154 |
+
One of the requirements for building models which are better able to generate responses people like is having preference data. Preference datasets come in many forms but essentially boil down to a dataset which contains some kind of signal for whether people like or dislike some LLM generated text. <br>
|
155 |
+
|
156 |
+
This Space is a small experiment to see if we can generate preference data for LLM generated book blurbs. Whilst writing a blurb is very different from writing a whole book, it could be a neat experiment to see whether we can improve the ability of LLMs to generate book blurbs that people like. <br>
|
157 |
+
"""
|
158 |
+
|
159 |
# Create custom theme
|
160 |
tufte_theme = TufteInspired()
|
|
|
|
|
161 |
with gr.Blocks(theme=tufte_theme) as demo:
|
162 |
gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
|
163 |
+
gr.Markdown(f"""<p style='text-align: center;'>{short_description}</p>""")
|
164 |
+
with gr.Accordion("More information", open=False):
|
165 |
+
gr.Markdown(full_description)
|
166 |
+
with gr.Row():
|
167 |
+
login_btn = gr.LoginButton(size="sm")
|
168 |
+
gr.Markdown(
|
169 |
+
"Login with your Hugging Face account to assign your Hub username to your votes. This will allow you to extract your preferences from the dataset generated by this Space! If you don't have a Hugging Face account, you can still vote but your votes will be anonymous."
|
170 |
+
)
|
171 |
with gr.Row():
|
172 |
generate_btn = gr.Button("Create a book", variant="primary")
|
173 |
|
174 |
prompt_state = gr.State()
|
175 |
blurb_output = gr.Markdown(label="Book blurb")
|
176 |
|
177 |
+
with gr.Row(visible=True) as voting_row:
|
178 |
upvote_btn = gr.Button("👍 would read")
|
179 |
downvote_btn = gr.Button("👎 wouldn't read")
|
180 |
|
|
|
200 |
gr.Textbox(value="upvote", visible=False),
|
201 |
login_btn,
|
202 |
],
|
203 |
+
outputs=[vote_output, voting_row],
|
204 |
)
|
205 |
downvote_btn.click(
|
206 |
log_blurb_and_vote,
|
|
|
210 |
gr.Textbox(value="downvote", visible=False),
|
211 |
login_btn,
|
212 |
],
|
213 |
+
outputs=[vote_output, voting_row],
|
214 |
)
|
215 |
|
216 |
if __name__ == "__main__":
|