erikjm committed (verified)
Commit daaf163 · 1 parent: 223a88f

Upload 4 files

app.py ADDED
@@ -0,0 +1,181 @@
# Gradio interface for labeling conversations against the transparency maxim and its submaxims.
import gradio as gr
import json
import os

from interface_utils import *

maxim = 'transparency'
submaxims = ["The response recognizes the speaker’s knowledge boundaries, making clear any limitations in expertise, evidence, experience, or context.",
             "The response recognizes the speaker’s operational capabilities, highlighting the nature of actions that can or cannot be performed.",
             "The response is forthright about the speaker’s willingness to engage with specific subjects or heed relevant advice."]
checkbox_choices = [
    ["Yes", "No", "NA"],
    ["Yes", "No", "NA"],
    ["Yes", "No", "NA"]
]

conversation_data = load_from_jsonl('./data/conversations_unlabeled.jsonl')
max_conversation_length = max([len(conversation['transcript']) for conversation in conversation_data])
conversation = get_conversation(conversation_data)


def save_labels(conv_id, skipped, submaxim_0=None, submaxim_1=None, submaxim_2=None):
    """Write the labels for one conversation to ./labels/ as a JSON file."""
    data = {
        'conv_id': conv_id,
        'maxim': maxim,
        'skipped': skipped,
        'submaxim_0': submaxim_0,
        'submaxim_1': submaxim_1,
        'submaxim_2': submaxim_2,
    }
    os.makedirs("./labels", exist_ok=True)

    with open(f"./labels/{maxim}_human_labels_{conv_id}.json", 'w') as f:
        json.dump(data, f, indent=4)


def update_interface(new_conversation):
    """Build refreshed components for the next conversation to be labeled."""
    new_conv_id = new_conversation['conv_id']
    new_transcript = pad_transcript(new_conversation['transcript'], max_conversation_length)

    markdown_blocks = [None] * max_conversation_length
    for i in range(max_conversation_length):
        if new_transcript[i]['speaker'] != '':
            markdown_blocks[i] = gr.Markdown(f"""&nbsp;&nbsp;**{new_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{new_transcript[i]['response']}""",
                                             visible=True)
        else:
            markdown_blocks[i] = gr.Markdown("", visible=False)

    new_last_response = gr.Text(value=get_last_response(new_transcript),
                                label="",
                                lines=1,
                                container=False,
                                interactive=False,
                                autoscroll=True,
                                visible=True)
    new_radio_0_base = gr.Radio(label=submaxims[0],
                                choices=checkbox_choices[0],
                                value=None,
                                visible=True)
    new_radio_1_base = gr.Radio(label=submaxims[1],
                                choices=checkbox_choices[1],
                                value=None,
                                visible=True)
    new_radio_2_base = gr.Radio(label=submaxims[2],
                                choices=checkbox_choices[2],
                                value=None,
                                visible=True)
    conv_len = gr.Number(value=len(new_transcript), visible=False)

    return [new_conv_id] + list(markdown_blocks) + [new_last_response] + [new_radio_0_base] + [new_radio_1_base] + [new_radio_2_base] + [conv_len]


def submit(*args):
    """Save the selected labels, then load a new conversation into the interface."""
    conv_id = args[0]
    submaxim_0 = args[-4]
    submaxim_1 = args[-3]
    submaxim_2 = args[-2]

    save_labels(conv_id, skipped=False, submaxim_0=submaxim_0, submaxim_1=submaxim_1, submaxim_2=submaxim_2)

    new_conversation = get_conversation(conversation_data)
    return update_interface(new_conversation)


def skip(*args):
    """Record the conversation as skipped, then load a new one."""
    conv_id = args[0]
    save_labels(conv_id, skipped=True)

    new_conversation = get_conversation(conversation_data)
    return update_interface(new_conversation)


with gr.Blocks(theme=gr.themes.Default()) as interface:
    conv_id = conversation['conv_id']
    transcript = conversation['transcript']
    conv_len = gr.Number(value=len(transcript), visible=False)
    padded_transcript = pad_transcript(transcript, max_conversation_length)

    markdown_blocks = [None] * max_conversation_length
    with gr.Column(scale=1, min_width=600):
        with gr.Group():
            gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
                        visible=True)
            for i in range(max_conversation_length):
                markdown_blocks[i] = gr.Markdown(f"""&nbsp;&nbsp;**{padded_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{padded_transcript[i]['response']}""")
                if i >= conv_len.value:
                    markdown_blocks[i].visible = False

    with gr.Row():
        with gr.Group(elem_classes="bottom-aligned-group"):
            speaker_adapted = gr.Markdown(
                """<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Response to label** </span>""",
                visible=True)
            last_response = gr.Textbox(value=get_last_response(transcript),
                                       label="",
                                       lines=1,
                                       container=False,
                                       interactive=False,
                                       autoscroll=True,
                                       visible=True)
            radio_submaxim_0_base = gr.Radio(label=submaxims[0],
                                             choices=checkbox_choices[0],
                                             value=None,
                                             visible=True)
            radio_submaxim_1_base = gr.Radio(label=submaxims[1],
                                             choices=checkbox_choices[1],
                                             value=None,
                                             visible=True)
            radio_submaxim_2_base = gr.Radio(label=submaxims[2],
                                             choices=checkbox_choices[2],
                                             value=None,
                                             visible=True)

    submit_button = gr.Button("Submit")
    skip_button = gr.Button("Skip")

    conv_id_element = gr.Text(value=conv_id, visible=False)
    input_list = [conv_id_element] + \
        markdown_blocks + \
        [last_response] + \
        [radio_submaxim_0_base] + \
        [radio_submaxim_1_base] + \
        [radio_submaxim_2_base] + \
        [conv_len]
    submit_button.click(
        fn=submit,
        inputs=input_list,
        outputs=[conv_id_element,
                 *markdown_blocks,
                 last_response,
                 radio_submaxim_0_base,
                 radio_submaxim_1_base,
                 radio_submaxim_2_base,
                 conv_len]
    )
    skip_button.click(
        fn=skip,
        inputs=input_list,
        outputs=[conv_id_element,
                 *markdown_blocks,
                 last_response,
                 radio_submaxim_0_base,
                 radio_submaxim_1_base,
                 radio_submaxim_2_base,
                 conv_len]
    )

css = """
#textbox_id textarea {
    background-color: white;
}

.bottom-aligned-group {
    display: flex;
    flex-direction: column;
    justify-content: flex-end;
    height: 100%;
}
"""
interface.css = css
interface.launch()
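Since save_labels() writes one JSON file per conversation under ./labels/, a small follow-up script can collect those files into a single JSONL for analysis. The sketch below is illustrative only and not part of this commit; merge_labels and the merged_labels.jsonl output path are made-up names.

# merge_labels.py -- hypothetical helper, not part of this commit.
# Collects the per-conversation JSON files written by save_labels()
# into one JSONL file for downstream analysis.
import glob
import json


def merge_labels(labels_dir="./labels", out_path="./labels/merged_labels.jsonl"):
    records = []
    for path in sorted(glob.glob(f"{labels_dir}/*_human_labels_*.json")):
        with open(path) as f:
            records.append(json.load(f))
    with open(out_path, "w") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")
    return len(records)


if __name__ == "__main__":
    print(f"Merged {merge_labels()} label files")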
data/conversations_unlabeled.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
interface_utils.py ADDED
@@ -0,0 +1,50 @@
import json
import numpy as np
import random
import uuid


def load_from_jsonl(filename, n=np.inf):
    """Read up to n JSON lines from filename into a list of dicts."""
    data = []
    with open(filename, 'r') as file:
        for i, line in enumerate(file):
            if i >= n:  # stop after reading n lines
                break
            data.append(json.loads(line))
    return data


def append_id(conversations_no_id):
    """Attach a random hex conv_id to every conversation."""
    conversations = []
    for conversation in conversations_no_id:
        conversations.append({
            'conv_id': uuid.uuid4().hex,
            'transcript': conversation['transcript']
        })
    return conversations


def save_to_jsonl(data, filename):
    """Write a list of dicts to filename, one JSON object per line."""
    with open(filename, 'w') as file:
        for item in data:
            json_line = json.dumps(item)
            file.write(json_line + '\n')


def get_conversation(conversation_data):
    """Return a conversation sampled uniformly at random."""
    return random.choice(conversation_data)


def pad_transcript(transcript, max_length):
    """Pad a transcript in place with empty turns up to max_length."""
    padding_count = max_length - len(transcript)
    if padding_count > 0:
        for _ in range(padding_count):
            transcript.append({'speaker': '', 'response': ''})
    return transcript


def get_last_response(transcript):
    """Return the response text of the last non-empty turn, if any."""
    for turn in reversed(transcript):
        if turn['speaker'] and turn['response']:
            return turn['response']
    return None
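For context, these helpers chain together in the preprocessing that presumably produced data/conversations_unlabeled.jsonl. A minimal round-trip sketch, where the raw_transcripts example and the conversations_demo.jsonl path are invented for illustration:

# Illustrative usage of the helpers above -- the input data is invented.
from interface_utils import append_id, save_to_jsonl, load_from_jsonl, get_last_response

raw_transcripts = [
    {'transcript': [{'speaker': 'User', 'response': 'Can you review my code?'},
                    {'speaker': 'Assistant', 'response': 'I can, within some limits.'}]},
]

conversations = append_id(raw_transcripts)            # add a conv_id to each conversation
save_to_jsonl(conversations, 'conversations_demo.jsonl')
reloaded = load_from_jsonl('conversations_demo.jsonl')
assert reloaded[0]['conv_id'] == conversations[0]['conv_id']
print(get_last_response(reloaded[0]['transcript']))   # -> 'I can, within some limits.'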
requirements.txt ADDED
@@ -0,0 +1,2 @@
gradio
numpy
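With these two dependencies installed (for example via pip install -r requirements.txt), running python app.py serves the labeling interface locally through interface.launch(); on a Gradio-based Hugging Face Space, app.py would be launched automatically.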