erikjm committed on
Commit
f8bd452
·
verified ·
1 Parent(s): fffd84c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +41 -27
  2. interface_utils.py +18 -11
app.py CHANGED
@@ -12,14 +12,16 @@ checkbox_choices = [
12
  ["Yes", "No", "NA"]
13
  ]
14
 
15
- conversation_data = load_from_jsonl('./data/conversations_unlabeled.jsonl')
16
- max_conversation_length = max([len(conversation['transcript']) for conversation in conversation_data])
17
- conversation = get_conversation(conversation_data)
18
 
 
19
 
20
- def save_labels(conv_id, skipped, submaxim_0=None, submaxim_1=None, submaxim_2=None):
 
21
  data = {
22
  'conv_id': conv_id,
 
23
  'maxim': maxim,
24
  'skipped': skipped,
25
  'submaxim_0': submaxim_0,
@@ -28,19 +30,22 @@ def save_labels(conv_id, skipped, submaxim_0=None, submaxim_1=None, submaxim_2=N
28
  }
29
  os.makedirs("./labels", exist_ok=True)
30
 
31
- with open(f"./labels/{maxim}_human_labels_{conv_id}.json", 'w') as f:
32
  json.dump(data, f, indent=4)
33
 
34
 
35
  def update_interface(new_conversation):
36
  new_conv_id = new_conversation['conv_id']
37
- new_transcript = pad_transcript(new_conversation['transcript'], max_conversation_length)
 
 
38
 
39
  markdown_blocks = [None] * max_conversation_length
40
  for i in range(max_conversation_length):
41
- if new_transcript[i]['speaker'] != '':
42
- markdown_blocks[i] = gr.Markdown(f"""  **{new_transcript[i]['speaker']}**:      {new_transcript[i]['response']}""",
43
- visible=True)
 
44
  else:
45
  markdown_blocks[i] = gr.Markdown("", visible=False)
46
 
@@ -62,37 +67,40 @@ def update_interface(new_conversation):
62
  new_radio_2_base = gr.Radio(label=submaxims[2],
63
  choices=checkbox_choices[2],
64
  value=None,
65
- visible=True)
66
  conv_len = gr.Number(value=len(new_transcript), visible=False)
67
 
68
- return [new_conv_id] + list(markdown_blocks) + [new_last_response] + [new_radio_0_base] + [new_radio_1_base] + [new_radio_2_base] + [conv_len]
69
 
70
 
71
  def submit(*args):
72
  conv_id = args[0]
 
73
  submaxim_0 = args[-4]
74
  submaxim_1 = args[-3]
75
  submaxim_2 = args[-2]
76
 
77
- save_labels(conv_id, skipped=False, submaxim_0=submaxim_0, submaxim_1=submaxim_1, submaxim_2=submaxim_2)
78
 
79
- new_conversation = get_conversation(conversation_data)
80
  return update_interface(new_conversation)
81
 
82
 
83
  def skip(*args):
84
  conv_id = args[0]
85
- save_labels(conv_id, skipped=True)
 
86
 
87
- new_conversation = get_conversation(conversation_data)
88
- return update_interface(new_conversation)
89
 
90
 
91
  with gr.Blocks(theme=gr.themes.Default()) as interface:
92
  conv_id = conversation['conv_id']
 
93
  transcript = conversation['transcript']
94
  conv_len = gr.Number(value=len(transcript), visible=False)
95
- padded_transcript = pad_transcript(transcript, max_conversation_length)
96
 
97
  markdown_blocks = [None] * max_conversation_length
98
  with gr.Column(scale=1, min_width=600):
@@ -100,7 +108,11 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
100
  gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
101
  visible=True)
102
  for i in range(max_conversation_length):
103
- markdown_blocks[i] = gr.Markdown(f"""&nbsp;&nbsp;**{padded_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{padded_transcript[i]['response']}""")
 
 
 
 
104
  if i >= conv_len.value:
105
  markdown_blocks[i].visible = False
106
 
@@ -123,17 +135,19 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
123
  radio_submaxim_1_base = gr.Radio(label=submaxims[1],
124
  choices=checkbox_choices[1],
125
  value=None,
126
- visible=True)
127
  radio_submaxim_2_base = gr.Radio(label=submaxims[2],
128
  choices=checkbox_choices[2],
129
  value=None,
130
- visible=True)
131
 
132
  submit_button = gr.Button("Submit")
133
  skip_button = gr.Button("Skip")
134
 
135
  conv_id_element = gr.Text(value=conv_id, visible=False)
 
136
  input_list = [conv_id_element] + \
 
137
  markdown_blocks + \
138
  [last_response] + \
139
  [radio_submaxim_0_base] + \
@@ -144,6 +158,7 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
144
  fn=submit,
145
  inputs=input_list,
146
  outputs=[conv_id_element,
 
147
  *markdown_blocks,
148
  last_response,
149
  radio_submaxim_0_base,
@@ -155,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
155
  fn=skip,
156
  inputs=input_list,
157
  outputs=[conv_id_element,
 
158
  *markdown_blocks,
159
  last_response,
160
  radio_submaxim_0_base,
@@ -165,17 +181,15 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
165
 
166
  css = """
167
  #textbox_id textarea {
168
- background-color: white;
169
  }
170
 
171
  .bottom-aligned-group {
172
- display: flex;
173
- flex-direction: column;
174
- justify-content: flex-end;
175
- height: 100%;
176
  }
177
  """
178
  interface.css = css
179
  interface.launch()
180
-
181
-
 
12
  ["Yes", "No", "NA"]
13
  ]
14
 
15
+ conversation_data = load_from_jsonl('./data/conversations_unlabeled_sliced.jsonl')
16
+ max_conversation_length = max([len(conversation['transcript']) for conversation in conversation_data_sliced])
 
17
 
18
+ conversation = get_conversation(conversation_data_sliced)
19
 
20
+
21
+ def save_labels(conv_id, slice_idx, skipped, submaxim_0=None, submaxim_1=None, submaxim_2=None):
22
  data = {
23
  'conv_id': conv_id,
24
+ 'slice_idx': int(slice_idx),
25
  'maxim': maxim,
26
  'skipped': skipped,
27
  'submaxim_0': submaxim_0,
 
30
  }
31
  os.makedirs("./labels", exist_ok=True)
32
 
33
+ with open(f"./labels/{maxim}_human_labels_{conv_id}_{slice_idx}.json", 'w') as f:
34
  json.dump(data, f, indent=4)
35
 
36
 
37
  def update_interface(new_conversation):
38
  new_conv_id = new_conversation['conv_id']
39
+ new_slice_idx = new_conversation['slice_idx']
40
+ new_transcript = new_conversation['transcript']
41
+ is_contextual_turn = True if new_slice_idx > 0 else False
42
 
43
  markdown_blocks = [None] * max_conversation_length
44
  for i in range(max_conversation_length):
45
+ if i < len(new_transcript) and new_transcript[i]['speaker'] != '':
46
+ markdown_blocks[i] = gr.Markdown(
47
+ f"""&nbsp;&nbsp;**{new_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{new_transcript[i]['response']}""",
48
+ visible=True)
49
  else:
50
  markdown_blocks[i] = gr.Markdown("", visible=False)
51
 
 
67
  new_radio_2_base = gr.Radio(label=submaxims[2],
68
  choices=checkbox_choices[2],
69
  value=None,
70
+ visible=is_contextual_turn)
71
  conv_len = gr.Number(value=len(new_transcript), visible=False)
72
 
73
+ return [new_conv_id] + [new_slice_idx] + list(markdown_blocks) + [new_last_response] + [new_radio_0_base] + [new_radio_1_base] + [new_radio_2_base] + [conv_len]
74
 
75
 
76
  def submit(*args):
77
  conv_id = args[0]
78
+ slice_idx = args[1]
79
  submaxim_0 = args[-4]
80
  submaxim_1 = args[-3]
81
  submaxim_2 = args[-2]
82
 
83
+ save_labels(conv_id, slice_idx, skipped=False, submaxim_0=submaxim_0, submaxim_1=submaxim_1, submaxim_2=submaxim_2)
84
 
85
+ new_conversation = get_conversation(conversation_data_sliced)
86
  return update_interface(new_conversation)
87
 
88
 
89
  def skip(*args):
90
  conv_id = args[0]
91
+ slice_idx = args[1]
92
+ save_labels(conv_id, slice_idx, skipped=True)
93
 
94
+ new_conversation = get_conversation(conversation_data_sliced)
95
+ return update_interface(new_conversation, slice_idx)
96
 
97
 
98
  with gr.Blocks(theme=gr.themes.Default()) as interface:
99
  conv_id = conversation['conv_id']
100
+ slice_idx = conversation['slice_idx']
101
  transcript = conversation['transcript']
102
  conv_len = gr.Number(value=len(transcript), visible=False)
103
+ is_contextual_turn = True if slice_idx > 0 else False
104
 
105
  markdown_blocks = [None] * max_conversation_length
106
  with gr.Column(scale=1, min_width=600):
 
108
  gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
109
  visible=True)
110
  for i in range(max_conversation_length):
111
+ if i < len(transcript):
112
+ markdown_blocks[i] = gr.Markdown(
113
+ f"""&nbsp;&nbsp;**{transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{transcript[i]['response']}""")
114
+ else:
115
+ markdown_blocks[i] = gr.Markdown("")
116
  if i >= conv_len.value:
117
  markdown_blocks[i].visible = False
118
 
 
135
  radio_submaxim_1_base = gr.Radio(label=submaxims[1],
136
  choices=checkbox_choices[1],
137
  value=None,
138
+ visible=is_contextual_turn)
139
  radio_submaxim_2_base = gr.Radio(label=submaxims[2],
140
  choices=checkbox_choices[2],
141
  value=None,
142
+ visible=is_contextual_turn)
143
 
144
  submit_button = gr.Button("Submit")
145
  skip_button = gr.Button("Skip")
146
 
147
  conv_id_element = gr.Text(value=conv_id, visible=False)
148
+ slice_idx_element = gr.Text(value=slice_idx, visible=False)
149
  input_list = [conv_id_element] + \
150
+ [slice_idx_element] + \
151
  markdown_blocks + \
152
  [last_response] + \
153
  [radio_submaxim_0_base] + \
 
158
  fn=submit,
159
  inputs=input_list,
160
  outputs=[conv_id_element,
161
+ slice_idx_element,
162
  *markdown_blocks,
163
  last_response,
164
  radio_submaxim_0_base,
 
170
  fn=skip,
171
  inputs=input_list,
172
  outputs=[conv_id_element,
173
+ slice_idx_element,
174
  *markdown_blocks,
175
  last_response,
176
  radio_submaxim_0_base,
 
181
 
182
  css = """
183
  #textbox_id textarea {
184
+ background-color: white;
185
  }
186
 
187
  .bottom-aligned-group {
188
+ display: flex;
189
+ flex-direction: column;
190
+ justify-content: flex-end;
191
+ height: 100%;
192
  }
193
  """
194
  interface.css = css
195
  interface.launch()
 
 
interface_utils.py CHANGED
@@ -31,17 +31,24 @@ def save_to_jsonl(data, filename):
31
  file.write(json_line + '\n')
32
 
33
 
34
- def get_conversation(conversation_data):
35
- conv = random.choice(conversation_data)
36
- return conv
37
-
38
-
39
- def pad_transcript(transcript, max_length):
40
- padding_count = max_length - len(transcript)
41
- if padding_count > 0:
42
- for _ in range(padding_count):
43
- transcript.append({'speaker': '', 'response': ''})
44
- return transcript
 
 
 
 
 
 
 
45
 
46
 
47
  def get_last_response(transcript):
 
31
  file.write(json_line + '\n')
32
 
33
 
34
+ def get_conversation(data, min_length=0):
35
+ conv = random.choice(data)
36
+ transcript = conv['transcript']
37
+ slice_index = random.randint(min_length, len(transcript) - 1)
38
+ conv_slice = transcript[slice_index]
39
+ return {
40
+ 'conv_id': conv['conv_id'],
41
+ 'slice_idx': slice_index,
42
+ 'transcript': conv_slice
43
+ }
44
+
45
+
46
+ # def pad_transcript(transcript, max_length):
47
+ # padding_count = max_length - len(transcript)
48
+ # if padding_count > 0:
49
+ # for _ in range(padding_count):
50
+ # transcript.append({'speaker': '', 'response': ''})
51
+ # return transcript
52
 
53
 
54
  def get_last_response(transcript):