AlGe committed on
Commit
1b2076c
·
verified ·
1 Parent(s): efec366

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -159
app.py CHANGED
@@ -40,49 +40,11 @@ monochrome = Monochrome()
40
 
41
  auth_token = os.environ['HF_TOKEN']
42
 
43
- tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
44
- model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
45
- tokenizer_bin.model_max_length = 512
46
- pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
47
-
48
- tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
49
- model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
50
- tokenizer_ext.model_max_length = 512
51
- pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
52
-
53
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
54
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
55
 
56
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
57
 
58
- def process_ner(text: str, pipeline) -> dict:
59
- output = pipeline(text)
60
- entities = []
61
- current_entity = None
62
-
63
- for token in output:
64
- entity_type = token['entity'][2:]
65
- entity_prefix = token['entity'][:1]
66
-
67
- if current_entity is None or entity_type != current_entity['entity'] or (entity_prefix == 'B' and entity_type == current_entity['entity']):
68
- if current_entity is not None:
69
- entities.append(current_entity)
70
- current_entity = {
71
- "entity": entity_type,
72
- "start": token['start'],
73
- "end": token['end'],
74
- "score": token['score'],
75
- "tokens": [token['word']]
76
- }
77
- else:
78
- current_entity['end'] = token['end']
79
- current_entity['score'] = max(current_entity['score'], token['score'])
80
- current_entity['tokens'].append(token['word'])
81
-
82
- if current_entity is not None:
83
- entities.append(current_entity)
84
-
85
- return {"text": text, "entities": entities}
86
 
87
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
88
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
@@ -97,110 +59,11 @@ def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str,
97
 
98
  return f"{round(prediction1, 1)}", f"{round(prediction2, 1)}", f"{round(score, 2)}"
99
 
100
-
101
- def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
102
- entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
103
- entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
104
-
105
- # Counting entities for binary classification
106
- entity_counts_bin = {entity: entities_bin.count(entity) for entity in set(entities_bin)}
107
- bin_labels = list(entity_counts_bin.keys())
108
- bin_sizes = list(entity_counts_bin.values())
109
-
110
- # Counting entities for extended classification
111
- entity_counts_ext = {entity: entities_ext.count(entity) for entity in set(entities_ext)}
112
- ext_labels = list(entity_counts_ext.keys())
113
- ext_sizes = list(entity_counts_ext.values())
114
-
115
- bin_color_map = {
116
- "External": "#6ad5bc",
117
- "Internal": "#ee8bac"
118
- }
119
-
120
- ext_color_map = {
121
- "INTemothou": "#FF7F50", # Coral
122
- "INTpercept": "#FF4500", # OrangeRed
123
- "INTtime": "#FF6347", # Tomato
124
- "INTplace": "#FFD700", # Gold
125
- "INTevent": "#FFA500", # Orange
126
- "EXTsemantic": "#4682B4", # SteelBlue
127
- "EXTrepetition": "#5F9EA0", # CadetBlue
128
- "EXTother": "#00CED1", # DarkTurquoise
129
- }
130
-
131
- bin_colors = [bin_color_map.get(label, "#FFFFFF") for label in bin_labels]
132
- ext_colors = [ext_color_map.get(label, "#FFFFFF") for label in ext_labels]
133
-
134
- # Create pie chart for extended classification
135
- fig1 = go.Figure(data=[go.Pie(labels=ext_labels, values=ext_sizes, textinfo='label+percent', hole=.3, marker=dict(colors=ext_colors))])
136
- fig1.update_layout(
137
- template='plotly_dark',
138
- plot_bgcolor='rgba(0,0,0,0)',
139
- paper_bgcolor='rgba(0,0,0,0)'
140
- )
141
-
142
- # Create bar chart for binary classification
143
- fig2 = go.Figure(data=[go.Bar(x=bin_labels, y=bin_sizes, marker=dict(color=bin_colors))])
144
- fig2.update_layout(
145
- xaxis_title='Entity Type',
146
- yaxis_title='Count',
147
- template='plotly_dark',
148
- plot_bgcolor='rgba(0,0,0,0)',
149
- paper_bgcolor='rgba(0,0,0,0)'
150
- )
151
-
152
- # Generate word cloud
153
- wordcloud_image = generate_wordcloud(ner_output_ext['entities'], ext_color_map)
154
-
155
- return fig1, fig2, wordcloud_image
156
-
157
-
158
- def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str]) -> np.ndarray:
159
- token_texts = []
160
- token_scores = []
161
- token_types = []
162
-
163
- for entity in entities:
164
- for token in entity['tokens']:
165
- token_texts.append(token)
166
- token_scores.append(entity['score'])
167
- token_types.append(entity['entity'])
168
-
169
- # Create a dictionary for word cloud
170
- word_freq = {text: score for text, score in zip(token_texts, token_scores)}
171
-
172
- def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
173
- entity_type = next((t for t, w in zip(token_types, token_texts) if w == word), None)
174
- return color_map.get(entity_type, "#FFFFFF")
175
-
176
- wordcloud = WordCloud(width=800, height=400, background_color='#121212', color_func=color_func).generate_from_frequencies(word_freq)
177
-
178
- # Convert to image array
179
- plt.figure(figsize=(10, 5))
180
- plt.imshow(wordcloud, interpolation='bilinear')
181
- plt.axis('off')
182
- plt.tight_layout(pad=0)
183
-
184
- # Convert plt to numpy array
185
- plt_image = plt.gcf()
186
- plt_image.canvas.draw()
187
- image_array = np.frombuffer(plt_image.canvas.tostring_rgb(), dtype=np.uint8)
188
- image_array = image_array.reshape(plt_image.canvas.get_width_height()[::-1] + (3,))
189
- plt.close()
190
-
191
- return image_array
192
-
193
  @spaces.GPU
194
  def all(text: str):
195
- ner_output_bin = process_ner(text, pipe_bin)
196
- ner_output_ext = process_ner(text, pipe_ext)
197
  classification_output = process_classification(text, model1, model2, tokenizer1)
198
-
199
- pie_chart, bar_chart, wordcloud_image = generate_charts(ner_output_bin, ner_output_ext)
200
-
201
- return (ner_output_bin, ner_output_ext,
202
- classification_output[0], classification_output[1], classification_output[2],
203
- pie_chart, bar_chart, wordcloud_image)
204
 
205
  examples = [
206
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
@@ -210,29 +73,9 @@ iface = gr.Interface(
210
  fn=all,
211
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
212
  outputs=[
213
- gr.HighlightedText(label="Binary Sequence Classification",
214
- color_map={
215
- "External": "#6ad5bcff",
216
- "Internal": "#ee8bacff"}
217
- ),
218
- gr.HighlightedText(label="Extended Sequence Classification",
219
- color_map={
220
- "INTemothou": "#FF7F50", # Coral
221
- "INTpercept": "#FF4500", # OrangeRed
222
- "INTtime": "#FF6347", # Tomato
223
- "INTplace": "#FFD700", # Gold
224
- "INTevent": "#FFA500", # Orange
225
- "EXTsemantic": "#4682B4", # SteelBlue
226
- "EXTrepetition": "#5F9EA0", # CadetBlue
227
- "EXTother": "#00CED1", # DarkTurquoise
228
- }
229
- ),
230
  gr.Label(label="Internal Detail Count"),
231
  gr.Label(label="External Detail Count"),
232
  gr.Label(label="Approximated Internal Detail Ratio"),
233
- gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
234
- gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
235
- gr.Image(label="Entity Word Cloud")
236
  ],
237
  title="Scoring Demo",
238
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
 
40
 
41
  auth_token = os.environ['HF_TOKEN']
42
 
 
 
 
 
 
 
 
 
 
 
43
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
44
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
45
 
46
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
50
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
 
59
 
60
  return f"{round(prediction1, 1)}", f"{round(prediction2, 1)}", f"{round(score, 2)}"
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  @spaces.GPU
63
  def all(text: str):
 
 
64
  classification_output = process_classification(text, model1, model2, tokenizer1)
65
+
66
+ return (classification_output[0], classification_output[1], classification_output[2])
 
 
 
 
67
 
68
  examples = [
69
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
 
73
  fn=all,
74
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
75
  outputs=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  gr.Label(label="Internal Detail Count"),
77
  gr.Label(label="External Detail Count"),
78
  gr.Label(label="Approximated Internal Detail Ratio"),
 
 
 
79
  ],
80
  title="Scoring Demo",
81
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",