Commit f3bcc60
Parent(s): 02d6d31
Update app.py

app.py CHANGED
@@ -48,23 +48,36 @@ def process_categories(categories):
     return df_category_list
 
 def compare_text(transcript, categories):
-
-
-
-
-
-
-
-
-
-
-    #
-    #
-
+    # Sentences
+    # df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
+    sentences = transcript_to_sentences(transcript)
+    embeddings = model.encode(sentences, convert_to_tensor=True)
+    #for idx, sentence in enumerate(sentences):
+    #    embeddings = calculate_embeddings_with_roberta(sentence)
+    #    # Create new row
+    #    new_row = {
+    #        'line': idx + 1,
+    #        'sentence': sentence,
+    #        'embedding': embeddings
+    #    }
+    #    df_sentences = df_sentences.append(new_row, ignore_index=True)
+    # Categories
+    df_category_list = process_categories(categories)
+    df_cosines = pd.DataFrame(data=range(len(sentences)),columns=['line'])
+    return df_cosines
+    for _, df_category in enumerate(df_category_list):
+        phrases_list = df_category["embeddings"].values.tolist()
+        phrases = torch.stack(phrases_list)
+        # Compute cosine-similarities
+        cosine_scores = util.cos_sim(embeddings, phrases).numpy()
+        max_scores = np.max(cosine_scores, axis=1)
+
+
+        df_results_plot[df_category.iloc[0,2]] = max_scores
+        df_results_grid[df_category.iloc[0,2]] = max_scores
+
+
 
-    targets = np.array([ np.array(value[0]) for value in df_phrases[["embedding"]].values ])
-    # print(f"targets:{targets.shape}")
-    df_cosines = pd.DataFrame(columns=['line'])
     for i, row in df_sentences.iterrows():
         line = f'{row["line"]:03}'
         # print(f'Calculating cosines for [ {line} ] {row["sentence"][:50]}...')
@@ -132,37 +145,9 @@ def compare_text(transcript, categories):
 
     return res, fig, details
 
-
-
-
-    df_results = pd.DataFrame(sentences, columns=['Sentence'])
-    embeddings = model.encode(sentences, convert_to_tensor=True)
-    for _, df_category in enumerate(df_category_list):
-        phrases_list = df_category["embeddings"].values.tolist()
-        phrases = torch.stack(phrases_list)
-        # Compute cosine-similarities
-        cosine_scores = util.cos_sim(embeddings, phrases).numpy()
-        max_scores = np.max(cosine_scores, axis=1)
-        df_results_plot[df_category.iloc[0,2]] = max_scores
-        df_results_grid[df_category.iloc[0,2]] = max_scores
-    df_results_plot = df_results_plot.round(decimals = 2)
-    df_results_grid = df_results_grid.round(decimals = 3)
-
-    df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
-    for idx, sentence in enumerate(transcript_to_sentences(transcript)):
-        embeddings = calculate_embeddings_with_roberta(sentence)
-        # Create new row
-        new_row = {
-            'line': idx + 1,
-            'sentence': sentence,
-            'embedding': embeddings
-        }
-        df_sentences = df_sentences.append(new_row, ignore_index=True)
-    # print(df_sentences.shape)
-    # df_sentences.head()
-    return df_sentences
-    #return res, fig, details
-
+    #*********************************************************
+
+
 
     doc = nlp(transcript)
     sentences = [ sentence.text for sentence in list(doc.sents) ]
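The context lines at the end of the diff show how the transcript is split into sentences with spaCy (doc = nlp(transcript), then iterating doc.sents), which is what the transcript_to_sentences call in the new compare_text relies on. A minimal, self-contained sketch of that step, assuming the en_core_web_sm pipeline; the commit does not show which spaCy model app.py actually loads:

import spacy

# Assumed pipeline; the commit does not show how `nlp` is created in app.py.
nlp = spacy.load("en_core_web_sm")

def transcript_to_sentences(transcript: str) -> list[str]:
    # Mirror the two context lines in the diff: parse the transcript and
    # collect the text of each detected sentence boundary.
    doc = nlp(transcript)
    return [sentence.text for sentence in doc.sents]

print(transcript_to_sentences("Hello, thanks for calling. How can I help you today?"))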
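The added lines in compare_text encode all transcript sentences once, then score them against each category's phrase embeddings with util.cos_sim and keep the best-matching phrase per sentence (np.max over axis 1). A minimal sketch of that scoring pattern in isolation, using an illustrative SentenceTransformer checkpoint and made-up sentences and phrases; the commit does not show the actual model, category data, or the df_results_plot/df_results_grid frames:

import numpy as np
from sentence_transformers import SentenceTransformer, util

# Assumed checkpoint for illustration only.
model = SentenceTransformer("all-MiniLM-L6-v2")

sentences = [
    "The agent greeted the caller politely.",
    "The caller asked about a refund.",
]
category_phrases = {
    "Greeting": ["hello and welcome", "thanks for calling"],
    "Refunds": ["I would like my money back", "please process a refund"],
}

# Encode the transcript sentences once, as in
# `embeddings = model.encode(sentences, convert_to_tensor=True)`.
sentence_emb = model.encode(sentences, convert_to_tensor=True)

scores = {}
for name, phrases in category_phrases.items():
    # The app pre-computes phrase embeddings in process_categories; here they
    # are encoded inline to keep the sketch self-contained.
    phrase_emb = model.encode(phrases, convert_to_tensor=True)
    # Cosine similarity of every sentence against every phrase:
    # shape (n_sentences, n_phrases).
    cosine_scores = util.cos_sim(sentence_emb, phrase_emb).cpu().numpy()
    # Keep the best-matching phrase per sentence, as in
    # `max_scores = np.max(cosine_scores, axis=1)`.
    scores[name] = np.max(cosine_scores, axis=1)

for name, per_sentence in scores.items():
    print(name, per_sentence.round(2))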
|