Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -13,11 +13,12 @@ tokenizer = None
|
|
13 |
ner_pipeline = None
|
14 |
pos_pipeline = None
|
15 |
intent_classifier = None
|
|
|
16 |
models_loaded = False
|
17 |
|
18 |
def load_models(progress=gr.Progress()):
|
19 |
"""Lazy-load models only when needed"""
|
20 |
-
global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, models_loaded
|
21 |
|
22 |
if models_loaded:
|
23 |
return True
|
@@ -49,6 +50,14 @@ def load_models(progress=gr.Progress()):
|
|
49 |
device=0 if torch.cuda.is_available() else -1 # Use GPU if available
|
50 |
)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
progress(1.0, desc="Models loaded successfully!")
|
53 |
models_loaded = True
|
54 |
return True
|
@@ -57,6 +66,25 @@ def load_models(progress=gr.Progress()):
|
|
57 |
print(f"Error loading models: {str(e)}")
|
58 |
return f"Error: {str(e)}"
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
def get_token_colors(token_type):
|
61 |
colors = {
|
62 |
"prefix": "#D8BFD8", # Light purple
|
@@ -272,8 +300,26 @@ def analyze_keyword(keyword, progress=gr.Progress()):
|
|
272 |
importance = 60 + (len(token["text"]) * 2)
|
273 |
importance = min(95, importance)
|
274 |
|
275 |
-
# Generate related terms
|
276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
|
278 |
full_token_analysis.append({
|
279 |
"token": token["text"],
|
|
|
13 |
ner_pipeline = None
|
14 |
pos_pipeline = None
|
15 |
intent_classifier = None
|
16 |
+
semantic_model = None
|
17 |
models_loaded = False
|
18 |
|
19 |
def load_models(progress=gr.Progress()):
|
20 |
"""Lazy-load models only when needed"""
|
21 |
+
global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, models_loaded
|
22 |
|
23 |
if models_loaded:
|
24 |
return True
|
|
|
50 |
device=0 if torch.cuda.is_available() else -1 # Use GPU if available
|
51 |
)
|
52 |
|
53 |
+
progress(0.9, desc="Loading semantic model...")
|
54 |
+
try:
|
55 |
+
from sentence_transformers import SentenceTransformer
|
56 |
+
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
|
57 |
+
except Exception as e:
|
58 |
+
print(f"Warning: Could not load semantic model: {str(e)}")
|
59 |
+
semantic_model = None # Set to None so we can check if it's available
|
60 |
+
|
61 |
progress(1.0, desc="Models loaded successfully!")
|
62 |
models_loaded = True
|
63 |
return True
|
|
|
66 |
print(f"Error loading models: {str(e)}")
|
67 |
return f"Error: {str(e)}"
|
68 |
|
69 |
+
def get_semantic_similarity(token, comparison_terms):
    """Rank *comparison_terms* by semantic similarity to *token*.

    Encodes the token and every comparison term with the module-level
    ``semantic_model`` (a SentenceTransformer loaded by ``load_models``)
    and returns a list of ``(term, similarity)`` tuples sorted
    most-similar first.

    Best-effort by design: if anything fails (sklearn missing,
    ``semantic_model`` not loaded / ``None``, encoding error), the error
    is printed and a neutral 0.5 score is returned for every term in the
    caller's original order, so callers never see an exception.

    Args:
        token: the word/phrase to compare from.
        comparison_terms: list of candidate strings to rank.

    Returns:
        list[tuple[str, float]]: terms paired with cosine-similarity
        scores, sorted descending by score (or all 0.5 on failure).
    """
    try:
        from sklearn.metrics.pairwise import cosine_similarity

        token_embedding = semantic_model.encode([token])
        comparison_embeddings = semantic_model.encode(comparison_terms)

        # One vectorized call instead of a per-term Python loop:
        # cosine_similarity returns a (1, n) score matrix; row 0 holds
        # the similarity of the token against every comparison term.
        scores = cosine_similarity(token_embedding, comparison_embeddings)[0]
        similarities = [
            (term, float(score))
            for term, score in zip(comparison_terms, scores)
        ]
        return sorted(similarities, key=lambda pair: pair[1], reverse=True)
    except Exception as e:
        print(f"Error in semantic similarity: {str(e)}")
        # Return dummy data on error
        return [(term, 0.5) for term in comparison_terms]
|
87 |
+
|
88 |
def get_token_colors(token_type):
|
89 |
colors = {
|
90 |
"prefix": "#D8BFD8", # Light purple
|
|
|
300 |
importance = 60 + (len(token["text"]) * 2)
|
301 |
importance = min(95, importance)
|
302 |
|
303 |
+
# Generate more meaningful related terms using semantic similarity
|
304 |
+
if semantic_model is not None:
|
305 |
+
try:
|
306 |
+
# Generate some potential related terms
|
307 |
+
prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
|
308 |
+
synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
|
309 |
+
domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
|
310 |
+
comparison_terms = prefix_related + synonym_candidates + domain_terms
|
311 |
+
|
312 |
+
# Get similarities
|
313 |
+
similarities = get_semantic_similarity(token['text'], comparison_terms)
|
314 |
+
|
315 |
+
# Use top 3 most similar terms
|
316 |
+
related_terms = [term for term, score in similarities[:3]]
|
317 |
+
except Exception as e:
|
318 |
+
print(f"Error generating semantic related terms: {str(e)}")
|
319 |
+
related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
|
320 |
+
else:
|
321 |
+
# Fallback if semantic model isn't loaded
|
322 |
+
related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
|
323 |
|
324 |
full_token_analysis.append({
|
325 |
"token": token["text"],
|