PromptMeister committed on
Commit
8a0361c
·
verified ·
1 Parent(s): ec2c977

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -3
app.py CHANGED
@@ -13,11 +13,12 @@ tokenizer = None
13
  ner_pipeline = None
14
  pos_pipeline = None
15
  intent_classifier = None
 
16
  models_loaded = False
17
 
18
  def load_models(progress=gr.Progress()):
19
  """Lazy-load models only when needed"""
20
- global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, models_loaded
21
 
22
  if models_loaded:
23
  return True
@@ -49,6 +50,14 @@ def load_models(progress=gr.Progress()):
49
  device=0 if torch.cuda.is_available() else -1 # Use GPU if available
50
  )
51
 
 
 
 
 
 
 
 
 
52
  progress(1.0, desc="Models loaded successfully!")
53
  models_loaded = True
54
  return True
@@ -57,6 +66,25 @@ def load_models(progress=gr.Progress()):
57
  print(f"Error loading models: {str(e)}")
58
  return f"Error: {str(e)}"
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def get_token_colors(token_type):
61
  colors = {
62
  "prefix": "#D8BFD8", # Light purple
@@ -272,8 +300,26 @@ def analyze_keyword(keyword, progress=gr.Progress()):
272
  importance = 60 + (len(token["text"]) * 2)
273
  importance = min(95, importance)
274
 
275
- # Generate related terms (simplified)
276
- related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  full_token_analysis.append({
279
  "token": token["text"],
 
13
  ner_pipeline = None
14
  pos_pipeline = None
15
  intent_classifier = None
16
+ semantic_model = None
17
  models_loaded = False
18
 
19
  def load_models(progress=gr.Progress()):
20
  """Lazy-load models only when needed"""
21
+ global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, models_loaded
22
 
23
  if models_loaded:
24
  return True
 
50
  device=0 if torch.cuda.is_available() else -1 # Use GPU if available
51
  )
52
 
53
+ progress(0.9, desc="Loading semantic model...")
54
+ try:
55
+ from sentence_transformers import SentenceTransformer
56
+ semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
57
+ except Exception as e:
58
+ print(f"Warning: Could not load semantic model: {str(e)}")
59
+ semantic_model = None # Set to None so we can check if it's available
60
+
61
  progress(1.0, desc="Models loaded successfully!")
62
  models_loaded = True
63
  return True
 
66
  print(f"Error loading models: {str(e)}")
67
  return f"Error: {str(e)}"
68
 
69
def get_semantic_similarity(token, comparison_terms):
    """Rank *comparison_terms* by cosine similarity to *token*.

    Encodes the token and all comparison terms with the module-level
    ``semantic_model`` (a SentenceTransformer loaded by ``load_models``;
    may be None/unavailable), then computes all cosine similarities in a
    single vectorized pass instead of one sklearn call per term.

    Args:
        token: The keyword token text to compare from.
        comparison_terms: List of candidate term strings to score.

    Returns:
        List of ``(term, similarity)`` tuples sorted by similarity,
        highest first. On any failure (model not loaded, encode error),
        falls back to a neutral 0.5 score for every term so callers can
        proceed without special-casing.
    """
    try:
        import numpy as np

        # Encode once per side; both return array-like embeddings.
        token_emb = np.asarray(semantic_model.encode([token])[0], dtype=float)
        term_embs = np.asarray(semantic_model.encode(comparison_terms), dtype=float)

        # Cosine similarity for all terms at once: dot / (|a| * |b|).
        # Guard against zero norms to avoid division warnings.
        denom = np.linalg.norm(term_embs, axis=1) * np.linalg.norm(token_emb)
        denom = np.where(denom == 0.0, 1.0, denom)
        scores = (term_embs @ token_emb) / denom

        pairs = [(term, float(score)) for term, score in zip(comparison_terms, scores)]
        return sorted(pairs, key=lambda x: x[1], reverse=True)
    except Exception as e:
        print(f"Error in semantic similarity: {str(e)}")
        # Return dummy data on error
        return [(term, 0.5) for term in comparison_terms]
87
+
88
  def get_token_colors(token_type):
89
  colors = {
90
  "prefix": "#D8BFD8", # Light purple
 
300
  importance = 60 + (len(token["text"]) * 2)
301
  importance = min(95, importance)
302
 
303
+ # Generate more meaningful related terms using semantic similarity
304
+ if semantic_model is not None:
305
+ try:
306
+ # Generate some potential related terms
307
+ prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
308
+ synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
309
+ domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
310
+ comparison_terms = prefix_related + synonym_candidates + domain_terms
311
+
312
+ # Get similarities
313
+ similarities = get_semantic_similarity(token['text'], comparison_terms)
314
+
315
+ # Use top 3 most similar terms
316
+ related_terms = [term for term, score in similarities[:3]]
317
+ except Exception as e:
318
+ print(f"Error generating semantic related terms: {str(e)}")
319
+ related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
320
+ else:
321
+ # Fallback if semantic model isn't loaded
322
+ related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
323
 
324
  full_token_analysis.append({
325
  "token": token["text"],