PromptMeister committed (verified)
Commit 20172b4 · 1 parent: da51a21

Update app.py

Files changed (1):
  1. app.py +536 -205
app.py CHANGED
@@ -7,6 +7,9 @@ import json
 import time
 import os
 from functools import partial
+import datetime
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 
 # Global variables to store models
 tokenizer = None
@@ -14,11 +17,16 @@ ner_pipeline = None
 pos_pipeline = None
 intent_classifier = None
 semantic_model = None
+stt_model = None  # Speech-to-text model
 models_loaded = False
 
+# Database to store keyword ranking history (in-memory database for this example)
+# In a real app, you would use a proper database
+ranking_history = {}
+
 def load_models(progress=gr.Progress()):
     """Lazy-load models only when needed"""
-    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, models_loaded
+    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded
 
     if models_loaded:
         return True
@@ -32,17 +40,17 @@ def load_models(progress=gr.Progress()):
     progress(0.2, desc="Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 
-    progress(0.4, desc="Loading NER model...")
+    progress(0.3, desc="Loading NER model...")
     ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
 
-    progress(0.6, desc="Loading POS model...")
+    progress(0.4, desc="Loading POS model...")
     # Use smaller POS model
     from transformers import AutoModelForTokenClassification, BertTokenizerFast
     pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
     pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
     pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)
 
-    progress(0.8, desc="Loading intent classifier...")
+    progress(0.6, desc="Loading intent classifier...")
     # Use a smaller model for zero-shot classification
     intent_classifier = pipeline(
         "zero-shot-classification",
@@ -50,7 +58,18 @@ def load_models(progress=gr.Progress()):
         device=0 if torch.cuda.is_available() else -1  # Use GPU if available
     )
 
-    progress(0.9, desc="Loading semantic model...")
+    progress(0.7, desc="Loading speech-to-text model...")
+    try:
+        # Load automatic speech recognition model
+        from transformers import WhisperProcessor, WhisperForConditionalGeneration
+        processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
+        stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
+        stt_model = (processor, stt_model)
+    except Exception as e:
+        print(f"Warning: Could not load speech-to-text model: {str(e)}")
+        stt_model = None  # Set to None so we can check if it's available
+
+    progress(0.8, desc="Loading semantic model...")
     try:
         from sentence_transformers import SentenceTransformer
         semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -66,6 +85,111 @@ def load_models(progress=gr.Progress()):
         print(f"Error loading models: {str(e)}")
         return f"Error: {str(e)}"
 
+def speech_to_text(audio_path):
+    """Convert speech to text using the loaded speech-to-text model"""
+    if stt_model is None:
+        return "Speech-to-text model not loaded. Please try text input instead."
+
+    try:
+        import librosa
+        import numpy as np
+
+        # Load audio file
+        audio, sr = librosa.load(audio_path, sr=16000)
+
+        # Process audio with Whisper
+        processor, model = stt_model
+        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
+
+        # Generate token ids
+        predicted_ids = model.generate(input_features)
+
+        # Decode token ids to text
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+        return transcription
+    except Exception as e:
+        print(f"Error in speech_to_text: {str(e)}")
+        return f"Error processing speech: {str(e)}"
+
+def handle_voice_input(audio):
+    """Handle voice input and convert to text"""
+    if audio is None:
+        return "No audio detected. Please try again."
+
+    try:
+        # Convert speech to text
+        text = speech_to_text(audio)
+        return text
+    except Exception as e:
+        print(f"Error in handle_voice_input: {str(e)}")
+        return f"Error: {str(e)}"
+
+def simulate_google_serp(keyword, num_results=10):
+    """Simulate Google SERP results for a keyword"""
+    try:
+        # In a real implementation, this would call the Google API
+        # For now, we'll generate fake SERP data
+
+        # Deterministic seed for consistent results by keyword
+        np.random.seed(sum(ord(c) for c in keyword))
+
+        serp_results = []
+        domains = [
+            "example.com", "wikipedia.org", "medium.com", "github.com",
+            "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
+            "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
+        ]
+
+        for i in range(1, num_results + 1):
+            domain = domains[i % len(domains)]
+            title = f"{keyword.title()} - {domain.split('.')[0].title()} Resource #{i}"
+            snippet = f"This is a simulated SERP result for '{keyword}'. Result #{i} would provide relevant information about this topic."
+            url = f"https://www.{domain}/{keyword.replace(' ', '-')}-resource-{i}"
+
+            position = i
+            ctr = round(0.3 * (0.85 ** (i - 1)), 4)  # Simulate click-through rate decay
+
+            serp_results.append({
+                "position": position,
+                "title": title,
+                "url": url,
+                "domain": domain,
+                "snippet": snippet,
+                "ctr_estimate": ctr,
+                "impressions_estimate": np.random.randint(1000, 10000)
+            })
+
+        return serp_results
+    except Exception as e:
+        print(f"Error in simulate_google_serp: {str(e)}")
+        return []
+
+def update_ranking_history(keyword, serp_results):
+    """Update the ranking history for a keyword"""
+    try:
+        # Get current timestamp
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # Initialize if keyword not in history
+        if keyword not in ranking_history:
+            ranking_history[keyword] = []
+
+        # Add new entry
+        ranking_history[keyword].append({
+            "timestamp": timestamp,
+            "results": serp_results[:5]  # Store top 5 results for history
+        })
+
+        # Keep only last 10 entries for each keyword
+        if len(ranking_history[keyword]) > 10:
+            ranking_history[keyword] = ranking_history[keyword][-10:]
+
+        return True
+    except Exception as e:
+        print(f"Error in update_ranking_history: {str(e)}")
+        return False
+
 def get_semantic_similarity(token, comparison_terms):
     """Calculate semantic similarity between a token and comparison terms"""
     try:
@@ -263,209 +387,156 @@ def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
     fig.update_layout(title="Fallback Chart (Error occurred)")
     return fig
 
-def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", progress=gr.Progress()):
-    """Main function to analyze a keyword"""
-    if not keyword or not keyword.strip():
-        return (
-            "<div>Please enter a keyword to analyze</div>",
-            "<div>Please enter a keyword to analyze</div>",
-            None,
-            None
-        )
-
-    progress(0.1, desc="Starting analysis...")
-
-    # Load models if not already loaded
-    model_status = load_models(progress)
-    if isinstance(model_status, str) and model_status.startswith("Error"):
-        return (
-            f"<div style='color:red;'>{model_status}</div>",
-            f"<div style='color:red;'>{model_status}</div>",
-            None,
-            None
-        )
-
-    try:
-        # Basic tokenization - just split on spaces for simplicity
-        words = keyword.strip().lower().split()
-        progress(0.2, desc="Analyzing tokens...")
-
-        # Get token types
-        token_analysis = analyze_token_types(words)
-
-        progress(0.3, desc="Running NER...")
-        # Get NER tags - handle potential errors
-        try:
-            ner_results = ner_pipeline(keyword)
-        except Exception as e:
-            print(f"NER error: {str(e)}")
-            ner_results = []
-
-        progress(0.4, desc="Running POS tagging...")
-        # Get POS tags - handle potential errors
-        try:
-            pos_results = pos_pipeline(keyword)
-        except Exception as e:
-            print(f"POS error: {str(e)}")
-            pos_results = []
-
-        # Process and organize results
-        full_token_analysis = []
-        for token in token_analysis:
-            # Find POS tag for this token
-            pos_tag = "NOUN"  # Default
-            for pos_result in pos_results:
-                if pos_result["word"].lower() == token["text"]:
-                    pos_tag = pos_result["entity"]
-                    break
-
-            # Find entity type if any
-            entity_type = None
-            for ner_result in ner_results:
-                if ner_result["word"].lower() == token["text"]:
-                    entity_type = ner_result["entity"]
-                    break
-
-            # Generate historical data
-            historical_data = simulate_historical_data(token["text"])
-
-            # Generate origin data
-            origin = generate_origin_data(token["text"])
-
-            # Calculate importance (simplified algorithm)
-            importance = 60 + (len(token["text"]) * 2)
-            importance = min(95, importance)
-
-            # Generate more meaningful related terms using semantic similarity
-            if semantic_model is not None:
-                try:
-                    # Generate some potential related terms
-                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
-                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
-                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
-                    comparison_terms = prefix_related + synonym_candidates + domain_terms
-
-                    # Get similarities
-                    similarities = get_semantic_similarity(token['text'], comparison_terms)
-
-                    # Use top 3 most similar terms
-                    related_terms = [term for term, score in similarities[:3]]
-                except Exception as e:
-                    print(f"Error generating semantic related terms: {str(e)}")
-                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
-            else:
-                # Fallback if semantic model isn't loaded
-                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
-
-            full_token_analysis.append({
-                "token": token["text"],
-                "type": token["type"],
-                "posTag": pos_tag,
-                "entityType": entity_type,
-                "importance": importance,
-                "historicalData": historical_data,
-                "origin": origin,
-                "relatedTerms": related_terms
-            })
-
-        progress(0.6, desc="Analyzing intent...")
-        # Intent analysis - handle potential errors
-        try:
-            intent_result = intent_classifier(
-                keyword,
-                candidate_labels=["informational", "navigational", "transactional"]
-            )
-
-            intent_analysis = {
-                "type": intent_result["labels"][0].capitalize(),
-                "strength": round(intent_result["scores"][0] * 100),
-                "mutations": [
-                    f"{intent_result['labels'][0]}-variation-1",
-                    f"{intent_result['labels'][0]}-variation-2"
-                ]
-            }
-        except Exception as e:
-            print(f"Intent classification error: {str(e)}")
-            intent_analysis = {
-                "type": "Informational",  # Default fallback
-                "strength": 70,
-                "mutations": ["fallback-variation-1", "fallback-variation-2"]
-            }
-
-        # Evolution potential (simplified calculation)
-        evolution_potential = min(95, 65 + (len(keyword) % 30))
-
-        # Predicted trends (simplified)
-        trends = [
-            "Voice search adaptation",
-            "Visual search integration"
-        ]
-
-        # Generate more realistic and keyword-specific evolution data
-        base_volume = 1000 + (len(keyword) * 100)
-
-        # Adjust growth factor based on scenario
-        if growth_scenario == "Conservative":
-            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
-        elif growth_scenario == "Aggressive":
-            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
-        else:  # Moderate
-            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))
-
-        evolution_data = []
-        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
-        current_volume = base_volume
-
-        for month in months:
-            # Add some randomness to make it look more realistic
-            np.random.seed(sum(ord(c) for c in month + keyword))
-            random_factor = 0.9 + (0.2 * np.random.random())
-            current_volume *= growth_factor * random_factor
-
-            evolution_data.append({
-                "month": month,
-                "searchVolume": int(current_volume),
-                "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
-                "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
-            })
-
-        progress(0.8, desc="Creating visualizations...")
-        # Create interactive evolution chart
-        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
-
-        # Generate HTML for token visualization
-        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
-
-        # Generate HTML for full analysis
-        analysis_html = generate_full_analysis_html(
-            keyword,
-            full_token_analysis,
-            intent_analysis,
-            evolution_potential,
-            trends
-        )
-
-        # Generate JSON results
-        json_results = {
-            "keyword": keyword,
-            "tokenAnalysis": full_token_analysis,
-            "intentAnalysis": intent_analysis,
-            "evolutionPotential": evolution_potential,
-            "predictedTrends": trends,
-            "forecast": {
-                "months": forecast_months,
-                "scenario": growth_scenario,
-                "data": evolution_data
-            }
-        }
-
-        progress(1.0, desc="Analysis complete!")
-        return token_viz_html, analysis_html, json_results, evolution_chart
-
-    except Exception as e:
-        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
-        print(f"Error in analyze_keyword: {str(e)}")
-        return error_message, error_message, None, None
+def create_ranking_history_chart(keyword_history):
+    """Create a chart showing keyword ranking history over time"""
+    try:
+        if not keyword_history or len(keyword_history) < 2:
+            # Not enough data for a meaningful chart
+            fig = go.Figure()
+            fig.update_layout(
+                title="Insufficient Ranking Data",
+                annotations=[{
+                    "text": "Need at least 2 data points for ranking history",
+                    "showarrow": False,
+                    "font": {"size": 16},
+                    "xref": "paper",
+                    "yref": "paper",
+                    "x": 0.5,
+                    "y": 0.5
+                }]
+            )
+            return fig
+
+        # Create a figure
+        fig = go.Figure()
+
+        # Extract timestamps and convert to datetime objects
+        timestamps = [entry["timestamp"] for entry in keyword_history]
+        dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]
+
+        # Get unique domains from all results
+        all_domains = set()
+        for entry in keyword_history:
+            for result in entry["results"]:
+                all_domains.add(result["domain"])
+
+        # Colors for different domains
+        domain_colors = {}
+        color_palette = [
+            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
+        ]
+        for i, domain in enumerate(all_domains):
+            domain_colors[domain] = color_palette[i % len(color_palette)]
+
+        # Track domains and their positions over time
+        domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}
+
+        for i, entry in enumerate(keyword_history):
+            for result in entry["results"]:
+                domain = result["domain"]
+                position = result["position"]
+                title = result["title"]
+
+                domain_tracking[domain]["x"].append(dates[i])
+                domain_tracking[domain]["y"].append(position)
+                domain_tracking[domain]["text"].append(title)
+
+        # Add traces for each domain
+        for domain, data in domain_tracking.items():
+            if len(data["x"]) > 0:  # Only add domains that have data
+                fig.add_trace(
+                    go.Scatter(
+                        x=data["x"],
+                        y=data["y"],
+                        mode="lines+markers",
+                        name=domain,
+                        line=dict(color=domain_colors[domain]),
+                        hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
+                        text=data["text"],
+                        marker=dict(size=8)
+                    )
+                )
+
+        # Update layout
+        fig.update_layout(
+            title="Keyword Ranking History",
+            xaxis_title="Date",
+            yaxis_title="Position",
+            yaxis=dict(autorange="reversed"),  # Invert y-axis so position 1 is on top
+            hovermode="closest",
+            height=500
+        )
+
+        return fig
+
+    except Exception as e:
+        print(f"Error in create_ranking_history_chart: {str(e)}")
+        # Return fallback chart
+        fig = go.Figure()
+        fig.update_layout(
+            title="Error Creating Ranking Chart",
+            annotations=[{
+                "text": f"Error: {str(e)}",
+                "showarrow": False,
+                "font": {"size": 14},
+                "xref": "paper",
+                "yref": "paper",
+                "x": 0.5,
+                "y": 0.5
+            }]
+        )
+        return fig
+
+def generate_serp_html(keyword, serp_results):
+    """Generate HTML for SERP results"""
+    if not serp_results:
+        return "<div>No SERP results available</div>"
+
+    html = f"""
+    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="margin-top: 0;">SERP Results for "{keyword}"</h2>
+
+        <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
+            <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
+        </div>
+
+        <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
+    """
+
+    for result in serp_results:
+        position = result["position"]
+        title = result["title"]
+        url = result["url"]
+        snippet = result["snippet"]
+        domain = result["domain"]
+        ctr = result["ctr_estimate"]
+        impressions = result["impressions_estimate"]
+
+        html += f"""
+        <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
+            <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
+                {position}
+            </div>
+            <div style="margin-bottom: 5px;">
+                <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
+            </div>
+            <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
+            <div style="color: #4d5156; font-size: 14px;">{snippet}</div>
+
+            <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
+                <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
+                <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
+            </div>
+        </div>
+        """
+
+    html += """
+        </div>
+    </div>
+    """
+
+    return html
 
 def generate_token_visualization_html(token_analysis, full_analysis):
     """Generate HTML for token visualization"""
@@ -641,7 +712,7 @@ def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolut
     <div style="font-size: 12px; margin-bottom: 8px;">
         <span style="font-weight: 500;">Origin: </span>
         <span>{token['origin']['era']}, </span>
-        <span style="font-style: italic;">{token['origin']['language']}</span>
+        <span style="font-style: italic;">{token['origin']['language']}</span>
     </div>
     <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>
 
@@ -673,19 +744,266 @@ def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolut
 
     return html
 
+def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
+    """Main function to analyze a keyword"""
+    if not keyword or not keyword.strip():
+        return (
+            "<div>Please enter a keyword to analyze</div>",
+            "<div>Please enter a keyword to analyze</div>",
+            None,
+            None,
+            None,
+            None,
+            None
+        )
+
+    progress(0.1, desc="Starting analysis...")
+
+    # Load models if not already loaded
+    model_status = load_models(progress)
+    if isinstance(model_status, str) and model_status.startswith("Error"):
+        return (
+            f"<div style='color:red;'>{model_status}</div>",
+            f"<div style='color:red;'>{model_status}</div>",
+            None,
+            None,
+            None,
+            None,
+            None
+        )
+
+    try:
+        # Basic tokenization - just split on spaces for simplicity
+        words = keyword.strip().lower().split()
+        progress(0.2, desc="Analyzing tokens...")
+
+        # Get token types
+        token_analysis = analyze_token_types(words)
+
+        progress(0.3, desc="Running NER...")
+        # Get NER tags - handle potential errors
+        try:
+            ner_results = ner_pipeline(keyword)
+        except Exception as e:
+            print(f"NER error: {str(e)}")
+            ner_results = []
+
+        progress(0.4, desc="Running POS tagging...")
+        # Get POS tags - handle potential errors
+        try:
+            pos_results = pos_pipeline(keyword)
+        except Exception as e:
+            print(f"POS error: {str(e)}")
+            pos_results = []
+
+        # Process and organize results
+        full_token_analysis = []
+        for token in token_analysis:
+            # Find POS tag for this token
+            pos_tag = "NOUN"  # Default
+            for pos_result in pos_results:
+                if pos_result["word"].lower() == token["text"]:
+                    pos_tag = pos_result["entity"]
+                    break
+
+            # Find entity type if any
+            entity_type = None
+            for ner_result in ner_results:
+                if ner_result["word"].lower() == token["text"]:
+                    entity_type = ner_result["entity"]
+                    break
+
+            # Generate historical data
+            historical_data = simulate_historical_data(token["text"])
+
+            # Generate origin data
+            origin = generate_origin_data(token["text"])
+
+            # Calculate importance (simplified algorithm)
+            importance = 60 + (len(token["text"]) * 2)
+            importance = min(95, importance)
+
+            # Generate more meaningful related terms using semantic similarity
+            if semantic_model is not None:
+                try:
+                    # Generate some potential related terms
+                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
+                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
+                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
+                    comparison_terms = prefix_related + synonym_candidates + domain_terms
+
+                    # Get similarities
+                    similarities = get_semantic_similarity(token['text'], comparison_terms)
+
+                    # Use top 3 most similar terms
+                    related_terms = [term for term, score in similarities[:3]]
+                except Exception as e:
+                    print(f"Error generating semantic related terms: {str(e)}")
+                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
+            else:
+                # Fallback if semantic model isn't loaded
+                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
+
+            full_token_analysis.append({
+                "token": token["text"],
+                "type": token["type"],
+                "posTag": pos_tag,
+                "entityType": entity_type,
+                "importance": importance,
+                "historicalData": historical_data,
+                "origin": origin,
+                "relatedTerms": related_terms
+            })
+
+        progress(0.5, desc="Analyzing intent...")
+        # Intent analysis - handle potential errors
+        try:
+            intent_result = intent_classifier(
+                keyword,
+                candidate_labels=["informational", "navigational", "transactional"]
+            )
+
+            intent_analysis = {
+                "type": intent_result["labels"][0].capitalize(),
+                "strength": round(intent_result["scores"][0] * 100),
+                "mutations": [
+                    f"{intent_result['labels'][0]}-variation-1",
+                    f"{intent_result['labels'][0]}-variation-2"
+                ]
+            }
+        except Exception as e:
+            print(f"Intent classification error: {str(e)}")
+            intent_analysis = {
+                "type": "Informational",  # Default fallback
+                "strength": 70,
+                "mutations": ["fallback-variation-1", "fallback-variation-2"]
+            }
+
+        # Evolution potential (simplified calculation)
+        evolution_potential = min(95, 65 + (len(keyword) % 30))
+
+        # Predicted trends (simplified)
+        trends = [
+            "Voice search adaptation",
+            "Visual search integration"
+        ]
+
+        # Generate more realistic and keyword-specific evolution data
+        base_volume = 1000 + (len(keyword) * 100)
+
+        # Adjust growth factor based on scenario
+        if growth_scenario == "Conservative":
+            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
+        elif growth_scenario == "Aggressive":
+            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
+        else:  # Moderate
+            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))
+
+        evolution_data = []
+        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
+        current_volume = base_volume
+
+        for month in months:
+            # Add some randomness to make it look more realistic
+            np.random.seed(sum(ord(c) for c in month + keyword))
+            random_factor = 0.9 + (0.2 * np.random.random())
+            current_volume *= growth_factor * random_factor
+
+            evolution_data.append({
+                "month": month,
+                "searchVolume": int(current_volume),
+                "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
+                "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
+            })
+
+        progress(0.6, desc="Creating visualizations...")
+        # Create interactive evolution chart
+        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
+
+        # SERP results and ranking history (new feature)
+        serp_results = None
+        ranking_chart = None
+        serp_html = None
+
+        if get_serp:
+            progress(0.7, desc="Fetching SERP data...")
+            # Get SERP results
+            serp_results = simulate_google_serp(keyword)
+
+            # Update ranking history
+            update_ranking_history(keyword, serp_results)
+
+            progress(0.8, desc="Creating ranking charts...")
+            # Create ranking history chart
+            if keyword in ranking_history and len(ranking_history[keyword]) > 0:
+                ranking_chart = create_ranking_history_chart(ranking_history[keyword])
+
+            # Generate SERP HTML
+            serp_html = generate_serp_html(keyword, serp_results)
+
+        # Generate HTML for token visualization
+        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
+
+        # Generate HTML for full analysis
+        analysis_html = generate_full_analysis_html(
+            keyword,
+            full_token_analysis,
+            intent_analysis,
+            evolution_potential,
+            trends
+        )
+
+        # Generate JSON results
+        json_results = {
+            "keyword": keyword,
+            "tokenAnalysis": full_token_analysis,
+            "intentAnalysis": intent_analysis,
+            "evolutionPotential": evolution_potential,
+            "predictedTrends": trends,
+            "forecast": {
+                "months": forecast_months,
+                "scenario": growth_scenario,
+                "data": evolution_data
+            },
+            "serpResults": serp_results
+        }
+
+        progress(1.0, desc="Analysis complete!")
+        return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword
+
+    except Exception as e:
+        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
+        print(f"Error in analyze_keyword: {str(e)}")
+        return error_message, error_message, None, None, None, None, None
+
 # Create the Gradio interface
 with gr.Blocks(css="footer {visibility: hidden}") as demo:
     gr.Markdown("# Keyword DNA Analyzer")
     gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")
 
     with gr.Row():
-        with gr.Column():
-            input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
+        with gr.Column(scale=1):
+            # Add voice search capabilities
+            with gr.Group():
+                gr.Markdown("### Enter Keyword")
+                with gr.Row():
+                    input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
+
+                with gr.Row():
+                    audio_input = gr.Audio(source="microphone", type="filepath", label="Or use voice search")
+                    voice_submit_btn = gr.Button("Convert Voice to Text", variant="secondary")
 
-            # Add forecast settings
-            with gr.Accordion("Forecast Settings", open=False):
-                forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
-                growth_scenario = gr.Radio(["Conservative", "Moderate", "Aggressive"], value="Moderate", label="Growth Scenario")
+            # Add SERP settings
+            with gr.Accordion("Analysis Settings", open=False):
+                with gr.Row():
+                    forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
+                    include_serp = gr.Checkbox(label="Include SERP Analysis", value=True)
+
+                growth_scenario = gr.Radio(
+                    ["Conservative", "Moderate", "Aggressive"],
+                    value="Moderate",
+                    label="Growth Scenario"
+                )
 
             # Add loading indicator
             status_html = gr.HTML('<div style="color:gray;text-align:center;">Enter a keyword and click "Analyze DNA"</div>')
@@ -697,7 +1015,7 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
            for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                example_btns.append(gr.Button(example))
 
-        with gr.Column():
+        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()
@@ -708,17 +1026,30 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot(label="Keyword Evolution Forecast")
 
+                with gr.Tab("SERP Results"):
+                    serp_html = gr.HTML()
+
+                with gr.Tab("Ranking History"):
+                    ranking_chart = gr.Plot(label="Keyword Ranking History")
+
                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()
 
+    # Voice to text conversion handler
+    voice_submit_btn.click(
+        handle_voice_input,
+        inputs=[audio_input],
+        outputs=[input_text]
+    )
+
    # Set up event handlers
    analyze_btn.click(
        lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
        outputs=status_html
    ).then(
        analyze_keyword,
-        inputs=[input_text, forecast_months, growth_scenario],
-        outputs=[token_viz_html, analysis_html, json_output, evolution_chart]
+        inputs=[input_text, forecast_months, growth_scenario, include_serp],
+        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
@@ -739,8 +1070,8 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
        outputs=status_html
    ).then(
        analyze_keyword,
-        inputs=[input_text, forecast_months, growth_scenario],
-        outputs=[token_viz_html, analysis_html, json_output, evolution_chart]
+        inputs=[input_text, forecast_months, growth_scenario, include_serp],
+        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
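
Below the diff, for orientation: a minimal sketch of how the voice-input path added in this commit could be exercised outside the Gradio UI. It assumes app.py (with the load_models, speech_to_text, and handle_voice_input definitions above) is importable, and "query.wav" is a hypothetical recording; neither detail comes from the commit itself.

    # Sketch only: drive the Whisper-based voice path directly (assumes app.py is importable)
    import app

    app.load_models()                           # lazy-loads Whisper alongside the NLP models
    text = app.handle_voice_input("query.wav")  # hypothetical audio file; librosa resamples it to 16 kHz
    print(text)                                 # transcription, or an error string on failure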
 
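A similar sketch for the simulated-SERP flow, under the same importability assumption. Two update_ranking_history calls are shown because create_ranking_history_chart requires at least two history entries before it will plot:

    # Sketch only: simulated SERP -> in-memory ranking history -> Plotly chart
    import app

    results = app.simulate_google_serp("machine learning", num_results=5)
    app.update_ranking_history("machine learning", results)  # first history entry
    app.update_ranking_history("machine learning", results)  # second entry, new timestamp
    fig = app.create_ranking_history_chart(app.ranking_history["machine learning"])
    fig.show()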