import datetime

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import torch

|
# Heavy model objects are loaded lazily on first use (see load_models)
tokenizer = None
ner_pipeline = None
pos_pipeline = None
intent_classifier = None
semantic_model = None
stt_model = None
models_loaded = False

# In-memory ranking history keyed by keyword (resets when the app restarts)
ranking_history = {}

|
def load_models(progress=gr.Progress()):
    """Lazy-load all models on first use; return True on success or an error string."""
    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded

    if models_loaded:
        return True

    try:
        progress(0.1, desc="Loading models...")

        # Import transformers here so the app can start even if it is missing
        from transformers import AutoTokenizer, pipeline

        progress(0.2, desc="Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

        progress(0.3, desc="Loading NER model...")
        ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

        progress(0.4, desc="Loading POS model...")
        from transformers import AutoModelForTokenClassification, BertTokenizerFast
        pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
        pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
        pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)

        progress(0.6, desc="Loading intent classifier...")
        intent_classifier = pipeline(
            "zero-shot-classification",
            model="typeform/distilbert-base-uncased-mnli",
            device=0 if torch.cuda.is_available() else -1
        )

        progress(0.7, desc="Loading speech-to-text model...")
        try:
            from transformers import WhisperProcessor, WhisperForConditionalGeneration
            processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
            model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
            # Store the processor and model together; speech_to_text unpacks this tuple
            stt_model = (processor, model)
        except Exception as e:
            print(f"Warning: Could not load speech-to-text model: {str(e)}")
            stt_model = None  # Voice input degrades gracefully to text-only

        progress(0.8, desc="Loading semantic model...")
        try:
            from sentence_transformers import SentenceTransformer
            semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            print(f"Warning: Could not load semantic model: {str(e)}")
            semantic_model = None  # Related-term suggestions fall back to placeholders

        progress(1.0, desc="Models loaded successfully!")
        models_loaded = True
        return True

    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return f"Error: {str(e)}"

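# Note: loading stays lazy so the UI appears immediately; if startup latency is
# preferred over first-click latency, the same function could be wired to run
# eagerly (e.g. demo.load(load_models) in recent Gradio versions; this is an
# assumption, check the installed Gradio API before relying on it).
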
|
def speech_to_text(audio_path):
    """Transcribe an audio file with the loaded Whisper processor/model pair."""
    if stt_model is None:
        return "Speech-to-text model not loaded. Please try text input instead."

    try:
        import librosa

        # Whisper expects 16 kHz mono audio
        audio, sr = librosa.load(audio_path, sr=16000)

        processor, model = stt_model
        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features

        predicted_ids = model.generate(input_features)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        return transcription
    except Exception as e:
        print(f"Error in speech_to_text: {str(e)}")
        return f"Error processing speech: {str(e)}"

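# Illustrative usage (the path is hypothetical):
#   speech_to_text("recordings/query.wav")  ->  "artificial intelligence"
# The pipeline above is: load audio at 16 kHz -> processor extracts log-mel
# features -> model.generate() decodes token ids -> batch_decode() returns text.
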
|
def handle_voice_input(audio):
    """Handle voice input and convert it to text."""
    if audio is None:
        return "No audio detected. Please try again."

    try:
        return speech_to_text(audio)
    except Exception as e:
        print(f"Error in handle_voice_input: {str(e)}")
        return f"Error: {str(e)}"

|
def simulate_google_serp(keyword, num_results=10):
    """Simulate Google SERP results for a keyword."""
    try:
        # Seed the RNG from the keyword so results are deterministic per keyword
        np.random.seed(sum(ord(c) for c in keyword))

        serp_results = []
        domains = [
            "example.com", "wikipedia.org", "medium.com", "github.com",
            "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
            "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
        ]

        for i in range(1, num_results + 1):
            domain = domains[i % len(domains)]
            title = f"{keyword.title()} - {domain.split('.')[0].title()} Resource #{i}"
            snippet = f"This is a simulated SERP result for '{keyword}'. Result #{i} would provide relevant information about this topic."
            url = f"https://www.{domain}/{keyword.replace(' ', '-')}-resource-{i}"

            position = i
            # Simple CTR decay model: 30% at position 1, falling ~15% per position
            ctr = round(0.3 * (0.85 ** (i - 1)), 4)

            serp_results.append({
                "position": position,
                "title": title,
                "url": url,
                "domain": domain,
                "snippet": snippet,
                "ctr_estimate": ctr,
                "impressions_estimate": np.random.randint(1000, 10000)
            })

        return serp_results
    except Exception as e:
        print(f"Error in simulate_google_serp: {str(e)}")
        return []

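# Sanity check of the CTR decay above: ctr(p) = 0.30 * 0.85**(p - 1), so
#   position 1 -> 0.3000, position 2 -> 0.2550, position 3 -> 0.2168
# The constants are simulation choices, not measured click-through rates.
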
|
def update_ranking_history(keyword, serp_results):
    """Append the latest SERP snapshot to the keyword's ranking history."""
    try:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if keyword not in ranking_history:
            ranking_history[keyword] = []

        # Keep only the top 5 results per snapshot
        ranking_history[keyword].append({
            "timestamp": timestamp,
            "results": serp_results[:5]
        })

        # Cap the history at the 10 most recent snapshots
        if len(ranking_history[keyword]) > 10:
            ranking_history[keyword] = ranking_history[keyword][-10:]

        return True
    except Exception as e:
        print(f"Error in update_ranking_history: {str(e)}")
        return False

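# Stored shape (illustrative example, not real data):
#   ranking_history["ai tools"] = [
#       {"timestamp": "2024-01-01 12:00:00", "results": [<top-5 SERP dicts>]},
#       ...
#   ]
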
|
def get_semantic_similarity(token, comparison_terms):
    """Rank comparison terms by cosine similarity to a token, highest first."""
    try:
        from sklearn.metrics.pairwise import cosine_similarity

        token_embedding = semantic_model.encode([token])[0]
        comparison_embeddings = semantic_model.encode(comparison_terms)

        similarities = []
        for i, emb in enumerate(comparison_embeddings):
            similarity = cosine_similarity([token_embedding], [emb])[0][0]
            similarities.append((comparison_terms[i], float(similarity)))

        return sorted(similarities, key=lambda x: x[1], reverse=True)
    except Exception as e:
        print(f"Error in semantic similarity: {str(e)}")
        # Fall back to a neutral similarity score for every term
        return [(term, 0.5) for term in comparison_terms]

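# Reference: cosine similarity compares embedding directions, not magnitudes:
#   sim(a, b) = (a . b) / (||a|| * ||b||)
# The 0.5 returned on failure is a deliberately neutral placeholder score.
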
|
def get_token_colors(token_type):
    """Map a token type to its display color, defaulting to gray."""
    colors = {
        "prefix": "#D8BFD8",
        "suffix": "#AEDAA4",
        "stem": "#A4C2F4",
        "compound_first": "#FFCC80",
        "compound_second": "#FFCC80",
        "word": "#E5E5E5"
    }
    return colors.get(token_type, "#E5E5E5")

|
def simulate_historical_data(token):
    """Generate simulated historical usage data for a token."""
    eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]

    if len(token) > 8:
        # Treat long tokens as modern coinages: sharp recent growth
        values = [10, 20, 30, 60, 85, 95]
    elif token.startswith(("un", "re", "de", "pre")):
        # Prefixed words: steady, established growth
        values = [45, 50, 60, 70, 75, 80]
    else:
        # Everything else: a token-seeded base trend plus mild noise
        base = 50 + (sum(ord(c) for c in token) % 30)
        np.random.seed(sum(ord(c) for c in token))
        noise = np.random.normal(0, 5, 6)
        values = [max(5, min(95, base + i * 5 + n)) for i, n in enumerate(noise)]

    return list(zip(eras, values))

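# Output shape: a list of (era, usage) pairs on a 0-100 scale. For any token
# longer than 8 characters the first branch yields exactly:
#   [("1900s", 10), ("1950s", 20), ("1980s", 30), ("2000s", 60), ("2010s", 85), ("Present", 95)]
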
|
def generate_origin_data(token):
    """Generate simulated origin/etymology data for a token."""
    origins = [
        {"era": "Ancient", "language": "Latin"},
        {"era": "Ancient", "language": "Greek"},
        {"era": "Medieval", "language": "Old English"},
        {"era": "16th century", "language": "French"},
        {"era": "18th century", "language": "Germanic"},
        {"era": "19th century", "language": "Anglo-Saxon"},
        {"era": "20th century", "language": "Modern English"}
    ]

    # Pick a deterministic origin from the token's character checksum;
    # copy the dict so the shared template list is never mutated
    index = sum(ord(c) for c in token) % len(origins)
    origin = dict(origins[index])

    origin["note"] = f"First appeared in {origin['era']} texts derived from {origin['language']}."

    return origin

|
def analyze_token_types(tokens):
    """Classify each token as prefix-, suffix-, compound-like, or a plain word."""
    processed_tokens = []

    prefixes = ["un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super"]
    suffixes = ["ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less"]

    for token in tokens:
        token_text = token.lower()
        token_type = "word"

        # Prefix check: the token must extend at least 3 characters past the prefix
        for prefix in prefixes:
            if token_text.startswith(prefix) and len(token_text) > len(prefix) + 2:
                token_type = "prefix"
                break

        # Suffix check, only if no prefix matched
        if token_type == "word":
            for suffix in suffixes:
                if token_text.endswith(suffix) and len(token_text) > len(suffix) + 2:
                    token_type = "suffix"
                    break

        # Long unclassified tokens are treated as likely compounds
        if token_type == "word" and len(token_text) > 8:
            token_type = "compound_first"

        processed_tokens.append({
            "text": token_text,
            "type": token_type
        })

    return processed_tokens

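# Worked example:
#   analyze_token_types(["preprocessing", "breakdown"])
#   -> [{"text": "preprocessing", "type": "prefix"},       # starts with "pre"
#       {"text": "breakdown", "type": "compound_first"}]   # 9 chars, no affix hit
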
|
def plot_historical_data(historical_data):
    """Create a bar chart of historical usage data, with error handling."""
    try:
        eras = [item[0] for item in historical_data]
        values = [item[1] for item in historical_data]

        # Return the Figure object rather than the pyplot module so the caller
        # does not depend on matplotlib's implicit current-figure state
        fig = plt.figure(figsize=(8, 3))
        plt.bar(eras, values, color='skyblue')
        plt.title('Historical Usage')
        plt.xlabel('Era')
        plt.ylabel('Usage Level')
        plt.ylim(0, 100)
        plt.xticks(rotation=45)
        plt.tight_layout()

        return fig
    except Exception as e:
        print(f"Error in plot_historical_data: {str(e)}")
        # Return a placeholder figure so the UI still renders something
        fig = plt.figure(figsize=(8, 3))
        plt.text(0.5, 0.5, f"Error creating plot: {str(e)}",
                 horizontalalignment='center', verticalalignment='center')
        plt.axis('off')
        return fig

|
def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
    """Create a simple Plotly forecast chart that renders reliably in Gradio."""
    try:
        fig = go.Figure()

        # Search volume on its natural scale
        fig.add_trace(
            go.Scatter(
                x=[item["month"] for item in data],
                y=[item["searchVolume"] for item in data],
                name="Search Volume",
                line=dict(color="#8884d8", width=3),
                mode="lines+markers"
            )
        )

        # Scale the 0-100 scores up to the volume axis so all traces share one y-axis
        max_volume = max(item["searchVolume"] for item in data)
        scale_factor = max_volume / 100

        fig.add_trace(
            go.Scatter(
                x=[item["month"] for item in data],
                y=[item["competitionScore"] * scale_factor for item in data],
                name="Competition Score",
                line=dict(color="#82ca9d", width=2, dash="dot"),
                mode="lines+markers"
            )
        )

        fig.add_trace(
            go.Scatter(
                x=[item["month"] for item in data],
                y=[item["intentClarity"] * scale_factor for item in data],
                name="Intent Clarity",
                line=dict(color="#ffc658", width=2, dash="dash"),
                mode="lines+markers"
            )
        )

        fig.update_layout(
            title=f"Keyword Evolution Forecast ({growth_scenario} Growth)",
            xaxis_title="Month",
            yaxis_title="Value",
            legend=dict(orientation="h", y=1.1),
            height=500
        )

        return fig

    except Exception as e:
        print(f"Error in chart creation: {str(e)}")
        # Minimal fallback chart so the Plot component still receives a figure
        fig = go.Figure(data=go.Scatter(x=[1, 2, 3], y=[4, 1, 2]))
        fig.update_layout(title="Fallback Chart (Error occurred)")
        return fig

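# Design note: a true secondary y-axis (yaxis2 in update_layout, with the score
# traces set to yaxis="y2") would avoid the manual scale_factor above; the
# shared-axis version is kept because, per the docstring, it renders more
# predictably inside Gradio's Plot component.
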
|
def create_ranking_history_chart(keyword_history):
    """Create a chart showing keyword ranking history over time."""
    try:
        if not keyword_history or len(keyword_history) < 2:
            # Not enough snapshots to draw a trend line
            fig = go.Figure()
            fig.update_layout(
                title="Insufficient Ranking Data",
                annotations=[{
                    "text": "Need at least 2 data points for ranking history",
                    "showarrow": False,
                    "font": {"size": 16},
                    "xref": "paper",
                    "yref": "paper",
                    "x": 0.5,
                    "y": 0.5
                }]
            )
            return fig

        fig = go.Figure()

        timestamps = [entry["timestamp"] for entry in keyword_history]
        dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]

        # Collect every domain that has appeared in any snapshot
        all_domains = set()
        for entry in keyword_history:
            for result in entry["results"]:
                all_domains.add(result["domain"])

        # Assign each domain a stable color from the palette
        domain_colors = {}
        color_palette = [
            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
        ]
        for i, domain in enumerate(all_domains):
            domain_colors[domain] = color_palette[i % len(color_palette)]

        # Build per-domain series of (date, position, title)
        domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}

        for i, entry in enumerate(keyword_history):
            for result in entry["results"]:
                domain = result["domain"]
                domain_tracking[domain]["x"].append(dates[i])
                domain_tracking[domain]["y"].append(result["position"])
                domain_tracking[domain]["text"].append(result["title"])

        for domain, data in domain_tracking.items():
            if len(data["x"]) > 0:
                fig.add_trace(
                    go.Scatter(
                        x=data["x"],
                        y=data["y"],
                        mode="lines+markers",
                        name=domain,
                        line=dict(color=domain_colors[domain]),
                        hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
                        text=data["text"],
                        marker=dict(size=8)
                    )
                )

        fig.update_layout(
            title="Keyword Ranking History",
            xaxis_title="Date",
            yaxis_title="Position",
            # Reversed axis puts position 1 (best rank) at the top
            yaxis=dict(autorange="reversed"),
            hovermode="closest",
            height=500
        )

        return fig

    except Exception as e:
        print(f"Error in create_ranking_history_chart: {str(e)}")
        fig = go.Figure()
        fig.update_layout(
            title="Error Creating Ranking Chart",
            annotations=[{
                "text": f"Error: {str(e)}",
                "showarrow": False,
                "font": {"size": 14},
                "xref": "paper",
                "yref": "paper",
                "x": 0.5,
                "y": 0.5
            }]
        )
        return fig

|
def generate_serp_html(keyword, serp_results):
    """Generate HTML for SERP results."""
    if not serp_results:
        return "<div>No SERP results available</div>"

    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">SERP Results for "{keyword}"</h2>

        <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
            <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
        </div>

        <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
    """

    for result in serp_results:
        position = result["position"]
        title = result["title"]
        url = result["url"]
        snippet = result["snippet"]
        ctr = result["ctr_estimate"]
        impressions = result["impressions_estimate"]

        html += f"""
        <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
            <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
                {position}
            </div>
            <div style="margin-bottom: 5px;">
                <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
            </div>
            <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
            <div style="color: #4d5156; font-size: 14px;">{snippet}</div>

            <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
                <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
                <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
            </div>
        </div>
        """

    html += """
        </div>
    </div>
    """

    return html

|
def generate_token_visualization_html(token_analysis, full_analysis):
    """Generate HTML comparing the human view and machine view of the tokens."""
    html = """
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Token Visualization</h2>

        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Human view: the words as typed
    for token in token_analysis:
        html += f"""
        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
            {token['text']}
        </div>
        """

    html += """
            </div>
        </div>

        <div style="text-align: center; margin: 15px 0;">
            <span style="font-size: 20px;">↓</span>
        </div>

        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Machine view: tokens colored by their detected type
    for token in full_analysis:
        bg_color = get_token_colors(token["type"])
        html += f"""
        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
            {token['token']}
            <span style="font-size: 10px; opacity: 0.7; display: block;">{token['type']}</span>
        </div>
        """

    html += """
            </div>
        </div>

        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
    """

    # Summary stats: word count, token count, tokens-per-word ratio
    word_count = len(token_analysis)
    token_count = len(full_analysis)
    ratio = round(token_count / max(1, word_count), 2)

    html += f"""
        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
            <div style="font-size: 14px; color: #4299e1;">Words</div>
        </div>

        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
        </div>

        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
        </div>
    """

    html += """
        </div>
    </div>
    """

    return html

|
def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
    """Generate HTML for the full keyword analysis."""
    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {keyword}</h2>

        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
                    <span>Type:</span>
                    <span>{intent_analysis['type']}</span>
                </div>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <span>Strength:</span>
                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
                    <div style="position: relative; width: 100px; height: 100px;">
                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
                        </div>
                        <svg width="100" height="100" viewBox="0 0 36 36">
                            <path
                                d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
                                fill="none"
                                stroke="#4CAF50"
                                stroke-width="3"
                                stroke-dasharray="{evolution_potential}, 100"
                            />
                        </svg>
                    </div>
                </div>
            </div>
        </div>

        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
            <div style="display: flex; flex-direction: column; gap: 8px;">
    """

    for trend in trends:
        html += f"""
            <div style="display: flex; align-items: center; gap: 8px;">
                <span style="color: #48bb78;">↗</span>
                <span>{trend}</span>
            </div>
        """

    html += """
            </div>
        </div>

        <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
    """

    for token in token_analysis:
        html += f"""
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="font-size: 18px; font-weight: medium;">{token['token']}</span>
                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{token['posTag']}</span>
        """

        if token['entityType']:
            html += f"""
                    <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
                        ⓘ {token['entityType']}
                    </span>
            """

        html += f"""
                </div>
                <div style="display: flex; align-items: center; gap: 4px;">
                    <span style="font-size: 12px; color: #718096;">Importance:</span>
                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="margin-top: 15px;">
                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
                    <div style="font-size: 12px; margin-bottom: 8px;">
                        <span style="font-weight: 500;">Origin: </span>
                        <span>{token['origin']['era']}, </span>
                        <span style="font-style: italic;">{token['origin']['language']}</span>
                    </div>
                    <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>

                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
        """

        # Later eras render more opaque; enumerate replaces the original
        # list.index() lookup, which rescanned the list and broke on duplicates
        for j, (period, value) in enumerate(token['historicalData']):
            opacity = 0.3 + (j * 0.1)
            html += f"""
                        <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
                            <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
                            <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
                                {period}
                            </div>
                        </div>
            """

        html += """
                    </div>
                </div>
            </div>
        </div>
        """

    html += """
    </div>
    """

    return html

|
def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
    """Main entry point: run the full keyword analysis pipeline."""
    if not keyword or not keyword.strip():
        return (
            "<div>Please enter a keyword to analyze</div>",
            "<div>Please enter a keyword to analyze</div>",
            None,
            None,
            None,
            None,
            None
        )

    progress(0.1, desc="Starting analysis...")

    # Make sure models are loaded (no-op after the first successful call)
    model_status = load_models(progress)
    if isinstance(model_status, str) and model_status.startswith("Error"):
        error_html = f"<div style='color:red;'>{model_status}</div>"
        return (error_html, error_html, None, None, None, None, None)

    try:
        words = keyword.strip().lower().split()
        progress(0.2, desc="Analyzing tokens...")

        token_analysis = analyze_token_types(words)

        progress(0.3, desc="Running NER...")
        try:
            ner_results = ner_pipeline(keyword)
        except Exception as e:
            print(f"NER error: {str(e)}")
            ner_results = []

        progress(0.4, desc="Running POS tagging...")
        try:
            pos_results = pos_pipeline(keyword)
        except Exception as e:
            print(f"POS error: {str(e)}")
            pos_results = []

        # Enrich each token with POS, entity, historical, and semantic data
        full_token_analysis = []
        for token in token_analysis:
            # Default to NOUN if the POS tagger produced nothing for this token
            pos_tag = "NOUN"
            for pos_result in pos_results:
                if pos_result["word"].lower() == token["text"]:
                    pos_tag = pos_result["entity"]
                    break

            entity_type = None
            for ner_result in ner_results:
                if ner_result["word"].lower() == token["text"]:
                    entity_type = ner_result["entity"]
                    break

            historical_data = simulate_historical_data(token["text"])
            origin = generate_origin_data(token["text"])

            # Longer tokens are weighted as more important, capped at 95
            importance = min(95, 60 + (len(token["text"]) * 2))

            if semantic_model is not None:
                try:
                    # Compare against query patterns, synonym markers, and domain terms
                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
                    comparison_terms = prefix_related + synonym_candidates + domain_terms

                    similarities = get_semantic_similarity(token['text'], comparison_terms)
                    related_terms = [term for term, score in similarities[:3]]
                except Exception as e:
                    print(f"Error generating semantic related terms: {str(e)}")
                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
            else:
                # Semantic model unavailable: fall back to placeholder terms
                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]

            full_token_analysis.append({
                "token": token["text"],
                "type": token["type"],
                "posTag": pos_tag,
                "entityType": entity_type,
                "importance": importance,
                "historicalData": historical_data,
                "origin": origin,
                "relatedTerms": related_terms
            })

        progress(0.5, desc="Analyzing intent...")
        try:
            intent_result = intent_classifier(
                keyword,
                candidate_labels=["informational", "navigational", "transactional"]
            )

            intent_analysis = {
                "type": intent_result["labels"][0].capitalize(),
                "strength": round(intent_result["scores"][0] * 100),
                "mutations": [
                    f"{intent_result['labels'][0]}-variation-1",
                    f"{intent_result['labels'][0]}-variation-2"
                ]
            }
        except Exception as e:
            print(f"Intent classification error: {str(e)}")
            intent_analysis = {
                "type": "Informational",
                "strength": 70,
                "mutations": ["fallback-variation-1", "fallback-variation-2"]
            }

        # Deterministic, keyword-derived evolution score (simulated)
        evolution_potential = min(95, 65 + (len(keyword) % 30))

        trends = [
            "Voice search adaptation",
            "Visual search integration"
        ]

        # Simulated forecast: base volume grows by a scenario-dependent factor
        base_volume = 1000 + (len(keyword) * 100)

        if growth_scenario == "Conservative":
            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
        elif growth_scenario == "Aggressive":
            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
        else:  # Moderate
            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))

        evolution_data = []
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
        current_volume = base_volume

        for month in months:
            # Seed per month+keyword so the forecast is reproducible
            np.random.seed(sum(ord(c) for c in month + keyword))
            random_factor = 0.9 + (0.2 * np.random.random())
            current_volume *= growth_factor * random_factor

            evolution_data.append({
                "month": month,
                "searchVolume": int(current_volume),
                "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
                "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
            })

        progress(0.6, desc="Creating visualizations...")
        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)

        # Optional SERP simulation plus ranking history
        serp_results = None
        ranking_chart = None
        serp_html = None

        if get_serp:
            progress(0.7, desc="Fetching SERP data...")
            serp_results = simulate_google_serp(keyword)
            update_ranking_history(keyword, serp_results)

            progress(0.8, desc="Creating ranking charts...")
            if keyword in ranking_history and len(ranking_history[keyword]) > 0:
                ranking_chart = create_ranking_history_chart(ranking_history[keyword])

            serp_html = generate_serp_html(keyword, serp_results)

        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)

        analysis_html = generate_full_analysis_html(
            keyword,
            full_token_analysis,
            intent_analysis,
            evolution_potential,
            trends
        )

        # Raw payload for the JSON tab
        json_results = {
            "keyword": keyword,
            "tokenAnalysis": full_token_analysis,
            "intentAnalysis": intent_analysis,
            "evolutionPotential": evolution_potential,
            "predictedTrends": trends,
            "forecast": {
                "months": forecast_months,
                "scenario": growth_scenario,
                "data": evolution_data
            },
            "serpResults": serp_results
        }

        progress(1.0, desc="Analysis complete!")
        return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword

    except Exception as e:
        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
        print(f"Error in analyze_keyword: {str(e)}")
        return error_message, error_message, None, None, None, None, None

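# The 7-tuple returned by analyze_keyword must stay aligned, in order, with the
# outputs wired up below: token HTML, analysis HTML, JSON payload, evolution
# chart, SERP HTML, ranking chart, and the keyword echoed back into the input box.
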
|
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Keyword DNA Analyzer")
    gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")

    with gr.Row():
        with gr.Column(scale=1):
            # Input panel: text box plus optional voice input
            with gr.Group():
                gr.Markdown("### Enter Keyword")
                with gr.Row():
                    input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")

                with gr.Row():
                    audio_input = gr.Audio(type="filepath", label="Or use voice search")
                    voice_submit_btn = gr.Button("Convert Voice to Text", variant="secondary")

            with gr.Accordion("Analysis Settings", open=False):
                with gr.Row():
                    forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
                    include_serp = gr.Checkbox(label="Include SERP Analysis", value=True)

                growth_scenario = gr.Radio(
                    ["Conservative", "Moderate", "Aggressive"],
                    value="Moderate",
                    label="Growth Scenario"
                )

            status_html = gr.HTML('<div style="color:gray;text-align:center;">Enter a keyword and click "Analyze DNA"</div>')

            analyze_btn = gr.Button("Analyze DNA", variant="primary")

            with gr.Row():
                example_btns = []
                for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                    example_btns.append(gr.Button(example))

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()

                with gr.Tab("Full Analysis"):
                    analysis_html = gr.HTML()

                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot(label="Keyword Evolution Forecast")

                with gr.Tab("SERP Results"):
                    serp_html = gr.HTML()

                with gr.Tab("Ranking History"):
                    ranking_chart = gr.Plot(label="Keyword Ranking History")

                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()

    # Voice input: transcribe audio into the keyword text box
    voice_submit_btn.click(
        handle_voice_input,
        inputs=[audio_input],
        outputs=[input_text]
    )

    # Analyze button: show status, run the analysis, then confirm completion
    analyze_btn.click(
        lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
        outputs=status_html
    ).then(
        analyze_keyword,
        inputs=[input_text, forecast_months, growth_scenario, include_serp],
        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
    )

    # Example buttons: a gr.Button used as an input supplies its own label,
    # so each button fills the text box with its keyword and triggers analysis
    for btn in example_btns:
        def set_example(btn_label):
            return btn_label

        btn.click(
            set_example,
            inputs=[btn],
            outputs=[input_text]
        ).then(
            lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
            outputs=status_html
        ).then(
            analyze_keyword,
            inputs=[input_text, forecast_months, growth_scenario, include_serp],
            outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
        ).then(
            lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
            outputs=status_html
        )

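# Note: .then() steps run whether or not the previous step raised, so on failure
# analyze_keyword's own error HTML is shown while the status still flips to
# "Analysis complete!". Chaining with .success() instead would keep the status
# message accurate on errors.
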
|
if __name__ == "__main__":
    demo.launch()