Update app.py
app.py
CHANGED
@@ -1,7 +1,513 @@
import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from transformers import BertTokenizerFast
import matplotlib.pyplot as plt
import json

# Initialize models
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)

# Intent classification - using zero-shot classification
intent_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
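
# Note: each token-classification pipeline call returns a list of dicts, e.g.
# ner_pipeline("Paris is lovely") yields entries like
# {"word": "Paris", "entity": "B-LOC", "score": 0.99, ...}; the matching logic
# in analyze_keyword() below relies on the "word" and "entity" keys.
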
def get_token_colors(token_type):
    colors = {
        "prefix": "#D8BFD8",          # Light purple
        "suffix": "#AEDAA4",          # Light green
        "stem": "#A4C2F4",            # Light blue
        "compound_first": "#FFCC80",  # Light orange
        "compound_second": "#FFCC80", # Light orange
        "word": "#E5E5E5"             # Light gray
    }
    return colors.get(token_type, "#E5E5E5")

def simulate_historical_data(token):
    """Generate simulated historical usage data for a token"""
    eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]

    # Different patterns based on token characteristics
    if len(token) > 8:
        # Possibly a technical term - recent growth
        values = [10, 20, 30, 60, 85, 95]
    elif token.startswith(("un", "re", "de", "pre")):
        # Prefix words tend to be older
        values = [45, 50, 60, 70, 75, 80]
    else:
        # Standard pattern for common words
        base = 50 + (hash(token) % 30)
        noise = np.random.normal(0, 5, 6)
        values = [max(5, min(95, base + i*5 + n)) for i, n in enumerate(noise)]

    return list(zip(eras, values))

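# Example shape of the return value: [("1900s", 45), ("1950s", 50), ...,
# ("Present", 80)] - era labels paired with usage values clamped to 5-95.
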
def generate_origin_data(token):
    """Generate simulated origin/etymology data for a token"""
    origins = [
        {"era": "Ancient", "language": "Latin"},
        {"era": "Ancient", "language": "Greek"},
        {"era": "Medieval", "language": "Old English"},
        {"era": "16th century", "language": "French"},
        {"era": "18th century", "language": "Germanic"},
        {"era": "19th century", "language": "Anglo-Saxon"},
        {"era": "20th century", "language": "Modern English"}
    ]

    # Selection is stable within a single run; note that Python salts str
    # hashes per process, so it can differ across restarts
    index = hash(token) % len(origins)
    origin = origins[index]

    note = f"First appeared in {origin['era']} texts derived from {origin['language']}."
    origin["note"] = note

    return origin

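# The returned dict carries "era", "language", and "note" keys, which the
# HTML generator below reads directly.
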
def analyze_token_types(tokens):
    """Identify token types (prefix, suffix, compound, etc.)"""
    processed_tokens = []

    prefixes = ["un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super"]
    suffixes = ["ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less"]

    for token in tokens:
        token_text = token.lower()
        token_type = "word"

        # Check for prefixes
        for prefix in prefixes:
            if token_text.startswith(prefix) and len(token_text) > len(prefix) + 2:
                if token_text != prefix:  # Make sure the word isn't just the prefix
                    token_type = "prefix"
                    break

        # Check for suffixes
        if token_type == "word":
            for suffix in suffixes:
                if token_text.endswith(suffix) and len(token_text) > len(suffix) + 2:
                    token_type = "suffix"
                    break

        # Check for compound words (simplified)
        if token_type == "word" and len(token_text) > 8:
            token_type = "compound_first"  # Simplified - in reality would need more analysis

        processed_tokens.append({
            "text": token_text,
            "type": token_type
        })

    return processed_tokens

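# For example, analyze_token_types(["preprocessing", "breakdown"]) returns
# [{"text": "preprocessing", "type": "prefix"},
#  {"text": "breakdown", "type": "compound_first"}].
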
def plot_historical_data(historical_data):
    """Create a plot of historical usage data (helper; not currently wired into the UI)"""
    eras = [item[0] for item in historical_data]
    values = [item[1] for item in historical_data]

    plt.figure(figsize=(8, 3))
    plt.bar(eras, values, color='skyblue')
    plt.title('Historical Usage')
    plt.xlabel('Era')
    plt.ylabel('Usage Level')
    plt.ylim(0, 100)
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Return the Figure object rather than the pyplot module so callers
    # (e.g. gr.Plot) receive a concrete figure
    return plt.gcf()

def analyze_keyword(keyword):
    """Run the full analysis for a keyword and return data for each UI output."""
    if not keyword.strip():
        return None, None, None, None, None

    # Basic tokenization
    words = keyword.strip().lower().split()

    # Get token types
    token_analysis = analyze_token_types(words)

    # Get NER tags
    ner_results = ner_pipeline(keyword)

    # Get POS tags
    pos_results = pos_pipeline(keyword)

    # Process and organize results
    full_token_analysis = []
    for token in token_analysis:
        # Find POS tag for this token
        pos_tag = "NOUN"  # Default
        for pos_result in pos_results:
            if pos_result["word"].lower() == token["text"]:
                pos_tag = pos_result["entity"]
                break

        # Find entity type if any
        entity_type = None
        for ner_result in ner_results:
            if ner_result["word"].lower() == token["text"]:
                entity_type = ner_result["entity"]
                break

        # Generate historical data
        historical_data = simulate_historical_data(token["text"])

        # Generate origin data
        origin = generate_origin_data(token["text"])

        # Calculate importance (simplified algorithm)
        importance = 60 + (len(token["text"]) * 2)
        importance = min(95, importance)

        # Generate related terms (simplified)
        related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]

        full_token_analysis.append({
            "token": token["text"],
            "type": token["type"],
            "posTag": pos_tag,
            "entityType": entity_type,
            "importance": importance,
            "historicalData": historical_data,
            "origin": origin,
            "relatedTerms": related_terms
        })

    # Intent analysis
    intent_result = intent_classifier(
        keyword,
        candidate_labels=["informational", "navigational", "transactional"]
    )

    intent_analysis = {
        "type": intent_result["labels"][0].capitalize(),
        "strength": round(intent_result["scores"][0] * 100),
        "mutations": [
            f"{intent_result['labels'][0]}-variation-1",
            f"{intent_result['labels'][0]}-variation-2"
        ]
    }

    # Evolution potential (simplified calculation)
    evolution_potential = min(95, 65 + (len(keyword) % 30))

    # Predicted trends (simplified)
    trends = [
        "Voice search adaptation",
        "Visual search integration"
    ]

    # Evolution chart data (simulated)
    evolution_data = [
        {"month": "Jan", "searchVolume": 1000, "competitionScore": 45, "intentClarity": 80},
        {"month": "Feb", "searchVolume": 1200, "competitionScore": 48, "intentClarity": 82},
        {"month": "Mar", "searchVolume": 1100, "competitionScore": 52, "intentClarity": 85},
        {"month": "Apr", "searchVolume": 1400, "competitionScore": 55, "intentClarity": 88},
        {"month": "May", "searchVolume": 1800, "competitionScore": 58, "intentClarity": 90},
        {"month": "Jun", "searchVolume": 2200, "competitionScore": 60, "intentClarity": 92}
    ]

    # Create plots
    evolution_chart = create_evolution_chart(evolution_data)

    # Generate HTML for token visualization
    token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)

    # Generate HTML for full analysis
    analysis_html = generate_full_analysis_html(
        keyword,
        full_token_analysis,
        intent_analysis,
        evolution_potential,
        trends
    )

    # Generate JSON results
    json_results = {
        "keyword": keyword,
        "tokenAnalysis": full_token_analysis,
        "intentAnalysis": intent_analysis,
        "evolutionPotential": evolution_potential,
        "predictedTrends": trends
    }

    return token_viz_html, analysis_html, json_results, evolution_chart, full_token_analysis

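# analyze_keyword returns a 5-tuple: token HTML, full-analysis HTML, a
# JSON-able dict, a matplotlib figure, and the raw token list (held in a
# gr.State component in the UI below).
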
def create_evolution_chart(data):
    """Create an evolution chart from data"""
    df = pd.DataFrame(data)

    plt.figure(figsize=(10, 5))
    plt.plot(df['month'], df['searchVolume'], marker='o', label='Search Volume')
    # Scores are on a 0-100 scale; multiply by 20 so they remain visible
    # alongside raw search volume
    plt.plot(df['month'], df['competitionScore']*20, marker='s', label='Competition Score')
    plt.plot(df['month'], df['intentClarity']*20, marker='^', label='Intent Clarity')

    plt.title('Predicted Evolution')
    plt.xlabel('Month')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Return the Figure object rather than the pyplot module for gr.Plot
    return plt.gcf()

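# The two generators below build inline-styled HTML strings for gr.HTML
# components; Gradio renders them as raw HTML, so all styling is kept inline.
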
def generate_token_visualization_html(token_analysis, full_analysis):
    """Generate HTML for token visualization"""
    html = """
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Token Visualization</h2>

        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add human view tokens
    for token in token_analysis:
        html += f"""
        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
            {token['text']}
        </div>
        """

    html += """
            </div>
        </div>

        <div style="text-align: center; margin: 15px 0;">
            <span style="font-size: 20px;">↓</span>
        </div>

        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add machine view tokens
    for token in full_analysis:
        bg_color = get_token_colors(token["type"])
        html += f"""
        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
            {token['token']}
            <span style="font-size: 10px; opacity: 0.7; display: block;">{token['type']}</span>
        </div>
        """

    html += """
            </div>
        </div>

        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
    """

    # Add stats
    word_count = len(token_analysis)
    token_count = len(full_analysis)
    ratio = round(token_count / max(1, word_count), 2)

    html += f"""
        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
            <div style="font-size: 14px; color: #4299e1;">Words</div>
        </div>

        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
        </div>

        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
        </div>
    """

    html += """
        </div>
    </div>
    """

    return html

def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
    """Generate HTML for full keyword analysis"""
    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {keyword}</h2>

        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
                    <span>Type:</span>
                    <span>{intent_analysis['type']}</span>
                </div>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <span>Strength:</span>
                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
                    <div style="position: relative; width: 100px; height: 100px;">
                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
                        </div>
                        <svg width="100" height="100" viewBox="0 0 36 36">
                            <path
                                d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
                                fill="none"
                                stroke="#4CAF50"
                                stroke-width="3"
                                stroke-dasharray="{evolution_potential}, 100"
                            />
                        </svg>
                    </div>
                </div>
            </div>
        </div>

        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
            <div style="display: flex; flex-direction: column; gap: 8px;">
    """

    # Add trends
    for trend in trends:
        html += f"""
        <div style="display: flex; align-items: center; gap: 8px;">
            <span style="color: #48bb78;">↗</span>
            <span>{trend}</span>
        </div>
        """

    html += """
            </div>
        </div>

        <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
    """

    # Add token details
    for token in token_analysis:
        html += f"""
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="font-size: 18px; font-weight: 500;">{token['token']}</span>
                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{token['posTag']}</span>
        """

        if token['entityType']:
            html += f"""
            <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
                ⓘ {token['entityType']}
            </span>
            """

        html += f"""
                </div>
                <div style="display: flex; align-items: center; gap: 4px;">
                    <span style="font-size: 12px; color: #718096;">Importance:</span>
                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="margin-top: 15px;">
                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
                    <div style="font-size: 12px; margin-bottom: 8px;">
                        <span style="font-weight: 500;">Origin: </span>
                        <span>{token['origin']['era']}, </span>
                        <span style="font-style: italic;">{token['origin']['language']}</span>
                    </div>
                    <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>

                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
        """

        # Add historical data bars; enumerate replaces the per-bar
        # list.index() lookup, which was O(n^2) and picked the wrong bar
        # whenever two (period, value) pairs were identical
        for i, (period, value) in enumerate(token['historicalData']):
            opacity = 0.3 + (i * 0.1)
            html += f"""
            <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
                <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
                <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
                    {period}
                </div>
            </div>
            """

        html += """
                    </div>
                </div>
            </div>
        </div>
        """

    html += """
    </div>
    """

    return html

# Create the Gradio interface
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Keyword DNA Analyzer")
    gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
            analyze_btn = gr.Button("Analyze DNA", variant="primary")

            with gr.Row():
                example_btns = []
                for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                    example_btns.append(gr.Button(example))

        with gr.Column():
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()

                with gr.Tab("Full Analysis"):
                    analysis_html = gr.HTML()

                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot()

                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()

    # Hidden state to receive the raw token list (the 5th return value);
    # every returned value needs a component, so None is not a valid entry
    # in an outputs list
    full_analysis_state = gr.State()

    # Set up event handlers
    analyze_btn.click(
        analyze_keyword,
        inputs=[input_text],
        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, full_analysis_state]
    )

    # Example buttons: copy the button label into the textbox, then analyze
    for btn in example_btns:
        btn.click(
            lambda btn_text: btn_text,
            inputs=[btn],
            outputs=[input_text]
        ).then(
            analyze_keyword,
            inputs=[input_text],
            outputs=[token_viz_html, analysis_html, json_output, evolution_chart, full_analysis_state]
        )

+
# Launch the app
|
513 |
+
demo.launch()
|