# Source: Hugging Face Space "PromptMeister/Keyword-DNA-Analyzer" (status: Running; file size: 32,951 bytes)

import gradio as gr
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import json
import time
import os
from functools import partial

# Global variables to store models
# Every handle starts as None and is assigned by load_models(); models_loaded
# flips to True once loading has completed so later calls return immediately.
tokenizer = None  # AutoTokenizer for "bert-base-uncased"
ner_pipeline = None  # "ner" pipeline (dslim/bert-base-NER)
pos_pipeline = None  # "token-classification" pipeline used for POS tags
intent_classifier = None  # "zero-shot-classification" pipeline
semantic_model = None  # SentenceTransformer; stays None if it fails to load
models_loaded = False

def load_models(progress=gr.Progress()):
    """Load every model the analyzer relies on, at most once per process.

    The loaded objects are stored in the module-level globals ``tokenizer``,
    ``ner_pipeline``, ``pos_pipeline``, ``intent_classifier`` and
    ``semantic_model``. A failure to load the sentence-transformer model is
    tolerated: ``semantic_model`` is left as ``None`` and loading continues.

    Args:
        progress: Callable invoked as ``progress(fraction, desc=...)`` before
            each loading stage.

    Returns:
        ``True`` when the models are (or already were) loaded, otherwise a
        string of the form ``"Error: <message>"``.
    """
    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, models_loaded

    # A previous successful call already populated the globals.
    if models_loaded:
        return True

    try:
        progress(0.1, desc="Loading models...")

        # Smaller checkpoints, loaded one after another, keep memory pressure low.
        from transformers import AutoTokenizer, pipeline

        progress(0.2, desc="Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

        progress(0.4, desc="Loading NER model...")
        ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

        progress(0.6, desc="Loading POS model...")
        from transformers import AutoModelForTokenClassification, BertTokenizerFast
        pos_checkpoint = "vblagoje/bert-english-uncased-finetuned-pos"
        tagger_model = AutoModelForTokenClassification.from_pretrained(pos_checkpoint)
        tagger_tokenizer = BertTokenizerFast.from_pretrained(pos_checkpoint)
        pos_pipeline = pipeline(
            "token-classification",
            model=tagger_model,
            tokenizer=tagger_tokenizer,
        )

        progress(0.8, desc="Loading intent classifier...")
        # Run the zero-shot classifier on the first GPU when one is available.
        device_index = 0 if torch.cuda.is_available() else -1
        intent_classifier = pipeline(
            "zero-shot-classification",
            model="typeform/distilbert-base-uncased-mnli",  # Smaller than BART
            device=device_index,
        )

        progress(0.9, desc="Loading semantic model...")
        try:
            from sentence_transformers import SentenceTransformer
            semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as semantic_error:
            # Optional component: callers check semantic_model for None.
            print(f"Warning: Could not load semantic model: {semantic_error!s}")
            semantic_model = None

        progress(1.0, desc="Models loaded successfully!")
        models_loaded = True
    except Exception as load_error:
        failure_text = str(load_error)
        print(f"Error loading models: {failure_text}")
        return f"Error: {failure_text}"

    return True

def get_semantic_similarity(token, comparison_terms):
    """Rank ``comparison_terms`` by cosine similarity to ``token``.

    Embeddings come from the module-level ``semantic_model``.

    Args:
        token: Text to compare against.
        comparison_terms: Sequence of candidate strings.

    Returns:
        A list of ``(term, similarity)`` tuples sorted by descending
        similarity. If anything fails (missing dependency, model error),
        the error is printed and every term is returned with a placeholder
        score of ``0.5`` in its original order.
    """
    try:
        from sklearn.metrics.pairwise import cosine_similarity

        token_vector = semantic_model.encode([token])[0]
        term_vectors = semantic_model.encode(comparison_terms)

        scored = [
            (
                comparison_terms[position],
                float(cosine_similarity([token_vector], [vector])[0][0]),
            )
            for position, vector in enumerate(term_vectors)
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored
    except Exception as exc:
        print(f"Error in semantic similarity: {exc!s}")
        # Placeholder scores keep downstream code working on error.
        return [(term, 0.5) for term in comparison_terms]

def get_token_colors(token_type):
    """Return the background hex color used to render a token of ``token_type``.

    Unknown token types fall back to the light-gray color used for plain words.
    """
    fallback = "#E5E5E5"  # Light gray
    palette = dict(
        prefix="#D8BFD8",           # Light purple
        suffix="#AEDAA4",           # Light green
        stem="#A4C2F4",             # Light blue
        compound_first="#FFCC80",   # Light orange
        compound_second="#FFCC80",  # Light orange
        word=fallback,
    )
    return palette.get(token_type, fallback)

def simulate_historical_data(token):
    """Generate simulated (not real) historical usage data for a token.

    The result is deterministic for a given token. Tokens longer than eight
    characters get a fixed "recent growth" curve, tokens starting with one of
    a few common prefixes get a fixed flatter curve, and everything else gets
    a rising curve with seeded pseudo-random noise clamped to ``[5, 95]``.

    Args:
        token: The token text.

    Returns:
        A list of ``(era, value)`` tuples, one per era, oldest first.
    """
    eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]

    # Different patterns based on token characteristics
    if len(token) > 8:
        # Possibly a technical term - recent growth
        values = [10, 20, 30, 60, 85, 95]
    elif token.startswith(("un", "re", "de", "pre")):
        # Prefix words tend to be older
        values = [45, 50, 60, 70, 75, 80]
    else:
        # Standard pattern for common words.
        # A character-code checksum is used instead of hash(), which is
        # salted per process and would give different results across runs.
        checksum = sum(ord(c) for c in token)
        base = 50 + (checksum % 30)
        # A private generator seeded with the checksum yields the same noise
        # as seeding the global NumPy RNG, without clobbering global state
        # that other code may rely on.
        rng = np.random.RandomState(checksum)
        noise = rng.normal(0, 5, len(eras))
        values = [max(5, min(95, base + i * 5 + n)) for i, n in enumerate(noise)]

    return list(zip(eras, values))

def generate_origin_data(token):
    """Pick a simulated (not real) origin/etymology record for a token.

    The record is chosen deterministically from a fixed table using the sum
    of the token's character codes.

    Returns:
        A dict with the keys ``"era"``, ``"language"`` and ``"note"``.
    """
    catalogue = (
        ("Ancient", "Latin"),
        ("Ancient", "Greek"),
        ("Medieval", "Old English"),
        ("16th century", "French"),
        ("18th century", "Germanic"),
        ("19th century", "Anglo-Saxon"),
        ("20th century", "Modern English"),
    )

    # Character-code checksum -> stable table slot for the same token.
    checksum = sum(map(ord, token))
    era, language = catalogue[checksum % len(catalogue)]

    return {
        "era": era,
        "language": language,
        "note": f"First appeared in {era} texts derived from {language}.",
    }

def analyze_token_types(tokens):
    """Classify each token as prefix-, suffix-, compound-like or a plain word.

    Tokens are lower-cased first. A token counts as "prefix"/"suffix" only
    when more than two characters remain besides the affix. Prefixes are
    checked before suffixes; a token with neither affix that is longer than
    eight characters is labelled "compound_first" (a simplification).

    Args:
        tokens: Iterable of token strings.

    Returns:
        A list of ``{"text": <lower-cased token>, "type": <label>}`` dicts in
        input order.
    """
    known_prefixes = ("un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super")
    known_suffixes = ("ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less")

    def classify(text):
        # The length test also guarantees the token is not the bare affix.
        if any(text.startswith(affix) and len(text) > len(affix) + 2 for affix in known_prefixes):
            return "prefix"
        if any(text.endswith(affix) and len(text) > len(affix) + 2 for affix in known_suffixes):
            return "suffix"
        # Simplified compound detection - in reality would need more analysis
        if len(text) > 8:
            return "compound_first"
        return "word"

    results = []
    for raw_token in tokens:
        lowered = raw_token.lower()
        results.append({"text": lowered, "type": classify(lowered)})
    return results

def plot_historical_data(historical_data):
    """Draw a bar chart of historical usage and return the ``plt`` module.

    Args:
        historical_data: Sequence of ``(era, value)`` pairs.

    Returns:
        The ``matplotlib.pyplot`` module with the new figure as its current
        figure. If drawing fails, the error is printed and a figure showing
        the error text is created instead.
    """
    try:
        labels = list(map(lambda point: point[0], historical_data))
        heights = list(map(lambda point: point[1], historical_data))

        plt.figure(figsize=(8, 3))
        plt.bar(labels, heights, color='skyblue')
        plt.title('Historical Usage')
        plt.xlabel('Era')
        plt.ylabel('Usage Level')
        plt.ylim(0, 100)
        plt.xticks(rotation=45)
        plt.tight_layout()
    except Exception as exc:
        print(f"Error in plot_historical_data: {exc!s}")
        # Fall back to a blank figure that just shows the error text.
        plt.figure(figsize=(8, 3))
        plt.text(
            0.5,
            0.5,
            f"Error creating plot: {exc!s}",
            horizontalalignment='center',
            verticalalignment='center',
        )
        plt.axis('off')

    return plt

def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
    """Create an interactive evolution chart from data using Plotly.

    Search volume is plotted on the primary y-axis together with a linear
    trend line; competition score and intent clarity share a secondary
    y-axis fixed to the 0-100 range. The month with the highest search volume
    is annotated as "Peak Volume".

    Args:
        data: List of dicts with the keys ``"month"``, ``"searchVolume"``,
            ``"competitionScore"`` and ``"intentClarity"``.
        forecast_months: Accepted for interface compatibility; the chart is
            drawn from ``data`` alone and does not read this value.
        growth_scenario: Scenario name shown in the chart title.

    Returns:
        A Plotly figure. If building the chart fails, the error is printed
        and a figure containing only the error text is returned.
    """
    try:
        import plotly.graph_objects as go
        from plotly.subplots import make_subplots

        # Extract each series once instead of once per trace.
        months = [item["month"] for item in data]
        search_volume = [item["searchVolume"] for item in data]
        competition = [item["competitionScore"] for item in data]
        intent_clarity = [item["intentClarity"] for item in data]

        # Create figure
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Add traces
        fig.add_trace(
            go.Scatter(
                x=months,
                y=search_volume,
                name="Search Volume",
                line=dict(color="#8884d8", width=3),
                hovertemplate="Month: %{x}<br>Volume: %{y}<extra></extra>"
            )
        )

        fig.add_trace(
            go.Scatter(
                x=months,
                y=competition,
                name="Competition Score",
                line=dict(color="#82ca9d", width=3, dash="dot"),
                hovertemplate="Month: %{x}<br>Score: %{y}<extra></extra>"
            ),
            secondary_y=True
        )

        fig.add_trace(
            go.Scatter(
                x=months,
                y=intent_clarity,
                name="Intent Clarity",
                line=dict(color="#ffc658", width=3, dash="dash"),
                hovertemplate="Month: %{x}<br>Clarity: %{y}<extra></extra>"
            ),
            secondary_y=True
        )

        # Add trend line: simple linear regression over the month index.
        x_values = list(range(len(data)))
        slope, intercept = np.polyfit(x_values, search_volume, 1)
        trend_y = [slope * x + intercept for x in x_values]

        fig.add_trace(
            go.Scatter(
                x=months,
                y=trend_y,
                name="Trend",
                line=dict(color="rgba(255, 0, 0, 0.5)", width=2, dash="dot"),
                hoverinfo="skip"
            )
        )

        # Customize layout.
        # Axis titles use the nested title.text / title.font form: the flat
        # `titlefont` axis property is deprecated and rejected by Plotly 6.
        axis_title_font = dict(size=14)
        fig.update_layout(
            title=f"Keyword Evolution Forecast ({growth_scenario} Growth)",
            title_font=dict(size=20),
            hovermode="x unified",
            xaxis=dict(
                title=dict(text="Month", font=axis_title_font),
                showgrid=True,
                gridcolor="rgba(0,0,0,0.1)"
            ),
            yaxis=dict(
                title=dict(text="Search Volume", font=axis_title_font),
                showgrid=True,
                gridcolor="rgba(0,0,0,0.1)"
            ),
            yaxis2=dict(
                title=dict(text="Score (0-100)", font=axis_title_font),
                range=[0, 100]
            ),
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="center",
                x=0.5
            ),
            margin=dict(l=10, r=10, t=80, b=10),
            height=500,
            template="plotly_white"
        )

        # Add annotations for key insights (first month reaching the maximum).
        peak_volume = max(search_volume)
        peak_index = search_volume.index(peak_volume)
        fig.add_annotation(
            x=months[peak_index],
            y=peak_volume,
            text="Peak Volume",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-40
        )

        # Return the figure
        return fig

    except Exception as e:
        print(f"Error in create_evolution_chart: {str(e)}")
        # Create a simple error message plot with Plotly
        import plotly.graph_objects as go
        fig = go.Figure()
        fig.add_annotation(
            x=0.5, y=0.5,
            text=f"Error creating chart: {str(e)}",
            showarrow=False,
            font=dict(size=14, color="red")
        )
        fig.update_layout(
            xaxis=dict(showticklabels=False),
            yaxis=dict(showticklabels=False)
        )
        return fig

def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", progress=gr.Progress()):
    """Main function to analyze a keyword.

    Loads the models on first use, splits the keyword on whitespace, tags
    each word (token type, POS tag, NER entity, simulated history and origin,
    related terms), classifies search intent, and builds a simulated monthly
    forecast plus its chart.

    Args:
        keyword: The keyword or phrase entered by the user.
        forecast_months: Number of months (Jan onwards, at most 12) to include
            in the simulated forecast.
        growth_scenario: "Conservative", "Moderate" or "Aggressive"; any other
            value is treated as "Moderate".
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        A 4-tuple ``(token_viz_html, analysis_html, json_results,
        evolution_chart)``. For empty input or on any failure the two HTML
        strings carry a message and the last two items are ``None``.
    """
    # Local import so this function stays self-contained; exception and status
    # text is escaped before being embedded in HTML.
    from html import escape

    if not keyword or not keyword.strip():
        prompt_html = "<div>Please enter a keyword to analyze</div>"
        return prompt_html, prompt_html, None, None

    progress(0.1, desc="Starting analysis...")

    # Load models if not already loaded
    model_status = load_models(progress)
    if isinstance(model_status, str) and model_status.startswith("Error"):
        failure_html = f"<div style='color:red;'>{escape(model_status)}</div>"
        return failure_html, failure_html, None, None

    try:
        # Basic tokenization - just split on spaces for simplicity
        words = keyword.strip().lower().split()
        progress(0.2, desc="Analyzing tokens...")

        # Get token types
        token_analysis = analyze_token_types(words)

        progress(0.3, desc="Running NER...")
        # Get NER tags - handle potential errors
        try:
            ner_results = ner_pipeline(keyword)
        except Exception as e:
            print(f"NER error: {str(e)}")
            ner_results = []

        progress(0.4, desc="Running POS tagging...")
        # Get POS tags - handle potential errors
        try:
            pos_results = pos_pipeline(keyword)
        except Exception as e:
            print(f"POS error: {str(e)}")
            pos_results = []

        # Index model output by lower-cased word once, keeping the first
        # occurrence, instead of rescanning both result lists for every token.
        pos_by_word = {}
        for pos_result in pos_results:
            pos_by_word.setdefault(pos_result["word"].lower(), pos_result["entity"])
        entity_by_word = {}
        for ner_result in ner_results:
            entity_by_word.setdefault(ner_result["word"].lower(), ner_result["entity"])

        # Process and organize results
        full_token_analysis = []
        for token in token_analysis:
            token_text = token["text"]

            # POS tag defaults to NOUN, entity type to None, when no model
            # output matches this word.
            pos_tag = pos_by_word.get(token_text, "NOUN")
            entity_type = entity_by_word.get(token_text)

            # Generate historical data
            historical_data = simulate_historical_data(token_text)

            # Generate origin data
            origin = generate_origin_data(token_text)

            # Calculate importance (simplified algorithm)
            importance = min(95, 60 + (len(token_text) * 2))

            # Fallback used when the semantic model is missing or fails.
            related_terms = [f"{token_text}-related-1", f"{token_text}-related-2"]
            if semantic_model is not None:
                try:
                    # Generate some potential related terms
                    prefix_related = [f"about {token_text}", f"what is {token_text}", f"how to {token_text}"]
                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
                    comparison_terms = prefix_related + synonym_candidates + domain_terms

                    # Get similarities
                    similarities = get_semantic_similarity(token_text, comparison_terms)

                    # Use top 3 most similar terms
                    related_terms = [term for term, score in similarities[:3]]
                except Exception as e:
                    print(f"Error generating semantic related terms: {str(e)}")

            full_token_analysis.append({
                "token": token_text,
                "type": token["type"],
                "posTag": pos_tag,
                "entityType": entity_type,
                "importance": importance,
                "historicalData": historical_data,
                "origin": origin,
                "relatedTerms": related_terms
            })

        progress(0.6, desc="Analyzing intent...")
        # Intent analysis - handle potential errors
        try:
            intent_result = intent_classifier(
                keyword,
                candidate_labels=["informational", "navigational", "transactional"]
            )
            top_label = intent_result["labels"][0]

            intent_analysis = {
                "type": top_label.capitalize(),
                "strength": round(intent_result["scores"][0] * 100),
                "mutations": [
                    f"{top_label}-variation-1",
                    f"{top_label}-variation-2"
                ]
            }
        except Exception as e:
            print(f"Intent classification error: {str(e)}")
            intent_analysis = {
                "type": "Informational",  # Default fallback
                "strength": 70,
                "mutations": ["fallback-variation-1", "fallback-variation-2"]
            }

        # Evolution potential (simplified calculation)
        evolution_potential = min(95, 65 + (len(keyword) % 30))

        # Predicted trends (simplified)
        trends = [
            "Voice search adaptation",
            "Visual search integration"
        ]

        # Generate keyword-specific (simulated) evolution data
        base_volume = 1000 + (len(keyword) * 100)

        # Character-code checksum of the keyword, computed once; it makes the
        # simulated numbers deterministic per keyword.
        keyword_checksum = sum(ord(c) for c in keyword)

        # (base rate, per-step increment) by scenario; unknown -> Moderate
        scenario_growth = {
            "Conservative": (1.05, 0.02),
            "Aggressive": (1.15, 0.05),
        }
        base_rate, rate_step = scenario_growth.get(growth_scenario, (1.1, 0.03))
        growth_factor = base_rate + (rate_step * (keyword_checksum % 5))

        evolution_data = []
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
        current_volume = base_volume

        for month_index, month in enumerate(months):
            # Add some deterministic "randomness" to make it look more
            # realistic. A private generator is used so the global NumPy RNG
            # is not reseeded as a side effect.
            rng = np.random.RandomState(sum(ord(c) for c in month) + keyword_checksum)
            random_factor = 0.9 + (0.2 * rng.random_sample())
            current_volume *= growth_factor * random_factor

            evolution_data.append({
                "month": month,
                "searchVolume": int(current_volume),
                "competitionScore": min(95, 45 + (month_index * 3) + (keyword_checksum % 10)),
                "intentClarity": min(95, 80 + (month_index * 2) + (keyword_checksum % 5))
            })

        progress(0.8, desc="Creating visualizations...")
        # Create interactive evolution chart
        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)

        # Generate HTML for token visualization
        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)

        # Generate HTML for full analysis
        analysis_html = generate_full_analysis_html(
            keyword,
            full_token_analysis,
            intent_analysis,
            evolution_potential,
            trends
        )

        # Generate JSON results
        json_results = {
            "keyword": keyword,
            "tokenAnalysis": full_token_analysis,
            "intentAnalysis": intent_analysis,
            "evolutionPotential": evolution_potential,
            "predictedTrends": trends,
            "forecast": {
                "months": forecast_months,
                "scenario": growth_scenario,
                "data": evolution_data
            }
        }

        progress(1.0, desc="Analysis complete!")
        return token_viz_html, analysis_html, json_results, evolution_chart

    except Exception as e:
        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {escape(str(e))}</div>"
        print(f"Error in analyze_keyword: {str(e)}")
        return error_message, error_message, None, None

def generate_token_visualization_html(token_analysis, full_analysis):
    """Generate HTML for token visualization.

    Renders a "Human View" row (one chip per word), a "Machine View" row (one
    colored chip per analyzed token with its type) and three counters: words,
    tokens, and tokens per word.

    Token text originates from user input, so every interpolated text value
    is HTML-escaped.

    Args:
        token_analysis: List of dicts with a ``"text"`` key.
        full_analysis: List of dicts with ``"token"`` and ``"type"`` keys.

    Returns:
        The HTML fragment as a string.
    """
    from html import escape  # local import keeps the block self-contained

    parts = ["""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Token Visualization</h2>
        
        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """]

    # Add human view tokens
    for token in token_analysis:
        word_text = escape(str(token['text']))
        parts.append(f"""
        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
            {word_text}
        </div>
        """)

    parts.append("""
            </div>
        </div>
        
        <div style="text-align: center; margin: 15px 0;">
            <span style="font-size: 20px;">↓</span>
        </div>
        
        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """)

    # Add machine view tokens
    for token in full_analysis:
        bg_color = get_token_colors(token["type"])
        token_text = escape(str(token['token']))
        token_type = escape(str(token['type']))
        parts.append(f"""
        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
            {token_text}
            <span style="font-size: 10px; opacity: 0.7; display: block;">{token_type}</span>
        </div>
        """)

    parts.append("""
            </div>
        </div>
        
        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
    """)

    # Add stats; max(1, ...) avoids dividing by zero when there are no words.
    word_count = len(token_analysis)
    token_count = len(full_analysis)
    ratio = round(token_count / max(1, word_count), 2)

    parts.append(f"""
        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
            <div style="font-size: 14px; color: #4299e1;">Words</div>
        </div>
        
        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
        </div>
        
        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
        </div>
    """)

    parts.append("""
        </div>
    </div>
    """)

    return "".join(parts)

def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
    """Generate HTML for full keyword analysis.

    The report contains an intent card, an evolution-potential gauge, the
    list of predicted trends and one detail card per token (POS tag, optional
    entity badge, importance bar, origin note and a small history bar chart).

    The keyword and token text originate from user input, so every
    interpolated text value is HTML-escaped.

    Args:
        keyword: The analyzed keyword.
        token_analysis: List of per-token dicts with the keys ``"token"``,
            ``"posTag"``, ``"entityType"``, ``"importance"``, ``"origin"``
            (with ``"era"``, ``"language"``, ``"note"``) and
            ``"historicalData"`` (sequence of ``(period, value)`` pairs).
        intent_analysis: Dict with ``"type"`` and ``"strength"`` (percent).
        evolution_potential: Number shown in, and used to fill, the gauge.
        trends: Iterable of trend descriptions.

    Returns:
        The HTML fragment as a string.
    """
    from html import escape  # local import keeps the block self-contained

    safe_keyword = escape(str(keyword))
    intent_type = escape(str(intent_analysis['type']))

    parts = [f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {safe_keyword}</h2>
        
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
                    <span>Type:</span>
                    <span>{intent_type}</span>
                </div>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <span>Strength:</span>
                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
                    </div>
                </div>
            </div>
            
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
                    <div style="position: relative; width: 100px; height: 100px;">
                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
                        </div>
                        <svg width="100" height="100" viewBox="0 0 36 36">
                            <path
                              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
                              fill="none"
                              stroke="#4CAF50"
                              stroke-width="3"
                              stroke-dasharray="{evolution_potential}, 100"
                            />
                        </svg>
                    </div>
                </div>
            </div>
        </div>
        
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
            <div style="display: flex; flex-direction: column; gap: 8px;">
    """]

    # Add trends
    for trend in trends:
        trend_text = escape(str(trend))
        parts.append(f"""
        <div style="display: flex; align-items: center; gap: 8px;">
            <span style="color: #48bb78;">↗</span>
            <span>{trend_text}</span>
        </div>
        """)

    parts.append("""
            </div>
        </div>
        
        <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
    """)

    # Add token details
    for token in token_analysis:
        token_text = escape(str(token['token']))
        pos_tag = escape(str(token['posTag']))
        parts.append(f"""
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="font-size: 18px; font-weight: medium;">{token_text}</span>
                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{pos_tag}</span>
        """)

        # Entity badge only for tokens the NER step recognized.
        if token['entityType']:
            entity_type = escape(str(token['entityType']))
            parts.append(f"""
            <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
                ⓘ {entity_type}
            </span>
            """)

        origin_era = escape(str(token['origin']['era']))
        origin_language = escape(str(token['origin']['language']))
        origin_note = escape(str(token['origin']['note']))
        parts.append(f"""
                </div>
                <div style="display: flex; align-items: center; gap: 4px;">
                    <span style="font-size: 12px; color: #718096;">Importance:</span>
                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
                    </div>
                </div>
            </div>
            
            <div style="margin-top: 15px;">
                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
                    <div style="font-size: 12px; margin-bottom: 8px;">
                        <span style="font-weight: 500;">Origin: </span>
                        <span>{origin_era}, </span>
<span style="font-style: italic;">{origin_language}</span>
                    </div>
                    <div style="font-size: 12px; margin-bottom: 12px;">{origin_note}</div>
                    
                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
        """)

        # Add historical data bars. The bar's position (not a list search)
        # drives the opacity, so duplicate (period, value) entries still get
        # increasing opacity; rounding avoids float noise in the CSS value.
        for bar_index, (period, value) in enumerate(token['historicalData']):
            opacity = round(0.3 + (bar_index * 0.1), 2)
            period_text = escape(str(period))
            parts.append(f"""
            <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
                <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
                <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
                    {period_text}
                </div>
            </div>
            """)

        parts.append("""
                    </div>
                </div>
            </div>
        </div>
        """)

    parts.append("""
    </div>
    """)

    return "".join(parts)

# Build the Gradio interface: inputs on the left, tabbed results on the right.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Keyword DNA Analyzer")
    gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")

            # Forecast settings, collapsed by default
            with gr.Accordion("Forecast Settings", open=False):
                forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
                growth_scenario = gr.Radio(["Conservative", "Moderate", "Aggressive"], value="Moderate", label="Growth Scenario")

            # Status line updated before and after each analysis run
            status_html = gr.HTML('<div style="color:gray;text-align:center;">Enter a keyword and click "Analyze DNA"</div>')

            analyze_btn = gr.Button("Analyze DNA", variant="primary")

            # One button per example keyword
            with gr.Row():
                example_btns = [
                    gr.Button(example)
                    for example in ("preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning")
                ]

        with gr.Column():
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()

                with gr.Tab("Full Analysis"):
                    analysis_html = gr.HTML()

                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot(label="Keyword Evolution Forecast")

                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()

    def _attach_analysis(register):
        """Chain "show loading -> analyze -> show done" onto an event hook.

        ``register`` is an event-registration callable such as
        ``button.click`` or the ``.then`` of an earlier step.
        """
        register(
            lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
            outputs=status_html
        ).then(
            analyze_keyword,
            inputs=[input_text, forecast_months, growth_scenario],
            outputs=[token_viz_html, analysis_html, json_output, evolution_chart]
        ).then(
            lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
            outputs=status_html
        )

    # Main button: run the analysis on whatever is in the textbox.
    _attach_analysis(analyze_btn.click)

    def set_example(btn_label):
        """Return the clicked example button's label for the textbox."""
        return btn_label

    # Example buttons: copy the label into the textbox, then run the analysis.
    for btn in example_btns:
        _attach_analysis(
            btn.click(
                set_example,
                inputs=[btn],
                outputs=[input_text]
            ).then
        )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()