PromptMeister committed (verified)
Commit 20172b4 · 1 parent: da51a21

Update app.py

Files changed (1):
  1. app.py +536 -205
app.py CHANGED
@@ -7,6 +7,9 @@ import json
 import time
 import os
 from functools import partial
+import datetime
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 
 # Global variables to store models
 tokenizer = None
@@ -14,11 +17,16 @@ ner_pipeline = None
 pos_pipeline = None
 intent_classifier = None
 semantic_model = None
+stt_model = None  # Speech-to-text model
 models_loaded = False
 
+# Database to store keyword ranking history (in-memory database for this example)
+# In a real app, you would use a proper database
+ranking_history = {}
+
 def load_models(progress=gr.Progress()):
     """Lazy-load models only when needed"""
-    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, models_loaded
+    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded
 
     if models_loaded:
         return True
@@ -32,17 +40,17 @@ def load_models(progress=gr.Progress()):
     progress(0.2, desc="Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 
-    progress(0.4, desc="Loading NER model...")
+    progress(0.3, desc="Loading NER model...")
     ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
 
-    progress(0.6, desc="Loading POS model...")
+    progress(0.4, desc="Loading POS model...")
     # Use smaller POS model
     from transformers import AutoModelForTokenClassification, BertTokenizerFast
     pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
     pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
     pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)
 
-    progress(0.8, desc="Loading intent classifier...")
+    progress(0.6, desc="Loading intent classifier...")
     # Use a smaller model for zero-shot classification
     intent_classifier = pipeline(
         "zero-shot-classification",
@@ -50,7 +58,18 @@ def load_models(progress=gr.Progress()):
         device=0 if torch.cuda.is_available() else -1  # Use GPU if available
     )
 
-    progress(0.9, desc="Loading semantic model...")
+    progress(0.7, desc="Loading speech-to-text model...")
+    try:
+        # Load automatic speech recognition model
+        from transformers import WhisperProcessor, WhisperForConditionalGeneration
+        processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
+        stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
+        stt_model = (processor, stt_model)
+    except Exception as e:
+        print(f"Warning: Could not load speech-to-text model: {str(e)}")
+        stt_model = None  # Set to None so we can check if it's available
+
+    progress(0.8, desc="Loading semantic model...")
     try:
         from sentence_transformers import SentenceTransformer
         semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -66,6 +85,111 @@ def load_models(progress=gr.Progress()):
         print(f"Error loading models: {str(e)}")
         return f"Error: {str(e)}"
 
+def speech_to_text(audio_path):
+    """Convert speech to text using the loaded speech-to-text model"""
+    if stt_model is None:
+        return "Speech-to-text model not loaded. Please try text input instead."
+
+    try:
+        import librosa
+        import numpy as np
+
+        # Load audio file
+        audio, sr = librosa.load(audio_path, sr=16000)
+
+        # Process audio with Whisper
+        processor, model = stt_model
+        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
+
+        # Generate token ids
+        predicted_ids = model.generate(input_features)
+
+        # Decode token ids to text
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+        return transcription
+    except Exception as e:
+        print(f"Error in speech_to_text: {str(e)}")
+        return f"Error processing speech: {str(e)}"
+
+def handle_voice_input(audio):
+    """Handle voice input and convert to text"""
+    if audio is None:
+        return "No audio detected. Please try again."
+
+    try:
+        # Convert speech to text
+        text = speech_to_text(audio)
+        return text
+    except Exception as e:
+        print(f"Error in handle_voice_input: {str(e)}")
+        return f"Error: {str(e)}"
+
+def simulate_google_serp(keyword, num_results=10):
+    """Simulate Google SERP results for a keyword"""
+    try:
+        # In a real implementation, this would call the Google API
+        # For now, we'll generate fake SERP data
+
+        # Deterministic seed for consistent results by keyword
+        np.random.seed(sum(ord(c) for c in keyword))
+
+        serp_results = []
+        domains = [
+            "example.com", "wikipedia.org", "medium.com", "github.com",
+            "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
+            "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
+        ]
+
+        for i in range(1, num_results + 1):
+            domain = domains[i % len(domains)]
+            title = f"{keyword.title()} - {domain.split('.')[0].title()} Resource #{i}"
+            snippet = f"This is a simulated SERP result for '{keyword}'. Result #{i} would provide relevant information about this topic."
+            url = f"https://www.{domain}/{keyword.replace(' ', '-')}-resource-{i}"
+
+            position = i
+            ctr = round(0.3 * (0.85 ** (i - 1)), 4)  # Simulate click-through rate decay
+
+            serp_results.append({
+                "position": position,
+                "title": title,
+                "url": url,
+                "domain": domain,
+                "snippet": snippet,
+                "ctr_estimate": ctr,
+                "impressions_estimate": np.random.randint(1000, 10000)
+            })
+
+        return serp_results
+    except Exception as e:
+        print(f"Error in simulate_google_serp: {str(e)}")
+        return []
+
+def update_ranking_history(keyword, serp_results):
+    """Update the ranking history for a keyword"""
+    try:
+        # Get current timestamp
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # Initialize if keyword not in history
+        if keyword not in ranking_history:
+            ranking_history[keyword] = []
+
+        # Add new entry
+        ranking_history[keyword].append({
+            "timestamp": timestamp,
+            "results": serp_results[:5]  # Store top 5 results for history
+        })
+
+        # Keep only last 10 entries for each keyword
+        if len(ranking_history[keyword]) > 10:
+            ranking_history[keyword] = ranking_history[keyword][-10:]
+
+        return True
+    except Exception as e:
+        print(f"Error in update_ranking_history: {str(e)}")
+        return False
+
 def get_semantic_similarity(token, comparison_terms):
     """Calculate semantic similarity between a token and comparison terms"""
     try:
@@ -263,209 +387,156 @@ def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
     fig.update_layout(title="Fallback Chart (Error occurred)")
     return fig
 
-def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", progress=gr.Progress()):
-    """Main function to analyze a keyword"""
-    if not keyword or not keyword.strip():
-        return (
-            "<div>Please enter a keyword to analyze</div>",
-            "<div>Please enter a keyword to analyze</div>",
-            None,
-            None
-        )
-
-    progress(0.1, desc="Starting analysis...")
-
-    # Load models if not already loaded
-    model_status = load_models(progress)
-    if isinstance(model_status, str) and model_status.startswith("Error"):
-        return (
-            f"<div style='color:red;'>{model_status}</div>",
-            f"<div style='color:red;'>{model_status}</div>",
-            None,
-            None
-        )
-
-    try:
-        # Basic tokenization - just split on spaces for simplicity
-        words = keyword.strip().lower().split()
-        progress(0.2, desc="Analyzing tokens...")
-
-        # Get token types
-        token_analysis = analyze_token_types(words)
-
-        progress(0.3, desc="Running NER...")
-        # Get NER tags - handle potential errors
-        try:
-            ner_results = ner_pipeline(keyword)
-        except Exception as e:
-            print(f"NER error: {str(e)}")
-            ner_results = []
-
-        progress(0.4, desc="Running POS tagging...")
-        # Get POS tags - handle potential errors
-        try:
-            pos_results = pos_pipeline(keyword)
-        except Exception as e:
-            print(f"POS error: {str(e)}")
-            pos_results = []
-
-        # Process and organize results
-        full_token_analysis = []
-        for token in token_analysis:
-            # Find POS tag for this token
-            pos_tag = "NOUN"  # Default
-            for pos_result in pos_results:
-                if pos_result["word"].lower() == token["text"]:
-                    pos_tag = pos_result["entity"]
-                    break
-
-            # Find entity type if any
-            entity_type = None
-            for ner_result in ner_results:
-                if ner_result["word"].lower() == token["text"]:
-                    entity_type = ner_result["entity"]
-                    break
-
-            # Generate historical data
-            historical_data = simulate_historical_data(token["text"])
-
-            # Generate origin data
-            origin = generate_origin_data(token["text"])
-
-            # Calculate importance (simplified algorithm)
-            importance = 60 + (len(token["text"]) * 2)
-            importance = min(95, importance)
-
-            # Generate more meaningful related terms using semantic similarity
-            if semantic_model is not None:
-                try:
-                    # Generate some potential related terms
-                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
-                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
-                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
-                    comparison_terms = prefix_related + synonym_candidates + domain_terms
-
-                    # Get similarities
-                    similarities = get_semantic_similarity(token['text'], comparison_terms)
-
-                    # Use top 3 most similar terms
-                    related_terms = [term for term, score in similarities[:3]]
-                except Exception as e:
-                    print(f"Error generating semantic related terms: {str(e)}")
-                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
-            else:
-                # Fallback if semantic model isn't loaded
-                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
-
-            full_token_analysis.append({
-                "token": token["text"],
-                "type": token["type"],
-                "posTag": pos_tag,
-                "entityType": entity_type,
-                "importance": importance,
-                "historicalData": historical_data,
-                "origin": origin,
-                "relatedTerms": related_terms
-            })
-
-        progress(0.6, desc="Analyzing intent...")
-        # Intent analysis - handle potential errors
-        try:
-            intent_result = intent_classifier(
-                keyword,
-                candidate_labels=["informational", "navigational", "transactional"]
-            )
-
-            intent_analysis = {
-                "type": intent_result["labels"][0].capitalize(),
-                "strength": round(intent_result["scores"][0] * 100),
-                "mutations": [
-                    f"{intent_result['labels'][0]}-variation-1",
-                    f"{intent_result['labels'][0]}-variation-2"
-                ]
-            }
-        except Exception as e:
-            print(f"Intent classification error: {str(e)}")
-            intent_analysis = {
-                "type": "Informational",  # Default fallback
-                "strength": 70,
-                "mutations": ["fallback-variation-1", "fallback-variation-2"]
-            }
-
-        # Evolution potential (simplified calculation)
-        evolution_potential = min(95, 65 + (len(keyword) % 30))
-
-        # Predicted trends (simplified)
-        trends = [
-            "Voice search adaptation",
-            "Visual search integration"
-        ]
-
-        # Generate more realistic and keyword-specific evolution data
-        base_volume = 1000 + (len(keyword) * 100)
-
-        # Adjust growth factor based on scenario
-        if growth_scenario == "Conservative":
-            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
-        elif growth_scenario == "Aggressive":
-            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
-        else:  # Moderate
-            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))
-
-        evolution_data = []
-        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
-        current_volume = base_volume
-
-        for month in months:
-            # Add some randomness to make it look more realistic
-            np.random.seed(sum(ord(c) for c in month + keyword))
-            random_factor = 0.9 + (0.2 * np.random.random())
-            current_volume *= growth_factor * random_factor
-
-            evolution_data.append({
-                "month": month,
-                "searchVolume": int(current_volume),
-                "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
-                "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
-            })
-
-        progress(0.8, desc="Creating visualizations...")
-        # Create interactive evolution chart
-        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
-
-        # Generate HTML for token visualization
-        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
-
-        # Generate HTML for full analysis
-        analysis_html = generate_full_analysis_html(
-            keyword,
-            full_token_analysis,
-            intent_analysis,
-            evolution_potential,
-            trends
-        )
-
-        # Generate JSON results
-        json_results = {
-            "keyword": keyword,
-            "tokenAnalysis": full_token_analysis,
-            "intentAnalysis": intent_analysis,
-            "evolutionPotential": evolution_potential,
-            "predictedTrends": trends,
-            "forecast": {
-                "months": forecast_months,
-                "scenario": growth_scenario,
-                "data": evolution_data
-            }
-        }
-
-        progress(1.0, desc="Analysis complete!")
-        return token_viz_html, analysis_html, json_results, evolution_chart
-
-    except Exception as e:
-        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
-        print(f"Error in analyze_keyword: {str(e)}")
-        return error_message, error_message, None, None
+def create_ranking_history_chart(keyword_history):
+    """Create a chart showing keyword ranking history over time"""
+    try:
+        if not keyword_history or len(keyword_history) < 2:
+            # Not enough data for a meaningful chart
+            fig = go.Figure()
+            fig.update_layout(
+                title="Insufficient Ranking Data",
+                annotations=[{
+                    "text": "Need at least 2 data points for ranking history",
+                    "showarrow": False,
+                    "font": {"size": 16},
+                    "xref": "paper",
+                    "yref": "paper",
+                    "x": 0.5,
+                    "y": 0.5
+                }]
+            )
+            return fig
+
+        # Create a figure
+        fig = go.Figure()
+
+        # Extract timestamps and convert to datetime objects
+        timestamps = [entry["timestamp"] for entry in keyword_history]
+        dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]
+
+        # Get unique domains from all results
+        all_domains = set()
+        for entry in keyword_history:
+            for result in entry["results"]:
+                all_domains.add(result["domain"])
+
+        # Colors for different domains
+        domain_colors = {}
+        color_palette = [
+            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
+        ]
+        for i, domain in enumerate(all_domains):
+            domain_colors[domain] = color_palette[i % len(color_palette)]
+
+        # Track domains and their positions over time
+        domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}
+
+        for i, entry in enumerate(keyword_history):
+            for result in entry["results"]:
+                domain = result["domain"]
+                position = result["position"]
+                title = result["title"]
+
+                domain_tracking[domain]["x"].append(dates[i])
+                domain_tracking[domain]["y"].append(position)
+                domain_tracking[domain]["text"].append(title)
+
+        # Add traces for each domain
+        for domain, data in domain_tracking.items():
+            if len(data["x"]) > 0:  # Only add domains that have data
+                fig.add_trace(
+                    go.Scatter(
+                        x=data["x"],
+                        y=data["y"],
+                        mode="lines+markers",
+                        name=domain,
+                        line=dict(color=domain_colors[domain]),
+                        hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
+                        text=data["text"],
+                        marker=dict(size=8)
+                    )
+                )
+
+        # Update layout
+        fig.update_layout(
+            title="Keyword Ranking History",
+            xaxis_title="Date",
+            yaxis_title="Position",
+            yaxis=dict(autorange="reversed"),  # Invert y-axis so position 1 is on top
+            hovermode="closest",
+            height=500
+        )
+
+        return fig
+
+    except Exception as e:
+        print(f"Error in create_ranking_history_chart: {str(e)}")
+        # Return fallback chart
+        fig = go.Figure()
+        fig.update_layout(
+            title="Error Creating Ranking Chart",
+            annotations=[{
+                "text": f"Error: {str(e)}",
+                "showarrow": False,
+                "font": {"size": 14},
+                "xref": "paper",
+                "yref": "paper",
+                "x": 0.5,
+                "y": 0.5
+            }]
+        )
+        return fig
+
+def generate_serp_html(keyword, serp_results):
+    """Generate HTML for SERP results"""
+    if not serp_results:
+        return "<div>No SERP results available</div>"
+
+    html = f"""
+    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="margin-top: 0;">SERP Results for "{keyword}"</h2>
+
+        <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
+            <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
+        </div>
+
+        <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
+    """
+
+    for result in serp_results:
+        position = result["position"]
+        title = result["title"]
+        url = result["url"]
+        snippet = result["snippet"]
+        domain = result["domain"]
+        ctr = result["ctr_estimate"]
+        impressions = result["impressions_estimate"]
+
+        html += f"""
+        <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
+            <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
+                {position}
+            </div>
+            <div style="margin-bottom: 5px;">
+                <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
+            </div>
+            <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
+            <div style="color: #4d5156; font-size: 14px;">{snippet}</div>
+
+            <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
+                <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
+                <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
+            </div>
+        </div>
+        """
+
+    html += """
+        </div>
+    </div>
+    """
+
+    return html
 
 def generate_token_visualization_html(token_analysis, full_analysis):
     """Generate HTML for token visualization"""
@@ -641,7 +712,7 @@ def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolut
     <div style="font-size: 12px; margin-bottom: 8px;">
         <span style="font-weight: 500;">Origin: </span>
         <span>{token['origin']['era']}, </span>
-        <span style="font-style: italic;">{token['origin']['language']}</span>
+        <span style="font-style: italic;">{token['origin']['language']}</span>
     </div>
     <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>
 
@@ -673,19 +744,266 @@ def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolut
 
     return html
 
+def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
+    """Main function to analyze a keyword"""
+    if not keyword or not keyword.strip():
+        return (
+            "<div>Please enter a keyword to analyze</div>",
+            "<div>Please enter a keyword to analyze</div>",
+            None,
+            None,
+            None,
+            None,
+            None
+        )
+
+    progress(0.1, desc="Starting analysis...")
+
+    # Load models if not already loaded
+    model_status = load_models(progress)
+    if isinstance(model_status, str) and model_status.startswith("Error"):
+        return (
+            f"<div style='color:red;'>{model_status}</div>",
+            f"<div style='color:red;'>{model_status}</div>",
+            None,
+            None,
+            None,
+            None,
+            None
+        )
+
+    try:
+        # Basic tokenization - just split on spaces for simplicity
+        words = keyword.strip().lower().split()
+        progress(0.2, desc="Analyzing tokens...")
+
+        # Get token types
+        token_analysis = analyze_token_types(words)
+
+        progress(0.3, desc="Running NER...")
+        # Get NER tags - handle potential errors
+        try:
+            ner_results = ner_pipeline(keyword)
+        except Exception as e:
+            print(f"NER error: {str(e)}")
+            ner_results = []
+
+        progress(0.4, desc="Running POS tagging...")
+        # Get POS tags - handle potential errors
+        try:
+            pos_results = pos_pipeline(keyword)
+        except Exception as e:
+            print(f"POS error: {str(e)}")
+            pos_results = []
+
+        # Process and organize results
+        full_token_analysis = []
+        for token in token_analysis:
+            # Find POS tag for this token
+            pos_tag = "NOUN"  # Default
+            for pos_result in pos_results:
+                if pos_result["word"].lower() == token["text"]:
+                    pos_tag = pos_result["entity"]
+                    break
+
+            # Find entity type if any
+            entity_type = None
+            for ner_result in ner_results:
+                if ner_result["word"].lower() == token["text"]:
+                    entity_type = ner_result["entity"]
+                    break
+
+            # Generate historical data
+            historical_data = simulate_historical_data(token["text"])
+
+            # Generate origin data
+            origin = generate_origin_data(token["text"])
+
+            # Calculate importance (simplified algorithm)
+            importance = 60 + (len(token["text"]) * 2)
+            importance = min(95, importance)
+
+            # Generate more meaningful related terms using semantic similarity
+            if semantic_model is not None:
+                try:
+                    # Generate some potential related terms
+                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
+                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
+                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
+                    comparison_terms = prefix_related + synonym_candidates + domain_terms
+
+                    # Get similarities
+                    similarities = get_semantic_similarity(token['text'], comparison_terms)
+
+                    # Use top 3 most similar terms
+                    related_terms = [term for term, score in similarities[:3]]
+                except Exception as e:
+                    print(f"Error generating semantic related terms: {str(e)}")
+                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
+            else:
+                # Fallback if semantic model isn't loaded
+                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
+
+            full_token_analysis.append({
+                "token": token["text"],
+                "type": token["type"],
+                "posTag": pos_tag,
+                "entityType": entity_type,
+                "importance": importance,
+                "historicalData": historical_data,
+                "origin": origin,
+                "relatedTerms": related_terms
+            })
+
+        progress(0.5, desc="Analyzing intent...")
+        # Intent analysis - handle potential errors
+        try:
+            intent_result = intent_classifier(
+                keyword,
+                candidate_labels=["informational", "navigational", "transactional"]
+            )
+
+            intent_analysis = {
+                "type": intent_result["labels"][0].capitalize(),
+                "strength": round(intent_result["scores"][0] * 100),
+                "mutations": [
+                    f"{intent_result['labels'][0]}-variation-1",
+                    f"{intent_result['labels'][0]}-variation-2"
+                ]
+            }
+        except Exception as e:
+            print(f"Intent classification error: {str(e)}")
+            intent_analysis = {
+                "type": "Informational",  # Default fallback
+                "strength": 70,
+                "mutations": ["fallback-variation-1", "fallback-variation-2"]
+            }
+
+        # Evolution potential (simplified calculation)
+        evolution_potential = min(95, 65 + (len(keyword) % 30))
+
+        # Predicted trends (simplified)
+        trends = [
+            "Voice search adaptation",
+            "Visual search integration"
+        ]
+
+        # Generate more realistic and keyword-specific evolution data
+        base_volume = 1000 + (len(keyword) * 100)
+
+        # Adjust growth factor based on scenario
+        if growth_scenario == "Conservative":
+            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
+        elif growth_scenario == "Aggressive":
+            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
+        else:  # Moderate
+            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))
+
+        evolution_data = []
+        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
+        current_volume = base_volume
+
+        for month in months:
+            # Add some randomness to make it look more realistic
+            np.random.seed(sum(ord(c) for c in month + keyword))
+            random_factor = 0.9 + (0.2 * np.random.random())
+            current_volume *= growth_factor * random_factor
+
+            evolution_data.append({
+                "month": month,
+                "searchVolume": int(current_volume),
+                "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
+                "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
+            })
+
+        progress(0.6, desc="Creating visualizations...")
+        # Create interactive evolution chart
+        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
+
+        # SERP results and ranking history (new feature)
+        serp_results = None
+        ranking_chart = None
+        serp_html = None
+
+        if get_serp:
+            progress(0.7, desc="Fetching SERP data...")
+            # Get SERP results
+            serp_results = simulate_google_serp(keyword)
+
+            # Update ranking history
+            update_ranking_history(keyword, serp_results)
+
+            progress(0.8, desc="Creating ranking charts...")
+            # Create ranking history chart
+            if keyword in ranking_history and len(ranking_history[keyword]) > 0:
+                ranking_chart = create_ranking_history_chart(ranking_history[keyword])
+
+            # Generate SERP HTML
+            serp_html = generate_serp_html(keyword, serp_results)
+
+        # Generate HTML for token visualization
+        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
+
+        # Generate HTML for full analysis
+        analysis_html = generate_full_analysis_html(
+            keyword,
+            full_token_analysis,
+            intent_analysis,
+            evolution_potential,
+            trends
+        )
+
+        # Generate JSON results
+        json_results = {
+            "keyword": keyword,
+            "tokenAnalysis": full_token_analysis,
+            "intentAnalysis": intent_analysis,
+            "evolutionPotential": evolution_potential,
+            "predictedTrends": trends,
+            "forecast": {
+                "months": forecast_months,
+                "scenario": growth_scenario,
+                "data": evolution_data
+            },
+            "serpResults": serp_results
+        }
+
+        progress(1.0, desc="Analysis complete!")
+        return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword
+
+    except Exception as e:
+        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
+        print(f"Error in analyze_keyword: {str(e)}")
+        return error_message, error_message, None, None, None, None, None
+
 # Create the Gradio interface
 with gr.Blocks(css="footer {visibility: hidden}") as demo:
     gr.Markdown("# Keyword DNA Analyzer")
     gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")
 
     with gr.Row():
-        with gr.Column():
-            input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
+        with gr.Column(scale=1):
+            # Add voice search capabilities
+            with gr.Group():
+                gr.Markdown("### Enter Keyword")
+                with gr.Row():
+                    input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
+
+                with gr.Row():
+                    audio_input = gr.Audio(source="microphone", type="filepath", label="Or use voice search")
+                    voice_submit_btn = gr.Button("Convert Voice to Text", variant="secondary")
 
-            # Add forecast settings
-            with gr.Accordion("Forecast Settings", open=False):
-                forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
-                growth_scenario = gr.Radio(["Conservative", "Moderate", "Aggressive"], value="Moderate", label="Growth Scenario")
+            # Add SERP settings
+            with gr.Accordion("Analysis Settings", open=False):
+                with gr.Row():
+                    forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
+                    include_serp = gr.Checkbox(label="Include SERP Analysis", value=True)
+
+                growth_scenario = gr.Radio(
+                    ["Conservative", "Moderate", "Aggressive"],
+                    value="Moderate",
+                    label="Growth Scenario"
+                )
 
             # Add loading indicator
             status_html = gr.HTML('<div style="color:gray;text-align:center;">Enter a keyword and click "Analyze DNA"</div>')
@@ -697,7 +1015,7 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
            for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                example_btns.append(gr.Button(example))
 
-        with gr.Column():
+        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()
@@ -708,17 +1026,30 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot(label="Keyword Evolution Forecast")
 
+                with gr.Tab("SERP Results"):
+                    serp_html = gr.HTML()
+
+                with gr.Tab("Ranking History"):
+                    ranking_chart = gr.Plot(label="Keyword Ranking History")
+
                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()
 
+    # Voice to text conversion handler
+    voice_submit_btn.click(
+        handle_voice_input,
+        inputs=[audio_input],
+        outputs=[input_text]
+    )
+
    # Set up event handlers
    analyze_btn.click(
        lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
        outputs=status_html
    ).then(
        analyze_keyword,
-        inputs=[input_text, forecast_months, growth_scenario],
-        outputs=[token_viz_html, analysis_html, json_output, evolution_chart]
+        inputs=[input_text, forecast_months, growth_scenario, include_serp],
+        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
@@ -739,8 +1070,8 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
        outputs=status_html
    ).then(
        analyze_keyword,
-        inputs=[input_text, forecast_months, growth_scenario],
-        outputs=[token_viz_html, analysis_html, json_output, evolution_chart]
+        inputs=[input_text, forecast_months, growth_scenario, include_serp],
+        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
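
Below the diff, for orientation: a minimal sketch of how the voice-input path added in this commit could be exercised outside the Gradio UI. It assumes app.py (with the load_models, speech_to_text, and handle_voice_input definitions above) is importable, and "query.wav" is a hypothetical recording; neither detail comes from the commit itself.

    # Sketch only: drive the Whisper-based voice path directly (assumes app.py is importable)
    import app

    app.load_models()                           # lazy-loads Whisper alongside the NLP models
    text = app.handle_voice_input("query.wav")  # hypothetical audio file; librosa resamples it to 16 kHz
    print(text)                                 # transcription, or an error string on failure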
 
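A similar sketch for the simulated-SERP flow, under the same importability assumption. Two update_ranking_history calls are shown because create_ranking_history_chart requires at least two history entries before it will plot:

    # Sketch only: simulated SERP -> in-memory ranking history -> Plotly chart
    import app

    results = app.simulate_google_serp("machine learning", num_results=5)
    app.update_ranking_history("machine learning", results)  # first history entry
    app.update_ranking_history("machine learning", results)  # second entry, new timestamp
    fig = app.create_ranking_history_chart(app.ranking_history["machine learning"])
    fig.show()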