Update app.py
app.py
CHANGED
@@ -1,7 +1,513 @@
import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from transformers import BertTokenizerFast
import matplotlib.pyplot as plt
import json

# Initialize models
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)

# Intent classification - using zero-shot classification
intent_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
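
# Note: each token-classification pipeline call returns a list of dicts, e.g.
# ner_pipeline("Paris is lovely") yields entries like
# {"word": "Paris", "entity": "B-LOC", "score": 0.99, ...}; the matching logic
# in analyze_keyword() below relies on the "word" and "entity" keys.
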
def get_token_colors(token_type):
    colors = {
        "prefix": "#D8BFD8",          # Light purple
        "suffix": "#AEDAA4",          # Light green
        "stem": "#A4C2F4",            # Light blue
        "compound_first": "#FFCC80",  # Light orange
        "compound_second": "#FFCC80", # Light orange
        "word": "#E5E5E5"             # Light gray
    }
    return colors.get(token_type, "#E5E5E5")

def simulate_historical_data(token):
    """Generate simulated historical usage data for a token"""
    eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]

    # Different patterns based on token characteristics
    if len(token) > 8:
        # Possibly a technical term - recent growth
        values = [10, 20, 30, 60, 85, 95]
    elif token.startswith(("un", "re", "de", "pre")):
        # Prefix words tend to be older
        values = [45, 50, 60, 70, 75, 80]
    else:
        # Standard pattern for common words
        base = 50 + (hash(token) % 30)
        noise = np.random.normal(0, 5, 6)
        values = [max(5, min(95, base + i*5 + n)) for i, n in enumerate(noise)]

    return list(zip(eras, values))

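# Example shape of the return value: [("1900s", 45), ("1950s", 50), ...,
# ("Present", 80)] - era labels paired with usage values clamped to 5-95.
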
def generate_origin_data(token):
    """Generate simulated origin/etymology data for a token"""
    origins = [
        {"era": "Ancient", "language": "Latin"},
        {"era": "Ancient", "language": "Greek"},
        {"era": "Medieval", "language": "Old English"},
        {"era": "16th century", "language": "French"},
        {"era": "18th century", "language": "Germanic"},
        {"era": "19th century", "language": "Anglo-Saxon"},
        {"era": "20th century", "language": "Modern English"}
    ]

    # Selection is stable within a single run; note that Python salts str
    # hashes per process, so it can differ across restarts
    index = hash(token) % len(origins)
    origin = origins[index]

    note = f"First appeared in {origin['era']} texts derived from {origin['language']}."
    origin["note"] = note

    return origin

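# The returned dict carries "era", "language", and "note" keys, which the
# HTML generator below reads directly.
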
def analyze_token_types(tokens):
    """Identify token types (prefix, suffix, compound, etc.)"""
    processed_tokens = []

    prefixes = ["un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super"]
    suffixes = ["ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less"]

    for token in tokens:
        token_text = token.lower()
        token_type = "word"

        # Check for prefixes
        for prefix in prefixes:
            if token_text.startswith(prefix) and len(token_text) > len(prefix) + 2:
                if token_text != prefix:  # Make sure the word isn't just the prefix
                    token_type = "prefix"
                    break

        # Check for suffixes
        if token_type == "word":
            for suffix in suffixes:
                if token_text.endswith(suffix) and len(token_text) > len(suffix) + 2:
                    token_type = "suffix"
                    break

        # Check for compound words (simplified)
        if token_type == "word" and len(token_text) > 8:
            token_type = "compound_first"  # Simplified - in reality would need more analysis

        processed_tokens.append({
            "text": token_text,
            "type": token_type
        })

    return processed_tokens

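# For example, analyze_token_types(["preprocessing", "breakdown"]) returns
# [{"text": "preprocessing", "type": "prefix"},
#  {"text": "breakdown", "type": "compound_first"}].
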
def plot_historical_data(historical_data):
    """Create a plot of historical usage data (helper; not currently wired into the UI)"""
    eras = [item[0] for item in historical_data]
    values = [item[1] for item in historical_data]

    plt.figure(figsize=(8, 3))
    plt.bar(eras, values, color='skyblue')
    plt.title('Historical Usage')
    plt.xlabel('Era')
    plt.ylabel('Usage Level')
    plt.ylim(0, 100)
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Return the Figure object rather than the pyplot module so callers
    # (e.g. gr.Plot) receive a concrete figure
    return plt.gcf()

def analyze_keyword(keyword):
    """Run the full analysis for a keyword and return data for each UI output."""
    if not keyword.strip():
        return None, None, None, None, None

    # Basic tokenization
    words = keyword.strip().lower().split()

    # Get token types
    token_analysis = analyze_token_types(words)

    # Get NER tags
    ner_results = ner_pipeline(keyword)

    # Get POS tags
    pos_results = pos_pipeline(keyword)

    # Process and organize results
    full_token_analysis = []
    for token in token_analysis:
        # Find POS tag for this token
        pos_tag = "NOUN"  # Default
        for pos_result in pos_results:
            if pos_result["word"].lower() == token["text"]:
                pos_tag = pos_result["entity"]
                break

        # Find entity type if any
        entity_type = None
        for ner_result in ner_results:
            if ner_result["word"].lower() == token["text"]:
                entity_type = ner_result["entity"]
                break

        # Generate historical data
        historical_data = simulate_historical_data(token["text"])

        # Generate origin data
        origin = generate_origin_data(token["text"])

        # Calculate importance (simplified algorithm)
        importance = 60 + (len(token["text"]) * 2)
        importance = min(95, importance)

        # Generate related terms (simplified)
        related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]

        full_token_analysis.append({
            "token": token["text"],
            "type": token["type"],
            "posTag": pos_tag,
            "entityType": entity_type,
            "importance": importance,
            "historicalData": historical_data,
            "origin": origin,
            "relatedTerms": related_terms
        })

    # Intent analysis
    intent_result = intent_classifier(
        keyword,
        candidate_labels=["informational", "navigational", "transactional"]
    )

    intent_analysis = {
        "type": intent_result["labels"][0].capitalize(),
        "strength": round(intent_result["scores"][0] * 100),
        "mutations": [
            f"{intent_result['labels'][0]}-variation-1",
            f"{intent_result['labels'][0]}-variation-2"
        ]
    }

    # Evolution potential (simplified calculation)
    evolution_potential = min(95, 65 + (len(keyword) % 30))

    # Predicted trends (simplified)
    trends = [
        "Voice search adaptation",
        "Visual search integration"
    ]

    # Evolution chart data (simulated)
    evolution_data = [
        {"month": "Jan", "searchVolume": 1000, "competitionScore": 45, "intentClarity": 80},
        {"month": "Feb", "searchVolume": 1200, "competitionScore": 48, "intentClarity": 82},
        {"month": "Mar", "searchVolume": 1100, "competitionScore": 52, "intentClarity": 85},
        {"month": "Apr", "searchVolume": 1400, "competitionScore": 55, "intentClarity": 88},
        {"month": "May", "searchVolume": 1800, "competitionScore": 58, "intentClarity": 90},
        {"month": "Jun", "searchVolume": 2200, "competitionScore": 60, "intentClarity": 92}
    ]

    # Create plots
    evolution_chart = create_evolution_chart(evolution_data)

    # Generate HTML for token visualization
    token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)

    # Generate HTML for full analysis
    analysis_html = generate_full_analysis_html(
        keyword,
        full_token_analysis,
        intent_analysis,
        evolution_potential,
        trends
    )

    # Generate JSON results
    json_results = {
        "keyword": keyword,
        "tokenAnalysis": full_token_analysis,
        "intentAnalysis": intent_analysis,
        "evolutionPotential": evolution_potential,
        "predictedTrends": trends
    }

    return token_viz_html, analysis_html, json_results, evolution_chart, full_token_analysis

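# analyze_keyword returns a 5-tuple: token HTML, full-analysis HTML, a
# JSON-able dict, a matplotlib figure, and the raw token list (held in a
# gr.State component in the UI below).
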
def create_evolution_chart(data):
    """Create an evolution chart from data"""
    df = pd.DataFrame(data)

    plt.figure(figsize=(10, 5))
    plt.plot(df['month'], df['searchVolume'], marker='o', label='Search Volume')
    # Scores are on a 0-100 scale; multiply by 20 so they remain visible
    # alongside raw search volume
    plt.plot(df['month'], df['competitionScore']*20, marker='s', label='Competition Score')
    plt.plot(df['month'], df['intentClarity']*20, marker='^', label='Intent Clarity')

    plt.title('Predicted Evolution')
    plt.xlabel('Month')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Return the Figure object rather than the pyplot module for gr.Plot
    return plt.gcf()

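# The two generators below build inline-styled HTML strings for gr.HTML
# components; Gradio renders them as raw HTML, so all styling is kept inline.
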
def generate_token_visualization_html(token_analysis, full_analysis):
    """Generate HTML for token visualization"""
    html = """
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Token Visualization</h2>

        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add human view tokens
    for token in token_analysis:
        html += f"""
        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
            {token['text']}
        </div>
        """

    html += """
            </div>
        </div>

        <div style="text-align: center; margin: 15px 0;">
            <span style="font-size: 20px;">↓</span>
        </div>

        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add machine view tokens
    for token in full_analysis:
        bg_color = get_token_colors(token["type"])
        html += f"""
        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
            {token['token']}
            <span style="font-size: 10px; opacity: 0.7; display: block;">{token['type']}</span>
        </div>
        """

    html += """
            </div>
        </div>

        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
    """

    # Add stats
    word_count = len(token_analysis)
    token_count = len(full_analysis)
    ratio = round(token_count / max(1, word_count), 2)

    html += f"""
        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
            <div style="font-size: 14px; color: #4299e1;">Words</div>
        </div>

        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
        </div>

        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
        </div>
    """

    html += """
        </div>
    </div>
    """

    return html

def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
    """Generate HTML for full keyword analysis"""
    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {keyword}</h2>

        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
                    <span>Type:</span>
                    <span>{intent_analysis['type']}</span>
                </div>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <span>Strength:</span>
                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
                    <div style="position: relative; width: 100px; height: 100px;">
                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
                        </div>
                        <svg width="100" height="100" viewBox="0 0 36 36">
                            <path
                                d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
                                fill="none"
                                stroke="#4CAF50"
                                stroke-width="3"
                                stroke-dasharray="{evolution_potential}, 100"
                            />
                        </svg>
                    </div>
                </div>
            </div>
        </div>

        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
            <div style="display: flex; flex-direction: column; gap: 8px;">
    """

    # Add trends
    for trend in trends:
        html += f"""
        <div style="display: flex; align-items: center; gap: 8px;">
            <span style="color: #48bb78;">↗</span>
            <span>{trend}</span>
        </div>
        """

    html += """
            </div>
        </div>

        <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
    """

    # Add token details
    for token in token_analysis:
        html += f"""
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="font-size: 18px; font-weight: 500;">{token['token']}</span>
                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{token['posTag']}</span>
        """

        if token['entityType']:
            html += f"""
            <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
                ⓘ {token['entityType']}
            </span>
            """

        html += f"""
                </div>
                <div style="display: flex; align-items: center; gap: 4px;">
                    <span style="font-size: 12px; color: #718096;">Importance:</span>
                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
                    </div>
                </div>
            </div>

            <div style="margin-top: 15px;">
                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
                    <div style="font-size: 12px; margin-bottom: 8px;">
                        <span style="font-weight: 500;">Origin: </span>
                        <span>{token['origin']['era']}, </span>
                        <span style="font-style: italic;">{token['origin']['language']}</span>
                    </div>
                    <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>

                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
        """

        # Add historical data bars; enumerate replaces the per-bar
        # list.index() lookup, which was O(n^2) and picked the wrong bar
        # whenever two (period, value) pairs were identical
        for i, (period, value) in enumerate(token['historicalData']):
            opacity = 0.3 + (i * 0.1)
            html += f"""
            <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
                <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
                <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
                    {period}
                </div>
            </div>
            """

        html += """
                    </div>
                </div>
            </div>
        </div>
        """

    html += """
    </div>
    """

    return html

# Create the Gradio interface
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Keyword DNA Analyzer")
    gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
            analyze_btn = gr.Button("Analyze DNA", variant="primary")

            with gr.Row():
                example_btns = []
                for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                    example_btns.append(gr.Button(example))

        with gr.Column():
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()

                with gr.Tab("Full Analysis"):
                    analysis_html = gr.HTML()

                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot()

                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()

    # Hidden state to receive the raw token list (the 5th return value);
    # every returned value needs a component, so None is not a valid entry
    # in an outputs list
    full_analysis_state = gr.State()

    # Set up event handlers
    analyze_btn.click(
        analyze_keyword,
        inputs=[input_text],
        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, full_analysis_state]
    )

    # Example buttons: copy the button label into the textbox, then analyze
    for btn in example_btns:
        btn.click(
            lambda btn_text: btn_text,
            inputs=[btn],
            outputs=[input_text]
        ).then(
            analyze_keyword,
            inputs=[input_text],
            outputs=[token_viz_html, analysis_html, json_output, evolution_chart, full_analysis_state]
        )

+
# Launch the app
|
513 |
+
demo.launch()
|