# SEO-checker / app.py
# aiqcamp's picture
# Update app.py
# aa283e0 verified
# raw
# history blame contribute delete
# 31 kB
import base64
import concurrent.futures
import io
import re
import time
from collections import Counter
from datetime import datetime
from html import escape
from urllib.parse import urlparse, urljoin

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
class SEOChecker:
    """Run a lightweight on-page SEO audit of a single URL.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    ``analyze_seo`` records a list of typed checks ("good", "info",
    "warning", "error"), free-text suggestions, per-topic details and an
    overall 0-100 score.
    """

    # Common function words that are never offered as keyword suggestions.
    STOP_WORDS = frozenset({
        'this', 'that', 'with', 'from', 'have', 'were',
        'they', 'will', 'what', 'when', 'where', 'which',
    })

    # Points per scored check type; "info" checks are deliberately unscored.
    SCORE_POINTS = {"good": 10, "warning": 0, "error": -10}

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        self.headers = {'User-Agent': self.user_agent}

    def get_page_content(self, url):
        """Fetch a page and return ``(html, final_url, error)``.

        A missing scheme defaults to ``https://``. On success ``error`` is
        ``None`` and ``final_url`` is the URL after redirects; on failure
        ``html`` is ``None`` and ``error`` is a message string.
        """
        url = (url or "").strip()
        if not url:
            return None, url, "No URL provided"
        if not url.startswith(('http://', 'https://')):
            url = "https://" + url
        try:
            response = requests.get(url, headers=self.headers, timeout=15)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return None, url, str(e)
        # response.url reflects redirects, so later checks (HTTPS, robots.txt
        # host) look at the site that actually served the page.
        return response.text, response.url, None

    def check_page_speed(self, url):
        """Return the seconds one GET of ``url`` took, or ``None`` on failure."""
        start_time = time.perf_counter()
        try:
            requests.get(url, headers=self.headers, timeout=10)
        except requests.exceptions.RequestException:
            return None
        return time.perf_counter() - start_time

    def get_keyword_suggestions(self, content):
        """Return up to ten ``(word, count)`` pairs for the most frequent words.

        Only ASCII-letter words of 4-15 characters are counted; words in
        ``STOP_WORDS`` are ignored. Returns ``[]`` for empty content.
        """
        if not content:
            return []
        soup = BeautifulSoup(content, "html.parser")
        # Script/style bodies are code, not page copy.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # A separator keeps words from adjacent elements from being glued
        # together ("<p>foo</p><p>bar</p>" must not become "foobar").
        text = soup.get_text(" ")
        words = re.findall(r'\b[a-zA-Z]{4,15}\b', text.lower())
        counts = Counter(word for word in words if word not in self.STOP_WORDS)
        return counts.most_common(10)

    def analyze_seo(self, url):
        """Run every check against ``url`` and return the result dictionary.

        On fetch failure the dictionary has ``status == "error"`` plus
        ``message``, ``details``, ``score`` and ``suggestions``. On success it
        has ``status == "success"`` plus ``url``, ``details``, ``checks``,
        ``suggestions``, ``keywords`` and ``score``.
        """
        content, final_url, error = self.get_page_content(url)
        if error is not None or content is None:
            return {
                "status": "error",
                "message": f"Error accessing URL: {error}",
                "details": {},
                "score": 0,
                "suggestions": []
            }

        soup = BeautifulSoup(content, "html.parser")
        parsed_url = urlparse(final_url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

        result = {
            "status": "success",
            "url": final_url,
            "details": {},
            "checks": [],
            "suggestions": [],
            "keywords": self.get_keyword_suggestions(content)
        }

        # The order below fixes the order of checks/suggestions in the report.
        self._audit_title(soup, result)
        self._audit_meta_description(soup, result)
        self._audit_canonical(soup, result)
        self._audit_headings(soup, result)
        self._audit_viewport(soup, result)
        self._audit_https(final_url, result)
        self._audit_images(soup, result)
        self._audit_site_files(base_url, result)
        self._audit_open_graph(soup, result)
        self._audit_links(soup, final_url, result)
        self._audit_text_ratio(content, soup, result)
        self._audit_page_speed(final_url, result)

        result["score"] = self._compute_score(result["checks"])
        return result

    @staticmethod
    def _record(result, check_type, message, suggestion=None):
        """Append one check (and optionally one suggestion) to ``result``."""
        result["checks"].append({"type": check_type, "message": message})
        if suggestion is not None:
            result["suggestions"].append(suggestion)

    def _audit_title(self, soup, result):
        """Check presence and length (10-60 chars) of the <title> tag."""
        # get_text() instead of .string: .string is None for an empty title
        # or one containing nested markup, which used to raise AttributeError.
        title = soup.title.get_text().strip() if soup.title else ""
        title_len = len(title)
        if not title:
            title_status = "error"
        elif 10 <= title_len <= 60:
            title_status = "good"
        else:
            title_status = "warning"
        result["details"]["title"] = {
            "content": title,
            "length": title_len,
            "status": title_status
        }
        if not title:
            self._record(result, "error", "Missing <title> tag",
                         "Add a descriptive title tag between 50-60 characters")
        elif title_len > 60:
            self._record(result, "warning", f"Title is too long ({title_len} chars)",
                         "Keep title under 60 characters for better display in search results")
        elif title_len < 10:
            self._record(result, "warning", f"Title is too short ({title_len} chars)",
                         "Make title more descriptive (30-60 characters recommended)")
        else:
            self._record(result, "good", f"Title length is good ({title_len} chars)")

    def _audit_meta_description(self, soup, result):
        """Check presence and length (50-160 chars) of the meta description."""
        desc_tag = soup.find("meta", attrs={"name": "description"})
        desc = desc_tag["content"].strip() if desc_tag and desc_tag.get("content") else ""
        desc_len = len(desc)
        if not desc:
            desc_status = "error"
        elif 50 <= desc_len <= 160:
            desc_status = "good"
        else:
            desc_status = "warning"
        result["details"]["meta_description"] = {
            "content": desc,
            "length": desc_len,
            "status": desc_status
        }
        if not desc:
            self._record(result, "error", "Missing meta description",
                         "Add a meta description summarizing your page content")
        elif desc_len > 160:
            self._record(result, "warning", f"Meta description is too long ({desc_len} chars)",
                         "Keep meta description under 160 characters")
        elif desc_len < 50:
            self._record(result, "warning", f"Meta description is too short ({desc_len} chars)",
                         "Make meta description more informative (100-160 chars recommended)")
        else:
            self._record(result, "good", f"Meta description length is good ({desc_len} chars)")

    def _audit_canonical(self, soup, result):
        """Check for a <link rel="canonical"> element."""
        canonical = soup.find("link", rel="canonical")
        canonical_url = canonical.get("href") if canonical else None
        result["details"]["canonical"] = {
            "exists": canonical is not None,
            "url": canonical_url
        }
        if not canonical:
            self._record(result, "warning", "Missing canonical link",
                         "Add a canonical link to prevent duplicate content issues")
        else:
            self._record(result, "good", "Canonical link is present")

    def _audit_headings(self, soup, result):
        """Count h1-h6 and check for exactly one H1 and at least three headings."""
        headings = {f"h{i}": len(soup.find_all(f"h{i}")) for i in range(1, 7)}
        result["details"]["headings"] = headings
        h1_count = headings["h1"]
        if h1_count == 0:
            self._record(result, "error", "No H1 heading found",
                         "Add a single H1 heading that describes your main content")
        elif h1_count > 1:
            self._record(result, "warning", f"Multiple H1 headings found ({h1_count})",
                         "Use only one H1 heading per page for SEO clarity")
        else:
            self._record(result, "good", "Single H1 heading structure is good")
        if sum(headings.values()) < 3:
            self._record(result, "warning", "Few headings used in content",
                         "Structure content with more headings for readability and SEO")

    def _audit_viewport(self, soup, result):
        """Check for a viewport meta tag."""
        viewport = soup.find("meta", attrs={"name": "viewport"})
        result["details"]["viewport"] = viewport is not None
        if not viewport:
            self._record(result, "warning", "No viewport meta tag",
                         "Add viewport meta tag for mobile responsiveness")
        else:
            self._record(result, "good", "Viewport meta tag is present")

    def _audit_https(self, final_url, result):
        """Check that the page was served over HTTPS."""
        is_https = final_url.startswith("https://")
        result["details"]["https"] = is_https
        if not is_https:
            self._record(result, "error", "Site is not using HTTPS",
                         "Install SSL and redirect HTTP to HTTPS for security and SEO")
        else:
            self._record(result, "good", "Site is using HTTPS")

    def _audit_images(self, soup, result):
        """Count <img> elements that have no (or an empty) alt attribute."""
        images = soup.find_all("img")
        images_no_alt = [img.get('src', '(no src)') for img in images if not img.get("alt")]
        result["details"]["images"] = {
            "total": len(images),
            "missing_alt": len(images_no_alt),
            "examples_missing_alt": images_no_alt[:3]
        }
        if images and images_no_alt:
            self._record(result, "warning",
                         f"{len(images_no_alt)} of {len(images)} images missing alt text",
                         "Add descriptive alt attributes to all images for accessibility and SEO")
        elif images:
            self._record(result, "good", "All images have alt text")

    def _audit_site_files(self, base_url, result):
        """Probe /robots.txt and /sitemap.xml concurrently."""
        with concurrent.futures.ThreadPoolExecutor() as executor:
            robots_future = executor.submit(self.check_file_exists, urljoin(base_url, "/robots.txt"))
            sitemap_future = executor.submit(self.check_file_exists, urljoin(base_url, "/sitemap.xml"))
            robots_exists = robots_future.result()
            sitemap_exists = sitemap_future.result()
        result["details"]["robots_txt"] = robots_exists
        result["details"]["sitemap_xml"] = sitemap_exists
        if not robots_exists:
            self._record(result, "warning", "robots.txt not found",
                         "Create a robots.txt file to guide search engines")
        else:
            self._record(result, "good", "robots.txt file exists")
        if not sitemap_exists:
            self._record(result, "warning", "sitemap.xml not found",
                         "Add a sitemap.xml file for better crawling")
        else:
            self._record(result, "good", "sitemap.xml file exists")

    def _audit_open_graph(self, soup, result):
        """Check for og:title, og:description and og:image meta tags."""
        og_tags = {
            "title": soup.find("meta", property="og:title") is not None,
            "description": soup.find("meta", property="og:description") is not None,
            "image": soup.find("meta", property="og:image") is not None
        }
        result["details"]["open_graph"] = og_tags
        og_missing = [tag for tag, exists in og_tags.items() if not exists]
        if og_missing:
            self._record(result, "warning", f"Missing Open Graph tags: {', '.join(og_missing)}",
                         "Add Open Graph meta tags to improve sharing on social media")
        else:
            self._record(result, "good", "Open Graph meta tags are present")

    def _audit_links(self, soup, final_url, result):
        """Count internal and external hyperlinks on the page."""
        hrefs = [link.get('href', '') for link in soup.find_all("a", href=True)]
        internal_links, external_links = self._classify_links(hrefs, final_url)
        result["details"]["links"] = {
            "internal": len(internal_links),
            "external": len(external_links),
            "total": len(internal_links) + len(external_links)
        }
        self._record(
            result, "info",
            f"Found {len(internal_links)} internal and {len(external_links)} external links")
        # Only nag about internal links when the page has links at all.
        if len(internal_links) < 2 and (internal_links or external_links):
            result["suggestions"].append("Add more internal links to improve site structure")

    @staticmethod
    def _classify_links(hrefs, page_url):
        """Split ``hrefs`` into ``(internal, external)`` relative to ``page_url``.

        Each href is resolved against ``page_url`` and compared by host, so
        relative links count as internal, protocol-relative links to other
        hosts count as external, and a foreign URL that merely mentions the
        page's domain is not mistaken for an internal one. A leading "www."
        is ignored and subdomains of the page host count as internal.
        Fragment-only, empty, non-http(s) and unparsable hrefs are skipped.
        """
        def bare_host(parsed):
            host = (parsed.hostname or "").lower()
            return host[4:] if host.startswith("www.") else host

        page_host = bare_host(urlparse(page_url))
        internal, external = [], []
        for href in hrefs:
            href = (href or "").strip()
            if not href or href.startswith('#'):
                continue
            try:
                target = urlparse(urljoin(page_url, href))
            except ValueError:
                # e.g. malformed IPv6 literal in the href
                continue
            if target.scheme not in ("http", "https"):
                continue
            target_host = bare_host(target)
            if not target_host:
                continue
            if target_host == page_host or target_host.endswith("." + page_host):
                internal.append(href)
            else:
                external.append(href)
        return internal, external

    def _audit_text_ratio(self, content, soup, result):
        """Compare visible-text size with raw HTML size (warn below 10%)."""
        html_size = len(content)
        text_size = len(soup.get_text())
        text_ratio = (text_size / html_size) * 100 if html_size > 0 else 0
        result["details"]["content"] = {
            "html_size": html_size,
            "text_size": text_size,
            "text_ratio": text_ratio
        }
        if text_ratio < 10:
            self._record(result, "warning", f"Low text-to-HTML ratio: {text_ratio:.1f}%",
                         "Increase text content relative to HTML for better SEO")
        else:
            self._record(result, "good", f"Text-to-HTML ratio: {text_ratio:.1f}%")

    def _audit_page_speed(self, final_url, result):
        """Time one extra fetch of the page (warn above 2 seconds)."""
        load_time = self.check_page_speed(final_url)
        result["details"]["page_speed"] = load_time
        if load_time is None:
            return
        if load_time > 2:
            self._record(result, "warning", f"Slow page load time: {load_time:.2f} seconds",
                         "Optimize page speed by reducing file sizes and requests")
        else:
            self._record(result, "good", f"Page load time: {load_time:.2f} seconds")

    @staticmethod
    def _compute_score(checks):
        """Return a 0-100 score from the good/warning/error checks.

        Every scored check contributes to the maximum, so warnings pull the
        score down (good = full credit, warning = half, error = none).
        "info" checks are ignored. With nothing to score the result is 50.
        """
        points = SEOChecker.SCORE_POINTS
        scored = [check["type"] for check in checks if check["type"] in points]
        if not scored:
            return 50
        max_score = 10 * len(scored)
        total_points = sum(points[check_type] for check_type in scored)
        percentage_score = (total_points + max_score) / (2 * max_score) * 100
        return round(min(100, max(0, percentage_score)))

    def check_file_exists(self, url):
        """Return True when ``url`` answers with HTTP 200 (after redirects)."""
        try:
            # requests.head() does not follow redirects unless asked to; a
            # www/https redirect must not make the file look missing.
            response = requests.head(url, headers=self.headers, timeout=5,
                                     allow_redirects=True)
            if response.status_code in (405, 501):
                # Server refuses HEAD: retry with GET without reading the body.
                response = requests.get(url, headers=self.headers, timeout=5,
                                        stream=True)
                response.close()
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def generate_chart(self, result):
        """Return a ``{category: passed}`` mapping for the chart, or ``None``.

        ``None`` is returned for an error result.
        """
        if result["status"] == "error":
            return None
        details = result["details"]
        categories = {
            "title": details["title"]["status"] == "good",
            "meta_description": details["meta_description"]["status"] == "good",
            "headings": details["headings"]["h1"] == 1,
            "https": details["https"],
            "images": details["images"]["total"] == 0 or details["images"]["missing_alt"] == 0,
            "robots_sitemap": details["robots_txt"] and details["sitemap_xml"],
            "open_graph": all(details["open_graph"].values())
        }
        return categories
def format_result_html(result):
    """Format an ``SEOChecker.analyze_seo`` result as an HTML report.

    Args:
        result: The result dictionary. An error result (``status ==
            "error"``) only needs ``message``; a success result needs
            ``score``, ``url``, ``checks``, ``details``, ``keywords`` and
            ``suggestions``.

    Returns:
        An HTML fragment as a string.

    All dynamic text is HTML-escaped: titles, descriptions, URLs and error
    messages come from the audited site, and even the built-in message
    "Missing <title> tag" would otherwise be parsed as a real <title>
    element and swallow the rest of the report.
    """
    if result["status"] == "error":
        return f"""
<div style="padding: 20px; background-color: #ffebee; border-radius: 8px; margin-bottom: 20px;">
<h3 style="color: #c62828;">Error</h3>
<p>{escape(str(result["message"]))}</p>
</div>
"""

    # Calculate counts for each check type
    check_counts = {"good": 0, "info": 0, "warning": 0, "error": 0}
    for check in result["checks"]:
        check_counts[check["type"]] = check_counts.get(check["type"], 0) + 1

    details = result["details"]
    score = result["score"]
    page_url = escape(str(result["url"]))
    title = details["title"]
    description = details["meta_description"]
    title_text = escape(title["content"]) or "Missing"
    title_mark = " ✓" if title["status"] == "good" else " ⚠"
    description_text = escape(description["content"]) or "Missing"
    description_mark = " ✓" if description["status"] == "good" else " ⚠"
    headings = details["headings"]
    https_mark = "✓" if details["https"] else "✕"
    robots_mark = "✓" if details["robots_txt"] else "✕"
    sitemap_mark = "✓" if details["sitemap_xml"] else "✕"
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')

    # Collect fragments and join once at the end.
    parts = [f"""
<div style="font-family: Arial, sans-serif;">
<div style="display: flex; align-items: center; margin-bottom: 20px;">
<div style="width: 120px; height: 120px; position: relative; margin-right: 20px;">
<div style="position: absolute; width: 100%; height: 100%; border-radius: 50%; background: conic-gradient(
from 0deg,
#4caf50 0% {score}%,
#e0e0e0 {score}% 100%
);"></div>
<div style="position: absolute; top: 10px; left: 10px; right: 10px; bottom: 10px; background: white; border-radius: 50%; display: flex; align-items: center; justify-content: center; flex-direction: column;">
<span style="font-size: 28px; font-weight: bold;">{score}</span>
<span style="font-size: 12px;">SEO Score</span>
</div>
</div>
<div>
<h2 style="margin: 0 0 10px 0;">SEO Report for {page_url}</h2>
<div style="display: flex; flex-wrap: wrap; gap: 10px;">
<span style="background-color: #e8f5e9; color: #2e7d32; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
{check_counts["good"]} Passed
</span>
<span style="background-color: #fff8e1; color: #f57c00; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
{check_counts["warning"]} Warnings
</span>
<span style="background-color: #ffebee; color: #c62828; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
{check_counts["error"]} Errors
</span>
<span style="background-color: #e3f2fd; color: #1565c0; padding: 5px 10px; border-radius: 4px; font-size: 12px;">
{check_counts["info"]} Info
</span>
</div>
<div style="margin-top: 10px; color: #555; font-size: 13px;">
Generated on {generated_at}
</div>
</div>
</div>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-bottom: 20px;">
<div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
<h3 style="margin-top: 0; color: #333;">Page Details</h3>
<table style="width: 100%; border-collapse: collapse;">
<tr>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd; width: 40%; color: #777;">Title</td>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
{title_text}
<div style="font-size: 12px; color: #777; margin-top: 4px;">
Length: {title["length"]} chars
{title_mark}
</div>
</td>
</tr>
<tr>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Meta Description</td>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
{description_text}
<div style="font-size: 12px; color: #777; margin-top: 4px;">
Length: {description["length"]} chars
{description_mark}
</div>
</td>
</tr>
<tr>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Headings</td>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
H1: {headings["h1"]},
H2: {headings["h2"]},
H3: {headings["h3"]}
</td>
</tr>
<tr>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Security & Files</td>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
HTTPS: {https_mark},
robots.txt: {robots_mark},
sitemap.xml: {sitemap_mark}
</td>
</tr>
<tr>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd; color: #777;">Links</td>
<td style="padding: 8px 0; border-bottom: 1px solid #ddd;">
Internal: {details["links"]["internal"]},
External: {details["links"]["external"]}
</td>
</tr>
<tr>
<td style="padding: 8px 0; color: #777;">Images</td>
<td style="padding: 8px 0;">
Total: {details["images"]["total"]},
Missing alt: {details["images"]["missing_alt"]}
</td>
</tr>
</table>
</div>
<div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px;">
<h3 style="margin-top: 0; color: #333;">Top Potential Keywords</h3>
<div style="max-height: 200px; overflow-y: auto;">
<table style="width: 100%; border-collapse: collapse;">
<tr style="background-color: #eee;">
<th style="padding: 8px; text-align: left; border-bottom: 1px solid #ddd;">Keyword</th>
<th style="padding: 8px; text-align: right; border-bottom: 1px solid #ddd;">Frequency</th>
</tr>
"""]

    # Add keyword rows
    for keyword, count in result["keywords"]:
        parts.append(f"""
<tr>
<td style="padding: 8px; border-bottom: 1px solid #ddd;">{escape(str(keyword))}</td>
<td style="padding: 8px; border-bottom: 1px solid #ddd; text-align: right;">{count}</td>
</tr>
""")

    parts.append("""
</table>
</div>
</div>
</div>
<div style="margin-bottom: 20px;">
<h3 style="color: #333;">SEO Checks</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 15px;">
""")

    # Add check cards
    icons = {
        "good": "✓",
        "info": "ℹ",
        "warning": "⚠",
        "error": "✕"
    }
    bg_colors = {
        "good": "#e8f5e9",
        "info": "#e3f2fd",
        "warning": "#fff8e1",
        "error": "#ffebee"
    }
    text_colors = {
        "good": "#2e7d32",
        "info": "#1565c0",
        "warning": "#f57c00",
        "error": "#c62828"
    }
    for check in result["checks"]:
        kind = check["type"]
        # Unknown check types fall back to the neutral "info" styling.
        parts.append(f"""
<div style="background-color: {bg_colors.get(kind, bg_colors["info"])}; border-radius: 8px; padding: 12px; position: relative;">
<div style="position: absolute; top: 12px; right: 12px; font-size: 18px;">
{icons.get(kind, icons["info"])}
</div>
<div style="color: {text_colors.get(kind, text_colors["info"])}; margin-bottom: 5px; font-weight: bold;">
{escape(str(kind).capitalize())}
</div>
<div style="color: #333;">
{escape(str(check["message"]))}
</div>
</div>
""")

    parts.append("""
</div>
</div>
<div style="background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
<h3 style="margin-top: 0; color: #333;">Improvement Suggestions</h3>
<ul style="margin: 0; padding-left: 20px;">
""")

    # Add suggestions
    for suggestion in result["suggestions"]:
        parts.append(f"""
<li style="margin-bottom: 8px;">{escape(str(suggestion))}</li>
""")

    parts.append("""
</ul>
</div>
</div>
""")
    return "".join(parts)
def _render_chart_html(chart_data):
    """Render the category pass/fail bar chart as a self-contained <img> tag.

    The PNG is embedded as a base64 data URI rather than written to a fixed
    file name, so concurrent requests cannot overwrite each other's chart and
    the image does not depend on the web server exposing a local file.
    """
    categories = list(chart_data)
    values = [int(bool(passed)) * 100 for passed in chart_data.values()]
    colors = ['#4caf50' if v == 100 else '#f57c00' for v in values]
    y_pos = np.arange(len(categories))

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.barh(y_pos, values, color=colors)
        ax.set_yticks(y_pos)
        ax.set_yticklabels([c.replace('_', ' ').title() for c in categories])
        # Leave room right of 100 so the "100%" labels stay inside the axes.
        ax.set_xlim(0, 115)
        ax.set_xticks(range(0, 101, 20))
        ax.set_title('SEO Category Performance')
        ax.set_xlabel('Score (%)')
        for i, v in enumerate(values):
            ax.text(v + 2, i, f"{v}%", va='center')
        fig.tight_layout()
        buffer = io.BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)

    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return (
        f'<img src="data:image/png;base64,{encoded}" '
        'alt="SEO Performance Chart" style="width:100%;max-width:800px;">'
    )


def seo_analysis(url):
    """Run the SEO analysis for ``url`` and return the three UI outputs.

    Returns:
        A ``(text_report, html_report, chart_html)`` tuple. When the page
        cannot be fetched, the error message is returned as the text report
        and the other two elements are empty strings.
    """
    checker = SEOChecker()
    result = checker.analyze_seo(url)
    if result["status"] == "error":
        return result["message"], "", ""

    # Format text report; any unrecognised check type is shown as an error.
    icons = {"good": "✓", "info": "ℹ", "warning": "⚠"}
    lines = [
        f"SEO Score: {result['score']}/100 for {result['url']}",
        "",
        "--- SEO CHECKS ---",
    ]
    lines.extend(
        f"{icons.get(check['type'], '✕')} {check['message']}"
        for check in result["checks"]
    )
    lines.append("")
    lines.append("--- SUGGESTIONS ---")
    lines.extend(
        f"{i}. {suggestion}"
        for i, suggestion in enumerate(result["suggestions"], 1)
    )
    text_report = "\n".join(lines) + "\n"

    # Format HTML report
    html_report = format_result_html(result)

    # Generate chart
    chart_data = checker.generate_chart(result)
    chart_html = _render_chart_html(chart_data) if chart_data else ""
    return text_report, html_report, chart_html
def generate_example_report():
    """Analyze a randomly chosen sample site and return a demo report.

    Returns:
        A ``(text_report, html_report, chart_html)`` tuple shaped like the
        output of ``seo_analysis``. The text report lists only the first
        five checks and ``chart_html`` is always empty for the example.
    """
    import random

    sample_urls = [
        "https://example.com",
        "https://websitelayout.net",
        "https://yahoo.com"
    ]
    # Select a random sample URL
    sample_url = random.choice(sample_urls)

    # Run analysis
    checker = SEOChecker()
    result = checker.analyze_seo(sample_url)
    if result["status"] == "error":
        return f"Error analyzing {sample_url}: {result.get('message', 'Unknown error')}", "", ""

    # Format text report; any unrecognised check type is shown as an error.
    icons = {"good": "✓", "info": "ℹ", "warning": "⚠"}
    lines = [
        f"SAMPLE REPORT - URL: {sample_url}",
        "",
        f"SEO Score: {result['score']}/100",
        "",
        "--- KEY FINDINGS ---",
    ]
    lines.extend(
        f"{icons.get(check['type'], '✕')} {check['message']}"
        for check in result["checks"][:5]  # Just show top 5 findings
    )
    lines.append("")
    lines.append("(This is an example report - enter your own URL for a full analysis)")
    text_report = "\n".join(lines)

    # HTML report
    html_report = format_result_html(result)
    html_report += '<div style="background-color: #e3f2fd; color: #0d47a1; padding: 10px; border-radius: 4px; margin-top: 20px; text-align: center;">This is an example report - enter your own URL for a full analysis</div>'

    # No chart is rendered for the example, so no chart data is computed.
    chart_html = ""
    return text_report, html_report, chart_html
# Set up the Gradio interface
def create_interface():
    """Build the Gradio Blocks UI and wire both buttons to their handlers.

    Returns the ``gr.Blocks`` instance; launching it is left to the caller.
    """
    custom_css = """
.container { max-width: 1200px; margin: 0 auto; }
.header { margin-bottom: 20px; text-align: center; }
.header h1 { margin-bottom: 5px; color: #1e88e5; }
.header p { color: #555; }
.footer { margin-top: 30px; text-align: center; color: #777; font-size: 12px; }
.score-box { display: flex; align-items: center; gap: 20px; padding: 20px; margin-bottom: 20px; }
.url-input { margin-bottom: 20px; }
.report-container { border-radius: 10px; overflow: hidden; }
"""
    header_markup = """
<div class="header">
<h1>Advanced SEO Website Analyzer</h1>
<p>Perform a comprehensive SEO audit of any website with detailed insights and recommendations</p>
</div>
"""
    footer_markup = """
<div class="footer">
<p>© 2025 SEO Website Analyzer | Provides quick, comprehensive SEO analysis</p>
</div>
"""
    report_placeholder = '<div style="height: 400px; display: flex; justify-content: center; align-items: center; background-color: #f5f5f5; border-radius: 8px;"><p style="color: #777;">Enter a URL and click "Analyze Website" to see a detailed report here.</p></div>'

    with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal"), css=custom_css) as demo:
        gr.HTML(header_markup)

        with gr.Row(equal_height=True):
            # Left column: URL entry, action buttons and the text summary.
            with gr.Column():
                url_input = gr.Textbox(
                    label="Website URL to Analyze",
                    placeholder="Enter URL (e.g., example.com or https://example.com)",
                    scale=3
                )
                with gr.Row():
                    analyze_btn = gr.Button("Analyze Website", variant="primary", scale=2)
                    example_btn = gr.Button("See Example Report", scale=1)
                text_output = gr.Textbox(
                    label="Text Summary",
                    placeholder="SEO analysis results will appear here...",
                    lines=10,
                    max_lines=20
                )
            # Right column: the rendered HTML report.
            with gr.Column():
                html_output = gr.HTML(
                    label="Visual Report",
                    value=report_placeholder
                )

        with gr.Row():
            chart_output = gr.HTML(
                label="Performance Chart",
                value=""
            )

        gr.HTML(footer_markup)

        # Both handlers fill the same three output components.
        report_outputs = [text_output, html_output, chart_output]
        analyze_btn.click(
            fn=seo_analysis,
            inputs=url_input,
            outputs=report_outputs
        )
        example_btn.click(
            fn=generate_example_report,
            inputs=[],
            outputs=report_outputs
        )
    return demo
# Run the app
if __name__ == "__main__":
    # Build and launch the UI only when this file is executed as a script,
    # so importing the module has no side effects.
    demo = create_interface()
    # NOTE(review): share=True requests a public Gradio share link in
    # addition to the local server - confirm this is wanted where the app
    # is deployed.
    demo.launch(share=True)