Spaces:

Merlintxu
/

SEO

Sleeping

App Files Files Community

SEO / app.py

Merlintxu

Update app.py

2e8fa9d verified 19 days ago

raw

history blame contribute delete

7.25 kB

	import gradio as gr
	import json
	import pandas as pd
	import spacy
	import subprocess
	import sys
	import logging
	from pathlib import Path
	from seo_analyzer import SEOSpaceAnalyzer

	# Configure the root logger so INFO-level (and higher) records are emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger used by the functions below.
	logger = logging.getLogger(__name__)

	def setup_spacy_model():
	    """Load the spaCy model the app needs, downloading it when loading fails.

	    ``spacy.load("es_core_news_lg")`` is attempted first. If it raises
	    ``OSError``, ``python -m spacy download es_core_news_lg`` is run with the
	    current interpreter. Because ``check=True`` is passed, a failed download
	    raises ``subprocess.CalledProcessError``.
	    """
	    try:
	        spacy.load("es_core_news_lg")
	    except OSError:
	        logger.info("Descargando spaCy model es_core_news_lg...")
	        # sys.executable keeps the download inside the running environment.
	        download_cmd = [sys.executable, "-m", "spacy", "download", "es_core_news_lg"]
	        subprocess.run(download_cmd, check=True)
	    else:
	        logger.info("Modelo spaCy 'es_core_news_lg' cargado correctamente.")

	def list_content_storage_files() -> list:
	    """Return the files stored under the ``content_storage`` folder.

	    The folder is walked recursively and every regular file is reported as a
	    path string relative to ``content_storage``. The result is sorted so the
	    listing is stable between calls.

	    Returns:
	        A list of relative path strings; empty when the folder is missing.
	    """
	    base_dir = Path("content_storage")
	    if not base_dir.is_dir():
	        return []
	    # rglob("*") instead of a pattern ending in "/": since Python 3.11 a
	    # trailing separator restricts matches to directories, which the
	    # is_file() filter below would then discard, leaving an empty list.
	    return [
	        str(entry.relative_to(base_dir))
	        for entry in sorted(base_dir.rglob("*"))
	        if entry.is_file()
	    ]

	def download_storage_file(selected_file: str) -> str:
	    """Resolve a file name inside ``content_storage`` to a downloadable path.

	    Args:
	        selected_file: File name relative to ``content_storage``.

	    Returns:
	        The path (as a string) when it names an existing regular file located
	        inside ``content_storage``; an empty string otherwise.
	    """
	    if not selected_file:
	        return ""
	    base_dir = Path("content_storage")
	    file_path = base_dir / selected_file
	    # The name comes from the UI, so reject anything that resolves outside
	    # the storage folder (e.g. "../x" or an absolute path).
	    try:
	        file_path.resolve().relative_to(base_dir.resolve())
	    except ValueError:
	        return ""
	    # is_file() rather than exists(): a directory cannot be served as a file.
	    return str(file_path) if file_path.is_file() else ""

	def refresh_file_list() -> list:
	    """Re-read ``content_storage`` and return the current file listing."""
	    current_files = list_content_storage_files()
	    return current_files

	# Build the Gradio interface.
	def create_interface() -> gr.Blocks:
	    """Assemble the Gradio Blocks UI for the SEO analyzer and wire its events.

	    A single ``SEOSpaceAnalyzer`` instance is created and shared by every
	    callback through closure. The UI consists of a sitemap URL input, action
	    buttons, a status box and tabs showing the analysis results plus a
	    browser for the files in ``content_storage``.

	    Returns:
	        The configured ``gr.Blocks`` application (not yet launched).
	    """
	    analyzer = SEOSpaceAnalyzer()
	    storage_dir = Path("content_storage")

	    def analyze_with_callbacks(sitemap_url: str):
	        """Run the sitemap analysis and append the collected status text."""
	        status_msgs = []

	        def status_callback(msg: str):
	            status_msgs.append(msg)
	            logger.info(msg)

	        def progress_callback(current: int, total: int):
	            logger.info("Batch %s de %s procesado.", current, total)

	        # Per the original author's note, analyze_sitemap works in batches
	        # and returns a 7-tuple: (stats, recommendations, content_analysis,
	        # links, details, similarities, seo_tags). Not verifiable from here.
	        results = analyzer.analyze_sitemap(
	            sitemap_url,
	            progress_callback=progress_callback,
	            status_callback=status_callback,
	        )
	        final_status = "\n".join(status_msgs) if status_msgs else "Análisis completado."
	        # The results plus the final status message feed the 8 outputs.
	        return (*results, final_status)

	    def export_json() -> str:
	        """Write the current analysis as JSON; return its path or ""."""
	        analysis = analyzer.current_analysis
	        if not analysis:
	            return ""
	        # The folder may not exist yet; open() would fail without it.
	        storage_dir.mkdir(parents=True, exist_ok=True)
	        path = storage_dir / "seo_report.json"
	        with open(path, "w", encoding="utf-8") as f:
	            json.dump(analysis, f, indent=2, ensure_ascii=False)
	        return str(path)

	    def export_csv() -> str:
	        """Write one CSV row per analysed URL; return its path or ""."""
	        analysis = analyzer.current_analysis
	        if not analysis:
	            return ""
	        topics = analysis.get("topics", {})
	        summaries = analysis.get("summaries", {})
	        rows = [
	            {
	                "url": url,
	                "title": seo.get("title", ""),
	                "meta_description": seo.get("meta_description", ""),
	                "flags": ", ".join(seo.get("flags", [])),
	                "topics": ", ".join(topics.get(url, [])),
	                "summary": summaries.get(url, ""),
	            }
	            for url, seo in analysis.get("seo_tags", {}).items()
	        ]
	        storage_dir.mkdir(parents=True, exist_ok=True)
	        path = storage_dir / "seo_summary.csv"
	        pd.DataFrame(rows).to_csv(path, index=False)
	        return str(path)

	    def analysis_section(key: str):
	        """Return one section of the current analysis, or {} when unset."""
	        # current_analysis may be empty/None before the first run.
	        return (analyzer.current_analysis or {}).get(key, {})

	    with gr.Blocks(title="SEO Analyzer Pro", theme=gr.themes.Soft()) as demo:
	        gr.Markdown("""
	        # 🧠 SEO Analyzer Pro
	        Este espacio analiza contenido web orientado a normativa bancaria y genera:
	        - Temas inferidos automáticamente
	        - Títulos y meta descripciones SEO
	        - Alertas por lenguaje de riesgo
	        """)
	        with gr.Row():
	            sitemap_input = gr.Textbox(label="📍 URL del Sitemap", placeholder="https://ejemplo.com/sitemap.xml")
	            analyze_btn = gr.Button("🔍 Analizar")
	            clear_btn = gr.Button("🧹 Limpiar")
	            download_json_btn = gr.Button("📥 Descargar JSON")
	            download_csv_btn = gr.Button("📤 Descargar CSV")
	        status_output = gr.Textbox(label="Estado del análisis", interactive=False)
	        with gr.Tabs():
	            with gr.Tab("📊 Resumen"):
	                stats_output = gr.JSON(label="Estadísticas")
	                recommendations_output = gr.JSON(label="Recomendaciones SEO")
	            with gr.Tab("📝 Contenido"):
	                content_output = gr.JSON(label="Análisis de contenido")
	            with gr.Tab("🔗 Enlaces"):
	                links_output = gr.JSON(label="Análisis de enlaces")
	                links_plot = gr.Plot(label="Visualización de enlaces internos")
	            with gr.Tab("📄 Detalles"):
	                details_output = gr.JSON(label="Detalles por página")
	            with gr.Tab("🧠 SEO y Temas"):
	                seo_tags_output = gr.JSON(label="Metadatos SEO generados")
	                # The next two are refreshed by the change event of seo_tags_output.
	                topics_output = gr.JSON(label="Temas inferidos")
	                flags_output = gr.JSON(label="Términos prohibidos detectados")
	            with gr.Tab("🔗 Similitud"):
	                similarity_output = gr.JSON(label="Similitud entre URLs")
	            with gr.Tab("📁 Archivos"):
	                file_dropdown = gr.Dropdown(label="Archivos en content_storage", choices=list_content_storage_files())
	                refresh_btn = gr.Button("Actualizar lista")
	                download_file_btn = gr.Button("Descargar Archivo Seleccionado", variant="secondary")
	                file_download = gr.File(label="Archivo Seleccionado")

	        # Defined once so "analyze" and "clear" always target the same outputs.
	        analysis_outputs = [
	            stats_output, recommendations_output, content_output,
	            links_output, details_output, similarity_output,
	            seo_tags_output, status_output,
	        ]
	        analyze_btn.click(
	            fn=analyze_with_callbacks,
	            inputs=sitemap_input,
	            outputs=analysis_outputs,
	            show_progress=True,
	        )
	        clear_btn.click(fn=lambda: [None] * len(analysis_outputs), outputs=analysis_outputs)
	        download_json_btn.click(fn=export_json, outputs=status_output)
	        download_csv_btn.click(fn=export_csv, outputs=status_output)
	        links_output.change(fn=analyzer.plot_internal_links, inputs=links_output, outputs=links_plot)
	        seo_tags_output.change(fn=lambda: analysis_section("topics"), outputs=topics_output)
	        seo_tags_output.change(fn=lambda: analysis_section("flags"), outputs=flags_output)
	        # Returning a bare list would set the dropdown's *value*; gr.update is
	        # needed to replace its *choices*.
	        refresh_btn.click(fn=lambda: gr.update(choices=refresh_file_list()), outputs=file_dropdown)
	        download_file_btn.click(fn=download_storage_file, inputs=file_dropdown, outputs=file_download)
	    return demo

	if __name__ == "__main__":
	    # Make sure the spaCy model is available before the UI is built.
	    setup_spacy_model()
	    app = create_interface()
	    # Bind to all interfaces on port 7860.
	    app.launch(server_port=7860, server_name="0.0.0.0")