Spaces:

Noobian
/

smol-url-analysis

Sleeping

App Files Files Community

smol-url-analysis / app.py

Noobian

Update app.py

e71e82a verified 3 months ago

raw

history blame contribute delete

4.41 kB

	from smolagents import ToolCallingAgent, tool, HfApiModel, DuckDuckGoSearchTool, CodeAgent
	from jinaai import scrape_page_with_jina_ai, search_facts_with_jina_ai
	import gradio as gr
	import os
	import datetime
	import time
	from huggingface_hub import login


	# Authenticate with the Hugging Face Hub using the token from the environment.
	login(token=os.environ.get("HF_API_TOKEN"))

	# Initialize agent
	# Model id handed to HfApiModel. Other ids previously noted next to this line:
	#   "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
	#   "deepseek-ai/deepseek-coder-6.7b-instruct"
	model_id = "meta-llama/Llama-3.3-70B-Instruct"

	# Tools the agent may call: two Jina-based helpers plus DuckDuckGo search.
	_agent_tools = [
	    scrape_page_with_jina_ai,
	    search_facts_with_jina_ai,
	    DuckDuckGoSearchTool(),
	]

	# Single shared agent instance, limited to 7 steps per run.
	agent = CodeAgent(
	    tools=_agent_tools,
	    model=HfApiModel(model_id=model_id),
	    max_steps=7,
	)

	def analyze_urls(urls: str, prompt: str, progress=gr.Progress()) -> str:
	    """Analyze multiple URLs based on the given prompt.

	    Args:
	        urls: Newline-separated URLs. Blank lines and surrounding whitespace
	            are ignored.
	        prompt: The question the generated report should answer.
	        progress: Gradio progress tracker. The ``gr.Progress()`` default is
	            intentional; it is how this file asks Gradio for a tracker.

	    Returns:
	        A markdown document: the analyzed URLs, the prompt, the agent's
	        output and a small metadata footer. If no URL was supplied, or if
	        anything raises during the analysis, a markdown error message is
	        returned instead of raising.
	    """
	    try:
	        progress(0, desc="Starting analysis...")
	        # splitlines() also handles "\r\n" pastes; trim once, then drop blanks.
	        trimmed = (line.strip() for line in (urls or "").splitlines())
	        url_list = [url for url in trimmed if url]

	        # Without this guard the agent would run (up to its step limit) on an
	        # empty list and return a report about nothing.
	        if not url_list:
	            return (
	                "### ❌ Error\n\n"
	                "No URLs provided. Please enter at least one URL "
	                "(one per line) and try again."
	            )

	        progress(0.3, desc="Analyzing URLs...")
	        # Create bullet-pointed list of URLs with simple newline join
	        url_bullets = "\n".join(f"- {url}" for url in url_list)

	        result = agent.run(f"""Analyze these URLs:
	{url_bullets}

	Create a comprehensive report that answers: {prompt}

	Format the report in markdown with these sections:
	1. 📝 Overall Summary
	2. 🔍 Analysis by Source
	- Include key findings from each URL
	- Compare and contrast information across sources
	3. 💡 Consolidated Analysis
	4. 🔗 Sources

	Make it visually appealing with clear headings and bullet points.""")

	        progress(0.9, desc="Formatting report...")
	        # Wrap the agent output with a header and a metadata footer.
	        formatted_result = f"""# 📊 Multi-URL Analysis Report

	## 🌐 Analyzed URLs
	{url_bullets}

	## ❓ Analysis Prompt
	> {prompt}

	---

	{result}

	### 📋 Metadata
	- Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
	- URLs Analyzed: {len(url_list)}
	"""
	        progress(1.0, desc="Done!")
	        return formatted_result
	    except Exception as e:
	        # UI boundary: show the failure in the report area instead of raising.
	        return f"""### ❌ Error

	Analysis Failed
	```
	Error during analysis: {str(e)}
	```

	Please check the URLs and try again."""

	def show_loading():
	    """Return the placeholder markdown shown while an analysis is running."""
	    placeholder = """# ⏳ Analyzing...

	## Current Status
	- 🔄 Fetching webpage content
	- 🤖 AI processing
	- 📊 Generating report

	Please wait while we analyze your URLs..."""
	    return placeholder

	# Create Gradio interface
	# Layout, top to bottom: title and instructions, a row holding the two input
	# boxes, the submit button, a status placeholder, the report area, and a set
	# of example inputs.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 🔍 URL Analyzer")
	    gr.Markdown("""
	Enter multiple URLs separated by new lines and what you want to know about them.
	The AI will analyze the content and create a detailed report.

	---
	""")

	    with gr.Row():
	        urls = gr.Textbox(
	            label="URLs",
	            placeholder="https://example.com\nhttps://example.org",
	            # The instructions ask for one URL per line; a one-line Textbox
	            # does not insert a newline on Enter, so give it several rows.
	            lines=5,
	            scale=2,
	        )
	        prompt = gr.Textbox(
	            label="What do you want to know?",
	            placeholder="What are the main points discussed?",
	            scale=2,
	        )

	    submit = gr.Button("📊 Analyze", variant="primary", size="lg")
	    status = gr.Markdown("", elem_id="status")

	    with gr.Row():
	        output = gr.Markdown(label="Analysis Report", show_label=False, value="")

	    # Example inputs with better descriptions
	    gr.Examples(
	        label="Example Analyses",
	        examples=[
	            ["https://www.dabangasudan.org/en/all-news/category/news",
	             "What are the latest developments in Sudan's conflict, focusing on humanitarian situation and military movements?"],
	            ["https://www.dabangasudan.org/en/all-news/category/news",
	             "Analyze the economic and agricultural situation in Sudan based on recent news."],
	            ["https://littlesis.org/research/reports/",
	             "What are the latest corporate influence investigations and their key findings?"],
	            ["https://littlesis.org/research/reports/",
	             "Summarize the recent reports about energy companies and environmental impact."],
	        ],
	        inputs=[urls, prompt],
	    )

	    def clear_output():
	        """Return two empty strings (not wired to any event in this block)."""
	        return "", ""

	    # Click chain: show the loading placeholder first, then run the analysis
	    # into the same output area, then update the status component.
	    submit.click(
	        fn=show_loading,
	        outputs=output,
	    ).then(
	        fn=analyze_urls,
	        inputs=[urls, prompt],
	        outputs=output,
	        show_progress="full",
	    ).then(
	        fn=lambda: gr.update(visible=True),
	        outputs=[status],
	    )

	if __name__ == "__main__":
	    # Turn on the request queue, then launch with share=True.
	    queued_app = demo.queue()
	    queued_app.launch(share=True)