# Hugging Face Space: Noobian / smol-url-analysis
# Space status: Sleeping
# File size: 4,405 bytes

from smolagents import ToolCallingAgent, tool, HfApiModel, DuckDuckGoSearchTool, CodeAgent
from jinaai import scrape_page_with_jina_ai, search_facts_with_jina_ai
import gradio as gr
import os
import datetime
import time
from huggingface_hub import login


# Authenticate with the Hugging Face Hub only when a token is configured.
# login(token=None) falls back to an interactive prompt, which cannot be
# answered when the app runs headless, so skip the call in that case.
_hf_token = os.getenv("HF_API_TOKEN")
if _hf_token:
    login(token=_hf_token)

# Initialize agent
# Alternatives previously noted for model_id:
#   "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
#   "deepseek-ai/deepseek-coder-6.7b-instruct"
model_id = "meta-llama/Llama-3.3-70B-Instruct"

# Shared agent used by analyze_urls: two Jina-based tools plus DuckDuckGo
# search, driven by the hosted model selected above.
agent = CodeAgent(
    tools=[scrape_page_with_jina_ai, search_facts_with_jina_ai, DuckDuckGoSearchTool()],
    model=HfApiModel(model_id=model_id),
    max_steps=7,  # upper bound on agent steps per run
)

def analyze_urls(urls: str, prompt: str, progress=gr.Progress()) -> str:
    """Analyze one or more URLs with the agent and return a markdown report.

    Args:
        urls: Newline-separated URLs. Blank lines and surrounding whitespace
            are ignored.
        prompt: The question the generated report should answer.
        progress: Gradio progress tracker used to report coarse progress.

    Returns:
        A markdown document listing the analyzed URLs, the prompt, the
        agent's report and generation metadata. When no URL is supplied, or
        when anything raises during the analysis, a markdown error message
        is returned instead; exceptions are not propagated to the caller.
    """
    try:
        progress(0, desc="Starting analysis...")
        url_list = [line.strip() for line in (urls or "").splitlines() if line.strip()]

        # Do not spend an agent run on an empty request.
        if not url_list:
            return """### ❌ Error

**No URLs provided**

Please enter at least one URL (one per line) and try again."""

        progress(0.3, desc="Analyzing URLs...")
        # Create bullet-pointed list of URLs with simple newline join
        url_bullets = "\n".join(f"- {url}" for url in url_list)

        result = agent.run(f"""Analyze these URLs:
{url_bullets}

Create a comprehensive report that answers: {prompt}

Format the report in markdown with these sections:
1. 📝 Overall Summary
2. 🔍 Analysis by Source
   - Include key findings from each URL
   - Compare and contrast information across sources
3. 💡 Consolidated Analysis
4. 🔗 Sources

Make it visually appealing with clear headings and bullet points.""")

        progress(0.9, desc="Formatting report...")
        # Prefix every prompt line with "> " so a multi-line prompt stays
        # inside the markdown blockquote; an empty prompt keeps the bare "> ".
        quoted_prompt = "\n".join(f"> {line}" for line in str(prompt).splitlines()) or "> "
        formatted_result = f"""# 📊 Multi-URL Analysis Report

## 🌐 Analyzed URLs
{url_bullets}

## ❓ Analysis Prompt
{quoted_prompt}

---

{result}

### 📋 Metadata
- **Generated**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **URLs Analyzed**: {len(url_list)}
"""
        progress(1.0, desc="Done!")
        return formatted_result
    except Exception as e:
        # UI boundary: surface the failure as markdown in the output panel
        # instead of letting the event handler crash.
        return f"""### ❌ Error

**Analysis Failed**
```
Error during analysis: {str(e)}
```

Please check the URLs and try again."""

def show_loading():
    """Return the placeholder markdown displayed while an analysis runs."""
    status_lines = [
        "# ⏳ Analyzing...",
        "",
        "## Current Status",
        "- 🔄 Fetching webpage content",
        "- 🤖 AI processing",
        "- 📊 Generating report",
        "",
        "Please wait while we analyze your URLs...",
    ]
    return "\n".join(status_lines)

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 URL Analyzer")
    gr.Markdown("""
    Enter multiple URLs separated by new lines and what you want to know about them. 
    The AI will analyze the content and create a detailed report.
    
    ---
    """)
    
    with gr.Row():
        # Multi-line box: the instructions above ask for one URL per line,
        # which a single-line textbox (the default) cannot accept.
        urls = gr.Textbox(
            label="URLs",
            placeholder="https://example.com\nhttps://example.org",
            lines=5,
            scale=2
        )
        prompt = gr.Textbox(
            label="What do you want to know?",
            placeholder="What are the main points discussed?",
            scale=2
        )
    
    submit = gr.Button("📊 Analyze", variant="primary", size="lg")
    status = gr.Markdown("", elem_id="status")
    
    with gr.Row():
        output = gr.Markdown(label="Analysis Report", show_label=False, value="")
    
    # Example inputs with better descriptions
    gr.Examples(
        label="Example Analyses",
        examples=[
            ["https://www.dabangasudan.org/en/all-news/category/news", 
             "What are the latest developments in Sudan's conflict, focusing on humanitarian situation and military movements?"],
            ["https://www.dabangasudan.org/en/all-news/category/news", 
             "Analyze the economic and agricultural situation in Sudan based on recent news."],
            ["https://littlesis.org/research/reports/", 
             "What are the latest corporate influence investigations and their key findings?"],
            ["https://littlesis.org/research/reports/", 
             "Summarize the recent reports about energy companies and environmental impact."]
        ],
        inputs=[urls, prompt]
    )
    
    # NOTE(review): not wired to any event in this file; kept so the
    # module-level name remains available.
    def clear_output():
        return "", ""
    
    # Event chain: show the loading placeholder first, then replace it with
    # the analysis result, then make sure the status area is visible.
    submit.click(
        fn=show_loading,
        outputs=output,
    ).then(
        fn=analyze_urls,
        inputs=[urls, prompt],
        outputs=output,
        show_progress="full"
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[status]
    )

if __name__ == "__main__":
    # Enable the request queue first, then start the server; share=True asks
    # Gradio for a public share link.
    queued_app = demo.queue()
    queued_app.launch(share=True)