File size: 4,405 Bytes
9d93819
 
b41a812
9d93819
 
 
e71e82a
 
9d93819
6fa421e
 
9d93819
f5e6ea4
9d93819
 
 
6fa421e
9d93819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b41a812
9d93819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b41a812
9d93819
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from smolagents import ToolCallingAgent, tool, HfApiModel, DuckDuckGoSearchTool, CodeAgent
from jinaai import scrape_page_with_jina_ai, search_facts_with_jina_ai
import gradio as gr
import os
import datetime
import time
from huggingface_hub import login


# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling `login(token=None)` makes huggingface_hub fall back to an
# interactive prompt, which would block a headless deployment, so the call
# is skipped when HF_API_TOKEN is unset or empty.
_hf_token = os.getenv("HF_API_TOKEN")
if _hf_token:
    login(token=_hf_token)

# Initialize agent
# Other candidate model ids kept for reference:
#   "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
#   "deepseek-ai/deepseek-coder-6.7b-instruct"
model_id = "meta-llama/Llama-3.3-70B-Instruct"

# Single shared agent instance used by `analyze_urls`; it is given the two
# Jina helpers plus DuckDuckGo search as tools and `max_steps=7`.
agent = CodeAgent(
    tools=[scrape_page_with_jina_ai, search_facts_with_jina_ai, DuckDuckGoSearchTool()],
    model=HfApiModel(model_id=model_id),
    max_steps=7,
)

def analyze_urls(urls: str, prompt: str, progress=gr.Progress()) -> str:
    """Analyze newline-separated URLs with the agent and return a markdown report.

    Args:
        urls: URLs separated by newlines. Blank lines and surrounding
            whitespace are ignored.
        prompt: The question the generated report should answer.
        progress: Gradio progress tracker. It is declared as a
            ``gr.Progress()`` default because that is how Gradio injects the
            tracker into event handlers.

    Returns:
        A markdown string: the formatted report on success, or an error
        section when no URLs were supplied or the analysis raised.
    """
    try:
        progress(0, desc="Starting analysis...")
        # `urls or ""` tolerates a missing value from the UI.
        url_list = [url.strip() for url in (urls or "").split('\n') if url.strip()]

        # Without at least one URL there is nothing to analyze; return early
        # instead of spending an agent run on an empty list.
        if not url_list:
            return """### ❌ Error

**No URLs provided**

Please enter at least one URL (one per line) and try again."""

        progress(0.3, desc="Analyzing URLs...")
        # Create bullet-pointed list of URLs with simple newline join
        url_bullets = "\n".join(f"- {url}" for url in url_list)

        result = agent.run(f"""Analyze these URLs:
{url_bullets}

Create a comprehensive report that answers: {prompt}

Format the report in markdown with these sections:
1. 📝 Overall Summary
2. 🔍 Analysis by Source
   - Include key findings from each URL
   - Compare and contrast information across sources
3. 💡 Consolidated Analysis
4. 🔗 Sources

Make it visually appealing with clear headings and bullet points.""")

        progress(0.9, desc="Formatting report...")
        # Wrap the agent's answer with a header (inputs) and footer (metadata).
        formatted_result = f"""# 📊 Multi-URL Analysis Report

## 🌐 Analyzed URLs
{url_bullets}

## ❓ Analysis Prompt
> {prompt}

---

{result}

### 📋 Metadata
- **Generated**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **URLs Analyzed**: {len(url_list)}
"""
        progress(1.0, desc="Done!")
        return formatted_result
    except Exception as e:
        # UI boundary: surface any failure as markdown rather than letting
        # the exception propagate to Gradio.
        return f"""### ❌ Error

**Analysis Failed**
```
Error during analysis: {str(e)}
```

Please check the URLs and try again."""

def show_loading() -> str:
    """Return the placeholder markdown displayed while an analysis is running."""
    return """# ⏳ Analyzing...

## Current Status
- 🔄 Fetching webpage content
- 🤖 AI processing
- 📊 Generating report

Please wait while we analyze your URLs..."""

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 URL Analyzer")
    gr.Markdown("""
    Enter multiple URLs separated by new lines and what you want to know about them. 
    The AI will analyze the content and create a detailed report.
    
    ---
    """)
    
    with gr.Row():
        # The instructions ask for one URL per line, so show several rows
        # up front to make it obvious the field is multi-line.
        urls = gr.Textbox(
            label="URLs",
            placeholder="https://example.com\nhttps://example.org",
            lines=4,
            scale=2
        )
        prompt = gr.Textbox(
            label="What do you want to know?",
            placeholder="What are the main points discussed?",
            scale=2
        )
    
    submit = gr.Button("📊 Analyze", variant="primary", size="lg")
    # NOTE(review): `status` is never given any content; the final `.then`
    # step below only re-asserts its visibility.
    status = gr.Markdown("", elem_id="status")
    
    with gr.Row():
        output = gr.Markdown(label="Analysis Report", show_label=False, value="")
    
    # Example inputs with better descriptions
    gr.Examples(
        label="Example Analyses",
        examples=[
            ["https://www.dabangasudan.org/en/all-news/category/news", 
             "What are the latest developments in Sudan's conflict, focusing on humanitarian situation and military movements?"],
            ["https://www.dabangasudan.org/en/all-news/category/news", 
             "Analyze the economic and agricultural situation in Sudan based on recent news."],
            ["https://littlesis.org/research/reports/", 
             "What are the latest corporate influence investigations and their key findings?"],
            ["https://littlesis.org/research/reports/", 
             "Summarize the recent reports about energy companies and environmental impact."]
        ],
        inputs=[urls, prompt]
    )
    
    # NOTE(review): not wired to any event in this block; kept for
    # backward compatibility.
    def clear_output():
        """Return a pair of empty strings."""
        return "", ""
    
    # Event chain: show the loading placeholder, run the analysis into the
    # same output component, then update the status component.
    submit.click(
        fn=show_loading,
        outputs=output,
    ).then(
        fn=analyze_urls,
        inputs=[urls, prompt],
        outputs=output,
        show_progress="full"
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[status]
    )

if __name__ == "__main__":
    # Script entry point: turn on Gradio's queue, then launch the app with
    # `share=True`.
    queued_app = demo.queue()
    queued_app.launch(share=True)