initial commit: main app files
- app.py +145 -4
- jinaai.py +46 -0
- requirements.txt +9 -0
app.py
CHANGED
@@ -1,7 +1,148 @@
(old side: the previous 7-line app.py is mostly dropped; 4 lines are removed, ending with demo.launch(), while import gradio as gr survives as line 6 of the new file)
+import sys
+sys.path.append('.')  # Add current directory to path
+
+from smolagents import ToolCallingAgent, tool, HfApiModel, DuckDuckGoSearchTool, CodeAgent
+from jinaai import scrape_page_with_jina_ai, search_facts_with_jina_ai
 import gradio as gr
+from dotenv import load_dotenv
+import os
+import datetime
+import time
+
+load_dotenv()
+
+# Initialize agent
+model_id = "meta-llama/Llama-3.3-70B-Instruct"  # "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+
+agent = CodeAgent(
+    tools=[scrape_page_with_jina_ai, search_facts_with_jina_ai, DuckDuckGoSearchTool()],
+    model=HfApiModel(model_id=model_id),
+    max_steps=7
+)
+
+def analyze_urls(urls: str, prompt: str, progress=gr.Progress()) -> str:
+    """Analyze multiple URLs based on the given prompt"""
+    try:
+        progress(0, desc="Starting analysis...")
+        url_list = [url.strip() for url in urls.split('\n') if url.strip()]
+
+        progress(0.3, desc="Analyzing URLs...")
+        # Create bullet-pointed list of URLs with simple newline join
+        url_bullets = "\n".join(f"- {url}" for url in url_list)
+
+        result = agent.run(f"""Analyze these URLs:
+{url_bullets}
+
+Create a comprehensive report that answers: {prompt}
+
+Format the report in markdown with these sections:
+1. 📝 Overall Summary
+2. 🔍 Analysis by Source
+   - Include key findings from each URL
+   - Compare and contrast information across sources
+3. 💡 Consolidated Analysis
+4. 📚 Sources
+
+Make it visually appealing with clear headings and bullet points.""")
+
+        progress(0.9, desc="Formatting report...")
+        formatted_result = f"""# 📊 Multi-URL Analysis Report
+
+## 🔗 Analyzed URLs
+{url_bullets}
+
+## ❓ Analysis Prompt
+> {prompt}
+
+---
+
+{result}
+
+### 📋 Metadata
+- **Generated**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+- **URLs Analyzed**: {len(url_list)}
+"""
+        progress(1.0, desc="Done!")
+        return formatted_result
+    except Exception as e:
+        return f"""### ❌ Error
+
+**Analysis Failed**
+```
+Error during analysis: {str(e)}
+```
+
+Please check the URLs and try again."""
+
+def show_loading():
+    return """# ⏳ Analyzing...
+
+## Current Status
+- 🌐 Fetching webpage content
+- 🤖 AI processing
+- 📝 Generating report
+
+Please wait while we analyze your URLs..."""

+# Create Gradio interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🔍 URL Analyzer")
+    gr.Markdown("""
+    Enter multiple URLs separated by new lines and what you want to know about them.
+    The AI will analyze the content and create a detailed report.
+
+    ---
+    """)
+
+    with gr.Row():
+        urls = gr.Textbox(
+            label="URLs",
+            placeholder="https://example.com\nhttps://example.org",
+            scale=2
+        )
+        prompt = gr.Textbox(
+            label="What do you want to know?",
+            placeholder="What are the main points discussed?",
+            scale=2
+        )
+
+    submit = gr.Button("🚀 Analyze", variant="primary", size="lg")
+    status = gr.Markdown("", elem_id="status")
+
+    with gr.Row():
+        output = gr.Markdown(label="Analysis Report", show_label=False, value="")
+
+    # Example inputs with better descriptions
+    gr.Examples(
+        label="Example Analyses",
+        examples=[
+            ["https://www.dabangasudan.org/en/all-news/category/news",
+             "What are the latest developments in Sudan's conflict, focusing on humanitarian situation and military movements?"],
+            ["https://www.dabangasudan.org/en/all-news/category/news",
+             "Analyze the economic and agricultural situation in Sudan based on recent news."],
+            ["https://littlesis.org/research/reports/",
+             "What are the latest corporate influence investigations and their key findings?"],
+            ["https://littlesis.org/research/reports/",
+             "Summarize the recent reports about energy companies and environmental impact."]
+        ],
+        inputs=[urls, prompt]
+    )
+
+    def clear_output():
+        return "", ""
+
+    submit.click(
+        fn=show_loading,
+        outputs=output,
+    ).then(
+        fn=analyze_urls,
+        inputs=[urls, prompt],
+        outputs=output,
+        show_progress="full"
+    ).then(
+        fn=lambda: gr.update(visible=True),
+        outputs=[status]
+    )

+if __name__ == "__main__":
+    demo.queue().launch(share=True)
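The app reads its secrets at import time: jinaai.py needs JINA_API_KEY and HfApiModel needs a Hugging Face inference token. Below is a minimal local-launch sketch; it is not part of the commit, the file name run_local.py and the port are illustrative, and the HF_TOKEN name assumes the usual huggingface_hub convention.

# run_local.py (hypothetical, not part of the commit): run the Space's UI locally.
# Assumes a .env file providing JINA_API_KEY (read in jinaai.py) and a
# Hugging Face token such as HF_TOKEN for HfApiModel.
from dotenv import load_dotenv

load_dotenv()  # jinaai.py builds its auth header at import time, so load secrets first

from app import demo  # importing app.py constructs the CodeAgent and the Gradio Blocks

if __name__ == "__main__":
    demo.queue().launch(server_name="127.0.0.1", server_port=7860)  # no public share tunnel locally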
jinaai.py
ADDED
@@ -0,0 +1,46 @@
+import os
+import requests
+from requests.exceptions import RequestException
+import datetime
+from dotenv import load_dotenv
+from smolagents import tool
+
+load_dotenv()
+
+headers = {'Authorization': 'Bearer ' + os.getenv('JINA_API_KEY')}
+
+@tool
+def scrape_page_with_jina_ai(url: str) -> str:
+    """Scrapes content from a webpage using Jina AI's web scraping service.
+
+    Args:
+        url: The URL of the webpage to scrape. Must be a valid web address to extract content from.
+
+    Returns:
+        str: The scraped content in markdown format.
+    """
+    try:
+        print(f"Scraping Jina AI..: {url}")
+        response = requests.get("https://r.jina.ai/" + url, headers=headers)
+        response.raise_for_status()
+        return response.text
+    except RequestException as e:
+        return f"Error scraping webpage: {str(e)}"
+
+@tool
+def search_facts_with_jina_ai(query: str) -> str:
+    """Searches for facts and information using Jina AI's search service.
+
+    Args:
+        query: The search query string used to find relevant facts and information.
+
+    Returns:
+        str: The search results in markdown format containing relevant facts and information.
+    """
+    try:
+        print(f"Searching Jina AI..: {query}")
+        response = requests.get("https://s.jina.ai/" + query, headers=headers)
+        response.raise_for_status()
+        return response.text
+    except RequestException as e:
+        return f"Error searching facts: {str(e)}"
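Because smolagents' @tool decorator wraps these functions into Tool objects that can still be called like plain functions, they can be exercised on their own before being wired into the agent. A minimal smoke-test sketch, assuming JINA_API_KEY is available in the environment or a local .env; the URL and query are placeholders.

# Hypothetical smoke test, not part of the commit.
from jinaai import scrape_page_with_jina_ai, search_facts_with_jina_ai

page_md = scrape_page_with_jina_ai(url="https://example.com")    # GET https://r.jina.ai/<url>
print(page_md[:300])                                             # scraped page as markdown, truncated

facts_md = search_facts_with_jina_ai(query="Sudan latest news")  # GET https://s.jina.ai/<query>
print(facts_md[:300])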
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+gradio
+groq
+requests
+python-dotenv
+markdownify
+duckduckgo-search
+litellm
+huggingface-hub
+smolagents
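Nothing in requirements.txt is version-pinned, so a quick import check can confirm the packages resolve inside the Space before the app starts. A minimal sketch; note that python-dotenv and duckduckgo-search install under the import names dotenv and duckduckgo_search.

# Hypothetical dependency check, not part of the commit.
import importlib

for name in ("gradio", "groq", "requests", "dotenv", "markdownify",
             "duckduckgo_search", "litellm", "huggingface_hub", "smolagents"):
    importlib.import_module(name)  # raises ImportError if the package is missing
    print(f"ok: {name}")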