MHamdan committed
Commit dd103f7 · verified · 1 Parent(s): 41af56e

Upload tool

Files changed (3)
  1. app.py +4 -65
  2. requirements.txt +3 -11
  3. tool.py +43 -114
app.py CHANGED
@@ -1,67 +1,6 @@
-
-import gradio as gr
-from smolagents import load_tool
-
-# Load the tool
-web_analyzer = load_tool("MHamdan/web-analyzer", trust_remote_code=True)
-
-def analyze_content(url, mode):
-    return web_analyzer(url, mode)
-
-def create_interface():
-    with gr.Blocks(title="AI Web Analyzer") as iface:
-        gr.Markdown("# 🤖 AI-Powered Web Content Analyzer")
-        gr.Markdown("""
-        ## Features:
-        - 📊 **Analyze**: Complete content analysis with AI summary
-        - 📝 **Summarize**: AI-generated multi-section summary
-        - 😊 **Sentiment**: Section-by-section sentiment analysis
-        - 🎯 **Topics**: AI topic classification
-        """)
-
-        with gr.Row():
-            with gr.Column():
-                url_input = gr.Textbox(
-                    label="Webpage URL",
-                    placeholder="Enter URL to analyze..."
-                )
-                mode = gr.Dropdown(
-                    choices=["analyze", "summarize", "sentiment", "topics"],
-                    label="Analysis Mode",
-                    value="analyze"
-                )
-                submit_btn = gr.Button("Analyze Content", variant="primary")
-
-            with gr.Column():
-                output = gr.Textbox(
-                    label="AI Analysis Results",
-                    lines=15
-                )
-
-        # Example data
-        examples = [
-            ["https://www.artificialintelligence-news.com/2024/02/14/openai-anthropic-google-white-house-red-teaming/", "analyze"],
-            ["https://www.artificialintelligence-news.com/2024/02/13/ai-21-labs-wordtune-chatgpt-plugin/", "summarize"],
-            ["https://www.artificialintelligence-news.com/2024/02/12/google-responds-gemini-ai-historical-images/", "sentiment"],
-            ["https://www.artificialintelligence-news.com/2024/02/09/anthropic-claude-3-models-preview/", "topics"]
-        ]
-
-        gr.Examples(
-            examples=examples,
-            inputs=[url_input, mode],
-            outputs=output,
-            fn=analyze_content,
-            cache_examples=True
-        )
-
-        submit_btn.click(
-            fn=analyze_content,
-            inputs=[url_input, mode],
-            outputs=output
-        )
-
-    return iface
-
-# Create and launch the interface
-demo = create_interface()
-demo.launch()
+from smolagents import launch_gradio_demo
+from tool import SimpleTool
+
+tool = SimpleTool()
+
+launch_gradio_demo(tool)
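The removed app.py built its own Gradio UI and loaded the tool from the Hub; the new version delegates UI generation to smolagents. Downstream users can still consume the tool remotely, as the deleted code did. A minimal sketch, reusing the repo id and trust_remote_code flag from the removed lines above (assuming the repo id is unchanged by this commit):

# Sketch: load the published tool from the Hub rather than importing
# tool.py locally; repo id and flag are taken from the removed app.py.
from smolagents import load_tool

web_analyzer = load_tool("MHamdan/web-analyzer", trust_remote_code=True)

# Tool instances are callable; the new signature takes only a URL.
print(web_analyzer("https://www.artificialintelligence-news.com/"))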
requirements.txt CHANGED
@@ -1,12 +1,4 @@
-
-gradio>=4.0.0
-beautifulsoup4>=4.9.3
-requests>=2.25.1
-smolagents
+bs4
+requests
 transformers
-torch>=2.0.0
-accelerate
-sacremoses
-sentencepiece
-protobuf
-scipy
+smolagents
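Note that the trimmed requirements no longer pin torch, yet the transformers pipelines in tool.py still need a backend at runtime; presumably it arrives via the Space's base image or a transitive dependency. A hypothetical sanity check:

# Hypothetical check: confirm each runtime dependency (including the
# now-unpinned torch backend) resolves in the deployed environment.
import importlib.util

for mod in ("bs4", "requests", "transformers", "smolagents", "torch"):
    found = importlib.util.find_spec(mod) is not None
    print(f"{mod}: {'ok' if found else 'MISSING'}")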
tool.py CHANGED
@@ -2,141 +2,70 @@ from smolagents import Tool
 from typing import Any, Optional
 
 class SimpleTool(Tool):
-    name = "web_analyzer"
-    description = "Advanced web content analyzer with AI-powered analysis."
-    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
+    name = "web_content_analyzer"
+    description = "Analyzes web content using AI models."
+    inputs = {"url":{"type":"string","description":"The webpage URL to analyze."}}
     output_type = "string"
 
-    def forward(self, url: str, mode: str = "analyze") -> str:
-        """Advanced web content analyzer with AI-powered analysis.
+    def forward(self, url: str) -> str:
+        """Analyzes web content using AI models.
 
         Args:
             url: The webpage URL to analyze.
-            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').
 
         Returns:
-            str: AI-enhanced analysis of web content.
+            str: Analysis results in JSON format.
         """
         import requests
         from bs4 import BeautifulSoup
         import re
         from transformers import pipeline
-        import torch
-
-        # Check if GPU is available
-        device = 0 if torch.cuda.is_available() else -1
+        import json
 
         try:
+            # Fetch content
             headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
             response = requests.get(url, headers=headers, timeout=10)
-            response.raise_for_status()
 
+            # Parse HTML
             soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Remove scripts and styles
             for tag in soup(['script', 'style', 'meta']):
                 tag.decompose()
 
+            # Extract basic info
             title = soup.title.string if soup.title else "No title found"
-            title = re.sub(r'\s+', ' ', title).strip()
-            text_content = re.sub(r'\s+', ' ', soup.get_text()).strip()
-
-            if len(text_content) < 100:
-                return "Error: Not enough content to analyze"
-
-            if mode == "analyze":
-                try:
-                    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
-                    classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)
-
-                    summary = summarizer(text_content[:1024], max_length=100, min_length=30)[0]['summary_text']
-                    sentiment = classifier(text_content[:512])[0]
-                    sent_score = int(sentiment['label'][0])
-                    sent_text = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][sent_score-1]
-
-                    return f"""📊 Content Analysis
-
-Title: {title}
-
-📝 AI Summary:
-{summary}
-
-😊 Overall Sentiment: {sent_text} ({sent_score}/5)
-
-Length: {len(text_content)} characters"""
-
-                except Exception as e:
-                    return f"Error with AI analysis: {str(e)}. Please check if PyTorch and transformers are properly installed."
-
-            elif mode == "summarize":
-                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
-                # Process in chunks
-                chunk_size = 1024
-                summaries = []
-
-                for i in range(0, min(len(text_content), 3072), chunk_size):
-                    chunk = text_content[i:i+chunk_size]
-                    if len(chunk) > 100:
-                        summary = summarizer(chunk, max_length=100, min_length=30)[0]['summary_text']
-                        summaries.append(summary)
-
-                return f"""📝 Multi-Section Summary
-
-Title: {title}
-
-{' '.join(summaries)}"""
-
-            elif mode == "sentiment":
-                classifier = pipeline("text-classification",
-                                      model="nlptown/bert-base-multilingual-uncased-sentiment")
-
-                # Analyze paragraphs
-                paragraphs = soup.find_all('p')
-                sentiments = ""
-                count = 0
-
-                for p in paragraphs:
-                    text = p.text.strip()
-                    if len(text) > 50:
-                        result = classifier(text[:512])[0]
-                        score = int(result['label'][0])
-                        mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
-                        sentiments += f"\nSection {count + 1}: {mood} ({score}/5 stars)"
-                        count += 1
-                        if count >= 5:
-                            break
-
-                return f"""😊 Sentiment Analysis
-
-Title: {title}
-{sentiments}"""
-
-            elif mode == "topics":
-                classifier = pipeline("zero-shot-classification",
-                                      model="facebook/bart-large-mnli")
-
-                topics = [
-                    "Technology", "AI/ML", "Business", "Science",
-                    "Innovation", "Research", "Industry News"
-                ]
-
-                results = classifier(text_content[:512], topics)
-
-                topic_analysis = "Detected Topics:\n"
-                for topic, score in zip(results['labels'], results['scores']):
-                    if score > 0.1:
-                        topic_analysis += f"- {topic}: {score*100:.1f}% confidence\n"
-
-                return f"""🎯 Topic Classification
-
-Title: {title}
-
-{topic_analysis}"""
-
-            else:
-                return f"Error: Unknown mode '{mode}'"
+            text = re.sub(r'\s+', ' ', soup.get_text()).strip()
+
+            if len(text) < 100:
+                return json.dumps({
+                    "error": "Not enough content to analyze"
+                })
+
+            # Get summary
+            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+            summary = summarizer(text[:1024], max_length=100, min_length=30)[0]['summary_text']
+
+            # Get sentiment
+            classifier = pipeline("text-classification",
+                                  model="nlptown/bert-base-multilingual-uncased-sentiment")
+            sentiment = classifier(text[:512])[0]
+            score = int(sentiment['label'][0])
+            mood = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"][score-1]
+
+            # Format results
+            result = {
+                "title": title,
+                "summary": summary,
+                "sentiment": f"{mood} ({score}/5)",
+                "stats": {
+                    "words": len(text.split()),
+                    "chars": len(text)
+                }
+            }
+
+            return json.dumps(result)
 
         except Exception as e:
-            return f"Error processing webpage: {str(e)}"
+            return json.dumps({
+                "error": str(e)
+            })
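For a quick check outside the Gradio demo, the rewritten forward() can be exercised directly; a minimal sketch (the example URL is taken from the removed app.py examples):

# Sketch: call the tool's forward() and decode the JSON payload it now
# returns in place of the old formatted-text reports.
import json
from tool import SimpleTool

tool = SimpleTool()
raw = tool.forward("https://www.artificialintelligence-news.com/2024/02/09/anthropic-claude-3-models-preview/")
report = json.loads(raw)

print(report["title"])
print(report["sentiment"])   # e.g. "Positive (4/5)"
print(report["stats"])       # word and character counts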