wt002 committed on
Commit
b4eaa1c
·
verified ·
1 Parent(s): 632828a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -167
app.py CHANGED
@@ -25,182 +25,35 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
  class BasicAgent:
26
  def __init__(self):
27
  print("BasicAgent initialized.")
28
-
29
- def __call__(self, question: str) -> str:
30
- print(f"Agent received question (first 50 chars): {question[:50]}...")
31
- fixed_answer = self.process_request(question)
32
- print(f"Agent returning answer: {fixed_answer[:100]}...") # Truncate long output
33
- return fixed_answer
34
-
35
 
36
- class SearchAgent(BasicAgent):
37
- def __init__(self):
38
- super().__init__()
39
- print("SearchAgent specialized initialization.")
40
-
41
- def process_request(self, query: str) -> str:
42
- """Returns mock search results in JSON format"""
43
- mock_results = [
44
- {
45
- "url": f"https://www.google.com/search?q={query.replace(' ', '+')}&result={i}",
46
- "title": f"Example Result {i} for '{query[:15]}...'",
47
- "snippet": f"This is a mock search result snippet about {query[:15]}... showing result number {i}.",
48
- "relevance": i/3 # Score from 0.33 to 1.0
49
- }
50
- for i in range(1, 4)
51
- ]
52
- return json.dumps({
53
- "query": query,
54
- "results": mock_results,
55
- "count": len(mock_results),
56
- "status": "success"
57
- })
58
 
59
- class BrowserAgent(BasicAgent):
60
- def __init__(self):
61
- super().__init__()
62
- self.current_page = None
63
- self.history = []
64
- self.session = requests.Session()
65
- self.session.headers.update({'User-Agent': 'WebNavigator/1.0'})
66
- print("BrowserAgent initialized with fresh session.")
67
-
68
- def process_request(self, url: str) -> str:
69
  try:
70
- response = self.session.get(url)
71
- response.raise_for_status()
72
- self.current_page = {
73
- 'url': url,
74
- 'content': response.text,
75
- 'timestamp': datetime.now()
76
- }
77
- self.history.append(self.current_page)
78
- return json.dumps({
79
- "status": "success",
80
- "url": url,
81
- "message": f"Successfully retrieved page: {url}"
82
- })
83
- except Exception as e:
84
- return json.dumps({
85
- "status": "error",
86
- "url": url,
87
- "message": f"Error visiting {url}: {str(e)}"
88
- })
89
-
90
- class ContentExtractorAgent(BasicAgent):
91
- def __init__(self):
92
- super().__init__()
93
- print("ContentExtractorAgent initialized.")
94
-
95
- def process_request(self, html: str) -> str:
96
- soup = BeautifulSoup(html, 'html.parser')
97
-
98
- # Remove unwanted elements
99
- for element in soup(['script', 'style', 'nav', 'footer']):
100
- element.decompose()
101
-
102
- title = soup.title.string if soup.title else ""
103
- main_content = soup.find('main') or soup.find('article') or soup.body
104
-
105
- extracted = {
106
- 'title': title,
107
- 'text': main_content.get_text(separator='\n', strip=True),
108
- 'links': [a['href'] for a in main_content.find_all('a', href=True)]
109
- }
110
- return json.dumps(extracted)
111
-
112
- class WebNavigator(BasicAgent):
113
- def __init__(self):
114
- super().__init__()
115
- self.search_agent = SearchAgent()
116
- self.browser_agent = BrowserAgent()
117
- self.extractor_agent = ContentExtractorAgent()
118
- self.search_history = []
119
- print("WebNavigator fully initialized with all sub-agents.")
120
-
121
- def process_request(self, question: str) -> str:
122
- # First try to interpret as a direct URL
123
- if question.startswith(('http://', 'https://')):
124
- return self.get_page_summary(question)
125
-
126
- # Otherwise treat as search query
127
- return self.search_and_extract(question)
128
-
129
- def search_and_extract(self, query: str) -> str:
130
- try:
131
- search_results = json.loads(self.search_agent(query))
132
- if search_results['status'] != 'success':
133
- return json.dumps({"error": "Search failed", "details": search_results})
134
 
135
- extracted_data = []
136
- for result in search_results['results']:
137
- visit_result = json.loads(self.browser_agent(result['url']))
138
 
139
- if visit_result['status'] == 'success':
140
- content = json.loads(self.extractor_agent(self.browser_agent.current_page['content']))
141
- extracted_data.append({
142
- 'query': query,
143
- 'url': result['url'],
144
- 'content': content
145
- })
146
-
147
- self.search_history.append({
148
- 'query': query,
149
- 'timestamp': datetime.now().isoformat(),
150
- 'results': extracted_data
151
- })
152
 
153
- return json.dumps({
154
- "status": "success",
155
- "query": query,
156
- "results": extracted_data
157
- })
158
- except Exception as e:
159
- return json.dumps({
160
- "status": "error",
161
- "message": str(e)
162
- })
163
-
164
- def get_page_summary(self, url: str) -> str:
165
- try:
166
- visit_result = json.loads(self.browser_agent(url))
167
- if visit_result['status'] != 'success':
168
- return json.dumps(visit_result)
169
 
170
- content = json.loads(self.extractor_agent(self.browser_agent.current_page['content']))
171
- return json.dumps({
172
- "status": "success",
173
- "url": url,
174
- "title": content['title'],
175
- "summary": ' '.join(content['text'].split()[:100]) + '...'
176
- })
177
  except Exception as e:
178
- return json.dumps({
179
- "status": "error",
180
- "message": str(e)
181
- })
182
 
183
-
184
- # After all your class definitions...
185
-
186
- if __name__ == "__main__":
187
- import json
188
- from datetime import datetime
189
- import requests
190
- from bs4 import BeautifulSoup
191
-
192
- # Initialize the navigator
193
- navigator = WebNavigator()
194
-
195
- # Example 1: Perform a search
196
- search_results = navigator("Python web scraping")
197
- print("\nSearch results:")
198
- print(search_results[:500] + "...") # Print first 500 chars
199
-
200
- # Example 2: Visit a URL directly
201
- url_result = navigator("https://example.com")
202
- print("\nURL visit results:")
203
- print(url_result)
204
 
205
 
206
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
25
  class BasicAgent:
26
  def __init__(self):
27
  print("BasicAgent initialized.")
 
 
 
 
 
 
 
28
 
29
+ def __call__(self, question: str) -> str:
30
+ print(f"Agent received question: {question[:50]}...")
31
+ answer = self.basic_search(question)
32
+ print(f"Answer: {answer}")
33
+ return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ def basic_search(self, query):
 
 
 
 
 
 
 
 
 
36
  try:
37
+ # Get search results from DuckDuckGo
38
+ results = ddg(query, max_results=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ # Format the results
41
+ if not results:
42
+ return "No results found"
43
 
44
+ formatted_results = []
45
+ for i, result in enumerate(results, 1):
46
+ formatted_results.append(
47
+ f"{i}. {result['title']}\n"
48
+ f" {result['link']}\n"
49
+ f" {result['body']}"
50
+ )
 
 
 
 
 
 
51
 
52
+ return "\n\n".join(formatted_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
+ return f"Search error: {str(e)}"
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  def run_and_submit_all( profile: gr.OAuthProfile | None):