wt002 committed on
Commit b2a7d74 · verified · 1 Parent(s): e06cf2f

Update app.py

Files changed (1):
  1. app.py +117 -92
app.py CHANGED
@@ -3,7 +3,10 @@ from dotenv import load_dotenv
 import gradio as gr
 import requests
 
-from typing import List, Dict, Union
+from typing import List, Dict, Union, Optional
+from bs4 import BeautifulSoup   # still used by ContentExtractorAgent below
+from datetime import datetime   # used for page/search timestamps below
+import ast                      # safe parsing of the agents' repr() strings
 import pandas as pd
 import wikipediaapi
 import requests
@@ -22,104 +25,129 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # --- Basic Agent Definition ---
 
-import requests
-from bs4 import BeautifulSoup
-import urllib.parse
-import re
-from typing import Optional
-
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-        self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
-            'Accept-Language': 'en-US,en;q=0.9'
-        }
-        self.answer_patterns = {
-            'definition': r'(?:is|are|was|were) (?:an?|the)? (.+?)(?:\.|,)',
-            'quantity': r'(?:is|are|was|were) (?:about|approximately)? (\d+[\d,\.]*\s*\w+)',
-            'person': r'(?:by|named) (.+?)(?:\.|,)',
-            'date': r'(?:on|in) (.+? \d{4}|\d{1,2} [A-Za-z]+ \d{4})'
-        }
-
+
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-
-        # Try Wikipedia first for factual questions
-        if self._is_wikipedia_question(question):
-            answer = self._search_wikipedia(question)
-            if answer and answer != "No answer found":
-                return answer
-
-        # Fall back to Google search
-        answer = self._search_google(question)
-        print(f"Agent returning answer: {answer[:50]}...")
-        return answer
-
-    def _is_wikipedia_question(self, question: str) -> bool:
-        """Check if question is suitable for Wikipedia"""
-        question_lower = question.lower()
-        return any(keyword in question_lower
-                   for keyword in ['who', 'what', 'when', 'where', 'why', 'how', 'define'])
-
-    def _search_wikipedia(self, question: str) -> str:
-        """Search Wikipedia directly for answers"""
-        try:
-            # Extract main topic from question
-            topic = re.sub(r'(who|what|when|where|why|how|is|are|was|were|did|does|do)\s+', '', question, flags=re.IGNORECASE)
-            topic = re.sub(r'\?.*', '', topic).strip()
-
-            url = f"https://en.wikipedia.org/wiki/{urllib.parse.quote(topic.replace(' ', '_'))}"
-            response = requests.get(url, headers=self.headers, timeout=5)
-
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                first_paragraph = soup.select_one('div.mw-parser-output > p:not(.mw-empty-elt)')
-
-                if first_paragraph:
-                    text = first_paragraph.get_text()
-                    # Try to extract most relevant sentence
-                    for pattern_type, pattern in self.answer_patterns.items():
-                        match = re.search(pattern, text, re.IGNORECASE)
-                        if match:
-                            return f"{match.group(1).strip()} (Source: Wikipedia)"
-
-                    return text.split('.')[0] + " (Source: Wikipedia)"
-
-            return "No answer found"
-
-        except Exception:
-            return "No answer found"
-
-    def _search_google(self, question: str) -> str:
-        """Search Google for answers"""
-        try:
-            url = f"https://www.google.com/search?q={urllib.parse.quote(question)}"
-            response = requests.get(url, headers=self.headers, timeout=5)
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Check Google's answer boxes
-            for selector in ['.Z0LcW', '.LGOjhe', '.hgKElc', '.kno-rdesc span']:
-                element = soup.select_one(selector)
-                if element:
-                    return element.get_text() + " (Source: Google)"
-
-            # Try featured snippet
-            snippet = soup.select_one('.xpdopen .kno-rdesc span, .ifM9O')
-            if snippet:
-                return snippet.get_text() + " (Source: Google)"
-
-            # Fallback to first result summary
-            first_result = soup.select_one('.tF2Cxc')
-            if first_result:
-                summary = first_result.select_one('.IsZvec, .VwiC3b')
-                if summary:
-                    return summary.get_text()[:150] + "... (Source: Google)"
-
-            return "No concise answer found"
-
-        except Exception:
-            return "Search failed"
+        fixed_answer = self.process_request(question)
+        print(f"Agent returning answer: {fixed_answer}")
+        return fixed_answer
+
+    def process_request(self, question: str) -> str:
+        return "This is a default answer."
+
+class SearchAgent(BasicAgent):
+    def __init__(self):
+        super().__init__()
+        print("SearchAgent specialized initialization.")
+
+    def process_request(self, query: str) -> str:
+        # In a real implementation, this would call a search API
+        mock_results = [
+            {"url": f"https://example.com/result{i}", "title": f"Result {i} for {query[:20]}..."}
+            for i in range(1, 4)
+        ]
+        return str(mock_results)
+
+class BrowserAgent(BasicAgent):
+    def __init__(self):
+        super().__init__()
+        self.current_page = None
+        self.history = []
+        self.session = requests.Session()
+        self.session.headers.update({'User-Agent': 'WebNavigator/1.0'})
+        print("BrowserAgent initialized with fresh session.")
+
+    def process_request(self, url: str) -> str:
+        try:
+            response = self.session.get(url)
+            response.raise_for_status()
+            self.current_page = {
+                'url': url,
+                'content': response.text,
+                'timestamp': datetime.now()
+            }
+            self.history.append(self.current_page)
+            return f"Successfully retrieved page: {url}"
+        except Exception as e:
+            return f"Error visiting {url}: {str(e)}"
+
+class ContentExtractorAgent(BasicAgent):
+    def __init__(self):
+        super().__init__()
+        print("ContentExtractorAgent initialized.")
+
+    def process_request(self, html: str) -> str:
+        soup = BeautifulSoup(html, 'html.parser')
+
+        # Remove unwanted elements
+        for element in soup(['script', 'style', 'nav', 'footer']):
+            element.decompose()
+
+        title = soup.title.string if soup.title else ""
+        main_content = soup.find('main') or soup.find('article') or soup.body
+
+        extracted = {
+            'title': title,
+            'text': main_content.get_text(separator='\n', strip=True),
+            'links': [a['href'] for a in main_content.find_all('a', href=True)]
+        }
+        return str(extracted)
+
+class WebNavigator(BasicAgent):
+    def __init__(self):
+        super().__init__()
+        self.search_agent = SearchAgent()
+        self.browser_agent = BrowserAgent()
+        self.extractor_agent = ContentExtractorAgent()
+        self.search_history = []
+        print("WebNavigator fully initialized with all sub-agents.")
+
+    def process_request(self, question: str) -> str:
+        # First try to interpret as a direct URL
+        if question.startswith(('http://', 'https://')):
+            return self.get_page_summary(question)
+
+        # Otherwise treat as search query
+        return self.search_and_extract(question)
+
+    def search_and_extract(self, query: str) -> str:
+        # Sub-agents return repr() strings; parse them back with ast.literal_eval
+        # rather than eval(), which would execute arbitrary expressions.
+        search_results = ast.literal_eval(self.search_agent(query))
+        extracted_data = []
+
+        for result in search_results:
+            visit_result = self.browser_agent(result['url'])
+            if "Successfully" in visit_result:
+                html = self.browser_agent.current_page['content']  # stored HTML string
+                content = self.extractor_agent(html)
+                extracted_data.append({
+                    'query': query,
+                    'url': result['url'],
+                    'content': ast.literal_eval(content)  # repr string back to dict
+                })
+
+        self.search_history.append({
+            'query': query,
+            'timestamp': datetime.now(),
+            'results': extracted_data
+        })
+
+        return str(extracted_data)
+
+    def get_page_summary(self, url: str) -> str:
+        visit_result = self.browser_agent(url)
+        if "Successfully" in visit_result:
+            html = self.browser_agent.current_page['content']
+            content = ast.literal_eval(self.extractor_agent(html))
+            return str({
+                'url': url,
+                'title': content['title'],
+                'summary': ' '.join(content['text'].split()[:100]) + '...'
+            })
+        return visit_result  # Return the error message
+
 
 
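The diff leaves SearchAgent.process_request as a mock behind the comment "this would call a search API". As an illustration only, a subclass like the hypothetical LiveSearchAgent below could swap in a real endpoint; this sketch assumes DuckDuckGo's Instant Answer API, whose RelatedTopics/FirstURL/Text JSON fields come from that API's docs, not from this commit, and it keeps the mock's contract of returning the repr of a list of {url, title} dicts:

import requests

class LiveSearchAgent(SearchAgent):
    def process_request(self, query: str) -> str:
        # Hypothetical: query DuckDuckGo's Instant Answer API (no key needed);
        # verify the endpoint and field names against its documentation.
        resp = requests.get(
            "https://api.duckduckgo.com/",
            params={"q": query, "format": "json", "no_html": 1},
            timeout=5,
        )
        resp.raise_for_status()
        data = resp.json()
        # Same contract as the mock: repr of a list of {url, title} dicts,
        # capped at three results.
        results = [
            {"url": t["FirstURL"], "title": t["Text"]}
            for t in data.get("RelatedTopics", [])
            if "FirstURL" in t  # skip nested topic groups
        ][:3]
        return str(results)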
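Finally, a minimal sketch of how the new hierarchy is meant to be driven, using only names defined in the diff; the question string is a placeholder input, and https://example.com mirrors the mock URLs above:

navigator = WebNavigator()

# A plain question is routed SearchAgent -> BrowserAgent -> ContentExtractorAgent
# and comes back as the repr of a list of extracted-page dicts.
print(navigator("capital of France"))

# A direct URL skips the search step and returns a ~100-word page summary.
print(navigator("https://example.com"))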