Update app.py
app.py CHANGED
@@ -6,10 +6,11 @@ import requests
 from typing import List, Dict, Union
 import pandas as pd
 import wikipediaapi
+import requests
 from bs4 import BeautifulSoup
 import urllib.parse
+import random
 from typing import List, Dict
-import fake_useragent  # For realistic user-agent rotation
 
 load_dotenv()
 
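The commit swaps the hard fake_useragent dependency for the stdlib random module. If rotation via fake-useragent is still wanted whenever that package happens to be installed, an optional import with a graceful fallback would do it. A minimal sketch, not part of this commit (random_user_agent and _FALLBACK_AGENTS are illustrative names):

# Sketch: optional fake-useragent with a stdlib fallback (not in the commit).
import random

try:
    from fake_useragent import UserAgent   # pip install fake-useragent
    def random_user_agent() -> str:
        return UserAgent().random           # library-supplied realistic UA string
except ImportError:
    _FALLBACK_AGENTS = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    ]
    def random_user_agent() -> str:
        return random.choice(_FALLBACK_AGENTS)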
@@ -22,17 +23,27 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 class BasicAgent:
     def __init__(self):
-        self.user_agent = fake_useragent.UserAgent().random
         self.headers = {
-            'User-Agent': self.user_agent,
+            'User-Agent': self._get_random_user_agent(),
             'Accept-Language': 'en-US,en;q=0.5',
         }
-
+
+    def _get_random_user_agent(self) -> str:
+        """Fallback user-agent generator if fake-useragent isn't installed"""
+        browsers = [
+            # Chrome
+            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            # Firefox
+            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
+            # Safari
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15'
+        ]
+        return random.choice(browsers)
 
     def search(self, query: str, num_results: int = 3) -> List[Dict]:
         """Perform Google search and return structured results"""
         encoded_query = urllib.parse.quote_plus(query)
-        url = f"https://www.google.com/search?q={encoded_query}&num={num_results + 2}"
+        url = f"https://www.google.com/search?q={encoded_query}&num={num_results + 2}"
 
         try:
             response = requests.get(url, headers=self.headers, timeout=10)
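One thing to note: self.headers is built once in __init__, so each BasicAgent instance keeps whatever user-agent _get_random_user_agent() returned at construction time. If the intent is per-request rotation, the pick has to happen inside the request path. A standalone sketch under that assumption (fetch_serp and AGENTS are illustrative names, not from the commit):

# Sketch: user-agent picked fresh on every request rather than once per instance.
import random
import urllib.parse
import requests

AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
]

def fetch_serp(query: str, num_results: int = 3) -> str:
    """Fetch a Google results page with a per-call user-agent."""
    headers = {
        'User-Agent': random.choice(AGENTS),   # fresh pick on every call
        'Accept-Language': 'en-US,en;q=0.5',
    }
    url = f"https://www.google.com/search?q={urllib.parse.quote_plus(query)}&num={num_results + 2}"
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    return response.text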
@@ -47,17 +58,17 @@
         soup = BeautifulSoup(html, 'html.parser')
         results = []
 
-        #
-        for i, result in enumerate(soup.select('.tF2Cxc')[:max_results]):
-            title = result.select_one('h3')
+        # Current Google result selectors (July 2024)
+        for i, result in enumerate(soup.select('.tF2Cxc, .g')[:max_results]):
+            title = result.select_one('h3, .LC20lb')
             link = result.find('a')['href']
-            snippet = result.select_one('.IsZvec')
+            snippet = result.select_one('.IsZvec, .VwiC3b')
 
             if title and link:
                 results.append({
                     'position': i + 1,
                     'title': title.get_text(),
-                    'link': link,
+                    'link': link if link.startswith('http') else f"https://www.google.com{link}",
                     'snippet': snippet.get_text() if snippet else None
                 })
 
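A caveat with this parsing loop: result.find('a')['href'] executes before the if title and link: guard, so a result block with no anchor raises TypeError ('NoneType' object is not subscriptable) instead of being skipped. A defensive variant as a sketch — parse_results is an illustrative name, and the selector set is the one from the diff:

# Sketch: skip malformed result blocks instead of crashing on a missing anchor.
from typing import List, Dict
from bs4 import BeautifulSoup

def parse_results(html: str, max_results: int = 3) -> List[Dict]:
    soup = BeautifulSoup(html, 'html.parser')
    results = []
    for i, result in enumerate(soup.select('.tF2Cxc, .g')[:max_results]):
        title = result.select_one('h3, .LC20lb')
        anchor = result.find('a')
        snippet = result.select_one('.IsZvec, .VwiC3b')
        if not (title and anchor and anchor.get('href')):
            continue  # malformed block: skip it rather than raise
        link = anchor['href']
        results.append({
            'position': i + 1,
            'title': title.get_text(),
            'link': link if link.startswith('http') else f"https://www.google.com{link}",
            'snippet': snippet.get_text() if snippet else None,
        })
    return results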
@@ -69,26 +80,25 @@ class BasicAgent:
         for res in results:
             output.append(
                 f"{res['position']}. {res['title']}\n"
-                f"   {res['link']}\n"
-                f"   {res['snippet'] or 'No description available'}\n"
+                f"   🔗 {res['link']}\n"
+                f"   📝 {res['snippet'] or 'No description available'}\n"
             )
         return "\n".join(output)
 
-# Usage Example
 if __name__ == "__main__":
     scraper = BasicAgent()
 
-    #
+    # Example search
     query = "Python programming language"
-    print(f"Searching Google for: '{query}'")
+    print(f"🔍 Searching Google for: '{query}'")
 
-    results = scraper.search(query)
+    results = scraper.search(query, num_results=3)
 
     if results:
         print("\nTop Results:")
         print(scraper.pretty_print(results))
     else:
-        print("No results found or search failed")
+        print("❌ No results found or search failed")
 
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
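To see the output format pretty_print produces without hitting Google, it can be fed a hand-built result list. A sketch, assuming the emoji prefixes from this commit and that pretty_print accepts the same list shape search() returns (the mock data is illustrative):

# Sketch: exercising pretty_print offline with a mock result.
scraper = BasicAgent()
mock_results = [{
    'position': 1,
    'title': 'Welcome to Python.org',
    'link': 'https://www.python.org/',
    'snippet': 'The official home of the Python programming language.',
}]
print(scraper.pretty_print(mock_results))
# Expected shape:
# 1. Welcome to Python.org
#    🔗 https://www.python.org/
#    📝 The official home of the Python programming language.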