Final_Assignment_Project

Running

App Files Community

wt002 committed 3 days ago

Commit

a5c16dc

verified ·

1 Parent(s): 56615fd

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -20

app.py CHANGED Viewed

@@ -7,10 +7,11 @@ from typing import List, Dict, Union
 import pandas as pd
 import wikipediaapi
 import requests
 from bs4 import BeautifulSoup
-import urllib.parse
 import random
-from typing import List, Dict
 load_dotenv()
@@ -23,28 +24,93 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class BasicAgent:
     def __init__(self):
-        self.headers = {'User-Agent': random.choice([
-            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
-            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15'
-        ])}
     def __call__(self, query: str) -> str:
-        """Returns 1-3 word answer when possible"""
         try:
-            url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
-            html = requests.get(url, headers=self.headers, timeout=20).text
-            soup = BeautifulSoup(html, 'html.parser')
-            # Try to extract short answer
-            short = (soup.select_one('.LGOjhe, .kno-rdesc span') or
-                    soup.select_one('.hgKElc') or
-                    soup.select_one('.Z0LcW'))
-            return short.get_text()[:50].split('.')[0] if short else "No short answer found"
         except Exception:
-            return "Search failed"
 def run_and_submit_all( profile: gr.OAuthProfile | None):

 import pandas as pd
 import wikipediaapi
 import requests
+import requests
 from bs4 import BeautifulSoup
 import random
+import re
+from typing import Optional
 load_dotenv()
 class BasicAgent:
     def __init__(self):
+        self.headers = {
+            'User-Agent': random.choice([
+                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15'
+            ]),
+            'Accept-Language': 'en-US,en;q=0.9'
+        }
+        self.answer_patterns = [
+            (r'(?:is|are|was|were) (?:about|approximately)? (\d+[\d,\.]+\s*\w+)', 0),  # Quantities
+            (r'(?:is|are) (.{5,30}?\b(?:ing|tion|ment)\b)', 1),  # Definitions
+            (r'\b(?:located in|found in|from) (.{10,30})', 1),  # Locations
+            (r'\b(?:born on|died on) (.{8,15}\d{4})', 1)  # Dates
+        ]
     def __call__(self, query: str) -> str:
+        """Get concise answer with improved accuracy"""
         try:
+            # Try Google's direct answer boxes first
+            direct_answer = self._get_direct_answer(query)
+            if direct_answer and len(direct_answer.split()) <= 8:
+                return self._clean_answer(direct_answer)
+            # Fallback to featured snippet extraction
+            snippet = self._get_featured_snippet(query)
+            if snippet:
+                best_answer = self._extract_best_fragment(snippet, query)
+                if best_answer:
+                    return best_answer
+            # Final fallback to first result summary
+            return self._get_first_result_summary(query) or "No concise answer found"
         except Exception:
+            return "Search error"
+    def _get_direct_answer(self, query: str) -> Optional[str]:
+        """Extract Google's instant answer"""
+        url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
+        html = requests.get(url, headers=self.headers, timeout=3).text
+        soup = BeautifulSoup(html, 'html.parser')
+        for selector in ['.LGOjhe', '.kno-rdesc span', '.hgKElc', '.Z0LcW']:
+            element = soup.select_one(selector)
+            if element:
+                return element.get_text()
+        return None
+    def _get_featured_snippet(self, query: str) -> Optional[str]:
+        """Get featured snippet text"""
+        url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
+        html = requests.get(url, headers=self.headers, timeout=3).text
+        soup = BeautifulSoup(html, 'html.parser')
+        snippet = soup.select_one('.xpdopen .kno-rdesc span, .ifM9O')
+        return snippet.get_text() if snippet else None
+    def _extract_best_fragment(self, text: str, query: str) -> Optional[str]:
+        """Extract most relevant sentence fragment"""
+        sentences = re.split(r'[\.\!\?]', text)
+        query_words = set(query.lower().split())
+        for pattern, group_idx in self.answer_patterns:
+            for sentence in sentences:
+                match = re.search(pattern, sentence, re.IGNORECASE)
+                if match:
+                    return self._clean_answer(match.group(group_idx))
+        # Fallback to shortest meaningful sentence
+        return min([s.strip() for s in sentences if 5 < len(s.split()) < 15],
+                  key=len, default=None)
+    def _get_first_result_summary(self, query: str) -> Optional[str]:
+        """Extract summary from first result"""
+        url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
+        html = requests.get(url, headers=self.headers, timeout=3).text
+        soup = BeautifulSoup(html, 'html.parser')
+        first_result = soup.select_one('.tF2Cxc')
+        if first_result:
+            snippet = first_result.select_one('.IsZvec, .VwiC3b')
+            return self._clean_answer(snippet.get_text()) if snippet else None
+        return None
+    def _clean_answer(self, text: str) -> str:
+        """Clean and shorten the answer"""
+        text = re.sub(r'\[\d+\]', '', text)  # Remove citations
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text[:120]  # Limit length
 def run_and_submit_all( profile: gr.OAuthProfile | None):