wt002 committed on
Commit
a5c16dc
·
verified ·
1 Parent(s): 56615fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -20
app.py CHANGED
@@ -7,10 +7,11 @@ from typing import List, Dict, Union
7
  import pandas as pd
8
  import wikipediaapi
9
  import requests
 
10
  from bs4 import BeautifulSoup
11
- import urllib.parse
12
  import random
13
- from typing import List, Dict
 
14
 
15
  load_dotenv()
16
 
@@ -23,28 +24,93 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
 
24
  class BasicAgent:
25
  def __init__(self):
26
- self.headers = {'User-Agent': random.choice([
27
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
28
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15'
29
- ])}
 
 
 
 
 
 
 
 
 
30
 
31
  def __call__(self, query: str) -> str:
32
- """Returns 1-3 word answer when possible"""
33
  try:
34
- url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
35
- html = requests.get(url, headers=self.headers, timeout=20).text
36
- soup = BeautifulSoup(html, 'html.parser')
37
-
38
- # Try to extract short answer
39
- short = (soup.select_one('.LGOjhe, .kno-rdesc span') or
40
- soup.select_one('.hgKElc') or
41
- soup.select_one('.Z0LcW'))
42
-
43
- return short.get_text()[:50].split('.')[0] if short else "No short answer found"
44
-
 
 
 
 
45
  except Exception:
46
- return "Search failed"
47
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
7
  import pandas as pd
8
  import wikipediaapi
9
  import requests
10
+ import requests
11
  from bs4 import BeautifulSoup
 
12
  import random
13
+ import re
14
+ from typing import Optional
15
 
16
  load_dotenv()
17
 
 
24
 
25
class BasicAgent:
    """Search-backed agent returning a concise (<=120 char) answer for a query.

    Strategy, in order: Google's instant-answer boxes, then the featured
    snippet mined with answer-shaped regexes, then the first organic
    result's summary. Any failure degrades to a sentinel string; this
    class never raises from ``__call__``.
    """

    def __init__(self):
        # Rotate User-Agent strings to lower the chance of a bot-detection
        # page; request English results so the CSS selectors stay stable.
        self.headers = {
            'User-Agent': random.choice([
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15'
            ]),
            'Accept-Language': 'en-US,en;q=0.9'
        }
        # (pattern, capture-group index) pairs tried in order against each
        # sentence. Every pattern now captures the answer in group 1 — the
        # previous quantity pattern used group 0, which returned the whole
        # match ("is about 330 metres") instead of just "330 metres".
        self.answer_patterns = [
            # Quantities. The optional qualifier carries its own trailing
            # space ("(?:(?:about|approximately) )?") so that "is 330 metres"
            # (no qualifier) matches too; the old form demanded two spaces.
            (r'(?:is|are|was|were) (?:(?:about|approximately) )?(\d[\d,\.]*\s*\w+)', 1),
            (r'(?:is|are) (.{5,30}?\b(?:ing|tion|ment)\b)', 1),   # Definitions
            (r'\b(?:located in|found in|from) (.{10,30})', 1),    # Locations
            (r'\b(?:born on|died on) (.{8,15}\d{4})', 1)          # Dates
        ]

    def __call__(self, query: str) -> str:
        """Return a short answer string for *query* (never raises)."""
        try:
            # Fetch and parse the results page ONCE; the previous version
            # issued up to three identical HTTP requests per query.
            soup = self._fetch_results(query)

            # 1. Google's direct answer boxes.
            direct_answer = self._get_direct_answer(soup)
            if direct_answer and len(direct_answer.split()) <= 8:
                return self._clean_answer(direct_answer)

            # 2. Featured snippet, mined for an answer-like fragment.
            snippet = self._get_featured_snippet(soup)
            if snippet:
                best_answer = self._extract_best_fragment(snippet, query)
                if best_answer:
                    return best_answer

            # 3. First organic result's summary text.
            return self._get_first_result_summary(soup) or "No concise answer found"
        except Exception:
            # Deliberate best-effort boundary: network/parse failures degrade
            # to a sentinel string instead of propagating to the caller.
            return "Search error"

    def _fetch_results(self, query: str):
        """Fetch the Google results page for *query* and return parsed soup."""
        url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
        html = requests.get(url, headers=self.headers, timeout=3).text
        return BeautifulSoup(html, 'html.parser')

    def _get_direct_answer(self, soup) -> Optional[str]:
        """Extract Google's instant answer from *soup*, or None."""
        for selector in ['.LGOjhe', '.kno-rdesc span', '.hgKElc', '.Z0LcW']:
            element = soup.select_one(selector)
            if element:
                return element.get_text()
        return None

    def _get_featured_snippet(self, soup) -> Optional[str]:
        """Return the featured-snippet text from *soup*, or None."""
        snippet = soup.select_one('.xpdopen .kno-rdesc span, .ifM9O')
        return snippet.get_text() if snippet else None

    def _extract_best_fragment(self, text: str, query: str) -> Optional[str]:
        """Pick the most answer-like fragment of *text* for *query*.

        Tries each (pattern, group) pair against every sentence; falls back
        to the shortest sentence of 6-14 words. The fallback result is now
        cleaned like every other answer path (it previously bypassed
        ``_clean_answer``). The unused ``query_words`` local was removed.
        """
        sentences = re.split(r'[\.\!\?]', text)

        for pattern, group_idx in self.answer_patterns:
            for sentence in sentences:
                match = re.search(pattern, sentence, re.IGNORECASE)
                if match:
                    return self._clean_answer(match.group(group_idx))

        # Fallback: shortest sentence of "meaningful" length (6-14 words).
        candidates = [s.strip() for s in sentences if 5 < len(s.split()) < 15]
        shortest = min(candidates, key=len, default=None)
        return self._clean_answer(shortest) if shortest else None

    def _get_first_result_summary(self, soup) -> Optional[str]:
        """Extract the snippet text of the first organic result, or None."""
        first_result = soup.select_one('.tF2Cxc')
        if first_result:
            snippet = first_result.select_one('.IsZvec, .VwiC3b')
            return self._clean_answer(snippet.get_text()) if snippet else None
        return None

    def _clean_answer(self, text: str) -> str:
        """Strip citation markers, collapse whitespace, cap length at 120."""
        text = re.sub(r'\[\d+\]', '', text)   # Remove [1]-style citations
        text = re.sub(r'\s+', ' ', text).strip()
        return text[:120]
113
+
114
 
115
 
116
  def run_and_submit_all( profile: gr.OAuthProfile | None):