Update app.py

app.py
CHANGED
@@ -6,11 +6,11 @@ import requests
 from typing import List, Dict, Union
 import requests
 import wikipediaapi
-import
+import google.generativeai as genai
+from typing import List, Dict, Union
 import requests
-
-import
-from urllib.parse import quote
+import wikipediaapi
+import pandas as pd
 
 load_dotenv()
 
@@ -18,242 +18,125 @@ load_dotenv()
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+
 # --- Basic Agent Definition ---
 class BasicAgent:
-    def __init__(self):
+    def __init__(self, model="google/gemma-7b"):
+        self.api_url = f"https://api-inference.huggingface.co/models/{model}"
+        self.headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
         print("BasicAgent initialized.")
-
-
-
-
-
-
+
+        #usage
+        #agent = HuggingFaceAgent("google/gemma-7b") # Same architecture as Gemini
+        #print(agent.generate("Explain quantum computing"))
+
+
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = self.agent.
-        print(f"Agent returning
+        fixed_answer = self.agent.generate(question)
+        print(f"Agent returning answer: {fixed_answer}")
         return fixed_answer
 
 
-
-
-
-
-
-
-
-
+    # to check
+    def generate_response(self, prompt: str) -> str:
+        """Get response from Gema"""
+        try:
+            response = self.model.generate_content(prompt)
+            return response.text
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
 
-    def _extract_entities(self, text):
-        """Simple entity extraction using capitalization patterns"""
-        # Find proper nouns (capitalized phrases)
-        entities = re.findall(r'([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)', text)
-        # Filter out small words and standalone letters
-        return [(ent, 'UNKNOWN') for ent in entities if len(ent) > 2 and ' ' in ent]
-
-    def _determine_intent(self, query):
-        """Determine intent using keyword patterns"""
-        if 'how many' in query:
-            return 'count'
-        elif 'when' in query or 'date' in query:
-            return 'date'
-        elif 'who' in query:
-            return 'person'
-        elif 'what is' in query or 'define' in query:
-            return 'definition'
-        elif 'list' in query or 'name all' in query:
-            return 'list'
-        return 'general'
-
-    def _extract_time_constraints(self, text):
-        """Extract year ranges from text"""
-        constraints = []
-        # Match patterns like "between 2000 and 2009"
-        range_match = re.search(r'between (\d{4}) and (\d{4})', text)
-        if range_match:
-            constraints.append(('range', int(range_match.group(1)), int(range_match.group(2))))
-
-        # Match patterns like "in 2005"
-        year_match = re.search(r'in (\d{4})', text)
-        if year_match:
-            constraints.append(('point', int(year_match.group(1))))
-
-        return constraints
 
-    def _extract_quantities(self, text):
-        """Extract numbers from text"""
-        return [int(match) for match in re.findall(r'\b(\d+)\b', text)]
 
-    def
-        """
-        url = "https://en.wikipedia.org/w/api.php"
+    def web_search(self, query: str) -> List[Dict]:
+        """Use SearxNG meta-search engine"""
         params = {
-
-
-
-            'format': 'json',
-            'srlimit': num_results
+            "q": query,
+            "format": "json",
+            "engines": "google,bing,duckduckgo"
         }
         try:
-            response =
-
-
-
-                'snippet': item['snippet'],
-                'source': 'wikipedia'
-            } for item in response['query']['search']]
-        except Exception as e:
-            print(f"Wikipedia search error: {e}")
+            response = requests.get(self.searx_url, params=params)
+            response.raise_for_status()
+            return response.json().get("results", [])
+        except requests.RequestException:
             return []
 
-    def
-        """
-
-
+    def wikipedia_search(self, query: str) -> str:
+        """Get Wikipedia summary"""
+        page = self.wiki.page(query)
+        return page.summary if page.exists() else "No Wikipedia page found"
+
+    def process_document(self, file_path: str) -> str:
+        """Handle PDF, Word, CSV, Excel files"""
+        if not os.path.exists(file_path):
+            return "File not found"
+
+        ext = os.path.splitext(file_path)[1].lower()
 
         try:
-
-
-
-
-
-
-
-
-
-
-                'text': ' '.join(soup.stripped_strings),
-                'soup': soup
-            }
-
-            self.cache[url] = page_data
-            return page_data
+            if ext == '.pdf':
+                return self._process_pdf(file_path)
+            elif ext in ('.doc', '.docx'):
+                return self._process_word(file_path)
+            elif ext == '.csv':
+                return pd.read_csv(file_path).to_string()
+            elif ext in ('.xls', '.xlsx'):
+                return pd.read_excel(file_path).to_string()
+            else:
+                return "Unsupported file format"
         except Exception as e:
-
-            return None
+            return f"Error processing document: {str(e)}"
 
-    def
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return {"answer": "No answers found in Wikipedia", "source": None}
-
-        answers.sort(key=lambda x: x['confidence'], reverse=True)
-        best_answer = answers[0]
-
-        # Format the output
-        result = {
-            "question": question,
-            "answer": best_answer['answer'],
-            "source": best_answer['source'],
-            "confidence": f"{best_answer['confidence']:.0%}"
-        }
-
-        if isinstance(best_answer['answer'], list):
-            result['answer'] = "\n- " + "\n- ".join(best_answer['answer'])
-
-        return result
-
-    def _extract_answer(self, page, analysis):
-        """Extract answer based on intent"""
-        if analysis['intent'] == 'count':
-            return self._extract_count(page['text'], analysis)
-        elif analysis['intent'] == 'date':
-            return self._extract_date(page['text'], analysis)
-        elif analysis['intent'] == 'list':
-            return self._extract_list(page['soup'], analysis)
-        else:
-            return self._extract_general(page['text'], analysis)
-
-    def _extract_count(self, text, analysis):
-        """Extract a count/number from text"""
-        entities = [e[0] for e in analysis['entities']]
-        pattern = r'(\b\d+\b)[^\.]*\b(' + '|'.join(re.escape(e) for e in entities) + r')\b'
-        matches = re.finditer(pattern, text, re.IGNORECASE)
-        counts = [int(match.group(1)) for match in matches]
-        return max(counts) if counts else None
-
-    def _extract_date(self, text, analysis):
-        """Extract dates from text"""
-        date_pattern = r'\b(\d{1,2}(?:st|nd|rd|th)?\s+(?:\w+)\s+\d{4}|\d{4})\b'
-        dates = [match.group(0) for match in re.finditer(date_pattern, text)]
-        entities = [e[0] for e in analysis['entities']]
-        return next((d for d in dates if any(e.lower() in text.lower() for e in entities)), None)
-
-    def _extract_list(self, soup, analysis):
-        """Extract list items from page"""
-        entities = [e[0] for e in analysis['entities']]
-        items = []
-        for list_tag in soup.find_all(['ul', 'ol']):
-            list_items = [li.get_text().strip() for li in list_tag.find_all('li')]
-            if any(e.lower() in ' '.join(list_items).lower() for e in entities):
-                items.extend(list_items)
-        return items if items else None
-
-    def _extract_general(self, text, analysis):
-        """Extract general information from text"""
-        entities = [e[0] for e in analysis['entities']]
-        sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
-        relevant = [s for s in sentences if any(e.lower() in s.lower() for e in entities)]
-        return ' '.join(relevant) if relevant else None
-
-    def _calculate_confidence(self, answer, analysis):
-        """Calculate confidence score for an answer"""
-        confidence = 0.5 # Base confidence
-
-        if analysis['intent'] == 'count' and isinstance(answer, int):
-            confidence += 0.3
-        elif analysis['intent'] == 'date' and re.match(r'.*\d{4}.*', str(answer)):
-            confidence += 0.3
-        elif analysis['intent'] == 'list' and isinstance(answer, list):
-            confidence += 0.3
-
-        if analysis['time_constraints'] and str(answer):
-            for constraint in analysis['time_constraints']:
-                if constraint[0] == 'range':
-                    years = re.findall(r'\b(19|20)\d{2}\b', str(answer))
-                    if any(constraint[1] <= int(y) <= constraint[2] for y in years):
-                        confidence += 0.2
-
-        return min(0.99, max(0.1, confidence))
+    def _process_pdf(self, file_path: str) -> str:
+        """Process PDF using Gemini's vision capability"""
+        try:
+            # For Gemini 1.5 or later which supports file uploads
+            with open(file_path, "rb") as f:
+                file = genai.upload_file(f)
+            response = self.model.generate_content(
+                ["Extract and summarize the key points from this document:", file]
+            )
+            return response.text
+        except:
+            # Fallback for older Gemini versions
+            try:
+                import PyPDF2
+                with open(file_path, 'rb') as f:
+                    reader = PyPDF2.PdfReader(f)
+                    return "\n".join([page.extract_text() for page in reader.pages])
+            except ImportError:
+                return "PDF processing requires PyPDF2 (pip install PyPDF2)"
+
+    def _process_word(self, file_path: str) -> str:
+        """Process Word documents"""
+        try:
+            from docx import Document
+            doc = Document(file_path)
+            return "\n".join([para.text for para in doc.paragraphs])
+        except ImportError:
+            return "Word processing requires python-docx (pip install python-docx)"
 
-
-
-
-
-
-        "
-
-
-
-
-
-
+    def process_request(self, request: Union[str, Dict]) -> str:
+        """
+        Handle different request types:
+        - Direct text queries
+        - File processing requests
+        - Complex multi-step requests
+        """
+        if isinstance(request, dict):
+            if 'steps' in request:
+                results = []
+                for step in request['steps']:
+                    if step['type'] == 'search':
+                        results.append(self.web_search(step['query']))
+                    elif step['type'] == 'process':
+                        results.append(self.process_document(step['file']))
+                return self.generate_response(f"Process these results: {results}")
+            return "Unsupported request format"
+
+        return self.generate_response(request)
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
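Similarly, `web_search` reads `self.searx_url` and `wikipedia_search` reads `self.wiki`, but neither attribute is set in the hunks shown. A sketch of the missing setup; the helper name and the SearxNG instance URL are placeholders, not from the commit:

    import wikipediaapi

    def attach_search_backends(agent, searx_url="https://searx.example.org/search"):
        """Hypothetical helper binding the attributes the search methods expect."""
        agent.searx_url = searx_url  # a /search endpoint that allows format=json
        agent.wiki = wikipediaapi.Wikipedia(user_agent="BasicAgent/0.1", language="en")
        return agent

Many public SearxNG instances disable the JSON output format, so a self-hosted instance is the safer assumption here.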