Update sherlock2.py

sherlock2.py  +101 -64  CHANGED
@@ -3,6 +3,7 @@ import google.ai.generativelanguage as glm
 import streamlit as st
 from bs4 import BeautifulSoup
 import wikipedia
+from wikipedia.exceptions import DisambiguationError, PageError
 import os
 import re
 import requests
@@ -83,74 +84,110 @@ def search_and_scrape_wikipedia(keywords, max_topics_per_query=3, react_model='g
         - "additional_sources": (Optional) A list of additional source URLs extracted from citations.
     """

-
-
-
-
-    **Thought 1:** I need to search Wikipedia for information related to "{question}".
-
-    **Action 1:**
-
-    **Observation 1:** {observation} # This will be filled in during the process
-
-    # ... (Further Thought-Action-Observation steps as needed)
-
-    **Action N:** # The final answer will be extracted from here
+    model_instructions = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, Observation is understanding relevant information from an Action's output and Action can be of three types:
+    (1) <search>entity</search>, which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search and you can try to search the information from those topics.
+    (2) <lookup>keyword</lookup>, which returns the next sentence containing keyword in the current context. This only does exact matches, so keep your searches short.
+    (3) <finish>answer</finish>, which returns the answer and finishes the task.
     """

-
+    # Define tools for ReAct (search, lookup, finish)
+    class ReAct:
+        def __init__(self, model: str, react_prompt: str):
+            self.model = genai.GenerativeModel(model)
+            self.chat = self.model.start_chat(history=[])
+            self.should_continue_prompting = True
+            self._search_history: list[str] = []
+            self._search_urls: list[str] = []
+            self._prompt = react_prompt
+
+        @classmethod
+        def add_method(cls, func):
+            setattr(cls, func.__name__, func)
+
+        @staticmethod
+        def clean(text: str):
+            text = text.replace("\n", " ")
+            return text
+
+        def search(self, query: str):
+            observation = None
+            query = query.strip()
+            try:
+                observation = wikipedia.summary(query, sentences=4, auto_suggest=False)
+                wiki_url = wikipedia.page(query, auto_suggest=False).url
+                observation = self.clean(observation)
+                self._search_history.append(query)
+                self._search_urls.append(wiki_url)
+            except (DisambiguationError, PageError) as e:
+                observation = f'Could not find ["{query}"]. Similar: {wikipedia.search(query)}.'
+            return observation
+
+        def lookup(self, phrase: str, context_length=200):
+            page = wikipedia.page(self._search_history[-1], auto_suggest=False).content
+            page = self.clean(page)
+            start_index = page.find(phrase)
+            result = page[max(0, start_index - context_length):start_index + len(phrase) + context_length]
+            return result
+
+        def finish(self, _):
+            self.should_continue_prompting = False
+
+        def __call__(self, user_question, max_calls: int = 8, **generation_kwargs):
+            if len(self.chat.history) == 0:
+                model_prompt = self._prompt.format(question=user_question)
+            else:
+                model_prompt = user_question
+
+            callable_entities = ['</search>', '</lookup>', '</finish>']
+            generation_kwargs.update({'stop_sequences': callable_entities})
+
+            self.should_continue_prompting = True
+            for idx in range(max_calls):
+                self.response = self.chat.send_message(content=[model_prompt],
+                                                       generation_config=generation_kwargs, stream=False)
+                response_cmd = self.chat.history[-1].parts[-1].text
+                try:
+                    cmd = re.findall(r'<(.*)>', response_cmd)[-1]
+                    query = response_cmd.split(f'<{cmd}>')[-1].strip()
+                    observation = self.__getattribute__(cmd)(query)
+
+                    if not self.should_continue_prompting:
+                        break
+
+                    model_prompt = f"\nObservation {idx + 1}\n{observation}"
+
+                except (IndexError, AttributeError) as e:
+                    model_prompt = "Please try to generate thought-action-observation traces."
+
+    # Initialize ReAct with model and instructions
+    react_agent = ReAct(model=react_model, react_prompt=model_instructions)
+
+    search_history = set()
     wikipedia_info = []
-    react_model = genai.GenerativeModel(react_model)  # Initialize the generative model
-
     for query in keywords:
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if action == "finish":
-            answer = observation.strip()  # Extract the final answer
-            break  # Exit the loop when <finish> is encountered
-
-        # Get Wikipedia page and URL (modified line)
-        page = wikipedia.page(search_term, auto_suggest=False)
-        url = page.url
-
-        additional_sources = []
-        if response.candidates[0].citation_metadata:
-            additional_sources = [source.url for source in response.candidates[0].citation_metadata.citation_sources]
-
-        wikipedia_info.append({
-            "topic": search_term,
-            "summary": answer,  # Use the extracted answer as the summary
-            "url": url,
-            "additional_sources": additional_sources
-        })
-
-        except wikipedia.exceptions.DisambiguationError:
-            print(f"Ambiguous results for '{search_term}' (originally for '{query}'), skipping.")
-        except wikipedia.exceptions.PageError:
-            print(f"No Wikipedia page found for '{search_term}', skipping.")
-        except Exception as e:
-            st.error(f"Error searching Wikipedia: {e}")

     return wikipedia_info
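The control flow in __call__ turns on the stop_sequences passed to send_message: generation halts as soon as the model emits a closing tag such as </search>, so the last open tag in the reply names the tool to dispatch and everything after it is the argument. A self-contained sketch of that parsing step, where the response_cmd string is an invented stand-in for real model output:

    import re

    # Invented sample: generation stopped at the '</search>' stop sequence,
    # so the closing tag itself never appears in the text.
    response_cmd = "Thought 1: I should look up the article.\nAction 1: <search>Sherlock Holmes"

    cmd = re.findall(r'<(.*)>', response_cmd)[-1]       # -> 'search'
    query = response_cmd.split(f'<{cmd}>')[-1].strip()  # -> 'Sherlock Holmes'
    print(cmd, '|', query)                              # dispatched as react_agent.search(query)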
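For a sense of how the new path is driven end to end, a minimal usage sketch: it assumes genai.configure(api_key=...) has already run elsewhere in sherlock2.py, the keywords are invented examples, and the react_model value is likewise an assumption, since the default is truncated in the hunk header above.

    # Hypothetical driver; not part of the commit.
    results = search_and_scrape_wikipedia(
        ["Arthur Conan Doyle", "Reichenbach Falls"],  # invented queries
        react_model="gemini-pro",  # assumed model name, not confirmed by the diff
    )
    for info in results:
        print(f"{info['topic']}: {info['url']}")
        print(info["summary"])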