jayash391 committed on
Commit
cadb43e
·
1 Parent(s): be72f3a

Update sherlock2.py

Browse files
Files changed (1) hide show
  1. sherlock2.py +15 -133
sherlock2.py CHANGED
@@ -65,134 +65,6 @@ def generate_embeddings_from_documents(extracted_text):
65
  st.error(f"Error generating embeddings: {e}")
66
  return embeddings
67
 
68
-
69
- # Web scraping and Wikipedia search function
70
- def search_and_scrape_wikipedia(keywords, max_topics_per_query=3, react_model='gemini-pro'):
71
- """
72
- Searches and scrapes Wikipedia using the ReAct prompting method to find information relevant to the provided keywords.
73
-
74
- Args:
75
- keywords (list): A list of keywords to search for on Wikipedia.
76
- max_topics_per_query (int, optional): The maximum number of Wikipedia topics to explore for each query. Defaults to 3.
77
- react_model (str, optional): The name of the generative model to use with ReAct prompting.
78
- Defaults to 'gemini-pro'.
79
- Returns:
80
- list: A list of dictionaries, where each dictionary represents a relevant piece of information, with keys:
81
- - "topic": The Wikipedia topic title.
82
- - "summary": A summary of the relevant information extracted from the topic.
83
- - "url": The URL of the Wikipedia page.
84
- - "additional_sources": (Optional) A list of additional source URLs extracted from citations.
85
- """
86
-
87
- model_instructions = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, Observation is understanding relevant information from an Action's output and Action can be of three types:
88
- (1) <search>entity</search>, which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search and you can try to search the information from those topics.
89
- (2) <lookup>keyword</lookup>, which returns the next sentence containing keyword in the current context. This only does exact matches, so keep your searches short.
90
- (3) <finish>answer</finish>, which returns the answer and finishes the task.
91
- """
92
-
93
- # Define tools for ReAct (search, lookup, finish)
94
- class ReAct:
95
- def __init__(self, model: str, react_prompt: str):
96
- self.model = genai.GenerativeModel(model)
97
- self.chat = self.model.start_chat(history=[])
98
- self.should_continue_prompting = True
99
- self._search_history: list[str] = []
100
- self._search_urls: list[str] = []
101
- self._prompt = react_prompt
102
-
103
- @classmethod
104
- def add_method(cls, func):
105
- setattr(cls, func.__name__, func)
106
-
107
- @staticmethod
108
- def clean(text: str):
109
- text = text.replace("\n", " ")
110
- return text
111
-
112
- def search(self, query: str):
113
- observation = None
114
- query = query.strip()
115
- try:
116
- observation = wikipedia.summary(query, sentences=4, auto_suggest=False)
117
- wiki_url = wikipedia.page(query, auto_suggest=False).url
118
- observation = self.clean(observation)
119
- self._search_history.append(query)
120
- self._search_urls.append(wiki_url)
121
- except (DisambiguationError, PageError) as e:
122
- observation = f'Could not find ["{query}"]. Similar: {wikipedia.search(query)}.'
123
- return observation
124
-
125
- def lookup(self, phrase: str, context_length=200):
126
- page = wikipedia.page(self._search_history[-1], auto_suggest=False).content
127
- page = self.clean(page)
128
- start_index = page.find(phrase)
129
- result = page[max(0, start_index - context_length):start_index+len(phrase)+context_length]
130
- return result
131
-
132
- def finish(self, _):
133
- self.should_continue_prompting = False
134
-
135
- def __call__(self, user_question, max_calls: int=8, **generation_kwargs):
136
- if len(self.chat.history) == 0:
137
- model_prompt = self._prompt.format(question=user_question)
138
- else:
139
- model_prompt = user_question
140
-
141
- callable_entities = ['', '', '']
142
- generation_kwargs.update({'stop_sequences': callable_entities})
143
-
144
- self.should_continue_prompting = True
145
- for idx in range(max_calls):
146
- self.response = self.chat.send_message(content=[model_prompt],
147
- generation_config=generation_kwargs, stream=False)
148
- response_cmd = self.chat.history[-1].parts[-1].text
149
- try:
150
- cmd = re.findall(r'<(.*)>', response_cmd)[-1]
151
- query = response_cmd.split(f'<{cmd}>')[-1].strip()
152
- observation = self.__getattribute__(cmd)(query)
153
-
154
- if not self.should_continue_prompting:
155
- break
156
-
157
- model_prompt = f"\nObservation {idx + 1}\n{observation}"
158
-
159
- except (IndexError, AttributeError) as e:
160
- model_prompt = "Please try to generate thought-action-observation traces."
161
-
162
- # Initialize ReAct with model and instructions
163
- react_agent = ReAct(model=react_model, react_prompt=model_instructions)
164
-
165
- search_history = set()
166
- wikipedia_info = []
167
-
168
- for query in keywords:
169
- # Use ReAct to search and extract information
170
- react_agent(query)
171
- response_text = react_agent.response.text
172
-
173
- # Process response_text to extract information
174
- observations = []
175
- for line in response_text.strip().split('\n'):
176
- if line.startswith("Observation"):
177
- observations.append(line.split(':')[-1].strip())
178
-
179
- # Check if observations list is not empty before accessing elements
180
- if observations:
181
- summary = observations[-1]
182
- url = react_agent._search_urls[-1]
183
-
184
- # Create a dictionary with the extracted information
185
- wikipedia_info.append({
186
- "topic": query, # Using the original query as the topic
187
- "summary": summary,
188
- "url": url
189
- })
190
- else:
191
- # Handle the case where no observations were found (optional)
192
- print(f"No observations found for query: {query}")
193
-
194
- return wikipedia_info
195
-
196
  def extract_keywords_simple(extracted_text):
197
  """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
198
  prompt = """
@@ -299,16 +171,26 @@ def clear_chat():
299
  st.session_state.chat_history = []
300
 
301
  def show_intro_popup():
302
- """Displays a popup with information about Sherlock and chat instructions."""
303
- with st.expander("Welcome to AI Detective Sherlock Holmes!"): # Use expander for a collapsible popup
 
304
  st.write("""
305
  **Meet Sherlock Holmes, the world's most renowned detective!**
306
 
307
  This application allows you to:
308
- * **Investigate Cases:** Upload case files and images for Sherlock to analyze.
309
- * **Chat with Sherlock:** Ask him questions and get his insights.
310
 
311
- **To chat with Sherlock, go to the "Chat with Sherlock" page in the sidebar.**
 
 
 
 
 
 
 
 
 
 
 
312
  """)
313
 
314
  def investigate():
 
65
  st.error(f"Error generating embeddings: {e}")
66
  return embeddings
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def extract_keywords_simple(extracted_text):
69
  """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
70
  prompt = """
 
171
  st.session_state.chat_history = []
172
 
173
  def show_intro_popup():
174
+ """Displays a popup with detailed information about Sherlock and chat instructions,
175
+ emphasizing the importance of reliable sources and cautioning against misleading online information."""
176
+ with st.expander("Welcome to AI Detective Sherlock Holmes!"):
177
  st.write("""
178
  **Meet Sherlock Holmes, the world's most renowned detective!**
179
 
180
  This application allows you to:
 
 
181
 
182
+ * **Investigate Cases:** Upload case files and images for Sherlock to analyze. He will use his exceptional deductive reasoning skills to uncover clues and provide insights.
183
+ * **Chat with Sherlock:** Ask him questions and get his expert opinion on various aspects of the case.
184
+
185
+ **To chat with Sherlock, go to the "Chat with Sherlock" page in the sidebar.**
186
+
187
+ **Important Note:**
188
+
189
+ While the internet can be a valuable resource, it's crucial to be aware that not all information found online is accurate or reliable. To ensure the integrity of our investigation, we prioritize the use of trustworthy sources such as official documents, witness testimonies, and expert analysis.
190
+
191
+ Therefore, **we advise against relying solely on internet searches** during the investigation process. Instead, focus on providing Sherlock with concrete evidence and factual information to facilitate his deductions.
192
+
193
+ Remember, the truth is often elusive, but with careful observation, logical reasoning, and a discerning eye, we can unravel even the most complex mysteries.
194
  """)
195
 
196
  def investigate():