Spaces:
Sleeping
Sleeping
Update sherlock2.py
Browse files- sherlock2.py +15 -133
sherlock2.py
CHANGED
@@ -65,134 +65,6 @@ def generate_embeddings_from_documents(extracted_text):
|
|
65 |
st.error(f"Error generating embeddings: {e}")
|
66 |
return embeddings
|
67 |
|
68 |
-
|
69 |
-
# Web scraping and Wikipedia search function
|
70 |
-
def search_and_scrape_wikipedia(keywords, max_topics_per_query=3, react_model='gemini-pro'):
|
71 |
-
"""
|
72 |
-
Searches and scrapes Wikipedia using the ReAct prompting method to find information relevant to the provided keywords.
|
73 |
-
|
74 |
-
Args:
|
75 |
-
keywords (list): A list of keywords to search for on Wikipedia.
|
76 |
-
max_topics_per_query (int, optional): The maximum number of Wikipedia topics to explore for each query. Defaults to 3.
|
77 |
-
react_model (str, optional): The name of the generative model to use with ReAct prompting.
|
78 |
-
Defaults to 'gemini-pro'.
|
79 |
-
Returns:
|
80 |
-
list: A list of dictionaries, where each dictionary represents a relevant piece of information, with keys:
|
81 |
-
- "topic": The Wikipedia topic title.
|
82 |
-
- "summary": A summary of the relevant information extracted from the topic.
|
83 |
-
- "url": The URL of the Wikipedia page.
|
84 |
-
- "additional_sources": (Optional) A list of additional source URLs extracted from citations.
|
85 |
-
"""
|
86 |
-
|
87 |
-
model_instructions = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, Observation is understanding relevant information from an Action's output and Action can be of three types:
|
88 |
-
(1) <search>entity</search>, which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search and you can try to search the information from those topics.
|
89 |
-
(2) <lookup>keyword</lookup>, which returns the next sentence containing keyword in the current context. This only does exact matches, so keep your searches short.
|
90 |
-
(3) <finish>answer</finish>, which returns the answer and finishes the task.
|
91 |
-
"""
|
92 |
-
|
93 |
-
# Define tools for ReAct (search, lookup, finish)
|
94 |
-
class ReAct:
|
95 |
-
def __init__(self, model: str, react_prompt: str):
|
96 |
-
self.model = genai.GenerativeModel(model)
|
97 |
-
self.chat = self.model.start_chat(history=[])
|
98 |
-
self.should_continue_prompting = True
|
99 |
-
self._search_history: list[str] = []
|
100 |
-
self._search_urls: list[str] = []
|
101 |
-
self._prompt = react_prompt
|
102 |
-
|
103 |
-
@classmethod
|
104 |
-
def add_method(cls, func):
|
105 |
-
setattr(cls, func.__name__, func)
|
106 |
-
|
107 |
-
@staticmethod
|
108 |
-
def clean(text: str):
|
109 |
-
text = text.replace("\n", " ")
|
110 |
-
return text
|
111 |
-
|
112 |
-
def search(self, query: str):
|
113 |
-
observation = None
|
114 |
-
query = query.strip()
|
115 |
-
try:
|
116 |
-
observation = wikipedia.summary(query, sentences=4, auto_suggest=False)
|
117 |
-
wiki_url = wikipedia.page(query, auto_suggest=False).url
|
118 |
-
observation = self.clean(observation)
|
119 |
-
self._search_history.append(query)
|
120 |
-
self._search_urls.append(wiki_url)
|
121 |
-
except (DisambiguationError, PageError) as e:
|
122 |
-
observation = f'Could not find ["{query}"]. Similar: {wikipedia.search(query)}.'
|
123 |
-
return observation
|
124 |
-
|
125 |
-
def lookup(self, phrase: str, context_length=200):
|
126 |
-
page = wikipedia.page(self._search_history[-1], auto_suggest=False).content
|
127 |
-
page = self.clean(page)
|
128 |
-
start_index = page.find(phrase)
|
129 |
-
result = page[max(0, start_index - context_length):start_index+len(phrase)+context_length]
|
130 |
-
return result
|
131 |
-
|
132 |
-
def finish(self, _):
|
133 |
-
self.should_continue_prompting = False
|
134 |
-
|
135 |
-
def __call__(self, user_question, max_calls: int=8, **generation_kwargs):
|
136 |
-
if len(self.chat.history) == 0:
|
137 |
-
model_prompt = self._prompt.format(question=user_question)
|
138 |
-
else:
|
139 |
-
model_prompt = user_question
|
140 |
-
|
141 |
-
callable_entities = ['</search>', '</lookup>', '</finish>']
|
142 |
-
generation_kwargs.update({'stop_sequences': callable_entities})
|
143 |
-
|
144 |
-
self.should_continue_prompting = True
|
145 |
-
for idx in range(max_calls):
|
146 |
-
self.response = self.chat.send_message(content=[model_prompt],
|
147 |
-
generation_config=generation_kwargs, stream=False)
|
148 |
-
response_cmd = self.chat.history[-1].parts[-1].text
|
149 |
-
try:
|
150 |
-
cmd = re.findall(r'<(.*)>', response_cmd)[-1]
|
151 |
-
query = response_cmd.split(f'<{cmd}>')[-1].strip()
|
152 |
-
observation = self.__getattribute__(cmd)(query)
|
153 |
-
|
154 |
-
if not self.should_continue_prompting:
|
155 |
-
break
|
156 |
-
|
157 |
-
model_prompt = f"\nObservation {idx + 1}\n{observation}"
|
158 |
-
|
159 |
-
except (IndexError, AttributeError) as e:
|
160 |
-
model_prompt = "Please try to generate thought-action-observation traces."
|
161 |
-
|
162 |
-
# Initialize ReAct with model and instructions
|
163 |
-
react_agent = ReAct(model=react_model, react_prompt=model_instructions)
|
164 |
-
|
165 |
-
search_history = set()
|
166 |
-
wikipedia_info = []
|
167 |
-
|
168 |
-
for query in keywords:
|
169 |
-
# Use ReAct to search and extract information
|
170 |
-
react_agent(query)
|
171 |
-
response_text = react_agent.response.text
|
172 |
-
|
173 |
-
# Process response_text to extract information
|
174 |
-
observations = []
|
175 |
-
for line in response_text.strip().split('\n'):
|
176 |
-
if line.startswith("Observation"):
|
177 |
-
observations.append(line.split(':')[-1].strip())
|
178 |
-
|
179 |
-
# Check if observations list is not empty before accessing elements
|
180 |
-
if observations:
|
181 |
-
summary = observations[-1]
|
182 |
-
url = react_agent._search_urls[-1]
|
183 |
-
|
184 |
-
# Create a dictionary with the extracted information
|
185 |
-
wikipedia_info.append({
|
186 |
-
"topic": query, # Using the original query as the topic
|
187 |
-
"summary": summary,
|
188 |
-
"url": url
|
189 |
-
})
|
190 |
-
else:
|
191 |
-
# Handle the case where no observations were found (optional)
|
192 |
-
print(f"No observations found for query: {query}")
|
193 |
-
|
194 |
-
return wikipedia_info
|
195 |
-
|
196 |
def extract_keywords_simple(extracted_text):
|
197 |
"""Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
|
198 |
prompt = """
|
@@ -299,16 +171,26 @@ def clear_chat():
|
|
299 |
st.session_state.chat_history = []
|
300 |
|
301 |
def show_intro_popup():
|
302 |
-
"""Displays a popup with information about Sherlock and chat instructions
|
303 |
-
|
|
|
304 |
st.write("""
|
305 |
**Meet Sherlock Holmes, the world's most renowned detective!**
|
306 |
|
307 |
This application allows you to:
|
308 |
-
* **Investigate Cases:** Upload case files and images for Sherlock to analyze.
|
309 |
-
* **Chat with Sherlock:** Ask him questions and get his insights.
|
310 |
|
311 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
""")
|
313 |
|
314 |
def investigate():
|
|
|
65 |
st.error(f"Error generating embeddings: {e}")
|
66 |
return embeddings
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def extract_keywords_simple(extracted_text):
|
69 |
"""Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
|
70 |
prompt = """
|
|
|
171 |
st.session_state.chat_history = []
|
172 |
|
173 |
def show_intro_popup():
|
174 |
+
"""Displays a popup with detailed information about Sherlock and chat instructions,
|
175 |
+
emphasizing the importance of reliable sources and cautioning against misleading online information."""
|
176 |
+
with st.expander("Welcome to AI Detective Sherlock Holmes!"):
|
177 |
st.write("""
|
178 |
**Meet Sherlock Holmes, the world's most renowned detective!**
|
179 |
|
180 |
This application allows you to:
|
|
|
|
|
181 |
|
182 |
+
* **Investigate Cases:** Upload case files and images for Sherlock to analyze. He will use his exceptional deductive reasoning skills to uncover clues and provide insights.
|
183 |
+
* **Chat with Sherlock:** Ask him questions and get his expert opinion on various aspects of the case.
|
184 |
+
|
185 |
+
**To chat with Sherlock, go to the "Chat with Sherlock" page in the sidebar.**
|
186 |
+
|
187 |
+
**Important Note:**
|
188 |
+
|
189 |
+
While the internet can be a valuable resource, it's crucial to be aware that not all information found online is accurate or reliable. To ensure the integrity of our investigation, we prioritize the use of trustworthy sources such as official documents, witness testimonies, and expert analysis.
|
190 |
+
|
191 |
+
Therefore, **we advise against relying solely on internet searches** during the investigation process. Instead, focus on providing Sherlock with concrete evidence and factual information to facilitate his deductions.
|
192 |
+
|
193 |
+
Remember, the truth is often elusive, but with careful observation, logical reasoning, and a discerning eye, we can unravel even the most complex mysteries.
|
194 |
""")
|
195 |
|
196 |
def investigate():
|