Update sherlock2.py
sherlock2.py  CHANGED  (+21 -37)
@@ -21,7 +21,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY_PROJECTID"))
 
 # Load pre-trained Gemini model
 model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
-vision_model = genai.GenerativeModel('models/gemini-pro-vision')
+vision_model = genai.GenerativeModel('models/gemini-1.0-pro-vision-latest')
 
 # Define Sherlock Holmes's persona and guidelines
 sherlock_persona = """
@@ -50,21 +50,6 @@ Let your every utterance and action be a masterpiece of deductive prowess, a sym
 # Generate embeddings using the Gemini Embedding API
 embed_model = 'models/embedding-001'
 
-# Function for embedding generation (using models/embedding-001)
-def generate_embeddings_from_documents(extracted_text):
-    """Generates embeddings for a list of extracted text documents using the 'models/embedding-001' model
-    and the appropriate task type."""
-    embeddings = []
-    for text in extracted_text:
-        try:
-            # Determine the appropriate task type (e.g., "RETRIEVAL_DOCUMENT" for search/similarity)
-            task_type = "RETRIEVAL_DOCUMENT"
-            response = genai.embed_content(model=embed_model, content=text, task_type=task_type)
-            embeddings.append(response["embedding"])
-        except Exception as e:
-            st.error(f"Error generating embeddings: {e}")
-    return embeddings
-
 def extract_keywords_simple(extracted_text):
     """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
     prompt = """
@@ -77,15 +62,16 @@ def extract_keywords_simple(extracted_text):
     return keywords
 
 # Function to extract text from various file types
-def extract_text_from_files(uploaded_files):
-    """Extracts text content
-    ...
+def extract_text_and_embeddings(uploaded_files):
+    """Extracts text content and generates embeddings for a list of uploaded files."""
+    extracted_data = []
     for uploaded_file in uploaded_files:
         file_type = uploaded_file.type
         if file_type == "text/plain":
             # Plain Text File
             raw_text = str(uploaded_file.read(), "utf-8")
-            ...
+            embedding = genai.embed_content(model=embed_model, content=raw_text.strip(), task_type="RETRIEVAL_DOCUMENT")["embedding"]
+            extracted_data.append({"text": raw_text.strip(), "embedding": embedding})
         elif file_type == "application/pdf":
             # PDF Document
             pdf_reader = PyPDF2.PdfReader(uploaded_file)
@@ -93,19 +79,21 @@ def extract_text_from_files(uploaded_files):
             for page_num in range(len(pdf_reader.pages)):
                 page = pdf_reader.pages[page_num]
                 text += page.extract_text()
-            ...
+            embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
+            extracted_data.append({"text": text, "embedding": embedding})
         else:
             # Other Document Types (Using Textract)
             try:
                 text = textract.process(uploaded_file).decode("utf-8")
-                ...
+                embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
+                extracted_data.append({"text": text, "embedding": embedding})
             except Exception as e:
                 st.error(f"Error extracting text from file: {e}")
-    return
+    return pd.DataFrame(extracted_data)
 
-# Function to process images using Gemini Pro Vision
+# Function to process images using Gemini 1.0 Pro Vision
 def process_images(uploaded_images):
-    """Processes a list of uploaded images using Gemini Pro Vision to extract relevant information."""
+    """Processes a list of uploaded images using Gemini 1.0 Pro Vision to extract relevant information."""
     image_insights = []
     for uploaded_image in uploaded_images:
         try:
@@ -171,7 +159,7 @@ def clear_chat():
     st.session_state.chat_history = []
 
 def investigate():
-    """Handles the case investigation process with
+    """Handles the case investigation process with Pandas for embeddings."""
     st.subheader("Case Investigation")
 
     # File upload with clear labels and progress bars
@@ -189,24 +177,20 @@ def investigate():
 
     # Extract text and process images with progress indication
     with st.spinner("Extracting text and analyzing images..."):
-        case_text = extract_text_from_files(documents)
-        keywords = extract_keywords_simple("\n\n".join(case_text))
-        case_embeddings = generate_embeddings_from_documents(case_text)
+        case_data = extract_text_and_embeddings(documents)
+        keywords = extract_keywords_simple("\n\n".join(case_data["text"]))
         image_insights = process_images(images)
 
     combined_information = {
-        "...
+        "case_data": case_data,
         "image_insights": image_insights,
         "keywords": keywords
     }
 
-    # Convert case_embeddings to a string
-    case_embeddings_str = " ".join(str(embedding) for embedding in case_embeddings)
-
     prompt = """
     You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
     suggestions for further investigation:
-    """ + str(combined_information)
+    """ + str(combined_information)
 
     response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
 
@@ -214,12 +198,12 @@ def investigate():
     with st.expander("Sherlock's Analysis and Suggestions:"):
         st.write(response.text)
 
-    web_search_results = []
+    web_search_results = []
 
     search_options = st.multiselect("Search for additional clues:", ["Internet"], default=["Internet"])
    if st.button("Search"):
        with st.spinner("Searching for clues..."):
-            web_search_results = search_internet("\n\n".join(case_text))
+            web_search_results = search_internet("\n\n".join(case_data["text"]))
        st.subheader("Internet Search Results:")
        for result in web_search_results:
            st.write(f"**Title:** {result['title']}")
@@ -235,7 +219,7 @@ def investigate():
     including deductions, potential suspects, and conclusions.
     """
     final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
-                                            ...
+                                            str(web_search_results)])
     st.header("Case Report")
     st.write(final_report.text)
 
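The new extract_text_and_embeddings() returns a DataFrame with a "text" column and an "embedding" column (one embedding per uploaded document). As a minimal sketch of how that column could later be queried, assuming the same embed_model and a hypothetical find_relevant_documents helper that is not part of sherlock2.py or this commit:

# Illustration only (not in sherlock2.py): ranking the stored document embeddings
# against a query with dot-product similarity.
import numpy as np
import pandas as pd
import google.generativeai as genai  # assumes genai.configure(...) has already run, as in sherlock2.py

embed_model = 'models/embedding-001'

def find_relevant_documents(query, case_data, top_k=3):
    """Hypothetical helper: return the top_k document texts most similar to the query."""
    # Queries use the RETRIEVAL_QUERY task type; documents were embedded with RETRIEVAL_DOCUMENT.
    query_embedding = genai.embed_content(model=embed_model, content=query,
                                          task_type="RETRIEVAL_QUERY")["embedding"]
    scores = case_data["embedding"].apply(lambda e: np.dot(e, query_embedding))
    return case_data.loc[scores.nlargest(top_k).index, "text"].tolist()

# Example (hypothetical query): find_relevant_documents("Where was the victim last seen?", case_data)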