jayash391 committed on
Commit
9901b65
·
verified ·
1 Parent(s): 9dba1cd

Update sherlock2.py

Browse files
Files changed (1) hide show
  1. sherlock2.py +21 -37
sherlock2.py CHANGED
@@ -21,7 +21,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY_PROJECTID"))
21
 
22
  # Load pre-trained Gemini model
23
  model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
24
- vision_model = genai.GenerativeModel('models/gemini-pro-vision')
25
 
26
  # Define Sherlock Holmes's persona and guidelines
27
  sherlock_persona = """
@@ -50,21 +50,6 @@ Let your every utterance and action be a masterpiece of deductive prowess, a sym
50
  # Generate embeddings using the Gemini Embedding API
51
  embed_model = 'models/embedding-001'
52
 
53
- # Function for embedding generation (using models/embedding-001)
54
- def generate_embeddings_from_documents(extracted_text):
55
- """Generates embeddings for a list of extracted text documents using the 'models/embedding-001' model
56
- and the appropriate task type."""
57
- embeddings = []
58
- for text in extracted_text:
59
- try:
60
- # Determine the appropriate task type (e.g., "RETRIEVAL_DOCUMENT" for search/similarity)
61
- task_type = "RETRIEVAL_DOCUMENT"
62
- response = genai.embed_content(model=embed_model, content=text, task_type=task_type)
63
- embeddings.append(response["embedding"])
64
- except Exception as e:
65
- st.error(f"Error generating embeddings: {e}")
66
- return embeddings
67
-
68
  def extract_keywords_simple(extracted_text):
69
  """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
70
  prompt = """
@@ -77,15 +62,16 @@ def extract_keywords_simple(extracted_text):
77
  return keywords
78
 
79
  # Function to extract text from various file types
80
- def extract_text_from_files(uploaded_files):
81
- """Extracts text content from a list of uploaded files, handling various file types."""
82
- extracted_text = []
83
  for uploaded_file in uploaded_files:
84
  file_type = uploaded_file.type
85
  if file_type == "text/plain":
86
  # Plain Text File
87
  raw_text = str(uploaded_file.read(), "utf-8")
88
- extracted_text.append(raw_text.strip())
 
89
  elif file_type == "application/pdf":
90
  # PDF Document
91
  pdf_reader = PyPDF2.PdfReader(uploaded_file)
@@ -93,19 +79,21 @@ def extract_text_from_files(uploaded_files):
93
  for page_num in range(len(pdf_reader.pages)):
94
  page = pdf_reader.pages[page_num]
95
  text += page.extract_text()
96
- extracted_text.append(text)
 
97
  else:
98
  # Other Document Types (Using Textract)
99
  try:
100
  text = textract.process(uploaded_file).decode("utf-8")
101
- extracted_text.append(text)
 
102
  except Exception as e:
103
  st.error(f"Error extracting text from file: {e}")
104
- return extracted_text
105
 
106
- # Function to process images using Gemini Pro Vision
107
  def process_images(uploaded_images):
108
- """Processes a list of uploaded images using Gemini Pro Vision to extract relevant information."""
109
  image_insights = []
110
  for uploaded_image in uploaded_images:
111
  try:
@@ -171,7 +159,7 @@ def clear_chat():
171
  st.session_state.chat_history = []
172
 
173
  def investigate():
174
- """Handles the case investigation process with improved UI and functionality."""
175
  st.subheader("Case Investigation")
176
 
177
  # File upload with clear labels and progress bars
@@ -189,24 +177,20 @@ def investigate():
189
 
190
  # Extract text and process images with progress indication
191
  with st.spinner("Extracting text and analyzing images..."):
192
- case_text = extract_text_from_files(documents)
193
- keywords = extract_keywords_simple("\n\n".join(case_text))
194
- case_embeddings = generate_embeddings_from_documents(case_text)
195
  image_insights = process_images(images)
196
 
197
  combined_information = {
198
- "case_text": case_text,
199
  "image_insights": image_insights,
200
  "keywords": keywords
201
  }
202
 
203
- # Convert case_embeddings to a string
204
- case_embeddings_str = " ".join(str(embedding) for embedding in case_embeddings)
205
-
206
  prompt = """
207
  You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
208
  suggestions for further investigation:
209
- """ + str(combined_information) + "\nCase Embeddings: " + case_embeddings_str
210
 
211
  response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
212
 
@@ -214,12 +198,12 @@ def investigate():
214
  with st.expander("Sherlock's Analysis and Suggestions:"):
215
  st.write(response.text)
216
 
217
- web_search_results = [] # Add this line
218
 
219
  search_options = st.multiselect("Search for additional clues:", ["Internet"], default=["Internet"])
220
  if st.button("Search"):
221
  with st.spinner("Searching for clues..."):
222
- web_search_results = search_internet("\n\n".join(case_text))
223
  st.subheader("Internet Search Results:")
224
  for result in web_search_results:
225
  st.write(f"**Title:** {result['title']}")
@@ -235,7 +219,7 @@ def investigate():
235
  including deductions, potential suspects, and conclusions.
236
  """
237
  final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
238
- case_embeddings_str, str(web_search_results)]) # Removed wikipedia_info
239
  st.header("Case Report")
240
  st.write(final_report.text)
241
 
 
21
 
22
  # Load pre-trained Gemini model
23
  model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
24
+ vision_model = genai.GenerativeModel('models/gemini-1.0-pro-vision-latest')
25
 
26
  # Define Sherlock Holmes's persona and guidelines
27
  sherlock_persona = """
 
50
  # Generate embeddings using the Gemini Embedding API
51
  embed_model = 'models/embedding-001'
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def extract_keywords_simple(extracted_text):
54
  """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
55
  prompt = """
 
62
  return keywords
63
 
64
  # Function to extract text from various file types
65
+ def extract_text_and_embeddings(uploaded_files):
66
+ """Extracts text content and generates embeddings for a list of uploaded files."""
67
+ extracted_data = []
68
  for uploaded_file in uploaded_files:
69
  file_type = uploaded_file.type
70
  if file_type == "text/plain":
71
  # Plain Text File
72
  raw_text = str(uploaded_file.read(), "utf-8")
73
+ embedding = genai.embed_content(model=embed_model, content=raw_text.strip(), task_type="RETRIEVAL_DOCUMENT")["embedding"]
74
+ extracted_data.append({"text": raw_text.strip(), "embedding": embedding})
75
  elif file_type == "application/pdf":
76
  # PDF Document
77
  pdf_reader = PyPDF2.PdfReader(uploaded_file)
 
79
  for page_num in range(len(pdf_reader.pages)):
80
  page = pdf_reader.pages[page_num]
81
  text += page.extract_text()
82
+ embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
83
+ extracted_data.append({"text": text, "embedding": embedding})
84
  else:
85
  # Other Document Types (Using Textract)
86
  try:
87
  text = textract.process(uploaded_file).decode("utf-8")
88
+ embedding = genai.embed_content(model=embed_model, content=text, task_type="RETRIEVAL_DOCUMENT")["embedding"]
89
+ extracted_data.append({"text": text, "embedding": embedding})
90
  except Exception as e:
91
  st.error(f"Error extracting text from file: {e}")
92
+ return pd.DataFrame(extracted_data)
93
 
94
+ # Function to process images using Gemini 1.0 Pro Vision
95
  def process_images(uploaded_images):
96
+ """Processes a list of uploaded images using Gemini 1.0 Pro Vision to extract relevant information."""
97
  image_insights = []
98
  for uploaded_image in uploaded_images:
99
  try:
 
159
  st.session_state.chat_history = []
160
 
161
  def investigate():
162
+ """Handles the case investigation process with Pandas for embeddings."""
163
  st.subheader("Case Investigation")
164
 
165
  # File upload with clear labels and progress bars
 
177
 
178
  # Extract text and process images with progress indication
179
  with st.spinner("Extracting text and analyzing images..."):
180
+ case_data = extract_text_and_embeddings(documents)
181
+ keywords = extract_keywords_simple("\n\n".join(case_data["text"]))
 
182
  image_insights = process_images(images)
183
 
184
  combined_information = {
185
+ "case_data": case_data,
186
  "image_insights": image_insights,
187
  "keywords": keywords
188
  }
189
 
 
 
 
190
  prompt = """
191
  You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
192
  suggestions for further investigation:
193
+ """ + str(combined_information)
194
 
195
  response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
196
 
 
198
  with st.expander("Sherlock's Analysis and Suggestions:"):
199
  st.write(response.text)
200
 
201
+ web_search_results = []
202
 
203
  search_options = st.multiselect("Search for additional clues:", ["Internet"], default=["Internet"])
204
  if st.button("Search"):
205
  with st.spinner("Searching for clues..."):
206
+ web_search_results = search_internet("\n\n".join(case_data["text"]))
207
  st.subheader("Internet Search Results:")
208
  for result in web_search_results:
209
  st.write(f"**Title:** {result['title']}")
 
219
  including deductions, potential suspects, and conclusions.
220
  """
221
  final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
222
+ str(web_search_results)])
223
  st.header("Case Report")
224
  st.write(final_report.text)
225