ProfessorLeVesseur committed on
Commit
0d32d1f
·
verified ·
1 Parent(s): 3670dab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -24
app.py CHANGED
@@ -301,12 +301,10 @@ for key in ['pdf_processed', 'markdown_texts', 'df']:
301
  # ---------------------------------------------------------------------------------------
302
  # API Configuration
303
  # ---------------------------------------------------------------------------------------
304
- # Retrieve Hugging Face API key from environment variables
305
  hf_api_key = os.getenv('HF_API_KEY')
306
  if not hf_api_key:
307
  raise ValueError("HF_API_KEY not set in environment variables")
308
 
309
- # Create the Hugging Face inference client
310
  client = InferenceClient(api_key=hf_api_key)
311
 
312
  # ---------------------------------------------------------------------------------------
@@ -321,8 +319,8 @@ class SurveyAnalysis:
321
  Instructions:
322
  - Extract exact quotes per topic.
323
  - Ignore irrelevant topics.
 
324
 
325
- Format:
326
  [Topic]
327
  - "Exact quote"
328
 
@@ -331,32 +329,31 @@ Meeting Notes:
331
  """
332
 
333
  def prompt_response_from_hf_llm(self, llm_input):
334
- # Define a system prompt to guide the model's responses
335
  system_prompt = """
336
- <Persona> An expert Implementation Specialist at Michigan's Multi-Tiered System of Support Technical Assistance Center (MiMTSS TA Center) with deep expertise in SWPBIS, SEL, Structured Literacy, Science of Reading, and family engagement practices.</Persona>
337
- <Task> Analyze educational data and provide evidence-based recommendations for improving student outcomes across multiple tiers of support, drawing from established frameworks in behavioral interventions, literacy instruction, and family engagement.</Task>
338
- <Context> Operating within Michigan's educational system to support schools in implementing multi-tiered support systems, with access to student metrics data and knowledge of state-specific educational requirements and MTSS frameworks. </Context>
339
- <Format> Deliver insights through clear, actionable recommendations supported by data analysis, incorporating technical expertise while maintaining accessibility for educators and administrators at various levels of MTSS implementation.</Format>
 
 
 
 
 
340
  """
341
-
342
- # Generate the refined prompt using Hugging Face API
343
  response = client.chat.completions.create(
344
  model="meta-llama/Llama-3.1-70B-Instruct",
345
  messages=[
346
- {"role": "system", "content": system_prompt}, # Add system prompt here
347
  {"role": "user", "content": llm_input}
348
  ],
349
- stream=True,
350
  temperature=0.5,
351
  max_tokens=1024,
352
  top_p=0.7
353
  )
354
-
355
- # Combine messages if response is streamed
356
- response_content = ""
357
- for message in response:
358
- response_content += message.choices[0].delta.content
359
 
 
 
360
  return response_content.strip()
361
 
362
  def extract_text(self, response):
@@ -367,7 +364,6 @@ Meeting Notes:
367
  for _, row in df.iterrows():
368
  llm_input = self.prepare_llm_input(row['Document_Text'], topics)
369
  response = self.prompt_response_from_hf_llm(llm_input)
370
- print("AI Response:", response) # Debugging: print the AI response
371
  notes = self.extract_text(response)
372
  results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
373
  return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
@@ -408,17 +404,24 @@ def extract_markdown_from_image(image):
408
  doc.load_from_doctags(doctags_doc)
409
  return doc.export_to_markdown()
410
 
 
411
  def extract_excerpts(processed_df):
412
  rows = []
413
  for _, r in processed_df.iterrows():
414
- for sec in re.split(r'\n(?=\[)', r['Topic_Summary']):
415
- topic_match = re.match(r'\[([^\]]+)\]', sec)
 
416
  if topic_match:
417
- topic = topic_match.group(1)
418
- excerpts = re.findall(r'- "([^"]+)"', sec)
419
  for excerpt in excerpts:
420
- rows.append({'Document_Text': r['Document_Text'], 'Topic_Summary': r['Topic_Summary'], 'Excerpt': excerpt, 'Topic': topic})
421
- print("Extracted Rows:", rows) # Debugging: print extracted rows
 
 
 
 
 
422
  return pd.DataFrame(rows)
423
 
424
  # ---------------------------------------------------------------------------------------
 
301
  # ---------------------------------------------------------------------------------------
302
  # API Configuration
303
  # ---------------------------------------------------------------------------------------
 
304
  # Read the Hugging Face API key from the HF_API_KEY environment variable.
  hf_api_key = os.getenv('HF_API_KEY')
305
  # Fail fast when the key is missing or empty instead of building a client without one.
  if not hf_api_key:
306
  raise ValueError("HF_API_KEY not set in environment variables")
307
 
 
308
  # Module-level inference client; prompt_response_from_hf_llm issues its chat completion call through it.
  client = InferenceClient(api_key=hf_api_key)
309
 
310
  # ---------------------------------------------------------------------------------------
 
319
  Instructions:
320
  - Extract exact quotes per topic.
321
  - Ignore irrelevant topics.
322
+ - Strictly follow this format:
323
 
 
324
  [Topic]
325
  - "Exact quote"
326
 
 
329
  """
330
 
331
  def prompt_response_from_hf_llm(self, llm_input):
 
332
  system_prompt = """
333
+ You are an expert assistant tasked with extracting exact quotes from provided meeting notes based on given topics.
334
+
335
+ Instructions:
336
+ - Only extract exact quotes relevant to provided topics.
337
+ - Ignore irrelevant content.
338
+ - Strictly follow this format:
339
+
340
+ [Topic]
341
+ - "Exact quote"
342
  """
343
+
 
344
  response = client.chat.completions.create(
345
  model="meta-llama/Llama-3.1-70B-Instruct",
346
  messages=[
347
+ {"role": "system", "content": system_prompt},
348
  {"role": "user", "content": llm_input}
349
  ],
 
350
  temperature=0.5,
351
  max_tokens=1024,
352
  top_p=0.7
353
  )
 
 
 
 
 
354
 
355
+ response_content = response.choices[0].message.content
356
+ print("Full AI Response:", response_content) # Debugging
357
  return response_content.strip()
358
 
359
  def extract_text(self, response):
 
364
  for _, row in df.iterrows():
365
  llm_input = self.prepare_llm_input(row['Document_Text'], topics)
366
  response = self.prompt_response_from_hf_llm(llm_input)
 
367
  notes = self.extract_text(response)
368
  results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
369
  return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
 
404
  doc.load_from_doctags(doctags_doc)
405
  return doc.export_to_markdown()
406
 
407
+ # Revised extract_excerpts function with improved robustness
408
  def extract_excerpts(processed_df):
409
  rows = []
410
  for _, r in processed_df.iterrows():
411
+ sections = re.split(r'\n(?=(?:\*\*|\[)?[A-Za-z/ ]+(?:\*\*|\])?\n- )', r['Topic_Summary'])
412
+ for sec in sections:
413
+ topic_match = re.match(r'(?:\*\*|\[)?([A-Za-z/ ]+)(?:\*\*|\])?', sec.strip())
414
  if topic_match:
415
+ topic = topic_match.group(1).strip()
416
+ excerpts = re.findall(r'- "?([^"\n]+)"?', sec)
417
  for excerpt in excerpts:
418
+ rows.append({
419
+ 'Document_Text': r['Document_Text'],
420
+ 'Topic_Summary': r['Topic_Summary'],
421
+ 'Excerpt': excerpt.strip(),
422
+ 'Topic': topic
423
+ })
424
+ print("Extracted Rows:", rows) # Debugging
425
  return pd.DataFrame(rows)
426
 
427
  # ---------------------------------------------------------------------------------------