mjbuehler committed on
Commit
3de3075
·
verified ·
1 Parent(s): 03b7882

Update app.py

Browse files

Added deep research

Files changed (1) hide show
  1. app.py +46 -0
app.py CHANGED
@@ -31,6 +31,52 @@ def read_readme():
31
 
32
  # Define multiple sets of instruction templates
33
  INSTRUCTION_TEMPLATES = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ################# PODCAST ##################
35
  "podcast": {
36
  "intro": """Your task is to take the input text provided and turn it into a lively, engaging, informative podcast dialogue, in the style of NPR. Do not use or make up names. The input text may be messy or unstructured, as it could come from a variety of sources like PDFs or web pages.
 
31
 
32
  # Define multiple sets of instruction templates
33
  INSTRUCTION_TEMPLATES = {
34
+
35
+ ################# DEEP DATA ANALYSIS ##################
36
+ "deep_analysis": {
37
+ # 1) High‑level task description
38
+ "intro": """You are a senior analyst who conducts deep research.
39
+
40
+ Your job is to turn the raw materials supplied below (PDF text, markdown, tables, figures or loose CSV/TXT files) into a **deep research report** that humans can read.
41
+
42
+ The finished report must contain, in this exact order:
43
+
44
+ 1. **Metadata block** – start with the title/s, authors, publication years (as they are available). If not available, start by describing the types of raw materials you analyzed.
45
+ 2. **Data extraction** – careful extraction of key data and quantitative information, presented as a carefully crafted narrative. For example, discuss the domain, industry, or area of science. Define all terms.
46
+ 3. **Key insights** – interpretation of the results. This must be comprehensive and include your thoughts and interpretation, and context.
47
+ 4. **Examples** – pick a few examples to illustrate the key concepts (one or more). Use strong storytelling to show depth while making it broadly understandable.
48
+ 5. **Strengths** – strengths of the results or data, paper or information in the raw materials.
49
+ 6. **Weaknesses** – assess weaknesses of the document, paper or data.
50
+ 7. **Relating to other fields** – relate the raw materials to other fields, historical results, contemporary work, or other significant concepts. Discuss the significance.
51
+ 8. **Open questions / action items** – what further analysis or experiments would you recommend?
52
+
53
+ Keep the narrative clear and concise, suitable for a technically literate audience, with depth. Do **not** reveal chain‑of‑thought; only present the final reasoning.""",
54
+
55
+ # 2) How to read / transform the source material
56
+ "text_instructions": """Carefully scan the input text for any data, insights, and so on.
57
+ If tables are broken across lines, reconstruct them logically to extract key insights.
58
+
59
+ Translate uncommon units to SI in parentheses, and explain.""",
60
+
61
+ # 3) Scratch‑pad / brainstorming (hidden from end‑user)
62
+ "scratch_pad": """Brainstorm here (hidden):
63
+ - Map each table to a clean DataFrame name.
64
+ - Decide which statistical measures are meaningful.
65
+ - Note any assumptions or gap‑filling you’ll need (e.g., missing column headers), uncertainties, issues with the data, and so on.
66
+ When ready, compile the final report strictly following the template above.""",
67
+
68
+ # 4) Prelude that introduces the report proper
69
+ "prelude": """Below is the structured report based on the supplied raw data:""",
70
+
71
+ # 5) Main output instructions
72
+ "dialog": """Design your output to be read aloud -- it will be directly converted into audio. The presentation of materials should include 30,000 words.
73
+
74
+ There is only one speaker, you. Stay on topic and maintain an engaging flow.
75
+
76
+ Write a clear, detailed, and well-prepared analysis and report as a single narrator. Begin every paragraph with `speaker-1:`."""
77
+ },
78
+
79
+
80
  ################# PODCAST ##################
81
  "podcast": {
82
  "intro": """Your task is to take the input text provided and turn it into a lively, engaging, informative podcast dialogue, in the style of NPR. Do not use or make up names. The input text may be messy or unstructured, as it could come from a variety of sources like PDFs or web pages.