burtenshaw committed
Commit 99bcf43 · 1 Parent(s): 985b2b6

add create presentation script using command a

Files changed (1):
  1. scripts/create_presentation.py +266 -0
scripts/create_presentation.py ADDED
@@ -0,0 +1,266 @@
import argparse
import os
import re
import sys
import time

from huggingface_hub import InferenceClient
from requests.exceptions import RequestException

# Default model ID; can be overridden via the LLM_MODEL environment variable.
DEFAULT_LLM_MODEL = "CohereLabs/c4ai-command-a-03-2025"


def parse_arguments():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Generate a Remark.js presentation from a Markdown file using an LLM."
    )
    parser.add_argument(
        "input_file", help="Path to the input Markdown (.md or .mdx) file."
    )
    parser.add_argument(
        "-o",
        "--output_file",
        help="Path to the output presentation file. Defaults to <input_file_name>_presentation.md",
    )
    return parser.parse_args()
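
# Usage sketch for the CLI defined above (paths are illustrative, not from this repo):
#   python scripts/create_presentation.py chapters/en/chapter1/introduction.mdx
#   python scripts/create_presentation.py notes.md -o slides/notes_presentation.md
# HF_API_KEY must be set in the environment (see main() below).
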

def read_input_file(filepath):
    """Read content from the specified file."""
    if not os.path.exists(filepath):
        print(f"Error: Input file not found at {filepath}")
        return None
    print(f"Reading input file: {filepath}")
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        print(f"Error reading file {filepath}: {e}")
        return None


def generate_presentation_with_llm(
    client, llm_model, full_markdown_content, input_filename
):
    """Generate the entire presentation using the LLM."""
    if not client:
        print("LLM client not available. Cannot generate presentation.")
        return None

    # Input length is not capped here; large-context models (like the default
    # Command A) usually cope. Add a truncation step at this point if you hit
    # context-length errors.

    prompt = f"""
You are an expert technical writer and presentation creator. Your task is to convert the following Markdown course material into a complete Remark.js presentation file.

**Input Markdown Content:**

{full_markdown_content}

**Instructions:**

1. **Structure:** Create slides based on the logical sections of the input markdown. Use `## ` headings in the input as the primary indicator for new slides.
2. **Slide Format:** Each slide should start with `# Slide Title` derived from the corresponding `## Heading`.
3. **Content:** Include the relevant text, code blocks (preserving language identifiers like ```python), and lists from the input markdown within each slide.
4. **Images:** Convert Markdown images `![alt](url)` into Remark.js format: `.center[![alt](url)]`. Ensure the image URL is correct and accessible.
5. **Presenter Notes:** For each slide, generate concise speaker notes (2-4 sentences) summarizing the key points, definitions, or context. Place these notes after the slide content, separated by `???`.
6. **Separators:** Separate individual slides using `\n\n---\n\n`.
7. **Cleanup:** Do NOT include any HTML/MDX specific tags like `<CourseFloatingBanner>`, `<Tip>`, `<Question>`, `<Youtube>`, or internal links like `[[...]]` in the final output. Remove frontmatter if present.
8. **Start/End:**
    * Begin the presentation with a title slide:
      ```markdown
      class: impact

      # Presentation based on {os.path.basename(input_filename)}
      ## Generated Presentation

      .center[![Hugging Face Logo](https://huggingface.co/front/assets/huggingface_logo.svg)]

      ???
      This presentation was automatically generated from the content of {os.path.basename(input_filename)}. It covers the key topics discussed in the material.
      ```
    * End the presentation with a final "Thank You" slide:
      ```markdown
      class: center, middle

      # Thank You!

      ???
      This concludes the presentation generated from the provided material.
      ```
9. **Output:** Provide ONLY the complete Remark.js Markdown content, starting with the title slide and ending with the thank you slide, with all generated slides in between. Do not include any introductory text or explanations before or after the presentation markdown.

**Generate the Remark.js presentation now:**
"""
    max_retries = 2
    retry_delay = 10  # seconds; generation can take time
    for attempt in range(max_retries):
        try:
            print(
                f"Attempting LLM generation (Attempt {attempt + 1}/{max_retries})... This may take a while."
            )
            # InferenceClient exposes an OpenAI-compatible chat completion API.
            completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=llm_model,
                max_tokens=8000,  # large budget for a full presentation; adjust to the model's limits
                temperature=0.3,  # lower temperature for more deterministic structure following
            )
            presentation_content = completion.choices[0].message.content.strip()

            # Basic validation: check that it looks like a Remark.js presentation.
            if "---" in presentation_content and "???" in presentation_content:
                # Attempt to strip any preamble/postamble from the LLM response:
                # find the first `class: impact` marker and the notes separator
                # of the last ("Thank You") slide.
                start_match = re.search(r"class:\s*impact", presentation_content)
                thank_you_slide_end_index = presentation_content.rfind("\n\n???\n")

                if start_match and thank_you_slide_end_index != -1:
                    start_index = start_match.start()
                    # Find the end of the notes block for the thank-you slide:
                    # a double newline not followed by ??? or ---, or else the
                    # end of the string.
                    end_of_notes_pattern = re.compile(
                        r"\n\n(?!(\?\?\?|---))", re.MULTILINE
                    )
                    end_match = end_of_notes_pattern.search(
                        presentation_content,
                        thank_you_slide_end_index + len("\n\n???\n"),
                    )
                    if end_match:
                        end_index = end_match.start()  # end before the double newline
                    else:  # no clear end found after the notes; take the rest of the string
                        end_index = len(presentation_content)

                    presentation_content = presentation_content[
                        start_index:end_index
                    ].strip()
                    print("LLM generation successful.")
                    return presentation_content
                elif start_match:  # fallback if the end marker is missing but the start is okay
                    presentation_content = presentation_content[
                        start_match.start():
                    ].strip()
                    print("LLM generation successful (end marker adjustment needed).")
                    return presentation_content
                else:
                    print(
                        "Warning: Generated content might not start correctly. Using full response."
                    )
                    return presentation_content  # return raw content if markers are not found

            else:
                print(
                    "Warning: Generated content doesn't seem to contain expected Remark.js separators (---, ???)."
                )
                return presentation_content  # return raw content for inspection

        except RequestException as e:
            print(f"API Request Error (Attempt {attempt + 1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached for API request.")
                return None
        except Exception as e:
            # Safely surface response details if the exception carries them.
            response_details = ""
            if hasattr(e, "response"):
                try:
                    status = getattr(e.response, "status_code", "N/A")
                    text = getattr(e.response, "text", "N/A")
                    response_details = f" (Status: {status}, Body: {text[:500]}...)"  # limit body length
                except Exception as inner_e:
                    response_details = (
                        f" (Could not parse error response details: {inner_e})"
                    )
            print(
                f"Error during LLM call (Attempt {attempt + 1}/{max_retries}): {e}{response_details}"
            )

            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached for LLM call.")
                return None

    print("Failed to generate presentation after multiple retries.")
    return None
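
# For orientation, each slide the prompt above asks for has roughly this shape
# (illustrative, not produced by this script directly):
#
#   # Slide Title
#
#   Body text, lists, and code blocks from the source section.
#
#   ???
#   2-4 sentences of speaker notes.
#
# with consecutive slides separated by a line containing only `---`.
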

def write_output_file(filepath, content):
    """Write the presentation content to the output file."""
    if content is None:
        print("No content to write.")
        return
    print(f"\nWriting presentation to: {filepath}")
    try:
        # Ensure the output directory exists (dirname is empty when writing
        # to the current directory).
        output_dir = os.path.dirname(filepath)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
        print("Successfully generated presentation.")
    except Exception as e:
        print(f"Error writing output file {filepath}: {e}")


# --- Main Orchestration ---


def main():
    """Orchestrate presentation generation."""
    args = parse_arguments()

    # Determine the output file path.
    if args.output_file:
        output_file_path = args.output_file
    else:
        base_name = os.path.splitext(os.path.basename(args.input_file))[0]
        # Default to the input file's directory ("." if the input path has none).
        output_dir = os.path.dirname(args.input_file)
        output_file_path = os.path.join(
            output_dir or ".", f"{base_name}_presentation.md"
        )
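    # Example (hypothetical): "docs/intro.mdx" -> "docs/intro_presentation.md"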

    # Get config from the environment.
    hf_api_key = os.environ.get("HF_API_KEY")
    if not hf_api_key:
        print("Warning: HF_API_KEY is not set; the API request may fail.")
    llm_model = os.environ.get("LLM_MODEL", DEFAULT_LLM_MODEL)

    client = InferenceClient(token=hf_api_key, provider="cohere")

    # Read input.
    all_content = read_input_file(args.input_file)
    if all_content is None:
        sys.exit(1)  # exit if file reading failed

    # Generate the presentation using the LLM.
    print(f"Requesting presentation generation from model '{llm_model}'...")
    final_presentation_content = generate_presentation_with_llm(
        client, llm_model, all_content, args.input_file
    )

    # Write output.
    write_output_file(output_file_path, final_presentation_content)

    print("Script finished.")


if __name__ == "__main__":
    main()
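
The functions also compose directly for programmatic use; a minimal sketch, assuming HF_API_KEY is set and the names above are in scope (the input path is hypothetical):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.environ["HF_API_KEY"], provider="cohere")
    content = read_input_file("chapters/en/chapter1/introduction.mdx")  # hypothetical path
    if content:
        deck = generate_presentation_with_llm(
            client, DEFAULT_LLM_MODEL, content, "introduction.mdx"
        )
        write_output_file("introduction_presentation.md", deck)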