"""Training utilities: S3 upload with progress, an LR schedule, and loss plotting."""

import math
import os

import boto3
from boto3.s3.transfer import TransferConfig
from tqdm import tqdm


def upload_file_to_s3(file_path, bucket_name, s3_prefix):
    """Upload a local file to S3 with a tqdm progress bar.

    Uses multipart transfer for large files. The object key is
    "<s3_prefix>/<basename of file_path>".

    Args:
        file_path: Path to the local file to upload.
        bucket_name: Target S3 bucket name.
        s3_prefix: Key prefix under which the file is stored.

    Returns:
        The "s3://<bucket>/<key>" URI on success, or None on failure.
    """

    class ProgressPercentage(object):
        # Byte-counting callback that drives a tqdm bar sized to the file.
        def __init__(self, filename):
            self._filename = filename
            self._size = float(os.path.getsize(filename))
            self._seen_so_far = 0
            self._pbar = tqdm(
                total=self._size,
                unit='B',
                unit_scale=True,
                desc=f"Uploading {os.path.basename(filename)}",
            )

        def __call__(self, bytes_amount):
            self._seen_so_far += bytes_amount
            self._pbar.update(bytes_amount)

        def close(self):
            # Finalize the bar so terminal output is left clean.
            self._pbar.close()

    s3_client = boto3.client('s3')
    file_name = os.path.basename(file_path)
    s3_path = f"{s3_prefix}/{file_name}"

    # Configure multipart upload.
    # BUG FIX: the original used 1024 * 25 (25 KB) while its comments said
    # 25 MB.  A 25 KB threshold forces a multipart upload for virtually every
    # file, and a 25 KB chunk size is below S3's 5 MB minimum part size.
    config = TransferConfig(
        multipart_threshold=25 * 1024 * 1024,  # 25MB
        max_concurrency=10,
        multipart_chunksize=25 * 1024 * 1024,  # 25MB
        use_threads=True,
    )

    progress = ProgressPercentage(file_path)
    try:
        s3_client.upload_file(
            file_path,
            bucket_name,
            s3_path,
            Config=config,
            Callback=progress,
        )
        return f"s3://{bucket_name}/{s3_path}"
    except Exception as e:
        print(f"Failed to upload {file_path} to S3: {str(e)}")
        return None
    finally:
        # BUG FIX: the original never closed the tqdm bar.
        progress.close()


# Learning-rate schedule hyperparameters.
max_lr = 1e-3
warmup_steps = 10
max_steps = 25000


def get_lr_lambda(current_step, warmup_steps, max_steps, max_lr):
    """
    Learning rate scheduler with:
    1. Linear warmup
    2. Cosine decay
    3. Minimum learning rate of 10% of max_lr
    """
    min_lr = max_lr * 0.1  # Minimum learning rate (10% of max_lr)

    if current_step < warmup_steps:
        # Linear warmup; +1 so step 0 gets a non-zero LR.
        return max_lr * (current_step + 1) / warmup_steps
    elif current_step > max_steps:
        # After max_steps, return minimum learning rate
        return min_lr
    else:
        # Cosine decay between warmup_steps and max_steps
        decay_ratio = (current_step - warmup_steps) / (max_steps - warmup_steps)
        coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
        return min_lr + coeff * (max_lr - min_lr)


def plot_lr_schedule():
    """
    Helper function to visualize the learning rate schedule
    """
    import matplotlib.pyplot as plt

    steps = list(range(0, max_steps + 100))
    lrs = [get_lr_lambda(step, warmup_steps, max_steps, max_lr) for step in steps]

    plt.figure(figsize=(10, 5))
    plt.plot(steps, lrs)
    plt.title('Learning Rate Schedule')
    plt.xlabel('Steps')
    plt.ylabel('Learning Rate')
    plt.grid(True)
    plt.show()


def plot_training_loss(log_file_path, output_path=None):
    """
    Parse a training log file and plot the running average loss against batch steps.
    Also adds a trend line to visualize the overall training progress.

    Args:
        log_file_path (str): Path to the training log file
        output_path (str, optional): Path to save the plot as PNG.
            If None, displays the plot instead.
    """
    import re

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.optimize import curve_fit

    # Regular expression to extract batch number and loss
    pattern = r"Batch (\d+), Running Avg Loss: ([0-9.]+)"
    steps = []
    losses = []

    # Read and parse the log file
    with open(log_file_path, 'r') as file:
        for line in file:
            match = re.search(pattern, line)
            if match:
                batch_num = int(match.group(1))
                loss = float(match.group(2))
                steps.append(batch_num)
                losses.append(loss)

    if not steps:
        print("No loss data found in the log file.")
        return

    # Create the plot
    plt.figure(figsize=(12, 6))
    plt.plot(steps, losses, 'b-', alpha=0.5, label='Running Avg Loss')

    # Add trend line (using polynomial fit)
    def poly_func(x, a, b, c):
        # Quadratic model for the overall loss trajectory.
        return a * x**2 + b * x + c

    # Convert to numpy arrays for curve fitting
    x_array = np.array(steps)
    y_array = np.array(losses)

    # Fit the curve
    try:
        popt, _ = curve_fit(poly_func, x_array, y_array)
        x_line = np.linspace(min(steps), max(steps), 1000)
        y_line = poly_func(x_line, *popt)
        plt.plot(x_line, y_line, 'r-', label='Trend Line')
    except Exception as e:
        print(f"Could not fit trend line: {e}")
        # Fallback to simple moving average for trend
        window_size = min(len(steps) // 10, 100) if len(steps) > 100 else len(steps) // 2
        if window_size > 0:
            moving_avg = np.convolve(y_array, np.ones(window_size) / window_size, mode='valid')
            # 'valid' mode shortens the series; align from window_size-1 onward.
            plt.plot(steps[window_size - 1:], moving_avg, 'r-', label='Moving Average Trend')

    # Add labels and title
    plt.xlabel('Batch Number')
    plt.ylabel('Running Average Loss')
    plt.title('Training Loss Over Time')
    plt.grid(True)
    plt.legend()

    # Add min and max loss annotations
    min_loss = min(losses)
    min_idx = losses.index(min_loss)
    max_loss = max(losses)
    max_idx = losses.index(max_loss)

    plt.annotate(f'Min: {min_loss:.5f}',
                 xy=(steps[min_idx], min_loss),
                 xytext=(steps[min_idx], min_loss * 1.05),
                 arrowprops=dict(facecolor='green', shrink=0.05),
                 fontsize=10)

    plt.annotate(f'Max: {max_loss:.5f}',
                 xy=(steps[max_idx], max_loss),
                 xytext=(steps[max_idx], max_loss * 0.95),
                 arrowprops=dict(facecolor='red', shrink=0.05),
                 fontsize=10)

    # Save or show the plot
    plt.tight_layout()
    if output_path:
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Plot saved to {output_path}")
    else:
        plt.show()


if __name__ == "__main__":
    # plot_lr_schedule()
    plot_training_loss("training.log", "train_loss.png")