Spaces:

George-API/qwen4bit

Sleeping

App Files Community

George-API committed on Mar 11

Commit

cc775c6

verified ·

1 Parent(s): 87e4dc7

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +34 -7

app.py CHANGED Viewed

@@ -3,8 +3,10 @@ import os
 import json
 import torch
 import subprocess
 from dotenv import load_dotenv
 import logging
 # Configure logging
 logging.basicConfig(
@@ -38,16 +40,39 @@ model_config = config.get("model_config", {})
 MODEL_NAME = model_config.get("model_name_or_path", "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit")
 SPACE_NAME = os.getenv("HF_SPACE_NAME", "phi4training")
 # Function to start the training process
 def start_training():
     try:
-        # Run the training script directly - IMPORTANT: Don't redirect output so container logs show
-        # Using nohup to ensure process continues even if web request ends
-        os.system("nohup python run_cloud_training.py > training.log 2>&1 &")
         # Log the start of training
-        logger.info("Training started - Check Hugging Face logs for details")
-        print("Training process initiated! This will appear in Hugging Face logs.")
         return """
         ✅ Training process initiated!
@@ -56,7 +81,7 @@ def start_training():
         To monitor progress:
         1. Check the Hugging Face space logs in the "Logs" tab
-        2. Training metrics will be available in the Hugging Face UI
         3. The process will continue running in the background
         NOTE: This is a research training phase only, no model outputs will be available.
@@ -121,6 +146,8 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
 # Launch the interface
 if __name__ == "__main__":
     # Start Gradio with minimal features
     logger.info("Starting research training dashboard")
-    print("Research training dashboard started - Logs will be visible here")
     demo.launch(share=False)

 import json
 import torch
 import subprocess
+import sys
 from dotenv import load_dotenv
 import logging
+import threading
 # Configure logging
 logging.basicConfig(
 MODEL_NAME = model_config.get("model_name_or_path", "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit")
 SPACE_NAME = os.getenv("HF_SPACE_NAME", "phi4training")
+# Function to run training in a thread and stream output to container logs
+def run_training():
+    """Run the training script and stream its output to container logs"""
+    process = subprocess.Popen(
+        ["python", "run_cloud_training.py"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        universal_newlines=True,
+        bufsize=1
+    )
+    # Stream output directly to sys.stdout (container logs)
+    for line in iter(process.stdout.readline, ''):
+        sys.stdout.write(line)
+        sys.stdout.flush()
 # Function to start the training process
 def start_training():
     try:
+        # Print directly to container logs
+        print("\n===== STARTING TRAINING PROCESS =====\n")
+        print(f"Model: {MODEL_NAME}")
+        print(f"Training with configuration from transformers_config.json")
+        print("Training logs will appear below:")
+        print("=" * 50)
+        # Start training in a separate thread
+        training_thread = threading.Thread(target=run_training)
+        training_thread.daemon = True  # Allow the thread to be terminated when app exits
+        training_thread.start()
         # Log the start of training
+        logger.info("Training started in background thread")
         return """
         ✅ Training process initiated!
         To monitor progress:
         1. Check the Hugging Face space logs in the "Logs" tab
+        2. You should see training output appearing directly in the logs
         3. The process will continue running in the background
         NOTE: This is a research training phase only, no model outputs will be available.
 # Launch the interface
 if __name__ == "__main__":
     # Start Gradio with minimal features
+    print("\n===== RESEARCH TRAINING DASHBOARD STARTED =====\n")
+    print("Click 'Start Training' to begin the fine-tuning process")
+    print("All training output will appear in these logs")
     logger.info("Starting research training dashboard")
     demo.launch(share=False)