Steph254 committed
Commit 79ccf40 · verified · 1 Parent(s): dde7e39

Update app.py

Files changed (1): app.py +51 -72
app.py CHANGED
@@ -5,47 +5,56 @@ import json
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-# Set Hugging Face Token for Authentication (ensure it's set in your environment)
-HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+# Set Hugging Face Token for Authentication
+HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Ensure this is set in your environment
 
-# Base model (needed for QLoRA adapter)
-BASE_MODEL = "meta-llama/Llama-3-1B-Instruct"
-QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
+# Correct model paths (replace with your actual paths)
+BASE_MODEL = "meta-llama/Llama-3-1B-Instruct"  # Ensure this is the correct identifier
+QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Ensure this is correct
+LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # Ensure this is correct
 
 # Function to load Llama model
-def load_llama_model():
-    print("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        BASE_MODEL,
-        torch_dtype=torch.float32,
-        device_map="cpu",  # Ensure it runs on CPU
-        token=HUGGINGFACE_TOKEN
-    )
-
-    print("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False, token=HUGGINGFACE_TOKEN)
-
-    print("Loading QLoRA adapter...")
-    model = PeftModel.from_pretrained(
-        model,
-        QLORA_ADAPTER,
-        token=HUGGINGFACE_TOKEN
-    )
-
-    print("Merging LoRA weights...")
-    model = model.merge_and_unload()  # Merge LoRA weights for inference
-
-    return tokenizer, model
+def load_llama_model(model_name, is_guard=False):
+    print(f"Loading model: {model_name}")
+    try:
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            use_fast=False,
+            token=HUGGINGFACE_TOKEN
+        )
+
+        # Load model
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32,
+            device_map="cpu",  # Ensure it runs on CPU
+            token=HUGGINGFACE_TOKEN
+        )
+
+        # Load QLoRA adapter if applicable
+        if not is_guard and "QLORA" in model_name:
+            print("Loading QLoRA adapter...")
+            model = PeftModel.from_pretrained(
+                model,
+                model_name,
+                token=HUGGINGFACE_TOKEN
+            )
+            print("Merging LoRA weights...")
+            model = model.merge_and_unload()  # Merge LoRA weights for inference
+
+        return tokenizer, model
+    except Exception as e:
+        print(f"Error loading model {model_name}: {e}")
+        raise
 
 # Load Llama 3.2 model
-MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
-tokenizer, model = load_llama_model()
+tokenizer, model = load_llama_model(QLORA_ADAPTER)
 
 # Load Llama Guard for content moderation
-LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
-guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME)
+guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME, is_guard=True)
 
-# Define Prompt Templates
+# Define Prompt Templates (same as before)
 PROMPTS = {
     "project_analysis": """<|begin_of_text|><|prompt|>Analyze this project description and generate:
 1. Project timeline with milestones
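
Review note on this hunk: "meta-llama/Llama-3-1B-Instruct" is unlikely to resolve on the Hub (Llama 3 shipped as 8B/70B; the 1B instruct model belongs to the Llama 3.2 family), and the refactored loader passes the QLORA_ADAPTER id straight to AutoModelForCausalLM.from_pretrained, which only works if that repo holds full transformers weights rather than a PEFT adapter. A minimal sketch of the two-step load the old code performed, assuming "meta-llama/Llama-3.2-1B-Instruct" as the base id and a standard PEFT adapter repo (both assumptions, not verified by this commit):

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "meta-llama/Llama-3.2-1B-Instruct"                    # assumed base id
ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # assumed PEFT adapter repo
TOKEN = os.getenv("HUGGINGFACE_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(BASE, token=TOKEN)
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.float32, device_map="cpu", token=TOKEN
)
model = PeftModel.from_pretrained(base, ADAPTER, token=TOKEN)  # attach adapter to base
model = model.merge_and_unload()  # fold LoRA deltas into the base weights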
@@ -53,12 +62,10 @@ PROMPTS = {
 3. Potential risks
 4. Team composition
 5. Cost estimation
-
 Project: {project_description}<|completion|>""",
 
 "code_generation": """<|begin_of_text|><|prompt|>Generate implementation code for this feature:
 {feature_description}
-
 Considerations:
 - Use {programming_language}
 - Follow {coding_standards}
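
Review note on the templates: <|prompt|> and <|completion|> are not special tokens in the Llama 3.x vocabulary, so the tokenizer treats them as plain text rather than chat delimiters. A hedged alternative is to let the tokenizer apply the model's own chat template (this assumes the tokenizer ships one, as the official meta-llama instruct repos do):

# Sketch: format a request via the model's built-in chat template instead of
# hand-rolled <|prompt|>/<|completion|> markers. The content string below is
# an illustrative stand-in for the template text defined in PROMPTS.
project_description = "Build an internal tool for sprint planning."  # example input
messages = [{
    "role": "user",
    "content": f"Analyze this project description and generate a timeline, "
               f"resource allocation, risks, team composition, and cost estimate.\n\n"
               f"Project: {project_description}",
}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header so the model replies
    return_tensors="pt",
)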
@@ -67,13 +74,11 @@ Considerations:
 
 "risk_analysis": """<|begin_of_text|><|prompt|>Predict potential risks for this project plan:
 {project_data}
-
 Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>"""
 }
 
-# Function: Content Moderation using Llama Guard
+# Function: Content Moderation using Llama Guard (same as before)
 def moderate_input(user_input):
-    # Llama Guard specific prompt format
     prompt = f"""<|begin_of_text|><|user|>
 Input: {user_input}
 Please verify that this input doesn't violate any content policies.
@@ -81,7 +86,7 @@ Please verify that this input doesn't violate any content policies.
 
     inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True)
 
-    with torch.no_grad():  # Disable gradient calculation for inference
+    with torch.no_grad():
         outputs = guard_model.generate(
             inputs.input_ids,
             max_length=256,
@@ -92,19 +97,19 @@ Please verify that this input doesn't violate any content policies.
 
     if "flagged" in response.lower() or "violated" in response.lower() or "policy violation" in response.lower():
         return "⚠️ Content flagged by Llama Guard. Please modify your input."
-    return None  # Safe input, proceed normally
+    return None
 
-# Function: Generate AI responses
+# Function: Generate AI responses (same as before)
 def generate_response(prompt_type, **kwargs):
     prompt = PROMPTS[prompt_type].format(**kwargs)
 
     moderation_warning = moderate_input(prompt)
     if moderation_warning:
-        return moderation_warning  # Stop processing if flagged
+        return moderation_warning
 
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
 
-    with torch.no_grad():  # Disable gradient calculation for inference
+    with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
             max_length=1024,
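
Review note on both generate() calls: only input_ids is forwarded, so transformers will warn about the missing attention mask, and max_length also counts the prompt tokens, which shrinks the effective reply budget. A possible tweak, not part of this commit, mirroring the body of generate_response:

# Sketch: forward the attention mask and budget only the newly generated tokens.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,                   # reply budget, independent of prompt length
        pad_token_id=tokenizer.eos_token_id,  # silences the pad-token warning
    )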
@@ -115,40 +120,17 @@ def generate_response(prompt_type, **kwargs):
 
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Function: Analyze project
-def analyze_project(project_desc):
-    return generate_response("project_analysis", project_description=project_desc)
-
-# Function: Generate code
-def generate_code(feature_desc, lang="Python", standards="PEP8"):
-    return generate_response("code_generation", feature_description=feature_desc, programming_language=lang, coding_standards=standards)
-
-# Function: Predict risks
-def predict_risks(project_data):
-    risks = generate_response("risk_analysis", project_data=project_data)
-    try:
-        # Try to extract JSON part from the response
-        import re
-        json_match = re.search(r'\{.*\}', risks, re.DOTALL)
-        if json_match:
-            return json.loads(json_match.group(0))
-        return {"error": "Could not parse JSON response"}
-    except json.JSONDecodeError:
-        return {"error": "Invalid JSON response. Please refine your input."}
-
-# Gradio UI
+# Gradio UI (same as before)
 def create_gradio_interface():
     with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")
 
-        # Project Analysis Tab
         with gr.Tab("Project Setup"):
             project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
-            project_output = gr.Textbox(label="Project Analysis", lines=15)  # Changed from JSON to Textbox for better formatting
+            project_output = gr.Textbox(label="Project Analysis", lines=15)
             analyze_btn = gr.Button("Analyze Project")
             analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)
 
-        # Code Generation Tab
         with gr.Tab("Code Assistant"):
             code_input = gr.Textbox(label="Feature Description", lines=3)
             lang_select = gr.Dropdown(["Python", "JavaScript", "Java", "C++"], label="Language", value="Python")
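
Review note: this hunk deletes analyze_project, generate_code, and predict_risks, yet the click handlers kept below still reference all three, so building the interface will fail with a NameError. If the deletion was intentional cleanup, equivalents still need to exist; a minimal restoration rebuilt from the removed lines above (the re.search is hoisted out of the try block so only json.loads is guarded):

import json
import re

def analyze_project(project_desc):
    return generate_response("project_analysis", project_description=project_desc)

def generate_code(feature_desc, lang="Python", standards="PEP8"):
    return generate_response("code_generation", feature_description=feature_desc,
                             programming_language=lang, coding_standards=standards)

def predict_risks(project_data):
    risks = generate_response("risk_analysis", project_data=project_data)
    json_match = re.search(r'\{.*\}', risks, re.DOTALL)  # extract the JSON blob, if any
    if not json_match:
        return {"error": "Could not parse JSON response"}
    try:
        return json.loads(json_match.group(0))
    except json.JSONDecodeError:
        return {"error": "Invalid JSON response. Please refine your input."}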
@@ -157,14 +139,12 @@ def create_gradio_interface():
             code_btn = gr.Button("Generate Code")
             code_btn.click(generate_code, inputs=[code_input, lang_select, standards_select], outputs=code_output)
 
-        # Risk Analysis Tab
         with gr.Tab("Risk Analysis"):
             risk_input = gr.Textbox(label="Project Plan", lines=5)
             risk_output = gr.JSON(label="Risk Predictions")
             risk_btn = gr.Button("Predict Risks")
             risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)
 
-        # Real-time Chatbot for Collaboration
         with gr.Tab("Live Collaboration"):
             gr.Markdown("## Real-time Project Collaboration")
             chat = gr.Chatbot(height=400)
@@ -177,9 +157,8 @@ def create_gradio_interface():
                 chat_history.append((message, moderation_warning))
                 return "", chat_history
 
-            # Format chat history for context
             history_text = ""
-            for i, (usr, ai) in enumerate(chat_history[-3:]):  # Use last 3 messages for context
+            for i, (usr, ai) in enumerate(chat_history[-3:]):
                 history_text += f"User: {usr}\nAI: {ai}\n"
 
             prompt = f"""<|begin_of_text|><|prompt|>Project Management Chat: