Update app.py
app.py
CHANGED
@@ -2,39 +2,35 @@ import os
 import gradio as gr
 import torch
 import json
-from transformers import …
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
 # Set Hugging Face Token for Authentication
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Ensure this is set in your environment
 
-… (nine removed lines not rendered in the diff view)
-# Correct model paths (replace with your actual paths)
-BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"  # Ensure this is the correct identifier
-QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Ensure this is correct
-LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # Ensure this is correct
+if not HUGGINGFACE_TOKEN:
+    raise ValueError("❌ HUGGINGFACE_TOKEN is not set. Please set it in your environment.")
+
+print("✅ HUGGINGFACE_TOKEN is set.")
+
+# Model Paths (Replace with your actual Hugging Face Model Names)
+BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
+QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
+LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 
 # Function to load Llama model
-def load_llama_model(base_model=BASE_MODEL, adapter=None):
-    print(f"🔄 Loading …
+def load_llama_model(base_model=BASE_MODEL, adapter=None, is_guard=False):
+    print(f"🔄 Loading Model: {base_model}")
 
-    tokenizer = …
+    tokenizer = AutoTokenizer.from_pretrained(base_model, token=HUGGINGFACE_TOKEN)
     model = AutoModelForCausalLM.from_pretrained(
         base_model,
         token=HUGGINGFACE_TOKEN,
-        torch_dtype=torch.…
+        torch_dtype=torch.float32,  # Using float32 for CPU compatibility
         low_cpu_mem_usage=True
     )
 
-    if adapter:
+    if adapter and not is_guard:
         print(f"🔄 Loading Adapter: {adapter}")
         model = PeftModel.from_pretrained(model, adapter, token=HUGGINGFACE_TOKEN)
         model = model.merge_and_unload()
@@ -43,58 +39,40 @@ def load_llama_model(base_model=BASE_MODEL, adapter=None):
     model.eval()
     return tokenizer, model
 
-# Function to load Llama Guard Model for content moderation
-def load_llama_guard():
-    print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")
-
-    tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME, use_auth_token=HUGGINGFACE_TOKEN)
-    model = AutoModelForCausalLM.from_pretrained(
-        LLAMA_GUARD_NAME,
-        use_auth_token=HUGGINGFACE_TOKEN,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True
-    )
-
-    model.eval()
-    print("✅ Llama Guard Model Loaded Successfully")
-    return tokenizer, model
-
 # Load Llama 3.2 model
-tokenizer, model = load_llama_model(QLORA_ADAPTER)
+tokenizer, model = load_llama_model(adapter=QLORA_ADAPTER)
 
 # Load Llama Guard for content moderation
-guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME, is_guard=True)
+guard_tokenizer, guard_model = load_llama_model(base_model=LLAMA_GUARD_NAME, is_guard=True)
 
-# Define Prompt Templates
+# Define Prompt Templates
 PROMPTS = {
-    "project_analysis": """…
+    "project_analysis": """Analyze this project description and generate:
 1. Project timeline with milestones
 2. Required technology stack
 3. Potential risks
 4. Team composition
 5. Cost estimation
-Project: {project_description}…
+Project: {project_description}""",
 
-    "code_generation": """…
+    "code_generation": """Generate implementation code for this feature:
 {feature_description}
 Considerations:
 - Use {programming_language}
 - Follow {coding_standards}
 - Include error handling
-- Add documentation…
+- Add documentation""",
 
-    "risk_analysis": """…
+    "risk_analysis": """Predict potential risks for this project plan:
 {project_data}
-Format output as JSON with risk types, probabilities, and mitigation strategies…
+Format output as JSON with risk types, probabilities, and mitigation strategies"""
 }
 
-# Function: Content Moderation using Llama Guard
+# Function: Content Moderation using Llama Guard
 def moderate_input(user_input):
-    prompt = f"""…
-…
-…
-<|assistant|>"""
-
+    prompt = f"""Input: {user_input}
+Please verify that this input doesn't violate any content policies."""
+
     inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
 
     with torch.no_grad():
@@ -107,7 +85,7 @@ Please verify that this input doesn't violate any content policies.
 
     return None
 
-# Function: Generate AI responses
+# Function: Generate AI responses
 def generate_response(prompt_type, **kwargs):
     prompt = PROMPTS[prompt_type].format(**kwargs)
 
@@ -116,7 +94,7 @@ def generate_response(prompt_type, **kwargs):
         return moderation_warning
 
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
-
+
     with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
@@ -128,7 +106,17 @@ def generate_response(prompt_type, **kwargs):
 
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# …
+# Define UI functions
+def analyze_project(project_description):
+    return generate_response("project_analysis", project_description=project_description)
+
+def generate_code(feature_description, programming_language, coding_standards):
+    return generate_response("code_generation", feature_description=feature_description, programming_language=programming_language, coding_standards=coding_standards)
+
+def predict_risks(project_data):
+    return generate_response("risk_analysis", project_data=project_data)
+
+# Gradio UI Setup
 def create_gradio_interface():
     with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")
@@ -169,11 +157,11 @@ def create_gradio_interface():
         for i, (usr, ai) in enumerate(chat_history[-3:]):
             history_text += f"User: {usr}\nAI: {ai}\n"
 
-        prompt = f"""…
+        prompt = f"""Project Management Chat:
 Context: {message}
 Chat History: {history_text}
-User: {message}…
-
+User: {message}"""
+
         inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
 
         with torch.no_grad():
@@ -197,4 +185,4 @@ User: {message}<|completion|>"""
 # Run Gradio App
 if __name__ == "__main__":
     interface = create_gradio_interface()
-    interface.launch(share=True)
+    interface.launch(share=True)
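For context on how the moderation path fits together after this change, the diff implies a simple contract: moderate_input() returns None when the text passes Llama Guard and a warning string otherwise, and generate_response() returns that warning instead of generating. A minimal usage sketch of that flow, assuming only the names and return behaviour visible in the diff (the exact warning text is not shown there and is treated as opaque):

    # Hypothetical usage sketch, not part of app.py: exercise the moderation gate
    # before generation, mirroring the behaviour generate_response() relies on.
    user_text = "Draft a project plan for a mobile banking app"
    warning = moderate_input(user_text)   # None means the input passed Llama Guard
    if warning is None:
        print(generate_response("project_analysis", project_description=user_text))
    else:
        print(warning)                    # generate_response() would return this string directly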
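The new analyze_project, generate_code, and predict_risks helpers added in this commit are thin wrappers over generate_response, so they can be smoke-tested outside the Gradio UI. A rough sketch, assuming app.py is importable as a module named "app" (an assumption, not shown in the diff) and that HUGGINGFACE_TOKEN is exported; importing will load both models at import time, which is slow on CPU:

    import os

    # app.py raises at import time if the token is missing, so check it up front.
    assert os.getenv("HUGGINGFACE_TOKEN"), "export HUGGINGFACE_TOKEN first"

    from app import analyze_project, generate_code, predict_risks  # hypothetical module name

    print(analyze_project("Internal analytics dashboard for a five-person team"))
    print(generate_code("CSV upload endpoint", "Python", "PEP 8"))
    print(predict_risks('{"timeline": "6 weeks", "team_size": 3}'))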