"""Gradio app: an AI project manager and code assistant backed by Llama models.

A Llama base model with a QLoRA adapter merged in generates the answers, and
a Llama Guard model screens every prompt before it reaches the generator.
Both models are loaded once, at import time.
"""

import json
import os

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face token used to authenticate against gated model repositories.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Ensure this is set in your environment

# Report only whether the token is present; never echo any part of the secret.
if HUGGINGFACE_TOKEN:
    print("HUGGINGFACE_TOKEN is set")
else:
    print("HUGGINGFACE_TOKEN is not set")

# Model identifiers (replace with your actual paths).
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"  # Ensure this is the correct identifier
QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Ensure this is correct
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # Ensure this is correct

# Substrings of the guard model's reply that mark an input as rejected.
# NOTE(review): the Llama Guard model card describes replies of the form
# "safe" / "unsafe"; the remaining markers are kept from the original check.
_MODERATION_MARKERS = ("unsafe", "flagged", "violated", "policy violation")


def _load_tokenizer_and_model(model_path):
    """Load the tokenizer and half-precision causal LM stored at *model_path*.

    Args:
        model_path: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        Exception: whatever the underlying loaders raise; the error is
            printed together with *model_path* and then re-raised.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path, token=HUGGINGFACE_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
    except Exception as e:
        print(f"❌ Error loading model {model_path}: {e}")
        raise
    return tokenizer, model


def load_llama_model():
    """Load the base Llama model and merge the QLoRA adapter into it.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode with the
        adapter weights merged.

    Raises:
        Exception: if the base model or the adapter cannot be loaded.
    """
    print(f"🔄 Loading Base Model: {BASE_MODEL}")
    tokenizer, model = _load_tokenizer_and_model(BASE_MODEL)
    print("✅ Base Model Loaded Successfully")

    # The adapter is required: a failure here is reported and re-raised.
    print(f"🔄 Loading QLoRA Adapter: {QLORA_ADAPTER}")
    try:
        model = PeftModel.from_pretrained(model, QLORA_ADAPTER, token=HUGGINGFACE_TOKEN)
        print("🔄 Merging LoRA Weights...")
        model = model.merge_and_unload()
    except Exception as e:
        print(f"❌ Error loading model {QLORA_ADAPTER}: {e}")
        raise
    print("✅ QLoRA Adapter Loaded Successfully")

    model.eval()
    return tokenizer, model


def load_llama_guard():
    """Load the Llama Guard model used for content moderation.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode.

    Raises:
        Exception: if the guard model cannot be loaded.
    """
    print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")
    tokenizer, model = _load_tokenizer_and_model(LLAMA_GUARD_NAME)
    model.eval()
    print("✅ Llama Guard Model Loaded Successfully")
    return tokenizer, model


# Load Llama 3.2 model
tokenizer, model = load_llama_model()

# Load Llama Guard for content moderation
guard_tokenizer, guard_model = load_llama_guard()

# Prompt templates, keyed by prompt type and filled in with str.format().
PROMPTS = {
    "project_analysis": """<|begin_of_text|><|prompt|>Analyze this project description and generate:
1. Project timeline with milestones
2. Required technology stack
3. Potential risks
4. Team composition
5. Cost estimation

Project: {project_description}<|completion|>""",
    "code_generation": """<|begin_of_text|><|prompt|>Generate implementation code for this feature:
{feature_description}

Considerations:
- Use {programming_language}
- Follow {coding_standards}
- Include error handling
- Add documentation<|completion|>""",
    "risk_analysis": """<|begin_of_text|><|prompt|>Predict potential risks for this project plan:
{project_data}

Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>""",
}


def _generate_text(gen_tokenizer, gen_model, prompt, **generate_kwargs):
    """Run *gen_model* on *prompt* and return only the newly generated text.

    Args:
        gen_tokenizer: tokenizer matching *gen_model*.
        gen_model: causal LM to generate with.
        prompt: full prompt text.
        **generate_kwargs: forwarded unchanged to ``gen_model.generate``.

    Returns:
        The decoded continuation, without the echoed prompt and without
        special tokens.
    """
    inputs = gen_tokenizer(prompt, return_tensors="pt", truncation=True).to(gen_model.device)
    with torch.no_grad():
        # Pass the whole encoding so the attention mask reaches generate().
        outputs = gen_model.generate(**inputs, **generate_kwargs)
    # generate() returns prompt + continuation; drop the prompt tokens so
    # callers never see (or keyword-match against) their own input.
    prompt_length = inputs["input_ids"].shape[-1]
    return gen_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


def moderate_input(user_input):
    """Screen *user_input* with Llama Guard.

    Args:
        user_input: text to check.

    Returns:
        A warning string when the guard's reply contains a rejection marker,
        otherwise ``None``.
    """
    # NOTE(review): Llama Guard is normally prompted through its tokenizer's
    # chat template; the hand-written prompt is kept as-is — confirm it
    # yields the expected verdicts.
    prompt = f"""<|begin_of_text|><|user|>
Input: {user_input}
Please verify that this input doesn't violate any content policies.
<|assistant|>"""

    # Greedy decoding: a moderation verdict should be deterministic.
    verdict = _generate_text(
        guard_tokenizer,
        guard_model,
        prompt,
        max_new_tokens=256,
        do_sample=False,
    ).lower()

    if any(marker in verdict for marker in _MODERATION_MARKERS):
        return "⚠️ Content flagged by Llama Guard. Please modify your input."
    return None


def generate_response(prompt_type, **kwargs):
    """Fill the *prompt_type* template, moderate it, and generate a reply.

    Args:
        prompt_type: key into ``PROMPTS``.
        **kwargs: values for the template's placeholders.

    Returns:
        The moderation warning when the prompt is rejected, otherwise the
        generated text.

    Raises:
        KeyError: if *prompt_type* is unknown or a placeholder is missing.
    """
    prompt = PROMPTS[prompt_type].format(**kwargs)

    moderation_warning = moderate_input(prompt)
    if moderation_warning:
        return moderation_warning

    return _generate_text(
        tokenizer,
        model,
        prompt,
        max_new_tokens=1024,
        temperature=0.7 if prompt_type == "project_analysis" else 0.5,
        top_p=0.9,
        do_sample=True,
    )


def analyze_project(project_description):
    """Return the generated analysis for *project_description*."""
    return generate_response("project_analysis", project_description=project_description)


def generate_code(feature_description, programming_language, coding_standards):
    """Return generated code for a feature in the chosen language and style."""
    return generate_response(
        "code_generation",
        feature_description=feature_description,
        programming_language=programming_language,
        coding_standards=coding_standards,
    )


def predict_risks(project_data):
    """Return risk predictions for *project_data* in a JSON-displayable form.

    Returns:
        The parsed JSON when the model's reply is valid JSON; otherwise a
        dict holding the raw reply (or moderation warning) under
        ``"raw_output"`` so the JSON component always has something valid
        to render.
    """
    raw = generate_response("risk_analysis", project_data=project_data)
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"raw_output": raw}


def create_gradio_interface():
    """Build the Gradio Blocks UI and return it without launching."""
    with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")

        with gr.Tab("Project Setup"):
            project_input = gr.Textbox(
                label="Project Description",
                lines=5,
                placeholder="Describe your project...",
            )
            project_output = gr.Textbox(label="Project Analysis", lines=15)
            analyze_btn = gr.Button("Analyze Project")
            analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)

        with gr.Tab("Code Assistant"):
            code_input = gr.Textbox(label="Feature Description", lines=3)
            lang_select = gr.Dropdown(
                ["Python", "JavaScript", "Java", "C++"],
                label="Language",
                value="Python",
            )
            standards_select = gr.Dropdown(
                ["PEP8", "Google", "Airbnb"],
                label="Coding Standard",
                value="PEP8",
            )
            code_output = gr.Code(label="Generated Code")
            code_btn = gr.Button("Generate Code")
            code_btn.click(
                generate_code,
                inputs=[code_input, lang_select, standards_select],
                outputs=code_output,
            )

        with gr.Tab("Risk Analysis"):
            risk_input = gr.Textbox(label="Project Plan", lines=5)
            risk_output = gr.JSON(label="Risk Predictions")
            risk_btn = gr.Button("Predict Risks")
            risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)

        with gr.Tab("Live Collaboration"):
            gr.Markdown("## Real-time Project Collaboration")
            chat = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Chat with AI PM")
            clear = gr.Button("Clear Chat")

            def respond(message, chat_history):
                """Answer *message*; return the cleared textbox and new history."""
                chat_history = chat_history or []

                moderation_warning = moderate_input(message)
                if moderation_warning:
                    chat_history.append((message, moderation_warning))
                    return "", chat_history

                # Only the last three exchanges are given to the model.
                history_text = "".join(
                    f"User: {usr}\nAI: {ai}\n" for usr, ai in chat_history[-3:]
                )
                prompt = f"""<|begin_of_text|><|prompt|>Project Management Chat:
Context: {message}
Chat History: {history_text}
User: {message}<|completion|>"""

                response = _generate_text(
                    tokenizer,
                    model,
                    prompt,
                    max_new_tokens=1024,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                )
                chat_history.append((message, response))
                return "", chat_history

            msg.submit(respond, [msg, chat], [msg, chat])
            clear.click(lambda: None, None, chat, queue=False)

    return demo


# Run Gradio App
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True)