import os
import gradio as gr
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Hugging Face access token, read once from the environment. It is passed to
# every from_pretrained() call in this file.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# Report only whether the token is present. No part of the secret is printed,
# because stdout usually ends up in logs or shared notebooks.
if HUGGINGFACE_TOKEN:
    print("HUGGINGFACE_TOKEN is set")
else:
    print("HUGGINGFACE_TOKEN is not set")

# Hugging Face Hub identifiers (replace with your own paths if needed).
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"  # base chat model
QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # applied on top of BASE_MODEL
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # content-moderation model

# Function to load a Llama model (chat model with adapter, or a standalone guard model)
def load_llama_model(model_path=None, *, is_guard=False):
    """Load a tokenizer/model pair ready for inference.

    With no arguments this loads ``BASE_MODEL``, applies the ``QLORA_ADAPTER``
    PEFT adapter and merges its weights into the base model.

    Args:
        model_path: Optional Hub identifier or local path. When ``is_guard`` is
            false it names the adapter to apply on top of ``BASE_MODEL``
            (default: ``QLORA_ADAPTER``). When ``is_guard`` is true it names
            the standalone model to load (default: ``LLAMA_GUARD_NAME``).
        is_guard: If true, load ``model_path`` as a complete model and skip
            the adapter step.

    Returns:
        ``(tokenizer, model)`` with the model switched to eval mode.

    Raises:
        Exception: Any loading error is logged and re-raised unchanged.
    """
    if is_guard:
        model_id = model_path or LLAMA_GUARD_NAME
        adapter_id = None
    else:
        model_id = BASE_MODEL
        adapter_id = model_path or QLORA_ADAPTER

    try:
        print(f"🔄 Loading Base Model: {model_id}")

        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACE_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )

        print("✅ Base Model Loaded Successfully")

        if adapter_id is not None:
            print(f"🔄 Loading QLoRA Adapter: {adapter_id}")
            model = PeftModel.from_pretrained(model, adapter_id, token=HUGGINGFACE_TOKEN)
            print("🔄 Merging LoRA Weights...")
            # Fold the adapter into the base weights so inference runs on a
            # plain model without the PEFT wrapper.
            model = model.merge_and_unload()
            print("✅ QLoRA Adapter Loaded Successfully")
    except Exception as e:
        print(f"❌ Error loading model {adapter_id or model_id}: {e}")
        raise

    model.eval()
    return tokenizer, model

# Function to load Llama Guard Model for content moderation
def load_llama_guard():
    """Load the Llama Guard tokenizer and model named by ``LLAMA_GUARD_NAME``.

    The model is loaded in float16 with ``low_cpu_mem_usage=True`` and switched
    to eval mode.

    Returns:
        ``(tokenizer, model)``.

    Raises:
        Exception: Any loading error is logged and re-raised unchanged.
    """
    print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")

    try:
        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME, token=HUGGINGFACE_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA_GUARD_NAME,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
    except Exception as e:
        print(f"❌ Error loading model {LLAMA_GUARD_NAME}: {e}")
        raise

    model.eval()
    print("✅ Llama Guard Model Loaded Successfully")
    return tokenizer, model
        
# Load the Llama 3.2 chat model (base weights with the QLoRA adapter merged in).
# load_llama_model() reads BASE_MODEL / QLORA_ADAPTER itself, so no arguments
# are needed.
tokenizer, model = load_llama_model()

# Load Llama Guard for content moderation via its dedicated loader.
guard_tokenizer, guard_model = load_llama_guard()

# Prompt templates keyed by task name. Each value is a str.format() template;
# the placeholder names are the keyword arguments generate_response() must be
# given for that task.
PROMPTS = dict(
    # Placeholder: project_description
    project_analysis="\n".join([
        "<|begin_of_text|><|prompt|>Analyze this project description and generate:",
        "1. Project timeline with milestones",
        "2. Required technology stack",
        "3. Potential risks",
        "4. Team composition",
        "5. Cost estimation",
        "Project: {project_description}<|completion|>",
    ]),
    # Placeholders: feature_description, programming_language, coding_standards
    code_generation="\n".join([
        "<|begin_of_text|><|prompt|>Generate implementation code for this feature:",
        "{feature_description}",
        "Considerations:",
        "- Use {programming_language}",
        "- Follow {coding_standards}",
        "- Include error handling",
        "- Add documentation<|completion|>",
    ]),
    # Placeholder: project_data
    risk_analysis="\n".join([
        "<|begin_of_text|><|prompt|>Predict potential risks for this project plan:",
        "{project_data}",
        "Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>",
    ]),
)

# Function: Content moderation using Llama Guard
def moderate_input(user_input, max_new_tokens=64):
    """Screen *user_input* with the Llama Guard model.

    Args:
        user_input: Text to check.
        max_new_tokens: Upper bound on the number of tokens the guard model
            may generate for its verdict.

    Returns:
        A warning string when the guard's reply contains a flag marker,
        otherwise ``None``.
    """
    # NOTE(review): this is a hand-written prompt, not the tokenizer's chat
    # template; confirm against the Llama Guard model card that the model
    # answers reliably in this format.
    prompt = f"""<|begin_of_text|><|user|>
Input: {user_input}
Please verify that this input doesn't violate any content policies.
<|assistant|>"""

    inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = guard_model.generate(
            **inputs,  # forwards attention_mask along with input_ids
            # Bound only the generated part; a max_length budget would be
            # consumed by the prompt itself for long inputs.
            max_new_tokens=max_new_tokens,
            # Greedy decoding keeps the verdict deterministic.
            do_sample=False,
        )

    # Decode only the newly generated tokens. Scanning the echoed prompt would
    # flag any input that merely contains one of the marker words.
    verdict = guard_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).lower()

    # "unsafe" is the verdict word Llama Guard is documented to emit; the
    # remaining markers are kept from the previous implementation.
    flag_markers = ("unsafe", "flagged", "violated", "policy violation")
    if any(marker in verdict for marker in flag_markers):
        return "⚠️ Content flagged by Llama Guard. Please modify your input."
    return None

# Function: Generate AI responses
def generate_response(prompt_type, **kwargs):
    """Generate a model reply for one of the ``PROMPTS`` templates.

    Args:
        prompt_type: Key into ``PROMPTS`` selecting the template.
        **kwargs: Values for the placeholders of the selected template.

    Returns:
        The moderation warning if ``moderate_input`` flags the formatted
        prompt, otherwise the generated text without the prompt.

    Raises:
        KeyError: If ``prompt_type`` is not in ``PROMPTS`` or a template
            placeholder is missing from ``kwargs``.
    """
    prompt = PROMPTS[prompt_type].format(**kwargs)

    moderation_warning = moderate_input(prompt)
    if moderation_warning:
        return moderation_warning

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,  # forwards attention_mask along with input_ids
            # Bound only the generated part so a long prompt cannot use up
            # the whole length budget.
            max_new_tokens=1024,
            temperature=0.7 if prompt_type == "project_analysis" else 0.5,
            top_p=0.9,
            do_sample=True,
        )

    # generate() returns prompt + continuation; return only the continuation
    # so callers do not receive the prompt template echoed back.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Gradio UI
def create_gradio_interface():
    """Build the Gradio Blocks app.

    The app has four tabs: "Project Setup", "Code Assistant", "Risk Analysis"
    and "Live Collaboration". The first three call ``generate_response`` with
    the matching prompt type; the chat tab runs the model directly.

    Returns:
        The ``gr.Blocks`` instance. It is not launched here.
    """

    # Button callbacks. They are defined locally so that the click() wiring
    # below always has callables to bind to.
    def analyze_project(project_description):
        return generate_response("project_analysis", project_description=project_description)

    def generate_code(feature_description, programming_language, coding_standards):
        return generate_response(
            "code_generation",
            feature_description=feature_description,
            programming_language=programming_language,
            coding_standards=coding_standards,
        )

    def predict_risks(project_data):
        reply = generate_response("risk_analysis", project_data=project_data)
        # The output widget is gr.JSON, but the model returns free text. Try
        # the whole reply first, then the outermost {...} span, and fall back
        # to wrapping the raw text so the widget always gets valid data.
        candidates = [reply]
        start, end = reply.find("{"), reply.rfind("}")
        if 0 <= start < end:
            candidates.append(reply[start:end + 1])
        for candidate in candidates:
            try:
                return json.loads(candidate)
            except ValueError:
                continue
        return {"raw_output": reply}

    with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")

        with gr.Tab("Project Setup"):
            project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
            project_output = gr.Textbox(label="Project Analysis", lines=15)
            analyze_btn = gr.Button("Analyze Project")
            analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)

        with gr.Tab("Code Assistant"):
            code_input = gr.Textbox(label="Feature Description", lines=3)
            lang_select = gr.Dropdown(["Python", "JavaScript", "Java", "C++"], label="Language", value="Python")
            standards_select = gr.Dropdown(["PEP8", "Google", "Airbnb"], label="Coding Standard", value="PEP8")
            code_output = gr.Code(label="Generated Code")
            code_btn = gr.Button("Generate Code")
            code_btn.click(generate_code, inputs=[code_input, lang_select, standards_select], outputs=code_output)

        with gr.Tab("Risk Analysis"):
            risk_input = gr.Textbox(label="Project Plan", lines=5)
            risk_output = gr.JSON(label="Risk Predictions")
            risk_btn = gr.Button("Predict Risks")
            risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)

        with gr.Tab("Live Collaboration"):
            gr.Markdown("## Real-time Project Collaboration")
            chat = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Chat with AI PM")
            clear = gr.Button("Clear Chat")

            def respond(message, chat_history):
                """Append the model's reply to the chat and clear the textbox."""
                # The history may arrive as None (for example after "Clear Chat").
                chat_history = chat_history or []

                moderation_warning = moderate_input(message)
                if moderation_warning:
                    chat_history.append((message, moderation_warning))
                    return "", chat_history

                # Only the last three exchanges are given to the model.
                history_text = "".join(
                    f"User: {usr}\nAI: {ai}\n" for usr, ai in chat_history[-3:]
                )

                prompt = f"""<|begin_of_text|><|prompt|>Project Management Chat:
Context: {message}
Chat History: {history_text}
User: {message}<|completion|>"""

                inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
                prompt_length = inputs["input_ids"].shape[1]

                with torch.no_grad():
                    outputs = model.generate(
                        **inputs,  # forwards attention_mask along with input_ids
                        # Bound only the generated part so a long history
                        # cannot use up the whole length budget.
                        max_new_tokens=1024,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                    )

                # Keep only the continuation. An echoed prompt would be stored
                # in the history and fed back into every later turn.
                response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
                chat_history.append((message, response))
                return "", chat_history

            msg.submit(respond, [msg, chat], [msg, chat])
            clear.click(lambda: None, None, chat, queue=False)

    return demo

# Script entry point: build the UI and start serving it. share=True is passed
# through to Gradio's launch().
if __name__ == "__main__":
    create_gradio_interface().launch(share=True)