# app.py
# =============
# This is a complete app.py file for a Gradio app using the
# meta-llama/Llama-3.2-3B-Instruct model. The app allows users to input
# a message and receive a response from the model.

# =============
# Dependencies
# =============
# The following dependencies are required to run this app:
# - transformers
# - gradio
# - torch
# - accelerate (required by transformers when device_map="auto" is used)
#
# You can install these dependencies using pip:
#   pip install transformers gradio torch accelerate

import torch
from transformers import pipeline
import gradio as gr

# Load the model and tokenizer
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Initialize the pipeline. device_map="auto" places the model on a GPU if
# one is available and falls back to CPU otherwise.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


def generate_response(prompt):
    """
    Generate a response from the model based on the given prompt.

    Args:
        prompt (str): The input message from the user.

    Returns:
        str: The generated response from the model.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # The pipeline returns the full conversation under "generated_text";
    # the last entry is the assistant's message, and its "content" field
    # holds the generated reply as a string.
    return outputs[0]["generated_text"][-1]["content"]


# Define the Gradio interface
def gradio_interface():
    """
    Define the Gradio interface for the app.
    """
    iface = gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
        outputs="text",
        title="Llama-3.2-3B-Instruct Chatbot",
        description="Chat with the Llama-3.2-3B-Instruct model. Enter your message and get a response!",
    )
    return iface


# Launch the Gradio app
if __name__ == "__main__":
    iface = gradio_interface()
    iface.launch()
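
# -------------------------------------------------------------------
# Optional: a minimal sketch of a sampling-tuned variant of
# generate_response. do_sample, temperature, and top_p are standard
# transformers generate kwargs that the pipeline forwards to
# model.generate(); the values below are illustrative assumptions,
# not tuned settings.
# -------------------------------------------------------------------
def generate_response_sampled(prompt, temperature=0.7, top_p=0.9):
    """Like generate_response, but with sampling enabled for more varied output."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,          # enable sampling instead of greedy decoding
        temperature=temperature, # higher values -> more random output
        top_p=top_p,             # nucleus sampling cutoff
    )
    return outputs[0]["generated_text"][-1]["content"]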
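
# =============
# Running the app
# =============
# Start the app with:
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.
#
# Note: meta-llama/Llama-3.2-3B-Instruct is a gated model on the Hugging Face
# Hub; you must accept its license on the model page and authenticate before
# the weights can be downloaded, e.g. with:
#   huggingface-cli login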