# app.py
# =============
# This is a complete app.py file for a Gradio app using the
# meta-llama/Llama-3.2-3B-Instruct model. The app allows users to input
# a message and receive a response from the model.

# =============
# Dependencies
# =============
# The following dependencies are required to run this app:
# - transformers
# - gradio
# - torch
# - accelerate (required by transformers when device_map="auto" is used)
#
# You can install these dependencies using pip:
#   pip install transformers gradio torch accelerate

import torch
from transformers import pipeline
import gradio as gr

# Load the model and tokenizer
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Initialize the pipeline. device_map="auto" places the model on a GPU if
# one is available and falls back to CPU otherwise.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


def generate_response(prompt):
    """
    Generate a response from the model based on the given prompt.

    Args:
        prompt (str): The input message from the user.

    Returns:
        str: The generated response from the model.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # The pipeline returns the full conversation under "generated_text";
    # the last entry is the assistant's message, and its "content" field
    # holds the generated reply as a string.
    return outputs[0]["generated_text"][-1]["content"]


# Define the Gradio interface
def gradio_interface():
    """
    Define the Gradio interface for the app.
    """
    iface = gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
        outputs="text",
        title="Llama-3.2-3B-Instruct Chatbot",
        description="Chat with the Llama-3.2-3B-Instruct model. Enter your message and get a response!",
    )
    return iface


# Launch the Gradio app
if __name__ == "__main__":
    iface = gradio_interface()
    iface.launch()
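
# -------------------------------------------------------------------
# Optional: a minimal sketch of a sampling-tuned variant of
# generate_response. do_sample, temperature, and top_p are standard
# transformers generate kwargs that the pipeline forwards to
# model.generate(); the values below are illustrative assumptions,
# not tuned settings.
# -------------------------------------------------------------------
def generate_response_sampled(prompt, temperature=0.7, top_p=0.9):
    """Like generate_response, but with sampling enabled for more varied output."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,          # enable sampling instead of greedy decoding
        temperature=temperature, # higher values -> more random output
        top_p=top_p,             # nucleus sampling cutoff
    )
    return outputs[0]["generated_text"][-1]["content"]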
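
# =============
# Running the app
# =============
# Start the app with:
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.
#
# Note: meta-llama/Llama-3.2-3B-Instruct is a gated model on the Hugging Face
# Hub; you must accept its license on the model page and authenticate before
# the weights can be downloaded, e.g. with:
#   huggingface-cli login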