# bot/app.py — Hugging Face Space entry point
# Author: nagasurendra — commit "Update app.py" (eedee66, verified)
import transformers
import torch
# Model to load: a long-context, instruct-tuned Llama 3.1 variant.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline ONCE.  device_map="auto" places the
# model on whatever hardware is available (GPU if present, else CPU), so
# there is no need for a second CPU-only pipeline.  (The original script
# constructed a duplicate pipeline after generation, loading the 8B model
# a second time for nothing — removed.)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},  # bf16 halves memory vs fp32
    device_map="auto",
)

# Chat-style conversation: the system message sets the persona, the user
# message is the actual query.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Pass the FULL message list so the model's chat template — including the
# system/persona prompt — is applied.  The original passed only the raw
# user string (messages[1]["content"]), which silently dropped the pirate
# persona entirely.
outputs = pipeline(
    messages,
    max_new_tokens=256,  # cap the length of the generated reply
)

# With chat-message input, "generated_text" is the conversation (a list of
# role/content dicts) ending with the assistant's reply; print just that
# reply's text.
print(outputs[0]["generated_text"][-1]["content"])