HuggingFace Spaces status: Runtime error
import transformers
import torch

# Model to use: a long-context, instruction-tuned Llama 3.1 variant.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline ONCE.  bfloat16 halves memory versus
# fp32, and device_map="auto" places the weights on whatever hardware is
# available (GPU if present, otherwise CPU).  The original script built a
# second, unused pipeline after generation — that reloaded the 8B model for
# nothing and has been removed.
generator = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Chat-style conversation.  The full message list must be passed to the
# pipeline (not just the user turn) so the model's chat template is applied
# and the system prompt actually takes effect — the original code sent only
# messages[1]["content"], silently dropping the pirate persona.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Generate a response, capped at 256 new tokens.
outputs = generator(
    messages,
    max_new_tokens=256,
)

# With chat (message-list) input, generated_text is the conversation
# including the assistant's reply; print the last message's content.
print(outputs[0]["generated_text"][-1]["content"])