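# Spaces: Sleeping
# NOTE(review): the line above looks like status-banner residue captured from
# the hosting page along with this file; it is kept as a comment only and is
# not part of the program. Confirm and remove if so.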
import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# ---------------------------------------------------------------------------
# Model / tokenizer setup. Runs once at import time and defines the
# module-level objects (model, tokenizer, gkw) used for text generation.
# ---------------------------------------------------------------------------
torch_dtype = torch.bfloat16
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "mosaicml/mpt-7b"

# trust_remote_code=True lets transformers run the modelling code shipped in
# the model repository.
# The former `use_auth_token=None` argument is intentionally omitted from both
# calls: None is the library default, and the argument is deprecated in favour
# of `token` in newer transformers releases.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Inference only: switch to eval mode, then place the weights on the chosen
# device in the chosen dtype.
model.eval()
model.to(device=device, dtype=torch_dtype)

# generate() needs a pad token id; reuse EOS when the tokenizer defines none.
if tokenizer.pad_token_id is None:
    warnings.warn(
        "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
    )
    tokenizer.pad_token = tokenizer.eos_token
# Pad on the left so that, for padded inputs, the prompt stays adjacent to the
# tokens the model generates.
tokenizer.padding_side = "left"

# Keyword arguments forwarded unchanged to model.generate().
gkw = {
    "temperature": 0.5,
    "top_p": 0.92,
    "top_k": 0,  # 0 leaves top-k filtering off; top_p is the only cut-off
    "max_new_tokens": 512,
    "use_cache": True,
    "do_sample": True,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
    "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
}
def mpt_7b(s: str) -> str:
    """Generate a continuation of ``s`` with the module-level model.

    The prompt is tokenized with the module-level ``tokenizer``, passed to
    ``model.generate`` together with the generation settings in ``gkw``, and
    the result is decoded back to text.

    Args:
        s: Prompt text to continue.

    Returns:
        The newly generated text only: the prompt tokens are sliced off the
        output and special tokens are skipped during decoding.
    """
    # Move the whole encoding (ids and mask) to the model's device at once.
    encoded = tokenizer(s, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    prompt_len = input_ids.shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            # Pass the tokenizer's mask explicitly. When pad_token has been
            # aliased to eos_token, generate() cannot infer the mask from the
            # ids alone and falls back to a default with a warning.
            attention_mask=encoded.get("attention_mask"),
            **gkw,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = output_ids[0, prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)