# nvidiafaq / functions.py
# Heykal Sayid
# delete cache
# 316afbb
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Model info.
# Local checkpoint path, left disabled (presumably from local development):
# model_path = '/Users/heykalsayid/Desktop/skill-academy/projects/ai-porto/deployment/app/model/eleutherai-finetuned'
# Identifier handed to both from_pretrained() calls below
# (looks like a Hugging Face Hub repository id).
model_path_hf = 'paacamo/EleutherAI-pythia-1b-finetuned-nvidia-faq'
# The tokenizer and model are loaded once, when this module is imported, and
# serve as the default `tokenizer` / `model` arguments of text_generation().
tokenizer = AutoTokenizer.from_pretrained(model_path_hf)
model = AutoModelForCausalLM.from_pretrained(model_path_hf)
def text_generation(
    text: str,
    model=model,
    tokenizer=tokenizer,
    max_input_token: int = 300,
    max_output_token: int = 100,
) -> str:
    """Generate a sampled continuation of ``text`` and return only the new text.

    The prompt is tokenized (keeping only its last ``max_input_token`` tokens
    when it is longer), passed to ``model.generate`` with sampling enabled,
    and the tokens produced after the prompt are decoded back to a string.
    Because sampling is enabled, repeated calls with the same prompt may
    return different answers.

    Args:
        text: Prompt to continue. If a batch is passed instead of a single
            string, only the answer for the first entry is returned.
        model: Causal language model exposing ``to`` and ``generate``;
            defaults to the module-level model. It is moved to CUDA when
            available, otherwise to the CPU.
        tokenizer: Tokenizer matching ``model``; defaults to the module-level
            tokenizer. If it has no pad token, the EOS token is assigned as
            the pad token.
        max_input_token: Maximum number of prompt tokens kept after
            truncation.
        max_output_token: Maximum number of newly generated tokens.

    Returns:
        The decoded generated text, without the prompt and without special
        tokens.

    Raises:
        ValueError: If the prompt tokenizes to zero tokens.
    """
    # padding=True needs a pad token; fall back to EOS, which generate()
    # below already uses as its pad id, instead of failing in the tokenizer.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Truncate from the left so the end of the prompt is what survives.
    # The previous setting is restored so the (possibly shared) tokenizer is
    # not permanently reconfigured as a side effect of this call.
    previous_truncation_side = tokenizer.truncation_side
    tokenizer.truncation_side = 'left'
    try:
        input_encoded = tokenizer(
            text,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=max_input_token,
        )
    finally:
        tokenizer.truncation_side = previous_truncation_side

    input_ids = input_encoded['input_ids']
    attention_mask = input_encoded['attention_mask']

    # An empty prompt gives generate() nothing to condition on; fail early
    # with a clear message rather than deep inside the model.
    prompt_length = input_ids.shape[-1]
    if prompt_length == 0:
        raise ValueError("text must contain at least one token to generate from")

    # Run on the GPU when one is available; inputs must live on the same
    # device as the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    output_ids = model.generate(
        input_ids=input_ids.to(device),
        attention_mask=attention_mask.to(device),
        max_new_tokens=max_output_token,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
    )

    # The output sequence starts with the prompt tokens; skip them so only
    # the newly generated part of the first sequence is decoded.
    generated_text_answer = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    )
    return generated_text_answer