import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Check if CUDA is available for faster inference
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the tokenizer and model once, outside of the function, so every
# call reuses them instead of reloading the weights
huggingface_token = os.environ.get("KEY2")
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    token=huggingface_token  # `use_auth_token` is deprecated in recent transformers
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    token=huggingface_token
).to(device)
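
# Llama tokenizers ship without a dedicated pad token; reusing EOS is a
# common workaround. This guard is an assumption added here, not part of
# the original code.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token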
def modelFeedback(ats_score, resume_data, job_description):
    """
    Generate written feedback on a resume, given its ATS score, the
    resume text, and the target job description.
    """
    # The original prompt string was missing; this is a minimal
    # reconstruction from the function's arguments
    input_prompt = (
        f"ATS score: {ats_score}\n"
        f"Resume:\n{resume_data}\n"
        f"Job description:\n{job_description}\n"
        "Provide feedback on how well this resume matches the job "
        "description and how it could be improved."
    )
    try:
        # Tokenize the input; passing the attention mask along with the
        # input IDs avoids warnings when the pad token is reused as EOS
        inputs = tokenizer(input_prompt, return_tensors="pt").to(device)

        # Disable gradient calculation for faster inference
        with torch.no_grad():
            # Generate the output; do_sample=True is required for
            # temperature to take effect
            output = model.generate(
                **inputs,
                max_length=1500,
                do_sample=True,
                temperature=0.01,
                pad_token_id=tokenizer.eos_token_id  # Ensure padding works properly
            )

        # Decode the output
        response_text = tokenizer.decode(output[0], skip_special_tokens=True)
        return response_text
    except Exception as e:
        print(f"Error during generation: {e}")
        return None  # Surface the failure to the caller instead of returning implicitly