Spaces:

Lihuchen
/

llm_with_confidence

Runtime error

llm_with_confidence / cpu_llama_generate.py

Upload 3 files

f2ff742 over 1 year ago

486 Bytes

	from ctransformers import AutoModelForCausalLM

	# Inference is pinned to the CPU; an earlier CUDA auto-detection via torch
	# was disabled. `device` is not referenced again in this excerpt.
	device = 'cpu'

	# Load the quantized Llama 2 chat model once, at import time.
	# gpu_layers is the number of layers to offload to the GPU; 0 is the
	# setting to use when no GPU acceleration is available on the system.
	llm = AutoModelForCausalLM.from_pretrained(
		"TheBloke/Llama-2-7b-Chat-GGUF",
		model_file="llama-2-7b-chat.Q4_K_M.gguf",
		model_type="llama",
		gpu_layers=0,
	)


	def run(query):
		"""Generate a completion for *query* using the module-level ``llm``.

		Args:
			query: Prompt forwarded unchanged to ``llm``.

		Returns:
			Whatever ``llm(query)`` returns -- presumably the generated text
			as a string; TODO confirm against the ctransformers call API.
		"""
		# The body must be indented under the ``def``; at the same level as
		# the ``def`` it is a syntax error and the module cannot be imported.
		return llm(query)