import transformers
from huggingface_hub import snapshot_download, constants


def download_llm_to_cache(model_name, revision="main", cache_dir=None):
    """
    Download an LLM from the Hugging Face Hub to the cache without loading it into memory.

    Args:
        model_name (str): The name of the model on the Hugging Face Hub (e.g., "meta-llama/Llama-2-7b-hf").
        revision (str, optional): The specific model version to use. Defaults to "main".
        cache_dir (str, optional): The cache directory to use. If None, uses the default HF cache directory.

    Returns:
        str: Path to the model snapshot in the cache, or None if the download failed.
    """
    if cache_dir is None:
        cache_dir = constants.HUGGINGFACE_HUB_CACHE

    try:
        # Fetch (or reuse) the full repository snapshot without instantiating the model.
        cached_path = snapshot_download(
            repo_id=model_name,
            revision=revision,
            cache_dir=cache_dir,
            local_files_only=False,
        )
        print(f"Model '{model_name}' is available in cache at: {cached_path}")
        return cached_path
    except Exception as e:
        print(f"Error downloading model '{model_name}': {e}")
        return None
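

# Usage sketch (the model id below is just the docstring example; gated repos such as
# this one also require an authenticated Hugging Face token):
#   cached_path = download_llm_to_cache("meta-llama/Llama-2-7b-hf")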


def load_model(path, cache_dir=None):
    # device_map="auto" places the weights across available devices and requires `accelerate`.
    model = transformers.AutoModelForCausalLM.from_pretrained(
        path, cache_dir=cache_dir, device_map="auto", trust_remote_code=False
    )
    # Tokenizers are device-agnostic, so no device_map is passed here.
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        path, cache_dir=cache_dir, trust_remote_code=False
    )
    return model, tokenizer
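

# Usage sketch (assumes `cached_path` returned by download_llm_to_cache above):
#   model, tokenizer = load_model(cached_path)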


def llm_run(model, tokenizer, genes, N):
    # The model already carries its device placement from load_model, so the pipeline needs
    # no device_map of its own (that argument only matters when loading from a repo id).
    generate = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)
    output = []
    for gene in genes:
        # The pipeline returns one entry per input prompt; each entry holds N sampled sequences.
        out = generate([gene], min_new_tokens=4, max_new_tokens=4, do_sample=True, num_return_sequences=N)
        output.append(out[0])
        # Yield the results accumulated so far, so callers can consume them incrementally.
        yield output
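

if __name__ == "__main__":
    # End-to-end sketch. The model id and the gene-symbol prompts are placeholders for
    # illustration only; substitute the checkpoint and inputs you actually work with.
    model_id = "meta-llama/Llama-2-7b-hf"  # assumption: any causal LM on the Hub; this one is gated
    cached_path = download_llm_to_cache(model_id)
    if cached_path is not None:
        model, tokenizer = load_model(cached_path)
        genes = ["TP53", "BRCA1"]  # hypothetical prompts
        # llm_run is a generator: each iteration yields the results accumulated so far.
        for partial in llm_run(model, tokenizer, genes, N=3):
            print(f"Generated samples for {len(partial)} of {len(genes)} genes")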