import transformers
from huggingface_hub import snapshot_download, constants


def download_llm_to_cache(model_name, revision="main", cache_dir=None):
    """
    Download an LLM from the Hugging Face Hub to the cache without loading it into memory.

    Args:
        model_name (str): The name of the model on the Hugging Face Hub (e.g., "meta-llama/Llama-2-7b-hf").
        revision (str, optional): The specific model version to use. Defaults to "main".
        cache_dir (str, optional): The cache directory to use. If None, uses the default HF cache directory.

    Returns:
        str: Path to the model snapshot in the cache, or None if the download failed.
    """
    if cache_dir is None:
        cache_dir = constants.HUGGINGFACE_HUB_CACHE

    try:
        # Fetch (or reuse) the full repository snapshot without instantiating the model.
        cached_path = snapshot_download(
            repo_id=model_name,
            revision=revision,
            cache_dir=cache_dir,
            local_files_only=False,
        )
        print(f"Model '{model_name}' is available in cache at: {cached_path}")
        return cached_path
    except Exception as e:
        print(f"Error downloading model '{model_name}': {e}")
        return None
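

# Usage sketch (the model id below is just the docstring example; gated repos such as
# this one also require an authenticated Hugging Face token):
#   cached_path = download_llm_to_cache("meta-llama/Llama-2-7b-hf")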


def load_model(path, cache_dir=None):
    # device_map="auto" places the weights across available devices and requires `accelerate`.
    model = transformers.AutoModelForCausalLM.from_pretrained(
        path, cache_dir=cache_dir, device_map="auto", trust_remote_code=False
    )
    # Tokenizers are device-agnostic, so no device_map is passed here.
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        path, cache_dir=cache_dir, trust_remote_code=False
    )
    return model, tokenizer
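

# Usage sketch (assumes `cached_path` returned by download_llm_to_cache above):
#   model, tokenizer = load_model(cached_path)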


def llm_run(model, tokenizer, genes, N):
    # The model already carries its device placement from load_model, so the pipeline needs
    # no device_map of its own (that argument only matters when loading from a repo id).
    generate = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)
    output = []
    for gene in genes:
        # The pipeline returns one entry per input prompt; each entry holds N sampled sequences.
        out = generate([gene], min_new_tokens=4, max_new_tokens=4, do_sample=True, num_return_sequences=N)
        output.append(out[0])
        # Yield the results accumulated so far, so callers can consume them incrementally.
        yield output
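

if __name__ == "__main__":
    # End-to-end sketch. The model id and the gene-symbol prompts are placeholders for
    # illustration only; substitute the checkpoint and inputs you actually work with.
    model_id = "meta-llama/Llama-2-7b-hf"  # assumption: any causal LM on the Hub; this one is gated
    cached_path = download_llm_to_cache(model_id)
    if cached_path is not None:
        model, tokenizer = load_model(cached_path)
        genes = ["TP53", "BRCA1"]  # hypothetical prompts
        # llm_run is a generator: each iteration yields the results accumulated so far.
        for partial in llm_run(model, tokenizer, genes, N=3):
            print(f"Generated samples for {len(partial)} of {len(genes)} genes")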