""" Load LLMs from huggingface, Groq, etc. """ from transformers import ( # AutoModelForCausalLM, AutoTokenizer, pipeline, ) from langchain.llms import HuggingFacePipeline from langchain_groq import ChatGroq from langchain.llms import HuggingFaceTextGenInference # from langchain.chat_models import ChatOpenAI # oai model def get_llm_hf_online(inference_api_url=""): """Get LLM using huggingface inference.""" if not inference_api_url: # default api url inference_api_url = ( "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta" ) llm = HuggingFaceTextGenInference( verbose=True, # Provides detailed logs of operation max_new_tokens=1024, # Maximum number of token that can be generated. top_p=0.95, # Threshold for controlling randomness in text generation process. temperature=0.1, inference_server_url=inference_api_url, timeout=10, # Timeout for connection with the url ) return llm def get_llm_hf_local(model_path): """Get local LLM from huggingface.""" model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto") tokenizer = AutoTokenizer.from_pretrained(model_path) pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=2048, # better setting? model_kwargs={"temperature": 0.1}, # better setting? ) llm = HuggingFacePipeline(pipeline=pipe) return llm def get_groq_chat(model_name="llama-3.1-70b-versatile"): """Get LLM from Groq.""" llm = ChatGroq(temperature=0, model_name=model_name) return llm