"""Module for OpenAI model and embeddings.""" # import os from typing import List # import onnxruntime as ort from langchain.embeddings.base import Embeddings from sentence_transformers import SentenceTransformer from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings # from langchain_huggingface import HuggingFacePipeline # from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # from huggingface_hub import hf_hub_download class GPTModel(AzureChatOpenAI): """ GPTModel class that extends AzureChatOpenAI. This class initializes a GPT model with specific deployment settings and a callback function. Attributes: callback (function): The callback function to be used with the model. Methods: __init__(callback): Initializes the GPTModel with the specified callback function. """ def __init__(self): super().__init__( deployment_name="gpt-4o", streaming=True, temperature=0) class GPTEmbeddings(AzureOpenAIEmbeddings): """ GPTEmbeddings class that extends AzureOpenAIEmbeddings. This class is designed to handle embeddings using GPT model provided by Azure OpenAI services. Attributes: Inherits all attributes from AzureOpenAIEmbeddings. Methods: Inherits all methods from AzureOpenAIEmbeddings. """ # class Phi4MiniONNXLLM: # """ # A class for interfacing with a pre-trained ONNX model for inference. # Attributes: # session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model. # input_name (str): The name of the input node in the ONNX model. # output_name (str): The name of the output node in the ONNX model. # Methods: # __init__(model_path): # Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path. # __call__(input_ids): # Performs inference on the given input data and returns the model's output. 
#     """
#     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
#         self.repo_id = repo_id
#         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
#         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
#         self.session = ort.InferenceSession(model_path)
#         # Verify both files exist
#         print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
#         print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
#         self.input_name = self.session.get_inputs()[0].name
#         self.output_name = self.session.get_outputs()[0].name
#     def __call__(self, input_text):
#         # Assuming input_ids is a tensor or numpy array
#         tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
#         inputs = tokenizer(input_text, return_tensors="pt")
#         input_feed = {
#             self.input_name: inputs["input_ids"].numpy(),
#             "attention_mask": inputs["attention_mask"].numpy(),
#             # Add past_key_values if applicable
#         }
#         outputs = self.session.run([self.output_name], input_feed)
#         return outputs

# class HuggingfaceModel(HuggingFacePipeline):
#     """
#     HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
#     Attributes:
#         name (str): The name or path of the pre-trained model to load from Hugging Face.
#         max_tokens (int): The maximum number of new tokens to generate in the text output.
#             Defaults to 200.
#     Methods:
#         __init__(name, max_tokens=200):
#             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
#     """
#     def __init__(self, name, max_tokens=500):
#         super().__init__(pipeline=pipeline(
#             "text-generation",
#             model=AutoModelForCausalLM.from_pretrained(name),
#             tokenizer=AutoTokenizer.from_pretrained(name),
#             max_new_tokens=max_tokens
#             )
#         )


class EmbeddingsModel(Embeddings):
    """
    Embeddings implementation backed by a SentenceTransformer model.

    Attributes:
        model (SentenceTransformer): The loaded model whose ``encode`` method
            produces the embeddings.
    """

    def __init__(self, model_name: str):
        """
        Loads the SentenceTransformer model identified by ``model_name``.

        Args:
            model_name (str): Name passed to the SentenceTransformer constructor.
        """
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """
        Encodes every document and returns the result as nested lists.

        Args:
            documents (List[str]): The documents to embed.

        Returns:
            List[List[float]]: One vector per document, in input order.
        """
        encoded = self.model.encode(documents)
        return encoded.tolist()

    def embed_query(self, query: str) -> List[float]:
        """
        Encodes a single query string and returns its vector.

        Args:
            query (str): The query text to embed.

        Returns:
            List[float]: The vector produced for the query.
        """
        # The query is encoded as a one-element batch; the single resulting
        # row is then unwrapped.
        encoded = self.model.encode([query])
        return encoded.tolist()[0]


# model_name = "microsoft/phi-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
# phi4_llm = HuggingFacePipeline(pipeline=pipe)

# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# pipe = pipeline(
#     "text-generation", model=model, tokenizer=tokenizer,
#     max_new_tokens=10, truncation=True,  # Truncate input sequences
# )
# phi4_llm = HuggingFacePipeline(pipeline=pipe)