pmelnechuk committed on
Commit
e852058
verified
1 Parent(s): 97dea46

Test of Carlos's solution

Files changed (1)
  1. src/model_load.py +2 -3
src/model_load.py CHANGED
@@ -9,14 +9,13 @@ from langchain.chains import LLMChain
 def load_model():
 
     model_name="tiiuae/Falcon3-10B-Instruct"
-
+    max_memory = {0: "23GB", "cpu": "30GB"}
     # Load tokenizer and model from Hugging Face
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(model_name,
-                                                torch_dtype=torch.float16,
                                                 device_map="auto",
                                                 quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-                                                llm_int8_enable_fp32_cpu_offload=True)
+                                                max_memory=max_memory)
 
     # Create the text generation pipeline
     text_generation_pipeline = pipeline(
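
For context, a minimal sketch of load_model() as it reads after this commit, assuming the standard transformers imports at the top of src/model_load.py; the pipeline(...) arguments and the return value are illustrative assumptions, since the diff is truncated at that call.

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

def load_model():
    model_name = "tiiuae/Falcon3-10B-Instruct"
    # Cap what device_map="auto" may allocate per device: up to 23 GB on
    # GPU 0, with remaining layers offloaded to up to 30 GB of CPU RAM.
    max_memory = {0: "23GB", "cpu": "30GB"}

    # Load tokenizer and model from Hugging Face
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        max_memory=max_memory,
    )

    # Create the text generation pipeline; these kwargs are an assumption,
    # since the diff cuts off at this call.
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return text_generation_pipeline

The change appears to swap per-argument tweaks for an explicit memory budget: torch_dtype=torch.float16 is redundant once load_in_8bit=True governs the weight dtype, and llm_int8_enable_fp32_cpu_offload is a BitsAndBytesConfig option rather than a from_pretrained argument, so the max_memory map becomes the supported way to tell device_map="auto" how to split the 10B model between GPU 0 and CPU.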