Ankerkraut committed on
Commit
be49a41
·
1 Parent(s): 7ffa2a6

remove fully

Browse files
Files changed (1) hide show
  1. app.py +1 -8
app.py CHANGED
@@ -2,7 +2,7 @@ import spaces
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
  from qdrant_client import QdrantClient, models
5
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import login
8
  import torch
@@ -110,18 +110,11 @@ client.add(collection_name="products",
110
  client.add(collection_name="recipes",
111
  documents=recipe_strings)
112
  model_name = "LeoLM/leo-hessianai-13b-chat"
113
- bnb_config = BitsAndBytesConfig(
114
- load_in_4bit=True, # Use 4-bit quantization
115
- bnb_4bit_compute_dtype=torch.float16, # Reduce memory usage
116
- bnb_4bit_use_double_quant=True,
117
- llm_int8_enable_fp32_cpu_offload=True
118
- )
119
 
120
  @spaces.GPU
121
  def load_model():
122
  ankerbot_model = AutoModelForCausalLM.from_pretrained(
123
  model_name,
124
- quantization_config=bnb_config,
125
  device_map="cpu",
126
  torch_dtype=torch.float16,
127
  use_cache=True,
 
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
  from qdrant_client import QdrantClient, models
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import login
8
  import torch
 
110
  client.add(collection_name="recipes",
111
  documents=recipe_strings)
112
  model_name = "LeoLM/leo-hessianai-13b-chat"
 
 
 
 
 
 
113
 
114
  @spaces.GPU
115
  def load_model():
116
  ankerbot_model = AutoModelForCausalLM.from_pretrained(
117
  model_name,
 
118
  device_map="cpu",
119
  torch_dtype=torch.float16,
120
  use_cache=True,