Spaces:
Sleeping
Commit
·
be49a41
1
Parent(s):
7ffa2a6
Remove the BitsAndBytes quantization config (bnb_config) fully
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import spaces
|
|
2 |
import gradio as gr
|
3 |
from huggingface_hub import InferenceClient
|
4 |
from qdrant_client import QdrantClient, models
|
5 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from huggingface_hub import login
|
8 |
import torch
|
@@ -110,18 +110,11 @@ client.add(collection_name="products",
|
|
110 |
client.add(collection_name="recipes",
|
111 |
documents=recipe_strings)
|
112 |
model_name = "LeoLM/leo-hessianai-13b-chat"
|
113 |
-
bnb_config = BitsAndBytesConfig(
|
114 |
-
load_in_4bit=True, # Use 4-bit quantization
|
115 |
-
bnb_4bit_compute_dtype=torch.float16, # Reduce memory usage
|
116 |
-
bnb_4bit_use_double_quant=True,
|
117 |
-
llm_int8_enable_fp32_cpu_offload=True
|
118 |
-
)
|
119 |
|
120 |
@spaces.GPU
|
121 |
def load_model():
|
122 |
ankerbot_model = AutoModelForCausalLM.from_pretrained(
|
123 |
model_name,
|
124 |
-
quantization_config=bnb_config,
|
125 |
device_map="cpu",
|
126 |
torch_dtype=torch.float16,
|
127 |
use_cache=True,
|
|
|
2 |
import gradio as gr
|
3 |
from huggingface_hub import InferenceClient
|
4 |
from qdrant_client import QdrantClient, models
|
5 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from huggingface_hub import login
|
8 |
import torch
|
|
|
110 |
client.add(collection_name="recipes",
|
111 |
documents=recipe_strings)
|
112 |
model_name = "LeoLM/leo-hessianai-13b-chat"
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
@spaces.GPU
|
115 |
def load_model():
|
116 |
ankerbot_model = AutoModelForCausalLM.from_pretrained(
|
117 |
model_name,
|
|
|
118 |
device_map="cpu",
|
119 |
torch_dtype=torch.float16,
|
120 |
use_cache=True,
|