Spaces:
Sleeping
Sleeping
Commit
·
961fcf4
1
Parent(s):
21e5eec
Set environment variable USE_FLASH_ATTENTION to "0"
Browse files
app.py
CHANGED
@@ -8,7 +8,8 @@ from huggingface_hub import login
|
|
8 |
import torch
|
9 |
import json
|
10 |
import bs4
|
11 |
-
|
|
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
|
14 |
product_strings = []
|
@@ -105,7 +106,6 @@ def load_model():
|
|
105 |
truncation=True,
|
106 |
padding=True, )
|
107 |
ankerbot_model.to("cuda")
|
108 |
-
prompt_format = "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
|
109 |
generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
110 |
generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
111 |
load_model()
|
|
|
8 |
import torch
|
9 |
import json
|
10 |
import bs4
|
11 |
+
import os
|
12 |
+
os.environ["USE_FLASH_ATTENTION"] = "0"
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
|
15 |
product_strings = []
|
|
|
106 |
truncation=True,
|
107 |
padding=True, )
|
108 |
ankerbot_model.to("cuda")
|
|
|
109 |
generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
110 |
generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
111 |
load_model()
|