Ankerkraut committed
Commit 961fcf4 · Parent: 21e5eec

env variable flash attn 0

Files changed (1): app.py +2 -2
app.py CHANGED
@@ -8,7 +8,8 @@ from huggingface_hub import login
 import torch
 import json
 import bs4
-
+import os
+os.environ["USE_FLASH_ATTENTION"] = "0"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 product_strings = []
@@ -105,7 +106,6 @@ def load_model():
         truncation=True,
         padding=True, )
     ankerbot_model.to("cuda")
-    prompt_format = "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
     generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
     generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
 load_model()
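
For context, a minimal sketch of the pattern this commit applies: set the flash-attention environment variable before the attention-sensitive imports, then request the eager attention backend explicitly when loading the model. The model name below is a placeholder, not the repo's model, and USE_FLASH_ATTENTION is taken from this commit as-is (it is not a documented Transformers setting); the documented switch is the attn_implementation kwarg of from_pretrained.

import os

# Set before importing torch/transformers: environment flags are only
# seen by code that reads them at import time. The variable name follows
# this commit; the portable control is attn_implementation below.
os.environ["USE_FLASH_ATTENTION"] = "0"

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder, not from this repo

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    attn_implementation="eager",  # force the non-flash attention path
)
model.to("cuda" if torch.cuda.is_available() else "cpu")

generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
print(generator("Hallo!", max_new_tokens=20)[0]["generated_text"])

Passing attn_implementation="eager" at load time makes the choice independent of the environment variable, which is why the commit keeps both: the env var as a belt-and-suspenders guard, the kwarg as the supported switch.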