ankandrew committed on
Commit
cece0ec
·
1 Parent(s): e8a0cf2
Files changed (1) hide show
  1. app.py +1 -3
app.py CHANGED
@@ -7,10 +7,9 @@ import gradio as gr
7
  import spaces
8
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
9
  from qwen_vl_utils import process_vision_info
10
- from transformers.utils import is_flash_attn_2_available
11
 
12
  subprocess.run(
13
- "pip install 'flash-attn==2.2.0' --no-build-isolation",
14
  env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
  shell=True,
16
  )
@@ -49,7 +48,6 @@ def run_inference(model_key, input_type, text, image, video, fps, system_prompt,
49
  model_id,
50
  torch_dtype="auto",
51
  device_map="auto",
52
- attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
53
  )
54
  processor = AutoProcessor.from_pretrained(model_id)
55
 
 
7
  import spaces
8
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
9
  from qwen_vl_utils import process_vision_info
 
10
 
11
  subprocess.run(
12
+ "pip install flash-attn --no-build-isolation",
13
  env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
14
  shell=True,
15
  )
 
48
  model_id,
49
  torch_dtype="auto",
50
  device_map="auto",
 
51
  )
52
  processor = AutoProcessor.from_pretrained(model_id)
53