ankandrew
committed on
Commit
·
cece0ec
1
Parent(s):
e8a0cf2
Minor fix
Browse files
app.py
CHANGED
@@ -7,10 +7,9 @@ import gradio as gr
|
|
7 |
import spaces
|
8 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
9 |
from qwen_vl_utils import process_vision_info
|
10 |
-
from transformers.utils import is_flash_attn_2_available
|
11 |
|
12 |
subprocess.run(
|
13 |
-
"pip install
|
14 |
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
15 |
shell=True,
|
16 |
)
|
@@ -49,7 +48,6 @@ def run_inference(model_key, input_type, text, image, video, fps, system_prompt,
|
|
49 |
model_id,
|
50 |
torch_dtype="auto",
|
51 |
device_map="auto",
|
52 |
-
attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
|
53 |
)
|
54 |
processor = AutoProcessor.from_pretrained(model_id)
|
55 |
|
|
|
7 |
import spaces
|
8 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
9 |
from qwen_vl_utils import process_vision_info
|
|
|
10 |
|
11 |
subprocess.run(
|
12 |
+
"pip install flash-attn --no-build-isolation",
|
13 |
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
14 |
shell=True,
|
15 |
)
|
|
|
48 |
model_id,
|
49 |
torch_dtype="auto",
|
50 |
device_map="auto",
|
|
|
51 |
)
|
52 |
processor = AutoProcessor.from_pretrained(model_id)
|
53 |
|