Sofia Casadei committed
Commit 3375ee2 · Parent: 0d64afb

install flash attention

Files changed (2)
  1. Dockerfile +5 -0
  2. main.py +0 -8
Dockerfile CHANGED
@@ -45,6 +45,11 @@ COPY --chown=user requirements.txt .
  RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
      /uv pip install -r requirements.txt

+ # Conditionally install flash-attn if CUDA is available
+ RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
+     python -c "import torch; exit(0 if torch.cuda.is_available() else 1)" && \
+     /uv pip install flash-attn --no-build-isolation || echo "CUDA not available, skipping flash-attn installation"
+
  # Copy application code
  COPY --chown=user . .
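The new RUN step gates the install on a CUDA check: `python -c "import torch; exit(0 if torch.cuda.is_available() else 1)"` exits 0 only when a GPU is visible, and the trailing `|| echo ...` keeps the build from failing otherwise. As a minimal sketch (not part of this commit), the application could mirror the same check at runtime before requesting flash attention; the helper name and the "sdpa" fallback below are illustrative assumptions:

# Hypothetical runtime helper, not part of this commit: mirrors the Dockerfile's
# build-time test so the app only requests flash attention when it can actually run.
import importlib.util

import torch


def flash_attn_usable() -> bool:
    """Return True only if a CUDA device is visible and flash_attn is importable."""
    return torch.cuda.is_available() and importlib.util.find_spec("flash_attn") is not None


# Illustrative use: pick an attention implementation name for a model loader that
# accepts one (e.g. transformers' attn_implementation); "sdpa" is an assumed fallback.
attn_implementation = "flash_attention_2" if flash_attn_usable() else "sdpa"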
main.py CHANGED
@@ -43,14 +43,6 @@ LANGUAGE = os.getenv("LANGUAGE", "english").lower()
 
  device = get_device(force_cpu=False)
 
- # Install Flash Attention 2 if device is "cuda"
- if device == "cuda":
-     subprocess.run(
-         ["pip", "install", "flash-attn", "--no-build-isolation"],
-         env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-         shell=True,
-     )
-
  torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
  logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
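The runtime install is dropped because it now happens at image build time; the removed call was also fragile, since `subprocess.run` with a list argv plus `shell=True` passes only `pip` to the shell and the `env={...}` argument replaces the entire environment. A hedged sketch of what model loading might look like with flash-attn preinstalled, assuming a transformers speech model (the checkpoint id and `AutoModelForSpeechSeq2Seq` are illustrative, not taken from this commit):

# Hypothetical sketch, not part of this commit: with flash-attn baked into the image,
# the attention implementation can simply be chosen at model load time.
import torch
from transformers import AutoModelForSpeechSeq2Seq

device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    "openai/whisper-large-v3",  # illustrative checkpoint, not from this repo
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    attn_implementation="flash_attention_2" if device == "cuda" else "sdpa",
).to(device)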