Spaces:
Sleeping
Sleeping
Sofia Casadei
committed on
Commit
·
3375ee2
1
Parent(s):
0d64afb
install flash attention
Browse files- Dockerfile +5 -0
- main.py +0 -8
Dockerfile
CHANGED
@@ -45,6 +45,11 @@ COPY --chown=user requirements.txt .
 RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
     /uv pip install -r requirements.txt
 
+# Conditionally install flash-attn if CUDA is available
+RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
+    python -c "import torch; exit(0 if torch.cuda.is_available() else 1)" && \
+    /uv pip install flash-attn --no-build-isolation || echo "CUDA not available, skipping flash-attn installation"
+
 # Copy application code
 COPY --chown=user . .
 
main.py
CHANGED
@@ -43,14 +43,6 @@ LANGUAGE = os.getenv("LANGUAGE", "english").lower()
 
 device = get_device(force_cpu=False)
 
-# Install Flash Attention 2 if device is "cuda"
-if device == "cuda":
-    subprocess.run(
-        ["pip", "install", "flash-attn", "--no-build-isolation"],
-        env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-        shell=True,
-    )
-
 torch_dtype, np_dtype = get_torch_and_np_dtypes(device, use_bfloat16=False)
 logger.info(f"Using device: {device}, torch_dtype: {torch_dtype}, np_dtype: {np_dtype}")
 