danghungithp committed on
Commit
faa95b9
·
verified ·
1 Parent(s): e639f91

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -125
Dockerfile CHANGED
@@ -1,131 +1,21 @@
1
- # vim: filetype=dockerfile
2
 
3
- ARG FLAVOR=${TARGETARCH}
 
4
 
5
- ARG ROCMVERSION=6.3.3
6
- ARG JETPACK5VERSION=r35.4.1
7
- ARG JETPACK6VERSION=r36.4.0
8
- ARG CMAKEVERSION=3.31.2
9
 
10
- # CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
11
- FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
12
- RUN yum install -y yum-utils \
13
- && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
14
- && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
15
- && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
16
- && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
17
- ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
18
 
19
- FROM --platform=linux/arm64 almalinux:8 AS base-arm64
20
- # install epel-release for ccache
21
- RUN yum install -y yum-utils epel-release \
22
- && dnf install -y clang ccache \
23
- && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
24
- ENV CC=clang CXX=clang++
25
 
26
- FROM base-${TARGETARCH} AS base
27
- ARG CMAKEVERSION
28
- RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
29
- COPY CMakeLists.txt CMakePresets.json .
30
- COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
31
- ENV LDFLAGS=-s
32
 
33
- FROM base AS cpu
34
- RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
35
- ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
36
- RUN --mount=type=cache,target=/root/.ccache \
37
- cmake --preset 'CPU' \
38
- && cmake --build --parallel --preset 'CPU' \
39
- && cmake --install build --component CPU --strip --parallel 8
40
-
41
- FROM base AS cuda-11
42
- ARG CUDA11VERSION=11.3
43
- RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
44
- ENV PATH=/usr/local/cuda-11/bin:$PATH
45
- RUN --mount=type=cache,target=/root/.ccache \
46
- cmake --preset 'CUDA 11' \
47
- && cmake --build --parallel --preset 'CUDA 11' \
48
- && cmake --install build --component CUDA --strip --parallel 8
49
-
50
- FROM base AS cuda-12
51
- ARG CUDA12VERSION=12.8
52
- RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
53
- ENV PATH=/usr/local/cuda-12/bin:$PATH
54
- RUN --mount=type=cache,target=/root/.ccache \
55
- cmake --preset 'CUDA 12' \
56
- && cmake --build --parallel --preset 'CUDA 12' \
57
- && cmake --install build --component CUDA --strip --parallel 8
58
-
59
- FROM base AS rocm-6
60
- ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
61
- RUN --mount=type=cache,target=/root/.ccache \
62
- cmake --preset 'ROCm 6' \
63
- && cmake --build --parallel --preset 'ROCm 6' \
64
- && cmake --install build --component HIP --strip --parallel 8
65
-
66
- FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
67
- ARG CMAKEVERSION
68
- RUN apt-get update && apt-get install -y curl ccache \
69
- && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
70
- COPY CMakeLists.txt CMakePresets.json .
71
- COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
72
- RUN --mount=type=cache,target=/root/.ccache \
73
- cmake --preset 'JetPack 5' \
74
- && cmake --build --parallel --preset 'JetPack 5' \
75
- && cmake --install build --component CUDA --strip --parallel 8
76
-
77
- FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
78
- ARG CMAKEVERSION
79
- RUN apt-get update && apt-get install -y curl ccache \
80
- && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
81
- COPY CMakeLists.txt CMakePresets.json .
82
- COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
83
- RUN --mount=type=cache,target=/root/.ccache \
84
- cmake --preset 'JetPack 6' \
85
- && cmake --build --parallel --preset 'JetPack 6' \
86
- && cmake --install build --component CUDA --strip --parallel 8
87
-
88
- FROM base AS build
89
- WORKDIR /go/src/github.com/ollama/ollama
90
- COPY go.mod go.sum .
91
- RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
92
- ENV PATH=/usr/local/go/bin:$PATH
93
- RUN go mod download
94
- COPY . .
95
- ARG GOFLAGS="'-ldflags=-w -s'"
96
- ENV CGO_ENABLED=1
97
- RUN --mount=type=cache,target=/root/.cache/go-build \
98
- go build -trimpath -buildmode=pie -o /bin/ollama .
99
-
100
- FROM --platform=linux/amd64 scratch AS amd64
101
- COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
102
- COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
103
-
104
- FROM --platform=linux/arm64 scratch AS arm64
105
- COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
106
- COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
107
- COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
108
- COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
109
-
110
- FROM scratch AS rocm
111
- COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
112
-
113
- FROM ${FLAVOR} AS archive
114
- COPY --from=cpu dist/lib/ollama /lib/ollama
115
- COPY --from=build /bin/ollama /bin/ollama
116
-
117
- FROM ubuntu:20.04
118
- RUN apt-get update \
119
- && apt-get install -y ca-certificates \
120
- && apt-get clean \
121
- && rm -rf /var/lib/apt/lists/*
122
- COPY --from=archive /bin /usr/bin
123
- ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
124
- COPY --from=archive /lib/ollama /usr/lib/ollama
125
- ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
126
- ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
127
- ENV NVIDIA_VISIBLE_DEVICES=all
128
- ENV OLLAMA_HOST=0.0.0.0:11434
129
- EXPOSE 11434
130
- ENTRYPOINT ["/bin/ollama"]
131
- CMD ["serve"]
 
1
+ FROM ollama/ollama:0.1.44
2
 
3
+ # Install Python, pip, and curl (curl is used by the startup script to wait for Ollama); remove the apt lists in the same layer so they do not bloat the image
4
+ RUN apt-get update && apt-get install -y python3 python3-pip curl && rm -rf /var/lib/apt/lists/*
5
 
6
+ # Copy only the dependency manifest first so this layer stays cached until requirements.txt changes; skip pip's download cache to keep the layer small
7
+ COPY requirements.txt .
8
+ RUN pip3 install --no-cache-dir -r requirements.txt
 
9
 
10
+ # Switch to the application directory, then copy the build context into it
11
+ WORKDIR /app
12
+ COPY . .
 
 
 
 
 
13
 
14
+ # Generate the startup script: start Ollama in the background, wait until its API answers on port 11434, then exec the Python app so it receives container signals directly. printf '%s\n' writes one line per argument and does not depend on the shell's echo interpreting "\n".
15
+ RUN printf '%s\n' '#!/bin/bash' 'ollama serve &' 'until curl -s http://localhost:11434 > /dev/null; do' ' sleep 1' 'done' 'exec python3 app.py' > start.sh && chmod +x start.sh
 
 
 
 
16
 
17
+ # Declare port 7860, the default app port for Hugging Face Docker Spaces (EXPOSE is informational and does not publish the port)
18
+ EXPOSE 7860
 
 
 
 
19
 
20
+ # Set the startup script as the entry point. The ollama base image defines ENTRYPOINT ["/bin/ollama"], so a plain CMD would be passed to ollama as an argument instead of being run; overriding ENTRYPOINT also clears the inherited CMD.
21
+ ENTRYPOINT ["/app/start.sh"]