mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 00:19:51 +01:00
Initially Vulkan support in Ollama will require building from source. Once it is more thoroughly tested and we have fixed any critical bugs, then we can bundle Vulkan into the official binary releases.
202 lines
8.5 KiB
Docker
202 lines
8.5 KiB
Docker
# vim: filetype=dockerfile

# Global build arguments. They are declared ahead of the first FROM so they can
# be referenced in FROM lines; stages that need a value re-declare the ARG.

# Selects the stage that the archive stages are built FROM
# (and, with a _novulkan suffix, the opt-out variant).
ARG FLAVOR=${TARGETARCH}

# Job count handed to "cmake --parallel" in the compile stages.
ARG PARALLEL=8

# Base image tags.
ARG ROCMVERSION=6.3.3
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.4.0

# Toolchain versions downloaded during the build.
ARG CMAKEVERSION=3.31.2
ARG VULKANVERSION=1.4.321.1
|
|
|
|
# We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64

# Add the Rocky 8.5 AppStream vault repo, install the pinned gcc-toolset-10
# packages plus ccache in a single transaction, then register the NVIDIA CUDA
# repo so the cuda-* stages can install their toolkits.
RUN yum install -y yum-utils \
    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
    && dnf install -y \
        ccache \
        gcc-toolset-10-binutils-2.35-11.el8 \
        gcc-toolset-10-gcc-10.2.1-8.2.el8 \
        gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH

# Vulkan SDK: download, unpack under /, and build the headers and shaderc.
# The archive is extracted with an explicit -C / because the following commands
# address the SDK as /${VULKANVERSION}/..., and it is deleted in the same layer
# since nothing uses it after extraction.
ARG VULKANVERSION
RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -C / \
    && rm -f /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && dnf -y install ninja-build \
    && ln -s /usr/bin/python3 /usr/bin/python \
    && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
    && /${VULKANVERSION}/vulkansdk -j 8 shaderc

# Make the SDK headers, libraries and tools visible in the default search paths.
RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
    && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
|
|
|
|
# arm64 toolchain base: clang as the C/C++ compiler, plus the NVIDIA CUDA
# (sbsa) repo for the cuda-* stages.
FROM --platform=linux/arm64 almalinux:8 AS base-arm64

# install epel-release for ccache
RUN yum install -y \
        yum-utils \
        epel-release \
    && dnf install -y \
        clang \
        ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo

ENV CC=clang \
    CXX=clang++
|
|
|
|
# Common base for every compile stage: picks the per-architecture toolchain
# image, installs the pinned CMake release into /usr/local and copies in the
# CMake project files and the ggml sources.
FROM base-${TARGETARCH} AS base

ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz \
    | tar xz -C /usr/local --strip-components 1

COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml

ENV LDFLAGS=-s
|
|
|
|
# CPU backend. Installs gcc-toolset-11 and places its bin directory first on
# PATH for this stage only.
FROM base AS cpu

RUN dnf install -y \
        gcc-toolset-11-gcc \
        gcc-toolset-11-gcc-c++
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH

# Configure, build and install the CPU component; /root/.ccache is a cache mount.
ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CPU' \
    && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
    && cmake --install build --component CPU --strip --parallel ${PARALLEL}
|
|
|
|
# CUDA 11 backend, installed under the cuda_v11 runner directory.
FROM base AS cuda-11

# The shell substitution turns the dotted version (11.8) into the dashed form
# used in the package name (cuda-toolkit-11-8).
ARG CUDA11VERSION=11.8
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 11' -DOLLAMA_RUNNER_DIR="cuda_v11" \
    && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
    && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
|
|
|
# CUDA 12 backend, installed under the cuda_v12 runner directory.
FROM base AS cuda-12

# The shell substitution turns the dotted version (12.8) into the dashed form
# used in the package name (cuda-toolkit-12-8).
ARG CUDA12VERSION=12.8
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 12' -DOLLAMA_RUNNER_DIR="cuda_v12" \
    && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
    && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
|
|
|
|
|
# CUDA 13 backend, installed under the cuda_v13 runner directory.
FROM base AS cuda-13

# The shell substitution turns the dotted version (13.0) into the dashed form
# used in the package name (cuda-toolkit-13-0).
ARG CUDA13VERSION=13.0
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
ENV PATH=/usr/local/cuda-13/bin:$PATH

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 13' -DOLLAMA_RUNNER_DIR="cuda_v13" \
    && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
    && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
|
|
|
|
|
# ROCm 6 (HIP) backend, installed under the rocm runner directory.
FROM base AS rocm-6

# ROCm tool directories; each directory is listed once.
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:$PATH

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'ROCm 6' -DOLLAMA_RUNNER_DIR="rocm" \
        && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
        && cmake --install build --component HIP --strip --parallel ${PARALLEL}

# Drop the rocblas library files whose names contain gfx900 or gfx906 so they
# are not picked up when later stages copy dist/lib/ollama from this stage.
RUN rm -f dist/lib/ollama/rocm/rocblas/library/*gfx90[06]*
|
|
|
|
# JetPack 5 backend. Built on NVIDIA's l4t-jetpack image instead of the shared
# base stage, so curl, ccache, CMake and the sources are set up here.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5

ARG CMAKEVERSION
RUN apt-get update \
    && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz \
        | tar xz -C /usr/local --strip-components 1

COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 5' -DOLLAMA_RUNNER_DIR="cuda_jetpack5" \
    && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \
    && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
|
|
|
# JetPack 6 backend. Built on NVIDIA's l4t-jetpack image instead of the shared
# base stage, so curl, ccache, CMake and the sources are set up here.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6

ARG CMAKEVERSION
RUN apt-get update \
    && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz \
        | tar xz -C /usr/local --strip-components 1

COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml

ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 6' -DOLLAMA_RUNNER_DIR="cuda_jetpack6" \
    && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
    && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
|
|
|
|
# Vulkan backend, installed under the vulkan runner directory. Only the amd64
# artifact stage copies from this stage.
FROM base AS vulkan

# Use the shared PARALLEL job count for both build and install, matching the
# other compile stages, instead of an unbounded build and a hard-coded 8.
ARG PARALLEL
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
        && cmake --build --parallel ${PARALLEL} --preset 'Vulkan' \
        && cmake --install build --component Vulkan --strip --parallel ${PARALLEL}
|
|
|
|
|
|
# Go build stage: produces the ollama binary at /bin/ollama.
FROM base AS build
WORKDIR /go/src/github.com/ollama/ollama

# Copy only the module files first so the toolchain download and
# "go mod download" layers stay cached until go.mod or go.sum change.
COPY go.mod go.sum .

# Install the Go release named by the "go" directive in go.mod. The awk
# program compares the first field exactly (and stops at the first hit) so
# other directives that merely start with "go", such as godebug, cannot leak
# into the download URL. uname -m is mapped to Go's architecture names.
RUN curl -fsSL https://golang.org/dl/go$(awk '$1 == "go" { print $2; exit }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download

COPY . .

# Build settings; CGO_CFLAGS and CGO_CXXFLAGS have no default and are only set
# when supplied as build args.
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
ARG CGO_CFLAGS
ARG CGO_CXXFLAGS
RUN --mount=type=cache,target=/root/.cache/go-build \
    go build -trimpath -buildmode=pie -o /bin/ollama .
|
|
|
|
# amd64 artifact set: gathers the CUDA 12, CUDA 13 and Vulkan libraries under
# /lib/ollama on an empty image. The CUDA 11 copy is left disabled.
FROM --platform=linux/amd64 scratch AS amd64

# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
COPY --from=vulkan dist/lib/ollama /lib/ollama/
|
|
|
|
# arm64 artifact set: gathers the CUDA 12, CUDA 13 and both JetPack libraries
# under /lib/ollama on an empty image. The CUDA 11 copy is left disabled.
FROM --platform=linux/arm64 scratch AS arm64

# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/
COPY --from=jetpack-5 dist/lib/ollama/ /lib/ollama/
COPY --from=jetpack-6 dist/lib/ollama/ /lib/ollama/
|
|
|
|
# ROCm artifact set: only the libraries produced by the rocm-6 stage.
FROM scratch AS rocm

COPY --from=rocm-6 dist/lib/ollama /lib/ollama
|
|
|
|
# Full archive: the artifact set chosen by FLAVOR, plus the CPU libraries and
# the ollama binary.
FROM ${FLAVOR} AS archive

# NOTE(review): VULKANVERSION is declared but no instruction in this stage
# references it.
ARG VULKANVERSION

COPY --from=cpu dist/lib/ollama /lib/ollama
COPY --from=build /bin/ollama /bin/ollama
|
|
|
|
# Temporary opt-out stages for Vulkan

# amd64 artifact set without the Vulkan libraries: CUDA 12 and CUDA 13 only.
# The CUDA 11 copy is left disabled.
FROM --platform=linux/amd64 scratch AS amd64_novulkan

# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
|
|
# The arm64 artifact set carries no Vulkan libraries, so its opt-out variant
# is just an alias of the arm64 stage.
FROM arm64 AS arm64_novulkan

# Archive without Vulkan: the FLAVOR-selected _novulkan artifact set, plus the
# CPU libraries and the ollama binary.
FROM ${FLAVOR}_novulkan AS archive_novulkan

COPY --from=cpu dist/lib/ollama /lib/ollama
COPY --from=build /bin/ollama /bin/ollama
|
|
# Runtime image without Vulkan support, assembled from archive_novulkan.
FROM ubuntu:24.04 AS novulkan

# CA certificates only; the apt caches are removed in the same layer.
RUN apt-get update \
    && apt-get install -y ca-certificates \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# The binary lands in /usr/bin and the backend libraries in /usr/lib/ollama.
COPY --from=archive_novulkan /bin /usr/bin
COPY --from=archive_novulkan /lib/ollama /usr/lib/ollama

# Runtime environment: search path, NVIDIA library locations and container
# runtime settings, and the address the server listens on.
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all \
    OLLAMA_HOST=0.0.0.0:11434

EXPOSE 11434

# NOTE(review): the binary is copied to /usr/bin but started as /bin/ollama;
# presumably this resolves through the base image's /bin layout - confirm.
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
|
|
|
|
# Default runtime image, assembled from the full archive stage.
FROM ubuntu:24.04 AS default

# CA certificates plus the Vulkan loader library; the apt caches are removed in
# the same layer.
RUN apt-get update \
    && apt-get install -y ca-certificates libvulkan1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# The binary lands in /usr/bin and the backend libraries in /usr/lib/ollama.
COPY --from=archive /bin /usr/bin
COPY --from=archive /lib/ollama /usr/lib/ollama

# Runtime environment: search path, NVIDIA library locations and container
# runtime settings, and the address the server listens on.
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all \
    OLLAMA_HOST=0.0.0.0:11434

EXPOSE 11434

# NOTE(review): the binary is copied to /usr/bin but started as /bin/ollama;
# presumably this resolves through the base image's /bin layout - confirm.
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
|