ARG CUDA_VER=118
ARG CUDA_FULLVER=11.8.0
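#
# For illustration only: a typical build of one of the final stages below might
# look like this (the image tag is hypothetical; CUDA_VER and CUDA_FULLVER must
# refer to the same CUDA release):
#
#   docker build --target xformers \
#     --build-arg CUDA_VER=118 --build-arg CUDA_FULLVER=11.8.0 \
#     -t gyre:xformers .
#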
# ----- Build the core "dist image" base -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS regularbase
# Install dependencies
ENV FLIT_ROOT_INSTALL=1
RUN /bin/micromamba -r /env -n gyre install -c defaults flit
# We copy only the minimum flit needs to run, to avoid cache invalidation on code changes
COPY pyproject.toml .
COPY gyre/__init__.py gyre/
RUN touch README.md
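# flit's --pth-file install links the package via a .pth file instead of copying
# it, so this step mainly installs dependencies; the real gyre code is copied
# into the final images below.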
RUN /bin/micromamba -r /env -n gyre run flit install --pth-file
RUN /bin/micromamba -r /env -n gyre run pip cache purge
# Setup NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
RUN mkdir -p $NVM_DIR
RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \
&& . $NVM_DIR/nvm.sh \
&& nvm install $NODE_VERSION \
&& nvm alias default $NODE_VERSION \
&& nvm use default
# ----- Build bitsandbytes -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS bitsandbytesbase
ARG BANDB_REF=main
ARG CUDA_VER
WORKDIR /
RUN git clone https://github.com/TimDettmers/bitsandbytes.git
WORKDIR /bitsandbytes
RUN git checkout $BANDB_REF
#COPY docker_support/bitsandbytes.sm89.diff /
#RUN patch -p1 < /bitsandbytes.sm89.diff
ENV CUDA_VERSION=${CUDA_VER}
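# The sed pipeline maps CUDA_VER to a bitsandbytes make target:
# 118 -> cuda12x, any other 11.x -> cuda11x.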
RUN /bin/micromamba -r /env -n gyre run make `echo ${CUDA_VER} | sed -e 's/118/cuda12x/' | sed -e 's/11./cuda11x/'`
RUN /bin/micromamba -r /env -n gyre run python setup.py bdist_wheel
# ----- Build triton -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS tritonbase
ARG TRITON_REF=tags/v1.0
WORKDIR /
RUN git clone https://github.com/openai/triton.git
WORKDIR /triton
RUN git checkout $TRITON_REF
WORKDIR /triton/python
RUN /bin/micromamba -r /env -n gyre run pip install cmake
RUN apt-get update && apt-get install -y zlib1g-dev libtinfo-dev
RUN /bin/micromamba -r /env -n gyre run pip install .
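# Archive the built triton package; the runtime images below extract this
# tarball instead of carrying the whole build toolchain.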
RUN tar cvjf /triton.tbz /env/envs/gyre/lib/python3.*/site-packages/triton*
# ----- Build xformers (on top of triton) -----
FROM tritonbase AS xformersbase
ARG XFORMERS_REPO=https://github.com/facebookresearch/xformers.git
ARG XFORMERS_REF=main
COPY docker_support/cuda_archs.sh /
WORKDIR /
RUN git clone $XFORMERS_REPO
WORKDIR /xformers
RUN git checkout $XFORMERS_REF
RUN git submodule update --init --recursive
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
ENV FORCE_CUDA=1
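# cuda_archs.sh (copied from docker_support above) prints the list of CUDA
# compute capabilities to compile kernels for.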
RUN TORCH_CUDA_ARCH_LIST="`/cuda_archs.sh`" /bin/micromamba -r /env -n gyre run pip install .
RUN tar cvjf /xformers.tbz /env/envs/gyre/lib/python3.*/site-packages/xformers*
# ----- Build deepspeed (on top of triton) -----
FROM tritonbase AS deepspeedbase
ARG DEEPSPEED_REF=tags/v0.7.4
COPY docker_support/cuda_archs.sh /
WORKDIR /
RUN git clone https://github.com/microsoft/DeepSpeed.git
WORKDIR /DeepSpeed
RUN git checkout $DEEPSPEED_REF
RUN apt-get update && apt-get install -y libaio-dev
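# Pre-build every DeepSpeed C++/CUDA op at install time, except the sparse
# attention op, which is explicitly disabled.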
ENV DS_BUILD_OPS=1
ENV DS_BUILD_SPARSE_ATTN=0
RUN TORCH_CUDA_ARCH_LIST="`/cuda_archs.sh`" /bin/micromamba -r /env -n gyre run pip install .
RUN tar cvjf /deepspeed.tbz /env/envs/gyre/lib/python3.*/site-packages/deepspeed*
# ----- Build the basic inference server image -----
FROM nvidia/cuda:${CUDA_FULLVER}-cudnn8-runtime-ubuntu20.04 AS basic
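# Copy the prebuilt micromamba binary, conda env and node install out of the
# build stage; the runtime image carries no compilers or build tools.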
COPY --from=regularbase /bin/micromamba /bin/
RUN mkdir -p /env/envs
COPY --from=regularbase /env/envs /env/envs/
RUN mkdir -p /nvm
COPY --from=regularbase /nvm /nvm/
# Setup NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
ENV NODE_PATH=$NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
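# localtunnel can expose the server through a temporary public URL.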
RUN npm install -g localtunnel
# Now we can copy everything we need
COPY nonfree /nonfree/
COPY gyre /gyre/
COPY server.py .
# Set up some config files
RUN mkdir -p /huggingface
RUN mkdir -p /weights
RUN mkdir -p /config
COPY gyre/config/. /config/
# Set up some environment variables
ENV HF_HOME=/huggingface
ENV HF_API_TOKEN=mustset
ENV SD_ENGINECFG=/config/engines.yaml
ENV SD_WEIGHT_ROOT=/weights
CMD [ "/bin/micromamba", "-r", "env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the basic inference server image + xformers -----
FROM basic AS xformers
COPY --from=xformersbase /xformers/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=tritonbase /triton.tbz /
RUN tar xvjf /triton.tbz
COPY --from=xformersbase /xformers.tbz /
RUN tar xvjf /xformers.tbz
RUN rm /*.tbz
CMD [ "/bin/micromamba", "-r", "env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the inference server image with training support -----
# (based on a -devel image instead of -runtime, but otherwise identical to basic)
FROM nvidia/cuda:${CUDA_FULLVER}-cudnn8-devel-ubuntu20.04 AS basic-training
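# The -devel base ships nvcc and the CUDA headers, which the training
# extensions (triton, deepspeed, bitsandbytes) may need to JIT-compile kernels.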
COPY --from=regularbase /bin/micromamba /bin/
RUN mkdir -p /env/envs
COPY --from=regularbase /env/envs /env/envs/
RUN mkdir -p /nvm
COPY --from=regularbase /nvm /nvm/
# Setup NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
ENV NODE_PATH=$NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
RUN npm install -g localtunnel
# Now we can copy everything we need
COPY nonfree /nonfree/
COPY gyre /gyre/
COPY server.py .
# Set up some config files
RUN mkdir -p /huggingface
RUN mkdir -p /weights
RUN mkdir -p /config
COPY gyre/config/. /config/
# Set up some environment variables
ENV HF_HOME=/huggingface
ENV HF_API_TOKEN=mustset
ENV SD_ENGINECFG=/config/engines.yaml
ENV SD_WEIGHT_ROOT=/weights
CMD [ "/bin/micromamba", "-r", "env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the inference server image with training support + xformers, deepspeed, and bitsandbytes -----
FROM basic-training AS xformers-training
COPY --from=xformersbase /xformers/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=deepspeedbase /DeepSpeed/requirements/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=tritonbase /triton.tbz /
RUN tar xvjf /triton.tbz
COPY --from=xformersbase /xformers.tbz /
RUN tar xvjf /xformers.tbz
COPY --from=deepspeedbase /deepspeed.tbz /
RUN tar xvjf /deepspeed.tbz
RUN rm /*.tbz
COPY --from=bitsandbytesbase /bitsandbytes/dist/*.whl /
RUN /bin/micromamba -r /env -n gyre run pip install /*.whl
RUN rm /*.whl
CMD [ "/bin/micromamba", "-r", "env", "-n", "gyre", "run", "python", "./server.py" ]