# syntax=docker/dockerfile:1.5
ARG CUDA_VER=118
ARG CUDA_FULLVER=11.8.0
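#
# These two ARGs select the CUDA toolchain for every stage below. A minimal
# build sketch (the image tag is an assumption, not part of this file):
#
#   docker build --build-arg CUDA_VER=118 --build-arg CUDA_FULLVER=11.8.0 \
#     --target basic -t gyre:basic .
#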
# ----- Build MMCV -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS mmcvbase
ARG MMCV_REPO=https://github.com/open-mmlab/mmcv.git
ARG MMCV_REF=v1.7.1
ARG MAX_JOBS=8
COPY docker_support/cuda_archs.sh /
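# cuda_archs.sh is expected to print a TORCH_CUDA_ARCH_LIST value for the
# target GPUs; an illustrative output (an assumption, the real list comes
# from docker_support/cuda_archs.sh):
#   6.1;7.0;7.5;8.0;8.6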
# Install dependencies
ENV FLIT_ROOT_INSTALL=1
RUN /bin/micromamba -r /env -n gyre run pip install "flit~=3.8.0"
# We copy only the minimum flit needs to run, to avoid cache invalidation on code changes
COPY pyproject.toml /pyproject.toml.in
# Don't install mmcv (we build from source), and use opencv-python-headless
RUN sed -e '/mmcv/d' -e 's/opencv-python/opencv-python-headless/' /pyproject.toml.in > /pyproject.toml
COPY gyre/__init__.py gyre/
RUN touch README.md
RUN /bin/micromamba -r /env -n gyre run flit install --only-deps
RUN /bin/micromamba -r /env -n gyre run pip cache purge
# HACK: make it look like opencv-python is installed, to prevent it being pulled in by child dependencies
COPY --chmod=755 <<"EOT" /hack_opencv.sh
#!/bin/bash
for x in /env/envs/gyre/lib/python*/site-packages/opencv_python_headless*.dist-info
do
  y=$(echo "$x" | sed -e 's/_headless//')
  cp -r "$x" "$y"
  sed -e 's/-headless//g' "$y/METADATA" > "$y/METADATA.new"
  mv "$y/METADATA.new" "$y/METADATA"
done
EOT
RUN /hack_opencv.sh
WORKDIR /
RUN git clone $MMCV_REPO
WORKDIR /mmcv
RUN git checkout $MMCV_REF
RUN git submodule update --init --recursive
RUN /bin/micromamba -r /env -n gyre run pip install ninja psutil
ENV FORCE_CUDA=1
ENV MMCV_WITH_OPS=1
ENV MAX_JOBS=$MAX_JOBS
RUN TORCH_CUDA_ARCH_LIST="`/cuda_archs.sh`" /bin/micromamba -r /env -n gyre run pip install .
# ----- Build the core "dist image" base -----
FROM mmcvbase AS regularbase
# Set up NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
RUN mkdir -p $NVM_DIR
RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \
&& . $NVM_DIR/nvm.sh \
&& nvm install $NODE_VERSION \
&& nvm alias default $NODE_VERSION \
&& nvm use default
# ----- Build bitsandbytes -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS bitsandbytesbase
ARG BANDB_REF=0.37.0
ARG CUDA_VER
WORKDIR /
RUN git clone https://github.com/TimDettmers/bitsandbytes.git
WORKDIR /bitsandbytes
RUN git checkout $BANDB_REF
#COPY docker_support/bitsandbytes.sm89.diff /
#RUN patch -p1 < /bitsandbytes.sm89.diff
ENV CUDA_VERSION=${CUDA_VER}
RUN /bin/micromamba -r /env -n gyre run make `echo ${CUDA_VER} | sed -e 's/118/cuda12x/' | sed -e 's/11./cuda11x/'`
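# The sed chain above maps CUDA_VER onto a bitsandbytes make target:
#   118            -> cuda12x
#   11x (e.g. 117) -> cuda11x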
RUN /bin/micromamba -r /env -n gyre run python setup.py bdist_wheel
# ----- Build triton -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS tritonbase
ARG TRITON_REF=tags/v1.0
WORKDIR /
RUN git clone https://github.com/openai/triton.git
WORKDIR /triton
RUN git checkout $TRITON_REF
WORKDIR /triton/python
RUN /bin/micromamba -r /env -n gyre run pip install cmake
RUN apt install -y zlib1g-dev libtinfo-dev
RUN /bin/micromamba -r /env -n gyre run pip install .
RUN tar cvjf /triton.tbz /env/envs/gyre/lib/python3.*/site-packages/triton*
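# Pattern used for triton, xformers and deepspeed: build against the fat
# devbase image, tar up the resulting site-packages entries, and later untar
# them into the slim runtime images instead of rebuilding there.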
# ----- Build xformers (on top of triton) -----
FROM tritonbase AS xformersbase
ARG XFORMERS_REPO=https://github.com/facebookresearch/xformers.git
ARG XFORMERS_REF=main
ARG MAX_JOBS=8
COPY docker_support/cuda_archs.sh /
WORKDIR /
RUN git clone $XFORMERS_REPO
WORKDIR /xformers
RUN git checkout $XFORMERS_REF
RUN git submodule update --init --recursive
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN /bin/micromamba -r /env -n gyre run pip install ninja
ENV FORCE_CUDA=1
ENV MAX_JOBS=$MAX_JOBS
RUN TORCH_CUDA_ARCH_LIST="`/cuda_archs.sh`" /bin/micromamba -r /env -n gyre run pip install .
RUN tar cvjf /xformers.tbz /env/envs/gyre/lib/python3.*/site-packages/xformers*
# ----- Build deepspeed (on top of triton) -----
FROM tritonbase AS deepspeedbase
ARG DEEPSPEED_REF=tags/v0.7.4
COPY docker_support/cuda_archs.sh /
WORKDIR /
RUN git clone https://github.com/microsoft/DeepSpeed.git
WORKDIR /DeepSpeed
RUN git checkout $DEEPSPEED_REF
RUN apt install -y libaio-dev
ENV DS_BUILD_OPS=1
ENV DS_BUILD_SPARSE_ATTN=0
RUN TORCH_CUDA_ARCH_LIST="`/cuda_archs.sh`" /bin/micromamba -r /env -n gyre run pip install .
RUN tar cvjf /deepspeed.tbz /env/envs/gyre/lib/python3.*/site-packages/deepspeed*
# ----- Build (i.e. checkout) the flyingdog interface -----
FROM ghcr.io/stablecabal/gyre-devbase:pytorch112-cuda${CUDA_VER}-latest AS flyingdogbase
ARG FLYINGDOG_REPO=https://github.com/flyingdogsoftware/aistudio.git
ARG FLYINGDOG_REF=main
WORKDIR /
RUN git clone $FLYINGDOG_REPO
WORKDIR /aistudio
RUN git checkout $FLYINGDOG_REF
# ----- Build the basic inference server image -----
FROM nvidia/cuda:${CUDA_FULLVER}-cudnn8-runtime-ubuntu20.04 AS basic
COPY --from=regularbase /bin/micromamba /bin/
RUN mkdir -p /env/envs
COPY --from=regularbase /env/envs /env/envs/
RUN mkdir -p /nvm
COPY --from=regularbase /nvm /nvm/
# Set up NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
ENV NODE_PATH=$NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
RUN npm install -g localtunnel
# Now we can copy everything we need
COPY gyre /gyre/
COPY server.py .
# Set up the directory layout and config files
# - Caches
RUN mkdir -p /cache
RUN mkdir -p /huggingface
RUN mkdir -p /gyrecache
# - Local weights
RUN mkdir -p /weights
RUN mkdir -p /lora
RUN mkdir -p /embedding
# - Config
RUN mkdir -p /config
COPY gyre/config/. /config/
# Link in the individual cache folders, so you can override either the whole cache or
# just the individual folders
RUN ln -s /huggingface /cache/huggingface
RUN ln -s /gyrecache /cache/gyre
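# e.g. mounting -v /host/hf:/huggingface overrides just the Hugging Face
# cache, while -v /host/cache:/cache overrides everything (the host paths
# here are illustrative assumptions).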
# Set up some environment variables
ENV XDG_CACHE_HOME=/cache
ENV SD_ENGINECFG=/config/engines.yaml
ENV SD_WEIGHT_ROOT=/weights
ENV SD_LOCAL_RESOURCE_1=embedding:/embedding
ENV SD_LOCAL_RESOURCE_2=lora:/lora
# Create a couple of utility scripts
COPY --chmod=755 <<"EOT" /run.sh
#!/bin/bash
/bin/micromamba -r /env -n gyre run "$@"
EOT
COPY --chmod=755 <<"EOT" /server.sh
#!/bin/bash
/bin/micromamba -r /env -n gyre run python ./server.py "$@"
EOT
# And set the default command
CMD [ "/bin/micromamba", "-r", "/env", "-n", "gyre", "run", "python", "./server.py" ]
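#
# A sketch of running this image (the tag and host paths are assumptions; the
# mount points come from the directories created above):
#
#   docker run --gpus all \
#     -v /my/weights:/weights -v /my/cache:/cache \
#     gyre:basic
#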
# ----- Build the basic inference server image + xformers -----
FROM basic AS xformers
COPY --from=xformersbase /xformers/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=tritonbase /triton.tbz /
RUN tar xvjf /triton.tbz
COPY --from=xformersbase /xformers.tbz /
RUN tar xvjf /xformers.tbz
RUN rm /*.tbz
COPY --from=bitsandbytesbase /bitsandbytes/dist/*.whl /
RUN /bin/micromamba -r /env -n gyre run pip install /*.whl
RUN rm /*.whl
CMD [ "/bin/micromamba", "-r", "/env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the bundled xformers + flyingdog image -----
FROM xformers AS bundle
RUN mkdir -p /aistudio
COPY --from=flyingdogbase /aistudio /aistudio/
ENV SD_HTTP_FILE_ROOT=/aistudio/dist
ENV SD_HTTP_PROXY_1=flyingdog:www.flyingdog.de
CMD [ "/bin/micromamba", "-r", "/env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the inference server image with training support -----
# (based on a -devel image instead of -runtime, but otherwise identical to basic)
FROM nvidia/cuda:${CUDA_FULLVER}-cudnn8-devel-ubuntu20.04 AS basic-training
COPY --from=regularbase /bin/micromamba /bin/
RUN mkdir -p /env/envs
COPY --from=regularbase /env/envs /env/envs/
RUN mkdir -p /nvm
COPY --from=regularbase /nvm /nvm/
# Set up NVM & Node for Localtunnel
ENV NVM_DIR=/nvm
ENV NODE_VERSION=16.18.0
ENV NODE_PATH=$NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
RUN npm install -g localtunnel
# Now we can copy everything we need
COPY gyre /gyre/
COPY server.py .
# Set up the directory layout and config files
# - Caches
RUN mkdir -p /cache
RUN mkdir -p /huggingface
RUN mkdir -p /gyrecache
# - Local weights
RUN mkdir -p /weights
RUN mkdir -p /lora
RUN mkdir -p /embedding
# - Config
RUN mkdir -p /config
COPY gyre/config/. /config/
# Link in the individual cache folders, so you can override either the whole cache or
# just the individual folders
RUN ln -s /huggingface /cache/huggingface
RUN ln -s /gyrecache /cache/gyre
# Set up some environment variables
ENV XDG_CACHE_HOME=/cache
ENV SD_ENGINECFG=/config/engines.yaml
ENV SD_WEIGHT_ROOT=/weights
ENV SD_LOCAL_RESOURCE_1=embedding:/embedding
ENV SD_LOCAL_RESOURCE_2=lora:/lora
# Create a couple of utility scripts
COPY --chmod=755 <<"EOT" /run.sh
#!/bin/bash
/bin/micromamba -r /env -n gyre run "$@"
EOT
COPY --chmod=755 <<"EOT" /server.sh
#!/bin/bash
/bin/micromamba -r /env -n gyre run python ./server.py "$@"
EOT
# And set the default command
CMD [ "/bin/micromamba", "-r", "/env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Build the inference server image with training support + xformers, deepspeed, and bitsandbytes -----
FROM basic-training AS xformers-training
COPY --from=xformersbase /xformers/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=deepspeedbase /DeepSpeed/requirements/requirements.txt /
RUN /bin/micromamba -r /env -n gyre run pip install -r requirements.txt
RUN rm requirements.txt
COPY --from=tritonbase /triton.tbz /
RUN tar xvjf /triton.tbz
COPY --from=xformersbase /xformers.tbz /
RUN tar xvjf /xformers.tbz
COPY --from=deepspeedbase /deepspeed.tbz /
RUN tar xvjf /deepspeed.tbz
RUN rm /*.tbz
COPY --from=bitsandbytesbase /bitsandbytes/dist/*.whl /
RUN /bin/micromamba -r /env -n gyre run pip install /*.whl
RUN rm /*.whl
CMD [ "/bin/micromamba", "-r", "/env", "-n", "gyre", "run", "python", "./server.py" ]
# ----- Non-free targets -----
FROM basic AS basic-nonfree
COPY nonfree /nonfree/
FROM xformers AS xformers-nonfree
COPY nonfree /nonfree/
FROM bundle AS bundle-nonfree
COPY nonfree /nonfree/
FROM basic-training AS basic-training-nonfree
COPY nonfree /nonfree/
FROM xformers-training AS xformers-training-nonfree
COPY nonfree /nonfree/
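#
# Every stage above is a buildable target; a hedged example for the training
# image with all accelerators (the tag is an assumption):
#
#   docker build --target xformers-training -t gyre:xformers-training .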