#!/bin/bash
#
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
shopt -s extglob
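# Defaults, overridden by the command-line options parsed below: keep previous
# build output (skip the Maven clean phase), no bash -x tracing, and build
# against the default (Scala 2.12) poms rather than scala2.13/.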
SKIP_CLEAN=1
BUILD_ALL_DEBUG=0
SCALA213=0
function print_usage() {
echo "Usage: buildall [OPTION]"
echo "Options:"
echo " -h, --help"
echo " print this help message"
echo " --debug"
echo " enable bash -x tracing right after this option is parsed"
echo " --clean"
echo " include Maven clean phase"
echo " -gb, --generate-bloop"
echo " generate projects for Bloop clients: IDE (Scala Metals, IntelliJ) or Bloop CLI"
echo " -p=DIST_PROFILE, --profile=DIST_PROFILE"
echo " use this profile for the dist module, default: noSnapshots, also supported: snapshots, minimumFeatureVersionMix,"
echo " snapshotsWithDatabricks, noSnapshotsWithDatabricks, noSnapshotsScala213, snapshotsScala213."
echo " NOTE: the Databricks-related spark3XYdb shims are not built locally, the jars are fetched prebuilt from a"
echo " . remote Maven repo. You can also supply a comma-separated list of build versions. E.g., --profile=320,330 will"
echo " build only the distribution jar only for 3.2.0 and 3.3.0"
echo " -m=MODULE, --module=MODULE"
echo " after finishing parallel builds, resume from dist and build up to and including module MODULE."
echo " E.g., --module=integration_tests"
echo " -P=N, --parallel=N"
echo " Build in parallel, N (4 by default) is passed via -P to xargs"
echo " --install"
echo " Intall the resulting jar instead of just building it"
echo " -o=MVN_OPT, --option=MVN_OPT"
echo " use this option to build project with maven. E.g., --option='-Dcudf.version=cuda11'"
echo " --rebuild-dist-only"
echo " repackage the dist module artifact using installed dependencies"
}
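# Illustrative invocations derived from the options documented above:
#   ./buildall                                # package the default noSnapshots dist jar
#   ./buildall --clean --install              # clean first, then install the built jars
#   ./buildall --profile=320,330 --parallel=6 # build only the 3.2.0 and 3.3.0 shims, 6 builds at a time
#   ./buildall --generate-bloop --scala213    # generate Bloop projects for the Scala 2.13 build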
function bloopInstall() {
[[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x
local bloopTmpDir=$(mktemp -d /tmp/tmp.bloop.XXXXXX)
time (
bloopDirsGenerated=()
for bv in "${SPARK_SHIM_VERSIONS[@]}"; do
bloopTmpConfigDir="$bloopTmpDir/.bloop$bv"
mkdir -p $bloopTmpConfigDir
$MVN -B clean install \
-DbloopInstall \
-Dbloop.configDirectory="$bloopTmpConfigDir" \
-DdownloadSources=true \
-Dbuildver="$bv"
specifier="spark$bv"
bloopDir=$PWD/.bloop-$specifier
rm -rf $bloopDir
mv $bloopTmpConfigDir $bloopDir
echo "generated bloop files under $bloopDir"
bloopDirsGenerated+=($bloopDir)
done
echo "#### Created bloop projects ${bloopDirsGenerated[@]}"
echo "Execute"
echo " ln -s .bloop-spark3XY .bloop"
echo "to make it an active Bloop project in VS Code Scala Metals"
)
}
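# Resolve the buildver list for a dist profile by evaluating the dist module's
# included_buildvers property; commas and newlines are stripped so the remaining
# whitespace-separated values can be word-split into SPARK_SHIM_VERSIONS below.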
function versionsFromDistProfile() {
[[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x
versionRawStr=$($MVN -B help:evaluate -q -pl dist -P"$1" -Dexpression=included_buildvers -DforceStdout)
versionStr=${versionRawStr//[$'\n',]/}
echo -n $versionStr
}
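# Resolve the buildver list from the release profiles of the given pom (root or
# scala2.13/) via build/get_buildvers.py, normalized the same way as above.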
function versionsFromReleaseProfiles() {
versionRawStr=$(python build/get_buildvers.py $1 $2)
versionStr=${versionRawStr//[$'\n',]/}
echo -n $versionStr
}
FINAL_OP="package"
while [[ "$1" != "" ]] ; do
case "$1" in
--help|-h)
print_usage
exit 0
;;
--generate-bloop|-gb)
GEN_BLOOP=true
;;
-p=*|--profile=*)
DIST_PROFILE="${1#*=}"
;;
-m=*|--module=*)
MODULE="${1#*=}"
;;
-P=*|--parallel=*)
BUILD_PARALLEL="${1#*=}"
;;
--debug)
BUILD_ALL_DEBUG=1
set -x
;;
--clean)
SKIP_CLEAN="0"
;;
--install)
FINAL_OP="install"
;;
--scala213)
SCALA213=1
;;
--rebuild-dist-only)
SKIP_DIST_DEPS="1"
MODULE="dist"
;;
-o=*|--option=*)
MVN_OPT="${1#*=}"
;;
*)
echo >&2 "Unknown arg: $1"
print_usage
exit 1
;;
esac
# advance $1 to the next in the arg list
shift
done
if [[ "$DIST_PROFILE" == *Scala213 ]]; then
SCALA213=1
fi
# include options to mvn command
export MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 ${MVN_OPT}"
if [[ "$SCALA213" == "1" ]]; then
MVN="$MVN -f scala2.13/"
DIST_PROFILE=${DIST_PROFILE:-"noSnapshotsScala213"}
$(dirname $0)/make-scala-version-build-files.sh 2.13
else
DIST_PROFILE=${DIST_PROFILE:-"noSnapshots"}
fi
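# With --module=MODULE, restrict the final Maven invocation to MODULE plus the
# modules it depends on (mvn --projects MODULE --also-make).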
[[ "$MODULE" != "" ]] && MODULE_OPT="--projects $MODULE --also-make" || MODULE_OPT=""
echo "Collecting Spark versions..."
case $DIST_PROFILE in
snapshotsScala213)
SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" "scala2.13/pom.xml"))
;;
noSnapshotsScala213)
SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" "scala2.13/pom.xml"))
;;
snapshots?(WithDatabricks))
SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" "pom.xml"))
;;
noSnapshots?(WithDatabricks))
SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" "pom.xml"))
;;
minimumFeatureVersionMix)
SPARK_SHIM_VERSIONS=($(versionsFromDistProfile "minimumFeatureVersionMix"))
;;
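# a bare comma-separated buildver list such as 320,330: split it into
# SPARK_SHIM_VERSIONS and pass it to Maven via -Dincluded_buildvers instead of a named profile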
[34]*)
<<< $DIST_PROFILE IFS="," read -ra SPARK_SHIM_VERSIONS
INCLUDED_BUILDVERS_OPT="-Dincluded_buildvers=$DIST_PROFILE"
unset DIST_PROFILE
;;
*)
echo "ERROR unexpected value for profile: $DIST_PROFILE, see 'buildall --help'"
exit 1
esac
echo "Spark versions involved: ${SPARK_SHIM_VERSIONS[@]} ..."
export MVN_BASE_DIR=$($MVN help:evaluate -Dexpression=project.basedir -q -DforceStdout)
if [[ "$GEN_BLOOP" == "true" ]]; then
bloopInstall
exit 0
fi
[[ "$DIST_PROFILE" != "" ]] && MVN_PROFILE_OPT="-P$DIST_PROFILE" || MVN_PROFILE_OPT=""
# First element in SPARK_SHIM_VERSIONS to do most of the checks
export BASE_VER=${SPARK_SHIM_VERSIONS[0]}
export NUM_SHIMS=${#SPARK_SHIM_VERSIONS[@]}
export BUILD_PARALLEL=${BUILD_PARALLEL:-4}
if [[ "$SKIP_CLEAN" != "1" ]]; then
echo "Clean once across all modules"
$MVN -q clean
fi
echo "Building a combined dist jar with Shims for ${SPARK_SHIM_VERSIONS[@]} ..."
function build_single_shim() {
[[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x
BUILD_VER=$1
mkdir -p "$MVN_BASE_DIR/target"
(( BUILD_PARALLEL == 1 || NUM_SHIMS == 1 )) && LOG_FILE="/dev/tty" || \
LOG_FILE="$MVN_BASE_DIR/target/mvn-build-$BUILD_VER.log"
if [[ "$BUILD_VER" == "$BASE_VER" ]]; then
SKIP_CHECKS="false"
# WORKAROUND:
# the maven build on L193 currently relies on the aggregator dependency, which
# will be removed by
# https://github.com/NVIDIA/spark-rapids/issues/3932
#
# if this were a single Maven invocation, Maven would register package-phase
# artifacts and give them precedence
#
MVN_PHASE="install"
else
SKIP_CHECKS="true"
MVN_PHASE="package"
fi
echo "#### REDIRECTING mvn output to $LOG_FILE ####"
$MVN -U "$MVN_PHASE" \
-DskipTests \
-Dbuildver="$BUILD_VER" \
-Drat.skip="$SKIP_CHECKS" \
-Dmaven.scaladoc.skip \
-Dmaven.scalastyle.skip="$SKIP_CHECKS" \
-pl tools -am > "$LOG_FILE" 2>&1 || {
[[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -500 "$LOG_FILE" || true
exit 255
}
}
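# export the function so the bash -c subshells spawned by xargs below can invoke it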
export -f build_single_shim
# Install all the versions for DIST_PROFILE
# First build the aggregator module for all SPARK_SHIM_VERSIONS in parallel skipping expensive plugins that
# - either deferred to 320 because the check is identical in all shim profiles such as scalastyle
# - or deferred to 320 because we currently don't require it per shim such as scaladoc generation
# - or there is a dedicated step that runs them against a particular shim jar, such as unit tests. In
# the near future we will run unit tests against a combined multi-shim jar to catch classloading
# regressions even before the pytest-based integration_tests
#
# Then resume maven build from the dist module now that shims have been installed
time (
# printf a single buildver array element per line
if [[ "$SKIP_DIST_DEPS" != "1" ]]; then
# Execute initialize to download a massive jar for spark-rapids-jni in a single thread to
# avoid repeating this work in parallel
# Initialize sql-plugin-api only to avoid dealing with missing submodule dependencies
#
$MVN initialize -pl sql-plugin-api -am
printf "%s\n" "${SPARK_SHIM_VERSIONS[@]}" | \
xargs -t -I% -P "$BUILD_PARALLEL" -n 1 \
bash -c 'build_single_shim "$@"' _ %
fi
# This used to resume from dist. However, without including aggregator in the build
# the build does not properly initialize spark.version property via buildver profiles
# in the root pom, and we get a missing spark320 dependency even for --profile=320,321
# where the build does not require it. Moving it to aggregator resolves this issue with
# a negligible increase of the build time by ~2 seconds.
joinShimBuildFrom="aggregator"
echo "Resuming from $joinShimBuildFrom build only using $BASE_VER"
$MVN $FINAL_OP -rf $joinShimBuildFrom $MODULE_OPT $MVN_PROFILE_OPT $INCLUDED_BUILDVERS_OPT \
-Dbuildver="$BASE_VER" \
-DskipTests -Dmaven.scaladoc.skip
)