forked from Azure/databox-adls-loader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistcp-to-databox.sh
28 lines (25 loc) · 919 Bytes
/
distcp-to-databox.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/sh
#
# Submits one asynchronous `hadoop distcp` job per source path, copying each
# path to wasb://<container>@<DNS name><path>.
#
# Arguments:
#   $1  file listing the source paths, one per line (blank lines are skipped)
#   $2  destination DNS name
#   $3  destination account key
#   $4  destination container
#   $5  YARN queue (optional, default: "default")
#   $6  number of mappers per job (optional, default: 4)
#   $7  extra Hadoop options (optional; deliberately word-split so that one
#       argument can carry several options)
#
# Exit status: 1 on bad usage or when any distcp submission fails, else 0.
#
# NOTE(review): the account key is passed on the hadoop command line, so it is
# visible in the process list while each command runs.
# NOTE(review): HADOOP_OPTS is presumably also an environment variable read by
# the hadoop launcher; if it is already exported, this assignment replaces it
# for the child processes -- confirm that is intended.

SOURCE_FILE_LIST=$1
DEST_DNS_NAME=$2
DEST_ACCOUNT_KEY=$3
CONTAINER=$4
JOB_QUEUE=${5:-default}
NUM_MAPPERS=${6:-4}
HADOOP_OPTS=$7

# Quote every operand: an unquoted empty value collapses `[ ! -f $x ]` into the
# two-argument form `[ ! -f ]`, which is false and hides a missing file list.
if [ ! -f "$SOURCE_FILE_LIST" ] || [ -z "$DEST_DNS_NAME" ] || [ -z "$CONTAINER" ]; then
  printf '%s\n' "Usage: $0 {source filelist file} {databox DNS name} {databox account key} {databox container} [{YARN queue}] [{mappers}] [{Hadoop opts}]" >&2
  exit 1
fi

DEST_ROOT_PATH="wasb://$CONTAINER@$DEST_DNS_NAME"
ACCOUNT_KEY_OPT="fs.azure.account.key.$DEST_DNS_NAME=$DEST_ACCOUNT_KEY"
status=0

# The list is read on fd 3 so the hadoop commands in the loop body cannot
# consume it through stdin. `|| [ -n "$path" ]` keeps a final line that has no
# trailing newline; -r keeps backslashes literal.
while read -r path <&3 || [ -n "$path" ]; do
  # A blank line would otherwise target the container root.
  [ -n "$path" ] || continue

  # If the destination already exists, copy into its parent instead, so the
  # source is not nested inside a same-named directory.
  DEST_PATH=$DEST_ROOT_PATH$path
  # shellcheck disable=SC2086 # HADOOP_OPTS is intentionally word-split
  if hadoop fs $HADOOP_OPTS -D "$ACCOUNT_KEY_OPT" -test -e "$DEST_PATH"; then
    DEST_PATH=${DEST_PATH%/*}
  fi
  DEST_PATH=$DEST_PATH/

  # shellcheck disable=SC2086 # HADOOP_OPTS is intentionally word-split
  if ! hadoop distcp $HADOOP_OPTS -D "$ACCOUNT_KEY_OPT" \
    "-Dmapred.job.queue.name=$JOB_QUEUE" -m "$NUM_MAPPERS" -async \
    "$path" "$DEST_PATH"; then
    printf '%s\n' "$0: distcp submission failed for $path" >&2
    status=1
  fi
done 3< "$SOURCE_FILE_LIST"

exit "$status"