From 584eb67188c9f082a9f705dc5608b3d424be692b Mon Sep 17 00:00:00 2001 From: loadit1 <46778047+loadit1@users.noreply.github.com> Date: Tue, 25 Feb 2020 12:47:38 +0300 Subject: [PATCH 1/5] bzip2 to lbzip2 migration to use all CPU cores Summary of changes: If lbzip2 installed on system we use it. If not, using legacy bzip2. Updated Dockerfile to install lbzip2 --- Dockerfile | 2 +- cmd/download_extract.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a9b59d76..1532df5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM pelias/baseimage # downloader apt dependencies # note: this is done in one command in order to keep down the size of intermediate containers -RUN apt-get update && apt-get install -y build-essential python jq && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y build-essential python jq lbzip2 && rm -rf /var/lib/apt/lists/* # change working dir ENV WORKDIR /code/pelias/placeholder diff --git a/cmd/download_extract.sh b/cmd/download_extract.sh index 2f3fb743..1c017962 100755 --- a/cmd/download_extract.sh +++ b/cmd/download_extract.sh @@ -11,7 +11,11 @@ PLACETYPES=( 'neighbourhood' 'macrohood' 'borough' 'locality' 'localadmin' 'coun # download and extract fields from contents of tar function extract { curl -so "/tmp/wof-${1}-latest-bundle.tar.bz2" "https://whosonfirst.mapzen.com/bundles/wof-${1}-latest-bundle.tar.bz2" - tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + if hash lbzip2 2>/dev/null; then + tar --wildcards '*.geojson' -x --use-compress-program=lbzip2 --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + else + tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + fi rc=$?; if [[ $rc != 0 ]]; then >&2 echo "/tmp/wof-${1}-latest-bundle.tar.bz2" >&2 echo "command exited with status: $rc" From 6d949eb279da2a1b68a53e18333363bc569f81e0 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 25 Feb 2020 11:11:39 +0100 Subject: [PATCH 2/5] fix(CI): use Geocode Earth CDN --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 37964a12..1c6a6cc3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ matrix: fast_finish: true script: npm run travis env: - - BUCKET=https://storage.googleapis.com/pelias-data.geocode.earth/placeholder + - BUCKET=https://data.geocode.earth/placeholder/2019-01-28 before_install: - npm i -g npm before_script: From e8f48b08fdc40a61ec1e734962a48a6d72debfda Mon Sep 17 00:00:00 2001 From: loadit1 <46778047+loadit1@users.noreply.github.com> Date: Tue, 25 Feb 2020 12:47:38 +0300 Subject: [PATCH 3/5] bzip2 to lbzip2 migration to use all CPU cores Summary of changes: If lbzip2 installed on system we use it. If not, using legacy bzip2. Updated Dockerfile to install lbzip2 --- Dockerfile | 2 +- cmd/download_extract.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a9b59d76..1532df5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM pelias/baseimage # downloader apt dependencies # note: this is done in one command in order to keep down the size of intermediate containers -RUN apt-get update && apt-get install -y build-essential python jq && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y build-essential python jq lbzip2 && rm -rf /var/lib/apt/lists/* # change working dir ENV WORKDIR /code/pelias/placeholder diff --git a/cmd/download_extract.sh b/cmd/download_extract.sh index 2f3fb743..1c017962 100755 --- a/cmd/download_extract.sh +++ b/cmd/download_extract.sh @@ -11,7 +11,11 @@ PLACETYPES=( 'neighbourhood' 'macrohood' 'borough' 'locality' 'localadmin' 'coun # download and extract fields from contents of tar function extract { curl -so "/tmp/wof-${1}-latest-bundle.tar.bz2" "https://whosonfirst.mapzen.com/bundles/wof-${1}-latest-bundle.tar.bz2" - tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + if hash lbzip2 2>/dev/null; then + tar --wildcards '*.geojson' -x --use-compress-program=lbzip2 --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + else + tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + fi rc=$?; if [[ $rc != 0 ]]; then >&2 echo "/tmp/wof-${1}-latest-bundle.tar.bz2" >&2 echo "command exited with status: $rc" From 776c5c052c60752fadc40c2ffc8169374115a89f Mon Sep 17 00:00:00 2001 From: loadit1 <46778047+loadit1@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:45:03 +0300 Subject: [PATCH 4/5] Update download_extract.sh --- cmd/download_extract.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/download_extract.sh b/cmd/download_extract.sh index 1c017962..832e435d 100755 --- a/cmd/download_extract.sh +++ b/cmd/download_extract.sh @@ -12,10 +12,10 @@ PLACETYPES=( 'neighbourhood' 'macrohood' 'borough' 'locality' 'localadmin' 'coun function extract { curl -so "/tmp/wof-${1}-latest-bundle.tar.bz2" "https://whosonfirst.mapzen.com/bundles/wof-${1}-latest-bundle.tar.bz2" if hash lbzip2 2>/dev/null; then - tar --wildcards '*.geojson' -x --use-compress-program=lbzip2 --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" - else - tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" - fi + tar --wildcards '*.geojson' -x --use-compress-program=lbzip2 --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + else + tar --wildcards '*.geojson' -jx --to-command 'jq -cMf "${DIR}/jq.filter"' -f "/tmp/wof-${1}-latest-bundle.tar.bz2" + fi rc=$?; if [[ $rc != 0 ]]; then >&2 echo "/tmp/wof-${1}-latest-bundle.tar.bz2" >&2 echo "command exited with status: $rc" From fa222889d08febce9052de06b93f5de729ce4e5e Mon Sep 17 00:00:00 2001 From: loadit1 <46778047+loadit1@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:16:03 +0300 Subject: [PATCH 5/5] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1532df5a..e8681137 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM pelias/baseimage # downloader apt dependencies # note: this is done in one command in order to keep down the size of intermediate containers -RUN apt-get update && apt-get install -y build-essential python jq lbzip2 && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y build-essential python jq lbzip2 parallel && rm -rf /var/lib/apt/lists/* # change working dir ENV WORKDIR /code/pelias/placeholder