From b7d20bed3a8aa599a3f85b1aac692372d1f7bd51 Mon Sep 17 00:00:00 2001 From: Travis Grigsby Date: Mon, 19 Apr 2021 10:23:26 -0700 Subject: [PATCH 1/3] Reducing concurrency - this was being heavily throttled by s3 and took less time with 4 concurrent goroutines instead of 16 --- go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go b/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go index d545169..45c05c5 100644 --- a/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go +++ b/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go @@ -159,7 +159,7 @@ func main() { flag.StringVar(&destBucket, "dest-bucket", "", "dest s3 bucket to write tiles") flag.StringVar(&destDatePrefix, "dest-date-prefix", "", "dest date prefix to write tiles found") flag.StringVar(&hexPrefix, "hex-prefix", "", "hex prefix for job, must be 3 lowercase hexadecimal characters") - flag.UintVar(&concurrency, "concurrency", 16, "number of goroutines listing bucket per hash prefix") + flag.UintVar(&concurrency, "concurrency", 4, "number of goroutines listing bucket per hash prefix") flag.StringVar(&region, "region", "us-east-1", "region") flag.StringVar(&keyFormatTypeStr, "key-format-type", "", "Either 'prefix-hash' or 'hash-prefix' to control the order of the date prefix and hash in the src S3 key.") flag.BoolVar(&allBuckets, "all-buckets", false, "If true, check all buckets in list, not just the last one.") From 8a5f6279748abe8f454c5c1b03ffad4a695526db Mon Sep 17 00:00:00 2001 From: Travis Grigsby Date: Thu, 22 Apr 2021 11:39:39 -0700 Subject: [PATCH 2/3] changing mem reqs and adding data collection for low zoom and raw tiles --- batch-setup/batch.py | 6 ++++-- batch-setup/make_tiles.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/batch-setup/batch.py b/batch-setup/batch.py index dd148ad..5831322 100644 --- 
a/batch-setup/batch.py +++ b/batch-setup/batch.py @@ -107,7 +107,8 @@ def env_for_image(name, db_hosts, db_name, db_user, db_password, buckets, def cmd_for_image(name, region): if name == 'rawr-batch': - cmd = ['tilequeue', 'rawr-tile', + cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"', + 'tilequeue', 'rawr-tile', '--config', '/etc/tilequeue/config.yaml', '--tile', 'Ref::tile', '--run_id', 'Ref::run_id'] @@ -120,7 +121,8 @@ def cmd_for_image(name, region): '--run_id', 'Ref::run_id'] elif name == 'meta-low-zoom-batch': - cmd = ['tilequeue', 'meta-tile-low-zoom', + cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"', + 'tilequeue', 'meta-tile-low-zoom', '--config', '/etc/tilequeue/config.yaml', '--tile', 'Ref::tile', '--run_id', 'Ref::run_id'] diff --git a/batch-setup/make_tiles.py b/batch-setup/make_tiles.py index c12dc9d..3e8dfed 100644 --- a/batch-setup/make_tiles.py +++ b/batch-setup/make_tiles.py @@ -134,8 +134,8 @@ def _chr_range(a, b): # raised if jobs fail with out-of-memory errors. memory = { 'rawr-batch': 8192, - 'meta-batch': 12288, # 12 GiB - 'meta-low-zoom-batch': 12288, # 12 GiB + 'meta-batch': 8192, # 8 GiB + 'meta-low-zoom-batch': 8192, # 8 GiB 'missing-meta-tiles-write': 1024, } # defaults for the moment. 
TODO: make them configurable from the command From cf378a0ee63d859b37010777b6db4435902793d7 Mon Sep 17 00:00:00 2001 From: travisgrigsby Date: Thu, 22 Apr 2021 13:08:55 -0700 Subject: [PATCH 3/3] lowering memory requests, adding exponential mem increase on batch job retry (#73) * adding exponential mem increase on batch job retry * reducing mem reqs for meta-zoom --- batch-setup/make_meta_tiles.py | 14 ++++++++++++-- batch-setup/make_tiles.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/batch-setup/make_meta_tiles.py b/batch-setup/make_meta_tiles.py index f5e91eb..323156a 100644 --- a/batch-setup/make_meta_tiles.py +++ b/batch-setup/make_meta_tiles.py @@ -291,7 +291,7 @@ def _big_jobs(rawr_bucket, prefix, key_format_type, rawr_zoom, group_zoom, return big_jobs -def enqueue_tiles(config_file, tile_list_file, check_metatile_exists): +def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, mem_multiplier=1.0, mem_max=32 * 1024): from tilequeue.command import make_config_from_argparse from tilequeue.command import tilequeue_batch_enqueue from make_rawr_tiles import BatchEnqueueArgs @@ -301,9 +301,15 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists): str(check_metatile_exists).lower()) with open(args.config) as fh: cfg = make_config_from_argparse(fh) + + update_memory_request(cfg, mem_multiplier, mem_max) tilequeue_batch_enqueue(cfg, args) +def update_memory_request(cfg, mem_multiplier, mem_max): + cfg.yml["batch"]["memory"] = int(min(cfg.yml["batch"]["memory"] * mem_multiplier, mem_max)) + + # adaptor class for MissingTiles to see just the high zoom parts, this is used # along with the LowZoomLense to loop over missing tiles generically but # separately. 
@@ -342,7 +348,10 @@ def _missing(self): self.split_zoom, self.zoom_max, self.big_jobs) def render(self, num_retries, lense): + mem_max = 32 * 1024 # 32 GiB + for retry_number in range(0, num_retries): + mem_multiplier = 1.5 ** retry_number with self._missing() as missing: missing_tile_file = lense.missing_file(missing) count = wc_line(missing_tile_file) @@ -361,8 +370,9 @@ def render(self, num_retries, lense): sample = head_lines(missing_tile_file, 10) print("Enqueueing %d %s tiles (e.g. %s)" % (count, lense.description, ', '.join(sample))) + enqueue_tiles(lense.config, missing_tile_file, - check_metatile_exists) + check_metatile_exists, mem_multiplier, mem_max) else: with self._missing() as missing: diff --git a/batch-setup/make_tiles.py b/batch-setup/make_tiles.py index 3e8dfed..4edbc6a 100644 --- a/batch-setup/make_tiles.py +++ b/batch-setup/make_tiles.py @@ -134,7 +134,7 @@ def _chr_range(a, b): # raised if jobs fail with out-of-memory errors. memory = { 'rawr-batch': 8192, - 'meta-batch': 8192, # 8 GiB + 'meta-batch': 4096, # 4 GiB 'meta-low-zoom-batch': 8192, # 8 GiB 'missing-meta-tiles-write': 1024, }