diff --git a/batch-setup/batch.py b/batch-setup/batch.py index dd148ad..5831322 100644 --- a/batch-setup/batch.py +++ b/batch-setup/batch.py @@ -107,7 +107,8 @@ def env_for_image(name, db_hosts, db_name, db_user, db_password, buckets, def cmd_for_image(name, region): if name == 'rawr-batch': - cmd = ['tilequeue', 'rawr-tile', + cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"', + 'tilequeue', 'rawr-tile', '--config', '/etc/tilequeue/config.yaml', '--tile', 'Ref::tile', '--run_id', 'Ref::run_id'] @@ -120,7 +121,8 @@ def cmd_for_image(name, region): '--run_id', 'Ref::run_id'] elif name == 'meta-low-zoom-batch': - cmd = ['tilequeue', 'meta-tile-low-zoom', + cmd = ['/usr/bin/time', '-f', '"{\\"max_resident_kb\\": %M, \\"cpu_percent\\": \\"%P\\", \\"wall_time_seconds\\": %e}\\"', + 'tilequeue', 'meta-tile-low-zoom', '--config', '/etc/tilequeue/config.yaml', '--tile', 'Ref::tile', '--run_id', 'Ref::run_id'] diff --git a/batch-setup/make_meta_tiles.py b/batch-setup/make_meta_tiles.py index e8c9014..92c5d57 100644 --- a/batch-setup/make_meta_tiles.py +++ b/batch-setup/make_meta_tiles.py @@ -371,7 +371,7 @@ def _big_jobs(rawr_bucket, prefix, key_format_type, rawr_zoom, group_zoom, return big_jobs -def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_specifier=TileSpecifier()): +def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_specifier=TileSpecifier(), mem_multiplier=1.0, mem_max=32 * 1024): from tilequeue.command import make_config_from_argparse from tilequeue.command import tilequeue_batch_enqueue from make_rawr_tiles import BatchEnqueueArgs @@ -385,18 +385,28 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, tile_speci coord_lines = [line.strip() for line in tile_list.readlines()] reordered_lines = tile_specifier.reorder(coord_lines) - - overprovision_multiplier = 1.2 # overprovision by 20% + + # if we aren't already increasing our memory usage somewhere else, we want to + # overprovision by 20% to allow for changes in tile complexity over time + overprovision_multiplier = 1.0 + if mem_multiplier <= 1.0: + overprovision_multiplier = 1.2 + print("[%s] Starting to enqueue %d tile batches" % (time.ctime(), len(reordered_lines))) for coord_line in reordered_lines: # override memory requirements for this job with what the tile_specifier tells us - memory_mb = int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier)) - cfg.yml["batch"]["memory"] = memory_mb + mem_mb = int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier)) + update_memory_request(cfg, mem_mb, mem_multiplier, mem_max) args = BatchEnqueueArgs(config_file, coord_line, None, None) tilequeue_batch_enqueue(cfg, args) print("[%s] Done enqueuing tile batches" % time.ctime()) +def update_memory_request(cfg, mem_mb, mem_multiplier, mem_max): + adjusted_mem = mem_mb * mem_multiplier + cfg.yml["batch"]["memory"] = int(min(adjusted_mem, mem_max)) + + # adaptor class for MissingTiles to see just the high zoom parts, this is used # along with the LowZoomLense to loop over missing tiles generically but # separately. @@ -436,7 +446,10 @@ def _missing(self): self.split_zoom, self.zoom_max, self.big_jobs) def render(self, num_retries, lense): + mem_max = 32 * 1024 # 32 GiB + for retry_number in range(0, num_retries): + mem_multiplier = 1.5 ** retry_number with self._missing() as missing: missing_tile_file = lense.missing_file(missing) count = wc_line(missing_tile_file) @@ -455,8 +468,9 @@ def render(self, num_retries, lense): sample = head_lines(missing_tile_file, 10) print("Enqueueing %d %s tiles (e.g. %s)" % (count, lense.description, ', '.join(sample))) + enqueue_tiles(lense.config, missing_tile_file, - check_metatile_exists, self.tile_specifier) + check_metatile_exists, self.tile_specifier, mem_multiplier, mem_max) else: with self._missing() as missing: diff --git a/batch-setup/make_tiles.py b/batch-setup/make_tiles.py index c12dc9d..4edbc6a 100644 --- a/batch-setup/make_tiles.py +++ b/batch-setup/make_tiles.py @@ -134,8 +134,8 @@ def _chr_range(a, b): # raised if jobs fail with out-of-memory errors. memory = { 'rawr-batch': 8192, - 'meta-batch': 12288, # 12 GiB - 'meta-low-zoom-batch': 12288, # 12 GiB + 'meta-batch': 4096, # 4 GiB + 'meta-low-zoom-batch': 8192, # 8 GiB 'missing-meta-tiles-write': 1024, } # defaults for the moment. TODO: make them configurable from the command diff --git a/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go b/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go index d545169..45c05c5 100644 --- a/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go +++ b/go/cmd/tz-missing-meta-tiles-write/missing-meta-tiles-write.go @@ -159,7 +159,7 @@ func main() { flag.StringVar(&destBucket, "dest-bucket", "", "dest s3 bucket to write tiles") flag.StringVar(&destDatePrefix, "dest-date-prefix", "", "dest date prefix to write tiles found") flag.StringVar(&hexPrefix, "hex-prefix", "", "hex prefix for job, must be 3 lowercase hexadecimal characters") - flag.UintVar(&concurrency, "concurrency", 16, "number of goroutines listing bucket per hash prefix") + flag.UintVar(&concurrency, "concurrency", 4, "number of goroutines listing bucket per hash prefix") flag.StringVar(®ion, "region", "us-east-1", "region") flag.StringVar(&keyFormatTypeStr, "key-format-type", "", "Either 'prefix-hash' or 'hash-prefix' to control the order of the date prefix and hash in the src S3 key.") flag.BoolVar(&allBuckets, "all-buckets", false, "If true, check all buckets in list, not just the last one.")