diff --git a/README.md b/README.md
index 9bec355..e40976b 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,16 @@ explanation.
 
 Usage is as follows:
 
-* `gen.py 1000`: Generate 1000 test set images in `./test`.
-* `train.py`: Train the model.
-* `detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image.
+1. `extractbgs.py SUN397.tar`: Extract ~3GB of background images from the [SUN database](http://groups.csail.mit.edu/vision/SUN/)
+   into `bgs/`. (`bgs/` must not already exist.) The tar file (39GB) can be [downloaded here](http://groups.csail.mit.edu/vision/SUN1old/SUN397.tar).
+
+2. `gen.py 1000`: Generate 1000 test set images in `test/`. (`test/` must not
+   already exist.) This step requires `UKNumberPlate.ttf`, which can be
+   [downloaded here](http://www.dafont.com/uk-number-plate.font), to be in the current directory.
+
+3. `train.py`: Train the model. A GPU is recommended for this step.
+
+4. `detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image.
 
 The project has the following dependencies:
 
diff --git a/detect.py b/detect.py
old mode 100644
new mode 100755
diff --git a/extractbgs.py b/extractbgs.py
new file mode 100755
index 0000000..8cf7ba9
--- /dev/null
+++ b/extractbgs.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2016 Matthew Earl
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+#     The above copyright notice and this permission notice shall be included
+#     in all copies or substantial portions of the Software.
+# 
+#     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+#     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+#     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+#     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+#     USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
+"""
+Extract background images from a tar archive.
+
+"""
+
+
+__all__ = (
+    'extract_backgrounds',
+)
+
+
+import os
+import sys
+import tarfile
+
+import cv2
+import numpy
+
+
+def im_from_file(f):
+    a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8)  # encoded bytes
+    return cv2.imdecode(a, cv2.CV_LOAD_IMAGE_GRAYSCALE)  # caller checks None
+
+
+def extract_backgrounds(archive_name):
+    """
+    Extract backgrounds from provided tar archive.
+
+    Each *.jpg is decoded as grayscale, cropped to a square (keeping the
+    top-left corner), shrunk to 256x256 if larger, and saved in ./bgs/, which
+    is created here. Members that cannot be read or decoded are skipped.
+
+    :param archive_name:
+        Name of the .tar file containing JPEGs of background images.
+
+    :raises OSError: if ./bgs/ cannot be created, e.g. it already exists.
+    :raises Exception: if an output image cannot be written.
+
+    """
+    os.mkdir("bgs")
+
+    index = 0
+    t = tarfile.open(name=archive_name)
+    try:
+        for m in t:
+            if not m.name.endswith(".jpg"):
+                continue
+            f = t.extractfile(m)
+            if f is None:  # member has no data to read (e.g. a directory)
+                continue
+            try:
+                im = im_from_file(f)
+            finally:
+                f.close()
+            if im is None:
+                continue
+            # Crop to a square whose side is the shorter image dimension.
+            side = min(im.shape[0], im.shape[1])
+            im = im[:side, :side]
+            if side > 256:
+                im = cv2.resize(im, (256, 256))
+            fname = "bgs/{:08}.jpg".format(index)
+            print(fname)
+            if not cv2.imwrite(fname, im):
+                raise Exception("Failed to write file {}".format(fname))
+            index += 1
+    finally:
+        t.close()
+
+
+if __name__ == "__main__":
+    # Usage: extractbgs.py ARCHIVE (ARCHIVE goes to extract_backgrounds).
+    extract_backgrounds(sys.argv[1])
+
diff --git a/gen.py b/gen.py
old mode 100644
new mode 100755
index 2bdcda0..3a899f8
--- a/gen.py
+++ b/gen.py
@@ -28,14 +28,13 @@
 
 __all__ = (
     'generate_ims',
-    'extract_backgrounds',
 )
 
 
 import math
+import os
 import random
 import sys
-import tarfile
 
 import cv2
 import numpy
@@ -212,11 +211,11 @@ def generate_plate(font_height, char_ims):
     return plate, rounded_rect(out_shape, radius), code.replace(" ", "")
 
 
-def generate_bg():
+def generate_bg(num_bg_images):
     found = False
     while not found:
-        bg = cv2.imread("bgs/{:08d}.jpg".format(random.randint(0, 108600)),
-                        cv2.CV_LOAD_IMAGE_GRAYSCALE) / 255.
+        fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1))
+        bg = cv2.imread(fname, cv2.CV_LOAD_IMAGE_GRAYSCALE) / 255.
         if (bg.shape[1] >= OUTPUT_SHAPE[1] and
             bg.shape[0] >= OUTPUT_SHAPE[0]):
             found = True
@@ -228,8 +227,8 @@ def generate_bg():
     return bg
 
 
-def generate_im(char_ims):
-    bg = generate_bg()
+def generate_im(char_ims, num_bg_images):
+    bg = generate_bg(num_bg_images)
 
     plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims)
     
@@ -267,58 +266,13 @@ def generate_ims(num_images):
     """
     variation = 1.0
     char_ims = dict(make_char_ims(FONT_HEIGHT))
+    num_bg_images = len(os.listdir("bgs"))
     for i in range(num_images):
-        yield generate_im(char_ims)
-
-
-def im_from_file(f):
-    a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8)
-    return cv2.imdecode(a, cv2.CV_LOAD_IMAGE_GRAYSCALE)
-
-
-def extract_backgrounds(archive_name):
-    """
-    Extract backgrounds from provided tar archive.
-
-    JPEGs from the archive are converted into grayscale, and cropped/resized to
-    256x256, and saved in ./bgs/.
-
-    :param archive_name:
-        Name of the .tar file containing JPEGs of background images.
-
-    """
-    t = tarfile.open(name=archive_name)
-
-    def members():
-        m = t.next()
-        while m:
-            yield m
-            m = t.next()
-    index = 0
-    for m in members():
-        if not m.name.endswith(".jpg"):
-            continue
-        f =  t.extractfile(m)
-        try:
-            im = im_from_file(f)
-        finally:
-            f.close()
-        if im is None:
-            continue
-        
-        if im.shape[0] > im.shape[1]:
-            im = im[:im.shape[1], :]
-        else:
-            im = im[:, :im.shape[0]]
-        if im.shape[0] > 256:
-            im = cv2.resize(im, (256, 256))
-        fname = "bgs/{:08}.jpg".format(index)
-        print fname
-        cv2.imwrite(fname, im)
-        index += 1
+        yield generate_im(char_ims, num_bg_images)
 
 
 if __name__ == "__main__":
+    os.mkdir("test")
     im_gen = generate_ims(int(sys.argv[1]))
     for img_idx, (im, c, p) in enumerate(im_gen):
         fname = "test/{:08d}_{}_{}.png".format(img_idx, c,
diff --git a/train.py b/train.py
old mode 100644
new mode 100755
diff --git a/vis.py b/vis.py
old mode 100644
new mode 100755