diff --git a/.gitignore b/.gitignore
index 20047b7a..188a0866 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,5 +14,7 @@ ao_result.json
 aolog
 ao_db_backups
 db_block_*.json
-*.pickle
 imgindex.json
+
+*.pickle
+config.py
diff --git a/ap/ap.py b/ap/ap.py
index 78b946e5..eaa2ccf4 100644
--- a/ap/ap.py
+++ b/ap/ap.py
@@ -1,22 +1,39 @@
 import os
 import sys
 import shutil
+from pathlib import Path
+import argparse
+
+import graphviz
 import pickle
+import copy
 import time
-import re
 import itertools
+import re
+import string
+
 import warnings
 import textwrap
 import psutil
 import hashlib
+from PIL import Image
+from PIL import UnidentifiedImageError
+import pytesseract
 
 dbpath = '/home/alex/Desktop/ap.pickle'
 timelimit = 20
 loglevel = 0
+parser = argparse.ArgumentParser()
+#parser.add_argument('subcommand', type=str)
+subparsers = parser.add_subparsers()
+
+imf_parser = subparsers.add_parser('imf')
+imf_parser.add_argument('text', type=str)
+imf_parser.set_defaults(func=lambda argvals: openfiles(list(filter(lambda x: hasattr(x, 'text') and argvals.text.lower() in x.text.lower(), data['files']))))
 
 from types import ModuleType, FunctionType
 from gc import get_referents
@@ -26,7 +43,8 @@
 
 # Exclude modules as well.
 BLACKLIST = type, ModuleType, FunctionType
-# https://stackoverflow.com/a/30316760
+# Estimate the "true" size of an object, which may be nested and/or contain
+# references to other objects (from https://stackoverflow.com/a/30316760)
 def getsize(obj):
     """sum size of object & members."""
     if isinstance(obj, BLACKLIST):
@@ -44,7 +62,8 @@ def getsize(obj):
         objects = get_referents(*need_referents)
     return size
 
-# Based on https://stackoverflow.com/a/44873382
+# Hash a file given a path and return hex digests for the md5 and sha1 hashes
+# of the file's content (based on https://stackoverflow.com/a/44873382)
 def hashfile(path):
     # 256kb
     BUF_SIZE = 2 ** 18
@@ -75,10 +94,14 @@ def hashfile(path):
     'files': []
 }
 
+# Store the current database at `path` (serialized using pickle; not very
+# efficient, but extremely expressive)
 def save(path=dbpath):
     with open(path, 'wb') as f:
         pickle.dump(data, f)
 
+# Logging helper function; displays the string `content`, indented and
+# line-wrapped
 def log(content, level=0):
     n=60
     lines = [content[i:i+n] for i in range(0, len(content), n)]
@@ -89,13 +112,30 @@ def log(content, level=0):
     p=' ' * level # prefix
     print(p + f'\n{p}~ '.join(textwrap.wrap(content, n)))
 
+
+# Helper class to make my life easier;
+# - supports arbitrary keyword arguments (which are stored as attributes)
+# - tracks access and modification
+# - enables method chaining for common iterable operations
+class node:
+    def __init__(self, *args, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+        self.created = time.time()
+
 # Very minimalist class for representing a file; aggregated from file
 # snapshots, which represent a (possibly nonexistent) file at a particular
 # point in time
 class filenode:
-    attrs = {'id': int, 'path': str, 'name': str, 'tags': list, 'size': int,
-             'snapshots': list, 'ext': str}
+    attrs = {'id': int,          # Numeric ID
+             'path': str,        # Filepath
+             'name': str,        # File name
+             'tags': list,       # Internal file tags (different than those assigned to metadata by a file manager)
+             'size': int,        # File size in bytes
+             'snapshots': list,  # A chronologically ordered (?) list of file snapshots
+             'ext': str          # File extension
+             }
 
     def __init__(self, **kwargs):
         self.id = 0
         self.path = ''
@@ -106,6 +146,9 @@ def __init__(self, **kwargs):
         for k, v in kwargs.items():
             setattr(self, k, v)
 
+    # Check that the expected attributes are present within this filenode and
+    # have the correct types (this is Python, so we only emit a warning if
+    # something is askew)
     def validate(self):
         for k, v in filenode.attrs.items():
             if hasattr(self, k):
@@ -114,8 +157,9 @@ def validate(self):
             else:
                 warnings.warn(f'filenode missing expected attribute: `{k}`')
 
+    # Generate a string representing this filenode
     def __str__(self):
-        return '\n'.join(f'{a}: {getattr(self, a) if hasattr(self, a) else None}' for a in 'path ext tags'.split())
+        return 'filenode { '+'\n'.join(f'{a}: {getattr(self, a) if hasattr(self, a) else None}' for a in 'name path ext tags snapshots'.split())+' }'
 
     def print(self):
         print(self)
@@ -124,6 +168,7 @@ def print(self):
 # those used by Git; this approach enables highly accurate monitoring and
 # versioning without having to continuously listen for file system events
 class snapshot:
+    # Initialize a new file snapshot
     def __init__(self, **kwargs):
         for k, v in kwargs.items():
             setattr(self, k, v)
@@ -139,16 +184,20 @@ def process(self):
             warnings.warn('File database has more than one live file with the\
 same path; this should not happen')
             #node = current.next()
 
+
+        # "temporary" - generate ext if not present #hasn'tattr
         if not hasattr(self, 'ext'):
            _, self.ext = os.path.splitext(self.path)
 
+        # If a matching filenode is found, integrate this snapshot into it
         if current:
             log(f'Found corresponding node ({self.path})', 1)
             current[0].snapshots.append(self)
             #current[0].ext = self.ext
             current[0].ext = self.ext
-            current[0].print()
+            #current[0].print()
+        # Otherwise, add a new node with a reference to this snapshot
         else:
             log(f'Adding file node for {self.path} ({len(data["files"])} total)', 1)
             newnode = filenode(
@@ -161,7 +210,8 @@ def process(self):
                 textproc=False
             )
             data['files'].append(newnode)
-            newnode.print()
+            #newnode.print()
+        # Mark the snapshot as having been incorporated into the main database
         self.processed=True
 
 # TODO: refactor using https://docs.python.org/3/library/pathlib.html#pathlib.Path.iterdir
@@ -180,6 +230,7 @@ def catalog(path='.', limit=1000, i=0, recursive=True, level=0, delay=0.01) -> i
         # Why don't old nodes (without name/path) cause issues?
         log(f'Adding snapshot; {len(data["snapshots"])} total', level+1)
         fname, ext = os.path.splitext(fullpath)
+        # Get information about a file or directory
         stats = os.stat(fullpath)
         # should we move the hashing elsewhere?
         # TODO: store references to files contained in directory (and possibly the inverse)
@@ -195,21 +246,122 @@ def catalog(path='.', limit=1000, i=0, recursive=True, level=0, delay=0.01) -> i
             #md5=md5,
             #sha1=sha1
         )
+        # Only hash small files (exclude dirs)
        if os.path.isdir(fullpath) or stats.st_size > 10e7:
             snapshot_info |= dict(md5=None, sha1=None, hashed=False)
         else:
             md5, sha1 = hashfile(fullpath)
             snapshot_info |= dict(md5=md5, sha1=sha1, hashed=True)
+        # Store snapshot in main database
         newsnapshot = snapshot(**snapshot_info)
         data['snapshots'].append(newsnapshot)
+        # Integrate the snapshot into the file database
         newsnapshot.process()
         i += 1
 
+        # Recurse into subdirectories (if enabled)
         #if os.path.isdir(subpath) and recursive:
         if os.path.isdir(fullpath) and recursive:
             i = catalog(fullpath, limit, i, True, level=level+1)
+        # Limit total number of files visited
         if i >= limit:
             print(f'Reached limit: {limit}')
             break
         time.sleep(delay)
     return i
+
+# Generates a folder containing symlinks to each of the given files and opens
+# it using the associated program
+def openfiles(files):
+    dirname = f'/home/alex/Desktop/ap-temp-{time.time()}'
+    # be careful: operate on deep copies so the real filenodes are never mutated
+    fcopy = copy.deepcopy(files)
+    # If two or more files have the same name, rename (copies of) them with a
+    # numeric suffix
+    for f in fcopy:
+        if sum(f.name == g.name for g in fcopy) > 1:
+            # we're operating over references so we can modify the cloned nodes
+            # directly
+            for i, h in enumerate(list(filter(lambda x: f.name == x.name, fcopy))):
+                suffix = f'-{i+1}'
+                #a, b =
+                h.name = str(Path(h.name).stem+suffix+h.ext)
+                # h.path += suffix
+
+    Path(dirname).mkdir()
+    # Generate the symlinks and open the folder
+    for f in fcopy:
+        Path(os.path.join(dirname, f.name)).symlink_to(f.path)
+    os.system(f'xdg-open {dirname}')
+
+# Add `tag` to `fnode` if its extension matches any of the types in `types`
+def tagfile(fnode, types, tag):
+    if (hasattr(fnode, 'ext') and
+        fnode.ext.lower()[1:] in types.split() and
+        tag not in fnode.tags):
+        fnode.tags.append(tag)
+
+# Apply some simple tagging rules to file nodes
+def tagfiles(n=0):
+    log('Tagging files')
+    for anode in itertools.islice(data['files'], n):
+        anode.validate()
+        # temporary-ish code to upgrade old file node instances
+        if not hasattr(anode, 'tags'): setattr(anode, 'tags', [])
+
+        # Rules (i.e., types -> tag)
+        tagfile(anode, 'gif png jpg jpeg tiff webm', 'image')
+        tagfile(anode, 'txt js py sh java css html todo', 'textlike')
+        anode.print()
+    save()
+
+def mayhave(obj, attr):
+    if hasattr(obj, attr):
+        return getattr(obj, attr)
+    else:
+        return None
+
+# Apply OCR to (up to `n`) files tagged with "image" and store the resulting
+# text in the filenode
+def extracttext(n=0):
+    log('Running OCR (Tesseract)')
+    for anode in itertools.islice(filter(
+            lambda x:
+                (hasattr(x, 'tags') and
+                 'image' in x.tags and
+                 not mayhave(x, 'processed')),
+            data['files']), n):
+        #anode.print()
+        try:
+            log('', 1)
+            log(anode.path, 1)
+            imgcontent = pytesseract.image_to_string(Image.open(anode.path))
+            setattr(anode, 'text', imgcontent)
+
+            # Display a slightly more readable version of the extracted content
+            text = imgcontent.replace('\n', ' ')
+            text = re.sub(r'[\n ]+', ' ', text, flags=re.M)
+            log(f"Result (condensed): {text}", 1)
+        # Catch occasional invalid images
+        except UnidentifiedImageError as exception:
+            print(exception)
+        anode.processed = True
+        # Update database file
+        save()
+
+with open(dbpath, 'rb') as f:
+    data = pickle.load(f)
+
+args = parser.parse_args()
+#match args.subcommand:
+#    'imf'
+
+if hasattr(args, 'func'):
+    args.func(args)
+
+# TODO: generate folder from tags/types
+# TODO: generate human-readable file manifest
+# TODO: add command/function for general searches
+# TODO: fuzzy string matching
+# TODO: auto-generate appropriate file names ?
+# TODO: store file relations in snapshots/nodes
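
Usage sketch for the new `imf` subcommand added above (assuming the script is invoked directly, that the pickle database at /home/alex/Desktop/ap.pickle already exists, and that its filenodes have been tagged and OCR'd via tagfiles()/extracttext(); the query string is made up):

    $ python ap/ap.py imf "grocery list"

The lambda registered via imf_parser.set_defaults() filters data['files'] for nodes whose extracted `text` attribute contains the query (case-insensitively); openfiles() then symlinks the matches into a fresh /home/alex/Desktop/ap-temp-<timestamp>/ directory and opens it with xdg-open.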