diff --git a/test/unit_tests/test_unicode_helpers.py b/test/unit_tests/test_unicode_helpers.py
new file mode 100644
index 00000000..9b65c0ae
--- /dev/null
+++ b/test/unit_tests/test_unicode_helpers.py
@@ -0,0 +1,28 @@
+from nose.tools import raises, assert_equals, nottest
+
+from totalimpact import unicode_helpers
+
+
+
+class TestUnicodeHelpers():
+    """Tests for unicode_helpers.remove_nonprinting_characters."""
+    def setUp(self):
+        pass
+
+    def test_remove_nonprinting_characters(self):
+        unicode_input = u"hi"  # nothing to strip: must come back unchanged
+        response = unicode_helpers.remove_nonprinting_characters(unicode_input)
+        expected = u"hi"
+        assert_equals(response, expected)
+
+    def test_remove_nonprinting_characters_bytes_input(self):
+        bytes_input = '0000-0001-8907-4150\xe2\x80\x8e' # UTF-8 bytes of U+200E, a nonprinting character, at the end
+        response = unicode_helpers.remove_nonprinting_characters(bytes_input)
+        expected = "0000-0001-8907-4150"
+        assert_equals(response, expected)
+
+    def test_remove_nonprinting_characters_unicode_input(self):
+        unicode_input = u'0000-0001-8907-4150\u200e'  # a nonprinting character at the end
+        response = unicode_helpers.remove_nonprinting_characters(unicode_input)
+        expected = u"0000-0001-8907-4150"
+        assert_equals(response, expected)
diff --git a/test/unit_tests/test_views.py b/test/unit_tests/test_views.py
index f74e883f..6f6335e2 100644
--- a/test/unit_tests/test_views.py
+++ b/test/unit_tests/test_views.py
@@ -219,6 +219,14 @@ def test_memberitems_get(self):
         assert_equals(json.loads(response.data)["memberitems"], GOLD_MEMBER_ITEM_CONTENT)
         assert_equals(response.mimetype, "application/json")
 
+    def test_memberitems_get_with_nonprinting_character(self):
+        # The query carries U+200E (left-to-right mark) right after "Otto"; the
+        # response is still expected to be the gold member items, served as JSON.
+        response = self.client.get(u'/provider/dryad/memberitems/Otto\u200e%2C%20Sarah%20P.?method=sync')
+        assert_equals(response.status_code, 200)
+        assert_equals(json.loads(response.data)["memberitems"], GOLD_MEMBER_ITEM_CONTENT)
+        assert_equals(response.mimetype, "application/json")
+
     def test_file_parsing(self):
         datadir = os.path.join(os.path.split(__file__)[0], "../../extras/sample_provider_pages/bibtex")
         path = os.path.join(datadir, "Vision.bib")
diff --git a/totalimpact/item.py b/totalimpact/item.py
index 173561c6..b432ecec 100644
--- a/totalimpact/item.py
+++ b/totalimpact/item.py
@@ -4,6 +4,8 @@
 
 from totalimpact.providers.provider import ProviderFactory
 from totalimpact.providers.provider import ProviderTimeout, ProviderServerError
+from totalimpact import unicode_helpers
+
 from totalimpact import default_settings
 from totalimpact.utils import Retry
 
@@ -13,11 +15,6 @@
 import logging
 logger = logging.getLogger('ti.item')
 
-# setup to remove control characters from received IDs
-# from http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
-control_chars = ''.join(map(unichr, range(0,32) + range(127,160)))
-control_char_re = re.compile('[%s]' % re.escape(control_chars))
-
 class NotAuthenticatedError(Exception):
     pass
 
@@ -36,9 +33,8 @@ def largest_value_that_is_less_than_or_equal_to(target, collection):
 
 def clean_id(nid):
     try:
-        nid = control_char_re.sub('', nid)
-        nid = nid.replace(u'\u200b', "")
         nid = nid.strip()
+        nid = unicode_helpers.remove_nonprinting_characters(nid)
-    except TypeError:
-        #isn't a string.  That's ok, might be biblio
+    except (TypeError, AttributeError):
+        # isn't a string (AttributeError comes from .strip()).  That's ok, might be biblio
         pass
@@ -399,6 +395,8 @@ def create_or_find_items_from_aliases(clean_aliases, myredis, mydao):
     new_items = []
     for alias in clean_aliases:
         (namespace, nid) = alias
+        namespace = clean_id(namespace)
+        nid = clean_id(nid)
         existing_tiid = get_tiid_by_alias(namespace, nid, mydao)
         if existing_tiid:
             tiids.append(existing_tiid)
@@ -411,8 +409,6 @@ def create_or_find_items_from_aliases(clean_aliases, myredis, mydao):
                     alias=alias
                 ))
             item = make()
-            namespace = clean_id(namespace)
-            nid = clean_id(nid)
             item["aliases"][namespace] = [nid]
             item["aliases"] = canonical_aliases(item["aliases"])
 
diff --git a/totalimpact/providers/bibtex.py b/totalimpact/providers/bibtex.py
index 60eec623..f7fb6ed2 100644
--- a/totalimpact/providers/bibtex.py
+++ b/totalimpact/providers/bibtex.py
@@ -7,7 +7,7 @@
 
 from totalimpact.providers import provider
 from totalimpact.providers.provider import Provider, ProviderContentMalformedError, ProviderTimeout, ProviderServerError
-from totalimpact import utils 
+from totalimpact import unicode_helpers 
 from totalimpact.providers import bibtex_lookup
 
 import logging
@@ -42,7 +42,7 @@ def __init__(self):
         self.bibtex_to_unicode = build_bibtex_to_unicode(bibtex_lookup.unicode_to_latex)
 
     def _to_unicode(self, text):
-        text = utils.to_unicode_or_bust(text)
+        text = unicode_helpers.to_unicode_or_bust(text)
         if "{" in text:
             text = text.replace("\\", "")
             for i, j in self.bibtex_to_unicode.iteritems():
diff --git a/totalimpact/providers/webpage.py b/totalimpact/providers/webpage.py
index 4e1b1ad3..36fc41ae 100644
--- a/totalimpact/providers/webpage.py
+++ b/totalimpact/providers/webpage.py
@@ -1,6 +1,6 @@
 from totalimpact.providers import provider
 from totalimpact.providers.provider import Provider, ProviderContentMalformedError
-from totalimpact import utils
+from totalimpact import unicode_helpers
 
 import lxml.html
 import re
@@ -89,7 +89,7 @@ def _extract_biblio(self, page, id=None):
         if not page:
             return biblio_dict
         
-        unicode_page = utils.to_unicode_or_bust(page)
+        unicode_page = unicode_helpers.to_unicode_or_bust(page)
         try:
             parsed_html = lxml.html.document_fromstring(unicode_page)
 
diff --git a/totalimpact/unicode_helpers.py b/totalimpact/unicode_helpers.py
new file mode 100644
index 00000000..53074e22
--- /dev/null
+++ b/totalimpact/unicode_helpers.py
@@ -0,0 +1,31 @@
+import unicodedata
+import logging
+
+logger = logging.getLogger('ti.unicode_helpers')
+
+#from http://farmdev.com/talks/unicode/
+def to_unicode_or_bust(obj, encoding='utf-8'):
+    """Decode a byte string to unicode with `encoding`; return any other object unchanged."""
+    if isinstance(obj, basestring) and not isinstance(obj, unicode):
+        return unicode(obj, encoding)
+    return obj
+
+
+def remove_nonprinting_characters(input, encoding='utf-8'):
+    """Return `input` without control/format (C), mark (M) or separator (Z) characters.
+
+    Byte strings are decoded with `encoding` and re-encoded with it afterwards, so
+    the result has the same string type as the input.  NOTE: category Z includes
+    the ordinary space and M includes combining accents, so those are dropped too.
+    """
+    input_was_unicode = not isinstance(input, str)
+
+    # Decode with the same encoding that is used to re-encode below, so the
+    # round trip is symmetric for callers that pass something other than utf-8.
+    unicode_input = to_unicode_or_bust(input, encoding)
+
+    # see http://www.fileformat.info/info/unicode/category/index.htm
+    char_classes_to_remove = ("C", "M", "Z")
+    response = u''.join(c for c in unicode_input if unicodedata.category(c)[0] not in char_classes_to_remove)
+    return response if input_was_unicode else response.encode(encoding)
+
diff --git a/totalimpact/utils.py b/totalimpact/utils.py
index 99953ba5..afec7e18 100644
--- a/totalimpact/utils.py
+++ b/totalimpact/utils.py
@@ -41,11 +41,3 @@ def fn(*args, **kwargs):
             return False # fail silently...
         return fn
 
-
-#from http://farmdev.com/talks/unicode/
-def to_unicode_or_bust(obj, encoding='utf-8'):
-     if isinstance(obj, basestring):
-         if not isinstance(obj, unicode):
-             obj = unicode(obj, encoding)
-     return obj
-
diff --git a/totalimpact/views.py b/totalimpact/views.py
index 124eb900..8be8f50c 100755
--- a/totalimpact/views.py
+++ b/totalimpact/views.py
@@ -12,6 +12,7 @@
 from totalimpact import item as item_module
 from totalimpact.models import MemberItems, UserFactory, NotAuthenticatedError
 from totalimpact.providers.provider import ProviderFactory, ProviderItemNotFoundError, ProviderError, ProviderServerError, ProviderTimeout
+from totalimpact import unicode_helpers
 from totalimpact import default_settings
 import logging
 
@@ -325,6 +326,7 @@ def provider_memberitems_get(provider_name, query):
     """
     Gets aliases associated with a query from a given provider.
     """
+    query = unicode_helpers.remove_nonprinting_characters(query)
 
     try:
         provider = ProviderFactory.get_provider(provider_name)
@@ -488,6 +490,19 @@ def delete_items(cid=""):
     return resp
 
 
+def get_alias_strings(aliases):
+    """Return a "namespace:nid" string for every (namespace, nid) pair in `aliases`."""
+    alias_strings = []
+    for (raw_namespace, raw_nid) in aliases:
+        namespace, nid = item_module.clean_id(raw_namespace), item_module.clean_id(raw_nid)
+        try:
+            alias_strings.append(namespace + ":" + nid)
+        except TypeError:
+            # nid is not a string (a biblio dict): use its JSON serialisation
+            alias_strings.append(namespace + ":" + json.dumps(nid))
+    return alias_strings
+
+
 @app.route("/collection/<cid>/items", methods=["PUT"])
 @app.route("/v1/collection/<cid>/items", methods=["PUT"])
 def put_collection(cid=""):
@@ -499,12 +514,7 @@ def put_collection(cid=""):
 
     try:
         aliases = request.json["aliases"]
-        try:
-            alias_strings = [namespace+":"+nid for (namespace, nid) in aliases]
-        except TypeError:
-            # jsonify the biblio dicts
-            alias_strings = [namespace+":"+json.dumps(nid) for (namespace, nid) in aliases]
-
+        alias_strings = get_alias_strings(aliases)
         (tiids, new_items) = item_module.create_or_update_items_from_aliases(
             aliases, myredis, mydao)
 
@@ -531,6 +541,7 @@ def put_collection(cid=""):
     return resp
 
 
+
 """ Updates all the items in a given collection.
 """
 @app.route("/collection/<cid>", methods=["POST"])
@@ -555,7 +566,6 @@ def collection_update(cid=""):
     return resp
 
 
-
 # creates a collection with aliases
 @app.route('/collection', methods=['POST'])
 @app.route('/v1/collection', methods=['POST'])
@@ -574,9 +584,6 @@ def collection_create():
         coll["title"] = request.json["title"]
         aliases = request.json["aliases"]
         (tiids, new_items) = item_module.create_or_update_items_from_aliases(aliases, myredis, mydao)
-        for item in new_items:
-            namespaces = item["aliases"].keys()
-
         if not tiids:
             abort_custom(404, "POST /collection requires a list of [namespace, id] pairs.")
     except (AttributeError, TypeError):
@@ -587,14 +594,10 @@ def collection_create():
                 json=str(request.json)))
         abort_custom(404, "Missing arguments.")
 
-    try:
-        alias_strings = aliases_strings = [namespace+":"+nid for (namespace, nid) in aliases]
-    except TypeError:
-        # jsonify the biblio dicts
-        alias_strings = aliases_strings = [namespace+":"+json.dumps(nid) for (namespace, nid) in aliases]
+    alias_strings = get_alias_strings(aliases)
 
     # save dict of alias:tiid
-    coll["alias_tiids"] = dict(zip(aliases_strings, tiids))
+    coll["alias_tiids"] = dict(zip(alias_strings, tiids))
 
     logger.info(json.dumps(coll, sort_keys=True, indent=4))