From 17f26bc6ece9a12263adfd3b1ed145c390e11d22 Mon Sep 17 00:00:00 2001 From: "J. Cliff Dyer" Date: Thu, 1 Dec 2016 11:53:31 -0500 Subject: [PATCH] Add XMLString field type which validates input. TNL-5245 --- .gitignore | 3 +- MANIFEST.in | 1 + requirements.txt | 1 + script/max_pylint_violations | 4 +-- setup.py | 16 ++++++++-- xblock/VERSION.txt | 1 + xblock/__init__.py | 9 ++++-- xblock/fields.py | 61 ++++++++++++++++++++++++++++++------ xblock/test/test_fields.py | 61 ++++++++++++++++++++++++++++-------- 9 files changed, 125 insertions(+), 32 deletions(-) mode change 100644 => 100755 setup.py create mode 100644 xblock/VERSION.txt diff --git a/.gitignore b/.gitignore index 3f3383f92..4cfca3148 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,9 @@ *.pyc local_settings.py *.egg-info -.treerc .coverage +.tox +.treerc htmlcov *~ diff --git a/MANIFEST.in b/MANIFEST.in index e69de29bb..3c1402c64 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -0,0 +1 @@ +include xblock/VERSION.txt diff --git a/requirements.txt b/requirements.txt index 0e3da7285..2dc9c9abf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ pyyaml lxml webob>=1.6.0 simplejson +six pytz python-dateutil markupsafe diff --git a/script/max_pylint_violations b/script/max_pylint_violations index 099261d78..d935e532d 100755 --- a/script/max_pylint_violations +++ b/script/max_pylint_violations @@ -1,10 +1,10 @@ #!/bin/bash -DEFAULT_MAX=11 +DEFAULT_MAX=8 pylint xblock | tee /tmp/pylint-xblock.log ERR=`grep -E "^[C|R|W|E]:" /tmp/pylint-xblock.log | wc -l` MAX=${1-$DEFAULT_MAX} -if [ $ERR -ge $MAX ]; then +if [ $ERR -gt $MAX ]; then echo "too many pylint violations: $ERR (max is $MAX)" exit 1 else diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 0e78473c3..1101c3361 --- a/setup.py +++ b/setup.py @@ -1,9 +1,17 @@ -"""Set up for XBlock""" +#!/usr/bin/env python + +""" +Set up for XBlock +""" + +import os.path from setuptools import setup +version_file = os.path.join(os.path.dirname(__file__), 'xblock/VERSION.txt') + setup( name='XBlock', - version='0.4.12', + version=open(version_file).read().strip(), description='XBlock Core Library', packages=[ 'xblock', @@ -12,14 +20,16 @@ 'xblock.test', 'xblock.test.django', ], + include_package_data=True, install_requires=[ + 'fs', 'lxml', 'markupsafe', 'python-dateutil', 'pytz', 'pyyaml', + 'six', 'webob', - 'fs', ], extras_require={ 'django': ['django-pyfs'] diff --git a/xblock/VERSION.txt b/xblock/VERSION.txt new file mode 100644 index 000000000..1f7716999 --- /dev/null +++ b/xblock/VERSION.txt @@ -0,0 +1 @@ +0.4.13 diff --git a/xblock/__init__.py b/xblock/__init__.py index 49bc3d3c9..27a1cc562 100644 --- a/xblock/__init__.py +++ b/xblock/__init__.py @@ -2,8 +2,7 @@ XBlock Courseware Components """ -# For backwards compatability, provide the XBlockMixin in xblock.fields -# without causing a circular import +import os import warnings import xblock.core import xblock.fields @@ -20,6 +19,10 @@ def __init__(self, *args, **kwargs): super(XBlockMixin, self).__init__(*args, **kwargs) +# For backwards compatability, provide the XBlockMixin in xblock.fields +# without causing a circular import xblock.fields.XBlockMixin = XBlockMixin -__version__ = "0.4.7" +VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION.txt') + +__version__ = open(VERSION_FILE).read().strip() diff --git a/xblock/fields.py b/xblock/fields.py index a210c7730..d270ee340 100644 --- a/xblock/fields.py +++ b/xblock/fields.py @@ -9,24 +9,26 @@ from collections import namedtuple import copy import datetime -import dateutil.parser import hashlib import itertools -import pytz +import json import traceback +import unicodedata import warnings -import json + +import dateutil.parser +from lxml import etree +import pytz +import six import yaml -import unicodedata from xblock.internal import Nameable - # __all__ controls what classes end up in the docs, and in what order. __all__ = [ 'BlockScope', 'UserScope', 'Scope', 'ScopeIds', 'Field', - 'Boolean', 'Dict', 'Float', 'Integer', 'List', 'Set', 'String', + 'Boolean', 'Dict', 'Float', 'Integer', 'List', 'Set', 'String', 'XMLString', 'XBlockMixin', ] @@ -832,6 +834,19 @@ class String(JSONField): """ MUTABLE = False + VALID_CONTROLS = {u'\n', u'\r', u'\t'} + + def _valid_unichar(self, character): + """ + Strip invalid control characters from a unicode text object. + """ + return unicodedata.category(character)[0] != u'C' or character in self.VALID_CONTROLS + + def _valid_bytechar(self, character): + """ + Strip invalid control characters from a bytestring object. + """ + return ord(character) >= 32 or character.decode('ascii', errors='replace') in self.VALID_CONTROLS def _sanitize(self, value): """ @@ -839,10 +854,10 @@ def _sanitize(self, value): https://www.w3.org/TR/xml/#charsets Leave all other characters. """ - if isinstance(value, unicode): - new_value = u''.join(ch for ch in value if unicodedata.category(ch)[0] != u'C' or ch in (u'\n', u'\r', u'\t')) - elif isinstance(value, str): - new_value = ''.join(ch for ch in value if ord(ch) >= 32 or ch in ('\n', '\r', '\t')) + if isinstance(value, six.text_type): + new_value = u''.join(ch for ch in value if self._valid_unichar(ch)) + elif isinstance(value, six.binary_type): + new_value = b''.join(ch for ch in value if self._valid_bytechar(ch)) else: return value # The new string will be equivalent to the original string if no control characters are present. @@ -871,6 +886,32 @@ def none_to_xml(self): enforce_type = from_json +class XMLString(String): + """ + A field class for representing an XML string. + + The value, as loaded or enforced, can either be None or a basestring instance. + If it is a basestring instance, it must be valid XML. If it is not valid XML, + an lxml.etree.XMLSyntaxError will be raised. + """ + + def to_json(self, value): + """ + Serialize the data, ensuring that it is valid XML (or None). + + Raises an lxml.etree.XMLSyntaxError if it is a basestring but not valid + XML. + """ + if self._enable_enforce_type: + value = self.enforce_type(value) + return super(XMLString, self).to_json(value) + + def enforce_type(self, value): + if value is not None: + etree.XML(value) + return value + + class DateTime(JSONField): """ A field for representing a datetime. diff --git a/xblock/test/test_fields.py b/xblock/test/test_fields.py index 25325a6d4..c10425d06 100644 --- a/xblock/test/test_fields.py +++ b/xblock/test/test_fields.py @@ -2,34 +2,31 @@ Tests for classes extending Field. """ -# Allow accessing protected members for testing purposes -# pylint: disable=W0212 - -from mock import Mock -import unittest +# pylint: disable=abstract-class-instantiated, protected-access +from contextlib import contextmanager import datetime as dt -import pytz -import warnings +import itertools import math import textwrap -import itertools -from contextlib import contextmanager +import unittest +import warnings import ddt +from lxml import etree +from mock import Mock +import pytz from xblock.core import XBlock, Scope from xblock.field_data import DictFieldData from xblock.fields import ( - Any, Boolean, Dict, Field, Float, - Integer, List, Set, String, DateTime, Reference, ReferenceList, Sentinel, - UNIQUE_ID + Any, Boolean, Dict, Field, Float, Integer, List, Set, String, XMLString, DateTime, Reference, ReferenceList, + ScopeIds, Sentinel, UNIQUE_ID, scope_key, ) from xblock.test.tools import ( assert_equals, assert_not_equals, assert_in, assert_not_in, assert_false, TestRuntime ) -from xblock.fields import scope_key, ScopeIds class FieldTest(unittest.TestCase): @@ -244,6 +241,44 @@ def test_control_characters_filtered(self): self.assertJSONOrSetGetEquals(u'\n\r\t', u'\n\v\r\b\t') +@ddt.ddt +class XMLStringTest(FieldTest): + """ + Tests the XMLString Field. + """ + FIELD_TO_TEST = XMLString + + @ddt.data( + u'Hello', + u'Hello', + u'', + '', + '\xc8\x88', + None + ) + def test_json_equals(self, input_text): + xml_string = self.FIELD_TO_TEST(enforce_type=True) + self.assertEqual(xml_string.to_json(input_text), input_text) + + @ddt.data( + 'text', + '', + '', + ) + def test_bad_xml(self, input_text): + # pylint: disable=no-member + xml_string = self.FIELD_TO_TEST(enforce_type=True) + self.assertRaises(etree.XMLSyntaxError, xml_string.to_json, input_text) + unchecked_xml_string = self.FIELD_TO_TEST(enforce_type=False) + self.assertEqual(unchecked_xml_string.to_json(input_text), input_text) + + @ddt.ddt class DateTest(FieldTest): """