From ff6edf3abee8d845adbb27d12b1a2c9ebacb6979 Mon Sep 17 00:00:00 2001 From: rhelins Date: Sat, 24 Mar 2018 22:25:35 -0600 Subject: [PATCH] Better support for older versions of Python. Replace pytest with unittest. Corrections to tests. Additional documentation. --- README.md | 100 +++++++++-- bientropy/test_suite.py | 355 ++++++++++++++++++++++++---------------- ext/bientropy.c | 39 +++-- ext/bientropymodule.c | 3 +- setup.cfg | 11 +- setup.py | 54 ++++-- 6 files changed, 372 insertions(+), 190 deletions(-) diff --git a/README.md b/README.md index 254452b..b7feab0 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,11 @@ module for convenience. Performance ----------- +According to the paper, the "BiEntropy algorithm evaluates the order and +disorder of a binary string of length n in O(n^2) time using O(n) memory." In +other words, the run time has quadratic growth and the memory requirement has +linear growth with respect to the string length. + The metrics are implemented in Python using the 'bitstring' package for handling arbitrary length binary strings and in native C using the GNU Multiple Precision (GMP) arithmetic library. @@ -53,23 +58,56 @@ length of the input in bytes. Requirements ------------ -This package is tested with Python versions 2.7 and 3.6. +This package is tested with Python versions 2.7, 3.4, 3.5 and 3.6. 
Installation: -* Python http://python.org/ -* GCC http://gcc.gnu.org/ -* libgmp http://gmplib.org/ +* Python http://python.org/ (>= 2.7 or >= 3.4) * bitstring http://pythonhosted.org/bitstring/ * NumPy http://numpy.org/ +Compiling: +* GCC http://gcc.gnu.org/ on Linux +* MSVC 9 if using Python 2.7 on Windows + * https://www.microsoft.com/EN-US/DOWNLOAD/confirmation.aspx?id=44266 +* MSVC 14 if using Python 3.x on Windows + * http://landinghub.visualstudio.com/visual-cpp-build-tools +* GMP http://gmplib.org/ or MPIR http://mpir.org/ on Windows + For running tests: -* pytest http://pytest.org/ +* mock https://pypi.org/project/mock/ if using Python 2.7 -Installation ------------- +Install from pip +---------------- + +This package includes a C extension which has to be compiled for each platform. +Python wheels include compiled binary code and allow the extension to be +installed without requiring a compiler. + +`pip >= 1.4` with `setuptools >= 0.8` will use a wheel if there is one available +for the target platform: +``` +pip install --user BiEntropy +``` + +Once installed, the tests should be run with the command: +``` +python -m bientropy.test_suite +``` + +A list of available wheel files is available at: +https://pypi.org/project/BiEntropy/#files + + +Install from Source +------------------- + +The source code for the `bientropy` package can be cloned or downloaded from: +* GitHub: https://github.com/sandialabs/bientropy +* PyPI: https://pypi.org/project/BiEntropy -You will need to install the GMP library if not installing from a wheel. +The [GMP library](http://gmplib.org/) and headers need to be installed before +compiling. 
On Debian/Ubuntu: ``` @@ -83,21 +121,55 @@ yum install gmp-devel Then, use `setup.py` to compile and install the package: ``` -python setup.py install +python setup.py install --user ``` -Optionally, you can run the unit tests with the following command: +Once installed, the tests should be run with the command: ``` -python setup.py test +python -m bientropy.test_suite ``` -You can test your installation with this command: + +Compiling on Windows +-------------------- + +Compiling GMP on Microsoft Windows is only supported under Cygwin, MinGW or +DJGPP. However, this package can be compiled with MPIR, a fork of GMP, on +Windows. The source for MPIR is available at http://mpir.org/ +The `setup.py` script expects the header files, library files and DLL to be +available under `mpir/dll/x64/Release`. + +A compiled distribution of the MPIR library was also available at: +http://www.holoborodko.com/pavel/mpfr/#download +Download the `MPFR-MPIR-x86-x64-MSVC2010.zip` file and extract `mpir` from the +ZIP file to this directory. + +Once MPIR is ready, proceed as usual. +``` +python setup.py install --user +``` + +After installing, the tests should be run with the command: +``` +python -m bientropy.test_suite +``` + + +Included Scripts +---------------- + +After installing, a demonstration can be run with this command: ``` python -m bientropy.demo ``` +This file (`bientropy/demo.py`) also serves as a good example for using +the package. -This file (`bientropy/demo.py`) also serves as a good starting point for using -the code. +The same benchmark script used to generate the data shown in the table and plot +above is also included. 
It can be run with: +``` +python -m bientropy.benchmark +``` Development diff --git a/bientropy/test_suite.py b/bientropy/test_suite.py index b4a31df..ee7ec8e 100644 --- a/bientropy/test_suite.py +++ b/bientropy/test_suite.py @@ -26,10 +26,11 @@ ''' from __future__ import print_function import os +import sys from multiprocessing import Pool, cpu_count +from itertools import repeat -from pytest import approx, raises - +from unittest import TestCase, main try: from unittest.mock import patch, call, MagicMock PY3 = True @@ -45,23 +46,15 @@ from .testvectors import BIENTROPY_2BITS, BIENTROPY_4BITS, ORDERING_4BIT, \ BIENTROPY_8BITS, TBIENTROPY_8BITS, PRIMES + def round_fun(x): - """Common numeric round function""" + '''Common numeric round function''' return round(x, 2) -def test_bin_deriv_k(): - assert bin_deriv_k(Bits('0b01010101'), 1) == Bits('0b1111111') - assert bin_deriv_k(Bits('0b00010001'), 3) == Bits('0b11111') - assert bin_deriv_k(Bits('0b00011111'), 6) == Bits('0b01') - -def test_p_k(): - assert p_k(Bits('0b01010101'), 1) == 1.0 - assert p_k(Bits('0b00010001'), 3) == 1.0 - assert p_k(Bits('0b00011111'), 6) == 0.5 def check_ordering_4bit(fun, sols): - """Check that the 4-bit ordering sequence is the same - as the one found in the paper""" + '''Check that the 4-bit ordering sequence is the same + as the one found in the paper for a particular function''' for x in ORDERING_4BIT: for y in ORDERING_4BIT: a = (x<<4) + y @@ -72,148 +65,226 @@ def check_ordering_4bit(fun, sols): Bits(uint=x, length=4).bin, Bits(uint=y, length=4).bin) assert sols[check_key] == round_fun(r) - print('%s(%s): %.2f' % (fun.__name__, b.bin, r)) - -def test_cbientropy_2bit(): - for b, r in BIENTROPY_2BITS: - assert round_fun(cbientropy.bien(b)) == r - -def test_cbientropy_4bit(): - for b, r in BIENTROPY_4BITS: - assert round_fun(cbientropy.bien(b)) == r - -def test_pybientropy_2bit(): - "Check the Python BiEn implementation with 2-bit strings" - for b, r in BIENTROPY_2BITS: - assert 
round_fun(pybientropy.bien(b)) == r - -def test_pybientropy_4bit(): - "Check the Python BiEn implementation with 4-bit strings" - for b, r in BIENTROPY_4BITS: - assert round_fun(pybientropy.bien(b)) == r - -def test_cbientropy(): - "Check the C BiEn implementation with 8-bit strings" - check_ordering_4bit(cbientropy.bien, BIENTROPY_8BITS) - -def test_pybientropy(): - "Check the Python BiEn implementation with 8-bit strings" - check_ordering_4bit(pybientropy.bien, BIENTROPY_8BITS) - -def test_ctbientropy(): - "Check the C TBiEn implementation with 8-bit strings" - check_ordering_4bit(cbientropy.tbien, TBIENTROPY_8BITS) - -def test_pytbientropy(): - "Check the Python TBiEn implementation with 8-bit strings" - check_ordering_4bit(pybientropy.tbien, TBIENTROPY_8BITS) - -def test_c_bytes_vs_obj(max_s_len=128): - for s_len in range(1, max_s_len): - ti = os.urandom(s_len) - assert cbientropy.bien(Bits(bytes=ti)) == cbientropy.bien(ti) - assert cbientropy.tbien(Bits(bytes=ti)) == cbientropy.tbien(ti) - -def test_odd_sizes(): - for prime in PRIMES: - rand_s = Bits(bytes=os.urandom(int(prime/8+1)))[:prime] - assert len(rand_s) == prime - assert cbientropy.bien(rand_s) == approx(pybientropy.bien(rand_s)) - assert cbientropy.tbien(rand_s) == approx(pybientropy.tbien(rand_s)) - - -def run_large_strings(index, s_len=128, tolerance=0.001): - ti = os.urandom(s_len) + + +def run_large_byte_strings(s_byte_len=128, tolerance=0.001): + ti = os.urandom(s_byte_len) pybien = pybientropy.bien(Bits(bytes=ti)) cbien = cbientropy.bien(ti) assert abs(pybien - cbien) < tolerance pytbien = pybientropy.tbien(Bits(bytes=ti)) ctbien = cbientropy.tbien(ti) assert abs(pytbien - ctbien) < tolerance + return ti, cbien, ctbien + + + +class BiEntropyTests(TestCase): + 'Test the C and Python implementaions' + + def test_bin_deriv_k(self): + 'Check the k^th binary derivate examples from the paper' + self.assertEqual(bin_deriv_k(Bits('0b01010101'), 1), Bits('0b1111111')) + 
self.assertEqual(bin_deriv_k(Bits('0b00010001'), 3), Bits('0b11111')) + self.assertEqual(bin_deriv_k(Bits('0b00011111'), 6), Bits('0b01')) + + + def test_p_k(self): + 'Check the p_k(x) examples from the paper' + self.assertEqual(p_k(Bits('0b01010101'), 1), 1.0) + self.assertEqual(p_k(Bits('0b00010001'), 3), 1.0) + self.assertEqual(p_k(Bits('0b00011111'), 6), 0.5) + -def test_large_strings(num_s=2**6, s_len=256, tolerance=0.001): - """ - Ensure that the Python and C implementations for BiEn and TBiEn match for - longer bit strings. - """ - p = Pool(cpu_count()) - p.map(run_large_strings, range(num_s)) - p.close() - -@patch('bientropy.pybientropy.DEBUG', new=True) -def test_pytbien_debug(): - with patch('builtins.print' if PY3 else '__builtin__.print') as mock_print: - pybientropy.tbien(Bits('0b1011')) - mock_print.assert_has_calls([ - call(' 1011 3 4 0.75 0.25 0.31 0.50 0.81 0 1.00 0.81'), - call(' 110 2 3 0.67 0.33 0.39 0.53 0.92 1 1.58 1.46'), - call(' 01 1 2 0.50 0.50 0.50 0.50 1.00 2 2.00 2.00'), - call('4.585'), - call('4.267') - ]) - -@patch('bientropy.pybientropy.DEBUG', new=True) -def test_pybien_debug(): - with patch('builtins.print' if PY3 else '__builtin__.print') as mock_print: - pybientropy.bien(Bits('0b1011')) - mock_print.assert_has_calls([ - call(' 1011 3 4 0.75 0.25 0.31 0.50 0.81 0 1 0.81'), - call(' 110 2 3 0.67 0.33 0.39 0.53 0.92 1 2 1.84'), - call(' 01 1 2 0.50 0.50 0.50 0.50 1.00 2 4 4.00'), - call('6.648') - ]) - -def test_error_bad_tobytes(): - m_obj = MagicMock() - m_obj.tobytes.side_effect = Exception('bad') - for fun in [cbientropy.bien, cbientropy.bien]: - m_obj.reset_mock() - with raises(Exception) as e: - fun(m_obj) - m_obj.tobytes.assert_called_once() - - -def test_error_bad_tobytes_retval(): - m_obj = MagicMock() - m_obj.tobytes.return_value = 42 - for fun in [cbientropy.bien, cbientropy.tbien]: - m_obj.reset_mock() - with raises(ValueError) as e: - fun(m_obj) - m_obj.tobytes.assert_called_once() - - -def test_error_bad_len(): - ''' 
- The value of len() should be in bits and not bytes - ''' - m_obj = MagicMock() - m_obj.tobytes.return_value = b'\xde\xad\xbe\xef' - for m_obj.__len__.return_value in [4, 23, 33]: + def test_cbientropy_2bit(self): + 'Check the C BiEn function against the 2-bit strings' + for b, r in BIENTROPY_2BITS: + self.assertEqual(round_fun(cbientropy.bien(b)), r) + + + def test_cbientropy_4bit(self): + 'Check the C BiEn function against the 4-bit strings' + for b, r in BIENTROPY_4BITS: + self.assertEqual(round_fun(cbientropy.bien(b)), r) + + + def test_pybientropy_2bit(self): + 'Check the Python BiEn implementation with 2-bit strings' + for b, r in BIENTROPY_2BITS: + self.assertEqual(round_fun(pybientropy.bien(b)), r) + + + def test_pybientropy_4bit(self): + 'Check the Python BiEn implementation with 4-bit strings' + for b, r in BIENTROPY_4BITS: + self.assertEqual(round_fun(pybientropy.bien(b)), r) + + + def test_cbientropy(self): + 'Check the C BiEn implementation with 8-bit strings' + check_ordering_4bit(cbientropy.bien, BIENTROPY_8BITS) + + + def test_pybientropy(self): + 'Check the Python BiEn implementation with 8-bit strings' + check_ordering_4bit(pybientropy.bien, BIENTROPY_8BITS) + + + def test_ctbientropy(self): + 'Check the C TBiEn implementation with 8-bit strings' + check_ordering_4bit(cbientropy.tbien, TBIENTROPY_8BITS) + + + def test_pytbientropy(self): + 'Check the Python TBiEn implementation with 8-bit strings' + check_ordering_4bit(pybientropy.tbien, TBIENTROPY_8BITS) + + + def test_c_bytes_vs_obj(self, max_s_len=128): + ''' + Check that the C Bien and TBiEn compute the same result for both string + and bitstring input types + ''' + for s_len in range(1, max_s_len): + ti = os.urandom(s_len) + self.assertEqual(cbientropy.bien(Bits(bytes=ti)), + cbientropy.bien(ti)) + self.assertEqual(cbientropy.tbien(Bits(bytes=ti)), + cbientropy.tbien(ti)) + + + def test_odd_sizes(self): + ''' + Check that the Python and C implementations for BiEn and TBiEn match + for bit 
strings with prime-number lengths. + ''' + input_set = set() + for prime in PRIMES: + rand_s = Bits(bytes=os.urandom(int(prime/8+1)))[:prime] + input_set = input_set.union([rand_s]) + self.assertEqual(len(rand_s), prime) + self.assertAlmostEqual(cbientropy.bien(rand_s), + pybientropy.bien(rand_s)) + self.assertAlmostEqual(cbientropy.tbien(rand_s), + pybientropy.tbien(rand_s)) + + # check that all the strings are distinct + self.assertEqual(len(input_set), len(PRIMES)) + + + def test_large_strings(self, num_s=2**4, s_byte_len=256): + ''' + Check that the Python and C implementations for BiEn and TBiEn match + for longer bit strings. + ''' + if sys.platform != 'win32': + pool = Pool(cpu_count()) + map_fun = pool.map + else: + map_fun = map + + results = map_fun(run_large_byte_strings, repeat(s_byte_len, num_s)) + if map_fun != map: + pool.close() + # check that all the strings are distinct + self.assertEqual( + len(set([result[0] for result in results])), + num_s) + + + @patch('bientropy.pybientropy.DEBUG', new=True) + def test_pytbien_debug(self): + 'Check that Python TBiEn has the correct progression of state' + with patch('builtins.print' \ + if PY3 else '__builtin__.print') as mock_print: + pybientropy.tbien(Bits('0b1011')) + mock_print.assert_has_calls([ + call(' 1011 3 4 0.75 0.25 0.31 0.50 0.81 0 1.00 0.81'), + call(' 110 2 3 0.67 0.33 0.39 0.53 0.92 1 1.58 1.46'), + call(' 01 1 2 0.50 0.50 0.50 0.50 1.00 2 2.00 2.00'), + call('4.585'), + call('4.267') + ]) + + + @patch('bientropy.pybientropy.DEBUG', new=True) + def test_pybien_debug(self): + 'Check that Python BiEn has the correct progression of state' + with patch('builtins.print' \ + if PY3 else '__builtin__.print') as mock_print: + pybientropy.bien(Bits('0b1011')) + mock_print.assert_has_calls([ + call(' 1011 3 4 0.75 0.25 0.31 0.50 0.81 0 1 0.81'), + call(' 110 2 3 0.67 0.33 0.39 0.53 0.92 1 2 1.84'), + call(' 01 1 2 0.50 0.50 0.50 0.50 1.00 2 4 4.00'), + call('6.648') + ]) + + + def 
test_error_bad_tobytes(self): + ''' + Check that C BiEn and TBiEn behave correctly when the tobytes() method + of the input object raises an exception + ''' + m_obj = MagicMock() + m_obj.tobytes.side_effect = Exception('bad') for fun in [cbientropy.bien, cbientropy.tbien]: m_obj.reset_mock() - with raises(TypeError) as e: + with self.assertRaises(Exception): fun(m_obj) - m_obj.__len__.assert_called() + m_obj.tobytes.assert_called_once_with() -def test_error_bad_obj(): - m_obj = 1984 # not a string and does not have a tobytes() method - for fun in [cbientropy.bien, cbientropy.tbien]: - with raises(TypeError) as e: - fun(m_obj) + def test_error_bad_tobytes_retval(self): + ''' + Check that C BiEn and TBiEn behave correctly when the tobytes() method + of the input object returns something other than a byte string + ''' + m_obj = MagicMock() + m_obj.tobytes.return_value = 42 + for fun in [cbientropy.bien, cbientropy.tbien]: + m_obj.reset_mock() + with self.assertRaises(ValueError): + fun(m_obj) + m_obj.tobytes.assert_called_once_with() + + + def test_error_bad_len(self): + ''' + Check that C BiEn and TBiEn enforce that value of the len() method of + the input object is in bits and not bytes + ''' + m_obj = MagicMock() + m_obj.tobytes.return_value = b'\xde\xad\xbe\xef' + for m_obj.__len__.return_value in [4, 23, 33]: + for fun in [cbientropy.bien, cbientropy.tbien]: + m_obj.reset_mock() + with self.assertRaises(TypeError): + fun(m_obj) + m_obj.__len__.assert_called_with() + + + def test_error_bad_obj(self): + ''' + Check that C BiEn and TBiEn raise an exception if the input object is + not a byte string and does not have a len() method + ''' + m_obj = 1984 # not a string and does not have a tobytes() method + for fun in [cbientropy.bien, cbientropy.tbien]: + with self.assertRaises(TypeError): + fun(m_obj) -def test_error_empty(): - for empty in [b'', Bits()]: - for fun in [cbientropy.bien, cbientropy.tbien]: - with raises(ValueError) as e: - fun(empty) + def 
test_error_empty(self): + ''' + Check that C BiEn and TBiEn raise an exception if the input object has + a zero length + ''' + for empty in [b'', Bits()]: + for fun in [cbientropy.bien, cbientropy.tbien]: + with self.assertRaises(ValueError): + fun(empty) if __name__ == '__main__': - print("""Trying running with: - $ pytest -or: - $ python -m pytest -""") + main() diff --git a/ext/bientropy.c b/ext/bientropy.c index 6410aa8..86f3396 100644 --- a/ext/bientropy.c +++ b/ext/bientropy.c @@ -24,8 +24,17 @@ #include #include +#define _USE_MATH_DEFINES // should define M_LOG2E #include +#if (_MSC_VER == 1500) +// support for VC9/Visual C++ 2008 +static double log2(double n) +{ + return log(n) * M_LOG2E; +} +#endif + #include "bientropy.h" /** brief mpz_bin_d - The binary derivative is computed using the exclusive or @@ -38,9 +47,9 @@ mpz_bin mpz_bin_d (mpz_bin x) { mpz_bin r; - mpz_init(r.i); - mpz_t a, b; + + mpz_init(r.i); mpz_init(a); mpz_init(b); @@ -97,17 +106,17 @@ mpz_bin mpz_bin_d_k (mpz_bin x, unsigned k) */ double bien(mpz_bin s) { - mpf_t t, t_k; + mpf_t t, t_k, result; + mpz_bin s_k, s_k_new; + unsigned ones, k; + double p, e, g, retval; + mpf_init (t); mpf_init (t_k); - mpz_bin s_k, s_k_new; mpz_init (s_k.i); mpz_set(s_k.i, s.i); s_k.len = s.len; - unsigned ones; - double p, e, g; - unsigned k; for (k = 0; k=3.1.5', 'numpy>=1.11.2', ] -test_requirements = [ - 'pytest' - ] +test_requirements = [] if int(platform.python_version_tuple()[0]) < 3: + requirements.append('mock') test_requirements.append('mock') +ext_include_dirs = [] +ext_library_dirs = [] +ext_libs = ['gmp'] +package_data = {} + +if sys.platform == 'win32': + mpir_dir = 'mpir/dll/x64/Release' + if not os.path.isdir(mpir_dir): + raise Exception('''This package can be compiled with MPIR on Windows. 
+The source for MPIR is available at http://mpir.org/ +The header files, library files and DLL are expected under %s + +A compiled distribution of MPIR was also available at: +http://www.holoborodko.com/pavel/mpfr/#download +Download 'MPFR-MPIR-x86-x64-MSVC2010.zip'. +Extract 'mpir' from the ZIP file to this directory. +''' % mpir_dir) + import shutil + if not os.path.isfile('bientropy/mpir.dll'): + shutil.copy(os.path.join(mpir_dir, 'mpir.dll'), 'bientropy') + ext_include_dirs = [mpir_dir] + ext_library_dirs = [mpir_dir] + ext_libs = ['mpir'] + package_data['bientropy'] = ['mpir.dll'] + +MODULE = Extension('bientropy.cbientropy', + sources=['ext/bientropy.c', + 'ext/bientropymodule.c'], + include_dirs=ext_include_dirs, + library_dirs=ext_library_dirs, + libraries=ext_libs, + ) + setup(name='BiEntropy', - version='1.0', + version='1.0.4', description='High-performance implementations of BiEntropy metrics ' 'proposed by Grenville J. Croll', ext_modules=[MODULE], url='https://github.com/sandialabs/bientropy', author='Ryan Helinski', author_email='rhelins@sandia.gov', - license='GPLv3', keywords='entropy randomness statistics', headers=['ext/bientropy.h'], packages=['bientropy'], + package_data = package_data, install_requires=requirements, - setup_requires=['pytest-runner'], tests_require=test_requirements, test_suite='bientropy.test_suite' )