diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..7a24d361
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+* text=auto
+*.m text diff=objc eol=lf
+*.h text diff=objc
+*.c text
+*.cpp text
+*.swift text diff=swift eol=lf
+
+*.pbxproj eol=lf
+
+*.pdf binary
+*.gif binary
+*.jpg binary
+*.png binary
+
+Crypto/aes.h eol=crlf
+Crypto/aes_modes.c eol=crlf
+Crypto/aescrypt.c eol=crlf
+Crypto/aeskey.c eol=crlf
+Crypto/aesopt.h eol=crlf
+Crypto/aestab.c eol=crlf
+Crypto/aestab.h eol=crlf
+Crypto/brg_endian.h eol=crlf
+Crypto/des.c eol=crlf
+Crypto/des.h eol=crlf
+
+WinZipJPEG/Dependencies eol=crlf
+WinZipJPEG/JPEG.c eol=crlf
+WinZipJPEG/JPEG.h eol=crlf
+
+lzma/Bra.c eol=crlf
+lzma/Bra.h eol=crlf
+lzma/Bra86.c eol=crlf
+lzma/BraIA64.c eol=crlf
+lzma/Lzma2Dec.c eol=crlf
+lzma/Lzma2Dec.h eol=crlf
+lzma/Types.h eol=crlf
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..c5451120
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+xcuserdata/
diff --git a/Crypto/aes.h b/Crypto/aes.h
index 24b3d08f..a29756c0 100755
--- a/Crypto/aes.h
+++ b/Crypto/aes.h
@@ -1,194 +1,194 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-
- This file contains the definitions required to use AES in C. See aesopt.h
- for optimisation details.
-*/
-
-#ifndef _AES_H
-#define _AES_H
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define AES_128     /* if a fast 128 bit key scheduler is needed    */
-#define AES_192     /* if a fast 192 bit key scheduler is needed    */
-#define AES_256     /* if a fast 256 bit key scheduler is needed    */
-#define AES_VAR     /* if variable key size scheduler is needed     */
-#define AES_MODES   /* if support is needed for modes               */
-
-/* The following must also be set in assembler files if being used  */
-
-#define AES_ENCRYPT /* if support for encryption is needed          */
-#define AES_DECRYPT /* if support for decryption is needed          */
-#define AES_REV_DKS /* define to reverse decryption key schedule    */
-
-#define AES_BLOCK_SIZE  16  /* the AES block size in bytes          */
-#define N_COLS           4  /* the number of columns in the state   */
-
-/* The key schedule length is 11, 13 or 15 16-byte blocks for 128,  */
-/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes  */
-/* or 44, 52 or 60 32-bit words.                                    */
-
-#if defined( AES_VAR ) || defined( AES_256 )
-#define KS_LENGTH       60
-#elif defined( AES_192 )
-#define KS_LENGTH       52
-#else
-#define KS_LENGTH       44
-#endif
-
-/* the character array 'inf' in the following structures is used    */
-/* to hold AES context information. This AES code uses cx->inf.b[0] */
-/* to hold the number of rounds multiplied by 16. The other three   */
-/* elements can be used by code that implements additional modes    */
-
-typedef union
-{   uint32_t l;
-    uint8_t b[4];
-} aes_inf;
-
-typedef struct
-{   uint32_t ks[KS_LENGTH];
-    aes_inf inf;
-} aes_encrypt_ctx;
-
-typedef struct
-{   uint32_t ks[KS_LENGTH];
-    aes_inf inf;
-} aes_decrypt_ctx;
-
-/* This routine must be called before first use if non-static       */
-/* tables are being used                                            */
-
-int aes_init(void);
-
-/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */
-/* those in the range 128 <= key_len <= 256 are given in bits       */
-
-#if defined( AES_ENCRYPT )
-
-#if defined( AES_128 ) || defined( AES_VAR)
-int aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_192 ) || defined( AES_VAR)
-int aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_256 ) || defined( AES_VAR)
-int aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_VAR )
-int aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
-#endif
-
-int aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]);
-
-#endif
-
-#if defined( AES_DECRYPT )
-
-#if defined( AES_128 ) || defined( AES_VAR)
-int aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_192 ) || defined( AES_VAR)
-int aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_256 ) || defined( AES_VAR)
-int aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined( AES_VAR )
-int aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
-#endif
-
-int aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]);
-
-#endif
-
-#if defined( AES_MODES )
-
-/* Multiple calls to the following subroutines for multiple block   */
-/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */
-/* long messages incremantally provided that the context AND the iv */
-/* are preserved between all such calls.  For the ECB and CBC modes */
-/* each individual call within a series of incremental calls must   */
-/* process only full blocks (i.e. len must be a multiple of 16) but */
-/* the CFB, OFB and CTR mode calls can handle multiple incremental  */
-/* calls of any length. Each mode is reset when a new AES key is    */
-/* set but ECB and CBC operations can be reset without setting a    */
-/* new key by setting a new IV value.  To reset CFB, OFB and CTR    */
-/* without setting the key, aes_mode_reset() must be called and the */
-/* IV must be set.  NOTE: All these calls update the IV on exit so  */
-/* this has to be reset if a new operation with the same IV as the  */
-/* previous one is required (or decryption follows encryption with  */
-/* the same IV array).                                              */
-
-int aes_test_alignment_detection(unsigned int n);
-
-int aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_encrypt_ctx cx[1]);
-
-int aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_decrypt_ctx cx[1]);
-
-int aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_encrypt_ctx cx[1]);
-
-int aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_decrypt_ctx cx[1]);
-
-int aes_mode_reset(aes_encrypt_ctx cx[1]);
-
-int aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
-
-int aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
-
-#define aes_ofb_encrypt aes_ofb_crypt
-#define aes_ofb_decrypt aes_ofb_crypt
-
-int aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
-
-typedef void cbuf_inc(unsigned char *cbuf);
-
-#define aes_ctr_encrypt aes_ctr_crypt
-#define aes_ctr_decrypt aes_ctr_crypt
-
-int aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
-            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]);
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+
+ This file contains the definitions required to use AES in C. See aesopt.h
+ for optimisation details.
+*/
+
+#ifndef _AES_H
+#define _AES_H
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define AES_128     /* if a fast 128 bit key scheduler is needed    */
+#define AES_192     /* if a fast 192 bit key scheduler is needed    */
+#define AES_256     /* if a fast 256 bit key scheduler is needed    */
+#define AES_VAR     /* if variable key size scheduler is needed     */
+#define AES_MODES   /* if support is needed for modes               */
+
+/* The following must also be set in assembler files if being used  */
+
+#define AES_ENCRYPT /* if support for encryption is needed          */
+#define AES_DECRYPT /* if support for decryption is needed          */
+#define AES_REV_DKS /* define to reverse decryption key schedule    */
+
+#define AES_BLOCK_SIZE  16  /* the AES block size in bytes          */
+#define N_COLS           4  /* the number of columns in the state   */
+
+/* The key schedule length is 11, 13 or 15 16-byte blocks for 128,  */
+/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes  */
+/* or 44, 52 or 60 32-bit words.                                    */
+
+#if defined( AES_VAR ) || defined( AES_256 )
+#define KS_LENGTH       60
+#elif defined( AES_192 )
+#define KS_LENGTH       52
+#else
+#define KS_LENGTH       44
+#endif
+
+/* the character array 'inf' in the following structures is used    */
+/* to hold AES context information. This AES code uses cx->inf.b[0] */
+/* to hold the number of rounds multiplied by 16. The other three   */
+/* elements can be used by code that implements additional modes    */
+
+typedef union
+{   uint32_t l;
+    uint8_t b[4];
+} aes_inf;
+
+typedef struct
+{   uint32_t ks[KS_LENGTH];
+    aes_inf inf;
+} aes_encrypt_ctx;
+
+typedef struct
+{   uint32_t ks[KS_LENGTH];
+    aes_inf inf;
+} aes_decrypt_ctx;
+
+/* This routine must be called before first use if non-static       */
+/* tables are being used                                            */
+
+int aes_init(void);
+
+/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */
+/* those in the range 128 <= key_len <= 256 are given in bits       */
+
+#if defined( AES_ENCRYPT )
+
+#if defined( AES_128 ) || defined( AES_VAR)
+int aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_192 ) || defined( AES_VAR)
+int aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_256 ) || defined( AES_VAR)
+int aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_VAR )
+int aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
+#endif
+
+int aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]);
+
+#endif
+
+#if defined( AES_DECRYPT )
+
+#if defined( AES_128 ) || defined( AES_VAR)
+int aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_192 ) || defined( AES_VAR)
+int aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_256 ) || defined( AES_VAR)
+int aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_VAR )
+int aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
+#endif
+
+int aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]);
+
+#endif
+
+#if defined( AES_MODES )
+
+/* Multiple calls to the following subroutines for multiple block   */
+/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */
+/* long messages incrementally provided that the context AND the iv */
+/* are preserved between all such calls.  For the ECB and CBC modes */
+/* each individual call within a series of incremental calls must   */
+/* process only full blocks (i.e. len must be a multiple of 16) but */
+/* the CFB, OFB and CTR mode calls can handle multiple incremental  */
+/* calls of any length. Each mode is reset when a new AES key is    */
+/* set but ECB and CBC operations can be reset without setting a    */
+/* new key by setting a new IV value.  To reset CFB, OFB and CTR    */
+/* without setting the key, aes_mode_reset() must be called and the */
+/* IV must be set.  NOTE: All these calls update the IV on exit so  */
+/* this has to be reset if a new operation with the same IV as the  */
+/* previous one is required (or decryption follows encryption with  */
+/* the same IV array).                                              */
+
+int aes_test_alignment_detection(unsigned int n);
+
+int aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx cx[1]);
+
+int aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx cx[1]);
+
+int aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx cx[1]);
+
+int aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx cx[1]);
+
+int aes_mode_reset(aes_encrypt_ctx cx[1]);
+
+int aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+int aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+#define aes_ofb_encrypt aes_ofb_crypt
+#define aes_ofb_decrypt aes_ofb_crypt
+
+int aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+typedef void cbuf_inc(unsigned char *cbuf);
+
+#define aes_ctr_encrypt aes_ctr_crypt
+#define aes_ctr_decrypt aes_ctr_crypt
+
+int aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]);
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Crypto/aes_modes.c b/Crypto/aes_modes.c
index c02f144c..7ff44536 100755
--- a/Crypto/aes_modes.c
+++ b/Crypto/aes_modes.c
@@ -1,950 +1,950 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-
- These subroutines implement multiple block AES modes for ECB, CBC, CFB,
- OFB and CTR encryption,  The code provides support for the VIA Advanced
- Cryptography Engine (ACE).
-
- NOTE: In the following subroutines, the AES contexts (ctx) must be
- 16 byte aligned if VIA ACE is being used
-*/
-
-#include <string.h>
-#include <assert.h>
-
-#include "aesopt.h"
-
-#if defined( AES_MODES )
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#if defined( _MSC_VER ) && ( _MSC_VER > 800 )
-#pragma intrinsic(memcpy)
-#endif
-
-#define BFR_BLOCKS      8
-
-/* These values are used to detect long word alignment in order to */
-/* speed up some buffer operations. This facility may not work on  */
-/* some machines so this define can be commented out if necessary  */
-
-#define FAST_BUFFER_OPERATIONS
-
-#define lp32(x)         ((uint32_t*)(x))
-
-#define ALIGN_OFFSET(x,n)	(((intptr_t)(x)) & ((n) - 1))
-#define ALIGN_FLOOR(x,n)	((uint8_t*)(x) - ( ((intptr_t)(x)) & ((n) - 1)))
-#define ALIGN_CEIL(x,n)		((uint8_t*)(x) + (-((intptr_t)(x)) & ((n) - 1)))
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-#include "aes_via_ace.h"
-
-#pragma pack(16)
-
-aligned_array(unsigned long,    enc_gen_table, 12, 16) =    NEH_ENC_GEN_DATA;
-aligned_array(unsigned long,   enc_load_table, 12, 16) =   NEH_ENC_LOAD_DATA;
-aligned_array(unsigned long, enc_hybrid_table, 12, 16) = NEH_ENC_HYBRID_DATA;
-aligned_array(unsigned long,    dec_gen_table, 12, 16) =    NEH_DEC_GEN_DATA;
-aligned_array(unsigned long,   dec_load_table, 12, 16) =   NEH_DEC_LOAD_DATA;
-aligned_array(unsigned long, dec_hybrid_table, 12, 16) = NEH_DEC_HYBRID_DATA;
-
-/* NOTE: These control word macros must only be used after  */
-/* a key has been set up because they depend on key size    */
-/* See the VIA ACE documentation for key type information   */
-/* and aes_via_ace.h for non-default NEH_KEY_TYPE values    */
-
-#ifndef NEH_KEY_TYPE
-#  define NEH_KEY_TYPE NEH_HYBRID
-#endif
-
-#if NEH_KEY_TYPE == NEH_LOAD
-#define kd_adr(c)   ((uint8_t*)(c)->ks)
-#elif NEH_KEY_TYPE == NEH_GENERATE
-#define kd_adr(c)   ((uint8_t*)(c)->ks + (c)->inf.b[0])
-#elif NEH_KEY_TYPE == NEH_HYBRID
-#define kd_adr(c)   ((uint8_t*)(c)->ks + ((c)->inf.b[0] == 160 ? 160 : 0))
-#else
-#error no key type defined for VIA ACE 
-#endif
-
-#else
-
-#define aligned_array(type, name, no, stride) type name[no]
-#define aligned_auto(type, name, no, stride)  type name[no]
-
-#endif
-
-#if defined( _MSC_VER ) && _MSC_VER > 1200
-
-#define via_cwd(cwd, ty, dir, len) \
-    unsigned long* cwd = (dir##_##ty##_table + ((len - 128) >> 4))
-
-#else
-
-#define via_cwd(cwd, ty, dir, len)              \
-    aligned_auto(unsigned long, cwd, 4, 16);    \
-    cwd[1] = cwd[2] = cwd[3] = 0;               \
-    cwd[0] = neh_##dir##_##ty##_key(len)
-
-#endif
-
-/* test the code for detecting and setting pointer alignment */
-
-int aes_test_alignment_detection(unsigned int n)	/* 4 <= n <= 16 */
-{	uint8_t	p[16];
-    uint32_t i, count_eq = 0, count_neq = 0;
-
-    if(n < 4 || n > 16)
-        return EXIT_FAILURE;
-
-    for(i = 0; i < n; ++i)
-    {
-        uint8_t *qf = ALIGN_FLOOR(p + i, n),
-                *qh =  ALIGN_CEIL(p + i, n);
-        
-        if(qh == qf)
-            ++count_eq;
-        else if(qh == qf + n)
-            ++count_neq;
-        else
-            return EXIT_FAILURE;
-    }
-    return (count_eq != 1 || count_neq != n - 1 ? EXIT_FAILURE : EXIT_SUCCESS);
-}
-
-int aes_mode_reset(aes_encrypt_ctx ctx[1])
-{
-    ctx->inf.b[2] = 0;
-    return EXIT_SUCCESS;
-}
-
-int aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_encrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return EXIT_FAILURE;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint8_t *ksp = (uint8_t*)(ctx->ks);
-        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
-
-        if(ALIGN_OFFSET( ctx, 16 ))
-            return EXIT_FAILURE;
-
-        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
-        {
-            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
-        }
-        else
-        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint8_t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_ecb_op5(ksp, cwd, ip, op, m);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        return EXIT_SUCCESS;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-    while(nb--)
-    {
-        if(aes_encrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-        ibuf += AES_BLOCK_SIZE;
-        obuf += AES_BLOCK_SIZE;
-    }
-#endif
-    return EXIT_SUCCESS;
-}
-
-int aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_decrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return EXIT_FAILURE;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint8_t *ksp = kd_adr(ctx);
-        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
-
-        if(ALIGN_OFFSET( ctx, 16 ))
-            return EXIT_FAILURE;
-
-        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
-        {
-            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
-        }
-        else
-        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint8_t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_ecb_op5(ksp, cwd, ip, op, m);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        return EXIT_SUCCESS;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-    while(nb--)
-    {
-        if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-        ibuf += AES_BLOCK_SIZE;
-        obuf += AES_BLOCK_SIZE;
-    }
-#endif
-    return EXIT_SUCCESS;
-}
-
-int aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_encrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return EXIT_FAILURE;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
-        aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
-        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
-
-        if(ALIGN_OFFSET( ctx, 16 ))
-            return EXIT_FAILURE;
-
-        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
-        {
-            ivp = liv;
-            memcpy(liv, iv, AES_BLOCK_SIZE);
-        }
-
-        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))
-        {
-            via_cbc_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
-        }
-        else
-        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint8_t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_cbc_op7(ksp, cwd, ip, op, m, ivp, ivp);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        if(iv != ivp)
-            memcpy(iv, ivp, AES_BLOCK_SIZE);
-
-        return EXIT_SUCCESS;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-# ifdef FAST_BUFFER_OPERATIONS
-    if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
-        while(nb--)
-        {
-            lp32(iv)[0] ^= lp32(ibuf)[0];
-            lp32(iv)[1] ^= lp32(ibuf)[1];
-            lp32(iv)[2] ^= lp32(ibuf)[2];
-            lp32(iv)[3] ^= lp32(ibuf)[3];
-            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                return EXIT_FAILURE;
-            memcpy(obuf, iv, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-    else
-# endif
-        while(nb--)
-        {
-            iv[ 0] ^= ibuf[ 0]; iv[ 1] ^= ibuf[ 1];
-            iv[ 2] ^= ibuf[ 2]; iv[ 3] ^= ibuf[ 3];
-            iv[ 4] ^= ibuf[ 4]; iv[ 5] ^= ibuf[ 5];
-            iv[ 6] ^= ibuf[ 6]; iv[ 7] ^= ibuf[ 7];
-            iv[ 8] ^= ibuf[ 8]; iv[ 9] ^= ibuf[ 9];
-            iv[10] ^= ibuf[10]; iv[11] ^= ibuf[11];
-            iv[12] ^= ibuf[12]; iv[13] ^= ibuf[13];
-            iv[14] ^= ibuf[14]; iv[15] ^= ibuf[15];
-            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                return EXIT_FAILURE;
-            memcpy(obuf, iv, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-#endif
-    return EXIT_SUCCESS;
-}
-
-int aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_decrypt_ctx ctx[1])
-{   unsigned char tmp[AES_BLOCK_SIZE];
-    int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return EXIT_FAILURE;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint8_t *ksp = kd_adr(ctx), *ivp = iv;
-        aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
-        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
-
-        if(ALIGN_OFFSET( ctx, 16 ))
-            return EXIT_FAILURE;
-
-        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
-        {
-            ivp = liv;
-            memcpy(liv, iv, AES_BLOCK_SIZE);
-        }
-
-        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))
-        {
-            via_cbc_op6(ksp, cwd, ibuf, obuf, nb, ivp);
-        }
-        else
-        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint8_t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_cbc_op6(ksp, cwd, ip, op, m, ivp);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        if(iv != ivp)
-            memcpy(iv, ivp, AES_BLOCK_SIZE);
-
-        return EXIT_SUCCESS;
-    }
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-# ifdef FAST_BUFFER_OPERATIONS
-    if(!ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
-        while(nb--)
-        {
-            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
-            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
-                return EXIT_FAILURE;
-            lp32(obuf)[0] ^= lp32(iv)[0];
-            lp32(obuf)[1] ^= lp32(iv)[1];
-            lp32(obuf)[2] ^= lp32(iv)[2];
-            lp32(obuf)[3] ^= lp32(iv)[3];
-            memcpy(iv, tmp, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-    else
-# endif
-        while(nb--)
-        {
-            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
-            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
-                return EXIT_FAILURE;
-            obuf[ 0] ^= iv[ 0]; obuf[ 1] ^= iv[ 1];
-            obuf[ 2] ^= iv[ 2]; obuf[ 3] ^= iv[ 3];
-            obuf[ 4] ^= iv[ 4]; obuf[ 5] ^= iv[ 5];
-            obuf[ 6] ^= iv[ 6]; obuf[ 7] ^= iv[ 7];
-            obuf[ 8] ^= iv[ 8]; obuf[ 9] ^= iv[ 9];
-            obuf[10] ^= iv[10]; obuf[11] ^= iv[11];
-            obuf[12] ^= iv[12]; obuf[13] ^= iv[13];
-            obuf[14] ^= iv[14]; obuf[15] ^= iv[15];
-            memcpy(iv, tmp, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-#endif
-    return EXIT_SUCCESS;
-}
-
-int aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
-{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;
-
-    if(b_pos)           /* complete any partial block   */
-    {
-        while(b_pos < AES_BLOCK_SIZE && cnt < len)
-        {
-            *obuf++ = (iv[b_pos++] ^= *ibuf++);
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    if((nb = (len - cnt) >> 4) != 0)    /* process whole blocks */
-    {
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-        if(ctx->inf.b[1] == 0xff)
-        {   int m;
-            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
-            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
-            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
-
-            if(ALIGN_OFFSET( ctx, 16 ))
-                return EXIT_FAILURE;
-
-            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
-            {
-                ivp = liv;
-                memcpy(liv, iv, AES_BLOCK_SIZE);
-            }
-
-            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
-            {
-                via_cfb_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
-                ibuf += nb * AES_BLOCK_SIZE;
-                obuf += nb * AES_BLOCK_SIZE;
-                cnt  += nb * AES_BLOCK_SIZE;
-            }
-            else    /* input, output or both are unaligned  */
-            {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-                uint8_t *ip, *op;
-
-                while(nb)
-                {
-                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;
-
-                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                    if(ip != ibuf)
-                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                    via_cfb_op7(ksp, cwd, ip, op, m, ivp, ivp);
-
-                    if(op != obuf)
-                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                    ibuf += m * AES_BLOCK_SIZE;
-                    obuf += m * AES_BLOCK_SIZE;
-                    cnt  += m * AES_BLOCK_SIZE;
-                }
-            }
-
-            if(ivp != iv)
-                memcpy(iv, ivp, AES_BLOCK_SIZE);
-        }
-#else
-# ifdef FAST_BUFFER_OPERATIONS
-        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                lp32(obuf)[0] = lp32(iv)[0] ^= lp32(ibuf)[0];
-                lp32(obuf)[1] = lp32(iv)[1] ^= lp32(ibuf)[1];
-                lp32(obuf)[2] = lp32(iv)[2] ^= lp32(ibuf)[2];
-                lp32(obuf)[3] = lp32(iv)[3] ^= lp32(ibuf)[3];
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-        else
-# endif
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                obuf[ 0] = iv[ 0] ^= ibuf[ 0]; obuf[ 1] = iv[ 1] ^= ibuf[ 1];
-                obuf[ 2] = iv[ 2] ^= ibuf[ 2]; obuf[ 3] = iv[ 3] ^= ibuf[ 3];
-                obuf[ 4] = iv[ 4] ^= ibuf[ 4]; obuf[ 5] = iv[ 5] ^= ibuf[ 5];
-                obuf[ 6] = iv[ 6] ^= ibuf[ 6]; obuf[ 7] = iv[ 7] ^= ibuf[ 7];
-                obuf[ 8] = iv[ 8] ^= ibuf[ 8]; obuf[ 9] = iv[ 9] ^= ibuf[ 9];
-                obuf[10] = iv[10] ^= ibuf[10]; obuf[11] = iv[11] ^= ibuf[11];
-                obuf[12] = iv[12] ^= ibuf[12]; obuf[13] = iv[13] ^= ibuf[13];
-                obuf[14] = iv[14] ^= ibuf[14]; obuf[15] = iv[15] ^= ibuf[15];
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-#endif
-    }
-
-    while(cnt < len)
-    {
-        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-
-        while(cnt < len && b_pos < AES_BLOCK_SIZE)
-        {
-            *obuf++ = (iv[b_pos++] ^= *ibuf++);
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    ctx->inf.b[2] = (uint8_t)b_pos;
-    return EXIT_SUCCESS;
-}
-
-int aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
-{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;
-
-    if(b_pos)           /* complete any partial block   */
-    {   uint8_t t;
-
-        while(b_pos < AES_BLOCK_SIZE && cnt < len)
-        {
-            t = *ibuf++;
-            *obuf++ = t ^ iv[b_pos];
-            iv[b_pos++] = t;
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    if((nb = (len - cnt) >> 4) != 0)    /* process whole blocks */
-    {
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-        if(ctx->inf.b[1] == 0xff)
-        {   int m;
-            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
-            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
-            via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
-
-            if(ALIGN_OFFSET( ctx, 16 ))
-                return EXIT_FAILURE;
-
-            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
-            {
-                ivp = liv;
-                memcpy(liv, iv, AES_BLOCK_SIZE);
-            }
-
-            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
-            {
-                via_cfb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
-                ibuf += nb * AES_BLOCK_SIZE;
-                obuf += nb * AES_BLOCK_SIZE;
-                cnt  += nb * AES_BLOCK_SIZE;
-            }
-            else    /* input, output or both are unaligned  */
-            {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-                uint8_t *ip, *op;
-
-                while(nb)
-                {
-                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;
-
-                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                    if(ip != ibuf)  /* input buffer is not aligned */
-                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                    via_cfb_op6(ksp, cwd, ip, op, m, ivp);
-
-                    if(op != obuf)  /* output buffer is not aligned */
-                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                    ibuf += m * AES_BLOCK_SIZE;
-                    obuf += m * AES_BLOCK_SIZE;
-                    cnt  += m * AES_BLOCK_SIZE;
-                }
-            }
-
-            if(ivp != iv)
-                memcpy(iv, ivp, AES_BLOCK_SIZE);
-        }
-#else
-# ifdef FAST_BUFFER_OPERATIONS
-        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) &&!ALIGN_OFFSET( iv, 4 ))
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {   uint32_t t;
-
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                t = lp32(ibuf)[0], lp32(obuf)[0] = t ^ lp32(iv)[0], lp32(iv)[0] = t;
-                t = lp32(ibuf)[1], lp32(obuf)[1] = t ^ lp32(iv)[1], lp32(iv)[1] = t;
-                t = lp32(ibuf)[2], lp32(obuf)[2] = t ^ lp32(iv)[2], lp32(iv)[2] = t;
-                t = lp32(ibuf)[3], lp32(obuf)[3] = t ^ lp32(iv)[3], lp32(iv)[3] = t;
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-        else
-# endif
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {   uint8_t t;
-
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                t = ibuf[ 0], obuf[ 0] = t ^ iv[ 0], iv[ 0] = t;
-                t = ibuf[ 1], obuf[ 1] = t ^ iv[ 1], iv[ 1] = t;
-                t = ibuf[ 2], obuf[ 2] = t ^ iv[ 2], iv[ 2] = t;
-                t = ibuf[ 3], obuf[ 3] = t ^ iv[ 3], iv[ 3] = t;
-                t = ibuf[ 4], obuf[ 4] = t ^ iv[ 4], iv[ 4] = t;
-                t = ibuf[ 5], obuf[ 5] = t ^ iv[ 5], iv[ 5] = t;
-                t = ibuf[ 6], obuf[ 6] = t ^ iv[ 6], iv[ 6] = t;
-                t = ibuf[ 7], obuf[ 7] = t ^ iv[ 7], iv[ 7] = t;
-                t = ibuf[ 8], obuf[ 8] = t ^ iv[ 8], iv[ 8] = t;
-                t = ibuf[ 9], obuf[ 9] = t ^ iv[ 9], iv[ 9] = t;
-                t = ibuf[10], obuf[10] = t ^ iv[10], iv[10] = t;
-                t = ibuf[11], obuf[11] = t ^ iv[11], iv[11] = t;
-                t = ibuf[12], obuf[12] = t ^ iv[12], iv[12] = t;
-                t = ibuf[13], obuf[13] = t ^ iv[13], iv[13] = t;
-                t = ibuf[14], obuf[14] = t ^ iv[14], iv[14] = t;
-                t = ibuf[15], obuf[15] = t ^ iv[15], iv[15] = t;
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-#endif
-    }
-
-    while(cnt < len)
-    {   uint8_t t;
-
-        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-
-        while(cnt < len && b_pos < AES_BLOCK_SIZE)
-        {
-            t = *ibuf++;
-            *obuf++ = t ^ iv[b_pos];
-            iv[b_pos++] = t;
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    ctx->inf.b[2] = (uint8_t)b_pos;
-    return EXIT_SUCCESS;
-}
-
-int aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
-{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;
-
-    if(b_pos)           /* complete any partial block   */
-    {
-        while(b_pos < AES_BLOCK_SIZE && cnt < len)
-        {
-            *obuf++ = iv[b_pos++] ^ *ibuf++;
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    if((nb = (len - cnt) >> 4) != 0)   /* process whole blocks */
-    {
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-        if(ctx->inf.b[1] == 0xff)
-        {   int m;
-            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
-            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
-            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
-
-            if(ALIGN_OFFSET( ctx, 16 ))
-                return EXIT_FAILURE;
-
-            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
-            {
-                ivp = liv;
-                memcpy(liv, iv, AES_BLOCK_SIZE);
-            }
-
-            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
-            {
-                via_ofb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
-                ibuf += nb * AES_BLOCK_SIZE;
-                obuf += nb * AES_BLOCK_SIZE;
-                cnt  += nb * AES_BLOCK_SIZE;
-            }
-            else    /* input, output or both are unaligned  */
-        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint8_t *ip, *op;
-
-                while(nb)
-                {
-                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;
-
-                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
-                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
-
-                    if(ip != ibuf)
-                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                    via_ofb_op6(ksp, cwd, ip, op, m, ivp);
-
-                    if(op != obuf)
-                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                    ibuf += m * AES_BLOCK_SIZE;
-                    obuf += m * AES_BLOCK_SIZE;
-                    cnt  += m * AES_BLOCK_SIZE;
-                }
-            }
-
-            if(ivp != iv)
-                memcpy(iv, ivp, AES_BLOCK_SIZE);
-        }
-#else
-# ifdef FAST_BUFFER_OPERATIONS
-        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                lp32(obuf)[0] = lp32(iv)[0] ^ lp32(ibuf)[0];
-                lp32(obuf)[1] = lp32(iv)[1] ^ lp32(ibuf)[1];
-                lp32(obuf)[2] = lp32(iv)[2] ^ lp32(ibuf)[2];
-                lp32(obuf)[3] = lp32(iv)[3] ^ lp32(ibuf)[3];
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-        else
-# endif
-            while(cnt + AES_BLOCK_SIZE <= len)
-            {
-                assert(b_pos == 0);
-                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-                    return EXIT_FAILURE;
-                obuf[ 0] = iv[ 0] ^ ibuf[ 0]; obuf[ 1] = iv[ 1] ^ ibuf[ 1];
-                obuf[ 2] = iv[ 2] ^ ibuf[ 2]; obuf[ 3] = iv[ 3] ^ ibuf[ 3];
-                obuf[ 4] = iv[ 4] ^ ibuf[ 4]; obuf[ 5] = iv[ 5] ^ ibuf[ 5];
-                obuf[ 6] = iv[ 6] ^ ibuf[ 6]; obuf[ 7] = iv[ 7] ^ ibuf[ 7];
-                obuf[ 8] = iv[ 8] ^ ibuf[ 8]; obuf[ 9] = iv[ 9] ^ ibuf[ 9];
-                obuf[10] = iv[10] ^ ibuf[10]; obuf[11] = iv[11] ^ ibuf[11];
-                obuf[12] = iv[12] ^ ibuf[12]; obuf[13] = iv[13] ^ ibuf[13];
-                obuf[14] = iv[14] ^ ibuf[14]; obuf[15] = iv[15] ^ ibuf[15];
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-                cnt  += AES_BLOCK_SIZE;
-            }
-#endif
-    }
-
-    while(cnt < len)
-    {
-        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-
-        while(cnt < len && b_pos < AES_BLOCK_SIZE)
-        {
-            *obuf++ = iv[b_pos++] ^ *ibuf++;
-            cnt++;
-        }
-
-        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
-    }
-
-    ctx->inf.b[2] = (uint8_t)b_pos;
-    return EXIT_SUCCESS;
-}
-
-#define BFR_LENGTH  (BFR_BLOCKS * AES_BLOCK_SIZE)
-
-int aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
-            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx ctx[1])
-{   unsigned char   *ip;
-    int             i, blen, b_pos = (int)(ctx->inf.b[2]);
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-    aligned_auto(uint8_t, buf, BFR_LENGTH, 16);
-    if(ctx->inf.b[1] == 0xff && ALIGN_OFFSET( ctx, 16 ))
-        return EXIT_FAILURE;
-#else
-    uint8_t buf[BFR_LENGTH]={0};
-#endif
-
-    if(b_pos)
-    {
-        memcpy(buf, cbuf, AES_BLOCK_SIZE);
-        if(aes_ecb_encrypt(buf, buf, AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-
-        while(b_pos < AES_BLOCK_SIZE && len)
-        {
-            *obuf++ = *ibuf++ ^ buf[b_pos++];
-            --len;
-        }
-
-        if(len)
-            ctr_inc(cbuf), b_pos = 0;
-    }
-
-    while(len)
-    {
-        blen = (len > BFR_LENGTH ? BFR_LENGTH : len), len -= blen;
-
-        for(i = 0, ip = buf; i < (blen >> 4); ++i)
-        {
-            memcpy(ip, cbuf, AES_BLOCK_SIZE);
-            ctr_inc(cbuf);
-            ip += AES_BLOCK_SIZE;
-        }
-
-        if(blen & (AES_BLOCK_SIZE - 1))
-            memcpy(ip, cbuf, AES_BLOCK_SIZE), i++;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-        if(ctx->inf.b[1] == 0xff)
-        {
-            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
-            via_ecb_op5((ctx->ks), cwd, buf, buf, i);
-        }
-        else
-#endif
-        if(aes_ecb_encrypt(buf, buf, i * AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)
-            return EXIT_FAILURE;
-
-        i = 0; ip = buf;
-# ifdef FAST_BUFFER_OPERATIONS
-        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( ip, 4 ))
-            while(i + AES_BLOCK_SIZE <= blen)
-            {
-                lp32(obuf)[0] = lp32(ibuf)[0] ^ lp32(ip)[0];
-                lp32(obuf)[1] = lp32(ibuf)[1] ^ lp32(ip)[1];
-                lp32(obuf)[2] = lp32(ibuf)[2] ^ lp32(ip)[2];
-                lp32(obuf)[3] = lp32(ibuf)[3] ^ lp32(ip)[3];
-                i += AES_BLOCK_SIZE;
-                ip += AES_BLOCK_SIZE;
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-            }
-        else
-#endif
-            while(i + AES_BLOCK_SIZE <= blen)
-            {
-                obuf[ 0] = ibuf[ 0] ^ ip[ 0]; obuf[ 1] = ibuf[ 1] ^ ip[ 1];
-                obuf[ 2] = ibuf[ 2] ^ ip[ 2]; obuf[ 3] = ibuf[ 3] ^ ip[ 3];
-                obuf[ 4] = ibuf[ 4] ^ ip[ 4]; obuf[ 5] = ibuf[ 5] ^ ip[ 5];
-                obuf[ 6] = ibuf[ 6] ^ ip[ 6]; obuf[ 7] = ibuf[ 7] ^ ip[ 7];
-                obuf[ 8] = ibuf[ 8] ^ ip[ 8]; obuf[ 9] = ibuf[ 9] ^ ip[ 9];
-                obuf[10] = ibuf[10] ^ ip[10]; obuf[11] = ibuf[11] ^ ip[11];
-                obuf[12] = ibuf[12] ^ ip[12]; obuf[13] = ibuf[13] ^ ip[13];
-                obuf[14] = ibuf[14] ^ ip[14]; obuf[15] = ibuf[15] ^ ip[15];
-                i += AES_BLOCK_SIZE;
-                ip += AES_BLOCK_SIZE;
-                ibuf += AES_BLOCK_SIZE;
-                obuf += AES_BLOCK_SIZE;
-            }
-
-        while(i++ < blen)
-            *obuf++ = *ibuf++ ^ ip[b_pos++];
-    }
-
-    ctx->inf.b[2] = (uint8_t)b_pos;
-    return EXIT_SUCCESS;
-}
-
-#if defined(__cplusplus)
-}
-#endif
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+
+ These subroutines implement multiple block AES modes for ECB, CBC, CFB,
+ OFB and CTR encryption.  The code provides support for the VIA Advanced
+ Cryptography Engine (ACE).
+
+ NOTE: In the following subroutines, the AES contexts (ctx) must be
+ 16 byte aligned if VIA ACE is being used
+*/
+
+#include <string.h>
+#include <assert.h>
+
+#include "aesopt.h"
+
+#if defined( AES_MODES )
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined( _MSC_VER ) && ( _MSC_VER > 800 )
+#pragma intrinsic(memcpy)
+#endif
+
+#define BFR_BLOCKS      8       /* blocks handled per pass through the aligned bounce buffers used below */
+
+/* These values are used to detect long word alignment in order to */
+/* speed up some buffer operations. This facility may not work on  */
+/* some machines so this define can be commented out if necessary  */
+
+#define FAST_BUFFER_OPERATIONS  /* when defined, 4-byte aligned buffers are XORed a 32-bit word at a time */
+
+#define lp32(x)         ((uint32_t*)(x))    /* view x as a pointer to 32-bit words; NOTE(review): relies on the compiler tolerating this cast of byte buffers */
+
+#define ALIGN_OFFSET(x,n)	(((intptr_t)(x)) & ((n) - 1))	/* low bits of address x under the mask n - 1; zero means aligned (mask form presumes n is a power of two) */
+#define ALIGN_FLOOR(x,n)	((uint8_t*)(x) - ( ((intptr_t)(x)) & ((n) - 1)))	/* x moved down by its ALIGN_OFFSET */
+#define ALIGN_CEIL(x,n)		((uint8_t*)(x) + (-((intptr_t)(x)) & ((n) - 1)))	/* x moved up to the next boundary, unchanged if already aligned */
+
+#if defined( USE_VIA_ACE_IF_PRESENT )   /* VIA ACE build: pull in the ACE interface and its control tables */
+
+#include "aes_via_ace.h"
+
+#pragma pack(16)
+
+aligned_array(unsigned long,    enc_gen_table, 12, 16) =    NEH_ENC_GEN_DATA;
+aligned_array(unsigned long,   enc_load_table, 12, 16) =   NEH_ENC_LOAD_DATA;
+aligned_array(unsigned long, enc_hybrid_table, 12, 16) = NEH_ENC_HYBRID_DATA;
+aligned_array(unsigned long,    dec_gen_table, 12, 16) =    NEH_DEC_GEN_DATA;
+aligned_array(unsigned long,   dec_load_table, 12, 16) =   NEH_DEC_LOAD_DATA;
+aligned_array(unsigned long, dec_hybrid_table, 12, 16) = NEH_DEC_HYBRID_DATA;
+
+/* NOTE: These control word macros must only be used after  */
+/* a key has been set up because they depend on key size    */
+/* See the VIA ACE documentation for key type information   */
+/* and aes_via_ace.h for non-default NEH_KEY_TYPE values    */
+
+#ifndef NEH_KEY_TYPE
+#  define NEH_KEY_TYPE NEH_HYBRID   /* default key type when the build does not choose one */
+#endif
+
+/* kd_adr(c) yields the key data pointer that the ECB and CBC decrypt routines pass to the ACE */
+#if NEH_KEY_TYPE == NEH_LOAD
+#define kd_adr(c)   ((uint8_t*)(c)->ks)     /* start of the key schedule */
+#elif NEH_KEY_TYPE == NEH_GENERATE
+#define kd_adr(c)   ((uint8_t*)(c)->ks + (c)->inf.b[0])     /* offset into the schedule by the value in inf.b[0] */
+#elif NEH_KEY_TYPE == NEH_HYBRID
+#define kd_adr(c)   ((uint8_t*)(c)->ks + ((c)->inf.b[0] == 160 ? 160 : 0))  /* offset 160 only when inf.b[0] is 160, otherwise the start */
+#else
+#error no key type defined for VIA ACE 
+#endif
+
+#else   /* no VIA ACE support: the alignment helpers reduce to plain arrays */
+
+#define aligned_array(type, name, no, stride) type name[no]     /* stride is ignored */
+#define aligned_auto(type, name, no, stride)  type name[no]     /* stride is ignored */
+
+#endif
+
+#if defined( _MSC_VER ) && _MSC_VER > 1200     /* this MSVC variant points cwd into the prebuilt <dir>_<ty>_table, indexed by (len - 128) >> 4 */
+
+#define via_cwd(cwd, ty, dir, len) \
+    unsigned long* cwd = (dir##_##ty##_table + ((len - 128) >> 4))
+
+#else   /* other compilers: declare a local 4-word array, zero words 1..3 and set word 0 from neh_<dir>_<ty>_key(len) */
+
+#define via_cwd(cwd, ty, dir, len)              \
+    aligned_auto(unsigned long, cwd, 4, 16);    \
+    cwd[1] = cwd[2] = cwd[3] = 0;               \
+    cwd[0] = neh_##dir##_##ty##_key(len)
+
+#endif  /* either way via_cwd() is a declaration, so it must appear where declarations are allowed */
+
+/* test the code for detecting and setting pointer alignment */
+
+int aes_test_alignment_detection(unsigned int n)	/* 4 <= n <= 16; self-test of ALIGN_FLOOR and ALIGN_CEIL, EXIT_SUCCESS when they behave as expected */
+{	uint8_t	p[16];                  /* only addresses inside p are examined; its contents are never read */
+    uint32_t i, count_eq = 0, count_neq = 0;
+
+    if(n < 4 || n > 16)             /* reject strides outside the supported range */
+        return EXIT_FAILURE;
+
+    for(i = 0; i < n; ++i)          /* try n consecutive byte addresses */
+    {
+        uint8_t *qf = ALIGN_FLOOR(p + i, n),    /* p + i moved down to a boundary */
+                *qh =  ALIGN_CEIL(p + i, n);    /* p + i moved up to a boundary   */
+        
+        if(qh == qf)                /* both agree: p + i was already on a boundary */
+            ++count_eq;
+        else if(qh == qf + n)       /* off a boundary: the two results must be exactly n apart */
+            ++count_neq;
+        else
+            return EXIT_FAILURE;    /* any other relationship means the macros are not working */
+    }
+    return (count_eq != 1 || count_neq != n - 1 ? EXIT_FAILURE : EXIT_SUCCESS);    /* pass only if exactly one of the n addresses was aligned */
+}
+
+int aes_mode_reset(aes_encrypt_ctx ctx[1])     /* clear the saved position within a block kept in the context */
+{
+    ctx->inf.b[2] = 0;      /* inf.b[2] is the byte the CFB, OFB and CTR routines load into b_pos on entry and store back on exit */
+    return EXIT_SUCCESS;    /* there is no failure path */
+}
+
+int aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx ctx[1])     /* ECB-encrypt len bytes from ibuf into obuf; returns EXIT_SUCCESS or EXIT_FAILURE */
+{   int nb = len >> 4;                  /* number of blocks; the shift presumes AES_BLOCK_SIZE is 16 */
+
+    if(len & (AES_BLOCK_SIZE - 1))      /* a length that is not a whole number of blocks is rejected */
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    if(ctx->inf.b[1] == 0xff)           /* NOTE(review): 0xff in inf.b[1] appears to mark a context that uses VIA ACE -- confirm where it is set */
+    {   uint8_t *ksp = (uint8_t*)(ctx->ks);
+        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);    /* declares cwd, the control word passed to the ACE operation */
+
+        if(ALIGN_OFFSET( ctx, 16 ))     /* the context must be 16 byte aligned for ACE use */
+            return EXIT_FAILURE;
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))     /* both buffers aligned: one call covers all blocks */
+        {
+            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
+        }
+        else                            /* otherwise go through an aligned local buffer, at most BFR_BLOCKS blocks per pass */
+        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint8_t *ip, *op;
+
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);    /* blocks in this pass */
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);   /* substitute buf for whichever side is unaligned */
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)          /* stage unaligned input in buf */
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_ecb_op5(ksp, cwd, ip, op, m);
+
+                if(op != obuf)          /* move staged output to its real destination */
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )  /* software path, compiled out when ACE is assumed to be present */
+    while(nb--)
+    {
+        if(aes_encrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)        /* each block is encrypted on its own */
+            return EXIT_FAILURE;
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+#endif
+    return EXIT_SUCCESS;
+}
+
+int aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx ctx[1])     /* ECB-decrypt len bytes from ibuf into obuf; returns EXIT_SUCCESS or EXIT_FAILURE */
+{   int nb = len >> 4;                  /* number of blocks; the shift presumes AES_BLOCK_SIZE is 16 */
+
+    if(len & (AES_BLOCK_SIZE - 1))      /* a length that is not a whole number of blocks is rejected */
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    if(ctx->inf.b[1] == 0xff)           /* NOTE(review): 0xff in inf.b[1] appears to mark a context that uses VIA ACE -- confirm where it is set */
+    {   uint8_t *ksp = kd_adr(ctx);     /* decrypt key data location depends on NEH_KEY_TYPE */
+        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);    /* declares cwd, the control word passed to the ACE operation */
+
+        if(ALIGN_OFFSET( ctx, 16 ))     /* the context must be 16 byte aligned for ACE use */
+            return EXIT_FAILURE;
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))     /* both buffers aligned: one call covers all blocks */
+        {
+            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
+        }
+        else                            /* otherwise go through an aligned local buffer, at most BFR_BLOCKS blocks per pass */
+        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint8_t *ip, *op;
+
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);    /* blocks in this pass */
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);   /* substitute buf for whichever side is unaligned */
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)          /* stage unaligned input in buf */
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_ecb_op5(ksp, cwd, ip, op, m);
+
+                if(op != obuf)          /* move staged output to its real destination */
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )  /* software path, compiled out when ACE is assumed to be present */
+    while(nb--)
+    {
+        if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)        /* each block is decrypted on its own */
+            return EXIT_FAILURE;
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+#endif
+    return EXIT_SUCCESS;
+}
+
+int aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx ctx[1])  /* CBC-encrypt len bytes from ibuf into obuf; iv is read and overwritten */
+{   int nb = len >> 4;                  /* number of blocks; the shift presumes AES_BLOCK_SIZE is 16 */
+
+    if(len & (AES_BLOCK_SIZE - 1))      /* a length that is not a whole number of blocks is rejected */
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    if(ctx->inf.b[1] == 0xff)           /* NOTE(review): 0xff in inf.b[1] appears to mark a context that uses VIA ACE -- confirm where it is set */
+    {   uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
+        aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);        /* aligned stand-in for an unaligned iv */
+        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);    /* declares cwd, the control word passed to the ACE operation */
+
+        if(ALIGN_OFFSET( ctx, 16 ))     /* the context must be 16 byte aligned for ACE use */
+            return EXIT_FAILURE;
+
+        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+        {
+            ivp = liv;
+            memcpy(liv, iv, AES_BLOCK_SIZE);
+        }
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))  /* single call only when the caller's iv is aligned as well */
+        {
+            via_cbc_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
+        }
+        else                            /* otherwise work in passes of at most BFR_BLOCKS blocks, staging unaligned data in buf */
+        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint8_t *ip, *op;
+
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);    /* blocks in this pass */
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);   /* substitute buf for whichever side is unaligned */
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)          /* stage unaligned input in buf */
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_cbc_op7(ksp, cwd, ip, op, m, ivp, ivp);
+
+                if(op != obuf)          /* move staged output to its real destination */
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        if(iv != ivp)                   /* hand the local iv copy back to the caller */
+            memcpy(iv, ivp, AES_BLOCK_SIZE);
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )  /* software path, compiled out when ACE is assumed to be present */
+# ifdef FAST_BUFFER_OPERATIONS
+    if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))     /* only the buffers accessed as words are checked; obuf is written with memcpy */
+        while(nb--)
+        {
+            lp32(iv)[0] ^= lp32(ibuf)[0];       /* fold the plaintext block into iv, a word at a time */
+            lp32(iv)[1] ^= lp32(ibuf)[1];
+            lp32(iv)[2] ^= lp32(ibuf)[2];
+            lp32(iv)[3] ^= lp32(ibuf)[3];
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)        /* encrypt in place: iv now holds this ciphertext block */
+                return EXIT_FAILURE;
+            memcpy(obuf, iv, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+    else
+# endif
+        while(nb--)                     /* same steps as above, a byte at a time */
+        {
+            iv[ 0] ^= ibuf[ 0]; iv[ 1] ^= ibuf[ 1];
+            iv[ 2] ^= ibuf[ 2]; iv[ 3] ^= ibuf[ 3];
+            iv[ 4] ^= ibuf[ 4]; iv[ 5] ^= ibuf[ 5];
+            iv[ 6] ^= ibuf[ 6]; iv[ 7] ^= ibuf[ 7];
+            iv[ 8] ^= ibuf[ 8]; iv[ 9] ^= ibuf[ 9];
+            iv[10] ^= ibuf[10]; iv[11] ^= ibuf[11];
+            iv[12] ^= ibuf[12]; iv[13] ^= ibuf[13];
+            iv[14] ^= ibuf[14]; iv[15] ^= ibuf[15];
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)        /* encrypt in place: iv now holds this ciphertext block */
+                return EXIT_FAILURE;
+            memcpy(obuf, iv, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+#endif
+    return EXIT_SUCCESS;
+}
+
+int aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx ctx[1])  /* CBC-decrypt len bytes from ibuf into obuf; iv is read and overwritten */
+{   unsigned char tmp[AES_BLOCK_SIZE];  /* keeps the current ciphertext block until it replaces iv */
+    int nb = len >> 4;                  /* number of blocks; the shift presumes AES_BLOCK_SIZE is 16 */
+
+    if(len & (AES_BLOCK_SIZE - 1))      /* a length that is not a whole number of blocks is rejected */
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    if(ctx->inf.b[1] == 0xff)           /* NOTE(review): 0xff in inf.b[1] appears to mark a context that uses VIA ACE -- confirm where it is set */
+    {   uint8_t *ksp = kd_adr(ctx), *ivp = iv;  /* decrypt key data location depends on NEH_KEY_TYPE */
+        aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);        /* aligned stand-in for an unaligned iv */
+        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);    /* declares cwd, the control word passed to the ACE operation */
+
+        if(ALIGN_OFFSET( ctx, 16 ))     /* the context must be 16 byte aligned for ACE use */
+            return EXIT_FAILURE;
+
+        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+        {
+            ivp = liv;
+            memcpy(liv, iv, AES_BLOCK_SIZE);
+        }
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))  /* single call only when the caller's iv is aligned as well */
+        {
+            via_cbc_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+        }
+        else                            /* otherwise work in passes of at most BFR_BLOCKS blocks, staging unaligned data in buf */
+        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint8_t *ip, *op;
+
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);    /* blocks in this pass */
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);   /* substitute buf for whichever side is unaligned */
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)          /* stage unaligned input in buf */
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_cbc_op6(ksp, cwd, ip, op, m, ivp);
+
+                if(op != obuf)          /* move staged output to its real destination */
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        if(iv != ivp)                   /* hand the local iv copy back to the caller */
+            memcpy(iv, ivp, AES_BLOCK_SIZE);
+
+        return EXIT_SUCCESS;
+    }
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )  /* software path, compiled out when ACE is assumed to be present */
+# ifdef FAST_BUFFER_OPERATIONS
+    if(!ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))     /* only the buffers accessed as words are checked */
+        while(nb--)
+        {
+            memcpy(tmp, ibuf, AES_BLOCK_SIZE);  /* save the ciphertext before obuf is written, so it survives even if obuf is ibuf */
+            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+                return EXIT_FAILURE;
+            lp32(obuf)[0] ^= lp32(iv)[0];       /* XOR the decrypted block with iv, a word at a time */
+            lp32(obuf)[1] ^= lp32(iv)[1];
+            lp32(obuf)[2] ^= lp32(iv)[2];
+            lp32(obuf)[3] ^= lp32(iv)[3];
+            memcpy(iv, tmp, AES_BLOCK_SIZE);    /* the saved ciphertext becomes iv for the next block */
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+    else
+# endif
+        while(nb--)                     /* same steps as above, a byte at a time */
+        {
+            memcpy(tmp, ibuf, AES_BLOCK_SIZE);  /* save the ciphertext before obuf is written, so it survives even if obuf is ibuf */
+            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+                return EXIT_FAILURE;
+            obuf[ 0] ^= iv[ 0]; obuf[ 1] ^= iv[ 1];
+            obuf[ 2] ^= iv[ 2]; obuf[ 3] ^= iv[ 3];
+            obuf[ 4] ^= iv[ 4]; obuf[ 5] ^= iv[ 5];
+            obuf[ 6] ^= iv[ 6]; obuf[ 7] ^= iv[ 7];
+            obuf[ 8] ^= iv[ 8]; obuf[ 9] ^= iv[ 9];
+            obuf[10] ^= iv[10]; obuf[11] ^= iv[11];
+            obuf[12] ^= iv[12]; obuf[13] ^= iv[13];
+            obuf[14] ^= iv[14]; obuf[15] ^= iv[15];
+            memcpy(iv, tmp, AES_BLOCK_SIZE);    /* the saved ciphertext becomes iv for the next block */
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+#endif
+    return EXIT_SUCCESS;
+}
+
+int aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;
+
+    if(b_pos)           /* complete any partial block   */
+    {
+        while(b_pos < AES_BLOCK_SIZE && cnt < len)
+        {
+            *obuf++ = (iv[b_pos++] ^= *ibuf++);
+            cnt++;
+        }
+
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    if((nb = (len - cnt) >> 4) != 0)    /* process whole blocks */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;
+
+            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+            {
+                ivp = liv;
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+            }
+
+            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
+            {
+                via_cfb_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                cnt  += nb * AES_BLOCK_SIZE;
+            }
+            else    /* input, output or both are unaligned  */
+            {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+                uint8_t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;
+
+                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    if(ip != ibuf)
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                    via_cfb_op7(ksp, cwd, ip, op, m, ivp, ivp);
+
+                    if(op != obuf)
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    cnt  += m * AES_BLOCK_SIZE;
+                }
+            }
+
+            if(ivp != iv)
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {
+                assert(b_pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                lp32(obuf)[0] = lp32(iv)[0] ^= lp32(ibuf)[0];
+                lp32(obuf)[1] = lp32(iv)[1] ^= lp32(ibuf)[1];
+                lp32(obuf)[2] = lp32(iv)[2] ^= lp32(ibuf)[2];
+                lp32(obuf)[3] = lp32(iv)[3] ^= lp32(ibuf)[3];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {
+                assert(b_pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                obuf[ 0] = iv[ 0] ^= ibuf[ 0]; obuf[ 1] = iv[ 1] ^= ibuf[ 1];
+                obuf[ 2] = iv[ 2] ^= ibuf[ 2]; obuf[ 3] = iv[ 3] ^= ibuf[ 3];
+                obuf[ 4] = iv[ 4] ^= ibuf[ 4]; obuf[ 5] = iv[ 5] ^= ibuf[ 5];
+                obuf[ 6] = iv[ 6] ^= ibuf[ 6]; obuf[ 7] = iv[ 7] ^= ibuf[ 7];
+                obuf[ 8] = iv[ 8] ^= ibuf[ 8]; obuf[ 9] = iv[ 9] ^= ibuf[ 9];
+                obuf[10] = iv[10] ^= ibuf[10]; obuf[11] = iv[11] ^= ibuf[11];
+                obuf[12] = iv[12] ^= ibuf[12]; obuf[13] = iv[13] ^= ibuf[13];
+                obuf[14] = iv[14] ^= ibuf[14]; obuf[15] = iv[15] ^= ibuf[15];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+
+    while(cnt < len)
+    {
+        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+            return EXIT_FAILURE;
+
+        while(cnt < len && b_pos < AES_BLOCK_SIZE)
+        {
+            *obuf++ = (iv[b_pos++] ^= *ibuf++);
+            cnt++;
+        }
+
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    ctx->inf.b[2] = (uint8_t)b_pos;
+    return EXIT_SUCCESS;
+}
+
+int aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;   /* b_pos: offset into a partly consumed iv block, kept in ctx->inf.b[2] between calls */
+    /* CFB decryption of len bytes from ibuf to obuf: out = in ^ iv byte, then the input byte replaces that iv byte. Returns EXIT_SUCCESS or EXIT_FAILURE. */
+    if(b_pos)           /* complete any partial block   */
+    {   uint8_t t;
+        /* t keeps the input byte so it can still be stored in iv after obuf has been written */
+        while(b_pos < AES_BLOCK_SIZE && cnt < len)
+        {
+            t = *ibuf++;
+            *obuf++ = t ^ iv[b_pos];
+            iv[b_pos++] = t;
+            cnt++;
+        }
+        /* wrap to 0 once the block is used up; a non-zero value means the input ran out first */
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    if((nb = (len - cnt) >> 4) != 0)    /* process whole blocks */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+        /* this path runs only when inf.b[1] == 0xff; otherwise the whole blocks are left to the byte-wise loop at the end */
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);    /* evaluates to 128/192/256 for inf.b[0] of 160/192/224 */
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;    /* this path rejects a ctx that fails the 16-byte ALIGN_OFFSET test */
+
+            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+            {
+                ivp = liv;
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+            }
+
+            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
+            {
+                via_cfb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                cnt  += nb * AES_BLOCK_SIZE;
+            }
+            else    /* input, output or both are unaligned  */
+            {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+                uint8_t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;    /* at most BFR_BLOCKS blocks per pass through the aligned scratch buffer */
+
+                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    if(ip != ibuf)  /* input buffer is not aligned */
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                    via_cfb_op6(ksp, cwd, ip, op, m, ivp);
+
+                    if(op != obuf)  /* output buffer is not aligned */
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    cnt  += m * AES_BLOCK_SIZE;
+                }
+            }
+
+            if(ivp != iv)    /* the aligned local copy was used: copy it back to the caller's iv */
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS    /* word-wise variant, used only when ibuf, obuf and iv all pass the 4-byte ALIGN_OFFSET test */
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) &&!ALIGN_OFFSET( iv, 4 ))
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {   uint32_t t;
+                /* same update as the byte-wise loop below, done on four uint32_t values (lp32 is defined outside this view) */
+                assert(b_pos == 0);    /* a non-zero b_pos would mean the input ran out above, making nb 0 */
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                t = lp32(ibuf)[0], lp32(obuf)[0] = t ^ lp32(iv)[0], lp32(iv)[0] = t;
+                t = lp32(ibuf)[1], lp32(obuf)[1] = t ^ lp32(iv)[1], lp32(iv)[1] = t;
+                t = lp32(ibuf)[2], lp32(obuf)[2] = t ^ lp32(iv)[2], lp32(iv)[2] = t;
+                t = lp32(ibuf)[3], lp32(obuf)[3] = t ^ lp32(iv)[3], lp32(iv)[3] = t;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {   uint8_t t;
+                /* encrypt iv in place, XOR it into the output, then store the input block as the next iv */
+                assert(b_pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                t = ibuf[ 0], obuf[ 0] = t ^ iv[ 0], iv[ 0] = t;
+                t = ibuf[ 1], obuf[ 1] = t ^ iv[ 1], iv[ 1] = t;
+                t = ibuf[ 2], obuf[ 2] = t ^ iv[ 2], iv[ 2] = t;
+                t = ibuf[ 3], obuf[ 3] = t ^ iv[ 3], iv[ 3] = t;
+                t = ibuf[ 4], obuf[ 4] = t ^ iv[ 4], iv[ 4] = t;
+                t = ibuf[ 5], obuf[ 5] = t ^ iv[ 5], iv[ 5] = t;
+                t = ibuf[ 6], obuf[ 6] = t ^ iv[ 6], iv[ 6] = t;
+                t = ibuf[ 7], obuf[ 7] = t ^ iv[ 7], iv[ 7] = t;
+                t = ibuf[ 8], obuf[ 8] = t ^ iv[ 8], iv[ 8] = t;
+                t = ibuf[ 9], obuf[ 9] = t ^ iv[ 9], iv[ 9] = t;
+                t = ibuf[10], obuf[10] = t ^ iv[10], iv[10] = t;
+                t = ibuf[11], obuf[11] = t ^ iv[11], iv[11] = t;
+                t = ibuf[12], obuf[12] = t ^ iv[12], iv[12] = t;
+                t = ibuf[13], obuf[13] = t ^ iv[13], iv[13] = t;
+                t = ibuf[14], obuf[14] = t ^ iv[14], iv[14] = t;
+                t = ibuf[15], obuf[15] = t ^ iv[15], iv[15] = t;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+    /* whatever is left: a final partial block, or everything when no whole-block path ran above */
+    while(cnt < len)
+    {   uint8_t t;
+        /* encrypt iv only when a fresh block is being started */
+        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+            return EXIT_FAILURE;
+
+        while(cnt < len && b_pos < AES_BLOCK_SIZE)
+        {
+            t = *ibuf++;
+            *obuf++ = t ^ iv[b_pos];
+            iv[b_pos++] = t;
+            cnt++;
+        }
+
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    ctx->inf.b[2] = (uint8_t)b_pos;    /* remember the partial-block offset for the next call */
+    return EXIT_SUCCESS;
+}
+
+int aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{   int cnt = 0, b_pos = (int)ctx->inf.b[2], nb;   /* b_pos: offset into a partly consumed iv block, kept in ctx->inf.b[2] between calls */
+    /* OFB mode over len bytes: out = in ^ iv byte, with iv re-encrypted in place for each new block; the data is never fed back into iv. Returns EXIT_SUCCESS or EXIT_FAILURE. */
+    if(b_pos)           /* complete any partial block   */
+    {
+        while(b_pos < AES_BLOCK_SIZE && cnt < len)
+        {
+            *obuf++ = iv[b_pos++] ^ *ibuf++;
+            cnt++;
+        }
+        /* wrap to 0 once the block is used up; a non-zero value means the input ran out first */
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    if((nb = (len - cnt) >> 4) != 0)   /* process whole blocks */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+        /* this path runs only when inf.b[1] == 0xff; otherwise the whole blocks are left to the byte-wise loop at the end */
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint8_t *ksp = (uint8_t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint8_t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);    /* evaluates to 128/192/256 for inf.b[0] of 160/192/224 */
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;    /* this path rejects a ctx that fails the 16-byte ALIGN_OFFSET test */
+
+            if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+            {
+                ivp = liv;
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+            }
+
+            if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
+            {
+                via_ofb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                cnt  += nb * AES_BLOCK_SIZE;
+            }
+            else    /* input, output or both are unaligned  */
+        {   aligned_auto(uint8_t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint8_t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb), nb -= m;    /* at most BFR_BLOCKS blocks per pass through the aligned scratch buffer */
+
+                    ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    if(ip != ibuf)    /* unaligned input: stage it in buf */
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                    via_ofb_op6(ksp, cwd, ip, op, m, ivp);
+
+                    if(op != obuf)    /* unaligned output: result was produced in buf */
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    cnt  += m * AES_BLOCK_SIZE;
+                }
+            }
+
+            if(ivp != iv)    /* the aligned local copy was used: copy it back to the caller's iv */
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS    /* word-wise variant, used only when ibuf, obuf and iv all pass the 4-byte ALIGN_OFFSET test */
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {
+                assert(b_pos == 0);    /* a non-zero b_pos would mean the input ran out above, making nb 0 */
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                lp32(obuf)[0] = lp32(iv)[0] ^ lp32(ibuf)[0];
+                lp32(obuf)[1] = lp32(iv)[1] ^ lp32(ibuf)[1];
+                lp32(obuf)[2] = lp32(iv)[2] ^ lp32(ibuf)[2];
+                lp32(obuf)[3] = lp32(iv)[3] ^ lp32(ibuf)[3];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(cnt + AES_BLOCK_SIZE <= len)
+            {
+                assert(b_pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)    /* iv is only ever replaced by its own encryption */
+                    return EXIT_FAILURE;
+                obuf[ 0] = iv[ 0] ^ ibuf[ 0]; obuf[ 1] = iv[ 1] ^ ibuf[ 1];
+                obuf[ 2] = iv[ 2] ^ ibuf[ 2]; obuf[ 3] = iv[ 3] ^ ibuf[ 3];
+                obuf[ 4] = iv[ 4] ^ ibuf[ 4]; obuf[ 5] = iv[ 5] ^ ibuf[ 5];
+                obuf[ 6] = iv[ 6] ^ ibuf[ 6]; obuf[ 7] = iv[ 7] ^ ibuf[ 7];
+                obuf[ 8] = iv[ 8] ^ ibuf[ 8]; obuf[ 9] = iv[ 9] ^ ibuf[ 9];
+                obuf[10] = iv[10] ^ ibuf[10]; obuf[11] = iv[11] ^ ibuf[11];
+                obuf[12] = iv[12] ^ ibuf[12]; obuf[13] = iv[13] ^ ibuf[13];
+                obuf[14] = iv[14] ^ ibuf[14]; obuf[15] = iv[15] ^ ibuf[15];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                cnt  += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+    /* whatever is left: a final partial block, or everything when no whole-block path ran above */
+    while(cnt < len)
+    {
+        if(!b_pos && aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)    /* encrypt iv only when a fresh block is being started */
+            return EXIT_FAILURE;
+
+        while(cnt < len && b_pos < AES_BLOCK_SIZE)
+        {
+            *obuf++ = iv[b_pos++] ^ *ibuf++;
+            cnt++;
+        }
+
+        b_pos = (b_pos == AES_BLOCK_SIZE ? 0 : b_pos);
+    }
+
+    ctx->inf.b[2] = (uint8_t)b_pos;    /* remember the partial-block offset for the next call */
+    return EXIT_SUCCESS;
+}
+
+#define BFR_LENGTH  (BFR_BLOCKS * AES_BLOCK_SIZE)    /* byte size of the keystream buffer in aes_ctr_crypt */
+/* CTR mode: XORs len bytes of ibuf with the encryption of successive cbuf values, writing to obuf; ctr_inc advances cbuf. ctx->inf.b[2] keeps the offset into a partly used keystream block. Returns EXIT_SUCCESS or EXIT_FAILURE. */
+int aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx ctx[1])
+{   unsigned char   *ip;
+    int             i, blen, b_pos = (int)(ctx->inf.b[2]);
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+    aligned_auto(uint8_t, buf, BFR_LENGTH, 16);
+    if(ctx->inf.b[1] == 0xff && ALIGN_OFFSET( ctx, 16 ))
+        return EXIT_FAILURE;
+#else
+    uint8_t buf[BFR_LENGTH]={0};
+#endif
+
+    if(b_pos)    /* an earlier call stopped mid-block: rebuild that keystream block from the current counter value */
+    {
+        memcpy(buf, cbuf, AES_BLOCK_SIZE);
+        if(aes_ecb_encrypt(buf, buf, AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)
+            return EXIT_FAILURE;
+
+        while(b_pos < AES_BLOCK_SIZE && len)
+        {
+            *obuf++ = *ibuf++ ^ buf[b_pos++];
+            --len;
+        }
+        /* advance the counter only when more input follows; otherwise b_pos (possibly AES_BLOCK_SIZE) is saved unchanged */
+        if(len)
+            ctr_inc(cbuf), b_pos = 0;
+    }
+
+    while(len)
+    {
+        blen = (len > BFR_LENGTH ? BFR_LENGTH : len), len -= blen;
+        /* one counter value per whole block, advancing the counter after each copy */
+        for(i = 0, ip = buf; i < (blen >> 4); ++i)
+        {
+            memcpy(ip, cbuf, AES_BLOCK_SIZE);
+            ctr_inc(cbuf);
+            ip += AES_BLOCK_SIZE;
+        }
+        /* a trailing partial block also gets the current counter value, but the counter is not advanced for it here */
+        if(blen & (AES_BLOCK_SIZE - 1))
+            memcpy(ip, cbuf, AES_BLOCK_SIZE), i++;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+        if(ctx->inf.b[1] == 0xff)
+        {
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);    /* evaluates to 128/192/256 for inf.b[0] of 160/192/224 */
+            via_ecb_op5((ctx->ks), cwd, buf, buf, i);
+        }
+        else
+#endif
+        if(aes_ecb_encrypt(buf, buf, i * AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)    /* i = number of counter blocks placed in buf */
+            return EXIT_FAILURE;
+        /* buf now holds the encrypted counters; i is reused below as a byte index into this chunk */
+        i = 0; ip = buf;
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( ip, 4 ))
+            while(i + AES_BLOCK_SIZE <= blen)
+            {
+                lp32(obuf)[0] = lp32(ibuf)[0] ^ lp32(ip)[0];
+                lp32(obuf)[1] = lp32(ibuf)[1] ^ lp32(ip)[1];
+                lp32(obuf)[2] = lp32(ibuf)[2] ^ lp32(ip)[2];
+                lp32(obuf)[3] = lp32(ibuf)[3] ^ lp32(ip)[3];
+                i += AES_BLOCK_SIZE;
+                ip += AES_BLOCK_SIZE;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+            }
+        else
+#endif
+            while(i + AES_BLOCK_SIZE <= blen)
+            {
+                obuf[ 0] = ibuf[ 0] ^ ip[ 0]; obuf[ 1] = ibuf[ 1] ^ ip[ 1];
+                obuf[ 2] = ibuf[ 2] ^ ip[ 2]; obuf[ 3] = ibuf[ 3] ^ ip[ 3];
+                obuf[ 4] = ibuf[ 4] ^ ip[ 4]; obuf[ 5] = ibuf[ 5] ^ ip[ 5];
+                obuf[ 6] = ibuf[ 6] ^ ip[ 6]; obuf[ 7] = ibuf[ 7] ^ ip[ 7];
+                obuf[ 8] = ibuf[ 8] ^ ip[ 8]; obuf[ 9] = ibuf[ 9] ^ ip[ 9];
+                obuf[10] = ibuf[10] ^ ip[10]; obuf[11] = ibuf[11] ^ ip[11];
+                obuf[12] = ibuf[12] ^ ip[12]; obuf[13] = ibuf[13] ^ ip[13];
+                obuf[14] = ibuf[14] ^ ip[14]; obuf[15] = ibuf[15] ^ ip[15];
+                i += AES_BLOCK_SIZE;
+                ip += AES_BLOCK_SIZE;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+            }
+        /* bytes of a final partial block: b_pos is 0 on entry here and ends as the number of keystream bytes consumed */
+        while(i++ < blen)
+            *obuf++ = *ibuf++ ^ ip[b_pos++];
+    }
+
+    ctx->inf.b[2] = (uint8_t)b_pos;    /* remember the partial-block offset for the next call */
+    return EXIT_SUCCESS;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+#endif
diff --git a/Crypto/aescrypt.c b/Crypto/aescrypt.c
index 97682da9..e2c0aaf1 100755
--- a/Crypto/aescrypt.c
+++ b/Crypto/aescrypt.c
@@ -1,294 +1,294 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
-#define so(y,x,c)   word_out(y, c, s(x,c))
-
-#if defined(ARRAYS)
-#define locals(y,x)     x[4],y[4]
-#else
-#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
-#endif
-
-#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
-                        s(y,2) = s(x,2); s(y,3) = s(x,3);
-#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
-#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
-#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
-
-#if ( FUNCS_IN_C & ENCRYPTION_IN_C )
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimiation with small code but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined( _MSC_VER ) && !defined( _WIN64 )
-#pragma optimize( "s", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define fwd_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
-
-#if defined(FT4_SET)
-#undef  dec_fmvars
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
-#elif defined(FT1_SET)
-#undef  dec_fmvars
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
-#else
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
-#endif
-
-#if defined(FL4_SET)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
-#elif defined(FL1_SET)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
-#else
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
-#endif
-
-int aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
-{   uint32_t         locals(b0, b1);
-    const uint32_t   *kp;
-#if defined( dec_fmvars )
-    dec_fmvars; /* declare variables for fwd_mcol() if needed */
-#endif
-
-    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
-        return EXIT_FAILURE;
-
-    kp = cx->ks;
-    state_in(b0, in, kp);
-
-#if (ENC_UNROLL == FULL)
-
-    switch(cx->inf.b[0])
-    {
-    case 14 * 16:
-        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-        kp += 2 * N_COLS;
-    case 12 * 16:
-        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-        kp += 2 * N_COLS;
-    case 10 * 16:
-        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-        round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
-        round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
-        round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
-        round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
-        round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
-        round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
-    }
-
-#else
-
-#if (ENC_UNROLL == PARTIAL)
-    {   uint32_t    rnd;
-        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
-        {
-            kp += N_COLS;
-            round(fwd_rnd, b1, b0, kp);
-            kp += N_COLS;
-            round(fwd_rnd, b0, b1, kp);
-        }
-        kp += N_COLS;
-        round(fwd_rnd,  b1, b0, kp);
-#else
-    {   uint32_t    rnd;
-        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
-        {
-            kp += N_COLS;
-            round(fwd_rnd, b1, b0, kp);
-            l_copy(b0, b1);
-        }
-#endif
-        kp += N_COLS;
-        round(fwd_lrnd, b0, b1, kp);
-    }
-#endif
-
-    state_out(out, b0);
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if ( FUNCS_IN_C & DECRYPTION_IN_C)
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimiation with small code but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined( _MSC_VER ) && !defined( _WIN64 )
-#pragma optimize( "t", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define inv_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
-
-#if defined(IT4_SET)
-#undef  dec_imvars
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
-#elif defined(IT1_SET)
-#undef  dec_imvars
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
-#else
-#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
-#endif
-
-#if defined(IL4_SET)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
-#elif defined(IL1_SET)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
-#else
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
-#endif
-
-/* This code can work with the decryption key schedule in the   */
-/* order that is used for encrytpion (where the 1st decryption  */
-/* round key is at the high end ot the schedule) or with a key  */
-/* schedule that has been reversed to put the 1st decryption    */
-/* round key at the low end of the schedule in memory (when     */
-/* AES_REV_DKS is defined)                                      */
-
-#ifdef AES_REV_DKS
-#define key_ofs     0
-#define rnd_key(n)  (kp + n * N_COLS)
-#else
-#define key_ofs     1
-#define rnd_key(n)  (kp - n * N_COLS)
-#endif
-
-int aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
-{   uint32_t        locals(b0, b1);
-#if defined( dec_imvars )
-    dec_imvars; /* declare variables for inv_mcol() if needed */
-#endif
-    const uint32_t *kp;
-
-    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
-        return EXIT_FAILURE;
-
-    kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
-    state_in(b0, in, kp);
-
-#if (DEC_UNROLL == FULL)
-
-    kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
-    switch(cx->inf.b[0])
-    {
-    case 14 * 16:
-        round(inv_rnd,  b1, b0, rnd_key(-13));
-        round(inv_rnd,  b0, b1, rnd_key(-12));
-    case 12 * 16:
-        round(inv_rnd,  b1, b0, rnd_key(-11));
-        round(inv_rnd,  b0, b1, rnd_key(-10));
-    case 10 * 16:
-        round(inv_rnd,  b1, b0, rnd_key(-9));
-        round(inv_rnd,  b0, b1, rnd_key(-8));
-        round(inv_rnd,  b1, b0, rnd_key(-7));
-        round(inv_rnd,  b0, b1, rnd_key(-6));
-        round(inv_rnd,  b1, b0, rnd_key(-5));
-        round(inv_rnd,  b0, b1, rnd_key(-4));
-        round(inv_rnd,  b1, b0, rnd_key(-3));
-        round(inv_rnd,  b0, b1, rnd_key(-2));
-        round(inv_rnd,  b1, b0, rnd_key(-1));
-        round(inv_lrnd, b0, b1, rnd_key( 0));
-    }
-
-#else
-
-#if (DEC_UNROLL == PARTIAL)
-    {   uint32_t    rnd;
-        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
-        {
-            kp = rnd_key(1);
-            round(inv_rnd, b1, b0, kp);
-            kp = rnd_key(1);
-            round(inv_rnd, b0, b1, kp);
-        }
-        kp = rnd_key(1);
-        round(inv_rnd, b1, b0, kp);
-#else
-    {   uint32_t    rnd;
-        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
-        {
-            kp = rnd_key(1);
-            round(inv_rnd, b1, b0, kp);
-            l_copy(b0, b1);
-        }
-#endif
-        kp = rnd_key(1);
-        round(inv_lrnd, b0, b1, kp);
-        }
-#endif
-
-    state_out(out, b0);
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#include "aesopt.h"
+#include "aestab.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])   /* state word c of y = word_in(x, c) XOR key word k[c] */
+#define so(y,x,c)   word_out(y, c, s(x,c))              /* pass state word c of x to word_out for position c of y */
+/* locals() names the two state sets to declare: arrays of four when ARRAYS is defined, otherwise eight separate scalars */
+#if defined(ARRAYS)
+#define locals(y,x)     x[4],y[4]
+#else
+#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
+#endif
+/* The macros below expand to several ';'-separated statements with no do { } while(0) wrapper, so they must not be the body of an unbraced if/else/loop */
+#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
+                        s(y,2) = s(x,2); s(y,3) = s(x,3);
+#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
+#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
+
+#if ( FUNCS_IN_C & ENCRYPTION_IN_C )
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code but this is poor for decryption
+   so we need to control this with the following VC++ pragmas
+*/
+
+#if defined( _MSC_VER ) && !defined( _WIN64 )
+#pragma optimize( "s", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values.  In particular the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction)
+*/
+
+#define fwd_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))   /* i.e. s(x, (c + r) mod 4) for r, c in 0..3 */
+/* fwd_rnd computes output word c of a normal round; the table variants also #undef dec_fmvars, the fallback applies fwd_mcol() explicitly */
+#if defined(FT4_SET)
+#undef  dec_fmvars
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
+#elif defined(FT1_SET)
+#undef  dec_fmvars
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
+#else
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
+#endif
+/* fwd_lrnd is what aes_encrypt uses for the last round: the (f,l) tables, or no_table() without fwd_mcol() in the fallback */
+#if defined(FL4_SET)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
+#elif defined(FL1_SET)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
+#else
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
+#endif
+
+int aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
+{   uint32_t         locals(b0, b1);    /* two sets of four state words, used alternately as round input and output */
+    const uint32_t   *kp;
+#if defined( dec_fmvars )
+    dec_fmvars; /* declare variables for fwd_mcol() if needed */
+#endif
+    /* Encrypts the block at in into out using key schedule cx->ks; returns EXIT_SUCCESS, or EXIT_FAILURE when cx->inf.b[0] is not 160, 192 or 224 */
+    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
+        return EXIT_FAILURE;    /* inf.b[0] is 16 * round count (10, 12 or 14), as the round totals below confirm */
+
+    kp = cx->ks;
+    state_in(b0, in, kp);    /* b0 = input words XOR the first four key-schedule words */
+
+#if (ENC_UNROLL == FULL)
+    /* the cases fall through on purpose: larger inf.b[0] values run 4 or 2 extra rounds, then share the final 10 */
+    switch(cx->inf.b[0])
+    {
+    case 14 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        kp += 2 * N_COLS;    /* fall through */
+    case 12 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        kp += 2 * N_COLS;    /* fall through */
+    case 10 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
+        round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
+    }
+
+#else
+
+#if (ENC_UNROLL == PARTIAL)
+    {   uint32_t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)    /* two rounds per iteration, ping-ponging b0 and b1 */
+        {
+            kp += N_COLS;
+            round(fwd_rnd, b1, b0, kp);
+            kp += N_COLS;
+            round(fwd_rnd, b0, b1, kp);
+        }
+        kp += N_COLS;
+        round(fwd_rnd,  b1, b0, kp);    /* one more normal round so the last round below reads from b1 */
+#else
+    {   uint32_t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)    /* one round per iteration, result copied back into b0 */
+        {
+            kp += N_COLS;
+            round(fwd_rnd, b1, b0, kp);
+            l_copy(b0, b1);
+        }
+#endif
+        kp += N_COLS;
+        round(fwd_lrnd, b0, b1, kp);    /* last round, shared by both loop variants */
+    }
+#endif
+
+    state_out(out, b0);    /* emit the four words of b0 through word_out */
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if ( FUNCS_IN_C & DECRYPTION_IN_C)
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code but this is poor for decryption
+   so we need to control this with the following VC++ pragmas
+*/
+
+#if defined( _MSC_VER ) && !defined( _WIN64 )
+#pragma optimize( "t", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values.  In particular the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction)
+*/
+
+#define inv_var(x,r,c) /* yields s(x,(c - r) mod 4) when r and c are in 0..3 */\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3)) /* row 0: same column */\
+ : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)) /* row 1: one column back */\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1)) /* row 2: two columns back */\
+ :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) /* any other r: three columns back */
+
+#if defined(IT4_SET) /* inv_rnd: output column c of y = key word k[c] XOR a lookup on input state x (four-table form) */
+#undef  dec_imvars /* this form of inv_rnd does not call inv_mcol(), so its helper variables are dropped */
+#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
+#elif defined(IT1_SET) /* single-table form */
+#undef  dec_imvars /* as above: no inv_mcol() call in this form */
+#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
+#else /* no round table: XOR the key word with the lookup first, then apply inv_mcol() to the result */
+#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
+#endif
+
+#if defined(IL4_SET) /* inv_lrnd: same shape as inv_rnd but on the t_use(i,l) tables; no variant calls inv_mcol() */
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
+#elif defined(IL1_SET) /* single-table form */
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
+#else /* no last-round table: falls back to t_use(i,box) */
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
+#endif
+
+/* This code can work with the decryption key schedule in the   */
+/* order that is used for encryption (where the 1st decryption  */
+/* round key is at the high end of the schedule) or with a key  */
+/* schedule that has been reversed to put the 1st decryption    */
+/* round key at the low end of the schedule in memory (when     */
+/* AES_REV_DKS is defined)                                      */
+
+#ifdef AES_REV_DKS /* reversed schedule: round keys are walked towards higher addresses */
+#define key_ofs     0 /* tested as a boolean when the initial kp is chosen */
+#define rnd_key(n)  (kp + n * N_COLS) /* NOTE: n is not parenthesised - pass a single literal or identifier only */
+#else /* encryption-order schedule: round keys are walked towards lower addresses */
+#define key_ofs     1 /* non-zero: the initial kp is offset from the start of the schedule */
+#define rnd_key(n)  (kp - n * N_COLS) /* NOTE: n is not parenthesised - pass a single literal or identifier only */
+#endif
+
+int aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) /* decrypts one block from in to out; EXIT_SUCCESS or EXIT_FAILURE */
+{   uint32_t        locals(b0, b1); /* presumably declares the two state buffers the rounds alternate between - see locals() */
+#if defined( dec_imvars )
+    dec_imvars; /* declare variables for inv_mcol() if needed */
+#endif
+    const uint32_t *kp; /* cursor into the key schedule cx->ks */
+
+    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) /* only 160, 192 and 224 are handled below */
+        return EXIT_FAILURE;
+
+    kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); /* first decryption round key: b[0]/4 words in when key_ofs is set, else word 0 */
+    state_in(b0, in, kp); /* presumably loads the input block into b0 combined with that round key - confirm in state_in */
+
+#if (DEC_UNROLL == FULL)
+
+    kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); /* re-base kp at the opposite end so that rnd_key(0) is the final round key */
+    switch(cx->inf.b[0]) /* cases fall through on purpose: larger values run extra leading rounds */
+    {
+    case 14 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-13));
+        round(inv_rnd,  b0, b1, rnd_key(-12)); /* fall through */
+    case 12 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-11));
+        round(inv_rnd,  b0, b1, rnd_key(-10)); /* fall through */
+    case 10 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-9));
+        round(inv_rnd,  b0, b1, rnd_key(-8));
+        round(inv_rnd,  b1, b0, rnd_key(-7));
+        round(inv_rnd,  b0, b1, rnd_key(-6));
+        round(inv_rnd,  b1, b0, rnd_key(-5));
+        round(inv_rnd,  b0, b1, rnd_key(-4));
+        round(inv_rnd,  b1, b0, rnd_key(-3));
+        round(inv_rnd,  b0, b1, rnd_key(-2));
+        round(inv_rnd,  b1, b0, rnd_key(-1));
+        round(inv_lrnd, b0, b1, rnd_key( 0)); /* last round uses inv_lrnd and leaves the result in b0 */
+    }
+
+#else
+
+#if (DEC_UNROLL == PARTIAL)
+    {   uint32_t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) /* b[0]/32 - 1 iterations, two rounds per iteration */
+        {
+            kp = rnd_key(1); /* step kp by N_COLS words in the schedule direction */
+            round(inv_rnd, b1, b0, kp);
+            kp = rnd_key(1);
+            round(inv_rnd, b0, b1, kp);
+        }
+        kp = rnd_key(1);
+        round(inv_rnd, b1, b0, kp); /* one more ordinary round, so the final round below reads b1 */
+#else
+    {   uint32_t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) /* b[0]/16 - 1 iterations, one round per iteration */
+        {
+            kp = rnd_key(1);
+            round(inv_rnd, b1, b0, kp);
+            l_copy(b0, b1); /* presumably copies b1 back into b0 for the next iteration - confirm in l_copy */
+        }
+#endif
+        kp = rnd_key(1);
+        round(inv_lrnd, b0, b1, kp); /* final round, shared by both loop variants */
+        } /* closes the block opened in whichever #if branch above was compiled */
+#endif
+
+    state_out(out, b0); /* presumably stores the state in b0 to out - confirm in state_out */
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/Crypto/aeskey.c b/Crypto/aeskey.c
index 3bef87c4..f997582f 100755
--- a/Crypto/aeskey.c
+++ b/Crypto/aeskey.c
@@ -1,548 +1,548 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-#  include "aes_via_ace.h"
-#endif
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-/* Initialise the key schedule from the user supplied key. The key
-   length can be specified in bytes, with legal values of 16, 24
-   and 32, or in bits, with legal values of 128, 192 and 256. These
-   values correspond with Nk values of 4, 6 and 8 respectively.
-
-   The following macros implement a single cycle in the key
-   schedule generation process. The number of cycles needed
-   for each cx->n_col and nk value is:
-
-    nk =             4  5  6  7  8
-    ------------------------------
-    cx->n_col = 4   10  9  8  7  7
-    cx->n_col = 5   14 11 10  9  9
-    cx->n_col = 6   19 15 12 11 11
-    cx->n_col = 7   21 19 16 13 14
-    cx->n_col = 8   29 23 19 17 14
-*/
-
-#if defined( REDUCE_CODE_SIZE )
-#  define ls_box ls_sub
-   uint32_t ls_sub(const uint32_t t, const uint32_t n);
-#  define inv_mcol im_sub
-   uint32_t im_sub(const uint32_t x);
-#  ifdef ENC_KS_UNROLL
-#    undef ENC_KS_UNROLL
-#  endif
-#  ifdef DEC_KS_UNROLL
-#    undef DEC_KS_UNROLL
-#  endif
-#endif
-
-#if (FUNCS_IN_C & ENC_KEYING_IN_C)
-
-#if defined(AES_128) || defined( AES_VAR )
-
-#define ke4(k,i) \
-{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
-    k[4*(i)+5] = ss[1] ^= ss[0]; \
-    k[4*(i)+6] = ss[2] ^= ss[1]; \
-    k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-
-int aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   uint32_t    ss[4];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#ifdef ENC_KS_UNROLL
-    ke4(cx->ks, 0);  ke4(cx->ks, 1);
-    ke4(cx->ks, 2);  ke4(cx->ks, 3);
-    ke4(cx->ks, 4);  ke4(cx->ks, 5);
-    ke4(cx->ks, 6);  ke4(cx->ks, 7);
-    ke4(cx->ks, 8);
-#else
-    {   uint32_t i;
-        for(i = 0; i < 9; ++i)
-            ke4(cx->ks, i);
-    }
-#endif
-    ke4(cx->ks, 9);
-    cx->inf.l = 0;
-    cx->inf.b[0] = 10 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(AES_192) || defined( AES_VAR )
-
-#define kef6(k,i) \
-{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
-    k[6*(i)+ 7] = ss[1] ^= ss[0]; \
-    k[6*(i)+ 8] = ss[2] ^= ss[1]; \
-    k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-}
-
-#define ke6(k,i) \
-{   kef6(k,i); \
-    k[6*(i)+10] = ss[4] ^= ss[3]; \
-    k[6*(i)+11] = ss[5] ^= ss[4]; \
-}
-
-int aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   uint32_t    ss[6];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-
-#ifdef ENC_KS_UNROLL
-    ke6(cx->ks, 0);  ke6(cx->ks, 1);
-    ke6(cx->ks, 2);  ke6(cx->ks, 3);
-    ke6(cx->ks, 4);  ke6(cx->ks, 5);
-    ke6(cx->ks, 6);
-#else
-    {   uint32_t i;
-        for(i = 0; i < 7; ++i)
-            ke6(cx->ks, i);
-    }
-#endif
-    kef6(cx->ks, 7);
-    cx->inf.l = 0;
-    cx->inf.b[0] = 12 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(AES_256) || defined( AES_VAR )
-
-#define kef8(k,i) \
-{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
-    k[8*(i)+ 9] = ss[1] ^= ss[0]; \
-    k[8*(i)+10] = ss[2] ^= ss[1]; \
-    k[8*(i)+11] = ss[3] ^= ss[2]; \
-}
-
-#define ke8(k,i) \
-{   kef8(k,i); \
-    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
-    k[8*(i)+13] = ss[5] ^= ss[4]; \
-    k[8*(i)+14] = ss[6] ^= ss[5]; \
-    k[8*(i)+15] = ss[7] ^= ss[6]; \
-}
-
-int aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   uint32_t    ss[8];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    cx->ks[6] = ss[6] = word_in(key, 6);
-    cx->ks[7] = ss[7] = word_in(key, 7);
-
-#ifdef ENC_KS_UNROLL
-    ke8(cx->ks, 0); ke8(cx->ks, 1);
-    ke8(cx->ks, 2); ke8(cx->ks, 3);
-    ke8(cx->ks, 4); ke8(cx->ks, 5);
-#else
-    {   uint32_t i;
-        for(i = 0; i < 6; ++i)
-            ke8(cx->ks,  i);
-    }
-#endif
-    kef8(cx->ks, 6);
-    cx->inf.l = 0;
-    cx->inf.b[0] = 14 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined( AES_VAR )
-
-int aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
-{   
-    switch(key_len)
-    {
-    case 16: case 128: return aes_encrypt_key128(key, cx);
-    case 24: case 192: return aes_encrypt_key192(key, cx);
-    case 32: case 256: return aes_encrypt_key256(key, cx);
-    default: return EXIT_FAILURE;
-    }
-}
-
-#endif
-
-#endif
-
-#if (FUNCS_IN_C & DEC_KEYING_IN_C)
-
-/* this is used to store the decryption round keys  */
-/* in forward or reverse order                      */
-
-#ifdef AES_REV_DKS
-#define v(n,i)  ((n) - (i) + 2 * ((i) & 3))
-#else
-#define v(n,i)  (i)
-#endif
-
-#if DEC_ROUND == NO_TABLES
-#define ff(x)   (x)
-#else
-#define ff(x)   inv_mcol(x)
-#if defined( dec_imvars )
-#define d_vars  dec_imvars
-#endif
-#endif
-
-#if defined(AES_128) || defined( AES_VAR )
-
-#define k4e(k,i) \
-{   k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
-    k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \
-    k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \
-    k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \
-}
-
-#if 1
-
-#define kdf4(k,i) \
-{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
-    ss[1] = ss[1] ^ ss[3]; \
-    ss[2] = ss[2] ^ ss[3]; \
-    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
-    ss[i % 4] ^= ss[4]; \
-    ss[4] ^= k[v(40,(4*(i)))];   k[v(40,(4*(i))+4)] = ff(ss[4]); \
-    ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \
-    ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \
-    ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \
-}
-
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
-    ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
-    k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \
-    k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \
-    k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \
-    k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \
-}
-
-#define kdl4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
-    k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
-    k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \
-    k[v(40,(4*(i))+6)] = ss[0]; \
-    k[v(40,(4*(i))+7)] = ss[1]; \
-}
-
-#else
-
-#define kdf4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \
-    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \
-    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \
-}
-
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \
-    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \
-    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \
-    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \
-}
-
-#define kdl4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \
-    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \
-    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \
-    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \
-}
-
-#endif
-
-int aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   uint32_t    ss[5];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[v(40,(0))] = ss[0] = word_in(key, 0);
-    cx->ks[v(40,(1))] = ss[1] = word_in(key, 1);
-    cx->ks[v(40,(2))] = ss[2] = word_in(key, 2);
-    cx->ks[v(40,(3))] = ss[3] = word_in(key, 3);
-
-#ifdef DEC_KS_UNROLL
-     kdf4(cx->ks, 0); kd4(cx->ks, 1);
-     kd4(cx->ks, 2);  kd4(cx->ks, 3);
-     kd4(cx->ks, 4);  kd4(cx->ks, 5);
-     kd4(cx->ks, 6);  kd4(cx->ks, 7);
-     kd4(cx->ks, 8);  kdl4(cx->ks, 9);
-#else
-    {   uint32_t i;
-        for(i = 0; i < 10; ++i)
-            k4e(cx->ks, i);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 10 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#endif
-    cx->inf.l = 0;
-    cx->inf.b[0] = 10 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(AES_192) || defined( AES_VAR )
-
-#define k6ef(k,i) \
-{   k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
-    k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \
-    k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \
-    k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \
-}
-
-#define k6e(k,i) \
-{   k6ef(k,i); \
-    k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \
-    k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \
-}
-
-#define kdf6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \
-    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \
-    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \
-    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \
-    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \
-}
-
-#define kd6(k,i) \
-{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \
-    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \
-    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \
-    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \
-    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \
-    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \
-}
-
-#define kdl6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \
-    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \
-    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \
-    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \
-}
-
-int aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   uint32_t    ss[7];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[v(48,(0))] = ss[0] = word_in(key, 0);
-    cx->ks[v(48,(1))] = ss[1] = word_in(key, 1);
-    cx->ks[v(48,(2))] = ss[2] = word_in(key, 2);
-    cx->ks[v(48,(3))] = ss[3] = word_in(key, 3);
-
-#ifdef DEC_KS_UNROLL
-    cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4));
-    cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5));
-    kdf6(cx->ks, 0); kd6(cx->ks, 1);
-    kd6(cx->ks, 2);  kd6(cx->ks, 3);
-    kd6(cx->ks, 4);  kd6(cx->ks, 5);
-    kd6(cx->ks, 6);  kdl6(cx->ks, 7);
-#else
-    cx->ks[v(48,(4))] = ss[4] = word_in(key, 4);
-    cx->ks[v(48,(5))] = ss[5] = word_in(key, 5);
-    {   uint32_t i;
-
-        for(i = 0; i < 7; ++i)
-            k6e(cx->ks, i);
-        k6ef(cx->ks, 7);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 12 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#endif
-    cx->inf.l = 0;
-    cx->inf.b[0] = 12 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(AES_256) || defined( AES_VAR )
-
-#define k8ef(k,i) \
-{   k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
-    k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \
-    k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \
-    k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \
-}
-
-#define k8e(k,i) \
-{   k8ef(k,i); \
-    k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \
-    k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \
-    k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \
-    k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \
-}
-
-#define kdf8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \
-    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \
-    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \
-    ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \
-    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \
-    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \
-    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \
-}
-
-#define kd8(k,i) \
-{   ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \
-    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \
-    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \
-    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \
-    ss[8] = ls_box(ss[3],0); \
-    ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \
-    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \
-    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \
-    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \
-}
-
-#define kdl8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \
-    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \
-    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \
-    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \
-}
-
-int aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   uint32_t    ss[9];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[v(56,(0))] = ss[0] = word_in(key, 0);
-    cx->ks[v(56,(1))] = ss[1] = word_in(key, 1);
-    cx->ks[v(56,(2))] = ss[2] = word_in(key, 2);
-    cx->ks[v(56,(3))] = ss[3] = word_in(key, 3);
-
-#ifdef DEC_KS_UNROLL
-    cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4));
-    cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5));
-    cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6));
-    cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7));
-    kdf8(cx->ks, 0); kd8(cx->ks, 1);
-    kd8(cx->ks, 2);  kd8(cx->ks, 3);
-    kd8(cx->ks, 4);  kd8(cx->ks, 5);
-    kdl8(cx->ks, 6);
-#else
-    cx->ks[v(56,(4))] = ss[4] = word_in(key, 4);
-    cx->ks[v(56,(5))] = ss[5] = word_in(key, 5);
-    cx->ks[v(56,(6))] = ss[6] = word_in(key, 6);
-    cx->ks[v(56,(7))] = ss[7] = word_in(key, 7);
-    {   uint32_t i;
-
-        for(i = 0; i < 6; ++i)
-            k8e(cx->ks,  i);
-        k8ef(cx->ks,  6);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 14 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#endif
-    cx->inf.l = 0;
-    cx->inf.b[0] = 14 * 16;
-
-#ifdef USE_VIA_ACE_IF_PRESENT
-    if(VIA_ACE_AVAILABLE)
-        cx->inf.b[1] = 0xff;
-#endif
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined( AES_VAR )
-
-int aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
-{
-    switch(key_len)
-    {
-    case 16: case 128: return aes_decrypt_key128(key, cx);
-    case 24: case 192: return aes_decrypt_key192(key, cx);
-    case 32: case 256: return aes_decrypt_key256(key, cx);
-    default: return EXIT_FAILURE;
-    }
-}
-
-#endif
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#include "aesopt.h"
+#include "aestab.h"
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+#  include "aes_via_ace.h"
+#endif
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+/* Initialise the key schedule from the user supplied key. The key
+   length can be specified in bytes, with legal values of 16, 24
+   and 32, or in bits, with legal values of 128, 192 and 256. These
+   values correspond with Nk values of 4, 6 and 8 respectively.
+
+   The following macros implement a single cycle in the key
+   schedule generation process. The number of cycles needed
+   for each cx->n_col and nk value is:
+
+    nk =             4  5  6  7  8
+    ------------------------------
+    cx->n_col = 4   10  9  8  7  7
+    cx->n_col = 5   14 11 10  9  9
+    cx->n_col = 6   19 15 12 11 11
+    cx->n_col = 7   21 19 16 13 14
+    cx->n_col = 8   29 23 19 17 14
+*/
+
+#if defined( REDUCE_CODE_SIZE ) /* route ls_box/inv_mcol through functions and switch key-schedule unrolling off */
+#  define ls_box ls_sub /* every ls_box(...) below becomes a call to ls_sub */
+   uint32_t ls_sub(const uint32_t t, const uint32_t n); /* declared only; the definition is not in this file section */
+#  define inv_mcol im_sub /* every inv_mcol(...) below becomes a call to im_sub */
+   uint32_t im_sub(const uint32_t x); /* declared only; the definition is not in this file section */
+#  ifdef ENC_KS_UNROLL
+#    undef ENC_KS_UNROLL /* forces the loop variants of the encryption key schedules */
+#  endif
+#  ifdef DEC_KS_UNROLL
+#    undef DEC_KS_UNROLL /* forces the loop variants of the decryption key schedules */
+#  endif
+#endif
+
+#if (FUNCS_IN_C & ENC_KEYING_IN_C)
+
+#if defined(AES_128) || defined( AES_VAR )
+
+#define ke4(k,i) /* one schedule cycle: writes k[4i+4 .. 4i+7] and updates the caller's ss[0..3] in place */ \
+{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; /* t_use(r,c)[i]: per-cycle constant, presumably the round constant - confirm in aestab.h */ \
+    k[4*(i)+5] = ss[1] ^= ss[0]; \
+    k[4*(i)+6] = ss[2] ^= ss[1]; \
+    k[4*(i)+7] = ss[3] ^= ss[2]; \
+}
+
+int aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) /* fills cx->ks[0..43] from key; always returns EXIT_SUCCESS */
+{   uint32_t    ss[4]; /* the four most recent schedule words; ke4 refers to this array by name */
+
+    cx->ks[0] = ss[0] = word_in(key, 0); /* schedule words 0..3 are taken from the key via word_in */
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+
+#ifdef ENC_KS_UNROLL
+    ke4(cx->ks, 0);  ke4(cx->ks, 1);
+    ke4(cx->ks, 2);  ke4(cx->ks, 3);
+    ke4(cx->ks, 4);  ke4(cx->ks, 5);
+    ke4(cx->ks, 6);  ke4(cx->ks, 7);
+    ke4(cx->ks, 8);
+#else
+    {   uint32_t i;
+        for(i = 0; i < 9; ++i) /* cycles 0..8; cycle 9 follows the #endif */
+            ke4(cx->ks, i);
+    }
+#endif
+    ke4(cx->ks, 9); /* tenth cycle, shared by both variants: writes ks[40..43] */
+    cx->inf.l = 0; /* presumably inf overlays l and b[], so this clears every b[] byte - confirm in aes.h */
+    cx->inf.b[0] = 10 * 16; /* the value the encrypt routine switches on and derives its loop count from */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff; /* flag byte set only when VIA_ACE_AVAILABLE is true; meaning defined in aes_via_ace.h */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_192) || defined( AES_VAR )
+
+#define kef6(k,i) /* short cycle: writes only k[6i+6 .. 6i+9] and updates the caller's ss[0..3] */ \
+{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; /* t_use(r,c)[i]: per-cycle constant, presumably the round constant - confirm in aestab.h */ \
+    k[6*(i)+ 7] = ss[1] ^= ss[0]; \
+    k[6*(i)+ 8] = ss[2] ^= ss[1]; \
+    k[6*(i)+ 9] = ss[3] ^= ss[2]; \
+}
+
+#define ke6(k,i) /* full cycle: kef6 plus the two remaining words k[6i+10], k[6i+11] */ \
+{   kef6(k,i); \
+    k[6*(i)+10] = ss[4] ^= ss[3]; \
+    k[6*(i)+11] = ss[5] ^= ss[4]; \
+}
+
+int aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) /* fills cx->ks[0..51] from key; always returns EXIT_SUCCESS */
+{   uint32_t    ss[6]; /* the six most recent schedule words; kef6/ke6 refer to this array by name */
+
+    cx->ks[0] = ss[0] = word_in(key, 0); /* schedule words 0..5 are taken from the key via word_in */
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+
+#ifdef ENC_KS_UNROLL
+    ke6(cx->ks, 0);  ke6(cx->ks, 1);
+    ke6(cx->ks, 2);  ke6(cx->ks, 3);
+    ke6(cx->ks, 4);  ke6(cx->ks, 5);
+    ke6(cx->ks, 6);
+#else
+    {   uint32_t i;
+        for(i = 0; i < 7; ++i) /* full cycles 0..6 write ks[6..47] */
+            ke6(cx->ks, i);
+    }
+#endif
+    kef6(cx->ks, 7); /* final short cycle, shared by both variants: writes ks[48..51] only */
+    cx->inf.l = 0; /* presumably inf overlays l and b[], so this clears every b[] byte - confirm in aes.h */
+    cx->inf.b[0] = 12 * 16; /* the value the encrypt routine switches on and derives its loop count from */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff; /* flag byte set only when VIA_ACE_AVAILABLE is true; meaning defined in aes_via_ace.h */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_256) || defined( AES_VAR )
+
+#define kef8(k,i) /* short cycle: writes only k[8i+8 .. 8i+11] and updates the caller's ss[0..3] */ \
+{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; /* t_use(r,c)[i]: per-cycle constant, presumably the round constant - confirm in aestab.h */ \
+    k[8*(i)+ 9] = ss[1] ^= ss[0]; \
+    k[8*(i)+10] = ss[2] ^= ss[1]; \
+    k[8*(i)+11] = ss[3] ^= ss[2]; \
+}
+
+#define ke8(k,i) /* full cycle: kef8 plus k[8i+12 .. 8i+15] */ \
+{   kef8(k,i); \
+    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); /* second ls_box per cycle, called with 0 instead of 3 and with no constant */ \
+    k[8*(i)+13] = ss[5] ^= ss[4]; \
+    k[8*(i)+14] = ss[6] ^= ss[5]; \
+    k[8*(i)+15] = ss[7] ^= ss[6]; \
+}
+
+int aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) /* fills cx->ks[0..59] from key; always returns EXIT_SUCCESS */
+{   uint32_t    ss[8]; /* the eight most recent schedule words; kef8/ke8 refer to this array by name */
+
+    cx->ks[0] = ss[0] = word_in(key, 0); /* schedule words 0..7 are taken from the key via word_in */
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    cx->ks[6] = ss[6] = word_in(key, 6);
+    cx->ks[7] = ss[7] = word_in(key, 7);
+
+#ifdef ENC_KS_UNROLL
+    ke8(cx->ks, 0); ke8(cx->ks, 1);
+    ke8(cx->ks, 2); ke8(cx->ks, 3);
+    ke8(cx->ks, 4); ke8(cx->ks, 5);
+#else
+    {   uint32_t i;
+        for(i = 0; i < 6; ++i) /* full cycles 0..5 write ks[8..55] */
+            ke8(cx->ks,  i);
+    }
+#endif
+    kef8(cx->ks, 6); /* final short cycle, shared by both variants: writes ks[56..59] only */
+    cx->inf.l = 0; /* presumably inf overlays l and b[], so this clears every b[] byte - confirm in aes.h */
+    cx->inf.b[0] = 14 * 16; /* the value the encrypt routine switches on and derives its loop count from */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff; /* flag byte set only when VIA_ACE_AVAILABLE is true; meaning defined in aes_via_ace.h */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined( AES_VAR )
+
+int aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
+{   /* key_len is accepted in bytes (16/24/32) or in bits (128/192/256) */
+    if(key_len == 16 || key_len == 128)
+        return aes_encrypt_key128(key, cx);
+    if(key_len == 24 || key_len == 192)
+        return aes_encrypt_key192(key, cx);
+    if(key_len == 32 || key_len == 256)
+        return aes_encrypt_key256(key, cx);
+    return EXIT_FAILURE;    /* every other length is rejected without touching cx */
+}
+
+#endif
+
+#endif
+
+#if (FUNCS_IN_C & DEC_KEYING_IN_C)
+
+/* this is used to store the decryption round keys  */
+/* in forward or reverse order                      */
+
+#ifdef AES_REV_DKS /* reversed storage: word i = 4g + o is stored at index n - 4g + o */
+#define v(n,i)  ((n) - (i) + 2 * ((i) & 3)) /* groups of four words reverse order; word order inside a group is kept */
+#else /* forward storage: index unchanged, n is ignored */
+#define v(n,i)  (i)
+#endif
+
+#if DEC_ROUND == NO_TABLES /* no inv_mcol() is applied to stored round keys in this configuration */
+#define ff(x)   (x)
+#else /* otherwise ff() applies inv_mcol() to a schedule word */
+#define ff(x)   inv_mcol(x)
+#if defined( dec_imvars )
+#define d_vars  dec_imvars /* lets the key functions declare the helper variables inv_mcol() needs */
+#endif
+#endif
+
+#if defined(AES_128) || defined( AES_VAR )
+
+#define k4e(k,i) /* plain schedule cycle for a 4-word key, stored through v(40, ...); no ff() applied */ \
+{   k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \
+    k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \
+    k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \
+}
+
+#if 1 /* hard-wired: the kdf4/kd4/kdl4 set directly below is compiled, the set after #else is not */
+
+#define kdf4(k,i) /* pre-mixes ss[0..2], then stores ff() of each new word; needs ss[4] as scratch */ \
+{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
+    ss[1] = ss[1] ^ ss[3]; \
+    ss[2] = ss[2] ^ ss[3]; \
+    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
+    ss[i % 4] ^= ss[4]; \
+    ss[4] ^= k[v(40,(4*(i)))];   k[v(40,(4*(i))+4)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \
+}
+
+#define kd4(k,i) /* applies ff() once to ss[4], then chains XORs with the four words stored by the previous cycle */ \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
+    ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
+    k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \
+    k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \
+    k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \
+    k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \
+}
+
+#define kdl4(k,i) /* stores four words built from ss[0..3] without ff() */ \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
+    k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
+    k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \
+    k[v(40,(4*(i))+6)] = ss[0]; \
+    k[v(40,(4*(i))+7)] = ss[1]; \
+}
+
+#else /* alternative formulation, currently compiled out by the #if 1 above */
+
+#define kdf4(k,i) /* k4e recurrence with ff() applied to every stored word */ \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \
+}
+
+#define kd4(k,i) /* keeps ss[0..3] on the plain recurrence; stored words are ff(ss[4]) chained with the previous cycle's words */ \
+{   ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \
+}
+
+#define kdl4(k,i) /* k4e recurrence, words stored without ff() */ \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \
+}
+
+#endif
+
+int aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) /* builds the decryption schedule in cx->ks; always returns EXIT_SUCCESS */
+{   uint32_t    ss[5]; /* ss[0..3]: running schedule words; ss[4]: scratch used by kdf4/kd4/kdl4 */
+#if defined( d_vars )
+        d_vars; /* helper variables for inv_mcol(), declared only when the configuration defines them */
+#endif
+    cx->ks[v(40,(0))] = ss[0] = word_in(key, 0); /* the four key words are stored unchanged, at indices chosen by v() */
+    cx->ks[v(40,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(40,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(40,(3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+     kdf4(cx->ks, 0); kd4(cx->ks, 1); /* first cycle uses kdf4, cycles 1..8 use kd4, cycle 9 uses kdl4 */
+     kd4(cx->ks, 2);  kd4(cx->ks, 3);
+     kd4(cx->ks, 4);  kd4(cx->ks, 5);
+     kd4(cx->ks, 6);  kd4(cx->ks, 7);
+     kd4(cx->ks, 8);  kdl4(cx->ks, 9);
+#else
+    {   uint32_t i;
+        for(i = 0; i < 10; ++i) /* first build the schedule with the plain k4e recurrence */
+            k4e(cx->ks, i);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 10 * N_COLS; ++i) /* then transform words N_COLS .. 10*N_COLS-1 only; ks[] is indexed directly, not via v() */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0; /* presumably inf overlays l and b[], so this clears every b[] byte - confirm in aes.h */
+    cx->inf.b[0] = 10 * 16; /* one of the three values aes_decrypt accepts */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff; /* flag byte set only when VIA_ACE_AVAILABLE is true; meaning defined in aes_via_ace.h */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_192) || defined( AES_VAR )
+
+#define k6ef(k,i) \
+{   k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
+    k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \
+    k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \
+    k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \
+}
+
+#define k6e(k,i) \
+{   k6ef(k,i); \
+    k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \
+    k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \
+}
+
+#define kdf6(k,i) \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \
+    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \
+    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \
+}
+
+#define kd6(k,i) /* step i: ss[0..5] advance as in k6e, while each stored word is the running value ss[6] XORed with the stored word six indices earlier */ \
+{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; /* ss[6] is scratch: starts as the ls_box/table term of this step */ \
+    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; /* ff() is applied once, to that term only; NOTE(review): presumably relies on ff() distributing over XOR - confirm */ \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \
+    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \
+    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \
+}
+
+#define kdl6(k,i) /* last step: advance ss[0..3] and store them as-is (no ff()); only four words are written */ \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \
+}
+
+int aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) /* fill cx->ks with the decryption schedule built from the six key words read from key; always returns EXIT_SUCCESS */
+{   uint32_t    ss[7];  /* ss[0..5]: running key words, ss[6]: scratch used by kd6 */
+#if defined( d_vars )
+        d_vars;         /* extra locals, only when the configuration defines d_vars */
+#endif
+    cx->ks[v(48,(0))] = ss[0] = word_in(key, 0);    /* the first four key words are stored untransformed in both variants */
+    cx->ks[v(48,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(48,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(48,(3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+    cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4));    /* words 4-5 are stored via ff(); ss[] keeps the raw values */
+    cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5));
+    kdf6(cx->ks, 0); kd6(cx->ks, 1);    /* eight unrolled steps: kdf6, six kd6, then kdl6 */
+    kd6(cx->ks, 2);  kd6(cx->ks, 3);
+    kd6(cx->ks, 4);  kd6(cx->ks, 5);
+    kd6(cx->ks, 6);  kdl6(cx->ks, 7);
+#else
+    cx->ks[v(48,(4))] = ss[4] = word_in(key, 4);
+    cx->ks[v(48,(5))] = ss[5] = word_in(key, 5);
+    {   uint32_t i;
+
+        for(i = 0; i < 7; ++i)      /* seven full 6-word steps ... */
+            k6e(cx->ks, i);
+        k6ef(cx->ks, 7);            /* ... plus a final 4-word step: 6 + 7*6 + 4 = 52 words in total */
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 12 * N_COLS; ++i)   /* post-pass over the stored words; the first N_COLS words are left as they are */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0;              /* clear inf.l before setting individual inf.b[] bytes */
+    cx->inf.b[0] = 12 * 16;     /* 192; NOTE(review): presumably rounds * block bytes - confirm against the inf definition */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;    /* mark the context when VIA_ACE_AVAILABLE evaluates true */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_256) || defined( AES_VAR )
+
+#define k8ef(k,i) /* step i, words 0-3: advance ss[0..3] and store them at k[v(56, 8*i+8 .. 8*i+11)] */ \
+{   k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; /* first word mixes in ls_box(ss[7],3) and table entry i */ \
+    k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; /* the remaining words chain-XOR with the word just produced */ \
+    k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \
+    k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \
+}
+
+#define k8e(k,i) /* full step i for an 8-word key: k8ef for words 0-3, then words 4-7 */ \
+{   k8ef(k,i); \
+    k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); /* second half starts with ls_box(ss[3],0) and uses no table entry */ \
+    k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \
+    k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \
+    k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \
+}
+
+#define kdf8(k,i) /* same ss[] recurrence as k8e, but every word is stored as ff(ss[n]); ss[] itself keeps the untransformed values */ \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \
+    ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \
+    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \
+    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \
+    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \
+}
+
+#define kd8(k,i) /* step i: ss[0..7] advance as in k8e, while each stored word is the running value ss[8] XORed with the stored word eight indices earlier */ \
+{   ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; /* ss[8] is scratch: starts as the ls_box/table term of the first half */ \
+    ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; /* ff() is applied to that term only; NOTE(review): presumably relies on ff() distributing over XOR - confirm */ \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \
+    ss[8] = ls_box(ss[3],0); /* the running value is restarted for the second half of the step */ \
+    ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \
+    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \
+    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \
+    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \
+}
+
+#define kdl8(k,i) /* last step: advance ss[0..3] and store them as-is (no ff()); only four words are written */ \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \
+}
+
+int aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) /* fill cx->ks with the decryption schedule built from the eight key words read from key; always returns EXIT_SUCCESS */
+{   uint32_t    ss[9];  /* ss[0..7]: running key words, ss[8]: scratch used by kd8 */
+#if defined( d_vars )
+        d_vars;         /* extra locals, only when the configuration defines d_vars */
+#endif
+    cx->ks[v(56,(0))] = ss[0] = word_in(key, 0);    /* the first four key words are stored untransformed in both variants */
+    cx->ks[v(56,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(56,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(56,(3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+    cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4));    /* words 4-7 are stored via ff(); ss[] keeps the raw values */
+    cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5));
+    cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6));
+    cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7));
+    kdf8(cx->ks, 0); kd8(cx->ks, 1);    /* seven unrolled steps: kdf8, five kd8, then kdl8 */
+    kd8(cx->ks, 2);  kd8(cx->ks, 3);
+    kd8(cx->ks, 4);  kd8(cx->ks, 5);
+    kdl8(cx->ks, 6);
+#else
+    cx->ks[v(56,(4))] = ss[4] = word_in(key, 4);
+    cx->ks[v(56,(5))] = ss[5] = word_in(key, 5);
+    cx->ks[v(56,(6))] = ss[6] = word_in(key, 6);
+    cx->ks[v(56,(7))] = ss[7] = word_in(key, 7);
+    {   uint32_t i;
+
+        for(i = 0; i < 6; ++i)      /* six full 8-word steps ... */
+            k8e(cx->ks,  i);
+        k8ef(cx->ks,  6);           /* ... plus a final 4-word step: 8 + 6*8 + 4 = 60 words in total */
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 14 * N_COLS; ++i)   /* post-pass over the stored words; the first N_COLS words are left as they are */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0;              /* clear inf.l before setting individual inf.b[] bytes */
+    cx->inf.b[0] = 14 * 16;     /* 224; NOTE(review): presumably rounds * block bytes - confirm against the inf definition */
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;    /* mark the context when VIA_ACE_AVAILABLE evaluates true */
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined( AES_VAR )
+
+int aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
+{   /* key_len is accepted either in bytes (16/24/32) or in bits (128/192/256) */
+    if(key_len == 16 || key_len == 128)
+        return aes_decrypt_key128(key, cx);
+    if(key_len == 24 || key_len == 192)
+        return aes_decrypt_key192(key, cx);
+    if(key_len == 32 || key_len == 256)
+        return aes_decrypt_key256(key, cx);
+    return EXIT_FAILURE;    /* every other length is rejected without touching cx */
+}
+
+#endif
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/Crypto/aesopt.h b/Crypto/aesopt.h
index daf0c01c..fec85340 100755
--- a/Crypto/aesopt.h
+++ b/Crypto/aesopt.h
@@ -1,739 +1,739 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-
- This file contains the compilation options for AES (Rijndael) and code
- that is common across encryption, key scheduling and table generation.
-
- OPERATION
-
- These source code files implement the AES algorithm Rijndael designed by
- Joan Daemen and Vincent Rijmen. This version is designed for the standard
- block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
- and 32 bytes).
-
- This version is designed for flexibility and speed using operations on
- 32-bit words rather than operations on bytes.  It can be compiled with
- either big or little endian internal byte order but is faster when the
- native byte order for the processor is used.
-
- THE CIPHER INTERFACE
-
- The cipher interface is implemented as an array of bytes in which lower
- AES bit sequence indexes map to higher numeric significance within bytes.
-
-  uint8_t                 (an unsigned  8-bit type)
-  uint32_t                (an unsigned 32-bit type)
-  struct aes_encrypt_ctx  (structure for the cipher encryption context)
-  struct aes_decrypt_ctx  (structure for the cipher decryption context)
-  AES_RETURN                the function return type
-
-  C subroutine calls:
-
-  AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_encrypt_ctx cx[1]);
-
-  AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_decrypt_ctx cx[1]);
-
- IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
- you call aes_init() before AES is used so that the tables are initialised.
-
- C++ aes class subroutines:
-
-     Class AESencrypt  for encryption
-
-      Construtors:
-          AESencrypt(void)
-          AESencrypt(const unsigned char *key) - 128 bit key
-      Members:
-          AES_RETURN key128(const unsigned char *key)
-          AES_RETURN key192(const unsigned char *key)
-          AES_RETURN key256(const unsigned char *key)
-          AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
-
-      Class AESdecrypt  for encryption
-      Construtors:
-          AESdecrypt(void)
-          AESdecrypt(const unsigned char *key) - 128 bit key
-      Members:
-          AES_RETURN key128(const unsigned char *key)
-          AES_RETURN key192(const unsigned char *key)
-          AES_RETURN key256(const unsigned char *key)
-          AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
-*/
-
-#if !defined( _AESOPT_H )
-#define _AESOPT_H
-
-#if defined( __cplusplus )
-#include "aescpp.h"
-#else
-#include "aes.h"
-#endif
-
-/*  PLATFORM SPECIFIC INCLUDES */
-
-#include "brg_endian.h"
-
-/*  CONFIGURATION - THE USE OF DEFINES
-
-    Later in this section there are a number of defines that control the
-    operation of the code.  In each section, the purpose of each define is
-    explained so that the relevant form can be included or excluded by
-    setting either 1's or 0's respectively on the branches of the related
-    #if clauses.  The following local defines should not be changed.
-*/
-
-#define ENCRYPTION_IN_C     1
-#define DECRYPTION_IN_C     2
-#define ENC_KEYING_IN_C     4
-#define DEC_KEYING_IN_C     8
-
-#define NO_TABLES           0
-#define ONE_TABLE           1
-#define FOUR_TABLES         4
-#define NONE                0
-#define PARTIAL             1
-#define FULL                2
-
-/*  --- START OF USER CONFIGURED OPTIONS --- */
-
-/*  1. BYTE ORDER WITHIN 32 BIT WORDS
-
-    The fundamental data processing units in Rijndael are 8-bit bytes. The
-    input, output and key input are all enumerated arrays of bytes in which
-    bytes are numbered starting at zero and increasing to one less than the
-    number of bytes in the array in question. This enumeration is only used
-    for naming bytes and does not imply any adjacency or order relationship
-    from one byte to another. When these inputs and outputs are considered
-    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
-    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
-    In this implementation bits are numbered from 0 to 7 starting at the
-    numerically least significant end of each byte (bit n represents 2^n).
-
-    However, Rijndael can be implemented more efficiently using 32-bit
-    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
-    into word[n]. While in principle these bytes can be assembled into words
-    in any positions, this implementation only supports the two formats in
-    which bytes in adjacent positions within words also have adjacent byte
-    numbers. This order is called big-endian if the lowest numbered bytes
-    in words have the highest numeric significance and little-endian if the
-    opposite applies.
-
-    This code can work in either order irrespective of the order used by the
-    machine on which it runs. Normally the internal byte order will be set
-    to the order of the processor on which the code is to be run but this
-    define can be used to reverse this in special situations
-
-    WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
-    This define will hence be redefined later (in section 4) if necessary
-*/
-
-#if 1
-#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-#elif 0
-#  define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
-#elif 0
-#  define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
-#else
-#  error The algorithm byte order is not defined
-#endif
-
-/*  2. VIA ACE SUPPORT */
-
-#if defined( __GNUC__ ) && defined( __i386__ ) \
- || defined( _WIN32   ) && defined( _M_IX86  ) \
- && !(defined( _WIN64 ) || defined( _WIN32_WCE ) || defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
-#  define VIA_ACE_POSSIBLE
-#endif
-
-/*  Define this option if support for the VIA ACE is required. This uses
-    inline assembler instructions and is only implemented for the Microsoft,
-    Intel and GCC compilers.  If VIA ACE is known to be present, then defining
-    ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
-    code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
-    it is detected (both present and enabled) but the normal AES code will
-    also be present.
-
-    When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
-    aligned; other input/output buffers do not need to be 16 byte aligned
-    but there are very large performance gains if this can be arranged.
-    VIA ACE also requires the decryption key schedule to be in reverse
-    order (which later checks below ensure).
-*/
-
-#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT )
-#  define USE_VIA_ACE_IF_PRESENT
-#endif
-
-#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT )
-#  define ASSUME_VIA_ACE_PRESENT
-#  endif
-
-/*  3. ASSEMBLER SUPPORT
-
-    This define (which can be on the command line) enables the use of the
-    assembler code routines for encryption, decryption and key scheduling
-    as follows:
-
-    ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
-                encryption and decryption and but with key scheduling in C
-    ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
-                encryption, decryption and key scheduling
-    ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
-                encryption and decryption and but with key scheduling in C
-    ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
-                encryption and decryption and but with key scheduling in C
-
-    Change one 'if 0' below to 'if 1' to select the version or define
-    as a compilation option.
-*/
-
-#if 0 && !defined( ASM_X86_V1C )
-#  define ASM_X86_V1C
-#elif 0 && !defined( ASM_X86_V2  )
-#  define ASM_X86_V2
-#elif 0 && !defined( ASM_X86_V2C )
-#  define ASM_X86_V2C
-#elif 0 && !defined( ASM_AMD64_C )
-#  define ASM_AMD64_C
-#endif
-
-#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \
-      && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 )
-#  error Assembler code is only available for x86 and AMD64 systems
-#endif
-
-/*  4. FAST INPUT/OUTPUT OPERATIONS.
-
-    On some machines it is possible to improve speed by transferring the
-    bytes in the input and output arrays to and from the internal 32-bit
-    variables by addressing these arrays as if they are arrays of 32-bit
-    words.  On some machines this will always be possible but there may
-    be a large performance penalty if the byte arrays are not aligned on
-    the normal word boundaries. On other machines this technique will
-    lead to memory access errors when such 32-bit word accesses are not
-    properly aligned. The option SAFE_IO avoids such problems but will
-    often be slower on those machines that support misaligned access
-    (especially so if care is taken to align the input  and output byte
-    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
-    assumed that access to byte arrays as if they are arrays of 32-bit
-    words will not cause problems when such accesses are misaligned.
-*/
-#if 1 && !defined( _MSC_VER )
-#  define SAFE_IO
-#endif
-
-/*  5. LOOP UNROLLING
-
-    The code for encryption and decrytpion cycles through a number of rounds
-    that can be implemented either in a loop or by expanding the code into a
-    long sequence of instructions, the latter producing a larger program but
-    one that will often be much faster. The latter is called loop unrolling.
-    There are also potential speed advantages in expanding two iterations in
-    a loop with half the number of iterations, which is called partial loop
-    unrolling.  The following options allow partial or full loop unrolling
-    to be set independently for encryption and decryption
-*/
-#if 1
-#  define ENC_UNROLL  FULL
-#elif 0
-#  define ENC_UNROLL  PARTIAL
-#else
-#  define ENC_UNROLL  NONE
-#endif
-
-#if 1
-#  define DEC_UNROLL  FULL
-#elif 0
-#  define DEC_UNROLL  PARTIAL
-#else
-#  define DEC_UNROLL  NONE
-#endif
-
-#if 1
-#  define ENC_KS_UNROLL
-#endif
-
-#if 1
-#  define DEC_KS_UNROLL
-#endif
-
-/*  6. FAST FINITE FIELD OPERATIONS
-
-    If this section is included, tables are used to provide faster finite
-    field arithmetic (this has no effect if FIXED_TABLES is defined).
-*/
-#if 1
-#  define FF_TABLES
-#endif
-
-/*  7. INTERNAL STATE VARIABLE FORMAT
-
-    The internal state of Rijndael is stored in a number of local 32-bit
-    word varaibles which can be defined either as an array or as individual
-    names variables. Include this section if you want to store these local
-    varaibles in arrays. Otherwise individual local variables will be used.
-*/
-#if 1
-#  define ARRAYS
-#endif
-
-/*  8. FIXED OR DYNAMIC TABLES
-
-    When this section is included the tables used by the code are compiled
-    statically into the binary file.  Otherwise the subroutine aes_init()
-    must be called to compute them before the code is first used.
-*/
-#if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
-#  define FIXED_TABLES
-#endif
-
-/*  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
-
-    In some systems it is better to mask longer values to extract bytes 
-    rather than using a cast. This option allows this choice.
-*/
-#if 0
-#  define to_byte(x)  ((uint8_t)(x))
-#else
-#  define to_byte(x)  ((x) & 0xff)
-#endif
-
-/*  10. TABLE ALIGNMENT
-
-    On some sytsems speed will be improved by aligning the AES large lookup
-    tables on particular boundaries. This define should be set to a power of
-    two giving the desired alignment. It can be left undefined if alignment
-    is not needed.  This option is specific to the Microsft VC++ compiler -
-    it seems to sometimes cause trouble for the VC++ version 6 compiler.
-*/
-
-#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
-#  define TABLE_ALIGN 32
-#endif
-
-/*  11.  REDUCE CODE AND TABLE SIZE
-
-    This replaces some expanded macros with function calls if AES_ASM_V2 or
-    AES_ASM_V2C are defined
-*/
-
-#if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))
-#  define REDUCE_CODE_SIZE
-#endif
-
-/*  12. TABLE OPTIONS
-
-    This cipher proceeds by repeating in a number of cycles known as 'rounds'
-    which are implemented by a round function which can optionally be speeded
-    up using tables.  The basic tables are each 256 32-bit words, with either
-    one or four tables being required for each round function depending on
-    how much speed is required. The encryption and decryption round functions
-    are different and the last encryption and decrytpion round functions are
-    different again making four different round functions in all.
-
-    This means that:
-      1. Normal encryption and decryption rounds can each use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-      2. The last encryption and decryption rounds can also use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-
-    Include or exclude the appropriate definitions below to set the number
-    of tables used by this implementation.
-*/
-
-#if 1   /* set tables for the normal encryption round */
-#  define ENC_ROUND   FOUR_TABLES
-#elif 0
-#  define ENC_ROUND   ONE_TABLE
-#else
-#  define ENC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last encryption round */
-#  define LAST_ENC_ROUND  FOUR_TABLES
-#elif 0
-#  define LAST_ENC_ROUND  ONE_TABLE
-#else
-#  define LAST_ENC_ROUND  NO_TABLES
-#endif
-
-#if 1   /* set tables for the normal decryption round */
-#  define DEC_ROUND   FOUR_TABLES
-#elif 0
-#  define DEC_ROUND   ONE_TABLE
-#else
-#  define DEC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last decryption round */
-#  define LAST_DEC_ROUND  FOUR_TABLES
-#elif 0
-#  define LAST_DEC_ROUND  ONE_TABLE
-#else
-#  define LAST_DEC_ROUND  NO_TABLES
-#endif
-
-/*  The decryption key schedule can be speeded up with tables in the same
-    way that the round functions can.  Include or exclude the following
-    defines to set this requirement.
-*/
-#if 1
-#  define KEY_SCHED   FOUR_TABLES
-#elif 0
-#  define KEY_SCHED   ONE_TABLE
-#else
-#  define KEY_SCHED   NO_TABLES
-#endif
-
-/*  ---- END OF USER CONFIGURED OPTIONS ---- */
-
-/* VIA ACE support is only available for VC++ and GCC */
-
-#if !defined( _MSC_VER ) && !defined( __GNUC__ )
-#  if defined( ASSUME_VIA_ACE_PRESENT )
-#    undef ASSUME_VIA_ACE_PRESENT
-#  endif
-#  if defined( USE_VIA_ACE_IF_PRESENT )
-#    undef USE_VIA_ACE_IF_PRESENT
-#  endif
-#endif
-
-#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
-#  define USE_VIA_ACE_IF_PRESENT
-#endif
-
-#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
-#  define AES_REV_DKS
-#endif
-
-/* Assembler support requires the use of platform byte order */
-
-#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
-    && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
-#  undef  ALGORITHM_BYTE_ORDER
-#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-#endif
-
-/* In this implementation the columns of the state array are each held in
-   32-bit words. The state array can be held in various ways: in an array
-   of words, in a number of individual word variables or in a number of
-   processor registers. The following define maps a variable name x and
-   a column number c to the way the state array variable is to be held.
-   The first define below maps the state into an array x[c] whereas the
-   second form maps the state into a number of individual variables x0,
-   x1, etc.  Another form could map individual state colums to machine
-   register names.
-*/
-
-#if defined( ARRAYS )
-#  define s(x,c) x[c]
-#else
-#  define s(x,c) x##c
-#endif
-
-/*  This implementation provides subroutines for encryption, decryption
-    and for setting the three key lengths (separately) for encryption
-    and decryption. Since not all functions are needed, masks are set
-    up here to determine which will be implemented in C
-*/
-
-#if !defined( AES_ENCRYPT )
-#  define EFUNCS_IN_C   0
-#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
-    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
-#  define EFUNCS_IN_C   ENC_KEYING_IN_C
-#elif !defined( ASM_X86_V2 )
-#  define EFUNCS_IN_C   ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
-#else
-#  define EFUNCS_IN_C   0
-#endif
-
-#if !defined( AES_DECRYPT )
-#  define DFUNCS_IN_C   0
-#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
-    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
-#  define DFUNCS_IN_C   DEC_KEYING_IN_C
-#elif !defined( ASM_X86_V2 )
-#  define DFUNCS_IN_C   ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
-#else
-#  define DFUNCS_IN_C   0
-#endif
-
-#define FUNCS_IN_C  ( EFUNCS_IN_C | DFUNCS_IN_C )
-
-/* END OF CONFIGURATION OPTIONS */
-
-#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
-
-/* Disable or report errors on some combinations of options */
-
-#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
-#  undef  LAST_ENC_ROUND
-#  define LAST_ENC_ROUND  NO_TABLES
-#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
-#  undef  LAST_ENC_ROUND
-#  define LAST_ENC_ROUND  ONE_TABLE
-#endif
-
-#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
-#  undef  ENC_UNROLL
-#  define ENC_UNROLL  NONE
-#endif
-
-#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
-#  undef  LAST_DEC_ROUND
-#  define LAST_DEC_ROUND  NO_TABLES
-#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
-#  undef  LAST_DEC_ROUND
-#  define LAST_DEC_ROUND  ONE_TABLE
-#endif
-
-#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
-#  undef  DEC_UNROLL
-#  define DEC_UNROLL  NONE
-#endif
-
-#if defined( bswap32 )
-#  define aes_sw32    bswap32
-#elif defined( bswap_32 )
-#  define aes_sw32    bswap_32
-#else
-#  define brot(x,n)   (((uint32_t)(x) <<  n) | ((uint32_t)(x) >> (32 - n)))
-#  define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
-#endif
-
-/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
-               higher index positions with wrap around into low positions
-    ups(x,n):  moves bytes by n positions to higher index positions in
-               words but without wrap around
-    bval(x,n): extracts a byte from a word
-
-    WARNING:   The definitions given here are intended only for use with
-               unsigned variables and with shift counts that are compile
-               time constants
-*/
-
-#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
-#  define upr(x,n)      (((uint32_t)(x) << (8 * (n))) | ((uint32_t)(x) >> (32 - 8 * (n))))
-#  define ups(x,n)      ((uint32_t) (x) << (8 * (n)))
-#  define bval(x,n)     to_byte((x) >> (8 * (n)))
-#  define bytes2word(b0, b1, b2, b3)  \
-        (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
-#endif
-
-#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
-#  define upr(x,n)      (((uint32_t)(x) >> (8 * (n))) | ((uint32_t)(x) << (32 - 8 * (n))))
-#  define ups(x,n)      ((uint32_t) (x) >> (8 * (n)))
-#  define bval(x,n)     to_byte((x) >> (24 - 8 * (n)))
-#  define bytes2word(b0, b1, b2, b3)  \
-        (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | ((uint32_t)(b2) << 8) | (b3))
-#endif
-
-#if defined( SAFE_IO )
-#  define word_in(x,c)    bytes2word(((const uint8_t*)(x)+4*c)[0], ((const uint8_t*)(x)+4*c)[1], \
-                                   ((const uint8_t*)(x)+4*c)[2], ((const uint8_t*)(x)+4*c)[3])
-#  define word_out(x,c,v) { ((uint8_t*)(x)+4*c)[0] = bval(v,0); ((uint8_t*)(x)+4*c)[1] = bval(v,1); \
-                          ((uint8_t*)(x)+4*c)[2] = bval(v,2); ((uint8_t*)(x)+4*c)[3] = bval(v,3); }
-#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
-#  define word_in(x,c)    (*((uint32_t*)(x)+(c)))
-#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
-#else
-#  define word_in(x,c)    aes_sw32(*((uint32_t*)(x)+(c)))
-#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = aes_sw32(v))
-#endif
-
-/* the finite field modular polynomial and elements */
-
-#define WPOLY   0x011b
-#define BPOLY     0x1b
-
-/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
-
-#define gf_c1  0x80808080
-#define gf_c2  0x7f7f7f7f
-#define gf_mulx(x)  ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY))
-
-/* The following defines provide alternative definitions of gf_mulx that might
-   give improved performance if a fast 32-bit multiply is not available. Note
-   that a temporary variable u needs to be defined where gf_mulx is used.
-
-#define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6))
-#define gf_c4  (0x01010101 * BPOLY)
-#define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4)
-*/
-
-/* Work out which tables are needed for the different options   */
-
-#if defined( ASM_X86_V1C )
-#  if defined( ENC_ROUND )
-#    undef  ENC_ROUND
-#  endif
-#  define ENC_ROUND   FOUR_TABLES
-#  if defined( LAST_ENC_ROUND )
-#    undef  LAST_ENC_ROUND
-#  endif
-#  define LAST_ENC_ROUND  FOUR_TABLES
-#  if defined( DEC_ROUND )
-#    undef  DEC_ROUND
-#  endif
-#  define DEC_ROUND   FOUR_TABLES
-#  if defined( LAST_DEC_ROUND )
-#    undef  LAST_DEC_ROUND
-#  endif
-#  define LAST_DEC_ROUND  FOUR_TABLES
-#  if defined( KEY_SCHED )
-#    undef  KEY_SCHED
-#    define KEY_SCHED   FOUR_TABLES
-#  endif
-#endif
-
-#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
-#  if ENC_ROUND == ONE_TABLE
-#    define FT1_SET
-#  elif ENC_ROUND == FOUR_TABLES
-#    define FT4_SET
-#  else
-#    define SBX_SET
-#  endif
-#  if LAST_ENC_ROUND == ONE_TABLE
-#    define FL1_SET
-#  elif LAST_ENC_ROUND == FOUR_TABLES
-#    define FL4_SET
-#  elif !defined( SBX_SET )
-#    define SBX_SET
-#  endif
-#endif
-
-#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
-#  if DEC_ROUND == ONE_TABLE
-#    define IT1_SET
-#  elif DEC_ROUND == FOUR_TABLES
-#    define IT4_SET
-#  else
-#    define ISB_SET
-#  endif
-#  if LAST_DEC_ROUND == ONE_TABLE
-#    define IL1_SET
-#  elif LAST_DEC_ROUND == FOUR_TABLES
-#    define IL4_SET
-#  elif !defined(ISB_SET)
-#    define ISB_SET
-#  endif
-#endif
-
-#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )))
-#  if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
-#    if KEY_SCHED == ONE_TABLE
-#      if !defined( FL1_SET )  && !defined( FL4_SET ) 
-#        define LS1_SET
-#      endif
-#    elif KEY_SCHED == FOUR_TABLES
-#      if !defined( FL4_SET )
-#        define LS4_SET
-#      endif
-#    elif !defined( SBX_SET )
-#      define SBX_SET
-#    endif
-#  endif
-#  if (FUNCS_IN_C & DEC_KEYING_IN_C)
-#    if KEY_SCHED == ONE_TABLE
-#      define IM1_SET
-#    elif KEY_SCHED == FOUR_TABLES
-#      define IM4_SET
-#    elif !defined( SBX_SET )
-#      define SBX_SET
-#    endif
-#  endif
-#endif
-
-/* generic definitions of Rijndael macros that use tables    */
-
-#define no_table(x,box,vf,rf,c) bytes2word( \
-    box[bval(vf(x,0,c),rf(0,c))], \
-    box[bval(vf(x,1,c),rf(1,c))], \
-    box[bval(vf(x,2,c),rf(2,c))], \
-    box[bval(vf(x,3,c),rf(3,c))])
-
-#define one_table(x,op,tab,vf,rf,c) \
- (     tab[bval(vf(x,0,c),rf(0,c))] \
-  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
-  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
-  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
-
-#define four_tables(x,tab,vf,rf,c) \
- (  tab[0][bval(vf(x,0,c),rf(0,c))] \
-  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((8+r-c)&3)
-
-/* perform forward and inverse column mix operation on four bytes in long word x in */
-/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
-
-#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 
-
-#if defined( FM4_SET )      /* not currently used */
-#  define fwd_mcol(x)       four_tables(x,t_use(f,m),vf1,rf1,0)
-#elif defined( FM1_SET )    /* not currently used */
-#  define fwd_mcol(x)       one_table(x,upr,t_use(f,m),vf1,rf1,0)
-#else
-#  define dec_fmvars        uint32_t g2
-#  define fwd_mcol(x)       (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
-#endif
-
-#if defined( IM4_SET )
-#  define inv_mcol(x)       four_tables(x,t_use(i,m),vf1,rf1,0)
-#elif defined( IM1_SET )
-#  define inv_mcol(x)       one_table(x,upr,t_use(i,m),vf1,rf1,0)
-#else
-#  define dec_imvars        uint32_t g2, g4, g9
-#  define inv_mcol(x)       (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
-                            (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
-#endif
-
-#if defined( FL4_SET )
-#  define ls_box(x,c)       four_tables(x,t_use(f,l),vf1,rf2,c)
-#elif defined( LS4_SET )
-#  define ls_box(x,c)       four_tables(x,t_use(l,s),vf1,rf2,c)
-#elif defined( FL1_SET )
-#  define ls_box(x,c)       one_table(x,upr,t_use(f,l),vf1,rf2,c)
-#elif defined( LS1_SET )
-#  define ls_box(x,c)       one_table(x,upr,t_use(l,s),vf1,rf2,c)
-#else
-#  define ls_box(x,c)       no_table(x,t_use(s,box),vf1,rf2,c)
-#endif
-
-#endif
-
-#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
-#  define ISB_SET
-#endif
-
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+
+ This file contains the compilation options for AES (Rijndael) and code
+ that is common across encryption, key scheduling and table generation.
+
+ OPERATION
+
+ These source code files implement the AES algorithm Rijndael designed by
+ Joan Daemen and Vincent Rijmen. This version is designed for the standard
+ block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
+ and 32 bytes).
+
+ This version is designed for flexibility and speed using operations on
+ 32-bit words rather than operations on bytes.  It can be compiled with
+ either big or little endian internal byte order but is faster when the
+ native byte order for the processor is used.
+
+ THE CIPHER INTERFACE
+
+ The cipher interface is implemented as an array of bytes in which lower
+ AES bit sequence indexes map to higher numeric significance within bytes.
+
+  uint8_t                 (an unsigned  8-bit type)
+  uint32_t                (an unsigned 32-bit type)
+  struct aes_encrypt_ctx  (structure for the cipher encryption context)
+  struct aes_decrypt_ctx  (structure for the cipher decryption context)
+  AES_RETURN                the function return type
+
+  C subroutine calls:
+
+  AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
+                                                  const aes_encrypt_ctx cx[1]);
+
+  AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
+                                                  const aes_decrypt_ctx cx[1]);
+
+ IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
+ you call aes_init() before AES is used so that the tables are initialised.
+
+ C++ aes class subroutines:
+
+     Class AESencrypt  for encryption
+
+      Constructors:
+          AESencrypt(void)
+          AESencrypt(const unsigned char *key) - 128 bit key
+      Members:
+          AES_RETURN key128(const unsigned char *key)
+          AES_RETURN key192(const unsigned char *key)
+          AES_RETURN key256(const unsigned char *key)
+          AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
+
+      Class AESdecrypt  for decryption
+      Constructors:
+          AESdecrypt(void)
+          AESdecrypt(const unsigned char *key) - 128 bit key
+      Members:
+          AES_RETURN key128(const unsigned char *key)
+          AES_RETURN key192(const unsigned char *key)
+          AES_RETURN key256(const unsigned char *key)
+          AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
+*/
+
+#if !defined( _AESOPT_H )
+#define _AESOPT_H
+
+#if defined( __cplusplus )
+#include "aescpp.h"
+#else
+#include "aes.h"
+#endif
+
+/*  PLATFORM SPECIFIC INCLUDES */
+
+#include "brg_endian.h"
+
+/*  CONFIGURATION - THE USE OF DEFINES
+
+    Later in this section there are a number of defines that control the
+    operation of the code.  In each section, the purpose of each define is
+    explained so that the relevant form can be included or excluded by
+    setting either 1's or 0's respectively on the branches of the related
+    #if clauses.  The following local defines should not be changed.
+*/
+
+#define ENCRYPTION_IN_C     1
+#define DECRYPTION_IN_C     2
+#define ENC_KEYING_IN_C     4
+#define DEC_KEYING_IN_C     8
+
+#define NO_TABLES           0
+#define ONE_TABLE           1
+#define FOUR_TABLES         4
+#define NONE                0
+#define PARTIAL             1
+#define FULL                2
+
+/*  --- START OF USER CONFIGURED OPTIONS --- */
+
+/*  1. BYTE ORDER WITHIN 32 BIT WORDS
+
+    The fundamental data processing units in Rijndael are 8-bit bytes. The
+    input, output and key input are all enumerated arrays of bytes in which
+    bytes are numbered starting at zero and increasing to one less than the
+    number of bytes in the array in question. This enumeration is only used
+    for naming bytes and does not imply any adjacency or order relationship
+    from one byte to another. When these inputs and outputs are considered
+    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+    In this implementation bits are numbered from 0 to 7 starting at the
+    numerically least significant end of each byte (bit n represents 2^n).
+
+    However, Rijndael can be implemented more efficiently using 32-bit
+    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+    into word[n]. While in principle these bytes can be assembled into words
+    in any positions, this implementation only supports the two formats in
+    which bytes in adjacent positions within words also have adjacent byte
+    numbers. This order is called big-endian if the lowest numbered bytes
+    in words have the highest numeric significance and little-endian if the
+    opposite applies.
+
+    This code can work in either order irrespective of the order used by the
+    machine on which it runs. Normally the internal byte order will be set
+    to the order of the processor on which the code is to be run but this
+    define can be used to reverse this in special situations
+
+    WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
+    This define will hence be redefined later (in section 4) if necessary
+*/
+
+#if 1
+#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif 0
+#  define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0
+#  define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error The algorithm byte order is not defined
+#endif
+
+/*  2. VIA ACE SUPPORT */
+
+#if defined( __GNUC__ ) && defined( __i386__ ) \
+ || defined( _WIN32   ) && defined( _M_IX86  ) \
+ && !(defined( _WIN64 ) || defined( _WIN32_WCE ) || defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
+#  define VIA_ACE_POSSIBLE
+#endif
+
+/*  Define this option if support for the VIA ACE is required. This uses
+    inline assembler instructions and is only implemented for the Microsoft,
+    Intel and GCC compilers.  If VIA ACE is known to be present, then defining
+    ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
+    code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
+    it is detected (both present and enabled) but the normal AES code will
+    also be present.
+
+    When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
+    aligned; other input/output buffers do not need to be 16 byte aligned
+    but there are very large performance gains if this can be arranged.
+    VIA ACE also requires the decryption key schedule to be in reverse
+    order (which later checks below ensure).
+*/
+
+#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT )
+#  define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT )
+#  define ASSUME_VIA_ACE_PRESENT
+#  endif
+
+/*  3. ASSEMBLER SUPPORT
+
+    This define (which can be on the command line) enables the use of the
+    assembler code routines for encryption, decryption and key scheduling
+    as follows:
+
+    ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
+                encryption and decryption but with key scheduling in C
+    ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
+                encryption, decryption and key scheduling
+    ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
+                encryption and decryption but with key scheduling in C
+    ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
+                encryption and decryption but with key scheduling in C
+
+    Change one 'if 0' below to 'if 1' to select the version or define
+    as a compilation option.
+*/
+
+#if 0 && !defined( ASM_X86_V1C )
+#  define ASM_X86_V1C
+#elif 0 && !defined( ASM_X86_V2  )
+#  define ASM_X86_V2
+#elif 0 && !defined( ASM_X86_V2C )
+#  define ASM_X86_V2C
+#elif 0 && !defined( ASM_AMD64_C )
+#  define ASM_AMD64_C
+#endif
+
+#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \
+      && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 )
+#  error Assembler code is only available for x86 and AMD64 systems
+#endif
+
+/*  4. FAST INPUT/OUTPUT OPERATIONS.
+
+    On some machines it is possible to improve speed by transferring the
+    bytes in the input and output arrays to and from the internal 32-bit
+    variables by addressing these arrays as if they are arrays of 32-bit
+    words.  On some machines this will always be possible but there may
+    be a large performance penalty if the byte arrays are not aligned on
+    the normal word boundaries. On other machines this technique will
+    lead to memory access errors when such 32-bit word accesses are not
+    properly aligned. The option SAFE_IO avoids such problems but will
+    often be slower on those machines that support misaligned access
+    (especially so if care is taken to align the input  and output byte
+    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+    assumed that access to byte arrays as if they are arrays of 32-bit
+    words will not cause problems when such accesses are misaligned.
+*/
+#if 1 && !defined( _MSC_VER )
+#  define SAFE_IO
+#endif
+
+/*  5. LOOP UNROLLING
+
+    The code for encryption and decryption cycles through a number of rounds
+    that can be implemented either in a loop or by expanding the code into a
+    long sequence of instructions, the latter producing a larger program but
+    one that will often be much faster. The latter is called loop unrolling.
+    There are also potential speed advantages in expanding two iterations in
+    a loop with half the number of iterations, which is called partial loop
+    unrolling.  The following options allow partial or full loop unrolling
+    to be set independently for encryption and decryption
+*/
+#if 1
+#  define ENC_UNROLL  FULL
+#elif 0
+#  define ENC_UNROLL  PARTIAL
+#else
+#  define ENC_UNROLL  NONE
+#endif
+
+#if 1
+#  define DEC_UNROLL  FULL
+#elif 0
+#  define DEC_UNROLL  PARTIAL
+#else
+#  define DEC_UNROLL  NONE
+#endif
+
+#if 1
+#  define ENC_KS_UNROLL
+#endif
+
+#if 1
+#  define DEC_KS_UNROLL
+#endif
+
+/*  6. FAST FINITE FIELD OPERATIONS
+
+    If this section is included, tables are used to provide faster finite
+    field arithmetic (this has no effect if FIXED_TABLES is defined).
+*/
+#if 1
+#  define FF_TABLES
+#endif
+
+/*  7. INTERNAL STATE VARIABLE FORMAT
+
+    The internal state of Rijndael is stored in a number of local 32-bit
+    word variables which can be defined either as an array or as individually
+    named variables. Include this section if you want to store these local
+    variables in arrays. Otherwise individual local variables will be used.
+*/
+#if 1
+#  define ARRAYS
+#endif
+
+/*  8. FIXED OR DYNAMIC TABLES
+
+    When this section is included the tables used by the code are compiled
+    statically into the binary file.  Otherwise the subroutine aes_init()
+    must be called to compute them before the code is first used.
+*/
+#if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
+#  define FIXED_TABLES
+#endif
+
+/*  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
+
+    In some systems it is better to mask longer values to extract bytes 
+    rather than using a cast. This option allows this choice.
+*/
+#if 0
+#  define to_byte(x)  ((uint8_t)(x))
+#else
+#  define to_byte(x)  ((x) & 0xff)
+#endif
+
+/*  10. TABLE ALIGNMENT
+
+    On some systems speed will be improved by aligning the AES large lookup
+    tables on particular boundaries. This define should be set to a power of
+    two giving the desired alignment. It can be left undefined if alignment
+    is not needed.  This option is specific to the Microsoft VC++ compiler -
+    it seems to sometimes cause trouble for the VC++ version 6 compiler.
+*/
+
+#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
+#  define TABLE_ALIGN 32
+#endif
+
+/*  11.  REDUCE CODE AND TABLE SIZE
+
+    This replaces some expanded macros with function calls if ASM_X86_V2 or
+    ASM_X86_V2C are defined
+*/
+
+#if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))
+#  define REDUCE_CODE_SIZE
+#endif
+
+/*  12. TABLE OPTIONS
+
+    This cipher proceeds by repeating in a number of cycles known as 'rounds'
+    which are implemented by a round function which can optionally be speeded
+    up using tables.  The basic tables are each 256 32-bit words, with either
+    one or four tables being required for each round function depending on
+    how much speed is required. The encryption and decryption round functions
+    are different and the last encryption and decryption round functions are
+    different again making four different round functions in all.
+
+    This means that:
+      1. Normal encryption and decryption rounds can each use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+      2. The last encryption and decryption rounds can also use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+
+    Include or exclude the appropriate definitions below to set the number
+    of tables used by this implementation.
+*/
+
+#if 1   /* set tables for the normal encryption round */
+#  define ENC_ROUND   FOUR_TABLES
+#elif 0
+#  define ENC_ROUND   ONE_TABLE
+#else
+#  define ENC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last encryption round */
+#  define LAST_ENC_ROUND  FOUR_TABLES
+#elif 0
+#  define LAST_ENC_ROUND  ONE_TABLE
+#else
+#  define LAST_ENC_ROUND  NO_TABLES
+#endif
+
+#if 1   /* set tables for the normal decryption round */
+#  define DEC_ROUND   FOUR_TABLES
+#elif 0
+#  define DEC_ROUND   ONE_TABLE
+#else
+#  define DEC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last decryption round */
+#  define LAST_DEC_ROUND  FOUR_TABLES
+#elif 0
+#  define LAST_DEC_ROUND  ONE_TABLE
+#else
+#  define LAST_DEC_ROUND  NO_TABLES
+#endif
+
+/*  The decryption key schedule can be speeded up with tables in the same
+    way that the round functions can.  Include or exclude the following
+    defines to set this requirement.
+*/
+#if 1
+#  define KEY_SCHED   FOUR_TABLES
+#elif 0
+#  define KEY_SCHED   ONE_TABLE
+#else
+#  define KEY_SCHED   NO_TABLES
+#endif
+
+/*  ---- END OF USER CONFIGURED OPTIONS ---- */
+
+/* VIA ACE support is only available for VC++ and GCC */
+
+#if !defined( _MSC_VER ) && !defined( __GNUC__ )
+#  if defined( ASSUME_VIA_ACE_PRESENT )
+#    undef ASSUME_VIA_ACE_PRESENT
+#  endif
+#  if defined( USE_VIA_ACE_IF_PRESENT )
+#    undef USE_VIA_ACE_IF_PRESENT
+#  endif
+#endif
+
+#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
+#  define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
+#  define AES_REV_DKS
+#endif
+
+/* Assembler support requires the use of platform byte order */
+
+#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
+    && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
+#  undef  ALGORITHM_BYTE_ORDER
+#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#endif
+
+/* In this implementation the columns of the state array are each held in
+   32-bit words. The state array can be held in various ways: in an array
+   of words, in a number of individual word variables or in a number of
+   processor registers. The following define maps a variable name x and
+   a column number c to the way the state array variable is to be held.
+   The first define below maps the state into an array x[c] whereas the
+   second form maps the state into a number of individual variables x0,
+   x1, etc.  Another form could map individual state columns to machine
+   register names.
+*/
+
+#if defined( ARRAYS )
+#  define s(x,c) x[c]
+#else
+#  define s(x,c) x##c
+#endif
+
+/*  This implementation provides subroutines for encryption, decryption
+    and for setting the three key lengths (separately) for encryption
+    and decryption. Since not all functions are needed, masks are set
+    up here to determine which will be implemented in C
+*/
+
+#if !defined( AES_ENCRYPT )
+#  define EFUNCS_IN_C   0
+#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
+    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
+#  define EFUNCS_IN_C   ENC_KEYING_IN_C
+#elif !defined( ASM_X86_V2 )
+#  define EFUNCS_IN_C   ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
+#else
+#  define EFUNCS_IN_C   0
+#endif
+
+#if !defined( AES_DECRYPT )
+#  define DFUNCS_IN_C   0
+#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
+    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
+#  define DFUNCS_IN_C   DEC_KEYING_IN_C
+#elif !defined( ASM_X86_V2 )
+#  define DFUNCS_IN_C   ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
+#else
+#  define DFUNCS_IN_C   0
+#endif
+
+#define FUNCS_IN_C  ( EFUNCS_IN_C | DFUNCS_IN_C )
+
+/* END OF CONFIGURATION OPTIONS */
+
+#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
+
+/* Disable or report errors on some combinations of options */
+
+#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
+#  undef  LAST_ENC_ROUND
+#  define LAST_ENC_ROUND  NO_TABLES
+#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
+#  undef  LAST_ENC_ROUND
+#  define LAST_ENC_ROUND  ONE_TABLE
+#endif
+
+#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
+#  undef  ENC_UNROLL
+#  define ENC_UNROLL  NONE
+#endif
+
+#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
+#  undef  LAST_DEC_ROUND
+#  define LAST_DEC_ROUND  NO_TABLES
+#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
+#  undef  LAST_DEC_ROUND
+#  define LAST_DEC_ROUND  ONE_TABLE
+#endif
+
+#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
+#  undef  DEC_UNROLL
+#  define DEC_UNROLL  NONE
+#endif
+
+#if defined( bswap32 )   /* reuse a bswap32 macro when one is already defined */
+#  define aes_sw32    bswap32
+#elif defined( bswap_32 )   /* likewise for a bswap_32 macro */
+#  define aes_sw32    bswap_32
+#else   /* portable fallback: byte reversal built from two 32-bit rotations */
+#  define brot(x,n)   (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
+#  define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
+#endif
+
+/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
+               higher index positions with wrap around into low positions
+    ups(x,n):  moves bytes by n positions to higher index positions in
+               words but without wrap around
+    bval(x,n): extracts a byte from a word
+
+    WARNING:   The definitions given here are intended only for use with
+               unsigned variables and with shift counts that are compile
+               time constants
+*/
+
+#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )   /* byte 0 is the least significant byte of a word */
+#  define upr(x,n)      (((uint32_t)(x) << (8 * (n))) | ((uint32_t)(x) >> (32 - 8 * (n))))
+#  define ups(x,n)      ((uint32_t) (x) << (8 * (n)))
+#  define bval(x,n)     to_byte((x) >> (8 * (n)))
+#  define bytes2word(b0, b1, b2, b3)  \
+        (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (uint32_t)(b0))
+#endif
+
+#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )   /* byte 0 is the most significant byte of a word */
+#  define upr(x,n)      (((uint32_t)(x) >> (8 * (n))) | ((uint32_t)(x) << (32 - 8 * (n))))
+#  define ups(x,n)      ((uint32_t) (x) >> (8 * (n)))
+#  define bval(x,n)     to_byte((x) >> (24 - 8 * (n)))
+#  define bytes2word(b0, b1, b2, b3)  \
+        (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | ((uint32_t)(b2) << 8) | (uint32_t)(b3))
+#endif
+
+#if defined( SAFE_IO )   /* byte-at-a-time transfer; no 32-bit pointer access to x */
+#  define word_in(x,c)    bytes2word(((const uint8_t*)(x)+4*(c))[0], ((const uint8_t*)(x)+4*(c))[1], \
+                                   ((const uint8_t*)(x)+4*(c))[2], ((const uint8_t*)(x)+4*(c))[3])
+#  define word_out(x,c,v) { ((uint8_t*)(x)+4*(c))[0] = bval(v,0); ((uint8_t*)(x)+4*(c))[1] = bval(v,1); \
+                          ((uint8_t*)(x)+4*(c))[2] = bval(v,2); ((uint8_t*)(x)+4*(c))[3] = bval(v,3); }
+#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )   /* direct 32-bit load/store of word c */
+#  define word_in(x,c)    (*((uint32_t*)(x)+(c)))
+#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
+#else   /* direct 32-bit load/store of word c with a byte swap via aes_sw32 */
+#  define word_in(x,c)    aes_sw32(*((uint32_t*)(x)+(c)))
+#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = aes_sw32(v))
+#endif
+
+/* the finite field modular polynomial and elements */
+
+#define WPOLY   0x011b
+#define BPOLY     0x1b
+
+/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
+
+#define gf_c1  0x80808080
+#define gf_c2  0x7f7f7f7f
+#define gf_mulx(x)  ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY))
+
+/* The following defines provide alternative definitions of gf_mulx that might
+   give improved performance if a fast 32-bit multiply is not available. Note
+   that a temporary variable u needs to be defined where gf_mulx is used.
+
+#define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6))
+#define gf_c4  (0x01010101 * BPOLY)
+#define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4)
+*/
+
+/* Work out which tables are needed for the different options   */
+
+#if defined( ASM_X86_V1C )
+#  if defined( ENC_ROUND )
+#    undef  ENC_ROUND
+#  endif
+#  define ENC_ROUND   FOUR_TABLES
+#  if defined( LAST_ENC_ROUND )
+#    undef  LAST_ENC_ROUND
+#  endif
+#  define LAST_ENC_ROUND  FOUR_TABLES
+#  if defined( DEC_ROUND )
+#    undef  DEC_ROUND
+#  endif
+#  define DEC_ROUND   FOUR_TABLES
+#  if defined( LAST_DEC_ROUND )
+#    undef  LAST_DEC_ROUND
+#  endif
+#  define LAST_DEC_ROUND  FOUR_TABLES
+#  if defined( KEY_SCHED )
+#    undef  KEY_SCHED
+#    define KEY_SCHED   FOUR_TABLES
+#  endif
+#endif
+
+#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
+#  if ENC_ROUND == ONE_TABLE
+#    define FT1_SET
+#  elif ENC_ROUND == FOUR_TABLES
+#    define FT4_SET
+#  else
+#    define SBX_SET
+#  endif
+#  if LAST_ENC_ROUND == ONE_TABLE
+#    define FL1_SET
+#  elif LAST_ENC_ROUND == FOUR_TABLES
+#    define FL4_SET
+#  elif !defined( SBX_SET )
+#    define SBX_SET
+#  endif
+#endif
+
+#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
+#  if DEC_ROUND == ONE_TABLE
+#    define IT1_SET
+#  elif DEC_ROUND == FOUR_TABLES
+#    define IT4_SET
+#  else
+#    define ISB_SET
+#  endif
+#  if LAST_DEC_ROUND == ONE_TABLE
+#    define IL1_SET
+#  elif LAST_DEC_ROUND == FOUR_TABLES
+#    define IL4_SET
+#  elif !defined(ISB_SET)
+#    define ISB_SET
+#  endif
+#endif
+
+#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )))
+#  if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
+#    if KEY_SCHED == ONE_TABLE
+#      if !defined( FL1_SET )  && !defined( FL4_SET ) 
+#        define LS1_SET
+#      endif
+#    elif KEY_SCHED == FOUR_TABLES
+#      if !defined( FL4_SET )
+#        define LS4_SET
+#      endif
+#    elif !defined( SBX_SET )
+#      define SBX_SET
+#    endif
+#  endif
+#  if (FUNCS_IN_C & DEC_KEYING_IN_C)
+#    if KEY_SCHED == ONE_TABLE
+#      define IM1_SET
+#    elif KEY_SCHED == FOUR_TABLES
+#      define IM4_SET
+#    elif !defined( SBX_SET )
+#      define SBX_SET
+#    endif
+#  endif
+#endif
+
+/* generic definitions of Rijndael macros that use tables    */
+
+#define no_table(x,box,vf,rf,c) bytes2word( \
+    box[bval(vf(x,0,c),rf(0,c))], \
+    box[bval(vf(x,1,c),rf(1,c))], \
+    box[bval(vf(x,2,c),rf(2,c))], \
+    box[bval(vf(x,3,c),rf(3,c))])
+
+#define one_table(x,op,tab,vf,rf,c) \
+ (     tab[bval(vf(x,0,c),rf(0,c))] \
+  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
+  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
+  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
+
+#define four_tables(x,tab,vf,rf,c) \
+ (  tab[0][bval(vf(x,0,c),rf(0,c))] \
+  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
+  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
+  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
+
+#define vf1(x,r,c)  (x)                 /* value selector: always the word x itself          */
+#define rf1(r,c)    (r)                 /* byte selector: byte r, independent of c           */
+#define rf2(r,c)    ((8+(r)-(c))&3)     /* byte selector: (r - c) modulo 4 for r, c in 0..3  */
+
+/* perform forward and inverse column mix operation on four bytes in long word x in */
+/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
+
+#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 
+
+#if defined( FM4_SET )      /* not currently used */
+#  define fwd_mcol(x)       four_tables(x,t_use(f,m),vf1,rf1,0)
+#elif defined( FM1_SET )    /* not currently used */
+#  define fwd_mcol(x)       one_table(x,upr,t_use(f,m),vf1,rf1,0)
+#else
+#  define dec_fmvars        uint32_t g2
+#  define fwd_mcol(x)       (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
+#endif
+
+#if defined( IM4_SET )
+#  define inv_mcol(x)       four_tables(x,t_use(i,m),vf1,rf1,0)
+#elif defined( IM1_SET )
+#  define inv_mcol(x)       one_table(x,upr,t_use(i,m),vf1,rf1,0)
+#else
+#  define dec_imvars        uint32_t g2, g4, g9
+#  define inv_mcol(x)       (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
+                            (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
+#endif
+
+#if defined( FL4_SET )
+#  define ls_box(x,c)       four_tables(x,t_use(f,l),vf1,rf2,c)
+#elif defined( LS4_SET )
+#  define ls_box(x,c)       four_tables(x,t_use(l,s),vf1,rf2,c)
+#elif defined( FL1_SET )
+#  define ls_box(x,c)       one_table(x,upr,t_use(f,l),vf1,rf2,c)
+#elif defined( LS1_SET )
+#  define ls_box(x,c)       one_table(x,upr,t_use(l,s),vf1,rf2,c)
+#else
+#  define ls_box(x,c)       no_table(x,t_use(s,box),vf1,rf2,c)
+#endif
+
+#endif
+
+#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
+#  define ISB_SET
+#endif
+
+#endif
diff --git a/Crypto/aestab.c b/Crypto/aestab.c
index be277cc3..7ef94eb7 100755
--- a/Crypto/aestab.c
+++ b/Crypto/aestab.c
@@ -1,391 +1,391 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-*/
-
-#define DO_TABLES
-
-#include "aes.h"
-#include "aesopt.h"
-
-#if defined(FIXED_TABLES)
-
-#define sb_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
-
-#define isb_data(w) {\
-    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
-    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
-    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
-    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
-    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
-    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
-    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
-    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
-    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
-    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
-    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
-    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
-    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
-    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
-    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
-    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
-    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
-    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
-    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
-    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
-    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
-    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
-    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
-    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
-    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
-    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
-    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
-    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
-    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
-    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
-    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
-    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
-
-#define mm_data(w) {\
-    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
-    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
-    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
-    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
-    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
-    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
-    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
-    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
-    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
-    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
-    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
-    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
-    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
-    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
-    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
-    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
-    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
-    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
-    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
-    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
-    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
-    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
-    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
-    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
-    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
-    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
-    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
-    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
-    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
-    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
-    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
-    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
-
-#define rc_data(w) {\
-    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
-    w(0x1b), w(0x36) }
-
-#define h0(x)   (x)
-
-#define w0(p)   bytes2word(p, 0, 0, 0)
-#define w1(p)   bytes2word(0, p, 0, 0)
-#define w2(p)   bytes2word(0, 0, p, 0)
-#define w3(p)   bytes2word(0, 0, 0, p)
-
-#define u0(p)   bytes2word(f2(p), p, p, f3(p))
-#define u1(p)   bytes2word(f3(p), f2(p), p, p)
-#define u2(p)   bytes2word(p, f3(p), f2(p), p)
-#define u3(p)   bytes2word(p, p, f3(p), f2(p))
-
-#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
-#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
-#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
-#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
-
-#endif
-
-#if defined(FIXED_TABLES) || !defined(FF_TABLES)
-
-#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
-#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
-#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
-                        ^ (((x>>5) & 4) * WPOLY))
-#define f3(x)   (f2(x) ^ x)
-#define f9(x)   (f8(x) ^ x)
-#define fb(x)   (f8(x) ^ f2(x) ^ x)
-#define fd(x)   (f8(x) ^ f4(x) ^ x)
-#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
-
-#else
-
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-
-#endif
-
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#if defined(FIXED_TABLES)
-
-/* implemented in case of wrong call for fixed tables */
-
-int aes_init(void)
-{
-    return EXIT_SUCCESS;
-}
-
-#else   /*  Generate the tables for the dynamic table option */
-
-#if defined(FF_TABLES)
-
-#define gf_inv(x)   ((x) ? pow[ 255 - log[x]] : 0)
-
-#else 
-
-/*  It will generally be sensible to use tables to compute finite
-    field multiplies and inverses but where memory is scarse this
-    code might sometimes be better. But it only has effect during
-    initialisation so its pretty unimportant in overall terms.
-*/
-
-/*  return 2 ^ (n - 1) where n is the bit number of the highest bit
-    set in x with x in the range 1 < x < 0x00000200.   This form is
-    used so that locals within fi can be bytes rather than words
-*/
-
-static uint_8t hibit(const uint_32t x)
-{   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
-
-    r |= (r >> 2);
-    r |= (r >> 4);
-    return (r + 1) >> 1;
-}
-
-/* return the inverse of the finite field element x */
-
-static uint_8t gf_inv(const uint_8t x)
-{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
-
-    if(x < 2) 
-        return x;
-
-    for( ; ; )
-    {
-        if(n1)
-            while(n2 >= n1)             /* divide polynomial p2 by p1    */
-            {
-                n2 /= n1;               /* shift smaller polynomial left */ 
-                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
-                v2 ^= v1 * n2;          /* shift accumulated value and   */ 
-                n2 = hibit(p2);         /* add into result               */
-            }
-        else
-            return v1;
-
-        if(n2)                          /* repeat with values swapped    */ 
-            while(n1 >= n2)
-            {
-                n1 /= n2; 
-                p1 ^= p2 * n1; 
-                v1 ^= v2 * n1; 
-                n1 = hibit(p1);
-            }
-        else
-            return v2;
-    }
-}
-
-#endif
-
-/* The forward and inverse affine transformations used in the S-box */
-uint_8t fwd_affine(const uint_8t x)
-{   uint_32t w = x;
-    w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
-    return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
-}
-
-uint_8t inv_affine(const uint_8t x)
-{   uint_32t w = x;
-    w = (w << 1) ^ (w << 3) ^ (w << 6);
-    return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
-}
-
-static int init = 0;
-
-AES_RETURN aes_init(void)
-{   uint_32t  i, w;
-
-#if defined(FF_TABLES)
-
-    uint_8t  pow[512], log[256];
-
-    if(init)
-        return EXIT_SUCCESS;
-    /*  log and power tables for GF(2^8) finite field with
-        WPOLY as modular polynomial - the simplest primitive
-        root is 0x03, used here to generate the tables
-    */
-
-    i = 0; w = 1;
-    do
-    {
-        pow[i] = (uint_8t)w;
-        pow[i + 255] = (uint_8t)w;
-        log[w] = (uint_8t)i++;
-        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-    }
-    while (w != 1);
-
-#else
-    if(init)
-        return EXIT_SUCCESS;
-#endif
-
-    for(i = 0, w = 1; i < RC_LENGTH; ++i)
-    {
-        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
-        w = f2(w);
-    }
-
-    for(i = 0; i < 256; ++i)
-    {   uint_8t    b;
-
-        b = fwd_affine(gf_inv((uint_8t)i));
-        w = bytes2word(f2(b), b, b, f3(b));
-
-#if defined( SBX_SET )
-        t_set(s,box)[i] = b;
-#endif
-
-#if defined( FT1_SET )                 /* tables for a normal encryption round */
-        t_set(f,n)[i] = w;
-#endif
-#if defined( FT4_SET )
-        t_set(f,n)[0][i] = w;
-        t_set(f,n)[1][i] = upr(w,1);
-        t_set(f,n)[2][i] = upr(w,2);
-        t_set(f,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-
-#if defined( FL1_SET )            /* tables for last encryption round (may also   */
-        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
-#endif
-#if defined( FL4_SET )
-        t_set(f,l)[0][i] = w;
-        t_set(f,l)[1][i] = upr(w,1);
-        t_set(f,l)[2][i] = upr(w,2);
-        t_set(f,l)[3][i] = upr(w,3);
-#endif
-
-#if defined( LS1_SET )			/* table for key schedule if t_set(f,l) above is*/
-        t_set(l,s)[i] = w;      /* not of the required form                     */
-#endif
-#if defined( LS4_SET )
-        t_set(l,s)[0][i] = w;
-        t_set(l,s)[1][i] = upr(w,1);
-        t_set(l,s)[2][i] = upr(w,2);
-        t_set(l,s)[3][i] = upr(w,3);
-#endif
-
-        b = gf_inv(inv_affine((uint_8t)i));
-        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-#if defined( IM1_SET )			/* tables for the inverse mix column operation  */
-        t_set(i,m)[b] = w;
-#endif
-#if defined( IM4_SET )
-        t_set(i,m)[0][b] = w;
-        t_set(i,m)[1][b] = upr(w,1);
-        t_set(i,m)[2][b] = upr(w,2);
-        t_set(i,m)[3][b] = upr(w,3);
-#endif
-
-#if defined( ISB_SET )
-        t_set(i,box)[i] = b;
-#endif
-#if defined( IT1_SET )			/* tables for a normal decryption round */
-        t_set(i,n)[i] = w;
-#endif
-#if defined( IT4_SET )
-        t_set(i,n)[0][i] = w;
-        t_set(i,n)[1][i] = upr(w,1);
-        t_set(i,n)[2][i] = upr(w,2);
-        t_set(i,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-#if defined( IL1_SET )			/* tables for last decryption round */
-        t_set(i,l)[i] = w;
-#endif
-#if defined( IL4_SET )
-        t_set(i,l)[0][i] = w;
-        t_set(i,l)[1][i] = upr(w,1);
-        t_set(i,l)[2][i] = upr(w,2);
-        t_set(i,l)[3][i] = upr(w,3);
-#endif
-    }
-    init = 1;
-    return EXIT_SUCCESS;
-}
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#define DO_TABLES
+
+#include "aes.h"
+#include "aesopt.h"
+
+#if defined(FIXED_TABLES)
+
+#define sb_data(w) {\
+    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+
+#define isb_data(w) {\
+    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
+    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
+    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
+    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
+    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
+    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
+    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
+    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
+    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
+    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
+    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
+    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
+    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
+    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
+    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
+    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
+    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
+    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
+    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
+    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
+    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
+    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
+    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
+    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
+    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
+    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
+    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
+    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
+    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
+    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
+    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
+    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
+
+#define mm_data(w) {\
+    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
+    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
+    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
+    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
+    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
+    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
+    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
+    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
+    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
+    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
+    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
+    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
+    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
+    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
+    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
+    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
+    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
+    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
+    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
+    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
+    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
+    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
+    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
+    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
+    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
+    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
+    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
+    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
+    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
+    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
+    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
+    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
+
+#define rc_data(w) {\
+    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
+    w(0x1b), w(0x36) }
+
+#define h0(x)   (x)
+
+#define w0(p)   bytes2word(p, 0, 0, 0)
+#define w1(p)   bytes2word(0, p, 0, 0)
+#define w2(p)   bytes2word(0, 0, p, 0)
+#define w3(p)   bytes2word(0, 0, 0, p)
+
+#define u0(p)   bytes2word(f2(p), p, p, f3(p))
+#define u1(p)   bytes2word(f3(p), f2(p), p, p)
+#define u2(p)   bytes2word(p, f3(p), f2(p), p)
+#define u3(p)   bytes2word(p, p, f3(p), f2(p))
+
+#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
+#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
+#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
+#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
+
+#endif
+
+#if defined(FIXED_TABLES) || !defined(FF_TABLES)
+
+#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
+#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
+#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
+                        ^ (((x>>5) & 4) * WPOLY))
+#define f3(x)   (f2(x) ^ x)
+#define f9(x)   (f8(x) ^ x)
+#define fb(x)   (f8(x) ^ f2(x) ^ x)
+#define fd(x)   (f8(x) ^ f4(x) ^ x)
+#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
+
+#else
+
+#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
+#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
+#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
+#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
+#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
+#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
+
+#endif
+
+#include "aestab.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(FIXED_TABLES)
+
+/* implemented in case of wrong call for fixed tables */
+
+int aes_init(void)
+{
+    return EXIT_SUCCESS;
+}
+
+#else   /*  Generate the tables for the dynamic table option */
+
+#if defined(FF_TABLES)
+
+#define gf_inv(x)   ((x) ? pow[ 255 - log[x]] : 0)
+
+#else 
+
+/*  It will generally be sensible to use tables to compute finite
+    field multiplies and inverses but where memory is scarse this
+    code might sometimes be better. But it only has effect during
+    initialisation so its pretty unimportant in overall terms.
+*/
+
+/*  return 2 ^ (n - 1) where n is the bit number of the highest bit
+    set in x with x in the range 1 < x < 0x00000200.   This form is
+    used so that locals within fi can be bytes rather than words
+*/
+
+static uint_8t hibit(const uint_32t x)
+{   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
+
+    r |= (r >> 2);
+    r |= (r >> 4);
+    return (r + 1) >> 1;
+}
+
+/* return the inverse of the finite field element x */
+
+static uint_8t gf_inv(const uint_8t x)
+{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
+
+    if(x < 2) 
+        return x;
+
+    for( ; ; )
+    {
+        if(n1)
+            while(n2 >= n1)             /* divide polynomial p2 by p1    */
+            {
+                n2 /= n1;               /* shift smaller polynomial left */ 
+                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
+                v2 ^= v1 * n2;          /* shift accumulated value and   */ 
+                n2 = hibit(p2);         /* add into result               */
+            }
+        else
+            return v1;
+
+        if(n2)                          /* repeat with values swapped    */ 
+            while(n1 >= n2)
+            {
+                n1 /= n2; 
+                p1 ^= p2 * n1; 
+                v1 ^= v2 * n1; 
+                n1 = hibit(p1);
+            }
+        else
+            return v2;
+    }
+}
+
+#endif
+
+/* The forward and inverse affine transformations used in the S-box */
+uint_8t fwd_affine(const uint_8t x)
+{   uint_32t w = x;
+    w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
+    return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
+}
+
+uint_8t inv_affine(const uint_8t x)
+{   uint_32t w = x;
+    w = (w << 1) ^ (w << 3) ^ (w << 6);
+    return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
+}
+
+static int init = 0;
+
+AES_RETURN aes_init(void)
+{   uint_32t  i, w;
+
+#if defined(FF_TABLES)
+
+    uint_8t  pow[512], log[256];
+
+    if(init)
+        return EXIT_SUCCESS;
+    /*  log and power tables for GF(2^8) finite field with
+        WPOLY as modular polynomial - the simplest primitive
+        root is 0x03, used here to generate the tables
+    */
+
+    i = 0; w = 1;
+    do
+    {
+        pow[i] = (uint_8t)w;
+        pow[i + 255] = (uint_8t)w;
+        log[w] = (uint_8t)i++;
+        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
+    }
+    while (w != 1);
+
+#else
+    if(init)
+        return EXIT_SUCCESS;
+#endif
+
+    for(i = 0, w = 1; i < RC_LENGTH; ++i)
+    {
+        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
+        w = f2(w);
+    }
+
+    for(i = 0; i < 256; ++i)
+    {   uint_8t    b;
+
+        b = fwd_affine(gf_inv((uint_8t)i));
+        w = bytes2word(f2(b), b, b, f3(b));
+
+#if defined( SBX_SET )
+        t_set(s,box)[i] = b;
+#endif
+
+#if defined( FT1_SET )                 /* tables for a normal encryption round */
+        t_set(f,n)[i] = w;
+#endif
+#if defined( FT4_SET )
+        t_set(f,n)[0][i] = w;
+        t_set(f,n)[1][i] = upr(w,1);
+        t_set(f,n)[2][i] = upr(w,2);
+        t_set(f,n)[3][i] = upr(w,3);
+#endif
+        w = bytes2word(b, 0, 0, 0);
+
+#if defined( FL1_SET )            /* tables for last encryption round (may also   */
+        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
+#endif
+#if defined( FL4_SET )
+        t_set(f,l)[0][i] = w;
+        t_set(f,l)[1][i] = upr(w,1);
+        t_set(f,l)[2][i] = upr(w,2);
+        t_set(f,l)[3][i] = upr(w,3);
+#endif
+
+#if defined( LS1_SET )			/* table for key schedule if t_set(f,l) above is*/
+        t_set(l,s)[i] = w;      /* not of the required form                     */
+#endif
+#if defined( LS4_SET )
+        t_set(l,s)[0][i] = w;
+        t_set(l,s)[1][i] = upr(w,1);
+        t_set(l,s)[2][i] = upr(w,2);
+        t_set(l,s)[3][i] = upr(w,3);
+#endif
+
+        b = gf_inv(inv_affine((uint_8t)i));
+        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+
+#if defined( IM1_SET )			/* tables for the inverse mix column operation  */
+        t_set(i,m)[b] = w;
+#endif
+#if defined( IM4_SET )
+        t_set(i,m)[0][b] = w;
+        t_set(i,m)[1][b] = upr(w,1);
+        t_set(i,m)[2][b] = upr(w,2);
+        t_set(i,m)[3][b] = upr(w,3);
+#endif
+
+#if defined( ISB_SET )
+        t_set(i,box)[i] = b;
+#endif
+#if defined( IT1_SET )			/* tables for a normal decryption round */
+        t_set(i,n)[i] = w;
+#endif
+#if defined( IT4_SET )
+        t_set(i,n)[0][i] = w;
+        t_set(i,n)[1][i] = upr(w,1);
+        t_set(i,n)[2][i] = upr(w,2);
+        t_set(i,n)[3][i] = upr(w,3);
+#endif
+        w = bytes2word(b, 0, 0, 0);
+#if defined( IL1_SET )			/* tables for last decryption round */
+        t_set(i,l)[i] = w;
+#endif
+#if defined( IL4_SET )
+        t_set(i,l)[0][i] = w;
+        t_set(i,l)[1][i] = upr(w,1);
+        t_set(i,l)[2][i] = upr(w,2);
+        t_set(i,l)[3][i] = upr(w,3);
+#endif
+    }
+    init = 1;
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
diff --git a/Crypto/aestab.h b/Crypto/aestab.h
index 884de2f6..3e88697d 100755
--- a/Crypto/aestab.h
+++ b/Crypto/aestab.h
@@ -1,173 +1,173 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-
- This file contains the code for declaring the tables needed to implement
- AES. The file aesopt.h is assumed to be included before this header file.
- If there are no global variables, the definitions here can be used to put
- the AES tables in a structure so that a pointer can then be added to the
- AES context to pass them to the AES routines that need them.   If this
- facility is used, the calling program has to ensure that this pointer is
- managed appropriately.  In particular, the value of the t_dec(in,it) item
- in the table structure must be set to zero in order to ensure that the
- tables are initialised. In practice the three code sequences in aeskey.c
- that control the calls to aes_init() and the aes_init() routine itself will
- have to be changed for a specific implementation. If global variables are
- available it will generally be preferable to use them with the precomputed
- FIXED_TABLES option that uses static global tables.
-
- The following defines can be used to control the way the tables
- are defined, initialised and used in embedded environments that
- require special features for these purposes
-
-    the 't_dec' construction is used to declare fixed table arrays
-    the 't_set' construction is used to set fixed table values
-    the 't_use' construction is used to access fixed table values
-
-    256 byte tables:
-
-        t_xxx(s,box)    => forward S box
-        t_xxx(i,box)    => inverse S box
-
-    256 32-bit word OR 4 x 256 32-bit word tables:
-
-        t_xxx(f,n)      => forward normal round
-        t_xxx(f,l)      => forward last round
-        t_xxx(i,n)      => inverse normal round
-        t_xxx(i,l)      => inverse last round
-        t_xxx(l,s)      => key schedule table
-        t_xxx(i,m)      => key schedule table
-
-    Other variables and tables:
-
-        t_xxx(r,c)      => the rcon table
-*/
-
-#if !defined( _AESTAB_H )
-#define _AESTAB_H
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define t_dec(m,n) t_##m##n
-#define t_set(m,n) t_##m##n
-#define t_use(m,n) t_##m##n
-
-#if defined(FIXED_TABLES)
-#  if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ ))
-/*   make tables far data to avoid using too much DGROUP space (PG) */
-#    define CONST const far
-#  else
-#    define CONST const
-#  endif
-#else
-#  define CONST
-#endif
-
-#if defined(DO_TABLES)
-#  define EXTERN
-#else
-#  define EXTERN extern
-#endif
-
-#if defined(_MSC_VER) && defined(TABLE_ALIGN)
-#define ALIGN __declspec(align(TABLE_ALIGN))
-#else
-#define ALIGN
-#endif
-
-#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 )
-#  define XP_DIR __cdecl
-#else
-#  define XP_DIR
-#endif
-
-#if defined(DO_TABLES) && defined(FIXED_TABLES)
-#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]    =   b(e)
-#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) }
-EXTERN ALIGN CONST uint32_t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
-#else
-#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]
-#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256]
-EXTERN ALIGN CONST uint32_t t_dec(r,c)[RC_LENGTH];
-#endif
-
-#if defined( SBX_SET )
-    d_1(uint_8t, t_dec(s,box), sb_data, h0);
-#endif
-#if defined( ISB_SET )
-    d_1(uint_8t, t_dec(i,box), isb_data, h0);
-#endif
-
-#if defined( FT1_SET )
-    d_1(uint32_t, t_dec(f,n), sb_data, u0);
-#endif
-#if defined( FT4_SET )
-    d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
-#endif
-
-#if defined( FL1_SET )
-    d_1(uint32_t, t_dec(f,l), sb_data, w0);
-#endif
-#if defined( FL4_SET )
-    d_4(uint32_t, t_dec(f,l), sb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( IT1_SET )
-    d_1(uint32_t, t_dec(i,n), isb_data, v0);
-#endif
-#if defined( IT4_SET )
-    d_4(uint32_t, t_dec(i,n), isb_data, v0, v1, v2, v3);
-#endif
-
-#if defined( IL1_SET )
-    d_1(uint32_t, t_dec(i,l), isb_data, w0);
-#endif
-#if defined( IL4_SET )
-    d_4(uint32_t, t_dec(i,l), isb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( LS1_SET )
-#if defined( FL1_SET )
-#undef  LS1_SET
-#else
-    d_1(uint32_t, t_dec(l,s), sb_data, w0);
-#endif
-#endif
-
-#if defined( LS4_SET )
-#if defined( FL4_SET )
-#undef  LS4_SET
-#else
-    d_4(uint32_t, t_dec(l,s), sb_data, w0, w1, w2, w3);
-#endif
-#endif
-
-#if defined( IM1_SET )
-    d_1(uint32_t, t_dec(i,m), mm_data, v0);
-#endif
-#if defined( IM4_SET )
-    d_4(uint32_t, t_dec(i,m), mm_data, v0, v1, v2, v3);
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+
+ This file contains the code for declaring the tables needed to implement
+ AES. The file aesopt.h is assumed to be included before this header file.
+ If there are no global variables, the definitions here can be used to put
+ the AES tables in a structure so that a pointer can then be added to the
+ AES context to pass them to the AES routines that need them.   If this
+ facility is used, the calling program has to ensure that this pointer is
+ managed appropriately.  In particular, the value of the t_dec(in,it) item
+ in the table structure must be set to zero in order to ensure that the
+ tables are initialised. In practice the three code sequences in aeskey.c
+ that control the calls to aes_init() and the aes_init() routine itself will
+ have to be changed for a specific implementation. If global variables are
+ available it will generally be preferable to use them with the precomputed
+ FIXED_TABLES option that uses static global tables.
+
+ The following defines can be used to control the way the tables
+ are defined, initialised and used in embedded environments that
+ require special features for these purposes
+
+    the 't_dec' construction is used to declare fixed table arrays
+    the 't_set' construction is used to set fixed table values
+    the 't_use' construction is used to access fixed table values
+
+    256 byte tables:
+
+        t_xxx(s,box)    => forward S box
+        t_xxx(i,box)    => inverse S box
+
+    256 32-bit word OR 4 x 256 32-bit word tables:
+
+        t_xxx(f,n)      => forward normal round
+        t_xxx(f,l)      => forward last round
+        t_xxx(i,n)      => inverse normal round
+        t_xxx(i,l)      => inverse last round
+        t_xxx(l,s)      => key schedule table
+        t_xxx(i,m)      => inverse mix column table (used in the key schedule)
+
+    Other variables and tables:
+
+        t_xxx(r,c)      => the rcon table
+*/
+
+#if !defined( _AESTAB_H )
+#define _AESTAB_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define t_dec(m,n) t_##m##n
+#define t_set(m,n) t_##m##n
+#define t_use(m,n) t_##m##n
+
+#if defined(FIXED_TABLES)
+#  if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ ))
+/*   make tables far data to avoid using too much DGROUP space (PG) */
+#    define CONST const far
+#  else
+#    define CONST const
+#  endif
+#else
+#  define CONST
+#endif
+
+#if defined(DO_TABLES)
+#  define EXTERN
+#else
+#  define EXTERN extern
+#endif
+
+#if defined(_MSC_VER) && defined(TABLE_ALIGN)
+#define ALIGN __declspec(align(TABLE_ALIGN))
+#else
+#define ALIGN
+#endif
+
+#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 )
+#  define XP_DIR __cdecl
+#else
+#  define XP_DIR
+#endif
+
+#if defined(DO_TABLES) && defined(FIXED_TABLES)
+#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]    =   b(e)
+#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) }
+EXTERN ALIGN CONST uint32_t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
+#else
+#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]
+#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256]
+EXTERN ALIGN CONST uint32_t t_dec(r,c)[RC_LENGTH];
+#endif
+
+#if defined( SBX_SET )
+    d_1(uint_8t, t_dec(s,box), sb_data, h0);
+#endif
+#if defined( ISB_SET )
+    d_1(uint_8t, t_dec(i,box), isb_data, h0);
+#endif
+
+#if defined( FT1_SET )
+    d_1(uint32_t, t_dec(f,n), sb_data, u0);
+#endif
+#if defined( FT4_SET )
+    d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
+#endif
+
+#if defined( FL1_SET )
+    d_1(uint32_t, t_dec(f,l), sb_data, w0);
+#endif
+#if defined( FL4_SET )
+    d_4(uint32_t, t_dec(f,l), sb_data, w0, w1, w2, w3);
+#endif
+
+#if defined( IT1_SET )
+    d_1(uint32_t, t_dec(i,n), isb_data, v0);
+#endif
+#if defined( IT4_SET )
+    d_4(uint32_t, t_dec(i,n), isb_data, v0, v1, v2, v3);
+#endif
+
+#if defined( IL1_SET )
+    d_1(uint32_t, t_dec(i,l), isb_data, w0);
+#endif
+#if defined( IL4_SET )
+    d_4(uint32_t, t_dec(i,l), isb_data, w0, w1, w2, w3);
+#endif
+
+#if defined( LS1_SET )
+#if defined( FL1_SET )
+#undef  LS1_SET
+#else
+    d_1(uint32_t, t_dec(l,s), sb_data, w0);
+#endif
+#endif
+
+#if defined( LS4_SET )
+#if defined( FL4_SET )
+#undef  LS4_SET
+#else
+    d_4(uint32_t, t_dec(l,s), sb_data, w0, w1, w2, w3);
+#endif
+#endif
+
+#if defined( IM1_SET )
+    d_1(uint32_t, t_dec(i,m), mm_data, v0);
+#endif
+#if defined( IM4_SET )
+    d_4(uint32_t, t_dec(i,m), mm_data, v0, v1, v2, v3);
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Crypto/brg_endian.h b/Crypto/brg_endian.h
index 82e48f0b..0f12fbbf 100755
--- a/Crypto/brg_endian.h
+++ b/Crypto/brg_endian.h
@@ -1,126 +1,126 @@
-/*
----------------------------------------------------------------------------
-Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
-
-The redistribution and use of this software (with or without changes)
-is allowed without the payment of fees or royalties provided that:
-
-  source code distributions include the above copyright notice, this
-  list of conditions and the following disclaimer;
-
-  binary distributions include the above copyright notice, this list
-  of conditions and the following disclaimer in their documentation.
-
-This software is provided 'as is' with no explicit or implied warranties
-in respect of its operation, including, but not limited to, correctness
-and fitness for purpose.
----------------------------------------------------------------------------
-Issue Date: 20/12/2007
-*/
-
-#ifndef _BRG_ENDIAN_H
-#define _BRG_ENDIAN_H
-
-#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-
-/* Include files where endian defines and byteswap functions may reside */
-#if defined( __sun )
-#  include <sys/isa_defs.h>
-#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
-#  include <sys/endian.h>
-#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
-      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
-#  include <machine/endian.h>
-#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
-#  if !defined( __MINGW32__ ) && !defined( _AIX )
-#    include <endian.h>
-#    if !defined( __BEOS__ )
-#      include <byteswap.h>
-#    endif
-#  endif
-#endif
-
-/* Now attempt to set the define for platform byte order using any  */
-/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
-/* seem to encompass most endian symbol definitions                 */
-
-#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
-#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#  endif
-#elif defined( BIG_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#elif defined( LITTLE_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
-#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#  endif
-#elif defined( _BIG_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#elif defined( _LITTLE_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
-#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
-#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#  endif
-#elif defined( __BIG_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#elif defined( __LITTLE_ENDIAN )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
-#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
-#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
-#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#  endif
-#elif defined( __BIG_ENDIAN__ )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#elif defined( __LITTLE_ENDIAN__ )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-/*  if the platform byte order could not be determined, then try to */
-/*  set this define using common machine defines                    */
-#if !defined(PLATFORM_BYTE_ORDER)
-
-#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
-      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
-      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
-      defined( vax )       || defined( vms )     || defined( VMS )        || \
-      defined( __VMS )     || defined( _M_X64 )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-
-#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \
-      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \
-      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \
-      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \
-      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \
-      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \
-      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#else
-#  error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
-#endif
-
-#endif
-
-#endif
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+  source code distributions include the above copyright notice, this
+  list of conditions and the following disclaimer;
+
+  binary distributions include the above copyright notice, this list
+  of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
+
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+#  include <sys/isa_defs.h>
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+#  include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+#  include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+#  if !defined( __MINGW32__ ) && !defined( _AIX )
+#    include <endian.h>
+#    if !defined( __BEOS__ )
+#      include <byteswap.h>
+#    endif
+#  endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any  */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
+/* seem to encompass most endian symbol definitions                 */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( _BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/*  if the platform byte order could not be determined, then try to */
+/*  set this define using common machine defines                    */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
+      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
+      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
+      defined( vax )       || defined( vms )     || defined( VMS )        || \
+      defined( __VMS )     || defined( _M_X64 )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \
+      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \
+      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \
+      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \
+      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \
+      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \
+      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif 0     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error Please edit lines 116 or 118 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/Crypto/des.c b/Crypto/des.c
index 62907e4a..7ce3274c 100644
--- a/Crypto/des.c
+++ b/Crypto/des.c
@@ -1,482 +1,482 @@
-/*
- * Fast implementation of the DES, as described in the Federal Register,
- * Vol. 40, No. 52, p. 12134, March 17, 1975.
- *
- * Stuart Levy, Minnesota Supercomputer Center, April 1988.
- *
- * Key and data block representation:
- * The 56-bit key (bits 1..64 including "parity" bits 8, 16, 24, ..., 64)
- * and the 64-bit data block (bits 1..64)
- * are each stored in arrays of 8 bytes.
- * Following the NBS numbering, the MSB has the bit number 1, so
- *  key[0] = 128*bit1 + 64*bit2 + ... + 1*bit8, ... through
- *  key[7] = 128*bit57 + 64*bit58 + ... + 1*bit64.
- * In the key, "parity" bits are not checked; their values are ignored.
- */
-
-#include "des.h"
-
-/*
- * Key schedule generation.
- * We begin by pointlessly permuting the 56 useful key bits into
- * two groups of 28 bits called C and D.
- * bK_C and bK_D are indexed by C and D bit numbers, respectively,
- * and give the key bit number (1..64) which should initialize that C/D bit.
- * This is the "permuted choice 1" table.
- */
- 
-static uint8_t bK_C[28] = {
-        57, 49, 41, 33, 25, 17,  9,
-         1, 58, 50, 42, 34, 26, 18,
-        10,  2, 59, 51, 43, 35, 27,
-        19, 11,  3, 60, 52, 44, 36,
-};
-static uint8_t bK_D[28] = {
-        63, 55, 47, 39, 31, 23, 15,
-         7, 62, 54, 46, 38, 30, 22,
-        14,  6, 61, 53, 45, 37, 29,
-        21, 13,  5, 28, 20, 12, 4,
-};
- 
-/*
- * For speed, we invert these, building tables to map groups of
- * key bits into the corresponding C and D bits.
- * We represent C and D each as 28 contiguous bits right-justified in a
- * word, padded on the left with zeros.
- * If key byte `i' is said to contain bits Ki,0 (MSB) Ki,1 ... Ki,7 (LSB)
- * then
- *      wC_K4[i][Ki,0 Ki,1 Ki,2 Ki,3] gives the C bits for Ki,0..3,
- *      wD_K4[i][Ki,0 Ki,1 Ki,2 Ki,3] the corresponding D bits,
- *      wC_K3[i][Ki,4 Ki,5 Ki,6] the C bits for Ki,4..6,
- * and  wD_K3[i][Ki,4 Ki,5 Ki,6] the D bits for Ki,4..6.
- * Ki,7 is ignored since it is the nominal parity bit.
- * We could just use a single table for [i][Ki,0 .. Ki,6] but that
- * would take a lot of storage for such a rarely-used function.
- */
- 
-static  uint32_t wC_K4[8][16], wC_K3[8][8];
-static  uint32_t wD_K4[8][16], wD_K3[8][8];
- 
-/*
- * Successive Ci and Di for the sixteen steps in the key schedule are
- * created by independent 28-bit left circular shifts on C and D.
- * The shift count varies with the step number.
- */
-static uint8_t preshift[16] = {
-        1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1,
-};
- 
-/*
- * Each step in the key schedule is generated by selecting 48 bits
- * (8 groups of 6 bits) from the appropriately shifted Ci and Di.
- * bCD_KS, indexed by the key schedule bit number, gives the bit number
- * in CD (CD1 = MSB of C, CD28 = LSB of C, CD29 = MSB of D, CD56 = LSB of D)
- * which determines that bit of the key schedule.
- * Note that only C bits (1..28) appear in the first (upper) 24 bits of
- * the key schedule, and D bits (29..56) in the second (lower) 24 bits.
- * This is the "permuted-choice-2" table.
- */
- 
-static uint8_t bCD_KS[48] = {
-        14, 17, 11, 24,  1,  5,
-        3,  28, 15,  6, 21, 10,
-        23, 19, 12,  4, 26,  8,
-        16,  7, 27, 20, 13,  2,
-        41, 52, 31, 37, 47, 55,
-        30, 40, 51, 45, 33, 48,
-        44, 49, 39, 56, 34, 53,
-        46, 42, 50, 36, 29, 32,
-};
- 
-/*
- * We invert bCD_KS into a pair of tables which map groups of 4
- * C or D bits into corresponding key schedule bits.
- * We represent each step of the key schedule as 8 groups of 8 bits,
- * with the 6 real bits right-justified in each 8-bit group.
- * hKS_C4[i][C4i+1 .. C4i+4] gives the bits in the high order (first four)
- * key schedule "bytes" which correspond to C bits 4i+1 .. 4i+4.
- * lKS_D4[i][D4i+1 .. D4i+4] gives the appropriate bits in the latter (last 4)
- * key schedule bytes, from the corresponding D bits.
- */
- 
-static uint32_t hKS_C4[7][16];
-static uint32_t lKS_D4[7][16];
- 
-/*
- * Encryption/decryption.
- * Before beginning, and after ending, we perform another useless permutation
- * on the bits in the data block.
- *
- * The initial permutation and its inverse, final permutation
- * are too simple to need a table for.  If we break the input I1 .. I64 into
- * 8-bit chunks I0,0 I0,1 ... I0,7 I1,0 I1,1 ... I7,7
- * then the initial permutation sets LR as follows:
- * L = I7,1 I6,1 I5,1 ... I0,1  I7,3 I6,3 ... I0,3  I7,5 ... I0,5  I7,7 ... I0,7
- * and
- * R = I7,0 I6,0 I5,0 ... I0,0  I7,2 I6,2 ... I0,2  I7,4 ... I0,4  I7,6 ... I0,6
- *
- * If we number the bits in the final LR similarly,
- * L = L0,0 L0,1 ... L3,7  R = R0,0 R0,1 ... R3,7
- * then the output is
- * O = R0,7 L0,7 R1,7 L1,7 ... R3,7 L3,7 R0,6 L0,6 ... L3,6 R0,5 ... R3,0 L3,0
- *
- * To speed I => LR shuffling we use an array of 32-bit values indexed by
- * 8-bit input bytes.
- * wL_I8[ 0 I0,1 0 I0,3 0 I0,5 0 I0,7 ] = the corresponding L bits.
- * Other R and L bits are derived from wL_I8 by shifting.
- *
- * To speed LR => O shuffling, an array of 32-bit values indexed by 4-bit lumps:
- * wO_L4[ L0,4 L0,5 L0,6 L0,7 ] = the corresponding high-order 32 O bits.
- */
- 
-static uint32_t wL_I8[0x55 + 1];
-static uint32_t wO_L4[16];
- 
-/*
- * Core of encryption/decryption.
- * In each key schedule stage, we:
- *      take 8 overlapping groups of 6 bits each from R
- *         (the NBS tabulates the bit selections in the E table,
- *          but it's so simple we just use shifting to get the right bits)
- *      XOR each group with the corresponding bits from the key schedule
- *      Use the resulting 6 bits as an index into the appropriate S table
- *         (there are 8 such tables, one per group of 6 bits)
- *      Each S entry yields 4 bits.
- *      The 8 groups of 4 bits are catenated into a 32-bit value.
- *      Those 32 bits are permuted according to the P table.
- *      Finally the permuted 32-bit value is XORed with L and becomes
- *      the R value for the next stage, while the previous R becomes the new L.
- *
- * Here, we merge the P permutation with the S tables by making the
- * S entries be 32-bit masks, already suitably permuted.
- * Also, the bits in each six-bit group must be permuted before use as
- * an index into the NBS-tabulated S tables.
- * We rearrange entries in wPS so that natural bit order can be used.
- */
- 
-static uint32_t wPS[8][64];
- 
-static uint8_t P[32] = {
-        16,  7, 20, 21,
-        29, 12, 28, 17,
-         1, 15, 23, 26,
-         5, 18, 31, 10,
-         2,  8, 24, 14,
-        32, 27,  3,  9,
-        19, 13, 30,  6,
-        22, 11,  4, 25,
-};
- 
-static uint8_t S[8][64] = {
-        14, 4,13, 1, 2,15,11, 8, 3,10, 6,12, 5, 9, 0, 7,
-         0,15, 7, 4,14, 2,13, 1,10, 6,12,11, 9, 5, 3, 8,
-         4, 1,14, 8,13, 6, 2,11,15,12, 9, 7, 3,10, 5, 0,
-        15,12, 8, 2, 4, 9, 1, 7, 5,11, 3,14,10, 0, 6,13,
- 
-        15, 1, 8,14, 6,11, 3, 4, 9, 7, 2,13,12, 0, 5,10,
-         3,13, 4, 7,15, 2, 8,14,12, 0, 1,10, 6, 9,11, 5,
-         0,14, 7,11,10, 4,13, 1, 5, 8,12, 6, 9, 3, 2,15,
-        13, 8,10, 1, 3,15, 4, 2,11, 6, 7,12, 0, 5,14, 9,
- 
-        10, 0, 9,14, 6, 3,15, 5, 1,13,12, 7,11, 4, 2, 8,
-        13, 7, 0, 9, 3, 4, 6,10, 2, 8, 5,14,12,11,15, 1,
-        13, 6, 4, 9, 8,15, 3, 0,11, 1, 2,12, 5,10,14, 7,
-         1,10,13, 0, 6, 9, 8, 7, 4,15,14, 3,11, 5, 2,12,
- 
-         7,13,14, 3, 0, 6, 9,10, 1, 2, 8, 5,11,12, 4,15,
-        13, 8,11, 5, 6,15, 0, 3, 4, 7, 2,12, 1,10,14, 9,
-        10, 6, 9, 0,12,11, 7,13,15, 1, 3,14, 5, 2, 8, 4,
-         3,15, 0, 6,10, 1,13, 8, 9, 4, 5,11,12, 7, 2,14,
- 
-         2,12, 4, 1, 7,10,11, 6, 8, 5, 3,15,13, 0,14, 9,
-        14,11, 2,12, 4, 7,13, 1, 5, 0,15,10, 3, 9, 8, 6,
-         4, 2, 1,11,10,13, 7, 8,15, 9,12, 5, 6, 3, 0,14,
-        11, 8,12, 7, 1,14, 2,13, 6,15, 0, 9,10, 4, 5, 3,
- 
-        12, 1,10,15, 9, 2, 6, 8, 0,13, 3, 4,14, 7, 5,11,
-        10,15, 4, 2, 7,12, 9, 5, 6, 1,13,14, 0,11, 3, 8,
-         9,14,15, 5, 2, 8,12, 3, 7, 0, 4,10, 1,13,11, 6,
-         4, 3, 2,12, 9, 5,15,10,11,14, 1, 7, 6, 0, 8,13,
- 
-         4,11, 2,14,15, 0, 8,13, 3,12, 9, 7, 5,10, 6, 1,
-        13, 0,11, 7, 4, 9, 1,10,14, 3, 5,12, 2,15, 8, 6,
-         1, 4,11,13,12, 3, 7,14,10,15, 6, 8, 0, 5, 9, 2,
-         6,11,13, 8, 1, 4,10, 7, 9, 5, 0,15,14, 2, 3,12,
- 
-        13, 2, 8, 4, 6,15,11, 1,10, 9, 3,14, 5, 0,12, 7,
-         1,15,13, 8,10, 3, 7, 4,12, 5, 6,11, 0,14, 9, 2,
-         7,11, 4, 1, 9,12,14, 2, 0, 6,10,13,15, 3, 5, 8,
-         2, 1,14, 7, 4,10, 8,13,15,12, 9, 0, 3, 5, 6,11,
-};
- 
-static void
-buildtables()
-{
-        register int i, j;
-        register uint32_t v;
-        uint32_t wC_K[64], wD_K[64];
-        uint32_t hKS_C[28], lKS_D[28];
-        int Smap[64];
-        uint32_t wP[32];
- 
-#if USG
-#  define       ZERO(array)     memset((char *)(array), '\0', sizeof(array))
-#else
-# if BSD
-#  define       ZERO(array)     bzero((char *)(array), sizeof(array))
-# else //!USG && !BSD
-#  define       ZERO(array)     { register uint32_t *p = (uint32_t *)(array); \
-                                  i = sizeof(array) / sizeof(*p); \
-                                  do { *p++ = 0; } while(--i > 0); \
-                                }
-# endif //!USG && !BSD
-#endif //!USG
- 
-        /* Invert permuted-choice-1 (key => C,D) */
- 
-        ZERO(wC_K);
-        ZERO(wD_K);
-        v = 1;
-        for(j = 28; --j >= 0; ) {
-                wC_K[ bK_C[j] - 1 ] = wD_K[ bK_D[j] - 1 ] = v;
-                v += v;         /* (i.e. v <<= 1) */
-        }
- 
-        for(i = 0; i < 64; i++) {
-            int t = 8 >> (i & 3);
-            for(j = 0; j < 16; j++) {
-                if(j & t) {
-                    wC_K4[i >> 3][j] |= wC_K[i];
-                    wD_K4[i >> 3][j] |= wD_K[i];
-                    if(j < 8) {
-                        wC_K3[i >> 3][j] |= wC_K[i + 3];
-                        wD_K3[i >> 3][j] |= wD_K[i + 3];
-                    }
-                }
-            }
-            /* Generate the sequence 0,1,2,3, 8,9,10,11, ..., 56,57,58,59. */
-            if(t == 1) i += 4;
-        }
- 
-        /* Invert permuted-choice-2 */
- 
-        ZERO(hKS_C);
-        ZERO(lKS_D);
-        v = 1;
-        for(i = 24; (i -= 6) >= 0; ) {
-            j = i+5;
-            do {
-                hKS_C[ bCD_KS[j] - 1 ] = lKS_D[ bCD_KS[j+24] - 28 - 1 ] = v;
-                v += v;         /* Like v <<= 1 but may be faster */
-            } while(--j >= i);
-            v <<= 2;            /* Keep byte aligned */
-        }
- 
-        for(i = 0; i < 28; i++) {
-            v = 8 >> (i & 3);
-            for(j = 0; j < 16; j++) {
-                if(j & v) {
-                    hKS_C4[i >> 2][j] |= hKS_C[i];
-                    lKS_D4[i >> 2][j] |= lKS_D[i];
-                }
-            }
-        }
- 
-        /* Initial permutation */
- 
-        for(i = 0; i <= 0x55; i++) {
-            v = 0;
-            if(i & 64) v =  (uint32_t) 1 << 24;
-            if(i & 16) v |= (uint32_t) 1 << 16;
-            if(i & 4)  v |= (uint32_t) 1 << 8;
-            if(i & 1)  v |= 1;
-            wL_I8[i] = v;
-        }
- 
-        /* Final permutation */
- 
-        for(i = 0; i < 16; i++) {
-            v = 0;
-            if(i & 1) v = (uint32_t) 1 << 24;
-            if(i & 2) v |= (uint32_t) 1 << 16;
-            if(i & 4) v |= (uint32_t) 1 << 8;
-            if(i & 8) v |= (uint32_t) 1;
-            wO_L4[i] = v;
-        }
- 
-        /* Funny bit rearrangement on second index into S tables */
- 
-        for(i = 0; i < 64; i++) {
-                Smap[i] = (i & 0x20) | (i & 1) << 4 | (i & 0x1e) >> 1;
-        }
- 
-        /* Invert permutation P into mask indexed by R bit number */
- 
-        v = 1;
-        for(i = 32; --i >= 0; ) {
-                wP[ P[i] - 1 ] = v;
-                v += v;
-        }
- 
-        /* Build bit-mask versions of S tables, indexed in natural bit order */
- 
-        for(i = 0; i < 8; i++) {
-            for(j = 0; j < 64; j++) {
-                int k, t;
- 
-                t = S[i][ Smap[j] ];
-                for(k = 0; k < 4; k++) {
-                    if(t & 8)
-                        wPS[i][j] |= wP[4*i + k];
-                    t += t;
-                }
-            }
-        }
-}
- 
-void DES_set_key(const uint8_t key[8],DES_key_schedule *ks)
-{
-        register int i;
-        register uint32_t C, D;
-        static int built = 0;
- 
-        if(!built) {
-                buildtables();
-                built = 1;
-        }
- 
-        C = D = 0;
-        for(i = 0; i < 8; i++) {
-                register int v;
- 
-                v = key[i] >> 1;        /* Discard "parity" bit */
-                C |= wC_K4[i][(v>>3) & 15] | wC_K3[i][v & 7];
-                D |= wD_K4[i][(v>>3) & 15] | wD_K3[i][v & 7];
-        }
- 
-        /*
-         * C and D now hold the suitably right-justified
-         * 28 permuted key bits each.
-         */
-        for(i = 0; i < 16; i++) {
-#ifdef CRAY
-#define choice2(x, v)  x[6][v&15] | x[5][(v>>4)&15] | x[4][(v>>8)&15] | \
-                    x[3][(v>>12)&15] | x[2][(v>>16)&15] | x[1][(v>>20)&15] | \
-                    x[0][(v>>24)&15]
-#else //!CRAY
-                register uint32_t *ap;
- 
-#  define choice2(x, v)  ( \
-                    ap = &(x)[0][0], \
-                    ap[16*6 + (v&15)] | \
-                    ap[16*5 + ((v>>4)&15)]  | ap[16*4 + ((v>>8)&15)]  | \
-                    ap[16*3 + ((v>>12)&15)] | ap[16*2 + ((v>>16)&15)] | \
-                    ap[16*1 + ((v>>20)&15)] | ap[16*0 + ((v>>24)&15)] )
-#endif //!CRAY
- 
- 
-                /* 28-bit left circular shift */
-                C <<= preshift[i];
-                C = ((C >> 28) & 3) | (C & (((uint32_t)1<<28) - 1));
-                ks->KS[i].h = choice2(hKS_C4, C);
- 
-                D <<= preshift[i];
-                D = ((D >> 28) & 3) | (D & (((uint32_t)1<<28) - 1));
-                ks->KS[i].l = choice2(lKS_D4, D);
-        }
-}
-
-
-void DES_encrypt(uint8_t block[8],int decrypt,DES_key_schedule *ks)
-{
-        int i;
-        register uint32_t L, R;
-        register struct DES_key_stage *ksp;
-        register uint32_t *ap;
- 
-        /* Initial permutation */
- 
-        L = R = 0;
-        i = 7;
-        ap = wL_I8;
-        do {
-                register int v;
- 
-                v = block[i];   /* Could optimize according to ENDIAN */
-                L = ap[v & 0x55] | (L << 1);
-                R = ap[(v >> 1) & 0x55] | (R << 1);
-        } while(--i >= 0);
- 
-        if(decrypt) {
-                ksp = &ks->KS[15];
-        } else {
-                ksp = &ks->KS[0];
-        }
- 
-#ifdef CRAY
-#  define PS(i,j)       wPS[i][j]
-#else //!CRAY
-#  define PS(i,j)       ap[64*(i) + (j)]
-        ap = &wPS[0][0];
-#endif //!CRAY
- 
-        i = 16;
-        do {
-                register uint32_t k, tR;
- 
-                tR = (R >> 15) | (R << 17);
- 
-                k = ksp->h;
-                L ^= PS(0, ((tR >> 12) ^ (k >> 24)) & 63)
-                   | PS(1, ((tR >> 8) ^ (k >> 16)) & 63)
-                   | PS(2, ((tR >> 4) ^ (k >> 8)) & 63)
-                   | PS(3, (tR ^ k) & 63);
- 
-                k = ksp->l;
-                L ^= PS(4, ((R >> 11) ^ (k >> 24)) & 63)
-                   | PS(5, ((R >> 7) ^ (k >> 16)) & 63)
-                   | PS(6, ((R >> 3) ^ (k >> 8)) & 63)
-                   | PS(7, ((tR >> 16) ^ k) & 63);
- 
-                tR = L;
-                L = R;
-                R = tR;
- 
- 
-                if(decrypt)
-                        ksp--;
-                else
-                        ksp++;
-        } while(--i > 0);
-        {
-                register uint32_t t;
- 
-#ifdef CRAY
-# define FP(k)  (wO_L4[ (L >> (k)) & 15 ] << 1 | wO_L4[ (R >> (k)) & 15 ])
-#else //!CRAY
-# define FP(k)  (ap[ (L >> (k)) & 15 ] << 1 | ap[ (R >> (k)) & 15 ])
- 
-                ap = wO_L4;
-#endif //!CRAY
- 
-                t = FP(0) | (FP(8) | (FP(16) | (FP(24) << 2)) << 2) << 2;
-                R = FP(4) | (FP(12) | (FP(20) | (FP(28) << 2)) << 2) << 2;
-                L = t;
-        }
-        {
-                register uint32_t t;
-                register uint8_t *bp;
- 
-                bp = &block[7];
-                t = R;
-                *bp = t & 255;
-                *--bp = (t >>= 8) & 255;
-                *--bp = (t >>= 8) & 255;
-                *--bp = (t >> 8) & 255;
-                t = L;
-                *--bp = t & 255;
-                *--bp = (t >>= 8) & 255;
-                *--bp = (t >>= 8) & 255;
-                *--bp = (t >> 8) & 255;
-        }
-}
+/*
+ * Fast implementation of the DES, as described in the Federal Register,
+ * Vol. 40, No. 52, p. 12134, March 17, 1975.
+ *
+ * Stuart Levy, Minnesota Supercomputer Center, April 1988.
+ *
+ * Key and data block representation:
+ * The 56-bit key (bits 1..64 including "parity" bits 8, 16, 24, ..., 64)
+ * and the 64-bit data block (bits 1..64)
+ * are each stored in arrays of 8 bytes.
+ * Following the NBS numbering, the MSB has the bit number 1, so
+ *  key[0] = 128*bit1 + 64*bit2 + ... + 1*bit8, ... through
+ *  key[7] = 128*bit57 + 64*bit58 + ... + 1*bit64.
+ * In the key, "parity" bits are not checked; their values are ignored.
+ */
+
+#include "des.h"
+
+/*
+ * Key schedule generation.
+ * We begin by pointlessly permuting the 56 useful key bits into
+ * two groups of 28 bits called C and D.
+ * bK_C and bK_D are indexed by C and D bit numbers, respectively,
+ * and give the key bit number (1..64) which should initialize that C/D bit.
+ * This is the "permuted choice 1" table.
+ */
+ 
+static uint8_t bK_C[28] = {
+        57, 49, 41, 33, 25, 17,  9,
+         1, 58, 50, 42, 34, 26, 18,
+        10,  2, 59, 51, 43, 35, 27,
+        19, 11,  3, 60, 52, 44, 36,
+};
+static uint8_t bK_D[28] = {
+        63, 55, 47, 39, 31, 23, 15,
+         7, 62, 54, 46, 38, 30, 22,
+        14,  6, 61, 53, 45, 37, 29,
+        21, 13,  5, 28, 20, 12, 4,
+};
+ 
+/*
+ * For speed, we invert these, building tables to map groups of
+ * key bits into the corresponding C and D bits.
+ * We represent C and D each as 28 contiguous bits right-justified in a
+ * word, padded on the left with zeros.
+ * If key byte `i' is said to contain bits Ki,0 (MSB) Ki,1 ... Ki,7 (LSB)
+ * then
+ *      wC_K4[i][Ki,0 Ki,1 Ki,2 Ki,3] gives the C bits for Ki,0..3,
+ *      wD_K4[i][Ki,0 Ki,1 Ki,2 Ki,3] the corresponding D bits,
+ *      wC_K3[i][Ki,4 Ki,5 Ki,6] the C bits for Ki,4..6,
+ * and  wD_K3[i][Ki,4 Ki,5 Ki,6] the D bits for Ki,4..6.
+ * Ki,7 is ignored since it is the nominal parity bit.
+ * We could just use a single table for [i][Ki,0 .. Ki,6] but that
+ * would take a lot of storage for such a rarely-used function.
+ */
+ 
+static  uint32_t wC_K4[8][16], wC_K3[8][8];
+static  uint32_t wD_K4[8][16], wD_K3[8][8];
+ 
+/*
+ * Successive Ci and Di for the sixteen steps in the key schedule are
+ * created by independent 28-bit left circular shifts on C and D.
+ * The shift count varies with the step number.
+ */
+static uint8_t preshift[16] = {
+        1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1,
+};
+ 
+/*
+ * Each step in the key schedule is generated by selecting 48 bits
+ * (8 groups of 6 bits) from the appropriately shifted Ci and Di.
+ * bCD_KS, indexed by the key schedule bit number, gives the bit number
+ * in CD (CD1 = MSB of C, CD28 = LSB of C, CD29 = MSB of D, CD56 = LSB of D)
+ * which determines that bit of the key schedule.
+ * Note that only C bits (1..28) appear in the first (upper) 24 bits of
+ * the key schedule, and D bits (29..56) in the second (lower) 24 bits.
+ * This is the "permuted-choice-2" table.
+ */
+ 
+static uint8_t bCD_KS[48] = {
+        14, 17, 11, 24,  1,  5,
+        3,  28, 15,  6, 21, 10,
+        23, 19, 12,  4, 26,  8,
+        16,  7, 27, 20, 13,  2,
+        41, 52, 31, 37, 47, 55,
+        30, 40, 51, 45, 33, 48,
+        44, 49, 39, 56, 34, 53,
+        46, 42, 50, 36, 29, 32,
+};
+ 
+/*
+ * We invert bCD_KS into a pair of tables which map groups of 4
+ * C or D bits into corresponding key schedule bits.
+ * We represent each step of the key schedule as 8 groups of 8 bits,
+ * with the 6 real bits right-justified in each 8-bit group.
+ * hKS_C4[i][C4i+1 .. C4i+4] gives the bits in the high order (first four)
+ * key schedule "bytes" which correspond to C bits 4i+1 .. 4i+4.
+ * lKS_D4[i][D4i+1 .. D4i+4] gives the appropriate bits in the latter (last 4)
+ * key schedule bytes, from the corresponding D bits.
+ */
+ 
+static uint32_t hKS_C4[7][16];
+static uint32_t lKS_D4[7][16];
+ 
+/*
+ * Encryption/decryption.
+ * Before beginning, and after ending, we perform another useless permutation
+ * on the bits in the data block.
+ *
+ * The initial permutation and its inverse, final permutation
+ * are too simple to need a table for.  If we break the input I1 .. I64 into
+ * 8-bit chunks I0,0 I0,1 ... I0,7 I1,0 I1,1 ... I7,7
+ * then the initial permutation sets LR as follows:
+ * L = I7,1 I6,1 I5,1 ... I0,1  I7,3 I6,3 ... I0,3  I7,5 ... I0,5  I7,7 ... I0,7
+ * and
+ * R = I7,0 I6,0 I5,0 ... I0,0  I7,2 I6,2 ... I0,2  I7,4 ... I0,4  I7,6 ... I0,6
+ *
+ * If we number the bits in the final LR similarly,
+ * L = L0,0 L0,1 ... L3,7  R = R0,0 R0,1 ... R3,7
+ * then the output is
+ * O = R0,7 L0,7 R1,7 L1,7 ... R3,7 L3,7 R0,6 L0,6 ... L3,6 R0,5 ... R3,0 L3,0
+ *
+ * To speed I => LR shuffling we use an array of 32-bit values indexed by
+ * 8-bit input bytes.
+ * wL_I8[ 0 I0,1 0 I0,3 0 I0,5 0 I0,7 ] = the corresponding L bits.
+ * Other R and L bits are derived from wL_I8 by shifting.
+ *
+ * To speed LR => O shuffling, an array of 32-bit values indexed by 4-bit lumps:
+ * wO_L4[ L0,4 L0,5 L0,6 L0,7 ] = the corresponding high-order 32 O bits.
+ */
+ 
+static uint32_t wL_I8[0x55 + 1];
+static uint32_t wO_L4[16];
+ 
+/*
+ * Core of encryption/decryption.
+ * In each key schedule stage, we:
+ *      take 8 overlapping groups of 6 bits each from R
+ *         (the NBS tabulates the bit selections in the E table,
+ *          but it's so simple we just use shifting to get the right bits)
+ *      XOR each group with the corresponding bits from the key schedule
+ *      Use the resulting 6 bits as an index into the appropriate S table
+ *         (there are 8 such tables, one per group of 6 bits)
+ *      Each S entry yields 4 bits.
+ *      The 8 groups of 4 bits are catenated into a 32-bit value.
+ *      Those 32 bits are permuted according to the P table.
+ *      Finally the permuted 32-bit value is XORed with L and becomes
+ *      the R value for the next stage, while the previous R becomes the new L.
+ *
+ * Here, we merge the P permutation with the S tables by making the
+ * S entries be 32-bit masks, already suitably permuted.
+ * Also, the bits in each six-bit group must be permuted before use as
+ * an index into the NBS-tabulated S tables.
+ * We rearrange entries in wPS so that natural bit order can be used.
+ */
+ 
+static uint32_t wPS[8][64];
+ 
+static uint8_t P[32] = {
+        16,  7, 20, 21,
+        29, 12, 28, 17,
+         1, 15, 23, 26,
+         5, 18, 31, 10,
+         2,  8, 24, 14,
+        32, 27,  3,  9,
+        19, 13, 30,  6,
+        22, 11,  4, 25,
+};
+ 
+static uint8_t S[8][64] = {
+        14, 4,13, 1, 2,15,11, 8, 3,10, 6,12, 5, 9, 0, 7,
+         0,15, 7, 4,14, 2,13, 1,10, 6,12,11, 9, 5, 3, 8,
+         4, 1,14, 8,13, 6, 2,11,15,12, 9, 7, 3,10, 5, 0,
+        15,12, 8, 2, 4, 9, 1, 7, 5,11, 3,14,10, 0, 6,13,
+ 
+        15, 1, 8,14, 6,11, 3, 4, 9, 7, 2,13,12, 0, 5,10,
+         3,13, 4, 7,15, 2, 8,14,12, 0, 1,10, 6, 9,11, 5,
+         0,14, 7,11,10, 4,13, 1, 5, 8,12, 6, 9, 3, 2,15,
+        13, 8,10, 1, 3,15, 4, 2,11, 6, 7,12, 0, 5,14, 9,
+ 
+        10, 0, 9,14, 6, 3,15, 5, 1,13,12, 7,11, 4, 2, 8,
+        13, 7, 0, 9, 3, 4, 6,10, 2, 8, 5,14,12,11,15, 1,
+        13, 6, 4, 9, 8,15, 3, 0,11, 1, 2,12, 5,10,14, 7,
+         1,10,13, 0, 6, 9, 8, 7, 4,15,14, 3,11, 5, 2,12,
+ 
+         7,13,14, 3, 0, 6, 9,10, 1, 2, 8, 5,11,12, 4,15,
+        13, 8,11, 5, 6,15, 0, 3, 4, 7, 2,12, 1,10,14, 9,
+        10, 6, 9, 0,12,11, 7,13,15, 1, 3,14, 5, 2, 8, 4,
+         3,15, 0, 6,10, 1,13, 8, 9, 4, 5,11,12, 7, 2,14,
+ 
+         2,12, 4, 1, 7,10,11, 6, 8, 5, 3,15,13, 0,14, 9,
+        14,11, 2,12, 4, 7,13, 1, 5, 0,15,10, 3, 9, 8, 6,
+         4, 2, 1,11,10,13, 7, 8,15, 9,12, 5, 6, 3, 0,14,
+        11, 8,12, 7, 1,14, 2,13, 6,15, 0, 9,10, 4, 5, 3,
+ 
+        12, 1,10,15, 9, 2, 6, 8, 0,13, 3, 4,14, 7, 5,11,
+        10,15, 4, 2, 7,12, 9, 5, 6, 1,13,14, 0,11, 3, 8,
+         9,14,15, 5, 2, 8,12, 3, 7, 0, 4,10, 1,13,11, 6,
+         4, 3, 2,12, 9, 5,15,10,11,14, 1, 7, 6, 0, 8,13,
+ 
+         4,11, 2,14,15, 0, 8,13, 3,12, 9, 7, 5,10, 6, 1,
+        13, 0,11, 7, 4, 9, 1,10,14, 3, 5,12, 2,15, 8, 6,
+         1, 4,11,13,12, 3, 7,14,10,15, 6, 8, 0, 5, 9, 2,
+         6,11,13, 8, 1, 4,10, 7, 9, 5, 0,15,14, 2, 3,12,
+ 
+        13, 2, 8, 4, 6,15,11, 1,10, 9, 3,14, 5, 0,12, 7,
+         1,15,13, 8,10, 3, 7, 4,12, 5, 6,11, 0,14, 9, 2,
+         7,11, 4, 1, 9,12,14, 2, 0, 6,10,13,15, 3, 5, 8,
+         2, 1,14, 7, 4,10, 8,13,15,12, 9, 0, 3, 5, 6,11,
+};
+ 
+static void
+buildtables()
+{
+        register int i, j;
+        register uint32_t v;
+        uint32_t wC_K[64], wD_K[64];
+        uint32_t hKS_C[28], lKS_D[28];
+        int Smap[64];
+        uint32_t wP[32];
+ 
+#if USG
+#  define       ZERO(array)     memset((char *)(array), '\0', sizeof(array))
+#else
+# if BSD
+#  define       ZERO(array)     bzero((char *)(array), sizeof(array))
+# else //!USG && !BSD
+#  define       ZERO(array)     { register uint32_t *p = (uint32_t *)(array); \
+                                  i = sizeof(array) / sizeof(*p); \
+                                  do { *p++ = 0; } while(--i > 0); \
+                                }
+# endif //!USG && !BSD
+#endif //!USG
+ 
+        /* Invert permuted-choice-1 (key => C,D) */
+ 
+        ZERO(wC_K);
+        ZERO(wD_K);
+        v = 1;
+        for(j = 28; --j >= 0; ) {
+                wC_K[ bK_C[j] - 1 ] = wD_K[ bK_D[j] - 1 ] = v;
+                v += v;         /* (i.e. v <<= 1) */
+        }
+ 
+        for(i = 0; i < 64; i++) {
+            int t = 8 >> (i & 3);
+            for(j = 0; j < 16; j++) {
+                if(j & t) {
+                    wC_K4[i >> 3][j] |= wC_K[i];
+                    wD_K4[i >> 3][j] |= wD_K[i];
+                    if(j < 8) {
+                        wC_K3[i >> 3][j] |= wC_K[i + 3];
+                        wD_K3[i >> 3][j] |= wD_K[i + 3];
+                    }
+                }
+            }
+            /* Generate the sequence 0,1,2,3, 8,9,10,11, ..., 56,57,58,59. */
+            if(t == 1) i += 4;
+        }
+ 
+        /* Invert permuted-choice-2 */
+ 
+        ZERO(hKS_C);
+        ZERO(lKS_D);
+        v = 1;
+        for(i = 24; (i -= 6) >= 0; ) {
+            j = i+5;
+            do {
+                hKS_C[ bCD_KS[j] - 1 ] = lKS_D[ bCD_KS[j+24] - 28 - 1 ] = v;
+                v += v;         /* Like v <<= 1 but may be faster */
+            } while(--j >= i);
+            v <<= 2;            /* Keep byte aligned */
+        }
+ 
+        for(i = 0; i < 28; i++) {
+            v = 8 >> (i & 3);
+            for(j = 0; j < 16; j++) {
+                if(j & v) {
+                    hKS_C4[i >> 2][j] |= hKS_C[i];
+                    lKS_D4[i >> 2][j] |= lKS_D[i];
+                }
+            }
+        }
+ 
+        /* Initial permutation */
+ 
+        for(i = 0; i <= 0x55; i++) {
+            v = 0;
+            if(i & 64) v =  (uint32_t) 1 << 24;
+            if(i & 16) v |= (uint32_t) 1 << 16;
+            if(i & 4)  v |= (uint32_t) 1 << 8;
+            if(i & 1)  v |= 1;
+            wL_I8[i] = v;
+        }
+ 
+        /* Final permutation */
+ 
+        for(i = 0; i < 16; i++) {
+            v = 0;
+            if(i & 1) v = (uint32_t) 1 << 24;
+            if(i & 2) v |= (uint32_t) 1 << 16;
+            if(i & 4) v |= (uint32_t) 1 << 8;
+            if(i & 8) v |= (uint32_t) 1;
+            wO_L4[i] = v;
+        }
+ 
+        /* Funny bit rearrangement on second index into S tables */
+ 
+        for(i = 0; i < 64; i++) {
+                Smap[i] = (i & 0x20) | (i & 1) << 4 | (i & 0x1e) >> 1;
+        }
+ 
+        /* Invert permutation P into mask indexed by R bit number */
+ 
+        v = 1;
+        for(i = 32; --i >= 0; ) {
+                wP[ P[i] - 1 ] = v;
+                v += v;
+        }
+ 
+        /* Build bit-mask versions of S tables, indexed in natural bit order */
+ 
+        for(i = 0; i < 8; i++) {
+            for(j = 0; j < 64; j++) {
+                int k, t;
+ 
+                t = S[i][ Smap[j] ];
+                for(k = 0; k < 4; k++) {
+                    if(t & 8)
+                        wPS[i][j] |= wP[4*i + k];
+                    t += t;
+                }
+            }
+        }
+}
+ 
+void DES_set_key(const uint8_t key[8],DES_key_schedule *ks)
+{
+        register int i;
+        register uint32_t C, D;
+        static int built = 0;
+ 
+        if(!built) {
+                buildtables();
+                built = 1;
+        }
+ 
+        C = D = 0;
+        for(i = 0; i < 8; i++) {
+                register int v;
+ 
+                v = key[i] >> 1;        /* Discard "parity" bit */
+                C |= wC_K4[i][(v>>3) & 15] | wC_K3[i][v & 7];
+                D |= wD_K4[i][(v>>3) & 15] | wD_K3[i][v & 7];
+        }
+ 
+        /*
+         * C and D now hold the suitably right-justified
+         * 28 permuted key bits each.
+         */
+        for(i = 0; i < 16; i++) {
+#ifdef CRAY
+#define choice2(x, v)  x[6][v&15] | x[5][(v>>4)&15] | x[4][(v>>8)&15] | \
+                    x[3][(v>>12)&15] | x[2][(v>>16)&15] | x[1][(v>>20)&15] | \
+                    x[0][(v>>24)&15]
+#else //!CRAY
+                register uint32_t *ap;
+ 
+#  define choice2(x, v)  ( \
+                    ap = &(x)[0][0], \
+                    ap[16*6 + (v&15)] | \
+                    ap[16*5 + ((v>>4)&15)]  | ap[16*4 + ((v>>8)&15)]  | \
+                    ap[16*3 + ((v>>12)&15)] | ap[16*2 + ((v>>16)&15)] | \
+                    ap[16*1 + ((v>>20)&15)] | ap[16*0 + ((v>>24)&15)] )
+#endif //!CRAY
+ 
+ 
+                /* 28-bit left circular shift */
+                C <<= preshift[i];
+                C = ((C >> 28) & 3) | (C & (((uint32_t)1<<28) - 1));
+                ks->KS[i].h = choice2(hKS_C4, C);
+ 
+                D <<= preshift[i];
+                D = ((D >> 28) & 3) | (D & (((uint32_t)1<<28) - 1));
+                ks->KS[i].l = choice2(lKS_D4, D);
+        }
+}
+
+
+void DES_encrypt(uint8_t block[8],int decrypt,DES_key_schedule *ks)
+{
+        int i;
+        register uint32_t L, R;
+        register struct DES_key_stage *ksp;
+        register uint32_t *ap;
+ 
+        /* Initial permutation */
+ 
+        L = R = 0;
+        i = 7;
+        ap = wL_I8;
+        do {
+                register int v;
+ 
+                v = block[i];   /* Could optimize according to ENDIAN */
+                L = ap[v & 0x55] | (L << 1);
+                R = ap[(v >> 1) & 0x55] | (R << 1);
+        } while(--i >= 0);
+ 
+        if(decrypt) {
+                ksp = &ks->KS[15];
+        } else {
+                ksp = &ks->KS[0];
+        }
+ 
+#ifdef CRAY
+#  define PS(i,j)       wPS[i][j]
+#else //!CRAY
+#  define PS(i,j)       ap[64*(i) + (j)]
+        ap = &wPS[0][0];
+#endif //!CRAY
+ 
+        i = 16;
+        do {
+                register uint32_t k, tR;
+ 
+                tR = (R >> 15) | (R << 17);
+ 
+                k = ksp->h;
+                L ^= PS(0, ((tR >> 12) ^ (k >> 24)) & 63)
+                   | PS(1, ((tR >> 8) ^ (k >> 16)) & 63)
+                   | PS(2, ((tR >> 4) ^ (k >> 8)) & 63)
+                   | PS(3, (tR ^ k) & 63);
+ 
+                k = ksp->l;
+                L ^= PS(4, ((R >> 11) ^ (k >> 24)) & 63)
+                   | PS(5, ((R >> 7) ^ (k >> 16)) & 63)
+                   | PS(6, ((R >> 3) ^ (k >> 8)) & 63)
+                   | PS(7, ((tR >> 16) ^ k) & 63);
+ 
+                tR = L;
+                L = R;
+                R = tR;
+ 
+ 
+                if(decrypt)
+                        ksp--;
+                else
+                        ksp++;
+        } while(--i > 0);
+        {
+                register uint32_t t;
+ 
+#ifdef CRAY
+# define FP(k)  (wO_L4[ (L >> (k)) & 15 ] << 1 | wO_L4[ (R >> (k)) & 15 ])
+#else //!CRAY
+# define FP(k)  (ap[ (L >> (k)) & 15 ] << 1 | ap[ (R >> (k)) & 15 ])
+ 
+                ap = wO_L4;
+#endif //!CRAY
+ 
+                t = FP(0) | (FP(8) | (FP(16) | (FP(24) << 2)) << 2) << 2;
+                R = FP(4) | (FP(12) | (FP(20) | (FP(28) << 2)) << 2) << 2;
+                L = t;
+        }
+        {
+                register uint32_t t;
+                register uint8_t *bp;
+ 
+                bp = &block[7];
+                t = R;
+                *bp = t & 255;
+                *--bp = (t >>= 8) & 255;
+                *--bp = (t >>= 8) & 255;
+                *--bp = (t >> 8) & 255;
+                t = L;
+                *--bp = t & 255;
+                *--bp = (t >>= 8) & 255;
+                *--bp = (t >>= 8) & 255;
+                *--bp = (t >> 8) & 255;
+        }
+}
diff --git a/Crypto/des.h b/Crypto/des.h
index e30c22d2..1f956050 100644
--- a/Crypto/des.h
+++ b/Crypto/des.h
@@ -1,17 +1,17 @@
-#ifndef __DES_H__
-#define __DES_H__
-
-#include <stdint.h>
-
-typedef struct DES_key_schedule
-{
-	struct DES_key_stage
-	{
-		uint32_t h, l;
-	} KS[16];
-} DES_key_schedule;
-
-void DES_set_key(const uint8_t key[8],DES_key_schedule *ks);
-void DES_encrypt(uint8_t block[8],int decrypt,DES_key_schedule *ks);
-
-#endif
+#ifndef __DES_H__
+#define __DES_H__
+
+#include <stdint.h>
+
+typedef struct DES_key_schedule
+{
+	struct DES_key_stage
+	{
+		uint32_t h, l;
+	} KS[16];
+} DES_key_schedule;
+
+void DES_set_key(const uint8_t key[8],DES_key_schedule *ks);
+void DES_encrypt(uint8_t block[8],int decrypt,DES_key_schedule *ks);
+
+#endif
diff --git a/WinZipJPEG/Dependencies b/WinZipJPEG/Dependencies
index 7080341a..9a36ce1a 100644
--- a/WinZipJPEG/Dependencies
+++ b/WinZipJPEG/Dependencies
@@ -1,9 +1,9 @@
-ArithmeticDecoder.o: ArithmeticDecoder.c ArithmeticDecoder.h \
- InputStream.h
-Decompressor.o: Decompressor.c Decompressor.h InputStream.h \
- ArithmeticDecoder.h JPEG.h LZMA.h ../lzma/LzmaDec.h ../lzma/Types.h
-JPEG.o: JPEG.c JPEG.h
-RawDecoder.o: RawDecoder.c Decompressor.h InputStream.h \
- ArithmeticDecoder.h JPEG.h
-Test.o: Test.c ArithmeticDecoder.h InputStream.h
-../lzma/LzmaDec.o: ../lzma/LzmaDec.c ../lzma/LzmaDec.h ../lzma/Types.h
+ArithmeticDecoder.o: ArithmeticDecoder.c ArithmeticDecoder.h \
+ InputStream.h
+Decompressor.o: Decompressor.c Decompressor.h InputStream.h \
+ ArithmeticDecoder.h JPEG.h LZMA.h ../lzma/LzmaDec.h ../lzma/Types.h
+JPEG.o: JPEG.c JPEG.h
+RawDecoder.o: RawDecoder.c Decompressor.h InputStream.h \
+ ArithmeticDecoder.h JPEG.h
+Test.o: Test.c ArithmeticDecoder.h InputStream.h
+../lzma/LzmaDec.o: ../lzma/LzmaDec.c ../lzma/LzmaDec.h ../lzma/Types.h
diff --git a/WinZipJPEG/JPEG.c b/WinZipJPEG/JPEG.c
index 41528f51..da1d2078 100644
--- a/WinZipJPEG/JPEG.c
+++ b/WinZipJPEG/JPEG.c
@@ -1,335 +1,335 @@
-/*
- * JPEG.c
- *
- * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301  USA
- */
-#include "JPEG.h"
-
-#include <string.h>
-
-//#include <stdio.h>
-//#define DebugPrint(...) fprintf(stderr,__VA_ARGS__)
-#define DebugPrint(...)
-
-static const uint8_t *FindNextMarker(const uint8_t *ptr,const uint8_t *end);
-static int ParseSize(const uint8_t *ptr,const uint8_t *end);
-
-static inline uint16_t ParseUInt16(const uint8_t *ptr) { return (ptr[0]<<8)|ptr[1]; }
-
-const void *FindStartOfWinZipJPEGImage(const void *bytes,size_t length)
-{
-	const uint8_t *ptr=bytes;
-	const uint8_t *end=ptr+length;
-
-	while(ptr+2<=end)
-	{
-		if(ptr[0]==0xff && ptr[1]==0xd8) return ptr;
-		ptr++;
-	}
-
-	return NULL;
-}
-
-void InitializeWinZipJPEGMetadata(WinZipJPEGMetadata *self)
-{
-	memset(self,0,sizeof(*self));
-}
-
-int ParseWinZipJPEGMetadata(WinZipJPEGMetadata *self,const void *bytes,size_t length)
-{
-	const uint8_t *ptr=bytes;
-	const uint8_t *end=ptr+length;
-
-	for(;;)
-	{
-		ptr=FindNextMarker(ptr,end);
-		if(!ptr) return WinZipJPEGMetadataParsingFailed;
-
-		switch(*ptr++)
-		{
-			case 0xd8: // Start of image
-				// Empty marker, do nothing.
-				DebugPrint("Start of image\n");
-			break;
-
-			case 0xc4: // Define huffman table
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-				const uint8_t *next=ptr+size;
-
-				ptr+=2;
-
-				DebugPrint("Define huffman table(s)\n");
-
-				while(ptr+17<=next)
-				{
-					int class=*ptr>>4;
-					int index=*ptr&0x0f;
-					ptr++;
-
-					if(class!=0 && class!=1) return WinZipJPEGMetadataParsingFailed;
-					if(index>=4) return WinZipJPEGMetadataParsingFailed;
-
-					int numcodes[16];
-					int totalcodes=0;
-					for(int i=0;i<16;i++)
-					{
-						numcodes[i]=ptr[i];
-						totalcodes+=numcodes[i];
-					}
-					ptr+=16;
-
-					if(ptr+totalcodes>next) return WinZipJPEGMetadataParsingFailed;
-
-					DebugPrint(" > %s table at %d with %d codes\n",class==0?"DC":"AC",index,totalcodes);
-
-					unsigned int code=0;
-					for(int i=0;i<16;i++)
-					{
-						for(int j=0;j<numcodes[i];j++)
-						{
-							int value=*ptr++;
-
-							self->huffmantables[class][index].codes[value].code=code;
-							self->huffmantables[class][index].codes[value].length=i+1;
-							//DebugPrint(" >> Code %x length %d for %d\n",code,i+1,value);
-
-							code++;
-						}
-
-						code<<=1;
-					}
-				}
-
-				ptr=next;
-			}
-			break;
-
-			case 0xdb: // Define quantization table(s)
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-				const uint8_t *next=ptr+size;
-
-				ptr+=2;
-
-				DebugPrint("Define quantization table(s)\n");
-
-				while(ptr+1<=next)
-				{
-					int precision=*ptr>>4;
-					int index=*ptr&0x0f;
-					ptr++;
-
-					if(index>=4) return WinZipJPEGMetadataParsingFailed;
-
-					if(precision==0)
-					{
-						DebugPrint(" > 8 bit table at %d\n",index);
-
-						if(ptr+64>next) return WinZipJPEGMetadataParsingFailed;
-						for(int i=0;i<64;i++) self->quantizationtables[index].c[i]=ptr[i];
-						ptr+=64;
-					}
-					else if(precision==1)
-					{
-						DebugPrint(" > 16 bit table at %d\n",index);
-
-						if(ptr+128>next) return WinZipJPEGMetadataParsingFailed;
-						for(int i=0;i<64;i++) self->quantizationtables[index].c[i]=ParseUInt16(&ptr[2*i]);
-						ptr+=128;
-					}
-					else return WinZipJPEGMetadataParsingFailed;
-				}
-
-				ptr=next;
-			}
-			break;
-
-			case 0xdd: // Define restart interval
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-				const uint8_t *next=ptr+size;
-
-				self->restartinterval=ParseUInt16(&ptr[2]);
-
-				ptr=next;
-
-				DebugPrint("Define restart interval: %d\n",self->restartinterval);
-			}
-			break;
-
-			case 0xc0: // Start of frame 0
-			case 0xc1: // Start of frame 1
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-				const uint8_t *next=ptr+size;
-
-				if(size<8) return WinZipJPEGMetadataParsingFailed;
-				self->bits=ptr[2];
-				self->height=ParseUInt16(&ptr[3]);
-				self->width=ParseUInt16(&ptr[5]);
-				self->numcomponents=ptr[7];
-
-				if(self->numcomponents<1 || self->numcomponents>4) return WinZipJPEGMetadataParsingFailed;
-				if(size<8+self->numcomponents*3) return WinZipJPEGMetadataParsingFailed;
-
-				self->maxhorizontalfactor=1;
-				self->maxverticalfactor=1;
-
-				DebugPrint("Start of frame: %dx%d %d bits %d comps\n",self->width,self->height,self->bits,self->numcomponents);
-
-				for(int i=0;i<self->numcomponents;i++)
-				{
-					self->components[i].identifier=ptr[8+i*3];
-					self->components[i].horizontalfactor=ptr[9+i*3]>>4;
-					self->components[i].verticalfactor=ptr[9+i*3]&0x0f;
-					int quantizationindex=ptr[10+i*3];
-					self->components[i].quantizationtable=&self->quantizationtables[quantizationindex];
-
-					if(self->components[i].horizontalfactor>self->maxhorizontalfactor)
-					self->maxhorizontalfactor=self->components[i].horizontalfactor;
-
-					if(self->components[i].verticalfactor>self->maxverticalfactor)
-					self->maxverticalfactor=self->components[i].verticalfactor;
-
-					DebugPrint(" > Component id %d, %dx%d, quant %d\n",
-					self->components[i].identifier,
-					self->components[i].horizontalfactor,
-					self->components[i].verticalfactor,
-					quantizationindex);
-				}
-
-				// TODO: This is a kludge for strange one-component files with
-				// 2x2 sampling factor, that are still stored in exactly the same
-				// way as 1x1. Figure out how to actually handle this properly.
-				if(self->numcomponents==1)
-				{
-					self->components[0].horizontalfactor/=self->maxhorizontalfactor;
-					self->components[0].verticalfactor/=self->maxverticalfactor;
-					self->maxhorizontalfactor=1;
-					self->maxverticalfactor=1;
-				}
-
-				int mcuwidth=self->maxhorizontalfactor*8;
-				int mcuheight=self->maxverticalfactor*8;
-				self->horizontalmcus=(self->width+mcuwidth-1)/mcuwidth;
-				self->verticalmcus=(self->height+mcuheight-1)/mcuheight;
-
-				DebugPrint(" > MCU size %dx%d, %d horizontal MCUs, %d vertical MCUs.\n",mcuwidth,mcuheight,self->horizontalmcus,self->verticalmcus);
-
-				ptr=next;
-			}
-			break;
-
-			case 0xda: // Start of scan
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-
-				if(size<6) return WinZipJPEGMetadataParsingFailed;
-
-				self->numscancomponents=ptr[2];
-				if(self->numscancomponents<1 || self->numscancomponents>4) return WinZipJPEGMetadataParsingFailed;
-				if(size<6+self->numscancomponents*2) return WinZipJPEGMetadataParsingFailed;
-
-				DebugPrint("Start of scan: %d comps\n",self->numscancomponents);
-
-				for(int i=0;i<self->numscancomponents;i++)
-				{
-					int identifier=ptr[3+i*2];
-					WinZipJPEGComponent *component=NULL;
-					for(int j=0;j<self->numcomponents;j++)
-					{
-						if(self->components[j].identifier==identifier)
-						{
-							component=&self->components[j];
-							break;
-						}
-					}
-					if(!component) return WinZipJPEGMetadataParsingFailed;
-
-					self->scancomponents[i].component=component;
-
-					int dcindex=ptr[4+i*2]>>4;
-					int acindex=ptr[4+i*2]&0x0f;
-					self->scancomponents[i].dctable=&self->huffmantables[0][dcindex];
-					self->scancomponents[i].actable=&self->huffmantables[1][acindex];
-
-					DebugPrint(" > Component id %d, %dx%d, DC %d, AC %d\n",
-					identifier,
-					self->scancomponents[i].component->horizontalfactor,
-					self->scancomponents[i].component->verticalfactor,
-					dcindex,acindex);
-				}
-
-				if(ptr[3+self->numscancomponents*2]!=0) return WinZipJPEGMetadataParsingFailed;
-				if(ptr[4+self->numscancomponents*2]!=63) return WinZipJPEGMetadataParsingFailed;
-				if(ptr[5+self->numscancomponents*2]!=0) return WinZipJPEGMetadataParsingFailed;
-
-				return WinZipJPEGMetadataFoundStartOfScan;
-			}
-			break;
-
-
-			case 0xd9: // End of image
-				return WinZipJPEGMetadataFoundEndOfImage;
-
-			default:
-			{
-				int size=ParseSize(ptr,end);
-				if(!size) return WinZipJPEGMetadataParsingFailed;
-				ptr+=size;
-
-				DebugPrint("Unknown marker %02x\n",ptr[-1]);
-			}
-			break;
-		}
-	}
-}
-
-// Find next marker, skipping pad bytes.
-static const uint8_t *FindNextMarker(const uint8_t *ptr,const uint8_t *end)
-{
-	if(ptr>=end) return NULL;
-	if(*ptr!=0xff) return NULL;
-
-	while(*ptr==0xff)
-	{
-		ptr++;
-		if(ptr>=end) return NULL;
-	}
-
-	return ptr;
-}
-
-// Parse and sanity check the size of a marker.
-static int ParseSize(const uint8_t *ptr,const uint8_t *end)
-{
-	if(ptr+2>end) return 0;
-
-	int size=ParseUInt16(ptr);
-	if(size<2) return 0;
-	if(ptr+size>end) return 0;
-
-	return size;
-}
-
+/*
+ * JPEG.c
+ *
+ * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301  USA
+ */
+#include "JPEG.h"
+
+#include <string.h>
+
+//#include <stdio.h>
+//#define DebugPrint(...) fprintf(stderr,__VA_ARGS__)
+#define DebugPrint(...)
+
+static const uint8_t *FindNextMarker(const uint8_t *ptr,const uint8_t *end);
+static int ParseSize(const uint8_t *ptr,const uint8_t *end);
+
+static inline uint16_t ParseUInt16(const uint8_t *ptr) { return (ptr[0]<<8)|ptr[1]; }
+
+const void *FindStartOfWinZipJPEGImage(const void *bytes,size_t length)
+{
+	const uint8_t *ptr=bytes;
+	const uint8_t *end=ptr+length;
+
+	while(ptr+2<=end)
+	{
+		if(ptr[0]==0xff && ptr[1]==0xd8) return ptr;
+		ptr++;
+	}
+
+	return NULL;
+}
+
+void InitializeWinZipJPEGMetadata(WinZipJPEGMetadata *self)
+{
+	memset(self,0,sizeof(*self));
+}
+
+int ParseWinZipJPEGMetadata(WinZipJPEGMetadata *self,const void *bytes,size_t length)
+{
+	const uint8_t *ptr=bytes;
+	const uint8_t *end=ptr+length;
+
+	for(;;)
+	{
+		ptr=FindNextMarker(ptr,end);
+		if(!ptr) return WinZipJPEGMetadataParsingFailed;
+
+		switch(*ptr++)
+		{
+			case 0xd8: // Start of image
+				// Empty marker, do nothing.
+				DebugPrint("Start of image\n");
+			break;
+
+			case 0xc4: // Define huffman table
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+				const uint8_t *next=ptr+size;
+
+				ptr+=2;
+
+				DebugPrint("Define huffman table(s)\n");
+
+				while(ptr+17<=next)
+				{
+					int class=*ptr>>4;
+					int index=*ptr&0x0f;
+					ptr++;
+
+					if(class!=0 && class!=1) return WinZipJPEGMetadataParsingFailed;
+					if(index>=4) return WinZipJPEGMetadataParsingFailed;
+
+					int numcodes[16];
+					int totalcodes=0;
+					for(int i=0;i<16;i++)
+					{
+						numcodes[i]=ptr[i];
+						totalcodes+=numcodes[i];
+					}
+					ptr+=16;
+
+					if(ptr+totalcodes>next) return WinZipJPEGMetadataParsingFailed;
+
+					DebugPrint(" > %s table at %d with %d codes\n",class==0?"DC":"AC",index,totalcodes);
+
+					unsigned int code=0;
+					for(int i=0;i<16;i++)
+					{
+						for(int j=0;j<numcodes[i];j++)
+						{
+							int value=*ptr++;
+
+							self->huffmantables[class][index].codes[value].code=code;
+							self->huffmantables[class][index].codes[value].length=i+1;
+							//DebugPrint(" >> Code %x length %d for %d\n",code,i+1,value);
+
+							code++;
+						}
+
+						code<<=1;
+					}
+				}
+
+				ptr=next;
+			}
+			break;
+
+			case 0xdb: // Define quantization table(s)
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+				const uint8_t *next=ptr+size;
+
+				ptr+=2;
+
+				DebugPrint("Define quantization table(s)\n");
+
+				while(ptr+1<=next)
+				{
+					int precision=*ptr>>4;
+					int index=*ptr&0x0f;
+					ptr++;
+
+					if(index>=4) return WinZipJPEGMetadataParsingFailed;
+
+					if(precision==0)
+					{
+						DebugPrint(" > 8 bit table at %d\n",index);
+
+						if(ptr+64>next) return WinZipJPEGMetadataParsingFailed;
+						for(int i=0;i<64;i++) self->quantizationtables[index].c[i]=ptr[i];
+						ptr+=64;
+					}
+					else if(precision==1)
+					{
+						DebugPrint(" > 16 bit table at %d\n",index);
+
+						if(ptr+128>next) return WinZipJPEGMetadataParsingFailed;
+						for(int i=0;i<64;i++) self->quantizationtables[index].c[i]=ParseUInt16(&ptr[2*i]);
+						ptr+=128;
+					}
+					else return WinZipJPEGMetadataParsingFailed;
+				}
+
+				ptr=next;
+			}
+			break;
+
+			case 0xdd: // Define restart interval
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+				const uint8_t *next=ptr+size;
+
+				self->restartinterval=ParseUInt16(&ptr[2]);
+
+				ptr=next;
+
+				DebugPrint("Define restart interval: %d\n",self->restartinterval);
+			}
+			break;
+
+			case 0xc0: // Start of frame 0
+			case 0xc1: // Start of frame 1
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+				const uint8_t *next=ptr+size;
+
+				if(size<8) return WinZipJPEGMetadataParsingFailed;
+				self->bits=ptr[2];
+				self->height=ParseUInt16(&ptr[3]);
+				self->width=ParseUInt16(&ptr[5]);
+				self->numcomponents=ptr[7];
+
+				if(self->numcomponents<1 || self->numcomponents>4) return WinZipJPEGMetadataParsingFailed;
+				if(size<8+self->numcomponents*3) return WinZipJPEGMetadataParsingFailed;
+
+				self->maxhorizontalfactor=1;
+				self->maxverticalfactor=1;
+
+				DebugPrint("Start of frame: %dx%d %d bits %d comps\n",self->width,self->height,self->bits,self->numcomponents);
+
+				for(int i=0;i<self->numcomponents;i++)
+				{
+					self->components[i].identifier=ptr[8+i*3];
+					self->components[i].horizontalfactor=ptr[9+i*3]>>4;
+					self->components[i].verticalfactor=ptr[9+i*3]&0x0f;
+					int quantizationindex=ptr[10+i*3];
+					self->components[i].quantizationtable=&self->quantizationtables[quantizationindex];
+
+					if(self->components[i].horizontalfactor>self->maxhorizontalfactor)
+					self->maxhorizontalfactor=self->components[i].horizontalfactor;
+
+					if(self->components[i].verticalfactor>self->maxverticalfactor)
+					self->maxverticalfactor=self->components[i].verticalfactor;
+
+					DebugPrint(" > Component id %d, %dx%d, quant %d\n",
+					self->components[i].identifier,
+					self->components[i].horizontalfactor,
+					self->components[i].verticalfactor,
+					quantizationindex);
+				}
+
+				// TODO: This is a kludge for strange one-component files with
+				// 2x2 sampling factor, that are still stored in exactly the same
+				// way as 1x1. Figure out how to actually handle this properly.
+				if(self->numcomponents==1)
+				{
+					self->components[0].horizontalfactor/=self->maxhorizontalfactor;
+					self->components[0].verticalfactor/=self->maxverticalfactor;
+					self->maxhorizontalfactor=1;
+					self->maxverticalfactor=1;
+				}
+
+				int mcuwidth=self->maxhorizontalfactor*8;
+				int mcuheight=self->maxverticalfactor*8;
+				self->horizontalmcus=(self->width+mcuwidth-1)/mcuwidth;
+				self->verticalmcus=(self->height+mcuheight-1)/mcuheight;
+
+				DebugPrint(" > MCU size %dx%d, %d horizontal MCUs, %d vertical MCUs.\n",mcuwidth,mcuheight,self->horizontalmcus,self->verticalmcus);
+
+				ptr=next;
+			}
+			break;
+
+			case 0xda: // Start of scan
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+
+				if(size<6) return WinZipJPEGMetadataParsingFailed;
+
+				self->numscancomponents=ptr[2];
+				if(self->numscancomponents<1 || self->numscancomponents>4) return WinZipJPEGMetadataParsingFailed;
+				if(size<6+self->numscancomponents*2) return WinZipJPEGMetadataParsingFailed;
+
+				DebugPrint("Start of scan: %d comps\n",self->numscancomponents);
+
+				for(int i=0;i<self->numscancomponents;i++)
+				{
+					int identifier=ptr[3+i*2];
+					WinZipJPEGComponent *component=NULL;
+					for(int j=0;j<self->numcomponents;j++)
+					{
+						if(self->components[j].identifier==identifier)
+						{
+							component=&self->components[j];
+							break;
+						}
+					}
+					if(!component) return WinZipJPEGMetadataParsingFailed;
+
+					self->scancomponents[i].component=component;
+
+					int dcindex=ptr[4+i*2]>>4;
+					int acindex=ptr[4+i*2]&0x0f;
+					self->scancomponents[i].dctable=&self->huffmantables[0][dcindex];
+					self->scancomponents[i].actable=&self->huffmantables[1][acindex];
+
+					DebugPrint(" > Component id %d, %dx%d, DC %d, AC %d\n",
+					identifier,
+					self->scancomponents[i].component->horizontalfactor,
+					self->scancomponents[i].component->verticalfactor,
+					dcindex,acindex);
+				}
+
+				if(ptr[3+self->numscancomponents*2]!=0) return WinZipJPEGMetadataParsingFailed;
+				if(ptr[4+self->numscancomponents*2]!=63) return WinZipJPEGMetadataParsingFailed;
+				if(ptr[5+self->numscancomponents*2]!=0) return WinZipJPEGMetadataParsingFailed;
+
+				return WinZipJPEGMetadataFoundStartOfScan;
+			}
+			break;
+
+
+			case 0xd9: // End of image
+				return WinZipJPEGMetadataFoundEndOfImage;
+
+			default:
+			{
+				int size=ParseSize(ptr,end);
+				if(!size) return WinZipJPEGMetadataParsingFailed;
+				ptr+=size;
+
+				DebugPrint("Unknown marker %02x\n",ptr[-1]);
+			}
+			break;
+		}
+	}
+}
+
+// Find next marker, skipping pad bytes.
+static const uint8_t *FindNextMarker(const uint8_t *ptr,const uint8_t *end)
+{
+	if(ptr>=end) return NULL;
+	if(*ptr!=0xff) return NULL;
+
+	while(*ptr==0xff)
+	{
+		ptr++;
+		if(ptr>=end) return NULL;
+	}
+
+	return ptr;
+}
+
+// Parse and sanity check the size of a marker.
+static int ParseSize(const uint8_t *ptr,const uint8_t *end)
+{
+	if(ptr+2>end) return 0;
+
+	int size=ParseUInt16(ptr);
+	if(size<2) return 0;
+	if(ptr+size>end) return 0;
+
+	return size;
+}
+
diff --git a/WinZipJPEG/JPEG.h b/WinZipJPEG/JPEG.h
index 6782ec8b..e1531939 100644
--- a/WinZipJPEG/JPEG.h
+++ b/WinZipJPEG/JPEG.h
@@ -1,89 +1,89 @@
-/*
- * JPEG.h
- *
- * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301  USA
- */
-#ifndef __WINZIP_JPEG_JPEG_H__
-#define __WINZIP_JPEG_JPEG_H__
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
-#define WinZipJPEGMetadataFoundStartOfScan 1
-#define WinZipJPEGMetadataFoundEndOfImage 2
-#define WinZipJPEGMetadataParsingFailed 3
-
-typedef struct WinZipJPEGBlock
-{
-	int16_t c[64];
-	uint8_t eob;
-} WinZipJPEGBlock;
-
-typedef struct WinZipJPEGQuantizationTable
-{
-	int16_t c[64];
-} WinZipJPEGQuantizationTable;
-
-typedef struct WinZipJPEGHuffmanCode
-{
-	unsigned int code,length;
-} WinZipJPEGHuffmanCode;
-
-typedef struct WinZipJPEGHuffmanTable
-{
-	WinZipJPEGHuffmanCode codes[256];
-} WinZipJPEGHuffmanTable;
-
-typedef struct WinZipJPEGComponent
-{
-	unsigned int identifier;
-	unsigned int horizontalfactor,verticalfactor;
-	WinZipJPEGQuantizationTable *quantizationtable;
-} WinZipJPEGComponent;
-
-typedef struct WinZipJPEGScanComponent
-{
-	WinZipJPEGComponent *component;
-	WinZipJPEGHuffmanTable *dctable,*actable;
-} WinZipJPEGScanComponent;
-
-typedef struct WinZipJPEGMetadata
-{
-	unsigned int width,height,bits;
-	unsigned int restartinterval;
-
-	unsigned int maxhorizontalfactor,maxverticalfactor;
-	unsigned int horizontalmcus,verticalmcus;
-
-	unsigned int numcomponents;
-	WinZipJPEGComponent components[4];
-
-	unsigned int numscancomponents;
-	WinZipJPEGScanComponent scancomponents[4];
-
-	WinZipJPEGQuantizationTable quantizationtables[4];
-	WinZipJPEGHuffmanTable huffmantables[2][4];
-} WinZipJPEGMetadata;
-
-const void *FindStartOfWinZipJPEGImage(const void *bytes,size_t length);
-
-void InitializeWinZipJPEGMetadata(WinZipJPEGMetadata *self);
-int ParseWinZipJPEGMetadata(WinZipJPEGMetadata *self,const void *bytes,size_t length);
-
-#endif
+/*
+ * JPEG.h
+ *
+ * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301  USA
+ */
+#ifndef __WINZIP_JPEG_JPEG_H__
+#define __WINZIP_JPEG_JPEG_H__
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#define WinZipJPEGMetadataFoundStartOfScan 1
+#define WinZipJPEGMetadataFoundEndOfImage 2
+#define WinZipJPEGMetadataParsingFailed 3
+
+typedef struct WinZipJPEGBlock
+{
+	int16_t c[64];
+	uint8_t eob;
+} WinZipJPEGBlock;
+
+typedef struct WinZipJPEGQuantizationTable
+{
+	int16_t c[64];
+} WinZipJPEGQuantizationTable;
+
+typedef struct WinZipJPEGHuffmanCode
+{
+	unsigned int code,length;
+} WinZipJPEGHuffmanCode;
+
+typedef struct WinZipJPEGHuffmanTable
+{
+	WinZipJPEGHuffmanCode codes[256];
+} WinZipJPEGHuffmanTable;
+
+typedef struct WinZipJPEGComponent
+{
+	unsigned int identifier;
+	unsigned int horizontalfactor,verticalfactor;
+	WinZipJPEGQuantizationTable *quantizationtable;
+} WinZipJPEGComponent;
+
+typedef struct WinZipJPEGScanComponent
+{
+	WinZipJPEGComponent *component;
+	WinZipJPEGHuffmanTable *dctable,*actable;
+} WinZipJPEGScanComponent;
+
+typedef struct WinZipJPEGMetadata
+{
+	unsigned int width,height,bits;
+	unsigned int restartinterval;
+
+	unsigned int maxhorizontalfactor,maxverticalfactor;
+	unsigned int horizontalmcus,verticalmcus;
+
+	unsigned int numcomponents;
+	WinZipJPEGComponent components[4];
+
+	unsigned int numscancomponents;
+	WinZipJPEGScanComponent scancomponents[4];
+
+	WinZipJPEGQuantizationTable quantizationtables[4];
+	WinZipJPEGHuffmanTable huffmantables[2][4];
+} WinZipJPEGMetadata;
+
+const void *FindStartOfWinZipJPEGImage(const void *bytes,size_t length);
+
+void InitializeWinZipJPEGMetadata(WinZipJPEGMetadata *self);
+int ParseWinZipJPEGMetadata(WinZipJPEGMetadata *self,const void *bytes,size_t length);
+
+#endif
diff --git a/lzma/Bra.c b/lzma/Bra.c
index e405373e..68aa77ea 100755
--- a/lzma/Bra.c
+++ b/lzma/Bra.c
@@ -1,133 +1,133 @@
-/* Bra.c -- Converters for RISC code
-2008-10-04 : Igor Pavlov : Public domain */
-
-#include "Bra.h"
-
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
-  SizeT i;
-  if (size < 4)
-    return 0;
-  size -= 4;
-  ip += 8;
-  for (i = 0; i <= size; i += 4)
-  {
-    if (data[i + 3] == 0xEB)
-    {
-      UInt32 dest;
-      UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]);
-      src <<= 2;
-      if (encoding)
-        dest = ip + (UInt32)i + src;
-      else
-        dest = src - (ip + (UInt32)i);
-      dest >>= 2;
-      data[i + 2] = (Byte)(dest >> 16);
-      data[i + 1] = (Byte)(dest >> 8);
-      data[i + 0] = (Byte)dest;
-    }
-  }
-  return i;
-}
-
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
-  SizeT i;
-  if (size < 4)
-    return 0;
-  size -= 4;
-  ip += 4;
-  for (i = 0; i <= size; i += 2)
-  {
-    if ((data[i + 1] & 0xF8) == 0xF0 &&
-        (data[i + 3] & 0xF8) == 0xF8)
-    {
-      UInt32 dest;
-      UInt32 src =
-        (((UInt32)data[i + 1] & 0x7) << 19) |
-        ((UInt32)data[i + 0] << 11) |
-        (((UInt32)data[i + 3] & 0x7) << 8) |
-        (data[i + 2]);
-      
-      src <<= 1;
-      if (encoding)
-        dest = ip + (UInt32)i + src;
-      else
-        dest = src - (ip + (UInt32)i);
-      dest >>= 1;
-      
-      data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7));
-      data[i + 0] = (Byte)(dest >> 11);
-      data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7));
-      data[i + 2] = (Byte)dest;
-      i += 2;
-    }
-  }
-  return i;
-}
-
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
-  SizeT i;
-  if (size < 4)
-    return 0;
-  size -= 4;
-  for (i = 0; i <= size; i += 4)
-  {
-    if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1)
-    {
-      UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) |
-        ((UInt32)data[i + 1] << 16) |
-        ((UInt32)data[i + 2] << 8) |
-        ((UInt32)data[i + 3] & (~3));
-      
-      UInt32 dest;
-      if (encoding)
-        dest = ip + (UInt32)i + src;
-      else
-        dest = src - (ip + (UInt32)i);
-      data[i + 0] = (Byte)(0x48 | ((dest >> 24) &  0x3));
-      data[i + 1] = (Byte)(dest >> 16);
-      data[i + 2] = (Byte)(dest >> 8);
-      data[i + 3] &= 0x3;
-      data[i + 3] |= dest;
-    }
-  }
-  return i;
-}
-
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
-  UInt32 i;
-  if (size < 4)
-    return 0;
-  size -= 4;
-  for (i = 0; i <= size; i += 4)
-  {
-    if ((data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00) ||
-        (data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0))
-    {
-      UInt32 src =
-        ((UInt32)data[i + 0] << 24) |
-        ((UInt32)data[i + 1] << 16) |
-        ((UInt32)data[i + 2] << 8) |
-        ((UInt32)data[i + 3]);
-      UInt32 dest;
-      
-      src <<= 2;
-      if (encoding)
-        dest = ip + i + src;
-      else
-        dest = src - (ip + i);
-      dest >>= 2;
-      
-      dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000;
-
-      data[i + 0] = (Byte)(dest >> 24);
-      data[i + 1] = (Byte)(dest >> 16);
-      data[i + 2] = (Byte)(dest >> 8);
-      data[i + 3] = (Byte)dest;
-    }
-  }
-  return i;
-}
+/* Bra.c -- Converters for RISC code
+2008-10-04 : Igor Pavlov : Public domain */
+
+#include "Bra.h"
+
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT i;
+  if (size < 4)
+    return 0;
+  size -= 4;
+  ip += 8;
+  for (i = 0; i <= size; i += 4)
+  {
+    if (data[i + 3] == 0xEB)
+    {
+      UInt32 dest;
+      UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]);
+      src <<= 2;
+      if (encoding)
+        dest = ip + (UInt32)i + src;
+      else
+        dest = src - (ip + (UInt32)i);
+      dest >>= 2;
+      data[i + 2] = (Byte)(dest >> 16);
+      data[i + 1] = (Byte)(dest >> 8);
+      data[i + 0] = (Byte)dest;
+    }
+  }
+  return i;
+}
+
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT i;
+  if (size < 4)
+    return 0;
+  size -= 4;
+  ip += 4;
+  for (i = 0; i <= size; i += 2)
+  {
+    if ((data[i + 1] & 0xF8) == 0xF0 &&
+        (data[i + 3] & 0xF8) == 0xF8)
+    {
+      UInt32 dest;
+      UInt32 src =
+        (((UInt32)data[i + 1] & 0x7) << 19) |
+        ((UInt32)data[i + 0] << 11) |
+        (((UInt32)data[i + 3] & 0x7) << 8) |
+        (data[i + 2]);
+      
+      src <<= 1;
+      if (encoding)
+        dest = ip + (UInt32)i + src;
+      else
+        dest = src - (ip + (UInt32)i);
+      dest >>= 1;
+      
+      data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7));
+      data[i + 0] = (Byte)(dest >> 11);
+      data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7));
+      data[i + 2] = (Byte)dest;
+      i += 2;
+    }
+  }
+  return i;
+}
+
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT i;
+  if (size < 4)
+    return 0;
+  size -= 4;
+  for (i = 0; i <= size; i += 4)
+  {
+    if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1)
+    {
+      UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) |
+        ((UInt32)data[i + 1] << 16) |
+        ((UInt32)data[i + 2] << 8) |
+        ((UInt32)data[i + 3] & (~3));
+      
+      UInt32 dest;
+      if (encoding)
+        dest = ip + (UInt32)i + src;
+      else
+        dest = src - (ip + (UInt32)i);
+      data[i + 0] = (Byte)(0x48 | ((dest >> 24) &  0x3));
+      data[i + 1] = (Byte)(dest >> 16);
+      data[i + 2] = (Byte)(dest >> 8);
+      data[i + 3] &= 0x3;
+      data[i + 3] |= dest;
+    }
+  }
+  return i;
+}
+
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  UInt32 i;
+  if (size < 4)
+    return 0;
+  size -= 4;
+  for (i = 0; i <= size; i += 4)
+  {
+    if ((data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00) ||
+        (data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0))
+    {
+      UInt32 src =
+        ((UInt32)data[i + 0] << 24) |
+        ((UInt32)data[i + 1] << 16) |
+        ((UInt32)data[i + 2] << 8) |
+        ((UInt32)data[i + 3]);
+      UInt32 dest;
+      
+      src <<= 2;
+      if (encoding)
+        dest = ip + i + src;
+      else
+        dest = src - (ip + i);
+      dest >>= 2;
+      
+      dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000;
+
+      data[i + 0] = (Byte)(dest >> 24);
+      data[i + 1] = (Byte)(dest >> 16);
+      data[i + 2] = (Byte)(dest >> 8);
+      data[i + 3] = (Byte)dest;
+    }
+  }
+  return i;
+}
diff --git a/lzma/Bra.h b/lzma/Bra.h
index 9c91e332..5748c1c0 100755
--- a/lzma/Bra.h
+++ b/lzma/Bra.h
@@ -1,68 +1,68 @@
-/* Bra.h -- Branch converters for executables
-2009-02-07 : Igor Pavlov : Public domain */
-
-#ifndef __BRA_H
-#define __BRA_H
-
-#include "Types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-These functions convert relative addresses to absolute addresses
-in CALL instructions to increase the compression ratio.
-  
-  In:
-    data     - data buffer
-    size     - size of data
-    ip       - current virtual Instruction Pinter (IP) value
-    state    - state variable for x86 converter
-    encoding - 0 (for decoding), 1 (for encoding)
-  
-  Out:
-    state    - state variable for x86 converter
-
-  Returns:
-    The number of processed bytes. If you call these functions with multiple calls,
-    you must start next call with first byte after block of processed bytes.
-  
-  Type   Endian  Alignment  LookAhead
-  
-  x86    little      1          4
-  ARMT   little      2          2
-  ARM    little      4          0
-  PPC     big        4          0
-  SPARC   big        4          0
-  IA64   little     16          0
-
-  size must be >= Alignment + LookAhead, if it's not last block.
-  If (size < Alignment + LookAhead), converter returns 0.
-
-  Example:
-
-    UInt32 ip = 0;
-    for ()
-    {
-      ; size must be >= Alignment + LookAhead, if it's not last block
-      SizeT processed = Convert(data, size, ip, 1);
-      data += processed;
-      size -= processed;
-      ip += processed;
-    }
-*/
-
-#define x86_Convert_Init(state) { state = 0; }
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/* Bra.h -- Branch converters for executables
+2009-02-07 : Igor Pavlov : Public domain */
+
+#ifndef __BRA_H
+#define __BRA_H
+
+#include "Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+These functions convert relative addresses to absolute addresses
+in CALL instructions to increase the compression ratio.
+  
+  In:
+    data     - data buffer
+    size     - size of data
+    ip       - current virtual Instruction Pinter (IP) value
+    state    - state variable for x86 converter
+    encoding - 0 (for decoding), 1 (for encoding)
+  
+  Out:
+    state    - state variable for x86 converter
+
+  Returns:
+    The number of processed bytes. If you call these functions with multiple calls,
+    you must start next call with first byte after block of processed bytes.
+  
+  Type   Endian  Alignment  LookAhead
+  
+  x86    little      1          4
+  ARMT   little      2          2
+  ARM    little      4          0
+  PPC     big        4          0
+  SPARC   big        4          0
+  IA64   little     16          0
+
+  size must be >= Alignment + LookAhead, if it's not last block.
+  If (size < Alignment + LookAhead), converter returns 0.
+
+  Example:
+
+    UInt32 ip = 0;
+    for ()
+    {
+      ; size must be >= Alignment + LookAhead, if it's not last block
+      SizeT processed = Convert(data, size, ip, 1);
+      data += processed;
+      size -= processed;
+      ip += processed;
+    }
+*/
+
+#define x86_Convert_Init(state) { state = 0; }
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lzma/Bra86.c b/lzma/Bra86.c
index 93566cb2..1ee0e709 100755
--- a/lzma/Bra86.c
+++ b/lzma/Bra86.c
@@ -1,85 +1,85 @@
-/* Bra86.c -- Converter for x86 code (BCJ)
-2008-10-04 : Igor Pavlov : Public domain */
-
-#include "Bra.h"
-
-#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF)
-
-const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0};
-const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3};
-
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
-{
-  SizeT bufferPos = 0, prevPosT;
-  UInt32 prevMask = *state & 0x7;
-  if (size < 5)
-    return 0;
-  ip += 5;
-  prevPosT = (SizeT)0 - 1;
-
-  for (;;)
-  {
-    Byte *p = data + bufferPos;
-    Byte *limit = data + size - 4;
-    for (; p < limit; p++)
-      if ((*p & 0xFE) == 0xE8)
-        break;
-    bufferPos = (SizeT)(p - data);
-    if (p >= limit)
-      break;
-    prevPosT = bufferPos - prevPosT;
-    if (prevPosT > 3)
-      prevMask = 0;
-    else
-    {
-      prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7;
-      if (prevMask != 0)
-      {
-        Byte b = p[4 - kMaskToBitNumber[prevMask]];
-        if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b))
-        {
-          prevPosT = bufferPos;
-          prevMask = ((prevMask << 1) & 0x7) | 1;
-          bufferPos++;
-          continue;
-        }
-      }
-    }
-    prevPosT = bufferPos;
-
-    if (Test86MSByte(p[4]))
-    {
-      UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
-      UInt32 dest;
-      for (;;)
-      {
-        Byte b;
-        int index;
-        if (encoding)
-          dest = (ip + (UInt32)bufferPos) + src;
-        else
-          dest = src - (ip + (UInt32)bufferPos);
-        if (prevMask == 0)
-          break;
-        index = kMaskToBitNumber[prevMask] * 8;
-        b = (Byte)(dest >> (24 - index));
-        if (!Test86MSByte(b))
-          break;
-        src = dest ^ ((1 << (32 - index)) - 1);
-      }
-      p[4] = (Byte)(~(((dest >> 24) & 1) - 1));
-      p[3] = (Byte)(dest >> 16);
-      p[2] = (Byte)(dest >> 8);
-      p[1] = (Byte)dest;
-      bufferPos += 5;
-    }
-    else
-    {
-      prevMask = ((prevMask << 1) & 0x7) | 1;
-      bufferPos++;
-    }
-  }
-  prevPosT = bufferPos - prevPosT;
-  *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7));
-  return bufferPos;
-}
+/* Bra86.c -- Converter for x86 code (BCJ)
+2008-10-04 : Igor Pavlov : Public domain */
+
+#include "Bra.h"
+
+#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF)
+
+const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0};
+const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3};
+
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
+{
+  SizeT bufferPos = 0, prevPosT;
+  UInt32 prevMask = *state & 0x7;
+  if (size < 5)
+    return 0;
+  ip += 5;
+  prevPosT = (SizeT)0 - 1;
+
+  for (;;)
+  {
+    Byte *p = data + bufferPos;
+    Byte *limit = data + size - 4;
+    for (; p < limit; p++)
+      if ((*p & 0xFE) == 0xE8)
+        break;
+    bufferPos = (SizeT)(p - data);
+    if (p >= limit)
+      break;
+    prevPosT = bufferPos - prevPosT;
+    if (prevPosT > 3)
+      prevMask = 0;
+    else
+    {
+      prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7;
+      if (prevMask != 0)
+      {
+        Byte b = p[4 - kMaskToBitNumber[prevMask]];
+        if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b))
+        {
+          prevPosT = bufferPos;
+          prevMask = ((prevMask << 1) & 0x7) | 1;
+          bufferPos++;
+          continue;
+        }
+      }
+    }
+    prevPosT = bufferPos;
+
+    if (Test86MSByte(p[4]))
+    {
+      UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
+      UInt32 dest;
+      for (;;)
+      {
+        Byte b;
+        int index;
+        if (encoding)
+          dest = (ip + (UInt32)bufferPos) + src;
+        else
+          dest = src - (ip + (UInt32)bufferPos);
+        if (prevMask == 0)
+          break;
+        index = kMaskToBitNumber[prevMask] * 8;
+        b = (Byte)(dest >> (24 - index));
+        if (!Test86MSByte(b))
+          break;
+        src = dest ^ ((1 << (32 - index)) - 1);
+      }
+      p[4] = (Byte)(~(((dest >> 24) & 1) - 1));
+      p[3] = (Byte)(dest >> 16);
+      p[2] = (Byte)(dest >> 8);
+      p[1] = (Byte)dest;
+      bufferPos += 5;
+    }
+    else
+    {
+      prevMask = ((prevMask << 1) & 0x7) | 1;
+      bufferPos++;
+    }
+  }
+  prevPosT = bufferPos - prevPosT;
+  *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7));
+  return bufferPos;
+}
diff --git a/lzma/BraIA64.c b/lzma/BraIA64.c
index f359f16a..0b4ee85b 100755
--- a/lzma/BraIA64.c
+++ b/lzma/BraIA64.c
@@ -1,67 +1,67 @@
-/* BraIA64.c -- Converter for IA-64 code
-2008-10-04 : Igor Pavlov : Public domain */
-
-#include "Bra.h"
-
-static const Byte kBranchTable[32] =
-{
-  0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0,
-  4, 4, 6, 6, 0, 0, 7, 7,
-  4, 4, 0, 0, 4, 4, 0, 0
-};
-
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
-{
-  SizeT i;
-  if (size < 16)
-    return 0;
-  size -= 16;
-  for (i = 0; i <= size; i += 16)
-  {
-    UInt32 instrTemplate = data[i] & 0x1F;
-    UInt32 mask = kBranchTable[instrTemplate];
-    UInt32 bitPos = 5;
-    int slot;
-    for (slot = 0; slot < 3; slot++, bitPos += 41)
-    {
-      UInt32 bytePos, bitRes;
-      UInt64 instruction, instNorm;
-      int j;
-      if (((mask >> slot) & 1) == 0)
-        continue;
-      bytePos = (bitPos >> 3);
-      bitRes = bitPos & 0x7;
-      instruction = 0;
-      for (j = 0; j < 6; j++)
-        instruction += (UInt64)data[i + j + bytePos] << (8 * j);
-
-      instNorm = instruction >> bitRes;
-      if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0)
-      {
-        UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF);
-        UInt32 dest;
-        src |= ((UInt32)(instNorm >> 36) & 1) << 20;
-        
-        src <<= 4;
-        
-        if (encoding)
-          dest = ip + (UInt32)i + src;
-        else
-          dest = src - (ip + (UInt32)i);
-        
-        dest >>= 4;
-        
-        instNorm &= ~((UInt64)(0x8FFFFF) << 13);
-        instNorm |= ((UInt64)(dest & 0xFFFFF) << 13);
-        instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20));
-        
-        instruction &= (1 << bitRes) - 1;
-        instruction |= (instNorm << bitRes);
-        for (j = 0; j < 6; j++)
-          data[i + j + bytePos] = (Byte)(instruction >> (8 * j));
-      }
-    }
-  }
-  return i;
-}
+/* BraIA64.c -- Converter for IA-64 code
+2008-10-04 : Igor Pavlov : Public domain */
+
+#include "Bra.h"
+
+static const Byte kBranchTable[32] = /* indexed by (first byte of a 16-byte group & 0x1F); bit n set => slot n is examined by IA64_Convert */
+{
+  0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0,
+  4, 4, 6, 6, 0, 0, 7, 7,
+  4, 4, 0, 0, 4, 4, 0, 0
+};
+
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{ /* Rewrites matching slots of data in place, 16 bytes at a time; returns the byte count covered (a multiple of 16, 0 if size < 16). */
+  SizeT i;
+  if (size < 16)
+    return 0;
+  size -= 16; /* size is now the last offset at which a full 16-byte group can start */
+  for (i = 0; i <= size; i += 16)
+  {
+    UInt32 instrTemplate = data[i] & 0x1F;
+    UInt32 mask = kBranchTable[instrTemplate];
+    UInt32 bitPos = 5; /* slot 0 starts at bit 5; each following slot is 41 bits further on */
+    int slot;
+    for (slot = 0; slot < 3; slot++, bitPos += 41)
+    {
+      UInt32 bytePos, bitRes;
+      UInt64 instruction, instNorm;
+      int j;
+      if (((mask >> slot) & 1) == 0)
+        continue;
+      bytePos = (bitPos >> 3);
+      bitRes = bitPos & 0x7;
+      instruction = 0;
+      for (j = 0; j < 6; j++) /* load a 48-bit little-endian window that contains the slot */
+        instruction += (UInt64)data[i + j + bytePos] << (8 * j);
+
+      instNorm = instruction >> bitRes; /* slot aligned to bit 0 */
+      if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0)
+      {
+        UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF);
+        UInt32 dest;
+        src |= ((UInt32)(instNorm >> 36) & 1) << 20;
+        /* src = 20 bits taken from bit 13 plus bit 36 as bit 20; scaled by 16 for the address arithmetic */
+        src <<= 4;
+        /* encoding adds the group's position (ip + i); decoding subtracts it again */
+        if (encoding)
+          dest = ip + (UInt32)i + src;
+        else
+          dest = src - (ip + (UInt32)i);
+        /* back to 16-byte units before it is stored */
+        dest >>= 4;
+        /* clear the 20-bit field at bit 13 and the single bit at 36, then write dest into them */
+        instNorm &= ~((UInt64)(0x8FFFFF) << 13);
+        instNorm |= ((UInt64)(dest & 0xFFFFF) << 13);
+        instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20));
+        /* keep the low bitRes bits of the original window and merge the patched slot above them */
+        instruction &= (1 << bitRes) - 1;
+        instruction |= (instNorm << bitRes);
+        for (j = 0; j < 6; j++)
+          data[i + j + bytePos] = (Byte)(instruction >> (8 * j));
+      }
+    }
+  }
+  return i;
+}
diff --git a/lzma/Lzma2Dec.c b/lzma/Lzma2Dec.c
index 8f240675..7ea1cc95 100755
--- a/lzma/Lzma2Dec.c
+++ b/lzma/Lzma2Dec.c
@@ -1,356 +1,356 @@
-/* Lzma2Dec.c -- LZMA2 Decoder
-2009-05-03 : Igor Pavlov : Public domain */
-
-/* #define SHOW_DEBUG_INFO */
-
-#ifdef SHOW_DEBUG_INFO
-#include <stdio.h>
-#endif
-
-#include <string.h>
-
-#include "Lzma2Dec.h"
-
-/*
-00000000  -  EOS
-00000001 U U  -  Uncompressed Reset Dic
-00000010 U U  -  Uncompressed No Reset
-100uuuuu U U P P  -  LZMA no reset
-101uuuuu U U P P  -  LZMA reset state
-110uuuuu U U P P S  -  LZMA reset state + new prop
-111uuuuu U U P P S  -  LZMA reset state + new prop + reset dic
-
-  u, U - Unpack Size
-  P - Pack Size
-  S - Props
-*/
-
-#define LZMA2_CONTROL_LZMA (1 << 7)
-#define LZMA2_CONTROL_COPY_NO_RESET 2
-#define LZMA2_CONTROL_COPY_RESET_DIC 1
-#define LZMA2_CONTROL_EOF 0
-
-#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0)
-
-#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3)
-#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2)
-
-#define LZMA2_LCLP_MAX 4
-#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
-
-#ifdef SHOW_DEBUG_INFO
-#define PRF(x) x
-#else
-#define PRF(x)
-#endif
-
-typedef enum
-{
-  LZMA2_STATE_CONTROL,
-  LZMA2_STATE_UNPACK0,
-  LZMA2_STATE_UNPACK1,
-  LZMA2_STATE_PACK0,
-  LZMA2_STATE_PACK1,
-  LZMA2_STATE_PROP,
-  LZMA2_STATE_DATA,
-  LZMA2_STATE_DATA_CONT,
-  LZMA2_STATE_FINISHED,
-  LZMA2_STATE_ERROR
-} ELzma2State;
-
-static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
-{
-  UInt32 dicSize;
-  if (prop > 40)
-    return SZ_ERROR_UNSUPPORTED;
-  dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop);
-  props[0] = (Byte)LZMA2_LCLP_MAX;
-  props[1] = (Byte)(dicSize);
-  props[2] = (Byte)(dicSize >> 8);
-  props[3] = (Byte)(dicSize >> 16);
-  props[4] = (Byte)(dicSize >> 24);
-  return SZ_OK;
-}
-
-SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
-{
-  Byte props[LZMA_PROPS_SIZE];
-  RINOK(Lzma2Dec_GetOldProps(prop, props));
-  return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
-}
-
-SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
-{
-  Byte props[LZMA_PROPS_SIZE];
-  RINOK(Lzma2Dec_GetOldProps(prop, props));
-  return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
-}
-
-void Lzma2Dec_Init(CLzma2Dec *p)
-{
-  p->state = LZMA2_STATE_CONTROL;
-  p->needInitDic = True;
-  p->needInitState = True;
-  p->needInitProp = True;
-  LzmaDec_Init(&p->decoder);
-}
-
-static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
-{
-  switch(p->state)
-  {
-    case LZMA2_STATE_CONTROL:
-      p->control = b;
-      PRF(printf("\n %4X ", p->decoder.dicPos));
-      PRF(printf(" %2X", b));
-      if (p->control == 0)
-        return LZMA2_STATE_FINISHED;
-      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
-      {
-        if ((p->control & 0x7F) > 2)
-          return LZMA2_STATE_ERROR;
-        p->unpackSize = 0;
-      }
-      else
-        p->unpackSize = (UInt32)(p->control & 0x1F) << 16;
-      return LZMA2_STATE_UNPACK0;
-    
-    case LZMA2_STATE_UNPACK0:
-      p->unpackSize |= (UInt32)b << 8;
-      return LZMA2_STATE_UNPACK1;
-    
-    case LZMA2_STATE_UNPACK1:
-      p->unpackSize |= (UInt32)b;
-      p->unpackSize++;
-      PRF(printf(" %8d", p->unpackSize));
-      return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
-    
-    case LZMA2_STATE_PACK0:
-      p->packSize = (UInt32)b << 8;
-      return LZMA2_STATE_PACK1;
-
-    case LZMA2_STATE_PACK1:
-      p->packSize |= (UInt32)b;
-      p->packSize++;
-      PRF(printf(" %8d", p->packSize));
-      return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP:
-        (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA);
-
-    case LZMA2_STATE_PROP:
-    {
-      int lc, lp;
-      if (b >= (9 * 5 * 5))
-        return LZMA2_STATE_ERROR;
-      lc = b % 9;
-      b /= 9;
-      p->decoder.prop.pb = b / 5;
-      lp = b % 5;
-      if (lc + lp > LZMA2_LCLP_MAX)
-        return LZMA2_STATE_ERROR;
-      p->decoder.prop.lc = lc;
-      p->decoder.prop.lp = lp;
-      p->needInitProp = False;
-      return LZMA2_STATE_DATA;
-    }
-  }
-  return LZMA2_STATE_ERROR;
-}
-
-static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
-{
-  memcpy(p->dic + p->dicPos, src, size);
-  p->dicPos += size;
-  if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size)
-    p->checkDicSize = p->prop.dicSize;
-  p->processedPos += (UInt32)size;
-}
-
-void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
-
-SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
-    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
-  SizeT inSize = *srcLen;
-  *srcLen = 0;
-  *status = LZMA_STATUS_NOT_SPECIFIED;
-
-  while (p->state != LZMA2_STATE_FINISHED)
-  {
-    SizeT dicPos = p->decoder.dicPos;
-    if (p->state == LZMA2_STATE_ERROR)
-      return SZ_ERROR_DATA;
-    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
-    {
-      *status = LZMA_STATUS_NOT_FINISHED;
-      return SZ_OK;
-    }
-    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
-    {
-      if (*srcLen == inSize)
-      {
-        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
-        return SZ_OK;
-      }
-      (*srcLen)++;
-      p->state = Lzma2Dec_UpdateState(p, *src++);
-      continue;
-    }
-    {
-      SizeT destSizeCur = dicLimit - dicPos;
-      SizeT srcSizeCur = inSize - *srcLen;
-      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
-      
-      if (p->unpackSize <= destSizeCur)
-      {
-        destSizeCur = (SizeT)p->unpackSize;
-        curFinishMode = LZMA_FINISH_END;
-      }
-
-      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
-      {
-        if (*srcLen == inSize)
-        {
-          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
-          return SZ_OK;
-        }
-
-        if (p->state == LZMA2_STATE_DATA)
-        {
-          Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
-          if (initDic)
-            p->needInitProp = p->needInitState = True;
-          else if (p->needInitDic)
-            return SZ_ERROR_DATA;
-          p->needInitDic = False;
-          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
-        }
-
-        if (srcSizeCur > destSizeCur)
-          srcSizeCur = destSizeCur;
-
-        if (srcSizeCur == 0)
-          return SZ_ERROR_DATA;
-
-        LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur);
-
-        src += srcSizeCur;
-        *srcLen += srcSizeCur;
-        p->unpackSize -= (UInt32)srcSizeCur;
-        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
-      }
-      else
-      {
-        SizeT outSizeProcessed;
-        SRes res;
-
-        if (p->state == LZMA2_STATE_DATA)
-        {
-          int mode = LZMA2_GET_LZMA_MODE(p);
-          Bool initDic = (mode == 3);
-          Bool initState = (mode > 0);
-          if ((!initDic && p->needInitDic) || (!initState && p->needInitState))
-            return SZ_ERROR_DATA;
-          
-          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
-          p->needInitDic = False;
-          p->needInitState = False;
-          p->state = LZMA2_STATE_DATA_CONT;
-        }
-        if (srcSizeCur > p->packSize)
-          srcSizeCur = (SizeT)p->packSize;
-          
-        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status);
-        
-        src += srcSizeCur;
-        *srcLen += srcSizeCur;
-        p->packSize -= (UInt32)srcSizeCur;
-
-        outSizeProcessed = p->decoder.dicPos - dicPos;
-        p->unpackSize -= (UInt32)outSizeProcessed;
-
-        RINOK(res);
-        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
-          return res;
-
-        if (srcSizeCur == 0 && outSizeProcessed == 0)
-        {
-          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK ||
-              p->unpackSize != 0 || p->packSize != 0)
-            return SZ_ERROR_DATA;
-          p->state = LZMA2_STATE_CONTROL;
-        }
-        if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
-          *status = LZMA_STATUS_NOT_FINISHED;
-      }
-    }
-  }
-  *status = LZMA_STATUS_FINISHED_WITH_MARK;
-  return SZ_OK;
-}
-
-SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
-  SizeT outSize = *destLen, inSize = *srcLen;
-  *srcLen = *destLen = 0;
-  for (;;)
-  {
-    SizeT srcSizeCur = inSize, outSizeCur, dicPos;
-    ELzmaFinishMode curFinishMode;
-    SRes res;
-    if (p->decoder.dicPos == p->decoder.dicBufSize)
-      p->decoder.dicPos = 0;
-    dicPos = p->decoder.dicPos;
-    if (outSize > p->decoder.dicBufSize - dicPos)
-    {
-      outSizeCur = p->decoder.dicBufSize;
-      curFinishMode = LZMA_FINISH_ANY;
-    }
-    else
-    {
-      outSizeCur = dicPos + outSize;
-      curFinishMode = finishMode;
-    }
-
-    res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status);
-    src += srcSizeCur;
-    inSize -= srcSizeCur;
-    *srcLen += srcSizeCur;
-    outSizeCur = p->decoder.dicPos - dicPos;
-    memcpy(dest, p->decoder.dic + dicPos, outSizeCur);
-    dest += outSizeCur;
-    outSize -= outSizeCur;
-    *destLen += outSizeCur;
-    if (res != 0)
-      return res;
-    if (outSizeCur == 0 || outSize == 0)
-      return SZ_OK;
-  }
-}
-
-SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
-    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc)
-{
-  CLzma2Dec decoder;
-  SRes res;
-  SizeT outSize = *destLen, inSize = *srcLen;
-  Byte props[LZMA_PROPS_SIZE];
-
-  Lzma2Dec_Construct(&decoder);
-
-  *destLen = *srcLen = 0;
-  *status = LZMA_STATUS_NOT_SPECIFIED;
-  decoder.decoder.dic = dest;
-  decoder.decoder.dicBufSize = outSize;
-
-  RINOK(Lzma2Dec_GetOldProps(prop, props));
-  RINOK(LzmaDec_AllocateProbs(&decoder.decoder, props, LZMA_PROPS_SIZE, alloc));
-  
-  *srcLen = inSize;
-  res = Lzma2Dec_DecodeToDic(&decoder, outSize, src, srcLen, finishMode, status);
-  *destLen = decoder.decoder.dicPos;
-  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
-    res = SZ_ERROR_INPUT_EOF;
-
-  LzmaDec_FreeProbs(&decoder.decoder, alloc);
-  return res;
-}
+/* Lzma2Dec.c -- LZMA2 Decoder
+2009-05-03 : Igor Pavlov : Public domain */
+
+/* #define SHOW_DEBUG_INFO */
+
+#ifdef SHOW_DEBUG_INFO
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "Lzma2Dec.h"
+
+/*
+00000000  -  EOS
+00000001 U U  -  Uncompressed Reset Dic
+00000010 U U  -  Uncompressed No Reset
+100uuuuu U U P P  -  LZMA no reset
+101uuuuu U U P P  -  LZMA reset state
+110uuuuu U U P P S  -  LZMA reset state + new prop
+111uuuuu U U P P S  -  LZMA reset state + new prop + reset dic
+
+  u, U - Unpack Size
+  P - Pack Size
+  S - Props
+*/
+
+#define LZMA2_CONTROL_LZMA (1 << 7) /* control-byte bit 7: set for LZMA chunks, clear for stored chunks */
+#define LZMA2_CONTROL_COPY_NO_RESET 2
+#define LZMA2_CONTROL_COPY_RESET_DIC 1
+#define LZMA2_CONTROL_EOF 0
+/* True when the current control byte has bit 7 clear, i.e. the chunk payload is stored uncompressed. */
+#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0)
+/* Bits 5..6 of the control byte: 0 = no reset, 1 = reset state, 2 = + new props, 3 = + reset dictionary. */
+#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3)
+#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2)
+/* Largest accepted lc + lp; dictionary size is (2 or 3) << (p / 2 + 11), so p = 0 gives 4096. */
+#define LZMA2_LCLP_MAX 4
+#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
+/* PRF(x) emits x only when SHOW_DEBUG_INFO is defined and expands to nothing otherwise. */
+#ifdef SHOW_DEBUG_INFO
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+typedef enum
+{
+  LZMA2_STATE_CONTROL,   /* next byte is a chunk control byte */
+  LZMA2_STATE_UNPACK0,   /* next byte is bits 8..15 of the unpack size */
+  LZMA2_STATE_UNPACK1,   /* next byte is bits 0..7 of the unpack size */
+  LZMA2_STATE_PACK0,     /* next byte is bits 8..15 of the pack size */
+  LZMA2_STATE_PACK1,     /* next byte is bits 0..7 of the pack size */
+  LZMA2_STATE_PROP,      /* next byte is the lc/lp/pb properties byte */
+  LZMA2_STATE_DATA,      /* header complete; chunk payload not started yet */
+  LZMA2_STATE_DATA_CONT, /* chunk payload partly processed */
+  LZMA2_STATE_FINISHED,  /* control byte 0 was read */
+  LZMA2_STATE_ERROR      /* invalid header byte or byte in an unexpected state */
+} ELzma2State;
+
+static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
+{ /* Expands the one-byte LZMA2 dictionary property into a 5-byte props array; returns SZ_ERROR_UNSUPPORTED for prop > 40. */
+  UInt32 dicSize;
+  if (prop > 40)
+    return SZ_ERROR_UNSUPPORTED;
+  dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); /* 40 is special-cased to the largest UInt32 value */
+  props[0] = (Byte)LZMA2_LCLP_MAX; /* NOTE(review): presumably read as the lc/lp/pb byte by the LZMA allocator - confirm in LzmaDec.c */
+  props[1] = (Byte)(dicSize); /* props[1..4] = dicSize, least significant byte first */
+  props[2] = (Byte)(dicSize >> 8);
+  props[3] = (Byte)(dicSize >> 16);
+  props[4] = (Byte)(dicSize >> 24);
+  return SZ_OK;
+}
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{ /* Converts prop with Lzma2Dec_GetOldProps and forwards to LzmaDec_AllocateProbs on the inner decoder; returns the first non-zero SRes. */
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props));
+  return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{ /* Same conversion as Lzma2Dec_AllocateProbs but forwards to LzmaDec_Allocate (presumably also allocates the dictionary - confirm in LzmaDec.c). */
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props));
+  return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+
+void Lzma2Dec_Init(CLzma2Dec *p)
+{ /* Puts the LZMA2 layer at the start of a stream and initialises the inner decoder via LzmaDec_Init. */
+  p->state = LZMA2_STATE_CONTROL; /* the first input byte will be read as a control byte */
+  p->needInitDic = True; /* the first chunk must reset the dictionary, ... */
+  p->needInitState = True; /* ... the first LZMA chunk must reset the state ... */
+  p->needInitProp = True; /* ... and carry a props byte; otherwise decoding reports a data error */
+  LzmaDec_Init(&p->decoder);
+}
+
+static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+{ /* Feeds one chunk-header byte b to the header state machine and returns the next state; any non-header p->state yields LZMA2_STATE_ERROR. */
+  switch(p->state)
+  {
+    case LZMA2_STATE_CONTROL:
+      p->control = b;
+      PRF(printf("\n %4X ", p->decoder.dicPos));
+      PRF(printf(" %2X", b));
+      if (p->control == 0)
+        return LZMA2_STATE_FINISHED;
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if ((p->control & 0x7F) > 2) /* stored chunks only use control bytes 1 and 2 */
+          return LZMA2_STATE_ERROR;
+        p->unpackSize = 0;
+      }
+      else
+        p->unpackSize = (UInt32)(p->control & 0x1F) << 16; /* low 5 control bits become bits 16..20 of the size */
+      return LZMA2_STATE_UNPACK0;
+    /* unpack size: the next byte supplies bits 8..15 */
+    case LZMA2_STATE_UNPACK0:
+      p->unpackSize |= (UInt32)b << 8;
+      return LZMA2_STATE_UNPACK1;
+    /* ... then bits 0..7; the encoded value is one less than the real size */
+    case LZMA2_STATE_UNPACK1:
+      p->unpackSize |= (UInt32)b;
+      p->unpackSize++;
+      PRF(printf(" %8d", p->unpackSize));
+      return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
+    /* pack size (LZMA chunks only): high byte, low byte, again stored minus one */
+    case LZMA2_STATE_PACK0:
+      p->packSize = (UInt32)b << 8;
+      return LZMA2_STATE_PACK1;
+
+    case LZMA2_STATE_PACK1:
+      p->packSize |= (UInt32)b;
+      p->packSize++;
+      PRF(printf(" %8d", p->packSize));
+      return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP:
+        (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA); /* a chunk without a props byte is an error while props are still unset */
+
+    case LZMA2_STATE_PROP:
+    { /* b encodes (pb * 5 + lp) * 9 + lc */
+      int lc, lp;
+      if (b >= (9 * 5 * 5))
+        return LZMA2_STATE_ERROR;
+      lc = b % 9;
+      b /= 9;
+      p->decoder.prop.pb = b / 5;
+      lp = b % 5;
+      if (lc + lp > LZMA2_LCLP_MAX)
+        return LZMA2_STATE_ERROR;
+      p->decoder.prop.lc = lc;
+      p->decoder.prop.lp = lp;
+      p->needInitProp = False;
+      return LZMA2_STATE_DATA;
+    }
+  }
+  return LZMA2_STATE_ERROR;
+}
+
+static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
+{ /* Appends size raw bytes from src to the dictionary at dicPos and advances dicPos and processedPos; performs no bounds check itself. */
+  memcpy(p->dic + p->dicPos, src, size);
+  p->dicPos += size;
+  if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) /* latch checkDicSize once this copy takes processedPos to dicSize or beyond */
+    p->checkDicSize = p->prop.dicSize;
+  p->processedPos += (UInt32)size;
+}
+/* NOTE(review): not declared in Lzma2Dec.h; presumably implemented in LzmaDec.c - confirm. */
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{ /* Decodes LZMA2 chunks from src into the decoder dictionary up to dicLimit; on return *srcLen is the input consumed and *status the stop reason. */
+  SizeT inSize = *srcLen;
+  *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->state != LZMA2_STATE_FINISHED)
+  {
+    SizeT dicPos = p->decoder.dicPos;
+    if (p->state == LZMA2_STATE_ERROR)
+      return SZ_ERROR_DATA;
+    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) /* output limit reached and the caller does not insist on the end marker */
+    {
+      *status = LZMA_STATUS_NOT_FINISHED;
+      return SZ_OK;
+    }
+    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) /* header states: consume exactly one byte per iteration */
+    {
+      if (*srcLen == inSize)
+      {
+        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+        return SZ_OK;
+      }
+      (*srcLen)++;
+      p->state = Lzma2Dec_UpdateState(p, *src++);
+      continue;
+    }
+    {
+      SizeT destSizeCur = dicLimit - dicPos;
+      SizeT srcSizeCur = inSize - *srcLen;
+      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
+      /* if the rest of the chunk fits, cap output at the chunk end and ask the LZMA decoder to finish exactly there */
+      if (p->unpackSize <= destSizeCur)
+      {
+        destSizeCur = (SizeT)p->unpackSize;
+        curFinishMode = LZMA_FINISH_END;
+      }
+
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if (*srcLen == inSize)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+
+        if (p->state == LZMA2_STATE_DATA) /* first payload bytes of this stored chunk: apply its reset flags once */
+        {
+          Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
+          if (initDic)
+            p->needInitProp = p->needInitState = True;
+          else if (p->needInitDic)
+            return SZ_ERROR_DATA;
+          p->needInitDic = False;
+          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
+        }
+
+        if (srcSizeCur > destSizeCur)
+          srcSizeCur = destSizeCur;
+        /* input is non-empty here, so 0 means destSizeCur == 0: the dictionary limit was hit with chunk data still pending */
+        if (srcSizeCur == 0)
+          return SZ_ERROR_DATA;
+
+        LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur);
+
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->unpackSize -= (UInt32)srcSizeCur;
+        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
+      }
+      else
+      {
+        SizeT outSizeProcessed;
+        SRes res;
+
+        if (p->state == LZMA2_STATE_DATA) /* first payload bytes of this LZMA chunk */
+        {
+          int mode = LZMA2_GET_LZMA_MODE(p);
+          Bool initDic = (mode == 3);
+          Bool initState = (mode > 0);
+          if ((!initDic && p->needInitDic) || (!initState && p->needInitState))
+            return SZ_ERROR_DATA;
+          /* the chunk's own reset flags satisfy any pending reset requirement from here on */
+          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
+          p->needInitDic = False;
+          p->needInitState = False;
+          p->state = LZMA2_STATE_DATA_CONT;
+        }
+        if (srcSizeCur > p->packSize)
+          srcSizeCur = (SizeT)p->packSize;
+          /* never hand the LZMA decoder more input than the current chunk owns */
+        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status);
+        /* account for consumed input and produced output before looking at res */
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->packSize -= (UInt32)srcSizeCur;
+
+        outSizeProcessed = p->decoder.dicPos - dicPos;
+        p->unpackSize -= (UInt32)outSizeProcessed;
+
+        RINOK(res);
+        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
+          return res;
+        /* no progress at all: only legal when the chunk is exactly exhausted on both sides */
+        if (srcSizeCur == 0 && outSizeProcessed == 0)
+        {
+          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK ||
+              p->unpackSize != 0 || p->packSize != 0)
+            return SZ_ERROR_DATA;
+          p->state = LZMA2_STATE_CONTROL;
+        }
+        if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+          *status = LZMA_STATUS_NOT_FINISHED;
+      }
+    }
+  }
+  *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return SZ_OK;
+}
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{ /* Decodes through the decoder's own dictionary buffer and copies each newly produced run to dest; *destLen / *srcLen return bytes written / consumed. */
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT srcSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->decoder.dicPos == p->decoder.dicBufSize) /* dictionary buffer full: continue writing from its start */
+      p->decoder.dicPos = 0;
+    dicPos = p->decoder.dicPos;
+    if (outSize > p->decoder.dicBufSize - dicPos)
+    {
+      outSizeCur = p->decoder.dicBufSize; /* more output wanted than fits before the buffer end: fill to the end, further passes follow */
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize; /* the remaining request fits: honour the caller's finish mode */
+      curFinishMode = finishMode;
+    }
+    /* outSizeCur is a dictionary limit going in and is reused below as the produced byte count */
+    res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status);
+    src += srcSizeCur;
+    inSize -= srcSizeCur;
+    *srcLen += srcSizeCur;
+    outSizeCur = p->decoder.dicPos - dicPos;
+    memcpy(dest, p->decoder.dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0) /* stop when nothing was produced or the request is satisfied */
+      return SZ_OK;
+  }
+}
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc)
+{ /* One-call decode of src straight into dest; in: *destLen / *srcLen are the buffer sizes, out: bytes written / consumed (both 0 if setup fails). */
+  CLzma2Dec decoder;
+  SRes res;
+  SizeT outSize = *destLen, inSize = *srcLen;
+  Byte props[LZMA_PROPS_SIZE];
+
+  Lzma2Dec_Construct(&decoder);
+
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  decoder.decoder.dic = dest; /* the caller's output buffer doubles as the dictionary */
+  decoder.decoder.dicBufSize = outSize;
+
+  RINOK(Lzma2Dec_GetOldProps(prop, props)); /* SZ_ERROR_UNSUPPORTED for prop > 40 */
+  RINOK(LzmaDec_AllocateProbs(&decoder.decoder, props, LZMA_PROPS_SIZE, alloc));
+  Lzma2Dec_Init(&decoder); /* Lzma2Dec_Construct only reaches the inner decoder: state and the needInit* flags of this stack object are unset until here */
+  *srcLen = inSize;
+  res = Lzma2Dec_DecodeToDic(&decoder, outSize, src, srcLen, finishMode, status);
+  *destLen = decoder.decoder.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) /* a one-call decode cannot be resumed, so running out of input is an error */
+    res = SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_FreeProbs(&decoder.decoder, alloc);
+  return res;
+}
diff --git a/lzma/Lzma2Dec.h b/lzma/Lzma2Dec.h
index 827698de..6bc07bbc 100755
--- a/lzma/Lzma2Dec.h
+++ b/lzma/Lzma2Dec.h
@@ -1,84 +1,84 @@
-/* Lzma2Dec.h -- LZMA2 Decoder
-2009-05-03 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA2_DEC_H
-#define __LZMA2_DEC_H
-
-#include "LzmaDec.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ---------- State Interface ---------- */
-
-typedef struct
-{
-  CLzmaDec decoder;
-  UInt32 packSize;
-  UInt32 unpackSize;
-  int state;
-  Byte control;
-  Bool needInitDic;
-  Bool needInitState;
-  Bool needInitProp;
-} CLzma2Dec;
-
-#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
-#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc);
-#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc);
-
-SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
-SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
-void Lzma2Dec_Init(CLzma2Dec *p);
-
-
-/*
-finishMode:
-  It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
-  LZMA_FINISH_ANY - use smallest number of input bytes
-  LZMA_FINISH_END - read EndOfStream marker after decoding
-
-Returns:
-  SZ_OK
-    status:
-      LZMA_STATUS_FINISHED_WITH_MARK
-      LZMA_STATUS_NOT_FINISHED
-      LZMA_STATUS_NEEDS_MORE_INPUT
-  SZ_ERROR_DATA - Data error
-*/
-
-SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
-    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
-    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-
-/* ---------- One Call Interface ---------- */
-
-/*
-finishMode:
-  It has meaning only if the decoding reaches output limit (*destLen).
-  LZMA_FINISH_ANY - use smallest number of input bytes
-  LZMA_FINISH_END - read EndOfStream marker after decoding
-
-Returns:
-  SZ_OK
-    status:
-      LZMA_STATUS_FINISHED_WITH_MARK
-      LZMA_STATUS_NOT_FINISHED
-  SZ_ERROR_DATA - Data error
-  SZ_ERROR_MEM  - Memory allocation error
-  SZ_ERROR_UNSUPPORTED - Unsupported properties
-  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
-*/
-
-SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
-    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/* Lzma2Dec.h -- LZMA2 Decoder
+2009-05-03 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA2_DEC_H
+#define __LZMA2_DEC_H
+
+#include "LzmaDec.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ---------- State Interface ---------- */
+
+typedef struct
+{
+  CLzmaDec decoder; /* inner LZMA decoder that owns the dictionary and probability tables */
+  UInt32 packSize; /* compressed bytes of the current chunk not yet consumed */
+  UInt32 unpackSize; /* uncompressed bytes of the current chunk not yet produced */
+  int state; /* an ELzma2State value from Lzma2Dec.c */
+  Byte control; /* control byte of the current chunk */
+  Bool needInitDic; /* a chunk that resets the dictionary must come next */
+  Bool needInitState; /* the next LZMA chunk must reset the decoder state */
+  Bool needInitProp; /* no properties byte has been accepted yet */
+} CLzma2Dec;
+
+#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) /* touches the inner decoder only; call Lzma2Dec_Init before decoding */
+#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc); /* NOTE(review): expansion ends in ';' - awkward in an unbraced if/else */
+#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc); /* NOTE(review): same trailing ';' as above */
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);
+void Lzma2Dec_Init(CLzma2Dec *p);
+
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lzma/Types.h b/lzma/Types.h
index 7632f278..04580915 100755
--- a/lzma/Types.h
+++ b/lzma/Types.h
@@ -1,222 +1,222 @@
-/* Types.h -- Basic types
-2009-08-14 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_TYPES_H
-#define __7Z_TYPES_H
-
-#include <stddef.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#ifndef EXTERN_C_BEGIN
-#ifdef __cplusplus
-#define EXTERN_C_BEGIN extern "C" {
-#define EXTERN_C_END }
-#else
-#define EXTERN_C_BEGIN
-#define EXTERN_C_END
-#endif
-#endif
-
-EXTERN_C_BEGIN
-
-#define SZ_OK 0
-
-#define SZ_ERROR_DATA 1
-#define SZ_ERROR_MEM 2
-#define SZ_ERROR_CRC 3
-#define SZ_ERROR_UNSUPPORTED 4
-#define SZ_ERROR_PARAM 5
-#define SZ_ERROR_INPUT_EOF 6
-#define SZ_ERROR_OUTPUT_EOF 7
-#define SZ_ERROR_READ 8
-#define SZ_ERROR_WRITE 9
-#define SZ_ERROR_PROGRESS 10
-#define SZ_ERROR_FAIL 11
-#define SZ_ERROR_THREAD 12
-
-#define SZ_ERROR_ARCHIVE 16
-#define SZ_ERROR_NO_ARCHIVE 17
-
-typedef int SRes;
-
-#ifdef _WIN32
-typedef DWORD WRes;
-#else
-typedef int WRes;
-#endif
-
-#ifndef RINOK
-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
-#endif
-
-typedef unsigned char Byte;
-typedef short Int16;
-typedef unsigned short UInt16;
-
-#ifdef _LZMA_UINT32_IS_ULONG
-typedef long Int32;
-typedef unsigned long UInt32;
-#else
-typedef int Int32;
-typedef unsigned int UInt32;
-#endif
-
-#ifdef _SZ_NO_INT_64
-
-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
-   NOTES: Some code will work incorrectly in that case! */
-
-typedef long Int64;
-typedef unsigned long UInt64;
-
-#else
-
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-typedef __int64 Int64;
-typedef unsigned __int64 UInt64;
-#else
-typedef long long int Int64;
-typedef unsigned long long int UInt64;
-#endif
-
-#endif
-
-#ifdef _LZMA_NO_SYSTEM_SIZE_T
-typedef UInt32 SizeT;
-#else
-typedef size_t SizeT;
-#endif
-
-typedef int Bool;
-#define True 1
-#define False 0
-
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1300
-#define MY_NO_INLINE __declspec(noinline)
-#else
-#define MY_NO_INLINE
-#endif
-
-#define MY_CDECL __cdecl
-#define MY_STD_CALL __stdcall
-#define MY_FAST_CALL MY_NO_INLINE __fastcall
-
-#else
-
-#define MY_CDECL
-#define MY_STD_CALL
-#define MY_FAST_CALL
-
-#endif
-
-
-/* The following interfaces use first parameter as pointer to structure */
-
-typedef struct
-{
-  SRes (*Read)(void *p, void *buf, size_t *size);
-    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
-       (output(*size) < input(*size)) is allowed */
-} ISeqInStream;
-
-/* it can return SZ_ERROR_INPUT_EOF */
-SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
-SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
-SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
-
-typedef struct
-{
-  size_t (*Write)(void *p, const void *buf, size_t size);
-    /* Returns: result - the number of actually written bytes.
-       (result < size) means error */
-} ISeqOutStream;
-
-typedef enum
-{
-  SZ_SEEK_SET = 0,
-  SZ_SEEK_CUR = 1,
-  SZ_SEEK_END = 2
-} ESzSeek;
-
-typedef struct
-{
-  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
-  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
-} ISeekInStream;
-
-typedef struct
-{
-  SRes (*Look)(void *p, void **buf, size_t *size);
-    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
-       (output(*size) > input(*size)) is not allowed
-       (output(*size) < input(*size)) is allowed */
-  SRes (*Skip)(void *p, size_t offset);
-    /* offset must be <= output(*size) of Look */
-
-  SRes (*Read)(void *p, void *buf, size_t *size);
-    /* reads directly (without buffer). It's same as ISeqInStream::Read */
-  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
-} ILookInStream;
-
-SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
-SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
-
-/* reads via ILookInStream::Read */
-SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
-SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
-
-#define LookToRead_BUF_SIZE (1 << 14)
-
-typedef struct
-{
-  ILookInStream s;
-  ISeekInStream *realStream;
-  size_t pos;
-  size_t size;
-  Byte buf[LookToRead_BUF_SIZE];
-} CLookToRead;
-
-void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
-void LookToRead_Init(CLookToRead *p);
-
-typedef struct
-{
-  ISeqInStream s;
-  ILookInStream *realStream;
-} CSecToLook;
-
-void SecToLook_CreateVTable(CSecToLook *p);
-
-typedef struct
-{
-  ISeqInStream s;
-  ILookInStream *realStream;
-} CSecToRead;
-
-void SecToRead_CreateVTable(CSecToRead *p);
-
-typedef struct
-{
-  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
-    /* Returns: result. (result != SZ_OK) means break.
-       Value (UInt64)(Int64)-1 for size means unknown value. */
-} ICompressProgress;
-
-typedef struct
-{
-  void *(*Alloc)(void *p, size_t size);
-  void (*Free)(void *p, void *address); /* address can be 0 */
-} ISzAlloc;
-
-#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
-#define IAlloc_Free(p, a) (p)->Free((p), a)
-
-EXTERN_C_END
-
-#endif
+/* Types.h -- Basic types
+2009-08-14 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#include <stddef.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#ifndef EXTERN_C_BEGIN /* define the linkage brackets only if nothing defined them earlier */
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else /* plain C: both brackets expand to nothing */
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif /* __cplusplus */
+#endif /* EXTERN_C_BEGIN */
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+#ifdef _WIN32
+typedef DWORD WRes;
+#else
+typedef int WRes;
+#endif
+
+#ifndef RINOK /* lets the includer supply its own RINOK before this header */
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } /* evaluates x once and returns it from the calling function when nonzero; expands to a bare { } block */
+#endif /* RINOK */
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG /* when defined, Int32/UInt32 are based on long instead of int */
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif /* _LZMA_UINT32_IS_ULONG */
+
+#ifdef _SZ_NO_INT_64
+
+/* Define _SZ_NO_INT_64 if your compiler doesn't support 64-bit integers.
+   Int64/UInt64 are then typedefs of long. NOTE: some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__) /* these compilers get the __int64 spelling */
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#else /* every other compiler uses long long */
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#endif
+
+#endif /* _SZ_NO_INT_64 */
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T /* when defined, SizeT is UInt32 rather than size_t */
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif /* _LZMA_NO_SYSTEM_SIZE_T */
+
+typedef int Bool;
+#define True 1
+#define False 0
+
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300 /* __declspec(noinline) is only emitted from this compiler version on */
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_CDECL __cdecl
+#define MY_STD_CALL __stdcall
+#define MY_FAST_CALL MY_NO_INLINE __fastcall
+
+#else /* _MSC_VER not defined: the three calling-convention macros expand to nothing */
+
+#define MY_CDECL
+#define MY_STD_CALL
+#define MY_FAST_CALL
+
+#endif /* _MSC_VER */
+
+
+/* In the following interfaces, the first parameter of each callback is a pointer to the structure */
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* *size is in/out. (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       A short read, (output(*size) < input(*size)), is allowed */
+} ISeqInStream;
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
+
+typedef struct
+{
+  size_t (*Write)(void *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+       (result < size) means error */
+} ISeqOutStream;
+
+typedef enum
+{
+  SZ_SEEK_SET = 0,
+  SZ_SEEK_CUR = 1,
+  SZ_SEEK_END = 2
+} ESzSeek;
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ISeekInStream;
+
+typedef struct
+{
+  SRes (*Look)(void *p, void **buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) > input(*size)) is not allowed
+       (output(*size) < input(*size)) is allowed */
+  SRes (*Skip)(void *p, size_t offset);
+    /* offset must be <= output(*size) of Look */
+
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* reads directly (without buffer). It's same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ILookInStream;
+
+SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
+
+#define LookToRead_BUF_SIZE (1 << 14)
+
+typedef struct
+{
+  ILookInStream s;
+  ISeekInStream *realStream;
+  size_t pos;
+  size_t size;
+  Byte buf[LookToRead_BUF_SIZE];
+} CLookToRead;
+
+void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
+void LookToRead_Init(CLookToRead *p);
+
+typedef struct
+{
+  ISeqInStream s;
+  ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+typedef struct
+{
+  ISeqInStream s;
+  ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+typedef struct
+{
+  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Passing (UInt64)(Int64)-1 as inSize or outSize means that size is unknown. */
+} ICompressProgress;
+
+typedef struct
+{
+  void *(*Alloc)(void *p, size_t size);
+  void (*Free)(void *p, void *address); /* address can be 0 */
+} ISzAlloc;
+
+#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) /* calls the Alloc callback, passing the allocator itself as its first argument */
+#define IAlloc_Free(p, a) (p)->Free((p), a) /* calls the Free callback, again passing the allocator itself first */
+
+EXTERN_C_END
+
+#endif