diff --git a/Android.mk b/Android.mk
new file mode 100644
index 0000000..e9f691d
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,125 @@
+# Android NDK build rules for the TomsFastMath static library (module "tfm").
+# Source paths below are relative to LOCAL_PATH, the directory of this file.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := tfm
+
+LOCAL_SRC_FILES := \
+    src/addsub/fp_add.c \
+    src/addsub/fp_add_d.c \
+    src/addsub/fp_addmod.c \
+    src/addsub/fp_cmp.c \
+    src/addsub/fp_cmp_d.c \
+    src/addsub/fp_cmp_mag.c \
+    src/addsub/fp_sub.c \
+    src/addsub/fp_sub_d.c \
+    src/addsub/fp_submod.c \
+    src/addsub/s_fp_add.c \
+    src/addsub/s_fp_sub.c \
+    src/bin/fp_radix_size.c \
+    src/bin/fp_read_radix.c \
+    src/bin/fp_read_signed_bin.c \
+    src/bin/fp_read_unsigned_bin.c \
+    src/bin/fp_reverse.c \
+    src/bin/fp_signed_bin_size.c \
+    src/bin/fp_s_rmap.c \
+    src/bin/fp_toradix.c \
+    src/bin/fp_to_signed_bin.c \
+    src/bin/fp_to_unsigned_bin.c \
+    src/bin/fp_unsigned_bin_size.c \
+    src/bit/fp_cnt_lsb.c \
+    src/bit/fp_count_bits.c \
+    src/bit/fp_div_2.c \
+    src/bit/fp_div_2d.c \
+    src/bit/fp_lshd.c \
+    src/bit/fp_mod_2d.c \
+    src/bit/fp_rshd.c \
+    src/divide/fp_div.c \
+    src/divide/fp_div_d.c \
+    src/divide/fp_mod.c \
+    src/divide/fp_mod_d.c \
+    src/exptmod/fp_2expt.c \
+    src/exptmod/fp_exptmod.c \
+    src/misc/fp_ident.c \
+    src/misc/fp_set.c \
+    src/mont/fp_montgomery_calc_normalization.c \
+    src/mont/fp_montgomery_reduce.c \
+    src/mont/fp_montgomery_setup.c \
+    src/mul/fp_mul_2.c \
+    src/mul/fp_mul_2d.c \
+    src/mul/fp_mul.c \
+    src/mul/fp_mul_comba_12.c \
+    src/mul/fp_mul_comba_17.c \
+    src/mul/fp_mul_comba_20.c \
+    src/mul/fp_mul_comba_24.c \
+    src/mul/fp_mul_comba_28.c \
+    src/mul/fp_mul_comba_32.c \
+    src/mul/fp_mul_comba_3.c \
+    src/mul/fp_mul_comba_48.c \
+    src/mul/fp_mul_comba_4.c \
+    src/mul/fp_mul_comba_64.c \
+    src/mul/fp_mul_comba_6.c \
+    src/mul/fp_mul_comba_7.c \
+    src/mul/fp_mul_comba_8.c \
+    src/mul/fp_mul_comba_9.c \
+    src/mul/fp_mul_comba.c \
+    src/mul/fp_mul_comba_small_set.c \
+    src/mul/fp_mul_d.c \
+    src/mul/fp_mulmod.c \
+    src/numtheory/fp_gcd.c \
+    src/numtheory/fp_invmod.c \
+    src/numtheory/fp_isprime.c \
+    src/numtheory/fp_lcm.c \
+    src/numtheory/fp_prime_miller_rabin.c \
+    src/numtheory/fp_prime_random_ex.c \
+    src/sqr/fp_sqr.c \
+    src/sqr/fp_sqr_comba_12.c \
+    src/sqr/fp_sqr_comba_17.c \
+    src/sqr/fp_sqr_comba_20.c \
+    src/sqr/fp_sqr_comba_24.c \
+    src/sqr/fp_sqr_comba_28.c \
+    src/sqr/fp_sqr_comba_32.c \
+    src/sqr/fp_sqr_comba_3.c \
+    src/sqr/fp_sqr_comba_48.c \
+    src/sqr/fp_sqr_comba_4.c \
+    src/sqr/fp_sqr_comba_64.c \
+    src/sqr/fp_sqr_comba_6.c \
+    src/sqr/fp_sqr_comba_7.c \
+    src/sqr/fp_sqr_comba_8.c \
+    src/sqr/fp_sqr_comba_9.c \
+    src/sqr/fp_sqr_comba.c \
+    src/sqr/fp_sqr_comba_generic.c \
+    src/sqr/fp_sqr_comba_small_set.c \
+    src/sqr/fp_sqrmod.c
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/headers
+
+# The TFM_ARM* macros select ARM inline assembly, so define them only for the
+# armeabi family of ABIs. Any other ABI (x86, mips, ...) is built without
+# them instead of being handed ARM assembly it cannot assemble.
+ifneq ($(filter armeabi%,$(TARGET_ARCH_ABI)),)
+  LOCAL_CFLAGS += -DTFM_ARM
+
+  ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+    LOCAL_CFLAGS += -DTFM_ARM_V7A
+    # NEON is opt-in: not all V7-a targets support NEON, and NEON code
+    # crashes on those that do not. Enable it for known-good devices with
+    # "ndk-build TFM_ENABLE_NEON=true".
+    #   -ftree-vectorize: have GCC attempt to automatically vectorize loops
+    #   -ftree-vectorizer-verbose=2: verbose output during compile
+    ifeq ($(strip $(TFM_ENABLE_NEON)),true)
+      LOCAL_ARM_NEON := true
+      LOCAL_CFLAGS += -ftree-vectorize
+    endif
+  else ifeq ($(TARGET_ARCH_ABI),armeabi)
+    LOCAL_CFLAGS += -DTFM_ARM_V5TE
+  else
+    LOCAL_CFLAGS += -DTFM_ARM_V4M
+  endif
+
+endif
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/Android_jni/Android.mk b/Android_jni/Android.mk
new file mode 100644
index 0000000..487df6d
--- /dev/null
+++ b/Android_jni/Android.mk
@@ -0,0 +1,2 @@
+# Recursively sources all Android.mk files in subdirs:
+include $(call all-subdir-makefiles)
diff --git a/Android_jni/Application.mk b/Android_jni/Application.mk
new file mode 100644
index 0000000..d1e6201
--- /dev/null
+++ b/Android_jni/Application.mk
@@ -0,0 +1,3 @@
+APP_MODULES := tfm
+APP_OPTIM := release
+APP_ABI := armeabi armeabi-v7a
diff --git a/Android_jni/README b/Android_jni/README
new file mode 100644
index 0000000..0aa3670
--- /dev/null
+++ b/Android_jni/README
@@ -0,0 +1,36 @@
+This project provides a build framework for TomsFastMath on Android.
+ +Notes / TODO: + +To use the faster ARM inline assembly, TFM_ARM must be defined at compile time. Unfortunately, it is +incompatible with the armeabi build target because some of the instructions are not supported. The +Android SDK defines a few variables which may help with detection: + +when compiled for armeabi-v7a: + __thumb2__ + __ARM_ARCH_7A__ +when compiled for armeabi: + __SOFTFP__ + +For now, the armeabi target will use the unoptimized C code. TFM_ARM is conditionally defined +for the armeabi-v7a target. + +Don't forget to change FP_MAX_SIZE in src/headers/tfm.h to suit your needs, e.g. + + #define FP_MAX_SIZE (16384+(8*DIGIT_BIT)) + +Build instructions: + +(1) Clone this git repository to your project's JNI directory, naming the directory "tfm". +(2) Modify your JNI root's Android.mk and Application.mk using the files in this directory as a model. +(3) Run ndk-build (available from the Android NDK); see Gingerbread's NDK docs for more. + +Known bugs / issues: + +* armeabi builds are known to produce binary code that is over three times larger than armeabi-v7a. + This is probably due to either a bug in the Android NDK or limitations of the Thumb16 instruction set. + +* NEON support can't really be turned on, because not all armeabi-v7a boards support it. + After some trials, it appears that gcc can auto-vectorize some loops when NEON support is enabled + and this leads to a performance increase. But doing so will cause the code to crash on phones + where NEON isn't supported... 
/*
 * Patch notes for src/mont/fp_montgomery_reduce.c (documentation only; the
 * diff text that follows is unchanged).
 *
 *  * The former TFM_ARM branch of the #elif chain is renamed TFM_ARM_V4M and
 *    two further branches, TFM_ARM_V7A and TFM_ARM_V5TE, are introduced.
 *  * Each branch defines MONT_START, MONT_FINI and LOOP_END as empty and
 *    LOOP_START as mu = c[x] * mp.
 *  * INNERMUL loads _c[0] into r0, adds the carry cy with ADDS, sets cy to 1
 *    or 0 from the carry flag (MOVCS / MOVCC), then UMLAL adds the product
 *    of mu and *tmpm++ to the register pair (cy, r0) and r0 is stored back
 *    to _c[0]. Note that *tmpm++ advances tmpm on every expansion.
 *  * PROPCARRY adds cy into _c[0] and leaves the carry-out (1 or 0) in cy.
 *  * The TFM_ARM_V7A variants differ from the TFM_ARM_V4M ones only by an
 *    ITE CS instruction placed before each MOVCS / MOVCC pair.
 *  * The TFM_ARM_V5TE branch keeps the pre-existing instruction sequence (it
 *    appears as context lines); the comment added there states it still
 *    needs a rewrite for armeabi. Part of that sequence lies between hunks
 *    and is not visible here.
 *  * In every visible asm statement the output constraint on _c[0] is =g
 *    where the removed lines used =m.
 *    NOTE(review): g also admits register and immediate operands, which the
 *    LDR / STR on operand 1 presumably cannot take - confirm this change is
 *    intended.
 */
diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index b8a194f..396087a 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -283,15 +283,85 @@ asm( \ : "%eax", "%cc") /******************************************************************/ -#elif defined(TFM_ARM) - /* ARMv4 code */ +#elif defined(TFM_ARM_V4M) + /* generic ARMv4 or higher with M */ -#define MONT_START +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); + +#define PROPCARRY \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); + +/******************************************************************/ +#elif defined(TFM_ARM_V7A) + /* Android: armeabi-v7a target */ + +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " ITE CS \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); + +#define PROPCARRY \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ + " ITE CS \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); + +/******************************************************************/ +#elif defined(TFM_ARM_V5TE) + /* Android: armeabi target */ + +#define MONT_START #define MONT_FINI #define LOOP_END #define LOOP_START \ mu = c[x] * mp +/* +These will need a complete rewrite for armeabi: + * 
ADDS is not supported in Thumb16 mode + * Thumb does not support conditional execution (MOVCS/MOVCC) + * armv5te+xscale does not support UMLAL +ADDS - add and set condition flags (bottom of page 16, arm_inst.pdf) +MOVCS / MOVCC - move if carry bit is set / clear +UMLAL RdLo, RdHi, Rn, Rm - unsigned multiply Rn and Rm, then + add resulting 64-bit value to (RdHi,RdLo) +*/ #define INNERMUL \ asm( \ " LDR r0,%1 \n\t" \ @@ -300,7 +370,7 @@ asm( \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ " STR r0,%1 \n\t" \ -:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); #define PROPCARRY \ asm( \ @@ -309,7 +379,7 @@ asm( \ " STR r0,%1 \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ -:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); /******************************************************************/ #elif defined(TFM_PPC32)