diff --git a/jni/Android.mk b/jni/Android.mk index fbf1a5a..da71723 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -1,7 +1,7 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -LOCAL_CFLAGS := -fvisibility=hidden +LOCAL_CFLAGS := -fvisibility=hidden -DDEBUG LOCAL_MODULE := android-ndk-profiler LOCAL_SRC_FILES := gnu_mcount.S prof.c read_maps.c include $(BUILD_STATIC_LIBRARY) diff --git a/jni/prof.c b/jni/prof.c index 7184f0c..0441e9b 100644 --- a/jni/prof.c +++ b/jni/prof.c @@ -63,29 +63,31 @@ #include "ucontext.h" /* for ucontext_t */ #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "PROFILING", __VA_ARGS__) -#define FREQ_HZ 100 + #define DEFAULT_GMON_OUT "/sdcard/gmon.out" -/* - * froms is actually a bunch of unsigned shorts indexing tos - */ -static int profiling = 3; -static unsigned short *froms; -static struct tostruct *tos = 0; -static long tolimit = 0; -static uint32_t s_lowpc = 0; -static uint32_t s_highpc = 0; -static unsigned long s_textsize = 0; - -static int ssiz; -static char *sbuf; -static int s_scale; - -static int hist_num_bins = 0; -static char hist_dimension[16] = "seconds"; -static char hist_dimension_abbrev = 's'; +typedef struct { + unsigned short *froms; + struct tostruct *tos; + long tolimit; +} callgraph_t; + +typedef struct { + size_t low_pc; + size_t high_pc; + size_t text_size; +} process_t; + +typedef struct { + uint32_t nb_bins; + char *bins; +} histogram_t; + +static histogram_t hist; +static process_t process; +static callgraph_t cg; static struct proc_map *s_maps = NULL; -static int s_freq_hz = FREQ_HZ; +int opt_is_shared_lib = 0; static void systemMessage(int a, const char *msg) { @@ -132,155 +134,199 @@ static int profWrite(FILE *f, char *buf, unsigned int n) return 0; } -static void check_profil(uint32_t frompcindex) +static int get_max_samples_per_sec() { - if (sbuf && ssiz) { - uint16_t *b = (uint16_t *)sbuf; - int pc = (frompcindex - s_lowpc) / s_scale; - if(pc >= 0 && pc < ssiz) - b[pc]++; - } + struct itimerval timer; + + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); + setitimer(ITIMER_PROF, 0, &timer); + return 1000000 / timer.it_interval.tv_usec; } -static void profile_action(int sig, siginfo_t *info, void *context) +static void histogram_bin_incr(int sig, siginfo_t *info, void *context) { - ucontext_t *ucontext = (ucontext_t*) context; + ucontext_t *ucontext = (ucontext_t *) context; struct sigcontext *mcontext = &ucontext->uc_mcontext; - if (profiling) - return; - check_profil(mcontext->arm_pc); + uint32_t frompcindex = mcontext->arm_pc; + + uint16_t *b = (uint16_t *) hist.bins; + + /* the pc should be divided by HISTFRACTION, but we do */ + /* a right shift with 1 because HISTFRACTION=2 */ + size_t pc = (frompcindex - process.low_pc) >> 1; + + if (pc < hist.nb_bins) { + b[pc]++; + } } -static void add_profile_handler(void) +static void add_profile_handler(int sample_freq) { - struct sigaction action; /* request info, sigaction called instead of sighandler */ action.sa_flags = SA_SIGINFO | SA_RESTART; - action.sa_sigaction = profile_action; + action.sa_sigaction = histogram_bin_incr; sigemptyset(&action.sa_mask); int result = sigaction(SIGPROF, &action, NULL); if (result != 0) { /* panic */ - LOGI("add_profile_handler, sigaction failed %d %d", result, errno); + LOGI("add_profile_handler, sigaction failed %d %d", result, + errno); return; } struct itimerval timer; timer.it_interval.tv_sec = 0; - timer.it_interval.tv_usec = 1000000 / s_freq_hz; + timer.it_interval.tv_usec = 1000000 / sample_freq; timer.it_value = timer.it_interval; setitimer(ITIMER_PROF, &timer, 0); } -static void remove_profile_handler(void) +static long remove_profile_handler(void) { struct itimerval timer; + struct itimerval oldtimer; + memset(&timer, 0, sizeof(timer)); - setitimer(ITIMER_PROF, &timer, 0); + setitimer(ITIMER_PROF, &timer, &oldtimer); + + return oldtimer.it_value.tv_usec; } +static int select_frequency() +{ + int max_samples = get_max_samples_per_sec(); + char *freq = getenv("CPUPROFILE_FREQUENCY"); -/* Control profiling; - profiling is what mcount checks to see if - all the data structures are ready. */ + if (!freq) { + LOGI("using sample frequency: %d", max_samples); + return max_samples; + } -static void profControl(int mode) -{ - if (mode) { - /* start */ - add_profile_handler(); - profiling = 0; - } else { - remove_profile_handler(); - /* stop */ - profiling = 3; - systemMessage(1, "parent: done profiling"); + int freqval = strtol(freq, 0, 0); + + if (freqval <= 0) { + LOGI("Invalid frequency value: %d, using default: %d", + freqval, max_samples); + return max_samples; + } + + LOGI("Maximum number of samples per second: %d", max_samples); + LOGI("Specified frequency: %d", freqval); + + if (freqval > max_samples) { + LOGI("Specified sample rate is too large, using %d", + max_samples); + return max_samples; } + + return freqval; } -static void get_frequency(void) +static void process_init() { - char *freq = getenv("CPUPROFILE_FREQUENCY"); - if (freq != 0) { - int freqval = strtol(freq, 0, 0); - if (freqval > 0) - s_freq_hz = freqval; - else - LOGI("Invalid frequency value: %d", freqval); - } + process.low_pc = s_maps->lo; + process.high_pc = s_maps->hi; + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + process.low_pc = ROUNDDOWN(process.low_pc, HISTFRACTION * sizeof(HISTCOUNTER)); + process.high_pc = ROUNDUP(process.high_pc, HISTFRACTION * sizeof(HISTCOUNTER)); + process.text_size = process.high_pc - process.low_pc; } #define MSG ("No space for profiling buffer(s)\n") +static int histogram_init() +{ + hist.nb_bins = (process.text_size / HISTFRACTION); + + /* FIXME: check if '2' is the size of short or if it has another meaning */ + hist.bins = calloc(1, sizeof(short) * hist.nb_bins); + if (!hist.bins) { + systemMessage(0, MSG); + return 0; + } + return 1; +} + +static int cg_init() +{ + /* FIXME: what should be the size of 'froms' */ + /* froms = calloc(1, 4 * process.text_size / HASHFRACTION); */ + cg.froms = calloc(1, sizeof(short) * hist.nb_bins); + + if (cg.froms == NULL) { + systemMessage(0, MSG); + free(hist.bins); + return 0; + } + + cg.tolimit = process.text_size * ARCDENSITY / 100; + + if (cg.tolimit < MINARCS) { + cg.tolimit = MINARCS; + } else if (cg.tolimit > 65534) { + cg.tolimit = 65534; + } + + cg.tos = (struct tostruct *) calloc(1, cg.tolimit * sizeof(struct tostruct)); + if (cg.tos == NULL) { + systemMessage(0, MSG); + free(hist.bins); + free(cg.froms); + cg.froms = NULL; + return 0; + } + cg.tos[0].link = 0; + + return 1; +} + __attribute__((visibility("default"))) void monstartup(const char *libname) { - int monsize; - char *buffer; - uint32_t lowpc, highpc; FILE *self = fopen("/proc/self/maps", "r"); - s_maps = read_maps(self, libname); - if (s_maps == NULL) { - systemMessage(0, "No maps found"); + + if (!self) { + systemMessage(1, "Cannot open memory maps file"); return; } - lowpc = s_maps->lo; - highpc = s_maps->hi; - __android_log_print(ANDROID_LOG_INFO, "PROFILING", - "Profile %s %x-%x: %d", - libname, - lowpc, highpc, - s_maps->base); - - get_frequency(); - /* - * round lowpc and highpc to multiples of the density we're using - * so the rest of the scaling (here and in gprof) stays in ints. - */ - lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); - s_lowpc = lowpc; - highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); - s_highpc = highpc; - s_textsize = highpc - lowpc; - monsize = (s_textsize / HISTFRACTION); - s_scale = HISTFRACTION; - buffer = calloc(1, 2 * monsize); - if (buffer == NULL) { - systemMessage(0, MSG); - return; + + if (strstr(libname, ".so")) { + LOGI("start profiling shared library %s", libname); + opt_is_shared_lib = 1; + } else { + LOGI("start profiling executable %s", libname); + opt_is_shared_lib = 0; } - froms = calloc(1, 4 * s_textsize / HASHFRACTION); - if (froms == NULL) { - systemMessage(0, MSG); - free(buffer); - buffer = NULL; + + s_maps = read_maps(self, libname); + + if (s_maps == NULL) { + systemMessage(0, "No maps found"); return; } - tolimit = s_textsize * ARCDENSITY / 100; - if (tolimit < MINARCS) { - tolimit = MINARCS; - } else if (tolimit > 65534) { - tolimit = 65534; - } - tos = - (struct tostruct *) calloc(1, - tolimit * sizeof(struct tostruct)); - if (tos == NULL) { - systemMessage(0, MSG); - free(buffer); - buffer = NULL; - free(froms); - froms = NULL; + + process_init(); + + if (!histogram_init()) { return; } - tos[0].link = 0; - sbuf = buffer; - ssiz = monsize; - if (monsize <= 0) { + + LOGI("Profile %s, pc: 0x%x-0x%x, base: 0x%d", libname, + process.low_pc, process.high_pc, s_maps->base); + + if (!cg_init()) { return; } - profControl(1); + + int sample_freq = select_frequency(); + add_profile_handler(sample_freq); } static const char *get_gmon_out(void) @@ -301,9 +347,13 @@ void moncleanup(void) int toindex; struct gmon_hdr ghdr; const char *gmon_name = get_gmon_out(); - LOGI("parent: moncleanup called"); - profControl(0); - LOGI("writing gmon.out"); + + long ival = remove_profile_handler(); + int sample_freq = 1000000 / ival; + + LOGI("Sampling frequency: %d", sample_freq); + LOGI("moncleanup, writing output to %s", gmon_name); + fd = fopen(gmon_name, "wb"); if (fd == NULL) { systemMessage(0, "mcount: gmon.out"); @@ -316,48 +366,50 @@ void moncleanup(void) fclose(fd); return; } - hist_num_bins = ssiz; - if (profWrite8(fd, GMON_TAG_TIME_HIST) || - profWrite32(fd, get_real_address(s_maps, (uint32_t) s_lowpc)) || - profWrite32(fd, get_real_address(s_maps, (uint32_t) s_highpc)) || - profWrite32(fd, hist_num_bins) || - profWrite32(fd, s_freq_hz) || - profWrite(fd, hist_dimension, 15) || - profWrite(fd, &hist_dimension_abbrev, 1)) { - systemMessage(0, "mcount: gmon.out hist"); + + if (profWrite8(fd, GMON_TAG_TIME_HIST) + || profWrite32(fd, get_real_address(s_maps, (uint32_t) process.low_pc)) + || profWrite32(fd, get_real_address(s_maps, (uint32_t) process.high_pc)) + || profWrite32(fd, hist.nb_bins) + || profWrite32(fd, sample_freq) + || profWrite(fd, "seconds", 15) + || profWrite(fd, "s", 1) + ) { + systemMessage(0, "ERROR writing mcount: gmon.out hist"); fclose(fd); return; } - uint16_t *hist_sample = (uint16_t *) sbuf; + uint16_t *hist_sample = (uint16_t *) hist.bins; uint16_t count; int i; - for (i = 0; i < hist_num_bins; ++i) { + for (i = 0; i < hist.nb_bins; ++i) { profPut16((char *) &count, hist_sample[i]); + /* LOGI("bin: %d, value: %d", i, hist_sample[i]); */ if (fwrite(&count, sizeof(count), 1, fd) != 1) { - systemMessage(0, "mcount: gmon.out sample"); + systemMessage(0, "ERROR writing file mcount: gmon.out sample"); fclose(fd); return; } } - endfrom = s_textsize / (HASHFRACTION * sizeof(*froms)); + endfrom = process.text_size / (HASHFRACTION * sizeof(*cg.froms)); for (fromindex = 0; fromindex < endfrom; fromindex++) { - if (froms[fromindex] == 0) { + if (cg.froms[fromindex] == 0) { continue; } frompc = - s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms)); + process.low_pc + (fromindex * HASHFRACTION * sizeof(*cg.froms)); frompc = get_real_address(s_maps, frompc); - for (toindex = froms[fromindex]; toindex != 0; - toindex = tos[toindex].link) { + for (toindex = cg.froms[fromindex]; toindex != 0; + toindex = cg.tos[toindex].link) { if (profWrite8(fd, GMON_TAG_CG_ARC) || profWrite32(fd, (uint32_t) frompc) || profWrite32(fd, get_real_address(s_maps, (uint32_t) - tos[toindex]. + cg.tos[toindex]. selfpc)) - || profWrite32(fd, tos[toindex].count)) { - systemMessage(0, "mcount: arc"); + || profWrite32(fd, cg.tos[toindex].count)) { + systemMessage(0, "ERROR writing mcount: arc"); fclose(fd); return; } @@ -366,11 +418,11 @@ void moncleanup(void) fclose(fd); } -void profCount(unsigned short *frompcindex, char *selfpc) +void profCount(size_t *frompcindex, char *selfpc) { struct tostruct *top; struct tostruct *prevtop; - long toindex; + size_t toindex; /* * find the return address for mcount, * and the return address for mcount's caller. @@ -385,46 +437,49 @@ void profCount(unsigned short *frompcindex, char *selfpc) * check that we are profiling * and that we aren't recursively invoked. */ - if (profiling) { - return; - } - profiling++; + /* * check that frompcindex is a reasonable pc value. * for example: signal catchers get called from the stack, * not from text space. too bad. */ - frompcindex = - (unsigned short *)((long) frompcindex - (long) s_lowpc); - if ((unsigned long) frompcindex > s_textsize) { - goto done; + + /* frompcindex = (size_t *) ( (size_t) frompcindex - process.low_pc); */ + + size_t frompc_val = (size_t) frompcindex - process.low_pc; + size_t *frompc_ptr = (size_t *) frompc_val; + + if (frompc_val > process.text_size) { + return; } - frompcindex = - &froms[((long) frompcindex) / (HASHFRACTION * sizeof(*froms))]; - toindex = *frompcindex; + + frompc_ptr = (size_t *) &cg.froms[frompc_val / (HASHFRACTION * sizeof(*cg.froms))]; + toindex = *frompc_ptr; + if (toindex == 0) { /* * first time traversing this arc */ - toindex = ++tos[0].link; - if (toindex >= tolimit) { + toindex = ++cg.tos[0].link; + if (toindex >= cg.tolimit) { goto overflow; } - *frompcindex = (unsigned short) toindex; - top = &tos[toindex]; + *frompc_ptr = (size_t) toindex; + top = &cg.tos[toindex]; top->selfpc = selfpc; top->count = 1; top->link = 0; - goto done; + return; } - top = &tos[toindex]; + top = &cg.tos[toindex]; if (top->selfpc == selfpc) { /* * arc at front of chain; usual case. */ top->count++; - goto done; + return; } + /* * have to go looking down chain for it. * top points to what we are looking at, @@ -439,22 +494,22 @@ void profCount(unsigned short *frompcindex, char *selfpc) * so we allocate a new tostruct * and link it to the head of the chain. */ - toindex = ++tos[0].link; - if (toindex >= tolimit) { + toindex = ++cg.tos[0].link; + if (toindex >= cg.tolimit) { goto overflow; } - top = &tos[toindex]; + top = &cg.tos[toindex]; top->selfpc = selfpc; top->count = 1; - top->link = *frompcindex; - *frompcindex = (unsigned short) toindex; - goto done; + top->link = *frompc_ptr; + *frompc_ptr = (size_t) toindex; + return; } /* * otherwise, check the next arc on the chain. */ prevtop = top; - top = &tos[top->link]; + top = &cg.tos[top->link]; if (top->selfpc == selfpc) { /* * there it is. @@ -464,19 +519,16 @@ void profCount(unsigned short *frompcindex, char *selfpc) top->count++; toindex = prevtop->link; prevtop->link = top->link; - top->link = *frompcindex; - *frompcindex = (unsigned short) toindex; - goto done; + top->link = *frompc_ptr; + *frompc_ptr = (size_t) toindex; + return; } } -done: - profiling--; - /* and fall through */ + out: return; /* normal return restores saved registers */ overflow: - profiling++; /* halt further profiling */ -#define TOLIMIT "mcount: tos overflow\n" - systemMessage(0, TOLIMIT); + systemMessage(0, "mcount: tos overflow\n"); goto out; + } diff --git a/jni/read_maps.c b/jni/read_maps.c index dd84b22..598370d 100644 --- a/jni/read_maps.c +++ b/jni/read_maps.c @@ -12,58 +12,85 @@ #include #include "read_maps.h" +#ifdef ANDROID +#include /* for __android_log_print, ANDROID_LOG_INFO, etc */ +#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "PROFILING", __VA_ARGS__) +#else +#define LOGI(...) do { printf(__VA_ARGS__) ; printf("\n"); } while (0) +#endif + static char s_line[256]; +extern int opt_is_shared_lib; void free_maps(struct proc_map *s) { - struct proc_map *next = s->next; - while (next != NULL) { - struct proc_map *tmp = next; - next = next->next; - free(tmp); - } - free(s); + struct proc_map *next = s->next; + while (next != NULL) { + struct proc_map *tmp = next; + next = next->next; + free(tmp); + } + free(s); } struct proc_map *read_maps(FILE *fp, const char *lname) { - struct proc_map *results = NULL; - struct proc_map *current = NULL; - size_t namelen = strlen(lname); - while (fgets(s_line, sizeof(s_line), fp) != NULL) { - size_t len = strlen(s_line); - len--; - s_line[len] = 0; - if (namelen < len && strcmp(lname, &s_line[len - namelen]) == 0) { - char c[1]; - char perm[4]; - int lo, base, hi; - sscanf(s_line, "%x-%x %4c %x %c", &lo, &hi, perm, &base, c); - if (results == NULL) { - current = malloc(sizeof(struct proc_map)); - current->next = NULL; - results = current; - } else { - current->next = malloc(sizeof(struct proc_map)); - current = current->next; - current->next = NULL; - } - current->base = base; - current->lo = lo; - current->hi = hi; - } - } - return results; + struct proc_map *results = NULL; + struct proc_map *current = NULL; + size_t namelen = strlen(lname); + + while (fgets(s_line, sizeof(s_line), fp) != NULL) { + size_t len = strlen(s_line); + len--; + s_line[len] = 0; + + if (namelen < len + && strcmp(lname, &s_line[len - namelen]) == 0) { + char c[1]; + char perm[4]; + int lo, base, hi; + sscanf(s_line, "%x-%x %4c %x %c", &lo, &hi, perm, &base, c); + + if (results == NULL) { + current = malloc(sizeof(struct proc_map)); + if (!current) { + LOGI("error allocating memory"); + return NULL; + } + current->next = NULL; + results = current; + } else { + current->next = malloc(sizeof(struct proc_map)); + current = current->next; + if (!current) { + LOGI("error allocating memory"); + return NULL; + } + current->next = NULL; + } + + LOGI("process '%s', base = 0x%x, lo = 0x%x, hi = 0x%x", lname, base, lo, hi); + + current->base = base; + current->lo = lo; + current->hi = hi; + } + } + return results; } -unsigned int get_real_address(const struct proc_map *maps, unsigned int fake) +unsigned int get_real_address(const struct proc_map *maps, + unsigned int fake) { - const struct proc_map *mp = maps; - while (mp) { - if (fake >= mp->lo && fake <= mp->hi) { - return fake - mp->lo; - } - mp = mp->next; - } - return fake; + const struct proc_map *mp = maps; + while (mp) { + if (fake >= mp->lo && fake <= mp->hi) { + if (opt_is_shared_lib) { + return fake - mp->lo; + } + return fake; + } + mp = mp->next; + } + return fake; } diff --git a/test/test_read_maps.c b/test/test_read_maps.c index 4b51f72..eb2ac94 100644 --- a/test/test_read_maps.c +++ b/test/test_read_maps.c @@ -3,6 +3,8 @@ #include #include "read_maps.h" +int opt_is_shared_lib = 1; + static void test_read_maps(void) { FILE *fp = fopen("maps.txt", "r");