diff --git a/ChangeLog b/ChangeLog index 0ff46db8..c5c5ef6b 100755 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,15 @@ ChangeLog ========= +---------------------------------- +Version 1.16 (based on afl 2.43b): +---------------------------------- + + - Intel PT support. + + - Fix target_path in afl-showmap in drioless mode + Contributed by L4ys + ---------------------------------- Version 1.15 (based on afl 2.43b): ---------------------------------- diff --git a/afl-fuzz.c b/afl-fuzz.c index bf8c846e..8abb59ad 100644 --- a/afl-fuzz.c +++ b/afl-fuzz.c @@ -39,7 +39,7 @@ #include #define VERSION "2.43b" -#define WINAFL_VERSION "1.15" +#define WINAFL_VERSION "1.16" #include "config.h" #include "types.h" diff --git a/bin32/afl-fuzz.exe b/bin32/afl-fuzz.exe index 155fa67f..16eb23f4 100644 Binary files a/bin32/afl-fuzz.exe and b/bin32/afl-fuzz.exe differ diff --git a/bin32/afl-showmap.exe b/bin32/afl-showmap.exe index e14b88d9..2c8c88c2 100644 Binary files a/bin32/afl-showmap.exe and b/bin32/afl-showmap.exe differ diff --git a/bin32/libipt.dll b/bin32/libipt.dll index f801b99e..7b8e4682 100644 Binary files a/bin32/libipt.dll and b/bin32/libipt.dll differ diff --git a/bin32/winaflpt-debug.exe b/bin32/winaflpt-debug.exe index 4d7fbc9e..f2dd8503 100644 Binary files a/bin32/winaflpt-debug.exe and b/bin32/winaflpt-debug.exe differ diff --git a/bin64/afl-fuzz.exe b/bin64/afl-fuzz.exe index b42f0b47..26363f59 100644 Binary files a/bin64/afl-fuzz.exe and b/bin64/afl-fuzz.exe differ diff --git a/bin64/afl-showmap.exe b/bin64/afl-showmap.exe index 668efa4f..9c76de32 100644 Binary files a/bin64/afl-showmap.exe and b/bin64/afl-showmap.exe differ diff --git a/bin64/libipt.dll b/bin64/libipt.dll index ae467df1..36ab8d73 100644 Binary files a/bin64/libipt.dll and b/bin64/libipt.dll differ diff --git a/bin64/winaflpt-debug.exe b/bin64/winaflpt-debug.exe index 0aa2d957..db1fbf37 100644 Binary files a/bin64/winaflpt-debug.exe and b/bin64/winaflpt-debug.exe differ diff --git a/ptdecode.c b/ptdecode.c index c7f3636c..39c49c32 100644 --- a/ptdecode.c +++ b/ptdecode.c @@ -1,3 +1,23 @@ +/* + WinAFL - Intel PT decoding + ------------------------------------------------ + + Written and maintained by Ivan Fratric + + Copyright 2016 Google Inc. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ + #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -7,6 +27,8 @@ #include "pt_cpu.h" #include "pt_cpuid.h" #include "pt_opcodes.h" +#include "pt_retstack.h" +#include "pt_block_decoder.h" #include "types.h" #include "config.h" @@ -17,14 +39,18 @@ #define PPT_EXT 0xFF -uint64_t previous_offset; +uint32_t previous_offset; uint64_t previous_ip; extern address_range* coverage_ip_ranges; extern size_t num_ip_ranges; - static address_range* current_range; +extern u8 *trace_bits; + +#define MAX_TRACELET_SIZE 100 // just a hint, the tracelets could end up larger +#define MIN_TRACELET_SIZE 20 // just a hint, the tracelets could end up smaller + unsigned char opc_lut[] = { 0x02, 0x08, 0xff, 0x12, 0x09, 0x00, 0x09, 0x12, 0x09, 0x00, 0x09, 0x12, 0x09, 0x06, 0x09, 0x12, @@ -170,6 +196,257 @@ unsigned char psb[16] = { 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82 }; +static unsigned char psb_and_psbend[18] = { + 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, + 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, + 0x02, 0x23 +}; + +typedef struct decoder_state_t { + uint64_t query_ip; + uint64_t block_ip; + uint8_t mode; +} decoder_state; + +typedef struct tracelet_cache_node_t { + uint64_t hash; + size_t size; + + struct tracelet_cache_node_t *hash_prev; + struct tracelet_cache_node_t *hash_next; + + struct tracelet_cache_node_t *lru_prev; + struct tracelet_cache_node_t *lru_next; + + decoder_state state_prev; + decoder_state state_next; + + uint8_t stack_removed; + uint8_t stack_added; + + uint64_t *stack_prev; + uint64_t *stack_next; + + uint32_t tracelet_size; + unsigned char * tracelet; + + uint32_t map_update_size; + uint32_t *map_offsets; + uint8_t *map_updates; + +} tracelet_cache_node; + +struct tracelet_cache_t { + tracelet_cache_node **hashtable; + + tracelet_cache_node *lru_first; + tracelet_cache_node *lru_last; + + size_t size; + size_t num_entries; + + size_t max_size; + size_t max_entries; +}; + +static struct tracelet_cache_t tracelet_cache; + +struct coverage_cache_t { + uint32_t index_buffer[MAP_SIZE]; + // need + 2 for edge coverage + uint32_t map_offsets[MAP_SIZE + 2]; + uint8_t counters[MAP_SIZE + 2]; + uint32_t size; +}; + +void tracelet_coverage_init(struct coverage_cache_t *coverage_cache) { + memset(coverage_cache->index_buffer, 0, MAP_SIZE * sizeof(coverage_cache->index_buffer[0])); + coverage_cache->size = 0; +} + +void tracelet_coverage_clear(struct coverage_cache_t *coverage_cache, int coverage_kind) { + if (!coverage_cache->size) return; + + uint32_t from = 0; + uint32_t to = coverage_cache->size; + + if (coverage_kind == COVERAGE_EDGE) { + // the first and the last value have special meaning + // in the case of edge coverage + from++; + to--; + } + + for (uint32_t i = from; i < to; i++) { + coverage_cache->index_buffer[coverage_cache->map_offsets[i]] = 0; + } + coverage_cache->size = 0; +} + +void tracelet_coverage_add_bb(struct coverage_cache_t *coverage_cache, uint32_t offset) { + offset = offset % MAP_SIZE; + + if (coverage_cache->index_buffer[offset]) { + coverage_cache->counters[coverage_cache->index_buffer[offset] - 1]++; + } else { + coverage_cache->index_buffer[offset] = coverage_cache->size + 1; + coverage_cache->map_offsets[coverage_cache->size] = offset; + coverage_cache->counters[coverage_cache->size] = 1; + coverage_cache->size++; + } +} + +void tracelet_coverage_add_edge(struct coverage_cache_t *coverage_cache, uint32_t offset) { + uint32_t edge; + // don't touch the global previous_offset while building the cache + // we'll update everything once the cache gets replayed +
uint32_t previous_offset; + + if (!coverage_cache->size) { + // store the first offset as the first value + coverage_cache->map_offsets[0] = offset; + coverage_cache->counters[0] = 0; + coverage_cache->size = 2; + } else { + previous_offset = coverage_cache->map_offsets[coverage_cache->size - 1]; + + edge = (offset ^ previous_offset) % MAP_SIZE; + + if (coverage_cache->index_buffer[edge]) { + coverage_cache->counters[coverage_cache->index_buffer[edge]]++; + } else { + coverage_cache->index_buffer[edge] = coverage_cache->size - 1; + coverage_cache->map_offsets[coverage_cache->size - 1] = edge; + coverage_cache->counters[coverage_cache->size - 1] = 1; + coverage_cache->size++; + } + } + + // always store the previous offset as the last value + previous_offset = offset >> 1; + coverage_cache->map_offsets[coverage_cache->size - 1] = previous_offset; + coverage_cache->counters[coverage_cache->size - 1] = 0; +} + +static inline uint64_t djb2(unsigned char *data, size_t size) { + uint64_t hash = 5381; + + for (size_t i = 0; i < size; i++) { + hash = (hash << 5) + hash + data[i]; + } + + return hash; +} + +void tracelet_cache_init(size_t max_entries, size_t max_size) { + tracelet_cache.max_entries = max_entries; + tracelet_cache.max_size = max_size; + + tracelet_cache.hashtable = (tracelet_cache_node **)calloc(max_entries, sizeof(tracelet_cache_node *)); + + tracelet_cache.lru_first = NULL; + tracelet_cache.lru_last = NULL; + + tracelet_cache.size = 0; + tracelet_cache.num_entries = 0; +} + +// sets the node as the most recently used +void cache_node_touch(tracelet_cache_node *node) { + // printf("accessing %p in cache\n", node); + + if (!node->lru_prev) return; //already at the beginning + else node->lru_prev->lru_next = node->lru_next; + + if (node->lru_next) node->lru_next->lru_prev = node->lru_prev; + else tracelet_cache.lru_last = node->lru_prev; + + node->lru_prev = NULL; + node->lru_next = tracelet_cache.lru_first; + if (node->lru_next) node->lru_next->lru_prev = node; + tracelet_cache.lru_first = node; +} + +void cache_node_remove(tracelet_cache_node *node) { + // printf("removing %p from cache\n", node); + + if (node->lru_prev) node->lru_prev->lru_next = node->lru_next; + else tracelet_cache.lru_first = node->lru_next; + + if (node->lru_next) node->lru_next->lru_prev = node->lru_prev; + else tracelet_cache.lru_last = node->lru_prev; + + if (node->hash_prev) node->hash_prev->hash_next = node->hash_next; + else tracelet_cache.hashtable[node->hash % tracelet_cache.max_entries] = node->hash_next; + + if (node->hash_next) node->hash_next->hash_prev = node->hash_prev; + + tracelet_cache.num_entries--; + tracelet_cache.size -= node->size; + + free(node); +} + +void cache_remove_lru() { + tracelet_cache_node *node = tracelet_cache.lru_last; + if (node) cache_node_remove(node); } + +void cache_node_add(tracelet_cache_node *node) { + // printf("adding %p to cache\n", node); + + while (tracelet_cache.num_entries >= tracelet_cache.max_entries) cache_remove_lru(); + while ((tracelet_cache.size + node->size) >= tracelet_cache.max_size) cache_remove_lru(); + + tracelet_cache_node *prev_first; + + prev_first = tracelet_cache.hashtable[node->hash % tracelet_cache.max_entries]; + tracelet_cache.hashtable[node->hash % tracelet_cache.max_entries] = node; + + node->hash_prev = NULL; + node->hash_next = prev_first; + if (prev_first) prev_first->hash_prev = node; + + prev_first = tracelet_cache.lru_first; + tracelet_cache.lru_first = node; + + node->lru_prev = NULL; + node->lru_next = prev_first; + if
(prev_first) prev_first->lru_prev = node; + else tracelet_cache.lru_last = node; + + tracelet_cache.num_entries++; + tracelet_cache.size += node->size; +} + +tracelet_cache_node *cache_find_node(uint64_t hash, decoder_state *state, unsigned char *tracelet, size_t tracelet_size, struct pt_retstack *retstack) { + tracelet_cache_node *node = tracelet_cache.hashtable[hash % tracelet_cache.max_entries]; + + while (node) { + if ((node->hash == hash) && + (node->state_prev.block_ip == state->block_ip) && + (node->state_prev.query_ip == state->query_ip) && + (node->state_prev.mode == state->mode) && + (node->tracelet_size == tracelet_size) && + (memcmp(node->tracelet, tracelet, tracelet_size) == 0)) + { + + uint8_t top = retstack->top; + size_t i; + for (i = 0; i < node->stack_removed; i++) { + if (top == retstack->bottom) break; + top = (!top ? pt_retstack_size : top - 1); + if (retstack->stack[top] != node->stack_prev[i]) break; + } + if (i == node->stack_removed) return node; // finally + + } + node = node->hash_next; + } + + return NULL; +} + void dump_lut(unsigned char *lut, char *lutname) { printf("unsigned char %s[] = {\n", lutname); for (int i = 0; i<16; i++) { @@ -184,6 +461,7 @@ void dump_lut(unsigned char *lut, char *lutname) { printf("}; \n\n"); } +// function that was used to build the lookup tables for the packet decoder void build_luts() { for (int i = 0; i<256; i++) { opc_lut[i] = ppt_invalid; @@ -420,6 +698,7 @@ void build_luts() { dump_lut(ext_size_lut, "ext_size_lut"); } +// sign extend inline static uint64_t sext(uint64_t val, uint8_t sign) { uint64_t signbit, mask; @@ -429,6 +708,7 @@ inline static uint64_t sext(uint64_t val, uint8_t sign) { return val & signbit ? val | mask : val & ~mask; } +// finds the next psb packet in the data buffer bool findpsb(unsigned char **data, size_t *size) { if (*size < 16) return false; @@ -447,8 +727,10 @@ bool findpsb(unsigned char **data, size_t *size) { return false; } -inline static int update_coverage_map(uint64_t next_ip, u8 *trace_bits, int coverage_kind) { - uint64_t offset; +// checks if the IP address is in one of the modules we are interested in +// and updates the coverage map +inline static int update_coverage_map(uint64_t next_ip, int coverage_kind) { + uint32_t offset; if (next_ip < current_range->start) { do { @@ -464,7 +746,7 @@ // printf("ip: %p\n", (void*)next_ip); - offset = next_ip - current_range->start; + offset = (uint32_t)(next_ip - current_range->start); switch (coverage_kind) { case COVERAGE_BB: @@ -479,108 +761,48 @@ return 1; } -// analyze collected PT trace -void analyze_trace_buffer_full(unsigned char *trace_data, size_t trace_size, u8 *trace_bits, int coverage_kind, module_info_t* modules, struct pt_image_section_cache *section_cache) { - // printf("analyzing trace\n"); - - struct pt_block_decoder *decoder; - struct pt_config config; - struct pt_event event; - struct pt_block block; +// checks if the IP address is in one of the modules we are interested in +// and updates the coverage_cache data structure +inline static int update_coverage_cache(struct coverage_cache_t *coverage_cache, + uint64_t next_ip, int coverage_kind) +{ - bool skip_next = false; + uint32_t offset; - previous_offset = 0; - previous_ip = 0; - current_range = &(coverage_ip_ranges[0]); - - pt_config_init(&config); - pt_cpu_read(&config.cpu); - pt_cpu_errata(&config.errata,
&config.cpu); - config.begin = trace_data; - config.end = trace_data + trace_size; - - // This is important not only for accurate coverage, but also because - // if we don't set it, the decoder is sometimes going to break - // blocks on these instructions anyway, resulting in new coverage being - // detected where there in fact was none. - // See also skip_next comment below - config.flags.variant.block.end_on_call = 1; - config.flags.variant.block.end_on_jump = 1; - - decoder = pt_blk_alloc_decoder(&config); - if (!decoder) { - FATAL("Error allocating decoder\n"); + if (next_ip < current_range->start) { + do { + current_range--; + } while (next_ip < current_range->start); } - - struct pt_image *image = pt_image_alloc("winafl_image"); - module_info_t *cur_module = modules; - while (cur_module) { - if (cur_module->isid > 0) { - int ret = pt_image_add_cached(image, section_cache, cur_module->isid, NULL); - } - cur_module = cur_module->next; + else if (next_ip > current_range->end) { + do { + current_range++; + } while (next_ip > current_range->end); } - int ret = pt_blk_set_image(decoder, image); - - int status; - - for (;;) { - status = pt_blk_sync_forward(decoder); - if (status < 0) { - // printf("cant't sync\n"); - break; - } - - for (;;) { - - // we aren't really interested in events - // but have to empty the event queue - while (status & pts_event_pending) { - status = pt_blk_event(decoder, &event, sizeof(event)); - if (status < 0) - break; - - // printf("event %d\n", event.type); - } - if (status < 0) - break; - - status = pt_blk_next(decoder, &block, sizeof(block)); + if (!current_range->collect) return 0; - if (status < 0) { - break; - } + // printf("ip: %p\n", (void*)next_ip); - if (!skip_next) { - skip_next = false; - update_coverage_map(block.ip, trace_bits, coverage_kind); - // printf("ip: %p, %d %d\n", (void *)block.ip, status, block.iclass); - } + offset = (uint32_t)(next_ip - current_range->start); - // Sometimes, due to asynchronous events and other reasons (?) - // the tracing of a basic block will break in the middle of it - // and the subsequent basic block will continue where the previous - // one was broken, resulting in new coverage detected where there - // was none. - // Currently, this is resolved by examining the instruction class of - // the last instruction in the basic block. If it is not one of the - // instructions that normally terminate a basic block, we will simply - // ignore the subsequent block. 
- // Another way to do this could be to compute the address of the next - // instruction after the basic block, and only ignore a subsequent block - // if it starts on that address - if(block.iclass == ptic_other) skip_next = true; - else skip_next = false; - } + switch (coverage_kind) { + case COVERAGE_BB: + tracelet_coverage_add_bb(coverage_cache, offset); + break; + case COVERAGE_EDGE: + tracelet_coverage_add_edge(coverage_cache, offset); + break; } - pt_image_free(image); - pt_blk_free_decoder(decoder); + return 1; } -static inline int get_next_opcode(unsigned char **data_p, size_t *size_p, unsigned char *opcode_p, unsigned char *opcodesize_p) { +// gets the opcode and the size of the next packet in the trace buffer +static inline int get_next_opcode(unsigned char **data_p, size_t *size_p, + unsigned char *opcode_p, unsigned char *opcodesize_p) +{ + unsigned char *data = *data_p; size_t size = *size_p; @@ -657,9 +879,112 @@ static inline uint64_t decode_ip(unsigned char *data) { return next_ip; } +// returns the type of the first packet or ppt_invalid +int get_next_tracelet(unsigned char **data, size_t *size, + unsigned char **tracelet_data, size_t *tracelet_size) +{ + unsigned char opcode; + unsigned char opcodesize; + unsigned char previous_opcode = ppt_invalid; + int ret = ppt_tnt_8; + + while (*size) { + + if (!get_next_opcode(data, size, &opcode, &opcodesize)) + return ppt_invalid; + + if (opcode == ppt_invalid) return ppt_invalid; + + // printf("packet type: %d\n", opcode); + + switch (opcode) { + case ppt_tnt_8: + case ppt_tnt_64: + // merge tiny tracelets + if (*tracelet_size > MIN_TRACELET_SIZE) { + // always cut before tnt preceded by non-tnt + if (previous_opcode != ppt_invalid && + previous_opcode != ppt_tnt_8 && + previous_opcode != ppt_tnt_64) + { + return ret; + } + // cut very long streams of tnt packets + if (*tracelet_size > MAX_TRACELET_SIZE) { + return ret; + } + } + memcpy(*tracelet_data, *data, opcodesize); + *tracelet_data += opcodesize; + *tracelet_size += opcodesize; + *size -= opcodesize; + *data += opcodesize; + previous_opcode = opcode; + break; + case ppt_psb: + // let the caller know there is a psb in this tracelet + ret = ppt_psb; + case ppt_psbend: + case ppt_fup: + case ppt_tip: + case ppt_tip_pge: + case ppt_tip_pgd: + case ppt_ovf: + case ppt_mode: + // just copy these packets + memcpy(*tracelet_data, *data, opcodesize); + *tracelet_data += opcodesize; + *tracelet_size += opcodesize; + *size -= opcodesize; + *data += opcodesize; + previous_opcode = opcode; + break; + default: + // skip over all other packets + *size -= opcodesize; + *data += opcodesize; + break; + } + } + + return ret; +} + +// checks if the trace starts with the expected IP address +int check_trace_start(unsigned char *data, size_t size, uint64_t expected_ip) { + unsigned char opcode; + unsigned char opcodesize; + + previous_ip = 0; + + while (size) { + if (!get_next_opcode(&data, &size, &opcode, &opcodesize)) return 0; + + switch (opcode) { + case ppt_tip_pge: + if (decode_ip(data) == expected_ip) return 1; + else return 0; + case ppt_fup: + case ppt_tip: + case ppt_tnt_8: + case ppt_tnt_64: + case ppt_tip_pgd: + case ppt_invalid: + return 0; + default: + break; + } + + size -= opcodesize; + data += opcodesize; + } + + return 0; +} + // fast decoder that decodes only tip (and related packets) // and skips over the reset -void decode_trace_tip_fast(unsigned char *data, size_t size, u8 *trace_bits, int coverage_kind) { +void decode_trace_tip_fast(unsigned char *data, size_t size, int
coverage_kind) { uint64_t next_ip; unsigned char opcode; @@ -701,7 +1026,7 @@ if (opcode == ppt_tip) { // printf("ip: %p\n", (void*)next_ip); - update_coverage_map(next_ip, trace_bits, coverage_kind); + update_coverage_map(next_ip, coverage_kind); } size -= opcodesize; @@ -709,39 +1034,8 @@ } } -int check_trace_start(unsigned char *data, size_t size, uint64_t expected_ip) { - unsigned char opcode; - unsigned char opcodesize; - - previous_ip = 0; - - while (size) { - if (!get_next_opcode(&data, &size, &opcode, &opcodesize)) return 0; - - switch (opcode) { - case ppt_tip_pge: - if (decode_ip(data) == expected_ip) return 1; - else return 0; - case ppt_fup: - case ppt_tip: - case ppt_tnt_8: - case ppt_tnt_64: - case ppt_tip_pgd: - case ppt_invalid: - return 0; - default: - break; - } - - size -= opcodesize; - data += opcodesize; - } - - return 0; -} - // process a sinle IPT packet and update AFL map -inline static void process_packet(struct pt_packet *packet, u8 *trace_bits, int coverage_kind) { +inline static void process_packet(struct pt_packet *packet, int coverage_kind) { // printf("packet type: %d\n", packet->type); if ((packet->type != ppt_tip) && (packet->type != ppt_tip_pge) && (packet->type != ppt_tip_pgd) && (packet->type != ppt_fup)) { @@ -773,12 +1067,14 @@ if (packet->type == ppt_tip) { // printf("ip: %p\n", (void*)next_ip); - update_coverage_map(next_ip, trace_bits, coverage_kind); + update_coverage_map(next_ip, coverage_kind); } } -// analyze collected PT trace -void decode_trace_tip_reference(unsigned char *trace_data, size_t trace_size, u8 *trace_bits, int coverage_kind) { +// decodes only TIP packets using the reference implementation +void decode_trace_tip_reference(unsigned char *trace_data, size_t trace_size, + int coverage_kind) +{ // printf("analyzing trace\n"); struct pt_packet_decoder *decoder; @@ -812,9 +1108,509 @@ void decode_trace_tip_reference(unsigned char *trace_data, size_t trace_size, u8 break; } - process_packet(&packet, trace_bits, coverage_kind); + process_packet(&packet, coverage_kind); } } pt_pkt_free_decoder(decoder); } + + +// looks up if we already have the tracelet in cache and if so update +// the state and coverage from the cache entry +inline static bool process_tracelet_from_cache(uint64_t hash, + decoder_state *state, unsigned char *tracelet, + size_t tracelet_size, struct pt_retstack *retstack, + int coverage_kind) +{ + + tracelet_cache_node *cache_node = cache_find_node(hash, state, tracelet, tracelet_size, retstack); + + if (!cache_node) return false; + + // mark the node as most recently used + cache_node_touch(cache_node); + + // update state from cache + *state = cache_node->state_next; + + // update stack if needed + if (cache_node->stack_removed || cache_node->stack_added) { + uint8_t top, bottom; + top = retstack->top; + bottom = retstack->bottom; + + for (uint32_t i = 0; i < cache_node->stack_removed; i++) { + top = (!top ? pt_retstack_size : top - 1); + } + + for (uint32_t i = 0; i < cache_node->stack_added; i++) { + retstack->stack[top] = cache_node->stack_next[i]; + top = (top == pt_retstack_size ? 0 : top + 1); + if (bottom == top) bottom = (bottom == pt_retstack_size ?
0 : bottom + 1); + } + + retstack->top = top; + retstack->bottom = bottom; + } + + // update trace_bits + switch (coverage_kind) { + case COVERAGE_BB: + for (uint32_t i = 0; i < cache_node->map_update_size; i++) { + trace_bits[cache_node->map_offsets[i]] += cache_node->map_updates[i]; + } + break; + case COVERAGE_EDGE: + if (cache_node->map_update_size) { + trace_bits[(cache_node->map_offsets[0] ^ previous_offset) % MAP_SIZE]++; + for (uint32_t i = 1; i < cache_node->map_update_size - 1; i++) { + trace_bits[cache_node->map_offsets[i]] += cache_node->map_updates[i]; + } + previous_offset = cache_node->map_offsets[cache_node->map_update_size - 1]; + } + break; + } + + return true; +} + +// processes a tracelet using the reference decoder +inline static int process_tracelet_reference(struct pt_block_decoder *decoder, + uint8_t *tracelet_end, decoder_state *state_before, decoder_state *state_after, + struct pt_retstack *retstack_before, int *stack_added, int *stack_removed, + struct coverage_cache_t *coverage_cache, int coverage_kind, + bool first_tracelet, bool track_stack, bool *skip_next) +{ + + int stack_last; + int status; + + struct pt_event event; + struct pt_block block; + + decoder->query.config.end = tracelet_end; + status = pt_blk_sync_set(decoder, 0); + + if (status < 0) return status; + + // restore state + if (!first_tracelet) { + decoder->query.ip.ip = state_before->query_ip; + decoder->query.ip.have_ip = 1; + decoder->query.ip.suppressed = 0; + decoder->enabled = 1; + decoder->mode = state_before->mode; + decoder->ip = state_before->block_ip; + decoder->retstack = *retstack_before; + } + + stack_last = retstack_before->top; + + *stack_added = 0; + *stack_removed = 0; + + tracelet_coverage_clear(coverage_cache, coverage_kind); + + for (;;) { + // we aren't really interested in events + // but have to empty the event queue + while (status & pts_event_pending) { + status = pt_blk_event(decoder, &event, sizeof(event)); + if (status < 0) + break; + + // printf("event %d\n", event.type); + } + + if (status < 0) { + break; + } + + status = pt_blk_next(decoder, &block, sizeof(block)); + + if (track_stack) { + if (decoder->retstack.top != stack_last) { + if ((decoder->retstack.top == stack_last - 1) || + ((decoder->retstack.top == 64) && (stack_last == 0))) { + *stack_added -= 1; + if (*stack_added < *stack_removed) *stack_removed = *stack_added; + } + else if ((decoder->retstack.top == stack_last + 1) || + ((decoder->retstack.top == 0) && (stack_last == 64))) { + *stack_added += 1; + } + else { + FATAL("Error: unexpected stack change"); + } + stack_last = decoder->retstack.top; + } + } + + if (status < 0) { + // printf("status: %d\n", status); + break; + } + + if (!*skip_next) { + *skip_next = false; + update_coverage_cache(coverage_cache, block.ip, coverage_kind); + // printf("ip: %p, %d %d\n", (void *)block.ip, status, block.iclass); + } + + // Sometimes, due to asynchronous events and other reasons (?) + // the tracing of a basic block will break in the middle of it + // and the subsequent basic block will continue where the previous + // one was broken, resulting in new coverage detected where there + // was none. + // Currently, this is resolved by examining the instruction class of + // the last instruction in the basic block. If it is not one of the + // instructions that normally terminate a basic block, we will simply + // ignore the subsequent block.
+ // Another way to do this could be to compute the address of the next + // instruction after the basic block, and only ignore a subsequent block + // if it starts on that address + if (block.iclass == ptic_other) *skip_next = true; + else *skip_next = false; + } + + state_after->query_ip = decoder->query.ip.ip; + state_after->mode = decoder->mode; + state_after->block_ip = block.ip; + + switch (coverage_kind) { + case COVERAGE_BB: + for (uint32_t i = 0; i < coverage_cache->size; i++) { + trace_bits[coverage_cache->map_offsets[i]] += coverage_cache->counters[i]; + } + break; + case COVERAGE_EDGE: + if (coverage_cache->size) { + trace_bits[(coverage_cache->map_offsets[0] ^ previous_offset) % MAP_SIZE]++; + for (uint32_t i = 1; i < coverage_cache->size - 1; i++) { + trace_bits[coverage_cache->map_offsets[i]] += coverage_cache->counters[i]; + } + previous_offset = coverage_cache->map_offsets[coverage_cache->size - 1]; + } + break; + } + + return status; +} + + +// constructs the cache node from the decoder state, tracelet etc +// and adds it to the cache +static inline void add_cache_node( + uint8_t *tracelet, size_t tracelet_size, uint64_t hash, + decoder_state *state_before, decoder_state *state_after, + struct pt_retstack *retstack_before, struct pt_retstack *retstack_after, + int stack_added, int stack_removed, + struct coverage_cache_t *coverage_cache) +{ + stack_removed = -stack_removed; + stack_added += stack_removed; + + if (stack_removed > (pt_retstack_size + 1)) stack_removed = (pt_retstack_size + 1); + + if (stack_added < 0) stack_added = 0; + if (stack_added >(pt_retstack_size + 1)) stack_added = (pt_retstack_size + 1); + + size_t node_size = sizeof(tracelet_cache_node) + + stack_removed * sizeof(uint64_t) + stack_added * sizeof(uint64_t) + + coverage_cache->size * sizeof(uint32_t) + coverage_cache->size * sizeof(uint8_t) + + tracelet_size; + + tracelet_cache_node *cache_node = (tracelet_cache_node *)malloc(node_size); + + cache_node->size = node_size; + cache_node->hash = hash; + + uint8_t* ptr = (uint8_t*)cache_node + sizeof(tracelet_cache_node); + cache_node->stack_prev = (uint64_t *)ptr; + cache_node->stack_removed = stack_removed; + ptr += stack_removed * sizeof(uint64_t); + cache_node->stack_next = (uint64_t *)ptr; + cache_node->stack_added = stack_added; + ptr += stack_added * sizeof(uint64_t); + cache_node->map_offsets = (uint32_t *)ptr; + ptr += coverage_cache->size * sizeof(uint32_t); + cache_node->map_updates = ptr; + cache_node->map_update_size = coverage_cache->size; + ptr += coverage_cache->size * sizeof(uint8_t); + cache_node->tracelet = ptr; + cache_node->tracelet_size = (uint32_t)tracelet_size; + + uint8_t top; + top = retstack_before->top; + for (int i = 0; i < stack_removed; i++) { + top = (!top ? pt_retstack_size : top - 1); + cache_node->stack_prev[i] = retstack_before->stack[top]; + } + + top = retstack_after->top; + for (int i = 0; i < stack_added; i++) { + top = (!top ? 
pt_retstack_size : top - 1); + cache_node->stack_next[stack_added - i - 1] = retstack_after->stack[top]; + } + + memcpy(cache_node->map_offsets, coverage_cache->map_offsets, coverage_cache->size * sizeof(uint32_t)); + memcpy(cache_node->map_updates, coverage_cache->counters, coverage_cache->size * sizeof(uint8_t)); + + memcpy(cache_node->tracelet, tracelet, tracelet_size); + + cache_node->state_prev = *state_before; + cache_node->state_next = *state_after; + + cache_node_add(cache_node); +} + +// uses a faster basic block decoder to decode the full trace +// tl;dr the faster decoder is essentially a caching layer on top of the +// reference decoder +// needs to have access to executable memory of the process that generated +// the trace (passed through pt_image) +void analyze_trace_full_fast(unsigned char *trace_data, size_t trace_size, + int coverage_kind, struct pt_image *image, bool skip_first_bb) +{ + // some stats + int num_tracelets=0, num_cache_hits=0; + + size_t tracelet_buffer_size = trace_size + sizeof(psb_and_psbend); + unsigned char *tracelet_buffer = malloc(tracelet_buffer_size); + size_t tracelet_size; + + memcpy(tracelet_buffer, psb_and_psbend, sizeof(psb_and_psbend)); + unsigned char *buffer_after_psb = tracelet_buffer + sizeof(psb_and_psbend); + unsigned char *tracelet_start; + + decoder_state state, state_next; + struct pt_retstack retstack; + retstack.top = 0; + retstack.bottom = 0; + + uint64_t hash; + + int stack_removed; + int stack_added; + + struct pt_block_decoder *decoder; + struct pt_config config; + + bool skip_next = skip_first_bb; + bool first_tracelet = true; + bool use_cache = false; + + previous_offset = 0; + previous_ip = 0; + current_range = &(coverage_ip_ranges[0]); + + struct coverage_cache_t *coverage_cache = + (struct coverage_cache_t *)malloc(sizeof(struct coverage_cache_t)); + tracelet_coverage_init(coverage_cache); + + pt_config_init(&config); + pt_cpu_read(&config.cpu); + pt_cpu_errata(&config.errata, &config.cpu); + config.begin = tracelet_buffer; + config.end = tracelet_buffer + tracelet_buffer_size; + + // This is important not only for accurate coverage, but also because + // if we don't set it, the decoder is sometimes going to break + // blocks on these instructions anyway, resulting in new coverage being + // detected where there in fact was none. 
+ // See also skip_next comment below + config.flags.variant.block.end_on_call = 1; + config.flags.variant.block.end_on_jump = 1; + + decoder = pt_blk_alloc_decoder(&config); + if (!decoder) { + FATAL("Error allocating decoder\n"); + } + + int ret = pt_blk_set_image(decoder, image); + + int status; + + if (!findpsb(&trace_data, &trace_size)) { + FATAL("No sync packets in trace\n"); + return; + } + + for (;;) { + tracelet_start = buffer_after_psb; + tracelet_size = 0; + + int ret = get_next_tracelet(&trace_data, &trace_size, &tracelet_start, &tracelet_size); + + if (!tracelet_size) break; + + if (ret == ppt_invalid) { + if (!findpsb(&trace_data, &trace_size)) { + break; + } + first_tracelet = true; + skip_next = true; + continue; + } + else if (ret == ppt_psb) { + // don't use cache for tracelets containing psb + // psbs are going to mess up our stack tracking + use_cache = false; + } + else { + use_cache = true; + } + + if (skip_next) { + use_cache = false; + } + + num_tracelets++; + + // printf("tracelet size: %llu\n", tracelet_size); + + hash = djb2(buffer_after_psb, tracelet_size); + + // printf("hash: %llx\n", hash); + + if (use_cache && + process_tracelet_from_cache(hash, &state, buffer_after_psb, + tracelet_size, &retstack, coverage_kind)) + { + num_cache_hits++; + continue; + } + + status = process_tracelet_reference(decoder, tracelet_start, + &state, &state_next, &retstack, &stack_added, &stack_removed, + coverage_cache, coverage_kind, first_tracelet, use_cache, + &skip_next); + + first_tracelet = false; + + if ((status < 0) && (status != -pte_eos)) { + if (!findpsb(&trace_data, &trace_size)) { + printf("can't sync\n"); + break; + } + skip_next = true; + continue; + } + + if (use_cache && !skip_next) { + // create a new cache node and add it to the cache + add_cache_node(buffer_after_psb, tracelet_size, hash, + &state, &state_next, &retstack, &decoder->retstack, + stack_added, stack_removed, coverage_cache); + } + + // switch state + state = state_next; + retstack = decoder->retstack; + } + + free(coverage_cache); + free(tracelet_buffer); + + pt_blk_free_decoder(decoder); + + // printf("Cache hits: %d/%d (%g%%)\n", num_cache_hits, num_tracelets, + // ((float)num_cache_hits / num_tracelets) * 100); + // printf("tracelet cache num entries: %llu, size: %llu\n", + // tracelet_cache.num_entries, tracelet_cache.size); +} + +// uses Intel's reference basic block decoder to decode the full trace +// needs to have access to executable memory of the process that generated +// the trace (passed through pt_image) +void analyze_trace_full_reference(unsigned char *trace_data, size_t trace_size, + int coverage_kind, struct pt_image *image, bool skip_first_bb) { + + struct pt_block_decoder *decoder; + struct pt_config config; + struct pt_event event; + struct pt_block block; + + bool skip_next = skip_first_bb; + + previous_offset = 0; + previous_ip = 0; + current_range = &(coverage_ip_ranges[0]); + + pt_config_init(&config); + pt_cpu_read(&config.cpu); + pt_cpu_errata(&config.errata, &config.cpu); + config.begin = trace_data; + config.end = trace_data + trace_size; + + // This is important not only for accurate coverage, but also because + // if we don't set it, the decoder is sometimes going to break + // blocks on these instructions anyway, resulting in new coverage being + // detected where there in fact was none.
+ // See also skip_next comment below + config.flags.variant.block.end_on_call = 1; + config.flags.variant.block.end_on_jump = 1; + + decoder = pt_blk_alloc_decoder(&config); + if (!decoder) { + FATAL("Error allocating decoder\n"); + } + + int ret = pt_blk_set_image(decoder, image); + + int status; + + for (;;) { + status = pt_blk_sync_forward(decoder); + if (status < 0) { + // printf("can't sync\n"); + break; + } + + for (;;) { + + // we aren't really interested in events + // but have to empty the event queue + while (status & pts_event_pending) { + status = pt_blk_event(decoder, &event, sizeof(event)); + if (status < 0) + break; + + // printf("event %d\n", event.type); + } + + if (status < 0) + break; + + status = pt_blk_next(decoder, &block, sizeof(block)); + + if (status < 0) { + break; + } + + if (!skip_next) { + skip_next = false; + update_coverage_map(block.ip, coverage_kind); + // printf("ip: %p, %d %d\n", (void *)block.ip, status, block.iclass); + } + + // Sometimes, due to asynchronous events and other reasons (?) + // the tracing of a basic block will break in the middle of it + // and the subsequent basic block will continue where the previous + // one was broken, resulting in new coverage detected where there + // was none. + // Currently, this is resolved by examining the instruction class of + // the last instruction in the basic block. If it is not one of the + // instructions that normally terminate a basic block, we will simply + // ignore the subsequent block. + // Another way to do this could be to compute the address of the next + // instruction after the basic block, and only ignore a subsequent block + // if it starts on that address + if (block.iclass == ptic_other) skip_next = true; + else skip_next = false; + } + } + + pt_blk_free_decoder(decoder); +} diff --git a/ptdecode.h b/ptdecode.h index 927552f3..68917e0a 100644 --- a/ptdecode.h +++ b/ptdecode.h @@ -1,10 +1,22 @@ -typedef struct _module_info_t { - char module_name[MAX_PATH]; - int isid; - void *base; - size_t size; - struct _module_info_t *next; -} module_info_t; +/* + WinAFL - Intel PT decoding + ------------------------------------------------ + + Written and maintained by Ivan Fratric + + Copyright 2016 Google Inc. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+*/ typedef struct _address_range { uint64_t start; @@ -14,6 +26,9 @@ int check_trace_start(unsigned char *data, size_t size, uint64_t expected_ip); -void analyze_trace_buffer_full(unsigned char *trace_data, size_t trace_size, u8 *trace_bits, int coverage_kind, module_info_t* modules, struct pt_image_section_cache *section_cache); -void decode_trace_tip_fast(unsigned char *data, size_t size, u8 *trace_bits, int coverage_kind); -void decode_trace_tip_reference(unsigned char *trace_data, size_t trace_size, u8 *trace_bits, int coverage_kind); \ No newline at end of file +void tracelet_cache_init(size_t max_entries, size_t max_size); + +void analyze_trace_full_reference(unsigned char *trace_data, size_t trace_size, int coverage_kind, struct pt_image *image, bool skip_first_bb); +void analyze_trace_full_fast(unsigned char *trace_data, size_t trace_size, int coverage_kind, struct pt_image *image, bool skip_first_bb); +void decode_trace_tip_fast(unsigned char *data, size_t size, int coverage_kind); +void decode_trace_tip_reference(unsigned char *trace_data, size_t trace_size, int coverage_kind); \ No newline at end of file diff --git a/readme_pt.md b/readme_pt.md index 3ad30d84..389fe303 100644 --- a/readme_pt.md +++ b/readme_pt.md @@ -1,7 +1,5 @@ # WinAFL Intel PT mode -WinAFL has an Intel PT mode which, at this time, is still considered experimental - ## How it works Intel PT (Processor Tracing) is a feature on modern Intel CPUs that allows tracing code executed by the CPU. If the trace collection is enabled, the CPU generates a highly compressed trace of the instructions executed. This trace can be retrieved and decoded in software later. @@ -12,7 +10,7 @@ When a target is fuzzed with WinAFL in Intel PT mode, WinAFL opens the target in ## Building and using -To build WinAFL with Intel PT support add `-DINTELPT=1` to the build options. +To build WinAFL with Intel PT support, `-DINTELPT=1` must be added to the build options. To use the Intel PT mode set the -P flag (without any arguments) instead of -D flag (for DynamoRIO) when calling afl-fuzz.exe. Intel PT tracing mode understands the same instrumentation flags as the DynamoRIO mode, as well as several others: @@ -22,22 +20,24 @@ To use the Intel PT mode set the -P flag (without any arguments) instead of -D f - `-nopersistent_trace` By default, due to large performance hit associated, WinAFL will not restart tracing for each iteration. If this optimization ever causes problems, it can be turned off via this flag. Mostly here for debugging reasons. -## Remarks + - `-trace_cache_size <size>` The size (in bytes) of the trace cache. Used only in combination with the `full` decoder. + +The following trace decoders are available: + + - `full_ref` Uses Intel's reference implementation to fully decode the trace. Note that full trace decoding introduces a significant overhead. Full trace decoding requires information about the code being executed. WinAFL accomplishes this by saving the memory from all executable modules in the process once they are loaded. However, if the instruction pointer ever ends up outside of an executable module (e.g. due to target using some kind of JIT), the decoding is going to fail and the trace will be decoded only partially. Additionally, if the target modifies executable modules on the fly, the result of the decoding is going to be unpredictable. + + - `full` (default) A custom decoder that adds a trace caching layer on top of Intel's reference decoder.
Like the `full_ref` decoder, it fully decodes all basic blocks in the trace (also provided that code isn't generated / modified dynamically), but is significantly faster. + + - `tip_ref` Uses Intel's reference decoder implementation and decodes only the packets that contain the raw IP address (emitted for e.g. indirect jumps and calls, sometimes returns) but doesn't decode other packets, e.g. those containing info about conditional jumps. This option does not require having any information on the code being executed and is much faster than full decoding. This is similar to how Intel PT is used in [Honggfuzz](https://github.com/google/honggfuzz). + + - `tip` A faster custom implementation of the `tip_ref` decoder. It should behave the same as `tip_ref`. - - Intel PT support is still consider experimental. Please report any bugs you encounter. +## Limitations and other remarks - A relatively recent Intel CPU with the Processor Tracing feature is needed for this mode and Windows 10 v1809 is needed to be able to interact with it. Running WinAFL inside a VM won't work unless the VM software explicitly supports Intel PT. - The CPU writes trace information into a ring buffer. If the space in the ring buffer is not sufficient to store the full trace of the iteration execution, the buffer will wrap around and only the last `trace_size` bytes (or a little less, depending on the synchronization packets) will be available for processing. You should set the `trace_size` flags to be able to contain the full trace for a sample that exhibits full target behavior. The default `trace_size` should be sufficient for most targets, however reducing it might increase performance for small targets and you might want to increase it if you get trace buffer overflow warnings. - - There are several options for decoding the trace, each with its advantages and drawbacks: - - - `full` (default) Uses Intel's reference implementation to fully decode the trace. Note that full trace decoding introduces a significant overhead. Full trace decoding requires information about the code being executed. WinAFL accomplishes this by saving the memory from all executable modules in the process once they are loaded. However, if the instruction pointer ever ends up outside of an executable module (e.g. due to target using some kind of JIT), the decoding is going to fail and the trace will be decoded only partially. Additinally, if the target modifies executable modules on the fly, the result of the decoding is going to be unpredictable. - - - `tip_ref` Uses Intel's reference decoder implementation and decodes only the packets that contain the raw IP address (emitted for e.g. indirect jumps and calls, sometimes returns) but don't decode other packets, e.g. containing info about indirect jumps. This option does not require having any information on the code being executed and is much faster than full decoding. This is similar to how Intel PT is used in [Honggfuzz](https://github.com/google/honggfuzz). - - - `tip` A faster custom implementation of the `tip_ref` decoder. It should behave the same as `tip_ref` - - Currently, WinAFL will only record the trace from a thread that executes the target function. In most cases this is desirable, but not always. Currently, Intel PT driver does collect information from all threads and the debugger gets information about threads being created and threads exiting. However, when the debugger gets the EXIT_THREAD_DEBUG_EVENT, it is too late and the trace information for this thread is already lost.
WinAFL could read out the trace while the thread is still running, however there would be a gap between the last time the trace was read out and the time the thread exited. This would result in a non-deterministic trace with a part of it cut off and, likely, not recording trace for very short threads. Thus, to address this problem deterministically, a better way of tracking thread exits is needed. ## Examples diff --git a/winaflpt.c b/winaflpt.c index 2a23309c..72e60c9f 100644 --- a/winaflpt.c +++ b/winaflpt.c @@ -1,21 +1,21 @@ /* -WinAFL - Intel PT instrumentation and presistence via debugger code ------------------------------------------------- + WinAFL - Intel PT instrumentation and persistence via debugger code + ------------------------------------------------ -Written and maintained by Ivan Fratric + Written and maintained by Ivan Fratric -Copyright 2016 Google Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Copyright 2016 Google Inc. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #define _CRT_SECURE_NO_WARNINGS @@ -40,11 +40,14 @@ limitations under the License.
#include "ptdecode.h" +// tests the custom decoders gainst the corresponding +// reference implementatopns from Intel +// used only for debugging +// #define DECODER_CORRECTNESS_TEST + u64 get_cur_time(void); char *argv_to_cmd(char** argv); -#define TRACE_BUFFER_SIZE_DEFAULT 131072 //should be a power of 2 - #define CALLCONV_MICROSOFT_X64 0 #define CALLCONV_THISCALL 1 #define CALLCONV_FASTCALL 2 @@ -123,6 +126,14 @@ enum { /* 05 */ FAULT_NOBITS }; +typedef struct _module_info_t { + char module_name[MAX_PATH]; + int isid; + void *base; + size_t size; + struct _module_info_t *next; +} module_info_t; + static module_info_t *all_modules = NULL; typedef struct _winafl_option_t { @@ -138,6 +149,7 @@ typedef struct _winafl_option_t { int decoder; bool thread_coverage; unsigned long trace_buffer_size; + unsigned long trace_cache_size; bool persistent_trace; void **func_args; @@ -174,8 +186,9 @@ winaflpt_options_init(int argc, const char *argv[]) options.num_fuz_args = 0; options.thread_coverage = true; options.callconv = CALLCONV_DEFAULT; - options.decoder = DECODER_FULL_REFERENCE; + options.decoder = DECODER_FULL_FAST; options.trace_buffer_size = TRACE_BUFFER_SIZE_DEFAULT; + options.trace_cache_size = 0; options.persistent_trace = true; for (i = 0; i < argc; i++) { @@ -227,6 +240,10 @@ winaflpt_options_init(int argc, const char *argv[]) USAGE_CHECK((i + 1) < argc, "missing trace size"); options.trace_buffer_size = strtoul(argv[++i], NULL, 0); } + else if (strcmp(token, "-trace_cache_size") == 0) { + USAGE_CHECK((i + 1) < argc, "missing trace cache size"); + options.trace_cache_size = strtoul(argv[++i], NULL, 0); + } else if (strcmp(token, "-call_convention") == 0) { USAGE_CHECK((i + 1) < argc, "missing calling convention"); ++i; @@ -248,6 +265,8 @@ winaflpt_options_init(int argc, const char *argv[]) else if (strcmp(argv[i], "tip_ref") == 0) options.decoder = DECODER_TIP_REFERENCE; else if (strcmp(argv[i], "full") == 0) + options.decoder = DECODER_FULL_FAST; + else if (strcmp(argv[i], "full_ref") == 0) options.decoder = DECODER_FULL_REFERENCE; else FATAL("Unknown decoder value"); @@ -349,15 +368,19 @@ void append_trace_data(unsigned char *trace_data, size_t append_size) { trace_size += append_size; } -unsigned char psb_and_psbend[] = { - 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, - 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, - 0x02, 0x23 -}; -void collect_thread_trace(PIPT_TRACE_HEADER traceHeader) { +// returns true if the ring buffer was overflowed +bool collect_thread_trace(PIPT_TRACE_HEADER traceHeader) { // printf("ring offset: %u\n", traceHeader->RingBufferOffset); + bool trace_buffer_overflow = false; + + unsigned char psb_and_psbend[] = { + 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, + 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, + 0x02, 0x23 + }; + trace_size = 0; if (options.persistent_trace) { @@ -382,6 +405,7 @@ void collect_thread_trace(PIPT_TRACE_HEADER traceHeader) { // most likely the ring buffer overflowd, extract what we can (trace tail) trace_size = 0; + trace_buffer_overflow = true; printf("Warning: Trace buffer overflowed, trace will be truncated\n"); if (options.debug_mode) fprintf(debug_log, "Trace buffer overflowed, trace will be truncated\n"); @@ -403,6 +427,7 @@ void collect_thread_trace(PIPT_TRACE_HEADER traceHeader) { char *trailing_data = traceHeader->Trace + traceHeader->RingBufferOffset; size_t trailing_size = traceHeader->TraceSize - traceHeader->RingBufferOffset; if (findpsb(&trailing_data, &trailing_size)) { + trace_buffer_overflow = true; 
printf("Warning: Trace buffer overflowed, trace will be truncated\n"); if (options.debug_mode) fprintf(debug_log, "Trace buffer overflowed, trace will be truncated\n"); append_trace_data(trailing_data, trailing_size); @@ -410,11 +435,16 @@ void collect_thread_trace(PIPT_TRACE_HEADER traceHeader) { append_trace_data(traceHeader->Trace, traceHeader->RingBufferOffset); } + + return trace_buffer_overflow; } // parse PIPT_TRACE_DATA, extract trace bits and add them to the trace_buffer -int collect_trace(PIPT_TRACE_DATA pTraceData) +// returns true if the trace ring buffer overflowed +bool collect_trace(PIPT_TRACE_DATA pTraceData) { + bool trace_buffer_overflow = false; + PIPT_TRACE_HEADER traceHeader; DWORD dwTraceSize; @@ -424,7 +454,7 @@ int collect_trace(PIPT_TRACE_DATA pTraceData) while (dwTraceSize > (unsigned)(FIELD_OFFSET(IPT_TRACE_HEADER, Trace))) { if (traceHeader->ThreadId == fuzz_thread_id) { - collect_thread_trace(traceHeader); + trace_buffer_overflow = collect_thread_trace(traceHeader); } dwTraceSize -= (FIELD_OFFSET(IPT_TRACE_HEADER, Trace) + traceHeader->TraceSize); @@ -433,7 +463,7 @@ int collect_trace(PIPT_TRACE_DATA pTraceData) traceHeader->TraceSize); } - return 0; + return trace_buffer_overflow; } // returns an array of handles for all modules loaded in the target process @@ -1421,12 +1451,12 @@ int run_target_pt(char **argv, uint32_t timeout) { // printf("iteration end\n"); // collect trace + bool trace_buffer_overflowed = false; PIPT_TRACE_DATA trace_data = GetIptTrace(child_handle); if (!trace_data) { printf("Error getting ipt trace\n"); - } - else { - collect_trace(trace_data); + } else { + trace_buffer_overflowed = collect_trace(trace_data); HeapFree(GetProcessHeap(), 0, trace_data); } @@ -1446,14 +1476,52 @@ int run_target_pt(char **argv, uint32_t timeout) { // printf("decoding trace of %llu bytes\n", trace_size); + struct pt_image *image = NULL; + if ((options.decoder == DECODER_FULL_FAST) || (options.decoder == DECODER_FULL_REFERENCE)) { + image = pt_image_alloc("winafl_image"); + module_info_t *cur_module = all_modules; + while (cur_module) { + if (cur_module->isid > 0) { + int ret = pt_image_add_cached(image, section_cache, cur_module->isid, NULL); + } + cur_module = cur_module->next; + } + } + if (options.decoder == DECODER_TIP_FAST) { - decode_trace_tip_fast(trace_buffer, trace_size, trace_bits, options.coverage_kind); + decode_trace_tip_fast(trace_buffer, trace_size, options.coverage_kind); +#ifdef DECODER_CORRECTNESS_TEST + printf("Testing decoder correctness\n"); + unsigned char *fast_trace_bits = (unsigned char *)malloc(MAP_SIZE); + memcpy(fast_trace_bits, trace_bits, MAP_SIZE); + memset(trace_bits, 0, MAP_SIZE); + decode_trace_tip_reference(trace_buffer, trace_size, options.coverage_kind); + if (memcmp(fast_trace_bits, trace_bits, MAP_SIZE)) { + FATAL("Fast decoder returned different coverage than the reference decoder"); + } + free(fast_trace_bits); +#endif } else if (options.decoder == DECODER_TIP_REFERENCE) { - decode_trace_tip_reference(trace_buffer, trace_size, trace_bits, options.coverage_kind); + decode_trace_tip_reference(trace_buffer, trace_size, options.coverage_kind); + } else if (options.decoder == DECODER_FULL_FAST) { + analyze_trace_full_fast(trace_buffer, trace_size, options.coverage_kind, image, trace_buffer_overflowed); +#ifdef DECODER_CORRECTNESS_TEST + printf("Testing decoder correctness\n"); + unsigned char *fast_trace_bits = (unsigned char *)malloc(MAP_SIZE); + memcpy(fast_trace_bits, trace_bits, MAP_SIZE); + memset(trace_bits, 0, 
MAP_SIZE); + analyze_trace_full_reference(trace_buffer, trace_size, options.coverage_kind, image, trace_buffer_overflowed); + if (memcmp(fast_trace_bits, trace_bits, MAP_SIZE)) { + FATAL("Fast decoder returned different coverage than the reference decoder"); + } + free(fast_trace_bits); +#endif } else if (options.decoder == DECODER_FULL_REFERENCE) { - analyze_trace_buffer_full(trace_buffer, trace_size, trace_bits, options.coverage_kind, all_modules, section_cache); + analyze_trace_full_reference(trace_buffer, trace_size, options.coverage_kind, image, trace_buffer_overflowed); } + if(image) pt_image_free(image); + if (debugger_status == DEBUGGER_PROCESS_EXIT) { CloseHandle(child_handle); CloseHandle(child_thread_handle); @@ -1513,6 +1581,20 @@ int pt_init(int argc, char **argv, char *module_dir) { } strcpy(section_cache_dir, module_dir); + if (options.decoder == DECODER_FULL_FAST) { + if (!options.trace_cache_size) { + // simple heuristics for determining tracelet cache size + // within reasonable bounds + options.trace_cache_size = options.trace_buffer_size * 10; + if (options.trace_cache_size < TRACE_CACHE_SIZE_MIN) + options.trace_cache_size = TRACE_CACHE_SIZE_MIN; + if (options.trace_cache_size > TRACE_CACHE_SIZE_MAX) + options.trace_cache_size = TRACE_CACHE_SIZE_MAX; + + } + tracelet_cache_init(options.trace_cache_size / 100, options.trace_cache_size); + } + return lastoption; } diff --git a/winaflpt.h b/winaflpt.h index aef09226..31a650b8 100644 --- a/winaflpt.h +++ b/winaflpt.h @@ -1,6 +1,31 @@ +/* + WinAFL - Intel PT instrumentation and persistence via debugger code + ------------------------------------------------ + + Written and maintained by Ivan Fratric + + Copyright 2016 Google Inc. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #define COVERAGE_BB 0 #define COVERAGE_EDGE 1 +#define TRACE_BUFFER_SIZE_DEFAULT (128*1024) //should be a power of 2 + +#define TRACE_CACHE_SIZE_MIN 10000000 +#define TRACE_CACHE_SIZE_MAX 100000000 + bool findpsb(unsigned char **data, size_t *size); int run_target_pt(char **argv, uint32_t timeout);
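
The AFL-style edge hashing that update_coverage_map(), tracelet_coverage_add_edge() and the cache replay in process_tracelet_from_cache() all implement is easier to follow in isolation. The following is a minimal, self-contained sketch of that map update, not code from the patch; the MAP_SIZE value (AFL's usual 1 << 16 from config.h) and the example block offsets are assumptions made for illustration.

#include <stdint.h>
#include <stdio.h>

#define MAP_SIZE (1 << 16)  /* assumed; AFL's config.h defines the real value */

static uint8_t trace_bits[MAP_SIZE]; /* the shared coverage map */
static uint32_t previous_offset;     /* shifted offset of the previously seen block */

/* Record one decoded basic block, given its offset inside a traced module. */
static void record_edge(uint32_t offset) {
    offset %= MAP_SIZE;
    /* an edge is identified by hashing the (previous, current) block pair */
    trace_bits[(offset ^ previous_offset) % MAP_SIZE]++;
    /* shift so that the edges A->B and B->A land in different map cells */
    previous_offset = offset >> 1;
}

int main(void) {
    /* hypothetical block offsets, in the order a decoder might emit them */
    uint32_t blocks[] = { 0x1010, 0x1042, 0x1010, 0x1042, 0x1100 };
    for (size_t i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
        record_edge(blocks[i]);

    unsigned distinct = 0;
    for (size_t i = 0; i < MAP_SIZE; i++)
        if (trace_bits[i]) distinct++;
    printf("%u distinct edge(s) hit\n", distinct); /* prints 4 for this sequence */
    return 0;
}

This also shows why a cached tracelet stores its first block offset raw and its last offset pre-shifted: the first edge of a tracelet depends on the previous_offset left behind by whatever executed before it, and the stored last value re-seeds previous_offset for whatever comes next.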