Skip to content

Commit

Permalink
Add /xr regex byte search.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rot127 committed Feb 4, 2025
1 parent 0d5cb34 commit 49edd19
Show file tree
Hide file tree
Showing 11 changed files with 276 additions and 33 deletions.
78 changes: 77 additions & 1 deletion librz/core/cmd/cmd_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "cmd_search_rop.c"
#include "rz_cons.h"
#include <rz_util/rz_str_util.h>
#include <rz_util/rz_strbuf.h>
#include <rz_util/rz_regex.h>
#include <rz_util/rz_str.h>
Expand Down Expand Up @@ -2794,7 +2795,8 @@ RZ_IPI RzCmdStatus rz_cmd_search_hex_handler(RzCore *core, int argc, const char
CMD_SEARCH_BEGIN();

RzList *hits = NULL;
RzSearchBytesPattern *pattern = rz_search_parse_byte_pattern(argv[1], "bytes");
const char *arg = argv[1];
RzSearchBytesPattern *pattern = rz_search_parse_byte_pattern(arg, "bytes");

if (!pattern) {
RZ_LOG_ERROR("Failed to parse given pattern.\n");
Expand Down Expand Up @@ -2823,6 +2825,80 @@ RZ_IPI RzCmdStatus rz_cmd_search_hex_handler(RzCore *core, int argc, const char
return RZ_CMD_STATUS_ERROR;
}

/**
 * \brief Decodes the textual "/xr" argument into the raw bytes of the pattern.
 *
 * An 'x' followed by two hexadecimal digits ("xNN") becomes the single byte 0xNN.
 * Every other character is copied to the output unchanged.
 *
 * \param arg The NUL terminated pattern string.
 * \param re Output buffer. Each consumed character produces at most one byte,
 *           so strlen(arg) bytes are always enough.
 * \param len Receives the number of bytes written to \p re.
 *
 * \return true if the whole argument was decoded.
 *         false if an 'x' is not followed by two hexadecimal digits.
 */
static bool parse_pattern_arg(const char *arg, RZ_OUT ut8 *re, RZ_OUT size_t *len) {
	const size_t total = strlen(arg);
	size_t pos = 0;
	*len = 0;
	while (pos < total) {
		const char cur = arg[pos];
		if (cur != 'x') {
			// Plain character: it is part of the pattern as it is.
			re[*len] = cur;
			pos++;
			*len += 1;
			continue;
		}
		// An 'x' must have two more characters behind it.
		if (pos + 2 >= total) {
			RZ_LOG_ERROR("'x' in the pattern must be followed by two hexadecimal nibbles (N = [a-fA-F0-9]): xNN.\n");
			return false;
		}
		const char high = arg[pos + 1];
		const char low = arg[pos + 2];
		if (!(IS_HEXCHAR(high) && IS_HEXCHAR(low))) {
			RZ_LOG_ERROR("Bytes with non-hexadecimal nibbles are not allowed. Got: 'x%c%c'.\n", high, low);
			return false;
		}
		const ut16 value = rz_hex_digit_pair_to_byte(arg + pos + 1);
		re[*len] = value;
		pos += 3;
		*len += 1;
	}
	return true;
}

// "/xr"
/**
 * \brief Handler of the "/xr" command: searches bytes with a regular expression.
 *
 * argv[1] is decoded with parse_pattern_arg() ("xNN" becomes the byte 0xNN,
 * everything else is taken verbatim). The decoded bytes are turned into a
 * regex byte pattern and searched with rz_core_search_bytes().
 *
 * \param core The core to search in.
 * \param argc Number of arguments (unused).
 * \param argv Arguments. argv[1] is the pattern.
 * \param state Output state, passed on to the hit printing and, if
 *              "search.show_progress" is set, to the cancel callback.
 *
 * \return The result of cmd_core_handle_search_hits() if the search ran,
 *         RZ_CMD_STATUS_ERROR otherwise.
 */
RZ_IPI RzCmdStatus rz_cmd_search_hex_regex_handler(RzCore *core, int argc, const char **argv, RzCmdStateOutput *state) {
	// Everything the error path touches is declared and initialized before the
	// first `goto error`. Jumping over an initializer would leave the variable
	// with an indeterminate value when the cleanup code reads it.
	RzList *hits = NULL;
	RzSearchBytesPattern *pattern = NULL;
	const char *arg = argv[1];
	size_t r = 0;
	// The decoded pattern is never longer than its textual form. The extra
	// byte keeps the request non-empty, so NULL always means a failed allocation.
	ut8 *re = RZ_NEWS0(ut8, strlen(arg) + 1);
	RzSearchOpt *search_opts = setup_search_options(core);
	if (!search_opts) {
		goto error;
	}

	CMD_SEARCH_BEGIN();

	if (!re) {
		RZ_LOG_ERROR("Failed to allocate the pattern buffer.\n");
		goto error;
	}
	if (!parse_pattern_arg(arg, re, &r)) {
		goto error;
	}
	if (r == 0) {
		// rz_search_bytes_pattern_new() rejects a zero length without freeing
		// the bytes handed to it. Bail out before creating the copy.
		RZ_LOG_ERROR("Failed to parse given pattern.\n");
		goto error;
	}
	pattern = rz_search_bytes_pattern_new(rz_new_copy(r, re), NULL, r, "bytes", true);

	if (!pattern) {
		RZ_LOG_ERROR("Failed to parse given pattern.\n");
		goto error;
	}

	bool progress = rz_config_get_b(core->config, "search.show_progress");
	if (!rz_search_opt_set_cancel_cb(search_opts, cmd_search_progress_cancel, progress ? state : NULL)) {
		RZ_LOG_ERROR("code: Failed to setup default search options.\n");
		// The pattern was not handed to the search yet, so it is still owned here.
		rz_search_bytes_pattern_free(pattern);
		goto error;
	}
	// NOTE(review): the pattern is assumed to be consumed by rz_core_search_bytes(),
	// also if it fails. Hence it is not freed below - confirm against its definition.
	hits = rz_core_search_bytes(core, search_opts, pattern);
	if (!hits) {
		RZ_LOG_ERROR("Failed to perform search.\n");
		goto error;
	}

	CMD_SEARCH_END();
	free(re);
	rz_search_opt_free(search_opts);
	return cmd_core_handle_search_hits(core, state, hits);

error:
	free(re);
	rz_list_free(hits);
	rz_search_opt_free(search_opts);
	CMD_SEARCH_END();
	return RZ_CMD_STATUS_ERROR;
}

static RzCmdStatus cmd_string_search_generic(RzCore *core, const char *string, const char *encoding, RzRegexFlags flags, RzCmdStateOutput *state) {
RzSearchOpt *search_opts = setup_search_options(core);
if (!search_opts) {
Expand Down
40 changes: 37 additions & 3 deletions librz/core/cmd_descs/cmd_descs.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ static const RzCmdDescDetail cmd_search_hash_block_details[2];
static const RzCmdDescDetail slash_v_details[2];
static const RzCmdDescDetail slash_V_details[2];
static const RzCmdDescDetail cmd_search_hex_details[2];
static const RzCmdDescDetail cmd_search_hex_regex_details[2];
static const RzCmdDescDetail slash_z_details[3];
static const RzCmdDescDetail base64_encode_details[2];
static const RzCmdDescDetail base64_decode_details[2];
Expand Down Expand Up @@ -159,6 +160,7 @@ static const RzCmdDescArg cmd_search_value_16be_args[3];
static const RzCmdDescArg cmd_search_value_32be_args[3];
static const RzCmdDescArg cmd_search_value_64be_args[3];
static const RzCmdDescArg cmd_search_hex_args[2];
static const RzCmdDescArg cmd_search_hex_regex_args[2];
static const RzCmdDescArg cmd_search_string_sensitive_args[4];
static const RzCmdDescArg remote_args[3];
static const RzCmdDescArg remote_send_args[3];
Expand Down Expand Up @@ -2338,6 +2340,9 @@ static const RzCmdDescHelp cmd_search_value_64be_help = {
.args = cmd_search_value_64be_args,
};

// Help of the "/x" command group. It only carries a summary, no arguments or details.
static const RzCmdDescHelp slash_x_help = {
.summary = "Raw hexadecimal search.",
};
static const RzCmdDescDetailEntry cmd_search_hex_Usage_space_example_detail_entries[] = {
{ .text = "Hexadecimal search for the exact bytes 'ffcc33'.", .arg_str = NULL, .comment = "/x ffcc33" },
{ .text = "Hexadecimal search for the byte pattern 'ff..33.0.'. The '.' is a wildcard for 4bits.", .arg_str = NULL, .comment = "/x ff..33.0" },
Expand All @@ -2364,6 +2369,32 @@ static const RzCmdDescHelp cmd_search_hex_help = {
.args = cmd_search_hex_args,
};

// Usage examples of "/xr". `.text` holds the explanation and `.comment` the example command.
// The table is terminated by a zeroed entry.
static const RzCmdDescDetailEntry cmd_search_hex_regex_Usage_space_examples_detail_entries[] = {
{ .text = " Bytes are prefixed with a 'x'. Search exact match '\\x99\\x0a'.", .arg_str = NULL, .comment = "/xr x99x0a" },
{ .text = "Search 2-8 NUL bytes, then '\\x99' and '\\x0a'", .arg_str = NULL, .comment = "/xr x00{2,8}x99x0a" },
{ .text = "A '.' matches one byte. Search matches: '\\x72\\x??\\x00'. '\\x??' can appear 0-1 times.", .arg_str = NULL, .comment = "/xr x72.?x00" },
{ .text = "Using simple ASCII is allowed. Search matches: '\\x61\\x41'", .arg_str = NULL, .comment = "/xr aA" },
{ 0 },
};
// Detail sections of the "/xr" help: a single "Usage examples" section.
// The table is terminated by a zeroed entry.
static const RzCmdDescDetail cmd_search_hex_regex_details[] = {
{ .name = "Usage examples", .entries = cmd_search_hex_regex_Usage_space_examples_detail_entries },
{ 0 },
};
// Arguments of "/xr": one string argument named "regex_pattern", flagged as the last argument.
// The table is terminated by a zeroed entry.
static const RzCmdDescArg cmd_search_hex_regex_args[] = {
{
.name = "regex_pattern",
.type = RZ_CMD_ARG_TYPE_STRING,
.flags = RZ_CMD_ARG_FLAG_LAST,

},
{ 0 },
};
// Help descriptor of "/xr". It ties together the summary, the detail sections and the argument list.
static const RzCmdDescHelp cmd_search_hex_regex_help = {
.summary = "Regex bytes search.",
.details = cmd_search_hex_regex_details,
.args = cmd_search_hex_regex_args,
};

static const RzCmdDescDetailEntry slash_z_Encodings_detail_entries[] = {
{ .text = "ascii", .arg_str = NULL, .comment = "ASCII encoding" },
{ .text = "8bit", .arg_str = NULL, .comment = "8bit encoding. Alias: ASCII" },
Expand Down Expand Up @@ -21017,9 +21048,12 @@ RZ_IPI void rzshell_cmddescs_init(RzCore *core) {
rz_warn_if_fail(cmd_search_value_64be_cd);
rz_cmd_desc_set_default_mode(cmd_search_value_64be_cd, RZ_OUTPUT_MODE_STANDARD);

RzCmdDesc *cmd_search_hex_cd = rz_cmd_desc_argv_state_new(core->rcmd, slash__cd, "/x", RZ_OUTPUT_MODE_STANDARD | RZ_OUTPUT_MODE_JSON | RZ_OUTPUT_MODE_QUIET | RZ_OUTPUT_MODE_TABLE, rz_cmd_search_hex_handler, &cmd_search_hex_help);
rz_warn_if_fail(cmd_search_hex_cd);
rz_cmd_desc_set_default_mode(cmd_search_hex_cd, RZ_OUTPUT_MODE_STANDARD);
RzCmdDesc *slash_x_cd = rz_cmd_desc_group_state_new(core->rcmd, slash__cd, "/x", RZ_OUTPUT_MODE_STANDARD | RZ_OUTPUT_MODE_JSON | RZ_OUTPUT_MODE_QUIET | RZ_OUTPUT_MODE_TABLE, rz_cmd_search_hex_handler, &cmd_search_hex_help, &slash_x_help);
rz_warn_if_fail(slash_x_cd);
rz_cmd_desc_set_default_mode(slash_x_cd, RZ_OUTPUT_MODE_STANDARD);
RzCmdDesc *cmd_search_hex_regex_cd = rz_cmd_desc_argv_state_new(core->rcmd, slash_x_cd, "/xr", RZ_OUTPUT_MODE_STANDARD | RZ_OUTPUT_MODE_JSON | RZ_OUTPUT_MODE_QUIET | RZ_OUTPUT_MODE_TABLE, rz_cmd_search_hex_regex_handler, &cmd_search_hex_regex_help);
rz_warn_if_fail(cmd_search_hex_regex_cd);
rz_cmd_desc_set_default_mode(cmd_search_hex_regex_cd, RZ_OUTPUT_MODE_STANDARD);

RzCmdDesc *slash_z_cd = rz_cmd_desc_group_state_new(core->rcmd, slash__cd, "/z", RZ_OUTPUT_MODE_STANDARD | RZ_OUTPUT_MODE_JSON | RZ_OUTPUT_MODE_QUIET | RZ_OUTPUT_MODE_TABLE, rz_cmd_search_string_sensitive_handler, &cmd_search_string_sensitive_help, &slash_z_help);
rz_warn_if_fail(slash_z_cd);
Expand Down
2 changes: 2 additions & 0 deletions librz/core/cmd_descs/cmd_descs.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ RZ_IPI RzCmdStatus rz_cmd_search_value_32be_handler(RzCore *core, int argc, cons
RZ_IPI RzCmdStatus rz_cmd_search_value_64be_handler(RzCore *core, int argc, const char **argv, RzCmdStateOutput *state);
// "/x"
RZ_IPI RzCmdStatus rz_cmd_search_hex_handler(RzCore *core, int argc, const char **argv, RzCmdStateOutput *state);
// "/xr"
RZ_IPI RzCmdStatus rz_cmd_search_hex_regex_handler(RzCore *core, int argc, const char **argv, RzCmdStateOutput *state);
// "/z"
RZ_IPI RzCmdStatus rz_cmd_search_string_sensitive_handler(RzCore *core, int argc, const char **argv, RzCmdStateOutput *state);
// "R"
Expand Down
71 changes: 49 additions & 22 deletions librz/core/cmd_descs/cmd_search.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -690,28 +690,55 @@ commands:
comment: "/V4 512"
- name: "/x"
summary: Raw hexadecimal search.
cname: cmd_search_hex
type: RZ_CMD_DESC_TYPE_ARGV_STATE
default_mode: RZ_OUTPUT_MODE_STANDARD
modes:
- RZ_OUTPUT_MODE_STANDARD
- RZ_OUTPUT_MODE_JSON
- RZ_OUTPUT_MODE_QUIET
- RZ_OUTPUT_MODE_TABLE
args:
- name: pattern
type: RZ_CMD_ARG_TYPE_STRING
details:
- name: Usage example
entries:
- text: "Hexadecimal search for the exact bytes 'ffcc33'."
comment: "/x ffcc33"
- text: "Hexadecimal search for the byte pattern 'ff..33.0.'. The '.' is a wildcard for 4bits."
comment: "/x ff..33.0"
- text: "Hexadecimal search of the bytes with mask. Pattern: '<resulting bytes>:<mask>'"
comment: "/x ffd0:ff43"
- text: "Hexadecimal search with an odd number of nibbles."
comment: "'aabbc' is equivalent to '.aabbc'"
subcommands:
- name: "/x"
summary: Raw hexadecimal search.
cname: cmd_search_hex
type: RZ_CMD_DESC_TYPE_ARGV_STATE
default_mode: RZ_OUTPUT_MODE_STANDARD
modes:
- RZ_OUTPUT_MODE_STANDARD
- RZ_OUTPUT_MODE_JSON
- RZ_OUTPUT_MODE_QUIET
- RZ_OUTPUT_MODE_TABLE
args:
- name: pattern
type: RZ_CMD_ARG_TYPE_STRING
details:
- name: Usage example
entries:
- text: "Hexadecimal search for the exact bytes 'ffcc33'."
comment: "/x ffcc33"
- text: "Hexadecimal search for the byte pattern 'ff..33.0.'. The '.' is a wildcard for 4bits."
comment: "/x ff..33.0"
- text: "Hexadecimal search of the bytes with mask. Pattern: '<resulting bytes>:<mask>'"
comment: "/x ffd0:ff43"
- text: "Hexadecimal search with an odd number of nibbles."
comment: "'aabbc' is equivalent to '.aabbc'"
- name: "/xr"
summary: Regex bytes search.
cname: cmd_search_hex_regex
type: RZ_CMD_DESC_TYPE_ARGV_STATE
default_mode: RZ_OUTPUT_MODE_STANDARD
modes:
- RZ_OUTPUT_MODE_STANDARD
- RZ_OUTPUT_MODE_JSON
- RZ_OUTPUT_MODE_QUIET
- RZ_OUTPUT_MODE_TABLE
args:
- name: regex_pattern
type: RZ_CMD_ARG_TYPE_STRING
details:
- name: Usage examples
entries:
- text: " Bytes are prefixed with a 'x'. Search exact match '\\x99\\x0a'."
comment: "/xr x99x0a"
- text: "Search 2-8 NUL bytes, then '\\x99' and '\\x0a'"
comment: "/xr x00{2,8}x99x0a"
- text: "A '.' matches one byte. Search matches: '\\x72\\x??\\x00'. '\\x??' can appear 0-1 times."
comment: "/xr x72.?x00"
- text: "Using simple ASCII is allowed. Search matches: '\\x61\\x41'"
comment: "/xr aA"
- name: "/z"
summary: String search.
details:
Expand Down
2 changes: 1 addition & 1 deletion librz/include/rz_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ RZ_API RZ_OWN char *rz_search_hit_flag_name(RZ_NONNULL const RzSearchHit *hit, s

RZ_API void rz_search_bytes_pattern_free(RZ_NULLABLE RZ_OWN RzSearchBytesPattern *hp);
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_copy(RZ_NONNULL RZ_BORROW RzSearchBytesPattern *hp);
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_new(RZ_OWN ut8 *bytes, RZ_NULLABLE RZ_OWN ut8 *mask, size_t length, RZ_NULLABLE const char *pattern_desc);
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_new(RZ_OWN ut8 *bytes, RZ_NULLABLE RZ_OWN ut8 *mask, size_t length, RZ_NULLABLE const char *pattern_desc, bool compile_regex);
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_parse_byte_pattern(const char *byte_pattern, RZ_NULLABLE const char *pattern_desc);
RZ_API size_t rz_search_bytes_pattern_len(RZ_NONNULL const RzSearchBytesPattern *hp);
RZ_API const char *rz_search_bytes_pattern_desc(RZ_NONNULL const RzSearchBytesPattern *bp);
Expand Down
2 changes: 2 additions & 0 deletions librz/include/rz_util/rz_regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ typedef void RzRegexMatchData; ///< PCRE2 internal match data type

RZ_API RZ_OWN RzRegex *rz_regex_new(RZ_NONNULL const char *pattern, RzRegexFlags cflags, RzRegexFlags jflags,
RzRegexCompContext *ccontext);
RZ_API RZ_OWN RzRegex *rz_regex_new_bytes(RZ_NONNULL const ut8 *pattern, size_t pattern_len, RzRegexFlags cflags, RzRegexFlags jflags,
RzRegexCompContext *ccontext);
RZ_API void rz_regex_free(RZ_OWN RzRegex *regex);
RZ_API void rz_regex_error_msg(RzRegexStatus errcode, RZ_OUT char *errbuf, RzRegexSize errbuf_size);
RZ_API const ut8 *rz_regex_get_match_name(RZ_NONNULL const RzRegex *regex, ut32 name_idx);
Expand Down
32 changes: 28 additions & 4 deletions librz/search/bytes_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,23 @@
* \param mask The mask to apply to the pattern and the data before comparison. (optional)
* \param length Length of \p bytes and \p mask (if not NULL).
* \param pattern_desc An optional description string of the pattern.
* \param compile_regex If true, \p bytes is compiled as a regular expression.
* The search then matches with this regex instead of comparing the bytes.
* The \p mask is ignored in this case.
*
* \return The initialized pattern or NULL in case of failure.
*/
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_new(RZ_OWN ut8 *bytes, RZ_NULLABLE RZ_OWN ut8 *mask, size_t length, RZ_NULLABLE const char *pattern_desc) {
RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_new(RZ_OWN ut8 *bytes, RZ_NULLABLE RZ_OWN ut8 *mask, size_t length, RZ_NULLABLE const char *pattern_desc, bool compile_regex) {
rz_return_val_if_fail(bytes && length > 0, NULL);
RzSearchBytesPattern *pat = RZ_NEW0(RzSearchBytesPattern);
if (!pat) {
RZ_LOG_ERROR("Failed to allocate pattern struct.\n");
return NULL;
}
pat->bytes = bytes;
if (compile_regex) {
pat->regex = rz_regex_new_bytes(bytes, length, RZ_REGEX_DEFAULT, RZ_REGEX_DEFAULT, NULL);
}
pat->mask = mask;
pat->length = length;
pat->pattern_desc = pattern_desc;
Expand All @@ -40,6 +46,7 @@ RZ_API void rz_search_bytes_pattern_free(RZ_NULLABLE RZ_OWN RzSearchBytesPattern
return;
}
free(hp->bytes);
rz_regex_free(hp->regex);
free(hp->mask);
free(hp);
}
Expand All @@ -66,7 +73,7 @@ RZ_API size_t rz_search_bytes_pattern_len(RZ_NONNULL const RzSearchBytesPattern

RZ_API RZ_OWN RzSearchBytesPattern *rz_search_bytes_pattern_copy(RZ_NONNULL RZ_BORROW RzSearchBytesPattern *hp) {
rz_return_val_if_fail(hp, NULL);
return rz_search_bytes_pattern_new(rz_new_copy(hp->length, hp->bytes), rz_new_copy(hp->length, hp->mask), hp->length, hp->pattern_desc);
return rz_search_bytes_pattern_new(rz_new_copy(hp->length, hp->bytes), rz_new_copy(hp->length, hp->mask), hp->length, hp->pattern_desc, hp->regex != NULL);
}

static bool parse_custom_mask(const char *bytes_pattern, const RzRegexMatch *mask_match, const RzRegexMatch *bytes_match, ut8 *mask) {
Expand Down Expand Up @@ -159,7 +166,7 @@ RZ_API RZ_OWN RzSearchBytesPattern *rz_search_parse_byte_pattern(const char *byt
free(byte_str);

rz_pvector_free(matches);
RzSearchBytesPattern *pat = rz_search_bytes_pattern_new(bytes, use_mask ? mask : NULL, size, pattern_desc);
RzSearchBytesPattern *pat = rz_search_bytes_pattern_new(bytes, use_mask ? mask : NULL, size, pattern_desc, false);
if (!use_mask) {
free(mask);
}
Expand Down Expand Up @@ -205,6 +212,23 @@ static bool bytes_find(RzSearchFindOpt *fopts, void *user, ut64 address, const R
RzPVector /*<BytesPattern *>*/ *patterns = (RzPVector *)user;
rz_pvector_foreach (patterns, it) {
RzSearchBytesPattern *hp = (RzSearchBytesPattern *)*it;
if (hp->regex) {
RzPVector *matches = rz_regex_match_all(hp->regex, (const char *)raw_buf, size, 0, RZ_REGEX_DEFAULT);
void **it;
RzPVector *match;
rz_pvector_foreach (matches, it) {
match = *it;
RzRegexMatch *group0 = rz_pvector_at(match, 0);
RzSearchHit *hit = rz_search_hit_new(hp->pattern_desc, group0->start, group0->len);
if (!hit || !rz_th_queue_push(hits, hit, true)) {
rz_search_hit_free(hit);
rz_pvector_free(matches);
return false;
}
}
rz_pvector_free(matches);
continue;
}
for (size_t offset = 0; offset < size;) {
size_t leftovers = size - offset;
if (hp->length > leftovers) {
Expand Down Expand Up @@ -298,7 +322,7 @@ RZ_API bool rz_search_collection_bytes_add(RZ_NONNULL RzSearchCollection *col, R
return false;
}

RzSearchBytesPattern *hp = rz_search_bytes_pattern_new(rz_new_copy(length, bytes), rz_new_copy(length, mask), length, pattern_desc);
RzSearchBytesPattern *hp = rz_search_bytes_pattern_new(rz_new_copy(length, bytes), rz_new_copy(length, mask), length, pattern_desc, false);
if (!hp) {
return false;
} else if (!rz_pvector_push((RzPVector *)col->user, hp)) {
Expand Down
1 change: 1 addition & 0 deletions librz/search/search_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ struct rz_search_bytes_pattern_t {
const char *pattern_desc; ///< Pattern description string.
ut8 *bytes; ///< Pattern bytes.
ut8 *mask; ///< Pattern mask (when NULL full match)
RzRegex *regex; ///< Regex patterns of the bytes. Is optional.
size_t length; ///< Pattern & mask length
};

Expand Down
3 changes: 1 addition & 2 deletions librz/util/hex.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ RZ_API ut8 rz_hex_digit_to_byte(const char c) {
* \param npair The string to parse as a hex digit pair.
*
* \return The byte value of the nibble pair.
* Or UT16_MAX if the nibble is no hexadecimal character.
* Or UT16_MAX if the first nibble is not a hexadecimal character.
*/
RZ_API ut16 rz_hex_digit_pair_to_byte(const char *npair) {
if (!isxdigit(npair[0])) {
Expand Down Expand Up @@ -544,7 +544,6 @@ RZ_API int rz_hex_str2bin_msb(RZ_NONNULL const char *in, RZ_NONNULL RZ_OUT ut8 *
j++;
}


for (byte = rz_hex_digit_pair_to_byte(in + i); i < strlen(in) && byte <= UT8_MAX; j++, i += 2, byte = rz_hex_digit_pair_to_byte(in + i)) {
out[j] = byte;
}
Expand Down
Loading

0 comments on commit 49edd19

Please sign in to comment.