From 3f174163ff7d488975e8966f01f3ba5c9e872ead Mon Sep 17 00:00:00 2001 From: ashariyar Date: Sun, 16 Oct 2022 22:07:06 -0400 Subject: [PATCH] Changelog/version bump --- CHANGELOG.md | 6 ++++++ README.md | 4 ++++ pyproject.toml | 2 +- tests/test_yaralyze.py | 2 +- yaralyzer/bytes_match.py | 5 +++++ yaralyzer/decoding/bytes_decoder.py | 13 ++++++++----- yaralyzer/util/argument_parser.py | 6 ++++++ 7 files changed, 31 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 844ffad..8776e07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # NEXT RELEASE +# 0.8.0 +* Add `--log-level` option +* `BytesMatch.is_decodable()` method + + ### 0.7.1 +* Bump deps # 0.7.0 * Show hex and ascii side by side in decodes table diff --git a/README.md b/README.md index 95d172f..97fd131 100644 --- a/README.md +++ b/README.md @@ -101,4 +101,8 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect ![Font Scan Regex](doc/rendered_images/decoding_and_chardet_table_2.png) +# TODO +* highlight decodes done at `chardet`s behest +* deal with repetitive matches + [^1]: As I was until recently. diff --git a/pyproject.toml b/pyproject.toml index a288f8c..8222f71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "yaralyzer" -version = "0.7.1" +version = "0.8.0" description = "Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors." authors = ["Michel de Cryptadamus "] readme = "README.md" diff --git a/tests/test_yaralyze.py b/tests/test_yaralyze.py index 2f04bb5..ca322a6 100644 --- a/tests/test_yaralyze.py +++ b/tests/test_yaralyze.py @@ -20,7 +20,7 @@ def test_help_option(): help_text = _run_with_args('-h') assert 'maximize-width' in help_text - _assert_line_count_within_range(111, help_text) + _assert_line_count_within_range(118, help_text) def test_no_rule_args(il_tulipano_path): diff --git a/yaralyzer/bytes_match.py b/yaralyzer/bytes_match.py index 66eb8ab..f07cdef 100644 --- a/yaralyzer/bytes_match.py +++ b/yaralyzer/bytes_match.py @@ -114,6 +114,11 @@ def location(self) -> Text: location_txt.append(')', style='off_white') return location_txt + def is_decodable(self) -> bool: + return self.match_length >= YaralyzerConfig.MIN_DECODE_LENGTH \ + and self.match_length <= YaralyzerConfig.MAX_DECODE_LENGTH \ + and not YaralyzerConfig.SUPPRESS_DECODES + def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None: """Find the surrounding bytes, making sure not to step off the beginning or end""" num_after = num_after or num_before or YaralyzerConfig.NUM_SURROUNDING_BYTES diff --git a/yaralyzer/decoding/bytes_decoder.py b/yaralyzer/decoding/bytes_decoder.py index dc99f81..4869164 100644 --- a/yaralyzer/decoding/bytes_decoder.py +++ b/yaralyzer/decoding/bytes_decoder.py @@ -63,12 +63,14 @@ def print_decode_attempts(self) -> None: def _generate_decodings_table(self) -> Table: """First rows are the raw / hex views of the bytes, then attempted decodings""" - if YaralyzerConfig.SUPPRESS_DECODES or \ - self.bytes_match.match_length < YaralyzerConfig.MIN_DECODE_LENGTH or \ - self.bytes_match.match_length > YaralyzerConfig.MAX_DECODE_LENGTH: + if not self.bytes_match.is_decodable(): + log.debug(f"{self.bytes_match} is not decodable") return self.table - self.decodings = [DecodingAttempt(self.bytes_match, encoding) for encoding in ENCODINGS_TO_ATTEMPT.keys()] + self.decodings = [ + DecodingAttempt(self.bytes_match, encoding) + for encoding in ENCODINGS_TO_ATTEMPT.keys() + ] # Attempt decodings we don't usually attempt if chardet is insistent enough forced_decodes = self._undecoded_assessments(self.encoding_detector.force_decode_assessments) @@ -77,7 +79,7 @@ def _generate_decodings_table(self) -> Table: # If we still haven't decoded chardets top choice, decode it if len(self._forced_displays()) > 0 and not self._was_decoded(self._forced_displays()[0].encoding): chardet_top_encoding = self._forced_displays()[0].encoding - log.debug(f"Decoding {chardet_top_encoding} because it's chardet top choice...") + log.info(f"Decoding {chardet_top_encoding} because it's chardet top choice...") self.decodings.append(DecodingAttempt(self.bytes_match, chardet_top_encoding)) rows = [self._row_from_decoding_attempt(decoding) for decoding in self.decodings] @@ -147,6 +149,7 @@ def _row_from_decoding_attempt(self, decoding: DecodingAttempt) -> DecodingTable return decoding_table_row(assessment, was_forced, display_text, sort_score) + def _build_encodings_metric_dict(): """One key for each key in ENCODINGS_TO_ATTEMPT, values are all 0""" metrics_dict = defaultdict(lambda: 0) diff --git a/yaralyzer/util/argument_parser.py b/yaralyzer/util/argument_parser.py index cc2fe90..4a4904d 100644 --- a/yaralyzer/util/argument_parser.py +++ b/yaralyzer/util/argument_parser.py @@ -195,6 +195,10 @@ debug.add_argument('-D', '--debug', action='store_true', help='show verbose debug log output') +debug.add_argument('-L', '--log-level', + help='set the log level', + choices=['DEBUG', 'INFO', 'WARN', 'ERROR']) + def parse_arguments(args: Optional[Namespace] = None): """ @@ -212,6 +216,8 @@ def parse_arguments(args: Optional[Namespace] = None): if args.debug: log.setLevel(logging.DEBUG) + elif args.log_level: + log.setLevel(args.log_level) yara_rules_args = [arg for arg in YARA_RULES_ARGS if vars(args)[arg] is not None]