From 3f174163ff7d488975e8966f01f3ba5c9e872ead Mon Sep 17 00:00:00 2001
From: ashariyar <ashariyar@users.noreply.github.com>
Date: Sun, 16 Oct 2022 22:07:06 -0400
Subject: [PATCH] Changelog/version bump

---
 CHANGELOG.md                        |  6 ++++++
 README.md                           |  4 ++++
 pyproject.toml                      |  2 +-
 tests/test_yaralyze.py              |  2 +-
 yaralyzer/bytes_match.py            |  5 +++++
 yaralyzer/decoding/bytes_decoder.py | 13 ++++++++-----
 yaralyzer/util/argument_parser.py   |  6 ++++++
 7 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 844ffad..8776e07 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # NEXT RELEASE
 
+# 0.8.0
+* Add `--log-level` option
+* Add `BytesMatch.is_decodable()` method
+
+
 ### 0.7.1
+* Bump deps
 
 # 0.7.0
 * Show hex and ascii side by side in decodes table
diff --git a/README.md b/README.md
index 95d172f..97fd131 100644
--- a/README.md
+++ b/README.md
@@ -101,4 +101,8 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect
 ![Font Scan Regex](doc/rendered_images/decoding_and_chardet_table_2.png)
 
 
+# TODO
+* highlight decodes done at `chardet`'s behest
+* deal with repetitive matches
+
 [^1]: As I was until recently.
diff --git a/pyproject.toml b/pyproject.toml
index a288f8c..8222f71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "yaralyzer"
-version = "0.7.1"
+version = "0.8.0"
 description = "Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors."
 authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
 readme = "README.md"
diff --git a/tests/test_yaralyze.py b/tests/test_yaralyze.py
index 2f04bb5..ca322a6 100644
--- a/tests/test_yaralyze.py
+++ b/tests/test_yaralyze.py
@@ -20,7 +20,7 @@
 def test_help_option():
     help_text = _run_with_args('-h')
     assert 'maximize-width' in help_text
-    _assert_line_count_within_range(111, help_text)
+    _assert_line_count_within_range(118, help_text)
 
 
 def test_no_rule_args(il_tulipano_path):
diff --git a/yaralyzer/bytes_match.py b/yaralyzer/bytes_match.py
index 66eb8ab..f07cdef 100644
--- a/yaralyzer/bytes_match.py
+++ b/yaralyzer/bytes_match.py
@@ -114,6 +114,11 @@ def location(self) -> Text:
         location_txt.append(')', style='off_white')
         return location_txt
 
+    def is_decodable(self) -> bool:  # True only if match_length is within the configured MIN/MAX decode lengths and decodes are not suppressed
+        return self.match_length >= YaralyzerConfig.MIN_DECODE_LENGTH \
+           and self.match_length <= YaralyzerConfig.MAX_DECODE_LENGTH \
+           and not YaralyzerConfig.SUPPRESS_DECODES
+
     def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
         """Find the surrounding bytes, making sure not to step off the beginning or end"""
         num_after = num_after or num_before or YaralyzerConfig.NUM_SURROUNDING_BYTES
diff --git a/yaralyzer/decoding/bytes_decoder.py b/yaralyzer/decoding/bytes_decoder.py
index dc99f81..4869164 100644
--- a/yaralyzer/decoding/bytes_decoder.py
+++ b/yaralyzer/decoding/bytes_decoder.py
@@ -63,12 +63,14 @@ def print_decode_attempts(self) -> None:
 
     def _generate_decodings_table(self) -> Table:
         """First rows are the raw / hex views of the bytes, then attempted decodings"""
-        if YaralyzerConfig.SUPPRESS_DECODES or \
-                self.bytes_match.match_length < YaralyzerConfig.MIN_DECODE_LENGTH or \
-                self.bytes_match.match_length > YaralyzerConfig.MAX_DECODE_LENGTH:
+        if not self.bytes_match.is_decodable():
+            log.debug(f"{self.bytes_match} is not decodable")
             return self.table
 
-        self.decodings = [DecodingAttempt(self.bytes_match, encoding) for encoding in ENCODINGS_TO_ATTEMPT.keys()]
+        self.decodings = [
+            DecodingAttempt(self.bytes_match, encoding)
+            for encoding in ENCODINGS_TO_ATTEMPT.keys()
+        ]
 
         # Attempt decodings we don't usually attempt if chardet is insistent enough
         forced_decodes = self._undecoded_assessments(self.encoding_detector.force_decode_assessments)
@@ -77,7 +79,7 @@ def _generate_decodings_table(self) -> Table:
         # If we still haven't decoded chardets top choice, decode it
         if len(self._forced_displays()) > 0 and not self._was_decoded(self._forced_displays()[0].encoding):
             chardet_top_encoding = self._forced_displays()[0].encoding
-            log.debug(f"Decoding {chardet_top_encoding} because it's chardet top choice...")
+            log.info(f"Decoding {chardet_top_encoding} because it's chardet top choice...")
             self.decodings.append(DecodingAttempt(self.bytes_match, chardet_top_encoding))
 
         rows = [self._row_from_decoding_attempt(decoding) for decoding in self.decodings]
@@ -147,6 +149,7 @@ def _row_from_decoding_attempt(self, decoding: DecodingAttempt) -> DecodingTable
         return decoding_table_row(assessment, was_forced, display_text, sort_score)
 
 
+
 def _build_encodings_metric_dict():
     """One key for each key in ENCODINGS_TO_ATTEMPT, values are all 0"""
     metrics_dict = defaultdict(lambda: 0)
diff --git a/yaralyzer/util/argument_parser.py b/yaralyzer/util/argument_parser.py
index cc2fe90..4a4904d 100644
--- a/yaralyzer/util/argument_parser.py
+++ b/yaralyzer/util/argument_parser.py
@@ -195,6 +195,10 @@
 debug.add_argument('-D', '--debug', action='store_true',
                     help='show verbose debug log output')
 
+debug.add_argument('-L', '--log-level',
+                    help='set the log level',
+                    choices=['DEBUG', 'INFO', 'WARN', 'ERROR'])
+
 
 def parse_arguments(args: Optional[Namespace] = None):
     """
@@ -212,6 +216,8 @@ def parse_arguments(args: Optional[Namespace] = None):
 
     if args.debug:
         log.setLevel(logging.DEBUG)
+    elif args.log_level:
+        log.setLevel(args.log_level)
 
     yara_rules_args = [arg for arg in YARA_RULES_ARGS if vars(args)[arg] is not None]