Skip to content

Commit

Permalink
Show hex and ascii side by side in decodes table
Browse files Browse the repository at this point in the history
  • Loading branch information
ashariyar committed Oct 15, 2022
1 parent 7bb2f8f commit cf00058
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 39 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# NEXT RELEASE
* Show hex and ascii side by side in decodes table

### 0.6.2
* Remove `cairosvg` dependency
Expand Down
8 changes: 4 additions & 4 deletions tests/test_yaralyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ def test_yaralyze_with_files(il_tulipano_path, tulips_yara_path):


def test_yaralyze_with_patterns(il_tulipano_path, binary_file_path, tulips_yara_regex):
_assert_output_line_count_is_close(942, il_tulipano_path, '-re', tulips_yara_regex)
_assert_output_line_count_is_close(83, binary_file_path, '-re', '3Hl0')
_assert_output_line_count_is_close(90, binary_file_path, '-hex', HEX_STRING)
_assert_output_line_count_is_close(999, il_tulipano_path, '-re', tulips_yara_regex)
_assert_output_line_count_is_close(90, binary_file_path, '-re', '3Hl0')
_assert_output_line_count_is_close(96, binary_file_path, '-hex', HEX_STRING)


def test_file_export(binary_file_path, tulips_yara_path, tmp_dir):
_run_with_args(binary_file_path, '-Y', tulips_yara_path, '-svg', '-html', '-txt', '-out', tmp_dir)
rendered_files = files_in_dir(tmp_dir)
assert len(rendered_files) == 3
file_sizes = [path.getsize(f) for f in rendered_files]
_assert_array_is_close(sorted(file_sizes), [40867, 69127, 216719])
_assert_array_is_close(sorted(file_sizes), [45191, 78832, 240046])

for file in rendered_files:
remove(file)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_yaralyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from yaralyzer.yaralyzer import Yaralyzer

CLOSENESS_THRESHOLD = 0.05
EXPECTED_LINES = 942
EXPECTED_LINES = 1002


def test_filename_string(a_yaralyzer):
Expand All @@ -22,15 +22,15 @@ def test_yaralyzer_with_files(il_tulipano_path, tulips_yara_path):


def test_yaralyzer_with_patterns(il_tulipano_path, tulips_yara_regex):
_check_output_linecount(Yaralyzer.for_patterns([tulips_yara_regex], REGEX, il_tulipano_path), 942)
_check_output_linecount(Yaralyzer.for_patterns([tulips_yara_regex], REGEX, il_tulipano_path), EXPECTED_LINES)


def test_yaralyzer_for_rules_dir(il_tulipano_path):
    """Build a Yaralyzer from the directory holding the fixture file and check its output line count."""
    rules_dir = dirname(il_tulipano_path)
    yaralyzer = Yaralyzer.for_rules_dirs([rules_dir], il_tulipano_path)
    # No explicit count is passed, so the helper's default (EXPECTED_LINES) applies.
    _check_output_linecount(yaralyzer)


def test_hex_rules(binary_file_path, tulips_yara_path):
_check_output_linecount(Yaralyzer.for_rules_files([tulips_yara_path], binary_file_path), 95)
_check_output_linecount(Yaralyzer.for_rules_files([tulips_yara_path], binary_file_path), 102)


def _check_output_linecount(yaralzyer: Yaralyzer, expected_line_count: int = EXPECTED_LINES) -> None:
Expand Down
14 changes: 9 additions & 5 deletions yaralyzer/decoding/bytes_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from yaralyzer.bytes_match import BytesMatch
from yaralyzer.config import YaralyzerConfig
from yaralyzer.decoding.decoding_attempt import DecodingAttempt
from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT
from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
from yaralyzer.encoding_detection.encoding_detector import EncodingDetector
from yaralyzer.helpers.dict_helper import get_dict_key_by_value
from yaralyzer.helpers.rich_text_helper import CENTER, DECODING_ERRORS_MSG, NO_DECODING_ERRORS_MSG
Expand All @@ -25,7 +25,7 @@
from yaralyzer.output.rich_console import console
from yaralyzer.util.logging import log

# Messages used in the table to show true vs. false (a two element array can be indexed by booleans)
# Two-element list of the messages shown in the table, indexable by a boolean (False -> first, True -> second)
WAS_DECODABLE_YES_NO = [NO_DECODING_ERRORS_MSG, DECODING_ERRORS_MSG]

# Multiply chardet scores by 100 (again) to make sorting the table easy
Expand All @@ -34,7 +34,6 @@

class BytesDecoder:
def __init__(self, bytes_match: BytesMatch, label: Optional[str] = None) -> None:
"""Instantiated with _bytes as the whole stream; :bytes_seq tells it how to pull the bytes it will decode"""
self.bytes_match = bytes_match
self.bytes = bytes_match.surrounding_bytes
self.label = label or bytes_match.label
Expand All @@ -51,6 +50,7 @@ def __init__(self, bytes_match: BytesMatch, label: Optional[str] = None) -> None
self.encoding_detector = EncodingDetector(self.bytes)

def print_decode_attempts(self) -> None:
"""Print the DecodingAttemptsTable."""
console.line(2)
self._print_decode_attempt_subheading()

Expand Down Expand Up @@ -108,7 +108,7 @@ def _print_decode_attempt_subheading(self) -> None:
console.print(panel, justify=CENTER)

def _track_decode_stats(self) -> None:
"Track stats about successful vs. forced vs. failed decode attempts"
"""Track stats about successful vs. forced vs. failed decode attempts"""
for decoding in self.decodings:
if decoding.failed_to_decode:
self.was_match_undecodable[decoding.encoding] += 1
Expand All @@ -120,11 +120,15 @@ def _track_decode_stats(self) -> None:
self.was_match_force_decoded[decoding.encoding] += 1

def _row_from_decoding_attempt(self, decoding: DecodingAttempt) -> DecodingTableRow:
"""
Create a DecodingAttemptTable row from a DecodingAttempt.
If the decoding result is a duplicate of a previous decoding, replace the decoded text
with "same output as X" where X is the previous encoding that gave the same result.
"""
assessment = self.encoding_detector.get_encoding_assessment(decoding.encoding)
plain_decoded_string = decoding.decoded_string.plain
sort_score = assessment.confidence * SCORE_SCALER

# Replace the decoded text with a "same output as X" where X is the encoding that gave the same result
if plain_decoded_string in self.decoded_strings.values():
encoding_with_same_output = get_dict_key_by_value(self.decoded_strings, plain_decoded_string)
display_text = Text('same output as ', style='color(66) dim italic')
Expand Down
47 changes: 42 additions & 5 deletions yaralyzer/helpers/bytes_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
from collections import namedtuple
from io import StringIO
from sys import byteorder

from rich.console import Console
from rich.markup import escape
Expand All @@ -10,14 +11,18 @@
from yaralyzer.bytes_match import BytesMatch
from yaralyzer.config import YaralyzerConfig
from yaralyzer.encoding_detection.character_encodings import NEWLINE_BYTE
from yaralyzer.output.rich_console import GREY, console
from yaralyzer.output.rich_console import (BYTES, BYTES_BRIGHTER, BYTES_BRIGHTEST,
BYTES_HIGHLIGHT, GREY, console, console_width)
from yaralyzer.util.logging import log

HEX_CHARS_PER_GROUP = 8
HEX_GROUPS_PER_LINE = 4

BytesInfo = namedtuple('BytesInfo', ['size', 'md5', 'sha1', 'sha256'])

# Number of bytes shown in each space-separated group of the hex / ascii views
HEX_CHARS_PER_GROUP = 8
# Width left for the hex/ascii sub table: 35 for first 3 cols, 5 for in between hex and ascii
SUBTABLE_MAX_WIDTH = console_width() - 35 - 5
# Width of one group: 3 chars per byte in the hex view + 1 char per byte in the ascii view + 4 for padding
HEX_UNIT_LENGTH = (HEX_CHARS_PER_GROUP * 3) + HEX_CHARS_PER_GROUP + 4
# Always show at least one group per line. On a narrow console the division yields 0 (or a negative
# number), which would make the wrap width non-positive and break range() calls that step by this value.
HEX_GROUPS_PER_LINE = max(1, SUBTABLE_MAX_WIDTH // HEX_UNIT_LENGTH)
HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE


def get_bytes_info(_bytes: bytes) -> BytesInfo:
return BytesInfo(
Expand Down Expand Up @@ -85,10 +90,42 @@ def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
highlight_start_idx = bytes_match.highlight_start_idx * 3
highlight_end_idx = bytes_match.highlight_end_idx * 3
hex_str.stylize(bytes_match.highlight_style, highlight_start_idx, highlight_end_idx)
lines = hex_str.wrap(console, HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE * 3)
lines = hex_str.wrap(console, HEX_CHARS_PER_LINE * 3)
return Text("\n").join([Text(' ').join(line.wrap(console, HEX_CHARS_PER_GROUP * 3)) for line in lines])


def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
    """
    Build the ASCII companion to the hex view of the same bytes.

    Bytes in the printable ASCII range (32-126) are shown as themselves; every other byte is
    shown as '*'. Bytes outside the highlighted match region get fixed dim colors while bytes
    inside it are styled according to the kind of byte. The characters are split into groups of
    HEX_CHARS_PER_GROUP, with HEX_GROUPS_PER_LINE groups per line, mirroring the grouping used by
    hex_view_of_raw_bytes().

    :param _bytes: the bytes to render
    :param bytes_match: supplies highlight_start_idx / highlight_end_idx for the matched region
    :return: a multi-line Text with one character per input byte
    """
    txt = Text('', style=BYTES)

    for i, b in enumerate(_bytes):
        # highlight_end_idx is treated as exclusive so that the highlighted region here is the same
        # as in hex_view_of_raw_bytes(), which hands it to Text.stylize() as an exclusive end offset.
        in_match = bytes_match.highlight_start_idx <= i < bytes_match.highlight_end_idx
        is_printable = 32 <= b < 127

        if b < 32:
            char, match_style = '*', BYTES_BRIGHTER
        elif b < 127:
            char, match_style = chr(b), BYTES_BRIGHTEST
        elif b <= 160:
            char, match_style = '*', BYTES_HIGHLIGHT
        else:
            char, match_style = '*', BYTES

        if in_match:
            style = match_style
        elif is_printable:
            style = 'color(246)'
        else:
            style = 'color(234)'

        txt.append(char, style=style)

    # Chop into fixed size groups, then lay the groups out HEX_GROUPS_PER_LINE to a line.
    segments = [txt[i:i + HEX_CHARS_PER_GROUP] for i in range(0, len(txt), HEX_CHARS_PER_GROUP)]

    lines = [
        Text(' ').join(segments[i:i + HEX_GROUPS_PER_LINE])
        for i in range(0, len(segments), HEX_GROUPS_PER_LINE)
    ]

    return Text("\n").join(lines)


def hex_text(_bytes: bytes) -> Text:
    """Wrap the hex_string() rendering of _bytes in a Text styled with GREY."""
    hex_repr = hex_string(_bytes)
    return Text(hex_repr, style=GREY)

Expand Down
1 change: 0 additions & 1 deletion yaralyzer/helpers/rich_text_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ def size_text(num_bytes: int) -> Text:
kb_txt = prefix_with_plain_text_obj("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
kb_txt.append(' kb ')
bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')

return kb_txt + bytes_txt


Expand Down
58 changes: 41 additions & 17 deletions yaralyzer/output/decoding_attempts_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,37 @@

from collections import namedtuple

from rich import box
from rich.table import Table
from rich.text import Text

from yaralyzer.bytes_match import BytesMatch
from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
from yaralyzer.helpers.bytes_helper import hex_view_of_raw_bytes, rich_text_view_of_raw_bytes
from yaralyzer.helpers.bytes_helper import (ascii_view_of_raw_bytes, hex_view_of_raw_bytes,
rich_text_view_of_raw_bytes)
from yaralyzer.helpers.rich_text_helper import CENTER, FOLD, MIDDLE, RIGHT, na_txt

# The confidence and encoding will not be shown in the final display - instead their Text versions are shown
DecodingTableRow = namedtuple(
'DecodingTableRow',
[
'encoding_text',
'confidence_text',
'errors_while_decoded',
'decoded_string',
'confidence',
'encoding',
'sort_score'
]
)

DECODE_NOT_ATTEMPTED_MSG = Text('(decode not attempted)', style='no_attempt')
HEX = Text('HEX', style='bytes.title')
RAW_BYTES = Text('Raw', style=f"bytes")


def build_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
"""First rows are the raw / hex views of the bytes"""
"""First rows are the raw / hex views of the bytes then 1 row per decoding attempt."""
table = Table(show_lines=True, border_style='bytes', header_style='color(101) bold')

def add_col(title, **kwargs):
Expand All @@ -42,26 +57,12 @@ def add_col(title, **kwargs):
add_col('Decoded Output', justify='left')

na = na_txt(style=HEX.style)
table.add_row(HEX, na, na, hex_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match))
table.add_row(HEX, na, na, _hex_preview_subtable(bytes_match))
na = na_txt(style=RAW_BYTES.style)
table.add_row(RAW_BYTES, na, na, rich_text_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match))
return table


# The confidence and encoding will not be shown in the final display - instead their Text versions are shown
DecodingTableRow = namedtuple(
'DecodingTableRow',
[
'encoding_text',
'confidence_text',
'errors_while_decoded',
'decoded_string',
'confidence',
'encoding',
'sort_score'
])


def decoding_table_row(assessment: EncodingAssessment, is_forced: Text, txt: Text, score: float) -> DecodingTableRow:
"""Get a table row for a decoding attempt"""
return DecodingTableRow(
Expand All @@ -77,3 +78,26 @@ def decoding_table_row(assessment: EncodingAssessment, is_forced: Text, txt: Tex
def assessment_only_row(assessment: EncodingAssessment, score) -> DecodingTableRow:
    """Make a row for an encoding that was assessed but never decoded (no decoded string to show)."""
    return decoding_table_row(
        assessment,
        is_forced=na_txt(),
        txt=DECODE_NOT_ATTEMPTED_MSG,
        score=score
    )


def _hex_preview_subtable(bytes_match: BytesMatch) -> Table:
    """Return a one-row, two-column table: hex view of the surrounding bytes on the left, ascii on the right."""
    table_options = {
        'border_style': 'color(235) dim',
        'header_style': 'color(101) bold',
        'box': box.MINIMAL,
        'show_lines': True,
        'show_header': True,
        'show_edge': False,
        'padding': (0, 1, 0, 2),
        'pad_edge': False,
    }

    subtable = Table('hex', 'ascii', **table_options)
    surrounding_bytes = bytes_match.surrounding_bytes
    hex_column = hex_view_of_raw_bytes(surrounding_bytes, bytes_match)
    ascii_column = ascii_view_of_raw_bytes(surrounding_bytes, bytes_match)
    subtable.add_row(hex_column, ascii_column)
    return subtable
9 changes: 5 additions & 4 deletions yaralyzer/yara/yara_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,21 @@

class YaraMatch:
def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
self.match: dict = match
self.match = match
self.rule_name = match['rule']
self.label: Text = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
self.label = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
self.label.append(self.rule_name, style='on bright_red bold').append("'!", style='siren')

def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
"""Renders a panel showing the color highlighted raw YARA match info."""
yield(Text("\n"))
yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
yield(RAW_YARA_THEME_TXT)
yield Padding(Panel(_rich_yara_match(self.match)), MATCH_PADDING)


def _rich_yara_match(element: Any, depth: int = 0) -> Text:
"""Mildly painful/hacky way of coloring a yara result hash"""
"""Mildly painful/hacky way of coloring a yara result hash."""
indent = Text((depth + 1) * 4 * ' ')
end_indent = Text(depth * 4 * ' ')

Expand All @@ -78,7 +79,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
if len(element) == 0:
txt = Text('[]', style='white')
else:
total_length = sum([len(str(e)) for e in element]) + ((len(element) - 1) * 2) + + len(indent) + 2
total_length = sum([len(str(e)) for e in element]) + ((len(element) - 1) * 2) + len(indent) + 2
elements_txt = [_rich_yara_match(e, depth + 1) for e in element]
list_txt = Text('[', style='white')

Expand Down

0 comments on commit cf00058

Please sign in to comment.