diff --git a/CHANGELOG.md b/CHANGELOG.md index c6bcde932..df7ca001c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,8 +32,8 @@ - `intelmq.bots.parsers.shadowserver._config`: - Fetch schema before first run (PR#2482 by elsif2, fixes #2480). - `intelmq.bots.parsers.dataplane.parser`: Use ` | ` as field delimiter, fix parsing of AS names including `|` (PR#2488 by DigitalTrustCenter). -- all parsers: add `copy_custom_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. - (PR# by Kamil Mankowski). +- all parsers: add `copy_collector_provided_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. + (PR#2513 by Kamil Mankowski). #### Experts - `intelmq.bots.experts.sieve.expert`: diff --git a/docs/user/bots.md b/docs/user/bots.md index 351cd0da8..2c8ec6e9d 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1357,7 +1357,7 @@ defaults_fields: protocol.transport: tcp ``` -#### `copy_custom_fields` +#### `copy_collector_provided_fields` (optional, list) List of additional fields to be copied from the report (only applied if parsing the event doesn't set the value). @@ -1365,7 +1365,7 @@ event doesn't set the value). 
Example usage: ```yaml -copy_custom_fields: +copy_collector_provided_fields: - extra.file_name ``` diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index dd0696810..49591f9a1 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1082,7 +1082,7 @@ class ParserBot(Bot): _default_message_type = 'Report' default_fields: Optional[dict] = {} - copy_custom_fields: Optional[list] = [] + copy_collector_provided_fields: Optional[list] = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1127,6 +1127,11 @@ def _get_io_and_save_line_ending(self, raw: str) -> io.StringIO: if not self._line_ending or isinstance(self._line_ending, tuple): self._line_ending = '\r\n' return data_io + + def new_event(self, *args, **kwargs): + if self.copy_collector_provided_fields: + kwargs['copy_collector_provided_fields'] = self.copy_collector_provided_fields + return super().new_event(*args, **kwargs) def parse_csv(self, report: libmessage.Report): """ @@ -1246,13 +1251,6 @@ def process(self): for key, value in self.default_fields.items(): event.add(key, value, overwrite=False) - if self.copy_custom_fields: - for key in self.copy_custom_fields: - if key not in report: - continue - for event in events: - event.add(key, report.get(key), overwrite=False) - except Exception: self.logger.exception('Failed to parse line.') self.__failed.append((traceback.format_exc(), self._current_line)) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index e99e22731..603bcd281 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -98,7 +98,7 @@ class Message(dict): _default_value_set = False def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: dict = None) -> None: + harmonization: dict = None, **_) -> None: try: classname = message['__type'].lower() del message['__type'] @@ -522,9 +522,13 @@ def __contains__(self, item: str) -> bool: class Event(Message): - - def __init__(self, message: Union[dict, tuple] = (), 
auto: bool = False, - harmonization: Optional[dict] = None) -> None: + def __init__( + self, + message: Union[dict, tuple] = (), + auto: bool = False, + harmonization: Optional[dict] = None, + copy_collector_provided_fields: Optional[dict] = None, + ) -> None: """ Parameters: message: Give a report and feed.name, feed.url and @@ -551,6 +555,12 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, template['rtir_id'] = message['rtir_id'] if 'time.observation' in message: template['time.observation'] = message['time.observation'] + + if copy_collector_provided_fields: + for key in copy_collector_provided_fields: + if key not in message: + continue + template[key] = message.get(key) else: template = message super().__init__(template, auto, harmonization) @@ -559,7 +569,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, class Report(Message): def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: Optional[dict] = None) -> None: + harmonization: Optional[dict] = None, **_) -> None: """ Parameters: message: Passed along to Message's and dict's init. 
diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index c1d9faa6d..43b83f71d 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -167,12 +167,12 @@ def test_bad_default_fields_parameter_2(self): self.assertAnyLoglineEqual(message="Invalid value of key 'source.port' in default_fields parameter.", levelname="ERROR") - def test_copy_custom_fields_from_report(self): + def test_copy_collector_provided_fields_from_report(self): """Allow copying custom fields from the report message to support more context from reports""" report = {**EXAMPLE_SHORT, "extra.file_name": "file.txt", "extra.field2": "value2"} self.input_message = report - self.run_bot(parameters={"copy_custom_fields": + self.run_bot(parameters={"copy_collector_provided_fields": ["extra.file_name", "extra.not_exists"]}) output_message = EXAMPLE_EVENT.copy()