From 42424d02cfd431a3e401b1fca1482517f2dc8687 Mon Sep 17 00:00:00 2001 From: Jeff Ashton Date: Mon, 29 Nov 2021 19:33:27 -0500 Subject: [PATCH 1/4] Fixing loader imports --- elastalert/loaders.py | 50 +++++++++++++------- tests/loaders_test.py | 103 ++++++++++++++---------------------------- 2 files changed, 68 insertions(+), 85 deletions(-) diff --git a/elastalert/loaders.py b/elastalert/loaders.py index 77d310b5..bc127091 100644 --- a/elastalert/loaders.py +++ b/elastalert/loaders.py @@ -212,7 +212,7 @@ def get_import_rule(self, rule): Retrieve the name of the rule to import. :param dict rule: Rule dict :return: rule name that will all `get_yaml` to retrieve the yaml of the rule - :rtype: str + :rtype: str or List[str] """ return rule['import'] @@ -240,10 +240,15 @@ def load_yaml(self, filename): 'rule_file': filename, } - self.import_rules.pop(filename, None) # clear `filename` dependency - files_to_import = [] + current_path = filename + + # Imports are applied using a Depth First Search (DFS) traversal. + # If a rule defines multiple imports, both of which define the same value, + # the value from the later import will take precedent. + import_paths_stack = [] + while True: - loaded = self.get_yaml(filename) + loaded = self.get_yaml(current_path) # Special case for merging filters - if both files specify a filter merge (AND) them if 'filter' in rule and 'filter' in loaded: @@ -251,17 +256,30 @@ def load_yaml(self, filename): loaded.update(rule) rule = loaded - if 'import' in rule: - # add all of the files to load into the load queue - files_to_import += self.get_import_rule(rule) - del (rule['import']) # or we could go on forever! - if len(files_to_import) > 0: - # set the next file to load - next_file_to_import = files_to_import.pop() - rules = self.import_rules.get(filename, []) - rules.append(next_file_to_import) - self.import_rules[filename] = rules - filename = next_file_to_import + + if 'import' not in rule: + # clear import_rules cache for current path + self.import_rules.pop(current_path, None) + + else: + # read the set of import paths from the rule + import_paths = self.get_import_rule(rule) + if type(import_paths) is str: + import_paths = [import_paths] + + # remove the processed import property to prevent infinite loop + del (rule['import']) + + # update import_rules cache for current path + self.import_rules[current_path] = import_paths + + # push the imports paths onto the top of the stack + for import_path in import_paths: + import_paths_stack.append(import_path) + + # pop the next import path from the top of the stack + if len(import_paths_stack) > 0: + current_path = import_paths_stack.pop() else: break @@ -596,7 +614,7 @@ def get_import_rule(self, rule): Allow for relative paths to the import rule. :param dict rule: :return: Path the import rule - :rtype: str + :rtype: List[str] """ rule_imports = rule['import'] if type(rule_imports) is str: diff --git a/tests/loaders_test.py b/tests/loaders_test.py index 0bc71318..8bfc43f8 100644 --- a/tests/loaders_test.py +++ b/tests/loaders_test.py @@ -512,41 +512,25 @@ def test_load_yaml_recursive_import(): branch_path = os.path.join(loaders_test_cases_path, 'recursive_import/branch.yaml') leaf_path = os.path.join(loaders_test_cases_path, 'recursive_import/leaf.yaml') - leaf_yaml = rules_loader.load_yaml(leaf_path) - assert leaf_yaml == { - 'name': 'leaf', - 'rule_file': leaf_path, - 'diameter': '5cm', - } - assert sorted(rules_loader.import_rules.keys()) == [ - branch_path, - leaf_path, - ] - assert rules_loader.import_rules[branch_path] == [ - trunk_path, - ] - assert rules_loader.import_rules[leaf_path] == [ - branch_path, - ] - - # reload the rule - leaf_yaml = rules_loader.load_yaml(leaf_path) - assert leaf_yaml == { - 'name': 'leaf', - 'rule_file': leaf_path, - 'diameter': '5cm', - } - assert sorted(rules_loader.import_rules.keys()) == [ - branch_path, - leaf_path, - ] - assert rules_loader.import_rules[branch_path] == [ - trunk_path, - trunk_path, # memory leak - ] - assert rules_loader.import_rules[leaf_path] == [ - branch_path, - ] + # re-load the rule a couple times to ensure import_rules cache is updated correctly + for i in range(3): + + leaf_yaml = rules_loader.load_yaml(leaf_path) + assert leaf_yaml == { + 'name': 'leaf', + 'rule_file': leaf_path, + 'diameter': '5cm', + } + assert sorted(rules_loader.import_rules.keys()) == [ + branch_path, + leaf_path, + ] + assert rules_loader.import_rules[branch_path] == [ + trunk_path, + ] + assert rules_loader.import_rules[leaf_path] == [ + branch_path, + ] def test_load_yaml_multiple_imports(): @@ -557,38 +541,19 @@ def test_load_yaml_multiple_imports(): oxygen_path = os.path.join(loaders_test_cases_path, 'multiple_imports/oxygen.yaml') water_path = os.path.join(loaders_test_cases_path, 'multiple_imports/water.yaml') - water_yaml = rules_loader.load_yaml(water_path) - assert water_yaml == { - 'name': 'water', - 'rule_file': water_path, - 'symbol': 'O', - } - assert sorted(rules_loader.import_rules.keys()) == [ - oxygen_path, - water_path, - ] - assert rules_loader.import_rules[oxygen_path] == [ - hydrogen_path, - ] - assert rules_loader.import_rules[water_path] == [ - oxygen_path, - ] + # re-load the rule a couple times to ensure import_rules cache is updated correctly + for i in range(3): - # reload the rule - water_yaml = rules_loader.load_yaml(water_path) - assert water_yaml == { - 'name': 'water', - 'rule_file': water_path, - 'symbol': 'O', - } - assert sorted(rules_loader.import_rules.keys()) == [ - oxygen_path, - water_path, - ] - assert rules_loader.import_rules[oxygen_path] == [ - hydrogen_path, - hydrogen_path, # memory leak - ] - assert rules_loader.import_rules[water_path] == [ - oxygen_path, - ] + water_yaml = rules_loader.load_yaml(water_path) + assert water_yaml == { + 'name': 'water', + 'rule_file': water_path, + 'symbol': 'O', + } + assert sorted(rules_loader.import_rules.keys()) == [ + water_path, + ] + assert rules_loader.import_rules[water_path] == [ + hydrogen_path, + oxygen_path, + ] From 77217a3962fc73d3d10dd166b975b221c778985f Mon Sep 17 00:00:00 2001 From: Jeff Ashton Date: Tue, 30 Nov 2021 08:14:22 -0500 Subject: [PATCH 2/4] Updating change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d501bf0..b1b421d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ - jinja2 3.0.1 to 3.0.3 - [#562](https://github.com/jertel/elastalert2/pull/562) - @nsano-rururu - Fix `get_rule_file_hash` TypeError - [#566](https://github.com/jertel/elastalert2/pull/566) - @JeffAshton - Ensure `schema.yaml` stream closed - [#567](https://github.com/jertel/elastalert2/pull/567) - @JeffAshton +- Fixing `import` bugs & memory leak in `RulesLoader`/`FileRulesLoader` - [#580](https://github.com/jertel/elastalert2/pull/580) # 2.2.3 From ef062016b7622cb8b5125aa8c867dc6139002c5a Mon Sep 17 00:00:00 2001 From: Jeff Ashton Date: Tue, 30 Nov 2021 08:43:49 -0500 Subject: [PATCH 3/4] Adding test case to cover import changing & being removed --- tests/loaders_test.py | 70 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tests/loaders_test.py b/tests/loaders_test.py index 8bfc43f8..3a473066 100644 --- a/tests/loaders_test.py +++ b/tests/loaders_test.py @@ -557,3 +557,73 @@ def test_load_yaml_multiple_imports(): hydrogen_path, oxygen_path, ] + + +def test_load_yaml_imports_modified(): + config = {} + rules_loader = FileRulesLoader(config) + + rule_path = os.path.join(empty_folder_test_path, 'rule.yaml') + first_import_path = os.path.join(empty_folder_test_path, 'first.yaml') + second_import_path = os.path.join(empty_folder_test_path, 'second.yaml') + + with mock.patch.object(rules_loader, 'get_yaml') as get_yaml: + get_yaml.side_effect = [ + { + 'name': 'rule', + 'import': first_import_path, + }, + { + 'imported': 'first', + } + ] + rule_yaml = rules_loader.load_yaml(rule_path) + assert rule_yaml == { + 'name': 'rule', + 'rule_file': rule_path, + 'imported': 'first', + } + assert sorted(rules_loader.import_rules.keys()) == [ + rule_path, + ] + assert rules_loader.import_rules[rule_path] == [ + first_import_path + ] + + # simulate the import changing + with mock.patch.object(rules_loader, 'get_yaml') as get_yaml: + get_yaml.side_effect = [ + { + 'name': 'rule', + 'import': second_import_path, + }, + { + 'imported': 'second', + } + ] + rule_yaml = rules_loader.load_yaml(rule_path) + assert rule_yaml == { + 'name': 'rule', + 'rule_file': rule_path, + 'imported': 'second', + } + assert sorted(rules_loader.import_rules.keys()) == [ + rule_path, + ] + assert rules_loader.import_rules[rule_path] == [ + second_import_path + ] + + # simulate the import being removed + with mock.patch.object(rules_loader, 'get_yaml') as get_yaml: + get_yaml.side_effect = [ + { + 'name': 'rule', + }, + ] + rule_yaml = rules_loader.load_yaml(rule_path) + assert rule_yaml == { + 'name': 'rule', + 'rule_file': rule_path, + } + assert len(rules_loader.import_rules) == 0 From 8f87767943f10149ae567d422a7420ea53d7df7f Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Tue, 30 Nov 2021 09:10:06 -0500 Subject: [PATCH 4/4] Add author to changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1b421d9..00c21bb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ - jinja2 3.0.1 to 3.0.3 - [#562](https://github.com/jertel/elastalert2/pull/562) - @nsano-rururu - Fix `get_rule_file_hash` TypeError - [#566](https://github.com/jertel/elastalert2/pull/566) - @JeffAshton - Ensure `schema.yaml` stream closed - [#567](https://github.com/jertel/elastalert2/pull/567) - @JeffAshton -- Fixing `import` bugs & memory leak in `RulesLoader`/`FileRulesLoader` - [#580](https://github.com/jertel/elastalert2/pull/580) +- Fixing `import` bugs & memory leak in `RulesLoader`/`FileRulesLoader` - [#580](https://github.com/jertel/elastalert2/pull/580) - @JeffAshton # 2.2.3