From a5ba094bd8e03a0b1b6447f26836336576be5a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Thu, 18 Mar 2021 22:30:31 +0100 Subject: [PATCH 1/8] Add functionality to drop intent parsers --- adapt/engine.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/adapt/engine.py b/adapt/engine.py index 8405323..9dcdaa6 100644 --- a/adapt/engine.py +++ b/adapt/engine.py @@ -175,6 +175,25 @@ def register_intent_parser(self, intent_parser): else: raise ValueError("%s is not an intent parser" % str(intent_parser)) + def drop_intent_parser(self, parser_names): + """Drop a registered intent parser. + + Arguments: + parser_names (str or iterable): parser name to drop or list of + names + + Returns: + (bool) True if a parser was dropped else False + """ + if isinstance(parser_names, str): + parser_names = [parser_names] + + new_parsers = [p for p in self.intent_parsers + if p.name not in parser_names] + num_original_parsers = len(self.intent_parsers) + self.intent_parsers = new_parsers + + return len(self.intent_parsers != num_original_parsers) class DomainIntentDeterminationEngine(object): """ @@ -368,3 +387,15 @@ def register_intent_parser(self, intent_parser, domain=0): self.register_domain(domain=domain) self.domains[domain].register_intent_parser( intent_parser=intent_parser) + + def drop_intent_parser(self, parser_names, domain): + """Drop a registered intent parser. + + Arguments: + parser_names (list, str): parser names to drop. + domain (str): domain to drop from + + Returns: + (bool) True if an intent parser was dropped else false. 
+ """ + return self.domains[domain].drop_intent_parser(parser_name) From 44ce7c98c178af5b0c72cb3194319335105047cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Wed, 24 Mar 2021 07:30:58 +0100 Subject: [PATCH 2/8] Add drop_intent tests --- adapt/engine.py | 4 ++-- test/DomainIntentEngineTest.py | 19 +++++++++++++++++++ test/IntentEngineTest.py | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/adapt/engine.py b/adapt/engine.py index 9dcdaa6..a54dddb 100644 --- a/adapt/engine.py +++ b/adapt/engine.py @@ -193,7 +193,7 @@ def drop_intent_parser(self, parser_names): num_original_parsers = len(self.intent_parsers) self.intent_parsers = new_parsers - return len(self.intent_parsers != num_original_parsers) + return len(self.intent_parsers) != num_original_parsers class DomainIntentDeterminationEngine(object): """ @@ -398,4 +398,4 @@ def drop_intent_parser(self, parser_names, domain): Returns: (bool) True if an intent parser was dropped else false. 
""" - return self.domains[domain].drop_intent_parser(parser_name) + return self.domains[domain].drop_intent_parser(parser_names) diff --git a/test/DomainIntentEngineTest.py b/test/DomainIntentEngineTest.py index 4bcfb7c..ec4b7be 100644 --- a/test/DomainIntentEngineTest.py +++ b/test/DomainIntentEngineTest.py @@ -219,3 +219,22 @@ def test_select_best_intent_enuse_enitities_dont_register_in_multiple_domains(se intents = self.engine.determine_intent(utterance, 1) for intent in intents: self.assertNotEqual(intent['intent_type'], 'Parser2') + + def test_drop_intent_from_domain(self): + """Test that intent is dropped from the correct domain.""" + self.engine.register_domain('Domain1') + self.engine.register_domain('Domain2') + + # Creating first intent domain + parser1 = IntentBuilder("Parser1").require("Entity1").build() + self.engine.register_intent_parser(parser1, domain='Domain1') + self.engine.register_entity("tree", "Entity1", domain='Domain1') + + # Creating second intent domain + parser2 = IntentBuilder("Parser2").require("Entity2").build() + self.engine.register_intent_parser(parser2, domain="Domain2") + self.engine.register_entity("house", "Entity2", domain="Domain2") + + self.engine.drop_intent_parser(domain="Domain2", + parser_names=['Parser2']) + self.assertEqual(len(self.engine.domains['Domain2'].intent_parsers), 0) diff --git a/test/IntentEngineTest.py b/test/IntentEngineTest.py index 43b6d62..795f17f 100644 --- a/test/IntentEngineTest.py +++ b/test/IntentEngineTest.py @@ -58,3 +58,24 @@ def testSelectBestIntent(self): intent = next(self.engine.determine_intent(utterance)) assert intent assert intent['intent_type'] == 'Parser2' + + def testDropIntent(self): + parser1 = IntentBuilder("Parser1").require("Entity1").build() + self.engine.register_intent_parser(parser1) + self.engine.register_entity("tree", "Entity1") + + parser2 = (IntentBuilder("Parser2").require("Entity1") + .require("Entity2").build()) + self.engine.register_intent_parser(parser2) + 
self.engine.register_entity("house", "Entity2") + + utterance = "go to the tree house" + + intent = next(self.engine.determine_intent(utterance)) + assert intent + assert intent['intent_type'] == 'Parser2' + + assert self.engine.drop_intent_parser('Parser2') is True + intent = next(self.engine.determine_intent(utterance)) + assert intent + assert intent['intent_type'] == 'Parser1' From 8e9ef8b3150ba57beca908e61001b4f9a1abe0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Fri, 19 Mar 2021 12:30:10 +0100 Subject: [PATCH 3/8] Add basic tests for removing from Trie --- test/TrieTest.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/TrieTest.py b/test/TrieTest.py index 61fa4c8..9f6d8c5 100644 --- a/test/TrieTest.py +++ b/test/TrieTest.py @@ -125,6 +125,42 @@ def test_edit_distance_no_confidence(self): results = list(trie.gather("of the big bang theory")) assert len(results) == 0 + def test_remove(self): + trie = Trie(max_edit_distance=2) + trie.insert("1", "Number") + trie.insert("2", "Number") + trie.remove("2") + + one_lookup = list(trie.gather("1")) + two_lookup = list(trie.gather("2")) + assert len(one_lookup) == 1 # One match found + assert len(two_lookup) == 0 # Zero matches since removed + + def test_remove_multi_last(self): + trie = Trie(max_edit_distance=2) + trie.insert("Kermit", "Muppets") + trie.insert("Kermit", "Frogs") + kermit_lookup = list(trie.lookup("Kermit"))[0] + assert 'Frogs' in kermit_lookup['data'] + assert 'Muppets' in kermit_lookup['data'] + + trie.remove("Kermit", "Frogs") + + kermit_lookup = list(trie.gather("Kermit"))[0] + assert kermit_lookup['data'] == {"Muppets"} # Right data remains + + def test_remove_multi_first(self): + trie = Trie(max_edit_distance=2) + trie.insert("Kermit", "Muppets") + trie.insert("Kermit", "Frogs") + kermit_lookup = list(trie.lookup("Kermit"))[0] + assert 'Frogs' in kermit_lookup['data'] + assert 'Muppets' in kermit_lookup['data'] + + 
trie.remove("Kermit", "Muppets") + + kermit_lookup = list(trie.lookup("Kermit"))[0] + assert kermit_lookup['data'] == {"Frogs"} # Right data remains def tearDown(self): pass From ee70335dc05d9542046739048ac435d02654f99f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Fri, 19 Mar 2021 21:57:00 +0100 Subject: [PATCH 4/8] Add "scan" method to Trie The method searches the Trie for data matching a criterion. The criterion is determined by a function passed as an argument. --- adapt/tools/text/trie.py | 32 ++++++++++++++++++++++++++++++++ test/TrieTest.py | 17 +++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/adapt/tools/text/trie.py b/adapt/tools/text/trie.py index 7e5edd2..5276028 100644 --- a/adapt/tools/text/trie.py +++ b/adapt/tools/text/trie.py @@ -209,3 +209,35 @@ def remove(self, iterable, data=None): data: data to be paired with the key """ return self.root.remove(iterable, data=data) + + def scan(self, match_func): + """Traverse the trie scanning for end nodes with matching data. + + Arguments: + match_func (callable): function used to match data. + + Returns: + (list) list with matching (data, value) pairs. 
+ """ + result = [] + + def _traverse(node, current=''): + """Traverse Trie searching for nodes with matching data + + Performs recursive depth first search of Trie and collects + value / data pairs matched by the match_func + + Arguments: + node (trie node): Node to parse + current (str): string "position" in Trie + """ + nonlocal result + nonlocal match_func + # Check if node matches + result += [(current, d) for d in node.data if match_func(d)] + + for c in node.children: + _traverse(node.children[c], current + c) + + _traverse(self.root) + return result diff --git a/test/TrieTest.py b/test/TrieTest.py index 9f6d8c5..79548f3 100644 --- a/test/TrieTest.py +++ b/test/TrieTest.py @@ -162,5 +162,22 @@ def test_remove_multi_first(self): kermit_lookup = list(trie.lookup("Kermit"))[0] assert kermit_lookup['data'] == {"Frogs"} # Right data remains + def test_scan(self): + trie = Trie(max_edit_distance=2) + trie.insert("Kermit", "Muppets") + trie.insert("Gonzo", "Muppets") + trie.insert("Rowlf", "Muppets") + trie.insert("Gobo", "Fraggles") + + def match_func(data): + return data == "Muppets" + + results = trie.scan(match_func) + assert len(results) == 3 + muppet_names = [r[0] for r in results] + assert "Kermit" in muppet_names + assert "Gonzo" in muppet_names + assert "Rowlf" in muppet_names + def tearDown(self): pass From 584e58564f4e158e1b55249abd1499643be1f7d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Thu, 18 Mar 2021 22:30:58 +0100 Subject: [PATCH 5/8] Add support for dropping registered entities --- adapt/engine.py | 34 +++++++++++++++++++ test/DomainIntentEngineTest.py | 18 ++++++++++ test/IntentEngineTest.py | 61 ++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/adapt/engine.py b/adapt/engine.py index a54dddb..f7fe91f 100644 --- a/adapt/engine.py +++ b/adapt/engine.py @@ -195,6 +195,26 @@ def drop_intent_parser(self, parser_names): return len(self.intent_parsers) != num_original_parsers + def 
drop_entity(self, entity_type=None, match_func=None): + """Drop all entities matching the given entity type or match function + + Arguments: + entity_type (str): entity name to match against + match_func (callable): match function to find entities + + Returns: + (bool) True if vocab was found and removed otherwise False. + """ + def default_match_func(data): + return data and data[1] == entity_type + + ent_tuples = self.trie.scan(match_func or default_match_func) + for entity in ent_tuples: + self.trie.remove(*entity) + + return len(ent_tuples) != 0 + + class DomainIntentDeterminationEngine(object): """ DomainIntentDeterminationEngine. @@ -399,3 +419,17 @@ def drop_intent_parser(self, parser_names, domain): (bool) True if an intent parser was dropped else false. """ return self.domains[domain].drop_intent_parser(parser_names) + + def drop_entity(self, domain, entity_type=None, match_func=None): + """Drop all entities matching the given entity type or match function. + + Arguments: + domain (str): intent domain + entity_type (str): entity name to match against + match_func (callable): match function to find entities + + Returns: + (bool) True if vocab was found and removed otherwise False. 
+ """ + return self.domains[domain].drop_entity(entity_type=entity_type, + match_func=match_func) diff --git a/test/DomainIntentEngineTest.py b/test/DomainIntentEngineTest.py index ec4b7be..a870a0c 100644 --- a/test/DomainIntentEngineTest.py +++ b/test/DomainIntentEngineTest.py @@ -238,3 +238,21 @@ def test_drop_intent_from_domain(self): self.engine.drop_intent_parser(domain="Domain2", parser_names=['Parser2']) self.assertEqual(len(self.engine.domains['Domain2'].intent_parsers), 0) + + def test_drop_entity_from_domain(self): + """Test that entity is dropped from domain.""" + self.engine.register_domain('Domain1') + self.engine.register_domain('Domain2') + + # Creating first intent domain + parser1 = IntentBuilder("Parser1").require("Entity1").build() + self.engine.register_intent_parser(parser1, domain='Domain1') + self.engine.register_entity("tree", "Entity1", domain='Domain1') + + # Creating second intent domain + parser2 = IntentBuilder("Parser2").require("Entity2").build() + self.engine.register_intent_parser(parser2, domain="Domain2") + self.engine.register_entity("house", "Entity2", domain="Domain2") + + self.assertTrue(self.engine.drop_entity(domain="Domain2", + entity_type='Entity2')) diff --git a/test/IntentEngineTest.py b/test/IntentEngineTest.py index 795f17f..2211331 100644 --- a/test/IntentEngineTest.py +++ b/test/IntentEngineTest.py @@ -79,3 +79,64 @@ def testDropIntent(self): intent = next(self.engine.determine_intent(utterance)) assert intent assert intent['intent_type'] == 'Parser1' + + def testDropEntity(self): + parser1 = IntentBuilder("Parser1").require("Entity1").build() + self.engine.register_intent_parser(parser1) + self.engine.register_entity("laboratory", "Entity1") + self.engine.register_entity("lab", "Entity1") + + utterance = "get out of my lab" + utterance2 = "get out of my laboratory" + intent = next(self.engine.determine_intent(utterance)) + assert intent + assert intent['intent_type'] == 'Parser1' + + intent = 
next(self.engine.determine_intent(utterance2)) + assert intent + assert intent['intent_type'] == 'Parser1' + + # Remove Entity and re-register laboratory and make sure only that + # matches. + self.engine.drop_entity(entity_type='Entity1') + self.engine.register_entity("laboratory", "Entity1") + + # Sentence containing lab should not produce any results + with self.assertRaises(StopIteration): + intent = next(self.engine.determine_intent(utterance)) + + # But sentence with laboratory should + intent = next(self.engine.determine_intent(utterance2)) + assert intent + assert intent['intent_type'] == 'Parser1' + + def testCustomDropEntity(self): + parser1 = (IntentBuilder("Parser1").one_of("Entity1", "Entity2") + .build()) + self.engine.register_intent_parser(parser1) + self.engine.register_entity("laboratory", "Entity1") + self.engine.register_entity("lab", "Entity2") + + utterance = "get out of my lab" + utterance2 = "get out of my laboratory" + intent = next(self.engine.determine_intent(utterance)) + assert intent + assert intent['intent_type'] == 'Parser1' + + intent = next(self.engine.determine_intent(utterance2)) + assert intent + assert intent['intent_type'] == 'Parser1' + + def matcher(data): + return data[1].startswith('Entity') + + self.engine.drop_entity(match_func=matcher) + self.engine.register_entity("laboratory", "Entity1") + + # Sentence containing lab should not produce any results + with self.assertRaises(StopIteration): + intent = next(self.engine.determine_intent(utterance)) + + # But sentence with laboratory should + intent = next(self.engine.determine_intent(utterance2)) + assert intent From 04064f16b7f1c056dcb50a66ed2a3e393c317b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Tue, 23 Mar 2021 21:51:55 +0100 Subject: [PATCH 6/8] Drop nonlocal variables in scan Instead use returns to make it properly recursive --- adapt/tools/text/trie.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git 
a/adapt/tools/text/trie.py b/adapt/tools/text/trie.py index 5276028..9352fa9 100644 --- a/adapt/tools/text/trie.py +++ b/adapt/tools/text/trie.py @@ -219,9 +219,7 @@ def scan(self, match_func): Returns: (list) list with matching (data, value) pairs. """ - result = [] - - def _traverse(node, current=''): + def _traverse(node, match_func, current=''): """Traverse Trie searching for nodes with matching data Performs recursive depth first search of Trie and collects @@ -229,15 +227,18 @@ def _traverse(node, current=''): Arguments: node (trie node): Node to parse + match_func (callable): Function performing match current (str): string "position" in Trie + + Returns: + (list) list with matching (data, value) pairs. """ - nonlocal result - nonlocal match_func # Check if node matches - result += [(current, d) for d in node.data if match_func(d)] + result = [(current, d) for d in node.data if match_func(d)] + # Traverse further down into the tree for c in node.children: - _traverse(node.children[c], current + c) + result += _traverse(node.children[c], match_func, current + c) + return result - _traverse(self.root) - return result + return _traverse(self.root, match_func) From 94929e8d5978b25f2a4798c1e540ba3564315463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Thu, 25 Mar 2021 20:17:36 +0100 Subject: [PATCH 7/8] Add support for dropping regex entities --- adapt/engine.py | 41 ++++++++++++++++++++++++++++++++++ test/DomainIntentEngineTest.py | 13 +++++++++++ test/IntentEngineTest.py | 25 +++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/adapt/engine.py b/adapt/engine.py index f7fe91f..0f4b6d2 100644 --- a/adapt/engine.py +++ b/adapt/engine.py @@ -214,6 +214,33 @@ def default_match_func(data): return len(ent_tuples) != 0 + def drop_regex_entity(self, entity_type=None, match_func=None): + """Remove registered regex entity. 
+ + Arguments: + entity_type (str): entity name to match against + match_func (callable): match function to find entities + + Returns: + (bool) True if vocab was found and removed otherwise False. + """ + def default_match_func(regexp): + return entity_type in regexp.groupindex.keys() + + match_func = match_func or default_match_func + matches = [r for r in self.regular_expressions_entities + if match_func(r)] + matching_patterns = [r.pattern for r in matches] + + self.regular_expressions_entities = [ + r for r in self.regular_expressions_entities if r not in matches + ] + self._regex_strings = [ + r for r in self._regex_strings if r not in matching_patterns + ] + + return len(matches) != 0 + class DomainIntentDeterminationEngine(object): """ @@ -433,3 +460,17 @@ def drop_entity(self, domain, entity_type=None, match_func=None): """ return self.domains[domain].drop_entity(entity_type=entity_type, match_func=match_func) + + def drop_regex_entity(self, domain, entity_type=None, match_func=None): + """Remove registered regex entity. + + Arguments: + domain (str): intent domain + entity_type (str): entity name to match against + match_func (callable): match function to find entities + + Returns: + (bool) True if vocab was found and removed otherwise False. 
+ """ + return self.domains[domain].drop_regex_entity(entity_type=entity_type, + match_func=match_func) diff --git a/test/DomainIntentEngineTest.py b/test/DomainIntentEngineTest.py index a870a0c..075b78d 100644 --- a/test/DomainIntentEngineTest.py +++ b/test/DomainIntentEngineTest.py @@ -256,3 +256,16 @@ def test_drop_entity_from_domain(self): self.assertTrue(self.engine.drop_entity(domain="Domain2", entity_type='Entity2')) + + def testDropRegexEntity(self): + self.engine.register_domain("Domain1") + self.engine.register_domain("Domain2") + + self.engine.register_regex_entity(r"the dog (?P<Dog>.*)", + "Domain1") + self.engine.register_regex_entity(r"the cat (?P<Cat>.*)", + "Domain2") + self.assertTrue(self.engine.drop_regex_entity(domain='Domain2', + entity_type='Cat')) + self.assertFalse(self.engine.drop_regex_entity(domain='Domain1', + entity_type='Cat')) diff --git a/test/IntentEngineTest.py b/test/IntentEngineTest.py index 2211331..be7ca24 100644 --- a/test/IntentEngineTest.py +++ b/test/IntentEngineTest.py @@ -140,3 +140,28 @@ def matcher(data): # But sentence with laboratory should intent = next(self.engine.determine_intent(utterance2)) assert intent + + def testDropRegexEntity(self): + self.engine.register_regex_entity(r"the dog (?P<Dog>.*)") + self.engine.register_regex_entity(r"the cat (?P<Cat>.*)") + assert len(self.engine._regex_strings) == 2 + assert len(self.engine.regular_expressions_entities) == 2 + self.engine.drop_regex_entity(entity_type='Cat') + assert len(self.engine._regex_strings) == 1 + assert len(self.engine.regular_expressions_entities) == 1 + + def testCustomDropRegexEntity(self): + self.engine.register_regex_entity(r"the dog (?P<SkillA_Dog>.*)") + self.engine.register_regex_entity(r"the cat (?P<SkillA_Cat>.*)") + self.engine.register_regex_entity(r"the mangy dog (?P<SkillB_Dog>.*)") + assert len(self.engine._regex_strings) == 3 + assert len(self.engine.regular_expressions_entities) == 3 + + def matcher(regexp): + """Matcher for all match 
groups defined for SkillB""" + match_groups = 
regexp.groupindex.keys() + return any([k.startswith('SkillB') for k in match_groups]) + + self.engine.drop_regex_entity(match_func=matcher) + assert len(self.engine._regex_strings) == 2 + assert len(self.engine.regular_expressions_entities) == 2 From 20f1aac73815f7031656b346a10f1c6433de17b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=85ke=20Forslund?= Date: Fri, 26 Mar 2021 07:24:56 +0100 Subject: [PATCH 8/8] Bump version to 0.4.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 189a6bd..4e68f73 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( name="adapt-parser", - version="0.3.7", + version="0.4.0", author="Sean Fitzgerald", author_email="sean@fitzgeralds.me", description=("A text-to-intent parsing framework."),