Skip to content

Commit

Permalink
Scoring tests and fixes (#131)
Browse files Browse the repository at this point in the history
* Initial set of default scoring

Resulted in new tests for `one_of`, as well as the replacement of a
hand-written function with its itertools equivalent.

Lots of updated and added docstrings!

* Added tests for regex and fuzzy matching weights

* Remove comment that proved... inaccurate.

* Rename `parse_confidence` back to `confidence`

* Version bump
  • Loading branch information
clusterfudge authored May 26, 2021
1 parent e748b20 commit 4a8fe8a
Show file tree
Hide file tree
Showing 3 changed files with 288 additions and 78 deletions.
109 changes: 66 additions & 43 deletions adapt/intent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

__author__ = 'seanfitz'

import itertools

CLIENT_ENTITY_NAME = 'Client'


Expand All @@ -30,21 +32,24 @@ def find_first_tag(tags, entity_type, after_index=-1):
"""Searches tags for entity type after given index
Args:
tags(list): a list of tags with entity types to be compaired too entity_type
tags(list): a list of tags with entity types to be compared to
entity_type
entity_type(str): This is the entity type to look for in tags
after_index(int): the start token must be greaterthan this.
after_index(int): the start token must be greater than this.
Returns:
( tag, v, confidence ):
tag(str): is the tag that matched
v(str): ? the word that matched?
confidence(float): is a mesure of accuacy. 1 is full confidence and 0 is none.
confidence(float): is a measure of accuracy. 1 is full confidence
and 0 is none.
"""
for tag in tags:
for entity in tag.get('entities'):
for v, t in entity.get('data'):
if t.lower() == entity_type.lower() and \
(tag.get('start_token', 0) > after_index or tag.get('from_context', False)):
(tag.get('start_token', 0) > after_index or \
tag.get('from_context', False)):
return tag, v, entity.get('confidence')

return None, None, None
Expand All @@ -58,52 +63,53 @@ def find_next_tag(tags, end_index=0):


def choose_1_from_each(lists):
    """Generate every combination made by taking one item from each list.

    This is the Cartesian product of the inputs, produced in the same order
    as :func:`~itertools.product`. The number of combinations is the product
    of the input lengths (18 for inputs of length 3, 2 and 3), and every
    combination has exactly ``len(lists)`` items. An empty ``lists`` produces
    a single empty combination; an empty inner list produces none.

    :func:`~itertools.product` emits tuples; each one is converted to a list
    because callers have only been verified to work with lists.

    Args:
        lists(list of Lists): A list of lists or tuples, expected as input to
            :func:`~itertools.product`

    Returns:
        a generator of lists, one per combination
    """
    for result in itertools.product(*lists):
        # Convert the tuple so callers keep receiving list objects.
        yield list(result)


def resolve_one_of(tags, at_least_one):
"""This searches tags for Entities in at_least_one and returns any match
"""Search through all combinations of at_least_one rules to find a
combination that is covered by tags
Args:
tags(list): List of tags with Entities to search for Entities
at_least_one(list): List of Entities to find in tags
Returns:
object: returns None if no match is found but returns any match as an object
object:
returns None if no match is found but returns any match as an object
"""
if len(tags) < len(at_least_one):
return None

for possible_resolution in choose_1_from_each(at_least_one):
resolution = {}
pr = possible_resolution[:]
for entity_type in pr:
last_end_index = -1
if entity_type in resolution:
last_end_index = resolution[entity_type][-1].get('end_token')
tag, value, c = find_first_tag(tags, entity_type, after_index=last_end_index)
tag, value, c = find_first_tag(tags, entity_type,
after_index=last_end_index)
if not tag:
break
else:
if entity_type not in resolution:
resolution[entity_type] = []
resolution[entity_type].append(tag)
# Check if this is a valid resolution (all one_of rules matched)
if len(resolution) == len(possible_resolution):
return resolution

Expand All @@ -129,23 +135,24 @@ def validate(self, tags, confidence):
"""Using this method removes tags from the result of validate_with_tags
Returns:
intent(intent): Resuts from validate_with_tags
intent(intent): Results from validate_with_tags
"""
intent, tags = self.validate_with_tags(tags, confidence)
return intent

def validate_with_tags(self, tags, parse_weight):
def validate_with_tags(self, tags, confidence):
"""Validate whether tags has the required entities for this intent to fire
Args:
tags(list): Tags and Entities used for validation
parse_weight(float): The weight associate to the parse result,
confidence(float): The weight associated with the parse result,
as indicated by the parser. This is influenced by a parser
that uses edit distance or context.
Returns:
intent, tags: Returns intent and tags used by the intent on
falure to meat required entities then returns intent with confidence
failure to meet required entities then returns intent with
confidence
of 0.0 and an empty list for tags.
"""
result = {'intent_type': self.name}
Expand All @@ -154,7 +161,8 @@ def validate_with_tags(self, tags, parse_weight):
used_tags = []

for require_type, attribute_name in self.requires:
required_tag, canonical_form, tag_confidence = find_first_tag(local_tags, require_type)
required_tag, canonical_form, tag_confidence = \
find_first_tag(local_tags, require_type)
if not required_tag:
result['confidence'] = 0.0
return result, []
Expand All @@ -166,20 +174,24 @@ def validate_with_tags(self, tags, parse_weight):
intent_confidence += tag_confidence

if len(self.at_least_one) > 0:
best_resolution = resolve_one_of(tags, self.at_least_one)
best_resolution = resolve_one_of(local_tags, self.at_least_one)
if not best_resolution:
result['confidence'] = 0.0
return result, []
else:
for key in best_resolution:
result[key] = best_resolution[key][0].get('key') # TODO: at least one must support aliases
intent_confidence += 1.0 * best_resolution[key][0]['entities'][0].get('confidence', 1.0)
used_tags.append(best_resolution)
# TODO: at least one should support aliases
result[key] = best_resolution[key][0].get('key')
intent_confidence += \
1.0 * best_resolution[key][0]['entities'][0]\
.get('confidence', 1.0)
used_tags.append(best_resolution[key][0])
if best_resolution in local_tags:
local_tags.remove(best_resolution)
local_tags.remove(best_resolution[key][0])

for optional_type, attribute_name in self.optional:
optional_tag, canonical_form, tag_confidence = find_first_tag(local_tags, optional_type)
optional_tag, canonical_form, tag_confidence = \
find_first_tag(local_tags, optional_type)
if not optional_tag or attribute_name in result:
continue
result[attribute_name] = canonical_form
Expand All @@ -188,9 +200,11 @@ def validate_with_tags(self, tags, parse_weight):
used_tags.append(optional_tag)
intent_confidence += tag_confidence

total_confidence = (intent_confidence / len(tags) * parse_weight) if tags else 0.0
total_confidence = (intent_confidence / len(tags) * confidence) \
if tags else 0.0

target_client, canonical_form, parse_weight = find_first_tag(local_tags, CLIENT_ENTITY_NAME)
target_client, canonical_form, confidence = \
find_first_tag(local_tags, CLIENT_ENTITY_NAME)

result['target'] = target_client.get('key') if target_client else None
result['confidence'] = total_confidence
Expand All @@ -204,7 +218,7 @@ class IntentBuilder(object):
Attributes:
at_least_one(list): A list of Entities where one is required.
These are seperated into lists so you can have one of (A or B) and
These are separated into lists so you can have one of (A or B) and
then require one of (D or F).
requires(list): A list of Required Entities
optional(list): A list of optional Entities
Expand All @@ -214,14 +228,18 @@ class IntentBuilder(object):
This is designed to allow construction of intents in one line.
Example:
IntentBuilder("Intent").requires("A").one_of("C","D").optional("G").build()
IntentBuilder("Intent")\
.requires("A")\
.one_of("C","D")\
.optional("G").build()
"""
def __init__(self, intent_name):
"""
Constructor
Args:
intent_name(str): the name of the intents that this parser parses/validates
intent_name(str): the name of the intents that this parser
parses/validates
"""
self.at_least_one = []
self.requires = []
Expand All @@ -230,7 +248,8 @@ def __init__(self, intent_name):

def one_of(self, *args):
"""
The intent parser should require one of the provided entity types to validate this clause.
The intent parser should require one of the provided entity types to
validate this clause.
Args:
args(args): *args notation list of entity names
Expand All @@ -247,7 +266,8 @@ def require(self, entity_type, attribute_name=None):
Args:
entity_type(str): an entity type
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type.
attribute_name(str): the name of the attribute on the parsed intent.
Defaults to match entity_type.
Returns:
self: to continue modifications.
Expand All @@ -259,11 +279,13 @@ def require(self, entity_type, attribute_name=None):

def optionally(self, entity_type, attribute_name=None):
"""
Parsed intents from this parser can optionally include an entity of the provided type.
Parsed intents from this parser can optionally include an entity of the
provided type.
Args:
entity_type(str): an entity type
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type.
attribute_name(str): the name of the attribute on the parsed intent.
Defaults to match entity_type.
Returns:
self: to continue modifications.
Expand All @@ -279,4 +301,5 @@ def build(self):
:return: an Intent instance.
"""
return Intent(self.name, self.requires, self.at_least_one, self.optional)
return Intent(self.name, self.requires,
self.at_least_one, self.optional)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def required(requirements_file):

setup(
name="adapt-parser",
version="0.4.0",
version="0.4.1",
author="Sean Fitzgerald",
author_email="sean@fitzgeralds.me",
description=("A text-to-intent parsing framework."),
Expand Down
Loading

0 comments on commit 4a8fe8a

Please sign in to comment.