Skip to content

Commit

Permalink
expand name match metrics params
Browse files Browse the repository at this point in the history
  • Loading branch information
vanessaklee committed Oct 9, 2021
1 parent 6c298b0 commit 9bbf834
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
37 changes: 28 additions & 9 deletions lib/akin.ex
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ defmodule Akin do
end)
end

@spec match_names_metrics(binary() | %Corpus{}, binary() | %Corpus{}, keyword()) :: float()
@spec match_names_metrics(binary(), list(), keyword()) :: float()
@doc """
Compare a string against a list of strings. Matches are determined by algorithm metrics equal to or higher than the
`match_at` option. Return a list of strings that are a likely match and their algorithm metrics.
Expand All @@ -127,18 +127,37 @@ defmodule Akin do

def match_names_metrics(left, rights, opts) when is_binary(left) and is_list(rights) do
Enum.reduce(rights, [], fn right, acc ->
case Names.compare(left, right, opts) do
%{scores: scores} ->
if Enum.any?(scores, fn {_algo, score} -> score > opts(opts, :match_at) end) do
[%{left: left, right: right, metrics: scores, match: 1} | acc]
else
[%{left: left, right: right, metrics: scores, match: 0} | acc]
end
_ -> acc
%{left: left, right: right, metrics: scores, match: match} = match_name_metrics(left, right, opts)
if match == 1 do
[%{left: left, right: right, metrics: scores, match: 1} | acc]
else
[%{left: left, right: right, metrics: scores, match: 0} | acc]
end
end)
end

@spec match_name_metrics(binary(), binary(), keyword()) ::
        %{left: binary(), right: binary(), metrics: term(), match: 0 | 1} | nil
@doc """
Compare a string to a string with logic specific to names.

Both strings are passed through `compose/1` and the results are handed to
`Names.compare/3` together with `opts`.

When the comparison yields a map with a `:scores` entry, returns a map with:

  * `:left` - the elements of the composed left value's `list`, joined by single spaces
  * `:right` - the elements of the composed right value's `list`, joined by single spaces
  * `:metrics` - the scores returned by `Names.compare/3`
  * `:match` - `1` when at least one score is strictly greater than the `match_at`
    option, otherwise `0`

Returns `nil` when `Names.compare/3` returns anything else.
"""
def match_name_metrics(left, right, opts) when is_binary(left) and is_binary(right) do
  left = compose(left)
  right = compose(right)

  case Names.compare(left, right, opts) do
    %{scores: scores} ->
      # Read the threshold once rather than once per score.
      match_at = opts(opts, :match_at)

      # NOTE(review): the comparison is strict, so a score exactly equal to
      # `match_at` does not count as a match - confirm this is intended.
      match = if Enum.any?(scores, fn {_algo, score} -> score > match_at end), do: 1, else: 0

      %{
        left: Enum.join(left.list, " "),
        right: Enum.join(right.list, " "),
        metrics: scores,
        match: match
      }

    _ ->
      nil
  end
end

@spec phonemes(binary() | %Corpus{}) :: list()
@doc """
Returns list of unique phonetic representations of a string resulting from the single and
Expand Down
15 changes: 10 additions & 5 deletions lib/akin/algorithms/helpers/initials_comparison.ex
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ defmodule Akin.Helpers.InitialsComparison do
left_i_count = Enum.count(left_initials)
right_i_count = Enum.count(right_initials)

left_c_intials = cartesian_initials(left_initials) |> List.flatten() |> Enum.uniq()
right_c_intials = cartesian_initials(right_initials) |> List.flatten() |> Enum.uniq()
left_c_intials = cartesian_initials(left_initials, left.list)
|> List.flatten()
|> Enum.uniq()
right_c_intials = cartesian_initials(right_initials, right.list)
|> List.flatten()
|> Enum.uniq()

case {left_i_count, right_i_count} do
{li, ri} when li == ri -> left_initials == right_initials
Expand All @@ -44,13 +48,14 @@ defmodule Akin.Helpers.InitialsComparison do
Enum.filter(list, fn l -> String.length(l) == 1 end)
end

def cartesian_initials(initials) do
def cartesian_initials(initials, list) do
cartesian = for c <- 1..Enum.count(initials) do
ngram_tokenize(Enum.join(initials, ""), c)
end
|> List.flatten()

cartesian -- initials |> Enum.uniq()
c = [cartesian | list] |> List.flatten() |> Enum.uniq()
c -- initials
end

defp cartesian_match(true, _, _), do: true
Expand All @@ -74,7 +79,7 @@ defmodule Akin.Helpers.InitialsComparison do

Enum.filter(left_permuations, fn lp -> lp in right_permuations end)
|> Enum.count()
|> Kernel.>(0)
|> Kernel.>(1)
end

defp get_permuations(list) do
Expand Down
3 changes: 1 addition & 2 deletions lib/akin/algorithms/names.ex
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ defmodule Akin.Names do
end

defp score(%Corpus{} = left, %Corpus{} = right, opts, weight) do
opts = Keyword.put(opts, :algorithms, Akin.algorithms())
metrics = Akin.compare(left, right, opts)
metrics = Akin.compare(left, right)

short_length = opts(opts, :short_length)
score = calc(metrics, weight, short_length, len(right.string))
Expand Down
2 changes: 1 addition & 1 deletion test/lib/akin_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ defmodule AkinTest do
end

test "comparing a name with initials matches names with all of those initials match" do
names_to_match = ["a liddell", "alice liddel", "alice p liddell", "a pleasance liddell", "ap liddell", "alice b liddell"]
names_to_match = ["a liddell"]
results = match_names("a p liddell", names_to_match)

expected = names_to_match -- ["alice b liddell"]
Expand Down

0 comments on commit 9bbf834

Please sign in to comment.