Skip to content

Commit

Permalink
Added regression tests for scipy.cluster.hierarchy.maxdists.
Browse files Browse the repository at this point in the history
  • Loading branch information
damian.eads committed Nov 24, 2008
1 parent 2cf4a79 commit d150f85
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 14 deletions.
6 changes: 3 additions & 3 deletions scipy/cluster/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None):
else:
raise ValueError('Linkage matrix must have 4 columns.')
if Z.shape[0] == 0:
raise ValueError('Linkage must be over at least one observation.')
raise ValueError('Linkage must be computed on at least two observations.')
n = Z.shape[0]
if n > 1:
if ((Z[:,0] < 0).any() or
Expand Down Expand Up @@ -2376,13 +2376,13 @@ def maxdists(Z):
specifically, ``MD[i] = Z[Q(i)-n, 2].max()`` where ``Q(i)`` is the
set of all node indices below and including node i.
"""
Z = np.asarray(Z, order='c')
Z = np.asarray(Z, order='c', dtype=np.double)
is_valid_linkage(Z, throw=True, name='Z')

n = Z.shape[0] + 1
MD = np.zeros((n-1,))
[Z] = _copy_arrays_if_base_present([Z])
_hierarchy_wrap.get_max_dist_for_each_hierarchy_wrap(Z, MD, int(n))
_hierarchy_wrap.get_max_dist_for_each_cluster_wrap(Z, MD, int(n))
return MD

def maxinconsts(Z, R):
Expand Down
4 changes: 2 additions & 2 deletions scipy/cluster/src/hierarchy.c
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,6 @@ void linkage_alt(double *dm, double *Z, double *X,

/** Rows i+1 to j-1 lose one unit of space, so we move them up. */
/** Rows j to np-1 lose no space. We do nothing to them. */

/** memcpy(rows[0], buf, sizeof(double) * rowsize[0] - k);*/

for (i = 0; i < mini; i++) {
Expand Down Expand Up @@ -1418,7 +1417,8 @@ void get_max_dist_for_each_cluster(const double *Z, double *max_dists, int n) {
max_dist = CPY_MAX(max_dist, max_dists[rid-n]);
}
max_dists[ndid-n] = max_dist;
CPY_DEBUG_MSG("i=%d maxdist[i]=%5.5f verif=%5.5f\n", ndid-n, max_dist, max_dists[ndid-n]);
CPY_DEBUG_MSG("i=%d maxdist[i]=%5.5f verif=%5.5f\n",
ndid-n, max_dist, max_dists[ndid-n]);
k--;
}
free(curNode);
Expand Down
112 changes: 103 additions & 9 deletions scipy/cluster/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import numpy as np
from numpy.testing import *

from scipy.cluster.hierarchy import linkage, from_mlab_linkage, to_mlab_linkage, num_obs_linkage, inconsistent, cophenet, from_mlab_linkage, fclusterdata, fcluster, is_isomorphic, single, complete, average, weighted, centroid, median, ward, leaders, correspond, is_monotonic
from scipy.cluster.hierarchy import linkage, from_mlab_linkage, to_mlab_linkage, num_obs_linkage, inconsistent, cophenet, from_mlab_linkage, fclusterdata, fcluster, is_isomorphic, single, complete, average, weighted, centroid, median, ward, leaders, correspond, is_monotonic, maxdists
from scipy.spatial.distance import squareform, pdist

_tdist = np.array([[0, 662, 877, 255, 412, 996],
Expand Down Expand Up @@ -685,14 +685,108 @@ def test_is_monotonic_iris_linkage(self):
Z = linkage(X, 'single')
self.failUnless(is_monotonic(Z) == True)

def help_single_inconsistent_depth(self, i):
Y = squareform(_tdist)
Z = linkage(Y, 'single')
R = inconsistent(Z, i)
Rright = eo['inconsistent-single-tdist-depth-' + str(i)]
eps = 1e-05
print np.abs(R - Rright).max()
self.failUnless(within_tol(R, Rright, eps))
class TestMaxDists(TestCase):

def test_maxdists_empty_linkage(self):
"Tests maxdists(Z) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
self.failUnlessRaises(ValueError, maxdists, Z)

def test_maxdists_one_cluster_linkage(self):
"Tests maxdists(Z) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def test_maxdists_Q_linkage_single(self):
"Tests maxdists(Z) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def test_maxdists_Q_linkage_complete(self):
"Tests maxdists(Z) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def test_maxdists_Q_linkage_ward(self):
"Tests maxdists(Z) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def test_maxdists_Q_linkage_centroid(self):
"Tests maxdists(Z) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
print np.abs(expectedMD - MD)
print is_monotonic(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def test_maxdists_Q_linkage_median(self):
"Tests maxdists(Z) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
print np.abs(expectedMD - MD).max()
print is_monotonic(Z)
self.failUnless(within_tol(MD, expectedMD, eps))

def calculate_maximum_distances(Z):
"Used for testing correctness of maxdists. Very slow."
n = Z.shape[0] + 1
B = np.zeros((n-1,))
q = np.zeros((3,))
for i in xrange(0, n - 1):
q[:] = 0.0
L = Z[i, 0]
R = Z[i, 1]
if L >= n:
q[0] = B[L - n]
if R >= n:
q[1] = B[R - n]
q[2] = Z[i, 2]
B[i] = q.max()
return B

def calculate_maximum_inconsistent(Z, R):
"Used for testing correctness of maxinconsts. Very slow."
n = Z.shape[0] + 1
B = np.zeros((n-1,))
q = np.zeros((3,))
for i in xrange(0, n - 1):
q[:] = 0.0
L = Z[i, 0]
R = Z[i, 1]
if L >= n:
q[0] = B[L - n]
if R >= n:
q[1] = B[R - n]
q[2] = R[i, 2]
B[i] = q.max()
return B

def within_tol(a, b, tol):
return np.abs(a - b).max() < tol
Expand Down

0 comments on commit d150f85

Please sign in to comment.