From 269c11c10907351d98acfb929af5036a23a2e5c3 Mon Sep 17 00:00:00 2001 From: Karsten Schmidt Date: Tue, 20 Apr 2021 10:13:58 +0100 Subject: [PATCH] feat(k-means): add meansLatLon centroid strategy, docstrings --- packages/k-means/src/kmeans.ts | 65 +++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/packages/k-means/src/kmeans.ts b/packages/k-means/src/kmeans.ts index b9778306f3..f1bf9236ee 100644 --- a/packages/k-means/src/kmeans.ts +++ b/packages/k-means/src/kmeans.ts @@ -4,6 +4,18 @@ import { SYSTEM, uniqueIndices, weightedRandom } from "@thi.ng/random"; import { add, median, mulN, ReadonlyVec, Vec, zeroes } from "@thi.ng/vectors"; import type { CentroidStrategy, Cluster, KMeansOpts } from "./api"; +/** + * Takes an array of n-dimensional `samples` and attempts to assign them to `k` + * clusters, using the behavior defined by (optionally) given `opts`. + * + * @remarks + * https://en.wikipedia.org/wiki/K-means_clustering + * + * @param k + * @param samples + * @param opts + * @returns + */ export const kmeans = ( k: number, samples: T[], @@ -45,7 +57,8 @@ export const kmeans = ( }; /** - * k-means++ initialization / selection of initial cluster centroids. + * k-means++ initialization / selection of initial cluster centroids. Default + * centroid initialization method for {@link kmeans}. * * @remarks * References: @@ -113,6 +126,12 @@ const buildClusters = (centroids: ReadonlyVec[], clusters: number[]) => { return indices; }; +/** + * Default centroid strategy forming new centroids by averaging the position of + * participating samples. + * + * @param dim + */ export const means: CentroidStrategy = (dim) => { const acc = zeroes(dim); let n = 0; @@ -125,6 +144,12 @@ export const means: CentroidStrategy = (dim) => { }; }; +/** + * Centroid strategy forming new centroids via componentwise medians. 
+ * + * @remarks + * https://en.wikipedia.org/wiki/K-medians_clustering + */ export const medians: CentroidStrategy = () => { const acc: ReadonlyVec[] = []; return { @@ -132,3 +157,41 @@ export const medians: CentroidStrategy = () => { finish: () => (acc.length ? median([], acc) : undefined), }; }; + +/** + * Means centroid strategy for decimal degree lat/lon positions (e.g. WGS84). + * Unlike the default {@link means} strategy, this one treats latitude values + * correctly in terms of the ±180 deg boundary and ensures samples on either + * side of the Pacific form correct centroids. + * + * @remarks + * When using this strategy, you should also use the + * {@link @thi.ng/distance#HAVERSINE_LATLON} distance metric for + * {@link KMeansOpts.dist}. + * + * @example + * ```ts + * kmeans(3, [...], { strategy: meansLatLon, dist: HAVERSINE_LATLON }) + * ``` + * + * https://en.wikipedia.org/wiki/World_Geodetic_System + */ +export const meansLatLon: CentroidStrategy = () => { + let lat = 0; + let lon = 0; + let n = 0; + return { + update: ([$lat, $lon]) => { + lat += $lat < 0 ? $lat + 360 : $lat; + lon += $lon; + n++; + }, + finish: () => { + if (!n) return; + lat /= n; + if (lat > 180) lat -= 360; + lon /= n; + return [lat, lon]; + }, + }; +};