diff --git a/docs/grammar/transform/aggregate.md b/docs/grammar/transform/aggregate.md
index 834e805a..40c74be8 100644
--- a/docs/grammar/transform/aggregate.md
+++ b/docs/grammar/transform/aggregate.md
@@ -1,18 +1,28 @@
# Aggregate
-The `"aggregate"` transform is currently minimal – it adds a new `count` field
-that contains the number of data items in a group. More aggregate operations
-will be added later.
-
-!!! warning
-
- The parameterization will change in the future to support other aggregate
- operations.
+The `"aggregate"` transform summarizes data fields using aggregate functions,
+such as `"sum"` or `"max"`. The data can be grouped by one or more fields,
+which results in a list of objects with the grouped fields and the aggregate
+values.
## Parameters
SCHEMA AggregateParams
+### Available aggregate functions
+
+Aggregate functions are applied to the data fields in each group.
+
+- `"count"`: Count the number of records in each group.
+- `"valid"`: Count the number of non-null and non-NaN values.
+- `"sum"`: Sum the values.
+- `"mean"`: Calculate the mean value.
+- `"average"`: A synonym for `"mean"`.
+- `"median"`: Calculate the median value.
+- `"min"`: Find the minimum value.
+- `"max"`: Find the maximum value.
+- `"variance"`: Calculate the variance.
+
## Example
Given the following data:
@@ -38,3 +48,54 @@ A new list of data objects is created:
| ------ | ----- |
| first | 2 |
| second | 1 |
+
+### Calculating min and max
+
+
+
+```json
+{
+ "data": {
+ "values": [
+ { "Category": "A", "Value": 5 },
+ { "Category": "A", "Value": 9 },
+ { "Category": "A", "Value": 9.5 },
+ { "Category": "B", "Value": 3 },
+ { "Category": "B", "Value": 5 },
+ { "Category": "B", "Value": 7.5 },
+ { "Category": "B", "Value": 8 }
+ ]
+ },
+
+ "encoding": {
+ "y": { "field": "Category", "type": "nominal" }
+ },
+
+ "layer": [
+ {
+ "encoding": {
+ "x": { "field": "Value", "type": "quantitative" }
+ },
+ "mark": "point"
+ },
+ {
+ "transform": [
+ {
+ "type": "aggregate",
+ "groupby": ["Category"],
+ "fields": ["Value", "Value"],
+ "ops": ["min", "max"],
+ "as": ["minValue", "maxValue"]
+ }
+ ],
+ "encoding": {
+ "x": { "field": "minValue", "type": "quantitative" },
+ "x2": { "field": "maxValue" }
+ },
+ "mark": "rule"
+ }
+ ]
+}
+```
+
+
diff --git a/packages/core/src/data/transforms/aggregate.js b/packages/core/src/data/transforms/aggregate.js
index e3cd8f96..9b15aebf 100644
--- a/packages/core/src/data/transforms/aggregate.js
+++ b/packages/core/src/data/transforms/aggregate.js
@@ -2,14 +2,8 @@ import { group as d3group } from "d3-array";
import FlowNode, { BEHAVIOR_CLONES } from "../flowNode.js";
import { field } from "../../utils/field.js";
import iterateNestedMaps from "../../utils/iterateNestedMaps.js";
+import AGGREGATE_OPS from "./aggregateOps.js";
-/**
- * A minimal aggregate transform that just counts grouped (by a single field) data items.
- * Work in progress.
- *
- * Eventually this will implement the most of Vega's aggregate transform:
- * https://vega.github.io/vega/docs/transforms/aggregate/
- */
export default class AggregateTransform extends FlowNode {
get behavior() {
return BEHAVIOR_CLONES;
@@ -17,6 +11,8 @@ export default class AggregateTransform extends FlowNode {
/**
* @param {import("../../spec/transform.js").AggregateParams} params
+ *
+ * @typedef {import("../flowNode.js").Datum} Datum
*/
constructor(params) {
super();
@@ -24,9 +20,45 @@ export default class AggregateTransform extends FlowNode {
/** @type {any[]} */
this.buffer = [];
+
+ /**
+ * @type {((arr: Datum[]) => number)[]}
+ */
+ this.ops = [];
+ /**
+ * @type {string[]}
+ */
+ this.as = [];
+
+ if (params.fields) {
+ if (params.fields.length != params.ops.length) {
+ throw new Error("Fields and ops must have the same length!");
+ }
+
+ if (params.as && params.as.length != params.ops.length) {
+ throw new Error(
+ 'If "as" is defined, "fields" and "as" must have the same length!'
+ );
+ }
+
+ params.fields.forEach((fieldName, i) => {
+ const accessor = field(fieldName);
+ const op = AGGREGATE_OPS[params.ops[i]];
+ this.ops.push((arr) => op(arr, accessor));
+ this.as.push(
+ params.as
+ ? params.as[i]
+ : `${params.ops[i]}_${params.fields[i]}`
+ );
+ });
+ } else {
+ this.ops.push((arr) => AGGREGATE_OPS.count(arr));
+ this.as.push("count");
+ }
}
reset() {
+ super.reset();
this.buffer = [];
}
@@ -41,26 +73,36 @@ export default class AggregateTransform extends FlowNode {
complete() {
const params = this.params;
- const groupby = params.groupby;
+ const groupby = params?.groupby;
+
+ if (groupby?.length > 0) {
+ const groupFieldAccessors = groupby.map((f) => field(f));
- const groupFieldAccessors = groupby.map((f) => field(f));
+ // There's something strange in d3-array's typings
+ const groups = /** @type {Map} */ /** @type {any} */ (
+ d3group(this.buffer, ...groupFieldAccessors)
+ );
- // TODO: Fix case where no group fields are specified
+ for (const [group, data] of iterateNestedMaps(groups)) {
+ /** @type {any} */
+ const datum = {};
- // There's something strange in d3-array's typings
- const groups = /** @type {Map} */ /** @type {any} */ (
- d3group(this.buffer, ...groupFieldAccessors)
- );
+ for (let i = 0; i < groupby.length; i++) {
+ datum[groupby[i]] = group[i];
+ }
- for (const [group, data] of iterateNestedMaps(groups)) {
- /** @type {any} */
- const datum = {
- count: data.length,
- };
+ this.ops.forEach((op, i) => {
+ datum[this.as[i]] = op(data);
+ });
- for (let i = 0; i < groupby.length; i++) {
- datum[groupby[i]] = group[i];
+ this._propagate(datum);
}
+ } else {
+ /** @type {Datum} */
+ const datum = {};
+ this.ops.forEach((op, i) => {
+ datum[this.as[i]] = op(this.buffer);
+ });
this._propagate(datum);
}
diff --git a/packages/core/src/data/transforms/aggregate.test.js b/packages/core/src/data/transforms/aggregate.test.js
new file mode 100644
index 00000000..c288482c
--- /dev/null
+++ b/packages/core/src/data/transforms/aggregate.test.js
@@ -0,0 +1,134 @@
+import { describe, expect, test } from "vitest";
+import { processData } from "../flowTestUtils.js";
+import AggregateTransform from "./aggregate.js";
+
+/**
+ * @param {import("../../spec/transform.js").AggregateParams} params
+ * @param {any[]} data
+ */
+function transform(params, data) {
+ return processData(new AggregateTransform(params), data);
+}
+
+describe("Aggregate transform", () => {
+ test("Default to count when no data fields or group-by fields are specified", () => {
+ const input = [
+ { name: "alpha", data: 123 },
+ { name: "beta", data: 456 },
+ { name: "beta", data: 789 },
+ ];
+
+ expect(transform({ type: "aggregate" }, input)).toEqual([{ count: 3 }]);
+ });
+
+ test("Default to count when no data fields fields are specified", () => {
+ const input = [
+ { name: "alpha", data: 123 },
+ { name: "beta", data: 456 },
+ { name: "beta", data: 789 },
+ ];
+
+ expect(
+ transform({ type: "aggregate", groupby: ["name"] }, input)
+ ).toEqual([
+ { name: "alpha", count: 1 },
+ { name: "beta", count: 2 },
+ ]);
+ });
+
+ test("Compute count, sum, min, max, and mean for groups. Use default output field names.", () => {
+ const input = [
+ { name: "alpha", data: 123 },
+ { name: "beta", data: 456 },
+ { name: "beta", data: 789 },
+ ];
+
+ expect(
+ transform(
+ {
+ type: "aggregate",
+ groupby: ["name"],
+ fields: ["data", "data", "data", "data", "data"],
+ ops: ["count", "sum", "min", "max", "mean"],
+ },
+ input
+ )
+ ).toEqual([
+ {
+ name: "alpha",
+ count_data: 1,
+ sum_data: 123,
+ min_data: 123,
+ max_data: 123,
+ mean_data: 123,
+ },
+ {
+ name: "beta",
+ count_data: 2,
+ sum_data: 1245,
+ min_data: 456,
+ max_data: 789,
+ mean_data: 622.5,
+ },
+ ]);
+ });
+
+ test("Allow custom output field names", () => {
+ const input = [
+ { name: "alpha", data: 123 },
+ { name: "beta", data: 456 },
+ { name: "beta", data: 789 },
+ ];
+
+ expect(
+ transform(
+ {
+ type: "aggregate",
+ fields: ["data", "data", "data", "data", "data"],
+ ops: ["count", "sum", "min", "max", "mean"],
+ as: ["count", "total", "min", "max", "average"],
+ },
+ input
+ )
+ ).toEqual([
+ {
+ count: 3,
+ total: 1368,
+ min: 123,
+ max: 789,
+ average: 456,
+ },
+ ]);
+ });
+
+ test("Throw if the length of fields and ops does not match", () => {
+ const input = [{ name: "beta", data: 789 }];
+
+ expect(() =>
+ transform(
+ {
+ type: "aggregate",
+ fields: ["data", "data", "data", "data"],
+ ops: ["count", "sum", "min", "max", "mean"],
+ },
+ input
+ )
+ ).toThrow();
+ });
+
+ test("Throw if the length of fields and as does not match", () => {
+ const input = [{ name: "beta", data: 789 }];
+
+ expect(() =>
+ transform(
+ {
+ type: "aggregate",
+ fields: ["data"],
+ ops: ["count"],
+ as: ["count", "total"],
+ },
+ input
+ )
+ ).toThrow();
+ });
+});
diff --git a/packages/core/src/data/transforms/aggregateOps.js b/packages/core/src/data/transforms/aggregateOps.js
new file mode 100644
index 00000000..6b4039b3
--- /dev/null
+++ b/packages/core/src/data/transforms/aggregateOps.js
@@ -0,0 +1,17 @@
+import { count, max, mean, median, min, sum, variance } from "d3-array";
+
+/**
+ * @type {Record number) => number>}
+ */
+const AGGREGATE_OPS = {
+ count: (arr) => arr.length,
+ valid: count,
+ sum,
+ min,
+ max,
+ mean,
+ median,
+ variance,
+};
+
+export default AGGREGATE_OPS;
diff --git a/packages/core/src/spec/transform.d.ts b/packages/core/src/spec/transform.d.ts
index 526b4abb..6fc291cd 100644
--- a/packages/core/src/spec/transform.d.ts
+++ b/packages/core/src/spec/transform.d.ts
@@ -188,14 +188,44 @@ export interface StackParams extends TransformParamsBase {
baseField?: Field;
}
+export type AggregateOp =
+ | "count"
+ | "valid"
+ | "sum"
+ | "min"
+ | "max"
+ | "mean"
+ | "median"
+ | "variance";
+
export interface AggregateParams extends TransformParamsBase {
type: "aggregate";
/**
- * Which fields to use for grouping. Missing `groupby` results in a single
- * group that includes all the data items.
+ * The fields by which to group the data. If these are not defined, all data
+ * objects will be grouped into a single category.
*/
groupby?: Field[];
+
+ /**
+ * The data fields to apply aggregate functions to. The array must have the
+ * same length as `ops` (and `as`, if given). If no fields are specified, a
+ * count aggregation is applied by default.
+ */
+ fields?: Field[];
+
+ /**
+ * The aggregation operations to be performed on the fields, such as `"sum"`,
+ * `"mean"`, or `"count"`.
+ */
+ ops?: AggregateOp[];
+
+ /**
+ * The names for the output fields corresponding to each aggregated field.
+ * If not provided, names will be automatically created using the operation
+ * and field names (e.g., `sum_field`, `mean_field`).
+ */
+ as?: string[];
}
export interface FlattenParams extends TransformParamsBase {