From 4fae7414305478a89e87507c293539b7d2931eba Mon Sep 17 00:00:00 2001 From: Alexander Tobi Fashakin Date: Fri, 2 Jun 2023 09:56:47 +0100 Subject: [PATCH] Document aggregation operations (#2672) Closes #2603. Co-authored-by: Chi Fujii Co-authored-by: Alexey Palazhchenko --- .../aggregation-operations/_category_.yml | 6 ++ .../aggregation-pipeline-and-commands.md | 77 +++++++++++++++++++ .../aggregation-stages.md | 17 ++++ website/docs/configuration/_category_.yml | 2 +- website/docs/contributing/_category_.yml | 2 +- website/docs/diff.md | 2 +- website/docs/indexes.md | 2 +- website/docs/pushdown.md | 2 +- website/docs/reference/_category_.yml | 2 +- website/docs/security.md | 2 +- website/docs/telemetry.md | 2 +- .../static/img/docs/aggregation-stages.jpg | 3 + 12 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 website/docs/aggregation-operations/_category_.yml create mode 100644 website/docs/aggregation-operations/aggregation-pipeline-and-commands.md create mode 100644 website/docs/aggregation-operations/aggregation-stages.md create mode 100644 website/static/img/docs/aggregation-stages.jpg diff --git a/website/docs/aggregation-operations/_category_.yml b/website/docs/aggregation-operations/_category_.yml new file mode 100644 index 000000000000..0f49f7e1a137 --- /dev/null +++ b/website/docs/aggregation-operations/_category_.yml @@ -0,0 +1,6 @@ +--- +label: Aggregation Operations +position: 6 +link: + description: > + This section details aggregation operations in FerretDB, including aggregation commands, stages, and operators diff --git a/website/docs/aggregation-operations/aggregation-pipeline-and-commands.md b/website/docs/aggregation-operations/aggregation-pipeline-and-commands.md new file mode 100644 index 000000000000..129926ec89b0 --- /dev/null +++ b/website/docs/aggregation-operations/aggregation-pipeline-and-commands.md @@ -0,0 +1,77 @@ +--- +sidebar_position: 1 +--- + +# Aggregation pipeline and commands + +Aggregation operations 
involve performing various operations on a large number of data records, such as data grouping, sorting, restructuring, or modifying. +These operations pass through one or more stages, which make up a pipeline. + +![aggregation stages](/img/docs/aggregation-stages.jpg) + +Each stage acts upon the returned documents of the previous stage, starting with the input documents. +As shown above, the documents pass through the pipeline with the result of the previous stage acting as input for the next stage, going from `$match` => `$group` => `$sort` stages. + +For example, insert the following documents in a `sales` collection: + +```js +db.sales.insertMany([ + { _id: 1, category: 'Electronics', price: 1000 }, + { _id: 2, category: 'Electronics', price: 800 }, + { _id: 3, category: 'Clothing', price: 30 }, + { _id: 4, category: 'Clothing', price: 50 }, + { _id: 5, category: 'Home', price: 1500 }, + { _id: 6, category: 'Home', price: 1200 }, + { _id: 7, category: 'Books', price: 20 }, + { _id: 8, category: 'Books', price: 40 } +]) +``` + +A typical aggregation pipeline would look like this: + +```js +db.sales.aggregate([ + { $match: { category: { $ne: 'Electronics' } } }, + { + $group: { + _id: '$category', + totalPrice: { $sum: '$price' }, + productCount: { $sum: 1 } + } + }, + { $sort: { totalPrice: -1 } } +]) +``` + +In the pipeline, the complex query is broken down into separate stages where the record goes through a series of transformations until it finally produces the desired result. +First, the `$match` stage keeps only the documents where the `category` field is not `Electronics`. +Then, the `$group` stage groups the documents by their `category` and calculates the total price and product count for each category. +Finally, the `$sort` stage sorts the documents by the `totalPrice` field in descending order.
+ +So the above aggregation pipeline operation would return the following result: + +```json5 +[ + { _id: 'Home', totalPrice: 2700, productCount: 2 }, + { _id: 'Clothing', totalPrice: 80, productCount: 2 }, + { _id: 'Books', totalPrice: 60, productCount: 2 } +] +``` + +This section of the documentation will focus on the [`aggregate` command](#aggregate-command), [aggregation stages](./aggregation-stages.md), and aggregation operators. + +## `aggregate` command + +The aggregation command `aggregate` is a top-level command used for aggregating data across various pipeline stages. + +The command is used for performing aggregation operations on a collection and lets you specify aggregation operations in a pipeline consisting of one or more stages and operators for transforming and analyzing data, such as grouping, filtering, sorting, projecting, and calculating aggregates. + +```js +// Aggregation pipeline to perform aggregation operations on a collection +db.collection.aggregate([ + // Stage 1: Matching documents based on a specific field and value + { $match: { field: value } }, + // Stage 2: Grouping documents by the "category" field and calculating the sum of the "quantity" field + { $group: { _id: '$category', total: { $sum: '$quantity' } } } +]) +``` diff --git a/website/docs/aggregation-operations/aggregation-stages.md b/website/docs/aggregation-operations/aggregation-stages.md new file mode 100644 index 000000000000..3c1fb76552ad --- /dev/null +++ b/website/docs/aggregation-operations/aggregation-stages.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 2 +--- + +# Aggregation stages + +Aggregation stages are a series of one or more processes in a pipeline that act upon the returned result of the previous stage, starting with the input documents.
+ +| Supported aggregation stages | Description | +| ---------------------------- | ----------------------------------------------------------------------------------------------------- | +| `$count` | Returns the count of all matched documents in a specified query | +| `$group` | Groups documents based on a specific value or expression and returns a single document for each group | +| `$limit` | Passes only the first `n` documents to the next stage | +| `$match` | Acts as a `find` operation by only returning documents that match a specified query to the next stage | +| `$skip` | Skips a specified `n` number of documents and passes the rest to the next stage | +| `$sort` | Sorts and returns all the documents based on a specified order | +| `$unwind` | Deconstructs and returns a document for every element in an array field | diff --git a/website/docs/configuration/_category_.yml b/website/docs/configuration/_category_.yml index 8457a36691a2..a1f39560e4e8 100644 --- a/website/docs/configuration/_category_.yml +++ b/website/docs/configuration/_category_.yml @@ -1,6 +1,6 @@ --- label: Configuration -position: 8 +position: 9 link: type: generated-index description: > diff --git a/website/docs/contributing/_category_.yml b/website/docs/contributing/_category_.yml index 8d7315adee3c..b8875e15fa87 100644 --- a/website/docs/contributing/_category_.yml +++ b/website/docs/contributing/_category_.yml @@ -1,6 +1,6 @@ --- label: Contributing to FerretDB -position: 13 +position: 14 link: type: generated-index description: > diff --git a/website/docs/diff.md b/website/docs/diff.md index 3d9023c37ca2..da7318a8d3d4 100644 --- a/website/docs/diff.md +++ b/website/docs/diff.md @@ -1,5 +1,5 @@ --- -sidebar_position: 9 +sidebar_position: 10 slug: /diff/ # referenced in README.md and beacon --- diff --git a/website/docs/indexes.md b/website/docs/indexes.md index 4a483cd6abcd..23f0b005db23 100644 --- a/website/docs/indexes.md +++ b/website/docs/indexes.md @@ -1,5 +1,5 @@ ---
-sidebar_position: 7 +sidebar_position: 8 --- # Indexes diff --git a/website/docs/pushdown.md b/website/docs/pushdown.md index 6a09a88b9f31..c6f0f228cfcf 100644 --- a/website/docs/pushdown.md +++ b/website/docs/pushdown.md @@ -1,5 +1,5 @@ --- -sidebar_position: 6 +sidebar_position: 7 hide_table_of_contents: true --- diff --git a/website/docs/reference/_category_.yml b/website/docs/reference/_category_.yml index 92aa530b15f7..74d90ba245fd 100644 --- a/website/docs/reference/_category_.yml +++ b/website/docs/reference/_category_.yml @@ -1,6 +1,6 @@ --- label: Reference -position: 12 +position: 13 link: type: generated-index description: > diff --git a/website/docs/security.md b/website/docs/security.md index 1d55322df179..37cb6fed37d6 100644 --- a/website/docs/security.md +++ b/website/docs/security.md @@ -1,5 +1,5 @@ --- -sidebar_position: 11 +sidebar_position: 12 slug: /security/ # referenced in README.md description: TLS and authentication --- diff --git a/website/docs/telemetry.md b/website/docs/telemetry.md index c3b29b58154b..e7dffa0bd480 100644 --- a/website/docs/telemetry.md +++ b/website/docs/telemetry.md @@ -1,5 +1,5 @@ --- -sidebar_position: 10 +sidebar_position: 11 slug: /telemetry/ # referenced in many places; must not change --- diff --git a/website/static/img/docs/aggregation-stages.jpg b/website/static/img/docs/aggregation-stages.jpg new file mode 100644 index 000000000000..c4b4b675c64f --- /dev/null +++ b/website/static/img/docs/aggregation-stages.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0380eb1fa85c07ee5c6a353df846f5205311b212cd29620ba499dd1722166ef0 +size 150276