Skip to content

Commit

Permalink
add upload snapshot functionality using multipart request (qdrant#1578)
Browse files Browse the repository at this point in the history
* add upload snapshot functionality using multipart request

* remove unnecessary printing

* Add proper error handling

* add snapshot upload integration tests

* Fix maximum uploaded snapshot size limit

* review changes

* update OpenAPI with upload endpoint

* add missed query param in openapi

* remove unused function

---------

Co-authored-by: Andrey Vasnetsov <andrey@vasnetsov.com>
  • Loading branch information
ibrahim-akrab and generall committed Apr 11, 2023
1 parent adb148b commit 2164134
Show file tree
Hide file tree
Showing 10 changed files with 484 additions and 4 deletions.
95 changes: 95 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = [ "web", "parking_lot" ]
default = ["web", "parking_lot"]
web = ["actix-web"]
service_debug = ["parking_lot", "parking_lot/deadlock_detection"]

Expand Down Expand Up @@ -68,6 +68,7 @@ segment = { path = "lib/segment" }
collection = { path = "lib/collection" }
storage = { path = "lib/storage" }
api = { path = "lib/api" }
actix-multipart = "0.6.0"

[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.5"
Expand All @@ -83,4 +84,4 @@ members = ["lib/*"]
exclude = ["benches/search-points"]

[profile.release]
lto = "fat"
lto = "fat"
104 changes: 104 additions & 0 deletions docs/redoc/master/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,110 @@
}
}
},
"/collections/{collection_name}/snapshots/upload": {
"post": {
"tags": [
"snapshots",
"collections"
],
"summary": "Recover from an uploaded snapshot",
"description": "Recover local collection data from an uploaded snapshot. This will overwrite any data, stored on this node, for the collection. If collection does not exist - it will be created.",
"operationId": "recover_from_uploaded_snapshot",
"parameters": [
{
"name": "collection_name",
"in": "path",
"description": "Name of the collection",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "wait",
"in": "query",
"description": "If true, wait for changes to actually happen. If false - let changes happen in background. Default is true.",
"required": false,
"schema": {
"type": "boolean"
}
},
{
"name": "priority",
"in": "query",
"description": "Defines source of truth for snapshot recovery",
"required": false,
"schema": {
"$ref": "#/components/schemas/SnapshotPriority"
}
}
],
"requestBody": {
"description": "Snapshot to recover from",
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"properties": {
"snapshot": {
"type": "string",
"format": "binary"
}
}
}
}
}
},
"responses": {
"default": {
"description": "error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
}
}
}
},
"4XX": {
"description": "error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
}
}
}
},
"200": {
"description": "successful operation",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"time": {
"type": "number",
"format": "float",
"description": "Time spent to process this request"
},
"status": {
"type": "string",
"enum": [
"ok"
]
},
"result": {
"type": "boolean"
}
}
}
}
}
}
}
}
},
"/collections/{collection_name}/snapshots/recover": {
"put": {
"tags": [
Expand Down
2 changes: 1 addition & 1 deletion lib/collection/src/operations/snapshot_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::operations::types::CollectionResult;
/// Defines source of truth for snapshot recovery
/// `Snapshot` means - prefer snapshot data over the current state
/// `Replica` means - prefer existing data over the snapshot
#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, Clone)]
#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum SnapshotPriority {
Snapshot,
Expand Down
1 change: 1 addition & 0 deletions lib/storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ uuid = "1.3.0"
url = "2.3.1"
reqwest = { version = "0.11", features = ["stream", "rustls-tls"] }
openssl = { version = "0.10", features = ["vendored"] }
tempfile = "3.4.0"
10 changes: 10 additions & 0 deletions lib/storage/src/content_manager/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::io::Error as IoError;

use collection::operations::types::CollectionError;
use segment::common::file_operations::FileStorageError;
use tempfile::PersistError;
use thiserror::Error;

#[derive(Error, Debug, Clone)]
Expand Down Expand Up @@ -230,3 +231,12 @@ impl From<tokio::task::JoinError> for StorageError {
}
}
}

impl From<PersistError> for StorageError {
fn from(err: PersistError) -> Self {
StorageError::ServiceError {
description: format!("Persist error: {err}"),
backtrace: Some(Backtrace::force_capture().to_string()),
}
}
}
38 changes: 38 additions & 0 deletions openapi/openapi-snapshots.ytt.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,44 @@
#@ load("openapi.lib.yml", "response", "reference", "type", "array")

paths:
#! Upload endpoint: POST a snapshot as multipart/form-data (binary field `snapshot`)
#! for the collection named in the path. Optional query parameters: `wait` and `priority`.
#! The response schema is generated by the `response(...)` helper loaded from openapi.lib.yml.
/collections/{collection_name}/snapshots/upload:
post:
tags:
- snapshots
- collections
summary: Recover from an uploaded snapshot
description: Recover local collection data from an uploaded snapshot. This will overwrite any data, stored on this node, for the collection. If collection does not exist - it will be created.
operationId: recover_from_uploaded_snapshot
parameters:
- name: collection_name
in: path
description: Name of the collection
required: true
schema:
type: string
- name: wait
in: query
description: "If true, wait for changes to actually happen. If false - let changes happen in background. Default is true."
required: false
schema:
type: boolean
- name: priority
in: query
description: "Defines source of truth for snapshot recovery"
required: false
schema:
$ref: "#/components/schemas/SnapshotPriority"
requestBody:
description: Snapshot to recover from
content:
multipart/form-data:
schema:
type: object
properties:
snapshot:
type: string
format: binary
responses: #@ response(type("boolean"))
/collections/{collection_name}/snapshots/recover:
put:
tags:
Expand Down
Loading

0 comments on commit 2164134

Please sign in to comment.