From 8053b9ac597ce5eb4ee2cd54d1a031ff07a5a50d Mon Sep 17 00:00:00 2001 From: DataHearth Date: Mon, 22 May 2023 20:18:58 +0200 Subject: [PATCH 01/52] add regex query filter --- src/polodb_core/Cargo.toml | 1 + src/polodb_core/errors.rs | 63 +++---- src/polodb_core/vm/codegen.rs | 274 ++++++++++++++++++------------- src/polodb_core/vm/op.rs | 1 + src/polodb_core/vm/subprogram.rs | 122 +++++++------- src/polodb_core/vm/vm.rs | 188 +++++++++++++-------- 6 files changed, 377 insertions(+), 272 deletions(-) diff --git a/src/polodb_core/Cargo.toml b/src/polodb_core/Cargo.toml index d56fe980..e9e98446 100644 --- a/src/polodb_core/Cargo.toml +++ b/src/polodb_core/Cargo.toml @@ -32,6 +32,7 @@ serde-wasm-bindgen = "0.5.0" wasm-bindgen-futures = "0.4.34" thiserror = "1.0.40" indexmap = { version = "1.9.3", features = ["serde"] } +regex = "1" [dependencies.web-sys] version = "0.3.61" diff --git a/src/polodb_core/errors.rs b/src/polodb_core/errors.rs index 6962e64e..82f4fe7e 100644 --- a/src/polodb_core/errors.rs +++ b/src/polodb_core/errors.rs @@ -3,12 +3,12 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use std::io; +use bson::ser::Error as BsonErr; +use bson::Document; use std::fmt; +use std::io; use std::string::FromUtf8Error; use std::sync::PoisonError; -use bson::Document; -use bson::ser::Error as BsonErr; use thiserror::Error; #[derive(Debug)] @@ -39,12 +39,13 @@ impl From for Error { } impl fmt::Display for FieldTypeUnexpectedStruct { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "unexpected type for field '{}', expected: {}, actual: {}", - self.field_name, self.expected_ty, self.actual_ty) + write!( + f, + "unexpected type for field '{}', expected: {}, actual: {}", + self.field_name, self.expected_ty, self.actual_ty + ) } - } #[derive(Debug)] @@ -55,15 +56,17 @@ pub struct UnexpectedHeader { } impl fmt::Display for UnexpectedHeader { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "page_id: {}, expected header: 0x{:02X} 0x{:02X}, actual: 0x{:02X} 0x{:02X}", - self.page_id, - self.expected_header[0], self.expected_header[1], - self.actual_header[0], self.actual_header[1] + write!( + f, + "page_id: {}, expected header: 0x{:02X} 0x{:02X}, actual: 0x{:02X} 0x{:02X}", + self.page_id, + self.expected_header[0], + self.expected_header[1], + self.actual_header[0], + self.actual_header[1] ) } - } #[derive(Debug)] @@ -113,9 +116,16 @@ pub struct DataMalformedReason { #[derive(Debug)] pub struct DuplicateKeyError { - pub name: String, // index name - pub key: String, // key name - pub ns: String, // collection name + pub name: String, // index name + pub key: String, // key name + pub ns: String, // collection name +} + +#[derive(Debug)] +pub struct RegexCompileError { + pub error: String, + pub expression: String, + pub options: String, } #[derive(Error, Debug)] @@ -139,7 +149,7 @@ pub enum Error { #[error("parse error: {0}")] ParseError(String), #[error("io error: {}, backtrace: {}", .0.source, .0.backtrace)] - IOErr(Box>) , + IOErr(Box>), #[error("utf8 error: {source}")] UTF8Err { #[from] @@ -237,10 +247,11 @@ pub enum Error { DuplicateKey(Box), #[error("the element type {0} is unknown")] UnknownBsonElementType(u8), + #[error("failed to compile regex expression: {}, expression: {}, options: {}", .0.error, .0.expression, .0.options)] + RegexCompileError(Box), } impl Error { - pub(crate) fn add(self, next: Error) -> Error { match self { Error::Multiple(mut result) => { @@ -260,26 +271,21 @@ impl Error { backtrace: std::backtrace::Backtrace::capture(), })) } - } impl From for Error { - fn from(error: bson::de::Error) -> Self { Error::BsonDeErr(Box::new(error)) } - } impl From for Error { - fn from(error: BsonErr) -> Self { Error::BsonErr(Box::new(BtWrapper { source: error, backtrace: std::backtrace::Backtrace::capture(), })) } - } impl From> for Error { @@ -289,19 +295,15 @@ impl From> for Error { } impl From for Error { - fn from(value: FromUtf8Error) -> Self { Error::FromUtf8Error(Box::new(value)) } - } impl From for Error { - fn from(value: DuplicateKeyError) -> Self { Error::DuplicateKey(Box::new(value)) } - } impl From for Error { @@ -313,6 +315,12 @@ impl From for Error { } } +impl From for Error { + fn from(value: RegexCompileError) -> Self { + Error::RegexCompileError(Box::new(value)) + } +} + #[cfg(test)] mod tests { use crate::Error; @@ -322,5 +330,4 @@ mod tests { let size = std::mem::size_of::(); assert_eq!(size, 32); } - } diff --git a/src/polodb_core/vm/codegen.rs b/src/polodb_core/vm/codegen.rs index 84c30f78..b60a0670 100644 --- a/src/polodb_core/vm/codegen.rs +++ b/src/polodb_core/vm/codegen.rs @@ -3,25 +3,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use bson::{Bson, Document, Array, Binary}; -use bson::spec::{BinarySubtype, ElementType}; -use super::label::{Label, LabelSlot, JumpTableRecord}; -use crate::vm::SubProgram; -use crate::vm::op::DbOp; -use crate::index::INDEX_PREFIX; -use crate::{Result, Error}; +use super::label::{JumpTableRecord, Label, LabelSlot}; use crate::coll::collection_info::CollectionSpecification; -use crate::errors::{FieldTypeUnexpectedStruct, mk_invalid_query_field}; +use crate::errors::{mk_invalid_query_field, FieldTypeUnexpectedStruct}; +use crate::index::INDEX_PREFIX; +use crate::vm::op::DbOp; use crate::vm::subprogram::SubProgramIndexItem; +use crate::vm::SubProgram; +use crate::{Error, Result}; +use bson::spec::{BinarySubtype, ElementType}; +use bson::{Array, Binary, Bson, Document}; const JUMP_TABLE_DEFAULT_SIZE: usize = 8; const PATH_DEFAULT_SIZE: usize = 8; mod update_op { - use bson::Bson; use crate::vm::codegen::Codegen; - use crate::Result; use crate::vm::op::DbOp; + use crate::Result; + use bson::Bson; pub(super) fn update_op_min_max(codegen: &mut Codegen, doc: &Bson, min: bool) -> Result<()> { let doc = crate::try_unwrap_document!("$min", doc); @@ -35,9 +35,9 @@ mod update_op { let key_id_2 = codegen.push_static(Bson::from(key.clone())); let value_id = codegen.push_static(value.clone()); - codegen.emit_goto2(DbOp::GetField, key_id_1, next_element_label); // stack +1 + codegen.emit_goto2(DbOp::GetField, key_id_1, next_element_label); // stack +1 - codegen.emit_push_value(value_id); // stack +2 + codegen.emit_push_value(value_id); // stack +2 if min { codegen.emit(DbOp::Less); @@ -51,7 +51,7 @@ mod update_op { codegen.emit_label(set_field_label); codegen.emit(DbOp::Pop); - codegen.emit(DbOp::Pop); // stack + codegen.emit(DbOp::Pop); // stack codegen.emit_push_value(value_id); @@ -72,15 +72,14 @@ mod update_op { Ok(()) } - } pub(super) struct Codegen { - program: Box, - jump_table: Vec, + program: Box, + jump_table: Vec, skip_annotation: bool, - is_write: bool, - paths: Vec, + is_write: bool, + paths: Vec, } macro_rules! path_hint { @@ -88,11 +87,10 @@ macro_rules! path_hint { $self.paths.push($key); $content; $self.paths.pop(); - } + }; } impl Codegen { - pub(super) fn new(skip_annotation: bool, is_write: bool) -> Codegen { Codegen { program: Box::new(SubProgram::new()), @@ -144,9 +142,8 @@ impl Codegen { if self.skip_annotation { self.program.label_slots[label.u_pos()] = LabelSlot::UnnamedLabel(current_loc); } else { - self.program.label_slots[label.u_pos()] = LabelSlot::LabelWithString( - current_loc, name.into() - ); + self.program.label_slots[label.u_pos()] = + LabelSlot::LabelWithString(current_loc, name.into()); } } @@ -154,10 +151,10 @@ impl Codegen { &mut self, pkey: Bson, query: &Document, - result_callback: F + result_callback: F, ) -> Result<()> - where - F: FnOnce(&mut Codegen) -> Result<()> + where + F: FnOnce(&mut Codegen) -> Result<()>, { let close_label = self.new_label(); let result_label = self.new_label(); @@ -184,7 +181,7 @@ impl Codegen { let value_static_id = self.push_static(value.clone()); self.emit_goto2(DbOp::GetField, key_static_id, close_label); // push a value1 - self.emit_push_value(value_static_id); // push a value2 + self.emit_push_value(value_static_id); // push a value2 self.emit(DbOp::Equal); // if not equal,go to next @@ -209,24 +206,16 @@ impl Codegen { is_many: bool, ) -> Result<()> where - F: FnOnce(&mut Codegen) -> Result<()> + F: FnOnce(&mut Codegen) -> Result<()>, { - let try_pkey_result = self.try_query_by_pkey( - col_spec, - query, - result_callback, - )?; + let try_pkey_result = self.try_query_by_pkey(col_spec, query, result_callback)?; if try_pkey_result.is_none() { return Ok(()); } let result_callback: F = try_pkey_result.unwrap(); - let try_index_result = self.try_query_by_index( - col_spec, - query, - result_callback, - )?; + let try_index_result = self.try_query_by_index(col_spec, query, result_callback)?; if try_index_result.is_none() { return Ok(()); } @@ -258,7 +247,7 @@ impl Codegen { // <==== not this item, go to next item self.emit_label_with_name(not_found_label, "Not this item"); self.emit(DbOp::RecoverStackPos); - self.emit(DbOp::Pop); // pop the current value; + self.emit(DbOp::Pop); // pop the current value; self.emit_goto(DbOp::Goto, next_label); // <==== get field failed, got to next item @@ -287,12 +276,7 @@ impl Codegen { self.emit_label_with_name(compare_label, "Compare"); self.emit(DbOp::SaveStackPos); - self.emit_standard_query_doc( - query, - result_label, - get_field_failed_label, - not_found_label - )?; + self.emit_standard_query_doc(query, result_label, get_field_failed_label, not_found_label)?; self.emit_goto(DbOp::Goto, result_label); @@ -306,7 +290,7 @@ impl Codegen { result_callback: F, ) -> Result> where - F: FnOnce(&mut Codegen) -> Result<()> + F: FnOnce(&mut Codegen) -> Result<()>, { if let Some(id_value) = query.get("_id") { if id_value.element_type() != ElementType::EmbeddedDocument { @@ -326,7 +310,7 @@ impl Codegen { result_callback: F, ) -> Result> where - F: FnOnce(&mut Codegen) -> Result<()> + F: FnOnce(&mut Codegen) -> Result<()>, { if self.is_write { return Ok(Some(result_callback)); @@ -368,18 +352,14 @@ impl Codegen { result_callback: F, ) -> Result<()> where - F: FnOnce(&mut Codegen) -> Result<()> + F: FnOnce(&mut Codegen) -> Result<()>, { let prefix_bytes = { let b_prefix = Bson::String(INDEX_PREFIX.to_string()); let b_col_name = Bson::String(col_name.to_string()); let b_index_name = &Bson::String(index_name.to_string()); - let buf: Vec<&Bson> = vec![ - &b_prefix, - &b_col_name, - &b_index_name, - ]; + let buf: Vec<&Bson> = vec![&b_prefix, &b_col_name, &b_index_name]; crate::utils::bson::stacked_key(buf)? }; @@ -407,8 +387,8 @@ impl Codegen { self.emit_label(close_label); - self.emit(DbOp::Pop); // pop the collection name - self.emit(DbOp::Pop); // pop the query value + self.emit(DbOp::Pop); // pop the collection name + self.emit(DbOp::Pop); // pop the query value self.emit(DbOp::Close); self.emit(DbOp::Halt); @@ -419,7 +399,7 @@ impl Codegen { let value_static_id = self.push_static(value.clone()); self.emit_goto2(DbOp::GetField, key_static_id, close_label); // push a value1 - self.emit_push_value(value_static_id); // push a value2 + self.emit_push_value(value_static_id); // push a value2 self.emit(DbOp::Equal); // if not equal,go to next @@ -446,7 +426,8 @@ impl Codegen { for (key, value) in query_doc.iter() { path_hint!(self, key.clone(), { self.emit_query_tuple( - key, value, + key, + value, result_label, get_field_failed_label, not_found_label, @@ -478,7 +459,7 @@ impl Codegen { arr: &Array, result_label: Label, get_field_failed_label: Label, - not_found_label: Label + not_found_label: Label, ) -> Result<()> { for (index, item_doc_value) in arr.iter().enumerate() { let path_msg = format!("[{}]", index); @@ -486,7 +467,9 @@ impl Codegen { let item_doc = crate::try_unwrap_document!("$and", item_doc_value); self.emit_standard_query_doc( item_doc, - result_label, get_field_failed_label, not_found_label + result_label, + get_field_failed_label, + not_found_label, )?; }); } @@ -499,17 +482,21 @@ impl Codegen { arr: &Array, result_label: Label, global_get_field_failed_label: Label, - not_found_label: Label + not_found_label: Label, ) -> Result<()> { for (index, item_doc_value) in arr.iter().enumerate() { let path_msg = format!("[{}]", index); path_hint!(self, path_msg, { let item_doc = crate::try_unwrap_document!("$or", item_doc_value); - if index == (arr.len() as usize) - 1 { // last item + if index == (arr.len() as usize) - 1 { + // last item for (key, value) in item_doc.iter() { self.emit_query_tuple( - key, value, result_label, - global_get_field_failed_label, not_found_label + key, + value, + result_label, + global_get_field_failed_label, + not_found_label, )?; } } else { @@ -527,7 +514,7 @@ impl Codegen { item_doc, result_label, local_get_field_failed_label, - local_get_field_failed_label + local_get_field_failed_label, )?; // pass, goto result self.emit_goto(DbOp::Goto, result_label); @@ -558,7 +545,7 @@ impl Codegen { sub_arr.as_ref(), result_label, get_field_failed_label, - not_found_label + not_found_label, )?; } @@ -568,45 +555,54 @@ impl Codegen { sub_arr.as_ref(), result_label, get_field_failed_label, - not_found_label + not_found_label, )?; } "$not" => { let sub_doc = crate::try_unwrap_document!("$not", value); // swap label - let (get_field_failed_label, not_found_label) = (not_found_label, get_field_failed_label); + let (get_field_failed_label, not_found_label) = + (not_found_label, get_field_failed_label); return self.emit_query_tuple_document( - key, &sub_doc, - get_field_failed_label, not_found_label + key, + &sub_doc, + get_field_failed_label, + not_found_label, ); } - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path()) - )), - + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } } } else { match value { Bson::Document(doc) => { return self.emit_query_tuple_document( - key, doc, - get_field_failed_label, not_found_label + key, + doc, + get_field_failed_label, + not_found_label, ); } - Bson::Array(_) => + Bson::Array(_) => { return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path()) - )), + self.last_key().into(), + self.gen_path(), + ))) + } _ => { let key_static_id = self.push_static(key.into()); self.emit_goto2(DbOp::GetField, key_static_id, get_field_failed_label); let value_static_id = self.push_static(value.clone()); - self.emit_push_value(value_static_id); // push a value2 + self.emit_push_value(value_static_id); // push a value2 self.emit(DbOp::Equal); // if not equal,go to next @@ -634,8 +630,9 @@ impl Codegen { &mut self, key: &str, get_field_failed_label: Label, - not_found_label: Label, sub_key: &str, - sub_value: &Bson + not_found_label: Label, + sub_key: &str, + sub_value: &Bson, ) -> Result<()> { match sub_key { "$eq" => { @@ -682,9 +679,12 @@ impl Codegen { "$in" => { match sub_value { Bson::Array(_) => (), - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path()) - )), + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } } let field_size = self.recursively_get_field(key, get_field_failed_label); @@ -743,9 +743,12 @@ impl Codegen { "$nin" => { match sub_value { Bson::Array(_) => (), - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path()) - )), + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } } let field_size = self.recursively_get_field(key, get_field_failed_label); @@ -763,9 +766,12 @@ impl Codegen { "$size" => { let expected_size = match sub_value { Bson::Int64(i) => *i, - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path() - ))), + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } }; let field_size = self.recursively_get_field(key, get_field_failed_label); @@ -782,9 +788,36 @@ impl Codegen { self.emit_u32((field_size + 1) as u32); } - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), self.gen_path()) - )), + "$regex" => { + match sub_value { + Bson::RegularExpression(_) => (), + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } + } + + let field_size = self.recursively_get_field(key, get_field_failed_label); + + let stat_val_id = self.push_static(sub_value.clone()); + self.emit_push_value(stat_val_id); + self.emit(DbOp::Regex); + + // if not equal,go to next + self.emit_goto(DbOp::IfFalse, not_found_label); + + self.emit(DbOp::Pop2); + self.emit_u32((field_size + 1) as u32); + } + + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } } Ok(()) } @@ -795,13 +828,16 @@ impl Codegen { key: &str, value: &Document, get_field_failed_label: Label, - not_found_label: Label + not_found_label: Label, ) -> Result<()> { for (sub_key, sub_value) in value.iter() { path_hint!(self, sub_key.clone(), { self.emit_query_tuple_document_kv( - key, get_field_failed_label, not_found_label, - sub_key.as_ref(), sub_value + key, + get_field_failed_label, + not_found_label, + sub_key.as_ref(), + sub_value, )?; }); } @@ -864,7 +900,8 @@ impl Codegen { field_name: key.into(), expected_ty: "String".into(), actual_ty: name, - }.into()); + } + .into()); } }; @@ -894,24 +931,30 @@ impl Codegen { for (key, value) in doc.iter() { let num = match value { Bson::Int64(i) => *i, - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), - self.gen_path() - ))) + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } }; - self.emit_pop_field(key.as_str(), match num { - 1 => false, - -1 => true, - _ => return Err(Error::InvalidField(mk_invalid_query_field( - self.last_key().into(), - self.gen_path() - ))) - }); + self.emit_pop_field( + key.as_str(), + match num { + 1 => false, + -1 => true, + _ => { + return Err(Error::InvalidField(mk_invalid_query_field( + self.last_key().into(), + self.gen_path(), + ))) + } + }, + ); } } _ => return Err(Error::UnknownUpdateOperation(key.into())), - } Ok(()) @@ -1057,9 +1100,8 @@ impl Codegen { } let bytes: [u8; 4] = (-1 as i32).to_le_bytes(); self.program.instructions.extend_from_slice(&bytes); - self.jump_table.push( - JumpTableRecord::new(record_loc, 1, label.pos()) - ); + self.jump_table + .push(JumpTableRecord::new(record_loc, 1, label.pos())); } pub(super) fn emit_goto2(&mut self, op: DbOp, op1: u32, label: Label) { @@ -1076,9 +1118,7 @@ impl Codegen { } let bytes2: [u8; 4] = (-1 as i32).to_le_bytes(); self.program.instructions.extend_from_slice(&bytes2); - self.jump_table.push( - JumpTableRecord::new(record_loc, 5, label.pos()) - ); + self.jump_table + .push(JumpTableRecord::new(record_loc, 5, label.pos())); } - } diff --git a/src/polodb_core/vm/op.rs b/src/polodb_core/vm/op.rs index ed14e565..e87ee0cb 100644 --- a/src/polodb_core/vm/op.rs +++ b/src/polodb_core/vm/op.rs @@ -203,6 +203,7 @@ pub enum DbOp { GreaterEqual, Less, LessEqual, + Regex, // check if top0 is in top2 // the result is stored in r0 diff --git a/src/polodb_core/vm/subprogram.rs b/src/polodb_core/vm/subprogram.rs index 877429a8..26ac84f2 100644 --- a/src/polodb_core/vm/subprogram.rs +++ b/src/polodb_core/vm/subprogram.rs @@ -3,18 +3,15 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use std::fmt; -use bson::{Bson, Document}; -use indexmap::IndexMap; -use crate::coll::collection_info::{ - CollectionSpecification, - IndexInfo, -}; -use crate::Result; -use crate::utils::str::escape_binary_to_string; -use super::op::DbOp; use super::label::LabelSlot; +use super::op::DbOp; +use crate::coll::collection_info::{CollectionSpecification, IndexInfo}; +use crate::utils::str::escape_binary_to_string; use crate::vm::codegen::Codegen; +use crate::Result; +use bson::{Bson, Document}; +use indexmap::IndexMap; +use std::fmt; pub(crate) struct SubProgramIndexItem { pub col_name: String, @@ -23,13 +20,12 @@ pub(crate) struct SubProgramIndexItem { pub(crate) struct SubProgram { pub(super) static_values: Vec, - pub(super) instructions: Vec, - pub(super) label_slots: Vec, - pub(super) index_infos: Vec, + pub(super) instructions: Vec, + pub(super) label_slots: Vec, + pub(super) index_infos: Vec, } impl SubProgram { - pub(super) fn new() -> SubProgram { SubProgram { static_values: Vec::with_capacity(32), @@ -62,7 +58,7 @@ impl SubProgram { codegen.emit(DbOp::Pop); Ok(()) }, - true + true, )?; Ok(codegen.take()) @@ -72,7 +68,8 @@ impl SubProgram { col_spec: &CollectionSpecification, query: Option<&Document>, update: &Document, - skip_annotation: bool, is_many: bool, + skip_annotation: bool, + is_many: bool, ) -> Result { let mut codegen = Codegen::new(skip_annotation, true); @@ -80,7 +77,7 @@ impl SubProgram { let index_item_id: u32 = if has_indexes { codegen.push_index_info(SubProgramIndexItem { col_name: col_spec._id.to_string(), - indexes: col_spec.indexes.clone() + indexes: col_spec.indexes.clone(), }) } else { u32::MAX @@ -105,7 +102,7 @@ impl SubProgram { codegen.emit(DbOp::Pop); Ok(()) }, - is_many + is_many, )?; Ok(codegen.take()) @@ -115,7 +112,8 @@ impl SubProgram { col_spec: &CollectionSpecification, col_name: &str, query: Option<&Document>, - skip_annotation: bool, is_many: bool, + skip_annotation: bool, + is_many: bool, ) -> Result { let mut codegen = Codegen::new(skip_annotation, true); @@ -123,7 +121,7 @@ impl SubProgram { let index_item_id: u32 = if has_indexes { codegen.push_index_info(SubProgramIndexItem { col_name: col_spec._id.to_string(), - indexes: col_spec.indexes.clone() + indexes: col_spec.indexes.clone(), }) } else { u32::MAX @@ -144,7 +142,7 @@ impl SubProgram { codegen.emit(DbOp::Pop); Ok(()) }, - is_many + is_many, )?; Ok(codegen.take()) @@ -154,7 +152,7 @@ impl SubProgram { pub(crate) fn compile_delete_all( col_spec: &CollectionSpecification, col_name: &str, - skip_annotation: bool + skip_annotation: bool, ) -> Result { let mut codegen = Codegen::new(skip_annotation, true); @@ -162,7 +160,7 @@ impl SubProgram { let index_item_id: u32 = if has_indexes { codegen.push_index_info(SubProgramIndexItem { col_name: col_spec._id.to_string(), - indexes: col_spec.indexes.clone() + indexes: col_spec.indexes.clone(), }) } else { u32::MAX @@ -198,11 +196,17 @@ impl SubProgram { Ok(codegen.take()) } - pub(crate) fn compile_query_all(col_spec: &CollectionSpecification, skip_annotation: bool) -> Result { + pub(crate) fn compile_query_all( + col_spec: &CollectionSpecification, + skip_annotation: bool, + ) -> Result { SubProgram::compile_query_all_by_name(col_spec.name(), skip_annotation) } - pub(crate) fn compile_query_all_by_name(col_name: &str, skip_annotation: bool) -> Result { + pub(crate) fn compile_query_all_by_name( + col_name: &str, + skip_annotation: bool, + ) -> Result { let mut codegen = Codegen::new(skip_annotation, false); let result_label = codegen.new_label(); let next_label = codegen.new_label(); @@ -229,7 +233,6 @@ impl SubProgram { Ok(codegen.take()) } - } fn open_bson_to_str(val: &Bson) -> Result { @@ -251,7 +254,6 @@ fn open_bson_to_str(val: &Bson) -> Result { } impl fmt::Display for SubProgram { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { unsafe { let begin = self.instructions.as_ptr(); @@ -270,10 +272,12 @@ impl fmt::Display for SubProgram { let label_id = begin.add(pc + 1).cast::().read(); match &self.label_slots[label_id as usize] { LabelSlot::Empty => unreachable!(), - LabelSlot::UnnamedLabel(_) => - writeln!(f, "{}: Label({})", pc, label_id)?, - LabelSlot::LabelWithString(_, name) => - writeln!(f, "{}: Label({}, \"{}\")", pc, label_id, name)?, + LabelSlot::UnnamedLabel(_) => { + writeln!(f, "{}: Label({})", pc, label_id)? + } + LabelSlot::LabelWithString(_, name) => { + writeln!(f, "{}: Label({}, \"{}\")", pc, label_id, name)? + } } pc += 5; } @@ -402,6 +406,11 @@ impl fmt::Display for SubProgram { pc += 1; } + DbOp::Regex => { + writeln!(f, "{}: Regex", pc)?; + pc += 1; + } + DbOp::In => { writeln!(f, "{}: In", pc)?; pc += 1; @@ -503,16 +512,15 @@ impl fmt::Display for SubProgram { } Ok(()) } - } #[cfg(test)] mod tests { + use crate::coll::collection_info::{CollectionSpecification, IndexInfo}; + use crate::vm::SubProgram; use bson::doc; use indexmap::indexmap; use polodb_line_diff::assert_eq; - use crate::coll::collection_info::{CollectionSpecification, IndexInfo}; - use crate::vm::SubProgram; #[inline] fn new_spec>(name: T) -> CollectionSpecification { @@ -665,7 +673,6 @@ mod tests { let program = SubProgram::compile_query(&col_spec, &test_doc, false).unwrap(); let actual = format!("Program:\n\n{}", program); - let expect = r#"Program: 0: OpenRead("test") @@ -696,12 +703,15 @@ mod tests { fn print_query_by_index() { let mut col_spec = new_spec("test"); - col_spec.indexes.insert("age_1".into(), IndexInfo { - keys: indexmap! { - "age".into() => 1, + col_spec.indexes.insert( + "age_1".into(), + IndexInfo { + keys: indexmap! { + "age".into() => 1, + }, + options: None, }, - options: None, - }); + ); let test_doc = doc! { "age": 32, @@ -966,12 +976,9 @@ mod tests { "hello1": "hello2", }, }; - let program = SubProgram::compile_update( - &col_spec, - Some(&query_doc), - &update_doc, - false, true - ).unwrap(); + let program = + SubProgram::compile_update(&col_spec, Some(&query_doc), &update_doc, false, true) + .unwrap(); let actual = format!("Program:\n\n{}", program); let expect = r#"Program: @@ -1053,12 +1060,15 @@ mod tests { fn print_update_with_index() { let mut col_spec = new_spec("test"); - col_spec.indexes.insert("age_1".into(), IndexInfo { - keys: indexmap! { - "age".into() => 1, + col_spec.indexes.insert( + "age_1".into(), + IndexInfo { + keys: indexmap! { + "age".into() => 1, + }, + options: None, }, - options: None, - }); + ); let query_doc = doc! { "_id": { @@ -1070,12 +1080,9 @@ mod tests { "name": "Alan Chan", }, }; - let program = SubProgram::compile_update( - &col_spec, - Some(&query_doc), - &update_doc, - false, true - ).unwrap(); + let program = + SubProgram::compile_update(&col_spec, Some(&query_doc), &update_doc, false, true) + .unwrap(); let actual = format!("Program:\n\n{}", program); let expect = r#"Program: @@ -1122,5 +1129,4 @@ mod tests { "#; assert_eq!(expect, actual); } - } diff --git a/src/polodb_core/vm/vm.rs b/src/polodb_core/vm/vm.rs index 6fc0f947..55c8c102 100644 --- a/src/polodb_core/vm/vm.rs +++ b/src/polodb_core/vm/vm.rs @@ -3,20 +3,20 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use bson::Bson; -use std::cell::Cell; -use std::cmp::Ordering; -use crate::{Error, Result, LsmKv, TransactionType, Metrics}; use crate::cursor::Cursor; use crate::errors::{ - CannotApplyOperationForTypes, - FieldTypeUnexpectedStruct, + CannotApplyOperationForTypes, FieldTypeUnexpectedStruct, RegexCompileError, UnexpectedTypeForOpStruct, }; use crate::index::{IndexHelper, IndexHelperOperation}; use crate::session::SessionInner; use crate::vm::op::DbOp; use crate::vm::SubProgram; +use crate::{Error, LsmKv, Metrics, Result, TransactionType}; +use bson::Bson; +use regex::RegexBuilder; +use std::cell::Cell; +use std::cmp::Ordering; macro_rules! try_vm { ($self:ident, $action:expr) => { @@ -27,7 +27,7 @@ macro_rules! try_vm { return Err(err); } } - } + }; } const STACK_SIZE: usize = 256; @@ -42,34 +42,34 @@ pub enum VmState { } pub(crate) struct VM { - kv_engine: LsmKv, - pub(crate) state: VmState, - pc: *const u8, - r0: i32, // usually the logic register - r1: Option, - pub(crate) r2: i64, // usually the counter - r3: usize, - stack: Vec, - pub(crate) program: SubProgram, - metrics: Metrics, + kv_engine: LsmKv, + pub(crate) state: VmState, + pc: *const u8, + r0: i32, // usually the logic register + r1: Option, + pub(crate) r2: i64, // usually the counter + r3: usize, + stack: Vec, + pub(crate) program: SubProgram, + metrics: Metrics, } fn generic_cmp(op: DbOp, val1: &Bson, val2: &Bson) -> Result { let ord = crate::utils::bson::value_cmp(val1, val2)?; - let result = matches!((op, ord), - (DbOp::Equal, Ordering::Equal) | - (DbOp::Greater, Ordering::Greater) | - (DbOp::GreaterEqual, Ordering::Equal) | - (DbOp::GreaterEqual, Ordering::Greater) | - (DbOp::Less, Ordering::Less) | - (DbOp::LessEqual, Ordering::Equal) | - (DbOp::LessEqual, Ordering::Less) + let result = matches!( + (op, ord), + (DbOp::Equal, Ordering::Equal) + | (DbOp::Greater, Ordering::Greater) + | (DbOp::GreaterEqual, Ordering::Equal) + | (DbOp::GreaterEqual, Ordering::Greater) + | (DbOp::Less, Ordering::Less) + | (DbOp::LessEqual, Ordering::Equal) + | (DbOp::LessEqual, Ordering::Less) ); Ok(result) } impl VM { - pub(crate) fn new(kv_engine: LsmKv, program: SubProgram, metrics: Metrics) -> VM { let stack = Vec::with_capacity(STACK_SIZE); let pc = program.instructions.as_ptr(); @@ -95,9 +95,7 @@ impl VM { Ok(prefix_bytes) } - Bson::Binary(bin) => { - Ok(bin.bytes) - } + Bson::Binary(bin) => Ok(bin.bytes), _ => panic!("unexpected bson value: {:?}", val), } @@ -105,7 +103,7 @@ impl VM { fn open_read(&mut self, session: &mut SessionInner, prefix: Bson) -> Result<()> { session.auto_start_transaction(TransactionType::Read)?; - let mut cursor = self.kv_engine.open_multi_cursor(Some(session.kv_session())) ; + let mut cursor = self.kv_engine.open_multi_cursor(Some(session.kv_session())); cursor.go_to_min()?; let prefix_bytes = VM::prefix_bytes_from_bson(prefix)?; @@ -168,12 +166,9 @@ impl VM { return Ok(false); } - let key= cursor.peek_key().expect("key must exist"); + let key = cursor.peek_key().expect("key must exist"); - let index_value = self.read_index_value_by_index_key( - key.as_ref(), - session, - )?; + let index_value = self.read_index_value_by_index_key(key.as_ref(), session)?; if index_value.is_none() { return Ok(false); @@ -196,12 +191,9 @@ impl VM { let col_name = &slices[1]; - let pkey_in_kv = crate::utils::bson::stacked_key(vec![ - col_name, - pkey, - ])?; + let pkey_in_kv = crate::utils::bson::stacked_key(vec![col_name, pkey])?; - let mut value_cursor = self.kv_engine.open_multi_cursor(Some(session.kv_session())) ; + let mut value_cursor = self.kv_engine.open_multi_cursor(Some(session.kv_session())); value_cursor.go_to_min()?; value_cursor.seek(pkey_in_kv.as_slice())?; @@ -231,7 +223,11 @@ impl VM { let doc = bson::from_slice(bytes.as_ref())?; self.stack.push(Bson::Document(doc)); - debug_assert!(self.stack.len() <= 64, "stack too large: {}", self.stack.len()); + debug_assert!( + self.stack.len() <= 64, + "stack too large: {}", + self.stack.len() + ); self.r0 = 1; } @@ -303,7 +299,8 @@ impl VM { field_name: key.into(), field_type: a.to_string(), target_type: b.to_string(), - }.into()); + } + .into()); } }; Ok(val) @@ -327,7 +324,8 @@ impl VM { field_name: key.into(), field_type: a.to_string(), target_type: b.to_string(), - }.into()); + } + .into()); } }; Ok(val) @@ -356,7 +354,6 @@ impl VM { None => { mut_doc.insert::(key.into(), value); } - } Ok(()) } @@ -380,13 +377,14 @@ impl VM { None => { mut_doc.insert::(key.into(), value); } - } Ok(()) } fn unset_field(&mut self, field_id: u32) -> Result<()> { - let key = self.program.static_values[field_id as usize].as_str().unwrap(); + let key = self.program.static_values[field_id as usize] + .as_str() + .unwrap(); let doc_index = self.stack.len() - 1; let mut_doc = self.stack[doc_index].as_document_mut().unwrap(); @@ -408,12 +406,13 @@ impl VM { let mut array_value = match &self.stack[st - 2] { Bson::Array(arr) => arr.clone(), _ => { - let name = format!("{}", self.stack[st- 2]); + let name = format!("{}", self.stack[st - 2]); return Err(UnexpectedTypeForOpStruct { operation: "$push", expected_ty: "Array", - actual_ty: name - }.into()); + actual_ty: name, + } + .into()); } }; array_value.push(val); @@ -431,7 +430,8 @@ impl VM { operation: "$pop", expected_ty: "Array", actual_ty: name, - }.into()); + } + .into()); } }; array_value.drain(0..1); @@ -449,7 +449,8 @@ impl VM { operation: "$pop", expected_ty: "Array", actual_ty: name, - }.into()); + } + .into()); } }; array_value.pop(); @@ -549,7 +550,8 @@ impl VM { DbOp::IfTrue => { let location = self.pc.add(1).cast::().read(); - if self.r0 != 0 { // true + if self.r0 != 0 { + // true self.reset_location(location); } else { self.pc = self.pc.add(5); @@ -558,7 +560,8 @@ impl VM { DbOp::IfFalse => { let location = self.pc.add(1).cast::().read(); - if self.r0 == 0 { // false + if self.r0 == 0 { + // false self.reset_location(location); } else { self.pc = self.pc.add(5); @@ -657,7 +660,7 @@ impl VM { actual_ty: name, }; self.state = VmState::Halt; - return Err(err.into()) + return Err(err.into()); } }; @@ -670,9 +673,7 @@ impl VM { None => { self.reset_location(location); } - } - } DbOp::UnsetField => { @@ -702,7 +703,9 @@ impl VM { DbOp::SetField => { let filed_id = self.pc.add(1).cast::().read(); - let key = self.program.static_values[filed_id as usize].as_str().unwrap(); + let key = self.program.static_values[filed_id as usize] + .as_str() + .unwrap(); let value_index = self.stack.len() - 1; let doc_index = self.stack.len() - 2; @@ -784,23 +787,23 @@ impl VM { DbOp::Pop2 => { let offset = self.pc.add(1).cast::().read(); - self.stack.resize(self.stack.len() - (offset as usize), Bson::Null); + self.stack + .resize(self.stack.len() - (offset as usize), Bson::Null); self.pc = self.pc.add(5); } - DbOp::Equal | DbOp::Greater | DbOp::GreaterEqual | - DbOp::Less | DbOp::LessEqual => { + DbOp::Equal + | DbOp::Greater + | DbOp::GreaterEqual + | DbOp::Less + | DbOp::LessEqual => { let val1 = &self.stack[self.stack.len() - 2]; let val2 = &self.stack[self.stack.len() - 1]; let cmp = try_vm!(self, generic_cmp(op, val1, val2)); - self.r0 = if cmp { - 1 - } else { - 0 - }; + self.r0 = if cmp { 1 } else { 0 }; self.pc = self.pc.add(1); } @@ -827,6 +830,56 @@ impl VM { self.pc = self.pc.add(1); } + DbOp::Regex => { + let val1 = &self.stack[self.stack.len() - 2]; + let val2 = &self.stack[self.stack.len() - 1]; + + self.r0 = 0; + + println!("string: {:?}", val1); + println!("regex: {:?}", val2); + if let Bson::RegularExpression(re) = val2 { + let mut re_build = RegexBuilder::new(re.pattern.as_str()); + for char in re.pattern.chars() { + match char { + 'i' => { + re_build.case_insensitive(true); + } + 'm' => { + re_build.multi_line(true); + } + 's' => { + re_build.dot_matches_new_line(true); + } + 'u' => { + re_build.unicode(true); + } + 'U' => { + re_build.swap_greed(true); + } + 'x' => { + re_build.ignore_whitespace(true); + } + _ => {} + } + } + + let re_build = re_build.build().map_err(|err| { + Error::from(RegexCompileError { + error: err.to_string(), + expression: re.pattern.clone(), + options: re.options.clone(), + }) + })?; + + if re_build.is_match(&val1.to_string()) { + self.r0 = 1; + } + } + + self.pc = self.pc.add(1); + } + DbOp::OpenRead => { let prefix_idx = self.pc.add(1).cast::().read(); let prefix = self.program.static_values[prefix_idx as usize].clone(); @@ -868,16 +921,13 @@ impl VM { self.pc = self.pc.add(1); } - DbOp::_EOF | - DbOp::Halt => { + DbOp::_EOF | DbOp::Halt => { self.r1 = None; self.state = VmState::Halt; return Ok(()); } - } } } } - } From 3f1212aa180b53ade31466f12de06bf0c60d9658 Mon Sep 17 00:00:00 2001 From: DataHearth Date: Mon, 22 May 2023 21:57:02 +0200 Subject: [PATCH 02/52] add error when using invalid regex option --- src/polodb_core/errors.rs | 8 ++++---- src/polodb_core/vm/vm.rs | 17 ++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/polodb_core/errors.rs b/src/polodb_core/errors.rs index 82f4fe7e..8b713f06 100644 --- a/src/polodb_core/errors.rs +++ b/src/polodb_core/errors.rs @@ -122,7 +122,7 @@ pub struct DuplicateKeyError { } #[derive(Debug)] -pub struct RegexCompileError { +pub struct RegexError { pub error: String, pub expression: String, pub options: String, @@ -248,7 +248,7 @@ pub enum Error { #[error("the element type {0} is unknown")] UnknownBsonElementType(u8), #[error("failed to compile regex expression: {}, expression: {}, options: {}", .0.error, .0.expression, .0.options)] - RegexCompileError(Box), + RegexCompileError(Box), } impl Error { @@ -315,8 +315,8 @@ impl From for Error { } } -impl From for Error { - fn from(value: RegexCompileError) -> Self { +impl From for Error { + fn from(value: RegexError) -> Self { Error::RegexCompileError(Box::new(value)) } } diff --git a/src/polodb_core/vm/vm.rs b/src/polodb_core/vm/vm.rs index 55c8c102..eab227ac 100644 --- a/src/polodb_core/vm/vm.rs +++ b/src/polodb_core/vm/vm.rs @@ -5,8 +5,7 @@ */ use crate::cursor::Cursor; use crate::errors::{ - CannotApplyOperationForTypes, FieldTypeUnexpectedStruct, RegexCompileError, - UnexpectedTypeForOpStruct, + CannotApplyOperationForTypes, FieldTypeUnexpectedStruct, RegexError, UnexpectedTypeForOpStruct, }; use crate::index::{IndexHelper, IndexHelperOperation}; use crate::session::SessionInner; @@ -836,8 +835,6 @@ impl VM { self.r0 = 0; - println!("string: {:?}", val1); - println!("regex: {:?}", val2); if let Bson::RegularExpression(re) = val2 { let mut re_build = RegexBuilder::new(re.pattern.as_str()); for char in re.pattern.chars() { @@ -860,13 +857,19 @@ impl VM { 'x' => { re_build.ignore_whitespace(true); } - _ => {} + _ => { + return Err(Error::from(RegexError { + error: format!("unknown regex option: {}", char), + expression: re.pattern.clone(), + options: re.options.clone(), + })) + } } } let re_build = re_build.build().map_err(|err| { - Error::from(RegexCompileError { - error: err.to_string(), + Error::from(RegexError { + error: format!("regex build error: {err}"), expression: re.pattern.clone(), options: re.options.clone(), }) From d1bd69d63310c130ea076a1533c6f1b786ae0129 Mon Sep 17 00:00:00 2001 From: DataHearth Date: Wed, 24 May 2023 15:17:27 +0200 Subject: [PATCH 03/52] add test to subprogram --- src/polodb_core/vm/subprogram.rs | 56 +++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/polodb_core/vm/subprogram.rs b/src/polodb_core/vm/subprogram.rs index 26ac84f2..b1f93a8b 100644 --- a/src/polodb_core/vm/subprogram.rs +++ b/src/polodb_core/vm/subprogram.rs @@ -518,7 +518,7 @@ impl fmt::Display for SubProgram { mod tests { use crate::coll::collection_info::{CollectionSpecification, IndexInfo}; use crate::vm::SubProgram; - use bson::doc; + use bson::{doc, Regex}; use indexmap::indexmap; use polodb_line_diff::assert_eq; @@ -948,6 +948,60 @@ mod tests { assert_eq!(expect, actual); } + #[test] + fn print_regex() { + let col_spec = new_spec("test"); + let test_doc = doc! { + "name": doc! { + "$regex": Regex { + options: String::new(), + pattern: "/^Vincent/".into(), + }, + }, + }; + let program = SubProgram::compile_query(&col_spec, &test_doc, false).unwrap(); + let actual = format!("Program:\n\n{}", program); + + let expect = r#"Program: + +0: OpenRead("test") +5: Rewind(30) +10: Goto(73) + +15: Label(1) +20: Next(73) + +25: Label(5, "Close") +30: Close +31: Halt + +32: Label(4, "Not this item") +37: RecoverStackPos +38: Pop +39: Goto(20) + +44: Label(3, "Get field failed") +49: RecoverStackPos +50: Pop +51: Goto(20) + +56: Label(2, "Result") +61: ResultRow +62: Pop +63: Goto(20) + +68: Label(0, "Compare") +73: SaveStackPos +74: GetField("name", 49) +83: PushValue(//^Vincent//) +88: Regex +89: FalseJump(37) +94: Pop2(2) +99: Goto(61) +"#; + assert_eq!(expect, actual); + } + #[test] fn print_update() { let col_spec = new_spec("test"); From 17646e3a46cdfba1c6969442fd6a5d314702d087 Mon Sep 17 00:00:00 2001 From: DataHearth Date: Wed, 24 May 2023 15:35:09 +0200 Subject: [PATCH 04/52] update regex error msg --- src/polodb_core/errors.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/polodb_core/errors.rs b/src/polodb_core/errors.rs index 8b713f06..06db3ef0 100644 --- a/src/polodb_core/errors.rs +++ b/src/polodb_core/errors.rs @@ -247,8 +247,8 @@ pub enum Error { DuplicateKey(Box), #[error("the element type {0} is unknown")] UnknownBsonElementType(u8), - #[error("failed to compile regex expression: {}, expression: {}, options: {}", .0.error, .0.expression, .0.options)] - RegexCompileError(Box), + #[error("failed to run regex expression: {}, expression: {}, options: {}", .0.error, .0.expression, .0.options)] + RegexError(Box), } impl Error { @@ -317,7 +317,7 @@ impl From for Error { impl From for Error { fn from(value: RegexError) -> Self { - Error::RegexCompileError(Box::new(value)) + Error::RegexError(Box::new(value)) } } From b50a2da4da4928cb41bebefb4012622ea1e347c3 Mon Sep 17 00:00:00 2001 From: DataHearth Date: Thu, 25 May 2023 15:29:58 +0200 Subject: [PATCH 05/52] fix field error --- src/polodb_core/Cargo.toml | 20 ++++++++++++-------- src/polodb_core/vm/vm.rs | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/polodb_core/Cargo.toml b/src/polodb_core/Cargo.toml index e9e98446..78db354d 100644 --- a/src/polodb_core/Cargo.toml +++ b/src/polodb_core/Cargo.toml @@ -9,8 +9,8 @@ description = "An embedded document database" keywords = ["database", "embedded", "cross-platform"] [lib] -name="polodb_core" -path="lib.rs" +name = "polodb_core" +path = "lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] @@ -22,7 +22,14 @@ getrandom = { version = "0.2.3", features = ["js"] } byteorder = "1.4.3" num_enum = "0.6.0" serde = { version = "1.0.125", features = ["rc"] } -uuid = { version = "1.3.0", features = ["atomic", "v1", "v4", "wasm-bindgen", "js", "getrandom"] } +uuid = { version = "1.3.0", features = [ + "atomic", + "v1", + "v4", + "wasm-bindgen", + "js", + "getrandom", +] } smallvec = { version = "1.10.0", features = ["union", "write", "const_new"] } memmap2 = "0.5.10" wasm-bindgen = "0.2.84" @@ -32,14 +39,11 @@ serde-wasm-bindgen = "0.5.0" wasm-bindgen-futures = "0.4.34" thiserror = "1.0.40" indexmap = { version = "1.9.3", features = ["serde"] } -regex = "1" +regex = "1.8" [dependencies.web-sys] version = "0.3.61" -features = [ - 'Window', - 'console', -] +features = ['Window', 'console'] [dev-dependencies] polodb_line_diff = { path = "../polodb_line_diff" } diff --git a/src/polodb_core/vm/vm.rs b/src/polodb_core/vm/vm.rs index eab227ac..f50e5087 100644 --- a/src/polodb_core/vm/vm.rs +++ b/src/polodb_core/vm/vm.rs @@ -837,7 +837,7 @@ impl VM { if let Bson::RegularExpression(re) = val2 { let mut re_build = RegexBuilder::new(re.pattern.as_str()); - for char in re.pattern.chars() { + for char in re.options.chars() { match char { 'i' => { re_build.case_insensitive(true); From a59c971aeca5abb9c2daa9fd7c83b89a7739954e Mon Sep 17 00:00:00 2001 From: DataHearth Date: Thu, 25 May 2023 16:14:12 +0200 Subject: [PATCH 06/52] add e2e regex test --- src/polodb_core/tests/test_regex.rs | 92 +++++++++++++++++++++++++++++ src/polodb_core/vm/vm.rs | 2 +- 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/polodb_core/tests/test_regex.rs diff --git a/src/polodb_core/tests/test_regex.rs b/src/polodb_core/tests/test_regex.rs new file mode 100644 index 00000000..726c1eb3 --- /dev/null +++ b/src/polodb_core/tests/test_regex.rs @@ -0,0 +1,92 @@ +use bson::{doc, Document, Regex}; +use polodb_core::Database; + +mod common; + +use common::prepare_db; + +#[test] +fn test_regex() { + vec![ + (prepare_db("test-regex").unwrap(), true), + (Database::open_memory().unwrap(), false), + ] + .iter() + .for_each(|(db, _)| { + let metrics = db.metrics(); + metrics.enable(); + + let collection = db.collection::("config"); + let docs = vec![ + doc! { + "_id": "c1", + "value": "c1", + }, + doc! { + "_id": "invalid", + "value": "not-valid-value", + }, + doc! { + "_id": "c3", + "value": "c3" + }, + ]; + collection.insert_many(&docs).unwrap(); + + let res = collection + .find(doc! { + "value": { + "$regex": Regex { + pattern: "c[0-9]+".into(), + options: "i".into(), + }, + } + }) + .unwrap(); + + assert_eq!(res.count(), docs.len() - 1); + }); +} + +#[test] +fn test_regex_error() { + vec![ + (prepare_db("test-regex-error").unwrap(), true), + (Database::open_memory().unwrap(), false), + ] + .iter() + .for_each(|(db, _)| { + let metrics = db.metrics(); + metrics.enable(); + + let collection = db.collection::("config"); + let docs = vec![ + doc! { + "_id": "c1", + "value": "c1", + }, + doc! { + "_id": "invalid", + "value": "not-valid-value", + }, + doc! { + "_id": "c3", + "value": "c3" + }, + ]; + collection.insert_many(&docs).unwrap(); + + let mut res = collection + .find(doc! { + "value": { + "$regex": Regex { + pattern: "c[0-9]+".into(), + options: "pml".into(), // invalid option + }, + } + }) + .unwrap(); + + assert!(res.next().unwrap().is_err()); + }); +} diff --git a/src/polodb_core/vm/vm.rs b/src/polodb_core/vm/vm.rs index f50e5087..a5f64add 100644 --- a/src/polodb_core/vm/vm.rs +++ b/src/polodb_core/vm/vm.rs @@ -862,7 +862,7 @@ impl VM { error: format!("unknown regex option: {}", char), expression: re.pattern.clone(), options: re.options.clone(), - })) + })); } } } From 9cebca53c947942b9ef40959221efcda7e5fe240 Mon Sep 17 00:00:00 2001 From: Vincent Chan Date: Sat, 27 May 2023 14:44:12 +0800 Subject: [PATCH 07/52] Feat/aggregate stage 1 (#117) * feat: add aggregate API * feat: add CALL and RET op * fix(test): for call op * fix: logic or * feat: logic not * feat: add not expression * test: find not expression --- src/polodb_core/coll/collection.rs | 7 + src/polodb_core/db/db_inner.rs | 32 ++ src/polodb_core/tests/test_db.rs | 10 +- src/polodb_core/tests/test_find.rs | 87 +++- src/polodb_core/tests/test_insert.rs | 4 +- src/polodb_core/vm/codegen.rs | 194 ++++--- src/polodb_core/vm/op.rs | 28 +- src/polodb_core/vm/subprogram.rs | 743 ++++++++++++++++----------- src/polodb_core/vm/vm.rs | 75 +++ 9 files changed, 741 insertions(+), 439 deletions(-) diff --git a/src/polodb_core/coll/collection.rs b/src/polodb_core/coll/collection.rs index 65d59de5..f295f1ab 100644 --- a/src/polodb_core/coll/collection.rs +++ b/src/polodb_core/coll/collection.rs @@ -247,4 +247,11 @@ impl Collection return Ok(Some(cursor.deserialize_current()?)); } + /// Runs an aggregation operation. + pub fn aggregate(&self, pipeline: impl IntoIterator) -> Result> { + let db = self.db.upgrade().ok_or(Error::DbIsClosed)?; + let session = db.start_session()?; + db.aggregate_with_owned_session(&self.name, pipeline, session) + } + } diff --git a/src/polodb_core/db/db_inner.rs b/src/polodb_core/db/db_inner.rs index 6f2a2e5f..3160108d 100644 --- a/src/polodb_core/db/db_inner.rs +++ b/src/polodb_core/db/db_inner.rs @@ -986,6 +986,38 @@ impl DatabaseInner { } } + pub(crate) fn aggregate_with_owned_session( + &self, + col_name: &str, + pipeline: impl IntoIterator, + mut session: SessionInner, + ) -> Result> { + DatabaseInner::validate_col_name(col_name)?; + let meta_opt = self.get_collection_meta_by_name_advanced_auto(col_name, false, &mut session)?; + let subprogram = match meta_opt { + Some(col_spec) => { + let subprogram = SubProgram::compile_aggregate( + &col_spec, + pipeline, + true + )?; + + subprogram + } + None => SubProgram::compile_empty_query(), + }; + + let vm = VM::new( + self.kv_engine.clone(), + subprogram, + self.metrics.clone(), + ); + + let handle = ClientCursor::new(vm, session); + + Ok(handle) + } + } fn collection_metas_to_names(doc_meta: Vec) -> Vec { diff --git a/src/polodb_core/tests/test_db.rs b/src/polodb_core/tests/test_db.rs index 6b207adb..1c29fa2c 100644 --- a/src/polodb_core/tests/test_db.rs +++ b/src/polodb_core/tests/test_db.rs @@ -39,7 +39,7 @@ fn test_reopen_db() { { let db = Database::open_file(db_path.as_path().to_str().unwrap()).unwrap(); let collection = db.collection::("books"); - let book = collection.find_one(doc! {}).unwrap().unwrap(); + let book = collection.find_one(None).unwrap().unwrap(); assert_eq!(book.get("author").unwrap().as_str().unwrap(), "Liu Cixin"); } } @@ -79,7 +79,7 @@ fn test_reopen_db_file_size() { { let db = Database::open_file(db_path.as_path().to_str().unwrap()).unwrap(); let collection = db.collection::("books"); - let book = collection.find_one(doc! {}).unwrap().unwrap(); + let book = collection.find_one(None).unwrap().unwrap(); assert_eq!(book.get("author").unwrap().as_str().unwrap(), "Liu Cixin"); } @@ -127,14 +127,14 @@ fn test_multi_threads() { let t = thread::spawn(move || { let collection = db2.collection("test2"); collection.insert_one(doc! { - "content": "Hello" - }).unwrap(); + "content": "Hello" + }).unwrap(); }); t.join().unwrap(); let collection = db.collection::("test2"); - let one = collection.find_one(doc! {}).unwrap().unwrap(); + let one = collection.find_one(None).unwrap().unwrap(); assert_eq!(one.get("content").unwrap().as_str().unwrap(), "Hello"); } diff --git a/src/polodb_core/tests/test_find.rs b/src/polodb_core/tests/test_find.rs index 14d478f7..1332b7b9 100644 --- a/src/polodb_core/tests/test_find.rs +++ b/src/polodb_core/tests/test_find.rs @@ -27,23 +27,20 @@ fn test_multiple_find_one() { { let collection = db.collection("config"); - let doc1 = doc! { + collection.insert_many(vec![ + doc! { "_id": "c1", "value": "c1", - }; - collection.insert_one(doc1).unwrap(); - - let doc2 = doc! { + }, + doc! { "_id": "c2", "value": "c2", - }; - collection.insert_one(doc2).unwrap(); - - let doc2 = doc! { + }, + doc! { "_id": "c3", "value": "c3", - }; - collection.insert_one(doc2).unwrap(); + }, + ]).unwrap(); assert_eq!(collection.count_documents().unwrap(), 3); } @@ -51,19 +48,19 @@ fn test_multiple_find_one() { { let collection = db.collection::("config"); collection.update_many(doc! { - "_id": "c2" - }, doc! { - "$set": doc! { - "value": "c33", - }, - }).unwrap(); + "_id": "c2" + }, doc! { + "$set": doc! { + "value": "c33", + }, + }).unwrap(); collection.update_many(doc! { - "_id": "c2", - }, doc! { - "$set": doc! { - "value": "c22", - }, - }).unwrap(); + "_id": "c2", + }, doc! { + "$set": doc! { + "value": "c22", + }, + }).unwrap(); } let collection = db.collection::("config"); @@ -126,3 +123,47 @@ fn test_find_empty_collection() { assert!(!cursor.advance(&mut session).unwrap()); } + +#[test] +fn test_not_expression() { + vec![ + prepare_db("test-not-expression").unwrap(), + Database::open_memory().unwrap(), + ].iter().for_each(|db| { + let metrics = db.metrics(); + metrics.enable(); + + let col = db.collection::("teacher"); + + col.insert_many(vec![ + doc! { + "name": "David", + "age": 33, + }, + doc! { + "name": "John", + "age": 22, + }, + doc! { + "name": "Mary", + "age": 18, + }, + doc! { + "name": "Peter", + "age": 18, + }, + ]).unwrap(); + + let result = col.find(doc! { + "age": { + "$not": { + "$eq": 18, + }, + }, + }).unwrap().collect::>>().unwrap(); + assert_eq!(result.len(), 2); + + assert_eq!(result[0].get("name").unwrap().as_str().unwrap(), "David"); + assert_eq!(result[1].get("name").unwrap().as_str().unwrap(), "John"); + }); +} diff --git a/src/polodb_core/tests/test_insert.rs b/src/polodb_core/tests/test_insert.rs index ebfc915f..cdf446ef 100644 --- a/src/polodb_core/tests/test_insert.rs +++ b/src/polodb_core/tests/test_insert.rs @@ -60,7 +60,7 @@ fn test_insert_struct() { } ] }).unwrap(); - let result: Vec> = cursor.collect(); + let result = cursor.collect::>>().unwrap(); assert_eq!(result.len(), 2); }); } @@ -182,7 +182,7 @@ fn test_insert_different_types_as_key() { "_id": "0", }).unwrap(); - let cursor = collection.find(doc! {}).unwrap(); + let cursor = collection.find(None).unwrap(); let result: Vec> = cursor.collect(); assert_eq!(result.len(), 2); diff --git a/src/polodb_core/vm/codegen.rs b/src/polodb_core/vm/codegen.rs index b60a0670..184905a2 100644 --- a/src/polodb_core/vm/codegen.rs +++ b/src/polodb_core/vm/codegen.rs @@ -126,9 +126,9 @@ impl Codegen { if !self.program.label_slots[label.u_pos()].is_empty() { unreachable!("this label has been emit"); } + let current_loc = self.current_location(); self.emit(DbOp::Label); self.emit_u32(label.pos()); - let current_loc = self.current_location(); self.program.label_slots[label.u_pos()] = LabelSlot::UnnamedLabel(current_loc); } @@ -136,9 +136,9 @@ impl Codegen { if !self.program.label_slots[label.u_pos()].is_empty() { unreachable!("this label has been emit"); } + let current_loc = self.current_location(); self.emit(DbOp::Label); self.emit_u32(label.pos()); - let current_loc = self.current_location(); if self.skip_annotation { self.program.label_slots[label.u_pos()] = LabelSlot::UnnamedLabel(current_loc); } else { @@ -224,10 +224,11 @@ impl Codegen { let result_callback: F = try_index_result.unwrap(); + let compare_fun = self.new_label(); + let compare_fun_clean = self.new_label(); let compare_label = self.new_label(); let next_label = self.new_label(); let result_label = self.new_label(); - let get_field_failed_label = self.new_label(); let not_found_label = self.new_label(); let close_label = self.new_label(); @@ -239,26 +240,19 @@ impl Codegen { self.emit_goto(DbOp::Next, compare_label); // <==== close cursor - self.emit_label_with_name(close_label, "Close"); + self.emit_label_with_name(close_label, "close"); self.emit(DbOp::Close); self.emit(DbOp::Halt); // <==== not this item, go to next item - self.emit_label_with_name(not_found_label, "Not this item"); - self.emit(DbOp::RecoverStackPos); + self.emit_label_with_name(not_found_label, "not_this_item"); self.emit(DbOp::Pop); // pop the current value; self.emit_goto(DbOp::Goto, next_label); - // <==== get field failed, got to next item - self.emit_label_with_name(get_field_failed_label, "Get field failed"); - self.emit(DbOp::RecoverStackPos); - self.emit(DbOp::Pop); - self.emit_goto(DbOp::Goto, next_label); - // <==== result position // give out the result, or update the item - self.emit_label_with_name(result_label, "Result"); + self.emit_label_with_name(result_label, "result"); result_callback(self)?; if is_many { @@ -273,12 +267,20 @@ impl Codegen { // // begin to execute compare logic // save the stack first - self.emit_label_with_name(compare_label, "Compare"); - self.emit(DbOp::SaveStackPos); + self.emit_label_with_name(compare_label, "compare"); + self.emit(DbOp::Dup); + self.emit_goto(DbOp::Call, compare_fun); + self.emit_u32(1); + self.emit_goto(DbOp::IfFalse, not_found_label); + self.emit_goto(DbOp::Goto, result_label); - self.emit_standard_query_doc(query, result_label, get_field_failed_label, not_found_label)?; + self.emit_label_with_name(compare_fun, "compare_function"); - self.emit_goto(DbOp::Goto, result_label); + self.emit_standard_query_doc(query, result_label, compare_fun_clean)?; + + self.emit_label_with_name(compare_fun_clean, "compare_function_clean"); + self.emit(DbOp::Ret); + self.emit_u32(0); Ok(()) } @@ -420,7 +422,6 @@ impl Codegen { &mut self, query_doc: &Document, result_label: Label, - get_field_failed_label: Label, not_found_label: Label, ) -> Result<()> { for (key, value) in query_doc.iter() { @@ -429,7 +430,6 @@ impl Codegen { key, value, result_label, - get_field_failed_label, not_found_label, )?; }); @@ -458,7 +458,6 @@ impl Codegen { &mut self, arr: &Array, result_label: Label, - get_field_failed_label: Label, not_found_label: Label, ) -> Result<()> { for (index, item_doc_value) in arr.iter().enumerate() { @@ -468,7 +467,7 @@ impl Codegen { self.emit_standard_query_doc( item_doc, result_label, - get_field_failed_label, + not_found_label, )?; }); @@ -480,61 +479,52 @@ impl Codegen { fn emit_logic_or( &mut self, arr: &Array, - result_label: Label, - global_get_field_failed_label: Label, - not_found_label: Label, + ret_label: Label, ) -> Result<()> { + let cmp_label = self.new_label(); + self.emit_goto(DbOp::Goto, cmp_label); + + let mut functions = Vec::