From 19b9b54e7f15bac5244c13a9a18430648bbe7802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Commaille?= Date: Fri, 24 Jun 2022 14:37:52 +0200 Subject: [PATCH 1/2] push: Fix word matching without wildcards Don't match non-ASCII alphanumeric characters --- crates/ruma-common/CHANGELOG.md | 4 ++-- crates/ruma-common/src/push/condition.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ruma-common/CHANGELOG.md b/crates/ruma-common/CHANGELOG.md index e75c69a178..0d66b8ed9e 100644 --- a/crates/ruma-common/CHANGELOG.md +++ b/crates/ruma-common/CHANGELOG.md @@ -4,8 +4,8 @@ Bug fixes: * Expose `MatrixIdError`, `MatrixToError`, `MatrixUriError` and `MxcUriError` at the crate root -* Allow wildcards for push conditions on `content.body` - * The spec clarified the behavior of the `event_match` condition: +* Fix matching of `event_match` condition + * The spec clarified its behavior: Breaking changes: diff --git a/crates/ruma-common/src/push/condition.rs b/crates/ruma-common/src/push/condition.rs index e0b99523a6..070bf6499b 100644 --- a/crates/ruma-common/src/push/condition.rs +++ b/crates/ruma-common/src/push/condition.rs @@ -155,7 +155,7 @@ trait CharExt { impl CharExt for char { fn is_word_char(&self) -> bool { - self.is_alphanumeric() || *self == '_' + self.is_ascii_alphanumeric() || *self == '_' } } From 8902d913c28362fcde0841b30afdf773c700d566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Commaille?= Date: Fri, 24 Jun 2022 14:42:53 +0200 Subject: [PATCH 2/2] push: Fix regex for word boundaries --- crates/ruma-common/src/push/condition.rs | 38 ++++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/crates/ruma-common/src/push/condition.rs b/crates/ruma-common/src/push/condition.rs index 070bf6499b..360a431416 100644 --- a/crates/ruma-common/src/push/condition.rs +++ b/crates/ruma-common/src/push/condition.rs @@ -13,13 +13,6 @@ mod room_member_count_is; pub use 
room_member_count_is::{ComparisonOperator, RoomMemberCountIs}; -/// The characters that are defined as a word boundary in the [Matrix spec]. -/// -/// Any character not in the sets `[A-Z]`, `[a-z]`, `[0-9]` or `_`. -/// -/// [Matrix spec]: https://spec.matrix.org/v1.3/client-server-api/#conditions-1 -const WORD_BOUNDARY_CHARACTERS: &str = "[^A-Za-z0-9_]"; - /// A condition that must apply for an associated push rule's action to be taken. #[derive(Clone, Debug, Deserialize, Serialize)] #[cfg_attr(not(feature = "unstable-exhaustive-types"), non_exhaustive)] @@ -279,10 +272,9 @@ impl StrExt for str { chunks.push(chunk.wildcards_to_regex()); } - let regex = format!( - "(?:^|{WORD_BOUNDARY_CHARACTERS}){}(?:{WORD_BOUNDARY_CHARACTERS}|$)", - chunks.concat() - ); + // The word characters in ASCII compatible mode (with the `-u` flag) match the + // definition in the spec: any character not in the set `[A-Za-z0-9_]`. + let regex = format!(r"(?-u:^|\W|\b){}(?-u:\b|\W|$)", chunks.concat()); Regex::new(&regex).ok().filter(|re| re.is_match(self)).is_some() } else { match self.find(pattern) { @@ -530,13 +522,33 @@ mod tests { #[test] fn patterns_match() { - // Word matching + // Word matching without glob assert!("foo bar".matches_pattern("foo", true)); assert!("Foo bar".matches_pattern("foo", true)); assert!(!"foobar".matches_pattern("foo", true)); - assert!("foo bar".matches_pattern("foo*", true)); assert!("".matches_pattern("", true)); assert!(!"foo".matches_pattern("", true)); + assert!("foo bar".matches_pattern("foo bar", true)); + assert!(" foo bar ".matches_pattern("foo bar", true)); + assert!("baz foo bar baz".matches_pattern("foo bar", true)); + assert!("foo baré".matches_pattern("foo bar", true)); + assert!(!"bar foo".matches_pattern("foo bar", true)); + assert!("foo bar".matches_pattern("foo ", true)); + assert!("foo ".matches_pattern("foo ", true)); + assert!("foo ".matches_pattern("foo ", true)); + assert!(" foo ".matches_pattern("foo ", true)); + + // Word matching 
with glob + assert!("foo bar".matches_pattern("foo*", true)); + assert!("foo bar".matches_pattern("foo b?r", true)); + assert!(" foo bar ".matches_pattern("foo b?r", true)); + assert!("baz foo bar baz".matches_pattern("foo b?r", true)); + assert!("foo baré".matches_pattern("foo b?r", true)); + assert!(!"bar foo".matches_pattern("foo b?r", true)); + assert!("foo bar".matches_pattern("f*o ", true)); + assert!("foo ".matches_pattern("f*o ", true)); + assert!("foo ".matches_pattern("f*o ", true)); + assert!(" foo ".matches_pattern("f*o ", true)); // Glob matching assert!(!"foo bar".matches_pattern("foo", false));