From 1879bf9463cc2945437b35c440b769d8cc415559 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 15:27:29 -0400
Subject: [PATCH 1/6] src: allow simdutf::convert_* functions to return zero
---
src/inspector/node_string.cc | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index 171ba04bef113b..d147302a7983ae 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -19,12 +19,17 @@ void builderAppendQuotedString(StringBuilder& builder,
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer buffer(expected_utf16_length);
+ // simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
- CHECK_EQ(expected_utf16_length, utf16_length);
- escapeWideStringForJSON(reinterpret_cast(buffer.out()),
+ // We have that utf16_length == expected_utf16_length if and only
+ // if the input was a valid UTF-8 string.
+ if (utf16_length != 0) {
+ CHECK_EQ(expected_utf16_length, utf16_length);
+ escapeWideStringForJSON(reinterpret_cast(buffer.out()),
utf16_length,
&builder);
+ } // Otherwise, we had an invalid UTF-8 input.
}
builder.put('"');
}
@@ -35,8 +40,13 @@ std::unique_ptr parseJSON(const std::string_view string) {
size_t expected_utf16_length =
simdutf::utf16_length_from_utf8(string.data(), string.length());
MaybeStackBuffer buffer(expected_utf16_length);
+ // simdutf::convert_utf8_to_utf16 returns zero in case of error.
size_t utf16_length = simdutf::convert_utf8_to_utf16(
string.data(), string.length(), buffer.out());
+ // We have that utf16_length == expected_utf16_length if and only
+ // if the input was a valid UTF-8 string.
+ if (utf16_length == 0)
+ return nullptr; // We had an invalid UTF-8 input.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast(buffer.out()),
utf16_length);
@@ -62,9 +72,14 @@ String StringViewToUtf8(v8_inspector::StringView view) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(source, view.length());
MaybeStackBuffer buffer(expected_utf8_length);
+ // convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
- CHECK_EQ(expected_utf8_length, utf8_length);
+ // We have that utf8_length == expected_utf8_length if and only
+ // if the input was a valid UTF-16 string.
+ if (utf8_length == 0)
+ return ""; // We had an invalid UTF-16 input.
+ CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
}
@@ -112,8 +127,11 @@ String fromUTF16(const uint16_t* data, size_t length) {
size_t expected_utf8_length =
simdutf::utf8_length_from_utf16(casted_data, length);
MaybeStackBuffer buffer(expected_utf8_length);
+ // simdutf::convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
+ if (utf8_length == 0)
+ return ""; // We had an invalid UTF-16 input.
CHECK_EQ(expected_utf8_length, utf8_length);
return String(buffer.out(), utf8_length);
}
@@ -123,6 +141,9 @@ const uint8_t* CharactersUTF8(const std::string_view s) {
}
size_t CharacterCount(const std::string_view s) {
+ // The utf32_length_from_utf8 function calls count_utf8.
+ // The count_utf8 function counts the number of code points
+ // (characters) in the string, assuming that the string is valid Unicode.
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
return simdutf::utf32_length_from_utf8(s.data(), s.length());
}
From 5c8cb88e197f76dd7d4ef6d4a29d34624fb52668 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 15:55:59 -0400
Subject: [PATCH 2/6] src: replacing some CHECK_EQ with assert to simplify the
code
---
src/inspector/node_string.cc | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index d147302a7983ae..644764d1131456 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -76,10 +76,10 @@ String StringViewToUtf8(v8_inspector::StringView view) {
size_t utf8_length =
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
// We have that utf8_length == expected_utf8_length if and only
- // if the input was a valid UTF-16 string.
- if (utf8_length == 0)
- return ""; // We had an invalid UTF-16 input.
- CHECK_EQ(expected_utf8_length, utf8_length);
+ // if the input was a valid UTF-16 string. Otherwise, utf8_length
+ // must be zero.
+ assert(utf8_length == 0 || utf8_length == expected_utf8_length);
+ // An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}
@@ -130,9 +130,11 @@ String fromUTF16(const uint16_t* data, size_t length) {
// simdutf::convert_utf16_to_utf8 returns zero in case of error.
size_t utf8_length =
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
- if (utf8_length == 0)
- return ""; // We had an invalid UTF-16 input.
- CHECK_EQ(expected_utf8_length, utf8_length);
+ // We have that utf8_length == expected_utf8_length if and only
+ // if the input was a valid UTF-16 string. Otherwise, utf8_length
+ // must be zero.
+ assert(utf8_length == 0 || utf8_length == expected_utf8_length);
+ // An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}
From 848782bc7a10fb3a7376f658e4ce5adeaa7b147d Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 16:00:31 -0400
Subject: [PATCH 3/6] src: adding spaces before comments
---
src/inspector/node_string.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index 644764d1131456..14a68c5cf9c808 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -29,7 +29,7 @@ void builderAppendQuotedString(StringBuilder& builder,
escapeWideStringForJSON(reinterpret_cast(buffer.out()),
utf16_length,
&builder);
- } // Otherwise, we had an invalid UTF-8 input.
+ }  // Otherwise, we had an invalid UTF-8 input.
}
builder.put('"');
}
@@ -46,7 +46,7 @@ std::unique_ptr parseJSON(const std::string_view string) {
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
if (utf16_length == 0)
- return nullptr; // We had an invalid UTF-8 input.
+ return nullptr;  // We had an invalid UTF-8 input.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast(buffer.out()),
utf16_length);
From 2e851ec22818f9301d0cfa143ff5d1c50bacfbe9 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 16:07:25 -0400
Subject: [PATCH 4/6] src: reformat
---
src/inspector/node_string.cc | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index 14a68c5cf9c808..6f44c9e466363f 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -27,8 +27,8 @@ void builderAppendQuotedString(StringBuilder& builder,
if (utf16_length != 0) {
CHECK_EQ(expected_utf16_length, utf16_length);
escapeWideStringForJSON(reinterpret_cast(buffer.out()),
- utf16_length,
- &builder);
+ utf16_length,
+ &builder);
}  // Otherwise, we had an invalid UTF-8 input.
}
builder.put('"');
@@ -45,8 +45,7 @@ std::unique_ptr parseJSON(const std::string_view string) {
string.data(), string.length(), buffer.out());
// We have that utf16_length == expected_utf16_length if and only
// if the input was a valid UTF-8 string.
- if (utf16_length == 0)
- return nullptr;  // We had an invalid UTF-8 input.
+ if (utf16_length == 0) return nullptr;  // We had an invalid UTF-8 input.
CHECK_EQ(expected_utf16_length, utf16_length);
return parseJSONCharacters(reinterpret_cast(buffer.out()),
utf16_length);
From a2f8863017ac0b0814705300abf1e8d13e5a1f64 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 18:24:12 -0400
Subject: [PATCH 5/6] Update src/inspector/node_string.cc
Co-authored-by: Yagiz Nizipli
---
src/inspector/node_string.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index 6f44c9e466363f..e0ca9856f82dcc 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -132,7 +132,7 @@ String fromUTF16(const uint16_t* data, size_t length) {
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
- assert(utf8_length == 0 || utf8_length == expected_utf8_length);
+ CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}
From 89c85b3c2a2daa35863ef716e211f884d31ef3f0 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 7 Apr 2023 18:24:24 -0400
Subject: [PATCH 6/6] Update src/inspector/node_string.cc
Co-authored-by: Yagiz Nizipli
---
src/inspector/node_string.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc
index e0ca9856f82dcc..6b59cd73f9742d 100644
--- a/src/inspector/node_string.cc
+++ b/src/inspector/node_string.cc
@@ -77,7 +77,7 @@ String StringViewToUtf8(v8_inspector::StringView view) {
// We have that utf8_length == expected_utf8_length if and only
// if the input was a valid UTF-16 string. Otherwise, utf8_length
// must be zero.
- assert(utf8_length == 0 || utf8_length == expected_utf8_length);
+ CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
// An invalid UTF-16 input will generate the empty string:
return String(buffer.out(), utf8_length);
}