Skip to content

Commit

Permalink
ICU-22954 USet C++ iterator return std::u16string
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Dec 20, 2024
1 parent ba012a7 commit d03826c
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 25 deletions.
11 changes: 7 additions & 4 deletions icu4c/source/common/unicode/uniset.h
Original file line number Diff line number Diff line change
Expand Up @@ -1173,10 +1173,12 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
    // Build the USetStrings object from this set's toUSet() result.
    using StringsType = U_HEADER_NESTED_NAMESPACE::USetStrings;
    return StringsType(toUSet());
}
#endif // U_HIDE_DRAFT_API

#ifndef U_HIDE_DRAFT_API
/**
* Returns a C++ iterator for iterating over all of the elements of this set.
* Convenient all-in-one iteration, but creates a UnicodeString for each
* Convenient all-in-one iteration, but creates a std::u16string for each
* code point or string.
* (Similar to how Java UnicodeSet *is an* Iterable<String>.)
*
Expand All @@ -1185,13 +1187,14 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
* \code
* UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
* for (auto el : set) {
* UnicodeString us(el);
* std::string u8;
* printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* printf("set.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* @return an all-elements iterator.
* @draft ICU 76
* @draft ICU 77
* @see end
* @see codePoints
* @see ranges
Expand All @@ -1203,7 +1206,7 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {

/**
* @return an exclusive-end sentinel for iterating over all of the elements of this set.
* @draft ICU 76
* @draft ICU 77
* @see begin
* @see codePoints
* @see ranges
Expand Down
44 changes: 25 additions & 19 deletions icu4c/source/common/unicode/uset.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@
#include "unicode/uchar.h"

#if U_SHOW_CPLUSPLUS_API
#include <string>
#include <string_view>
#include "unicode/char16ptr.h"
#include "unicode/localpointer.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#endif // U_SHOW_CPLUSPLUS_API

#ifndef USET_DEFINED
Expand Down Expand Up @@ -1737,17 +1738,19 @@ class USetStrings {
const USet *uset;
int32_t count;
};
#endif // U_HIDE_DRAFT_API

#ifndef U_HIDE_DRAFT_API
/**
* Iterator returned by USetElements.
* @draft ICU 76
* @draft ICU 77
*/
class USetElementIterator {
public:
/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator(const USetElementIterator &other) = default;  // compiler-generated member-wise copy

/** @draft ICU 76 */
/** @draft ICU 77 */
bool operator==(const USetElementIterator &other) const {
// No need to compare rangeCount & end given private constructor
// and assuming we don't compare iterators across the set being modified.
Expand All @@ -1756,26 +1759,28 @@ class USetElementIterator {
return uset == other.uset && c == other.c && index == other.index;
}

/** @draft ICU 76 */
/** @draft ICU 77 */
bool operator!=(const USetElementIterator &other) const {
    // Inequality is defined purely as the negation of operator==.
    return !(*this == other);
}

/** @draft ICU 76 */
UnicodeString operator*() const {
/** @draft ICU 77 */
std::u16string operator*() const {
if (c >= 0) {
return UnicodeString(c);
return c <= 0xffff ?
std::u16string({static_cast<char16_t>(c)}) :
std::u16string({U16_LEAD(c), U16_TRAIL(c)});
} else if (index < totalCount) {
int32_t length;
const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
// assert uchars != nullptr;
return UnicodeString(uchars, length);
return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
} else {
return UnicodeString();
return {};
}
}

/**
* Pre-increment.
* @draft ICU 76
* @draft ICU 77
*/
USetElementIterator &operator++() {
if (c < end) {
Expand All @@ -1800,7 +1805,7 @@ class USetElementIterator {

/**
* Post-increment.
* @draft ICU 76
* @draft ICU 77
*/
USetElementIterator operator++(int) {
USetElementIterator result(*this);
Expand Down Expand Up @@ -1840,7 +1845,7 @@ class USetElementIterator {

/**
* A C++ "range" for iterating over all of the elements of a USet.
* Convenient all-in-one iteration, but creates a UnicodeString for each
* Convenient all-in-one iteration, but creates a std::u16string for each
* code point or string.
*
* Code points are returned first, then empty and multi-character strings.
Expand All @@ -1849,15 +1854,16 @@ class USetElementIterator {
* using U_HEADER_NESTED_NAMESPACE::USetElements;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto el : USetElements(uset.getAlias())) {
* UnicodeString us(el);
* std::string u8;
* printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* printf("uset.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including begin() and end().
*
* @return an all-elements iterator.
* @draft ICU 76
* @draft ICU 77
* @see USetCodePoints
* @see USetRanges
* @see USetStrings
Expand All @@ -1866,21 +1872,21 @@ class USetElements {
public:
/**
* Constructs a C++ "range" object over all of the elements of the USet.
* @draft ICU 76
* @draft ICU 77
*/
USetElements(const USet *uset)
: uset(uset), rangeCount(uset_getRangeCount(uset)),  // range count is read once, at construction
stringCount(uset_getStringCount(uset)) {}  // string count likewise; begin()/end() reuse both stored values

/** @draft ICU 76 */
/** @draft ICU 77 */
USetElements(const USetElements &other) = default;  // compiler-generated member-wise copy

/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator begin() const {
    // The iterator constructor is outside this view; its arguments appear to be
    // (set, start position, range count, total element count) -- TODO confirm.
    const auto total = rangeCount + stringCount;
    return USetElementIterator(uset, 0, rangeCount, total);
}

/** @draft ICU 76 */
/** @draft ICU 77 */
USetElementIterator end() const {
    // Same arguments as begin(), except the second one is the combined
    // range + string count instead of 0.
    const auto total = rangeCount + stringCount;
    return USetElementIterator(uset, total, rangeCount, total);
}
Expand Down
6 changes: 4 additions & 2 deletions icu4c/source/test/intltest/usettest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4448,8 +4448,9 @@ void UnicodeSetTest::TestElementIterator() {
UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
UnicodeString result;
for (auto el : set) {
// UnicodeString us(el);
// std::string u8;
// printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
// printf("set.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
result.append(u" \"").append(el).append(u'"');
}
assertEquals(WHERE, uR"( "a" "b" "c" "ç" "カ" "🚴" "" "abc" "de")", result);
Expand All @@ -4463,8 +4464,9 @@ void UnicodeSetTest::TestUSetElementIterator() {
LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, errorCode));
UnicodeString result;
for (auto el : USetElements(uset.getAlias())) {
// UnicodeString us(el);
// std::string u8;
// printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
// printf("uset.string length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
result.append(u" \"").append(el).append(u'"');
}
assertEquals(WHERE, uR"( "a" "b" "c" "ç" "カ" "🚴" "" "abc" "de")", result);
Expand Down

0 comments on commit d03826c

Please sign in to comment.