Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions icu4c/source/common/uniset_props.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ class UnicodeSet::Lexer {
std::optional<int32_t> queryOperatorPosition;
int32_t queryExpressionStart = parsePosition_.getIndex();
bool exteriorlyNegated = false;
bool interiorlyNegated = false;
UBool unusedEscaped;
// Do not skip whitespace so we can recognize unspaced :]. Lex escapes and
// named-element: while ICU does not support string-valued properties and thus has no
Expand Down Expand Up @@ -742,7 +743,14 @@ class UnicodeSet::Lexer {
// Neither a named-element nor an escaped-element can be part of a closing :].
lastUnescaped = -1;
} else if (!queryOperatorPosition.has_value() && lastUnescaped == u'=') {
// TODO(egg): Propose and add support for ≠.
queryOperatorPosition = parsePosition_.getIndex() - 1;
} else if (!queryOperatorPosition.has_value() && lastUnescaped == u'≠') {
if (exteriorlyNegated) {
// Reject doubly negated property queries.
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return {};
}
interiorlyNegated = true;
queryOperatorPosition = parsePosition_.getIndex() - 1;
} else if ((first == u'[' && penultimateUnescaped == u':' && lastUnescaped == u']') ||
(first == u'\\' && lastUnescaped == u'}')) {
Expand Down Expand Up @@ -772,7 +780,7 @@ class UnicodeSet::Lexer {
pattern_.tempSubStringBetween(queryExpressionStart,
queryOperatorPosition.value_or(queryExpressionLimit)),
propertyPredicate, errorCode);
if (exteriorlyNegated) {
if (exteriorlyNegated != interiorlyNegated) {
result.complement().removeAllStrings();
}
result.setPattern(pattern_.tempSubStringBetween(queryStart, parsePosition_.getIndex()));
Expand Down
31 changes: 31 additions & 0 deletions icu4c/source/test/intltest/usettest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,34 @@ void UnicodeSetTest::TestPropertySet() {
expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
CharsToUnicodeString(DATA[i+2]));
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1(u"[:Noncharacter_Code_Point≠No:]", status);
UnicodeSet s2(u"[:Noncharacter_Code_Point:]", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1 == s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1(uR"(\p{Noncharacter_Code_Point≠No})", status);
UnicodeSet s2(uR"(\p{Noncharacter_Code_Point})", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1 == s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1(uR"(\p{dt≠can})", status);
UnicodeSet s2(uR"(\P{dt=can})", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1 == s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1(uR"([:dt≠can:])", status);
UnicodeSet s2(uR"([:^dt=can:])", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1 == s2);
}
}

/**
Expand Down Expand Up @@ -4778,6 +4806,9 @@ void UnicodeSetTest::TestParseErrors() {
uR"(\p{Uppercase_Letter=})",
// Well-formed in ICU 78 and earlier, disallowed by ICU-23306.
uR"([: ^general category = punctuation :])",
// Doubly negated property queries.
uR"(\P{Decomposition_Type≠compat})",
u"[:^Noncharacter_Code_Point≠No:]",
}) {
UErrorCode errorCode = U_ZERO_ERROR;
const UnicodeSet set(expression, errorCode);
Expand Down