mirror of
https://github.com/zebrajr/node.git
synced 2025-12-06 00:20:08 +01:00
deps: update ICU to 69.1
Refs: https://github.com/unicode-org/icu/releases/tag/release-69-1 PR-URL: https://github.com/nodejs/node/pull/38178 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Richard Lau <rlau@redhat.com> Reviewed-By: Jiawen Geng <technicalcute@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
1d21a8d140
commit
ee669a0d29
4
deps/icu-small/README-FULL-ICU.txt
vendored
4
deps/icu-small/README-FULL-ICU.txt
vendored
|
|
@ -1,8 +1,8 @@
|
|||
ICU sources - auto generated by shrink-icu-src.py
|
||||
|
||||
This directory contains the ICU subset used by --with-intl=full-icu
|
||||
It is a strict subset of ICU 68 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt68l.dat.bz2 : compressed data file
|
||||
It is a strict subset of ICU 69 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt69l.dat.bz2 : compressed data file
|
||||
|
||||
|
||||
To rebuild this directory, see ../../tools/icu/README.md
|
||||
|
|
|
|||
|
|
@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
|||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
return write(i);
|
||||
} else {
|
||||
char intBytes[5];
|
||||
return write(intBytes, internalEncodeDelta(i, intBytes));
|
||||
}
|
||||
char intBytes[5];
|
||||
int32_t length;
|
||||
}
|
||||
|
||||
int32_t
|
||||
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
intBytes[0]=(char)i;
|
||||
return 1;
|
||||
}
|
||||
int32_t length=1;
|
||||
if(i<=BytesTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=BytesTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
length=4;
|
||||
length=2;
|
||||
}
|
||||
intBytes[1]=(char)(i>>16);
|
||||
intBytes[length++]=(char)(i>>16);
|
||||
}
|
||||
intBytes[1]=(char)(i>>8);
|
||||
intBytes[length++]=(char)(i>>8);
|
||||
}
|
||||
intBytes[length++]=(char)i;
|
||||
return write(intBytes, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
|||
34
deps/icu-small/source/common/charstr.cpp
vendored
34
deps/icu-small/source/common/charstr.cpp
vendored
|
|
@ -14,6 +14,8 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "charstr.h"
|
||||
|
|
@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
|
|||
return *this;
|
||||
}
|
||||
|
||||
/**
 * Appends the decimal representation of `number` to this string.
 *
 * A leading '-' is written for negative values. Digits are produced least
 * significant first and then reversed in place, so no temporary buffer is
 * needed. The absolute value is taken per digit (not of the whole number),
 * which keeps the most negative int32_t value representable.
 *
 * @param number the value to format
 * @param status in/out error code; on failure the function returns early
 * @return *this
 */
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
    if (number < 0) {
        this->append('-', status);
        if (U_FAILURE(status)) {
            return *this;
        }
    }

    // Zero has no digits to peel off in the loop below; emit it directly.
    if (number == 0) {
        this->append('0', status);
        return *this;
    }

    // Emit the digits in reverse order (units first).
    int32_t digitCount = 0;
    for (; number != 0; number /= 10) {
        const int32_t digit = std::abs(number % 10);
        this->append(static_cast<char>(digit + '0'), status);
        ++digitCount;
        if (U_FAILURE(status)) {
            return *this;
        }
    }

    // Reverse the freshly appended digit run so it reads most significant first.
    int32_t lo = this->length() - digitCount;
    int32_t hi = this->length() - 1;
    for (; lo < hi; ++lo, --hi) {
        std::swap(this->data()[lo], this->data()[hi]);
    }

    return *this;
}
|
||||
|
||||
char *CharString::getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
int32_t &resultCapacity,
|
||||
|
|
|
|||
3
deps/icu-small/source/common/charstr.h
vendored
3
deps/icu-small/source/common/charstr.h
vendored
|
|
@ -127,6 +127,9 @@ public:
|
|||
return append(s.data(), s.length(), errorCode);
|
||||
}
|
||||
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
|
||||
|
||||
CharString &appendNumber(int32_t number, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
|
||||
|
|
|
|||
55
deps/icu-small/source/common/cmemory.h
vendored
55
deps/icu-small/source/common/cmemory.h
vendored
|
|
@ -31,14 +31,63 @@
|
|||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "unicode/localpointer.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
// uprv_memcpy and uprv_memmove
|
||||
#if defined(__clang__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#elif defined(__GNUC__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#else
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_LENGTHOF
|
||||
|
|
|
|||
32
deps/icu-small/source/common/dictbe.cpp
vendored
32
deps/icu-small/source/common/dictbe.cpp
vendored
|
|
@ -265,13 +265,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
|
|
@ -503,13 +499,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
|
|
@ -699,13 +691,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
|
|
@ -908,13 +896,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
|
|
|
|||
1
deps/icu-small/source/common/edits.cpp
vendored
1
deps/icu-small/source/common/edits.cpp
vendored
|
|
@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
|
|||
}
|
||||
|
||||
Edits &Edits::operator=(const Edits &other) {
|
||||
if (this == &other) { return *this; } // self-assignment: no-op
|
||||
length = other.length;
|
||||
delta = other.delta;
|
||||
numChanges = other.numChanges;
|
||||
|
|
|
|||
93
deps/icu-small/source/common/filteredbrk.cpp
vendored
93
deps/icu-small/source/common/filteredbrk.cpp
vendored
|
|
@ -20,6 +20,7 @@
|
|||
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
|
||||
#include "uvector.h"
|
||||
#include "cmemory.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
|
@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
|
|||
public:
|
||||
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
|
||||
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
|
||||
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
|
||||
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
SimpleFilteredSentenceBreakData *incr() {
|
||||
umtx_atomic_inc(&refcount);
|
||||
return this;
|
||||
}
|
||||
SimpleFilteredSentenceBreakData *decr() {
|
||||
if(umtx_atomic_dec(&refcount) <= 0) {
|
||||
delete this;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
int32_t refcount;
|
||||
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
|
||||
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
|
||||
|
||||
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
|
||||
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
|
||||
|
||||
private:
|
||||
// These tries own their data arrays.
|
||||
// They are shared and must therefore not be modified.
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
u_atomic_int32_t refcount;
|
||||
};
|
||||
|
||||
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
|
||||
|
|
@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
|
|||
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
|
||||
fDelegate(adopt)
|
||||
{
|
||||
// all set..
|
||||
if (fData == nullptr) {
|
||||
delete forwards;
|
||||
delete backwards;
|
||||
if (U_SUCCESS(status)) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
|
||||
|
|
@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
|||
int32_t bestValue = -1;
|
||||
// loops while 'n' points to an exception.
|
||||
utext_setNativeIndex(fText.getAlias(), n); // from n..
|
||||
fData->fBackwardsTrie->reset();
|
||||
UChar32 uch;
|
||||
|
||||
//if(debug2) u_printf(" n@ %d\n", n);
|
||||
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
|
||||
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
|
||||
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
|
||||
// TODO only do this the 1st time?
|
||||
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
|
||||
} else {
|
||||
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
|
||||
uch = utext_next32(fText.getAlias());
|
||||
utext_next32(fText.getAlias());
|
||||
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
|
||||
}
|
||||
|
||||
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
|
||||
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
|
||||
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
}
|
||||
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
|
||||
{
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getBackwardsTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
|
||||
UStringTrieResult r = iter.nextForCodePoint(uch);
|
||||
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
bestValue = iter.getValue();
|
||||
}
|
||||
if(!USTRINGTRIE_HAS_NEXT(r)) {
|
||||
break;
|
||||
}
|
||||
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
}
|
||||
|
||||
if(USTRINGTRIE_MATCHES(r)) { // exact match?
|
||||
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
}
|
||||
//if(bestValue >= 0) {
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
//}
|
||||
|
||||
if(bestPosn>=0) {
|
||||
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
|
||||
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
|
||||
//int32_t bestValue = fBackwardsTrie->getValue();
|
||||
//int32_t bestValue = iter.getValue();
|
||||
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
|
||||
|
||||
if(bestValue == kMATCH) { // exact match!
|
||||
//if(debug2) u_printf(" exact backward match\n");
|
||||
return kExceptionHere; // See if the next is another exception.
|
||||
} else if(bestValue == kPARTIAL
|
||||
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
|
||||
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
|
||||
//if(debug2) u_printf(" partial backward match\n");
|
||||
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
|
||||
// to see if it matches something going forward.
|
||||
fData->fForwardsPartialTrie->reset();
|
||||
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
|
||||
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getForwardsPartialTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
|
||||
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
if(USTRINGTRIE_MATCHES(rfwd)) {
|
||||
|
|
@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
|||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
||||
if(n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
|
|
@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
|||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
|
||||
if(n == 0 || n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
|
|
@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
|
|||
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
|
||||
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
|
||||
|
||||
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
|
||||
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
resetState(status);
|
||||
|
|
|
|||
19
deps/icu-small/source/common/hash.h
vendored
19
deps/icu-small/source/common/hash.h
vendored
|
|
@ -85,16 +85,22 @@ public:
|
|||
|
||||
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline void* get(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t geti(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
|
||||
|
||||
inline void* remove(const UnicodeString& key);
|
||||
|
||||
inline int32_t removei(const UnicodeString& key);
|
||||
|
||||
inline void removeAll(void);
|
||||
|
||||
inline UBool containsKey(const UnicodeString& key) const;
|
||||
|
||||
inline const UHashElement* find(const UnicodeString& key) const;
|
||||
|
||||
/**
|
||||
|
|
@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
|
|||
return uhash_puti(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
/**
 * Stores an integer value under a heap-allocated copy of `key` by forwarding
 * to uhash_putiAllowZero().
 *
 * NOTE(review): presumably, unlike puti(), a value of 0 is stored rather than
 * treated as "remove" -- confirm against uhash.h.
 *
 * @param key    key to copy and insert
 * @param value  integer value to associate with the key
 * @param status in/out error code passed through to the C API
 * @return whatever uhash_putiAllowZero() returns
 */
inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
                                        UErrorCode& status) {
    UnicodeString *keyCopy = new UnicodeString(key);
    return uhash_putiAllowZero(hash, keyCopy, value, &status);
}
|
||||
|
||||
inline void* Hashtable::get(const UnicodeString& key) const {
|
||||
return uhash_get(hash, &key);
|
||||
}
|
||||
|
|
@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
|
|||
return uhash_geti(hash, &key);
|
||||
}
|
||||
|
||||
/**
 * Looks up the integer value stored for `key` by forwarding to
 * uhash_getiAndFound().
 *
 * @param key   key to look up
 * @param found output flag handed to uhash_getiAndFound(); callers use it to
 *              tell a present key from a missing one
 * @return whatever uhash_getiAndFound() returns
 */
inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
    UBool *foundOut = &found;
    return uhash_getiAndFound(hash, &key, foundOut);
}
|
||||
|
||||
inline void* Hashtable::remove(const UnicodeString& key) {
|
||||
return uhash_remove(hash, &key);
|
||||
}
|
||||
|
|
@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
|
|||
return uhash_removei(hash, &key);
|
||||
}
|
||||
|
||||
/**
 * Reports the result of uhash_containsKey() for `key` on the wrapped table.
 *
 * @param key key to test
 * @return whatever uhash_containsKey() returns
 */
inline UBool Hashtable::containsKey(const UnicodeString& key) const {
    const UnicodeString *lookupKey = &key;
    return uhash_containsKey(hash, lookupKey);
}
|
||||
|
||||
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
|
||||
return uhash_find(hash, &key);
|
||||
}
|
||||
|
|
|
|||
12
deps/icu-small/source/common/localematcher.cpp
vendored
12
deps/icu-small/source/common/localematcher.cpp
vendored
|
|
@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
|
|||
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return suppLength; }
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
|
||||
if (index == 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
|
||||
if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
|
||||
uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
supportedLSRs[suppLength] = &lsr;
|
||||
supportedIndexes[suppLength++] = i;
|
||||
|
|
@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
|||
int32_t bestSupportedLsrIndex = -1;
|
||||
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
|
||||
// Quick check for exact maximized LSR.
|
||||
// Returns suppIndex+1 where 0 means not found.
|
||||
if (supportedLsrToIndex != nullptr) {
|
||||
desiredLSR.setHashCode();
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
|
||||
if (index != 0) {
|
||||
int32_t suppIndex = index - 1;
|
||||
UBool found = false;
|
||||
int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
|
||||
if (found) {
|
||||
if (remainingIter != nullptr) {
|
||||
remainingIter->rememberCurrent(desiredIndex, errorCode);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
|||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
LocalPointer<Locale> clone;
|
||||
int32_t index = uhash_geti(map, &locale);
|
||||
if (index != 0) {
|
||||
UBool found = false;
|
||||
int32_t index = uhash_getiAndFound(map, &locale, &found);
|
||||
if (found) {
|
||||
// Duplicate: Remove the old item and append it anew.
|
||||
LocaleAndWeight &lw = list->array[index - 1];
|
||||
LocaleAndWeight &lw = list->array[index];
|
||||
clone.adoptInstead(lw.locale);
|
||||
lw.locale = nullptr;
|
||||
lw.weight = 0;
|
||||
++numRemoved;
|
||||
}
|
||||
if (weight <= 0) { // do not add q=0
|
||||
if (index != 0) {
|
||||
if (found) {
|
||||
// Not strictly necessary but cleaner.
|
||||
uhash_removei(map, &locale);
|
||||
}
|
||||
|
|
@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
|||
return false;
|
||||
}
|
||||
}
|
||||
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
|
||||
uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
LocaleAndWeight &lw = list->array[listLength];
|
||||
lw.locale = clone.orphan();
|
||||
|
|
|
|||
|
|
@ -698,7 +698,7 @@ uloc_getDisplayName(const char *locale,
|
|||
} /* end switch */
|
||||
|
||||
if (len>0) {
|
||||
/* we addeed a component, so add separator and write it if there's room. */
|
||||
/* we added a component, so add separator and write it if there's room. */
|
||||
if(len+sepLen<=cap) {
|
||||
const UChar * plimit = p + len;
|
||||
for (; p < plimit; p++) {
|
||||
|
|
|
|||
244
deps/icu-small/source/common/locid.cpp
vendored
244
deps/icu-small/source/common/locid.cpp
vendored
|
|
@ -254,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
|
|||
|
||||
Locale::~Locale()
|
||||
{
|
||||
if (baseName != fullName) {
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
|
|
@ -466,7 +466,7 @@ Locale& Locale::operator=(const Locale& other) {
|
|||
}
|
||||
|
||||
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
|
||||
if (baseName != fullName) uprv_free(baseName);
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
|
||||
if (fullName != fullNameBuffer) uprv_free(fullName);
|
||||
|
||||
if (other.fullName == other.fullNameBuffer) {
|
||||
|
|
@ -524,7 +524,7 @@ static const char* const KNOWN_CANONICALIZED[] = {
|
|||
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
|
||||
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
|
||||
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
|
||||
"nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
|
||||
"nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
|
||||
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
|
||||
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
|
||||
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
|
||||
|
|
@ -627,6 +627,17 @@ private:
|
|||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
|
||||
// Read the subdivisionAlias data from alias to
|
||||
// strings+types+replacementIndexes
|
||||
// Allocate length items for types, to store the type field.
|
||||
// Allocate length items for replacementIndexes,
|
||||
// to store the index in the strings for the replacement variant.
|
||||
void readSubdivisionAlias(UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -647,6 +658,7 @@ public:
|
|||
const CharStringMap& scriptMap() const { return script; }
|
||||
const CharStringMap& territoryMap() const { return territory; }
|
||||
const CharStringMap& variantMap() const { return variant; }
|
||||
const CharStringMap& subdivisionMap() const { return subdivision; }
|
||||
|
||||
static void U_CALLCONV loadData(UErrorCode &status);
|
||||
static UBool U_CALLCONV cleanup();
|
||||
|
|
@ -658,11 +670,13 @@ private:
|
|||
CharStringMap scriptMap,
|
||||
CharStringMap territoryMap,
|
||||
CharStringMap variantMap,
|
||||
CharStringMap subdivisionMap,
|
||||
CharString* strings)
|
||||
: language(std::move(languageMap)),
|
||||
script(std::move(scriptMap)),
|
||||
territory(std::move(territoryMap)),
|
||||
variant(std::move(variantMap)),
|
||||
subdivision(std::move(subdivisionMap)),
|
||||
strings(strings) {
|
||||
}
|
||||
|
||||
|
|
@ -676,6 +690,7 @@ private:
|
|||
CharStringMap script;
|
||||
CharStringMap territory;
|
||||
CharStringMap variant;
|
||||
CharStringMap subdivision;
|
||||
CharString* strings;
|
||||
|
||||
friend class AliasDataBuilder;
|
||||
|
|
@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias(
|
|||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
|
||||
* Allocate length items for types, to store the type field. Allocate length
|
||||
* items for replacementIndexes, to store the index in the strings for the
|
||||
* replacement regions.
|
||||
*/
|
||||
void
|
||||
AliasDataBuilder::readSubdivisionAlias(
|
||||
UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length,
|
||||
UErrorCode &status)
|
||||
{
|
||||
return readAlias(
|
||||
alias, strings, types, replacementIndexes, length,
|
||||
#if U_DEBUG
|
||||
[](const char* type) {
|
||||
U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
|
||||
},
|
||||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) { },
|
||||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the alias data from the ICU resource bundles. The alias data
|
||||
* contains alias of language, country, script and variants.
|
||||
|
|
@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
|
||||
LocalUResourceBundlePointer variantAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
|
||||
LocalUResourceBundlePointer subdivisionAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
|
||||
variantLength = 0;
|
||||
variantLength = 0, subdivisionLength = 0;
|
||||
|
||||
// Read the languageAlias into languageTypes, languageReplacementIndexes
|
||||
// and strings
|
||||
|
|
@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
variantReplacementIndexes,
|
||||
variantLength, status);
|
||||
|
||||
// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
|
||||
// and strings
|
||||
LocalMemory<const char*> subdivisionTypes;
|
||||
LocalMemory<int32_t> subdivisionReplacementIndexes;
|
||||
readSubdivisionAlias(subdivisionAlias.getAlias(),
|
||||
&strings,
|
||||
subdivisionTypes,
|
||||
subdivisionReplacementIndexes,
|
||||
subdivisionLength, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
|
@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
status);
|
||||
}
|
||||
|
||||
// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
|
||||
CharStringMap subdivisionMap(2, status);
|
||||
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
|
||||
subdivisionMap.put(subdivisionTypes[i],
|
||||
strings.get(subdivisionReplacementIndexes[i]),
|
||||
status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
|
@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
std::move(scriptMap),
|
||||
std::move(territoryMap),
|
||||
std::move(variantMap),
|
||||
std::move(subdivisionMap),
|
||||
strings.orphanCharStrings());
|
||||
|
||||
if (data == nullptr) {
|
||||
|
|
@ -1105,6 +1169,14 @@ private:
|
|||
|
||||
// Replace by using variantAlias.
|
||||
bool replaceVariant(UErrorCode& status);
|
||||
|
||||
// Replace by using subdivisionAlias.
|
||||
bool replaceSubdivision(StringPiece subdivision,
|
||||
CharString& output, UErrorCode& status);
|
||||
|
||||
// Replace transformed extensions.
|
||||
bool replaceTransformedExtensions(
|
||||
CharString& transformedExtensions, CharString& output, UErrorCode& status);
|
||||
};
|
||||
|
||||
CharString&
|
||||
|
|
@ -1294,7 +1366,6 @@ AliasReplacer::replaceLanguage(
|
|||
}
|
||||
}
|
||||
if (replacedExtensions != nullptr) {
|
||||
// TODO(ICU-21292)
|
||||
// DO NOTHING
|
||||
// UTS35 does not specify what we should do if we have extensions in the
|
||||
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
|
||||
|
|
@ -1435,6 +1506,106 @@ AliasReplacer::replaceVariant(UErrorCode& status)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AliasReplacer::replaceSubdivision(
|
||||
StringPiece subdivision, CharString& output, UErrorCode& status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
const char *replacement = data->subdivisionMap().get(subdivision.data());
|
||||
if (replacement != nullptr) {
|
||||
const char* firstSpace = uprv_strchr(replacement, ' ');
|
||||
// Found replacement data for this subdivision.
|
||||
size_t len = (firstSpace != nullptr) ?
|
||||
(firstSpace - replacement) : uprv_strlen(replacement);
|
||||
if (2 <= len && len <= 8) {
|
||||
output.append(replacement, (int32_t)len, status);
|
||||
if (2 == len) {
|
||||
// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
|
||||
output.append("zzzz", 4, status);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AliasReplacer::replaceTransformedExtensions(
|
||||
CharString& transformedExtensions, CharString& output, UErrorCode& status)
|
||||
{
|
||||
// The content of the transformedExtensions will be modified in this
|
||||
// function to NULL-terminating (tkey-tvalue) pairs.
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
int32_t len = transformedExtensions.length();
|
||||
const char* str = transformedExtensions.data();
|
||||
const char* tkey = ultag_getTKeyStart(str);
|
||||
int32_t tlangLen = (tkey == str) ? 0 :
|
||||
((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
|
||||
CharStringByteSink sink(&output);
|
||||
if (tlangLen > 0) {
|
||||
Locale tlang = LocaleBuilder()
|
||||
.setLanguageTag(StringPiece(str, tlangLen))
|
||||
.build(status);
|
||||
tlang.canonicalize(status);
|
||||
tlang.toLanguageTag(sink, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
T_CString_toLowerCase(output.data());
|
||||
}
|
||||
if (tkey != nullptr) {
|
||||
// We need to sort the tfields by tkey
|
||||
UVector tfields(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
do {
|
||||
const char* tvalue = uprv_strchr(tkey, '-');
|
||||
if (tvalue == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
const char* nextTKey = ultag_getTKeyStart(tvalue);
|
||||
if (nextTKey != nullptr) {
|
||||
*((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
|
||||
}
|
||||
tfields.insertElementAt((void*)tkey, tfields.size(), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
tkey = nextTKey;
|
||||
} while (tkey != nullptr);
|
||||
tfields.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
// uprv_strcmp return int and in some platform, such as arm64-v8a,
|
||||
// it may return positive values > 127 which cause the casted value
|
||||
// of int8_t negative.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
for (int32_t i = 0; i < tfields.size(); i++) {
|
||||
if (output.length() > 0) {
|
||||
output.append('-', status);
|
||||
}
|
||||
const char* tfield = (const char*) tfields.elementAt(i);
|
||||
const char* tvalue = uprv_strchr(tfield, '-');
|
||||
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
|
||||
U_ASSERT(tvalue != nullptr);
|
||||
*((char*)tvalue++) = '\0'; // NULL terminate tkey
|
||||
output.append(tfield, status).append('-', status);
|
||||
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
|
||||
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
CharString&
|
||||
AliasReplacer::outputToString(
|
||||
CharString& out, UErrorCode status)
|
||||
|
|
@ -1453,8 +1624,12 @@ AliasReplacer::outputToString(
|
|||
out.append(SEP_CHAR, status);
|
||||
}
|
||||
variants.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
return uprv_strcmp(
|
||||
// uprv_strcmp returns int, and on some platforms, such as arm64-v8a,
|
||||
// it may return positive values > 127, which would turn negative
|
||||
// when cast to int8_t.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
int32_t variantsStart = out.length();
|
||||
for (int32_t i = 0; i < variants.size(); i++) {
|
||||
|
|
@ -1497,7 +1672,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
region = nullptr;
|
||||
}
|
||||
const char* variantsStr = locale.getVariant();
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
CharString variantsBuff(variantsStr, -1, status);
|
||||
if (!variantsBuff.isEmpty()) {
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
|
|
@ -1516,8 +1690,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
|
||||
// Sort the variants
|
||||
variants.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
return uprv_strcmp(
|
||||
// uprv_strcmp returns int, and on some platforms, such as arm64-v8a,
|
||||
// it may return positive values > 127, which would turn negative
|
||||
// when cast to int8_t.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
|
||||
// A changed count to assert when loop too many times.
|
||||
|
|
@ -1561,11 +1739,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
if (U_FAILURE(status)) { return false; }
|
||||
// Nothing changed and we know the order of the variants has not changed
|
||||
// because we have no variant or only one.
|
||||
if (changed == 0 && variants.size() <= 1) {
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
outputToString(out, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
if (extensionsStr != nullptr) {
|
||||
changed = 0;
|
||||
Locale temp(locale);
|
||||
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
|
||||
if (U_SUCCESS(status) && !iter.isNull()) {
|
||||
const char* key;
|
||||
while ((key = iter->next(nullptr, status)) != nullptr) {
|
||||
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
|
||||
uprv_strcmp("t", key) == 0) {
|
||||
CharString value;
|
||||
CharStringByteSink valueSink(&value);
|
||||
locale.getKeywordValue(key, valueSink, status);
|
||||
if (U_FAILURE(status)) {
|
||||
status = U_ZERO_ERROR;
|
||||
continue;
|
||||
}
|
||||
CharString replacement;
|
||||
if (uprv_strlen(key) == 2) {
|
||||
if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
|
||||
changed++;
|
||||
temp.setKeywordValue(key, replacement.data(), status);
|
||||
}
|
||||
} else {
|
||||
U_ASSERT(uprv_strcmp(key, "t") == 0);
|
||||
if (replaceTransformedExtensions(value, replacement, status)) {
|
||||
changed++;
|
||||
temp.setKeywordValue(key, replacement.data(), status);
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (changed != 0) {
|
||||
extensionsStr = locale_getKeywordsStart(temp.getName());
|
||||
}
|
||||
out.append(extensionsStr, status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
|
|
@ -1573,8 +1792,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
}
|
||||
// If the tag is not changed, return.
|
||||
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
|
||||
U_ASSERT(changed == 0);
|
||||
U_ASSERT(variants.size() > 1);
|
||||
out.clear();
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1636,7 +1853,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
|||
{
|
||||
fIsBogus = FALSE;
|
||||
/* Free our current storage */
|
||||
if (baseName != fullName) {
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
|
|
@ -1672,6 +1889,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
|||
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
|
||||
|
||||
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
|
||||
U_ASSERT(baseName == nullptr);
|
||||
/*Go to heap for the fullName if necessary*/
|
||||
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
|
||||
if(fullName == 0) {
|
||||
|
|
@ -1825,7 +2043,7 @@ Locale::hashCode() const
|
|||
void
|
||||
Locale::setToBogus() {
|
||||
/* Free our current storage */
|
||||
if(baseName != fullName) {
|
||||
if((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
|
|
|
|||
|
|
@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() {
|
|||
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
|
||||
const char *name = locale.getName();
|
||||
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
|
||||
// Private use language tag x-subtag-subtag...
|
||||
// Private use language tag x-subtag-subtag... which CLDR changes to
|
||||
// und-x-subtag-subtag...
|
||||
return LSR(name, "", "", LSR::EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
|
|
|
|||
82
deps/icu-small/source/common/norm2allmodes.h
vendored
82
deps/icu-small/source/common/norm2allmodes.h
vendored
|
|
@ -38,7 +38,7 @@ public:
|
|||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
|
|
@ -64,13 +64,13 @@ public:
|
|||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return normalizeSecondAndAppend(first, second, true, errorCode);
|
||||
}
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return normalizeSecondAndAppend(first, second, false, errorCode);
|
||||
}
|
||||
UnicodeString &
|
||||
|
|
@ -107,7 +107,7 @@ public:
|
|||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
|
||||
UChar buffer[4];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getDecomposition(c, buffer, length);
|
||||
|
|
@ -122,7 +122,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
|
||||
UChar buffer[30];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getRawDecomposition(c, buffer, length);
|
||||
|
|
@ -137,18 +137,18 @@ public:
|
|||
return true;
|
||||
}
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const {
|
||||
composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
|
||||
return impl.composePair(a, b);
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
getCombiningClass(UChar32 c) const U_OVERRIDE {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -161,11 +161,11 @@ public:
|
|||
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -194,27 +194,57 @@ public:
|
|||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.decompose(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
|
||||
void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
|
||||
edits->reset();
|
||||
}
|
||||
const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
|
||||
impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
|
||||
sink.Flush();
|
||||
}
|
||||
// Reports whether the UTF-8 text in `sp` is already normalized: runs
// decomposeUTF8() without a sink and checks that it reached the end of the
// input. Returns false if `errorCode` already indicates a failure.
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return false;
    }
    const uint8_t *begin = reinterpret_cast<const uint8_t *>(sp.data());
    const uint8_t *end = begin + sp.length();
    const uint8_t *stoppedAt =
        impl.decomposeUTF8(0, begin, end, nullptr, nullptr, errorCode);
    return end == stoppedAt;
}
|
||||
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return impl.decompose(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasDecompBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasDecompBoundaryAfter(c);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isDecompInert(c);
|
||||
}
|
||||
};
|
||||
|
||||
class ComposeNormalizer2 : public Normalizer2WithImpl {
|
||||
|
|
@ -321,24 +351,30 @@ public:
|
|||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.makeFCD(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return impl.makeFCD(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasFCDBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasFCDBoundaryAfter(c);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isFCDInert(c);
|
||||
}
|
||||
};
|
||||
|
||||
struct Norm2AllModes : public UMemory {
|
||||
|
|
|
|||
152
deps/icu-small/source/common/normalizer2impl.cpp
vendored
152
deps/icu-small/source/common/normalizer2impl.cpp
vendored
|
|
@ -731,9 +731,131 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
|
|||
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
|
||||
}
|
||||
|
||||
// Dual functionality:
|
||||
// sink != nullptr: normalize
|
||||
// sink == nullptr: isNormalized/spanQuickCheckYes
|
||||
const uint8_t *
|
||||
Normalizer2Impl::decomposeUTF8(uint32_t options,
|
||||
const uint8_t *src, const uint8_t *limit,
|
||||
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
|
||||
U_ASSERT(limit != nullptr);
|
||||
UnicodeString s16;
|
||||
uint8_t minNoLead = leadByteForCP(minDecompNoCP);
|
||||
|
||||
const uint8_t *prevBoundary = src;
|
||||
// only for quick check
|
||||
uint8_t prevCC = 0;
|
||||
|
||||
for (;;) {
|
||||
// Fast path: Scan over a sequence of characters below the minimum "no" code point,
|
||||
// or with (decompYes && ccc==0) properties.
|
||||
const uint8_t *fastStart = src;
|
||||
const uint8_t *prevSrc;
|
||||
uint16_t norm16 = 0;
|
||||
|
||||
for (;;) {
|
||||
if (src == limit) {
|
||||
if (prevBoundary != limit && sink != nullptr) {
|
||||
ByteSinkUtil::appendUnchanged(prevBoundary, limit,
|
||||
*sink, options, edits, errorCode);
|
||||
}
|
||||
return src;
|
||||
}
|
||||
if (*src < minNoLead) {
|
||||
++src;
|
||||
} else {
|
||||
prevSrc = src;
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
|
||||
if (!isMostDecompYesAndZeroCC(norm16)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
|
||||
// and the current character at [prevSrc..src[ is not a common case with cc=0
|
||||
// (MIN_NORMAL_MAYBE_YES or JAMO_VT).
|
||||
// It could still be a maybeYes with cc=0.
|
||||
if (prevSrc != fastStart) {
|
||||
// The fast path looped over yes/0 characters before the current one.
|
||||
if (sink != nullptr &&
|
||||
!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = prevSrc;
|
||||
prevCC = 0;
|
||||
}
|
||||
|
||||
// Medium-fast path: Quick check.
|
||||
if (isMaybeOrNonZeroCC(norm16)) {
|
||||
// Does not decompose.
|
||||
uint8_t cc = getCCFromYesOrMaybe(norm16);
|
||||
if (prevCC <= cc || cc == 0) {
|
||||
prevCC = cc;
|
||||
if (cc <= 1) {
|
||||
if (sink != nullptr &&
|
||||
!ByteSinkUtil::appendUnchanged(prevBoundary, src,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = src;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (sink == nullptr) {
|
||||
return prevBoundary; // quick check: "no" or cc out of order
|
||||
}
|
||||
|
||||
// Slow path
|
||||
// Decompose up to and including the current character.
|
||||
if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
|
||||
if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = prevSrc;
|
||||
}
|
||||
ReorderingBuffer buffer(*this, s16, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
}
|
||||
decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE /* onlyContiguous */,
|
||||
buffer, errorCode);
|
||||
// Decompose until the next boundary.
|
||||
if (buffer.getLastCC() > 1) {
|
||||
src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE /* onlyContiguous */,
|
||||
buffer, errorCode);
|
||||
}
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
}
|
||||
if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
// We already know there was a change if the original character decomposed;
|
||||
// otherwise compare.
|
||||
if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
|
||||
if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
|
||||
*sink, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
prevBoundary = src;
|
||||
prevCC = 0;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
|
||||
const uint8_t *
|
||||
Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
StopAt stopAt, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return nullptr;
|
||||
|
|
@ -746,21 +868,28 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
UChar32 c = U_SENTINEL;
|
||||
if (norm16 >= limitNoNo) {
|
||||
if (isMaybeOrNonZeroCC(norm16)) {
|
||||
// No boundaries around this character.
|
||||
// No comp boundaries around this character.
|
||||
uint8_t cc = getCCFromYesOrMaybe(norm16);
|
||||
if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
|
||||
return prevSrc;
|
||||
}
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
|
||||
if (!buffer.append(c, cc, errorCode)) {
|
||||
return nullptr;
|
||||
}
|
||||
if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
|
||||
return src;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Maps to an isCompYesAndZeroCC.
|
||||
if (stopAtCompBoundary) {
|
||||
if (stopAt != STOP_AT_LIMIT) {
|
||||
return prevSrc;
|
||||
}
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
c = mapAlgorithmic(c, norm16);
|
||||
norm16 = getRawNorm16(c);
|
||||
} else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
|
||||
} else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
|
||||
return prevSrc;
|
||||
}
|
||||
// norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
|
||||
|
|
@ -768,7 +897,8 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
// its norm16==INERT is normalization-inert,
|
||||
// so it gets copied unchanged in the fast path,
|
||||
// and we stop the slow path where invalid UTF-8 begins.
|
||||
U_ASSERT(norm16 != INERT);
|
||||
// c >= 0 is the result of an algorithmic mapping.
|
||||
U_ASSERT(c >= 0 || norm16 != INERT);
|
||||
if (norm16 < minYesNo) {
|
||||
if (c < 0) {
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
|
|
@ -798,11 +928,15 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
} else {
|
||||
leadCC = 0;
|
||||
}
|
||||
if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
|
||||
return prevSrc;
|
||||
}
|
||||
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) ||
|
||||
(stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
|
||||
return src;
|
||||
}
|
||||
}
|
||||
|
|
@ -1954,10 +2088,10 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
|
|||
break;
|
||||
}
|
||||
// We know there is not a boundary here.
|
||||
decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
|
||||
decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
|
||||
buffer, errorCode);
|
||||
// Decompose until the next boundary.
|
||||
src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
|
||||
src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
|
||||
buffer, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
|
|
|
|||
11
deps/icu-small/source/common/normalizer2impl.h
vendored
11
deps/icu-small/source/common/normalizer2impl.h
vendored
|
|
@ -491,6 +491,12 @@ public:
|
|||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/** sink==nullptr: isNormalized()/spanQuickCheckYes() */
|
||||
const uint8_t *decomposeUTF8(uint32_t options,
|
||||
const uint8_t *src, const uint8_t *limit,
|
||||
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
|
||||
|
||||
UBool compose(const UChar *src, const UChar *limit,
|
||||
UBool onlyContiguous,
|
||||
UBool doCompose,
|
||||
|
|
@ -649,6 +655,9 @@ private:
|
|||
UChar32 minNeedDataCP,
|
||||
ReorderingBuffer *buffer,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
enum StopAt { STOP_AT_LIMIT, STOP_AT_DECOMP_BOUNDARY, STOP_AT_COMP_BOUNDARY };
|
||||
|
||||
const UChar *decomposeShort(const UChar *src, const UChar *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
|
|
@ -656,7 +665,7 @@ private:
|
|||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
|
||||
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
StopAt stopAt, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
|
||||
static int32_t combine(const uint16_t *list, UChar32 trail);
|
||||
|
|
|
|||
2
deps/icu-small/source/common/pluralmap.h
vendored
2
deps/icu-small/source/common/pluralmap.h
vendored
|
|
@ -24,7 +24,7 @@ class U_COMMON_API PluralMapBase : public UMemory {
|
|||
public:
|
||||
/**
|
||||
* The names of all the plural categories. NONE is not an actual plural
|
||||
* category, but rather represents the absense of a plural category.
|
||||
* category, but rather represents the absence of a plural category.
|
||||
*/
|
||||
enum Category {
|
||||
NONE = -1,
|
||||
|
|
|
|||
4
deps/icu-small/source/common/putil.cpp
vendored
4
deps/icu-small/source/common/putil.cpp
vendored
|
|
@ -1139,7 +1139,7 @@ uprv_tzname(int n)
|
|||
#endif
|
||||
if (tzid != NULL && isValidOlsonID(tzid)
|
||||
#if U_PLATFORM == U_PF_SOLARIS
|
||||
/* When TZ equals localtime on Solaris, check the /etc/localtime file. */
|
||||
/* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
|
||||
&& uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
|
||||
#endif
|
||||
) {
|
||||
|
|
@ -1361,7 +1361,7 @@ uprv_pathIsAbsolute(const char *path)
|
|||
|
||||
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
|
||||
(needed for some Darwin ICU build environments) */
|
||||
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
|
||||
#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
|
||||
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
|
||||
# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
|
||||
# endif
|
||||
|
|
|
|||
2
deps/icu-small/source/common/putilimp.h
vendored
2
deps/icu-small/source/common/putilimp.h
vendored
|
|
@ -527,7 +527,7 @@ U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
|
|||
* on the destination pointer and capacity cannot overflow.
|
||||
*
|
||||
* The pinned capacity must fulfill the following conditions (for positive capacities):
|
||||
* - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.)
|
||||
* - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
|
||||
* - (dest + capacity) >= dest
|
||||
* - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
|
||||
*
|
||||
|
|
|
|||
2
deps/icu-small/source/common/rbbi.cpp
vendored
2
deps/icu-small/source/common/rbbi.cpp
vendored
|
|
@ -812,7 +812,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
}
|
||||
#endif
|
||||
|
||||
// handleNext alway sets the break tag value.
|
||||
// handleNext always sets the break tag value.
|
||||
// Set the default for it.
|
||||
fRuleStatusIndex = 0;
|
||||
|
||||
|
|
|
|||
2
deps/icu-small/source/common/rbbi_cache.cpp
vendored
2
deps/icu-small/source/common/rbbi_cache.cpp
vendored
|
|
@ -258,7 +258,7 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode
|
|||
previous(status);
|
||||
} else {
|
||||
// seek() leaves the BreakCache positioned at the preceding boundary
|
||||
// if the requested position is between two bounaries.
|
||||
// if the requested position is between two boundaries.
|
||||
// current() pushes the BreakCache position out to the BreakIterator itself.
|
||||
U_ASSERT(startPos > fTextIdx);
|
||||
current();
|
||||
|
|
|
|||
6
deps/icu-small/source/common/rbbiscan.cpp
vendored
6
deps/icu-small/source/common/rbbiscan.cpp
vendored
|
|
@ -284,7 +284,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
|||
|
||||
case doEndAssign:
|
||||
{
|
||||
// We have reached the end of an assignement statement.
|
||||
// We have reached the end of an assignment statement.
|
||||
// Current scan char is the ';' that terminates the assignment.
|
||||
|
||||
// Terminate expression, leaves expression parse tree rooted in TOS node.
|
||||
|
|
@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() {
|
|||
return (UChar32)-1;
|
||||
}
|
||||
ch = fRB->fRules.char32At(fNextIndex);
|
||||
if (U_IS_SURROGATE(ch)) {
|
||||
error(U_ILLEGAL_CHAR_FOUND);
|
||||
return U_SENTINEL;
|
||||
}
|
||||
fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
|
||||
|
||||
if (ch == chCR ||
|
||||
|
|
|
|||
2
deps/icu-small/source/common/rbbitblb.cpp
vendored
2
deps/icu-small/source/common/rbbitblb.cpp
vendored
|
|
@ -151,7 +151,7 @@ void RBBITableBuilder::buildForwardTable() {
|
|||
//
|
||||
// calculate the functions nullable, firstpos, lastpos and followpos on
|
||||
// nodes in the parse tree.
|
||||
// See the alogrithm description in Aho.
|
||||
// See the algorithm description in Aho.
|
||||
// Understanding how this works by looking at the code alone will be
|
||||
// nearly impossible.
|
||||
//
|
||||
|
|
|
|||
6
deps/icu-small/source/common/resource.h
vendored
6
deps/icu-small/source/common/resource.h
vendored
|
|
@ -274,8 +274,10 @@ public:
|
|||
*
|
||||
* @param key The key string of the enumeration-start resource.
|
||||
* Empty if the enumeration starts at the top level of the bundle.
|
||||
* @param value Call getArray() or getTable() as appropriate.
|
||||
* Then reuse for output values from Array and Table getters.
|
||||
* @param value Call getArray() or getTable() as appropriate. Then reuse for
|
||||
* output values from Array and Table getters. Note: ResourceTable and
|
||||
* ResourceArray instances must outlive the ResourceValue instance for
|
||||
* ResourceTracer to be happy.
|
||||
* @param noFallback true if the bundle has no parent;
|
||||
* that is, its top-level table has the nofallback attribute,
|
||||
* or it is the root bundle of a locale tree.
|
||||
|
|
|
|||
3
deps/icu-small/source/common/restrace.cpp
vendored
3
deps/icu-small/source/common/restrace.cpp
vendored
|
|
@ -54,6 +54,9 @@ void ResourceTracer::traceOpen() const {
|
|||
|
||||
CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
|
||||
if (fResB) {
|
||||
// Note: if you get a segfault around here, check that ResourceTable and
|
||||
// ResourceArray instances outlive ResourceValue instances referring to
|
||||
// their contents:
|
||||
output.append(fResB->fData->fPath, status);
|
||||
output.append('/', status);
|
||||
output.append(fResB->fData->fName, status);
|
||||
|
|
|
|||
4
deps/icu-small/source/common/servnotf.h
vendored
4
deps/icu-small/source/common/servnotf.h
vendored
|
|
@ -82,7 +82,7 @@ public:
|
|||
/**
|
||||
* Add a listener to be notified when notifyChanged is called.
|
||||
* The listener must not be null. AcceptsListener must return
|
||||
* true for the listener. Attempts to concurrently
|
||||
* true for the listener. Attempts to concurrently
|
||||
* register the identical listener more than once will be
|
||||
* silently ignored.
|
||||
*/
|
||||
|
|
@ -90,7 +90,7 @@ public:
|
|||
|
||||
/**
|
||||
* Stop notifying this listener. The listener must
|
||||
* not be null. Attemps to remove a listener that is
|
||||
* not be null. Attempts to remove a listener that is
|
||||
* not registered will be silently ignored.
|
||||
*/
|
||||
virtual void removeListener(const EventListener* l, UErrorCode& status);
|
||||
|
|
|
|||
12
deps/icu-small/source/common/ubrk.cpp
vendored
12
deps/icu-small/source/common/ubrk.cpp
vendored
|
|
@ -174,6 +174,18 @@ ubrk_safeClone(
|
|||
return (UBreakIterator *)newBI;
|
||||
}
|
||||
|
||||
// Clones the break iterator `bi` by calling BreakIterator::clone() on it.
// Returns nullptr immediately if *status already indicates a failure.
// If clone() returns nullptr, sets *status to U_MEMORY_ALLOCATION_ERROR and
// returns nullptr. Neither `bi` nor `status` is null-checked here.
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return nullptr;
|
||||
}
|
||||
// The C handle is the C++ BreakIterator object itself; cast and clone it.
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
|
||||
if (newBI == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
return (UBreakIterator *)newBI;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
|
|
|||
2
deps/icu-small/source/common/ucase.cpp
vendored
2
deps/icu-small/source/common/ucase.cpp
vendored
|
|
@ -681,7 +681,7 @@ ucase_isCaseSensitive(UChar32 c) {
|
|||
* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
|
||||
* - Given D = NFD(C), then it is not the case that:
|
||||
* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
|
||||
* (This third criterium does not add any characters to the list
|
||||
* (This third criterion does not add any characters to the list
|
||||
* for Unicode 3.2. Ignored.)
|
||||
*
|
||||
* D2. A character C is defined to be case-ignorable
|
||||
|
|
|
|||
38
deps/icu-small/source/common/uchar.cpp
vendored
38
deps/icu-small/source/common/uchar.cpp
vendored
|
|
@ -194,7 +194,7 @@ u_isISOControl(UChar32 c) {
|
|||
|
||||
/* Some control characters that are used as space. */
|
||||
#define IS_THAT_CONTROL_SPACE(c) \
|
||||
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
|
||||
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==0x85))
|
||||
|
||||
/* Java has decided that U+0085 New Line is not whitespace any more. */
|
||||
#define IS_THAT_ASCII_CONTROL_SPACE(c) \
|
||||
|
|
@ -677,14 +677,14 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
sa->add(sa->set, CR+1); /* range TAB..CR */
|
||||
sa->add(sa->set, 0x1c);
|
||||
sa->add(sa->set, 0x1f+1);
|
||||
USET_ADD_CP_AND_NEXT(sa, NL);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x85); // NEXT LINE (NEL)
|
||||
|
||||
/* add for u_isIDIgnorable() what was not added above */
|
||||
sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
|
||||
sa->add(sa->set, 0x7f); /* range DEL..NBSP-1, NBSP added below */
|
||||
sa->add(sa->set, HAIRSP);
|
||||
sa->add(sa->set, RLM+1);
|
||||
sa->add(sa->set, INHSWAP);
|
||||
sa->add(sa->set, NOMDIG+1);
|
||||
sa->add(sa->set, 0x206a); // INHIBIT SYMMETRIC SWAPPING
|
||||
sa->add(sa->set, 0x206f+1); // NOMINAL DIGIT SHAPES
|
||||
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
|
||||
|
||||
/* add no-break spaces for u_isWhitespace() what was not added above */
|
||||
|
|
@ -693,23 +693,25 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
USET_ADD_CP_AND_NEXT(sa, NNBSP);
|
||||
|
||||
/* add for u_digit() */
|
||||
sa->add(sa->set, U_a);
|
||||
sa->add(sa->set, U_z+1);
|
||||
sa->add(sa->set, U_A);
|
||||
sa->add(sa->set, U_Z+1);
|
||||
sa->add(sa->set, U_FW_a);
|
||||
sa->add(sa->set, U_FW_z+1);
|
||||
sa->add(sa->set, U_FW_A);
|
||||
sa->add(sa->set, U_FW_Z+1);
|
||||
sa->add(sa->set, u'a');
|
||||
sa->add(sa->set, u'z'+1);
|
||||
sa->add(sa->set, u'A');
|
||||
sa->add(sa->set, u'Z'+1);
|
||||
// fullwidth
|
||||
sa->add(sa->set, u'ａ');
|
||||
sa->add(sa->set, u'ｚ'+1);
|
||||
sa->add(sa->set, u'Ａ');
|
||||
sa->add(sa->set, u'Ｚ'+1);
|
||||
|
||||
/* add for u_isxdigit() */
|
||||
sa->add(sa->set, U_f+1);
|
||||
sa->add(sa->set, U_F+1);
|
||||
sa->add(sa->set, U_FW_f+1);
|
||||
sa->add(sa->set, U_FW_F+1);
|
||||
sa->add(sa->set, u'f'+1);
|
||||
sa->add(sa->set, u'F'+1);
|
||||
// fullwidth
|
||||
sa->add(sa->set, u'ｆ'+1);
|
||||
sa->add(sa->set, u'Ｆ'+1);
|
||||
|
||||
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
|
||||
sa->add(sa->set, WJ); /* range WJ..NOMDIG */
|
||||
sa->add(sa->set, 0x2060); /* range 2060..206f */
|
||||
sa->add(sa->set, 0xfff0);
|
||||
sa->add(sa->set, 0xfffb+1);
|
||||
sa->add(sa->set, 0xe0000);
|
||||
|
|
|
|||
6
deps/icu-small/source/common/ucnv2022.cpp
vendored
6
deps/icu-small/source/common/ucnv2022.cpp
vendored
|
|
@ -820,7 +820,7 @@ getKey_2022(char c,int32_t* key,int32_t* offset){
|
|||
return INVALID_2022;
|
||||
}
|
||||
|
||||
/*runs through a state machine to determine the escape sequence - codepage correspondance
|
||||
/*runs through a state machine to determine the escape sequence - codepage correspondence
|
||||
*/
|
||||
static void
|
||||
changeState_2022(UConverter* _this,
|
||||
|
|
@ -1424,7 +1424,7 @@ toUnicodeCallback(UConverter *cnv,
|
|||
* KSC5601 : alias to ibm-949 mapping table
|
||||
* GB2312 : alias to ibm-1386 mapping table
|
||||
* ISO-8859-1 : Algorithmic implemented as LATIN1 case
|
||||
* ISO-8859-7 : alisas to ibm-9409 mapping table
|
||||
* ISO-8859-7 : alias to ibm-9409 mapping table
|
||||
*/
|
||||
|
||||
/* preference order of JP charsets */
|
||||
|
|
@ -2324,7 +2324,7 @@ endloop:
|
|||
/***************************************************************
|
||||
* Rules for ISO-2022-KR encoding
|
||||
* i) The KSC5601 designator sequence should appear only once in a file,
|
||||
* at the begining of a line before any KSC5601 characters. This usually
|
||||
* at the beginning of a line before any KSC5601 characters. This usually
|
||||
* means that it appears by itself on the first line of the file
|
||||
* ii) There are only 2 shifting sequences SO to shift into double byte mode
|
||||
* and SI to shift into single byte mode
|
||||
|
|
|
|||
2
deps/icu-small/source/common/ucnv_bld.cpp
vendored
2
deps/icu-small/source/common/ucnv_bld.cpp
vendored
|
|
@ -427,7 +427,7 @@ getAlgorithmicTypeFromName(const char *realName)
|
|||
#define UCNV_CACHE_LOAD_FACTOR 2
|
||||
|
||||
/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
|
||||
/* Will always be called with the cnvCacheMutex alrady being held */
|
||||
/* Will always be called with the cnvCacheMutex already being held */
|
||||
/* by the calling function. */
|
||||
/* Stores the shared data in the SHARED_DATA_HASHTABLE
|
||||
* @param data The shared data
|
||||
|
|
|
|||
2
deps/icu-small/source/common/ucnv_err.cpp
vendored
2
deps/icu-small/source/common/ucnv_err.cpp
vendored
|
|
@ -321,7 +321,7 @@ UCNV_FROM_U_CALLBACK_ESCAPE (
|
|||
case UCNV_PRV_ESCAPE_CSS2:
|
||||
valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
|
||||
valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
|
||||
/* Always add space character, becase the next character might be whitespace,
|
||||
/* Always add space character, because the next character might be whitespace,
|
||||
which would erroneously be considered the termination of the escape sequence. */
|
||||
valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
|
||||
break;
|
||||
|
|
|
|||
4
deps/icu-small/source/common/ucnv_lmb.cpp
vendored
4
deps/icu-small/source/common/ucnv_lmb.cpp
vendored
|
|
@ -81,7 +81,7 @@
|
|||
[G] D1 [D2]
|
||||
|
||||
That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
|
||||
data bytes. The maximum size of a LMBCS chjaracter is 3 bytes:
|
||||
data bytes. The maximum size of a LMBCS character is 3 bytes:
|
||||
*/
|
||||
#define ULMBCS_CHARSIZE_MAX 3
|
||||
/*
|
||||
|
|
@ -164,7 +164,7 @@ beginning of internal 'system' range names: */
|
|||
/* Then we needed a place to put all the other ansi control characters
|
||||
that must be moved to different values because LMBCS reserves those
|
||||
values for other purposes. To represent the control characters, we start
|
||||
with a first byte of 0xF & add the control chaarcter value as the
|
||||
with a first byte of 0xF & add the control character value as the
|
||||
second byte */
|
||||
#define ULMBCS_GRP_CTRL 0x0F
|
||||
|
||||
|
|
|
|||
2
deps/icu-small/source/common/ucnv_u7.cpp
vendored
2
deps/icu-small/source/common/ucnv_u7.cpp
vendored
|
|
@ -814,7 +814,7 @@ const UConverterSharedData _UTF7Data=
|
|||
* the use of "~" in some servers as a home directory indicator.
|
||||
*
|
||||
* 5) UTF-7 permits multiple alternate forms to represent the same
|
||||
* string; in particular, printable US-ASCII chararacters can be
|
||||
* string; in particular, printable US-ASCII characters can be
|
||||
* represented in encoded form.
|
||||
*
|
||||
* In modified UTF-7, printable US-ASCII characters except for "&"
|
||||
|
|
|
|||
6
deps/icu-small/source/common/ucnvisci.cpp
vendored
6
deps/icu-small/source/common/ucnvisci.cpp
vendored
|
|
@ -992,7 +992,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
|
|||
|
||||
if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
|
||||
if (sourceChar == PNJ_TIPPI) {
|
||||
/* Make sure Tippi is converterd to Bindi. */
|
||||
/* Make sure Tippi is converted to Bindi. */
|
||||
sourceChar = PNJ_BINDI;
|
||||
} else if (sourceChar == PNJ_ADHAK) {
|
||||
/* This is for consonant cluster handling. */
|
||||
|
|
@ -1147,7 +1147,7 @@ static const uint16_t lookupTable[][2]={
|
|||
/* is the code point valid in current script? */ \
|
||||
if(sourceChar> ASCII_END && \
|
||||
(validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
|
||||
/* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
|
||||
/* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
|
||||
if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
|
||||
targetUniChar!=VOCALLIC_RR){ \
|
||||
targetUniChar=missingCharMarker; \
|
||||
|
|
@ -1272,7 +1272,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo
|
|||
goto CALLBACK;
|
||||
} else if (*contextCharToUnicode==ISCII_INV) {
|
||||
if (sourceChar==ISCII_HALANT) {
|
||||
targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
|
||||
targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
|
||||
} else {
|
||||
targetUniChar = ZWJ;
|
||||
}
|
||||
|
|
|
|||
4
deps/icu-small/source/common/ucurr.cpp
vendored
4
deps/icu-small/source/common/ucurr.cpp
vendored
|
|
@ -844,7 +844,7 @@ typedef struct {
|
|||
#endif
|
||||
|
||||
|
||||
// Comparason function used in quick sort.
|
||||
// Comparison function used in quick sort.
|
||||
static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
|
||||
const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a;
|
||||
const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b;
|
||||
|
|
@ -1530,7 +1530,7 @@ uprv_parseCurrency(const char* locale,
|
|||
|
||||
int32_t max = 0;
|
||||
int32_t matchIndex = -1;
|
||||
// case in-sensitive comparision against currency names
|
||||
// case in-sensitive comparison against currency names
|
||||
searchCurrencyName(currencyNames, total_currency_name_count,
|
||||
upperText, textLen, partialMatchLen, &max, &matchIndex);
|
||||
|
||||
|
|
|
|||
81
deps/icu-small/source/common/uhash.cpp
vendored
81
deps/icu-small/source/common/uhash.cpp
vendored
|
|
@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = {
|
|||
* or a pointer. If a hint bit is zero, then the associated
|
||||
* token is assumed to be an integer.
|
||||
*/
|
||||
#define HINT_BOTH_INTEGERS (0)
|
||||
#define HINT_KEY_POINTER (1)
|
||||
#define HINT_VALUE_POINTER (2)
|
||||
#define HINT_ALLOW_ZERO (4)
|
||||
|
||||
/********************************************************************
|
||||
* PRIVATE Implementation
|
||||
|
|
@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash,
|
|||
goto err;
|
||||
}
|
||||
U_ASSERT(hash != NULL);
|
||||
/* Cannot always check pointer here or iSeries sees NULL every time. */
|
||||
if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
|
||||
if ((hint & HINT_VALUE_POINTER) ?
|
||||
value.pointer == NULL :
|
||||
value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
|
||||
/* Disallow storage of NULL values, since NULL is returned by
|
||||
* get() to indicate an absent key. Storing NULL == removing.
|
||||
*/
|
||||
|
|
@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash,
|
|||
return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
|
||||
}
|
||||
|
||||
// Looks up a pointer key and returns the integer value stored in the slot
// that _uhash_find() yields. *found is set to whether that slot is occupied
// (its hashcode is neither empty nor deleted), which lets callers tell a
// stored 0 apart from a missing key.
U_CAPI int32_t U_EXPORT2
|
||||
uhash_getiAndFound(const UHashtable *hash,
|
||||
const void *key,
|
||||
UBool *found) {
|
||||
UHashTok keyholder;
|
||||
keyholder.pointer = (void *)key;
|
||||
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
|
||||
*found = !IS_EMPTY_OR_DELETED(e->hashcode);
|
||||
return e->value.integer;
|
||||
}
|
||||
|
||||
// Integer-key counterpart of the pointer-key lookup: returns the integer
// value stored in the slot that _uhash_find() yields for `key`. *found is set
// to whether that slot is occupied (hashcode neither empty nor deleted).
U_CAPI int32_t U_EXPORT2
|
||||
uhash_igetiAndFound(const UHashtable *hash,
|
||||
int32_t key,
|
||||
UBool *found) {
|
||||
UHashTok keyholder;
|
||||
keyholder.integer = key;
|
||||
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
|
||||
*found = !IS_EMPTY_OR_DELETED(e->hashcode);
|
||||
return e->value.integer;
|
||||
}
|
||||
|
||||
U_CAPI void* U_EXPORT2
|
||||
uhash_put(UHashtable *hash,
|
||||
void* key,
|
||||
|
|
@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash,
|
|||
keyholder.integer = key;
|
||||
valueholder.integer = value;
|
||||
return _uhash_put(hash, keyholder, valueholder,
|
||||
0, /* neither is a ptr */
|
||||
HINT_BOTH_INTEGERS,
|
||||
status).integer;
|
||||
}
|
||||
|
||||
// Stores an integer value under a pointer key and returns the integer field
// of whatever _uhash_put() returns. HINT_ALLOW_ZERO is passed so that
// _uhash_put() does not treat a value of 0 as a request to remove the item.
U_CAPI int32_t U_EXPORT2
|
||||
uhash_putiAllowZero(UHashtable *hash,
|
||||
void *key,
|
||||
int32_t value,
|
||||
UErrorCode *status) {
|
||||
UHashTok keyholder, valueholder;
|
||||
keyholder.pointer = key;
|
||||
valueholder.integer = value;
|
||||
return _uhash_put(hash, keyholder, valueholder,
|
||||
HINT_KEY_POINTER | HINT_ALLOW_ZERO,
|
||||
status).integer;
|
||||
}
|
||||
|
||||
|
||||
// Stores an integer value under an integer key and returns the integer field
// of whatever _uhash_put() returns. HINT_ALLOW_ZERO is passed so that
// _uhash_put() does not treat a value of 0 as a request to remove the item.
U_CAPI int32_t U_EXPORT2
|
||||
uhash_iputiAllowZero(UHashtable *hash,
|
||||
int32_t key,
|
||||
int32_t value,
|
||||
UErrorCode *status) {
|
||||
UHashTok keyholder, valueholder;
|
||||
keyholder.integer = key;
|
||||
valueholder.integer = value;
|
||||
return _uhash_put(hash, keyholder, valueholder,
|
||||
HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
|
||||
status).integer;
|
||||
}
|
||||
|
||||
|
|
@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) {
|
|||
U_ASSERT(hash->count == 0);
|
||||
}
|
||||
|
||||
// Returns true if the slot that _uhash_find() yields for this pointer key is
// occupied, i.e. its hashcode is neither empty nor deleted.
U_CAPI UBool U_EXPORT2
|
||||
uhash_containsKey(const UHashtable *hash, const void *key) {
|
||||
UHashTok keyholder;
|
||||
keyholder.pointer = (void *)key;
|
||||
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
|
||||
return !IS_EMPTY_OR_DELETED(e->hashcode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this integer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_icontainsKey(const UHashtable *hash, int32_t key) {
|
||||
UHashTok keyholder;
|
||||
keyholder.integer = key;
|
||||
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
|
||||
// The key is present when the located slot is neither empty nor deleted.
return !IS_EMPTY_OR_DELETED(e->hashcode);
|
||||
}
|
||||
|
||||
U_CAPI const UHashElement* U_EXPORT2
|
||||
uhash_find(const UHashtable *hash, const void* key) {
|
||||
UHashTok keyholder;
|
||||
|
|
|
|||
95
deps/icu-small/source/common/uhash.h
vendored
95
deps/icu-small/source/common/uhash.h
vendored
|
|
@ -23,7 +23,7 @@
|
|||
/**
|
||||
* UHashtable stores key-value pairs and does moderately fast lookup
|
||||
* based on keys. It provides a good tradeoff between access time and
|
||||
* storage space. As elements are added to it, it grows to accomodate
|
||||
* storage space. As elements are added to it, it grows to accommodate
|
||||
* them. By default, the table never shrinks, even if all elements
|
||||
* are removed from it.
|
||||
*
|
||||
|
|
@ -54,6 +54,13 @@
|
|||
* uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
|
||||
* and uhash_nextElement() consistent with one another.
|
||||
*
|
||||
* Keys and values can be integers.
|
||||
* Functions that work with an integer key have an "i" prefix.
|
||||
* Functions that work with an integer value have an "i" suffix.
|
||||
* As with putting a NULL value pointer, putting a zero value integer removes the item.
|
||||
* Except, there are pairs of functions that allow setting zero values
|
||||
* and fetching (value, found) pairs.
|
||||
*
|
||||
* To see everything in a hashtable, use uhash_nextElement() to
|
||||
* iterate through its contents. Each call to this function returns a
|
||||
* UHashElement pointer. A hash element contains a key, value, and
|
||||
|
|
@ -405,6 +412,44 @@ uhash_iputi(UHashtable *hash,
|
|||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Put a (key=pointer, value=integer) item in a UHashtable. If the
|
||||
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
|
||||
* call. valueDeleter must be NULL.
|
||||
* Storing a 0 value is possible; call uhash_getiAndFound() to retrieve values including zero.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key The key to store.
|
||||
* @param value The integer value to store.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The previous value, or 0 if none.
|
||||
* @see uhash_getiAndFound
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_putiAllowZero(UHashtable *hash,
|
||||
void *key,
|
||||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Put a (key=integer, value=integer) item in a UHashtable. If the
|
||||
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
|
||||
* call. valueDeleter must be NULL.
|
||||
* Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key The key to store.
|
||||
* @param value The integer value to store.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The previous value, or 0 if none.
|
||||
* @see uhash_igetiAndFound
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_iputiAllowZero(UHashtable *hash,
|
||||
int32_t key,
|
||||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Retrieve a pointer value from a UHashtable using a pointer key,
|
||||
* as previously stored by uhash_put().
|
||||
|
|
@ -448,6 +493,34 @@ U_CAPI int32_t U_EXPORT2
|
|||
uhash_igeti(const UHashtable *hash,
|
||||
int32_t key);
|
||||
|
||||
/**
|
||||
* Retrieves an integer value from a UHashtable using a pointer key,
|
||||
* as previously stored by uhash_putiAllowZero() or uhash_puti().
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key A pointer key stored in a hashtable
|
||||
* @param found A pointer to a boolean which will be set for whether the key was found.
|
||||
* @return The requested item, or 0 if not found.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_getiAndFound(const UHashtable *hash,
|
||||
const void *key,
|
||||
UBool *found);
|
||||
|
||||
/**
|
||||
* Retrieves an integer value from a UHashtable using an integer key,
|
||||
* as previously stored by uhash_iputiAllowZero() or uhash_iputi().
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @param found A pointer to a boolean which will be set for whether the key was found.
|
||||
* @return The requested item, or 0 if not found.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_igetiAndFound(const UHashtable *hash,
|
||||
int32_t key,
|
||||
UBool *found);
|
||||
|
||||
/**
|
||||
* Remove an item from a UHashtable stored by uhash_put().
|
||||
* @param hash The target UHashtable.
|
||||
|
|
@ -495,6 +568,26 @@ uhash_iremovei(UHashtable *hash,
|
|||
U_CAPI void U_EXPORT2
|
||||
uhash_removeAll(UHashtable *hash);
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this pointer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key A pointer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_containsKey(const UHashtable *hash, const void *key);
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this integer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_icontainsKey(const UHashtable *hash, int32_t key);
|
||||
|
||||
/**
|
||||
* Locate an element of a UHashtable. The caller must not modify the
|
||||
* returned object. The primary use of this function is to obtain the
|
||||
|
|
|
|||
42
deps/icu-small/source/common/uloc.cpp
vendored
42
deps/icu-small/source/common/uloc.cpp
vendored
|
|
@ -143,7 +143,7 @@ static const char * const LANGUAGES[] = {
|
|||
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
|
||||
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
|
||||
"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
|
||||
"ml", "mn", "mnc", "mni", "mo",
|
||||
"ml", "mn", "mnc", "mni",
|
||||
"moh", "mos", "mr", "mrj",
|
||||
"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
|
||||
"my", "mye", "myv", "mzn",
|
||||
|
|
@ -166,9 +166,9 @@ static const char * const LANGUAGES[] = {
|
|||
"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
|
||||
"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
|
||||
"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
|
||||
"sv", "sw", "swb", "swc", "syc", "syr", "szl",
|
||||
"sv", "sw", "swb", "syc", "syr", "szl",
|
||||
"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
|
||||
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
|
||||
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
|
||||
"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
|
||||
"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
|
||||
|
|
@ -181,7 +181,7 @@ static const char * const LANGUAGES[] = {
|
|||
"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
|
||||
"zun", "zxx", "zza",
|
||||
NULL,
|
||||
"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
|
||||
"in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
@ -260,7 +260,7 @@ static const char * const LANGUAGES_3[] = {
|
|||
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
|
||||
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
|
||||
"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
|
||||
"mal", "mon", "mnc", "mni", "mol",
|
||||
"mal", "mon", "mnc", "mni",
|
||||
"moh", "mos", "mar", "mrj",
|
||||
"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
|
||||
"mya", "mye", "myv", "mzn",
|
||||
|
|
@ -283,9 +283,9 @@ static const char * const LANGUAGES_3[] = {
|
|||
"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
|
||||
"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
|
||||
"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
|
||||
"swe", "swa", "swb", "swc", "syc", "syr", "szl",
|
||||
"swe", "swa", "swb", "syc", "syr", "szl",
|
||||
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
|
||||
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
|
||||
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
|
||||
"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
|
||||
"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
|
||||
|
|
@ -298,8 +298,8 @@ static const char * const LANGUAGES_3[] = {
|
|||
"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
|
||||
"zun", "zxx", "zza",
|
||||
NULL,
|
||||
/* "in", "iw", "ji", "jw", "sh", */
|
||||
"ind", "heb", "yid", "jaw", "srp",
|
||||
/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */
|
||||
"ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
@ -334,13 +334,13 @@ static const char * const COUNTRIES[] = {
|
|||
"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
|
||||
"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
|
||||
"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
|
||||
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
|
||||
"DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
|
||||
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
|
||||
"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
|
||||
"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
|
||||
"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
|
||||
"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
|
||||
"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
|
||||
"ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
|
||||
"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
|
||||
"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
|
||||
"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
|
||||
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
|
||||
|
|
@ -357,7 +357,7 @@ static const char * const COUNTRIES[] = {
|
|||
"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
|
||||
"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
|
||||
"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
|
||||
"WS", "YE", "YT", "ZA", "ZM", "ZW",
|
||||
"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
|
||||
NULL,
|
||||
"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
|
||||
NULL
|
||||
|
|
@ -397,10 +397,10 @@ static const char * const COUNTRIES_3[] = {
|
|||
"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
|
||||
/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
|
||||
"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
|
||||
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
|
||||
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
|
||||
/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
|
||||
"DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
|
||||
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
|
||||
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
|
||||
/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
|
||||
"DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
|
||||
/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
|
||||
"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
|
||||
/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
|
||||
|
|
@ -409,8 +409,8 @@ static const char * const COUNTRIES_3[] = {
|
|||
"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
|
||||
/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
|
||||
"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
|
||||
/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
|
||||
"IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
|
||||
/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
|
||||
"XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
|
||||
/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
|
||||
"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
|
||||
/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
|
||||
|
|
@ -443,8 +443,8 @@ static const char * const COUNTRIES_3[] = {
|
|||
"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
|
||||
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
|
||||
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
|
||||
/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
|
||||
"WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
|
||||
"WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
NULL,
|
||||
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
|
||||
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ initFromResourceBundle(UErrorCode& sts) {
|
|||
if (U_FAILURE(sts)) {
|
||||
break;
|
||||
}
|
||||
// check if this is an alias of canoncal legacy type
|
||||
// check if this is an alias of canonical legacy type
|
||||
if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
|
||||
const char* from = ures_getKey(typeAliasDataEntry.getAlias());
|
||||
if (isTZ) {
|
||||
|
|
|
|||
41
deps/icu-small/source/common/uloc_tag.cpp
vendored
41
deps/icu-small/source/common/uloc_tag.cpp
vendored
|
|
@ -129,7 +129,6 @@ static const char* const LEGACY[] = {
|
|||
// Legacy tags with no preferred value in the IANA
|
||||
// registry. Kept for now for the backward compatibility
|
||||
// because ICU has mapped them this way.
|
||||
"cel-gaulish", "xtg-x-cel-gaulish",
|
||||
"i-default", "en-x-i-default",
|
||||
"i-enochian", "und-x-i-enochian",
|
||||
"i-mingo", "see-x-i-mingo",
|
||||
|
|
@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
// Walks localeID one SEP-delimited segment at a time and returns a pointer to
// the start of the first segment that _isTKey() accepts, or nullptr if no
// segment qualifies. The returned pointer points into localeID itself.
U_CAPI const char * U_EXPORT2
|
||||
ultag_getTKeyStart(const char *localeID) {
|
||||
const char *result = localeID;
|
||||
const char *sep;
|
||||
while((sep = uprv_strchr(result, SEP)) != nullptr) {
|
||||
if (_isTKey(result, static_cast<int32_t>(sep - result))) {
|
||||
return result;
|
||||
}
|
||||
// Advance past the separator to the start of the next segment.
result = ++sep;
|
||||
}
|
||||
// Last segment has no trailing separator; -1 is passed as the length
// (presumably meaning "NUL-terminated" -- confirm against _isTKey).
if (_isTKey(result, -1)) {
|
||||
return result;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static UBool
|
||||
_isTValue(const char* s, int32_t len)
|
||||
{
|
||||
|
|
@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
|
|||
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
|
||||
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
|
||||
|
||||
|
||||
if (len < 0) {
|
||||
len = (int32_t)uprv_strlen(s);
|
||||
}
|
||||
switch (state) {
|
||||
case kStart:
|
||||
if (ultag_isLanguageSubtag(s, len)) {
|
||||
if (ultag_isLanguageSubtag(s, len) && len != 4) {
|
||||
state = kGotLanguage;
|
||||
return TRUE;
|
||||
}
|
||||
|
|
@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
|
|||
return;
|
||||
}
|
||||
|
||||
/* Determine if variants already exists */
|
||||
if (ultag_getVariantsSize(langtag)) {
|
||||
posixVariant = TRUE;
|
||||
}
|
||||
|
||||
n = ultag_getExtensionsSize(langtag);
|
||||
|
||||
/* resolve locale keywords and reordering keys */
|
||||
|
|
@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
|
|||
key = ultag_getExtensionKey(langtag, i);
|
||||
type = ultag_getExtensionValue(langtag, i);
|
||||
if (*key == LDMLEXT) {
|
||||
/* Determine if variants already exist */
|
||||
if (ultag_getVariantsSize(langtag)) {
|
||||
posixVariant = TRUE;
|
||||
}
|
||||
|
||||
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
break;
|
||||
|
|
@ -2028,7 +2047,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(tagBuf, tag, tagLen);
|
||||
|
||||
if (tagLen > 0) {
|
||||
uprv_memcpy(tagBuf, tag, tagLen);
|
||||
}
|
||||
*(tagBuf + tagLen) = 0;
|
||||
|
||||
/* create a ULanguageTag */
|
||||
|
|
@ -2692,8 +2714,7 @@ ulocimp_toLanguageTag(const char* localeID,
|
|||
if (U_SUCCESS(tmpStatus)) {
|
||||
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
|
||||
/* return private use only tag */
|
||||
static const char PREFIX[] = { PRIVATEUSE, SEP };
|
||||
sink.Append(PREFIX, sizeof(PREFIX));
|
||||
sink.Append("und-x-", 6);
|
||||
sink.Append(buf.data(), buf.length());
|
||||
done = TRUE;
|
||||
} else if (strict) {
|
||||
|
|
|
|||
3
deps/icu-small/source/common/ulocimp.h
vendored
3
deps/icu-small/source/common/ulocimp.h
vendored
|
|
@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len);
|
|||
U_CFUNC UBool
|
||||
ultag_isVariantSubtags(const char* s, int32_t len);
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ultag_getTKeyStart(const char *localeID);
|
||||
|
||||
U_CFUNC const char*
|
||||
ulocimp_toBcpKey(const char* key);
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,6 @@ public:
|
|||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) = 0;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append().
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
|
|
@ -81,7 +80,7 @@ public:
|
|||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char* bytes, int32_t n) {
|
||||
Append(bytes, n);
|
||||
|
|
@ -97,13 +96,12 @@ public:
|
|||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char8_t* bytes, int32_t n) {
|
||||
Append(reinterpret_cast<const char*>(bytes), n);
|
||||
}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@
|
|||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
|
|
@ -378,6 +380,7 @@ public:
|
|||
|
||||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@
|
|||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/stringtriebuilder.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BytesTrieElement;
|
||||
|
|
@ -125,6 +127,8 @@ public:
|
|||
BytesTrieBuilder &clear();
|
||||
|
||||
private:
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
|
||||
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
|
||||
|
||||
|
|
@ -168,6 +172,7 @@ private:
|
|||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget);
|
||||
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
|
||||
|
||||
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
|
||||
BytesTrieElement *elements;
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
* \file
|
||||
* \brief (Non API- contains Doxygen definitions)
|
||||
*
|
||||
* This file contains documentation for Doxygen and doesnot have
|
||||
* This file contains documentation for Doxygen and does not have
|
||||
* any significance with respect to C or C++ API
|
||||
*/
|
||||
|
||||
|
|
@ -74,7 +74,7 @@
|
|||
* </tr>
|
||||
* <tr>
|
||||
* <td>Strings and Character Iteration</td>
|
||||
* <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td>
|
||||
* <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
|
||||
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
|
|
@ -128,9 +128,9 @@
|
|||
* <td>icu::Normalizer2</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Calendars</td>
|
||||
* <td>Calendars and Time Zones</td>
|
||||
* <td>ucal.h</td>
|
||||
* <td>icu::Calendar</td>
|
||||
* <td>icu::Calendar, icu::TimeZone</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Date and Time Formatting</td>
|
||||
|
|
|
|||
|
|
@ -117,14 +117,13 @@
|
|||
/* === Basic types === */
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
struct UPlugData;
|
||||
/**
|
||||
* @{
|
||||
* Opaque structure passed to/from a plugin.
|
||||
* use the APIs to access it.
|
||||
* Typedef for opaque structure passed to/from a plugin.
|
||||
* Use the APIs to access it.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
|
||||
struct UPlugData;
|
||||
typedef struct UPlugData UPlugData;
|
||||
|
||||
/** @} */
|
||||
|
|
|
|||
|
|
@ -91,8 +91,6 @@ enum ULocMatchDemotion {
|
|||
typedef enum ULocMatchDemotion ULocMatchDemotion;
|
||||
#endif
|
||||
|
||||
#ifndef U_FORCE_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Builder option for whether to include or ignore one-way (fallback) match data.
|
||||
* The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
|
||||
|
|
@ -108,20 +106,20 @@ typedef enum ULocMatchDemotion ULocMatchDemotion;
|
|||
* but not if it is merely a fallback.
|
||||
*
|
||||
* @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
enum ULocMatchDirection {
|
||||
/**
|
||||
* Locale matching includes one-way matches such as Breton→French. (default)
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_WITH_ONE_WAY,
|
||||
/**
|
||||
* Locale matching limited to two-way matches including e.g. Danish↔Norwegian
|
||||
* but ignoring one-way matches.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_ONLY_TWO_WAY
|
||||
};
|
||||
|
|
@ -129,8 +127,6 @@ enum ULocMatchDirection {
|
|||
typedef enum ULocMatchDirection ULocMatchDirection;
|
||||
#endif
|
||||
|
||||
#endif // U_FORCE_HIDE_DRAFT_API
|
||||
|
||||
struct UHashtable;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
|
@ -463,14 +459,13 @@ public:
|
|||
*/
|
||||
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Option for whether to include or ignore one-way (fallback) match data.
|
||||
* By default, they are included.
|
||||
*
|
||||
* @param direction the match direction to set.
|
||||
* @return this Builder object
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
Builder &setDirection(ULocMatchDirection direction) {
|
||||
if (U_SUCCESS(errorCode_)) {
|
||||
|
|
@ -478,7 +473,6 @@ public:
|
|||
}
|
||||
return *this;
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
|
|
@ -704,7 +698,7 @@ private:
|
|||
LSR *lsrs;
|
||||
int32_t supportedLocalesLength;
|
||||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
const LSR **supportedLSRs;
|
||||
|
|
|
|||
4
deps/icu-small/source/common/unicode/locid.h
vendored
4
deps/icu-small/source/common/unicode/locid.h
vendored
|
|
@ -571,15 +571,13 @@ public:
|
|||
*/
|
||||
void minimizeSubtags(UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Canonicalize the locale ID of this object according to CLDR.
|
||||
* @param status the status code
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
* @see createCanonical
|
||||
*/
|
||||
void canonicalize(UErrorCode& status);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
|
|
|
|||
|
|
@ -225,10 +225,8 @@ public:
|
|||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Currently implemented completely only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
* Implemented completely for all built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
|
|
@ -381,11 +379,9 @@ public:
|
|||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes,
|
||||
* but it is currently optimized for UTF-8 only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* For other modes it currently converts to UTF-16 and calls isNormalized().
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
|
|
@ -543,10 +539,8 @@ public:
|
|||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Currently implemented completely only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
* Implemented completely for most built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
|
|
@ -676,11 +670,9 @@ public:
|
|||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes,
|
||||
* but it is currently optimized for UTF-8 only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* For other modes it currently converts to UTF-16 and calls isNormalized().
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
|
|
|
|||
|
|
@ -880,6 +880,6 @@ namespace std {
|
|||
#else
|
||||
# define U_CALLCONV_FPTR
|
||||
#endif
|
||||
/* @} */
|
||||
/** @} */
|
||||
|
||||
#endif // _PLATFORM_H
|
||||
|
|
|
|||
|
|
@ -75,12 +75,11 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* str);
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char8_t * pointer.
|
||||
* @param str a NUL-terminated const char8_t * pointer
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
|
||||
#endif
|
||||
|
|
@ -88,10 +87,9 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
* Constructs an empty StringPiece.
|
||||
* Needed for type disambiguation from multiple other overloads.
|
||||
* @param p nullptr
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Constructs from a std::string.
|
||||
|
|
@ -99,17 +97,15 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
*/
|
||||
StringPiece(const std::string& str)
|
||||
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a std::u8string.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const std::u8string& str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) { }
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Constructs from some other implementation of a string piece class, from any
|
||||
|
|
@ -152,18 +148,16 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a const char8_t * pointer and a specified length.
|
||||
* @param str a const char8_t * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str, int32_t len) :
|
||||
StringPiece(reinterpret_cast<const char*>(str), len) {}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
|
|
@ -233,13 +227,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
*/
|
||||
void set(const char* str);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* xdata, int32_t len) {
|
||||
set(reinterpret_cast<const char*>(xdata), len);
|
||||
|
|
@ -248,13 +241,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* str) {
|
||||
set(reinterpret_cast<const char*>(str));
|
||||
}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes the first n string units.
|
||||
|
|
@ -286,13 +278,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Searches the StringPiece for the given search string (needle);
|
||||
* @param needle The string for which to search.
|
||||
* @param offset Where to start searching within this string (haystack).
|
||||
* @return The offset of needle in haystack, or -1 if not found.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t find(StringPiece needle, int32_t offset);
|
||||
|
||||
|
|
@ -301,10 +292,9 @@ class U_COMMON_API StringPiece : public UMemory {
|
|||
* similar to std::string::compare().
|
||||
* @param other The string to compare to.
|
||||
* @return below zero if this < other; above zero if this > other; 0 if this == other.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t compare(StringPiece other);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Maximum integer, used as a default value for substring methods.
|
||||
|
|
|
|||
21
deps/icu-small/source/common/unicode/ubrk.h
vendored
21
deps/icu-small/source/common/unicode/ubrk.h
vendored
|
|
@ -296,6 +296,8 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
|||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
|
|
@ -312,7 +314,7 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
|||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used if any allocations were necessary.
|
||||
* @return pointer to the new clone
|
||||
* @stable ICU 2.0
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
|
|
@ -321,6 +323,23 @@ ubrk_safeClone(
|
|||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
|
|
|
|||
4
deps/icu-small/source/common/unicode/ucnv.h
vendored
4
deps/icu-small/source/common/unicode/ucnv.h
vendored
|
|
@ -1699,10 +1699,10 @@ ucnv_countAvailable(void);
|
|||
|
||||
/**
|
||||
* Gets the canonical converter name of the specified converter from a list of
|
||||
* all available converters contaied in the alias file. All converters
|
||||
* all available converters contained in the alias file. All converters
|
||||
* in this list can be opened.
|
||||
*
|
||||
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
|
||||
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
|
||||
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
|
||||
* @see ucnv_countAvailable
|
||||
* @stable ICU 2.0
|
||||
|
|
|
|||
|
|
@ -45,11 +45,11 @@
|
|||
* from the serialized form.
|
||||
*/
|
||||
|
||||
struct UConverterSelector;
|
||||
/**
|
||||
* @{
|
||||
* The selector data structure
|
||||
* Typedef for selector data structure.
|
||||
*/
|
||||
struct UConverterSelector;
|
||||
typedef struct UConverterSelector UConverterSelector;
|
||||
/** @} */
|
||||
|
||||
|
|
|
|||
|
|
@ -40,8 +40,8 @@ U_NAMESPACE_BEGIN
|
|||
*
|
||||
* <code>UnicodeFilter</code> defines a protocol for selecting a
|
||||
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
|
||||
* Currently, filters are used in conjunction with classes like {@link
|
||||
* Transliterator} to only process selected characters through a
|
||||
* Currently, filters are used in conjunction with classes like
|
||||
* {@link Transliterator} to only process selected characters through a
|
||||
* transformation.
|
||||
*
|
||||
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
|
||||
|
|
|
|||
66
deps/icu-small/source/common/unicode/uniset.h
vendored
66
deps/icu-small/source/common/unicode/uniset.h
vendored
|
|
@ -178,8 +178,6 @@ class RuleCharacterIterator;
|
|||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* \htmlonly<blockquote>\endhtmlonly
|
||||
|
|
@ -601,7 +599,7 @@ public:
|
|||
|
||||
/**
|
||||
* Make this object represent the range `start - end`.
|
||||
* If `end > start` then this object is set to an empty range.
|
||||
* If `start > end` then this object is set to an empty range.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character in the set, inclusive
|
||||
|
|
@ -1077,7 +1075,7 @@ public:
|
|||
/**
|
||||
* Adds the specified range to this set if it is not already
|
||||
* present. If this set already contains the specified range,
|
||||
* the call leaves this set unchanged. If <code>end > start</code>
|
||||
* the call leaves this set unchanged. If <code>start > end</code>
|
||||
* then an empty range is added, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic OR, or a set UNION.
|
||||
* A frozen set will not be modified.
|
||||
|
|
@ -1095,6 +1093,9 @@ public:
|
|||
* present. If this set already contains the specified character,
|
||||
* the call leaves this set unchanged.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& add(UChar32 c);
|
||||
|
|
@ -1104,8 +1105,8 @@ public:
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
|
|
@ -1124,8 +1125,8 @@ public:
|
|||
|
||||
public:
|
||||
/**
|
||||
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
|
||||
* If this set already contains any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
|
|
@ -1135,7 +1136,6 @@ public:
|
|||
|
||||
/**
|
||||
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
|
|
@ -1145,7 +1145,6 @@ public:
|
|||
|
||||
/**
|
||||
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
|
|
@ -1155,7 +1154,6 @@ public:
|
|||
|
||||
/**
|
||||
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
|
|
@ -1165,7 +1163,7 @@ public:
|
|||
|
||||
/**
|
||||
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return a newly created set containing the given string.
|
||||
* The caller owns the return object and is responsible for deleting it.
|
||||
|
|
@ -1185,15 +1183,13 @@ public:
|
|||
|
||||
/**
|
||||
* Retain only the elements in this set that are contained in the
|
||||
* specified range. If <code>end > start</code> then an empty range is
|
||||
* specified range. If <code>start > end</code> then an empty range is
|
||||
* retained, leaving the set empty. This is equivalent to
|
||||
* a boolean logic AND, or a set INTERSECTION.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param end last character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
|
||||
|
|
@ -1202,14 +1198,31 @@ public:
|
|||
/**
|
||||
* Retain the specified character from this set if it is present.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& retain(UChar32 c);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
* will only contain s if it did contain s.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @draft ICU 69
|
||||
*/
|
||||
UnicodeSet& retain(const UnicodeString &s);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes the specified range from this set if it is present.
|
||||
* The set will not contain the specified range once the call
|
||||
* returns. If <code>end > start</code> then an empty range is
|
||||
* returns. If <code>start > end</code> then an empty range is
|
||||
* removed, leaving the set unchanged.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
|
|
@ -1226,6 +1239,9 @@ public:
|
|||
* The set will not contain the specified character once the call
|
||||
* returns.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& remove(UChar32 c);
|
||||
|
|
@ -1253,15 +1269,13 @@ public:
|
|||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If <code>end > start</code>
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic XOR.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param end last character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
|
||||
|
|
@ -1271,16 +1285,18 @@ public:
|
|||
* will be removed if it is in this set, or will be added if it is
|
||||
* not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& complement(UChar32 c);
|
||||
|
||||
/**
|
||||
* Complement the specified string in this set.
|
||||
* The set will not contain the specified string once the call
|
||||
* returns.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* The string will be removed if it is in this set, or will be added if it is not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
|
|
|
|||
|
|
@ -44,9 +44,10 @@ struct UConverter; // unicode/ucnv.h
|
|||
#ifndef USTRING_H
|
||||
/**
|
||||
* \ingroup ustring_ustrlen
|
||||
* @param s Pointer to sequence of UChars.
|
||||
* @return Length of sequence.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strlen(const UChar *s);
|
||||
U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
|
@ -2766,7 +2767,6 @@ public:
|
|||
* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
* @param options Options bit set, see ucasemap_open().
|
||||
* @return A reference to this.
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
|
|
@ -3614,7 +3614,7 @@ private:
|
|||
// turn a bogus string into an empty one
|
||||
void unBogus();
|
||||
|
||||
// implements assigment operator, copy constructor, and fastCopyFrom()
|
||||
// implements assignment operator, copy constructor, and fastCopyFrom()
|
||||
UnicodeString ©From(const UnicodeString &src, UBool fastCopy=false);
|
||||
|
||||
// Copies just the fields without memory management.
|
||||
|
|
|
|||
16
deps/icu-small/source/common/unicode/urename.h
vendored
16
deps/icu-small/source/common/unicode/urename.h
vendored
|
|
@ -482,6 +482,7 @@
|
|||
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
|
||||
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
|
||||
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
|
||||
#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
|
||||
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
|
||||
#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
|
||||
#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
|
||||
|
|
@ -534,6 +535,7 @@
|
|||
#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
|
||||
#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
|
||||
#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
|
||||
#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal)
|
||||
#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
|
||||
#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
|
||||
#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
|
||||
|
|
@ -962,6 +964,7 @@
|
|||
#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
|
||||
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
|
||||
#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
|
||||
#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey)
|
||||
#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
|
||||
#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
|
||||
#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
|
||||
|
|
@ -970,6 +973,7 @@
|
|||
#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
|
||||
#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
|
||||
#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
|
||||
#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound)
|
||||
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
|
||||
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
|
||||
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
|
||||
|
|
@ -977,12 +981,15 @@
|
|||
#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
|
||||
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
|
||||
#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
|
||||
#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey)
|
||||
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
|
||||
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
|
||||
#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound)
|
||||
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
|
||||
#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
|
||||
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
|
||||
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
|
||||
#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero)
|
||||
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
|
||||
#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
|
||||
#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
|
||||
|
|
@ -990,6 +997,7 @@
|
|||
#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
|
||||
#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
|
||||
#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
|
||||
#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero)
|
||||
#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
|
||||
#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
|
||||
#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
|
||||
|
|
@ -1150,6 +1158,8 @@
|
|||
#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
|
||||
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
|
||||
#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
|
||||
#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase)
|
||||
#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower)
|
||||
#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
|
||||
#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
|
||||
#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
|
||||
|
|
@ -1672,6 +1682,9 @@
|
|||
#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
|
||||
#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
|
||||
#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
|
||||
#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints)
|
||||
#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange)
|
||||
#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString)
|
||||
#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
|
||||
#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
|
||||
#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
|
||||
|
|
@ -1695,12 +1708,15 @@
|
|||
#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
|
||||
#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
|
||||
#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
|
||||
#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints)
|
||||
#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
|
||||
#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
|
||||
#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
|
||||
#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
|
||||
#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
|
||||
#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
|
||||
#define uset_retainAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints)
|
||||
#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString)
|
||||
#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
|
||||
#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
|
||||
#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
|
||||
|
|
|
|||
95
deps/icu-small/source/common/unicode/uset.h
vendored
95
deps/icu-small/source/common/unicode/uset.h
vendored
|
|
@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2
|
|||
uset_addString(USet* set, const UChar* str, int32_t strLen);
|
||||
|
||||
/**
|
||||
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
|
||||
* If this set already contains any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param set the object to which to add the character
|
||||
* @param str the source string
|
||||
|
|
@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
|
|||
U_CAPI void U_EXPORT2
|
||||
uset_removeString(USet* set, const UChar* str, int32_t strLen);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes from this set all of its elements that are contained in the
|
||||
* specified set. This operation effectively modifies this
|
||||
|
|
@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet);
|
|||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object for which to retain only the specified range
|
||||
* @param start first character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param end last character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retain(USet* set, UChar32 start, UChar32 end);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
* will only contain s if it did contain s.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainString(USet *set, const UChar *str, int32_t length);
|
||||
|
||||
/**
|
||||
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Retains only the elements in this set that are contained in the
|
||||
* specified set. In other words, removes from this set all of
|
||||
|
|
@ -696,6 +736,49 @@ uset_compact(USet* set);
|
|||
U_CAPI void U_EXPORT2
|
||||
uset_complement(USet* set);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic XOR.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementRange(USet *set, UChar32 start, UChar32 end);
|
||||
|
||||
/**
|
||||
* Complements the specified string in this set.
|
||||
* The string will be removed if it is in this set, or will be added if it is not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementString(USet *set, const UChar *str, int32_t length);
|
||||
|
||||
/**
|
||||
* Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Complements in this set all elements contained in the specified
|
||||
* set. Any character in the other set will be removed if it is
|
||||
|
|
|
|||
|
|
@ -323,7 +323,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
|
|||
#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
|
||||
/** Presentation form option:
|
||||
* Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
|
||||
* their unshaped correspondants in range 0+06xx, before shaping.
|
||||
* their unshaped correspondents in range 0+06xx, before shaping.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
|
||||
|
|
|
|||
17
deps/icu-small/source/common/unicode/utrace.h
vendored
17
deps/icu-small/source/common/unicode/utrace.h
vendored
|
|
@ -173,24 +173,23 @@ typedef enum UTraceFunctionNumber {
|
|||
UTRACE_RES_DATA_LIMIT,
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* The lowest break iterator location.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_START=0x4000,
|
||||
|
||||
/**
|
||||
* Indicates that a character instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
|
||||
|
||||
/**
|
||||
* Indicates that a word instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_WORD,
|
||||
|
||||
|
|
@ -200,21 +199,21 @@ typedef enum UTraceFunctionNumber {
|
|||
* Provides one C-style string to UTraceData: the lb value ("",
|
||||
* "loose", "strict", or "normal").
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_LINE,
|
||||
|
||||
/**
|
||||
* Indicates that a sentence instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_SENTENCE,
|
||||
|
||||
/**
|
||||
* Indicates that a title instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_TITLE,
|
||||
|
||||
|
|
@ -224,12 +223,10 @@ typedef enum UTraceFunctionNumber {
|
|||
* Provides one C-style string to UTraceData: the script code of what
|
||||
* the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_BREAK_ENGINE,
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* One more than the highest normal break iterator trace location.
|
||||
|
|
|
|||
12
deps/icu-small/source/common/unicode/uvernum.h
vendored
12
deps/icu-small/source/common/unicode/uvernum.h
vendored
|
|
@ -60,13 +60,13 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 68
|
||||
#define U_ICU_VERSION_MAJOR_NUM 69
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 2
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
|
|
@ -86,7 +86,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _68
|
||||
#define U_ICU_VERSION_SUFFIX _69
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
|
|
@ -139,7 +139,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "68.2"
|
||||
#define U_ICU_VERSION "69.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
|
|
@ -152,13 +152,13 @@
|
|||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "68"
|
||||
#define U_ICU_VERSION_SHORT "69"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "68.2"
|
||||
#define U_ICU_DATA_VERSION "69.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
|
|
|||
128
deps/icu-small/source/common/uniset.cpp
vendored
128
deps/icu-small/source/common/uniset.cpp
vendored
|
|
@ -30,24 +30,6 @@
|
|||
#include "bmpset.h"
|
||||
#include "unisetspan.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||
#define UNICODESET_HIGH 0x0110000
|
||||
|
||||
|
|
@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
|
|||
* @return <tt>true</tt> if this set contains the specified string
|
||||
*/
|
||||
UBool UnicodeSet::contains(const UnicodeString& s) const {
|
||||
if (s.length() == 0) return FALSE;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
return stringsContains(s);
|
||||
|
|
@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
|
|||
if (hasStrings()) {
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
|
||||
//if (s.length() == 0) {
|
||||
// // Empty strings match everything
|
||||
// return TRUE;
|
||||
//}
|
||||
// assert(s.length() != 0); // We enforce this elsewhere
|
||||
if (s.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UChar32 c = s.char32At(0);
|
||||
if ((c & 0xFF) == v) {
|
||||
return TRUE;
|
||||
|
|
@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
|||
int32_t limit,
|
||||
UBool incremental) {
|
||||
if (offset == limit) {
|
||||
// Strings, if any, have length != 0, so we don't worry
|
||||
// about them here. If we ever allow zero-length strings
|
||||
// we much check for them here.
|
||||
if (contains(U_ETHER)) {
|
||||
return incremental ? U_PARTIAL_MATCH : U_MATCH;
|
||||
} else {
|
||||
|
|
@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
|||
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
|
||||
|
||||
//if (trial.length() == 0) {
|
||||
// return U_MATCH; // null-string always matches
|
||||
//}
|
||||
// assert(trial.length() != 0); // We ensure this elsewhere
|
||||
if (trial.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
|
||||
|
||||
|
|
@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (!stringsContains(s)) {
|
||||
|
|
@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
|||
|
||||
/**
|
||||
* Adds the given string, in order, to 'strings'. The given string
|
||||
* must have been checked by the caller to not be empty and to not
|
||||
* already be in 'strings'.
|
||||
* must have been checked by the caller to not already be in 'strings'.
|
||||
*/
|
||||
void UnicodeSet::_add(const UnicodeString& s) {
|
||||
if (isFrozen() || isBogus()) {
|
||||
|
|
@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
|
|||
* @param string to test
|
||||
*/
|
||||
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
|
||||
//if (s.length() < 1) {
|
||||
// throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
//}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
int32_t sLength = s.length();
|
||||
if (sLength == 1) return s.charAt(0);
|
||||
if (sLength == 2) {
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
|
|||
return retain(c, c);
|
||||
}
|
||||
|
||||
UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
|
||||
if (isFrozen() || isBogus()) { return *this; }
|
||||
UChar32 cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
bool isIn = stringsContains(s);
|
||||
// Check for getRangeCount() first to avoid somewhat-expensive size()
|
||||
// when there are single code points.
|
||||
if (isIn && getRangeCount() == 0 && size() == 1) {
|
||||
return *this;
|
||||
}
|
||||
clear();
|
||||
if (isIn) {
|
||||
_add(s);
|
||||
}
|
||||
} else {
|
||||
retain(cp, cp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the specified range from this set if it is present.
|
||||
* The set will not contain the specified range once the call
|
||||
|
|
@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
|
|||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings != nullptr && strings->removeElement((void*) &s)) {
|
||||
|
|
@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) {
|
|||
* Complement the specified string in this set.
|
||||
* The set will not contain the specified string once the call
|
||||
* returns.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (stringsContains(s)) {
|
||||
|
|
@ -2001,22 +1991,22 @@ escapeUnprintable) {
|
|||
}
|
||||
// Okay to let ':' pass through
|
||||
switch (c) {
|
||||
case SET_OPEN:
|
||||
case SET_CLOSE:
|
||||
case HYPHEN:
|
||||
case COMPLEMENT:
|
||||
case INTERSECTION:
|
||||
case BACKSLASH:
|
||||
case OPEN_BRACE:
|
||||
case CLOSE_BRACE:
|
||||
case COLON:
|
||||
case u'[':
|
||||
case u']':
|
||||
case u'-':
|
||||
case u'^':
|
||||
case u'&':
|
||||
case u'\\':
|
||||
case u'{':
|
||||
case u'}':
|
||||
case u':':
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
|||
backslashCount = 0;
|
||||
} else {
|
||||
result.append(c);
|
||||
if (c == BACKSLASH) {
|
||||
if (c == u'\\') {
|
||||
++backslashCount;
|
||||
} else {
|
||||
backslashCount = 0;
|
||||
|
|
@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
|||
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
result.append(SET_OPEN);
|
||||
result.append(u'[');
|
||||
|
||||
// // Check against the predefined categories. We implicitly build
|
||||
// // up ALL category sets the first time toPattern() is called.
|
||||
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
|
||||
// if (*this == getCategorySet(cat)) {
|
||||
// result.append(COLON);
|
||||
// result.append(u':');
|
||||
// result.append(CATEGORY_NAMES, cat*2, 2);
|
||||
// return result.append(CATEGORY_CLOSE);
|
||||
// }
|
||||
|
|
@ -2104,7 +2094,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
getRangeEnd(count-1) == MAX_VALUE) {
|
||||
|
||||
// Emit the inverse
|
||||
result.append(COMPLEMENT);
|
||||
result.append(u'^');
|
||||
|
||||
for (int32_t i = 1; i < count; ++i) {
|
||||
UChar32 start = getRangeEnd(i-1)+1;
|
||||
|
|
@ -2112,7 +2102,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
|
|
@ -2127,7 +2117,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
|
|
@ -2136,14 +2126,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
|
||||
if (strings != nullptr) {
|
||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||
result.append(OPEN_BRACE);
|
||||
result.append(u'{');
|
||||
_appendToPat(result,
|
||||
*(const UnicodeString*) strings->elementAt(i),
|
||||
escapeUnprintable);
|
||||
result.append(CLOSE_BRACE);
|
||||
result.append(u'}');
|
||||
}
|
||||
}
|
||||
return result.append(SET_CLOSE);
|
||||
return result.append(u']');
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
116
deps/icu-small/source/common/uniset_props.cpp
vendored
116
deps/icu-small/source/common/uniset_props.cpp
vendored
|
|
@ -47,31 +47,6 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
|
||||
static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
|
||||
//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
|
||||
//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
|
||||
//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
|
||||
static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
|
||||
|
||||
// Special property set IDs
|
||||
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
|
||||
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
|
||||
|
|
@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
|||
#define NAME_PROP "na"
|
||||
#define NAME_PROP_LENGTH 2
|
||||
|
||||
/**
|
||||
* Delimiter string used in patterns to close a category reference:
|
||||
* ":]". Example: "[:Lu:]".
|
||||
*/
|
||||
//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
|
||||
|
||||
// Cached sets ------------------------------------------------------------- ***
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
|
@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
|
|||
static inline UBool
|
||||
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
UChar c;
|
||||
return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
|
||||
return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPerlClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==CLOSE_BRACE;
|
||||
return pattern.charAt(pos)==u'}';
|
||||
}*/
|
||||
|
||||
static inline UBool
|
||||
isNameOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
|
||||
return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
|
||||
}
|
||||
|
||||
static inline UBool
|
||||
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
|
||||
return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
|
||||
return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
|
||||
}*/
|
||||
|
||||
// TODO memory debugging provided inside uniset.cpp
|
||||
|
|
@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
|
||||
while (mode != 2 && !chars.atEnd()) {
|
||||
U_ASSERT((lastItem == 0 && op == 0) ||
|
||||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
|
||||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
|
||||
op == INTERSECTION /*'&'*/)));
|
||||
(lastItem == 1 && (op == 0 || op == u'-')) ||
|
||||
(lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
|
||||
|
||||
UChar32 c = 0;
|
||||
UBool literal = FALSE;
|
||||
|
|
@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
|
||||
if (c == 0x5B /*'['*/ && !literal) {
|
||||
if (c == u'[' && !literal) {
|
||||
if (mode == 1) {
|
||||
chars.setPos(backup); // backup
|
||||
setMode = 1;
|
||||
} else {
|
||||
// Handle opening '[' delimiter
|
||||
mode = 1;
|
||||
patLocal.append((UChar) 0x5B /*'['*/);
|
||||
patLocal.append(u'[');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5E /*'^'*/ && !literal) {
|
||||
if (c == u'^' && !literal) {
|
||||
invert = TRUE;
|
||||
patLocal.append((UChar) 0x5E /*'^'*/);
|
||||
patLocal.append(u'^');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
}
|
||||
// Fall through to handle special leading '-';
|
||||
// otherwise restart loop for nested [], \p{}, etc.
|
||||
if (c == HYPHEN /*'-'*/) {
|
||||
if (c == u'-') {
|
||||
literal = TRUE;
|
||||
// Fall through to handle literal '-' below
|
||||
} else {
|
||||
|
|
@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
op = 0;
|
||||
}
|
||||
|
||||
if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
|
||||
if (op == u'-' || op == u'&') {
|
||||
patLocal.append(op);
|
||||
}
|
||||
|
||||
|
|
@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
}
|
||||
|
||||
switch (op) {
|
||||
case HYPHEN: /*'-'*/
|
||||
case u'-':
|
||||
removeAll(*nested);
|
||||
break;
|
||||
case INTERSECTION: /*'&'*/
|
||||
case u'&':
|
||||
retainAll(*nested);
|
||||
break;
|
||||
case 0:
|
||||
|
|
@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
|
||||
if (!literal) {
|
||||
switch (c) {
|
||||
case 0x5D /*']'*/:
|
||||
case u']':
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
_appendToPat(patLocal, lastChar, FALSE);
|
||||
}
|
||||
// Treat final trailing '-' as a literal
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
add(op, op);
|
||||
patLocal.append(op);
|
||||
} else if (op == INTERSECTION /*'&'*/) {
|
||||
} else if (op == u'&') {
|
||||
// syntaxError(chars, "Trailing '&'");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
}
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
case HYPHEN /*'-'*/:
|
||||
case u'-':
|
||||
if (op == 0) {
|
||||
if (lastItem != 0) {
|
||||
op = (UChar) c;
|
||||
|
|
@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
add(c, c);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5D /*']'*/ && !literal) {
|
||||
patLocal.append(HYPHEN_RIGHT_BRACE, 2);
|
||||
if (c == u']' && !literal) {
|
||||
patLocal.append(u"-]", 2);
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// syntaxError(chars, "'-' not after char or set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case INTERSECTION /*'&'*/:
|
||||
case u'&':
|
||||
if (lastItem == 2 && op == 0) {
|
||||
op = (UChar) c;
|
||||
continue;
|
||||
|
|
@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// syntaxError(chars, "'&' not after set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x5E /*'^'*/:
|
||||
case u'^':
|
||||
// syntaxError(chars, "'^' not after '['");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x7B /*'{'*/:
|
||||
case u'{':
|
||||
if (op != 0) {
|
||||
// syntaxError(chars, "Missing operand after operator");
|
||||
ec = U_MALFORMED_SET;
|
||||
|
|
@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
while (!chars.atEnd()) {
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x7D /*'}'*/ && !literal) {
|
||||
if (c == u'}' && !literal) {
|
||||
ok = TRUE;
|
||||
break;
|
||||
}
|
||||
buf.append(c);
|
||||
}
|
||||
if (buf.length() < 1 || !ok) {
|
||||
if (!ok) {
|
||||
// syntaxError(chars, "Invalid multicharacter string");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
|
|
@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// we don't need to drop through to the further
|
||||
// processing
|
||||
add(buf);
|
||||
patLocal.append((UChar) 0x7B /*'{'*/);
|
||||
patLocal.append(u'{');
|
||||
_appendToPat(patLocal, buf, FALSE);
|
||||
patLocal.append((UChar) 0x7D /*'}'*/);
|
||||
patLocal.append(u'}');
|
||||
continue;
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
// symbols nosymbols
|
||||
|
|
@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
chars.getPos(backup);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
UBool anchor = (c == 0x5D /*']'*/ && !literal);
|
||||
UBool anchor = (c == u']' && !literal);
|
||||
if (symbols == 0 && !anchor) {
|
||||
c = SymbolTable::SYMBOL_REF;
|
||||
chars.setPos(backup);
|
||||
|
|
@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
add(U_ETHER);
|
||||
usePat = TRUE;
|
||||
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
lastChar = c;
|
||||
break;
|
||||
case 1:
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
if (lastChar >= c) {
|
||||
// Don't allow redundant (a-a) or empty (b-a) ranges;
|
||||
// these are most likely typos.
|
||||
|
|
@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
|
|||
RuleCharacterIterator::Pos pos;
|
||||
chars.getPos(pos);
|
||||
UChar32 c = chars.next(iterOpts, literal, ec);
|
||||
if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
|
||||
if (c == u'[' || c == u'\\') {
|
||||
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
|
||||
literal, ec);
|
||||
result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
|
||||
(d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
|
||||
result = (c == u'[') ? (d == u':') :
|
||||
(d == u'N' || d == u'p' || d == u'P');
|
||||
}
|
||||
chars.setPos(pos);
|
||||
return result && U_SUCCESS(ec);
|
||||
|
|
@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
posix = TRUE;
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
|
||||
++pos;
|
||||
invert = TRUE;
|
||||
}
|
||||
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
|
||||
UChar c = pattern.charAt(pos+1);
|
||||
invert = (c == UPPER_P);
|
||||
isName = (c == UPPER_N);
|
||||
invert = (c == u'P');
|
||||
isName = (c == u'N');
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
|
||||
// Syntax error; "\p" or "\P" not followed by "{"
|
||||
FAIL(ec);
|
||||
}
|
||||
|
|
@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
// Look for the matching close delimiter, either :] or }
|
||||
int32_t close;
|
||||
if (posix) {
|
||||
close = pattern.indexOf(POSIX_CLOSE, 2, pos);
|
||||
close = pattern.indexOf(u":]", 2, pos);
|
||||
} else {
|
||||
close = pattern.indexOf(CLOSE_BRACE, pos);
|
||||
close = pattern.indexOf(u'}', pos);
|
||||
}
|
||||
if (close < 0) {
|
||||
// Syntax error; close delimiter missing
|
||||
|
|
@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
// Look for an '=' sign. If this is present, we will parse a
|
||||
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
|
||||
// pattern.
|
||||
int32_t equals = pattern.indexOf(EQUALS, pos);
|
||||
int32_t equals = pattern.indexOf(u'=', pos);
|
||||
UnicodeString propName, valueName;
|
||||
if (equals >= 0 && equals < close && !isName) {
|
||||
// Equals seen; parse medium/long pattern
|
||||
|
|
|
|||
25
deps/icu-small/source/common/unisetspan.cpp
vendored
25
deps/icu-small/source/common/unisetspan.cpp
vendored
|
|
@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UBool thisRelevant;
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
|
|
@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
if(spanLength<length16 && length16>0) { // Relevant string.
|
||||
if(which&UTF16) {
|
||||
if(which&CONTAINED) {
|
||||
if(which&FWD) {
|
||||
|
|
@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
addToSpanNotSet(c);
|
||||
}
|
||||
}
|
||||
} else { // Irrelevant string.
|
||||
} else { // Irrelevant string. (Also the empty string.)
|
||||
if(which&UTF8) {
|
||||
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
|
||||
uint8_t *s8=utf8+utf8Count;
|
||||
|
|
@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
|||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
|
@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
|
@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
|||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanBackLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
|
@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
|
@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
|
|||
// Try to match the strings at pos.
|
||||
for(i=0; i<stringsLength; ++i) {
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
|
|
@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
|
|||
// it is easier and we only need to know whether the string is irrelevant
|
||||
// which is the same in either array.
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
|
|
|
|||
52
deps/icu-small/source/common/uprops.h
vendored
52
deps/icu-small/source/common/uprops.h
vendored
|
|
@ -310,55 +310,12 @@ u_isgraphPOSIX(UChar32 c);
|
|||
U_CFUNC UBool
|
||||
u_isprintPOSIX(UChar32 c);
|
||||
|
||||
/** Turn a bit index into a bit flag. @internal */
|
||||
#define FLAG(n) ((uint32_t)1<<(n))
|
||||
|
||||
/** Flags for general categories in the order of UCharCategory. @internal */
|
||||
#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
|
||||
#define _Lu FLAG(U_UPPERCASE_LETTER)
|
||||
#define _Ll FLAG(U_LOWERCASE_LETTER)
|
||||
#define _Lt FLAG(U_TITLECASE_LETTER)
|
||||
#define _Lm FLAG(U_MODIFIER_LETTER)
|
||||
/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
|
||||
#define _Mn FLAG(U_NON_SPACING_MARK)
|
||||
#define _Me FLAG(U_ENCLOSING_MARK)
|
||||
#define _Mc FLAG(U_COMBINING_SPACING_MARK)
|
||||
#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
|
||||
#define _Nl FLAG(U_LETTER_NUMBER)
|
||||
#define _No FLAG(U_OTHER_NUMBER)
|
||||
#define _Zs FLAG(U_SPACE_SEPARATOR)
|
||||
#define _Zl FLAG(U_LINE_SEPARATOR)
|
||||
#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
|
||||
#define _Cc FLAG(U_CONTROL_CHAR)
|
||||
#define _Cf FLAG(U_FORMAT_CHAR)
|
||||
#define _Co FLAG(U_PRIVATE_USE_CHAR)
|
||||
#define _Cs FLAG(U_SURROGATE)
|
||||
#define _Pd FLAG(U_DASH_PUNCTUATION)
|
||||
#define _Ps FLAG(U_START_PUNCTUATION)
|
||||
/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
|
||||
/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
|
||||
#define _Po FLAG(U_OTHER_PUNCTUATION)
|
||||
#define _Sm FLAG(U_MATH_SYMBOL)
|
||||
#define _Sc FLAG(U_CURRENCY_SYMBOL)
|
||||
#define _Sk FLAG(U_MODIFIER_SYMBOL)
|
||||
#define _So FLAG(U_OTHER_SYMBOL)
|
||||
#define _Pi FLAG(U_INITIAL_PUNCTUATION)
|
||||
/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
|
||||
|
||||
/** Some code points. @internal */
|
||||
enum {
|
||||
TAB =0x0009,
|
||||
LF =0x000a,
|
||||
FF =0x000c,
|
||||
CR =0x000d,
|
||||
U_A =0x0041,
|
||||
U_F =0x0046,
|
||||
U_Z =0x005a,
|
||||
U_a =0x0061,
|
||||
U_f =0x0066,
|
||||
U_z =0x007a,
|
||||
DEL =0x007f,
|
||||
NL =0x0085,
|
||||
NBSP =0x00a0,
|
||||
CGJ =0x034f,
|
||||
FIGURESP=0x2007,
|
||||
|
|
@ -367,15 +324,6 @@ enum {
|
|||
ZWJ =0x200d,
|
||||
RLM =0x200f,
|
||||
NNBSP =0x202f,
|
||||
WJ =0x2060,
|
||||
INHSWAP =0x206a,
|
||||
NOMDIG =0x206f,
|
||||
U_FW_A =0xff21,
|
||||
U_FW_F =0xff26,
|
||||
U_FW_Z =0xff3a,
|
||||
U_FW_a =0xff41,
|
||||
U_FW_f =0xff46,
|
||||
U_FW_z =0xff5a,
|
||||
ZWNBSP =0xfeff
|
||||
};
|
||||
|
||||
|
|
|
|||
23
deps/icu-small/source/common/uresbund.cpp
vendored
23
deps/icu-small/source/common/uresbund.cpp
vendored
|
|
@ -91,6 +91,15 @@ static UBool chopLocale(char *name) {
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called to check whether a name without '_' needs to be checked for a parent.
|
||||
* Some code had assumed that locale IDs with '_' could not have a non-root parent.
|
||||
* We may want a better way of doing this.
|
||||
*/
|
||||
static UBool mayHaveParent(char *name) {
|
||||
return (name[0] != 0 && uprv_strstr("nb nn",name) != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function
|
||||
*/
|
||||
|
|
@ -529,8 +538,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
|
|||
char name[], int32_t nameCapacity,
|
||||
UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) { return FALSE; }
|
||||
UBool hasChopped = TRUE;
|
||||
while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
|
||||
UBool checkParent = TRUE;
|
||||
while (checkParent && t1->fParent == NULL && !t1->fData.noFallback &&
|
||||
res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
|
||||
Resource parentRes = res_getResource(&t1->fData, "%%Parent");
|
||||
if (parentRes != RES_BOGUS) { // An explicit parent was found.
|
||||
|
|
@ -573,7 +582,7 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
|
|||
}
|
||||
}
|
||||
t1 = t2;
|
||||
hasChopped = chopLocale(name);
|
||||
checkParent = chopLocale(name) || mayHaveParent(name);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
|
@ -692,7 +701,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (hasChopped && !isRoot) {
|
||||
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
|
||||
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
|
||||
goto finish;
|
||||
}
|
||||
|
|
@ -716,7 +725,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
|
|||
hasRealData = TRUE;
|
||||
isDefault = TRUE;
|
||||
// TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
|
||||
if (hasChopped && !isRoot) {
|
||||
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
|
||||
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
|
||||
goto finish;
|
||||
}
|
||||
|
|
@ -1908,6 +1917,8 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
|
|||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (res == RES_BOGUS) {
|
||||
break;
|
||||
}
|
||||
} while(*myPath); /* Continue until the whole path is consumed */
|
||||
}
|
||||
|
|
@ -3019,7 +3030,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
|
|||
U_CAPI UBool U_EXPORT2
|
||||
ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
|
||||
if(res1==NULL || res2==NULL){
|
||||
return res1==res2; /* pointer comparision */
|
||||
return res1==res2; /* pointer comparison */
|
||||
}
|
||||
if(res1->fKey==NULL|| res2->fKey==NULL){
|
||||
return (res1->fKey==res2->fKey);
|
||||
|
|
|
|||
8
deps/icu-small/source/common/uresdata.cpp
vendored
8
deps/icu-small/source/common/uresdata.cpp
vendored
|
|
@ -960,14 +960,6 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch
|
|||
if(URES_IS_TABLE(type)) {
|
||||
*key = pathP;
|
||||
t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
|
||||
if(t2 == RES_BOGUS) {
|
||||
/* if we fail to get the resource by key, maybe we got an index */
|
||||
indexR = uprv_strtol(pathP, &closeIndex, 10);
|
||||
if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) {
|
||||
/* if we indeed have an index, try to get the item by index */
|
||||
t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
|
||||
} // else t2 is already RES_BOGUS
|
||||
}
|
||||
} else if(URES_IS_ARRAY(type)) {
|
||||
indexR = uprv_strtol(pathP, &closeIndex, 10);
|
||||
if(indexR >= 0 && *closeIndex == 0) {
|
||||
|
|
|
|||
4
deps/icu-small/source/common/uresimp.h
vendored
4
deps/icu-small/source/common/uresimp.h
vendored
|
|
@ -270,11 +270,13 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
|
|||
* function can perform fallback on the sub-resources of the table.
|
||||
* @param resB a resource
|
||||
* @param inKey a key associated with the requested resource
|
||||
* @param len if not NULL, used to return the length of the string
|
||||
* @param status: fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
|
||||
* @return returns a pointer to a zero-terminated UChar array which lives in a
|
||||
* memory mapped/DLL file.
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getStringByKeyWithFallback(const UResourceBundle *resB,
|
||||
|
|
|
|||
35
deps/icu-small/source/common/uset.cpp
vendored
35
deps/icu-small/source/common/uset.cpp
vendored
|
|
@ -116,6 +116,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) {
|
|||
((UnicodeSet*) set)->UnicodeSet::remove(s);
|
||||
}
|
||||
|
||||
/**
 * C wrapper that wraps the given UTF-16 text in a UnicodeString and forwards
 * it to UnicodeSet::removeAll() on the set.
 *
 * @param set    the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param str    the UTF-16 text
 * @param length length of str; -1 is passed on as the constructor's
 *               "terminated" flag
 */
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::removeAll(text);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeAll(USet* set, const USet* remove) {
|
||||
((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
|
||||
|
|
@ -126,6 +132,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) {
|
|||
((UnicodeSet*) set)->UnicodeSet::retain(start, end);
|
||||
}
|
||||
|
||||
/**
 * C wrapper that wraps the given UTF-16 text in a UnicodeString and forwards
 * it to UnicodeSet::retain() on the set.
 *
 * @param set    the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param str    the UTF-16 text
 * @param length length of str; -1 is passed on as the constructor's
 *               "terminated" flag
 */
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::retain(text);
}
|
||||
|
||||
/**
 * C wrapper that wraps the given UTF-16 text in a UnicodeString and forwards
 * it to UnicodeSet::retainAll() on the set.
 *
 * @param set    the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param str    the UTF-16 text
 * @param length length of str; -1 is passed on as the constructor's
 *               "terminated" flag
 */
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::retainAll(text);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainAll(USet* set, const USet* retain) {
|
||||
((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
|
||||
|
|
@ -141,6 +159,23 @@ uset_complement(USet* set) {
|
|||
((UnicodeSet*) set)->UnicodeSet::complement();
|
||||
}
|
||||
|
||||
/**
 * C wrapper that forwards the range [start, end] to
 * UnicodeSet::complement(start, end) on the set.
 *
 * @param set   the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param start first code point of the range
 * @param end   last code point of the range
 */
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end) {
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::complement(start, end);
}
|
||||
|
||||
/**
 * C wrapper that wraps the given UTF-16 text in a UnicodeString and forwards
 * it to UnicodeSet::complement() on the set.
 *
 * @param set    the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param str    the UTF-16 text
 * @param length length of str; -1 is passed on as the constructor's
 *               "terminated" flag
 */
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::complement(text);
}
|
||||
|
||||
/**
 * C wrapper that wraps the given UTF-16 text in a UnicodeString and forwards
 * it to UnicodeSet::complementAll() on the set.
 *
 * @param set    the set to modify (a UnicodeSet behind the opaque USet handle)
 * @param str    the UTF-16 text
 * @param length length of str; -1 is passed on as the constructor's
 *               "terminated" flag
 */
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified call, as elsewhere in this file.
    target->UnicodeSet::complementAll(text);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementAll(USet* set, const USet* complement) {
|
||||
((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
|
||||
|
|
|
|||
2
deps/icu-small/source/common/usprep.cpp
vendored
2
deps/icu-small/source/common/usprep.cpp
vendored
|
|
@ -575,7 +575,7 @@ usprep_map( const UStringPrepProfile* profile,
|
|||
}
|
||||
|
||||
}else if(type==USPREP_DELETE){
|
||||
// just consume the codepoint and contine
|
||||
// just consume the codepoint and continue
|
||||
continue;
|
||||
}
|
||||
//copy the code point into destination
|
||||
|
|
|
|||
4
deps/icu-small/source/common/ustr_wcs.cpp
vendored
4
deps/icu-small/source/common/ustr_wcs.cpp
vendored
|
|
@ -364,7 +364,7 @@ _strFromWCS( UChar *dest,
|
|||
}
|
||||
|
||||
/* we have found a null so convert the
|
||||
* chunk from begining of non-null char to null
|
||||
* chunk from beginning of non-null char to null
|
||||
*/
|
||||
retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
|
||||
|
||||
|
|
@ -387,7 +387,7 @@ _strFromWCS( UChar *dest,
|
|||
* null terminate it and convert wchar_ts to chars
|
||||
*/
|
||||
if(nulLen >= _STACK_BUFFER_CAPACITY){
|
||||
/* Should rarely occcur */
|
||||
/* Should rarely occur */
|
||||
/* allocate new buffer */
|
||||
pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
|
||||
if(pWStack==NULL){
|
||||
|
|
|
|||
6
deps/icu-small/source/common/utext.cpp
vendored
6
deps/icu-small/source/common/utext.cpp
vendored
|
|
@ -382,7 +382,7 @@ utext_previous32From(UText *ut, int64_t index) {
|
|||
//
|
||||
UChar32 cPrev; // The character preceding cCurr, which is what we will return.
|
||||
|
||||
// Address the chunk containg the position preceding the incoming index
|
||||
// Address the chunk containing the position preceding the incoming index
|
||||
// A tricky edge case:
|
||||
// We try to test the requested native index against the chunkNativeStart to determine
|
||||
// whether the character preceding the one at the index is in the current chunk.
|
||||
|
|
@ -894,7 +894,7 @@ struct UTF8Buf {
|
|||
// one for a supplementary starting in the last normal position,
|
||||
// and one for an entry for the buffer limit position.
|
||||
uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
|
||||
// correspoding offset in filled part of buf.
|
||||
// corresponding offset in filled part of buf.
|
||||
int32_t align;
|
||||
};
|
||||
|
||||
|
|
@ -1545,7 +1545,7 @@ utf8TextMapOffsetToNative(const UText *ut) {
|
|||
}
|
||||
|
||||
//
|
||||
// Map a native index to the corrsponding chunk offset
|
||||
// Map a native index to the corresponding chunk offset
|
||||
//
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
|
||||
|
|
|
|||
6
deps/icu-small/source/common/util.h
vendored
6
deps/icu-small/source/common/util.h
vendored
|
|
@ -13,10 +13,10 @@
|
|||
#ifndef ICU_UTIL_H
|
||||
#define ICU_UTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "charstr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utypes.h"
|
||||
//--------------------------------------------------------------------
|
||||
// class ICU_Utility
|
||||
// i18n utility functions, scoped into the class ICU_Utility.
|
||||
|
|
|
|||
2
deps/icu-small/source/common/utracimp.h
vendored
2
deps/icu-small/source/common/utracimp.h
vendored
|
|
@ -193,7 +193,7 @@ UPRV_BLOCK_MACRO_BEGIN { \
|
|||
* Trace statement for each exit point of a function that has a UTRACE_ENTRY()
|
||||
* statement, and that returns a value.
|
||||
*
|
||||
* @param val The function's return value, int32_t or comatible type.
|
||||
* @param val The function's return value, int32_t or compatible type.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
|
|
|||
4
deps/icu-small/source/common/uvector.cpp
vendored
4
deps/icu-small/source/common/uvector.cpp
vendored
|
|
@ -312,7 +312,7 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
|
|||
} else {
|
||||
for (i=startIndex; i<count; ++i) {
|
||||
/* Pointers are not always the same size as ints so to perform
|
||||
* a valid comparision we need to know whether we are being
|
||||
* a valid comparison we need to know whether we are being
|
||||
* provided an int or a pointer. */
|
||||
if (hint & HINT_KEY_POINTER) {
|
||||
if (key.pointer == elements[i].pointer) {
|
||||
|
|
@ -518,7 +518,7 @@ sortiComparator(const void * /*context */, const void *left, const void *right)
|
|||
}
|
||||
|
||||
/**
|
||||
* Sort the vector, assuming it constains ints.
|
||||
* Sort the vector, assuming it contains ints.
|
||||
* (A more general sort would take a comparison function, but it's
|
||||
* not clear whether UVector's UElementComparator or
|
||||
* UComparator from uprv_sortArray would be more appropriate.)
|
||||
|
|
|
|||
24
deps/icu-small/source/common/wintz.cpp
vendored
24
deps/icu-small/source/common/wintz.cpp
vendored
|
|
@ -124,10 +124,26 @@ uprv_detectWindowsTimeZone()
|
|||
// No way to support when DST is turned off and the offset in minutes is not a multiple of 60.
|
||||
if (utcOffsetMins % 60 == 0) {
|
||||
char gmtOffsetTz[11] = {}; // "Etc/GMT+dd" is 11-char long with a terminal null.
|
||||
// Note '-' before 'utcOffsetMin'. The timezone ID's sign convention
|
||||
// is that a timezone ahead of UTC is Etc/GMT-<offset> and a timezone
|
||||
// behind UTC is Etc/GMT+<offset>.
|
||||
int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", -utcOffsetMins / 60);
|
||||
// Important note on the sign convention for zones:
|
||||
//
|
||||
// From https://en.wikipedia.org/wiki/Tz_database#Area
|
||||
// "In order to conform with the POSIX style, those zone names beginning with "Etc/GMT" have their sign reversed
|
||||
// from the standard ISO 8601 convention. In the "Etc" area, zones west of GMT have a positive sign and those
|
||||
// east have a negative sign in their name (e.g "Etc/GMT-14" is 14 hours ahead of GMT)."
|
||||
//
|
||||
// Regarding the POSIX style, from https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html
|
||||
// "The offset specifies the time value you must add to the local time to get a Coordinated Universal Time value."
|
||||
//
|
||||
// However, the Bias value in DYNAMIC_TIME_ZONE_INFORMATION *already* follows the POSIX convention.
|
||||
//
|
||||
// From https://docs.microsoft.com/en-us/windows/win32/api/timezoneapi/ns-timezoneapi-dynamic_time_zone_information
|
||||
// "The bias is the difference, in minutes, between Coordinated Universal Time (UTC) and
|
||||
// local time. All translations between UTC and local time are based on the following formula:
|
||||
// UTC = local time + bias"
|
||||
//
|
||||
// For example, a time zone that is 3 hours ahead of UTC (UTC+03:00) would have a Bias value of -180, and the
|
||||
// corresponding time zone ID would be "Etc/GMT-3". (So there is no need to negate utcOffsetMins below.)
|
||||
int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", utcOffsetMins / 60);
|
||||
if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) {
|
||||
return uprv_strdup(gmtOffsetTz);
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
15
deps/icu-small/source/i18n/basictz.cpp
vendored
15
deps/icu-small/source/i18n/basictz.cpp
vendored
|
|
@ -423,7 +423,7 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial,
|
|||
goto error;
|
||||
}
|
||||
} else {
|
||||
// Colllect transitions after the start time
|
||||
// Collect transitions after the start time
|
||||
int32_t startTimes;
|
||||
DateTimeRule::TimeRuleType timeType;
|
||||
int32_t idx;
|
||||
|
|
@ -547,14 +547,23 @@ error:
|
|||
}
|
||||
|
||||
void
|
||||
BasicTimeZone::getOffsetFromLocal(UDate /*date*/, int32_t /*nonExistingTimeOpt*/, int32_t /*duplicatedTimeOpt*/,
|
||||
int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, UErrorCode& status) const {
|
||||
BasicTimeZone::getOffsetFromLocal(UDate /*date*/, UTimeZoneLocalOption /*nonExistingTimeOpt*/,
|
||||
UTimeZoneLocalOption /*duplicatedTimeOpt*/,
|
||||
int32_t& /*rawOffset*/, int32_t& /*dstOffset*/,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
}
|
||||
|
||||
/**
 * Overload that accepts the two local-time options as plain int32_t values.
 * Each option is converted to UTimeZoneLocalOption and the call is forwarded
 * to the UTimeZoneLocalOption overload of getOffsetFromLocal(); all other
 * arguments are passed through unchanged.
 */
void BasicTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
                                       int32_t& rawOffset, int32_t& dstOffset,
                                       UErrorCode& status) const {
    const UTimeZoneLocalOption nonExisting = static_cast<UTimeZoneLocalOption>(nonExistingTimeOpt);
    const UTimeZoneLocalOption duplicated = static_cast<UTimeZoneLocalOption>(duplicatedTimeOpt);
    getOffsetFromLocal(date, nonExisting, duplicated, rawOffset, dstOffset, status);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
|||
4
deps/icu-small/source/i18n/calendar.cpp
vendored
4
deps/icu-small/source/i18n/calendar.cpp
vendored
|
|
@ -308,7 +308,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
|
|||
|
||||
calTypeBuf[0] = 0;
|
||||
if (U_SUCCESS(status) && order != NULL) {
|
||||
// the first calender type is the default for the region
|
||||
// the first calendar type is the default for the region
|
||||
int32_t len = 0;
|
||||
const UChar *uCalType = ures_getStringByIndex(order, 0, &len, &status);
|
||||
if (len < (int32_t)sizeof(calTypeBuf)) {
|
||||
|
|
@ -2291,7 +2291,7 @@ int32_t Calendar::fieldDifference(UDate targetMs, UCalendarDateFields field, UEr
|
|||
if (U_FAILURE(ec)) return 0;
|
||||
int32_t min = 0;
|
||||
double startMs = getTimeInMillis(ec);
|
||||
// Always add from the start millis. This accomodates
|
||||
// Always add from the start millis. This accommodates
|
||||
// operations like adding years from February 29, 2000 up to
|
||||
// February 29, 2004. If 1, 1, 1, 1 is added to the year
|
||||
// field, the DOM gets pinned to 28 and stays there, giving an
|
||||
|
|
|
|||
|
|
@ -688,7 +688,7 @@ CollationBuilder::addRelation(int32_t strength, const UnicodeString &prefix,
|
|||
// A Hangul syllable completely inside a contraction is ok.
|
||||
}
|
||||
// Note: If there is a prefix, then the parser checked that
|
||||
// both the prefix and the string beging with NFC boundaries (not Jamo V or T).
|
||||
// both the prefix and the string begin with NFC boundaries (not Jamo V or T).
|
||||
// Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0))
|
||||
// (While handling a Hangul syllable, prefixes on Jamo V or T
|
||||
// would not see the previous Jamo of that syllable.)
|
||||
|
|
|
|||
|
|
@ -255,12 +255,18 @@ DataBuilderCollationIterator::getDataCE32(UChar32 c) const {
|
|||
|
||||
uint32_t
|
||||
DataBuilderCollationIterator::getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return 0; }
|
||||
U_ASSERT(Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG));
|
||||
if((ce32 & CollationDataBuilder::IS_BUILDER_JAMO_CE32) != 0) {
|
||||
UChar32 jamo = Collation::indexFromCE32(ce32);
|
||||
return utrie2_get32(builder.trie, jamo);
|
||||
} else {
|
||||
ConditionalCE32 *cond = builder.getConditionalCE32ForCE32(ce32);
|
||||
if (cond == nullptr) {
|
||||
errorCode = U_INTERNAL_PROGRAM_ERROR;
|
||||
// TODO: ICU-21531 figure out why this happens.
|
||||
return 0;
|
||||
}
|
||||
if(cond->builtCE32 == Collation::NO_CE32) {
|
||||
// Build the context-sensitive mappings into their runtime form and cache the result.
|
||||
cond->builtCE32 = builder.buildContext(cond, errorCode);
|
||||
|
|
|
|||
1
deps/icu-small/source/i18n/cpdtrans.cpp
vendored
1
deps/icu-small/source/i18n/cpdtrans.cpp
vendored
|
|
@ -282,6 +282,7 @@ void CompoundTransliterator::freeTransliterators(void) {
|
|||
CompoundTransliterator& CompoundTransliterator::operator=(
|
||||
const CompoundTransliterator& t)
|
||||
{
|
||||
if (this == &t) { return *this; } // self-assignment: no-op
|
||||
Transliterator::operator=(t);
|
||||
int32_t i = 0;
|
||||
UBool failed = FALSE;
|
||||
|
|
|
|||
2
deps/icu-small/source/i18n/csrmbcs.cpp
vendored
2
deps/icu-small/source/i18n/csrmbcs.cpp
vendored
|
|
@ -186,7 +186,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars
|
|||
if (doubleByteCharCount == 0 && totalCharCount < 10) {
|
||||
// There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
|
||||
// We don't have enough data to have any confidence.
|
||||
// Statistical analysis of single byte non-ASCII charcters would probably help here.
|
||||
// Statistical analysis of single byte non-ASCII characters would probably help here.
|
||||
confidence = 0;
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
2
deps/icu-small/source/i18n/csrucode.cpp
vendored
2
deps/icu-small/source/i18n/csrucode.cpp
vendored
|
|
@ -155,7 +155,7 @@ UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const
|
|||
} else if (numValid > 0 && numInvalid == 0) {
|
||||
confidence = 80;
|
||||
} else if (numValid > numInvalid*10) {
|
||||
// Probably corruput UTF-32BE data. Valid sequences aren't likely by chance.
|
||||
// Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance.
|
||||
confidence = 25;
|
||||
}
|
||||
|
||||
|
|
|
|||
2
deps/icu-small/source/i18n/csrutf8.cpp
vendored
2
deps/icu-small/source/i18n/csrutf8.cpp
vendored
|
|
@ -99,7 +99,7 @@ UBool CharsetRecog_UTF8::match(InputText* input, CharsetMatch *results) const {
|
|||
// accepts ASCII with confidence = 10.
|
||||
confidence = 15;
|
||||
} else if (numValid > numInvalid*10) {
|
||||
// Probably corruput utf-8 data. Valid sequences aren't likely by chance.
|
||||
// Probably corrupt utf-8 data. Valid sequences aren't likely by chance.
|
||||
confidence = 25;
|
||||
}
|
||||
|
||||
|
|
|
|||
2
deps/icu-small/source/i18n/decContext.cpp
vendored
2
deps/icu-small/source/i18n/decContext.cpp
vendored
|
|
@ -150,7 +150,7 @@ U_CAPI uInt U_EXPORT2 uprv_decContextGetStatus(decContext *context) {
|
|||
/* newstatus is the source for the bits to be restored */
|
||||
/* mask indicates the bits to be restored (the status bit that */
|
||||
/* corresponds to each 1 bit in the mask is set to the value of */
|
||||
/* the correspnding bit in newstatus) */
|
||||
/* the corresponding bit in newstatus) */
|
||||
/* returns context */
|
||||
/* */
|
||||
/* No error is possible. */
|
||||
|
|
|
|||
4
deps/icu-small/source/i18n/decNumber.cpp
vendored
4
deps/icu-small/source/i18n/decNumber.cpp
vendored
|
|
@ -2203,7 +2203,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *res, const decNumber
|
|||
/* if a negative power the constant 1 is needed, and if not subset */
|
||||
/* invert the lhs now rather than inverting the result later */
|
||||
if (decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */
|
||||
decNumber *inv=invbuff; /* asssume use fixed buffer */
|
||||
decNumber *inv=invbuff; /* assume use fixed buffer */
|
||||
uprv_decNumberCopy(&dnOne, dac); /* dnOne=1; [needed now or later] */
|
||||
#if DECSUBSET
|
||||
if (set->extended) { /* need to calculate 1/lhs */
|
||||
|
|
@ -5242,7 +5242,7 @@ static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs,
|
|||
/* exp(-x) where x can be the tiniest number (Ntiny). */
|
||||
/* */
|
||||
/* 2. Normalizing x to be <=0.1 (instead of <=1) reduces loop */
|
||||
/* iterations by appoximately a third with additional (although */
|
||||
/* iterations by approximately a third with additional (although */
|
||||
/* diminishing) returns as the range is reduced to even smaller */
|
||||
/* fractions. However, h (the power of 10 used to correct the */
|
||||
/* result at the end, see below) must be kept <=8 as otherwise */
|
||||
|
|
|
|||
2
deps/icu-small/source/i18n/decNumberLocal.h
vendored
2
deps/icu-small/source/i18n/decNumberLocal.h
vendored
|
|
@ -146,7 +146,7 @@
|
|||
|
||||
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* Definitions for arbitary-precision modules (only valid after */
|
||||
/* Definitions for arbitrary-precision modules (only valid after */
|
||||
/* decNumber.h has been included) */
|
||||
/* ---------------------------------------------------------------- */
|
||||
|
||||
|
|
|
|||
|
|
@ -384,7 +384,7 @@ static void BignumToFixed(int requested_digits, int* decimal_point,
|
|||
// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
|
||||
// v = f * 2^exponent and 2^52 <= f < 2^53.
|
||||
// v is hence a normalized double with the given exponent. The output is an
|
||||
// approximation for the exponent of the decimal approimation .digits * 10^k.
|
||||
// approximation for the exponent of the decimal approximation .digits * 10^k.
|
||||
//
|
||||
// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
|
||||
// Note: this property holds for v's upper boundary m+ too.
|
||||
|
|
@ -562,7 +562,7 @@ static void InitialScaledStartValuesNegativeExponentNegativePower(
|
|||
//
|
||||
// Let ep == estimated_power, then the returned values will satisfy:
|
||||
// v / 10^ep = numerator / denominator.
|
||||
// v's boundarys m- and m+:
|
||||
// v's boundaries m- and m+:
|
||||
// m- / 10^ep == v / 10^ep - delta_minus / denominator
|
||||
// m+ / 10^ep == v / 10^ep + delta_plus / denominator
|
||||
// Or in other words:
|
||||
|
|
|
|||
|
|
@ -107,19 +107,19 @@ void DoubleToStringConverter::CreateExponentialRepresentation(
|
|||
result_builder->AddCharacter('+');
|
||||
}
|
||||
}
|
||||
if (exponent == 0) {
|
||||
result_builder->AddCharacter('0');
|
||||
return;
|
||||
}
|
||||
DOUBLE_CONVERSION_ASSERT(exponent < 1e4);
|
||||
// Changing this constant requires updating the comment of DoubleToStringConverter constructor
|
||||
const int kMaxExponentLength = 5;
|
||||
char buffer[kMaxExponentLength + 1];
|
||||
buffer[kMaxExponentLength] = '\0';
|
||||
int first_char_pos = kMaxExponentLength;
|
||||
while (exponent > 0) {
|
||||
buffer[--first_char_pos] = '0' + (exponent % 10);
|
||||
exponent /= 10;
|
||||
if (exponent == 0) {
|
||||
buffer[--first_char_pos] = '0';
|
||||
} else {
|
||||
while (exponent > 0) {
|
||||
buffer[--first_char_pos] = '0' + (exponent % 10);
|
||||
exponent /= 10;
|
||||
}
|
||||
}
|
||||
// Add prefix '0' to make exponent width >= min(min_exponent_width_, kMaxExponentLength)
|
||||
// For example: convert 1e+9 -> 1e+09, if min_exponent_width_ is set to 2
|
||||
|
|
@ -342,9 +342,21 @@ bool DoubleToStringConverter::ToPrecision(double value,
|
|||
int exponent = decimal_point - 1;
|
||||
|
||||
int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
|
||||
if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
|
||||
bool as_exponential =
|
||||
(-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
|
||||
(decimal_point - precision + extra_zero >
|
||||
max_trailing_padding_zeroes_in_precision_mode_)) {
|
||||
max_trailing_padding_zeroes_in_precision_mode_);
|
||||
if ((flags_ & NO_TRAILING_ZERO) != 0) {
|
||||
// Truncate trailing zeros that occur after the decimal point (if exponential,
|
||||
// that is everything after the first digit).
|
||||
int stop = as_exponential ? 1 : std::max(1, decimal_point);
|
||||
while (decimal_rep_length > stop && decimal_rep[decimal_rep_length - 1] == '0') {
|
||||
--decimal_rep_length;
|
||||
}
|
||||
// Clamp precision to avoid the code below re-adding the zeros.
|
||||
precision = std::min(precision, decimal_rep_length);
|
||||
}
|
||||
if (as_exponential) {
|
||||
// Fill buffer to contain 'precision' digits.
|
||||
// Usually the buffer is already at the correct length, but 'DoubleToAscii'
|
||||
// is allowed to return fewer characters.
|
||||
|
|
|
|||
|
|
@ -48,12 +48,11 @@ namespace double_conversion {
|
|||
|
||||
class DoubleToStringConverter {
|
||||
public:
|
||||
#if 0 // not needed for ICU
|
||||
// When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
|
||||
// or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
|
||||
// function returns false.
|
||||
static const int kMaxFixedDigitsBeforePoint = 60;
|
||||
static const int kMaxFixedDigitsAfterPoint = 60;
|
||||
static const int kMaxFixedDigitsAfterPoint = 100;
|
||||
|
||||
// When calling ToExponential with a requested_digits
|
||||
// parameter > kMaxExponentialDigits then the function returns false.
|
||||
|
|
@ -65,12 +64,36 @@ class DoubleToStringConverter {
|
|||
static const int kMinPrecisionDigits = 1;
|
||||
static const int kMaxPrecisionDigits = 120;
|
||||
|
||||
// The maximal number of digits that are needed to emit a double in base 10.
|
||||
// A higher precision can be achieved by using more digits, but the shortest
|
||||
// accurate representation of any double will never use more digits than
|
||||
// kBase10MaximalLength.
|
||||
// Note that DoubleToAscii null-terminates its input. So the given buffer
|
||||
// should be at least kBase10MaximalLength + 1 characters long.
|
||||
static const int kBase10MaximalLength = 17;
|
||||
|
||||
// The maximal number of digits that are needed to emit a single in base 10.
|
||||
// A higher precision can be achieved by using more digits, but the shortest
|
||||
// accurate representation of any single will never use more digits than
|
||||
// kBase10MaximalLengthSingle.
|
||||
static const int kBase10MaximalLengthSingle = 9;
|
||||
|
||||
// The length of the longest string that 'ToShortest' can produce when the
|
||||
// converter is instantiated with EcmaScript defaults (see
|
||||
// 'EcmaScriptConverter')
|
||||
// This value does not include the trailing '\0' character.
|
||||
// This amount of characters is needed for negative values that hit the
|
||||
// 'decimal_in_shortest_low' limit. For example: "-0.0000033333333333333333"
|
||||
static const int kMaxCharsEcmaScriptShortest = 25;
|
||||
|
||||
#if 0 // not needed for ICU
|
||||
enum Flags {
|
||||
NO_FLAGS = 0,
|
||||
EMIT_POSITIVE_EXPONENT_SIGN = 1,
|
||||
EMIT_TRAILING_DECIMAL_POINT = 2,
|
||||
EMIT_TRAILING_ZERO_AFTER_POINT = 4,
|
||||
UNIQUE_ZERO = 8
|
||||
UNIQUE_ZERO = 8,
|
||||
NO_TRAILING_ZERO = 16
|
||||
};
|
||||
|
||||
// Flags should be a bit-or combination of the possible Flags-enum.
|
||||
|
|
@ -82,9 +105,13 @@ class DoubleToStringConverter {
|
|||
// Example: 2345.0 is converted to "2345.".
|
||||
// - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
|
||||
// emits a trailing '0'-character. This flag requires the
|
||||
// EXMIT_TRAILING_DECIMAL_POINT flag.
|
||||
// EMIT_TRAILING_DECIMAL_POINT flag.
|
||||
// Example: 2345.0 is converted to "2345.0".
|
||||
// - UNIQUE_ZERO: "-0.0" is converted to "0.0".
|
||||
// - NO_TRAILING_ZERO: Trailing zeros are removed from the fractional portion
|
||||
// of the result in precision mode. Matches printf's %g.
|
||||
// When EMIT_TRAILING_ZERO_AFTER_POINT is also given, one trailing zero is
|
||||
// preserved.
|
||||
//
|
||||
// Infinity symbol and nan_symbol provide the string representation for these
|
||||
// special values. If the string is NULL and the special value is encountered
|
||||
|
|
@ -152,6 +179,14 @@ class DoubleToStringConverter {
|
|||
}
|
||||
|
||||
// Returns a converter following the EcmaScript specification.
|
||||
//
|
||||
// Flags: UNIQUE_ZERO and EMIT_POSITIVE_EXPONENT_SIGN.
|
||||
// Special values: "Infinity" and "NaN".
|
||||
// Lower case 'e' for exponential values.
|
||||
// decimal_in_shortest_low: -6
|
||||
// decimal_in_shortest_high: 21
|
||||
// max_leading_padding_zeroes_in_precision_mode: 6
|
||||
// max_trailing_padding_zeroes_in_precision_mode: 0
|
||||
static const DoubleToStringConverter& EcmaScriptConverter();
|
||||
|
||||
// Computes the shortest string of digits that correctly represent the input
|
||||
|
|
@ -177,6 +212,21 @@ class DoubleToStringConverter {
|
|||
// Returns true if the conversion succeeds. The conversion always succeeds
|
||||
// except when the input value is special and no infinity_symbol or
|
||||
// nan_symbol has been given to the constructor.
|
||||
//
|
||||
// The length of the longest result is the maximum of the length of the
|
||||
// following string representations (each with possible examples):
|
||||
// - NaN and negative infinity: "NaN", "-Infinity", "-inf".
|
||||
// - -10^(decimal_in_shortest_high - 1):
|
||||
// "-100000000000000000000", "-1000000000000000.0"
|
||||
// - the longest string in range [0; -10^decimal_in_shortest_low]. Generally,
|
||||
// this string is 3 + kBase10MaximalLength - decimal_in_shortest_low.
|
||||
// (Sign, '0', decimal point, padding zeroes for decimal_in_shortest_low,
|
||||
// and the significant digits).
|
||||
// "-0.0000033333333333333333", "-0.0012345678901234567"
|
||||
// - the longest exponential representation. (A negative number with
|
||||
// kBase10MaximalLength significant digits).
|
||||
// "-1.7976931348623157e+308", "-1.7976931348623157E308"
|
||||
// In addition, the buffer must be able to hold the trailing '\0' character.
|
||||
bool ToShortest(double value, StringBuilder* result_builder) const {
|
||||
return ToShortestIeeeNumber(value, result_builder, SHORTEST);
|
||||
}
|
||||
|
|
@ -217,9 +267,11 @@ class DoubleToStringConverter {
|
|||
// been provided to the constructor,
|
||||
// - 'value' > 10^kMaxFixedDigitsBeforePoint, or
|
||||
// - 'requested_digits' > kMaxFixedDigitsAfterPoint.
|
||||
// The last two conditions imply that the result will never contain more than
|
||||
// 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
|
||||
// The last two conditions imply that the result for non-special values never
|
||||
// contains more than
|
||||
// 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
|
||||
// (one additional character for the sign, and one for the decimal point).
|
||||
// In addition, the buffer must be able to hold the trailing '\0' character.
|
||||
bool ToFixed(double value,
|
||||
int requested_digits,
|
||||
StringBuilder* result_builder) const;
|
||||
|
|
@ -248,14 +300,17 @@ class DoubleToStringConverter {
|
|||
// - the input value is special and no infinity_symbol or nan_symbol has
|
||||
// been provided to the constructor,
|
||||
// - 'requested_digits' > kMaxExponentialDigits.
|
||||
// The last condition implies that the result will never contain more than
|
||||
//
|
||||
// The last condition implies that the result never contains more than
|
||||
// kMaxExponentialDigits + 8 characters (the sign, the digit before the
|
||||
// decimal point, the decimal point, the exponent character, the
|
||||
// exponent's sign, and at most 3 exponent digits).
|
||||
// In addition, the buffer must be able to hold the trailing '\0' character.
|
||||
bool ToExponential(double value,
|
||||
int requested_digits,
|
||||
StringBuilder* result_builder) const;
|
||||
|
||||
|
||||
// Computes 'precision' leading digits of the given 'value' and returns them
|
||||
// either in exponential or decimal format, depending on
|
||||
// max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
|
||||
|
|
@ -287,9 +342,11 @@ class DoubleToStringConverter {
|
|||
// been provided to the constructor,
|
||||
// - precision < kMinPrecisionDigits
|
||||
// - precision > kMaxPrecisionDigits
|
||||
// The last condition implies that the result will never contain more than
|
||||
//
|
||||
// The last condition implies that the result never contains more than
|
||||
// kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
|
||||
// exponent character, the exponent's sign, and at most 3 exponent digits).
|
||||
// In addition, the buffer must be able to hold the trailing '\0' character.
|
||||
bool ToPrecision(double value,
|
||||
int precision,
|
||||
StringBuilder* result_builder) const;
|
||||
|
|
@ -310,14 +367,6 @@ class DoubleToStringConverter {
|
|||
PRECISION
|
||||
};
|
||||
|
||||
// The maximal number of digits that are needed to emit a double in base 10.
|
||||
// A higher precision can be achieved by using more digits, but the shortest
|
||||
// accurate representation of any double will never use more digits than
|
||||
// kBase10MaximalLength.
|
||||
// Note that DoubleToAscii null-terminates its input. So the given buffer
|
||||
// should be at least kBase10MaximalLength + 1 characters long.
|
||||
static const int kBase10MaximalLength = 17;
|
||||
|
||||
// Converts the given double 'v' to digit characters. 'v' must not be NaN,
|
||||
// +Infinity, or -Infinity. In SHORTEST_SINGLE-mode this restriction also
|
||||
// applies to 'v' after it has been cast to a single-precision float. That
|
||||
|
|
|
|||
|
|
@ -51,6 +51,18 @@
|
|||
// ICU PATCH: Wrap in ICU namespace
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# if _MSC_VER >= 1900
|
||||
// Fix MSVC >= 2015 (_MSC_VER == 1900) warning
|
||||
// C4244: 'argument': conversion from 'const uc16' to 'char', possible loss of data
|
||||
// against Advance and friends, when instantiated with **it as char, not uc16.
|
||||
__pragma(warning(disable: 4244))
|
||||
# endif
|
||||
# if _MSC_VER <= 1700 // VS2012, see IsDecimalDigitForRadix warning fix, below
|
||||
# define VS2012_RADIXWARN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
namespace double_conversion {
|
||||
|
||||
namespace {
|
||||
|
|
@ -170,9 +182,9 @@ static double SignedZero(bool sign) {
|
|||
//
|
||||
// The function is small and could be inlined, but VS2012 emitted a warning
|
||||
// because it constant-propagated the radix and concluded that the last
|
||||
// condition was always true. By moving it into a separate function the
|
||||
// compiler wouldn't warn anymore.
|
||||
#ifdef _MSC_VER
|
||||
// condition was always true. Moving it into a separate function and
|
||||
// suppressing optimisation keeps the compiler from warning.
|
||||
#ifdef VS2012_RADIXWARN
|
||||
#pragma optimize("",off)
|
||||
static bool IsDecimalDigitForRadix(int c, int radix) {
|
||||
return '0' <= c && c <= '9' && (c - '0') < radix;
|
||||
|
|
@ -738,11 +750,17 @@ double StringToDoubleConverter::StringToIeee(
|
|||
DOUBLE_CONVERSION_ASSERT(buffer_pos < kBufferSize);
|
||||
buffer[buffer_pos] = '\0';
|
||||
|
||||
// Code above ensures there are no leading zeros and the buffer has fewer than
|
||||
// kMaxSignificantDecimalDigits characters. Trim trailing zeros.
|
||||
Vector<const char> chars(buffer, buffer_pos);
|
||||
chars = TrimTrailingZeros(chars);
|
||||
exponent += buffer_pos - chars.length();
|
||||
|
||||
double converted;
|
||||
if (read_as_double) {
|
||||
converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
|
||||
converted = StrtodTrimmed(chars, exponent);
|
||||
} else {
|
||||
converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
|
||||
converted = StrtofTrimmed(chars, exponent);
|
||||
}
|
||||
*processed_characters_count = static_cast<int>(current - input);
|
||||
return sign? -converted: converted;
|
||||
|
|
|
|||
|
|
@ -115,17 +115,6 @@ static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
|
|||
return Vector<const char>(buffer.start(), 0);
|
||||
}
|
||||
|
||||
|
||||
// Strips trailing '0' characters from 'buffer'.
// Walks backwards from the last index and, at the first character that is not
// '0', returns buffer.SubVector(0, i + 1) (presumably the range up to and
// including that character -- confirm SubVector's end-index convention).
// If every character is '0', or 'buffer' is empty, the loop finds nothing and
// a zero-length vector starting at buffer.start() is returned.
static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
|
||||
for (int i = buffer.length() - 1; i >= 0; --i) {
|
||||
if (buffer[i] != '0') {
|
||||
return buffer.SubVector(0, i + 1);
|
||||
}
|
||||
}
|
||||
return Vector<const char>(buffer.start(), 0);
|
||||
}
|
||||
|
||||
|
||||
static void CutToMaxSignificantDigits(Vector<const char> buffer,
|
||||
int exponent,
|
||||
char* significant_buffer,
|
||||
|
|
@ -216,12 +205,14 @@ static bool DoubleStrtod(Vector<const char> trimmed,
|
|||
int exponent,
|
||||
double* result) {
|
||||
#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
|
||||
// Avoid "unused parameter" warnings
|
||||
(void) trimmed;
|
||||
(void) exponent;
|
||||
(void) result;
|
||||
// On x86 the floating-point stack can be 64 or 80 bits wide. If it is
|
||||
// 80 bits wide (as is the case on Linux) then double-rounding occurs and the
|
||||
// result is not accurate.
|
||||
// We know that Windows32 uses 64 bits and is therefore accurate.
|
||||
// Note that the ARM simulator is compiled for 32bits. It therefore exhibits
|
||||
// the same problem.
|
||||
return false;
|
||||
#else
|
||||
if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
|
||||
|
|
@ -473,6 +464,11 @@ static bool IsNonZeroDigit(const char d) {
|
|||
return ('1' <= d) && (d <= '9');
|
||||
}
|
||||
|
||||
#ifdef __has_cpp_attribute
|
||||
#if __has_cpp_attribute(maybe_unused)
|
||||
[[maybe_unused]]
|
||||
#endif
|
||||
#endif
|
||||
static bool AssertTrimmedDigits(const Vector<const char>& buffer) {
|
||||
for(int i = 0; i < buffer.length(); ++i) {
|
||||
if(!IsDigit(buffer[i])) {
|
||||
|
|
@ -545,6 +541,12 @@ float Strtof(Vector<const char> buffer, int exponent) {
|
|||
TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
|
||||
&trimmed, &updated_exponent);
|
||||
exponent = updated_exponent;
|
||||
return StrtofTrimmed(trimmed, exponent);
|
||||
}
|
||||
|
||||
float StrtofTrimmed(Vector<const char> trimmed, int exponent) {
|
||||
DOUBLE_CONVERSION_ASSERT(trimmed.length() <= kMaxSignificantDecimalDigits);
|
||||
DOUBLE_CONVERSION_ASSERT(AssertTrimmedDigits(trimmed));
|
||||
|
||||
double double_guess;
|
||||
bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
|
||||
|
|
|
|||
|
|
@ -54,11 +54,25 @@ double Strtod(Vector<const char> buffer, int exponent);
|
|||
// contain a dot or a sign. It must not start with '0', and must not be empty.
|
||||
float Strtof(Vector<const char> buffer, int exponent);
|
||||
|
||||
// For special use cases, the heart of the Strtod() function is also available
|
||||
// separately, it assumes that 'trimmed' is as produced by TrimAndCut(), i.e.
|
||||
// no leading or trailing zeros, also no lone zero, and not 'too many' digits.
|
||||
// Same as Strtod, but assumes that 'trimmed' is already trimmed, as if run
|
||||
// through TrimAndCut. That is, 'trimmed' must have no leading or trailing
|
||||
// zeros, must not be a lone zero, and must not have 'too many' digits.
|
||||
double StrtodTrimmed(Vector<const char> trimmed, int exponent);
|
||||
|
||||
// Same as Strtof, but assumes that 'trimmed' is already trimmed, as if run
|
||||
// through TrimAndCut. That is, 'trimmed' must have no leading or trailing
|
||||
// zeros, must not be a lone zero, and must not have 'too many' digits.
|
||||
float StrtofTrimmed(Vector<const char> trimmed, int exponent);
|
||||
|
||||
// Strips trailing '0' characters from 'buffer'.
// Scans from the last index towards the front; at the first character that is
// not '0' it returns buffer.SubVector(0, i + 1) (presumably the range up to
// and including that character -- confirm SubVector's end-index convention).
// If no such character exists (all '0', or an empty buffer), a zero-length
// vector starting at buffer.start() is returned.
inline Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
|
||||
for (int i = buffer.length() - 1; i >= 0; --i) {
|
||||
if (buffer[i] != '0') {
|
||||
return buffer.SubVector(0, i + 1);
|
||||
}
|
||||
}
|
||||
return Vector<const char>(buffer.start(), 0);
|
||||
}
|
||||
|
||||
} // namespace double_conversion
|
||||
|
||||
// ICU PATCH: Close ICU namespace
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ int main(int argc, char** argv) {
|
|||
defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \
|
||||
defined(__hppa__) || defined(__ia64__) || \
|
||||
defined(__mips__) || \
|
||||
defined(__nios2__) || \
|
||||
defined(__nios2__) || defined(__ghs) || \
|
||||
defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
|
||||
defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
|
||||
defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
|
||||
|
|
|
|||
3
deps/icu-small/source/i18n/dtfmtsym.cpp
vendored
3
deps/icu-small/source/i18n/dtfmtsym.cpp
vendored
|
|
@ -450,6 +450,7 @@ DateFormatSymbols::copyData(const DateFormatSymbols& other) {
|
|||
*/
|
||||
DateFormatSymbols& DateFormatSymbols::operator=(const DateFormatSymbols& other)
|
||||
{
|
||||
if (this == &other) { return *this; } // self-assignment: no-op
|
||||
dispose();
|
||||
copyData(other);
|
||||
|
||||
|
|
@ -2330,7 +2331,7 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError
|
|||
// If format/narrow not available, use standalone/narrow
|
||||
assignArray(fNarrowMonths, fNarrowMonthsCount, fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount);
|
||||
} else if (narrowMonthsEC != U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) {
|
||||
// If standalone/narrow not availabe, use format/narrow
|
||||
// If standalone/narrow not available, use format/narrow
|
||||
assignArray(fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount, fNarrowMonths, fNarrowMonthsCount);
|
||||
} else if (narrowMonthsEC == U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) {
|
||||
// If neither is available, use format/abbreviated
|
||||
|
|
|
|||
27
deps/icu-small/source/i18n/dtitvfmt.cpp
vendored
27
deps/icu-small/source/i18n/dtitvfmt.cpp
vendored
|
|
@ -704,7 +704,7 @@ DateIntervalFormat::create(const Locale& locale,
|
|||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
delete dtitvinf;
|
||||
} else if ( U_FAILURE(status) ) {
|
||||
// safe to delete f, although nothing acutally is saved
|
||||
// safe to delete f, although nothing actually is saved
|
||||
delete f;
|
||||
f = 0;
|
||||
}
|
||||
|
|
@ -863,6 +863,14 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
|
|||
setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
setPatternInfo(UCAL_YEAR, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
|
||||
timeSkeleton.insert(0, CAP_G);
|
||||
pattern = DateFormat::getBestPattern(
|
||||
locale, timeSkeleton, status);
|
||||
if ( U_FAILURE(status) ) {
|
||||
return;
|
||||
}
|
||||
setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
} else {
|
||||
// TODO: fall back
|
||||
}
|
||||
|
|
@ -889,15 +897,23 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
|
|||
setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
setPatternInfo(UCAL_YEAR, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
|
||||
timeSkeleton.insert(0, CAP_G);
|
||||
pattern = DateFormat::getBestPattern(
|
||||
locale, timeSkeleton, status);
|
||||
if ( U_FAILURE(status) ) {
|
||||
return;
|
||||
}
|
||||
setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder());
|
||||
} else {
|
||||
/* if both present,
|
||||
* 1) when the year, month, or day differs,
|
||||
* 1) when the era, year, month, or day differs,
|
||||
* concatenate the two original expressions with a separator between,
|
||||
* 2) otherwise, present the date followed by the
|
||||
* range expression for the time.
|
||||
*/
|
||||
/*
|
||||
* 1) when the year, month, or day differs,
|
||||
* 1) when the era, year, month, or day differs,
|
||||
* concatenate the two original expressions with a separator between,
|
||||
*/
|
||||
// if field exists, use fall back
|
||||
|
|
@ -917,6 +933,11 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
|
|||
skeleton.insert(0, LOW_Y);
|
||||
setFallbackPattern(UCAL_YEAR, skeleton, status);
|
||||
}
|
||||
if ( !fieldExistsInSkeleton(UCAL_ERA, dateSkeleton) ) {
|
||||
// then prefix skeleton with 'G'
|
||||
skeleton.insert(0, CAP_G);
|
||||
setFallbackPattern(UCAL_ERA, skeleton, status);
|
||||
}
|
||||
|
||||
/*
|
||||
* 2) otherwise, present the date followed by the
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user