xref: /aosp_15_r20/external/skia/modules/skunicode/src/SkUnicode_icu.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1*c8dee2aaSAndroid Build Coastguard Worker /*
2*c8dee2aaSAndroid Build Coastguard Worker * Copyright 2020 Google Inc.
3*c8dee2aaSAndroid Build Coastguard Worker *
4*c8dee2aaSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license that can be
5*c8dee2aaSAndroid Build Coastguard Worker * found in the LICENSE file.
6*c8dee2aaSAndroid Build Coastguard Worker */
7*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/include/SkUnicode_icu.h"
8*c8dee2aaSAndroid Build Coastguard Worker 
9*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkRefCnt.h"
10*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkString.h"
11*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkTypes.h"
12*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkDebug.h"
13*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkMutex.h"
14*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkSpan_impl.h"
15*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTArray.h"
16*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTemplates.h"
17*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTo.h"
18*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/include/SkUnicode.h"
19*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/src/SkBidiFactory_icu_full.h"
20*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/src/SkUnicode_icu_bidi.h"
21*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/src/SkUnicode_icupriv.h"
22*c8dee2aaSAndroid Build Coastguard Worker #include "src/base/SkBitmaskEnum.h"
23*c8dee2aaSAndroid Build Coastguard Worker #include "src/base/SkUTF.h"
24*c8dee2aaSAndroid Build Coastguard Worker #include "src/core/SkChecksum.h"
25*c8dee2aaSAndroid Build Coastguard Worker #include "src/core/SkTHash.h"
26*c8dee2aaSAndroid Build Coastguard Worker 
27*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/ubrk.h>
28*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/uchar.h>
29*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/uloc.h>
30*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/umachine.h>
31*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/utext.h>
32*c8dee2aaSAndroid Build Coastguard Worker #include <unicode/utypes.h>
33*c8dee2aaSAndroid Build Coastguard Worker 
34*c8dee2aaSAndroid Build Coastguard Worker #include <cstdint>
35*c8dee2aaSAndroid Build Coastguard Worker #include <cstring>
36*c8dee2aaSAndroid Build Coastguard Worker #include <functional>
37*c8dee2aaSAndroid Build Coastguard Worker #include <memory>
38*c8dee2aaSAndroid Build Coastguard Worker #include <string>
39*c8dee2aaSAndroid Build Coastguard Worker #include <utility>
40*c8dee2aaSAndroid Build Coastguard Worker #include <vector>
41*c8dee2aaSAndroid Build Coastguard Worker 
42*c8dee2aaSAndroid Build Coastguard Worker #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
43*c8dee2aaSAndroid Build Coastguard Worker #include "SkLoadICU.h"
44*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkOnce.h"
45*c8dee2aaSAndroid Build Coastguard Worker #endif
46*c8dee2aaSAndroid Build Coastguard Worker 
47*c8dee2aaSAndroid Build Coastguard Worker using namespace skia_private;
48*c8dee2aaSAndroid Build Coastguard Worker 
SkGetICULib()49*c8dee2aaSAndroid Build Coastguard Worker const SkICULib* SkGetICULib() {
50*c8dee2aaSAndroid Build Coastguard Worker     static const auto gICU = SkLoadICULib();
51*c8dee2aaSAndroid Build Coastguard Worker     return gICU.get();
52*c8dee2aaSAndroid Build Coastguard Worker }
53*c8dee2aaSAndroid Build Coastguard Worker 
54*c8dee2aaSAndroid Build Coastguard Worker // sk_* wrappers for ICU funcs
55*c8dee2aaSAndroid Build Coastguard Worker #define SKICU_FUNC(funcname)                                                                \
56*c8dee2aaSAndroid Build Coastguard Worker     template <typename... Args>                                                             \
57*c8dee2aaSAndroid Build Coastguard Worker     auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
58*c8dee2aaSAndroid Build Coastguard Worker         return SkGetICULib()->f_##funcname(std::forward<Args>(args)...);                    \
59*c8dee2aaSAndroid Build Coastguard Worker     }                                                                                       \
60*c8dee2aaSAndroid Build Coastguard Worker 
61*c8dee2aaSAndroid Build Coastguard Worker SKICU_EMIT_FUNCS
62*c8dee2aaSAndroid Build Coastguard Worker #undef SKICU_FUNC
63*c8dee2aaSAndroid Build Coastguard Worker 
/**
 * Clones a break iterator with whichever cloning entry point the loaded ICU table provides.
 *
 * f_ubrk_clone_ is used when it is set; otherwise f_ubrk_safeClone_ is called without a
 * caller-supplied stack buffer. At least one of the two must be present (asserted).
 */
static inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
    const auto* lib = SkGetICULib();
    SkASSERT(lib->f_ubrk_clone_ || lib->f_ubrk_safeClone_);
    if (lib->f_ubrk_clone_) {
        return lib->f_ubrk_clone_(bi, status);
    }
    return lib->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
}
71*c8dee2aaSAndroid Build Coastguard Worker 
utext_close_wrapper(UText * ut)72*c8dee2aaSAndroid Build Coastguard Worker static UText* utext_close_wrapper(UText* ut) {
73*c8dee2aaSAndroid Build Coastguard Worker     return sk_utext_close(ut);
74*c8dee2aaSAndroid Build Coastguard Worker }
ubrk_close_wrapper(UBreakIterator * bi)75*c8dee2aaSAndroid Build Coastguard Worker static void ubrk_close_wrapper(UBreakIterator* bi) {
76*c8dee2aaSAndroid Build Coastguard Worker     sk_ubrk_close(bi);
77*c8dee2aaSAndroid Build Coastguard Worker }
78*c8dee2aaSAndroid Build Coastguard Worker 
79*c8dee2aaSAndroid Build Coastguard Worker using ICUUText = std::unique_ptr<UText, SkFunctionObject<utext_close_wrapper>>;
80*c8dee2aaSAndroid Build Coastguard Worker using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionObject<ubrk_close_wrapper>>;
81*c8dee2aaSAndroid Build Coastguard Worker /** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
utf8_next(const char ** ptr,const char * end)82*c8dee2aaSAndroid Build Coastguard Worker static inline SkUnichar utf8_next(const char** ptr, const char* end) {
83*c8dee2aaSAndroid Build Coastguard Worker     SkUnichar val = SkUTF::NextUTF8(ptr, end);
84*c8dee2aaSAndroid Build Coastguard Worker     return val < 0 ? 0xFFFD : val;
85*c8dee2aaSAndroid Build Coastguard Worker }
86*c8dee2aaSAndroid Build Coastguard Worker 
convertType(SkUnicode::BreakType type)87*c8dee2aaSAndroid Build Coastguard Worker static UBreakIteratorType convertType(SkUnicode::BreakType type) {
88*c8dee2aaSAndroid Build Coastguard Worker     switch (type) {
89*c8dee2aaSAndroid Build Coastguard Worker         case SkUnicode::BreakType::kLines: return UBRK_LINE;
90*c8dee2aaSAndroid Build Coastguard Worker         case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
91*c8dee2aaSAndroid Build Coastguard Worker         case SkUnicode::BreakType::kWords: return UBRK_WORD;
92*c8dee2aaSAndroid Build Coastguard Worker         case SkUnicode::BreakType::kSentences:
93*c8dee2aaSAndroid Build Coastguard Worker             return UBRK_SENTENCE;
94*c8dee2aaSAndroid Build Coastguard Worker         default:
95*c8dee2aaSAndroid Build Coastguard Worker             return UBRK_CHARACTER;
96*c8dee2aaSAndroid Build Coastguard Worker     }
97*c8dee2aaSAndroid Build Coastguard Worker }
98*c8dee2aaSAndroid Build Coastguard Worker 
99*c8dee2aaSAndroid Build Coastguard Worker class SkBreakIterator_icu : public SkBreakIterator {
100*c8dee2aaSAndroid Build Coastguard Worker     ICUBreakIterator fBreakIterator;
101*c8dee2aaSAndroid Build Coastguard Worker     Position fLastResult;
102*c8dee2aaSAndroid Build Coastguard Worker  public:
SkBreakIterator_icu(ICUBreakIterator iter)103*c8dee2aaSAndroid Build Coastguard Worker     explicit SkBreakIterator_icu(ICUBreakIterator iter)
104*c8dee2aaSAndroid Build Coastguard Worker             : fBreakIterator(std::move(iter))
105*c8dee2aaSAndroid Build Coastguard Worker             , fLastResult(0) {}
first()106*c8dee2aaSAndroid Build Coastguard Worker     Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
current()107*c8dee2aaSAndroid Build Coastguard Worker     Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
next()108*c8dee2aaSAndroid Build Coastguard Worker     Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
status()109*c8dee2aaSAndroid Build Coastguard Worker     Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
isDone()110*c8dee2aaSAndroid Build Coastguard Worker     bool isDone() override { return fLastResult == UBRK_DONE; }
111*c8dee2aaSAndroid Build Coastguard Worker 
setText(const char utftext8[],int utf8Units)112*c8dee2aaSAndroid Build Coastguard Worker     bool setText(const char utftext8[], int utf8Units) override {
113*c8dee2aaSAndroid Build Coastguard Worker         UErrorCode status = U_ZERO_ERROR;
114*c8dee2aaSAndroid Build Coastguard Worker         ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
115*c8dee2aaSAndroid Build Coastguard Worker 
116*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
117*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
118*c8dee2aaSAndroid Build Coastguard Worker             return false;
119*c8dee2aaSAndroid Build Coastguard Worker         }
120*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(text);
121*c8dee2aaSAndroid Build Coastguard Worker         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
122*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
123*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
124*c8dee2aaSAndroid Build Coastguard Worker             return false;
125*c8dee2aaSAndroid Build Coastguard Worker         }
126*c8dee2aaSAndroid Build Coastguard Worker         fLastResult = 0;
127*c8dee2aaSAndroid Build Coastguard Worker         return true;
128*c8dee2aaSAndroid Build Coastguard Worker     }
setText(const char16_t utftext16[],int utf16Units)129*c8dee2aaSAndroid Build Coastguard Worker     bool setText(const char16_t utftext16[], int utf16Units) override {
130*c8dee2aaSAndroid Build Coastguard Worker         UErrorCode status = U_ZERO_ERROR;
131*c8dee2aaSAndroid Build Coastguard Worker         ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
132*c8dee2aaSAndroid Build Coastguard Worker                                           utf16Units, &status));
133*c8dee2aaSAndroid Build Coastguard Worker 
134*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
135*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
136*c8dee2aaSAndroid Build Coastguard Worker             return false;
137*c8dee2aaSAndroid Build Coastguard Worker         }
138*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(text);
139*c8dee2aaSAndroid Build Coastguard Worker         sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
140*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
141*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
142*c8dee2aaSAndroid Build Coastguard Worker             return false;
143*c8dee2aaSAndroid Build Coastguard Worker         }
144*c8dee2aaSAndroid Build Coastguard Worker         fLastResult = 0;
145*c8dee2aaSAndroid Build Coastguard Worker         return true;
146*c8dee2aaSAndroid Build Coastguard Worker     }
147*c8dee2aaSAndroid Build Coastguard Worker };
148*c8dee2aaSAndroid Build Coastguard Worker 
149*c8dee2aaSAndroid Build Coastguard Worker class SkIcuBreakIteratorCache final {
150*c8dee2aaSAndroid Build Coastguard Worker     struct Request final {
RequestSkIcuBreakIteratorCache::Request151*c8dee2aaSAndroid Build Coastguard Worker         Request(SkUnicode::BreakType type, const char* icuLocale)
152*c8dee2aaSAndroid Build Coastguard Worker             : fType(type)
153*c8dee2aaSAndroid Build Coastguard Worker             , fIcuLocale(icuLocale)
154*c8dee2aaSAndroid Build Coastguard Worker             , hash(SkGoodHash()(type) ^ SkGoodHash()(fIcuLocale))
155*c8dee2aaSAndroid Build Coastguard Worker         {}
156*c8dee2aaSAndroid Build Coastguard Worker         const SkUnicode::BreakType fType;
157*c8dee2aaSAndroid Build Coastguard Worker         const SkString fIcuLocale;
158*c8dee2aaSAndroid Build Coastguard Worker         const uint32_t hash;
159*c8dee2aaSAndroid Build Coastguard Worker         struct Hash {
operator ()SkIcuBreakIteratorCache::Request::Hash160*c8dee2aaSAndroid Build Coastguard Worker             uint32_t operator()(const Request& key) const {
161*c8dee2aaSAndroid Build Coastguard Worker                 return key.hash;
162*c8dee2aaSAndroid Build Coastguard Worker             }
163*c8dee2aaSAndroid Build Coastguard Worker         };
operator ==SkIcuBreakIteratorCache::Request164*c8dee2aaSAndroid Build Coastguard Worker         bool operator==(const Request& that) const {
165*c8dee2aaSAndroid Build Coastguard Worker             return this->fType == that.fType && this->fIcuLocale == that.fIcuLocale;
166*c8dee2aaSAndroid Build Coastguard Worker         }
167*c8dee2aaSAndroid Build Coastguard Worker     };
168*c8dee2aaSAndroid Build Coastguard Worker     /* Every holder of this class is referencing the same (logical) break iterator.
169*c8dee2aaSAndroid Build Coastguard Worker      * Due to caching, the actual break iterator may come and go.
170*c8dee2aaSAndroid Build Coastguard Worker      */
171*c8dee2aaSAndroid Build Coastguard Worker     class BreakIteratorRef final {
172*c8dee2aaSAndroid Build Coastguard Worker     public:
BreakIteratorRef(ICUBreakIterator iter)173*c8dee2aaSAndroid Build Coastguard Worker         BreakIteratorRef(ICUBreakIterator iter) : breakIterator(iter.release()), fRefCnt(1) {
174*c8dee2aaSAndroid Build Coastguard Worker             ++Instances;
175*c8dee2aaSAndroid Build Coastguard Worker         }
176*c8dee2aaSAndroid Build Coastguard Worker         BreakIteratorRef(SkRefCntBase&&) = delete;
177*c8dee2aaSAndroid Build Coastguard Worker         BreakIteratorRef(const SkRefCntBase&) = delete;
178*c8dee2aaSAndroid Build Coastguard Worker         BreakIteratorRef& operator=(SkRefCntBase&&) = delete;
179*c8dee2aaSAndroid Build Coastguard Worker         BreakIteratorRef& operator=(const SkRefCntBase&) = delete;
~BreakIteratorRef()180*c8dee2aaSAndroid Build Coastguard Worker         ~BreakIteratorRef() {
181*c8dee2aaSAndroid Build Coastguard Worker             if (breakIterator) {
182*c8dee2aaSAndroid Build Coastguard Worker                 ubrk_close_wrapper(breakIterator);
183*c8dee2aaSAndroid Build Coastguard Worker             }
184*c8dee2aaSAndroid Build Coastguard Worker         }
185*c8dee2aaSAndroid Build Coastguard Worker 
ref() const186*c8dee2aaSAndroid Build Coastguard Worker         void ref() const {
187*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(fRefCnt > 0);
188*c8dee2aaSAndroid Build Coastguard Worker             ++fRefCnt;
189*c8dee2aaSAndroid Build Coastguard Worker         }
unref() const190*c8dee2aaSAndroid Build Coastguard Worker         void unref() const {
191*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(fRefCnt > 0);
192*c8dee2aaSAndroid Build Coastguard Worker             if (1 == fRefCnt--) {
193*c8dee2aaSAndroid Build Coastguard Worker                 delete this;
194*c8dee2aaSAndroid Build Coastguard Worker                 --Instances;
195*c8dee2aaSAndroid Build Coastguard Worker             }
196*c8dee2aaSAndroid Build Coastguard Worker         }
197*c8dee2aaSAndroid Build Coastguard Worker 
198*c8dee2aaSAndroid Build Coastguard Worker         UBreakIterator* breakIterator;
GetInstanceCount()199*c8dee2aaSAndroid Build Coastguard Worker         static int32_t GetInstanceCount() { return Instances; }
200*c8dee2aaSAndroid Build Coastguard Worker     private:
201*c8dee2aaSAndroid Build Coastguard Worker         mutable int32_t fRefCnt;
202*c8dee2aaSAndroid Build Coastguard Worker         static int32_t Instances;
203*c8dee2aaSAndroid Build Coastguard Worker     };
204*c8dee2aaSAndroid Build Coastguard Worker     THashMap<Request, sk_sp<BreakIteratorRef>, Request::Hash> fRequestCache;
205*c8dee2aaSAndroid Build Coastguard Worker     SkMutex fCacheMutex;
206*c8dee2aaSAndroid Build Coastguard Worker 
purgeIfNeeded()207*c8dee2aaSAndroid Build Coastguard Worker     void purgeIfNeeded() {
208*c8dee2aaSAndroid Build Coastguard Worker         // If there are too many requests remove some (oldest first?)
209*c8dee2aaSAndroid Build Coastguard Worker         // This may free some break iterators
210*c8dee2aaSAndroid Build Coastguard Worker         if (fRequestCache.count() > 100) {
211*c8dee2aaSAndroid Build Coastguard Worker             // remove the oldest requests
212*c8dee2aaSAndroid Build Coastguard Worker             fRequestCache.reset();
213*c8dee2aaSAndroid Build Coastguard Worker         }
214*c8dee2aaSAndroid Build Coastguard Worker         // If there are still too many break iterators remove some (oldest first?)
215*c8dee2aaSAndroid Build Coastguard Worker         if (BreakIteratorRef::GetInstanceCount() > 4) {
216*c8dee2aaSAndroid Build Coastguard Worker             // delete the oldest break iterators and set the references to nullptr
217*c8dee2aaSAndroid Build Coastguard Worker             for (auto&& [key, value] : fRequestCache) {
218*c8dee2aaSAndroid Build Coastguard Worker                 if (value->breakIterator) {
219*c8dee2aaSAndroid Build Coastguard Worker                     sk_ubrk_close(value->breakIterator);
220*c8dee2aaSAndroid Build Coastguard Worker                     value->breakIterator = nullptr;
221*c8dee2aaSAndroid Build Coastguard Worker                 }
222*c8dee2aaSAndroid Build Coastguard Worker             }
223*c8dee2aaSAndroid Build Coastguard Worker         }
224*c8dee2aaSAndroid Build Coastguard Worker     }
225*c8dee2aaSAndroid Build Coastguard Worker 
226*c8dee2aaSAndroid Build Coastguard Worker  public:
get()227*c8dee2aaSAndroid Build Coastguard Worker     static SkIcuBreakIteratorCache& get() {
228*c8dee2aaSAndroid Build Coastguard Worker         static SkIcuBreakIteratorCache instance;
229*c8dee2aaSAndroid Build Coastguard Worker         return instance;
230*c8dee2aaSAndroid Build Coastguard Worker     }
231*c8dee2aaSAndroid Build Coastguard Worker 
makeBreakIterator(SkUnicode::BreakType type,const char * bcp47)232*c8dee2aaSAndroid Build Coastguard Worker     ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type, const char* bcp47) {
233*c8dee2aaSAndroid Build Coastguard Worker         SkAutoMutexExclusive lock(fCacheMutex);
234*c8dee2aaSAndroid Build Coastguard Worker         UErrorCode status = U_ZERO_ERROR;
235*c8dee2aaSAndroid Build Coastguard Worker 
236*c8dee2aaSAndroid Build Coastguard Worker         // Get ICU locale for BCP47 langtag
237*c8dee2aaSAndroid Build Coastguard Worker         char localeIDStorage[ULOC_FULLNAME_CAPACITY];
238*c8dee2aaSAndroid Build Coastguard Worker         const char* localeID = nullptr;
239*c8dee2aaSAndroid Build Coastguard Worker         if (bcp47) {
240*c8dee2aaSAndroid Build Coastguard Worker             sk_uloc_forLanguageTag(bcp47, localeIDStorage, ULOC_FULLNAME_CAPACITY, nullptr, &status);
241*c8dee2aaSAndroid Build Coastguard Worker             if (U_FAILURE(status)) {
242*c8dee2aaSAndroid Build Coastguard Worker                 SkDEBUGF("Break error could not get language tag: %s", sk_u_errorName(status));
243*c8dee2aaSAndroid Build Coastguard Worker             } else if (localeIDStorage[0]) {
244*c8dee2aaSAndroid Build Coastguard Worker                 localeID = localeIDStorage;
245*c8dee2aaSAndroid Build Coastguard Worker             }
246*c8dee2aaSAndroid Build Coastguard Worker         }
247*c8dee2aaSAndroid Build Coastguard Worker         if (!localeID) {
248*c8dee2aaSAndroid Build Coastguard Worker             localeID = sk_uloc_getDefault();
249*c8dee2aaSAndroid Build Coastguard Worker         }
250*c8dee2aaSAndroid Build Coastguard Worker 
251*c8dee2aaSAndroid Build Coastguard Worker         auto make = [](const Request& request) -> UBreakIterator* {
252*c8dee2aaSAndroid Build Coastguard Worker             UErrorCode status = U_ZERO_ERROR;
253*c8dee2aaSAndroid Build Coastguard Worker             UBreakIterator* bi = sk_ubrk_open(convertType(request.fType),
254*c8dee2aaSAndroid Build Coastguard Worker                                               request.fIcuLocale.c_str(),
255*c8dee2aaSAndroid Build Coastguard Worker                                               nullptr, 0, &status);
256*c8dee2aaSAndroid Build Coastguard Worker             if (U_FAILURE(status)) {
257*c8dee2aaSAndroid Build Coastguard Worker                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
258*c8dee2aaSAndroid Build Coastguard Worker             }
259*c8dee2aaSAndroid Build Coastguard Worker             return bi;
260*c8dee2aaSAndroid Build Coastguard Worker         };
261*c8dee2aaSAndroid Build Coastguard Worker 
262*c8dee2aaSAndroid Build Coastguard Worker         auto clone = [](const UBreakIterator* existing) -> ICUBreakIterator {
263*c8dee2aaSAndroid Build Coastguard Worker             if (!existing) {
264*c8dee2aaSAndroid Build Coastguard Worker                 return nullptr;
265*c8dee2aaSAndroid Build Coastguard Worker             }
266*c8dee2aaSAndroid Build Coastguard Worker 
267*c8dee2aaSAndroid Build Coastguard Worker             UErrorCode status = U_ZERO_ERROR;
268*c8dee2aaSAndroid Build Coastguard Worker             ICUBreakIterator clone(sk_ubrk_clone(existing, &status));
269*c8dee2aaSAndroid Build Coastguard Worker             if (U_FAILURE(status)) {
270*c8dee2aaSAndroid Build Coastguard Worker                 SkDEBUGF("Break error: %s", sk_u_errorName(status));
271*c8dee2aaSAndroid Build Coastguard Worker             }
272*c8dee2aaSAndroid Build Coastguard Worker             return clone;
273*c8dee2aaSAndroid Build Coastguard Worker         };
274*c8dee2aaSAndroid Build Coastguard Worker 
275*c8dee2aaSAndroid Build Coastguard Worker         Request request(type, localeID);
276*c8dee2aaSAndroid Build Coastguard Worker 
277*c8dee2aaSAndroid Build Coastguard Worker         // See if this request is already in the cache
278*c8dee2aaSAndroid Build Coastguard Worker         const sk_sp<BreakIteratorRef>* ref = fRequestCache.find(request);
279*c8dee2aaSAndroid Build Coastguard Worker         if (ref) {
280*c8dee2aaSAndroid Build Coastguard Worker             // See if the breakIterator needs to be re-created
281*c8dee2aaSAndroid Build Coastguard Worker             if (!(*ref)->breakIterator) {
282*c8dee2aaSAndroid Build Coastguard Worker                 (*ref)->breakIterator = make(request);
283*c8dee2aaSAndroid Build Coastguard Worker             }
284*c8dee2aaSAndroid Build Coastguard Worker             return clone((*ref)->breakIterator);
285*c8dee2aaSAndroid Build Coastguard Worker         }
286*c8dee2aaSAndroid Build Coastguard Worker 
287*c8dee2aaSAndroid Build Coastguard Worker         // This request was not in the cache, create an iterator.
288*c8dee2aaSAndroid Build Coastguard Worker         ICUBreakIterator newIter(make(request));
289*c8dee2aaSAndroid Build Coastguard Worker         if (!newIter) {
290*c8dee2aaSAndroid Build Coastguard Worker             return nullptr;
291*c8dee2aaSAndroid Build Coastguard Worker         }
292*c8dee2aaSAndroid Build Coastguard Worker 
293*c8dee2aaSAndroid Build Coastguard Worker         sk_sp<BreakIteratorRef> newRef;
294*c8dee2aaSAndroid Build Coastguard Worker 
295*c8dee2aaSAndroid Build Coastguard Worker         // Check if the new iterator is a duplicate
296*c8dee2aaSAndroid Build Coastguard Worker         // Android doesn't expose ubrk_getLocaleByType so there is no means of de-duplicating.
297*c8dee2aaSAndroid Build Coastguard Worker         // ubrk_getAvailable seems like it should work, but the implementation is just every locale.
298*c8dee2aaSAndroid Build Coastguard Worker         if (SkGetICULib()->f_ubrk_getLocaleByType) {
299*c8dee2aaSAndroid Build Coastguard Worker             const char* actualLocale = SkGetICULib()->f_ubrk_getLocaleByType(
300*c8dee2aaSAndroid Build Coastguard Worker                                            newIter.get(), ULOC_ACTUAL_LOCALE, &status);
301*c8dee2aaSAndroid Build Coastguard Worker             // Android doesn't expose ubrk_getLocaleByType so a wrapper may return an error.
302*c8dee2aaSAndroid Build Coastguard Worker             if (!U_FAILURE(status)) {
303*c8dee2aaSAndroid Build Coastguard Worker                 if (!actualLocale) {
304*c8dee2aaSAndroid Build Coastguard Worker                     actualLocale = "";
305*c8dee2aaSAndroid Build Coastguard Worker                 }
306*c8dee2aaSAndroid Build Coastguard Worker                 // If the actual locale is the same as the requested locale we know there is no entry.
307*c8dee2aaSAndroid Build Coastguard Worker                 if (strcmp(actualLocale, localeID) != 0) {
308*c8dee2aaSAndroid Build Coastguard Worker                     Request actualRequest(type, actualLocale);
309*c8dee2aaSAndroid Build Coastguard Worker                     const sk_sp<BreakIteratorRef>* actualRef = fRequestCache.find(actualRequest);
310*c8dee2aaSAndroid Build Coastguard Worker                     if (actualRef) {
311*c8dee2aaSAndroid Build Coastguard Worker                         if (!(*actualRef)->breakIterator) {
312*c8dee2aaSAndroid Build Coastguard Worker                             (*actualRef)->breakIterator = newIter.release();
313*c8dee2aaSAndroid Build Coastguard Worker                         }
314*c8dee2aaSAndroid Build Coastguard Worker                         actualRef = fRequestCache.set(request, *actualRef);
315*c8dee2aaSAndroid Build Coastguard Worker                         return clone((*actualRef)->breakIterator);
316*c8dee2aaSAndroid Build Coastguard Worker                     } else {
317*c8dee2aaSAndroid Build Coastguard Worker                         this->purgeIfNeeded();
318*c8dee2aaSAndroid Build Coastguard Worker                         newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
319*c8dee2aaSAndroid Build Coastguard Worker                         fRequestCache.set(actualRequest, newRef);
320*c8dee2aaSAndroid Build Coastguard Worker                     }
321*c8dee2aaSAndroid Build Coastguard Worker                 }
322*c8dee2aaSAndroid Build Coastguard Worker             }
323*c8dee2aaSAndroid Build Coastguard Worker         }
324*c8dee2aaSAndroid Build Coastguard Worker 
325*c8dee2aaSAndroid Build Coastguard Worker         if (!newRef) {
326*c8dee2aaSAndroid Build Coastguard Worker             this->purgeIfNeeded();
327*c8dee2aaSAndroid Build Coastguard Worker             newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
328*c8dee2aaSAndroid Build Coastguard Worker         }
329*c8dee2aaSAndroid Build Coastguard Worker         fRequestCache.set(request, newRef);
330*c8dee2aaSAndroid Build Coastguard Worker 
331*c8dee2aaSAndroid Build Coastguard Worker         return clone(newRef->breakIterator);
332*c8dee2aaSAndroid Build Coastguard Worker     }
333*c8dee2aaSAndroid Build Coastguard Worker };
334*c8dee2aaSAndroid Build Coastguard Worker /*static*/ int32_t SkIcuBreakIteratorCache::BreakIteratorRef::Instances{0};
335*c8dee2aaSAndroid Build Coastguard Worker 
336*c8dee2aaSAndroid Build Coastguard Worker class SkUnicode_icu : public SkUnicode {
337*c8dee2aaSAndroid Build Coastguard Worker 
extractWords(uint16_t utf16[],int utf16Units,const char * locale,std::vector<Position> * words)338*c8dee2aaSAndroid Build Coastguard Worker     static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,
339*c8dee2aaSAndroid Build Coastguard Worker                              std::vector<Position>* words) {
340*c8dee2aaSAndroid Build Coastguard Worker 
341*c8dee2aaSAndroid Build Coastguard Worker         UErrorCode status = U_ZERO_ERROR;
342*c8dee2aaSAndroid Build Coastguard Worker 
343*c8dee2aaSAndroid Build Coastguard Worker         const BreakType type = BreakType::kWords;
344*c8dee2aaSAndroid Build Coastguard Worker         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
345*c8dee2aaSAndroid Build Coastguard Worker         if (!iterator) {
346*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
347*c8dee2aaSAndroid Build Coastguard Worker             return false;
348*c8dee2aaSAndroid Build Coastguard Worker         }
349*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(iterator);
350*c8dee2aaSAndroid Build Coastguard Worker 
351*c8dee2aaSAndroid Build Coastguard Worker         ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
352*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
353*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
354*c8dee2aaSAndroid Build Coastguard Worker             return false;
355*c8dee2aaSAndroid Build Coastguard Worker         }
356*c8dee2aaSAndroid Build Coastguard Worker 
357*c8dee2aaSAndroid Build Coastguard Worker         sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
358*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
359*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
360*c8dee2aaSAndroid Build Coastguard Worker             return false;
361*c8dee2aaSAndroid Build Coastguard Worker         }
362*c8dee2aaSAndroid Build Coastguard Worker 
363*c8dee2aaSAndroid Build Coastguard Worker         // Get the words
364*c8dee2aaSAndroid Build Coastguard Worker         int32_t pos = sk_ubrk_first(iterator.get());
365*c8dee2aaSAndroid Build Coastguard Worker         while (pos != UBRK_DONE) {
366*c8dee2aaSAndroid Build Coastguard Worker             words->emplace_back(pos);
367*c8dee2aaSAndroid Build Coastguard Worker             pos = sk_ubrk_next(iterator.get());
368*c8dee2aaSAndroid Build Coastguard Worker         }
369*c8dee2aaSAndroid Build Coastguard Worker 
370*c8dee2aaSAndroid Build Coastguard Worker         return true;
371*c8dee2aaSAndroid Build Coastguard Worker     }
372*c8dee2aaSAndroid Build Coastguard Worker 
extractPositions(const char utf8[],int utf8Units,BreakType type,const char * locale,const std::function<void (int,int)> & setBreak)373*c8dee2aaSAndroid Build Coastguard Worker     static bool extractPositions(const char utf8[], int utf8Units,
374*c8dee2aaSAndroid Build Coastguard Worker                                  BreakType type, const char* locale,
375*c8dee2aaSAndroid Build Coastguard Worker                                  const std::function<void(int, int)>& setBreak) {
376*c8dee2aaSAndroid Build Coastguard Worker 
377*c8dee2aaSAndroid Build Coastguard Worker         UErrorCode status = U_ZERO_ERROR;
378*c8dee2aaSAndroid Build Coastguard Worker         ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
379*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
380*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
381*c8dee2aaSAndroid Build Coastguard Worker             return false;
382*c8dee2aaSAndroid Build Coastguard Worker         }
383*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(text);
384*c8dee2aaSAndroid Build Coastguard Worker 
385*c8dee2aaSAndroid Build Coastguard Worker         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
386*c8dee2aaSAndroid Build Coastguard Worker         if (!iterator) {
387*c8dee2aaSAndroid Build Coastguard Worker             return false;
388*c8dee2aaSAndroid Build Coastguard Worker         }
389*c8dee2aaSAndroid Build Coastguard Worker 
390*c8dee2aaSAndroid Build Coastguard Worker         sk_ubrk_setUText(iterator.get(), text.get(), &status);
391*c8dee2aaSAndroid Build Coastguard Worker         if (U_FAILURE(status)) {
392*c8dee2aaSAndroid Build Coastguard Worker             SkDEBUGF("Break error: %s", sk_u_errorName(status));
393*c8dee2aaSAndroid Build Coastguard Worker             return false;
394*c8dee2aaSAndroid Build Coastguard Worker         }
395*c8dee2aaSAndroid Build Coastguard Worker 
396*c8dee2aaSAndroid Build Coastguard Worker         auto iter = iterator.get();
397*c8dee2aaSAndroid Build Coastguard Worker         int32_t pos = sk_ubrk_first(iter);
398*c8dee2aaSAndroid Build Coastguard Worker         while (pos != UBRK_DONE) {
399*c8dee2aaSAndroid Build Coastguard Worker             int s = type == SkUnicode::BreakType::kLines
400*c8dee2aaSAndroid Build Coastguard Worker                         ? UBRK_LINE_SOFT
401*c8dee2aaSAndroid Build Coastguard Worker                         : sk_ubrk_getRuleStatus(iter);
402*c8dee2aaSAndroid Build Coastguard Worker             setBreak(pos, s);
403*c8dee2aaSAndroid Build Coastguard Worker             pos = sk_ubrk_next(iter);
404*c8dee2aaSAndroid Build Coastguard Worker         }
405*c8dee2aaSAndroid Build Coastguard Worker 
406*c8dee2aaSAndroid Build Coastguard Worker         if (type == SkUnicode::BreakType::kLines) {
407*c8dee2aaSAndroid Build Coastguard Worker             // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
408*c8dee2aaSAndroid Build Coastguard Worker             // (ICU line break iterator does not work correctly on Thai text with new lines)
409*c8dee2aaSAndroid Build Coastguard Worker             // So, we only use the iterator to collect soft line breaks and
410*c8dee2aaSAndroid Build Coastguard Worker             // scan the text for all hard line breaks ourselves
411*c8dee2aaSAndroid Build Coastguard Worker             const char* end = utf8 + utf8Units;
412*c8dee2aaSAndroid Build Coastguard Worker             const char* ch = utf8;
413*c8dee2aaSAndroid Build Coastguard Worker             while (ch < end) {
414*c8dee2aaSAndroid Build Coastguard Worker                 auto unichar = utf8_next(&ch, end);
415*c8dee2aaSAndroid Build Coastguard Worker                 if (SkUnicode_icu::isHardLineBreak(unichar)) {
416*c8dee2aaSAndroid Build Coastguard Worker                     setBreak(ch - utf8, UBRK_LINE_HARD);
417*c8dee2aaSAndroid Build Coastguard Worker                 }
418*c8dee2aaSAndroid Build Coastguard Worker             }
419*c8dee2aaSAndroid Build Coastguard Worker         }
420*c8dee2aaSAndroid Build Coastguard Worker         return true;
421*c8dee2aaSAndroid Build Coastguard Worker     }
422*c8dee2aaSAndroid Build Coastguard Worker 
isControl(SkUnichar utf8)423*c8dee2aaSAndroid Build Coastguard Worker     bool isControl(SkUnichar utf8) override {
424*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_iscntrl(utf8);
425*c8dee2aaSAndroid Build Coastguard Worker     }
426*c8dee2aaSAndroid Build Coastguard Worker 
isWhitespace(SkUnichar utf8)427*c8dee2aaSAndroid Build Coastguard Worker     bool isWhitespace(SkUnichar utf8) override {
428*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_isWhitespace(utf8);
429*c8dee2aaSAndroid Build Coastguard Worker     }
430*c8dee2aaSAndroid Build Coastguard Worker 
isSpace(SkUnichar utf8)431*c8dee2aaSAndroid Build Coastguard Worker     bool isSpace(SkUnichar utf8) override {
432*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_isspace(utf8);
433*c8dee2aaSAndroid Build Coastguard Worker     }
434*c8dee2aaSAndroid Build Coastguard Worker 
isHardBreak(SkUnichar utf8)435*c8dee2aaSAndroid Build Coastguard Worker     bool isHardBreak(SkUnichar utf8) override {
436*c8dee2aaSAndroid Build Coastguard Worker         return SkUnicode_icu::isHardLineBreak(utf8);
437*c8dee2aaSAndroid Build Coastguard Worker     }
438*c8dee2aaSAndroid Build Coastguard Worker 
isEmoji(SkUnichar unichar)439*c8dee2aaSAndroid Build Coastguard Worker     bool isEmoji(SkUnichar unichar) override {
440*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI);
441*c8dee2aaSAndroid Build Coastguard Worker     }
442*c8dee2aaSAndroid Build Coastguard Worker 
isEmojiComponent(SkUnichar unichar)443*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiComponent(SkUnichar unichar) override {
444*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_COMPONENT);
445*c8dee2aaSAndroid Build Coastguard Worker     }
446*c8dee2aaSAndroid Build Coastguard Worker 
isEmojiModifierBase(SkUnichar unichar)447*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiModifierBase(SkUnichar unichar) override {
448*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER_BASE);
449*c8dee2aaSAndroid Build Coastguard Worker     }
450*c8dee2aaSAndroid Build Coastguard Worker 
isEmojiModifier(SkUnichar unichar)451*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiModifier(SkUnichar unichar) override {
452*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER);
453*c8dee2aaSAndroid Build Coastguard Worker     }
454*c8dee2aaSAndroid Build Coastguard Worker 
isRegionalIndicator(SkUnichar unichar)455*c8dee2aaSAndroid Build Coastguard Worker     bool isRegionalIndicator(SkUnichar unichar) override {
456*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_REGIONAL_INDICATOR);
457*c8dee2aaSAndroid Build Coastguard Worker     }
458*c8dee2aaSAndroid Build Coastguard Worker 
isIdeographic(SkUnichar unichar)459*c8dee2aaSAndroid Build Coastguard Worker     bool isIdeographic(SkUnichar unichar) override {
460*c8dee2aaSAndroid Build Coastguard Worker         return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
461*c8dee2aaSAndroid Build Coastguard Worker     }
462*c8dee2aaSAndroid Build Coastguard Worker 
isTabulation(SkUnichar utf8)463*c8dee2aaSAndroid Build Coastguard Worker     bool isTabulation(SkUnichar utf8) override {
464*c8dee2aaSAndroid Build Coastguard Worker         return utf8 == '\t';
465*c8dee2aaSAndroid Build Coastguard Worker     }
466*c8dee2aaSAndroid Build Coastguard Worker 
isHardLineBreak(SkUnichar utf8)467*c8dee2aaSAndroid Build Coastguard Worker     static bool isHardLineBreak(SkUnichar utf8) {
468*c8dee2aaSAndroid Build Coastguard Worker         auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
469*c8dee2aaSAndroid Build Coastguard Worker         return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
470*c8dee2aaSAndroid Build Coastguard Worker     }
471*c8dee2aaSAndroid Build Coastguard Worker 
472*c8dee2aaSAndroid Build Coastguard Worker public:
// Nothing to release by hand; members clean up through their own destructors.
~SkUnicode_icu() override = default;
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)474*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
475*c8dee2aaSAndroid Build Coastguard Worker                                                      SkBidiIterator::Direction dir) override {
476*c8dee2aaSAndroid Build Coastguard Worker         return fBidiFact->MakeIterator(text, count, dir);
477*c8dee2aaSAndroid Build Coastguard Worker     }
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)478*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
479*c8dee2aaSAndroid Build Coastguard Worker                                                      int count,
480*c8dee2aaSAndroid Build Coastguard Worker                                                      SkBidiIterator::Direction dir) override {
481*c8dee2aaSAndroid Build Coastguard Worker         return fBidiFact->MakeIterator(text, count, dir);
482*c8dee2aaSAndroid Build Coastguard Worker     }
makeBreakIterator(const char locale[],BreakType type)483*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
484*c8dee2aaSAndroid Build Coastguard Worker                                                        BreakType type) override {
485*c8dee2aaSAndroid Build Coastguard Worker         ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type, locale);
486*c8dee2aaSAndroid Build Coastguard Worker         if (!iterator) {
487*c8dee2aaSAndroid Build Coastguard Worker             return nullptr;
488*c8dee2aaSAndroid Build Coastguard Worker         }
489*c8dee2aaSAndroid Build Coastguard Worker         return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
490*c8dee2aaSAndroid Build Coastguard Worker     }
makeBreakIterator(BreakType type)491*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) override {
492*c8dee2aaSAndroid Build Coastguard Worker         return makeBreakIterator(sk_uloc_getDefault(), type);
493*c8dee2aaSAndroid Build Coastguard Worker     }
494*c8dee2aaSAndroid Build Coastguard Worker 
// Upper-cases str without naming a locale (a null locale is passed to the two-argument overload).
SkString toUpper(const SkString& str) override {
    const char* const noLocale = nullptr;
    return this->toUpper(str, noLocale);
}
498*c8dee2aaSAndroid Build Coastguard Worker 
// Upper-cases a UTF-8 string with ICU. The text is converted to UTF-16, sized with a
// zero-capacity call, converted into a buffer of exactly that size, and converted back.
// Returns an empty SkString unless the sizing call reports U_BUFFER_OVERFLOW_ERROR
// together with a positive length.
SkString toUpper(const SkString& str, const char* locale) override {
    // ICU works on UTF-16.
    const auto source16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
    const UChar* const sourceUnits = reinterpret_cast<const UChar*>(source16.c_str());

    // Pass 1: no destination buffer, so ICU only reports the required length.
    UErrorCode icuError = U_ZERO_ERROR;
    const auto upperLength =
            sk_u_strToUpper(nullptr, 0, sourceUnits, source16.size(), locale, &icuError);
    const bool sizingSucceeded = (icuError == U_BUFFER_OVERFLOW_ERROR) && (upperLength > 0);
    if (!sizingSucceeded) {
        return SkString();
    }

    // Pass 2: the real conversion into a buffer of the reported size.
    AutoSTArray<128, uint16_t> upperUnits(upperLength);
    icuError = U_ZERO_ERROR;
    sk_u_strToUpper(reinterpret_cast<UChar*>(upperUnits.get()), SkToS32(upperUnits.size()),
                    sourceUnits, source16.size(),
                    locale, &icuError);
    SkASSERT(!U_FAILURE(icuError));

    // Callers want UTF-8 back.
    return convertUtf16ToUtf8(reinterpret_cast<char16_t*>(upperUnits.get()), upperUnits.size());
}
520*c8dee2aaSAndroid Build Coastguard Worker 
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)521*c8dee2aaSAndroid Build Coastguard Worker     bool getBidiRegions(const char utf8[],
522*c8dee2aaSAndroid Build Coastguard Worker                         int utf8Units,
523*c8dee2aaSAndroid Build Coastguard Worker                         TextDirection dir,
524*c8dee2aaSAndroid Build Coastguard Worker                         std::vector<BidiRegion>* results) override {
525*c8dee2aaSAndroid Build Coastguard Worker         return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
526*c8dee2aaSAndroid Build Coastguard Worker     }
527*c8dee2aaSAndroid Build Coastguard Worker 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)528*c8dee2aaSAndroid Build Coastguard Worker     bool getWords(const char utf8[], int utf8Units, const char* locale,
529*c8dee2aaSAndroid Build Coastguard Worker                   std::vector<Position>* results) override {
530*c8dee2aaSAndroid Build Coastguard Worker 
531*c8dee2aaSAndroid Build Coastguard Worker         // Convert to UTF16 since we want the results in utf16
532*c8dee2aaSAndroid Build Coastguard Worker         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
533*c8dee2aaSAndroid Build Coastguard Worker         return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
534*c8dee2aaSAndroid Build Coastguard Worker     }
535*c8dee2aaSAndroid Build Coastguard Worker 
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)536*c8dee2aaSAndroid Build Coastguard Worker     bool getUtf8Words(const char utf8[],
537*c8dee2aaSAndroid Build Coastguard Worker                       int utf8Units,
538*c8dee2aaSAndroid Build Coastguard Worker                       const char* locale,
539*c8dee2aaSAndroid Build Coastguard Worker                       std::vector<Position>* results) override {
540*c8dee2aaSAndroid Build Coastguard Worker         // Convert to UTF16 since we want the results in utf16
541*c8dee2aaSAndroid Build Coastguard Worker         auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
542*c8dee2aaSAndroid Build Coastguard Worker         std::vector<Position> utf16Results;
543*c8dee2aaSAndroid Build Coastguard Worker         if (!SkUnicode_icu::extractWords(
544*c8dee2aaSAndroid Build Coastguard Worker                     (uint16_t*)utf16.c_str(), utf16.size(), locale, &utf16Results)) {
545*c8dee2aaSAndroid Build Coastguard Worker             return false;
546*c8dee2aaSAndroid Build Coastguard Worker         }
547*c8dee2aaSAndroid Build Coastguard Worker 
548*c8dee2aaSAndroid Build Coastguard Worker         std::vector<Position> mapping;
549*c8dee2aaSAndroid Build Coastguard Worker         SkSpan<const char> text(utf8, utf8Units);
550*c8dee2aaSAndroid Build Coastguard Worker         SkUnicode::extractUtfConversionMapping(
551*c8dee2aaSAndroid Build Coastguard Worker                 text, [&](size_t index) { mapping.emplace_back(index); }, [&](size_t index) {});
552*c8dee2aaSAndroid Build Coastguard Worker 
553*c8dee2aaSAndroid Build Coastguard Worker         for (auto i16 : utf16Results) {
554*c8dee2aaSAndroid Build Coastguard Worker             results->emplace_back(mapping[i16]);
555*c8dee2aaSAndroid Build Coastguard Worker         }
556*c8dee2aaSAndroid Build Coastguard Worker         return true;
557*c8dee2aaSAndroid Build Coastguard Worker     }
558*c8dee2aaSAndroid Build Coastguard Worker 
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)559*c8dee2aaSAndroid Build Coastguard Worker     bool getSentences(const char utf8[],
560*c8dee2aaSAndroid Build Coastguard Worker                       int utf8Units,
561*c8dee2aaSAndroid Build Coastguard Worker                       const char* locale,
562*c8dee2aaSAndroid Build Coastguard Worker                       std::vector<SkUnicode::Position>* results) override {
563*c8dee2aaSAndroid Build Coastguard Worker         SkUnicode_icu::extractPositions(
564*c8dee2aaSAndroid Build Coastguard Worker                 utf8, utf8Units, BreakType::kSentences, nullptr,
565*c8dee2aaSAndroid Build Coastguard Worker                 [&](int pos, int status) {
566*c8dee2aaSAndroid Build Coastguard Worker                     results->emplace_back(pos);
567*c8dee2aaSAndroid Build Coastguard Worker                 });
568*c8dee2aaSAndroid Build Coastguard Worker         return true;
569*c8dee2aaSAndroid Build Coastguard Worker     }
570*c8dee2aaSAndroid Build Coastguard Worker 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)571*c8dee2aaSAndroid Build Coastguard Worker     bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
572*c8dee2aaSAndroid Build Coastguard Worker                               TArray<SkUnicode::CodeUnitFlags, true>* results) override {
573*c8dee2aaSAndroid Build Coastguard Worker         results->clear();
574*c8dee2aaSAndroid Build Coastguard Worker         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
575*c8dee2aaSAndroid Build Coastguard Worker 
576*c8dee2aaSAndroid Build Coastguard Worker         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, nullptr, // TODO: locale
577*c8dee2aaSAndroid Build Coastguard Worker                                         [&](int pos, int status) {
578*c8dee2aaSAndroid Build Coastguard Worker             (*results)[pos] |= status == UBRK_LINE_HARD
579*c8dee2aaSAndroid Build Coastguard Worker                                        ? CodeUnitFlags::kHardLineBreakBefore
580*c8dee2aaSAndroid Build Coastguard Worker                                        : CodeUnitFlags::kSoftLineBreakBefore;
581*c8dee2aaSAndroid Build Coastguard Worker         });
582*c8dee2aaSAndroid Build Coastguard Worker 
583*c8dee2aaSAndroid Build Coastguard Worker         SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, nullptr, //TODO
584*c8dee2aaSAndroid Build Coastguard Worker                                         [&](int pos, int status) {
585*c8dee2aaSAndroid Build Coastguard Worker             (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
586*c8dee2aaSAndroid Build Coastguard Worker         });
587*c8dee2aaSAndroid Build Coastguard Worker 
588*c8dee2aaSAndroid Build Coastguard Worker         const char* current = utf8;
589*c8dee2aaSAndroid Build Coastguard Worker         const char* end = utf8 + utf8Units;
590*c8dee2aaSAndroid Build Coastguard Worker         while (current < end) {
591*c8dee2aaSAndroid Build Coastguard Worker             auto before = current - utf8;
592*c8dee2aaSAndroid Build Coastguard Worker             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
593*c8dee2aaSAndroid Build Coastguard Worker             if (unichar < 0) unichar = 0xFFFD;
594*c8dee2aaSAndroid Build Coastguard Worker             auto after = current - utf8;
595*c8dee2aaSAndroid Build Coastguard Worker             if (replaceTabs && this->isTabulation(unichar)) {
596*c8dee2aaSAndroid Build Coastguard Worker                 results->at(before) |= SkUnicode::kTabulation;
597*c8dee2aaSAndroid Build Coastguard Worker                 if (replaceTabs) {
598*c8dee2aaSAndroid Build Coastguard Worker                     unichar = ' ';
599*c8dee2aaSAndroid Build Coastguard Worker                     utf8[before] = ' ';
600*c8dee2aaSAndroid Build Coastguard Worker                 }
601*c8dee2aaSAndroid Build Coastguard Worker             }
602*c8dee2aaSAndroid Build Coastguard Worker             for (auto i = before; i < after; ++i) {
603*c8dee2aaSAndroid Build Coastguard Worker                 if (this->isSpace(unichar)) {
604*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
605*c8dee2aaSAndroid Build Coastguard Worker                 }
606*c8dee2aaSAndroid Build Coastguard Worker                 if (this->isWhitespace(unichar)) {
607*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
608*c8dee2aaSAndroid Build Coastguard Worker                 }
609*c8dee2aaSAndroid Build Coastguard Worker                 if (this->isControl(unichar)) {
610*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kControl;
611*c8dee2aaSAndroid Build Coastguard Worker                 }
612*c8dee2aaSAndroid Build Coastguard Worker                 if (this->isIdeographic(unichar)) {
613*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kIdeographic;
614*c8dee2aaSAndroid Build Coastguard Worker                 }
615*c8dee2aaSAndroid Build Coastguard Worker             }
616*c8dee2aaSAndroid Build Coastguard Worker         }
617*c8dee2aaSAndroid Build Coastguard Worker 
618*c8dee2aaSAndroid Build Coastguard Worker         return true;
619*c8dee2aaSAndroid Build Coastguard Worker     }
620*c8dee2aaSAndroid Build Coastguard Worker 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,TArray<SkUnicode::CodeUnitFlags,true> * results)621*c8dee2aaSAndroid Build Coastguard Worker     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
622*c8dee2aaSAndroid Build Coastguard Worker                           TArray<SkUnicode::CodeUnitFlags, true>* results) override {
623*c8dee2aaSAndroid Build Coastguard Worker         results->clear();
624*c8dee2aaSAndroid Build Coastguard Worker         results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
625*c8dee2aaSAndroid Build Coastguard Worker 
626*c8dee2aaSAndroid Build Coastguard Worker         // Get white spaces
627*c8dee2aaSAndroid Build Coastguard Worker         this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
628*c8dee2aaSAndroid Build Coastguard Worker            [this, results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
629*c8dee2aaSAndroid Build Coastguard Worker                 for (auto i = start; i < end; ++i) {
630*c8dee2aaSAndroid Build Coastguard Worker                     if (replaceTabs && this->isTabulation(unichar)) {
631*c8dee2aaSAndroid Build Coastguard Worker                         results->at(i) |= SkUnicode::kTabulation;
632*c8dee2aaSAndroid Build Coastguard Worker                     if (replaceTabs) {
633*c8dee2aaSAndroid Build Coastguard Worker                             unichar = ' ';
634*c8dee2aaSAndroid Build Coastguard Worker                             utf16[start] = ' ';
635*c8dee2aaSAndroid Build Coastguard Worker                         }
636*c8dee2aaSAndroid Build Coastguard Worker                     }
637*c8dee2aaSAndroid Build Coastguard Worker                     if (this->isSpace(unichar)) {
638*c8dee2aaSAndroid Build Coastguard Worker                         results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
639*c8dee2aaSAndroid Build Coastguard Worker                     }
640*c8dee2aaSAndroid Build Coastguard Worker                     if (this->isWhitespace(unichar)) {
641*c8dee2aaSAndroid Build Coastguard Worker                         results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
642*c8dee2aaSAndroid Build Coastguard Worker                     }
643*c8dee2aaSAndroid Build Coastguard Worker                     if (this->isControl(unichar)) {
644*c8dee2aaSAndroid Build Coastguard Worker                         results->at(i) |= SkUnicode::kControl;
645*c8dee2aaSAndroid Build Coastguard Worker                     }
646*c8dee2aaSAndroid Build Coastguard Worker                 }
647*c8dee2aaSAndroid Build Coastguard Worker            });
648*c8dee2aaSAndroid Build Coastguard Worker         // Get graphemes
649*c8dee2aaSAndroid Build Coastguard Worker         this->forEachBreak((char16_t*)&utf16[0],
650*c8dee2aaSAndroid Build Coastguard Worker                            utf16Units,
651*c8dee2aaSAndroid Build Coastguard Worker                            SkUnicode::BreakType::kGraphemes,
652*c8dee2aaSAndroid Build Coastguard Worker                            [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
653*c8dee2aaSAndroid Build Coastguard Worker                                (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
654*c8dee2aaSAndroid Build Coastguard Worker                            });
655*c8dee2aaSAndroid Build Coastguard Worker         // Get line breaks
656*c8dee2aaSAndroid Build Coastguard Worker         this->forEachBreak(
657*c8dee2aaSAndroid Build Coastguard Worker                 (char16_t*)&utf16[0],
658*c8dee2aaSAndroid Build Coastguard Worker                 utf16Units,
659*c8dee2aaSAndroid Build Coastguard Worker                 SkUnicode::BreakType::kLines,
660*c8dee2aaSAndroid Build Coastguard Worker                 [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
661*c8dee2aaSAndroid Build Coastguard Worker                     if (status ==
662*c8dee2aaSAndroid Build Coastguard Worker                         (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
663*c8dee2aaSAndroid Build Coastguard Worker                         // Hard line breaks clears off all the other flags
664*c8dee2aaSAndroid Build Coastguard Worker                         // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
665*c8dee2aaSAndroid Build Coastguard Worker                         (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
666*c8dee2aaSAndroid Build Coastguard Worker                     } else {
667*c8dee2aaSAndroid Build Coastguard Worker                         (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
668*c8dee2aaSAndroid Build Coastguard Worker                     }
669*c8dee2aaSAndroid Build Coastguard Worker                 });
670*c8dee2aaSAndroid Build Coastguard Worker 
671*c8dee2aaSAndroid Build Coastguard Worker         return true;
672*c8dee2aaSAndroid Build Coastguard Worker     }
673*c8dee2aaSAndroid Build Coastguard Worker 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])674*c8dee2aaSAndroid Build Coastguard Worker     void reorderVisual(const BidiLevel runLevels[],
675*c8dee2aaSAndroid Build Coastguard Worker                        int levelsCount,
676*c8dee2aaSAndroid Build Coastguard Worker                        int32_t logicalFromVisual[]) override {
677*c8dee2aaSAndroid Build Coastguard Worker         fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
678*c8dee2aaSAndroid Build Coastguard Worker     }
679*c8dee2aaSAndroid Build Coastguard Worker 
680*c8dee2aaSAndroid Build Coastguard Worker private:
681*c8dee2aaSAndroid Build Coastguard Worker     sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiICUFactory>();
682*c8dee2aaSAndroid Build Coastguard Worker };
683*c8dee2aaSAndroid Build Coastguard Worker 
684*c8dee2aaSAndroid Build Coastguard Worker namespace SkUnicodes::ICU {
Make()685*c8dee2aaSAndroid Build Coastguard Worker sk_sp<SkUnicode> Make() {
686*c8dee2aaSAndroid Build Coastguard Worker     // We haven't yet created a way to encode the ICU data for assembly on Windows,
687*c8dee2aaSAndroid Build Coastguard Worker     // so we use a helper library to load icudtl.dat from the harddrive.
688*c8dee2aaSAndroid Build Coastguard Worker #if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
689*c8dee2aaSAndroid Build Coastguard Worker     if (!SkLoadICU()) {
690*c8dee2aaSAndroid Build Coastguard Worker         static SkOnce once;
691*c8dee2aaSAndroid Build Coastguard Worker         once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
692*c8dee2aaSAndroid Build Coastguard Worker         return nullptr;
693*c8dee2aaSAndroid Build Coastguard Worker     }
694*c8dee2aaSAndroid Build Coastguard Worker #endif
695*c8dee2aaSAndroid Build Coastguard Worker     if (SkGetICULib()) {
696*c8dee2aaSAndroid Build Coastguard Worker         return sk_make_sp<SkUnicode_icu>();
697*c8dee2aaSAndroid Build Coastguard Worker     }
698*c8dee2aaSAndroid Build Coastguard Worker     return nullptr;
699*c8dee2aaSAndroid Build Coastguard Worker }
700*c8dee2aaSAndroid Build Coastguard Worker }  // namespace SkUnicodes::ICU
701