xref: /aosp_15_r20/external/lzma/CPP/Common/StringConvert.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1  // Common/StringConvert.cpp
2  
3  #include "StdAfx.h"
4  
5  #include "StringConvert.h"
6  
7  #ifndef _WIN32
8  // #include <stdio.h>
9  #include <stdlib.h>
10  #endif
11  
12  #if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
13  #include "UTFConvert.h"
14  #endif
15  
16  #ifdef ENV_HAVE_LOCALE
17  #include <locale.h>
18  #endif
19  
20  static const char k_DefultChar = '_';
21  
22  #ifdef _WIN32
23  
24  /*
25  MultiByteToWideChar(CodePage, DWORD dwFlags,
26      LPCSTR lpMultiByteStr, int cbMultiByte,
27      LPWSTR lpWideCharStr, int cchWideChar)
28  
29    if (cbMultiByte == 0)
30      return: 0. ERR: ERROR_INVALID_PARAMETER
31  
32    if (cchWideChar == 0)
33      return: the required buffer size in characters.
34  
35    if (supplied buffer size was not large enough)
36      return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
37      The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
38  
39    If there are illegal characters:
40      if MB_ERR_INVALID_CHARS is set in dwFlags:
41        - the function stops conversion on illegal character.
42        - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
43  
44      if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
45        before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
46        in Vista+:    illegal character is not dropped (MSDN). Undocumented: illegal
47                      character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
48  */
49  
50  
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)51  void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
52  {
53    dest.Empty();
54    if (src.IsEmpty())
55      return;
56    {
57      /*
58      wchar_t *d = dest.GetBuf(src.Len());
59      const char *s = (const char *)src;
60      unsigned i;
61  
62      for (i = 0;;)
63      {
64        Byte c = (Byte)s[i];
65        if (c >= 0x80 || c == 0)
66          break;
67        d[i++] = (wchar_t)c;
68      }
69  
70      if (i != src.Len())
71      {
72        unsigned len = MultiByteToWideChar(codePage, 0, s + i,
73            src.Len() - i, d + i,
74            src.Len() + 1 - i);
75        if (len == 0)
76          throw 282228;
77        i += len;
78      }
79  
80      d[i] = 0;
81      dest.ReleaseBuf_SetLen(i);
82      */
83      unsigned len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), NULL, 0);
84      if (len == 0)
85      {
86        if (GetLastError() != 0)
87          throw 282228;
88      }
89      else
90      {
91        len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), dest.GetBuf(len), (int)len);
92        if (len == 0)
93          throw 282228;
94        dest.ReleaseBuf_SetEnd(len);
95      }
96    }
97  }
98  
99  /*
100    int WideCharToMultiByte(
101        UINT CodePage, DWORD dwFlags,
102        LPCWSTR lpWideCharStr, int cchWideChar,
103        LPSTR lpMultiByteStr, int cbMultiByte,
104        LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
105  
106  if (lpDefaultChar == NULL),
107    - it uses system default value.
108  
109  if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
110    if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
111      return: 0. ERR: ERROR_INVALID_PARAMETER.
112  
113  The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
114  
115  */
116  
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)117  static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
118  {
119    dest.Empty();
120    defaultCharWasUsed = false;
121    if (src.IsEmpty())
122      return;
123    {
124      /*
125      unsigned numRequiredBytes = src.Len() * 2;
126      char *d = dest.GetBuf(numRequiredBytes);
127      const wchar_t *s = (const wchar_t *)src;
128      unsigned i;
129  
130      for (i = 0;;)
131      {
132        wchar_t c = s[i];
133        if (c >= 0x80 || c == 0)
134          break;
135        d[i++] = (char)c;
136      }
137  
138      if (i != src.Len())
139      {
140        BOOL defUsed = FALSE;
141        defaultChar = defaultChar;
142  
143        bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
144        unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
145            d + i, numRequiredBytes + 1 - i,
146            (isUtf ? NULL : &defaultChar),
147            (isUtf ? NULL : &defUsed));
148        defaultCharWasUsed = (defUsed != FALSE);
149        if (len == 0)
150          throw 282229;
151        i += len;
152      }
153  
154      d[i] = 0;
155      dest.ReleaseBuf_SetLen(i);
156      */
157  
158      /*
159      if (codePage != CP_UTF7)
160      {
161        const wchar_t *s = (const wchar_t *)src;
162        unsigned i;
163        for (i = 0;; i++)
164        {
165          wchar_t c = s[i];
166          if (c >= 0x80 || c == 0)
167            break;
168        }
169  
170        if (s[i] == 0)
171        {
172          char *d = dest.GetBuf(src.Len());
173          for (i = 0;;)
174          {
175            wchar_t c = s[i];
176            if (c == 0)
177              break;
178            d[i++] = (char)c;
179          }
180          d[i] = 0;
181          dest.ReleaseBuf_SetLen(i);
182          return;
183        }
184      }
185      */
186  
187      unsigned len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(), NULL, 0, NULL, NULL);
188      if (len == 0)
189      {
190        if (GetLastError() != 0)
191          throw 282228;
192      }
193      else
194      {
195        BOOL defUsed = FALSE;
196        bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
197        // defaultChar = defaultChar;
198        len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(),
199            dest.GetBuf(len), (int)len,
200            (isUtf ? NULL : &defaultChar),
201            (isUtf ? NULL : &defUsed)
202            );
203        if (!isUtf)
204          defaultCharWasUsed = (defUsed != FALSE);
205        if (len == 0)
206          throw 282228;
207        dest.ReleaseBuf_SetEnd(len);
208      }
209    }
210  }
211  
212  /*
213  #ifndef UNDER_CE
214  AString SystemStringToOemString(const CSysString &src)
215  {
216    AString dest;
217    const unsigned len = src.Len() * 2;
218    CharToOem(src, dest.GetBuf(len));
219    dest.ReleaseBuf_CalcLen(len);
220    return dest;
221  }
222  #endif
223  */
224  
225  #else // _WIN32
226  
227  // #include <stdio.h>
228  /*
229    if (wchar_t is 32-bit (#if WCHAR_MAX > 0xffff),
230        and utf-8 string contains big unicode character > 0xffff),
231    then we still use 16-bit surrogate pair in UString.
232    It simplifies another code where utf-16 encoding is used.
233    So we use surrogate-conversion code only in this file.
234  */
235  
236  /*
237     mbstowcs() returns error if there is error in utf-8 stream,
238     mbstowcs() returns an error if there is a single surrogate code point (d800-dfff) in the utf-8 stream
239  */
240  
241  /*
242  static void MultiByteToUnicodeString2_Native(UString &dest, const AString &src)
243  {
244    dest.Empty();
245    if (src.IsEmpty())
246      return;
247  
248    const size_t limit = ((size_t)src.Len() + 1) * 2;
249    wchar_t *d = dest.GetBuf((unsigned)limit);
250    const size_t len = mbstowcs(d, src, limit);
251    if (len != (size_t)-1)
252    {
253      dest.ReleaseBuf_SetEnd((unsigned)len);
254      return;
255    }
256    dest.ReleaseBuf_SetEnd(0);
257  }
258  */
259  
260  bool g_ForceToUTF8 = true; // false;
261  
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)262  void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
263  {
264    dest.Empty();
265    if (src.IsEmpty())
266      return;
267  
268    if (codePage == CP_UTF8 || g_ForceToUTF8)
269    {
270  #if 1
271      ConvertUTF8ToUnicode(src, dest);
272      return;
273  #endif
274    }
275  
276    const size_t limit = ((size_t)src.Len() + 1) * 2;
277    wchar_t *d = dest.GetBuf((unsigned)limit);
278    const size_t len = mbstowcs(d, src, limit);
279    if (len != (size_t)-1)
280    {
281      dest.ReleaseBuf_SetEnd((unsigned)len);
282  
283  #if WCHAR_MAX > 0xffff
284      d = dest.GetBuf();
285      for (size_t i = 0;; i++)
286      {
287        wchar_t c = d[i];
288        // printf("\ni=%2d c = %4x\n", (unsigned)i, (unsigned)c);
289        if (c == 0)
290          break;
291        if (c >= 0x10000 && c < 0x110000)
292        {
293          UString tempString = d + i;
294          const wchar_t *t = tempString.Ptr();
295  
296          for (;;)
297          {
298            wchar_t w = *t++;
299            // printf("\nchar=%x\n", w);
300            if (w == 0)
301              break;
302            if (i == limit)
303              break; // unexpected error
304            if (w >= 0x10000 && w < 0x110000)
305            {
306  #if 1
307              if (i + 1 == limit)
308                break; // unexpected error
309              w -= 0x10000;
310              d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff);
311              w = 0xdc00 + (w & 0x3ff);
312  #else
313              // w = '_'; // for debug
314  #endif
315            }
316            d[i++] = w;
317          }
318          dest.ReleaseBuf_SetEnd((unsigned)i);
319          break;
320        }
321      }
322  
323  #endif
324  
325      /*
326      printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(),  src.Ptr());
327      printf("char:    ");
328      for (unsigned i = 0; i < src.Len(); i++)
329        printf (" %02x", (int)(Byte)src[i]);
330      printf("\n");
331      printf("\n-> (%d) %ls\n", (int)dest.Len(), dest.Ptr());
332      printf("wchar_t: ");
333      for (unsigned i = 0; i < dest.Len(); i++)
334      {
335        printf (" %02x", (int)dest[i]);
336      }
337      printf("\n");
338      */
339  
340      return;
341    }
342  
343    /* if there is mbstowcs() error, we have two ways:
344  
345       1) change 0x80+ characters to some character: '_'
346          in that case we lose data, but we have correct UString()
347          and that scheme can show errors to user in early stages,
348          when file converted back to mbs() cannot be found
349  
350       2) transfer bad characters in some UTF-16 range.
351          it can be non-original Unicode character.
352          but later we still can restore original character.
353    */
354  
355  
356    // printf("\nmbstowcs  ERROR !!!!!! s=%s\n", src.Ptr());
357    {
358      unsigned i;
359      const char *s = (const char *)src;
360      for (i = 0;;)
361      {
362        Byte c = (Byte)s[i];
363        if (c == 0)
364          break;
365        // we can use ascii compatibilty character '_'
366        // if (c > 0x7F) c = '_'; // we replace "bad: character
367        d[i++] = (wchar_t)c;
368      }
369      d[i] = 0;
370      dest.ReleaseBuf_SetLen(i);
371    }
372  }
373  
UnicodeStringToMultiByte2_Native(AString & dest,const UString & src)374  static void UnicodeStringToMultiByte2_Native(AString &dest, const UString &src)
375  {
376    dest.Empty();
377    if (src.IsEmpty())
378      return;
379  
380    const size_t limit = ((size_t)src.Len() + 1) * 6;
381    char *d = dest.GetBuf((unsigned)limit);
382  
383    const size_t len = wcstombs(d, src, limit);
384  
385    if (len != (size_t)-1)
386    {
387      dest.ReleaseBuf_SetEnd((unsigned)len);
388      return;
389    }
390    dest.ReleaseBuf_SetEnd(0);
391  }
392  
393  
UnicodeStringToMultiByte2(AString & dest,const UString & src2,UINT codePage,char defaultChar,bool & defaultCharWasUsed)394  static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
395  {
396    // if (codePage == 1234567) // for debug purposes
397    if (codePage == CP_UTF8 || g_ForceToUTF8)
398    {
399  #if 1
400      defaultCharWasUsed = false;
401      ConvertUnicodeToUTF8(src2, dest);
402      return;
403  #endif
404    }
405  
406    UString src = src2;
407  #if WCHAR_MAX > 0xffff
408    {
409      src.Empty();
410      for (unsigned i = 0; i < src2.Len();)
411      {
412        wchar_t c = src2[i++];
413        if (c >= 0xd800 && c < 0xdc00 && i != src2.Len())
414        {
415          const wchar_t c2 = src2[i];
416          if (c2 >= 0xdc00 && c2 < 0xe000)
417          {
418  #if 1
419            // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
420            c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
421            // printf("%4x\n", (int)c);
422            i++;
423  #else
424            // c = '_'; // for debug
425  #endif
426          }
427        }
428        src += c;
429      }
430    }
431  #endif
432  
433    dest.Empty();
434    defaultCharWasUsed = false;
435    if (src.IsEmpty())
436      return;
437  
438    const size_t len = wcstombs(NULL, src, 0);
439  
440    if (len != (size_t)-1)
441    {
442      const unsigned limit = ((unsigned)len);
443      if (limit == len)
444      {
445        char *d = dest.GetBuf(limit);
446  
447        /*
448        {
449          printf("\nwcstombs; len = %d %ls \n", (int)src.Len(), src.Ptr());
450          for (unsigned i = 0; i < src.Len(); i++)
451            printf (" %02x", (int)src[i]);
452          printf("\n");
453          printf("\ndest Limit = %d \n", limit);
454        }
455        */
456  
457        const size_t len2 = wcstombs(d, src, len + 1);
458  
459        if (len2 != (size_t)-1 && len2 <= limit)
460        {
461          /*
462          printf("\nOK : destLen = %d : %s\n", (int)len, dest.Ptr());
463          for (unsigned i = 0; i < len2; i++)
464            printf(" %02x", (int)(Byte)dest[i]);
465          printf("\n");
466          */
467          dest.ReleaseBuf_SetEnd((unsigned)len2);
468          return;
469        }
470      }
471    }
472  
473    {
474      const wchar_t *s = (const wchar_t *)src;
475      char *d = dest.GetBuf(src.Len());
476  
477      unsigned i;
478      for (i = 0;;)
479      {
480        wchar_t c = s[i];
481        if (c == 0)
482          break;
483        if (c >=
484              0x100
485              // 0x80
486            )
487        {
488          c = defaultChar;
489          defaultCharWasUsed = true;
490        }
491  
492        d[i++] = (char)c;
493      }
494      d[i] = 0;
495      dest.ReleaseBuf_SetLen(i);
496      /*
497      printf("\nUnicodeStringToMultiByte2; len = %d \n", (int)src.Len());
498      printf("ERROR: %s\n", dest.Ptr());
499      */
500    }
501  }
502  
503  #endif // _WIN32
504  
505  
MultiByteToUnicodeString(const AString & src,UINT codePage)506  UString MultiByteToUnicodeString(const AString &src, UINT codePage)
507  {
508    UString dest;
509    MultiByteToUnicodeString2(dest, src, codePage);
510    return dest;
511  }
512  
MultiByteToUnicodeString(const char * src,UINT codePage)513  UString MultiByteToUnicodeString(const char *src, UINT codePage)
514  {
515    return MultiByteToUnicodeString(AString(src), codePage);
516  }
517  
518  
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage)519  void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
520  {
521    bool defaultCharWasUsed;
522    UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
523  }
524  
UnicodeStringToMultiByte(const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)525  AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
526  {
527    AString dest;
528    UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
529    return dest;
530  }
531  
UnicodeStringToMultiByte(const UString & src,UINT codePage)532  AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
533  {
534    AString dest;
535    bool defaultCharWasUsed;
536    UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
537    return dest;
538  }
539  
540  
541  
542  
543  #if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
544  
545  #ifdef _WIN32
546  #define U_to_A(a, b, c)  UnicodeStringToMultiByte2
547  // #define A_to_U(a, b, c)  MultiByteToUnicodeString2
548  #else
549  // void MultiByteToUnicodeString2_Native(UString &dest, const AString &src);
550  #define U_to_A(a, b, c)  UnicodeStringToMultiByte2_Native(a, b)
551  // #define A_to_U(a, b, c)  MultiByteToUnicodeString2_Native(a, b)
552  #endif
553  
IsNativeUTF8()554  bool IsNativeUTF8()
555  {
556    UString u;
557    AString a, a2;
558    // for (unsigned c = 0x80; c < (UInt32)0x10000; c += (c >> 9) + 1)
559    for (unsigned c = 0x80; c < (UInt32)0xD000; c += (c >> 2) + 1)
560    {
561      u.Empty();
562      u += (wchar_t)c;
563      /*
564      if (Unicode_Is_There_Utf16SurrogateError(u))
565        continue;
566      #ifndef _WIN32
567      if (Unicode_Is_There_BmpEscape(u))
568        continue;
569      #endif
570      */
571      ConvertUnicodeToUTF8(u, a);
572      U_to_A(a2, u, CP_OEMCP);
573      if (a != a2)
574        return false;
575    }
576    return true;
577  }
578  
579  #endif
580  
581  
582  #ifdef ENV_HAVE_LOCALE
583  
/*
  Returns the name of the program's current LC_CTYPE locale.

  With ENV_HAVE_LOCALE the name is queried with setlocale(LC_CTYPE, NULL)
  (ubuntu reports "C" right after program start); a NULL answer is mapped
  to "C". Without it, the name is a compile-time constant: "utf8" if
  LOCALE_IS_UTF8 is defined, "C" otherwise.

  The result is never NULL.
*/
const char *GetLocale(void)
{
  #ifdef ENV_HAVE_LOCALE
    const char *current = setlocale(LC_CTYPE, NULL);
    return current ? current : "C";
  #elif defined(LOCALE_IS_UTF8)
    return "utf8";
  #else
    return "C";
  #endif
}
606  
607  #ifdef _WIN32
Set_ForceToUTF8(bool)608    static void Set_ForceToUTF8(bool) {}
609  #else
Set_ForceToUTF8(bool val)610    static void Set_ForceToUTF8(bool val) { g_ForceToUTF8 = val; }
611  #endif
612  
Is_Default_Basic_Locale(const char * locale)613  static bool Is_Default_Basic_Locale(const char *locale)
614  {
615    const AString a (locale);
616    if (a.IsEqualTo_Ascii_NoCase("")
617        || a.IsEqualTo_Ascii_NoCase("C")
618        || a.IsEqualTo_Ascii_NoCase("POSIX"))
619        return true;
620    return false;
621  }
622  
Is_Default_Basic_Locale()623  static bool Is_Default_Basic_Locale()
624  {
625    return Is_Default_Basic_Locale(GetLocale());
626  }
627  
628  
/*
  Selects the program locale and decides whether multi-byte conversions
  should bypass the C library and always use the built-in UTF-8 converter
  (see Set_ForceToUTF8()).

  With ENV_HAVE_LOCALE up to three setlocale(LC_ALL, ...) attempts are made:
    attempt 0 : ""  - locale from the environment
    attempt 1 : an explicit UTF-8 locale ("en_US.UTF-8" on __APPLE__,
                "C.UTF-8" elsewhere)
    attempt 2 : ""  - back to the locale from the environment

  After each attempt:
    - if IsNativeUTF8() is true, UTF-8 is forced and the function returns;
    - otherwise, if the current locale is not a basic one ("", "C",
      "POSIX"), the attempts stop: a non-default, non-UTF locale is kept.

  After the attempts, UTF-8 is forced if IsNativeUTF8() is true or the
  current locale is a basic one; otherwise it is switched off.

  Without ENV_HAVE_LOCALE the function does nothing.
*/
void MY_SetLocale()
{
  #ifdef ENV_HAVE_LOCALE

  const unsigned kNumAttempts = 3;
  const unsigned kUtf8Attempt = 1;

  for (unsigned i = 0; i < kNumAttempts; i++)
  {
    /*
    man7: "If locale is an empty string, "", each part of the locale that
    should be modified is set according to the environment variables.
    for glibc: first from the user's environment variables:
      1) the environment variable LC_ALL,
      2) environment variable with the same name as the category
      3) the environment variable LANG
    The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.

    for WIN32 : MSDN :
      Sets the locale to the default, which is the user-default
      ANSI code page obtained from the operating system.
    */
    const char *newLocale = "";

    if (i == kUtf8Attempt)
    {
      #ifdef __APPLE__
      // there is no C.UTF-8 in macos:
      // macos has UTF-8 locale only with some language like en_US.UTF-8
      newLocale = "en_US.UTF-8";
      #else
      newLocale = "C.UTF-8";    // main UTF-8 locale in ubuntu
      #endif
    }

    // The return value is not checked: the resulting locale is queried
    // with GetLocale() right below.
    setlocale(LC_ALL, newLocale);

    // request current locale of program
    const char *locale = GetLocale();
    if (locale)
    {
      if (IsNativeUTF8())
      {
        Set_ForceToUTF8(true);
        return;
      }
      // if there is some non-default and non-utf locale, we want to use it
      if (!Is_Default_Basic_Locale(locale))
        break;
    }
  }

  Set_ForceToUTF8(IsNativeUTF8() || Is_Default_Basic_Locale());

  #elif defined(LOCALE_IS_UTF8)
    // assume LC_CTYPE="utf8"
  #else
    // assume LC_CTYPE="C"
  #endif
}
762  #endif
763