perl/locale.c
<<
>>
Prefs
   1/*    locale.c
   2 *
   3 *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4 *    2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
   5 *
   6 *    You may distribute under the terms of either the GNU General Public
   7 *    License or the Artistic License, as specified in the README file.
   8 *
   9 */
  10
  11/*
  12 *      A Elbereth Gilthoniel,
  13 *      silivren penna míriel
  14 *      o menel aglar elenath!
  15 *      Na-chaered palan-díriel
  16 *      o galadhremmin ennorath,
  17 *      Fanuilos, le linnathon
  18 *      nef aear, si nef aearon!
  19 *
  20 *     [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
  21 */
  22
  23/* utility functions for handling locale-specific stuff like what
  24 * character represents the decimal point.
  25 */
  26
  27#include "EXTERN.h"
  28#define PERL_IN_LOCALE_C
  29#include "perl.h"
  30
  31#ifdef I_LOCALE
  32#  include <locale.h>
  33#endif
  34
  35#ifdef I_LANGINFO
  36#   include <langinfo.h>
  37#endif
  38
  39#include "reentr.h"
  40
  41#if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE)
  42/*
  43 * Standardize the locale name from a string returned by 'setlocale'.
  44 *
  45 * The standard return value of setlocale() is either
  46 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
  47 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
  48 *     (the space-separated values represent the various sublocales,
  49 *      in some unspecificed order)
  50 *
  51 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
  52 * which is harmful for further use of the string in setlocale().
  53 *
  54 */
  55STATIC char *
  56S_stdize_locale(pTHX_ char *locs)
  57{
  58    const char * const s = strchr(locs, '=');
  59    bool okay = TRUE;
  60
  61    PERL_ARGS_ASSERT_STDIZE_LOCALE;
  62
  63    if (s) {
  64        const char * const t = strchr(s, '.');
  65        okay = FALSE;
  66        if (t) {
  67            const char * const u = strchr(t, '\n');
  68            if (u && (u[1] == 0)) {
  69                const STRLEN len = u - s;
  70                Move(s + 1, locs, len, char);
  71                locs[len] = 0;
  72                okay = TRUE;
  73            }
  74        }
  75    }
  76
  77    if (!okay)
  78        Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
  79
  80    return locs;
  81}
  82#endif
  83
  84void
  85Perl_set_numeric_radix(pTHX)
  86{
  87#ifdef USE_LOCALE_NUMERIC
  88    dVAR;
  89# ifdef HAS_LOCALECONV
  90    const struct lconv* const lc = localeconv();
  91
  92    if (lc && lc->decimal_point) {
  93        if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) {
  94            SvREFCNT_dec(PL_numeric_radix_sv);
  95            PL_numeric_radix_sv = NULL;
  96        }
  97        else {
  98            if (PL_numeric_radix_sv)
  99                sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
 100            else
 101                PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
 102        }
 103    }
 104    else
 105        PL_numeric_radix_sv = NULL;
 106# endif /* HAS_LOCALECONV */
 107#endif /* USE_LOCALE_NUMERIC */
 108}
 109
 110/*
 111 * Set up for a new numeric locale.
 112 */
 113void
 114Perl_new_numeric(pTHX_ const char *newnum)
 115{
 116#ifdef USE_LOCALE_NUMERIC
 117    dVAR;
 118
 119    if (! newnum) {
 120        Safefree(PL_numeric_name);
 121        PL_numeric_name = NULL;
 122        PL_numeric_standard = TRUE;
 123        PL_numeric_local = TRUE;
 124        return;
 125    }
 126
 127    if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) {
 128        Safefree(PL_numeric_name);
 129        PL_numeric_name = stdize_locale(savepv(newnum));
 130        PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0')
 131                               || strEQ(newnum, "POSIX"));
 132        PL_numeric_local = TRUE;
 133        set_numeric_radix();
 134    }
 135
 136#endif /* USE_LOCALE_NUMERIC */
 137}
 138
 139void
 140Perl_set_numeric_standard(pTHX)
 141{
 142#ifdef USE_LOCALE_NUMERIC
 143    dVAR;
 144
 145    if (! PL_numeric_standard) {
 146        setlocale(LC_NUMERIC, "C");
 147        PL_numeric_standard = TRUE;
 148        PL_numeric_local = FALSE;
 149        set_numeric_radix();
 150    }
 151
 152#endif /* USE_LOCALE_NUMERIC */
 153}
 154
 155void
 156Perl_set_numeric_local(pTHX)
 157{
 158#ifdef USE_LOCALE_NUMERIC
 159    dVAR;
 160
 161    if (! PL_numeric_local) {
 162        setlocale(LC_NUMERIC, PL_numeric_name);
 163        PL_numeric_standard = FALSE;
 164        PL_numeric_local = TRUE;
 165        set_numeric_radix();
 166    }
 167
 168#endif /* USE_LOCALE_NUMERIC */
 169}
 170
 171/*
 172 * Set up for a new ctype locale.
 173 */
 174void
 175Perl_new_ctype(pTHX_ const char *newctype)
 176{
 177#ifdef USE_LOCALE_CTYPE
 178    dVAR;
 179    int i;
 180
 181    PERL_ARGS_ASSERT_NEW_CTYPE;
 182
 183    for (i = 0; i < 256; i++) {
 184        if (isUPPER_LC(i))
 185            PL_fold_locale[i] = toLOWER_LC(i);
 186        else if (isLOWER_LC(i))
 187            PL_fold_locale[i] = toUPPER_LC(i);
 188        else
 189            PL_fold_locale[i] = i;
 190    }
 191
 192#endif /* USE_LOCALE_CTYPE */
 193    PERL_ARGS_ASSERT_NEW_CTYPE;
 194    PERL_UNUSED_ARG(newctype);
 195    PERL_UNUSED_CONTEXT;
 196}
 197
 198/*
 199 * Set up for a new collation locale.
 200 */
 201void
 202Perl_new_collate(pTHX_ const char *newcoll)
 203{
 204#ifdef USE_LOCALE_COLLATE
 205    dVAR;
 206
 207    if (! newcoll) {
 208        if (PL_collation_name) {
 209            ++PL_collation_ix;
 210            Safefree(PL_collation_name);
 211            PL_collation_name = NULL;
 212        }
 213        PL_collation_standard = TRUE;
 214        PL_collxfrm_base = 0;
 215        PL_collxfrm_mult = 2;
 216        return;
 217    }
 218
 219    if (! PL_collation_name || strNE(PL_collation_name, newcoll)) {
 220        ++PL_collation_ix;
 221        Safefree(PL_collation_name);
 222        PL_collation_name = stdize_locale(savepv(newcoll));
 223        PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0')
 224                                 || strEQ(newcoll, "POSIX"));
 225
 226        {
 227          /*  2: at most so many chars ('a', 'b'). */
 228          /* 50: surely no system expands a char more. */
 229#define XFRMBUFSIZE  (2 * 50)
 230          char xbuf[XFRMBUFSIZE];
 231          const Size_t fa = strxfrm(xbuf, "a",  XFRMBUFSIZE);
 232          const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE);
 233          const SSize_t mult = fb - fa;
 234          if (mult < 1)
 235              Perl_croak(aTHX_ "strxfrm() gets absurd");
 236          PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0;
 237          PL_collxfrm_mult = mult;
 238        }
 239    }
 240
 241#endif /* USE_LOCALE_COLLATE */
 242}
 243
 244/*
 245 * Initialize locale awareness.
 246 */
 247int
 248Perl_init_i18nl10n(pTHX_ int printwarn)
 249{
 250    int ok = 1;
 251    /* returns
 252     *    1 = set ok or not applicable,
 253     *    0 = fallback to C locale,
 254     *   -1 = fallback to C locale failed
 255     */
 256
 257#if defined(USE_LOCALE)
 258    dVAR;
 259
 260#ifdef USE_LOCALE_CTYPE
 261    char *curctype   = NULL;
 262#endif /* USE_LOCALE_CTYPE */
 263#ifdef USE_LOCALE_COLLATE
 264    char *curcoll    = NULL;
 265#endif /* USE_LOCALE_COLLATE */
 266#ifdef USE_LOCALE_NUMERIC
 267    char *curnum     = NULL;
 268#endif /* USE_LOCALE_NUMERIC */
 269#ifdef __GLIBC__
 270    char * const language   = PerlEnv_getenv("LANGUAGE");
 271#endif
 272    char * const lc_all     = PerlEnv_getenv("LC_ALL");
 273    char * const lang       = PerlEnv_getenv("LANG");
 274    bool setlocale_failure = FALSE;
 275
 276#ifdef LOCALE_ENVIRON_REQUIRED
 277
 278    /*
 279     * Ultrix setlocale(..., "") fails if there are no environment
 280     * variables from which to get a locale name.
 281     */
 282
 283    bool done = FALSE;
 284
 285#ifdef LC_ALL
 286    if (lang) {
 287        if (setlocale(LC_ALL, ""))
 288            done = TRUE;
 289        else
 290            setlocale_failure = TRUE;
 291    }
 292    if (!setlocale_failure) {
 293#ifdef USE_LOCALE_CTYPE
 294        Safefree(curctype);
 295        if (! (curctype =
 296               setlocale(LC_CTYPE,
 297                         (!done && (lang || PerlEnv_getenv("LC_CTYPE")))
 298                                    ? "" : NULL)))
 299            setlocale_failure = TRUE;
 300        else
 301            curctype = savepv(curctype);
 302#endif /* USE_LOCALE_CTYPE */
 303#ifdef USE_LOCALE_COLLATE
 304        Safefree(curcoll);
 305        if (! (curcoll =
 306               setlocale(LC_COLLATE,
 307                         (!done && (lang || PerlEnv_getenv("LC_COLLATE")))
 308                                   ? "" : NULL)))
 309            setlocale_failure = TRUE;
 310        else
 311            curcoll = savepv(curcoll);
 312#endif /* USE_LOCALE_COLLATE */
 313#ifdef USE_LOCALE_NUMERIC
 314        Safefree(curnum);
 315        if (! (curnum =
 316               setlocale(LC_NUMERIC,
 317                         (!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
 318                                  ? "" : NULL)))
 319            setlocale_failure = TRUE;
 320        else
 321            curnum = savepv(curnum);
 322#endif /* USE_LOCALE_NUMERIC */
 323    }
 324
 325#endif /* LC_ALL */
 326
 327#endif /* !LOCALE_ENVIRON_REQUIRED */
 328
 329#ifdef LC_ALL
 330    if (! setlocale(LC_ALL, ""))
 331        setlocale_failure = TRUE;
 332#endif /* LC_ALL */
 333
 334    if (!setlocale_failure) {
 335#ifdef USE_LOCALE_CTYPE
 336        Safefree(curctype);
 337        if (! (curctype = setlocale(LC_CTYPE, "")))
 338            setlocale_failure = TRUE;
 339        else
 340            curctype = savepv(curctype);
 341#endif /* USE_LOCALE_CTYPE */
 342#ifdef USE_LOCALE_COLLATE
 343        Safefree(curcoll);
 344        if (! (curcoll = setlocale(LC_COLLATE, "")))
 345            setlocale_failure = TRUE;
 346        else
 347            curcoll = savepv(curcoll);
 348#endif /* USE_LOCALE_COLLATE */
 349#ifdef USE_LOCALE_NUMERIC
 350        Safefree(curnum);
 351        if (! (curnum = setlocale(LC_NUMERIC, "")))
 352            setlocale_failure = TRUE;
 353        else
 354            curnum = savepv(curnum);
 355#endif /* USE_LOCALE_NUMERIC */
 356    }
 357
 358    if (setlocale_failure) {
 359        char *p;
 360        const bool locwarn = (printwarn > 1 ||
 361                        (printwarn &&
 362                         (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
 363
 364        if (locwarn) {
 365#ifdef LC_ALL
 366
 367            PerlIO_printf(Perl_error_log,
 368               "perl: warning: Setting locale failed.\n");
 369
 370#else /* !LC_ALL */
 371
 372            PerlIO_printf(Perl_error_log,
 373               "perl: warning: Setting locale failed for the categories:\n\t");
 374#ifdef USE_LOCALE_CTYPE
 375            if (! curctype)
 376                PerlIO_printf(Perl_error_log, "LC_CTYPE ");
 377#endif /* USE_LOCALE_CTYPE */
 378#ifdef USE_LOCALE_COLLATE
 379            if (! curcoll)
 380                PerlIO_printf(Perl_error_log, "LC_COLLATE ");
 381#endif /* USE_LOCALE_COLLATE */
 382#ifdef USE_LOCALE_NUMERIC
 383            if (! curnum)
 384                PerlIO_printf(Perl_error_log, "LC_NUMERIC ");
 385#endif /* USE_LOCALE_NUMERIC */
 386            PerlIO_printf(Perl_error_log, "\n");
 387
 388#endif /* LC_ALL */
 389
 390            PerlIO_printf(Perl_error_log,
 391                "perl: warning: Please check that your locale settings:\n");
 392
 393#ifdef __GLIBC__
 394            PerlIO_printf(Perl_error_log,
 395                          "\tLANGUAGE = %c%s%c,\n",
 396                          language ? '"' : '(',
 397                          language ? language : "unset",
 398                          language ? '"' : ')');
 399#endif
 400
 401            PerlIO_printf(Perl_error_log,
 402                          "\tLC_ALL = %c%s%c,\n",
 403                          lc_all ? '"' : '(',
 404                          lc_all ? lc_all : "unset",
 405                          lc_all ? '"' : ')');
 406
 407#if defined(USE_ENVIRON_ARRAY)
 408            {
 409              char **e;
 410              for (e = environ; *e; e++) {
 411                  if (strnEQ(*e, "LC_", 3)
 412                        && strnNE(*e, "LC_ALL=", 7)
 413                        && (p = strchr(*e, '=')))
 414                      PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n",
 415                                    (int)(p - *e), *e, p + 1);
 416              }
 417            }
 418#else
 419            PerlIO_printf(Perl_error_log,
 420                          "\t(possibly more locale environment variables)\n");
 421#endif
 422
 423            PerlIO_printf(Perl_error_log,
 424                          "\tLANG = %c%s%c\n",
 425                          lang ? '"' : '(',
 426                          lang ? lang : "unset",
 427                          lang ? '"' : ')');
 428
 429            PerlIO_printf(Perl_error_log,
 430                          "    are supported and installed on your system.\n");
 431        }
 432
 433#ifdef LC_ALL
 434
 435        if (setlocale(LC_ALL, "C")) {
 436            if (locwarn)
 437                PerlIO_printf(Perl_error_log,
 438      "perl: warning: Falling back to the standard locale (\"C\").\n");
 439            ok = 0;
 440        }
 441        else {
 442            if (locwarn)
 443                PerlIO_printf(Perl_error_log,
 444      "perl: warning: Failed to fall back to the standard locale (\"C\").\n");
 445            ok = -1;
 446        }
 447
 448#else /* ! LC_ALL */
 449
 450        if (0
 451#ifdef USE_LOCALE_CTYPE
 452            || !(curctype || setlocale(LC_CTYPE, "C"))
 453#endif /* USE_LOCALE_CTYPE */
 454#ifdef USE_LOCALE_COLLATE
 455            || !(curcoll || setlocale(LC_COLLATE, "C"))
 456#endif /* USE_LOCALE_COLLATE */
 457#ifdef USE_LOCALE_NUMERIC
 458            || !(curnum || setlocale(LC_NUMERIC, "C"))
 459#endif /* USE_LOCALE_NUMERIC */
 460            )
 461        {
 462            if (locwarn)
 463                PerlIO_printf(Perl_error_log,
 464      "perl: warning: Cannot fall back to the standard locale (\"C\").\n");
 465            ok = -1;
 466        }
 467
 468#endif /* ! LC_ALL */
 469
 470#ifdef USE_LOCALE_CTYPE
 471        Safefree(curctype);
 472        curctype = savepv(setlocale(LC_CTYPE, NULL));
 473#endif /* USE_LOCALE_CTYPE */
 474#ifdef USE_LOCALE_COLLATE
 475        Safefree(curcoll);
 476        curcoll = savepv(setlocale(LC_COLLATE, NULL));
 477#endif /* USE_LOCALE_COLLATE */
 478#ifdef USE_LOCALE_NUMERIC
 479        Safefree(curnum);
 480        curnum = savepv(setlocale(LC_NUMERIC, NULL));
 481#endif /* USE_LOCALE_NUMERIC */
 482    }
 483    else {
 484
 485#ifdef USE_LOCALE_CTYPE
 486    new_ctype(curctype);
 487#endif /* USE_LOCALE_CTYPE */
 488
 489#ifdef USE_LOCALE_COLLATE
 490    new_collate(curcoll);
 491#endif /* USE_LOCALE_COLLATE */
 492
 493#ifdef USE_LOCALE_NUMERIC
 494    new_numeric(curnum);
 495#endif /* USE_LOCALE_NUMERIC */
 496
 497    }
 498
 499#endif /* USE_LOCALE */
 500
 501#ifdef USE_PERLIO
 502    {
 503      /* Set PL_utf8locale to TRUE if using PerlIO _and_
 504         any of the following are true:
 505         - nl_langinfo(CODESET) contains /^utf-?8/i
 506         - $ENV{LC_ALL}   contains /^utf-?8/i
 507         - $ENV{LC_CTYPE} contains /^utf-?8/i
 508         - $ENV{LANG}     contains /^utf-?8/i
 509         The LC_ALL, LC_CTYPE, LANG obey the usual override
 510         hierarchy of locale environment variables.  (LANGUAGE
 511         affects only LC_MESSAGES only under glibc.) (If present,
 512         it overrides LC_MESSAGES for GNU gettext, and it also
 513         can have more than one locale, separated by spaces,
 514         in case you need to know.)
 515         If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE})
 516         are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
 517         on STDIN, STDOUT, STDERR, _and_ the default open discipline.
 518      */
 519         bool utf8locale = FALSE;
 520         char *codeset = NULL;
 521#if defined(HAS_NL_LANGINFO) && defined(CODESET)
 522         codeset = nl_langinfo(CODESET);
 523#endif
 524         if (codeset)
 525              utf8locale = (Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF-8")) == 0 ||
 526                            Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF8") ) == 0);
 527#if defined(USE_LOCALE)
 528         else { /* nl_langinfo(CODESET) is supposed to correctly
 529                 * interpret the locale environment variables,
 530                 * but just in case it fails, let's do this manually. */ 
 531              if (lang)
 532                   utf8locale = (Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF-8")) == 0 ||
 533                                 Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF8") ) == 0);
 534#ifdef USE_LOCALE_CTYPE
 535              if (curctype)
 536                   utf8locale = (Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF-8")) == 0 ||
 537                                 Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF8") ) == 0);
 538#endif
 539              if (lc_all)
 540                   utf8locale = (Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF-8")) == 0 ||
 541                                 Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF8") ) == 0);
 542         }
 543#endif /* USE_LOCALE */
 544         if (utf8locale)
 545              PL_utf8locale = TRUE;
 546    }
 547    /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
 548       This is an alternative to using the -C command line switch
 549       (the -C if present will override this). */
 550    {
 551         const char *p = PerlEnv_getenv("PERL_UNICODE");
 552         PL_unicode = p ? parse_unicode_opts(&p) : 0;
 553         if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
 554             PL_utf8cache = -1;
 555    }
 556#endif
 557
 558#ifdef USE_LOCALE_CTYPE
 559    Safefree(curctype);
 560#endif /* USE_LOCALE_CTYPE */
 561#ifdef USE_LOCALE_COLLATE
 562    Safefree(curcoll);
 563#endif /* USE_LOCALE_COLLATE */
 564#ifdef USE_LOCALE_NUMERIC
 565    Safefree(curnum);
 566#endif /* USE_LOCALE_NUMERIC */
 567    return ok;
 568}
 569
 570#ifdef USE_LOCALE_COLLATE
 571
 572/*
 573 * mem_collxfrm() is a bit like strxfrm() but with two important
 574 * differences. First, it handles embedded NULs. Second, it allocates
 575 * a bit more memory than needed for the transformed data itself.
 576 * The real transformed data begins at offset sizeof(collationix).
 577 * Please see sv_collxfrm() to see how this is used.
 578 */
 579
 580char *
 581Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen)
 582{
 583    dVAR;
 584    char *xbuf;
 585    STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */
 586
 587    PERL_ARGS_ASSERT_MEM_COLLXFRM;
 588
 589    /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */
 590    /* the +1 is for the terminating NUL. */
 591
 592    xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1;
 593    Newx(xbuf, xAlloc, char);
 594    if (! xbuf)
 595        goto bad;
 596
 597    *(U32*)xbuf = PL_collation_ix;
 598    xout = sizeof(PL_collation_ix);
 599    for (xin = 0; xin < len; ) {
 600        Size_t xused;
 601
 602        for (;;) {
 603            xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout);
 604            if (xused >= PERL_INT_MAX)
 605                goto bad;
 606            if ((STRLEN)xused < xAlloc - xout)
 607                break;
 608            xAlloc = (2 * xAlloc) + 1;
 609            Renew(xbuf, xAlloc, char);
 610            if (! xbuf)
 611                goto bad;
 612        }
 613
 614        xin += strlen(s + xin) + 1;
 615        xout += xused;
 616
 617        /* Embedded NULs are understood but silently skipped
 618         * because they make no sense in locale collation. */
 619    }
 620
 621    xbuf[xout] = '\0';
 622    *xlen = xout - sizeof(PL_collation_ix);
 623    return xbuf;
 624
 625  bad:
 626    Safefree(xbuf);
 627    *xlen = 0;
 628    return NULL;
 629}
 630
 631#endif /* USE_LOCALE_COLLATE */
 632
 633/*
 634 * Local variables:
 635 * c-indentation-style: bsd
 636 * c-basic-offset: 4
 637 * indent-tabs-mode: t
 638 * End:
 639 *
 640 * ex: set ts=8 sts=4 sw=4 noet:
 641 */
 642
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.