perl/pp_pack.c
<<
>>
Prefs
   1/*    pp_pack.c
   2 *
   3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5 *
   6 *    You may distribute under the terms of either the GNU General Public
   7 *    License or the Artistic License, as specified in the README file.
   8 *
   9 */
  10
  11/*
  12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
  13 * two small shallow pans, the smaller fitting into the larger; inside them a
  14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
  15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
  16 * some salt.
  17 *
  18 *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
  19 */
  20
  21/* This file contains pp ("push/pop") functions that
  22 * execute the opcodes that make up a perl program. A typical pp function
  23 * expects to find its arguments on the stack, and usually pushes its
  24 * results onto the stack, hence the 'pp' terminology. Each OP structure
  25 * contains a pointer to the relevant pp_foo() function.
  26 *
  27 * This particular file just contains pp_pack() and pp_unpack(). See the
  28 * other pp*.c files for the rest of the pp_ functions.
  29 */
  30
  31#include "EXTERN.h"
  32#define PERL_IN_PP_PACK_C
  33#include "perl.h"
  34
  35/* Types used by pack/unpack */ 
  36typedef enum {
  37  e_no_len,     /* no length  */
  38  e_number,     /* number, [] */
  39  e_star        /* asterisk   */
  40} howlen_t;
  41
  42typedef struct tempsym {
  43  const char*    patptr;   /* current template char */
  44  const char*    patend;   /* one after last char   */
  45  const char*    grpbeg;   /* 1st char of ()-group  */
  46  const char*    grpend;   /* end of ()-group       */
  47  I32      code;     /* template code (!<>)   */
  48  I32      length;   /* length/repeat count   */
  49  howlen_t howlen;   /* how length is given   */ 
  50  int      level;    /* () nesting level      */
  51  U32      flags;    /* /=4, comma=2, pack=1  */
  52                     /*   and group modifiers */
  53  STRLEN   strbeg;   /* offset of group start */
  54  struct tempsym *previous; /* previous group */
  55} tempsym_t;
  56
  57#define TEMPSYM_INIT(symptr, p, e, f) \
  58    STMT_START {        \
  59        (symptr)->patptr   = (p);       \
  60        (symptr)->patend   = (e);       \
  61        (symptr)->grpbeg   = NULL;      \
  62        (symptr)->grpend   = NULL;      \
  63        (symptr)->grpend   = NULL;      \
  64        (symptr)->code     = 0;         \
  65        (symptr)->length   = 0;         \
  66        (symptr)->howlen   = e_no_len;  \
  67        (symptr)->level    = 0;         \
  68        (symptr)->flags    = (f);       \
  69        (symptr)->strbeg   = 0;         \
  70        (symptr)->previous = NULL;      \
  71   } STMT_END
  72
  73#if PERL_VERSION >= 9
  74# define PERL_PACK_CAN_BYTEORDER
  75# define PERL_PACK_CAN_SHRIEKSIGN
  76#endif
  77
  78#ifndef CHAR_BIT
  79# define CHAR_BIT       8
  80#endif
  81/* Maximum number of bytes to which a byte can grow due to upgrade */
  82#define UTF8_EXPAND     2
  83
  84/*
  85 * Offset for integer pack/unpack.
  86 *
  87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
  88 * which for now are all Crays, pack and unpack have to play games.
  89 */
  90
  91/*
  92 * These values are required for portability of pack() output.
  93 * If they're not right on your machine, then pack() and unpack()
  94 * wouldn't work right anyway; you'll need to apply the Cray hack.
  95 * (I'd like to check them with #if, but you can't use sizeof() in
  96 * the preprocessor.)  --???
  97 */
  98/*
  99    The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
 100    defines are now in config.h.  --Andy Dougherty  April 1998
 101 */
 102#define SIZE16 2
 103#define SIZE32 4
 104
 105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
 106   --jhi Feb 1999 */
 107
 108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
 109#  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
 110#    define OFF16(p)    ((char*)(p))
 111#    define OFF32(p)    ((char*)(p))
 112#  else
 113#    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
 114#      define OFF16(p)  ((char*)(p) + (sizeof(U16) - SIZE16))
 115#      define OFF32(p)  ((char*)(p) + (sizeof(U32) - SIZE32))
 116#    else
 117       ++++ bad cray byte order
 118#    endif
 119#  endif
 120#else
 121#  define OFF16(p)     ((char *) (p))
 122#  define OFF32(p)     ((char *) (p))
 123#endif
 124
 125/* Only to be used inside a loop (see the break) */
 126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START {             \
 127    if (utf8) {                                                         \
 128        if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break;      \
 129    } else {                                                            \
 130        Copy(s, OFF16(p), SIZE16, char);                                \
 131        (s) += SIZE16;                                                  \
 132    }                                                                   \
 133} STMT_END
 134
 135/* Only to be used inside a loop (see the break) */
 136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START {             \
 137    if (utf8) {                                                         \
 138        if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break;      \
 139    } else {                                                            \
 140        Copy(s, OFF32(p), SIZE32, char);                                \
 141        (s) += SIZE32;                                                  \
 142    }                                                                   \
 143} STMT_END
 144
 145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
 146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
 147
 148/* Only to be used inside a loop (see the break) */
 149#define SHIFT_VAR(utf8, s, strend, var, datumtype)      \
 150STMT_START {                                            \
 151    if (utf8) {                                         \
 152        if (!uni_to_bytes(aTHX_ &s, strend,             \
 153            (char *) &var, sizeof(var), datumtype)) break;\
 154    } else {                                            \
 155        Copy(s, (char *) &var, sizeof(var), char);      \
 156        s += sizeof(var);                               \
 157    }                                                   \
 158} STMT_END
 159
 160#define PUSH_VAR(utf8, aptr, var)       \
 161        PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
 162
 163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
 164#define MAX_SUB_TEMPLATE_LEVEL 100
 165
 166/* flags (note that type modifiers can also be used as flags!) */
 167#define FLAG_WAS_UTF8         0x40
 168#define FLAG_PARSE_UTF8       0x20      /* Parse as utf8 */
 169#define FLAG_UNPACK_ONLY_ONE  0x10
 170#define FLAG_DO_UTF8          0x08      /* The underlying string is utf8 */
 171#define FLAG_SLASH            0x04
 172#define FLAG_COMMA            0x02
 173#define FLAG_PACK             0x01
 174
 175STATIC SV *
 176S_mul128(pTHX_ SV *sv, U8 m)
 177{
 178  STRLEN          len;
 179  char           *s = SvPV(sv, len);
 180  char           *t;
 181
 182  PERL_ARGS_ASSERT_MUL128;
 183
 184  if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
 185    SV * const tmpNew = newSVpvs("0000000000");
 186
 187    sv_catsv(tmpNew, sv);
 188    SvREFCNT_dec(sv);           /* free old sv */
 189    sv = tmpNew;
 190    s = SvPV(sv, len);
 191  }
 192  t = s + len - 1;
 193  while (!*t)                   /* trailing '\0'? */
 194    t--;
 195  while (t > s) {
 196    const U32 i = ((*t - '0') << 7) + m;
 197    *(t--) = '0' + (char)(i % 10);
 198    m = (char)(i / 10);
 199  }
 200  return (sv);
 201}
 202
 203/* Explosives and implosives. */
 204
 205#if 'I' == 73 && 'J' == 74
 206/* On an ASCII/ISO kind of system */
 207#define ISUUCHAR(ch)    ((ch) >= ' ' && (ch) < 'a')
 208#else
 209/*
 210  Some other sort of character set - use memchr() so we don't match
 211  the null byte.
 212 */
 213#define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
 214#endif
 215
 216/* type modifiers */
 217#define TYPE_IS_SHRIEKING       0x100
 218#define TYPE_IS_BIG_ENDIAN      0x200
 219#define TYPE_IS_LITTLE_ENDIAN   0x400
 220#define TYPE_IS_PACK            0x800
 221#define TYPE_ENDIANNESS_MASK    (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
 222#define TYPE_MODIFIERS(t)       ((t) & ~0xFF)
 223#define TYPE_NO_MODIFIERS(t)    ((t) & 0xFF)
 224
 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
 227#else
 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
 229#endif
 230
 231#ifndef PERL_PACK_CAN_BYTEORDER
 232/* Put "can't" first because it is shorter  */
 233# define TYPE_ENDIANNESS(t)     0
 234# define TYPE_NO_ENDIANNESS(t)  (t)
 235
 236# define ENDIANNESS_ALLOWED_TYPES   ""
 237
 238# define DO_BO_UNPACK(var, type)
 239# define DO_BO_PACK(var, type)
 240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
 241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
 242# define DO_BO_UNPACK_N(var, type)
 243# define DO_BO_PACK_N(var, type)
 244# define DO_BO_UNPACK_P(var)
 245# define DO_BO_PACK_P(var)
 246# define DO_BO_UNPACK_PC(var)
 247# define DO_BO_PACK_PC(var)
 248
 249#else /* PERL_PACK_CAN_BYTEORDER */
 250
 251# define TYPE_ENDIANNESS(t)     ((t) & TYPE_ENDIANNESS_MASK)
 252# define TYPE_NO_ENDIANNESS(t)  ((t) & ~TYPE_ENDIANNESS_MASK)
 253
 254# define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
 255
 256# define DO_BO_UNPACK(var, type)                                              \
 257        STMT_START {                                                          \
 258          switch (TYPE_ENDIANNESS(datumtype)) {                               \
 259            case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
 260            case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break;  \
 261            default: break;                                                   \
 262          }                                                                   \
 263        } STMT_END
 264
 265# define DO_BO_PACK(var, type)                                                \
 266        STMT_START {                                                          \
 267          switch (TYPE_ENDIANNESS(datumtype)) {                               \
 268            case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
 269            case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break;  \
 270            default: break;                                                   \
 271          }                                                                   \
 272        } STMT_END
 273
 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
 275        STMT_START {                                                          \
 276          switch (TYPE_ENDIANNESS(datumtype)) {                               \
 277            case TYPE_IS_BIG_ENDIAN:                                          \
 278              var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
 279              break;                                                          \
 280            case TYPE_IS_LITTLE_ENDIAN:                                       \
 281              var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
 282              break;                                                          \
 283            default:                                                          \
 284              break;                                                          \
 285          }                                                                   \
 286        } STMT_END
 287
 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
 289        STMT_START {                                                          \
 290          switch (TYPE_ENDIANNESS(datumtype)) {                               \
 291            case TYPE_IS_BIG_ENDIAN:                                          \
 292              var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
 293              break;                                                          \
 294            case TYPE_IS_LITTLE_ENDIAN:                                       \
 295              var = (post_cast *) my_htole ## type ((pre_cast) var);          \
 296              break;                                                          \
 297            default:                                                          \
 298              break;                                                          \
 299          }                                                                   \
 300        } STMT_END
 301
 302# define BO_CANT_DOIT(action, type)                                           \
 303        STMT_START {                                                          \
 304          switch (TYPE_ENDIANNESS(datumtype)) {                               \
 305             case TYPE_IS_BIG_ENDIAN:                                         \
 306               Perl_croak(aTHX_ "Can't %s big-endian %ss on this "            \
 307                                "platform", #action, #type);                  \
 308               break;                                                         \
 309             case TYPE_IS_LITTLE_ENDIAN:                                      \
 310               Perl_croak(aTHX_ "Can't %s little-endian %ss on this "         \
 311                                "platform", #action, #type);                  \
 312               break;                                                         \
 313             default:                                                         \
 314               break;                                                         \
 315           }                                                                  \
 316         } STMT_END
 317
 318# if PTRSIZE == INTSIZE
 319#  define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, i, int, void)
 320#  define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, i, int, void)
 321#  define DO_BO_UNPACK_PC(var)  DO_BO_UNPACK_PTR(var, i, int, char)
 322#  define DO_BO_PACK_PC(var)    DO_BO_PACK_PTR(var, i, int, char)
 323# elif PTRSIZE == LONGSIZE
 324#  define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, l, long, void)
 325#  define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, l, long, void)
 326#  define DO_BO_UNPACK_PC(var)  DO_BO_UNPACK_PTR(var, l, long, char)
 327#  define DO_BO_PACK_PC(var)    DO_BO_PACK_PTR(var, l, long, char)
 328# elif PTRSIZE == IVSIZE
 329#  define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, l, IV, void)
 330#  define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, l, IV, void)
 331#  define DO_BO_UNPACK_PC(var)  DO_BO_UNPACK_PTR(var, l, IV, char)
 332#  define DO_BO_PACK_PC(var)    DO_BO_PACK_PTR(var, l, IV, char)
 333# else
 334#  define DO_BO_UNPACK_P(var)   BO_CANT_DOIT(unpack, pointer)
 335#  define DO_BO_PACK_P(var)     BO_CANT_DOIT(pack, pointer)
 336#  define DO_BO_UNPACK_PC(var)  BO_CANT_DOIT(unpack, pointer)
 337#  define DO_BO_PACK_PC(var)    BO_CANT_DOIT(pack, pointer)
 338# endif
 339
 340# if defined(my_htolen) && defined(my_letohn) && \
 341    defined(my_htoben) && defined(my_betohn)
 342#  define DO_BO_UNPACK_N(var, type)                                           \
 343         STMT_START {                                                         \
 344           switch (TYPE_ENDIANNESS(datumtype)) {                              \
 345             case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
 346             case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
 347             default: break;                                                  \
 348           }                                                                  \
 349         } STMT_END
 350
 351#  define DO_BO_PACK_N(var, type)                                             \
 352         STMT_START {                                                         \
 353           switch (TYPE_ENDIANNESS(datumtype)) {                              \
 354             case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
 355             case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
 356             default: break;                                                  \
 357           }                                                                  \
 358         } STMT_END
 359# else
 360#  define DO_BO_UNPACK_N(var, type)     BO_CANT_DOIT(unpack, type)
 361#  define DO_BO_PACK_N(var, type)       BO_CANT_DOIT(pack, type)
 362# endif
 363
 364#endif /* PERL_PACK_CAN_BYTEORDER */
 365
 366#define PACK_SIZE_CANNOT_CSUM           0x80
 367#define PACK_SIZE_UNPREDICTABLE         0x40    /* Not a fixed size element */
 368#define PACK_SIZE_MASK                  0x3F
 369
 370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
 371   in).  You're unlikely ever to need to regenerate them.  */
 372
 373#if TYPE_IS_SHRIEKING != 0x100
 374   ++++shriek offset should be 256
 375#endif
 376
 377typedef U8 packprops_t;
 378#if 'J'-'I' == 1
 379/* ASCII */
 380STATIC const packprops_t packprops[512] = {
 381    /* normal */
 382    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 383    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 384    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 385    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 386    0, 0, 0,
 387    /* C */ sizeof(unsigned char),
 388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
 389    /* D */ LONG_DOUBLESIZE,
 390#else
 391    0,
 392#endif
 393    0,
 394    /* F */ NVSIZE,
 395    0, 0,
 396    /* I */ sizeof(unsigned int),
 397    /* J */ UVSIZE,
 398    0,
 399    /* L */ SIZE32,
 400    0,
 401    /* N */ SIZE32,
 402    0, 0,
 403#if defined(HAS_QUAD)
 404    /* Q */ sizeof(Uquad_t),
 405#else
 406    0,
 407#endif
 408    0,
 409    /* S */ SIZE16,
 410    0,
 411    /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
 412    /* V */ SIZE32,
 413    /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
 414    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 415    /* c */ sizeof(char),
 416    /* d */ sizeof(double),
 417    0,
 418    /* f */ sizeof(float),
 419    0, 0,
 420    /* i */ sizeof(int),
 421    /* j */ IVSIZE,
 422    0,
 423    /* l */ SIZE32,
 424    0,
 425    /* n */ SIZE16,
 426    0,
 427    /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
 428#if defined(HAS_QUAD)
 429    /* q */ sizeof(Quad_t),
 430#else
 431    0,
 432#endif
 433    0,
 434    /* s */ SIZE16,
 435    0, 0,
 436    /* v */ SIZE16,
 437    /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
 438    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 439    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 440    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 441    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 442    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 443    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 444    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 445    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 446    0, 0, 0, 0, 0, 0, 0, 0,
 447    /* shrieking */
 448    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 449    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 450    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 451    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 452    0, 0, 0, 0, 0, 0, 0, 0, 0,
 453    /* I */ sizeof(unsigned int),
 454    0, 0,
 455    /* L */ sizeof(unsigned long),
 456    0,
 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 458    /* N */ SIZE32,
 459#else
 460    0,
 461#endif
 462    0, 0, 0, 0,
 463    /* S */ sizeof(unsigned short),
 464    0, 0,
 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 466    /* V */ SIZE32,
 467#else
 468    0,
 469#endif
 470    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 471    0, 0,
 472    /* i */ sizeof(int),
 473    0, 0,
 474    /* l */ sizeof(long),
 475    0,
 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 477    /* n */ SIZE16,
 478#else
 479    0,
 480#endif
 481    0, 0, 0, 0,
 482    /* s */ sizeof(short),
 483    0, 0,
 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 485    /* v */ SIZE16,
 486#else
 487    0,
 488#endif
 489    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 490    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 491    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 492    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 493    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 494    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 495    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 496    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 497    0, 0, 0, 0, 0, 0, 0, 0, 0
 498};
 499#else
 500/* EBCDIC (or bust) */
 501STATIC const packprops_t packprops[512] = {
 502    /* normal */
 503    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 504    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 505    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 506    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 507    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 508    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 509    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 510    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 511    0, 0, 0,
 512    /* c */ sizeof(char),
 513    /* d */ sizeof(double),
 514    0,
 515    /* f */ sizeof(float),
 516    0, 0,
 517    /* i */ sizeof(int),
 518    0, 0, 0, 0, 0, 0, 0,
 519    /* j */ IVSIZE,
 520    0,
 521    /* l */ SIZE32,
 522    0,
 523    /* n */ SIZE16,
 524    0,
 525    /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
 526#if defined(HAS_QUAD)
 527    /* q */ sizeof(Quad_t),
 528#else
 529    0,
 530#endif
 531    0, 0, 0, 0, 0, 0, 0, 0, 0,
 532    /* s */ SIZE16,
 533    0, 0,
 534    /* v */ SIZE16,
 535    /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
 536    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 537    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 538    /* C */ sizeof(unsigned char),
 539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
 540    /* D */ LONG_DOUBLESIZE,
 541#else
 542    0,
 543#endif
 544    0,
 545    /* F */ NVSIZE,
 546    0, 0,
 547    /* I */ sizeof(unsigned int),
 548    0, 0, 0, 0, 0, 0, 0,
 549    /* J */ UVSIZE,
 550    0,
 551    /* L */ SIZE32,
 552    0,
 553    /* N */ SIZE32,
 554    0, 0,
 555#if defined(HAS_QUAD)
 556    /* Q */ sizeof(Uquad_t),
 557#else
 558    0,
 559#endif
 560    0, 0, 0, 0, 0, 0, 0, 0, 0,
 561    /* S */ SIZE16,
 562    0,
 563    /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
 564    /* V */ SIZE32,
 565    /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
 566    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 567    0, 0, 0, 0, 0, 0, 0, 0, 0,
 568    /* shrieking */
 569    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 570    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 571    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 572    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 573    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 574    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 575    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 576    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 577    0, 0, 0, 0, 0, 0, 0, 0, 0,
 578    /* i */ sizeof(int),
 579    0, 0, 0, 0, 0, 0, 0, 0, 0,
 580    /* l */ sizeof(long),
 581    0,
 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 583    /* n */ SIZE16,
 584#else
 585    0,
 586#endif
 587    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 588    /* s */ sizeof(short),
 589    0, 0,
 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 591    /* v */ SIZE16,
 592#else
 593    0,
 594#endif
 595    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 596    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 597    0, 0, 0,
 598    /* I */ sizeof(unsigned int),
 599    0, 0, 0, 0, 0, 0, 0, 0, 0,
 600    /* L */ sizeof(unsigned long),
 601    0,
 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 603    /* N */ SIZE32,
 604#else
 605    0,
 606#endif
 607    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 608    /* S */ sizeof(unsigned short),
 609    0, 0,
 610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
 611    /* V */ SIZE32,
 612#else
 613    0,
 614#endif
 615    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 616    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 617};
 618#endif
 619
 620STATIC U8
 621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
 622{
 623    STRLEN retlen;
 624    UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
 625                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 626    /* We try to process malformed UTF-8 as much as possible (preferrably with
 627       warnings), but these two mean we make no progress in the string and
 628       might enter an infinite loop */
 629    if (retlen == (STRLEN) -1 || retlen == 0)
 630        Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
 631                   (int) TYPE_NO_MODIFIERS(datumtype));
 632    if (val >= 0x100) {
 633        if (ckWARN(WARN_UNPACK))
 634        Perl_warner(aTHX_ packWARN(WARN_UNPACK),
 635                    "Character in '%c' format wrapped in unpack",
 636                    (int) TYPE_NO_MODIFIERS(datumtype));
 637        val &= 0xff;
 638    }
 639    *s += retlen;
 640    return (U8)val;
 641}
 642
 643#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
 644        uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
 645        *(U8 *)(s)++)
 646
 647STATIC bool
 648uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
 649{
 650    UV val;
 651    STRLEN retlen;
 652    const char *from = *s;
 653    int bad = 0;
 654    const U32 flags = ckWARN(WARN_UTF8) ?
 655        UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
 656    for (;buf_len > 0; buf_len--) {
 657        if (from >= end) return FALSE;
 658        val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
 659        if (retlen == (STRLEN) -1 || retlen == 0) {
 660            from += UTF8SKIP(from);
 661            bad |= 1;
 662        } else from += retlen;
 663        if (val >= 0x100) {
 664            bad |= 2;
 665            val &= 0xff;
 666        }
 667        *(U8 *)buf++ = (U8)val;
 668    }
 669    /* We have enough characters for the buffer. Did we have problems ? */
 670    if (bad) {
 671        if (bad & 1) {
 672            /* Rewalk the string fragment while warning */
 673            const char *ptr;
 674            const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
 675            for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
 676                if (ptr >= end) break;
 677                utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
 678            }
 679            if (from > end) from = end;
 680        }
 681        if ((bad & 2) && ckWARN(WARN_UNPACK))
 682            Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
 683                                       WARN_PACK : WARN_UNPACK),
 684                        "Character(s) in '%c' format wrapped in %s",
 685                        (int) TYPE_NO_MODIFIERS(datumtype),
 686                        datumtype & TYPE_IS_PACK ? "pack" : "unpack");
 687    }
 688    *s = from;
 689    return TRUE;
 690}
 691
 692STATIC bool
 693next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
 694{
 695    dVAR;
 696    STRLEN retlen;
 697    const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
 698    if (val >= 0x100 || !ISUUCHAR(val) ||
 699        retlen == (STRLEN) -1 || retlen == 0) {
 700        *out = 0;
 701        return FALSE;
 702    }
 703    *out = PL_uudmap[val] & 077;
 704    *s += retlen;
 705    return TRUE;
 706}
 707
 708STATIC char *
 709S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
 710    const U8 * const end = start + len;
 711
 712    PERL_ARGS_ASSERT_BYTES_TO_UNI;
 713
 714    while (start < end) {
 715        const UV uv = NATIVE_TO_ASCII(*start);
 716        if (UNI_IS_INVARIANT(uv))
 717            *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
 718        else {
 719            *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
 720            *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
 721        }
 722        start++;
 723    }
 724    return dest;
 725}
 726
 727#define PUSH_BYTES(utf8, cur, buf, len)                         \
 728STMT_START {                                                    \
 729    if (utf8)                                                   \
 730        (cur) = bytes_to_uni((U8 *) buf, len, (cur));           \
 731    else {                                                      \
 732        Copy(buf, cur, len, char);                              \
 733        (cur) += (len);                                         \
 734    }                                                           \
 735} STMT_END
 736
 737#define GROWING(utf8, cat, start, cur, in_len)  \
 738STMT_START {                                    \
 739    STRLEN glen = (in_len);                     \
 740    if (utf8) glen *= UTF8_EXPAND;              \
 741    if ((cur) + glen >= (start) + SvLEN(cat)) { \
 742        (start) = sv_exp_grow(cat, glen);       \
 743        (cur) = (start) + SvCUR(cat);           \
 744    }                                           \
 745} STMT_END
 746
 747#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
 748STMT_START {                                    \
 749    const STRLEN glen = (in_len);               \
 750    STRLEN gl = glen;                           \
 751    if (utf8) gl *= UTF8_EXPAND;                \
 752    if ((cur) + gl >= (start) + SvLEN(cat)) {   \
 753        *cur = '\0';                            \
 754        SvCUR_set((cat), (cur) - (start));      \
 755        (start) = sv_exp_grow(cat, gl);         \
 756        (cur) = (start) + SvCUR(cat);           \
 757    }                                           \
 758    PUSH_BYTES(utf8, cur, buf, glen);           \
 759} STMT_END
 760
 761#define PUSH_BYTE(utf8, s, byte)                \
 762STMT_START {                                    \
 763    if (utf8) {                                 \
 764        const U8 au8 = (byte);                  \
 765        (s) = bytes_to_uni(&au8, 1, (s));       \
 766    } else *(U8 *)(s)++ = (byte);               \
 767} STMT_END
 768
 769/* Only to be used inside a loop (see the break) */
 770#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)            \
 771STMT_START {                                                    \
 772    STRLEN retlen;                                              \
 773    if (str >= end) break;                                      \
 774    val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);     \
 775    if (retlen == (STRLEN) -1 || retlen == 0) {                 \
 776        *cur = '\0';                                            \
 777        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");     \
 778    }                                                           \
 779    str += retlen;                                              \
 780} STMT_END
 781
 782static const char *_action( const tempsym_t* symptr )
 783{
 784    return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
 785}
 786
 787/* Returns the sizeof() struct described by pat */
 788STATIC I32
 789S_measure_struct(pTHX_ tempsym_t* symptr)
 790{
 791    I32 total = 0;
 792
 793    PERL_ARGS_ASSERT_MEASURE_STRUCT;
 794
 795    while (next_symbol(symptr)) {
 796        I32 len;
 797        int size;
 798
 799        switch (symptr->howlen) {
 800          case e_star:
 801            Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
 802                        _action( symptr ) );
 803            break;
 804          default:
 805            /* e_no_len and e_number */
 806            len = symptr->length;
 807            break;
 808        }
 809
 810        size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
 811        if (!size) {
 812            int star;
 813            /* endianness doesn't influence the size of a type */
 814            switch(TYPE_NO_ENDIANNESS(symptr->code)) {
 815            default:
 816                Perl_croak(aTHX_ "Invalid type '%c' in %s",
 817                           (int)TYPE_NO_MODIFIERS(symptr->code),
 818                           _action( symptr ) );
 819#ifdef PERL_PACK_CAN_SHRIEKSIGN
 820            case '.' | TYPE_IS_SHRIEKING:
 821            case '@' | TYPE_IS_SHRIEKING:
 822#endif
 823            case '@':
 824            case '.':
 825            case '/':
 826            case 'U':                   /* XXXX Is it correct? */
 827            case 'w':
 828            case 'u':
 829                Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
 830                           (int) TYPE_NO_MODIFIERS(symptr->code),
 831                           _action( symptr ) );
 832            case '%':
 833                size = 0;
 834                break;
 835            case '(':
 836            {
 837                tempsym_t savsym = *symptr;
 838                symptr->patptr = savsym.grpbeg;
 839                symptr->patend = savsym.grpend;
 840                /* XXXX Theoretically, we need to measure many times at
 841                   different positions, since the subexpression may contain
 842                   alignment commands, but be not of aligned length.
 843                   Need to detect this and croak().  */
 844                size = measure_struct(symptr);
 845                *symptr = savsym;
 846                break;
 847            }
 848            case 'X' | TYPE_IS_SHRIEKING:
 849                /* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
 850                 */
 851                if (!len)               /* Avoid division by 0 */
 852                    len = 1;
 853                len = total % len;      /* Assumed: the start is aligned. */
 854                /* FALL THROUGH */
 855            case 'X':
 856                size = -1;
 857                if (total < len)
 858                    Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
 859                break;
 860            case 'x' | TYPE_IS_SHRIEKING:
 861                if (!len)               /* Avoid division by 0 */
 862                    len = 1;
 863                star = total % len;     /* Assumed: the start is aligned. */
 864                if (star)               /* Other portable ways? */
 865                    len = len - star;
 866                else
 867                    len = 0;
 868                /* FALL THROUGH */
 869            case 'x':
 870            case 'A':
 871            case 'Z':
 872            case 'a':
 873                size = 1;
 874                break;
 875            case 'B':
 876            case 'b':
 877                len = (len + 7)/8;
 878                size = 1;
 879                break;
 880            case 'H':
 881            case 'h':
 882                len = (len + 1)/2;
 883                size = 1;
 884                break;
 885
 886            case 'P':
 887                len = 1;
 888                size = sizeof(char*);
 889                break;
 890            }
 891        }
 892        total += len * size;
 893    }
 894    return total;
 895}
 896
 897
 898/* locate matching closing parenthesis or bracket
 899 * returns char pointer to char after match, or NULL
 900 */
 901STATIC const char *
 902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
 903{
 904    PERL_ARGS_ASSERT_GROUP_END;
 905
 906    while (patptr < patend) {
 907        const char c = *patptr++;
 908
 909        if (isSPACE(c))
 910            continue;
 911        else if (c == ender)
 912            return patptr-1;
 913        else if (c == '#') {
 914            while (patptr < patend && *patptr != '\n')
 915                patptr++;
 916            continue;
 917        } else if (c == '(')
 918            patptr = group_end(patptr, patend, ')') + 1;
 919        else if (c == '[')
 920            patptr = group_end(patptr, patend, ']') + 1;
 921    }
 922    Perl_croak(aTHX_ "No group ending character '%c' found in template",
 923               ender);
 924    return 0;
 925}
 926
 927
 928/* Convert unsigned decimal number to binary.
 929 * Expects a pointer to the first digit and address of length variable
 930 * Advances char pointer to 1st non-digit char and returns number
 931 */
 932STATIC const char *
 933S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
 934{
 935  I32 len = *patptr++ - '0';
 936
 937  PERL_ARGS_ASSERT_GET_NUM;
 938
 939  while (isDIGIT(*patptr)) {
 940    if (len >= 0x7FFFFFFF/10)
 941      Perl_croak(aTHX_ "pack/unpack repeat count overflow");
 942    len = (len * 10) + (*patptr++ - '0');
 943  }
 944  *lenptr = len;
 945  return patptr;
 946}
 947
 948/* The marvellous template parsing routine: Using state stored in *symptr,
 949 * locates next template code and count
 950 */
 951STATIC bool
 952S_next_symbol(pTHX_ tempsym_t* symptr )
 953{
 954  const char* patptr = symptr->patptr;
 955  const char* const patend = symptr->patend;
 956
 957  PERL_ARGS_ASSERT_NEXT_SYMBOL;
 958
 959  symptr->flags &= ~FLAG_SLASH;
 960
 961  while (patptr < patend) {
 962    if (isSPACE(*patptr))
 963      patptr++;
 964    else if (*patptr == '#') {
 965      patptr++;
 966      while (patptr < patend && *patptr != '\n')
 967        patptr++;
 968      if (patptr < patend)
 969        patptr++;
 970    } else {
 971      /* We should have found a template code */
 972      I32 code = *patptr++ & 0xFF;
 973      U32 inherited_modifiers = 0;
 974
 975      if (code == ','){ /* grandfather in commas but with a warning */
 976        if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
 977          symptr->flags |= FLAG_COMMA;
 978          Perl_warner(aTHX_ packWARN(WARN_UNPACK),
 979                      "Invalid type ',' in %s", _action( symptr ) );
 980        }
 981        continue;
 982      }
 983
 984      /* for '(', skip to ')' */
 985      if (code == '(') {
 986        if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
 987          Perl_croak(aTHX_ "()-group starts with a count in %s",
 988                        _action( symptr ) );
 989        symptr->grpbeg = patptr;
 990        patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
 991        if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
 992          Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
 993                        _action( symptr ) );
 994      }
 995
 996      /* look for group modifiers to inherit */
 997      if (TYPE_ENDIANNESS(symptr->flags)) {
 998        if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
 999          inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000      }
1001
1002      /* look for modifiers */
1003      while (patptr < patend) {
1004        const char *allowed;
1005        I32 modifier;
1006        switch (*patptr) {
1007          case '!':
1008            modifier = TYPE_IS_SHRIEKING;
1009            allowed = SHRIEKING_ALLOWED_TYPES;
1010            break;
1011#ifdef PERL_PACK_CAN_BYTEORDER
1012          case '>':
1013            modifier = TYPE_IS_BIG_ENDIAN;
1014            allowed = ENDIANNESS_ALLOWED_TYPES;
1015            break;
1016          case '<':
1017            modifier = TYPE_IS_LITTLE_ENDIAN;
1018            allowed = ENDIANNESS_ALLOWED_TYPES;
1019            break;
1020#endif /* PERL_PACK_CAN_BYTEORDER */
1021          default:
1022            allowed = "";
1023            modifier = 0;
1024            break;
1025        }
1026
1027        if (modifier == 0)
1028          break;
1029
1030        if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031          Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032                        allowed, _action( symptr ) );
1033
1034        if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035          Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036                     (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037        else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038                 TYPE_ENDIANNESS_MASK)
1039          Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040                     *patptr, _action( symptr ) );
1041
1042        if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043            Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044                        "Duplicate modifier '%c' after '%c' in %s",
1045                        *patptr, (int) TYPE_NO_MODIFIERS(code),
1046                        _action( symptr ) );
1047        }
1048
1049        code |= modifier;
1050        patptr++;
1051      }
1052
1053      /* inherit modifiers */
1054      code |= inherited_modifiers;
1055
1056      /* look for count and/or / */
1057      if (patptr < patend) {
1058        if (isDIGIT(*patptr)) {
1059          patptr = get_num( patptr, &symptr->length );
1060          symptr->howlen = e_number;
1061
1062        } else if (*patptr == '*') {
1063          patptr++;
1064          symptr->howlen = e_star;
1065
1066        } else if (*patptr == '[') {
1067          const char* lenptr = ++patptr;
1068          symptr->howlen = e_number;
1069          patptr = group_end( patptr, patend, ']' ) + 1;
1070          /* what kind of [] is it? */
1071          if (isDIGIT(*lenptr)) {
1072            lenptr = get_num( lenptr, &symptr->length );
1073            if( *lenptr != ']' )
1074              Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075                            _action( symptr ) );
1076          } else {
1077            tempsym_t savsym = *symptr;
1078            symptr->patend = patptr-1;
1079            symptr->patptr = lenptr;
1080            savsym.length = measure_struct(symptr);
1081            *symptr = savsym;
1082          }
1083        } else {
1084          symptr->howlen = e_no_len;
1085          symptr->length = 1;
1086        }
1087
1088        /* try to find / */
1089        while (patptr < patend) {
1090          if (isSPACE(*patptr))
1091            patptr++;
1092          else if (*patptr == '#') {
1093            patptr++;
1094            while (patptr < patend && *patptr != '\n')
1095              patptr++;
1096            if (patptr < patend)
1097              patptr++;
1098          } else {
1099            if (*patptr == '/') {
1100              symptr->flags |= FLAG_SLASH;
1101              patptr++;
1102              if (patptr < patend &&
1103                  (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104                Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105                            _action( symptr ) );
1106            }
1107            break;
1108          }
1109        }
1110      } else {
1111        /* at end - no count, no / */
1112        symptr->howlen = e_no_len;
1113        symptr->length = 1;
1114      }
1115
1116      symptr->code = code;
1117      symptr->patptr = patptr;
1118      return TRUE;
1119    }
1120  }
1121  symptr->patptr = patptr;
1122  return FALSE;
1123}
1124
1125/*
1126   There is no way to cleanly handle the case where we should process the
1127   string per byte in its upgraded form while it's really in downgraded form
1128   (e.g. estimates like strend-s as an upper bound for the number of
1129   characters left wouldn't work). So if we foresee the need of this
1130   (pattern starts with U or contains U0), we want to work on the encoded
1131   version of the string. Users are advised to upgrade their pack string
1132   themselves if they need to do a lot of unpacks like this on it
1133*/
1134STATIC bool
1135need_utf8(const char *pat, const char *patend)
1136{
1137    bool first = TRUE;
1138
1139    PERL_ARGS_ASSERT_NEED_UTF8;
1140
1141    while (pat < patend) {
1142        if (pat[0] == '#') {
1143            pat++;
1144            pat = (const char *) memchr(pat, '\n', patend-pat);
1145            if (!pat) return FALSE;
1146        } else if (pat[0] == 'U') {
1147            if (first || pat[1] == '0') return TRUE;
1148        } else first = FALSE;
1149        pat++;
1150    }
1151    return FALSE;
1152}
1153
1154STATIC char
1155first_symbol(const char *pat, const char *patend) {
1156    PERL_ARGS_ASSERT_FIRST_SYMBOL;
1157
1158    while (pat < patend) {
1159        if (pat[0] != '#') return pat[0];
1160        pat++;
1161        pat = (const char *) memchr(pat, '\n', patend-pat);
1162        if (!pat) return 0;
1163        pat++;
1164    }
1165    return 0;
1166}
1167
1168/*
1169=for apidoc unpackstring
1170
1171The engine implementing unpack() Perl function. C<unpackstring> puts the
1172extracted list items on the stack and returns the number of elements.
1173Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1174
1175=cut */
1176
1177I32
1178Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1179{
1180    tempsym_t sym;
1181
1182    PERL_ARGS_ASSERT_UNPACKSTRING;
1183
1184    if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185    else if (need_utf8(pat, patend)) {
1186        /* We probably should try to avoid this in case a scalar context call
1187           wouldn't get to the "U0" */
1188        STRLEN len = strend - s;
1189        s = (char *) bytes_to_utf8((U8 *) s, &len);
1190        SAVEFREEPV(s);
1191        strend = s + len;
1192        flags |= FLAG_DO_UTF8;
1193    }
1194
1195    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196        flags |= FLAG_PARSE_UTF8;
1197
1198    TEMPSYM_INIT(&sym, pat, patend, flags);
1199
1200    return unpack_rec(&sym, s, s, strend, NULL );
1201}
1202
1203STATIC I32
1204S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1205{
1206    dVAR; dSP;
1207    SV *sv;
1208    const I32 start_sp_offset = SP - PL_stack_base;
1209    howlen_t howlen;
1210    I32 checksum = 0;
1211    UV cuv = 0;
1212    NV cdouble = 0.0;
1213    const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214    bool beyond = FALSE;
1215    bool explicit_length;
1216    const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1218
1219    PERL_ARGS_ASSERT_UNPACK_REC;
1220
1221    symptr->strbeg = s - strbeg;
1222
1223    while (next_symbol(symptr)) {
1224        packprops_t props;
1225        I32 len;
1226        I32 datumtype = symptr->code;
1227        /* do first one only unless in list context
1228           / is implemented by unpacking the count, then popping it from the
1229           stack, so must check that we're not in the middle of a /  */
1230        if ( unpack_only_one
1231             && (SP - PL_stack_base == start_sp_offset + 1)
1232             && (datumtype != '/') )   /* XXX can this be omitted */
1233            break;
1234
1235        switch (howlen = symptr->howlen) {
1236          case e_star:
1237            len = strend - strbeg;      /* long enough */
1238            break;
1239          default:
1240            /* e_no_len and e_number */
1241            len = symptr->length;
1242            break;
1243        }
1244
1245        explicit_length = TRUE;
1246      redo_switch:
1247        beyond = s >= strend;
1248
1249        props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1250        if (props) {
1251            /* props nonzero means we can process this letter. */
1252            const long size = props & PACK_SIZE_MASK;
1253            const long howmany = (strend - s) / size;
1254            if (len > howmany)
1255                len = howmany;
1256
1257            if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258                if (len && unpack_only_one) len = 1;
1259                EXTEND(SP, len);
1260                EXTEND_MORTAL(len);
1261            }
1262        }
1263
1264        switch(TYPE_NO_ENDIANNESS(datumtype)) {
1265        default:
1266            Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1267
1268        case '%':
1269            if (howlen == e_no_len)
1270                len = 16;               /* len is not specified */
1271            checksum = len;
1272            cuv = 0;
1273            cdouble = 0;
1274            continue;
1275            break;
1276        case '(':
1277        {
1278            tempsym_t savsym = *symptr;
1279            const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280            symptr->flags |= group_modifiers;
1281            symptr->patend = savsym.grpend;
1282            symptr->previous = &savsym;
1283            symptr->level++;
1284            PUTBACK;
1285            if (len && unpack_only_one) len = 1;
1286            while (len--) {
1287                symptr->patptr = savsym.grpbeg;
1288                if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
1289                else      symptr->flags &= ~FLAG_PARSE_UTF8;
1290                unpack_rec(symptr, s, strbeg, strend, &s);
1291                if (s == strend && savsym.howlen == e_star)
1292                    break; /* No way to continue */
1293            }
1294            SPAGAIN;
1295            savsym.flags = symptr->flags & ~group_modifiers;
1296            *symptr = savsym;
1297            break;
1298        }
1299#ifdef PERL_PACK_CAN_SHRIEKSIGN
1300        case '.' | TYPE_IS_SHRIEKING:
1301#endif
1302        case '.': {
1303            const char *from;
1304            SV *sv;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
1306            const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
1308            const bool u8 = utf8;
1309#endif
1310            if (howlen == e_star) from = strbeg;
1311            else if (len <= 0) from = s;
1312            else {
1313                tempsym_t *group = symptr;
1314
1315                while (--len && group) group = group->previous;
1316                from = group ? strbeg + group->strbeg : strbeg;
1317            }
1318            sv = from <= s ?
1319                newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320                newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1321            mXPUSHs(sv);
1322            break;
1323        }
1324#ifdef PERL_PACK_CAN_SHRIEKSIGN
1325        case '@' | TYPE_IS_SHRIEKING:
1326#endif
1327        case '@':
1328            s = strbeg + symptr->strbeg;
1329#ifdef PERL_PACK_CAN_SHRIEKSIGN
1330            if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
1331#else /* PERL_PACK_CAN_SHRIEKSIGN */
1332            if (utf8)
1333#endif
1334            {
1335                while (len > 0) {
1336                    if (s >= strend)
1337                        Perl_croak(aTHX_ "'@' outside of string in unpack");
1338                    s += UTF8SKIP(s);
1339                    len--;
1340                }
1341                if (s > strend)
1342                    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343            } else {
1344                if (strend-s < len)
1345                    Perl_croak(aTHX_ "'@' outside of string in unpack");
1346                s += len;
1347            }
1348            break;
1349        case 'X' | TYPE_IS_SHRIEKING:
1350            if (!len)                   /* Avoid division by 0 */
1351                len = 1;
1352            if (utf8) {
1353                const char *hop, *last;
1354                I32 l = len;
1355                hop = last = strbeg;
1356                while (hop < s) {
1357                    hop += UTF8SKIP(hop);
1358                    if (--l == 0) {
1359                        last = hop;
1360                        l = len;
1361                    }
1362                }
1363                if (last > s)
1364                    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1365                s = last;
1366                break;
1367            }
1368            len = (s - strbeg) % len;
1369            /* FALL THROUGH */
1370        case 'X':
1371            if (utf8) {
1372                while (len > 0) {
1373                    if (s <= strbeg)
1374                        Perl_croak(aTHX_ "'X' outside of string in unpack");
1375                    while (--s, UTF8_IS_CONTINUATION(*s)) {
1376                        if (s <= strbeg)
1377                            Perl_croak(aTHX_ "'X' outside of string in unpack");
1378                    }
1379                    len--;
1380                }
1381            } else {
1382                if (len > s - strbeg)
1383                    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1384                s -= len;
1385            }
1386            break;
1387        case 'x' | TYPE_IS_SHRIEKING: {
1388            I32 ai32;
1389            if (!len)                   /* Avoid division by 0 */
1390                len = 1;
1391            if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392            else      ai32 = (s - strbeg)                         % len;
1393            if (ai32 == 0) break;
1394            len -= ai32;
1395            }
1396            /* FALL THROUGH */
1397        case 'x':
1398            if (utf8) {
1399                while (len>0) {
1400                    if (s >= strend)
1401                        Perl_croak(aTHX_ "'x' outside of string in unpack");
1402                    s += UTF8SKIP(s);
1403                    len--;
1404                }
1405            } else {
1406                if (len > strend - s)
1407                    Perl_croak(aTHX_ "'x' outside of string in unpack");
1408                s += len;
1409            }
1410            break;
1411        case '/':
1412            Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1413            break;
1414        case 'A':
1415        case 'Z':
1416        case 'a':
1417            if (checksum) {
1418                /* Preliminary length estimate is assumed done in 'W' */
1419                if (len > strend - s) len = strend - s;
1420                goto W_checksum;
1421            }
1422            if (utf8) {
1423                I32 l;
1424                const char *hop;
1425                for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426                    if (hop >= strend) {
1427                        if (hop > strend)
1428                            Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1429                        break;
1430                    }
1431                }
1432                if (hop > strend)
1433                    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434                len = hop - s;
1435            } else if (len > strend - s)
1436                len = strend - s;
1437
1438            if (datumtype == 'Z') {
1439                /* 'Z' strips stuff after first null */
1440                const char *ptr, *end;
1441                end = s + len;
1442                for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443                sv = newSVpvn(s, ptr-s);
1444                if (howlen == e_star) /* exact for 'Z*' */
1445                    len = ptr-s + (ptr != strend ? 1 : 0);
1446            } else if (datumtype == 'A') {
1447                /* 'A' strips both nulls and spaces */
1448                const char *ptr;
1449                if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450                    for (ptr = s+len-1; ptr >= s; ptr--)
1451                        if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452                            !is_utf8_space((U8 *) ptr)) break;
1453                    if (ptr >= s) ptr += UTF8SKIP(ptr);
1454                    else ptr++;
1455                    if (ptr > s+len)
1456                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457                } else {
1458                    for (ptr = s+len-1; ptr >= s; ptr--)
1459                        if (*ptr != 0 && !isSPACE(*ptr)) break;
1460                    ptr++;
1461                }
1462                sv = newSVpvn(s, ptr-s);
1463            } else sv = newSVpvn(s, len);
1464
1465            if (utf8) {
1466                SvUTF8_on(sv);
1467                /* Undo any upgrade done due to need_utf8() */
1468                if (!(symptr->flags & FLAG_WAS_UTF8))
1469                    sv_utf8_downgrade(sv, 0);
1470            }
1471            mXPUSHs(sv);
1472            s += len;
1473            break;
1474        case 'B':
1475        case 'b': {
1476            char *str;
1477            if (howlen == e_star || len > (strend - s) * 8)
1478                len = (strend - s) * 8;
1479            if (checksum) {
1480                if (!PL_bitcount) {
1481                    int bits;
1482                    Newxz(PL_bitcount, 256, char);
1483                    for (bits = 1; bits < 256; bits++) {
1484                        if (bits & 1)   PL_bitcount[bits]++;
1485                        if (bits & 2)   PL_bitcount[bits]++;
1486                        if (bits & 4)   PL_bitcount[bits]++;
1487                        if (bits & 8)   PL_bitcount[bits]++;
1488                        if (bits & 16)  PL_bitcount[bits]++;
1489                        if (bits & 32)  PL_bitcount[bits]++;
1490                        if (bits & 64)  PL_bitcount[bits]++;
1491                        if (bits & 128) PL_bitcount[bits]++;
1492                    }
1493                }
1494                if (utf8)
1495                    while (len >= 8 && s < strend) {
1496                        cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1497                        len -= 8;
1498                    }
1499                else
1500                    while (len >= 8) {
1501                        cuv += PL_bitcount[*(U8 *)s++];
1502                        len -= 8;
1503                    }
1504                if (len && s < strend) {
1505                    U8 bits;
1506                    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1507                    if (datumtype == 'b')
1508                        while (len-- > 0) {
1509                            if (bits & 1) cuv++;
1510                            bits >>= 1;
1511                        }
1512                    else
1513                        while (len-- > 0) {
1514                            if (bits & 0x80) cuv++;
1515                            bits <<= 1;
1516                        }
1517                }
1518                break;
1519            }
1520
1521            sv = sv_2mortal(newSV(len ? len : 1));
1522            SvPOK_on(sv);
1523            str = SvPVX(sv);
1524            if (datumtype == 'b') {
1525                U8 bits = 0;
1526                const I32 ai32 = len;
1527                for (len = 0; len < ai32; len++) {
1528                    if (len & 7) bits >>= 1;
1529                    else if (utf8) {
1530                        if (s >= strend) break;
1531                        bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1532                    } else bits = *(U8 *) s++;
1533                    *str++ = bits & 1 ? '1' : '0';
1534                }
1535            } else {
1536                U8 bits = 0;
1537                const I32 ai32 = len;
1538                for (len = 0; len < ai32; len++) {
1539                    if (len & 7) bits <<= 1;
1540                    else if (utf8) {
1541                        if (s >= strend) break;
1542                        bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1543                    } else bits = *(U8 *) s++;
1544                    *str++ = bits & 0x80 ? '1' : '0';
1545                }
1546            }
1547            *str = '\0';
1548            SvCUR_set(sv, str - SvPVX_const(sv));
1549            XPUSHs(sv);
1550            break;
1551        }
1552        case 'H':
1553        case 'h': {
1554            char *str;
1555            /* Preliminary length estimate, acceptable for utf8 too */
1556            if (howlen == e_star || len > (strend - s) * 2)
1557                len = (strend - s) * 2;
1558            sv = sv_2mortal(newSV(len ? len : 1));
1559            SvPOK_on(sv);
1560            str = SvPVX(sv);
1561            if (datumtype == 'h') {
1562                U8 bits = 0;
1563                I32 ai32 = len;
1564                for (len = 0; len < ai32; len++) {
1565                    if (len & 1) bits >>= 4;
1566                    else if (utf8) {
1567                        if (s >= strend) break;
1568                        bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1569                    } else bits = * (U8 *) s++;
1570                    *str++ = PL_hexdigit[bits & 15];
1571                }
1572            } else {
1573                U8 bits = 0;
1574                const I32 ai32 = len;
1575                for (len = 0; len < ai32; len++) {
1576                    if (len & 1) bits <<= 4;
1577                    else if (utf8) {
1578                        if (s >= strend) break;
1579                        bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580                    } else bits = *(U8 *) s++;
1581                    *str++ = PL_hexdigit[(bits >> 4) & 15];
1582                }
1583            }
1584            *str = '\0';
1585            SvCUR_set(sv, str - SvPVX_const(sv));
1586            XPUSHs(sv);
1587            break;
1588        }
1589        case 'C':
1590            if (len == 0) {
1591                if (explicit_length)
1592                    /* Switch to "character" mode */
1593                    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1594                break;
1595            }
1596            /* FALL THROUGH */
1597        case 'c':
1598            while (len-- > 0 && s < strend) {
1599                int aint;
1600                if (utf8)
1601                  {
1602                    STRLEN retlen;
1603                    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1604                                 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1605                    if (retlen == (STRLEN) -1 || retlen == 0)
1606                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1607                    s += retlen;
1608                  }
1609                else
1610                  aint = *(U8 *)(s)++;
1611                if (aint >= 128 && datumtype != 'C')    /* fake up signed chars */
1612                    aint -= 256;
1613                if (!checksum)
1614                    mPUSHi(aint);
1615                else if (checksum > bits_in_uv)
1616                    cdouble += (NV)aint;
1617                else
1618                    cuv += aint;
1619            }
1620            break;
1621        case 'W':
1622          W_checksum:
1623            if (utf8) {
1624                while (len-- > 0 && s < strend) {
1625                    STRLEN retlen;
1626                    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1627                                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1628                    if (retlen == (STRLEN) -1 || retlen == 0)
1629                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1630                    s += retlen;
1631                    if (!checksum)
1632                        mPUSHu(val);
1633                    else if (checksum > bits_in_uv)
1634                        cdouble += (NV) val;
1635                    else
1636                        cuv += val;
1637                }
1638            } else if (!checksum)
1639                while (len-- > 0) {
1640                    const U8 ch = *(U8 *) s++;
1641                    mPUSHu(ch);
1642            }
1643            else if (checksum > bits_in_uv)
1644                while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1645            else
1646                while (len-- > 0) cuv += *(U8 *) s++;
1647            break;
1648        case 'U':
1649            if (len == 0) {
1650                if (explicit_length) {
1651                    /* Switch to "bytes in UTF-8" mode */
1652                    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1653                    else
1654                        /* Should be impossible due to the need_utf8() test */
1655                        Perl_croak(aTHX_ "U0 mode on a byte string");
1656                }
1657                break;
1658            }
1659            if (len > strend - s) len = strend - s;
1660            if (!checksum) {
1661                if (len && unpack_only_one) len = 1;
1662                EXTEND(SP, len);
1663                EXTEND_MORTAL(len);
1664            }
1665            while (len-- > 0 && s < strend) {
1666                STRLEN retlen;
1667                UV auv;
1668                if (utf8) {
1669                    U8 result[UTF8_MAXLEN];
1670                    const char *ptr = s;
1671                    STRLEN len;
1672                    /* Bug: warns about bad utf8 even if we are short on bytes
1673                       and will break out of the loop */
1674                    if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1675                                      'U'))
1676                        break;
1677                    len = UTF8SKIP(result);
1678                    if (!uni_to_bytes(aTHX_ &ptr, strend,
1679                                      (char *) &result[1], len-1, 'U')) break;
1680                    auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1681                    s = ptr;
1682                } else {
1683                    auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684                    if (retlen == (STRLEN) -1 || retlen == 0)
1685                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1686                    s += retlen;
1687                }
1688                if (!checksum)
1689                    mPUSHu(auv);
1690                else if (checksum > bits_in_uv)
1691                    cdouble += (NV) auv;
1692                else
1693                    cuv += auv;
1694            }
1695            break;
1696        case 's' | TYPE_IS_SHRIEKING:
1697#if SHORTSIZE != SIZE16
1698            while (len-- > 0) {
1699                short ashort;
1700                SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1701                DO_BO_UNPACK(ashort, s);
1702                if (!checksum)
1703                    mPUSHi(ashort);
1704                else if (checksum > bits_in_uv)
1705                    cdouble += (NV)ashort;
1706                else
1707                    cuv += ashort;
1708            }
1709            break;
1710#else
1711            /* Fallthrough! */
1712#endif
1713        case 's':
1714            while (len-- > 0) {
1715                I16 ai16;
1716
1717#if U16SIZE > SIZE16
1718                ai16 = 0;
1719#endif
1720                SHIFT16(utf8, s, strend, &ai16, datumtype);
1721                DO_BO_UNPACK(ai16, 16);
1722#if U16SIZE > SIZE16
1723                if (ai16 > 32767)
1724                    ai16 -= 65536;
1725#endif
1726                if (!checksum)
1727                    mPUSHi(ai16);
1728                else if (checksum > bits_in_uv)
1729                    cdouble += (NV)ai16;
1730                else
1731                    cuv += ai16;
1732            }
1733            break;
1734        case 'S' | TYPE_IS_SHRIEKING:
1735#if SHORTSIZE != SIZE16
1736            while (len-- > 0) {
1737                unsigned short aushort;
1738                SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1739                DO_BO_UNPACK(aushort, s);
1740                if (!checksum)
1741                    mPUSHu(aushort);
1742                else if (checksum > bits_in_uv)
1743                    cdouble += (NV)aushort;
1744                else
1745                    cuv += aushort;
1746            }
1747            break;
1748#else
1749            /* Fallhrough! */
1750#endif
1751        case 'v':
1752        case 'n':
1753        case 'S':
1754            while (len-- > 0) {
1755                U16 au16;
1756#if U16SIZE > SIZE16
1757                au16 = 0;
1758#endif
1759                SHIFT16(utf8, s, strend, &au16, datumtype);
1760                DO_BO_UNPACK(au16, 16);
1761#ifdef HAS_NTOHS
1762                if (datumtype == 'n')
1763                    au16 = PerlSock_ntohs(au16);
1764#endif
1765#ifdef HAS_VTOHS
1766                if (datumtype == 'v')
1767                    au16 = vtohs(au16);
1768#endif
1769                if (!checksum)
1770                    mPUSHu(au16);
1771                else if (checksum > bits_in_uv)
1772                    cdouble += (NV) au16;
1773                else
1774                    cuv += au16;
1775            }
1776            break;
1777#ifdef PERL_PACK_CAN_SHRIEKSIGN
1778        case 'v' | TYPE_IS_SHRIEKING:
1779        case 'n' | TYPE_IS_SHRIEKING:
1780            while (len-- > 0) {
1781                I16 ai16;
1782# if U16SIZE > SIZE16
1783                ai16 = 0;
1784# endif
1785                SHIFT16(utf8, s, strend, &ai16, datumtype);
1786# ifdef HAS_NTOHS
1787                if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1788                    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1789# endif /* HAS_NTOHS */
1790# ifdef HAS_VTOHS
1791                if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1792                    ai16 = (I16) vtohs((U16) ai16);
1793# endif /* HAS_VTOHS */
1794                if (!checksum)
1795                    mPUSHi(ai16);
1796                else if (checksum > bits_in_uv)
1797                    cdouble += (NV) ai16;
1798                else
1799                    cuv += ai16;
1800            }
1801            break;
1802#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1803        case 'i':
1804        case 'i' | TYPE_IS_SHRIEKING:
1805            while (len-- > 0) {
1806                int aint;
1807                SHIFT_VAR(utf8, s, strend, aint, datumtype);
1808                DO_BO_UNPACK(aint, i);
1809                if (!checksum)
1810                    mPUSHi(aint);
1811                else if (checksum > bits_in_uv)
1812                    cdouble += (NV)aint;
1813                else
1814                    cuv += aint;
1815            }
1816            break;
1817        case 'I':
1818        case 'I' | TYPE_IS_SHRIEKING:
1819            while (len-- > 0) {
1820                unsigned int auint;
1821                SHIFT_VAR(utf8, s, strend, auint, datumtype);
1822                DO_BO_UNPACK(auint, i);
1823                if (!checksum)
1824                    mPUSHu(auint);
1825                else if (checksum > bits_in_uv)
1826                    cdouble += (NV)auint;
1827                else
1828                    cuv += auint;
1829            }
1830            break;
1831        case 'j':
1832            while (len-- > 0) {
1833                IV aiv;
1834                SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1835#if IVSIZE == INTSIZE
1836                DO_BO_UNPACK(aiv, i);
1837#elif IVSIZE == LONGSIZE
1838                DO_BO_UNPACK(aiv, l);
1839#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1840                DO_BO_UNPACK(aiv, 64);
1841#else
1842                Perl_croak(aTHX_ "'j' not supported on this platform");
1843#endif
1844                if (!checksum)
1845                    mPUSHi(aiv);
1846                else if (checksum > bits_in_uv)
1847                    cdouble += (NV)aiv;
1848                else
1849                    cuv += aiv;
1850            }
1851            break;
1852        case 'J':
1853            while (len-- > 0) {
1854                UV auv;
1855                SHIFT_VAR(utf8, s, strend, auv, datumtype);
1856#if IVSIZE == INTSIZE
1857                DO_BO_UNPACK(auv, i);
1858#elif IVSIZE == LONGSIZE
1859                DO_BO_UNPACK(auv, l);
1860#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1861                DO_BO_UNPACK(auv, 64);
1862#else
1863                Perl_croak(aTHX_ "'J' not supported on this platform");
1864#endif
1865                if (!checksum)
1866                    mPUSHu(auv);
1867                else if (checksum > bits_in_uv)
1868                    cdouble += (NV)auv;
1869                else
1870                    cuv += auv;
1871            }
1872            break;
1873        case 'l' | TYPE_IS_SHRIEKING:
1874#if LONGSIZE != SIZE32
1875            while (len-- > 0) {
1876                long along;
1877                SHIFT_VAR(utf8, s, strend, along, datumtype);
1878                DO_BO_UNPACK(along, l);
1879                if (!checksum)
1880                    mPUSHi(along);
1881                else if (checksum > bits_in_uv)
1882                    cdouble += (NV)along;
1883                else
1884                    cuv += along;
1885            }
1886            break;
1887#else
1888            /* Fallthrough! */
1889#endif
1890        case 'l':
1891            while (len-- > 0) {
1892                I32 ai32;
1893#if U32SIZE > SIZE32
1894                ai32 = 0;
1895#endif
1896                SHIFT32(utf8, s, strend, &ai32, datumtype);
1897                DO_BO_UNPACK(ai32, 32);
1898#if U32SIZE > SIZE32
1899                if (ai32 > 2147483647) ai32 -= 4294967296;
1900#endif
1901                if (!checksum)
1902                    mPUSHi(ai32);
1903                else if (checksum > bits_in_uv)
1904                    cdouble += (NV)ai32;
1905                else
1906                    cuv += ai32;
1907            }
1908            break;
1909        case 'L' | TYPE_IS_SHRIEKING:
1910#if LONGSIZE != SIZE32
1911            while (len-- > 0) {
1912                unsigned long aulong;
1913                SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1914                DO_BO_UNPACK(aulong, l);
1915                if (!checksum)
1916                    mPUSHu(aulong);
1917                else if (checksum > bits_in_uv)
1918                    cdouble += (NV)aulong;
1919                else
1920                    cuv += aulong;
1921            }
1922            break;
1923#else
1924            /* Fall through! */
1925#endif
1926        case 'V':
1927        case 'N':
1928        case 'L':
1929            while (len-- > 0) {
1930                U32 au32;
1931#if U32SIZE > SIZE32
1932                au32 = 0;
1933#endif
1934                SHIFT32(utf8, s, strend, &au32, datumtype);
1935                DO_BO_UNPACK(au32, 32);
1936#ifdef HAS_NTOHL
1937                if (datumtype == 'N')
1938                    au32 = PerlSock_ntohl(au32);
1939#endif
1940#ifdef HAS_VTOHL
1941                if (datumtype == 'V')
1942                    au32 = vtohl(au32);
1943#endif
1944                if (!checksum)
1945                    mPUSHu(au32);
1946                else if (checksum > bits_in_uv)
1947                    cdouble += (NV)au32;
1948                else
1949                    cuv += au32;
1950            }
1951            break;
1952#ifdef PERL_PACK_CAN_SHRIEKSIGN
1953        case 'V' | TYPE_IS_SHRIEKING:
1954        case 'N' | TYPE_IS_SHRIEKING:
1955            while (len-- > 0) {
1956                I32 ai32;
1957# if U32SIZE > SIZE32
1958                ai32 = 0;
1959# endif
1960                SHIFT32(utf8, s, strend, &ai32, datumtype);
1961# ifdef HAS_NTOHL
1962                if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1963                    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1964# endif
1965# ifdef HAS_VTOHL
1966                if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1967                    ai32 = (I32)vtohl((U32)ai32);
1968# endif
1969                if (!checksum)
1970                    mPUSHi(ai32);
1971                else if (checksum > bits_in_uv)
1972                    cdouble += (NV)ai32;
1973                else
1974                    cuv += ai32;
1975            }
1976            break;
1977#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1978        case 'p':
1979            while (len-- > 0) {
1980                const char *aptr;
1981                SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1982                DO_BO_UNPACK_PC(aptr);
1983                /* newSVpv generates undef if aptr is NULL */
1984                mPUSHs(newSVpv(aptr, 0));
1985            }
1986            break;
1987        case 'w':
1988            {
1989                UV auv = 0;
1990                U32 bytes = 0;
1991
1992                while (len > 0 && s < strend) {
1993                    U8 ch;
1994                    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1995                    auv = (auv << 7) | (ch & 0x7f);
1996                    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1997                    if (ch < 0x80) {
1998                        bytes = 0;
1999                        mPUSHu(auv);
2000                        len--;
2001                        auv = 0;
2002                        continue;
2003                    }
2004                    if (++bytes >= sizeof(UV)) {        /* promote to string */
2005                        const char *t;
2006
2007                        sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2008                        while (s < strend) {
2009                            ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2010                            sv = mul128(sv, (U8)(ch & 0x7f));
2011                            if (!(ch & 0x80)) {
2012                                bytes = 0;
2013                                break;
2014                            }
2015                        }
2016                        t = SvPV_nolen_const(sv);
2017                        while (*t == '0')
2018                            t++;
2019                        sv_chop(sv, t);
2020                        mPUSHs(sv);
2021                        len--;
2022                        auv = 0;
2023                    }
2024                }
2025                if ((s >= strend) && bytes)
2026                    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2027            }
2028            break;
2029        case 'P':
2030            if (symptr->howlen == e_star)
2031                Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2032            EXTEND(SP, 1);
2033            if (s + sizeof(char*) <= strend) {
2034                char *aptr;
2035                SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2036                DO_BO_UNPACK_PC(aptr);
2037                /* newSVpvn generates undef if aptr is NULL */
2038                PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2039            }
2040            break;
2041#ifdef HAS_QUAD
2042        case 'q':
2043            while (len-- > 0) {
2044                Quad_t aquad;
2045                SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2046                DO_BO_UNPACK(aquad, 64);
2047                if (!checksum)
2048                    mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2049                           newSViv((IV)aquad) : newSVnv((NV)aquad));
2050                else if (checksum > bits_in_uv)
2051                    cdouble += (NV)aquad;
2052                else
2053                    cuv += aquad;
2054            }
2055            break;
2056        case 'Q':
2057            while (len-- > 0) {
2058                Uquad_t auquad;
2059                SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2060                DO_BO_UNPACK(auquad, 64);
2061                if (!checksum)
2062                    mPUSHs(auquad <= UV_MAX ?
2063                           newSVuv((UV)auquad) : newSVnv((NV)auquad));
2064                else if (checksum > bits_in_uv)
2065                    cdouble += (NV)auquad;
2066                else
2067                    cuv += auquad;
2068            }
2069            break;
2070#endif /* HAS_QUAD */
2071        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2072        case 'f':
2073            while (len-- > 0) {
2074                float afloat;
2075                SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2076                DO_BO_UNPACK_N(afloat, float);
2077                if (!checksum)
2078                    mPUSHn(afloat);
2079                else
2080                    cdouble += afloat;
2081            }
2082            break;
2083        case 'd':
2084            while (len-- > 0) {
2085                double adouble;
2086                SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2087                DO_BO_UNPACK_N(adouble, double);
2088                if (!checksum)
2089                    mPUSHn(adouble);
2090                else
2091                    cdouble += adouble;
2092            }
2093            break;
2094        case 'F':
2095            while (len-- > 0) {
2096                NV anv;
2097                SHIFT_VAR(utf8, s, strend, anv, datumtype);
2098                DO_BO_UNPACK_N(anv, NV);
2099                if (!checksum)
2100                    mPUSHn(anv);
2101                else
2102                    cdouble += anv;
2103            }
2104            break;
2105#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2106        case 'D':
2107            while (len-- > 0) {
2108                long double aldouble;
2109                SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2110                DO_BO_UNPACK_N(aldouble, long double);
2111                if (!checksum)
2112                    mPUSHn(aldouble);
2113                else
2114                    cdouble += aldouble;
2115            }
2116            break;
2117#endif
2118        case 'u':
2119            {
2120                const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2121                sv = sv_2mortal(newSV(l));
2122                if (l) SvPOK_on(sv);
2123            }
2124            if (utf8) {
2125                while (next_uni_uu(aTHX_ &s, strend, &len)) {
2126                    I32 a, b, c, d;
2127                    char hunk[3];
2128
2129                    while (len > 0) {
2130                        next_uni_uu(aTHX_ &s, strend, &a);
2131                        next_uni_uu(aTHX_ &s, strend, &b);
2132                        next_uni_uu(aTHX_ &s, strend, &c);
2133                        next_uni_uu(aTHX_ &s, strend, &d);
2134                        hunk[0] = (char)((a << 2) | (b >> 4));
2135                        hunk[1] = (char)((b << 4) | (c >> 2));
2136                        hunk[2] = (char)((c << 6) | d);
2137                        sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2138                        len -= 3;
2139                    }
2140                    if (s < strend) {
2141                        if (*s == '\n') {
2142                            s++;
2143                        }
2144                        else {
2145                            /* possible checksum byte */
2146                            const char *skip = s+UTF8SKIP(s);
2147                            if (skip < strend && *skip == '\n')
2148                                s = skip+1;
2149                        }
2150                    }
2151                }
2152            } else {
2153                while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2154                    I32 a, b, c, d;
2155                    char hunk[3];
2156
2157                    len = PL_uudmap[*(U8*)s++] & 077;
2158                    while (len > 0) {
2159                        if (s < strend && ISUUCHAR(*s))
2160                            a = PL_uudmap[*(U8*)s++] & 077;
2161                        else
2162                            a = 0;
2163                        if (s < strend && ISUUCHAR(*s))
2164                            b = PL_uudmap[*(U8*)s++] & 077;
2165                        else
2166                            b = 0;
2167                        if (s < strend && ISUUCHAR(*s))
2168                            c = PL_uudmap[*(U8*)s++] & 077;
2169                        else
2170                            c = 0;
2171                        if (s < strend && ISUUCHAR(*s))
2172                            d = PL_uudmap[*(U8*)s++] & 077;
2173                        else
2174                            d = 0;
2175                        hunk[0] = (char)((a << 2) | (b >> 4));
2176                        hunk[1] = (char)((b << 4) | (c >> 2));
2177                        hunk[2] = (char)((c << 6) | d);
2178                        sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2179                        len -= 3;
2180                    }
2181                    if (*s == '\n')
2182                        s++;
2183                    else        /* possible checksum byte */
2184                        if (s + 1 < strend && s[1] == '\n')
2185                            s += 2;
2186                }
2187            }
2188            XPUSHs(sv);
2189            break;
2190        }
2191
2192        if (checksum) {
2193            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2194              (checksum > bits_in_uv &&
2195               strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2196                NV trouble, anv;
2197
2198                anv = (NV) (1 << (checksum & 15));
2199                while (checksum >= 16) {
2200                    checksum -= 16;
2201                    anv *= 65536.0;
2202                }
2203                while (cdouble < 0.0)
2204                    cdouble += anv;
2205                cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2206                sv = newSVnv(cdouble);
2207            }
2208            else {
2209                if (checksum < bits_in_uv) {
2210                    UV mask = ((UV)1 << checksum) - 1;
2211                    cuv &= mask;
2212                }
2213                sv = newSVuv(cuv);
2214            }
2215            mXPUSHs(sv);
2216            checksum = 0;
2217        }
2218
2219        if (symptr->flags & FLAG_SLASH){
2220            if (SP - PL_stack_base - start_sp_offset <= 0)
2221                Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2222            if( next_symbol(symptr) ){
2223              if( symptr->howlen == e_number )
2224                Perl_croak(aTHX_ "Count after length/code in unpack" );
2225              if( beyond ){
2226                /* ...end of char buffer then no decent length available */
2227                Perl_croak(aTHX_ "length/code after end of string in unpack" );
2228              } else {
2229                /* take top of stack (hope it's numeric) */
2230                len = POPi;
2231                if( len < 0 )
2232                    Perl_croak(aTHX_ "Negative '/' count in unpack" );
2233              }
2234            } else {
2235                Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2236            }
2237            datumtype = symptr->code;
2238            explicit_length = FALSE;
2239            goto redo_switch;
2240        }
2241    }
2242
2243    if (new_s)
2244        *new_s = s;
2245    PUTBACK;
2246    return SP - PL_stack_base - start_sp_offset;
2247}
2248
2249PP(pp_unpack)
2250{
2251    dVAR;
2252    dSP;
2253    dPOPPOPssrl;
2254    I32 gimme = GIMME_V;
2255    STRLEN llen;
2256    STRLEN rlen;
2257    const char *pat = SvPV_const(left,  llen);
2258    const char *s   = SvPV_const(right, rlen);
2259    const char *strend = s + rlen;
2260    const char *patend = pat + llen;
2261    I32 cnt;
2262
2263    PUTBACK;
2264    cnt = unpackstring(pat, patend, s, strend,
2265                     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2266                     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2267
2268    SPAGAIN;
2269    if ( !cnt && gimme == G_SCALAR )
2270       PUSHs(&PL_sv_undef);
2271    RETURN;
2272}
2273
2274STATIC U8 *
2275doencodes(U8 *h, const char *s, I32 len)
2276{
2277    *h++ = PL_uuemap[len];
2278    while (len > 2) {
2279        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2280        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2281        *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2282        *h++ = PL_uuemap[(077 & (s[2] & 077))];
2283        s += 3;
2284        len -= 3;
2285    }
2286    if (len > 0) {
2287        const char r = (len > 1 ? s[1] : '\0');
2288        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2289        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2290        *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2291        *h++ = PL_uuemap[0];
2292    }
2293    *h++ = '\n';
2294    return h;
2295}
2296
2297STATIC SV *
2298S_is_an_int(pTHX_ const char *s, STRLEN l)
2299{
2300  SV *result = newSVpvn(s, l);
2301  char *const result_c = SvPV_nolen(result);    /* convenience */
2302  char *out = result_c;
2303  bool skip = 1;
2304  bool ignore = 0;
2305
2306  PERL_ARGS_ASSERT_IS_AN_INT;
2307
2308  while (*s) {
2309    switch (*s) {
2310    case ' ':
2311      break;
2312    case '+':
2313      if (!skip) {
2314        SvREFCNT_dec(result);
2315        return (NULL);
2316      }
2317      break;
2318    case '0':
2319    case '1':
2320    case '2':
2321    case '3':
2322    case '4':
2323    case '5':
2324    case '6':
2325    case '7':
2326    case '8':
2327    case '9':
2328      skip = 0;
2329      if (!ignore) {
2330        *(out++) = *s;
2331      }
2332      break;
2333    case '.':
2334      ignore = 1;
2335      break;
2336    default:
2337      SvREFCNT_dec(result);
2338      return (NULL);
2339    }
2340    s++;
2341  }
2342  *(out++) = '\0';
2343  SvCUR_set(result, out - result_c);
2344  return (result);
2345}
2346
2347/* pnum must be '\0' terminated */
2348STATIC int
2349S_div128(pTHX_ SV *pnum, bool *done)
2350{
2351    STRLEN len;
2352    char * const s = SvPV(pnum, len);
2353    char *t = s;
2354    int m = 0;
2355
2356    PERL_ARGS_ASSERT_DIV128;
2357
2358    *done = 1;
2359    while (*t) {
2360        const int i = m * 10 + (*t - '0');
2361        const int r = (i >> 7); /* r < 10 */
2362        m = i & 0x7F;
2363        if (r) {
2364            *done = 0;
2365        }
2366        *(t++) = '0' + r;
2367    }
2368    *(t++) = '\0';
2369    SvCUR_set(pnum, (STRLEN) (t - s));
2370    return (m);
2371}
2372
2373/*
2374=for apidoc packlist
2375
2376The engine implementing pack() Perl function.
2377
2378=cut
2379*/
2380
2381void
2382Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2383{
2384    dVAR;
2385    tempsym_t sym;
2386
2387    PERL_ARGS_ASSERT_PACKLIST;
2388
2389    TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2390
2391    /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2392       Also make sure any UTF8 flag is loaded */
2393    SvPV_force_nolen(cat);
2394    if (DO_UTF8(cat))
2395        sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2396
2397    (void)pack_rec( cat, &sym, beglist, endlist );
2398}
2399
2400/* like sv_utf8_upgrade, but also repoint the group start markers */
2401STATIC void
2402marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2403    STRLEN len;
2404    tempsym_t *group;
2405    const char *from_ptr, *from_start, *from_end, **marks, **m;
2406    char *to_start, *to_ptr;
2407
2408    if (SvUTF8(sv)) return;
2409
2410    from_start = SvPVX_const(sv);
2411    from_end = from_start + SvCUR(sv);
2412    for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2413        if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2414    if (from_ptr == from_end) {
2415        /* Simple case: no character needs to be changed */
2416        SvUTF8_on(sv);
2417        return;
2418    }
2419
2420    len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2421    Newx(to_start, len, char);
2422    Copy(from_start, to_start, from_ptr-from_start, char);
2423    to_ptr = to_start + (from_ptr-from_start);
2424
2425    Newx(marks, sym_ptr->level+2, const char *);
2426    for (group=sym_ptr; group; group = group->previous)
2427        marks[group->level] = from_start + group->strbeg;
2428    marks[sym_ptr->level+1] = from_end+1;
2429    for (m = marks; *m < from_ptr; m++)
2430        *m = to_start + (*m-from_start);
2431
2432    for (;from_ptr < from_end; from_ptr++) {
2433        while (*m == from_ptr) *m++ = to_ptr;
2434        to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2435    }
2436    *to_ptr = 0;
2437
2438    while (*m == from_ptr) *m++ = to_ptr;
2439    if (m != marks + sym_ptr->level+1) {
2440        Safefree(marks);
2441        Safefree(to_start);
2442        Perl_croak(aTHX_ "Assertion: marks beyond string end");
2443    }
2444    for (group=sym_ptr; group; group = group->previous)
2445        group->strbeg = marks[group->level] - to_start;
2446    Safefree(marks);
2447
2448    if (SvOOK(sv)) {
2449        if (SvIVX(sv)) {
2450            SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2451            from_start -= SvIVX(sv);
2452            SvIV_set(sv, 0);
2453        }
2454        SvFLAGS(sv) &= ~SVf_OOK;
2455    }
2456    if (SvLEN(sv) != 0)
2457        Safefree(from_start);
2458    SvPV_set(sv, to_start);
2459    SvCUR_set(sv, to_ptr - to_start);
2460    SvLEN_set(sv, len);
2461    SvUTF8_on(sv);
2462}
2463
2464/* Exponential string grower. Makes string extension effectively O(n)
2465   needed says how many extra bytes we need (not counting the final '\0')
2466   Only grows the string if there is an actual lack of space
2467*/
2468STATIC char *
2469S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2470    const STRLEN cur = SvCUR(sv);
2471    const STRLEN len = SvLEN(sv);
2472    STRLEN extend;
2473
2474    PERL_ARGS_ASSERT_SV_EXP_GROW;
2475
2476    if (len - cur > needed) return SvPVX(sv);
2477    extend = needed > len ? needed : len;
2478    return SvGROW(sv, len+extend+1);
2479}
2480
2481STATIC
2482SV **
2483S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2484{
2485    dVAR;
2486    tempsym_t lookahead;
2487    I32 items  = endlist - beglist;
2488    bool found = next_symbol(symptr);
2489    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2490    bool warn_utf8 = ckWARN(WARN_UTF8);
2491
2492    PERL_ARGS_ASSERT_PACK_REC;
2493
2494    if (symptr->level == 0 && found && symptr->code == 'U') {
2495        marked_upgrade(aTHX_ cat, symptr);
2496        symptr->flags |= FLAG_DO_UTF8;
2497        utf8 = 0;
2498    }
2499    symptr->strbeg = SvCUR(cat);
2500
2501    while (found) {
2502        SV *fromstr;
2503        STRLEN fromlen;
2504        I32 len;
2505        SV *lengthcode = NULL;
2506        I32 datumtype = symptr->code;
2507        howlen_t howlen = symptr->howlen;
2508        char *start = SvPVX(cat);
2509        char *cur   = start + SvCUR(cat);
2510
2511#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2512
2513        switch (howlen) {
2514          case e_star:
2515            len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2516                0 : items;
2517            break;
2518          default:
2519            /* e_no_len and e_number */
2520            len = symptr->length;
2521            break;
2522        }
2523
2524        if (len) {
2525            packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2526
2527            if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2528                /* We can process this letter. */
2529                STRLEN size = props & PACK_SIZE_MASK;
2530                GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2531            }
2532        }
2533
2534        /* Look ahead for next symbol. Do we have code/code? */
2535        lookahead = *symptr;
2536        found = next_symbol(&lookahead);
2537        if (symptr->flags & FLAG_SLASH) {
2538            IV count;
2539            if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2540            if (strchr("aAZ", lookahead.code)) {
2541                if (lookahead.howlen == e_number) count = lookahead.length;
2542                else {
2543                    if (items > 0) {
2544                        if (SvGAMAGIC(*beglist)) {
2545                            /* Avoid reading the active data more than once
2546                               by copying it to a temporary.  */
2547                            STRLEN len;
2548                            const char *const pv = SvPV_const(*beglist, len);
2549                            SV *const temp
2550                                = newSVpvn_flags(pv, len,
2551                                                 SVs_TEMP | SvUTF8(*beglist));
2552                            *beglist = temp;
2553                        }
2554                        count = DO_UTF8(*beglist) ?
2555                            sv_len_utf8(*beglist) : sv_len(*beglist);
2556                    }
2557                    else count = 0;
2558                    if (lookahead.code == 'Z') count++;
2559                }
2560            } else {
2561                if (lookahead.howlen == e_number && lookahead.length < items)
2562                    count = lookahead.length;
2563                else count = items;
2564            }
2565            lookahead.howlen = e_number;
2566            lookahead.length = count;
2567            lengthcode = sv_2mortal(newSViv(count));
2568        }
2569
2570        /* Code inside the switch must take care to properly update
2571           cat (CUR length and '\0' termination) if it updated *cur and
2572           doesn't simply leave using break */
2573        switch(TYPE_NO_ENDIANNESS(datumtype)) {
2574        default:
2575            Perl_croak(aTHX_ "Invalid type '%c' in pack",
2576                       (int) TYPE_NO_MODIFIERS(datumtype));
2577        case '%':
2578            Perl_croak(aTHX_ "'%%' may not be used in pack");
2579        {
2580            char *from;
2581#ifdef PERL_PACK_CAN_SHRIEKSIGN
2582        case '.' | TYPE_IS_SHRIEKING:
2583#endif
2584        case '.':
2585            if (howlen == e_star) from = start;
2586            else if (len == 0) from = cur;
2587            else {
2588                tempsym_t *group = symptr;
2589
2590                while (--len && group) group = group->previous;
2591                from = group ? start + group->strbeg : start;
2592            }
2593            fromstr = NEXTFROM;
2594            len = SvIV(fromstr);
2595            goto resize;
2596#ifdef PERL_PACK_CAN_SHRIEKSIGN
2597        case '@' | TYPE_IS_SHRIEKING:
2598#endif
2599        case '@':
2600            from = start + symptr->strbeg;
2601          resize:
2602#ifdef PERL_PACK_CAN_SHRIEKSIGN
2603            if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2604#else /* PERL_PACK_CAN_SHRIEKSIGN */
2605            if (utf8)
2606#endif
2607                if (len >= 0) {
2608                    while (len && from < cur) {
2609                        from += UTF8SKIP(from);
2610                        len--;
2611                    }
2612                    if (from > cur)
2613                        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2614                    if (len) {
2615                        /* Here we know from == cur */
2616                      grow:
2617                        GROWING(0, cat, start, cur, len);
2618                        Zero(cur, len, char);
2619                        cur += len;
2620                    } else if (from < cur) {
2621                        len = cur - from;
2622                        goto shrink;
2623                    } else goto no_change;
2624                } else {
2625                    cur = from;
2626                    len = -len;
2627                    goto utf8_shrink;
2628                }
2629            else {
2630                len -= cur - from;
2631                if (len > 0) goto grow;
2632                if (len == 0) goto no_change;
2633                len = -len;
2634                goto shrink;
2635            }
2636            break;
2637        }
2638        case '(': {
2639            tempsym_t savsym = *symptr;
2640            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2641            symptr->flags |= group_modifiers;
2642            symptr->patend = savsym.grpend;
2643            symptr->level++;
2644            symptr->previous = &lookahead;
2645            while (len--) {
2646                U32 was_utf8;
2647                if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2648                else      symptr->flags &= ~FLAG_PARSE_UTF8;
2649                was_utf8 = SvUTF8(cat);
2650                symptr->patptr = savsym.grpbeg;
2651                beglist = pack_rec(cat, symptr, beglist, endlist);
2652                if (SvUTF8(cat) != was_utf8)
2653                    /* This had better be an upgrade while in utf8==0 mode */
2654                    utf8 = 1;
2655
2656                if (savsym.howlen == e_star && beglist == endlist)
2657                    break;              /* No way to continue */
2658            }
2659            items = endlist - beglist;
2660            lookahead.flags  = symptr->flags & ~group_modifiers;
2661            goto no_change;
2662        }
2663        case 'X' | TYPE_IS_SHRIEKING:
2664            if (!len)                   /* Avoid division by 0 */
2665                len = 1;
2666            if (utf8) {
2667                char *hop, *last;
2668                I32 l = len;
2669                hop = last = start;
2670                while (hop < cur) {
2671                    hop += UTF8SKIP(hop);
2672                    if (--l == 0) {
2673                        last = hop;
2674                        l = len;
2675                    }
2676                }
2677                if (last > cur)
2678                    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2679                cur = last;
2680                break;
2681            }
2682            len = (cur-start) % len;
2683            /* FALL THROUGH */
2684        case 'X':
2685            if (utf8) {
2686                if (len < 1) goto no_change;
2687              utf8_shrink:
2688                while (len > 0) {
2689                    if (cur <= start)
2690                        Perl_croak(aTHX_ "'%c' outside of string in pack",
2691                                   (int) TYPE_NO_MODIFIERS(datumtype));
2692                    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2693                        if (cur <= start)
2694                            Perl_croak(aTHX_ "'%c' outside of string in pack",
2695                                       (int) TYPE_NO_MODIFIERS(datumtype));
2696                    }
2697                    len--;
2698                }
2699            } else {
2700              shrink:
2701                if (cur - start < len)
2702                    Perl_croak(aTHX_ "'%c' outside of string in pack",
2703                               (int) TYPE_NO_MODIFIERS(datumtype));
2704                cur -= len;
2705            }
2706            if (cur < start+symptr->strbeg) {
2707                /* Make sure group starts don't point into the void */
2708                tempsym_t *group;
2709                const STRLEN length = cur-start;
2710                for (group = symptr;
2711                     group && length < group->strbeg;
2712                     group = group->previous) group->strbeg = length;
2713                lookahead.strbeg = length;
2714            }
2715            break;
2716        case 'x' | TYPE_IS_SHRIEKING: {
2717            I32 ai32;
2718            if (!len)                   /* Avoid division by 0 */
2719                len = 1;
2720            if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2721            else      ai32 = (cur - start) % len;
2722            if (ai32 == 0) goto no_change;
2723            len -= ai32;
2724        }
2725        /* FALL THROUGH */
2726        case 'x':
2727            goto grow;
2728        case 'A':
2729        case 'Z':
2730        case 'a': {
2731            const char *aptr;
2732
2733            fromstr = NEXTFROM;
2734            aptr = SvPV_const(fromstr, fromlen);
2735            if (DO_UTF8(fromstr)) {
2736                const char *end, *s;
2737
2738                if (!utf8 && !SvUTF8(cat)) {
2739                    marked_upgrade(aTHX_ cat, symptr);
2740                    lookahead.flags |= FLAG_DO_UTF8;
2741                    lookahead.strbeg = symptr->strbeg;
2742                    utf8 = 1;
2743                    start = SvPVX(cat);
2744                    cur = start + SvCUR(cat);
2745                }
2746                if (howlen == e_star) {
2747                    if (utf8) goto string_copy;
2748                    len = fromlen+1;
2749                }
2750                s = aptr;
2751                end = aptr + fromlen;
2752                fromlen = datumtype == 'Z' ? len-1 : len;
2753                while ((I32) fromlen > 0 && s < end) {
2754                    s += UTF8SKIP(s);
2755                    fromlen--;
2756                }
2757                if (s > end)
2758                    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2759                if (utf8) {
2760                    len = fromlen;
2761                    if (datumtype == 'Z') len++;
2762                    fromlen = s-aptr;
2763                    len += fromlen;
2764
2765                    goto string_copy;
2766                }
2767                fromlen = len - fromlen;
2768                if (datumtype == 'Z') fromlen--;
2769                if (howlen == e_star) {
2770                    len = fromlen;
2771                    if (datumtype == 'Z') len++;
2772                }
2773                GROWING(0, cat, start, cur, len);
2774                if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2775                                  datumtype | TYPE_IS_PACK))
2776                    Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2777                cur += fromlen;
2778                len -= fromlen;
2779            } else if (utf8) {
2780                if (howlen == e_star) {
2781                    len = fromlen;
2782                    if (datumtype == 'Z') len++;
2783                }
2784                if (len <= (I32) fromlen) {
2785                    fromlen = len;
2786                    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2787                }
2788                /* assumes a byte expands to at most UTF8_EXPAND bytes on
2789                   upgrade, so:
2790                   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2791                GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2792                len -= fromlen;
2793                while (fromlen > 0) {
2794                    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2795                    aptr++;
2796                    fromlen--;
2797                }
2798            } else {
2799              string_copy:
2800                if (howlen == e_star) {
2801                    len = fromlen;
2802                    if (datumtype == 'Z') len++;
2803                }
2804                if (len <= (I32) fromlen) {
2805                    fromlen = len;
2806                    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2807                }
2808                GROWING(0, cat, start, cur, len);
2809                Copy(aptr, cur, fromlen, char);
2810                cur += fromlen;
2811                len -= fromlen;
2812            }
2813            memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2814            cur += len;
2815            SvTAINT(cat);
2816            break;
2817        }
2818        case 'B':
2819        case 'b': {
2820            const char *str, *end;
2821            I32 l, field_len;
2822            U8 bits;
2823            bool utf8_source;
2824            U32 utf8_flags;
2825
2826            fromstr = NEXTFROM;
2827            str = SvPV_const(fromstr, fromlen);
2828            end = str + fromlen;
2829            if (DO_UTF8(fromstr)) {
2830                utf8_source = TRUE;
2831                utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2832            } else {
2833                utf8_source = FALSE;
2834                utf8_flags  = 0; /* Unused, but keep compilers happy */
2835            }
2836            if (howlen == e_star) len = fromlen;
2837            field_len = (len+7)/8;
2838            GROWING(utf8, cat, start, cur, field_len);
2839            if (len > (I32)fromlen) len = fromlen;
2840            bits = 0;
2841            l = 0;
2842            if (datumtype == 'B')
2843                while (l++ < len) {
2844                    if (utf8_source) {
2845                        UV val = 0;
2846                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847                        bits |= val & 1;
2848                    } else bits |= *str++ & 1;
2849                    if (l & 7) bits <<= 1;
2850                    else {
2851                        PUSH_BYTE(utf8, cur, bits);
2852                        bits = 0;
2853                    }
2854                }
2855            else
2856                /* datumtype == 'b' */
2857                while (l++ < len) {
2858                    if (utf8_source) {
2859                        UV val = 0;
2860                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2861                        if (val & 1) bits |= 0x80;
2862                    } else if (*str++ & 1)
2863                        bits |= 0x80;
2864                    if (l & 7) bits >>= 1;
2865                    else {
2866                        PUSH_BYTE(utf8, cur, bits);
2867                        bits = 0;
2868                    }
2869                }
2870            l--;
2871            if (l & 7) {
2872                if (datumtype == 'B')
2873                    bits <<= 7 - (l & 7);
2874                else
2875                    bits >>= 7 - (l & 7);
2876                PUSH_BYTE(utf8, cur, bits);
2877                l += 7;
2878            }
2879            /* Determine how many chars are left in the requested field */
2880            l /= 8;
2881            if (howlen == e_star) field_len = 0;
2882            else field_len -= l;
2883            Zero(cur, field_len, char);
2884            cur += field_len;
2885            break;
2886        }
2887        case 'H':
2888        case 'h': {
2889            const char *str, *end;
2890            I32 l, field_len;
2891            U8 bits;
2892            bool utf8_source;
2893            U32 utf8_flags;
2894
2895            fromstr = NEXTFROM;
2896            str = SvPV_const(fromstr, fromlen);
2897            end = str + fromlen;
2898            if (DO_UTF8(fromstr)) {
2899                utf8_source = TRUE;
2900                utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2901            } else {
2902                utf8_source = FALSE;
2903                utf8_flags  = 0; /* Unused, but keep compilers happy */
2904            }
2905            if (howlen == e_star) len = fromlen;
2906            field_len = (len+1)/2;
2907            GROWING(utf8, cat, start, cur, field_len);
2908            if (!utf8 && len > (I32)fromlen) len = fromlen;
2909            bits = 0;
2910            l = 0;
2911            if (datumtype == 'H')
2912                while (l++ < len) {
2913                    if (utf8_source) {
2914                        UV val = 0;
2915                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2916                        if (val < 256 && isALPHA(val))
2917                            bits |= (val + 9) & 0xf;
2918                        else
2919                            bits |= val & 0xf;
2920                    } else if (isALPHA(*str))
2921                        bits |= (*str++ + 9) & 0xf;
2922                    else
2923                        bits |= *str++ & 0xf;
2924                    if (l & 1) bits <<= 4;
2925                    else {
2926                        PUSH_BYTE(utf8, cur, bits);
2927                        bits = 0;
2928                    }
2929                }
2930            else
2931                while (l++ < len) {
2932                    if (utf8_source) {
2933                        UV val = 0;
2934                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935                        if (val < 256 && isALPHA(val))
2936                            bits |= ((val + 9) & 0xf) << 4;
2937                        else
2938                            bits |= (val & 0xf) << 4;
2939                    } else if (isALPHA(*str))
2940                        bits |= ((*str++ + 9) & 0xf) << 4;
2941                    else
2942                        bits |= (*str++ & 0xf) << 4;
2943                    if (l & 1) bits >>= 4;
2944                    else {
2945                        PUSH_BYTE(utf8, cur, bits);
2946                        bits = 0;
2947                    }
2948                }
2949            l--;
2950            if (l & 1) {
2951                PUSH_BYTE(utf8, cur, bits);
2952                l++;
2953            }
2954            /* Determine how many chars are left in the requested field */
2955            l /= 2;
2956            if (howlen == e_star) field_len = 0;
2957            else field_len -= l;
2958            Zero(cur, field_len, char);
2959            cur += field_len;
2960            break;
2961        }
2962        case 'c':
2963            while (len-- > 0) {
2964                IV aiv;
2965                fromstr = NEXTFROM;
2966                aiv = SvIV(fromstr);
2967                if ((-128 > aiv || aiv > 127) &&
2968                    ckWARN(WARN_PACK))
2969                    Perl_warner(aTHX_ packWARN(WARN_PACK),
2970                                "Character in 'c' format wrapped in pack");
2971                PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2972            }
2973            break;
2974        case 'C':
2975            if (len == 0) {
2976                utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2977                break;
2978            }
2979            while (len-- > 0) {
2980                IV aiv;
2981                fromstr = NEXTFROM;
2982                aiv = SvIV(fromstr);
2983                if ((0 > aiv || aiv > 0xff) &&
2984                    ckWARN(WARN_PACK))
2985                    Perl_warner(aTHX_ packWARN(WARN_PACK),
2986                                "Character in 'C' format wrapped in pack");
2987                PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2988            }
2989            break;
2990        case 'W': {
2991            char *end;
2992            U8 in_bytes = (U8)IN_BYTES;
2993
2994            end = start+SvLEN(cat)-1;
2995            if (utf8) end -= UTF8_MAXLEN-1;
2996            while (len-- > 0) {
2997                UV auv;
2998                fromstr = NEXTFROM;
2999                auv = SvUV(fromstr);
3000                if (in_bytes) auv = auv % 0x100;
3001                if (utf8) {
3002                  W_utf8:
3003                    if (cur > end) {
3004                        *cur = '\0';
3005                        SvCUR_set(cat, cur - start);
3006
3007                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3008                        end = start+SvLEN(cat)-UTF8_MAXLEN;
3009                    }
3010                    cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3011                                                       NATIVE_TO_UNI(auv),
3012                                                       warn_utf8 ?
3013                                                       0 : UNICODE_ALLOW_ANY);
3014                } else {
3015                    if (auv >= 0x100) {
3016                        if (!SvUTF8(cat)) {
3017                            *cur = '\0';
3018                            SvCUR_set(cat, cur - start);
3019                            marked_upgrade(aTHX_ cat, symptr);
3020                            lookahead.flags |= FLAG_DO_UTF8;
3021                            lookahead.strbeg = symptr->strbeg;
3022                            utf8 = 1;
3023                            start = SvPVX(cat);
3024                            cur = start + SvCUR(cat);
3025                            end = start+SvLEN(cat)-UTF8_MAXLEN;
3026                            goto W_utf8;
3027                        }
3028                        if (ckWARN(WARN_PACK))
3029                            Perl_warner(aTHX_ packWARN(WARN_PACK),
3030                                        "Character in 'W' format wrapped in pack");
3031                        auv &= 0xff;
3032                    }
3033                    if (cur >= end) {
3034                        *cur = '\0';
3035                        SvCUR_set(cat, cur - start);
3036                        GROWING(0, cat, start, cur, len+1);
3037                        end = start+SvLEN(cat)-1;
3038                    }
3039                    *(U8 *) cur++ = (U8)auv;
3040                }
3041            }
3042            break;
3043        }
3044        case 'U': {
3045            char *end;
3046
3047            if (len == 0) {
3048                if (!(symptr->flags & FLAG_DO_UTF8)) {
3049                    marked_upgrade(aTHX_ cat, symptr);
3050                    lookahead.flags |= FLAG_DO_UTF8;
3051                    lookahead.strbeg = symptr->strbeg;
3052                }
3053                utf8 = 0;
3054                goto no_change;
3055            }
3056
3057            end = start+SvLEN(cat);
3058            if (!utf8) end -= UTF8_MAXLEN;
3059            while (len-- > 0) {
3060                UV auv;
3061                fromstr = NEXTFROM;
3062                auv = SvUV(fromstr);
3063                if (utf8) {
3064                    U8 buffer[UTF8_MAXLEN], *endb;
3065                    endb = uvuni_to_utf8_flags(buffer, auv,
3066                                               warn_utf8 ?
3067                                               0 : UNICODE_ALLOW_ANY);
3068                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3069                        *cur = '\0';
3070                        SvCUR_set(cat, cur - start);
3071                        GROWING(0, cat, start, cur,
3072                                len+(endb-buffer)*UTF8_EXPAND);
3073                        end = start+SvLEN(cat);
3074                    }
3075                    cur = bytes_to_uni(buffer, endb-buffer, cur);
3076                } else {
3077                    if (cur >= end) {
3078                        *cur = '\0';
3079                        SvCUR_set(cat, cur - start);
3080                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3081                        end = start+SvLEN(cat)-UTF8_MAXLEN;
3082                    }
3083                    cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3084                                                       warn_utf8 ?
3085                                                       0 : UNICODE_ALLOW_ANY);
3086                }
3087            }
3088            break;
3089        }
3090        /* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
3091        case 'f':
3092            while (len-- > 0) {
3093                float afloat;
3094                NV anv;
3095                fromstr = NEXTFROM;
3096                anv = SvNV(fromstr);
3097#ifdef __VOS__
3098                /* VOS does not automatically map a floating-point overflow
3099                   during conversion from double to float into infinity, so we
3100                   do it by hand.  This code should either be generalized for
3101                   any OS that needs it, or removed if and when VOS implements
3102                   posix-976 (suggestion to support mapping to infinity).
3103                   Paul.Green@stratus.com 02-04-02.  */
3104{
3105extern const float _float_constants[];
3106                if (anv > FLT_MAX)
3107                    afloat = _float_constants[0];   /* single prec. inf. */
3108                else if (anv < -FLT_MAX)
3109                    afloat = _float_constants[0];   /* single prec. inf. */
3110                else afloat = (float) anv;
3111}
3112#else /* __VOS__ */
3113# if defined(VMS) && !defined(__IEEE_FP)
3114                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3115                 * on Alpha; fake it if we don't have them.
3116                 */
3117                if (anv > FLT_MAX)
3118                    afloat = FLT_MAX;
3119                else if (anv < -FLT_MAX)
3120                    afloat = -FLT_MAX;
3121                else afloat = (float)anv;
3122# else
3123                afloat = (float)anv;
3124# endif
3125#endif /* __VOS__ */
3126                DO_BO_PACK_N(afloat, float);
3127                PUSH_VAR(utf8, cur, afloat);
3128            }
3129            break;
3130        case 'd':
3131            while (len-- > 0) {
3132                double adouble;
3133                NV anv;
3134                fromstr = NEXTFROM;
3135                anv = SvNV(fromstr);
3136#ifdef __VOS__
3137                /* VOS does not automatically map a floating-point overflow
3138                   during conversion from long double to double into infinity,
3139                   so we do it by hand.  This code should either be generalized
3140                   for any OS that needs it, or removed if and when VOS
3141                   implements posix-976 (suggestion to support mapping to
3142                   infinity).  Paul.Green@stratus.com 02-04-02.  */
3143{
3144extern const double _double_constants[];
3145                if (anv > DBL_MAX)
3146                    adouble = _double_constants[0];   /* double prec. inf. */
3147                else if (anv < -DBL_MAX)
3148                    adouble = _double_constants[0];   /* double prec. inf. */
3149                else adouble = (double) anv;
3150}
3151#else /* __VOS__ */
3152# if defined(VMS) && !defined(__IEEE_FP)
3153                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3154                 * on Alpha; fake it if we don't have them.
3155                 */
3156                if (anv > DBL_MAX)
3157                    adouble = DBL_MAX;
3158                else if (anv < -DBL_MAX)
3159                    adouble = -DBL_MAX;
3160                else adouble = (double)anv;
3161# else
3162                adouble = (double)anv;
3163# endif
3164#endif /* __VOS__ */
3165                DO_BO_PACK_N(adouble, double);
3166                PUSH_VAR(utf8, cur, adouble);
3167            }
3168            break;
3169        case 'F': {
3170            NV anv;
3171            Zero(&anv, 1, NV); /* can be long double with unused bits */
3172            while (len-- > 0) {
3173                fromstr = NEXTFROM;
3174                anv = SvNV(fromstr);
3175                DO_BO_PACK_N(anv, NV);
3176                PUSH_VAR(utf8, cur, anv);
3177            }
3178            break;
3179        }
3180#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3181        case 'D': {
3182            long double aldouble;
3183            /* long doubles can have unused bits, which may be nonzero */
3184            Zero(&aldouble, 1, long double);
3185            while (len-- > 0) {
3186                fromstr = NEXTFROM;
3187                aldouble = (long double)SvNV(fromstr);
3188                DO_BO_PACK_N(aldouble, long double);
3189                PUSH_VAR(utf8, cur, aldouble);
3190            }
3191            break;
3192        }
3193#endif
3194#ifdef PERL_PACK_CAN_SHRIEKSIGN
3195        case 'n' | TYPE_IS_SHRIEKING:
3196#endif
3197        case 'n':
3198            while (len-- > 0) {
3199                I16 ai16;
3200                fromstr = NEXTFROM;
3201                ai16 = (I16)SvIV(fromstr);
3202#ifdef HAS_HTONS
3203                ai16 = PerlSock_htons(ai16);
3204#endif
3205                PUSH16(utf8, cur, &ai16);
3206            }
3207            break;
3208#ifdef PERL_PACK_CAN_SHRIEKSIGN
3209        case 'v' | TYPE_IS_SHRIEKING:
3210#endif
3211        case 'v':
3212            while (len-- > 0) {
3213                I16 ai16;
3214                fromstr = NEXTFROM;
3215                ai16 = (I16)SvIV(fromstr);
3216#ifdef HAS_HTOVS
3217                ai16 = htovs(ai16);
3218#endif
3219                PUSH16(utf8, cur, &ai16);
3220            }
3221            break;
3222        case 'S' | TYPE_IS_SHRIEKING:
3223#if SHORTSIZE != SIZE16
3224            while (len-- > 0) {
3225                unsigned short aushort;
3226                fromstr = NEXTFROM;
3227                aushort = SvUV(fromstr);
3228                DO_BO_PACK(aushort, s);
3229                PUSH_VAR(utf8, cur, aushort);
3230            }
3231            break;
3232#else
3233            /* Fall through! */
3234#endif
3235        case 'S':
3236            while (len-- > 0) {
3237                U16 au16;
3238                fromstr = NEXTFROM;
3239                au16 = (U16)SvUV(fromstr);
3240                DO_BO_PACK(au16, 16);
3241                PUSH16(utf8, cur, &au16);
3242            }
3243            break;
3244        case 's' | TYPE_IS_SHRIEKING:
3245#if SHORTSIZE != SIZE16
3246            while (len-- > 0) {
3247                short ashort;
3248                fromstr = NEXTFROM;
3249                ashort = SvIV(fromstr);
3250                DO_BO_PACK(ashort, s);
3251                PUSH_VAR(utf8, cur, ashort);
3252            }
3253            break;
3254#else
3255            /* Fall through! */
3256#endif
3257        case 's':
3258            while (len-- > 0) {
3259                I16 ai16;
3260                fromstr = NEXTFROM;
3261                ai16 = (I16)SvIV(fromstr);
3262                DO_BO_PACK(ai16, 16);
3263                PUSH16(utf8, cur, &ai16);
3264            }
3265            break;
3266        case 'I':
3267        case 'I' | TYPE_IS_SHRIEKING:
3268            while (len-- > 0) {
3269                unsigned int auint;
3270                fromstr = NEXTFROM;
3271                auint = SvUV(fromstr);
3272                DO_BO_PACK(auint, i);
3273                PUSH_VAR(utf8, cur, auint);
3274            }
3275            break;
3276        case 'j':
3277            while (len-- > 0) {
3278                IV aiv;
3279                fromstr = NEXTFROM;
3280                aiv = SvIV(fromstr);
3281#if IVSIZE == INTSIZE
3282                DO_BO_PACK(aiv, i);
3283#elif IVSIZE == LONGSIZE
3284                DO_BO_PACK(aiv, l);
3285#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3286                DO_BO_PACK(aiv, 64);
3287#else
3288                Perl_croak(aTHX_ "'j' not supported on this platform");
3289#endif
3290                PUSH_VAR(utf8, cur, aiv);
3291            }
3292            break;
3293        case 'J':
3294            while (len-- > 0) {
3295                UV auv;
3296                fromstr = NEXTFROM;
3297                auv = SvUV(fromstr);
3298#if UVSIZE == INTSIZE
3299                DO_BO_PACK(auv, i);
3300#elif UVSIZE == LONGSIZE
3301                DO_BO_PACK(auv, l);
3302#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3303                DO_BO_PACK(auv, 64);
3304#else
3305                Perl_croak(aTHX_ "'J' not supported on this platform");
3306#endif
3307                PUSH_VAR(utf8, cur, auv);
3308            }
3309            break;
3310        case 'w':
3311            while (len-- > 0) {
3312                NV anv;
3313                fromstr = NEXTFROM;
3314                anv = SvNV(fromstr);
3315
3316                if (anv < 0) {
3317                    *cur = '\0';
3318                    SvCUR_set(cat, cur - start);
3319                    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3320                }
3321
3322                /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3323                   which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3324                   any negative IVs will have already been got by the croak()
3325                   above. IOK is untrue for fractions, so we test them
3326                   against UV_MAX_P1.  */
3327                if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3328                    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
3329                    char  *in = buf + sizeof(buf);
3330                    UV     auv = SvUV(fromstr);
3331
3332                    do {
3333                        *--in = (char)((auv & 0x7f) | 0x80);
3334                        auv >>= 7;
3335                    } while (auv);
3336                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3337                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3338                                       in, (buf + sizeof(buf)) - in);
3339                } else if (SvPOKp(fromstr))
3340                    goto w_string;
3341                else if (SvNOKp(fromstr)) {
3342                    /* 10**NV_MAX_10_EXP is the largest power of 10
3343                       so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3344                       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3345                       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3346                       And with that many bytes only Inf can overflow.
3347                       Some C compilers are strict about integral constant
3348                       expressions so we conservatively divide by a slightly
3349                       smaller integer instead of multiplying by the exact
3350                       floating-point value.
3351                    */
3352#ifdef NV_MAX_10_EXP
3353                    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3354                    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3355#else
3356                    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3357                    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3358#endif
3359                    char  *in = buf + sizeof(buf);
3360
3361                    anv = Perl_floor(anv);
3362                    do {
3363                        const NV next = Perl_floor(anv / 128);
3364                        if (in <= buf)  /* this cannot happen ;-) */
3365                            Perl_croak(aTHX_ "Cannot compress integer in pack");
3366                        *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3367                        anv = next;
3368                    } while (anv > 0);
3369                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3370                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3371                                       in, (buf + sizeof(buf)) - in);
3372                } else {
3373                    const char     *from;
3374                    char           *result, *in;
3375                    SV             *norm;
3376                    STRLEN          len;
3377                    bool            done;
3378
3379                  w_string:
3380                    /* Copy string and check for compliance */
3381                    from = SvPV_const(fromstr, len);
3382                    if ((norm = is_an_int(from, len)) == NULL)
3383                        Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3384
3385                    Newx(result, len, char);
3386                    in = result + len;
3387                    done = FALSE;
3388                    while (!done) *--in = div128(norm, &done) | 0x80;
3389                    result[len - 1] &= 0x7F; /* clear continue bit */
3390                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3391                                       in, (result + len) - in);
3392                    Safefree(result);
3393                    SvREFCNT_dec(norm); /* free norm */
3394                }
3395            }
3396            break;
3397        case 'i':
3398        case 'i' | TYPE_IS_SHRIEKING:
3399            while (len-- > 0) {
3400                int aint;
3401                fromstr = NEXTFROM;
3402                aint = SvIV(fromstr);
3403                DO_BO_PACK(aint, i);
3404                PUSH_VAR(utf8, cur, aint);
3405            }
3406            break;
3407#ifdef PERL_PACK_CAN_SHRIEKSIGN
3408        case 'N' | TYPE_IS_SHRIEKING:
3409#endif
3410        case 'N':
3411            while (len-- > 0) {
3412                U32 au32;
3413                fromstr = NEXTFROM;
3414                au32 = SvUV(fromstr);
3415#ifdef HAS_HTONL
3416                au32 = PerlSock_htonl(au32);
3417#endif
3418                PUSH32(utf8, cur, &au32);
3419            }
3420            break;
3421#ifdef PERL_PACK_CAN_SHRIEKSIGN
3422        case 'V' | TYPE_IS_SHRIEKING:
3423#endif
3424        case 'V':
3425            while (len-- > 0) {
3426                U32 au32;
3427                fromstr = NEXTFROM;
3428                au32 = SvUV(fromstr);
3429#ifdef HAS_HTOVL
3430                au32 = htovl(au32);
3431#endif
3432                PUSH32(utf8, cur, &au32);
3433            }
3434            break;
3435        case 'L' | TYPE_IS_SHRIEKING:
3436#if LONGSIZE != SIZE32
3437            while (len-- > 0) {
3438                unsigned long aulong;
3439                fromstr = NEXTFROM;
3440                aulong = SvUV(fromstr);
3441                DO_BO_PACK(aulong, l);
3442                PUSH_VAR(utf8, cur, aulong);
3443            }
3444            break;
3445#else
3446            /* Fall though! */
3447#endif
3448        case 'L':
3449            while (len-- > 0) {
3450                U32 au32;
3451                fromstr = NEXTFROM;
3452                au32 = SvUV(fromstr);
3453                DO_BO_PACK(au32, 32);
3454                PUSH32(utf8, cur, &au32);
3455            }
3456            break;
3457        case 'l' | TYPE_IS_SHRIEKING:
3458#if LONGSIZE != SIZE32
3459            while (len-- > 0) {
3460                long along;
3461                fromstr = NEXTFROM;
3462                along = SvIV(fromstr);
3463                DO_BO_PACK(along, l);
3464                PUSH_VAR(utf8, cur, along);
3465            }
3466            break;
3467#else
3468            /* Fall though! */
3469#endif
3470        case 'l':
3471            while (len-- > 0) {
3472                I32 ai32;
3473                fromstr = NEXTFROM;
3474                ai32 = SvIV(fromstr);
3475                DO_BO_PACK(ai32, 32);
3476                PUSH32(utf8, cur, &ai32);
3477            }
3478            break;
3479#ifdef HAS_QUAD
3480        case 'Q':
3481            while (len-- > 0) {
3482                Uquad_t auquad;
3483                fromstr = NEXTFROM;
3484                auquad = (Uquad_t) SvUV(fromstr);
3485                DO_BO_PACK(auquad, 64);
3486                PUSH_VAR(utf8, cur, auquad);
3487            }
3488            break;
3489        case 'q':
3490            while (len-- > 0) {
3491                Quad_t aquad;
3492                fromstr = NEXTFROM;
3493                aquad = (Quad_t)SvIV(fromstr);
3494                DO_BO_PACK(aquad, 64);
3495                PUSH_VAR(utf8, cur, aquad);
3496            }
3497            break;
3498#endif /* HAS_QUAD */
3499        case 'P':
3500            len = 1;            /* assume SV is correct length */
3501            GROWING(utf8, cat, start, cur, sizeof(char *));
3502            /* Fall through! */
3503        case 'p':
3504            while (len-- > 0) {
3505                const char *aptr;
3506
3507                fromstr = NEXTFROM;
3508                SvGETMAGIC(fromstr);
3509                if (!SvOK(fromstr)) aptr = NULL;
3510                else {
3511                    /* XXX better yet, could spirit away the string to
3512                     * a safe spot and hang on to it until the result
3513                     * of pack() (and all copies of the result) are
3514                     * gone.
3515                     */
3516                    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3517                             !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3518                        Perl_warner(aTHX_ packWARN(WARN_PACK),
3519                                    "Attempt to pack pointer to temporary value");
3520                    }
3521                    if (SvPOK(fromstr) || SvNIOK(fromstr))
3522                        aptr = SvPV_nomg_const_nolen(fromstr);
3523                    else
3524                        aptr = SvPV_force_flags_nolen(fromstr, 0);
3525                }
3526                DO_BO_PACK_PC(aptr);
3527                PUSH_VAR(utf8, cur, aptr);
3528            }
3529            break;
3530        case 'u': {
3531            const char *aptr, *aend;
3532            bool from_utf8;
3533
3534            fromstr = NEXTFROM;
3535            if (len <= 2) len = 45;
3536            else len = len / 3 * 3;
3537            if (len >= 64) {
3538                if (ckWARN(WARN_PACK))
3539                    Perl_warner(aTHX_ packWARN(WARN_PACK),
3540                            "Field too wide in 'u' format in pack");
3541                len = 63;
3542            }
3543            aptr = SvPV_const(fromstr, fromlen);
3544            from_utf8 = DO_UTF8(fromstr);
3545            if (from_utf8) {
3546                aend = aptr + fromlen;
3547                fromlen = sv_len_utf8(fromstr);
3548            } else aend = NULL; /* Unused, but keep compilers happy */
3549            GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3550            while (fromlen > 0) {
3551                U8 *end;
3552                I32 todo;
3553                U8 hunk[1+63/3*4+1];
3554
3555                if ((I32)fromlen > len)
3556                    todo = len;
3557                else
3558                    todo = fromlen;
3559                if (from_utf8) {
3560                    char buffer[64];
3561                    if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3562                                      'u' | TYPE_IS_PACK)) {
3563                        *cur = '\0';
3564                        SvCUR_set(cat, cur - start);
3565                        Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3566                    }
3567                    end = doencodes(hunk, buffer, todo);
3568                } else {
3569                    end = doencodes(hunk, aptr, todo);
3570                    aptr += todo;
3571                }
3572                PUSH_BYTES(utf8, cur, hunk, end-hunk);
3573                fromlen -= todo;
3574            }
3575            break;
3576        }
3577        }
3578        *cur = '\0';
3579        SvCUR_set(cat, cur - start);
3580      no_change:
3581        *symptr = lookahead;
3582    }
3583    return beglist;
3584}
3585#undef NEXTFROM
3586
3587
3588PP(pp_pack)
3589{
3590    dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3591    register SV *cat = TARG;
3592    STRLEN fromlen;
3593    SV *pat_sv = *++MARK;
3594    register const char *pat = SvPV_const(pat_sv, fromlen);
3595    register const char *patend = pat + fromlen;
3596
3597    MARK++;
3598    sv_setpvs(cat, "");
3599    SvUTF8_off(cat);
3600
3601    packlist(cat, pat, patend, MARK, SP + 1);
3602
3603    SvSETMAGIC(cat);
3604    SP = ORIGMARK;
3605    PUSHs(cat);
3606    RETURN;
3607}
3608
3609/*
3610 * Local variables:
3611 * c-indentation-style: bsd
3612 * c-basic-offset: 4
3613 * indent-tabs-mode: t
3614 * End:
3615 *
3616 * ex: set ts=8 sts=4 sw=4 noet:
3617 */
3618
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.