darwin-xnu/libkern/stdio/scanf.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
   3 *
   4 * @APPLE_LICENSE_HEADER_START@
   5 * 
   6 * This file contains Original Code and/or Modifications of Original Code
   7 * as defined in and that are subject to the Apple Public Source License
   8 * Version 2.0 (the 'License'). You may not use this file except in
   9 * compliance with the License. Please obtain a copy of the License at
  10 * http://www.opensource.apple.com/apsl/ and read it before using this
  11 * file.
  12 * 
  13 * The Original Code and all software distributed under the License are
  14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18 * Please see the License for the specific language governing rights and
  19 * limitations under the License.
  20 * 
  21 * @APPLE_LICENSE_HEADER_END@
  22 */
  23/*-
  24 * Copyright (c) 1990, 1993
  25 *      The Regents of the University of California.  All rights reserved.
  26 *
  27 * This code is derived from software contributed to Berkeley by
  28 * Chris Torek.
  29 *
  30 * Redistribution and use in source and binary forms, with or without
  31 * modification, are permitted provided that the following conditions
  32 * are met:
  33 * 1. Redistributions of source code must retain the above copyright
  34 *    notice, this list of conditions and the following disclaimer.
  35 * 2. Redistributions in binary form must reproduce the above copyright
  36 *    notice, this list of conditions and the following disclaimer in the
  37 *    documentation and/or other materials provided with the distribution.
  38 * 3. All advertising materials mentioning features or use of this software
  39 *    must display the following acknowledgement:
  40 *      This product includes software developed by the University of
  41 *      California, Berkeley and its contributors.
  42 * 4. Neither the name of the University nor the names of its contributors
  43 *    may be used to endorse or promote products derived from this software
  44 *    without specific prior written permission.
  45 *
  46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  49 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  56 * SUCH DAMAGE.
  57 */
  58
  59#include <sys/cdefs.h>
  60
  61#if 0 /* XXX coming soon */
  62#include <ctype.h>
  63#else
  64static inline int
  65isspace(char c)
  66{
  67        return (c == ' ' || c == '\t' || c == '\n' || c == '\12');
  68}
  69#endif
  70#include <stdarg.h>
  71#include <string.h>
  72#include <sys/param.h>
  73#include <sys/systm.h>
  74
  75#define BUF             32      /* Maximum length of numeric string. */
  76
  77/*
  78 * Flags used during conversion.
  79 */
  80#define LONG            0x01    /* l: long or double */
  81#define SHORT           0x04    /* h: short */
  82#define SUPPRESS        0x08    /* *: suppress assignment */
  83#define POINTER         0x10    /* p: void * (as hex) */
  84#define NOSKIP          0x20    /* [ or c: do not skip blanks */
  85#define LONGLONG        0x400   /* ll: long long (+ deprecated q: quad) */
  86#define SHORTSHORT      0x4000  /* hh: char */
  87#define UNSIGNED        0x8000  /* %[oupxX] conversions */
  88
  89/*
  90 * The following are used in numeric conversions only:
  91 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
  92 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
  93 */
  94#define SIGNOK          0x40    /* +/- is (still) legal */
  95#define NDIGITS         0x80    /* no digits detected */
  96
  97#define DPTOK           0x100   /* (float) decimal point is still legal */
  98#define EXPOK           0x200   /* (float) exponent (e+3, etc) still legal */
  99
 100#define PFXOK           0x100   /* 0x prefix is (still) legal */
 101#define NZDIGITS        0x200   /* no zero digits detected */
 102
 103/*
 104 * Conversion types.
 105 */
 106#define CT_CHAR         0       /* %c conversion */
 107#define CT_CCL          1       /* %[...] conversion */
 108#define CT_STRING       2       /* %s conversion */
 109#define CT_INT          3       /* %[dioupxX] conversion */
 110
 111static const u_char *__sccl(char *, const u_char *);
 112
 113int
 114sscanf(const char *ibuf, const char *fmt, ...)
 115{
 116        va_list ap;
 117        int ret;
 118        
 119        va_start(ap, fmt);
 120        ret = vsscanf(ibuf, fmt, ap);
 121        va_end(ap);
 122        return(ret);
 123}
 124
 125int
 126vsscanf(const char *inp, char const *fmt0, va_list ap)
 127{
 128        int inr;
 129        const u_char *fmt = (const u_char *)fmt0;
 130        int c;                  /* character from format, or conversion */
 131        size_t width;           /* field width, or 0 */
 132        char *p;                /* points into all kinds of strings */
 133        int n;                  /* handy integer */
 134        int flags;              /* flags as defined above */
 135        char *p0;               /* saves original value of p when necessary */
 136        int nassigned;          /* number of fields assigned */
 137        int nconversions;       /* number of conversions */
 138        int nread;              /* number of characters consumed from fp */
 139        int base;               /* base argument to conversion function */
 140        char ccltab[256];       /* character class table for %[...] */
 141        char buf[BUF];          /* buffer for numeric conversions */
 142
 143        /* `basefix' is used to avoid `if' tests in the integer scanner */
 144        static short basefix[17] =
 145                { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
 146
 147        inr = strlen(inp);
 148        
 149        nassigned = 0;
 150        nconversions = 0;
 151        nread = 0;
 152        base = 0;               /* XXX just to keep gcc happy */
 153        for (;;) {
 154                c = *fmt++;
 155                if (c == 0)
 156                        return (nassigned);
 157                if (isspace(c)) {
 158                        while (inr > 0 && isspace(*inp))
 159                                nread++, inr--, inp++;
 160                        continue;
 161                }
 162                if (c != '%')
 163                        goto literal;
 164                width = 0;
 165                flags = 0;
 166                /*
 167                 * switch on the format.  continue if done;
 168                 * break once format type is derived.
 169                 */
 170again:          c = *fmt++;
 171                switch (c) {
 172                case '%':
 173literal:
 174                        if (inr <= 0)
 175                                goto input_failure;
 176                        if (*inp != c)
 177                                goto match_failure;
 178                        inr--, inp++;
 179                        nread++;
 180                        continue;
 181
 182                case '*':
 183                        flags |= SUPPRESS;
 184                        goto again;
 185                case 'l':
 186                        if (flags & LONG) {
 187                                flags &= ~LONG;
 188                                flags |= LONGLONG;
 189                        } else
 190                                flags |= LONG;
 191                        goto again;
 192                case 'q':
 193                        flags |= LONGLONG;      /* not quite */
 194                        goto again;
 195                case 'h':
 196                        if (flags & SHORT) {
 197                                flags &= ~SHORT;
 198                                flags |= SHORTSHORT;
 199                        } else
 200                                flags |= SHORT;
 201                        goto again;
 202
 203                case '0': case '1': case '2': case '3': case '4':
 204                case '5': case '6': case '7': case '8': case '9':
 205                        width = width * 10 + c - '0';
 206                        goto again;
 207
 208                /*
 209                 * Conversions.
 210                 */
 211                case 'd':
 212                        c = CT_INT;
 213                        base = 10;
 214                        break;
 215
 216                case 'i':
 217                        c = CT_INT;
 218                        base = 0;
 219                        break;
 220
 221                case 'o':
 222                        c = CT_INT;
 223                        flags |= UNSIGNED;
 224                        base = 8;
 225                        break;
 226
 227                case 'u':
 228                        c = CT_INT;
 229                        flags |= UNSIGNED;
 230                        base = 10;
 231                        break;
 232
 233                case 'X':
 234                case 'x':
 235                        flags |= PFXOK; /* enable 0x prefixing */
 236                        c = CT_INT;
 237                        flags |= UNSIGNED;
 238                        base = 16;
 239                        break;
 240
 241                case 's':
 242                        c = CT_STRING;
 243                        break;
 244
 245                case '[':
 246                        fmt = __sccl(ccltab, fmt);
 247                        flags |= NOSKIP;
 248                        c = CT_CCL;
 249                        break;
 250
 251                case 'c':
 252                        flags |= NOSKIP;
 253                        c = CT_CHAR;
 254                        break;
 255
 256                case 'p':       /* pointer format is like hex */
 257                        flags |= POINTER | PFXOK;
 258                        c = CT_INT;
 259                        flags |= UNSIGNED;
 260                        base = 16;
 261                        break;
 262
 263                case 'n':
 264                        nconversions++;
 265                        if (flags & SUPPRESS)   /* ??? */
 266                                continue;
 267                        if (flags & SHORTSHORT)
 268                                *va_arg(ap, char *) = nread;
 269                        else if (flags & SHORT)
 270                                *va_arg(ap, short *) = nread;
 271                        else if (flags & LONG)
 272                                *va_arg(ap, long *) = nread;
 273                        else if (flags & LONGLONG)
 274                                *va_arg(ap, long long *) = nread;
 275                        else
 276                                *va_arg(ap, int *) = nread;
 277                        continue;
 278                }
 279
 280                /*
 281                 * We have a conversion that requires input.
 282                 */
 283                if (inr <= 0)
 284                        goto input_failure;
 285
 286                /*
 287                 * Consume leading white space, except for formats
 288                 * that suppress this.
 289                 */
 290                if ((flags & NOSKIP) == 0) {
 291                        while (isspace(*inp)) {
 292                                nread++;
 293                                if (--inr > 0)
 294                                        inp++;
 295                                else 
 296                                        goto input_failure;
 297                        }
 298                        /*
 299                         * Note that there is at least one character in
 300                         * the buffer, so conversions that do not set NOSKIP
 301                         * can no longer result in an input failure.
 302                         */
 303                }
 304
 305                /*
 306                 * Do the conversion.
 307                 */
 308                switch (c) {
 309
 310                case CT_CHAR:
 311                        /* scan arbitrary characters (sets NOSKIP) */
 312                        if (width == 0)
 313                                width = 1;
 314                        if (flags & SUPPRESS) {
 315                                size_t sum = 0;
 316                                for (;;) {
 317                                        if ((n = inr) < (int)width) {
 318                                                sum += n;
 319                                                width -= n;
 320                                                inp += n;
 321                                                if (sum == 0)
 322                                                        goto input_failure;
 323                                                break;
 324                                        } else {
 325                                                sum += width;
 326                                                inr -= width;
 327                                                inp += width;
 328                                                break;
 329                                        }
 330                                }
 331                                nread += sum;
 332                        } else {
 333                                bcopy(inp, va_arg(ap, char *), width);
 334                                inr -= width;
 335                                inp += width;
 336                                nread += width;
 337                                nassigned++;
 338                        }
 339                        nconversions++;
 340                        break;
 341
 342                case CT_CCL:
 343                        /* scan a (nonempty) character class (sets NOSKIP) */
 344                        if (width == 0)
 345                                width = (size_t)~0;     /* `infinity' */
 346                        /* take only those things in the class */
 347                        if (flags & SUPPRESS) {
 348                                n = 0;
 349                                while (ccltab[(unsigned char)*inp]) {
 350                                        n++, inr--, inp++;
 351                                        if (--width == 0)
 352                                                break;
 353                                        if (inr <= 0) {
 354                                                if (n == 0)
 355                                                        goto input_failure;
 356                                                break;
 357                                        }
 358                                }
 359                                if (n == 0)
 360                                        goto match_failure;
 361                        } else {
 362                                p0 = p = va_arg(ap, char *);
 363                                while (ccltab[(unsigned char)*inp]) {
 364                                        inr--;
 365                                        *p++ = *inp++;
 366                                        if (--width == 0)
 367                                                break;
 368                                        if (inr <= 0) {
 369                                                if (p == p0)
 370                                                        goto input_failure;
 371                                                break;
 372                                        }
 373                                }
 374                                n = p - p0;
 375                                if (n == 0)
 376                                        goto match_failure;
 377                                *p = 0;
 378                                nassigned++;
 379                        }
 380                        nread += n;
 381                        nconversions++;
 382                        break;
 383
 384                case CT_STRING:
 385                        /* like CCL, but zero-length string OK, & no NOSKIP */
 386                        if (width == 0)
 387                                width = (size_t)~0;
 388                        if (flags & SUPPRESS) {
 389                                n = 0;
 390                                while (!isspace(*inp)) {
 391                                        n++, inr--, inp++;
 392                                        if (--width == 0)
 393                                                break;
 394                                        if (inr <= 0)
 395                                                break;
 396                                }
 397                                nread += n;
 398                        } else {
 399                                p0 = p = va_arg(ap, char *);
 400                                while (!isspace(*inp)) {
 401                                        inr--;
 402                                        *p++ = *inp++;
 403                                        if (--width == 0)
 404                                                break;
 405                                        if (inr <= 0)
 406                                                break;
 407                                }
 408                                *p = 0;
 409                                nread += p - p0;
 410                                nassigned++;
 411                        }
 412                        nconversions++;
 413                        continue;
 414
 415                case CT_INT:
 416                        /* scan an integer as if by the conversion function */
 417#ifdef hardway
 418                        if (width == 0 || width > sizeof(buf) - 1)
 419                                width = sizeof(buf) - 1;
 420#else
 421                        /* size_t is unsigned, hence this optimisation */
 422                        if (--width > sizeof(buf) - 2)
 423                                width = sizeof(buf) - 2;
 424                        width++;
 425#endif
 426                        flags |= SIGNOK | NDIGITS | NZDIGITS;
 427                        for (p = buf; width; width--) {
 428                                c = *inp;
 429                                /*
 430                                 * Switch on the character; `goto ok'
 431                                 * if we accept it as a part of number.
 432                                 */
 433                                switch (c) {
 434
 435                                /*
 436                                 * The digit 0 is always legal, but is
 437                                 * special.  For %i conversions, if no
 438                                 * digits (zero or nonzero) have been
 439                                 * scanned (only signs), we will have
 440                                 * base==0.  In that case, we should set
 441                                 * it to 8 and enable 0x prefixing.
 442                                 * Also, if we have not scanned zero digits
 443                                 * before this, do not turn off prefixing
 444                                 * (someone else will turn it off if we
 445                                 * have scanned any nonzero digits).
 446                                 */
 447                                case '0':
 448                                        if (base == 0) {
 449                                                base = 8;
 450                                                flags |= PFXOK;
 451                                        }
 452                                        if (flags & NZDIGITS)
 453                                            flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
 454                                        else
 455                                            flags &= ~(SIGNOK|PFXOK|NDIGITS);
 456                                        goto ok;
 457
 458                                /* 1 through 7 always legal */
 459                                case '1': case '2': case '3':
 460                                case '4': case '5': case '6': case '7':
 461                                        base = basefix[base];
 462                                        flags &= ~(SIGNOK | PFXOK | NDIGITS);
 463                                        goto ok;
 464
 465                                /* digits 8 and 9 ok iff decimal or hex */
 466                                case '8': case '9':
 467                                        base = basefix[base];
 468                                        if (base <= 8)
 469                                                break;  /* not legal here */
 470                                        flags &= ~(SIGNOK | PFXOK | NDIGITS);
 471                                        goto ok;
 472
 473                                /* letters ok iff hex */
 474                                case 'A': case 'B': case 'C':
 475                                case 'D': case 'E': case 'F':
 476                                case 'a': case 'b': case 'c':
 477                                case 'd': case 'e': case 'f':
 478                                        /* no need to fix base here */
 479                                        if (base <= 10)
 480                                                break;  /* not legal here */
 481                                        flags &= ~(SIGNOK | PFXOK | NDIGITS);
 482                                        goto ok;
 483
 484                                /* sign ok only as first character */
 485                                case '+': case '-':
 486                                        if (flags & SIGNOK) {
 487                                                flags &= ~SIGNOK;
 488                                                goto ok;
 489                                        }
 490                                        break;
 491
 492                                /* x ok iff flag still set & 2nd char */
 493                                case 'x': case 'X':
 494                                        if (flags & PFXOK && p == buf + 1) {
 495                                                base = 16;      /* if %i */
 496                                                flags &= ~PFXOK;
 497                                                goto ok;
 498                                        }
 499                                        break;
 500                                }
 501
 502                                /*
 503                                 * If we got here, c is not a legal character
 504                                 * for a number.  Stop accumulating digits.
 505                                 */
 506                                break;
 507                ok:
 508                                /*
 509                                 * c is legal: store it and look at the next.
 510                                 */
 511                                *p++ = c;
 512                                if (--inr > 0)
 513                                        inp++;
 514                                else 
 515                                        break;          /* end of input */
 516                        }
 517                        /*
 518                         * If we had only a sign, it is no good; push
 519                         * back the sign.  If the number ends in `x',
 520                         * it was [sign] '0' 'x', so push back the x
 521                         * and treat it as [sign] '0'.
 522                         */
 523                        if (flags & NDIGITS) {
 524                                if (p > buf) {
 525                                        inp--;
 526                                        inr++;
 527                                }
 528                                goto match_failure;
 529                        }
 530                        c = ((u_char *)p)[-1];
 531                        if (c == 'x' || c == 'X') {
 532                                --p;
 533                                inp--;
 534                                inr++;
 535                        }
 536                        if ((flags & SUPPRESS) == 0) {
 537                                u_quad_t res;
 538
 539                                *p = 0;
 540                                if ((flags & UNSIGNED) == 0)
 541                                    res = strtoq(buf, (char **)NULL, base);
 542                                else
 543                                    res = strtouq(buf, (char **)NULL, base);
 544                                if (flags & POINTER)
 545                                        *va_arg(ap, void **) =
 546                                                (void *)(uintptr_t)res;
 547                                else if (flags & SHORTSHORT)
 548                                        *va_arg(ap, char *) = res;
 549                                else if (flags & SHORT)
 550                                        *va_arg(ap, short *) = res;
 551                                else if (flags & LONG)
 552                                        *va_arg(ap, long *) = res;
 553                                else if (flags & LONGLONG)
 554                                        *va_arg(ap, long long *) = res;
 555                                else
 556                                        *va_arg(ap, int *) = res;
 557                                nassigned++;
 558                        }
 559                        nread += p - buf;
 560                        nconversions++;
 561                        break;
 562
 563                }
 564        }
 565input_failure:
 566        return (nconversions != 0 ? nassigned : -1);
 567match_failure:
 568        return (nassigned);
 569}
 570
 571/*
 572 * Fill in the given table from the scanset at the given format
 573 * (just after `[').  Return a pointer to the character past the
 574 * closing `]'.  The table has a 1 wherever characters should be
 575 * considered part of the scanset.
 576 */
 577static const u_char *
 578__sccl(char *tab, const u_char *fmt)
 579{
 580        int c, n, v;
 581
 582        /* first `clear' the whole table */
 583        c = *fmt++;             /* first char hat => negated scanset */
 584        if (c == '^') {
 585                v = 1;          /* default => accept */
 586                c = *fmt++;     /* get new first char */
 587        } else
 588                v = 0;          /* default => reject */
 589
 590        /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
 591        (void) memset(tab, v, 256);
 592
 593        if (c == 0)
 594                return (fmt - 1);/* format ended before closing ] */
 595
 596        /*
 597         * Now set the entries corresponding to the actual scanset
 598         * to the opposite of the above.
 599         *
 600         * The first character may be ']' (or '-') without being special;
 601         * the last character may be '-'.
 602         */
 603        v = 1 - v;
 604        for (;;) {
 605                tab[c] = v;             /* take character c */
 606doswitch:
 607                n = *fmt++;             /* and examine the next */
 608                switch (n) {
 609
 610                case 0:                 /* format ended too soon */
 611                        return (fmt - 1);
 612
 613                case '-':
 614                        /*
 615                         * A scanset of the form
 616                         *      [01+-]
 617                         * is defined as `the digit 0, the digit 1,
 618                         * the character +, the character -', but
 619                         * the effect of a scanset such as
 620                         *      [a-zA-Z0-9]
 621                         * is implementation defined.  The V7 Unix
 622                         * scanf treats `a-z' as `the letters a through
 623                         * z', but treats `a-a' as `the letter a, the
 624                         * character -, and the letter a'.
 625                         *
 626                         * For compatibility, the `-' is not considerd
 627                         * to define a range if the character following
 628                         * it is either a close bracket (required by ANSI)
 629                         * or is not numerically greater than the character
 630                         * we just stored in the table (c).
 631                         */
 632                        n = *fmt;
 633                        if (n == ']' || n < c) {
 634                                c = '-';
 635                                break;  /* resume the for(;;) */
 636                        }
 637                        fmt++;
 638                        /* fill in the range */
 639                        do {
 640                            tab[++c] = v;
 641                        } while (c < n);
 642                        c = n;
 643                        /*
 644                         * Alas, the V7 Unix scanf also treats formats
 645                         * such as [a-c-e] as `the letters a through e'.
 646                         * This too is permitted by the standard....
 647                         */
 648                        goto doswitch;
 649                        break;
 650
 651                case ']':               /* end of scanset */
 652                        return (fmt);
 653
 654                default:                /* just another character */
 655                        c = n;
 656                        break;
 657                }
 658        }
 659        /* NOTREACHED */
 660}
 661
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.