code.delx.au - gnu-emacs/blob - src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2011  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
  46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or the value of the pI macro.  Also, %% in a format
  74    stands for a single % in the output.  A % that does not introduce a
  75    valid %-sequence causes undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, the value of the pI macro means to use EMACS_INT or
  89    EMACS_UINT and the empty length modifier means `int' or `unsigned int'.
  90
  91    The width specifier supplies a lower limit for the length of the printed
  92    representation.  The padding, if any, normally goes on the left, but it goes
  93    on the right if the - flag is present.  The padding character is normally a
  94    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  95    The - flag takes precedence over the 0 flag.
  96
  97    For %e, %f, and %g sequences, the number after the "." in the precision
  98    specifier says how many decimal places to show; if zero, the decimal point
  99    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 100
 101 #include <config.h>
 102 #include <stdio.h>
 103 #include <ctype.h>
 104 #include <setjmp.h>
 105
 106 #ifdef STDC_HEADERS
 107 #include <float.h>
 108 #endif
 109
 110 #include <unistd.h>
 111
 112 #include <limits.h>
 113
 114 #include "lisp.h"
 115
 116 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 117    don't have to include others because CHAR_HEAD_P does not contain
 118    another macro.  */
 119 #include "character.h"
 120
 121 #ifndef SIZE_MAX
 122 # define SIZE_MAX ((size_t) -1)
 123 #endif
 124
 125 #ifndef DBL_MAX_10_EXP
 126 #define DBL_MAX_10_EXP 308 /* IEEE double */
 127 #endif
 128
 129 /* Generate output from a format-spec FORMAT,
 130    terminated at position FORMAT_END.
 131    (*FORMAT_END is not part of the format, but must exist and be readable.)
 132    Output goes in BUFFER, which has room for BUFSIZE chars.
 133    BUFSIZE must be positive.  If the output does not fit, truncate it
 134    to fit and return BUFSIZE - 1; if this truncates a multibyte
 135    sequence, store '\0' into the sequence's first byte.
 136    Returns the number of bytes stored into BUFFER, excluding
 137    the terminating null byte.  Output is always null-terminated.
 138    String arguments are passed as C strings.
 139    Integers are passed as C integers.  */
 140
 141 size_t
 142 doprnt (char *buffer, register size_t bufsize, const char *format,
 143         const char *format_end, va_list ap)
 144 {
 145   const char *fmt = format;     /* Pointer into format string */
 146   register char *bufptr = buffer; /* Pointer into output buffer.. */
 147
 148   /* Use this for sprintf unless we need something really big.  */
 149   char tembuf[DBL_MAX_10_EXP + 100];
 150
 151   /* Size of sprintf_buffer.  */
 152   size_t size_allocated = sizeof (tembuf);
 153
 154   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 155   char *sprintf_buffer = tembuf;
 156
 157   /* Buffer we have got with malloc.  */
 158   char *big_buffer = NULL;
 159
 160   register size_t tem;
 161   char *string;
 162   char fixed_buffer[20];        /* Default buffer for small formatting. */
 163   char *fmtcpy;
 164   int minlen;
 165   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 166   USE_SAFE_ALLOCA;
 167
 168   if (format_end == 0)
 169     format_end = format + strlen (format);
 170
 171   if ((format_end - format + 1) < sizeof (fixed_buffer))
 172     fmtcpy = fixed_buffer;
 173   else
 174     SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
 175
 176   bufsize--;
 177
 178   /* Loop until end of format string or buffer full. */
 179   while (fmt < format_end && bufsize > 0)
 180     {
 181       if (*fmt == '%')  /* Check for a '%' character */
 182         {
 183           size_t size_bound = 0;
 184           EMACS_INT width;  /* Columns occupied by STRING on display.  */
 185           int long_flag = 0;
 186           int pIlen = sizeof pI - 1;
 187
 188           fmt++;
 189           /* Copy this one %-spec into fmtcpy.  */
 190           string = fmtcpy;
 191           *string++ = '%';
 192           while (fmt < format_end)
 193             {
 194               *string++ = *fmt;
 195               if ('0' <= *fmt && *fmt <= '9')
 196                 {
 197                   /* Get an idea of how much space we might need.
 198                      This might be a field width or a precision; e.g.
 199                      %1.1000f and %1000.1f both might need 1000+ bytes.
 200                      Parse the width or precision, checking for overflow.  */
 201                   size_t n = *fmt - '0';
 202                   while (fmt + 1 < format_end
 203                          && '0' <= fmt[1] && fmt[1] <= '9')
 204                     {
 205                       /* Avoid size_t overflow.  Avoid int overflow too, as
 206                          many sprintfs mishandle widths greater than INT_MAX.
 207                          This test is simple but slightly conservative: e.g.,
 208                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 209                          even when it's not.  */
 210                       if (n >= min (INT_MAX, SIZE_MAX) / 10)
 211                         error ("Format width or precision too large");
 212                       n = n * 10 + fmt[1] - '0';
 213                       *string++ = *++fmt;
 214                     }
 215
 216                   if (size_bound < n)
 217                     size_bound = n;
 218                 }
 219               else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
 220                           || *fmt == '+'))
 221                 break;
 222               fmt++;
 223             }
 224
 225           if (0 < pIlen && pIlen <= format_end - fmt
 226               && memcmp (fmt, pI, pIlen) == 0)
 227             {
 228               long_flag = 2;
 229               memcpy (string, fmt + 1, pIlen);
 230               string += pIlen;
 231               fmt += pIlen;
 232             }
 233           else if (fmt < format_end && *fmt == 'l')
 234             {
 235               long_flag = 1;
 236               *string++ = *++fmt;
 237             }
 238           *string = 0;
 239
 240           /* Make the size bound large enough to handle floating point formats
 241              with large numbers.  */
 242           if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
 243             error ("Format width or precision too large");
 244           size_bound += DBL_MAX_10_EXP + 50;
 245
 246           /* Make sure we have that much.  */
 247           if (size_bound > size_allocated)
 248             {
 249               if (big_buffer)
 250                 xfree (big_buffer);
 251               big_buffer = (char *) xmalloc (size_bound);
 252               sprintf_buffer = big_buffer;
 253               size_allocated = size_bound;
 254             }
 255           minlen = 0;
 256           switch (*fmt++)
 257             {
 258             default:
 259               error ("Invalid format operation %s", fmtcpy);
 260
 261 /*          case 'b': */
 262             case 'l':
 263             case 'd':
 264               {
 265                 int i;
 266                 long l;
 267
 268                 if (1 < long_flag)
 269                   {
 270                     EMACS_INT ll = va_arg (ap, EMACS_INT);
 271                     sprintf (sprintf_buffer, fmtcpy, ll);
 272                   }
 273                 else if (long_flag)
 274                   {
 275                     l = va_arg(ap, long);
 276                     sprintf (sprintf_buffer, fmtcpy, l);
 277                   }
 278                 else
 279                   {
 280                     i = va_arg(ap, int);
 281                     sprintf (sprintf_buffer, fmtcpy, i);
 282                   }
 283                 /* Now copy into final output, truncating as necessary.  */
 284                 string = sprintf_buffer;
 285                 goto doit;
 286               }
 287
 288             case 'o':
 289             case 'x':
 290               {
 291                 unsigned u;
 292                 unsigned long ul;
 293
 294                 if (1 < long_flag)
 295                   {
 296                     EMACS_UINT ull = va_arg (ap, EMACS_UINT);
 297                     sprintf (sprintf_buffer, fmtcpy, ull);
 298                   }
 299                 else if (long_flag)
 300                   {
 301                     ul = va_arg(ap, unsigned long);
 302                     sprintf (sprintf_buffer, fmtcpy, ul);
 303                   }
 304                 else
 305                   {
 306                     u = va_arg(ap, unsigned);
 307                     sprintf (sprintf_buffer, fmtcpy, u);
 308                   }
 309                 /* Now copy into final output, truncating as necessary.  */
 310                 string = sprintf_buffer;
 311                 goto doit;
 312               }
 313
 314             case 'f':
 315             case 'e':
 316             case 'g':
 317               {
 318                 double d = va_arg(ap, double);
 319                 sprintf (sprintf_buffer, fmtcpy, d);
 320                 /* Now copy into final output, truncating as necessary.  */
 321                 string = sprintf_buffer;
 322                 goto doit;
 323               }
 324
 325             case 'S':
 326               string[-1] = 's';
 327             case 's':
 328               if (fmtcpy[1] != 's')
 329                 minlen = atoi (&fmtcpy[1]);
 330               string = va_arg (ap, char *);
 331               tem = strlen (string);
 332               if (STRING_BYTES_BOUND < tem)
 333                 error ("String for %%s or %%S format is too long");
 334               width = strwidth (string, tem);
 335               goto doit1;
 336
 337               /* Copy string into final output, truncating if no room.  */
 338             doit:
 339               /* Coming here means STRING contains ASCII only.  */
 340               tem = strlen (string);
 341               if (STRING_BYTES_BOUND < tem)
 342                 error ("Format width or precision too large");
 343               width = tem;
 344             doit1:
 345               /* We have already calculated:
 346                  TEM -- length of STRING,
 347                  WIDTH -- columns occupied by STRING when displayed, and
 348                  MINLEN -- minimum columns of the output.  */
 349               if (minlen > 0)
 350                 {
 351                   while (minlen > width && bufsize > 0)
 352                     {
 353                       *bufptr++ = ' ';
 354                       bufsize--;
 355                       minlen--;
 356                     }
 357                   minlen = 0;
 358                 }
 359               if (tem > bufsize)
 360                 {
 361                   /* Truncate the string at character boundary.  */
 362                   tem = bufsize;
 363                   while (!CHAR_HEAD_P (string[tem - 1])) tem--;
 364                   /* If the multibyte sequence of this character is
 365                      too long for the space we have left in the
 366                      buffer, truncate before it.  */
 367                   if (tem > 0
 368                       && BYTES_BY_CHAR_HEAD (string[tem - 1]) > bufsize)
 369                     tem--;
 370                   if (tem > 0)
 371                     memcpy (bufptr, string, tem);
 372                   bufptr[tem] = 0;
 373                   /* Trigger exit from the loop, but make sure we
 374                      return to the caller a value which will indicate
 375                      that the buffer was too small.  */
 376                   bufptr += bufsize;
 377                   bufsize = 0;
 378                   continue;
 379                 }
 380               else
 381                 memcpy (bufptr, string, tem);
 382               bufptr += tem;
 383               bufsize -= tem;
 384               if (minlen < 0)
 385                 {
 386                   while (minlen < - width && bufsize > 0)
 387                     {
 388                       *bufptr++ = ' ';
 389                       bufsize--;
 390                       minlen++;
 391                     }
 392                   minlen = 0;
 393                 }
 394               continue;
 395
 396             case 'c':
 397               {
 398                 int chr = va_arg(ap, int);
 399                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 400                 string = charbuf;
 401                 string[tem] = 0;
 402                 width = strwidth (string, tem);
 403                 if (fmtcpy[1] != 'c')
 404                   minlen = atoi (&fmtcpy[1]);
 405                 goto doit1;
 406               }
 407
 408             case '%':
 409               fmt--;    /* Drop thru and this % will be treated as normal */
 410             }
 411         }
 412
 413       {
 414         /* Just some character; Copy it if the whole multi-byte form
 415            fit in the buffer.  */
 416         char *save_bufptr = bufptr;
 417
 418         do { *bufptr++ = *fmt++; }
 419         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 420         if (!CHAR_HEAD_P (*fmt))
 421           {
 422             /* Truncate, but return value that will signal to caller
 423                that the buffer was too small.  */
 424             *save_bufptr = 0;
 425             break;
 426           }
 427       }
 428     };
 429
 430   /* If we had to malloc something, free it.  */
 431   xfree (big_buffer);
 432
 433   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 434
 435   SAFE_FREE ();
 436   return bufptr - buffer;
 437 }