/* Output like sprintf to a buffer of specified size.
- Also takes args differently: pass one pointer to an array of strings
- in addition to the format string which is separate.
+ Also takes args differently: pass one pointer to the end
+ of the format string in addition to the format string itself.
Copyright (C) 1985, 2001-2011 Free Software Foundation, Inc.
This file is part of GNU Emacs.
You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
+/* If you think about replacing this with some similar standard C function of
+ the printf family (such as vsnprintf), please note that this function
+ supports the following Emacs-specific features:
+
+ . For %c conversions, it produces a string with the multibyte representation
+ of the (`int') argument, suitable for display in an Emacs buffer.
+
+ . For %s and %c, when field width is specified (e.g., %25s), it accounts for
+ the display width of each character, according to char-width-table. That
+ is, it does not assume that each character takes one column on display.
+
+ . If the size of the buffer is not enough to produce the formatted string in
+ its entirety, it makes sure that truncation does not chop the last
+ character in the middle of its multibyte sequence, which would produce an
+ invalid sequence.
+
+ . It accepts a pointer to the end of the format string, so the format string
+ could include embedded null characters.
+
+ . It signals an error if the length of the formatted string is about to
+ overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
+ Emacs can handle.
+
+ OTOH, this function supports only a small subset of the standard C formatted
+ output facilities. E.g., %u and %ll are not supported, and precision is
+ ignored for %s and %c conversions. (See below for the detailed documentation of
+ what is supported.) However, this is okay, as this function is supposed to
+ be called from `error' and similar functions, and thus does not need to
+ support features beyond those in `Fformat', which is used by `error' on the
+ Lisp level. */
+
+/* This function supports the following %-sequences in the `format'
+ argument:
+
+ %s means print a string argument.
+ %S is silently treated as %s, for loose compatibility with `Fformat'.
+ %d means print a `signed int' argument in decimal.
+ %l means print a `long int' argument in decimal.
+ %o means print an `unsigned int' argument in octal.
+ %x means print an `unsigned int' argument in hex.
+ %e means print a `double' argument in exponential notation.
+ %f means print a `double' argument in decimal-point notation.
+ %g means print a `double' argument in exponential notation
+ or in decimal-point notation, whichever uses fewer characters.
+ %c means print a `signed int' argument as a single character.
+ %% means produce a literal % character.
+
+ A %-sequence may contain optional flag, width, and precision specifiers, as
+ follows:
+
+ %<flags><width><precision>character
+
+ where flags is [+ -0l], width is [0-9]+, and precision is .[0-9]+
+
+ The + flag character inserts a + before any positive number, while a space
+ inserts a space before any positive number; these flags only affect %d, %l,
+ %o, %x, %e, %f, and %g sequences. The - and 0 flags affect the width
+ specifier, as described below.
+
+ The l (lower-case letter ell) flag is a `long' data type modifier: it is
+ supported for %d, %o, and %x conversions of integral arguments, and means
+ that the respective argument is to be treated as `long int' or `unsigned
+ long int'. The EMACS_INT data type should use this modifier.
+
+ The width specifier supplies a lower limit for the length of the printed
+ representation. The padding, if any, normally goes on the left, but it goes
+ on the right if the - flag is present. The padding character is normally a
+ space, but (for numerical arguments only) it is 0 if the 0 flag is present.
+ The - flag takes precedence over the 0 flag.
+
+ For %e, %f, and %g sequences, the number after the "." in the precision
+ specifier says how many decimal places to show; if zero, the decimal point
+ itself is omitted. For %s and %S, the precision specifier is ignored. */
#include <config.h>
#include <stdio.h>
#include <unistd.h>
+#include <limits.h>
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
#include "lisp.h"
/* Since we use the macro CHAR_HEAD_P, we have to include this, but
terminated at position FORMAT_END.
Output goes in BUFFER, which has room for BUFSIZE chars.
If the output does not fit, truncate it to fit.
- Returns the number of bytes stored into BUFFER.
- ARGS points to the vector of arguments, and NARGS says how many.
- A double counts as two arguments.
+ Returns the number of bytes stored into BUFFER, excluding
+ the terminating null byte. Output is always null-terminated.
String arguments are passed as C strings.
Integers are passed as C integers. */
-EMACS_INT
-doprnt (char *buffer, register int bufsize, const char *format,
+size_t
+doprnt (char *buffer, register size_t bufsize, const char *format,
const char *format_end, va_list ap)
{
const char *fmt = format; /* Pointer into format string */
char tembuf[DBL_MAX_10_EXP + 100];
/* Size of sprintf_buffer. */
- unsigned size_allocated = sizeof (tembuf);
+ size_t size_allocated = sizeof (tembuf);
/* Buffer to use for sprintf. Either tembuf or same as BIG_BUFFER. */
char *sprintf_buffer = tembuf;
/* Buffer we have got with malloc. */
- char *big_buffer = 0;
+ char *big_buffer = NULL;
- register int tem;
+ register size_t tem;
char *string;
char fixed_buffer[20]; /* Default buffer for small formatting. */
char *fmtcpy;
int minlen;
char charbuf[MAX_MULTIBYTE_LENGTH + 1]; /* Used for %c. */
+ USE_SAFE_ALLOCA;
if (format_end == 0)
format_end = format + strlen (format);
if ((format_end - format + 1) < sizeof (fixed_buffer))
fmtcpy = fixed_buffer;
else
- fmtcpy = (char *) alloca (format_end - format + 1);
+ SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
bufsize--;
{
if (*fmt == '%') /* Check for a '%' character */
{
- unsigned size_bound = 0;
- EMACS_INT width; /* Columns occupied by STRING. */
+ size_t size_bound = 0;
+ EMACS_INT width; /* Columns occupied by STRING on display. */
+ int long_flag = 0;
fmt++;
/* Copy this one %-spec into fmtcpy. */
This might be a field width or a precision; e.g.
%1.1000f and %1000.1f both might need 1000+ bytes.
Parse the width or precision, checking for overflow. */
- unsigned n = *fmt - '0';
+ size_t n = *fmt - '0';
while ('0' <= fmt[1] && fmt[1] <= '9')
{
- if (n * 10 + fmt[1] - '0' < n)
+ if (n >= SIZE_MAX / 10
+ || n * 10 > SIZE_MAX - (fmt[1] - '0'))
error ("Format width or precision too large");
n = n * 10 + fmt[1] - '0';
*string++ = *++fmt;
}
else if (*fmt == '-' || *fmt == ' ' || *fmt == '.' || *fmt == '+')
;
+ else if (*fmt == 'l')
+ {
+ long_flag = 1;
+ if (!strchr ("dox", fmt[1]))
+ /* %l as conversion specifier, not as modifier. */
+ break;
+ }
else
break;
fmt++;
/* Make the size bound large enough to handle floating point formats
with large numbers. */
- if (size_bound + DBL_MAX_10_EXP + 50 < size_bound)
+ if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
error ("Format width or precision too large");
size_bound += DBL_MAX_10_EXP + 50;
error ("Invalid format operation %%%c", fmt[-1]);
/* case 'b': */
+ case 'l':
case 'd':
+ {
+ int i;
+ long l;
+
+ if (long_flag)
+ {
+ l = va_arg(ap, long);
+ sprintf (sprintf_buffer, fmtcpy, l);
+ }
+ else
+ {
+ i = va_arg(ap, int);
+ sprintf (sprintf_buffer, fmtcpy, i);
+ }
+ /* Now copy into final output, truncating as necessary. */
+ string = sprintf_buffer;
+ goto doit;
+ }
+
case 'o':
case 'x':
- if (sizeof (int) == sizeof (EMACS_INT))
- ;
- else if (sizeof (long) == sizeof (EMACS_INT))
- /* Insert an `l' the right place. */
- string[1] = string[0],
- string[0] = string[-1],
- string[-1] = 'l',
- string++;
- else
- abort ();
- sprintf (sprintf_buffer, fmtcpy, va_arg(ap, char *));
- /* Now copy into final output, truncating as nec. */
- string = sprintf_buffer;
- goto doit;
+ {
+ unsigned u;
+ unsigned long ul;
+
+ if (long_flag)
+ {
+ ul = va_arg(ap, unsigned long);
+ sprintf (sprintf_buffer, fmtcpy, ul);
+ }
+ else
+ {
+ u = va_arg(ap, unsigned);
+ sprintf (sprintf_buffer, fmtcpy, u);
+ }
+ /* Now copy into final output, truncating as necessary. */
+ string = sprintf_buffer;
+ goto doit;
+ }
case 'f':
case 'e':
{
double d = va_arg(ap, double);
sprintf (sprintf_buffer, fmtcpy, d);
- /* Now copy into final output, truncating as nec. */
+ /* Now copy into final output, truncating as necessary. */
string = sprintf_buffer;
goto doit;
}
minlen = atoi (&fmtcpy[1]);
string = va_arg (ap, char *);
tem = strlen (string);
+ if (tem > MOST_POSITIVE_FIXNUM)
+ error ("String for %%s or %%S format is too long");
width = strwidth (string, tem);
goto doit1;
/* Copy string into final output, truncating if no room. */
doit:
/* Coming here means STRING contains ASCII only. */
- width = tem = strlen (string);
+ tem = strlen (string);
+ if (tem > MOST_POSITIVE_FIXNUM)
+ error ("Format width or precision too large");
+ width = tem;
doit1:
/* We have already calculated:
TEM -- length of STRING,
case 'c':
{
- /* Sometimes for %c we pass a char, which would widen
- to int. Sometimes we pass XFASTINT() or XINT()
- values, which would be EMACS_INT. Let's hope that
- both are passed the same way, otherwise we'll need
- to rewrite callers. */
- EMACS_INT chr = va_arg(ap, EMACS_INT);
- tem = CHAR_STRING ((int) chr, (unsigned char *) charbuf);
+ int chr = va_arg(ap, int);
+ tem = CHAR_STRING (chr, (unsigned char *) charbuf);
string = charbuf;
string[tem] = 0;
width = strwidth (string, tem);
/* If we had to malloc something, free it. */
xfree (big_buffer);
- *bufptr = 0; /* Make sure our string end with a '\0' */
+ *bufptr = 0; /* Make sure our string ends with a '\0' */
+
+ SAFE_FREE ();
return bufptr - buffer;
}