static int read_buffer_size;
static char *read_buffer;
-/* Read multibyte form and return it as a character. C is a first
- byte of multibyte form, and rest of them are read from
- READCHARFUN. */
-
-static int
-read_multibyte (c, readcharfun)
- register int c;
- Lisp_Object readcharfun;
-{
- /* We need the actual character code of this multibyte
- characters. */
- unsigned char str[MAX_MULTIBYTE_LENGTH];
- int len = 0;
- int bytes;
-
- if (c < 0)
- return c;
-
- str[len++] = c;
- while ((c = READCHAR) >= 0xA0
- && len < MAX_MULTIBYTE_LENGTH)
- {
- str[len++] = c;
- readchar_count--;
- }
- UNREAD (c);
- if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
- return STRING_CHAR (str, len);
- /* The byte sequence is not valid as multibyte. Unread all bytes
- but the first one, and return the first byte. */
- while (--len > 0)
- UNREAD (str[len]);
- return str[0];
-}
-
/* Read a \-escape sequence, assuming we already read the `\'.
- If the escape sequence forces unibyte, store 1 into *BYTEREP.
- If the escape sequence forces multibyte, store 2 into *BYTEREP.
- Otherwise store 0 into *BYTEREP. */
+ If the escape sequence forces unibyte, return eight-bit char. */
static int
-read_escape (readcharfun, stringp, byterep)
+read_escape (readcharfun, stringp)
Lisp_Object readcharfun;
int stringp;
- int *byterep;
{
register int c = READCHAR;
+ /* \u allows up to four hex digits, \U up to eight. Default to the
+ behaviour for \u, and change this value in the case that \U is seen. */
+ int unicode_hex_count = 4;
- *byterep = 0;
-
switch (c)
{
case -1:
return i;
}
+ case 'U':
+ /* Post-Unicode-2.0: Up to eight hex chars. */
+ unicode_hex_count = 8;
+ case 'u':
+
+ /* A Unicode escape. We only permit them in strings and characters,
+ not arbitrarily in the source code, as in some other languages. */
+ {
+ int i = 0;
+ int count = 0;
+ Lisp_Object lisp_char;
+ struct gcpro gcpro1;
+
+ while (++count <= unicode_hex_count)
+ {
+ c = READCHAR;
+ /* isdigit(), isalpha() may be locale-specific, which we don't
+ want. */
+ if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
+ else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
+ else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
+ else
+ {
+ error ("Non-hex digit used for Unicode escape");
+ break;
+ }
+ }
+
+ GCPRO1 (readcharfun);
+ lisp_char = call2(intern("decode-char"), intern("ucs"),
+ make_number(i));
+ UNGCPRO;
+
+ if (EQ(Qnil, lisp_char))
+ {
+ /* This is ugly and horrible and trashes the user's data. */
+ XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201,
+ 34 + 128, 46 + 128));
+ return i;
+ }
+ else
+ {
+ return XFASTINT (lisp_char);
+ }
+ }
+
default:
- if (BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun);
return c;
}
}