]> code.delx.au - gnu-emacs/blob - src/character.c
(Funibyte_char_to_multibyte): If C can't be decoded
[gnu-emacs] / src / character.c
1 /* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29 #ifdef emacs
30 #include <config.h>
31 #endif
32
33 #include <stdio.h>
34
35 #ifdef emacs
36
37 #include <sys/types.h>
38 #include "lisp.h"
39 #include "character.h"
40 #include "buffer.h"
41 #include "charset.h"
42 #include "composite.h"
43 #include "disptab.h"
44
45 #else /* not emacs */
46
47 #include "mulelib.h"
48
49 #endif /* emacs */
50
51 Lisp_Object Qcharacterp;
52
53 /* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55 Lisp_Object Vtranslation_table_vector;
56
57 /* A char-table for characters which may invoke auto-filling. */
58 Lisp_Object Vauto_fill_chars;
59
60 Lisp_Object Qauto_fill_chars;
61
62 Lisp_Object Vchar_unify_table;
63
64 /* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66 Lisp_Object Vprintable_chars;
67
68 /* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70 Lisp_Object Vchar_width_table;
71
72 /* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74 Lisp_Object Vchar_direction_table;
75
76 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77 unsigned char *_fetch_multibyte_char_p;
78 int _fetch_multibyte_char_len;
79
80 /* Char table of scripts. */
81 Lisp_Object Vchar_script_table;
82
83 static Lisp_Object Qchar_script_table;
84
85
86 \f
87
88 int
89 char_string_with_unification (c, p)
90 int c;
91 unsigned char *p;
92 {
93 int bytes;
94
95 MAYBE_UNIFY_CHAR (c);
96
97 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
98 {
99 bytes = CHAR_STRING (c, p);
100 }
101 else if (c <= MAX_4_BYTE_CHAR)
102 {
103 p[0] = (0xF0 | (c >> 18));
104 p[1] = (0x80 | ((c >> 12) & 0x3F));
105 p[2] = (0x80 | ((c >> 6) & 0x3F));
106 p[3] = (0x80 | (c & 0x3F));
107 bytes = 4;
108 }
109 else
110 {
111 p[0] = 0xF8;
112 p[1] = (0x80 | ((c >> 18) & 0x0F));
113 p[2] = (0x80 | ((c >> 12) & 0x3F));
114 p[3] = (0x80 | ((c >> 6) & 0x3F));
115 p[4] = (0x80 | (c & 0x3F));
116 bytes = 5;
117 }
118
119 return bytes;
120 }
121
122
123 int
124 string_char_with_unification (p, advanced, len)
125 const unsigned char *p;
126 const unsigned char **advanced;
127 int *len;
128 {
129 int c;
130 const unsigned char *saved_p = p;
131
132 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
133 {
134 c = STRING_CHAR_ADVANCE (p);
135 }
136 else if (! (*p & 0x08))
137 {
138 c = ((((p)[0] & 0xF) << 18)
139 | (((p)[1] & 0x3F) << 12)
140 | (((p)[2] & 0x3F) << 6)
141 | ((p)[3] & 0x3F));
142 p += 4;
143 }
144 else
145 {
146 c = ((((p)[1] & 0x3F) << 18)
147 | (((p)[2] & 0x3F) << 12)
148 | (((p)[3] & 0x3F) << 6)
149 | ((p)[4] & 0x3F));
150 p += 5;
151 }
152
153 MAYBE_UNIFY_CHAR (c);
154
155 if (len)
156 *len = p - saved_p;
157 if (advanced)
158 *advanced = p;
159 return c;
160 }
161
162
163 /* Translate character C by translation table TABLE. If C is
164 negative, translate a character specified by CHARSET and CODE. If
165 no translation is found in TABLE, return the untranslated
166 character. */
167
168 int
169 translate_char (table, c)
170 Lisp_Object table;
171 int c;
172 {
173 Lisp_Object ch;
174
175 if (! CHAR_TABLE_P (table))
176 return c;
177 ch = CHAR_TABLE_REF (table, c);
178 if (! CHARACTERP (ch))
179 return c;
180 return XINT (ch);
181 }
182
183 /* Convert the unibyte character C to the corresponding multibyte
184 character based on the current value of charset_primary. If C
185 can't be converted, return C. */
186
187 int
188 unibyte_char_to_multibyte (c)
189 int c;
190 {
191 struct charset *charset = CHARSET_FROM_ID (charset_primary);
192 int c1 = DECODE_CHAR (charset, c);
193
194 return ((c1 >= 0) ? c1 : c);
195 }
196
197
198 /* Convert the multibyte character C to unibyte 8-bit character based
199 on the current value of charset_primary. If dimension of
200 charset_primary is more than one, return (C & 0xFF).
201
202 The argument REV_TBL is now ignored. It will be removed in the
203 future. */
204
205 int
206 multibyte_char_to_unibyte (c, rev_tbl)
207 int c;
208 Lisp_Object rev_tbl;
209 {
210 struct charset *charset = CHARSET_FROM_ID (charset_primary);
211 unsigned c1 = ENCODE_CHAR (charset, c);
212
213 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
214 }
215
216
217 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
218 doc: /* Return non-nil if OBJECT is a character. */)
219 (object, ignore)
220 Lisp_Object object, ignore;
221 {
222 return (CHARACTERP (object) ? Qt : Qnil);
223 }
224
225 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
226 doc: /* Return the character of the maximum code. */)
227 ()
228 {
229 return make_number (MAX_CHAR);
230 }
231
232 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
233 Sunibyte_char_to_multibyte, 1, 1, 0,
234 doc: /* Convert the unibyte character CH to multibyte character.
235 The multibyte character is a result of decoding CH by
236 the current primary charset (value of `charset-primary'). */)
237 (ch)
238 Lisp_Object ch;
239 {
240 int c;
241 struct charset *charset;
242
243 CHECK_CHARACTER (ch);
244 c = XFASTINT (ch);
245 if (c >= 0400)
246 error ("Invalid unibyte character: %d", c);
247 charset = CHARSET_FROM_ID (charset_primary);
248 c = DECODE_CHAR (charset, c);
249 if (c < 0)
250 c = BYTE8_TO_CHAR (XFASTINT (ch));
251 return make_number (c);
252 }
253
254 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
255 Smultibyte_char_to_unibyte, 1, 1, 0,
256 doc: /* Convert the multibyte character CH to unibyte character.\n\
257 The unibyte character is a result of encoding CH by
258 the current primary charset (value of `charset-primary'). */)
259 (ch)
260 Lisp_Object ch;
261 {
262 int c;
263 struct charset *charset;
264
265 CHECK_CHARACTER (ch);
266 c = XFASTINT (ch);
267 c = CHAR_TO_BYTE8 (c);
268 return make_number (c);
269 }
270
271 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
272 doc: /* Return 1 regardless of the argument CHAR.
273 This is now an obsolete function. We keep it just for backward compatibility. */)
274 (ch)
275 Lisp_Object ch;
276 {
277 CHECK_CHARACTER (ch);
278 return make_number (1);
279 }
280
281 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
282 doc: /* Return width of CHAR when displayed in the current buffer.
283 The width is measured by how many columns it occupies on the screen.
284 Tab is taken to occupy `tab-width' columns. */)
285 (ch)
286 Lisp_Object ch;
287 {
288 Lisp_Object disp;
289 int c, width;
290 struct Lisp_Char_Table *dp = buffer_display_table ();
291
292 CHECK_CHARACTER (ch);
293 c = XINT (ch);
294
295 /* Get the way the display table would display it. */
296 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
297
298 if (VECTORP (disp))
299 width = ASIZE (disp);
300 else
301 width = CHAR_WIDTH (c);
302
303 return make_number (width);
304 }
305
306 /* Return width of string STR of length LEN when displayed in the
307 current buffer. The width is measured by how many columns it
308 occupies on the screen. If PRECISION > 0, return the width of
309 longest substring that doesn't exceed PRECISION, and set number of
310 characters and bytes of the substring in *NCHARS and *NBYTES
311 respectively. */
312
313 int
314 c_string_width (str, len, precision, nchars, nbytes)
315 unsigned char *str;
316 int precision, *nchars, *nbytes;
317 {
318 int i = 0, i_byte = 0;
319 int width = 0;
320 struct Lisp_Char_Table *dp = buffer_display_table ();
321
322 while (i_byte < len)
323 {
324 int bytes, thiswidth;
325 Lisp_Object val;
326 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
327
328 if (dp)
329 {
330 val = DISP_CHAR_VECTOR (dp, c);
331 if (VECTORP (val))
332 thiswidth = XVECTOR (val)->size;
333 else
334 thiswidth = CHAR_WIDTH (c);
335 }
336 else
337 {
338 thiswidth = CHAR_WIDTH (c);
339 }
340
341 if (precision > 0
342 && (width + thiswidth > precision))
343 {
344 *nchars = i;
345 *nbytes = i_byte;
346 return width;
347 }
348 i++;
349 i_byte += bytes;
350 width += thiswidth;
351 }
352
353 if (precision > 0)
354 {
355 *nchars = i;
356 *nbytes = i_byte;
357 }
358
359 return width;
360 }
361
362 /* Return width of string STR of length LEN when displayed in the
363 current buffer. The width is measured by how many columns it
364 occupies on the screen. */
365
366 int
367 strwidth (str, len)
368 unsigned char *str;
369 int len;
370 {
371 return c_string_width (str, len, -1, NULL, NULL);
372 }
373
374 /* Return width of Lisp string STRING when displayed in the current
375 buffer. The width is measured by how many columns it occupies on
376 the screen while paying attention to compositions. If PRECISION >
377 0, return the width of longest substring that doesn't exceed
378 PRECISION, and set number of characters and bytes of the substring
379 in *NCHARS and *NBYTES respectively. */
380
381 int
382 lisp_string_width (string, precision, nchars, nbytes)
383 Lisp_Object string;
384 int precision, *nchars, *nbytes;
385 {
386 int len = XSTRING (string)->size;
387 unsigned char *str = XSTRING (string)->data;
388 int i = 0, i_byte = 0;
389 int width = 0;
390 struct Lisp_Char_Table *dp = buffer_display_table ();
391
392 while (i < len)
393 {
394 int chars, bytes, thiswidth;
395 Lisp_Object val;
396 int cmp_id;
397 int ignore, end;
398
399 if (find_composition (i, -1, &ignore, &end, &val, string)
400 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
401 >= 0))
402 {
403 thiswidth = composition_table[cmp_id]->width;
404 chars = end - i;
405 bytes = string_char_to_byte (string, end) - i_byte;
406 }
407 else if (dp)
408 {
409 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
410
411 chars = 1;
412 val = DISP_CHAR_VECTOR (dp, c);
413 if (VECTORP (val))
414 thiswidth = XVECTOR (val)->size;
415 else
416 thiswidth = CHAR_WIDTH (c);
417 }
418 else
419 {
420 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
421
422 chars = 1;
423 thiswidth = CHAR_WIDTH (c);
424 }
425
426 if (precision > 0
427 && (width + thiswidth > precision))
428 {
429 *nchars = i;
430 *nbytes = i_byte;
431 return width;
432 }
433 i += chars;
434 i_byte += bytes;
435 width += thiswidth;
436 }
437
438 if (precision > 0)
439 {
440 *nchars = i;
441 *nbytes = i_byte;
442 }
443
444 return width;
445 }
446
447 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
448 doc: /* Return width of STRING when displayed in the current buffer.
449 Width is measured by how many columns it occupies on the screen.
450 When calculating width of a multibyte character in STRING,
451 only the base leading-code is considered; the validity of
452 the following bytes is not checked. Tabs in STRING are always
453 taken to occupy `tab-width' columns. */)
454 (str)
455 Lisp_Object str;
456 {
457 Lisp_Object val;
458
459 CHECK_STRING (str);
460 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
461 return val;
462 }
463
464 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
465 doc: /* Return the direction of CHAR.
466 The returned value is 0 for left-to-right and 1 for right-to-left. */)
467 (ch)
468 Lisp_Object ch;
469 {
470 int c;
471
472 CHECK_CHARACTER (ch);
473 c = XINT (ch);
474 return CHAR_TABLE_REF (Vchar_direction_table, c);
475 }
476
477 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
478 doc: /* Return number of characters between BEG and END.
479 This is now an obsolete function. We keep it just for backward compatibility. */)
480 (beg, end)
481 Lisp_Object beg, end;
482 {
483 int from, to;
484
485 CHECK_NUMBER_COERCE_MARKER (beg);
486 CHECK_NUMBER_COERCE_MARKER (end);
487
488 from = min (XFASTINT (beg), XFASTINT (end));
489 to = max (XFASTINT (beg), XFASTINT (end));
490
491 return make_number (to - from);
492 }
493
494 /* Return the number of characters in the NBYTES bytes at PTR.
495 This works by looking at the contents and checking for multibyte
496 sequences while assuming that there's no invalid sequence.
497 However, if the current buffer has enable-multibyte-characters =
498 nil, we treat each byte as a character. */
499
500 int
501 chars_in_text (ptr, nbytes)
502 unsigned char *ptr;
503 int nbytes;
504 {
505 /* current_buffer is null at early stages of Emacs initialization. */
506 if (current_buffer == 0
507 || NILP (current_buffer->enable_multibyte_characters))
508 return nbytes;
509
510 return multibyte_chars_in_text (ptr, nbytes);
511 }
512
513 /* Return the number of characters in the NBYTES bytes at PTR.
514 This works by looking at the contents and checking for multibyte
515 sequences while assuming that there's no invalid sequence. It
516 ignores enable-multibyte-characters. */
517
518 int
519 multibyte_chars_in_text (ptr, nbytes)
520 unsigned char *ptr;
521 int nbytes;
522 {
523 unsigned char *endp = ptr + nbytes;
524 int chars = 0;
525
526 while (ptr < endp)
527 {
528 int len = MULTIBYTE_LENGTH (ptr, endp);
529
530 if (len == 0)
531 abort ();
532 ptr += len;
533 chars++;
534 }
535
536 return chars;
537 }
538
539 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count
540 characters and bytes in it, and store them in *NCHARS and *NBYTES
541 respectively. On counting bytes, pay attention to that 8-bit
542 characters not constructing a valid multibyte sequence are
543 represented by 2-byte in a multibyte text. */
544
545 void
546 parse_str_as_multibyte (str, len, nchars, nbytes)
547 unsigned char *str;
548 int len, *nchars, *nbytes;
549 {
550 unsigned char *endp = str + len;
551 int n, chars = 0, bytes = 0;
552
553 if (len >= MAX_MULTIBYTE_LENGTH)
554 {
555 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
556 while (str < adjusted_endp)
557 {
558 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
559 str += n, bytes += n;
560 else
561 str++, bytes += 2;
562 chars++;
563 }
564 }
565 while (str < endp)
566 {
567 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
568 str += n, bytes += n;
569 else
570 str++, bytes += 2;
571 chars++;
572 }
573
574 *nchars = chars;
575 *nbytes = bytes;
576 return;
577 }
578
579 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
580 It actually converts only such 8-bit characters that don't contruct
581 a multibyte sequence to multibyte forms of Latin-1 characters. If
582 NCHARS is nonzero, set *NCHARS to the number of characters in the
583 text. It is assured that we can use LEN bytes at STR as a work
584 area and that is enough. Return the number of bytes of the
585 resulting text. */
586
587 int
588 str_as_multibyte (str, len, nbytes, nchars)
589 unsigned char *str;
590 int len, nbytes, *nchars;
591 {
592 unsigned char *p = str, *endp = str + nbytes;
593 unsigned char *to;
594 int chars = 0;
595 int n;
596
597 if (nbytes >= MAX_MULTIBYTE_LENGTH)
598 {
599 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
600 while (p < adjusted_endp
601 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
602 p += n, chars++;
603 }
604 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
605 p += n, chars++;
606 if (nchars)
607 *nchars = chars;
608 if (p == endp)
609 return nbytes;
610
611 to = p;
612 nbytes = endp - p;
613 endp = str + len;
614 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
615 p = endp - nbytes;
616
617 if (nbytes >= MAX_MULTIBYTE_LENGTH)
618 {
619 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
620 while (p < adjusted_endp)
621 {
622 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
623 {
624 while (n--)
625 *to++ = *p++;
626 }
627 else
628 {
629 int c = *p++;
630 c = BYTE8_TO_CHAR (c);
631 to += CHAR_STRING (c, to);
632 }
633 }
634 chars++;
635 }
636 while (p < endp)
637 {
638 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
639 {
640 while (n--)
641 *to++ = *p++;
642 }
643 else
644 {
645 int c = *p++;
646 c = BYTE8_TO_CHAR (c);
647 to += CHAR_STRING (c, to);
648 }
649 chars++;
650 }
651 if (nchars)
652 *nchars = chars;
653 return (to - str);
654 }
655
656 /* Parse unibyte string at STR of LEN bytes, and return the number of
657 bytes it may ocupy when converted to multibyte string by
658 `str_to_multibyte'. */
659
660 int
661 parse_str_to_multibyte (str, len)
662 unsigned char *str;
663 int len;
664 {
665 unsigned char *endp = str + len;
666 int bytes;
667
668 for (bytes = 0; str < endp; str++)
669 bytes += (*str < 0x80) ? 1 : 2;
670 return bytes;
671 }
672
673
674 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
675 that contains the same single-byte characters. It actually
676 converts all 8-bit characters to multibyte forms. It is assured
677 that we can use LEN bytes at STR as a work area and that is
678 enough. */
679
680 int
681 str_to_multibyte (str, len, bytes)
682 unsigned char *str;
683 int len, bytes;
684 {
685 unsigned char *p = str, *endp = str + bytes;
686 unsigned char *to;
687
688 while (p < endp && *p < 0x80) p++;
689 if (p == endp)
690 return bytes;
691 to = p;
692 bytes = endp - p;
693 endp = str + len;
694 safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
695 p = endp - bytes;
696 while (p < endp)
697 {
698 int c = *p++;
699
700 if (c >= 0x80)
701 c = BYTE8_TO_CHAR (c);
702 to += CHAR_STRING (c, to);
703 }
704 return (to - str);
705 }
706
707 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
708 actually converts characters in the range 0x80..0xFF to
709 unibyte. */
710
711 int
712 str_as_unibyte (str, bytes)
713 unsigned char *str;
714 int bytes;
715 {
716 const unsigned char *p = str, *endp = str + bytes;
717 unsigned char *to;
718 int c, len;
719
720 while (p < endp)
721 {
722 c = *p;
723 len = BYTES_BY_CHAR_HEAD (c);
724 if (CHAR_BYTE8_HEAD_P (c))
725 break;
726 p += len;
727 }
728 to = str + (p - str);
729 while (p < endp)
730 {
731 c = *p;
732 len = BYTES_BY_CHAR_HEAD (c);
733 if (CHAR_BYTE8_HEAD_P (c))
734 {
735 c = STRING_CHAR_ADVANCE (p);
736 *to++ = CHAR_TO_BYTE8 (c);
737 }
738 else
739 {
740 while (len--) *to++ = *p++;
741 }
742 }
743 return (to - str);
744 }
745
746 int
747 string_count_byte8 (string)
748 Lisp_Object string;
749 {
750 int multibyte = STRING_MULTIBYTE (string);
751 int nbytes = STRING_BYTES (XSTRING (string));
752 unsigned char *p = XSTRING (string)->data;
753 unsigned char *pend = p + nbytes;
754 int count = 0;
755 int c, len;
756
757 if (multibyte)
758 while (p < pend)
759 {
760 c = *p;
761 len = BYTES_BY_CHAR_HEAD (c);
762
763 if (CHAR_BYTE8_HEAD_P (c))
764 count++;
765 p += len;
766 }
767 else
768 while (p < pend)
769 {
770 if (*p++ >= 0x80)
771 count++;
772 }
773 return count;
774 }
775
776
777 Lisp_Object
778 string_escape_byte8 (string)
779 Lisp_Object string;
780 {
781 int nchars = XSTRING (string)->size;
782 int nbytes = STRING_BYTES (XSTRING (string));
783 int multibyte = STRING_MULTIBYTE (string);
784 int byte8_count;
785 const unsigned char *src, *src_end;
786 unsigned char *dst;
787 Lisp_Object val;
788 int c, len;
789
790 if (multibyte && nchars == nbytes)
791 return string;
792
793 byte8_count = string_count_byte8 (string);
794
795 if (byte8_count == 0)
796 return string;
797
798 if (multibyte)
799 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
800 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
801 nbytes + byte8_count * 2);
802 else
803 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
804 val = make_uninit_string (nbytes + byte8_count * 3);
805
806 src = XSTRING (string)->data;
807 src_end = src + nbytes;
808 dst = XSTRING (val)->data;
809 if (multibyte)
810 while (src < src_end)
811 {
812 c = *src;
813 len = BYTES_BY_CHAR_HEAD (c);
814
815 if (CHAR_BYTE8_HEAD_P (c))
816 {
817 c = STRING_CHAR_ADVANCE (src);
818 c = CHAR_TO_BYTE8 (c);
819 sprintf ((char *) dst, "\\%03o", c);
820 dst += 4;
821 }
822 else
823 while (len--) *dst++ = *src++;
824 }
825 else
826 while (src < src_end)
827 {
828 c = *src++;
829 if (c >= 0x80)
830 {
831 sprintf ((char *) dst, "\\%03o", c);
832 dst += 4;
833 }
834 else
835 *dst++ = c;
836 }
837 return val;
838 }
839
840 \f
841 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
842 doc: /*
843 Concatenate all the argument characters and make the result a string.
844 usage: (string &rest CHARACTERS) */)
845 (n, args)
846 int n;
847 Lisp_Object *args;
848 {
849 int i;
850 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
851 unsigned char *p = buf;
852 int c;
853
854 for (i = 0; i < n; i++)
855 {
856 CHECK_CHARACTER (args[i]);
857 c = XINT (args[i]);
858 p += CHAR_STRING (c, p);
859 }
860
861 return make_string_from_bytes ((char *) buf, n, p - buf);
862 }
863
864 void
865 init_character_once ()
866 {
867 }
868
869 #ifdef emacs
870
871 void
872 syms_of_character ()
873 {
874 DEFSYM (Qcharacterp, "characterp");
875 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
876
877 staticpro (&Vchar_unify_table);
878 Vchar_unify_table = Qnil;
879
880 defsubr (&Smax_char);
881 defsubr (&Scharacterp);
882 defsubr (&Sunibyte_char_to_multibyte);
883 defsubr (&Smultibyte_char_to_unibyte);
884 defsubr (&Schar_bytes);
885 defsubr (&Schar_width);
886 defsubr (&Sstring_width);
887 defsubr (&Schar_direction);
888 defsubr (&Schars_in_region);
889 defsubr (&Sstring);
890
891 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
892 doc: /*
893 Vector recording all translation tables ever defined.
894 Each element is a pair (SYMBOL . TABLE) relating the table to the
895 symbol naming it. The ID of a translation table is an index into this vector. */);
896 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
897
898 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
899 doc: /*
900 A char-table for characters which invoke auto-filling.
901 Such characters have value t in this table. */);
902 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
903 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
904 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
905
906 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
907 doc: /*
908 A char-table for width (columns) of each character. */);
909 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
910 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
911 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
912 make_number (4));
913
914 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
915 doc: /* A char-table for direction of each character. */);
916 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
917
918 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
919 doc: /* A char-table for each printable character. */);
920 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
921
922 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
923 doc: /* Char table of script symbols.
924 It has one extra slot whose value is a list of script symbols. */);
925
926 /* Intern this now in case it isn't already done.
927 Setting this variable twice is harmless.
928 But don't staticpro it here--that is done in alloc.c. */
929 Qchar_table_extra_slots = intern ("char-table-extra-slots");
930 DEFSYM (Qchar_script_table, "char-script-table");
931 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
932 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
933 }
934
935 #endif /* emacs */