]> code.delx.au - gnu-emacs/blob - src/character.c
(_fetch_multibyte_char_len): This variable deleted.
[gnu-emacs] / src / character.c
1 /* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29 #ifdef emacs
30 #include <config.h>
31 #endif
32
33 #include <stdio.h>
34
35 #ifdef emacs
36
37 #include <sys/types.h>
38 #include "lisp.h"
39 #include "character.h"
40 #include "buffer.h"
41 #include "charset.h"
42 #include "composite.h"
43 #include "disptab.h"
44
45 #else /* not emacs */
46
47 #include "mulelib.h"
48
49 #endif /* emacs */
50
51 Lisp_Object Qcharacterp;
52
53 /* Vector of all translation tables ever defined.
54 The ID of a translation table is used to index this vector. */
55 Lisp_Object Vtranslation_table_vector;
56
57 /* A char-table for characters which may invoke auto-filling. */
58 Lisp_Object Vauto_fill_chars;
59
60 Lisp_Object Qauto_fill_chars;
61
62 Lisp_Object Vchar_unify_table;
63
64 /* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66 Lisp_Object Vprintable_chars;
67
68 /* A char-table. An element is the column width of the corresponding
69 character. */
70 Lisp_Object Vchar_width_table;
71
72 /* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74 Lisp_Object Vchar_direction_table;
75
76 /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */
77 unsigned char *_fetch_multibyte_char_p;
78
79 /* Char table of scripts. */
80 Lisp_Object Vchar_script_table;
81
82 static Lisp_Object Qchar_script_table;
83
84
85 \f
86
87 int
88 char_string_with_unification (c, p)
89 int c;
90 unsigned char *p;
91 {
92 int bytes;
93
94 MAYBE_UNIFY_CHAR (c);
95
96 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
97 {
98 bytes = CHAR_STRING (c, p);
99 }
100 else if (c <= MAX_4_BYTE_CHAR)
101 {
102 p[0] = (0xF0 | (c >> 18));
103 p[1] = (0x80 | ((c >> 12) & 0x3F));
104 p[2] = (0x80 | ((c >> 6) & 0x3F));
105 p[3] = (0x80 | (c & 0x3F));
106 bytes = 4;
107 }
108 else
109 {
110 p[0] = 0xF8;
111 p[1] = (0x80 | ((c >> 18) & 0x0F));
112 p[2] = (0x80 | ((c >> 12) & 0x3F));
113 p[3] = (0x80 | ((c >> 6) & 0x3F));
114 p[4] = (0x80 | (c & 0x3F));
115 bytes = 5;
116 }
117
118 return bytes;
119 }
120
121
/* Decode the multibyte sequence at P, apply MAYBE_UNIFY_CHAR to the
   decoded character, and return it.  If LEN is non-null, store the
   number of bytes consumed in *LEN.  If ADVANCED is non-null, store
   the address just past the sequence in *ADVANCED.  */

int
string_char_with_unification (p, advanced, len)
     const unsigned char *p;
     const unsigned char **advanced;
     int *len;
{
  const unsigned char *start = p;
  int c;

  if (*p >= 0x80 && (*p & 0x20) && (*p & 0x10))
    {
      /* Head byte is 0xF0 or above: a 4- or 5-byte sequence.  */
      if (*p & 0x08)
        {
          c = (((p[1] & 0x3F) << 18)
               | ((p[2] & 0x3F) << 12)
               | ((p[3] & 0x3F) << 6)
               | (p[4] & 0x3F));
          p += 5;
        }
      else
        {
          c = (((p[0] & 0xF) << 18)
               | ((p[1] & 0x3F) << 12)
               | ((p[2] & 0x3F) << 6)
               | (p[3] & 0x3F));
          p += 4;
        }
    }
  else
    {
      /* Everything shorter is handled by the generic macro, which
         also advances P.  */
      c = STRING_CHAR_ADVANCE (p);
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = p - start;
  if (advanced)
    *advanced = p;
  return c;
}
160
161
162 /* Translate character C by translation table TABLE. If C is
163 negative, translate a character specified by CHARSET and CODE. If
164 no translation is found in TABLE, return the untranslated
165 character. */
166
167 int
168 translate_char (table, c)
169 Lisp_Object table;
170 int c;
171 {
172 Lisp_Object ch;
173
174 if (! CHAR_TABLE_P (table))
175 return c;
176 ch = CHAR_TABLE_REF (table, c);
177 if (! CHARACTERP (ch))
178 return c;
179 return XINT (ch);
180 }
181
182 /* Convert the unibyte character C to the corresponding multibyte
183 character based on the current value of charset_unibyte. If C
184 can't be converted, return C. */
185
186 int
187 unibyte_char_to_multibyte (c)
188 int c;
189 {
190 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
191 int c1 = DECODE_CHAR (charset, c);
192
193 return ((c1 >= 0) ? c1 : c);
194 }
195
196
197 /* Convert the multibyte character C to unibyte 8-bit character based
198 on the current value of charset_unibyte. If dimension of
199 charset_unibyte is more than one, return (C & 0xFF).
200
201 The argument REV_TBL is now ignored. It will be removed in the
202 future. */
203
204 int
205 multibyte_char_to_unibyte (c, rev_tbl)
206 int c;
207 Lisp_Object rev_tbl;
208 {
209 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
210 unsigned c1 = ENCODE_CHAR (charset, c);
211
212 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
213 }
214
215
216 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
217 doc: /* Return non-nil if OBJECT is a character. */)
218 (object, ignore)
219 Lisp_Object object, ignore;
220 {
221 return (CHARACTERP (object) ? Qt : Qnil);
222 }
223
224 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
225 doc: /* Return the character of the maximum code. */)
226 ()
227 {
228 return make_number (MAX_CHAR);
229 }
230
231 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
232 Sunibyte_char_to_multibyte, 1, 1, 0,
233 doc: /* Convert the unibyte character CH to multibyte character.
234 The multibyte character is a result of decoding CH by
235 the current unibyte charset (see `unibyte-charset'). */)
236 (ch)
237 Lisp_Object ch;
238 {
239 int c;
240 struct charset *charset;
241
242 CHECK_CHARACTER (ch);
243 c = XFASTINT (ch);
244 if (c >= 0400)
245 error ("Invalid unibyte character: %d", c);
246 charset = CHARSET_FROM_ID (charset_unibyte);
247 c = DECODE_CHAR (charset, c);
248 if (c < 0)
249 c = BYTE8_TO_CHAR (XFASTINT (ch));
250 return make_number (c);
251 }
252
253 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
254 Smultibyte_char_to_unibyte, 1, 1, 0,
255 doc: /* Convert the multibyte character CH to unibyte character.\n\
256 The unibyte character is a result of encoding CH by
257 the current primary charset (value of `charset-primary'). */)
258 (ch)
259 Lisp_Object ch;
260 {
261 int c;
262
263 CHECK_CHARACTER (ch);
264 c = XFASTINT (ch);
265 c = CHAR_TO_BYTE8 (c);
266 return make_number (c);
267 }
268
269 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
270 doc: /* Return 1 regardless of the argument CHAR.
271 This is now an obsolete function. We keep it just for backward compatibility. */)
272 (ch)
273 Lisp_Object ch;
274 {
275 CHECK_CHARACTER (ch);
276 return make_number (1);
277 }
278
279 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
280 doc: /* Return width of CHAR when displayed in the current buffer.
281 The width is measured by how many columns it occupies on the screen.
282 Tab is taken to occupy `tab-width' columns. */)
283 (ch)
284 Lisp_Object ch;
285 {
286 Lisp_Object disp;
287 int c, width;
288 struct Lisp_Char_Table *dp = buffer_display_table ();
289
290 CHECK_CHARACTER (ch);
291 c = XINT (ch);
292
293 /* Get the way the display table would display it. */
294 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
295
296 if (VECTORP (disp))
297 width = ASIZE (disp);
298 else
299 width = CHAR_WIDTH (c);
300
301 return make_number (width);
302 }
303
304 /* Return width of string STR of length LEN when displayed in the
305 current buffer. The width is measured by how many columns it
306 occupies on the screen. If PRECISION > 0, return the width of
307 longest substring that doesn't exceed PRECISION, and set number of
308 characters and bytes of the substring in *NCHARS and *NBYTES
309 respectively. */
310
311 int
312 c_string_width (str, len, precision, nchars, nbytes)
313 unsigned char *str;
314 int precision, *nchars, *nbytes;
315 {
316 int i = 0, i_byte = 0;
317 int width = 0;
318 struct Lisp_Char_Table *dp = buffer_display_table ();
319
320 while (i_byte < len)
321 {
322 int bytes, thiswidth;
323 Lisp_Object val;
324 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
325
326 if (dp)
327 {
328 val = DISP_CHAR_VECTOR (dp, c);
329 if (VECTORP (val))
330 thiswidth = XVECTOR (val)->size;
331 else
332 thiswidth = CHAR_WIDTH (c);
333 }
334 else
335 {
336 thiswidth = CHAR_WIDTH (c);
337 }
338
339 if (precision > 0
340 && (width + thiswidth > precision))
341 {
342 *nchars = i;
343 *nbytes = i_byte;
344 return width;
345 }
346 i++;
347 i_byte += bytes;
348 width += thiswidth;
349 }
350
351 if (precision > 0)
352 {
353 *nchars = i;
354 *nbytes = i_byte;
355 }
356
357 return width;
358 }
359
/* Return the display width, in columns, of the string STR of LEN
   bytes in the current buffer.  This is c_string_width with no
   precision limit and no character/byte counts requested.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int columns;

  columns = c_string_width (str, len, -1, NULL, NULL);
  return columns;
}
371
372 /* Return width of Lisp string STRING when displayed in the current
373 buffer. The width is measured by how many columns it occupies on
374 the screen while paying attention to compositions. If PRECISION >
375 0, return the width of longest substring that doesn't exceed
376 PRECISION, and set number of characters and bytes of the substring
377 in *NCHARS and *NBYTES respectively. */
378
379 int
380 lisp_string_width (string, precision, nchars, nbytes)
381 Lisp_Object string;
382 int precision, *nchars, *nbytes;
383 {
384 int len = XSTRING (string)->size;
385 unsigned char *str = XSTRING (string)->data;
386 int i = 0, i_byte = 0;
387 int width = 0;
388 struct Lisp_Char_Table *dp = buffer_display_table ();
389
390 while (i < len)
391 {
392 int chars, bytes, thiswidth;
393 Lisp_Object val;
394 int cmp_id;
395 int ignore, end;
396
397 if (find_composition (i, -1, &ignore, &end, &val, string)
398 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
399 >= 0))
400 {
401 thiswidth = composition_table[cmp_id]->width;
402 chars = end - i;
403 bytes = string_char_to_byte (string, end) - i_byte;
404 }
405 else if (dp)
406 {
407 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
408
409 chars = 1;
410 val = DISP_CHAR_VECTOR (dp, c);
411 if (VECTORP (val))
412 thiswidth = XVECTOR (val)->size;
413 else
414 thiswidth = CHAR_WIDTH (c);
415 }
416 else
417 {
418 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
419
420 chars = 1;
421 thiswidth = CHAR_WIDTH (c);
422 }
423
424 if (precision > 0
425 && (width + thiswidth > precision))
426 {
427 *nchars = i;
428 *nbytes = i_byte;
429 return width;
430 }
431 i += chars;
432 i_byte += bytes;
433 width += thiswidth;
434 }
435
436 if (precision > 0)
437 {
438 *nchars = i;
439 *nbytes = i_byte;
440 }
441
442 return width;
443 }
444
445 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
446 doc: /* Return width of STRING when displayed in the current buffer.
447 Width is measured by how many columns it occupies on the screen.
448 When calculating width of a multibyte character in STRING,
449 only the base leading-code is considered; the validity of
450 the following bytes is not checked. Tabs in STRING are always
451 taken to occupy `tab-width' columns. */)
452 (str)
453 Lisp_Object str;
454 {
455 Lisp_Object val;
456
457 CHECK_STRING (str);
458 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
459 return val;
460 }
461
462 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
463 doc: /* Return the direction of CHAR.
464 The returned value is 0 for left-to-right and 1 for right-to-left. */)
465 (ch)
466 Lisp_Object ch;
467 {
468 int c;
469
470 CHECK_CHARACTER (ch);
471 c = XINT (ch);
472 return CHAR_TABLE_REF (Vchar_direction_table, c);
473 }
474
475 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
476 doc: /* Return number of characters between BEG and END.
477 This is now an obsolete function. We keep it just for backward compatibility. */)
478 (beg, end)
479 Lisp_Object beg, end;
480 {
481 int from, to;
482
483 CHECK_NUMBER_COERCE_MARKER (beg);
484 CHECK_NUMBER_COERCE_MARKER (end);
485
486 from = min (XFASTINT (beg), XFASTINT (end));
487 to = max (XFASTINT (beg), XFASTINT (end));
488
489 return make_number (to - from);
490 }
491
492 /* Return the number of characters in the NBYTES bytes at PTR.
493 This works by looking at the contents and checking for multibyte
494 sequences while assuming that there's no invalid sequence.
495 However, if the current buffer has enable-multibyte-characters =
496 nil, we treat each byte as a character. */
497
498 int
499 chars_in_text (ptr, nbytes)
500 unsigned char *ptr;
501 int nbytes;
502 {
503 /* current_buffer is null at early stages of Emacs initialization. */
504 if (current_buffer == 0
505 || NILP (current_buffer->enable_multibyte_characters))
506 return nbytes;
507
508 return multibyte_chars_in_text (ptr, nbytes);
509 }
510
/* Return the number of characters in the NBYTES bytes at PTR,
   scanning them as multibyte sequences regardless of
   enable-multibyte-characters.  The text is assumed to contain no
   invalid sequence; if MULTIBYTE_LENGTH reports a length of zero,
   abort.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *endp = ptr + nbytes;
  int chars;

  for (chars = 0; ptr < endp; chars++)
    {
      int len = MULTIBYTE_LENGTH (ptr, endp);

      if (len == 0)
        abort ();
      ptr += len;
    }

  return chars;
}
536
537 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count
538 characters and bytes in it, and store them in *NCHARS and *NBYTES
539 respectively. On counting bytes, pay attention to that 8-bit
540 characters not constructing a valid multibyte sequence are
541 represented by 2-byte in a multibyte text. */
542
543 void
544 parse_str_as_multibyte (str, len, nchars, nbytes)
545 unsigned char *str;
546 int len, *nchars, *nbytes;
547 {
548 unsigned char *endp = str + len;
549 int n, chars = 0, bytes = 0;
550
551 if (len >= MAX_MULTIBYTE_LENGTH)
552 {
553 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
554 while (str < adjusted_endp)
555 {
556 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
557 str += n, bytes += n;
558 else
559 str++, bytes += 2;
560 chars++;
561 }
562 }
563 while (str < endp)
564 {
565 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
566 str += n, bytes += n;
567 else
568 str++, bytes += 2;
569 chars++;
570 }
571
572 *nchars = chars;
573 *nbytes = bytes;
574 return;
575 }
576
577 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
578 It actually converts only such 8-bit characters that don't contruct
579 a multibyte sequence to multibyte forms of Latin-1 characters. If
580 NCHARS is nonzero, set *NCHARS to the number of characters in the
581 text. It is assured that we can use LEN bytes at STR as a work
582 area and that is enough. Return the number of bytes of the
583 resulting text. */
584
585 int
586 str_as_multibyte (str, len, nbytes, nchars)
587 unsigned char *str;
588 int len, nbytes, *nchars;
589 {
590 unsigned char *p = str, *endp = str + nbytes;
591 unsigned char *to;
592 int chars = 0;
593 int n;
594
595 if (nbytes >= MAX_MULTIBYTE_LENGTH)
596 {
597 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
598 while (p < adjusted_endp
599 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
600 p += n, chars++;
601 }
602 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
603 p += n, chars++;
604 if (nchars)
605 *nchars = chars;
606 if (p == endp)
607 return nbytes;
608
609 to = p;
610 nbytes = endp - p;
611 endp = str + len;
612 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
613 p = endp - nbytes;
614
615 if (nbytes >= MAX_MULTIBYTE_LENGTH)
616 {
617 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
618 while (p < adjusted_endp)
619 {
620 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
621 {
622 while (n--)
623 *to++ = *p++;
624 }
625 else
626 {
627 int c = *p++;
628 c = BYTE8_TO_CHAR (c);
629 to += CHAR_STRING (c, to);
630 }
631 }
632 chars++;
633 }
634 while (p < endp)
635 {
636 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
637 {
638 while (n--)
639 *to++ = *p++;
640 }
641 else
642 {
643 int c = *p++;
644 c = BYTE8_TO_CHAR (c);
645 to += CHAR_STRING (c, to);
646 }
647 chars++;
648 }
649 if (nchars)
650 *nchars = chars;
651 return (to - str);
652 }
653
/* Scan the unibyte string at STR of LEN bytes and return the number
   of bytes it may occupy once converted to a multibyte string by
   `str_to_multibyte': one byte for each byte below 0x80 and two
   bytes for every other byte.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int bytes = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80)
        bytes += 2;
      else
        bytes += 1;
    }
  return bytes;
}
670
671
/* Convert the unibyte text at STR of BYTES bytes to multibyte text
   holding the same single-byte characters: every byte of 0x80 or
   above is replaced by the multibyte form of BYTE8_TO_CHAR of that
   byte.  The caller assures that LEN bytes at STR may be used as a
   work area and that this is enough.  Return the number of bytes of
   the resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *src = str;
  unsigned char *src_end = str + bytes;
  unsigned char *dst;

  /* The leading run of bytes below 0x80 stays where it is.  */
  for (; src < src_end; src++)
    if (*src >= 0x80)
      break;
  if (src == src_end)
    return bytes;

  /* Move the remainder to the end of the work area, then expand it
     back towards the front.  */
  dst = src;
  bytes = src_end - src;
  src_end = str + len;
  safe_bcopy ((char *) src, (char *) (src_end - bytes), bytes);
  src = src_end - bytes;

  while (src < src_end)
    {
      int c = *src++;

      if (c >= 0x80)
        c = BYTE8_TO_CHAR (c);
      dst += CHAR_STRING (c, dst);
    }
  return (dst - str);
}
704
/* Arrange the multibyte text at STR of BYTES bytes as unibyte text,
   in place.  Each sequence whose head byte satisfies
   CHAR_BYTE8_HEAD_P is replaced by the single byte CHAR_TO_BYTE8 of
   its character; every other sequence is copied unchanged.  Return
   the number of bytes of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  const unsigned char *p = str, *endp = str + bytes;
  unsigned char *to;
  int c, len;

  /* Skip the prefix that holds no byte8 sequence; it stays put.  */
  for (; p < endp; p += len)
    {
      c = *p;
      len = BYTES_BY_CHAR_HEAD (c);
      if (CHAR_BYTE8_HEAD_P (c))
        break;
    }

  /* From here on, read from P and write to TO.  */
  to = str + (p - str);
  while (p < endp)
    {
      c = *p;
      len = BYTES_BY_CHAR_HEAD (c);
      if (! CHAR_BYTE8_HEAD_P (c))
        {
          for (; len; len--)
            *to++ = *p++;
        }
      else
        {
          c = STRING_CHAR_ADVANCE (p);
          *to++ = CHAR_TO_BYTE8 (c);
        }
    }
  return (to - str);
}
743
744 int
745 string_count_byte8 (string)
746 Lisp_Object string;
747 {
748 int multibyte = STRING_MULTIBYTE (string);
749 int nbytes = STRING_BYTES (XSTRING (string));
750 unsigned char *p = XSTRING (string)->data;
751 unsigned char *pend = p + nbytes;
752 int count = 0;
753 int c, len;
754
755 if (multibyte)
756 while (p < pend)
757 {
758 c = *p;
759 len = BYTES_BY_CHAR_HEAD (c);
760
761 if (CHAR_BYTE8_HEAD_P (c))
762 count++;
763 p += len;
764 }
765 else
766 while (p < pend)
767 {
768 if (*p++ >= 0x80)
769 count++;
770 }
771 return count;
772 }
773
774
775 Lisp_Object
776 string_escape_byte8 (string)
777 Lisp_Object string;
778 {
779 int nchars = XSTRING (string)->size;
780 int nbytes = STRING_BYTES (XSTRING (string));
781 int multibyte = STRING_MULTIBYTE (string);
782 int byte8_count;
783 const unsigned char *src, *src_end;
784 unsigned char *dst;
785 Lisp_Object val;
786 int c, len;
787
788 if (multibyte && nchars == nbytes)
789 return string;
790
791 byte8_count = string_count_byte8 (string);
792
793 if (byte8_count == 0)
794 return string;
795
796 if (multibyte)
797 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
798 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
799 nbytes + byte8_count * 2);
800 else
801 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
802 val = make_uninit_string (nbytes + byte8_count * 3);
803
804 src = XSTRING (string)->data;
805 src_end = src + nbytes;
806 dst = XSTRING (val)->data;
807 if (multibyte)
808 while (src < src_end)
809 {
810 c = *src;
811 len = BYTES_BY_CHAR_HEAD (c);
812
813 if (CHAR_BYTE8_HEAD_P (c))
814 {
815 c = STRING_CHAR_ADVANCE (src);
816 c = CHAR_TO_BYTE8 (c);
817 sprintf ((char *) dst, "\\%03o", c);
818 dst += 4;
819 }
820 else
821 while (len--) *dst++ = *src++;
822 }
823 else
824 while (src < src_end)
825 {
826 c = *src++;
827 if (c >= 0x80)
828 {
829 sprintf ((char *) dst, "\\%03o", c);
830 dst += 4;
831 }
832 else
833 *dst++ = c;
834 }
835 return val;
836 }
837
838 \f
839 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
840 doc: /*
841 Concatenate all the argument characters and make the result a string.
842 usage: (string &rest CHARACTERS) */)
843 (n, args)
844 int n;
845 Lisp_Object *args;
846 {
847 int i;
848 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
849 unsigned char *p = buf;
850 int c;
851
852 for (i = 0; i < n; i++)
853 {
854 CHECK_CHARACTER (args[i]);
855 c = XINT (args[i]);
856 p += CHAR_STRING (c, p);
857 }
858
859 return make_string_from_bytes ((char *) buf, n, p - buf);
860 }
861
/* One-time initialization hook for this file.  There is currently
   nothing to set up, so the body is empty.  */

void
init_character_once ()
{
}
866
867 #ifdef emacs
868
869 void
870 syms_of_character ()
871 {
872 DEFSYM (Qcharacterp, "characterp");
873 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
874
875 staticpro (&Vchar_unify_table);
876 Vchar_unify_table = Qnil;
877
878 defsubr (&Smax_char);
879 defsubr (&Scharacterp);
880 defsubr (&Sunibyte_char_to_multibyte);
881 defsubr (&Smultibyte_char_to_unibyte);
882 defsubr (&Schar_bytes);
883 defsubr (&Schar_width);
884 defsubr (&Sstring_width);
885 defsubr (&Schar_direction);
886 defsubr (&Schars_in_region);
887 defsubr (&Sstring);
888
889 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
890 doc: /*
891 Vector recording all translation tables ever defined.
892 Each element is a pair (SYMBOL . TABLE) relating the table to the
893 symbol naming it. The ID of a translation table is an index into this vector. */);
894 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
895
896 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
897 doc: /*
898 A char-table for characters which invoke auto-filling.
899 Such characters have value t in this table. */);
900 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
901 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
902 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
903
904 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
905 doc: /*
906 A char-table for width (columns) of each character. */);
907 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
908 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
909 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
910 make_number (4));
911
912 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
913 doc: /* A char-table for direction of each character. */);
914 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
915
916 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
917 doc: /* A char-table for each printable character. */);
918 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
919
920 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
921 doc: /* Char table of script symbols.
922 It has one extra slot whose value is a list of script symbols. */);
923
924 /* Intern this now in case it isn't already done.
925 Setting this variable twice is harmless.
926 But don't staticpro it here--that is done in alloc.c. */
927 Qchar_table_extra_slots = intern ("char-table-extra-slots");
928 DEFSYM (Qchar_script_table, "char-script-table");
929 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
930 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
931 }
932
933 #endif /* emacs */