]> code.delx.au - gnu-emacs/blob - src/character.c
(unibyte_char_to_multibyte): Refer to
[gnu-emacs] / src / character.c
1 /* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29 #ifdef emacs
30 #include <config.h>
31 #endif
32
33 #include <stdio.h>
34
35 #ifdef emacs
36
37 #include <sys/types.h>
38 #include "lisp.h"
39 #include "character.h"
40 #include "buffer.h"
41 #include "charset.h"
42 #include "composite.h"
43 #include "disptab.h"
44
45 #else /* not emacs */
46
47 #include "mulelib.h"
48
49 #endif /* emacs */
50
51 Lisp_Object Qcharacterp;
52
53 /* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55 Lisp_Object Vtranslation_table_vector;
56
57 /* A char-table for characters which may invoke auto-filling. */
58 Lisp_Object Vauto_fill_chars;
59
60 Lisp_Object Qauto_fill_chars;
61
62 Lisp_Object Vchar_unify_table;
63
64 /* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66 Lisp_Object Vprintable_chars;
67
68 /* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70 Lisp_Object Vchar_width_table;
71
72 /* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74 Lisp_Object Vchar_direction_table;
75
76 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77 unsigned char *_fetch_multibyte_char_p;
78 int _fetch_multibyte_char_len;
79
80 /* Char table of scripts. */
81 Lisp_Object Vchar_script_table;
82
83 static Lisp_Object Qchar_script_table;
84
85
86 \f
87
88 int
89 char_string_with_unification (c, p)
90 int c;
91 unsigned char *p;
92 {
93 int bytes;
94
95 MAYBE_UNIFY_CHAR (c);
96
97 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
98 {
99 bytes = CHAR_STRING (c, p);
100 }
101 else if (c <= MAX_4_BYTE_CHAR)
102 {
103 p[0] = (0xF0 | (c >> 18));
104 p[1] = (0x80 | ((c >> 12) & 0x3F));
105 p[2] = (0x80 | ((c >> 6) & 0x3F));
106 p[3] = (0x80 | (c & 0x3F));
107 bytes = 4;
108 }
109 else
110 {
111 p[0] = 0xF8;
112 p[1] = (0x80 | ((c >> 18) & 0x0F));
113 p[2] = (0x80 | ((c >> 12) & 0x3F));
114 p[3] = (0x80 | ((c >> 6) & 0x3F));
115 p[4] = (0x80 | (c & 0x3F));
116 bytes = 5;
117 }
118
119 return bytes;
120 }
121
122
123 int
124 string_char_with_unification (p, advanced, len)
125 const unsigned char *p;
126 const unsigned char **advanced;
127 int *len;
128 {
129 int c;
130 const unsigned char *saved_p = p;
131
132 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
133 {
134 c = STRING_CHAR_ADVANCE (p);
135 }
136 else if (! (*p & 0x08))
137 {
138 c = ((((p)[0] & 0xF) << 18)
139 | (((p)[1] & 0x3F) << 12)
140 | (((p)[2] & 0x3F) << 6)
141 | ((p)[3] & 0x3F));
142 p += 4;
143 }
144 else
145 {
146 c = ((((p)[1] & 0x3F) << 18)
147 | (((p)[2] & 0x3F) << 12)
148 | (((p)[3] & 0x3F) << 6)
149 | ((p)[4] & 0x3F));
150 p += 5;
151 }
152
153 MAYBE_UNIFY_CHAR (c);
154
155 if (len)
156 *len = p - saved_p;
157 if (advanced)
158 *advanced = p;
159 return c;
160 }
161
162
163 /* Translate character C by translation table TABLE. If C is
164 negative, translate a character specified by CHARSET and CODE. If
165 no translation is found in TABLE, return the untranslated
166 character. */
167
168 int
169 translate_char (table, c)
170 Lisp_Object table;
171 int c;
172 {
173 Lisp_Object ch;
174
175 if (! CHAR_TABLE_P (table))
176 return c;
177 ch = CHAR_TABLE_REF (table, c);
178 if (! CHARACTERP (ch))
179 return c;
180 return XINT (ch);
181 }
182
183 /* Convert the unibyte character C to the corresponding multibyte
184 character based on the current value of charset_unibyte. If C
185 can't be converted, return C. */
186
187 int
188 unibyte_char_to_multibyte (c)
189 int c;
190 {
191 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
192 int c1 = DECODE_CHAR (charset, c);
193
194 return ((c1 >= 0) ? c1 : c);
195 }
196
197
198 /* Convert the multibyte character C to unibyte 8-bit character based
199 on the current value of charset_unibyte. If dimension of
200 charset_unibyte is more than one, return (C & 0xFF).
201
202 The argument REV_TBL is now ignored. It will be removed in the
203 future. */
204
205 int
206 multibyte_char_to_unibyte (c, rev_tbl)
207 int c;
208 Lisp_Object rev_tbl;
209 {
210 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
211 unsigned c1 = ENCODE_CHAR (charset, c);
212
213 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
214 }
215
216
217 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
218 doc: /* Return non-nil if OBJECT is a character. */)
219 (object, ignore)
220 Lisp_Object object, ignore;
221 {
222 return (CHARACTERP (object) ? Qt : Qnil);
223 }
224
225 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
226 doc: /* Return the character of the maximum code. */)
227 ()
228 {
229 return make_number (MAX_CHAR);
230 }
231
232 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
233 Sunibyte_char_to_multibyte, 1, 1, 0,
234 doc: /* Convert the unibyte character CH to multibyte character.
235 The multibyte character is a result of decoding CH by
236 the current primary charset (value of `charset-primary'). */)
237 (ch)
238 Lisp_Object ch;
239 {
240 int c;
241 struct charset *charset;
242
243 CHECK_CHARACTER (ch);
244 c = XFASTINT (ch);
245 if (c >= 0400)
246 error ("Invalid unibyte character: %d", c);
247 charset = CHARSET_FROM_ID (charset_unibyte);
248 c = DECODE_CHAR (charset, c);
249 if (c < 0)
250 c = BYTE8_TO_CHAR (XFASTINT (ch));
251 return make_number (c);
252 }
253
254 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
255 Smultibyte_char_to_unibyte, 1, 1, 0,
256 doc: /* Convert the multibyte character CH to unibyte character.\n\
257 The unibyte character is a result of encoding CH by
258 the current primary charset (value of `charset-primary'). */)
259 (ch)
260 Lisp_Object ch;
261 {
262 int c;
263
264 CHECK_CHARACTER (ch);
265 c = XFASTINT (ch);
266 c = CHAR_TO_BYTE8 (c);
267 return make_number (c);
268 }
269
270 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
271 doc: /* Return 1 regardless of the argument CHAR.
272 This is now an obsolete function. We keep it just for backward compatibility. */)
273 (ch)
274 Lisp_Object ch;
275 {
276 CHECK_CHARACTER (ch);
277 return make_number (1);
278 }
279
280 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
281 doc: /* Return width of CHAR when displayed in the current buffer.
282 The width is measured by how many columns it occupies on the screen.
283 Tab is taken to occupy `tab-width' columns. */)
284 (ch)
285 Lisp_Object ch;
286 {
287 Lisp_Object disp;
288 int c, width;
289 struct Lisp_Char_Table *dp = buffer_display_table ();
290
291 CHECK_CHARACTER (ch);
292 c = XINT (ch);
293
294 /* Get the way the display table would display it. */
295 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
296
297 if (VECTORP (disp))
298 width = ASIZE (disp);
299 else
300 width = CHAR_WIDTH (c);
301
302 return make_number (width);
303 }
304
305 /* Return width of string STR of length LEN when displayed in the
306 current buffer. The width is measured by how many columns it
307 occupies on the screen. If PRECISION > 0, return the width of
308 longest substring that doesn't exceed PRECISION, and set number of
309 characters and bytes of the substring in *NCHARS and *NBYTES
310 respectively. */
311
312 int
313 c_string_width (str, len, precision, nchars, nbytes)
314 unsigned char *str;
315 int precision, *nchars, *nbytes;
316 {
317 int i = 0, i_byte = 0;
318 int width = 0;
319 struct Lisp_Char_Table *dp = buffer_display_table ();
320
321 while (i_byte < len)
322 {
323 int bytes, thiswidth;
324 Lisp_Object val;
325 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
326
327 if (dp)
328 {
329 val = DISP_CHAR_VECTOR (dp, c);
330 if (VECTORP (val))
331 thiswidth = XVECTOR (val)->size;
332 else
333 thiswidth = CHAR_WIDTH (c);
334 }
335 else
336 {
337 thiswidth = CHAR_WIDTH (c);
338 }
339
340 if (precision > 0
341 && (width + thiswidth > precision))
342 {
343 *nchars = i;
344 *nbytes = i_byte;
345 return width;
346 }
347 i++;
348 i_byte += bytes;
349 width += thiswidth;
350 }
351
352 if (precision > 0)
353 {
354 *nchars = i;
355 *nbytes = i_byte;
356 }
357
358 return width;
359 }
360
361 /* Return width of string STR of length LEN when displayed in the
362 current buffer. The width is measured by how many columns it
363 occupies on the screen. */
364
365 int
366 strwidth (str, len)
367 unsigned char *str;
368 int len;
369 {
370 return c_string_width (str, len, -1, NULL, NULL);
371 }
372
373 /* Return width of Lisp string STRING when displayed in the current
374 buffer. The width is measured by how many columns it occupies on
375 the screen while paying attention to compositions. If PRECISION >
376 0, return the width of longest substring that doesn't exceed
377 PRECISION, and set number of characters and bytes of the substring
378 in *NCHARS and *NBYTES respectively. */
379
380 int
381 lisp_string_width (string, precision, nchars, nbytes)
382 Lisp_Object string;
383 int precision, *nchars, *nbytes;
384 {
385 int len = XSTRING (string)->size;
386 unsigned char *str = XSTRING (string)->data;
387 int i = 0, i_byte = 0;
388 int width = 0;
389 struct Lisp_Char_Table *dp = buffer_display_table ();
390
391 while (i < len)
392 {
393 int chars, bytes, thiswidth;
394 Lisp_Object val;
395 int cmp_id;
396 int ignore, end;
397
398 if (find_composition (i, -1, &ignore, &end, &val, string)
399 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
400 >= 0))
401 {
402 thiswidth = composition_table[cmp_id]->width;
403 chars = end - i;
404 bytes = string_char_to_byte (string, end) - i_byte;
405 }
406 else if (dp)
407 {
408 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
409
410 chars = 1;
411 val = DISP_CHAR_VECTOR (dp, c);
412 if (VECTORP (val))
413 thiswidth = XVECTOR (val)->size;
414 else
415 thiswidth = CHAR_WIDTH (c);
416 }
417 else
418 {
419 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
420
421 chars = 1;
422 thiswidth = CHAR_WIDTH (c);
423 }
424
425 if (precision > 0
426 && (width + thiswidth > precision))
427 {
428 *nchars = i;
429 *nbytes = i_byte;
430 return width;
431 }
432 i += chars;
433 i_byte += bytes;
434 width += thiswidth;
435 }
436
437 if (precision > 0)
438 {
439 *nchars = i;
440 *nbytes = i_byte;
441 }
442
443 return width;
444 }
445
446 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
447 doc: /* Return width of STRING when displayed in the current buffer.
448 Width is measured by how many columns it occupies on the screen.
449 When calculating width of a multibyte character in STRING,
450 only the base leading-code is considered; the validity of
451 the following bytes is not checked. Tabs in STRING are always
452 taken to occupy `tab-width' columns. */)
453 (str)
454 Lisp_Object str;
455 {
456 Lisp_Object val;
457
458 CHECK_STRING (str);
459 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
460 return val;
461 }
462
463 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
464 doc: /* Return the direction of CHAR.
465 The returned value is 0 for left-to-right and 1 for right-to-left. */)
466 (ch)
467 Lisp_Object ch;
468 {
469 int c;
470
471 CHECK_CHARACTER (ch);
472 c = XINT (ch);
473 return CHAR_TABLE_REF (Vchar_direction_table, c);
474 }
475
476 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
477 doc: /* Return number of characters between BEG and END.
478 This is now an obsolete function. We keep it just for backward compatibility. */)
479 (beg, end)
480 Lisp_Object beg, end;
481 {
482 int from, to;
483
484 CHECK_NUMBER_COERCE_MARKER (beg);
485 CHECK_NUMBER_COERCE_MARKER (end);
486
487 from = min (XFASTINT (beg), XFASTINT (end));
488 to = max (XFASTINT (beg), XFASTINT (end));
489
490 return make_number (to - from);
491 }
492
493 /* Return the number of characters in the NBYTES bytes at PTR.
494 This works by looking at the contents and checking for multibyte
495 sequences while assuming that there's no invalid sequence.
496 However, if the current buffer has enable-multibyte-characters =
497 nil, we treat each byte as a character. */
498
499 int
500 chars_in_text (ptr, nbytes)
501 unsigned char *ptr;
502 int nbytes;
503 {
504 /* current_buffer is null at early stages of Emacs initialization. */
505 if (current_buffer == 0
506 || NILP (current_buffer->enable_multibyte_characters))
507 return nbytes;
508
509 return multibyte_chars_in_text (ptr, nbytes);
510 }
511
512 /* Return the number of characters in the NBYTES bytes at PTR.
513 This works by looking at the contents and checking for multibyte
514 sequences while assuming that there's no invalid sequence. It
515 ignores enable-multibyte-characters. */
516
517 int
518 multibyte_chars_in_text (ptr, nbytes)
519 unsigned char *ptr;
520 int nbytes;
521 {
522 unsigned char *endp = ptr + nbytes;
523 int chars = 0;
524
525 while (ptr < endp)
526 {
527 int len = MULTIBYTE_LENGTH (ptr, endp);
528
529 if (len == 0)
530 abort ();
531 ptr += len;
532 chars++;
533 }
534
535 return chars;
536 }
537
538 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count
539 characters and bytes in it, and store them in *NCHARS and *NBYTES
540 respectively. On counting bytes, pay attention to that 8-bit
541 characters not constructing a valid multibyte sequence are
542 represented by 2-byte in a multibyte text. */
543
544 void
545 parse_str_as_multibyte (str, len, nchars, nbytes)
546 unsigned char *str;
547 int len, *nchars, *nbytes;
548 {
549 unsigned char *endp = str + len;
550 int n, chars = 0, bytes = 0;
551
552 if (len >= MAX_MULTIBYTE_LENGTH)
553 {
554 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
555 while (str < adjusted_endp)
556 {
557 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
558 str += n, bytes += n;
559 else
560 str++, bytes += 2;
561 chars++;
562 }
563 }
564 while (str < endp)
565 {
566 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
567 str += n, bytes += n;
568 else
569 str++, bytes += 2;
570 chars++;
571 }
572
573 *nchars = chars;
574 *nbytes = bytes;
575 return;
576 }
577
578 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
579 It actually converts only such 8-bit characters that don't contruct
580 a multibyte sequence to multibyte forms of Latin-1 characters. If
581 NCHARS is nonzero, set *NCHARS to the number of characters in the
582 text. It is assured that we can use LEN bytes at STR as a work
583 area and that is enough. Return the number of bytes of the
584 resulting text. */
585
586 int
587 str_as_multibyte (str, len, nbytes, nchars)
588 unsigned char *str;
589 int len, nbytes, *nchars;
590 {
591 unsigned char *p = str, *endp = str + nbytes;
592 unsigned char *to;
593 int chars = 0;
594 int n;
595
596 if (nbytes >= MAX_MULTIBYTE_LENGTH)
597 {
598 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
599 while (p < adjusted_endp
600 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
601 p += n, chars++;
602 }
603 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
604 p += n, chars++;
605 if (nchars)
606 *nchars = chars;
607 if (p == endp)
608 return nbytes;
609
610 to = p;
611 nbytes = endp - p;
612 endp = str + len;
613 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
614 p = endp - nbytes;
615
616 if (nbytes >= MAX_MULTIBYTE_LENGTH)
617 {
618 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
619 while (p < adjusted_endp)
620 {
621 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
622 {
623 while (n--)
624 *to++ = *p++;
625 }
626 else
627 {
628 int c = *p++;
629 c = BYTE8_TO_CHAR (c);
630 to += CHAR_STRING (c, to);
631 }
632 }
633 chars++;
634 }
635 while (p < endp)
636 {
637 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
638 {
639 while (n--)
640 *to++ = *p++;
641 }
642 else
643 {
644 int c = *p++;
645 c = BYTE8_TO_CHAR (c);
646 to += CHAR_STRING (c, to);
647 }
648 chars++;
649 }
650 if (nchars)
651 *nchars = chars;
652 return (to - str);
653 }
654
655 /* Parse unibyte string at STR of LEN bytes, and return the number of
656 bytes it may ocupy when converted to multibyte string by
657 `str_to_multibyte'. */
658
659 int
660 parse_str_to_multibyte (str, len)
661 unsigned char *str;
662 int len;
663 {
664 unsigned char *endp = str + len;
665 int bytes;
666
667 for (bytes = 0; str < endp; str++)
668 bytes += (*str < 0x80) ? 1 : 2;
669 return bytes;
670 }
671
672
673 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
674 that contains the same single-byte characters. It actually
675 converts all 8-bit characters to multibyte forms. It is assured
676 that we can use LEN bytes at STR as a work area and that is
677 enough. */
678
679 int
680 str_to_multibyte (str, len, bytes)
681 unsigned char *str;
682 int len, bytes;
683 {
684 unsigned char *p = str, *endp = str + bytes;
685 unsigned char *to;
686
687 while (p < endp && *p < 0x80) p++;
688 if (p == endp)
689 return bytes;
690 to = p;
691 bytes = endp - p;
692 endp = str + len;
693 safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
694 p = endp - bytes;
695 while (p < endp)
696 {
697 int c = *p++;
698
699 if (c >= 0x80)
700 c = BYTE8_TO_CHAR (c);
701 to += CHAR_STRING (c, to);
702 }
703 return (to - str);
704 }
705
706 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
707 actually converts characters in the range 0x80..0xFF to
708 unibyte. */
709
710 int
711 str_as_unibyte (str, bytes)
712 unsigned char *str;
713 int bytes;
714 {
715 const unsigned char *p = str, *endp = str + bytes;
716 unsigned char *to;
717 int c, len;
718
719 while (p < endp)
720 {
721 c = *p;
722 len = BYTES_BY_CHAR_HEAD (c);
723 if (CHAR_BYTE8_HEAD_P (c))
724 break;
725 p += len;
726 }
727 to = str + (p - str);
728 while (p < endp)
729 {
730 c = *p;
731 len = BYTES_BY_CHAR_HEAD (c);
732 if (CHAR_BYTE8_HEAD_P (c))
733 {
734 c = STRING_CHAR_ADVANCE (p);
735 *to++ = CHAR_TO_BYTE8 (c);
736 }
737 else
738 {
739 while (len--) *to++ = *p++;
740 }
741 }
742 return (to - str);
743 }
744
745 int
746 string_count_byte8 (string)
747 Lisp_Object string;
748 {
749 int multibyte = STRING_MULTIBYTE (string);
750 int nbytes = STRING_BYTES (XSTRING (string));
751 unsigned char *p = XSTRING (string)->data;
752 unsigned char *pend = p + nbytes;
753 int count = 0;
754 int c, len;
755
756 if (multibyte)
757 while (p < pend)
758 {
759 c = *p;
760 len = BYTES_BY_CHAR_HEAD (c);
761
762 if (CHAR_BYTE8_HEAD_P (c))
763 count++;
764 p += len;
765 }
766 else
767 while (p < pend)
768 {
769 if (*p++ >= 0x80)
770 count++;
771 }
772 return count;
773 }
774
775
776 Lisp_Object
777 string_escape_byte8 (string)
778 Lisp_Object string;
779 {
780 int nchars = XSTRING (string)->size;
781 int nbytes = STRING_BYTES (XSTRING (string));
782 int multibyte = STRING_MULTIBYTE (string);
783 int byte8_count;
784 const unsigned char *src, *src_end;
785 unsigned char *dst;
786 Lisp_Object val;
787 int c, len;
788
789 if (multibyte && nchars == nbytes)
790 return string;
791
792 byte8_count = string_count_byte8 (string);
793
794 if (byte8_count == 0)
795 return string;
796
797 if (multibyte)
798 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
799 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
800 nbytes + byte8_count * 2);
801 else
802 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
803 val = make_uninit_string (nbytes + byte8_count * 3);
804
805 src = XSTRING (string)->data;
806 src_end = src + nbytes;
807 dst = XSTRING (val)->data;
808 if (multibyte)
809 while (src < src_end)
810 {
811 c = *src;
812 len = BYTES_BY_CHAR_HEAD (c);
813
814 if (CHAR_BYTE8_HEAD_P (c))
815 {
816 c = STRING_CHAR_ADVANCE (src);
817 c = CHAR_TO_BYTE8 (c);
818 sprintf ((char *) dst, "\\%03o", c);
819 dst += 4;
820 }
821 else
822 while (len--) *dst++ = *src++;
823 }
824 else
825 while (src < src_end)
826 {
827 c = *src++;
828 if (c >= 0x80)
829 {
830 sprintf ((char *) dst, "\\%03o", c);
831 dst += 4;
832 }
833 else
834 *dst++ = c;
835 }
836 return val;
837 }
838
839 \f
840 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
841 doc: /*
842 Concatenate all the argument characters and make the result a string.
843 usage: (string &rest CHARACTERS) */)
844 (n, args)
845 int n;
846 Lisp_Object *args;
847 {
848 int i;
849 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
850 unsigned char *p = buf;
851 int c;
852
853 for (i = 0; i < n; i++)
854 {
855 CHECK_CHARACTER (args[i]);
856 c = XINT (args[i]);
857 p += CHAR_STRING (c, p);
858 }
859
860 return make_string_from_bytes ((char *) buf, n, p - buf);
861 }
862
863 void
864 init_character_once ()
865 {
866 }
867
868 #ifdef emacs
869
870 void
871 syms_of_character ()
872 {
873 DEFSYM (Qcharacterp, "characterp");
874 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
875
876 staticpro (&Vchar_unify_table);
877 Vchar_unify_table = Qnil;
878
879 defsubr (&Smax_char);
880 defsubr (&Scharacterp);
881 defsubr (&Sunibyte_char_to_multibyte);
882 defsubr (&Smultibyte_char_to_unibyte);
883 defsubr (&Schar_bytes);
884 defsubr (&Schar_width);
885 defsubr (&Sstring_width);
886 defsubr (&Schar_direction);
887 defsubr (&Schars_in_region);
888 defsubr (&Sstring);
889
890 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
891 doc: /*
892 Vector recording all translation tables ever defined.
893 Each element is a pair (SYMBOL . TABLE) relating the table to the
894 symbol naming it. The ID of a translation table is an index into this vector. */);
895 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
896
897 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
898 doc: /*
899 A char-table for characters which invoke auto-filling.
900 Such characters have value t in this table. */);
901 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
902 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
903 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
904
905 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
906 doc: /*
907 A char-table for width (columns) of each character. */);
908 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
909 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
910 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
911 make_number (4));
912
913 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
914 doc: /* A char-table for direction of each character. */);
915 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
916
917 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
918 doc: /* A char-table for each printable character. */);
919 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
920
921 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
922 doc: /* Char table of script symbols.
923 It has one extra slot whose value is a list of script symbols. */);
924
925 /* Intern this now in case it isn't already done.
926 Setting this variable twice is harmless.
927 But don't staticpro it here--that is done in alloc.c. */
928 Qchar_table_extra_slots = intern ("char-table-extra-slots");
929 DEFSYM (Qchar_script_table, "char-script-table");
930 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
931 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
932 }
933
934 #endif /* emacs */