1 /* Composite sequence support.
2 Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
29 #include "character.h"
30 #include "intervals.h"
32 /* Emacs uses special text property `composition' to support character
33 composition. A sequence of characters that have the same (i.e. eq)
34 `composition' property value is treated as a single composite
35 sequence (we call it just `composition' here after). Characters in
36 a composition are all composed somehow on the screen.
38 The property value has this form when the composition is made:
39 ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
40 then turns to this form:
41 (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
42 when the composition is registered in composition_hash_table and
43 composition_table. These rather peculiar structures were designed
44 to make it easy to distinguish them quickly (we can do that by
45 checking only the first element) and to extract LENGTH (from the
46 former form) and COMPOSITION-ID (from the latter form).
48 We register a composition when it is displayed, or when the width
49 is required (for instance, to calculate columns).
51 LENGTH -- Length of the composition. This information is used to
52 check the validity of the composition.
54 COMPONENTS -- Character, string, vector, list, or nil.
56 If it is nil, characters in the text are composed relatively
57 according to their metrics in font glyphs.
59 If it is a character or a string, the character or characters
60 in the string are composed relatively.
62 If it is a vector or list of integers, the element is a
63 character or an encoded composition rule. The characters are
64 composed according to the rules. (2N)th elements are
65 characters to be composed and (2N+1)th elements are
66 composition rules to tell how to compose (2N+2)th element with
67 the previously composed 2N glyphs.
69 COMPONENTS-VEC -- Vector of integers. In relative composition, the
70 elements are characters to be composed. In rule-base
71 composition, the elements are characters or encoded
74 MODIFICATION-FUNC -- If non nil, it is a function to call when the
75 composition gets invalid after a modification in a buffer. If
76 it is nil, a function in `composition-function-table' of the
77 first character in the sequence is called.
79 COMPOSITION-ID --Identification number of the composition. It is
80 used as an index to composition_table for the composition.
82 When Emacs has to display a composition or has to know its
83 displaying width, the function get_composition_id is called. It
84 returns COMPOSITION-ID so that the caller can access the
85 information about the composition through composition_table. If a
86 COMPOSITION-ID has not yet been assigned to the composition,
87 get_composition_id checks the validity of `composition' property,
88 and, if valid, assigns a new ID, registers the information in
89 composition_hash_table and composition_table, and changes the form
90 of the property value. If the property is invalid, return -1
91 without changing the property value.
93 We use two tables to keep information about composition;
94 composition_hash_table and composition_table.
96 The former is a hash table in which keys are COMPONENTS-VECs and
97 values are the corresponding COMPOSITION-IDs. This hash table is
98 weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
99 `composition' property, it won't be collected as garbage until all
100 bits of text that have the same COMPONENTS-VEC are deleted.
102 The latter is a table of pointers to `struct composition' indexed
103 by COMPOSITION-ID. This structure keeps the other information (see
106 In general, a text property holds information about individual
107 characters. But, a `composition' property holds information about
108 a sequence of characters (in this sense, it is like the `intangible'
109 property). That means that we should not share the property value
110 in adjacent compositions -- we can't distinguish them if they have the
111 same property. So, after any changes, we call
112 `update_compositions' and change a property of one of adjacent
113 compositions to a copy of it. This function also runs a proper
114 composition modification function to make a composition that gets
115 invalid by the change valid again.
117 As the value of the `composition' property holds information about a
118 specific range of text, the value gets invalid if we change the
119 text in the range. We treat the `composition' property as always
120 rear-nonsticky (currently by setting default-text-properties to
121 (rear-nonsticky (composition))) and we never make properties of
122 adjacent compositions identical. Thus, any such changes make the
123 range just shorter. So, we can check the validity of the `composition'
124 property by comparing LENGTH information with the actual length of
130 Lisp_Object Qcomposition
;
132 /* Table of pointers to the structure `composition' indexed by
133 COMPOSITION-ID. This structure is for storing information about
134 each composition except for COMPONENTS-VEC. */
135 struct composition
**composition_table
;
137 /* The current size of `composition_table'. */
138 static int composition_table_size
;
140 /* Number of compositions currently made. */
143 /* Hash table for compositions. The key is COMPONENTS-VEC of
144 `composition' property. The value is the corresponding
146 Lisp_Object composition_hash_table
;
148 /* Function to call to adjust composition. */
149 Lisp_Object Vcompose_chars_after_function
;
151 Lisp_Object Qauto_composed
;
152 Lisp_Object Vauto_composition_function
;
153 Lisp_Object Qauto_composition_function
;
155 EXFUN (Fremove_list_of_text_properties
, 4);
157 /* Temporary variable used in macros COMPOSITION_XXX. */
158 Lisp_Object composition_temp
;
160 /* Return COMPOSITION-ID of a composition at buffer position
161 CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
162 the sequence is PROP. STRING, if non-nil, is a string that
163 contains the composition instead of the current buffer.
165 If the composition is invalid, return -1. */
168 get_composition_id (charpos
, bytepos
, nchars
, prop
, string
)
169 int charpos
, bytepos
, nchars
;
170 Lisp_Object prop
, string
;
172 Lisp_Object id
, length
, components
, key
, *key_contents
;
174 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (composition_hash_table
);
177 struct composition
*cmp
;
181 Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
183 Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
185 if (nchars
== 0 || !CONSP (prop
))
186 goto invalid_composition
;
191 /* PROP should be Form-B. */
192 if (XINT (id
) < 0 || XINT (id
) >= n_compositions
)
193 goto invalid_composition
;
197 /* PROP should be Form-A.
198 Thus, ID should be (LENGTH . COMPONENTS). */
200 goto invalid_composition
;
202 if (!INTEGERP (length
) || XINT (length
) != nchars
)
203 goto invalid_composition
;
205 components
= XCDR (id
);
207 /* Check if the same composition has already been registered or not
208 by consulting composition_hash_table. The key for this table is
209 COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
210 nil, vector of characters in the composition range. */
211 if (INTEGERP (components
))
212 key
= Fmake_vector (make_number (1), components
);
213 else if (STRINGP (components
) || CONSP (components
))
214 key
= Fvconcat (1, &components
);
215 else if (VECTORP (components
))
217 else if (NILP (components
))
219 key
= Fmake_vector (make_number (nchars
), Qnil
);
220 if (STRINGP (string
))
221 for (i
= 0; i
< nchars
; i
++)
223 FETCH_STRING_CHAR_ADVANCE (ch
, string
, charpos
, bytepos
);
224 XVECTOR (key
)->contents
[i
] = make_number (ch
);
227 for (i
= 0; i
< nchars
; i
++)
229 FETCH_CHAR_ADVANCE (ch
, charpos
, bytepos
);
230 XVECTOR (key
)->contents
[i
] = make_number (ch
);
234 goto invalid_composition
;
236 hash_index
= hash_lookup (hash_table
, key
, &hash_code
);
239 /* We have already registered the same composition. Change PROP
240 from Form-A above to Form-B while replacing COMPONENTS with
241 COMPONENTS-VEC stored in the hash table. We can directly
242 modify the cons cell of PROP because it is not shared. */
243 key
= HASH_KEY (hash_table
, hash_index
);
244 id
= HASH_VALUE (hash_table
, hash_index
);
246 XSETCDR (prop
, Fcons (make_number (nchars
), Fcons (key
, XCDR (prop
))));
250 /* This composition is a new one. We must register it. */
252 /* Check if we have sufficient memory to store this information. */
253 if (composition_table_size
== 0)
255 composition_table_size
= 256;
257 = (struct composition
**) xmalloc (sizeof (composition_table
[0])
258 * composition_table_size
);
260 else if (composition_table_size
<= n_compositions
)
262 composition_table_size
+= 256;
264 = (struct composition
**) xrealloc (composition_table
,
265 sizeof (composition_table
[0])
266 * composition_table_size
);
269 key_contents
= XVECTOR (key
)->contents
;
271 /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
272 vector or a list. It should be a sequence of:
273 char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
274 if (VECTORP (components
) || CONSP (components
))
276 int len
= XVECTOR (key
)->size
;
278 /* The number of elements should be odd. */
280 goto invalid_composition
;
281 /* All elements should be integers (character or encoded
282 composition rule). */
283 for (i
= 0; i
< len
; i
++)
285 if (!INTEGERP (key_contents
[i
]))
286 goto invalid_composition
;
290 /* Change PROP from Form-A above to Form-B. We can directly modify
291 the cons cell of PROP because it is not shared. */
292 XSETFASTINT (id
, n_compositions
);
294 XSETCDR (prop
, Fcons (make_number (nchars
), Fcons (key
, XCDR (prop
))));
296 /* Register the composition in composition_hash_table. */
297 hash_index
= hash_put (hash_table
, key
, id
, hash_code
);
299 /* Register the composition in composition_table. */
300 cmp
= (struct composition
*) xmalloc (sizeof (struct composition
));
302 cmp
->method
= (NILP (components
)
303 ? COMPOSITION_RELATIVE
304 : ((INTEGERP (components
) || STRINGP (components
))
305 ? COMPOSITION_WITH_ALTCHARS
306 : COMPOSITION_WITH_RULE_ALTCHARS
));
307 cmp
->hash_index
= hash_index
;
308 glyph_len
= (cmp
->method
== COMPOSITION_WITH_RULE_ALTCHARS
309 ? (XVECTOR (key
)->size
+ 1) / 2
310 : XVECTOR (key
)->size
);
311 cmp
->glyph_len
= glyph_len
;
312 cmp
->offsets
= (short *) xmalloc (sizeof (short) * glyph_len
* 2);
315 /* Calculate the width of overall glyphs of the composition. */
316 if (cmp
->method
!= COMPOSITION_WITH_RULE_ALTCHARS
)
318 /* Relative composition. */
320 for (i
= 0; i
< glyph_len
; i
++)
323 ch
= XINT (key_contents
[i
]);
324 this_width
= CHAR_WIDTH (ch
);
325 if (cmp
->width
< this_width
)
326 cmp
->width
= this_width
;
331 /* Rule-base composition. */
332 float leftmost
= 0.0, rightmost
;
334 ch
= XINT (key_contents
[0]);
335 rightmost
= CHAR_WIDTH (ch
);
337 for (i
= 1; i
< glyph_len
; i
+= 2)
339 int rule
, gref
, nref
;
343 rule
= XINT (key_contents
[i
]);
344 ch
= XINT (key_contents
[i
+ 1]);
345 this_width
= CHAR_WIDTH (ch
);
347 /* A composition rule is specified by an integer value
348 that encodes global and new reference points (GREF and
349 NREF). GREF and NREF are specified by numbers as
357 ---3---4---5--- baseline
361 COMPOSITION_DECODE_RULE (rule
, gref
, nref
);
362 this_left
= (leftmost
363 + (gref
% 3) * (rightmost
- leftmost
) / 2.0
364 - (nref
% 3) * this_width
/ 2.0);
366 if (this_left
< leftmost
)
367 leftmost
= this_left
;
368 if (this_left
+ this_width
> rightmost
)
369 rightmost
= this_left
+ this_width
;
372 cmp
->width
= rightmost
- leftmost
;
373 if (cmp
->width
< (rightmost
- leftmost
))
374 /* To get a ceiling integer value. */
378 composition_table
[n_compositions
] = cmp
;
380 return n_compositions
++;
383 /* Would it be better to remove this `composition' property? */
388 /* Find a composition at or nearest to position POS of OBJECT (buffer
391 OBJECT defaults to the current buffer. If there's a composition at
392 POS, set *START and *END to the start and end of the sequence,
393 *PROP to the `composition' property, and return 1.
395 If there's no composition at POS and LIMIT is negative, return 0.
397 Otherwise, search for a composition forward (LIMIT > POS) or
398 backward (LIMIT < POS). In this case, LIMIT bounds the search.
400 If a composition is found, set *START, *END, and *PROP as above,
401 and return 1, else return 0.
403 This doesn't check the validity of composition. */
406 find_composition (pos
, limit
, start
, end
, prop
, object
)
408 EMACS_INT
*start
, *end
;
409 Lisp_Object
*prop
, object
;
413 if (get_property_and_range (pos
, Qcomposition
, prop
, start
, end
, object
))
416 if (limit
< 0 || limit
== pos
)
419 if (limit
> pos
) /* search forward */
421 val
= Fnext_single_property_change (make_number (pos
), Qcomposition
,
422 object
, make_number (limit
));
427 else /* search backward */
429 if (get_property_and_range (pos
- 1, Qcomposition
, prop
, start
, end
,
432 val
= Fprevious_single_property_change (make_number (pos
), Qcomposition
,
433 object
, make_number (limit
));
439 get_property_and_range (pos
, Qcomposition
, prop
, start
, end
, object
);
443 /* Run a proper function to adjust the composition sitting between
444 FROM and TO with property PROP. */
447 run_composition_function (from
, to
, prop
)
452 EMACS_INT start
, end
;
454 func
= COMPOSITION_MODIFICATION_FUNC (prop
);
455 /* If an invalid composition precedes or follows, try to make them
458 && find_composition (from
- 1, -1, &start
, &end
, &prop
, Qnil
)
459 && !COMPOSITION_VALID_P (start
, end
, prop
))
462 && find_composition (to
, -1, &start
, &end
, &prop
, Qnil
)
463 && !COMPOSITION_VALID_P (start
, end
, prop
))
465 if (!NILP (Ffboundp (func
)))
466 call2 (func
, make_number (from
), make_number (to
));
469 /* Make invalid compositions adjacent to or inside FROM and TO valid.
470 CHECK_MASK is bitwise `or' of mask bits defined by macros
471 CHECK_XXX (see the comment in composite.h).
473 It also resets the text-property `auto-composed' to a proper region
474 so that automatic character composition works correctly later while
475 displaying the region.
477 This function is called when a buffer text is changed. If the
478 change is deletion, FROM == TO. Otherwise, FROM < TO. */
481 update_compositions (from
, to
, check_mask
)
486 EMACS_INT start
, end
;
487 /* The beginning and end of the region to set the property
488 `auto-composed' to nil. */
489 EMACS_INT min_pos
= from
, max_pos
= to
;
491 if (inhibit_modification_hooks
)
494 /* If FROM and TO are not in a valid range, do nothing. */
495 if (! (BEGV
<= from
&& from
<= to
&& to
<= ZV
))
498 if (check_mask
& CHECK_HEAD
)
500 /* FROM should be at composition boundary. But, insertion or
501 deletion will make two compositions adjacent and
502 indistinguishable when they have same (eq) property. To
503 avoid it, in such a case, we change the property of the
504 latter to the copy of it. */
506 && find_composition (from
- 1, -1, &start
, &end
, &prop
, Qnil
))
512 Fput_text_property (make_number (from
), make_number (end
),
514 Fcons (XCAR (prop
), XCDR (prop
)), Qnil
);
515 run_composition_function (start
, end
, prop
);
519 && find_composition (from
, -1, &start
, &from
, &prop
, Qnil
))
523 run_composition_function (start
, from
, prop
);
527 if (check_mask
& CHECK_INSIDE
)
529 /* In this case, we are sure that (check & CHECK_TAIL) is also
530 nonzero. Thus, here we should check only compositions before
533 && find_composition (from
, to
, &start
, &from
, &prop
, Qnil
)
535 run_composition_function (start
, from
, prop
);
538 if (check_mask
& CHECK_TAIL
)
541 && find_composition (to
- 1, -1, &start
, &end
, &prop
, Qnil
))
543 /* TO should be also at composition boundary. But,
544 insertion or deletion will make two compositions adjacent
545 and indistinguishable when they have same (eq) property.
546 To avoid it, in such a case, we change the property of
547 the former to the copy of it. */
550 Fput_text_property (make_number (start
), make_number (to
),
552 Fcons (XCAR (prop
), XCDR (prop
)), Qnil
);
555 run_composition_function (start
, end
, prop
);
558 && find_composition (to
, -1, &start
, &end
, &prop
, Qnil
))
560 run_composition_function (start
, end
, prop
);
564 if (min_pos
< max_pos
)
565 Fremove_list_of_text_properties (make_number (min_pos
),
566 make_number (max_pos
),
567 Fcons (Qauto_composed
, Qnil
), Qnil
);
571 /* Modify composition property values in LIST destructively. LIST is
572 a list as returned from text_property_list. Change values to the
573 top-level copies of them so that none of them are `eq'. */
576 make_composition_value_copy (list
)
579 Lisp_Object plist
, val
;
581 for (; CONSP (list
); list
= XCDR (list
))
583 plist
= XCAR (XCDR (XCDR (XCAR (list
))));
584 while (CONSP (plist
) && CONSP (XCDR (plist
)))
586 if (EQ (XCAR (plist
), Qcomposition
)
587 && (val
= XCAR (XCDR (plist
)), CONSP (val
)))
588 XSETCAR (XCDR (plist
), Fcons (XCAR (val
), XCDR (val
)));
589 plist
= XCDR (XCDR (plist
));
595 /* Make text in the region between START and END a composition that
596 has COMPONENTS and MODIFICATION-FUNC.
598 If STRING is non-nil, then operate on characters contained between
599 indices START and END in STRING. */
602 compose_text (start
, end
, components
, modification_func
, string
)
604 Lisp_Object components
, modification_func
, string
;
608 prop
= Fcons (Fcons (make_number (end
- start
), components
),
610 Fput_text_property (make_number (start
), make_number (end
),
611 Qcomposition
, prop
, string
);
614 /* Emacs Lisp APIs. */
616 DEFUN ("compose-region-internal", Fcompose_region_internal
,
617 Scompose_region_internal
, 2, 4, 0,
618 doc
: /* Internal use only.
620 Compose text in the region between START and END.
621 Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
622 for the composition. See `compose-region' for more detail. */)
623 (start
, end
, components
, mod_func
)
624 Lisp_Object start
, end
, components
, mod_func
;
626 validate_region (&start
, &end
);
627 if (!NILP (components
)
628 && !INTEGERP (components
)
629 && !CONSP (components
)
630 && !STRINGP (components
))
631 CHECK_VECTOR (components
);
633 compose_text (XINT (start
), XINT (end
), components
, mod_func
, Qnil
);
637 DEFUN ("compose-string-internal", Fcompose_string_internal
,
638 Scompose_string_internal
, 3, 5, 0,
639 doc
: /* Internal use only.
641 Compose text between indices START and END of STRING.
642 Optional 4th and 5th arguments are COMPONENTS and MODIFICATION-FUNC
643 for the composition. See `compose-string' for more detail. */)
644 (string
, start
, end
, components
, mod_func
)
645 Lisp_Object string
, start
, end
, components
, mod_func
;
647 CHECK_STRING (string
);
648 CHECK_NUMBER (start
);
651 if (XINT (start
) < 0 ||
652 XINT (start
) > XINT (end
)
653 || XINT (end
) > SCHARS (string
))
654 args_out_of_range (start
, end
);
656 compose_text (XINT (start
), XINT (end
), components
, mod_func
, string
);
660 DEFUN ("find-composition-internal", Ffind_composition_internal
,
661 Sfind_composition_internal
, 4, 4, 0,
662 doc
: /* Internal use only.
664 Return information about composition at or nearest to position POS.
665 See `find-composition' for more detail. */)
666 (pos
, limit
, string
, detail_p
)
667 Lisp_Object pos
, limit
, string
, detail_p
;
669 Lisp_Object prop
, tail
;
670 EMACS_INT start
, end
;
673 CHECK_NUMBER_COERCE_MARKER (pos
);
677 CHECK_NUMBER_COERCE_MARKER (limit
);
685 CHECK_STRING (string
);
686 if (XINT (pos
) < 0 || XINT (pos
) > SCHARS (string
))
687 args_out_of_range (string
, pos
);
691 if (XINT (pos
) < BEGV
|| XINT (pos
) > ZV
)
692 args_out_of_range (Fcurrent_buffer (), pos
);
695 if (!find_composition (start
, end
, &start
, &end
, &prop
, string
))
697 if (!COMPOSITION_VALID_P (start
, end
, prop
))
698 return Fcons (make_number (start
), Fcons (make_number (end
),
699 Fcons (Qnil
, Qnil
)));
701 return Fcons (make_number (start
), Fcons (make_number (end
),
704 if (COMPOSITION_REGISTERD_P (prop
))
705 id
= COMPOSITION_ID (prop
);
708 int start_byte
= (NILP (string
)
709 ? CHAR_TO_BYTE (start
)
710 : string_char_to_byte (string
, start
));
711 id
= get_composition_id (start
, start_byte
, end
- start
, prop
, string
);
716 Lisp_Object components
, relative_p
, mod_func
;
717 enum composition_method method
= COMPOSITION_METHOD (prop
);
718 int width
= composition_table
[id
]->width
;
720 components
= Fcopy_sequence (COMPOSITION_COMPONENTS (prop
));
721 relative_p
= (method
== COMPOSITION_WITH_RULE_ALTCHARS
723 mod_func
= COMPOSITION_MODIFICATION_FUNC (prop
);
724 tail
= Fcons (components
,
727 Fcons (make_number (width
), Qnil
))));
732 return Fcons (make_number (start
), Fcons (make_number (end
), tail
));
739 Qcomposition
= intern ("composition");
740 staticpro (&Qcomposition
);
742 /* Make a hash table for composition. */
745 extern Lisp_Object QCsize
;
749 args
[2] = QCweakness
;
752 args
[5] = make_number (311);
753 composition_hash_table
= Fmake_hash_table (6, args
);
754 staticpro (&composition_hash_table
);
757 /* Text property `composition' should be nonsticky by default. */
758 Vtext_property_default_nonsticky
759 = Fcons (Fcons (Qcomposition
, Qt
), Vtext_property_default_nonsticky
);
761 DEFVAR_LISP ("compose-chars-after-function", &Vcompose_chars_after_function
,
762 doc
: /* Function to adjust composition of buffer text.
764 The function is called with three arguments FROM, TO, and OBJECT.
765 FROM and TO specify the range of text of which composition should be
766 adjusted. OBJECT, if non-nil, is a string that contains the text.
768 This function is called after a text with `composition' property is
769 inserted or deleted to keep `composition' property of buffer text
772 The default value is the function `compose-chars-after'. */);
773 Vcompose_chars_after_function
= intern ("compose-chars-after");
775 Qauto_composed
= intern ("auto-composed");
776 staticpro (&Qauto_composed
);
778 Qauto_composition_function
= intern ("auto-composition-function");
779 staticpro (&Qauto_composition_function
);
781 DEFVAR_LISP ("auto-composition-function", &Vauto_composition_function
,
782 doc
: /* Function to call to compose characters automatically.
783 The function is called from the display routine with two arguments,
786 If STRING is nil, the function must compose characters following POS
787 in the current buffer.
789 Otherwise, STRING is a string, and POS is an index to the string. In
790 this case, the function must compose characters following POS in
792 Vauto_composition_function
= Qnil
;
794 defsubr (&Scompose_region_internal
);
795 defsubr (&Scompose_string_internal
);
796 defsubr (&Sfind_composition_internal
);