1 /* Composite sequence support.
2 Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
29 #include "character.h"
30 #include "intervals.h"
32 /* Emacs uses special text property `composition' to support character
33 composition. A sequence of characters that have the same (i.e. eq)
34 `composition' property value is treated as a single composite
35 sequence (we call it just `composition' hereafter). Characters in
36 a composition are all composed somehow on the screen.
38 The property value has this form when the composition is made:
39 ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
40 then turns to this form:
41 (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
42 when the composition is registered in composition_hash_table and
43 composition_table. These rather peculiar structures were designed
44 to make it easy to distinguish them quickly (we can do that by
45 checking only the first element) and to extract LENGTH (from the
46 former form) and COMPOSITION-ID (from the latter form).
48 We register a composition when it is displayed, or when the width
49 is required (for instance, to calculate columns).
51 LENGTH -- Length of the composition. This information is used to
52 check the validity of the composition.
54 COMPONENTS -- Character, string, vector, list, or nil.
56 If it is nil, characters in the text are composed relatively
57 according to their metrics in font glyphs.
59 If it is a character or a string, the character or characters
60 in the string are composed relatively.
62 If it is a vector or list of integers, the element is a
63 character or an encoded composition rule. The characters are
64 composed according to the rules. (2N)th elements are
65 characters to be composed and (2N+1)th elements are
66 composition rules to tell how to compose (2N+2)th element with
67 the previously composed 2N glyphs.
69 COMPONENTS-VEC -- Vector of integers. In relative composition, the
70 elements are characters to be composed. In rule-base
71 composition, the elements are characters or encoded
74 MODIFICATION-FUNC -- If non nil, it is a function to call when the
75 composition gets invalid after a modification in a buffer. If
76 it is nil, a function in `composition-function-table' of the
77 first character in the sequence is called.
79 COMPOSITION-ID -- Identification number of the composition. It is
80 used as an index to composition_table for the composition.
82 When Emacs has to display a composition or has to know its
83 displaying width, the function get_composition_id is called. It
84 returns COMPOSITION-ID so that the caller can access the
85 information about the composition through composition_table. If a
86 COMPOSITION-ID has not yet been assigned to the composition,
87 get_composition_id checks the validity of `composition' property,
88 and, if valid, assigns a new ID, registers the information in
89 composition_hash_table and composition_table, and changes the form
90 of the property value. If the property is invalid, return -1
91 without changing the property value.
93 We use two tables to keep information about composition;
94 composition_hash_table and composition_table.
96 The former is a hash table in which keys are COMPONENTS-VECs and
97 values are the corresponding COMPOSITION-IDs. This hash table is
98 weak, but as each key (COMPONENTS-VEC) is also kept as a value of
99 `composition' property, it won't be collected as garbage until all
100 text that has the same COMPONENTS-VEC is deleted.
102 The latter is a table of pointers to `struct composition' indexed
103 by COMPOSITION-ID. This structure keeps the other information (see
106 In general, a text property holds information about individual
107 characters. But, a `composition' property holds information about
108 a sequence of characters (in this sense, it is like `intangible'
109 property). That means that we should not share the property value
110 in adjacent compositions -- we can't distinguish them if they have the
111 same property. So, after any changes, we call
112 `update_compositions' and change a property of one of adjacent
113 compositions to a copy of it. This function also runs a proper
114 composition modification function to make a composition that gets
115 invalid by the change valid again.
117 As a value of `composition' property holds information about a
118 specific range of text, the value gets invalid if we change the
119 text in the range. We treat `composition' property always
120 rear-nonsticky (currently by setting default-text-properties to
121 (rear-nonsticky (composition))) and we never make properties of
122 adjacent compositions identical. Thus, any such changes make the
123 range just shorter. So, we can check the validity of `composition'
124 property by comparing LENGTH information with the actual length of
130 Lisp_Object Qcomposition
;
132 /* Table of pointers to the structure `composition' indexed by
133 COMPOSITION-ID. This structure is for storing information about
134 each composition except for COMPONENTS-VEC. */
135 struct composition
**composition_table
;
137 /* The current size of `composition_table'. */
138 static int composition_table_size
;
140 /* Number of compositions currently made. */
143 /* Hash table for compositions. The key is COMPONENTS-VEC of
144 `composition' property. The value is the corresponding
146 Lisp_Object composition_hash_table
;
148 /* Function to call to adjust composition. */
149 Lisp_Object Vcompose_chars_after_function
;
151 Lisp_Object Qauto_composed
;
152 Lisp_Object Vauto_composition_function
;
153 Lisp_Object Qauto_composition_function
;
155 /* Temporary variable used in macros COMPOSITION_XXX. */
156 Lisp_Object composition_temp
;
158 /* Return COMPOSITION-ID of a composition at buffer position
159 CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
160 the sequence is PROP. STRING, if non-nil, is a string that
161 contains the composition instead of the current buffer.
163 If the composition is invalid, return -1. */
166 get_composition_id (charpos
, bytepos
, nchars
, prop
, string
)
167 int charpos
, bytepos
, nchars
;
168 Lisp_Object prop
, string
;
170 Lisp_Object id
, length
, components
, key
, *key_contents
;
172 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (composition_hash_table
);
175 struct composition
*cmp
;
179 Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
181 Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
183 if (nchars
== 0 || !CONSP (prop
))
184 goto invalid_composition
;
189 /* PROP should be Form-B. */
190 if (XINT (id
) < 0 || XINT (id
) >= n_compositions
)
191 goto invalid_composition
;
195 /* PROP should be Form-A.
196 Thus, ID should be (LENGTH . COMPONENTS). */
198 goto invalid_composition
;
200 if (!INTEGERP (length
) || XINT (length
) != nchars
)
201 goto invalid_composition
;
203 components
= XCDR (id
);
205 /* Check if the same composition has already been registered or not
206 by consulting composition_hash_table. The key for this table is
207 COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
208 nil, vector of characters in the composition range. */
209 if (INTEGERP (components
))
210 key
= Fmake_vector (make_number (1), components
);
211 else if (STRINGP (components
) || CONSP (components
))
212 key
= Fvconcat (1, &components
);
213 else if (VECTORP (components
))
215 else if (NILP (components
))
217 key
= Fmake_vector (make_number (nchars
), Qnil
);
218 if (STRINGP (string
))
219 for (i
= 0; i
< nchars
; i
++)
221 FETCH_STRING_CHAR_ADVANCE (ch
, string
, charpos
, bytepos
);
222 XVECTOR (key
)->contents
[i
] = make_number (ch
);
225 for (i
= 0; i
< nchars
; i
++)
227 FETCH_CHAR_ADVANCE (ch
, charpos
, bytepos
);
228 XVECTOR (key
)->contents
[i
] = make_number (ch
);
232 goto invalid_composition
;
234 hash_index
= hash_lookup (hash_table
, key
, &hash_code
);
237 /* We have already registered the same composition. Change PROP
238 from Form-A above to Form-B while replacing COMPONENTS with
239 COMPONENTS-VEC stored in the hash table. We can directly
240 modify the cons cell of PROP because it is not shared. */
241 key
= HASH_KEY (hash_table
, hash_index
);
242 id
= HASH_VALUE (hash_table
, hash_index
);
244 XSETCDR (prop
, Fcons (make_number (nchars
), Fcons (key
, XCDR (prop
))));
248 /* This composition is a new one. We must register it. */
250 /* Check if we have sufficient memory to store this information. */
251 if (composition_table_size
== 0)
253 composition_table_size
= 256;
255 = (struct composition
**) xmalloc (sizeof (composition_table
[0])
256 * composition_table_size
);
258 else if (composition_table_size
<= n_compositions
)
260 composition_table_size
+= 256;
262 = (struct composition
**) xrealloc (composition_table
,
263 sizeof (composition_table
[0])
264 * composition_table_size
);
267 key_contents
= XVECTOR (key
)->contents
;
269 /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
270 vector or a list. It should be a sequence of:
271 char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
272 if (VECTORP (components
) || CONSP (components
))
274 int len
= XVECTOR (key
)->size
;
276 /* The number of elements should be odd. */
278 goto invalid_composition
;
279 /* All elements should be integers (character or encoded
280 composition rule). */
281 for (i
= 0; i
< len
; i
++)
283 if (!INTEGERP (key_contents
[i
]))
284 goto invalid_composition
;
288 /* Change PROP from Form-A above to Form-B. We can directly modify
289 the cons cell of PROP because it is not shared. */
290 XSETFASTINT (id
, n_compositions
);
292 XSETCDR (prop
, Fcons (make_number (nchars
), Fcons (key
, XCDR (prop
))));
294 /* Register the composition in composition_hash_table. */
295 hash_index
= hash_put (hash_table
, key
, id
, hash_code
);
297 /* Register the composition in composition_table. */
298 cmp
= (struct composition
*) xmalloc (sizeof (struct composition
));
300 cmp
->method
= (NILP (components
)
301 ? COMPOSITION_RELATIVE
302 : ((INTEGERP (components
) || STRINGP (components
))
303 ? COMPOSITION_WITH_ALTCHARS
304 : COMPOSITION_WITH_RULE_ALTCHARS
));
305 cmp
->hash_index
= hash_index
;
306 glyph_len
= (cmp
->method
== COMPOSITION_WITH_RULE_ALTCHARS
307 ? (XVECTOR (key
)->size
+ 1) / 2
308 : XVECTOR (key
)->size
);
309 cmp
->glyph_len
= glyph_len
;
310 cmp
->offsets
= (short *) xmalloc (sizeof (short) * glyph_len
* 2);
313 /* Calculate the width of overall glyphs of the composition. */
314 if (cmp
->method
!= COMPOSITION_WITH_RULE_ALTCHARS
)
316 /* Relative composition. */
318 for (i
= 0; i
< glyph_len
; i
++)
321 ch
= XINT (key_contents
[i
]);
322 this_width
= CHAR_WIDTH (ch
);
323 if (cmp
->width
< this_width
)
324 cmp
->width
= this_width
;
329 /* Rule-base composition. */
330 float leftmost
= 0.0, rightmost
;
332 ch
= XINT (key_contents
[0]);
333 rightmost
= CHAR_WIDTH (ch
);
335 for (i
= 1; i
< glyph_len
; i
+= 2)
337 int rule
, gref
, nref
;
341 rule
= XINT (key_contents
[i
]);
342 ch
= XINT (key_contents
[i
+ 1]);
343 this_width
= CHAR_WIDTH (ch
);
345 /* A composition rule is specified by an integer value
346 that encodes global and new reference points (GREF and
347 NREF). GREF and NREF are specified by numbers as
355 ---3---4---5--- baseline
359 COMPOSITION_DECODE_RULE (rule
, gref
, nref
);
360 this_left
= (leftmost
361 + (gref
% 3) * (rightmost
- leftmost
) / 2.0
362 - (nref
% 3) * this_width
/ 2.0);
364 if (this_left
< leftmost
)
365 leftmost
= this_left
;
366 if (this_left
+ this_width
> rightmost
)
367 rightmost
= this_left
+ this_width
;
370 cmp
->width
= rightmost
- leftmost
;
371 if (cmp
->width
< (rightmost
- leftmost
))
372 /* To get a ceiling integer value. */
376 composition_table
[n_compositions
] = cmp
;
378 return n_compositions
++;
381 /* Would it be better to remove this `composition' property? */
386 /* Find a composition at or nearest to position POS of OBJECT (buffer
389 OBJECT defaults to the current buffer. If there's a composition at
390 POS, set *START and *END to the start and end of the sequence,
391 *PROP to the `composition' property, and return 1.
393 If there's no composition at POS and LIMIT is negative, return 0.
395 Otherwise, search for a composition forward (LIMIT > POS) or
396 backward (LIMIT < POS). In this case, LIMIT bounds the search.
398 If a composition is found, set *START, *END, and *PROP as above,
399 and return 1, else return 0.
401 This doesn't check the validity of composition. */
404 find_composition (pos
, limit
, start
, end
, prop
, object
)
405 int pos
, limit
, *start
, *end
;
406 Lisp_Object
*prop
, object
;
410 if (get_property_and_range (pos
, Qcomposition
, prop
, start
, end
, object
))
413 if (limit
< 0 || limit
== pos
)
416 if (limit
> pos
) /* search forward */
418 val
= Fnext_single_property_change (make_number (pos
), Qcomposition
,
419 object
, make_number (limit
));
424 else /* search backward */
426 if (get_property_and_range (pos
- 1, Qcomposition
, prop
, start
, end
,
429 val
= Fprevious_single_property_change (make_number (pos
), Qcomposition
,
430 object
, make_number (limit
));
436 get_property_and_range (pos
, Qcomposition
, prop
, start
, end
, object
);
440 /* Run a proper function to adjust the composition sitting between
441 FROM and TO with property PROP. */
444 run_composition_function (from
, to
, prop
)
451 func
= COMPOSITION_MODIFICATION_FUNC (prop
);
452 /* If an invalid composition precedes or follows, try to make them
455 && find_composition (from
- 1, -1, &start
, &end
, &prop
, Qnil
)
456 && !COMPOSITION_VALID_P (start
, end
, prop
))
459 && find_composition (to
, -1, &start
, &end
, &prop
, Qnil
)
460 && !COMPOSITION_VALID_P (start
, end
, prop
))
463 call2 (func
, make_number (from
), make_number (to
));
466 /* Make invalid compositions adjacent to or inside FROM and TO valid.
467 CHECK_MASK is bitwise `or' of mask bits defined by macros
468 CHECK_XXX (see the comment in composite.h).
470 It also resets the text-property `auto-composed' on a proper region
471 so that automatic character composition works correctly later while
472 displaying the region.
474 This function is called when a buffer text is changed. If the
475 change is deletion, FROM == TO. Otherwise, FROM < TO. */
478 update_compositions (from
, to
, check_mask
)
479 int from
, to
, check_mask
;
483 /* The beginning and end of the region to set the property
484 `auto-composed' to nil. */
485 int min_pos
= from
, max_pos
= to
;
487 if (inhibit_modification_hooks
)
490 /* If FROM and TO are not in a valid range, do nothing. */
491 if (! (BEGV
<= from
&& from
<= to
&& to
<= ZV
))
494 if (check_mask
& CHECK_HEAD
)
496 /* FROM should be at composition boundary. But, insertion or
497 deletion will make two compositions adjacent and
498 indistinguishable when they have same (eq) property. To
499 avoid it, in such a case, we change the property of the
500 latter to the copy of it. */
502 && find_composition (from
- 1, -1, &start
, &end
, &prop
, Qnil
))
508 Fput_text_property (make_number (from
), make_number (end
),
510 Fcons (XCAR (prop
), XCDR (prop
)), Qnil
);
511 run_composition_function (start
, end
, prop
);
515 && find_composition (from
, -1, &start
, &from
, &prop
, Qnil
))
519 run_composition_function (start
, from
, prop
);
523 if (check_mask
& CHECK_INSIDE
)
525 /* In this case, we are sure that (check_mask & CHECK_TAIL) is also
526 nonzero. Thus, here we should check only compositions before
529 && find_composition (from
, to
, &start
, &from
, &prop
, Qnil
)
531 run_composition_function (start
, from
, prop
);
534 if (check_mask
& CHECK_TAIL
)
537 && find_composition (to
- 1, -1, &start
, &end
, &prop
, Qnil
))
539 /* TO should be also at composition boundary. But,
540 insertion or deletion will make two compositions adjacent
541 and indistinguishable when they have same (eq) property.
542 To avoid it, in such a case, we change the property of
543 the former to the copy of it. */
546 Fput_text_property (make_number (start
), make_number (to
),
548 Fcons (XCAR (prop
), XCDR (prop
)), Qnil
);
551 run_composition_function (start
, end
, prop
);
554 && find_composition (to
, -1, &start
, &end
, &prop
, Qnil
))
556 run_composition_function (start
, end
, prop
);
561 if (min_pos
< max_pos
)
562 Fput_text_property (make_number (min_pos
), make_number (max_pos
),
563 Qauto_composed
, Qnil
, Qnil
);
567 /* Modify composition property values in LIST destructively. LIST is
568 a list as returned from text_property_list. Change values to the
569 top-level copies of them so that none of them are `eq'. */
572 make_composition_value_copy (list
)
575 Lisp_Object plist
, val
;
577 for (; CONSP (list
); list
= XCDR (list
))
579 plist
= XCAR (XCDR (XCDR (XCAR (list
))));
580 while (CONSP (plist
) && CONSP (XCDR (plist
)))
582 if (EQ (XCAR (plist
), Qcomposition
)
583 && (val
= XCAR (XCDR (plist
)), CONSP (val
)))
584 XSETCAR (XCDR (plist
), Fcons (XCAR (val
), XCDR (val
)));
585 plist
= XCDR (XCDR (plist
));
591 /* Make text in the region between START and END a composition that
592 has COMPONENTS and MODIFICATION-FUNC.
594 If STRING is non-nil, then operate on characters contained between
595 indices START and END in STRING. */
598 compose_text (start
, end
, components
, modification_func
, string
)
600 Lisp_Object components
, modification_func
, string
;
604 prop
= Fcons (Fcons (make_number (end
- start
), components
),
606 Fput_text_property (make_number (start
), make_number (end
),
607 Qcomposition
, prop
, string
);
611 /* Emacs Lisp APIs. */
613 DEFUN ("compose-region-internal", Fcompose_region_internal
,
614 Scompose_region_internal
, 2, 4, 0,
615 doc
: /* Internal use only.
617 Compose text in the region between START and END.
618 Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
619 for the composition. See `compose-region' for more detail. */)
620 (start
, end
, components
, mod_func
)
621 Lisp_Object start
, end
, components
, mod_func
;
623 validate_region (&start
, &end
);
624 if (!NILP (components
)
625 && !INTEGERP (components
)
626 && !CONSP (components
)
627 && !STRINGP (components
))
628 CHECK_VECTOR (components
);
630 compose_text (XINT (start
), XINT (end
), components
, mod_func
, Qnil
);
634 DEFUN ("compose-string-internal", Fcompose_string_internal
,
635 Scompose_string_internal
, 3, 5, 0,
636 doc
: /* Internal use only.
638 Compose text between indices START and END of STRING.
639 Optional 4th and 5th arguments are COMPONENTS and MODIFICATION-FUNC
640 for the composition. See `compose-string' for more detail. */)
641 (string
, start
, end
, components
, mod_func
)
642 Lisp_Object string
, start
, end
, components
, mod_func
;
644 CHECK_STRING (string
);
645 CHECK_NUMBER (start
);
648 if (XINT (start
) < 0 ||
649 XINT (start
) > XINT (end
)
650 || XINT (end
) > XSTRING (string
)->size
)
651 args_out_of_range (start
, end
);
653 compose_text (XINT (start
), XINT (end
), components
, mod_func
, string
);
657 DEFUN ("find-composition-internal", Ffind_composition_internal
,
658 Sfind_composition_internal
, 4, 4, 0,
659 doc
: /* Internal use only.
661 Return information about composition at or nearest to position POS.
662 See `find-composition' for more detail. */)
663 (pos
, limit
, string
, detail_p
)
664 Lisp_Object pos
, limit
, string
, detail_p
;
666 Lisp_Object prop
, tail
;
670 CHECK_NUMBER_COERCE_MARKER (pos
);
674 CHECK_NUMBER_COERCE_MARKER (limit
);
682 CHECK_STRING (string
);
683 if (XINT (pos
) < 0 || XINT (pos
) > XSTRING (string
)->size
)
684 args_out_of_range (string
, pos
);
688 if (XINT (pos
) < BEGV
|| XINT (pos
) > ZV
)
689 args_out_of_range (Fcurrent_buffer (), pos
);
692 if (!find_composition (start
, end
, &start
, &end
, &prop
, string
))
694 if (!COMPOSITION_VALID_P (start
, end
, prop
))
695 return Fcons (make_number (start
), Fcons (make_number (end
),
696 Fcons (Qnil
, Qnil
)));
698 return Fcons (make_number (start
), Fcons (make_number (end
),
701 if (COMPOSITION_REGISTERD_P (prop
))
702 id
= COMPOSITION_ID (prop
);
705 int start_byte
= (NILP (string
)
706 ? CHAR_TO_BYTE (start
)
707 : string_char_to_byte (string
, start
));
708 id
= get_composition_id (start
, start_byte
, end
- start
, prop
, string
);
713 Lisp_Object components
, relative_p
, mod_func
;
714 enum composition_method method
= COMPOSITION_METHOD (prop
);
715 int width
= composition_table
[id
]->width
;
717 components
= Fcopy_sequence (COMPOSITION_COMPONENTS (prop
));
718 relative_p
= (method
== COMPOSITION_WITH_RULE_ALTCHARS
720 mod_func
= COMPOSITION_MODIFICATION_FUNC (prop
);
721 tail
= Fcons (components
,
724 Fcons (make_number (width
), Qnil
))));
729 return Fcons (make_number (start
), Fcons (make_number (end
), tail
));
736 Qcomposition
= intern ("composition");
737 staticpro (&Qcomposition
);
739 /* Make a hash table for composition. */
742 extern Lisp_Object QCsize
;
746 args
[2] = QCweakness
;
749 args
[5] = make_number (311);
750 composition_hash_table
= Fmake_hash_table (6, args
);
751 staticpro (&composition_hash_table
);
754 /* Text property `composition' should be nonsticky by default. */
755 Vtext_property_default_nonsticky
756 = Fcons (Fcons (Qcomposition
, Qt
), Vtext_property_default_nonsticky
);
758 DEFVAR_LISP ("compose-chars-after-function", &Vcompose_chars_after_function
,
759 doc
: /* Function to adjust composition of buffer text.
761 The function is called with three arguments FROM, TO, and OBJECT.
762 FROM and TO specify the range of text of which composition should be
763 adjusted. OBJECT, if non-nil, is a string that contains the text.
765 This function is called after a text with `composition' property is
766 inserted or deleted to keep `composition' property of buffer text
769 The default value is the function `compose-chars-after'. */);
770 Vcompose_chars_after_function
= intern ("compose-chars-after");
772 Qauto_composed
= intern ("auto-composed");
773 staticpro (&Qauto_composed
);
775 Qauto_composition_function
= intern ("auto-composition-function");
776 staticpro (&Qauto_composition_function
);
778 DEFVAR_LISP ("auto-composition-function", &Vauto_composition_function
,
779 doc
: /* Function to call to compose characters automatically.
780 The function is called from the display routine with two arguments,
783 If STRING is nil, the function must compose characters following POS
784 in the current buffer.
786 Otherwise, STRING is a string, and POS is an index to the string. In
787 this case, the function must compose characters following POS in
789 Vauto_composition_function
= Qnil
;
791 defsubr (&Scompose_region_internal
);
792 defsubr (&Scompose_string_internal
);
793 defsubr (&Sfind_composition_internal
);