]> code.delx.au - gnu-emacs/blob - src/bidi.c
* gnutls.c: Include gnutls.h.
[gnu-emacs] / src / bidi.c
1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software
3 Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the Reference Implementation and most other implementations,
26 this one is designed to be called once for every character in the
27 buffer or string. That way, we can leave intact the design of the
28 Emacs display engine, whereby an iterator object is used to
29 traverse buffer or string text character by character, and generate
30 the necessary data for displaying each character in 'struct glyph'
31 objects. (See xdisp.c for the details of that iteration.) The
32 functions on this file replace the original linear iteration in the
33 logical order of the text with a non-linear iteration in the visual
34 order, i.e. in the order characters should be shown on display.
35
36 The main entry point is bidi_move_to_visually_next. Each time it
37 is called, it finds the next character in the visual order, and
38 returns its information in a special structure. The caller is then
39 expected to process this character for display or any other
40 purposes, and call bidi_move_to_visually_next for the next
41 character. See the comments in bidi_move_to_visually_next for more
42 details about its algorithm that finds the next visual-order
43 character by resolving their levels on the fly.
44
45 Two other entry points are bidi_paragraph_init and
46 bidi_mirror_char. The first determines the base direction of a
47 paragraph, while the second returns the mirrored version of its
48 argument character.
49
50 A few auxiliary entry points are used to initialize the bidi
51 iterator for iterating an object (buffer or string), push and pop
52 the bidi iterator state, and save and restore the state of the bidi
53 cache.
54
55 If you want to understand the code, you will have to read it
56 together with the relevant portions of UAX#9. The comments include
57 references to UAX#9 rules, for that very reason.
58
59 A note about references to UAX#9 rules: if the reference says
60 something like "X9/Retaining", it means that you need to refer to
61 rule X9 and to its modifications described in the "Implementation
62 Notes" section of UAX#9, under "Retaining Format Codes".
63
64 Here's the overview of the design of the reordering engine
65 implemented by this file.
66
67 Basic implementation structure
68 ------------------------------
69
70 The sequential processing steps described by UAX#9 are implemented
71 as recursive levels of processing, all of which examine the next
72 character in the logical order. This hierarchy of processing looks
73 as follows, from the innermost (deepest) to the outermost level,
74 omitting some subroutines used by each level:
75
76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_brackets -- resolve "paired brackets" neutral types
80 bidi_resolve_neutral -- resolve neutral types
81 bidi_level_of_next_char -- resolve implicit levels
82
83 Each level calls the level below it, and works on the result
84 returned by the lower level, including all of its sub-levels.
85
86 Unlike all the levels below it, bidi_level_of_next_char can return
87 the information about either the next or previous character in the
88 logical order, depending on the current direction of scanning the
89 buffer or string. For the next character, it calls all the levels
90 below it; for the previous character, it uses the cache, described
91 below.
92
93 Thus, the result of calling bidi_level_of_next_char is the resolved
94 level of the next or the previous character in the logical order.
95 Based on this information, the function bidi_move_to_visually_next
96 finds the next character in the visual order and updates the
97 direction in which the buffer is scanned, either forward or
98 backward, to find the next character to be displayed. (Text is
99 scanned backwards when it needs to be reversed for display, i.e. if
100 the visual order is the inverse of the logical order.) This
101 implements the last, reordering steps of the UBA, by successively
102 calling bidi_level_of_next_char until the character of the required
103 embedding level is found; the scan direction is dynamically updated
104 as a side effect. See the commentary before the 'while' loop in
105 bidi_move_to_visually_next, for the details.
106
107 Fetching characters
108 -------------------
109
110 In a nutshell, fetching the next character boils down to calling
111 STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
112 string position. See bidi_fetch_char. However, if the next
113 character is "covered" by a display property of some kind,
114 bidi_fetch_char returns the u+FFFC "object replacement character"
115 that represents the entire run of text covered by the display
116 property. (The ch_len and nchars members of 'struct bidi_it'
117 reflect the length in bytes and characters of that text.) This is
118 so we reorder text on both sides of the display property as
119 appropriate for an image or embedded string. Similarly, text
120 covered by a display spec of the form '(space ...)', is replaced
121 with the u+2029 paragraph separator character, so such display
122 specs produce the same effect as a TAB under UBA. Both these
123 special characters are not actually displayed -- the display
124 property is displayed instead -- but just used to compute the
125 embedding level of the surrounding text so as to produce the
126 required effect.
127
128 Bidi iterator states
129 --------------------
130
131 The UBA is highly context dependent in some of its parts,
132 i.e. results of processing a character can generally depend on
133 characters very far away. The UAX#9 description of the UBA
134 prescribes a stateful processing of each character, whereby the
135 results of this processing depend on various state variables, such
136 as the current embedding level, level stack, and directional
137 override status. In addition, the UAX#9 description includes many
138 passages like this (from rule W2 in this case):
139
140 Search backward from each instance of a European number until the
141 first strong type (R, L, AL, or sos) is found. If an AL is found,
142 change the type of the European number to Arabic number.
143
144 To support this, we use a bidi iterator object, 'struct bidi_it',
145 which is a sub-structure of 'struct it' used by xdisp.c (see
146 dispextern.h for the definition of both of these structures). The
147 bidi iterator holds the entire state of the iteration required by
148 the UBA, and is updated as the text is traversed. In particular,
149 the embedding level of the current character being resolved is
150 recorded in the iterator state. To avoid costly searches backward
151 in support of rules like W2 above, the necessary character types
152 are also recorded in the iterator state as they are found during
153 the forward scan, and then used when such rules need to be applied.
154 (Forward scans cannot be avoided in this way; they need to be
155 performed at least once, and the results recorded in the iterator
156 state, to be reused until the forward scan oversteps the recorded
157 position.)
158
159 In this manner, the iterator state acts as a mini-cache of
160 contextual information required for resolving the level of the
161 current character by various UBA rules.
162
163 Caching of bidi iterator states
164 -------------------------------
165
166 As described above, the reordering engine uses the information
167 recorded in the bidi iterator state in order to resolve the
168 embedding level of the current character. When the reordering
169 engine needs to process the next character in the logical order, it
170 fetches it and applies to it all the UBA levels, updating the
171 iterator state as it goes. But when the buffer or string is
172 scanned backwards, i.e. in the reverse order of buffer/string
173 positions, the scanned characters were already processed during the
174 preceding forward scan (see bidi_find_other_level_edge). To avoid
175 costly re-processing of characters that were already processed
176 during the forward scan, the iterator states computed while
177 scanning forward are cached.
178
179 The cache is just a linear array of 'struct bidi_it' objects, which
180 is dynamically allocated and reallocated as needed, since the size
181 of the cache depends on the text being processed. We only need the
182 cache while processing embedded levels higher than the base
183 paragraph embedding level, because these higher levels require
184 changes in scan direction. Therefore, as soon as we are back to
185 the base embedding level, we can free the cache; see the calls to
186 bidi_cache_reset and bidi_cache_shrink, for the conditions to do
187 this.
188
189 The cache maintains the index of the next unused cache slot -- this
190 is where the next iterator state will be cached. The function
191 bidi_cache_iterator_state saves an instance of the state in the
192 cache and increments the unused slot index. The companion function
193 bidi_cache_find looks up a cached state that corresponds to a given
194 buffer/string position. All of the cached states must correspond
195 1:1 to the buffer or string region whose processing they reflect;
196 bidi.c will abort if it finds cache slots that violate this 1:1
197 correspondence.
198
199 When the parent iterator 'struct it' is pushed (see push_it in
200 xdisp.c) to pause the current iteration and start iterating over a
201 different object (e.g., a 'display' string that covers some buffer
202 text), the bidi iterator cache needs to be "pushed" as well, so
203 that a new empty cache could be used while iterating over the new
204 object. Later, when the new object is exhausted, and xdisp.c calls
205 pop_it, we need to "pop" the bidi cache as well and return to the
206 original cache. See bidi_push_it and bidi_pop_it for how this is
207 done.
208
209 Some functions of the display engine save copies of 'struct it' in
210 local variables, and restore them later. For examples, see
211 pos_visible_p and move_it_in_display_line_to in xdisp.c, and
212 window_scroll_pixel_based in window.c. When this happens, we need
213 to save and restore the bidi cache as well, because conceptually
214 the cache is part of the 'struct it' state, and needs to be in
215 perfect sync with the portion of the buffer/string that is being
216 processed. This saving and restoring of the cache state is handled
217 by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
218 SAVE_IT and RESTORE_IT defined on xdisp.c.
219
220 Note that, because reordering is implemented below the level in
221 xdisp.c that breaks glyphs into screen lines, we are violating
222 paragraph 3.4 of UAX#9. which mandates that line breaking shall be
223 done before reordering each screen line separately. However,
224 following UAX#9 to the letter in this matter goes against the basic
225 design of the Emacs display engine, and so we choose here this
226 minor deviation from the UBA letter in preference to redesign of
227 the display engine. The effect of this is only seen in continued
228 lines that are broken into screen lines in the middle of a run
229 whose direction is opposite to the paragraph's base direction.
230
231 Important design and implementation note: when the code needs to
232 scan far ahead, be sure to avoid such scans as much as possible
233 when the buffer/string doesn't contain any RTL characters. Users
234 of left-to-right scripts will never forgive you if you introduce
235 some slow-down due to bidi in situations that don't involve any
236 bidirectional text. See the large comment near the beginning of
237 bidi_resolve_neutral, for one situation where such shortcut was
238 necessary. */
239
240 #include <config.h>
241 #include <stdio.h>
242
243 #include "lisp.h"
244 #include "character.h"
245 #include "buffer.h"
246 #include "dispextern.h"
247 #include "region-cache.h"
248
249 static bool bidi_initialized = 0;
250
251 static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
252
253 #define BIDI_EOB (-1)
254
255 /* Data type for describing the bidirectional character categories. */
256 typedef enum {
257 UNKNOWN_BC,
258 NEUTRAL,
259 WEAK,
260 STRONG,
261 EXPLICIT_FORMATTING
262 } bidi_category_t;
263
264 static Lisp_Object paragraph_start_re, paragraph_separate_re;
265 static Lisp_Object Qparagraph_start, Qparagraph_separate;
266
267 \f
268 /***********************************************************************
269 Utilities
270 ***********************************************************************/
271
272 /* Return the bidi type of a character CH, subject to the current
273 directional OVERRIDE. */
274 static bidi_type_t
275 bidi_get_type (int ch, bidi_dir_t override)
276 {
277 bidi_type_t default_type;
278
279 if (ch == BIDI_EOB)
280 return NEUTRAL_B;
281 if (ch < 0 || ch > MAX_CHAR)
282 emacs_abort ();
283
284 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
285 /* Every valid character code, even those that are unassigned by the
286 UCD, have some bidi-class property, according to
287 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
288 (= zero) code from CHAR_TABLE_REF, that's a bug. */
289 if (default_type == UNKNOWN_BT)
290 emacs_abort ();
291
292 switch (default_type)
293 {
294 case WEAK_BN:
295 case NEUTRAL_B:
296 case LRE:
297 case LRO:
298 case RLE:
299 case RLO:
300 case PDF:
301 case LRI:
302 case RLI:
303 case FSI:
304 case PDI:
305 return default_type;
306 default:
307 if (override == L2R)
308 return STRONG_L;
309 else if (override == R2L)
310 return STRONG_R;
311 else
312 return default_type;
313 }
314 }
315
316 static void
317 bidi_check_type (bidi_type_t type)
318 {
319 eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
320 }
321
322 /* Given a bidi TYPE of a character, return its category. */
323 static bidi_category_t
324 bidi_get_category (bidi_type_t type)
325 {
326 switch (type)
327 {
328 case UNKNOWN_BT:
329 return UNKNOWN_BC;
330 case STRONG_L:
331 case STRONG_R:
332 case STRONG_AL:
333 return STRONG;
334 case WEAK_EN:
335 case WEAK_ES:
336 case WEAK_ET:
337 case WEAK_AN:
338 case WEAK_CS:
339 case WEAK_NSM:
340 case WEAK_BN:
341 return WEAK;
342 case NEUTRAL_B:
343 case NEUTRAL_S:
344 case NEUTRAL_WS:
345 case NEUTRAL_ON:
346 return NEUTRAL;
347 case LRE:
348 case LRO:
349 case RLE:
350 case RLO:
351 case PDF:
352 case LRI:
353 case RLI:
354 case FSI:
355 case PDI:
356 return EXPLICIT_FORMATTING;
357 default:
358 emacs_abort ();
359 }
360 }
361
362 static bool
363 bidi_isolate_fmt_char (bidi_type_t ch_type)
364 {
365 return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
366 }
367
368 /* Return the mirrored character of C, if it has one. If C has no
369 mirrored counterpart, return C.
370 Note: The conditions in UAX#9 clause L4 regarding the surrounding
371 context must be tested by the caller. */
372 int
373 bidi_mirror_char (int c)
374 {
375 Lisp_Object val;
376
377 if (c == BIDI_EOB)
378 return c;
379 if (c < 0 || c > MAX_CHAR)
380 emacs_abort ();
381
382 val = CHAR_TABLE_REF (bidi_mirror_table, c);
383 if (INTEGERP (val))
384 {
385 int v;
386
387 /* When debugging, check before assigning to V, so that the check
388 isn't broken by undefined behavior due to int overflow. */
389 eassert (CHAR_VALID_P (XINT (val)));
390
391 v = XINT (val);
392
393 /* Minimal test we must do in optimized builds, to prevent weird
394 crashes further down the road. */
395 if (v < 0 || v > MAX_CHAR)
396 emacs_abort ();
397
398 return v;
399 }
400
401 return c;
402 }
403
404 /* Return the Bidi_Paired_Bracket_Type property of the character C. */
405 static bidi_bracket_type_t
406 bidi_paired_bracket_type (int c)
407 {
408 if (c == BIDI_EOB)
409 return BIDI_BRACKET_NONE;
410 if (c < 0 || c > MAX_CHAR)
411 emacs_abort ();
412
413 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
414 }
415
416 /* Determine the start-of-sequence (sos) directional type given the two
417 embedding levels on either side of the run boundary. Also, update
418 the saved info about previously seen characters, since that info is
419 generally valid for a single level run. */
420 static void
421 bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
422 {
423 int higher_level = (level_before > level_after ? level_before : level_after);
424
425 /* FIXME: should the default sos direction be user selectable? */
426 bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
427
428 bidi_it->prev.type = UNKNOWN_BT;
429 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
430 bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
431 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
432 bidi_it->next_for_neutral.type
433 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
434 }
435
436 #define ISOLATE_STATUS(BIDI_IT, IDX) ((BIDI_IT)->level_stack[IDX].flags & 1)
437 #define OVERRIDE(BIDI_IT, IDX) (((BIDI_IT)->level_stack[IDX].flags >> 1) & 3)
438
439 /* Push the current embedding level and override status; reset the
440 current level to LEVEL and the current override status to OVERRIDE. */
441 static void
442 bidi_push_embedding_level (struct bidi_it *bidi_it,
443 int level, bidi_dir_t override, bool isolate_status)
444 {
445 struct bidi_stack *st;
446 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
447
448 bidi_it->stack_idx++;
449 eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
450 st = &bidi_it->level_stack[bidi_it->stack_idx];
451 eassert (level <= (1 << 7));
452 st->level = level;
453 st->flags = (((override & 3) << 1) | (isolate_status != 0));
454 if (isolate_status)
455 {
456 st->last_strong_type = bidi_it->last_strong.type;
457 st->prev_for_neutral_type = bidi_it->prev_for_neutral.type;
458 st->next_for_neutral_type = bidi_it->next_for_neutral.type;
459 st->next_for_neutral_pos = bidi_it->next_for_neutral.charpos;
460 st->flags |= ((bidi_it->sos == L2R ? 0 : 1) << 3);
461 }
462 /* We've got a new isolating sequence, compute the directional type
463 of sos and initialize per-sequence variables (UAX#9, clause X10). */
464 bidi_set_sos_type (bidi_it, prev_level, level);
465 }
466
467 /* Pop from the stack the embedding level, the directional override
468 status, and optionally saved information for the isolating run
469 sequence. Return the new level. */
470 static int
471 bidi_pop_embedding_level (struct bidi_it *bidi_it)
472 {
473 int level;
474
475 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
476 and PDIs (X6a, 2nd bullet). */
477 if (bidi_it->stack_idx > 0)
478 {
479 bool isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
480 int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
481
482 struct bidi_stack st;
483
484 st = bidi_it->level_stack[bidi_it->stack_idx];
485 if (isolate_status)
486 {
487 bidi_dir_t sos = ((st.flags >> 3) & 1);
488 /* PREV is used in W1 for resolving WEAK_NSM. By the time
489 we get to an NSM, we must have gotten past at least one
490 character: the PDI that ends the isolate from which we
491 are popping here. So PREV will have been filled up by
492 the time we first use it. We initialize it here to
493 UNKNOWN_BT to be able to catch any blunders in this
494 logic. */
495 bidi_it->prev.orig_type = bidi_it->prev.type = UNKNOWN_BT;
496 bidi_it->last_strong.type = st.last_strong_type;
497 bidi_it->prev_for_neutral.type = st.prev_for_neutral_type;
498 bidi_it->next_for_neutral.type = st.next_for_neutral_type;
499 bidi_it->next_for_neutral.charpos = st.next_for_neutral_pos;
500 bidi_it->sos = (sos == 0 ? L2R : R2L);
501 }
502 else
503 bidi_set_sos_type (bidi_it, old_level,
504 bidi_it->level_stack[bidi_it->stack_idx - 1].level);
505
506 bidi_it->stack_idx--;
507 }
508 level = bidi_it->level_stack[bidi_it->stack_idx].level;
509 eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
510 return level;
511 }
512
513 /* Record in SAVED_INFO the information about the current character. */
514 static void
515 bidi_remember_char (struct bidi_saved_info *saved_info,
516 struct bidi_it *bidi_it, bool from_type)
517 {
518 saved_info->charpos = bidi_it->charpos;
519 if (from_type)
520 saved_info->type = bidi_it->type;
521 else
522 saved_info->type = bidi_it->type_after_wn;
523 bidi_check_type (saved_info->type);
524 saved_info->orig_type = bidi_it->orig_type;
525 bidi_check_type (saved_info->orig_type);
526 }
527
528 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
529 copies the part of the level stack that is actually in use. */
530 static void
531 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
532 {
533 /* Copy everything from the start through the active part of
534 the level stack. */
535 memcpy (to, from,
536 (offsetof (struct bidi_it, level_stack[1])
537 + from->stack_idx * sizeof from->level_stack[0]));
538 }
539
540 \f
541 /***********************************************************************
542 Caching the bidi iterator states
543 ***********************************************************************/
544
545 /* We allocate and de-allocate the cache in chunks of this size (in
546 characters). 200 was chosen as an upper limit for reasonably-long
547 lines in a text file/buffer. */
548 #define BIDI_CACHE_CHUNK 200
549 /* Maximum size we allow the cache to become, per iterator stack slot,
550 in units of struct bidi_it size. If we allow unlimited growth, we
551 could run out of memory for pathologically long bracketed text or
552 very long text lines that need to be reordered. This is aggravated
553 when word-wrap is in effect, since then functions display_line and
554 move_it_in_display_line_to need to keep up to 4 copies of the
555 cache.
556
557 This limitation means there can be no more than that amount of
558 contiguous RTL text on any single physical line in a LTR paragraph,
559 and similarly with contiguous LTR + numeric text in a RTL
560 paragraph. (LTR text in a LTR paragraph and RTL text in a RTL
561 paragraph are not reordered, and so don't need the cache, and
562 cannot hit this limit.) More importantly, no single line can have
563 text longer than this inside paired brackets (because bracket pairs
564 resolution uses the cache). If the limit is exceeded, the fallback
565 code will produce visual order that will be incorrect if there are
566 RTL characters in the offending line of text. */
567 /* Do we need to allow customization of this limit? */
568 #define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000
569 #if BIDI_CACHE_CHUNK >= BIDI_CACHE_MAX_ELTS_PER_SLOT
570 # error BIDI_CACHE_CHUNK must be less than BIDI_CACHE_MAX_ELTS_PER_SLOT
571 #endif
572 static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
573 static struct bidi_it *bidi_cache;
574 static ptrdiff_t bidi_cache_size = 0;
575 enum { elsz = sizeof (struct bidi_it) };
576 static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
577 static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
578 static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
579 "stack" level */
580
581 /* 5-slot stack for saving the start of the previous level of the
582 cache. xdisp.c maintains a 5-slot stack for its iterator state,
583 and we need the same size of our stack. */
584 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
585 static int bidi_cache_sp;
586
587 /* Size of header used by bidi_shelve_cache. */
588 enum
589 {
590 bidi_shelve_header_size
591 = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
592 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
593 + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts))
594 };
595
596 /* Effectively remove the cached states beyond the Nth state from the
597 part of the cache relevant to iteration of the current object
598 (buffer or string). */
599 static void
600 bidi_cache_reset_to (int n)
601 {
602 bidi_cache_idx = bidi_cache_start + n;
603 bidi_cache_last_idx = -1;
604 }
605
606 /* Reset the cache state to the empty state. We only reset the part
607 of the cache relevant to iteration of the current object. Previous
608 objects, which are pushed on the display iterator's stack, are left
609 intact. This is called when the cached information is no more
610 useful for the current iteration, e.g. when we were reseated to a
611 new position on the same object. */
612 static void
613 bidi_cache_reset (void)
614 {
615 bidi_cache_reset_to (0);
616 }
617
618 /* Shrink the cache to its minimal size. Called when we init the bidi
619 iterator for reordering a buffer or a string that does not come
620 from display properties, because that means all the previously
621 cached info is of no further use. */
622 static void
623 bidi_cache_shrink (void)
624 {
625 if (bidi_cache_size > BIDI_CACHE_CHUNK)
626 {
627 bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
628 bidi_cache_size = BIDI_CACHE_CHUNK;
629 }
630 bidi_cache_reset ();
631 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
632 }
633
634 static void
635 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
636 {
637 int current_scan_dir = bidi_it->scan_dir;
638
639 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
640 emacs_abort ();
641
642 bidi_copy_it (bidi_it, &bidi_cache[idx]);
643 bidi_it->scan_dir = current_scan_dir;
644 bidi_cache_last_idx = idx;
645 }
646
647 /* Find a cached state with a given CHARPOS and resolved embedding
648 level less or equal to LEVEL. If LEVEL is -1, disregard the
649 resolved levels in cached states. DIR, if non-zero, means search
650 in that direction from the last cache hit.
651
652 Value is the index of the cached state, or -1 if not found. */
653 static ptrdiff_t
654 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
655 {
656 ptrdiff_t i, i_start;
657
658 if (bidi_cache_idx > bidi_cache_start)
659 {
660 if (bidi_cache_last_idx == -1)
661 bidi_cache_last_idx = bidi_cache_idx - 1;
662 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
663 {
664 dir = -1;
665 i_start = bidi_cache_last_idx - 1;
666 }
667 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
668 + bidi_cache[bidi_cache_last_idx].nchars - 1))
669 {
670 dir = 1;
671 i_start = bidi_cache_last_idx + 1;
672 }
673 else if (dir)
674 i_start = bidi_cache_last_idx;
675 else
676 {
677 dir = -1;
678 i_start = bidi_cache_idx - 1;
679 }
680
681 if (dir < 0)
682 {
683 /* Linear search for now; FIXME! */
684 for (i = i_start; i >= bidi_cache_start; i--)
685 if (bidi_cache[i].charpos <= charpos
686 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
687 && (level == -1 || bidi_cache[i].resolved_level <= level))
688 return i;
689 }
690 else
691 {
692 for (i = i_start; i < bidi_cache_idx; i++)
693 if (bidi_cache[i].charpos <= charpos
694 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
695 && (level == -1 || bidi_cache[i].resolved_level <= level))
696 return i;
697 }
698 }
699
700 return -1;
701 }
702
703 /* Find a cached state where the resolved level changes to a value
704 that is lower than LEVEL, and return its cache slot index. DIR is
705 the direction to search, starting with the last used cache slot.
706 If DIR is zero, we search backwards from the last occupied cache
707 slot. BEFORE means return the index of the slot that
708 is ``before'' the level change in the search direction. That is,
709 given the cached levels like this:
710
711 1122333442211
712 AB C
713
714 and assuming we are at the position cached at the slot marked with
715 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
716 index of slot B or A, depending whether BEFORE is, respectively,
717 true or false. */
718 static ptrdiff_t
719 bidi_cache_find_level_change (int level, int dir, bool before)
720 {
721 if (bidi_cache_idx)
722 {
723 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
724 int incr = before ? 1 : 0;
725
726 if (i < 0) /* cache overflowed? */
727 i = 0;
728
729 if (!dir)
730 dir = -1;
731 else if (!incr)
732 i += dir;
733
734 if (dir < 0)
735 {
736 while (i >= bidi_cache_start + incr)
737 {
738 if (bidi_cache[i - incr].resolved_level >= 0
739 && bidi_cache[i - incr].resolved_level < level)
740 return i;
741 i--;
742 }
743 }
744 else
745 {
746 while (i < bidi_cache_idx - incr)
747 {
748 if (bidi_cache[i + incr].resolved_level >= 0
749 && bidi_cache[i + incr].resolved_level < level)
750 return i;
751 i++;
752 }
753 }
754 }
755
756 return -1;
757 }
758
759 static void
760 bidi_cache_ensure_space (ptrdiff_t idx)
761 {
762 /* Enlarge the cache as needed. */
763 if (idx >= bidi_cache_size)
764 {
765 ptrdiff_t chunk_size = BIDI_CACHE_CHUNK;
766
767 if (bidi_cache_size > bidi_cache_max_elts - chunk_size)
768 chunk_size = bidi_cache_max_elts - bidi_cache_size;
769
770 if (max (idx + 1,
771 bidi_cache_size + chunk_size) <= bidi_cache_max_elts)
772 {
773 /* The bidi cache cannot be larger than the largest Lisp
774 string or buffer. */
775 ptrdiff_t string_or_buffer_bound
776 = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
777
778 /* Also, it cannot be larger than what C can represent. */
779 ptrdiff_t c_bound
780 = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
781 ptrdiff_t max_elts = bidi_cache_max_elts;
782
783 max_elts = min (max_elts, min (string_or_buffer_bound, c_bound));
784
785 /* Force xpalloc not to over-allocate by passing it MAX_ELTS
786 as its 4th argument. */
787 bidi_cache = xpalloc (bidi_cache, &bidi_cache_size,
788 max (chunk_size, idx - bidi_cache_size + 1),
789 max_elts, elsz);
790 eassert (bidi_cache_size > idx);
791 }
792 }
793 }
794
795 static int
796 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
797 bool update_only)
798 {
799 ptrdiff_t idx;
800
801 /* We should never cache on backward scans. */
802 if (bidi_it->scan_dir == -1)
803 emacs_abort ();
804 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
805
806 if (idx < 0 && update_only)
807 return 0;
808
809 if (idx < 0)
810 {
811 idx = bidi_cache_idx;
812 bidi_cache_ensure_space (idx);
813 /* Character positions should correspond to cache positions 1:1.
814 If we are outside the range of cached positions, the cache is
815 useless and must be reset. */
816 if (bidi_cache_start < idx && idx < bidi_cache_size
817 && (bidi_it->charpos > (bidi_cache[idx - 1].charpos
818 + bidi_cache[idx - 1].nchars)
819 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
820 {
821 bidi_cache_reset ();
822 idx = bidi_cache_start;
823 }
824 if (bidi_it->nchars <= 0)
825 emacs_abort ();
826 /* Don't cache if no available space in the cache. */
827 if (bidi_cache_size > idx)
828 {
829 bidi_copy_it (&bidi_cache[idx], bidi_it);
830 if (!resolved)
831 bidi_cache[idx].resolved_level = -1;
832 }
833 }
834 else
835 {
836 /* Copy only the members which could have changed, to avoid
837 costly copying of the entire struct. */
838 bidi_cache[idx].type = bidi_it->type;
839 bidi_check_type (bidi_it->type);
840 bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
841 bidi_check_type (bidi_it->type_after_wn);
842 if (resolved)
843 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
844 else
845 bidi_cache[idx].resolved_level = -1;
846 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
847 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
848 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
849 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
850 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
851 bidi_cache[idx].bracket_pairing_pos = bidi_it->bracket_pairing_pos;
852 bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
853 }
854
855 if (bidi_cache_size > idx)
856 {
857 bidi_cache_last_idx = idx;
858 if (idx >= bidi_cache_idx)
859 bidi_cache_idx = idx + 1;
860 return 1;
861 }
862 else
863 {
864 /* The cache overflowed. */
865 bidi_cache_last_idx = -1;
866 return 0;
867 }
868 }
869
870 /* Look for a cached iterator state that corresponds to CHARPOS. If
871 found, copy the cached state into BIDI_IT and return the type of
872 the cached entry. If not found, return UNKNOWN_BT. RESOLVED_ONLY
873 zero means it is OK to return cached states that were not fully
874 resolved yet. This can happen if the state was cached before it
875 was resolved in bidi_resolve_neutral. */
876 static bidi_type_t
877 bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it)
878 {
879 ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
880
881 if (i >= bidi_cache_start
882 && (!resolved_only
883 /* Callers that want only fully resolved states (and set
884 resolved_only = true) need to be sure that there's enough
885 info in the cached state to return the state as final,
886 and if not, they don't want the cached state. */
887 || bidi_cache[i].resolved_level >= 0))
888 {
889 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
890
891 bidi_copy_it (bidi_it, &bidi_cache[i]);
892 bidi_cache_last_idx = i;
893 /* Don't let scan direction from the cached state override
894 the current scan direction. */
895 bidi_it->scan_dir = current_scan_dir;
896 return bidi_it->type;
897 }
898
899 return UNKNOWN_BT;
900 }
901
902 static int
903 bidi_peek_at_next_level (struct bidi_it *bidi_it)
904 {
905 if (bidi_cache_idx == bidi_cache_start)
906 emacs_abort ();
907 /* If the cache overflowed, return the level of the last cached
908 character. */
909 if (bidi_cache_last_idx == -1
910 || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0))
911 return bidi_cache[bidi_cache_idx - 1].resolved_level;
912 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
913 }
914
915 \f
916 /***********************************************************************
917 Pushing and popping the bidi iterator state
918 ***********************************************************************/
919
920 /* Push the bidi iterator state in preparation for reordering a
921 different object, e.g. display string found at certain buffer
922 position. Pushing the bidi iterator boils down to saving its
923 entire state on the cache and starting a new cache "stacked" on top
924 of the current cache. */
925 void
926 bidi_push_it (struct bidi_it *bidi_it)
927 {
928 /* Give this stack slot its cache room. */
929 bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT;
930 /* Save the current iterator state in its entirety after the last
931 used cache slot. */
932 bidi_cache_ensure_space (bidi_cache_idx);
933 bidi_cache[bidi_cache_idx++] = *bidi_it;
934
935 /* Push the current cache start onto the stack. */
936 eassert (bidi_cache_sp < IT_STACK_SIZE);
937 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
938
939 /* Start a new level of cache, and make it empty. */
940 bidi_cache_start = bidi_cache_idx;
941 bidi_cache_last_idx = -1;
942 }
943
944 /* Restore the iterator state saved by bidi_push_it and return the
945 cache to the corresponding state. */
946 void
947 bidi_pop_it (struct bidi_it *bidi_it)
948 {
949 if (bidi_cache_start <= 0)
950 emacs_abort ();
951
952 /* Reset the next free cache slot index to what it was before the
953 call to bidi_push_it. */
954 bidi_cache_idx = bidi_cache_start - 1;
955
956 /* Restore the bidi iterator state saved in the cache. */
957 *bidi_it = bidi_cache[bidi_cache_idx];
958
959 /* Pop the previous cache start from the stack. */
960 if (bidi_cache_sp <= 0)
961 emacs_abort ();
962 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
963
964 /* Invalidate the last-used cache slot data. */
965 bidi_cache_last_idx = -1;
966
967 bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT;
968 eassert (bidi_cache_max_elts > 0);
969 }
970
971 static ptrdiff_t bidi_cache_total_alloc;
972
973 /* Stash away a copy of the cache and its control variables. */
974 void *
975 bidi_shelve_cache (void)
976 {
977 unsigned char *databuf;
978 ptrdiff_t alloc;
979
980 /* Empty cache. */
981 if (bidi_cache_idx == 0)
982 return NULL;
983
984 alloc = (bidi_shelve_header_size
985 + bidi_cache_idx * sizeof (struct bidi_it));
986 databuf = xmalloc (alloc);
987 bidi_cache_total_alloc += alloc;
988
989 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
990 memcpy (databuf + sizeof (bidi_cache_idx),
991 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
992 memcpy (databuf + sizeof (bidi_cache_idx)
993 + bidi_cache_idx * sizeof (struct bidi_it),
994 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
995 memcpy (databuf + sizeof (bidi_cache_idx)
996 + bidi_cache_idx * sizeof (struct bidi_it)
997 + sizeof (bidi_cache_start_stack),
998 &bidi_cache_sp, sizeof (bidi_cache_sp));
999 memcpy (databuf + sizeof (bidi_cache_idx)
1000 + bidi_cache_idx * sizeof (struct bidi_it)
1001 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
1002 &bidi_cache_start, sizeof (bidi_cache_start));
1003 memcpy (databuf + sizeof (bidi_cache_idx)
1004 + bidi_cache_idx * sizeof (struct bidi_it)
1005 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1006 + sizeof (bidi_cache_start),
1007 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
1008 memcpy (databuf + sizeof (bidi_cache_idx)
1009 + bidi_cache_idx * sizeof (struct bidi_it)
1010 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1011 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
1012 &bidi_cache_max_elts, sizeof (bidi_cache_max_elts));
1013
1014 return databuf;
1015 }
1016
1017 /* Restore the cache state from a copy stashed away by
1018 bidi_shelve_cache, and free the buffer used to stash that copy.
1019 JUST_FREE means free the buffer, but don't restore the
1020 cache; used when the corresponding iterator is discarded instead of
1021 being restored. */
1022 void
1023 bidi_unshelve_cache (void *databuf, bool just_free)
1024 {
1025 unsigned char *p = databuf;
1026
1027 if (!p)
1028 {
1029 if (!just_free)
1030 {
1031 /* A NULL pointer means an empty cache. */
1032 bidi_cache_start = 0;
1033 bidi_cache_sp = 0;
1034 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
1035 bidi_cache_reset ();
1036 }
1037 }
1038 else
1039 {
1040 if (just_free)
1041 {
1042 ptrdiff_t idx;
1043
1044 memcpy (&idx, p, sizeof (bidi_cache_idx));
1045 bidi_cache_total_alloc
1046 -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
1047 }
1048 else
1049 {
1050 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
1051 bidi_cache_ensure_space (bidi_cache_idx);
1052 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
1053 bidi_cache_idx * sizeof (struct bidi_it));
1054 memcpy (bidi_cache_start_stack,
1055 p + sizeof (bidi_cache_idx)
1056 + bidi_cache_idx * sizeof (struct bidi_it),
1057 sizeof (bidi_cache_start_stack));
1058 memcpy (&bidi_cache_sp,
1059 p + sizeof (bidi_cache_idx)
1060 + bidi_cache_idx * sizeof (struct bidi_it)
1061 + sizeof (bidi_cache_start_stack),
1062 sizeof (bidi_cache_sp));
1063 memcpy (&bidi_cache_start,
1064 p + sizeof (bidi_cache_idx)
1065 + bidi_cache_idx * sizeof (struct bidi_it)
1066 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
1067 sizeof (bidi_cache_start));
1068 memcpy (&bidi_cache_last_idx,
1069 p + sizeof (bidi_cache_idx)
1070 + bidi_cache_idx * sizeof (struct bidi_it)
1071 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1072 + sizeof (bidi_cache_start),
1073 sizeof (bidi_cache_last_idx));
1074 memcpy (&bidi_cache_max_elts,
1075 p + sizeof (bidi_cache_idx)
1076 + bidi_cache_idx * sizeof (struct bidi_it)
1077 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
1078 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
1079 sizeof (bidi_cache_max_elts));
1080 bidi_cache_total_alloc
1081 -= (bidi_shelve_header_size
1082 + bidi_cache_idx * sizeof (struct bidi_it));
1083 }
1084
1085 xfree (p);
1086 }
1087 }
1088
1089 \f
1090 /***********************************************************************
1091 Initialization
1092 ***********************************************************************/
1093 static void
1094 bidi_initialize (void)
1095 {
1096 bidi_type_table = uniprop_table (intern ("bidi-class"));
1097 if (NILP (bidi_type_table))
1098 emacs_abort ();
1099 staticpro (&bidi_type_table);
1100
1101 bidi_mirror_table = uniprop_table (intern ("mirroring"));
1102 if (NILP (bidi_mirror_table))
1103 emacs_abort ();
1104 staticpro (&bidi_mirror_table);
1105
1106 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
1107 if (NILP (bidi_brackets_table))
1108 emacs_abort ();
1109 staticpro (&bidi_brackets_table);
1110
1111 Qparagraph_start = intern ("paragraph-start");
1112 staticpro (&Qparagraph_start);
1113 paragraph_start_re = Fsymbol_value (Qparagraph_start);
1114 if (!STRINGP (paragraph_start_re))
1115 paragraph_start_re = build_string ("\f\\|[ \t]*$");
1116 staticpro (&paragraph_start_re);
1117 Qparagraph_separate = intern ("paragraph-separate");
1118 staticpro (&Qparagraph_separate);
1119 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
1120 if (!STRINGP (paragraph_separate_re))
1121 paragraph_separate_re = build_string ("[ \t\f]*$");
1122 staticpro (&paragraph_separate_re);
1123
1124 bidi_cache_sp = 0;
1125 bidi_cache_total_alloc = 0;
1126 bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
1127
1128 bidi_initialized = 1;
1129 }
1130
1131 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
1132 end. */
1133 static void
1134 bidi_set_paragraph_end (struct bidi_it *bidi_it)
1135 {
1136 bidi_it->invalid_levels = 0;
1137 bidi_it->invalid_isolates = 0;
1138 bidi_it->stack_idx = 0;
1139 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1140 }
1141
1142 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
1143 void
1144 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
1145 struct bidi_it *bidi_it)
1146 {
1147 if (! bidi_initialized)
1148 bidi_initialize ();
1149 if (charpos >= 0)
1150 bidi_it->charpos = charpos;
1151 if (bytepos >= 0)
1152 bidi_it->bytepos = bytepos;
1153 bidi_it->frame_window_p = frame_window_p;
1154 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
1155 bidi_it->first_elt = 1;
1156 bidi_set_paragraph_end (bidi_it);
1157 bidi_it->new_paragraph = 1;
1158 bidi_it->separator_limit = -1;
1159 bidi_it->type = NEUTRAL_B;
1160 bidi_it->type_after_wn = NEUTRAL_B;
1161 bidi_it->orig_type = NEUTRAL_B;
1162 /* FIXME: Review this!!! */
1163 bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
1164 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1165 bidi_it->next_for_neutral.charpos = -1;
1166 bidi_it->next_for_neutral.type
1167 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
1168 bidi_it->prev_for_neutral.charpos = -1;
1169 bidi_it->prev_for_neutral.type
1170 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
1171 bidi_it->bracket_pairing_pos = -1;
1172 bidi_it->sos = L2R; /* FIXME: should it be user-selectable? */
1173 bidi_it->disp_pos = -1; /* invalid/unknown */
1174 bidi_it->disp_prop = 0;
1175 /* We can only shrink the cache if we are at the bottom level of its
1176 "stack". */
1177 if (bidi_cache_start == 0)
1178 bidi_cache_shrink ();
1179 else
1180 bidi_cache_reset ();
1181 }
1182
1183 /* Perform initializations for reordering a new line of bidi text. */
1184 static void
1185 bidi_line_init (struct bidi_it *bidi_it)
1186 {
1187 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
1188 bidi_it->stack_idx = 0;
1189 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1190 bidi_it->level_stack[0].flags = 0; /* NEUTRAL_DIR, false per X1 */
1191 bidi_it->invalid_levels = 0;
1192 bidi_it->isolate_level = 0; /* X1 */
1193 bidi_it->invalid_isolates = 0; /* X1 */
1194 /* Setting this to zero will force its recomputation the first time
1195 we need it for W5. */
1196 bidi_it->next_en_pos = 0;
1197 bidi_it->next_en_type = UNKNOWN_BT;
1198 bidi_it->next_for_ws.charpos = -1;
1199 bidi_it->next_for_ws.type = UNKNOWN_BT;
1200 bidi_it->bracket_pairing_pos = -1;
1201 bidi_set_sos_type (bidi_it,
1202 (bidi_it->paragraph_dir == R2L ? 1 : 0),
1203 bidi_it->level_stack[0].level); /* X10 */
1204
1205 bidi_cache_reset ();
1206 }
1207
1208 \f
1209 /***********************************************************************
1210 Fetching characters
1211 ***********************************************************************/
1212
1213 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
1214 are zero-based character positions in S, BEGBYTE is byte position
1215 corresponding to BEG. UNIBYTE means S is a unibyte string. */
1216 static ptrdiff_t
1217 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
1218 ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
1219 {
1220 ptrdiff_t pos = beg;
1221 const unsigned char *p = s + begbyte, *start = p;
1222
1223 if (unibyte)
1224 p = s + end;
1225 else
1226 {
1227 if (!CHAR_HEAD_P (*p))
1228 emacs_abort ();
1229
1230 while (pos < end)
1231 {
1232 p += BYTES_BY_CHAR_HEAD (*p);
1233 pos++;
1234 }
1235 }
1236
1237 return p - start;
1238 }
1239
1240 /* Fetch and return the character at byte position BYTEPOS. If S is
1241 non-NULL, fetch the character from string S; otherwise fetch the
1242 character from the current buffer. UNIBYTE means S is a
1243 unibyte string. */
1244 static int
1245 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
1246 {
1247 if (s)
1248 {
1249 s += bytepos;
1250 if (unibyte)
1251 return *s;
1252 }
1253 else
1254 s = BYTE_POS_ADDR (bytepos);
1255 return STRING_CHAR (s);
1256 }
1257
1258 /* Fetch and return the character at CHARPOS/BYTEPOS. If that
1259 character is covered by a display string, treat the entire run of
1260 covered characters as a single character, either u+2029 or u+FFFC,
1261 and return their combined length in CH_LEN and NCHARS. DISP_POS
1262 specifies the character position of the next display string, or -1
1263 if not yet computed. When the next character is at or beyond that
1264 position, the function updates DISP_POS with the position of the
1265 next display string. *DISP_PROP non-zero means that there's really
1266 a display string at DISP_POS, as opposed to when we searched till
1267 DISP_POS without finding one. If *DISP_PROP is 2, it means the
1268 display spec is of the form `(space ...)', which is replaced with
1269 u+2029 to handle it as a paragraph separator. STRING->s is the C
1270 string to iterate, or NULL if iterating over a buffer or a Lisp
1271 string; in the latter case, STRING->lstring is the Lisp string. */
1272 static int
1273 bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
1274 int *disp_prop, struct bidi_string_data *string,
1275 struct window *w,
1276 bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
1277 {
1278 int ch;
1279 ptrdiff_t endpos
1280 = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
1281 struct text_pos pos;
1282 int len;
1283
1284 /* If we got past the last known position of display string, compute
1285 the position of the next one. That position could be at CHARPOS. */
1286 if (charpos < endpos && charpos > *disp_pos)
1287 {
1288 SET_TEXT_POS (pos, charpos, bytepos);
1289 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1290 disp_prop);
1291 }
1292
1293 /* Fetch the character at BYTEPOS. */
1294 if (charpos >= endpos)
1295 {
1296 ch = BIDI_EOB;
1297 *ch_len = 1;
1298 *nchars = 1;
1299 *disp_pos = endpos;
1300 *disp_prop = 0;
1301 }
1302 else if (charpos >= *disp_pos && *disp_prop)
1303 {
1304 ptrdiff_t disp_end_pos;
1305
1306 /* We don't expect to find ourselves in the middle of a display
1307 property. Hopefully, it will never be needed. */
1308 if (charpos > *disp_pos)
1309 emacs_abort ();
1310 /* Text covered by `display' properties and overlays with
1311 display properties or display strings is handled as a single
1312 character that represents the entire run of characters
1313 covered by the display property. */
1314 if (*disp_prop == 2)
1315 {
1316 /* `(space ...)' display specs are handled as paragraph
1317 separators for the purposes of the reordering; see UAX#9
1318 section 3 and clause HL1 in section 4.3 there. */
1319 ch = 0x2029;
1320 }
1321 else
1322 {
1323 /* All other display specs are handled as the Unicode Object
1324 Replacement Character. */
1325 ch = 0xFFFC;
1326 }
1327 disp_end_pos = compute_display_string_end (*disp_pos, string);
1328 if (disp_end_pos < 0)
1329 {
1330 /* Somebody removed the display string from the buffer
1331 behind our back. Recover by processing this buffer
1332 position as if no display property were present there to
1333 begin with. */
1334 *disp_prop = 0;
1335 goto normal_char;
1336 }
1337 *nchars = disp_end_pos - *disp_pos;
1338 if (*nchars <= 0)
1339 emacs_abort ();
1340 if (string->s)
1341 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
1342 disp_end_pos, string->unibyte);
1343 else if (STRINGP (string->lstring))
1344 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
1345 bytepos, disp_end_pos, string->unibyte);
1346 else
1347 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1348 }
1349 else
1350 {
1351 normal_char:
1352 if (string->s)
1353 {
1354
1355 if (!string->unibyte)
1356 {
1357 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1358 *ch_len = len;
1359 }
1360 else
1361 {
1362 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1363 *ch_len = 1;
1364 }
1365 }
1366 else if (STRINGP (string->lstring))
1367 {
1368 if (!string->unibyte)
1369 {
1370 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1371 len);
1372 *ch_len = len;
1373 }
1374 else
1375 {
1376 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1377 *ch_len = 1;
1378 }
1379 }
1380 else
1381 {
1382 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1383 *ch_len = len;
1384 }
1385 *nchars = 1;
1386 }
1387
1388 /* If we just entered a run of characters covered by a display
1389 string, compute the position of the next display string. */
1390 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1391 && *disp_prop)
1392 {
1393 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1394 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1395 disp_prop);
1396 }
1397
1398 return ch;
1399 }
1400
1401 /* Like bidi_fetch_char, but ignore any text between an isolate
1402 initiator and its matching PDI or, if it has no matching PDI, the
1403 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1404 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1405 and the character that was fetched after skipping the isolates. */
1406 static int
1407 bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
1408 ptrdiff_t *disp_pos, int *disp_prop,
1409 struct bidi_string_data *string,
1410 struct window *w, bool frame_window_p,
1411 ptrdiff_t *ch_len, ptrdiff_t *nchars)
1412 {
1413 ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
1414 int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
1415 frame_window_p, ch_len, nchars);
1416 bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1417 ptrdiff_t level = 0;
1418
1419 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1420 {
1421 level++;
1422 while (level > 0 && ch_type != NEUTRAL_B)
1423 {
1424 charpos += *nchars;
1425 bytepos += *ch_len;
1426 ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
1427 w, frame_window_p, ch_len, nchars);
1428 ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1429 /* A Note to P2 says to ignore max_depth limit. */
1430 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1431 level++;
1432 else if (ch_type == PDI)
1433 level--;
1434 }
1435 }
1436
1437 /* Communicate to the caller how much did we skip, so it could get
1438 past the last character position we examined. */
1439 *nchars += charpos - orig_charpos;
1440 *ch_len += bytepos - orig_bytepos;
1441 return ch;
1442 }
1443
1444
1445 \f
1446 /***********************************************************************
1447 Determining paragraph direction
1448 ***********************************************************************/
1449
1450 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1451 Value is the non-negative length of the paragraph separator
1452 following the buffer position, -1 if position is at the beginning
1453 of a new paragraph, or -2 if position is neither at beginning nor
1454 at end of a paragraph. */
1455 static ptrdiff_t
1456 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1457 {
1458 Lisp_Object sep_re;
1459 Lisp_Object start_re;
1460 ptrdiff_t val;
1461
1462 sep_re = paragraph_separate_re;
1463 start_re = paragraph_start_re;
1464
1465 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1466 if (val < 0)
1467 {
1468 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1469 val = -1;
1470 else
1471 val = -2;
1472 }
1473
1474 return val;
1475 }
1476
1477 /* If the user has requested the long scans caching, make sure that
1478 BIDI cache is enabled. Otherwise, make sure it's disabled. */
1479
1480 static struct region_cache *
1481 bidi_paragraph_cache_on_off (void)
1482 {
1483 struct buffer *cache_buffer = current_buffer;
1484 bool indirect_p = false;
1485
1486 /* For indirect buffers, make sure to use the cache of their base
1487 buffer. */
1488 if (cache_buffer->base_buffer)
1489 {
1490 cache_buffer = cache_buffer->base_buffer;
1491 indirect_p = true;
1492 }
1493
1494 /* Don't turn on or off the cache in the base buffer, if the value
1495 of cache-long-scans of the base buffer is inconsistent with that.
1496 This is because doing so will just make the cache pure overhead,
1497 since if we turn it on via indirect buffer, it will be
1498 immediately turned off by its base buffer. */
1499 if (NILP (BVAR (current_buffer, cache_long_scans)))
1500 {
1501 if (!indirect_p
1502 || NILP (BVAR (cache_buffer, cache_long_scans)))
1503 {
1504 if (cache_buffer->bidi_paragraph_cache)
1505 {
1506 free_region_cache (cache_buffer->bidi_paragraph_cache);
1507 cache_buffer->bidi_paragraph_cache = 0;
1508 }
1509 }
1510 return NULL;
1511 }
1512 else
1513 {
1514 if (!indirect_p
1515 || !NILP (BVAR (cache_buffer, cache_long_scans)))
1516 {
1517 if (!cache_buffer->bidi_paragraph_cache)
1518 cache_buffer->bidi_paragraph_cache = new_region_cache ();
1519 }
1520 return cache_buffer->bidi_paragraph_cache;
1521 }
1522 }
1523
1524 /* On my 2005-vintage machine, searching back for paragraph start
1525 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1526 when user types C-p. The number below limits each call to
1527 bidi_paragraph_init to about 10 ms. */
1528 #define MAX_PARAGRAPH_SEARCH 7500
1529
1530 /* Find the beginning of this paragraph by looking back in the buffer.
1531 Value is the byte position of the paragraph's beginning, or
1532 BEGV_BYTE if paragraph_start_re is still not found after looking
1533 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
1534 static ptrdiff_t
1535 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1536 {
1537 Lisp_Object re = paragraph_start_re;
1538 ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1539 struct region_cache *bpc = bidi_paragraph_cache_on_off ();
1540 ptrdiff_t n = 0, oldpos = pos, next;
1541 struct buffer *cache_buffer = current_buffer;
1542
1543 if (cache_buffer->base_buffer)
1544 cache_buffer = cache_buffer->base_buffer;
1545
1546 while (pos_byte > BEGV_BYTE
1547 && n++ < MAX_PARAGRAPH_SEARCH
1548 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1549 {
1550 /* FIXME: What if the paragraph beginning is covered by a
1551 display string? And what if a display string covering some
1552 of the text over which we scan back includes
1553 paragraph_start_re? */
1554 DEC_BOTH (pos, pos_byte);
1555 if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
1556 {
1557 pos = next, pos_byte = CHAR_TO_BYTE (pos);
1558 break;
1559 }
1560 else
1561 pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
1562 }
1563 if (n >= MAX_PARAGRAPH_SEARCH)
1564 pos = BEGV, pos_byte = BEGV_BYTE;
1565 if (bpc)
1566 know_region_cache (cache_buffer, bpc, pos, oldpos);
1567 /* Positions returned by the region cache are not limited to
1568 BEGV..ZV range, so we limit them here. */
1569 pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
1570 return pos_byte;
1571 }
1572
1573 /* On a 3.4 GHz machine, searching forward for a strong directional
1574 character in a long paragraph full of weaks or neutrals takes about
1575 1 ms for each 20K characters. The number below limits each call to
1576 bidi_paragraph_init to less than 10 ms even on slow machines. */
1577 #define MAX_STRONG_CHAR_SEARCH 100000
1578
1579 /* Starting from POS, find the first strong (L, R, or AL) character,
1580 while skipping over any characters between an isolate initiator and
1581 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1582 matches the isolate initiator at POS. Return the bidi type of the
1583 character where the search stopped. Give up if after examining
1584 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1585 character was found. */
1586 static bidi_type_t
1587 find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
1588 ptrdiff_t *disp_pos, int *disp_prop,
1589 struct bidi_string_data *string, struct window *w,
1590 bool string_p, bool frame_window_p,
1591 ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
1592 {
1593 ptrdiff_t pos1;
1594 bidi_type_t type;
1595 int ch;
1596
1597 if (stop_at_pdi)
1598 {
1599 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1600 at POS. Get past it. */
1601 #ifdef ENABLE_CHECKING
1602 ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
1603 frame_window_p, ch_len, nchars);
1604 type = bidi_get_type (ch, NEUTRAL_DIR);
1605 eassert (type == FSI /* || type == LRI || type == RLI */);
1606 #endif
1607 pos += *nchars;
1608 bytepos += *ch_len;
1609 }
1610 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
1611 w, frame_window_p, ch_len, nchars);
1612 type = bidi_get_type (ch, NEUTRAL_DIR);
1613
1614 pos1 = pos;
1615 for (pos += *nchars, bytepos += *ch_len;
1616 bidi_get_category (type) != STRONG
1617 /* If requested to stop at first PDI, stop there. */
1618 && !(stop_at_pdi && type == PDI)
1619 /* Stop when searched too far into an abnormally large
1620 paragraph full of weak or neutral characters. */
1621 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1622 type = bidi_get_type (ch, NEUTRAL_DIR))
1623 {
1624 if (pos >= end)
1625 {
1626 /* Pretend there's a paragraph separator at end of
1627 buffer/string. */
1628 type = NEUTRAL_B;
1629 break;
1630 }
1631 if (!string_p
1632 && type == NEUTRAL_B
1633 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1634 break;
1635 /* Fetch next character and advance to get past it. */
1636 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
1637 string, w, frame_window_p,
1638 ch_len, nchars);
1639 pos += *nchars;
1640 bytepos += *ch_len;
1641 }
1642 return type;
1643 }
1644
1645 /* Determine the base direction, a.k.a. base embedding level, of the
1646 paragraph we are about to iterate through. If DIR is either L2R or
1647 R2L, just use that. Otherwise, determine the paragraph direction
1648 from the first strong directional character of the paragraph.
1649
1650 NO_DEFAULT_P means don't default to L2R if the paragraph
1651 has no strong directional characters and both DIR and
1652 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1653 in the buffer until a paragraph is found with a strong character,
1654 or until hitting BEGV. In the latter case, fall back to L2R. This
1655 flag is used in current-bidi-paragraph-direction.
1656
1657 Note that this function gives the paragraph separator the same
1658 direction as the preceding paragraph, even though Emacs generally
1659 views the separator as not belonging to any paragraph. */
1660 void
1661 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1662 {
1663 ptrdiff_t bytepos = bidi_it->bytepos;
1664 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1665 ptrdiff_t pstartbyte;
1666 /* Note that begbyte is a byte position, while end is a character
1667 position. Yes, this is ugly, but we are trying to avoid costly
1668 calls to BYTE_TO_CHAR and its ilk. */
1669 ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
1670 ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
1671
1672 /* Special case for an empty buffer. */
1673 if (bytepos == begbyte && bidi_it->charpos == end)
1674 dir = L2R;
1675 /* We should never be called at EOB or before BEGV. */
1676 else if (bidi_it->charpos >= end || bytepos < begbyte)
1677 emacs_abort ();
1678
1679 if (dir == L2R)
1680 {
1681 bidi_it->paragraph_dir = L2R;
1682 bidi_it->new_paragraph = 0;
1683 }
1684 else if (dir == R2L)
1685 {
1686 bidi_it->paragraph_dir = R2L;
1687 bidi_it->new_paragraph = 0;
1688 }
1689 else if (dir == NEUTRAL_DIR) /* P2 */
1690 {
1691 ptrdiff_t ch_len, nchars;
1692 ptrdiff_t pos, disp_pos = -1;
1693 int disp_prop = 0;
1694 bidi_type_t type;
1695 const unsigned char *s;
1696
1697 if (!bidi_initialized)
1698 bidi_initialize ();
1699
1700 /* If we are inside a paragraph separator, we are just waiting
1701 for the separator to be exhausted; use the previous paragraph
1702 direction. But don't do that if we have been just reseated,
1703 because we need to reinitialize below in that case. */
1704 if (!bidi_it->first_elt
1705 && bidi_it->charpos < bidi_it->separator_limit)
1706 return;
1707
1708 /* If we are on a newline, get past it to where the next
1709 paragraph might start. But don't do that at BEGV since then
1710 we are potentially in a new paragraph that doesn't yet
1711 exist. */
1712 pos = bidi_it->charpos;
1713 s = (STRINGP (bidi_it->string.lstring)
1714 ? SDATA (bidi_it->string.lstring)
1715 : bidi_it->string.s);
1716 if (bytepos > begbyte
1717 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1718 {
1719 bytepos++;
1720 pos++;
1721 }
1722
1723 /* We are either at the beginning of a paragraph or in the
1724 middle of it. Find where this paragraph starts. */
1725 if (string_p)
1726 {
1727 /* We don't support changes of paragraph direction inside a
1728 string. It is treated as a single paragraph. */
1729 pstartbyte = 0;
1730 }
1731 else
1732 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1733 bidi_it->separator_limit = -1;
1734 bidi_it->new_paragraph = 0;
1735
1736 /* The following loop is run more than once only if NO_DEFAULT_P,
1737 and only if we are iterating on a buffer. */
1738 do {
1739 bytepos = pstartbyte;
1740 if (!string_p)
1741 pos = BYTE_TO_CHAR (bytepos);
1742 type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
1743 &bidi_it->string, bidi_it->w,
1744 string_p, bidi_it->frame_window_p,
1745 &ch_len, &nchars, false);
1746 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1747 bidi_it->paragraph_dir = R2L;
1748 else if (type == STRONG_L)
1749 bidi_it->paragraph_dir = L2R;
1750 if (!string_p
1751 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1752 {
1753 /* If this paragraph is at BEGV, default to L2R. */
1754 if (pstartbyte == BEGV_BYTE)
1755 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1756 else
1757 {
1758 ptrdiff_t prevpbyte = pstartbyte;
1759 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1760
1761 /* Find the beginning of the previous paragraph, if any. */
1762 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1763 {
1764 /* FXIME: What if p is covered by a display
1765 string? See also a FIXME inside
1766 bidi_find_paragraph_start. */
1767 DEC_BOTH (p, pbyte);
1768 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1769 }
1770 pstartbyte = prevpbyte;
1771 }
1772 }
1773 } while (!string_p
1774 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1775 }
1776 else
1777 emacs_abort ();
1778
1779 /* Contrary to UAX#9 clause P3, we only default the paragraph
1780 direction to L2R if we have no previous usable paragraph
1781 direction. This is allowed by the HL1 clause. */
1782 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1783 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1784 if (bidi_it->paragraph_dir == R2L)
1785 bidi_it->level_stack[0].level = 1;
1786 else
1787 bidi_it->level_stack[0].level = 0;
1788
1789 bidi_line_init (bidi_it);
1790 }
1791
1792 \f
1793 /***********************************************************************
1794 Resolving explicit and implicit levels.
1795 The rest of this file constitutes the core of the UBA implementation.
1796 ***********************************************************************/
1797
1798 static bool
1799 bidi_explicit_dir_char (int ch)
1800 {
1801 bidi_type_t ch_type;
1802
1803 if (!bidi_initialized)
1804 emacs_abort ();
1805 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1806 return (ch_type == LRE || ch_type == LRO
1807 || ch_type == RLE || ch_type == RLO
1808 || ch_type == PDF);
1809 }
1810
1811 /* Given an iterator state in BIDI_IT, advance one character position
1812 in the buffer/string to the next character (in the logical order),
1813 resolve any explicit embeddings, directional overrides, and isolate
1814 initiators and terminators, and return the embedding level of the
1815 character after resolving these explicit directives. */
1816 static int
1817 bidi_resolve_explicit (struct bidi_it *bidi_it)
1818 {
1819 int curchar;
1820 bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
1821 int current_level;
1822 int new_level;
1823 bidi_dir_t override;
1824 bool isolate_status;
1825 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1826 ptrdiff_t ch_len, nchars, disp_pos, end;
1827 int disp_prop;
1828 ptrdiff_t eob
1829 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1830 ? bidi_it->string.schars : ZV);
1831
1832 /* Record the info about the previous character. */
1833 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
1834 && bidi_it->type != WEAK_BN)
1835 {
1836 /* This special case is needed in support of Unicode 8.0
1837 correction to N0, as implemented in bidi_resolve_weak/W1
1838 below. */
1839 if (bidi_it->type_after_wn == NEUTRAL_ON
1840 && bidi_get_category (bidi_it->type) == STRONG
1841 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
1842 bidi_remember_char (&bidi_it->prev, bidi_it, 1);
1843 else
1844 bidi_remember_char (&bidi_it->prev, bidi_it, 0);
1845 }
1846 if (bidi_it->type_after_wn == STRONG_R
1847 || bidi_it->type_after_wn == STRONG_L
1848 || bidi_it->type_after_wn == STRONG_AL)
1849 bidi_remember_char (&bidi_it->last_strong, bidi_it, 0);
1850 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1851 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1852 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it, 1);
1853
1854 /* If we overstepped the characters used for resolving neutrals
1855 and whitespace, invalidate their info in the iterator. */
1856 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1857 {
1858 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1859 /* If needed, reset the "magical" value of pairing bracket
1860 position, so that bidi_resolve_brackets will resume
1861 resolution of brackets according to BPA. */
1862 if (bidi_it->bracket_pairing_pos == eob)
1863 bidi_it->bracket_pairing_pos = -1;
1864 }
1865 if (bidi_it->next_en_pos >= 0
1866 && bidi_it->charpos >= bidi_it->next_en_pos)
1867 {
1868 bidi_it->next_en_pos = 0;
1869 bidi_it->next_en_type = UNKNOWN_BT;
1870 }
1871
1872 /* Reset the bracket resolution info, unless we previously decided
1873 (in bidi_find_bracket_pairs) that brackets in this level run
1874 should be resolved as neutrals. */
1875 if (bidi_it->bracket_pairing_pos != eob)
1876 {
1877 bidi_it->bracket_pairing_pos = -1;
1878 bidi_it->bracket_enclosed_type = UNKNOWN_BT;
1879 }
1880
1881 /* If reseat()'ed, don't advance, so as to start iteration from the
1882 position where we were reseated. bidi_it->bytepos can be less
1883 than BEGV_BYTE after reseat to BEGV. */
1884 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1885 || bidi_it->first_elt)
1886 {
1887 bidi_it->first_elt = 0;
1888 if (string_p)
1889 {
1890 const unsigned char *p
1891 = (STRINGP (bidi_it->string.lstring)
1892 ? SDATA (bidi_it->string.lstring)
1893 : bidi_it->string.s);
1894
1895 if (bidi_it->charpos < 0)
1896 bidi_it->charpos = bidi_it->bytepos = 0;
1897 eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
1898 bidi_it->charpos,
1899 bidi_it->string.unibyte));
1900 }
1901 else
1902 {
1903 if (bidi_it->charpos < BEGV)
1904 {
1905 bidi_it->charpos = BEGV;
1906 bidi_it->bytepos = BEGV_BYTE;
1907 }
1908 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
1909 }
1910 /* Determine the original bidi type of the previous character,
1911 which is needed for handling isolate initiators and PDF. The
1912 type of the previous character will be non-trivial only if
1913 our caller moved through some previous text in
1914 get_visually_first_element, in which case bidi_it->prev holds
1915 the information we want. */
1916 if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
1917 {
1918 eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
1919 prev_type = bidi_it->prev.orig_type;
1920 if (prev_type == FSI)
1921 prev_type = bidi_it->type_after_wn;
1922 }
1923 }
1924 /* Don't move at end of buffer/string. */
1925 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1926 {
1927 /* Advance to the next character, skipping characters covered by
1928 display strings (nchars > 1). */
1929 if (bidi_it->nchars <= 0)
1930 emacs_abort ();
1931 bidi_it->charpos += bidi_it->nchars;
1932 if (bidi_it->ch_len == 0)
1933 emacs_abort ();
1934 bidi_it->bytepos += bidi_it->ch_len;
1935 prev_type = bidi_it->orig_type;
1936 if (prev_type == FSI)
1937 prev_type = bidi_it->type_after_wn;
1938 }
1939 else /* EOB or end of string */
1940 prev_type = NEUTRAL_B;
1941
1942 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1943 isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
1944 override = OVERRIDE (bidi_it, bidi_it->stack_idx);
1945 new_level = current_level;
1946
1947 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1948 {
1949 curchar = BIDI_EOB;
1950 bidi_it->ch_len = 1;
1951 bidi_it->nchars = 1;
1952 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1953 bidi_it->disp_prop = 0;
1954 }
1955 else
1956 {
1957 /* LRI, RLI, and FSI increment, and PDF decrements, the
1958 embedding level of the _following_ characters, so we must
1959 first look at the type of the previous character to support
1960 that. */
1961 switch (prev_type)
1962 {
1963 case RLI: /* X5a */
1964 if (current_level < BIDI_MAXDEPTH
1965 && bidi_it->invalid_levels == 0
1966 && bidi_it->invalid_isolates == 0)
1967 {
1968 new_level = ((current_level + 1) & ~1) + 1;
1969 bidi_it->isolate_level++;
1970 bidi_push_embedding_level (bidi_it, new_level,
1971 NEUTRAL_DIR, true);
1972 }
1973 else
1974 bidi_it->invalid_isolates++;
1975 break;
1976 case LRI: /* X5b */
1977 if (current_level < BIDI_MAXDEPTH - 1
1978 && bidi_it->invalid_levels == 0
1979 && bidi_it->invalid_isolates == 0)
1980 {
1981 new_level = ((current_level + 2) & ~1);
1982 bidi_it->isolate_level++;
1983 bidi_push_embedding_level (bidi_it, new_level,
1984 NEUTRAL_DIR, true);
1985 }
1986 else
1987 bidi_it->invalid_isolates++;
1988 break;
1989 case PDF: /* X7 */
1990 if (!bidi_it->invalid_isolates)
1991 {
1992 if (bidi_it->invalid_levels)
1993 bidi_it->invalid_levels--;
1994 else if (!isolate_status && bidi_it->stack_idx >= 1)
1995 new_level = bidi_pop_embedding_level (bidi_it);
1996 }
1997 break;
1998 default:
1999 eassert (prev_type != FSI);
2000 /* Nothing. */
2001 break;
2002 }
2003 /* Fetch the character at BYTEPOS. If it is covered by a
2004 display string, treat the entire run of covered characters as
2005 a single character u+FFFC. */
2006 curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
2007 &bidi_it->disp_pos, &bidi_it->disp_prop,
2008 &bidi_it->string, bidi_it->w,
2009 bidi_it->frame_window_p,
2010 &bidi_it->ch_len, &bidi_it->nchars);
2011 }
2012 bidi_it->ch = curchar;
2013 bidi_it->resolved_level = new_level;
2014
2015 /* Don't apply directional override here, as all the types we handle
2016 below will not be affected by the override anyway, and we need
2017 the original type unaltered. The override will be applied in
2018 bidi_resolve_weak. */
2019 type = bidi_get_type (curchar, NEUTRAL_DIR);
2020 bidi_it->orig_type = type;
2021 bidi_check_type (bidi_it->orig_type);
2022
2023 bidi_it->type_after_wn = UNKNOWN_BT;
2024
2025 switch (type)
2026 {
2027 case RLE: /* X2 */
2028 case RLO: /* X4 */
2029 bidi_it->type_after_wn = type;
2030 bidi_check_type (bidi_it->type_after_wn);
2031 type = WEAK_BN; /* X9/Retaining */
2032 if (new_level < BIDI_MAXDEPTH
2033 && bidi_it->invalid_levels == 0
2034 && bidi_it->invalid_isolates == 0)
2035 {
2036 /* Compute the least odd embedding level greater than
2037 the current level. */
2038 new_level = ((new_level + 1) & ~1) + 1;
2039 if (bidi_it->type_after_wn == RLE)
2040 override = NEUTRAL_DIR;
2041 else
2042 override = R2L;
2043 bidi_push_embedding_level (bidi_it, new_level, override, false);
2044 bidi_it->resolved_level = new_level;
2045 }
2046 else
2047 {
2048 if (bidi_it->invalid_isolates == 0)
2049 bidi_it->invalid_levels++;
2050 }
2051 break;
2052 case LRE: /* X3 */
2053 case LRO: /* X5 */
2054 bidi_it->type_after_wn = type;
2055 bidi_check_type (bidi_it->type_after_wn);
2056 type = WEAK_BN; /* X9/Retaining */
2057 if (new_level < BIDI_MAXDEPTH - 1
2058 && bidi_it->invalid_levels == 0
2059 && bidi_it->invalid_isolates == 0)
2060 {
2061 /* Compute the least even embedding level greater than
2062 the current level. */
2063 new_level = ((new_level + 2) & ~1);
2064 if (bidi_it->type_after_wn == LRE)
2065 override = NEUTRAL_DIR;
2066 else
2067 override = L2R;
2068 bidi_push_embedding_level (bidi_it, new_level, override, false);
2069 bidi_it->resolved_level = new_level;
2070 }
2071 else
2072 {
2073 if (bidi_it->invalid_isolates == 0)
2074 bidi_it->invalid_levels++;
2075 }
2076 break;
2077 case FSI: /* X5c */
2078 end = string_p ? bidi_it->string.schars : ZV;
2079 disp_pos = bidi_it->disp_pos;
2080 disp_prop = bidi_it->disp_prop;
2081 nchars = bidi_it->nchars;
2082 ch_len = bidi_it->ch_len;
2083 typ1 = find_first_strong_char (bidi_it->charpos,
2084 bidi_it->bytepos, end,
2085 &disp_pos, &disp_prop,
2086 &bidi_it->string, bidi_it->w,
2087 string_p, bidi_it->frame_window_p,
2088 &ch_len, &nchars, true);
2089 if (typ1 != STRONG_R && typ1 != STRONG_AL)
2090 {
2091 type = LRI;
2092 goto fsi_as_lri;
2093 }
2094 else
2095 type = RLI;
2096 /* FALLTHROUGH */
2097 case RLI: /* X5a */
2098 if (override == NEUTRAL_DIR)
2099 bidi_it->type_after_wn = type;
2100 else /* Unicode 8.0 correction. */
2101 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2102 bidi_check_type (bidi_it->type_after_wn);
2103 break;
2104 case LRI: /* X5b */
2105 fsi_as_lri:
2106 if (override == NEUTRAL_DIR)
2107 bidi_it->type_after_wn = type;
2108 else /* Unicode 8.0 correction. */
2109 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2110 bidi_check_type (bidi_it->type_after_wn);
2111 break;
2112 case PDI: /* X6a */
2113 if (bidi_it->invalid_isolates)
2114 bidi_it->invalid_isolates--;
2115 else if (bidi_it->isolate_level > 0)
2116 {
2117 bidi_it->invalid_levels = 0;
2118 while (!ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
2119 bidi_pop_embedding_level (bidi_it);
2120 eassert (bidi_it->stack_idx > 0);
2121 new_level = bidi_pop_embedding_level (bidi_it);
2122 bidi_it->isolate_level--;
2123 }
2124 bidi_it->resolved_level = new_level;
2125 /* Unicode 8.0 correction. */
2126 {
2127 bidi_dir_t stack_override = OVERRIDE (bidi_it, bidi_it->stack_idx);
2128 if (stack_override == L2R)
2129 bidi_it->type_after_wn = STRONG_L;
2130 else if (stack_override == R2L)
2131 bidi_it->type_after_wn = STRONG_R;
2132 else
2133 bidi_it->type_after_wn = type;
2134 }
2135 break;
2136 case PDF: /* X7 */
2137 bidi_it->type_after_wn = type;
2138 bidi_check_type (bidi_it->type_after_wn);
2139 type = WEAK_BN; /* X9/Retaining */
2140 break;
2141 default:
2142 /* Nothing. */
2143 break;
2144 }
2145
2146 bidi_it->type = type;
2147 bidi_check_type (bidi_it->type);
2148
2149 if (bidi_it->type == NEUTRAL_B) /* X8 */
2150 {
2151 bidi_set_paragraph_end (bidi_it);
2152 /* This is needed by bidi_resolve_weak below, and in L1. */
2153 bidi_it->type_after_wn = bidi_it->type;
2154 }
2155
2156 eassert (bidi_it->resolved_level >= 0);
2157 return bidi_it->resolved_level;
2158 }
2159
2160 /* Advance in the buffer/string, resolve weak types and return the
2161 type of the next character after weak type resolution. */
2162 static bidi_type_t
2163 bidi_resolve_weak (struct bidi_it *bidi_it)
2164 {
2165 bidi_type_t type;
2166 bidi_dir_t override;
2167 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2168 int new_level = bidi_resolve_explicit (bidi_it);
2169 int next_char;
2170 bidi_type_t type_of_next;
2171 struct bidi_it saved_it;
2172 ptrdiff_t eob
2173 = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
2174 ? bidi_it->string.schars : ZV);
2175
2176 type = bidi_it->type;
2177 override = OVERRIDE (bidi_it, bidi_it->stack_idx);
2178
2179 eassert (!(type == UNKNOWN_BT
2180 || type == LRE
2181 || type == LRO
2182 || type == RLE
2183 || type == RLO
2184 || type == PDF));
2185
2186 eassert (prev_level >= 0);
2187 if (bidi_it->type == NEUTRAL_B)
2188 {
2189 /* We've got a new isolating sequence, compute the directional
2190 type of sos and initialize per-run variables (UAX#9, clause
2191 X10). */
2192 bidi_set_sos_type (bidi_it, prev_level, new_level);
2193 }
2194 if (type == NEUTRAL_S || type == NEUTRAL_WS
2195 || type == WEAK_BN || type == STRONG_AL)
2196 bidi_it->type_after_wn = type; /* needed in L1 */
2197 bidi_check_type (bidi_it->type_after_wn);
2198
2199 /* Level and directional override status are already recorded in
2200 bidi_it, and do not need any change; see X6. */
2201 if (override == R2L) /* X6 */
2202 type = STRONG_R;
2203 else if (override == L2R)
2204 type = STRONG_L;
2205 else
2206 {
2207 if (type == WEAK_NSM) /* W1 */
2208 {
2209 /* Note that we don't need to consider the case where the
2210 prev character has its type overridden by an RLO or LRO,
2211 because then either the type of this NSM would have been
2212 also overridden, or the previous character is outside the
2213 current level run, and thus not relevant to this NSM.
2214 This is why NSM gets the type_after_wn of the previous
2215 character. */
2216 /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT. */
2217 if (bidi_it->prev.type != UNKNOWN_BT
2218 /* If type_after_wn is NEUTRAL_B, this NSM is at sos. */
2219 && bidi_it->prev.type != NEUTRAL_B)
2220 {
2221 if (bidi_isolate_fmt_char (bidi_it->prev.type))
2222 {
2223 /* From W1: "Note that in an isolating run sequence,
2224 an isolate initiator followed by an NSM or any
2225 type other than PDI must be an overflow isolate
2226 initiator." */
2227 eassert (bidi_it->invalid_isolates > 0);
2228 type = NEUTRAL_ON;
2229 }
2230 else
2231 {
2232 /* This includes the Unicode 8.0 correction for N0,
2233 due to how we set prev.type in bidi_resolve_explicit,
2234 which see. */
2235 type = bidi_it->prev.type;
2236 }
2237 }
2238 else if (bidi_it->sos == R2L)
2239 type = STRONG_R;
2240 else if (bidi_it->sos == L2R)
2241 type = STRONG_L;
2242 else /* shouldn't happen! */
2243 emacs_abort ();
2244 }
2245 if (type == WEAK_EN /* W2 */
2246 && bidi_it->last_strong.type == STRONG_AL)
2247 type = WEAK_AN;
2248 else if (type == STRONG_AL) /* W3 */
2249 type = STRONG_R;
2250 else if ((type == WEAK_ES /* W4 */
2251 && bidi_it->prev.type == WEAK_EN
2252 && bidi_it->prev.orig_type == WEAK_EN)
2253 || (type == WEAK_CS
2254 && ((bidi_it->prev.type == WEAK_EN
2255 && bidi_it->prev.orig_type == WEAK_EN)
2256 || bidi_it->prev.type == WEAK_AN)))
2257 {
2258 const unsigned char *s
2259 = (STRINGP (bidi_it->string.lstring)
2260 ? SDATA (bidi_it->string.lstring)
2261 : bidi_it->string.s);
2262
2263 next_char = (bidi_it->charpos + bidi_it->nchars >= eob
2264 ? BIDI_EOB
2265 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
2266 s, bidi_it->string.unibyte));
2267 type_of_next = bidi_get_type (next_char, override);
2268
2269 if (type_of_next == WEAK_BN
2270 || bidi_explicit_dir_char (next_char))
2271 {
2272 bidi_copy_it (&saved_it, bidi_it);
2273 while (bidi_resolve_explicit (bidi_it) == new_level
2274 && bidi_it->type == WEAK_BN)
2275 type_of_next = bidi_it->type;
2276 bidi_copy_it (bidi_it, &saved_it);
2277 }
2278
2279 /* If the next character is EN, but the last strong-type
2280 character is AL, that next EN will be changed to AN when
2281 we process it in W2 above. So in that case, this ES
2282 should not be changed into EN. */
2283 if (type == WEAK_ES
2284 && type_of_next == WEAK_EN
2285 && bidi_it->last_strong.type != STRONG_AL)
2286 type = WEAK_EN;
2287 else if (type == WEAK_CS)
2288 {
2289 if (bidi_it->prev.type == WEAK_AN
2290 && (type_of_next == WEAK_AN
2291 /* If the next character is EN, but the last
2292 strong-type character is AL, EN will be later
2293 changed to AN when we process it in W2 above.
2294 So in that case, this ES should not be
2295 changed into EN. */
2296 || (type_of_next == WEAK_EN
2297 && bidi_it->last_strong.type == STRONG_AL)))
2298 type = WEAK_AN;
2299 else if (bidi_it->prev.type == WEAK_EN
2300 && type_of_next == WEAK_EN
2301 && bidi_it->last_strong.type != STRONG_AL)
2302 type = WEAK_EN;
2303 }
2304 }
2305 else if (type == WEAK_ET /* W5: ET with EN before or after it */
2306 || type == WEAK_BN) /* W5/Retaining */
2307 {
2308 if (bidi_it->prev.type == WEAK_EN) /* ET/BN w/EN before it */
2309 type = WEAK_EN;
2310 else if (bidi_it->next_en_pos > bidi_it->charpos
2311 && bidi_it->next_en_type != WEAK_BN)
2312 {
2313 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
2314 type = WEAK_EN;
2315 }
2316 else if (bidi_it->next_en_pos >=0)
2317 {
2318 /* We overstepped the last known position for ET
2319 resolution but there could be other such characters
2320 in this paragraph (when we are sure there are no more
2321 such positions, we set next_en_pos to a negative
2322 value). Try to find the next position for ET
2323 resolution. */
2324 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
2325 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
2326 ? SDATA (bidi_it->string.lstring)
2327 : bidi_it->string.s);
2328
2329 if (bidi_it->nchars <= 0)
2330 emacs_abort ();
2331 next_char
2332 = (bidi_it->charpos + bidi_it->nchars >= eob
2333 ? BIDI_EOB
2334 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
2335 bidi_it->string.unibyte));
2336 type_of_next = bidi_get_type (next_char, override);
2337
2338 if (type_of_next == WEAK_ET
2339 || type_of_next == WEAK_BN
2340 || bidi_explicit_dir_char (next_char))
2341 {
2342 bidi_copy_it (&saved_it, bidi_it);
2343 while (bidi_resolve_explicit (bidi_it) == new_level
2344 && (bidi_it->type == WEAK_BN
2345 || bidi_it->type == WEAK_ET))
2346 type_of_next = bidi_it->type;
2347 if (type == WEAK_BN
2348 && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
2349 {
2350 /* If we entered the above loop with a BN that
2351 changes the level, the type of next
2352 character, which is in a different level, is
2353 not relevant to resolving this series of ET
2354 and BN. */
2355 en_pos = saved_it.charpos;
2356 type_of_next = type;
2357 }
2358 else
2359 en_pos = bidi_it->charpos;
2360 bidi_copy_it (bidi_it, &saved_it);
2361 }
2362 /* Remember this position, to speed up processing of the
2363 next ETs. */
2364 bidi_it->next_en_pos = en_pos;
2365 if (type_of_next == WEAK_EN)
2366 {
2367 /* If the last strong character is AL, the EN we've
2368 found will become AN when we get to it (W2). */
2369 if (bidi_it->last_strong.type == STRONG_AL)
2370 type_of_next = WEAK_AN;
2371 else if (type == WEAK_BN)
2372 type = NEUTRAL_ON; /* W6/Retaining */
2373 else
2374 type = WEAK_EN;
2375 }
2376 else if (type_of_next == NEUTRAL_B)
2377 /* Record the fact that there are no more ENs from
2378 here to the end of paragraph, to avoid entering the
2379 loop above ever again in this paragraph. */
2380 bidi_it->next_en_pos = -1;
2381 /* Record the type of the character where we ended our search. */
2382 bidi_it->next_en_type = type_of_next;
2383 }
2384 }
2385 }
2386
2387 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
2388 || (type == WEAK_BN
2389 && (bidi_it->prev.type == WEAK_CS /* W6/Retaining */
2390 || bidi_it->prev.type == WEAK_ES
2391 || bidi_it->prev.type == WEAK_ET)))
2392 type = NEUTRAL_ON;
2393
2394 /* Store the type we've got so far, before we clobber it with strong
2395 types in W7 and while resolving neutral types. But leave alone
2396 the original types that were recorded above, because we will need
2397 them for the L1 clause. */
2398 if (bidi_it->type_after_wn == UNKNOWN_BT)
2399 bidi_it->type_after_wn = type;
2400 bidi_check_type (bidi_it->type_after_wn);
2401
2402 if (type == WEAK_EN) /* W7 */
2403 {
2404 if ((bidi_it->last_strong.type == STRONG_L)
2405 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
2406 type = STRONG_L;
2407 }
2408
2409 bidi_it->type = type;
2410 bidi_check_type (bidi_it->type);
2411 return type;
2412 }
2413
2414 /* Resolve the type of a neutral character according to the type of
2415 surrounding strong text and the current embedding level. */
2416 static bidi_type_t
2417 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2418 {
2419 /* N1: "European and Arabic numbers act as if they were R in terms
2420 of their influence on NIs." */
2421 if (next_type == WEAK_EN || next_type == WEAK_AN)
2422 next_type = STRONG_R;
2423 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
2424 prev_type = STRONG_R;
2425
2426 if (next_type == prev_type) /* N1 */
2427 return next_type;
2428 else if ((lev & 1) == 0) /* N2 */
2429 return STRONG_L;
2430 else
2431 return STRONG_R;
2432 }
2433
2434 #define FLAG_EMBEDDING_INSIDE 1
2435 #define FLAG_OPPOSITE_INSIDE 2
2436
2437 /* A data type used in the stack maintained by
2438 bidi_find_bracket_pairs below. */
2439 typedef struct bpa_stack_entry {
2440 int close_bracket_char;
2441 int open_bracket_idx;
2442 #ifdef ENABLE_CHECKING
2443 ptrdiff_t open_bracket_pos;
2444 #endif
2445 unsigned flags : 2;
2446 } bpa_stack_entry;
2447
2448 /* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
2449 BPA stack, which should be more than enough for actual bidi text. */
2450 #define MAX_BPA_STACK ((int)max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2451
2452 /* UAX#9 says to match opening brackets with the matching closing
2453 brackets or their canonical equivalents. As of Unicode 7.0, there
2454 are only 2 bracket characters that have canonical equivalence
2455 decompositions: u+2329 and u+232A. So instead of accessing the
2456 table in uni-decomposition.el, we just handle these 2 characters
2457 with this simple macro. Note that ASCII characters don't have
2458 canonical equivalents by definition. */
2459
2460 /* To find all the characters that need to be processed by
2461 CANONICAL_EQU, first find all the characters which have
2462 decompositions in UnicodeData.txt, with this Awk script:
2463
2464 awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt
2465
2466 Then produce a list of all the bracket characters in BidiBrackets.txt:
2467
2468 awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt
2469
2470 And finally, cross-reference these two:
2471
2472 fgrep -w -f brackets.txt decompositions.txt
2473
2474 where "decompositions.txt" was produced by the 1st script, and
2475 "brackets.txt" by the 2nd script. In the output of fgrep, look
2476 only for decompositions that don't begin with some compatibility
2477 formatting tag, such as "<compat>". Only decompositions that
2478 consist solely of character codepoints are relevant to bidi
2479 brackets processing. */
2480
2481 #define CANONICAL_EQU(c) \
2482 ( ASCII_CHAR_P (c) ? c \
2483 : (c) == 0x2329 ? 0x3008 \
2484 : (c) == 0x232a ? 0x3009 \
2485 : c )
2486
2487 #ifdef ENABLE_CHECKING
2488 # define STORE_BRACKET_CHARPOS \
2489 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
2490 #else
2491 # define STORE_BRACKET_CHARPOS /* nothing */
2492 #endif
2493
2494 #define PUSH_BPA_STACK \
2495 do { \
2496 int ch; \
2497 if (bpa_sp < MAX_BPA_STACK - 1) \
2498 { \
2499 bpa_sp++; \
2500 ch = CANONICAL_EQU (bidi_it->ch); \
2501 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \
2502 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2503 bpa_stack[bpa_sp].flags = 0; \
2504 STORE_BRACKET_CHARPOS; \
2505 } \
2506 } while (0)
2507
2508
2509 /* This function implements BPA, the Bidi Parenthesis Algorithm,
2510 described in BD16 and N0 of UAX#9. It finds all the bracket pairs
2511 in the current isolating sequence, and records the enclosed type
2512 and the position of the matching bracket in the cache. It returns
2513 non-zero if called with the iterator on the opening bracket which
2514 has a matching closing bracket in the current isolating sequence,
2515 zero otherwise. */
2516 static bool
2517 bidi_find_bracket_pairs (struct bidi_it *bidi_it)
2518 {
2519 bidi_bracket_type_t btype;
2520 bidi_type_t type = bidi_it->type;
2521 bool retval = false;
2522
2523 /* When scanning backwards, we don't expect any unresolved bidi
2524 bracket characters. */
2525 if (bidi_it->scan_dir != 1)
2526 emacs_abort ();
2527
2528 btype = bidi_paired_bracket_type (bidi_it->ch);
2529 if (btype == BIDI_BRACKET_OPEN)
2530 {
2531 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2532 int bpa_sp = -1;
2533 struct bidi_it saved_it;
2534 int base_level = bidi_it->level_stack[0].level;
2535 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2536 int maxlevel = embedding_level;
2537 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2538 struct bidi_it tem_it;
2539 bool l2r_seen = false, r2l_seen = false;
2540 ptrdiff_t pairing_pos;
2541 int idx_at_entry = bidi_cache_idx;
2542
2543 eassert (MAX_BPA_STACK >= 100);
2544 bidi_copy_it (&saved_it, bidi_it);
2545 /* bidi_cache_iterator_state refuses to cache on backward scans,
2546 and bidi_cache_fetch_state doesn't bring scan_dir from the
2547 cache, so we must initialize this explicitly. */
2548 tem_it.scan_dir = 1;
2549
2550 while (1)
2551 {
2552 int old_sidx, new_sidx;
2553 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2554
2555 if (maxlevel < current_level)
2556 maxlevel = current_level;
2557 /* Mark every opening bracket character we've traversed by
2558 putting its own position into bracket_pairing_pos. This
2559 is examined in bidi_resolve_brackets to distinguish
2560 brackets that were already resolved to stay NEUTRAL_ON,
2561 and those that were not yet processed by this function
2562 (because they were skipped when we skip higher embedding
2563 levels below). */
2564 if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1)
2565 bidi_it->bracket_pairing_pos = bidi_it->charpos;
2566 if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0))
2567 {
2568 /* No more space in cache -- give up and let the opening
2569 bracket that started this be processed as a
2570 NEUTRAL_ON. */
2571 bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
2572 bidi_copy_it (bidi_it, &saved_it);
2573 goto give_up;
2574 }
2575 if (btype == BIDI_BRACKET_OPEN)
2576 PUSH_BPA_STACK;
2577 else if (btype == BIDI_BRACKET_CLOSE)
2578 {
2579 int sp = bpa_sp;
2580 int curchar = CANONICAL_EQU (bidi_it->ch);
2581
2582 eassert (sp >= 0);
2583 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2584 sp--;
2585 if (sp >= 0)
2586 {
2587 /* Update and cache the corresponding opening bracket. */
2588 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2589 &tem_it);
2590 #ifdef ENABLE_CHECKING
2591 eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
2592 #endif
2593 /* Determine the enclosed type for this bracket
2594 pair's type resolution according to N0. */
2595 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
2596 tem_it.bracket_enclosed_type = embedding_type; /* N0b */
2597 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
2598 tem_it.bracket_enclosed_type /* N0c */
2599 = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
2600 else /* N0d */
2601 tem_it.bracket_enclosed_type = UNKNOWN_BT;
2602
2603 /* Record the position of the matching closing
2604 bracket, and update the cache. */
2605 tem_it.bracket_pairing_pos = bidi_it->charpos;
2606 bidi_cache_iterator_state (&tem_it, 0, 1);
2607
2608 /* Pop the BPA stack. */
2609 bpa_sp = sp - 1;
2610 }
2611 if (bpa_sp < 0)
2612 {
2613 retval = true;
2614 break;
2615 }
2616 }
2617 else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
2618 {
2619 unsigned flag = 0;
2620 int sp;
2621
2622 /* Whenever we see a strong type, update the flags of
2623 all the slots on the stack. */
2624 switch (bidi_it->type)
2625 {
2626 case STRONG_L:
2627 flag = ((embedding_level & 1) == 0
2628 ? FLAG_EMBEDDING_INSIDE
2629 : FLAG_OPPOSITE_INSIDE);
2630 l2r_seen = true;
2631 break;
2632 case STRONG_R:
2633 case WEAK_EN:
2634 case WEAK_AN:
2635 flag = ((embedding_level & 1) == 1
2636 ? FLAG_EMBEDDING_INSIDE
2637 : FLAG_OPPOSITE_INSIDE);
2638 r2l_seen = true;
2639 break;
2640 default:
2641 break;
2642 }
2643 if (flag)
2644 {
2645 for (sp = bpa_sp; sp >= 0; sp--)
2646 bpa_stack[sp].flags |= flag;
2647 }
2648 }
2649 old_sidx = bidi_it->stack_idx;
2650 type = bidi_resolve_weak (bidi_it);
2651 /* Skip level runs excluded from this isolating run sequence. */
2652 new_sidx = bidi_it->stack_idx;
2653 if (bidi_it->level_stack[new_sidx].level > current_level
2654 && (ISOLATE_STATUS (bidi_it, new_sidx)
2655 || (new_sidx > old_sidx + 1
2656 && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
2657 {
2658 while (bidi_it->level_stack[bidi_it->stack_idx].level
2659 > current_level)
2660 {
2661 if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level)
2662 maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level;
2663 if (!bidi_cache_iterator_state (bidi_it,
2664 type == NEUTRAL_B, 0))
2665 {
2666 /* No more space in cache -- give up and let the
2667 opening bracket that started this be
2668 processed as any other NEUTRAL_ON. */
2669 bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
2670 bidi_copy_it (bidi_it, &saved_it);
2671 goto give_up;
2672 }
2673 type = bidi_resolve_weak (bidi_it);
2674 }
2675 }
2676 if (type == NEUTRAL_B
2677 || (bidi_it->level_stack[bidi_it->stack_idx].level
2678 != current_level))
2679 {
2680 /* We've marched all the way to the end of this
2681 isolating run sequence, and didn't find matching
2682 closing brackets for some opening brackets. Leave
2683 their type unchanged. */
2684 pairing_pos = bidi_it->charpos;
2685 break;
2686 }
2687 if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
2688 btype = bidi_paired_bracket_type (bidi_it->ch);
2689 else
2690 btype = BIDI_BRACKET_NONE;
2691 }
2692
2693 /* Restore bidi_it from the cache, which should have the bracket
2694 resolution members set as determined by the above loop. */
2695 type = bidi_cache_find (saved_it.charpos, 0, bidi_it);
2696 eassert (type == NEUTRAL_ON);
2697
2698 /* The following is an optimization for bracketed text that has
2699 only one level which is equal to the paragraph's base
2700 embedding level. That is, only L2R and weak/neutral
2701 characters in a L2R paragraph, or only R2L and weak/neutral
2702 characters in a R2L paragraph. Such brackets can be resolved
2703 by bidi_resolve_neutral, which has a further shortcut for
2704 this case. So we pretend we did not resolve the brackets in
2705 this case, set up next_for_neutral for the entire bracketed
2706 text, and reset the cache to the character before the opening
2707 bracket. The upshot is to allow bidi_move_to_visually_next
2708 reset the cache when it returns this opening bracket, thus
2709 cutting significantly on the size of the cache, which is
2710 important with long lines, especially if word-wrap is non-nil
2711 (which requires the display engine to copy the cache back and
2712 forth many times). */
2713 if (maxlevel == base_level
2714 && ((base_level == 0 && !r2l_seen)
2715 || (base_level == 1 && !l2r_seen)))
2716 {
2717 ptrdiff_t eob
2718 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2719 ? bidi_it->string.schars : ZV);
2720
2721 if (retval)
2722 pairing_pos = bidi_it->bracket_pairing_pos;
2723
2724 /* This special value (which cannot possibly happen when
2725 brackets are resolved, since there's no character at ZV)
2726 will be noticed by bidi_resolve_explicit, and will be
2727 copied to the following iterator states, instead of being
2728 reset to -1. */
2729 bidi_it->bracket_pairing_pos = eob;
2730 /* This type value will be used for resolving the outermost
2731 closing bracket in bidi_resolve_brackets. */
2732 bidi_it->bracket_enclosed_type = embedding_type;
2733 /* bidi_cache_last_idx is set to the index of the current
2734 state, because we just called bidi_cache_find above.
2735 That state describes the outermost opening bracket, the
2736 one with which we entered this function. Force the cache
2737 to "forget" all the cached states starting from that state. */
2738 bidi_cache_reset_to (bidi_cache_last_idx - bidi_cache_start);
2739 /* Set up the next_for_neutral member, to help
2740 bidi_resolve_neutral. */
2741 bidi_it->next_for_neutral.type = embedding_type;
2742 bidi_it->next_for_neutral.charpos = pairing_pos;
2743 /* Pretend we didn't resolve this bracket. */
2744 retval = false;
2745 }
2746 }
2747
2748 give_up:
2749 return retval;
2750 }
2751
2752 static void
2753 bidi_record_type_for_neutral (struct bidi_saved_info *info, int level,
2754 bool nextp)
2755 {
2756 int idx;
2757
2758 for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++)
2759 {
2760 int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level;
2761
2762 if (lev <= level)
2763 {
2764 eassert (lev == level);
2765 if (nextp)
2766 bidi_cache[idx].next_for_neutral = *info;
2767 else
2768 bidi_cache[idx].prev_for_neutral = *info;
2769 break;
2770 }
2771 }
2772 }
2773
2774 static bidi_type_t
2775 bidi_resolve_brackets (struct bidi_it *bidi_it)
2776 {
2777 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2778 bool resolve_bracket = false;
2779 bidi_type_t type = UNKNOWN_BT;
2780 int ch;
2781 struct bidi_saved_info prev_for_neutral, next_for_neutral;
2782 ptrdiff_t eob
2783 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2784 ? bidi_it->string.schars : ZV);
2785
2786 /* Record the prev_for_neutral type either from the previous
2787 character, if it was a strong or AN/EN, or from the
2788 prev_for_neutral information recorded previously. */
2789 if (bidi_it->type == STRONG_L || bidi_it->type == STRONG_R
2790 || bidi_it->type == WEAK_AN || bidi_it->type == WEAK_EN)
2791 bidi_remember_char (&prev_for_neutral, bidi_it, 1);
2792 else
2793 prev_for_neutral = bidi_it->prev_for_neutral;
2794 /* Record the next_for_neutral type information. */
2795 if (bidi_it->next_for_neutral.charpos > bidi_it->charpos)
2796 next_for_neutral = bidi_it->next_for_neutral;
2797 else
2798 next_for_neutral.charpos = -1;
2799 if (!bidi_it->first_elt)
2800 {
2801 type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 0, bidi_it);
2802 ch = bidi_it->ch;
2803 }
2804 if (type == UNKNOWN_BT)
2805 {
2806 type = bidi_resolve_weak (bidi_it);
2807 if (type == NEUTRAL_ON)
2808 {
2809 /* bracket_pairing_pos == eob means this bracket does not
2810 need to be resolved as a bracket, but as a neutral, see
2811 the optimization trick we play near the end of
2812 bidi_find_bracket_pairs. */
2813 if (bidi_it->bracket_pairing_pos == eob)
2814 {
2815 /* If this is the outermost closing bracket of a run of
2816 characters in which we decided to resolve brackets as
2817 neutrals, use the embedding level's type, recorded in
2818 bracket_enclosed_type, to resolve the bracket. */
2819 if (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2820 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
2821 type = bidi_it->bracket_enclosed_type;
2822 }
2823 else if (bidi_find_bracket_pairs (bidi_it))
2824 resolve_bracket = true;
2825 }
2826 }
2827 else if (bidi_it->bracket_pairing_pos != eob)
2828 {
2829 eassert (bidi_it->resolved_level == -1);
2830 /* If the cached state shows an increase of embedding level due
2831 to an isolate initiator, we need to update the 1st cached
2832 state of the next run of the current isolating sequence with
2833 the prev_for_neutral and next_for_neutral information, so
2834 that it will be picked up when we advance to that next run. */
2835 if (bidi_it->level_stack[bidi_it->stack_idx].level > prev_level
2836 && ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
2837 {
2838 bidi_record_type_for_neutral (&prev_for_neutral, prev_level, 0);
2839 bidi_record_type_for_neutral (&next_for_neutral, prev_level, 1);
2840 }
2841 if (type == NEUTRAL_ON
2842 && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN)
2843 {
2844 if (bidi_it->bracket_pairing_pos > bidi_it->charpos)
2845 {
2846 /* A cached opening bracket that wasn't completely
2847 resolved yet. */
2848 resolve_bracket = true;
2849 }
2850 else if (bidi_it->bracket_pairing_pos == -1)
2851 {
2852 /* Higher levels were not BPA-resolved yet, even if
2853 cached by bidi_find_bracket_pairs. Force application
2854 of BPA to the new level now. */
2855 if (bidi_find_bracket_pairs (bidi_it))
2856 resolve_bracket = true;
2857 }
2858 }
2859 /* Keep track of the prev_for_neutral and next_for_neutral
2860 types, needed for resolving brackets below and for resolving
2861 neutrals in bidi_resolve_neutral. */
2862 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level)
2863 {
2864 bidi_it->prev_for_neutral = prev_for_neutral;
2865 if (next_for_neutral.charpos > 0)
2866 bidi_it->next_for_neutral = next_for_neutral;
2867 }
2868 }
2869
2870 /* If needed, resolve the bracket type according to N0. */
2871 if (resolve_bracket)
2872 {
2873 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2874 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2875
2876 eassert (bidi_it->prev_for_neutral.type != UNKNOWN_BT);
2877 eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
2878 if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
2879 type = embedding_type;
2880 else
2881 {
2882 switch (bidi_it->prev_for_neutral.type)
2883 {
2884 case STRONG_R:
2885 case WEAK_EN:
2886 case WEAK_AN:
2887 type =
2888 (bidi_it->bracket_enclosed_type == STRONG_R) /* N0c */
2889 ? STRONG_R /* N0c1 */
2890 : embedding_type; /* N0c2 */
2891 break;
2892 case STRONG_L:
2893 type =
2894 (bidi_it->bracket_enclosed_type == STRONG_L) /* N0c */
2895 ? STRONG_L /* N0c1 */
2896 : embedding_type; /* N0c2 */
2897 break;
2898 default:
2899 /* N0d: Do not set the type for that bracket pair. */
2900 break;
2901 }
2902 }
2903 eassert (type == STRONG_L || type == STRONG_R || type == NEUTRAL_ON);
2904
2905 /* Update the type of the paired closing bracket to the same
2906 type as for the resolved opening bracket. */
2907 if (type != NEUTRAL_ON)
2908 {
2909 ptrdiff_t idx = bidi_cache_search (bidi_it->bracket_pairing_pos,
2910 -1, 1);
2911
2912 if (idx < bidi_cache_start)
2913 emacs_abort ();
2914 bidi_cache[idx].type = type;
2915 }
2916 }
2917
2918 return type;
2919 }
2920
2921 static bidi_type_t
2922 bidi_resolve_neutral (struct bidi_it *bidi_it)
2923 {
2924 bidi_type_t type = bidi_resolve_brackets (bidi_it);
2925 int current_level;
2926 bool is_neutral;
2927
2928 eassert (type == STRONG_R
2929 || type == STRONG_L
2930 || type == WEAK_BN
2931 || type == WEAK_EN
2932 || type == WEAK_AN
2933 || type == NEUTRAL_B
2934 || type == NEUTRAL_S
2935 || type == NEUTRAL_WS
2936 || type == NEUTRAL_ON
2937 || type == LRI
2938 || type == RLI
2939 || type == PDI);
2940
2941 current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2942 eassert (current_level >= 0);
2943 is_neutral = bidi_get_category (type) == NEUTRAL;
2944
2945 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2946 we are already at paragraph end. */
2947 && (is_neutral || bidi_isolate_fmt_char (type)))
2948 /* N1-N2/Retaining */
2949 || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
2950 {
2951 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
2952 {
2953 /* Make sure the data for resolving neutrals we are
2954 about to use is valid. */
2955 eassert (bidi_it->next_for_neutral.charpos > bidi_it->charpos
2956 /* PDI defines an eos, so it's OK for it to
2957 serve as its own next_for_neutral. */
2958 || (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2959 && bidi_it->type == PDI));
2960 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2961 bidi_it->next_for_neutral.type,
2962 current_level);
2963 }
2964 /* The next two "else if" clauses are shortcuts for the
2965 important special case when we have a long sequence of
2966 neutral or WEAK_BN characters, such as whitespace or nulls or
2967 other control characters, on the base embedding level of the
2968 paragraph, and that sequence goes all the way to the end of
2969 the paragraph and follows a character whose resolved
2970 directionality is identical to the base embedding level.
2971 (This is what happens in a buffer with plain L2R text that
2972 happens to include long sequences of control characters.) By
2973 virtue of N1, the result of examining this long sequence will
2974 always be either STRONG_L or STRONG_R, depending on the base
2975 embedding level. So we use this fact directly instead of
2976 entering the expensive loop in the "else" clause. */
2977 else if (current_level == 0
2978 && bidi_it->prev_for_neutral.type == STRONG_L
2979 && !bidi_explicit_dir_char (bidi_it->ch)
2980 && !bidi_isolate_fmt_char (type))
2981 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2982 STRONG_L, current_level);
2983 else if (/* current level is 1 */
2984 current_level == 1
2985 /* base embedding level is also 1 */
2986 && bidi_it->level_stack[0].level == 1
2987 /* previous character is one of those considered R for
2988 the purposes of W5 */
2989 && (bidi_it->prev_for_neutral.type == STRONG_R
2990 || bidi_it->prev_for_neutral.type == WEAK_EN
2991 || bidi_it->prev_for_neutral.type == WEAK_AN)
2992 && !bidi_explicit_dir_char (bidi_it->ch)
2993 && !bidi_isolate_fmt_char (type))
2994 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2995 STRONG_R, current_level);
2996 else
2997 {
2998 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
2999 the assumption of batch-style processing; see clauses W4,
3000 W5, and especially N1, which require to look far forward
3001 (as well as back) in the buffer/string. May the fleas of
3002 a thousand camels infest the armpits of those who design
3003 supposedly general-purpose algorithms by looking at their
3004 own implementations, and fail to consider other possible
3005 implementations! */
3006 struct bidi_it saved_it;
3007 bidi_type_t next_type;
3008 bool adjacent_to_neutrals = is_neutral;
3009
3010 bidi_copy_it (&saved_it, bidi_it);
3011 /* Scan the text forward until we find the first non-neutral
3012 character, and then use that to resolve the neutral we
3013 are dealing with now. We also cache the scanned iterator
3014 states, to salvage some of the effort later. */
3015 do {
3016 int old_sidx, new_sidx;
3017
3018 /* Paragraph separators have their levels fully resolved
3019 at this point, so cache them as resolved. */
3020 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
3021 old_sidx = bidi_it->stack_idx;
3022 type = bidi_resolve_brackets (bidi_it);
3023 /* Skip level runs excluded from this isolating run sequence. */
3024 new_sidx = bidi_it->stack_idx;
3025 if (bidi_it->level_stack[new_sidx].level > current_level
3026 && (ISOLATE_STATUS (bidi_it, new_sidx)
3027 /* This is for when we have an isolate initiator
3028 immediately followed by an embedding or
3029 override initiator, in which case we get the
3030 level stack pushed twice by the single call to
3031 bidi_resolve_weak above. */
3032 || (new_sidx > old_sidx + 1
3033 && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
3034 {
3035 while (bidi_it->level_stack[bidi_it->stack_idx].level
3036 > current_level)
3037 {
3038 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
3039 type = bidi_resolve_brackets (bidi_it);
3040 }
3041 }
3042 if (!adjacent_to_neutrals
3043 && (bidi_get_category (type) == NEUTRAL
3044 || bidi_isolate_fmt_char (type)))
3045 adjacent_to_neutrals = true;
3046 } while (!(type == NEUTRAL_B
3047 || (type != WEAK_BN
3048 && bidi_get_category (type) != NEUTRAL
3049 && !bidi_isolate_fmt_char (type))
3050 /* This is all per level run, so stop when we
3051 reach the end of this level run. */
3052 || (bidi_it->level_stack[bidi_it->stack_idx].level
3053 != current_level)));
3054
3055 /* Record the character we stopped at. */
3056 bidi_remember_char (&saved_it.next_for_neutral, bidi_it, 1);
3057
3058 if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
3059 || type == NEUTRAL_B)
3060 {
3061 /* Marched all the way to the end of this level run. We
3062 need to use the eos type, whose information is stored
3063 by bidi_set_sos_type in the prev_for_neutral
3064 member. */
3065 if (adjacent_to_neutrals)
3066 next_type = bidi_it->prev_for_neutral.type;
3067 else
3068 {
3069 /* This is a BN which does not adjoin neutrals.
3070 Leave its type alone. */
3071 bidi_copy_it (bidi_it, &saved_it);
3072 return bidi_it->type;
3073 }
3074 }
3075 else
3076 {
3077 switch (type)
3078 {
3079 case STRONG_L:
3080 case STRONG_R:
3081 case STRONG_AL:
3082 /* Actually, STRONG_AL cannot happen here, because
3083 bidi_resolve_weak converts it to STRONG_R, per W3. */
3084 eassert (type != STRONG_AL);
3085 next_type = type;
3086 break;
3087 case WEAK_EN:
3088 case WEAK_AN:
3089 /* N1: "European and Arabic numbers act as if they
3090 were R in terms of their influence on NIs." */
3091 next_type = STRONG_R;
3092 break;
3093 default:
3094 emacs_abort ();
3095 break;
3096 }
3097 }
3098 /* Resolve the type of all the NIs found during the above loop. */
3099 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
3100 next_type, current_level);
3101 /* Update next_for_neutral with the resolved type, so we
3102 could use it for all the other NIs up to the place where
3103 we exited the loop. */
3104 saved_it.next_for_neutral.type = next_type;
3105 bidi_check_type (type);
3106 /* Update the character which caused us to enter the above loop. */
3107 saved_it.type = type;
3108 bidi_check_type (next_type);
3109 bidi_copy_it (bidi_it, &saved_it);
3110 }
3111 }
3112 return type;
3113 }
3114
3115 /* Given an iterator state in BIDI_IT, advance one character position
3116 in the buffer/string to the next character (in the logical order),
3117 resolve the bidi type of that next character, and return that
3118 type. */
3119 static bidi_type_t
3120 bidi_type_of_next_char (struct bidi_it *bidi_it)
3121 {
3122 bidi_type_t type;
3123
3124 /* This should always be called during a forward scan. */
3125 if (bidi_it->scan_dir != 1)
3126 emacs_abort ();
3127
3128 type = bidi_resolve_neutral (bidi_it);
3129
3130 return type;
3131 }
3132
3133 /* Given an iterator state BIDI_IT, advance one character position in
3134 the buffer/string to the next character (in the current scan
3135 direction), resolve the embedding and implicit levels of that next
3136 character, and return the resulting level. */
3137 static int
3138 bidi_level_of_next_char (struct bidi_it *bidi_it)
3139 {
3140 bidi_type_t type = UNKNOWN_BT;
3141 int level;
3142 ptrdiff_t next_char_pos = -2;
3143
3144 if (bidi_it->scan_dir == 1)
3145 {
3146 ptrdiff_t eob
3147 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3148 ? bidi_it->string.schars : ZV);
3149
3150 /* There's no sense in trying to advance if we've already hit
3151 the end of text. */
3152 if (bidi_it->charpos >= eob)
3153 {
3154 eassert (bidi_it->resolved_level >= 0);
3155 return bidi_it->resolved_level;
3156 }
3157 }
3158
3159 /* Perhaps the character we want is already cached s fully resolved.
3160 If it is, the call to bidi_cache_find below will return a type
3161 other than UNKNOWN_BT. */
3162 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
3163 {
3164 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3165 ? 0 : 1);
3166
3167 if (bidi_it->scan_dir > 0)
3168 {
3169 if (bidi_it->nchars <= 0)
3170 emacs_abort ();
3171 next_char_pos = bidi_it->charpos + bidi_it->nchars;
3172 }
3173 else if (bidi_it->charpos >= bob)
3174 /* Implementation note: we allow next_char_pos to be as low as
3175 0 for buffers or -1 for strings, and that is okay because
3176 that's the "position" of the sentinel iterator state we
3177 cached at the beginning of the iteration. */
3178 next_char_pos = bidi_it->charpos - 1;
3179 if (next_char_pos >= bob - 1)
3180 type = bidi_cache_find (next_char_pos, 1, bidi_it);
3181 if (type != UNKNOWN_BT)
3182 {
3183 /* We asked the cache for fully resolved states. */
3184 eassert (bidi_it->resolved_level >= 0);
3185 return bidi_it->resolved_level;
3186 }
3187 }
3188
3189 if (bidi_it->scan_dir == -1)
3190 /* If we are going backwards, the iterator state is already cached
3191 from previous scans, and should be fully resolved. */
3192 emacs_abort ();
3193
3194 if (type == UNKNOWN_BT)
3195 type = bidi_type_of_next_char (bidi_it);
3196
3197 if (type == NEUTRAL_B)
3198 {
3199 eassert (bidi_it->resolved_level >= 0);
3200 return bidi_it->resolved_level;
3201 }
3202
3203 level = bidi_it->level_stack[bidi_it->stack_idx].level;
3204
3205 eassert ((type == STRONG_R
3206 || type == STRONG_L
3207 || type == WEAK_BN
3208 || type == WEAK_EN
3209 || type == WEAK_AN));
3210 bidi_it->type = type;
3211 bidi_check_type (bidi_it->type);
3212
3213 /* For L1 below, we need to know, for each WS character, whether
3214 it belongs to a sequence of WS characters preceding a newline
3215 or a TAB or a paragraph separator. */
3216 if ((bidi_it->orig_type == NEUTRAL_WS
3217 || bidi_isolate_fmt_char (bidi_it->orig_type))
3218 && bidi_it->next_for_ws.charpos < bidi_it->charpos)
3219 {
3220 int ch;
3221 ptrdiff_t clen = bidi_it->ch_len;
3222 ptrdiff_t bpos = bidi_it->bytepos;
3223 ptrdiff_t cpos = bidi_it->charpos;
3224 ptrdiff_t disp_pos = bidi_it->disp_pos;
3225 ptrdiff_t nc = bidi_it->nchars;
3226 struct bidi_string_data bs = bidi_it->string;
3227 bidi_type_t chtype;
3228 bool fwp = bidi_it->frame_window_p;
3229 int dpp = bidi_it->disp_prop;
3230
3231 if (bidi_it->nchars <= 0)
3232 emacs_abort ();
3233 do {
3234 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
3235 bidi_it->w, fwp, &clen, &nc);
3236 chtype = bidi_get_type (ch, NEUTRAL_DIR);
3237 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
3238 || bidi_isolate_fmt_char (chtype)
3239 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
3240 bidi_it->next_for_ws.type = chtype;
3241 bidi_check_type (bidi_it->next_for_ws.type);
3242 bidi_it->next_for_ws.charpos = cpos;
3243 }
3244
3245 /* Update the cache, but only if this state was already cached. */
3246 bidi_cache_iterator_state (bidi_it, 1, 1);
3247
3248 /* Resolve implicit levels. */
3249 if (bidi_it->orig_type == NEUTRAL_B /* L1 */
3250 || bidi_it->orig_type == NEUTRAL_S
3251 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
3252 || (bidi_it->orig_type == NEUTRAL_WS
3253 && (bidi_it->next_for_ws.type == NEUTRAL_B
3254 || bidi_it->next_for_ws.type == NEUTRAL_S)))
3255 level = bidi_it->level_stack[0].level;
3256 else if ((level & 1) == 0) /* I1 */
3257 {
3258 if (type == STRONG_R)
3259 level++;
3260 else if (type == WEAK_EN || type == WEAK_AN)
3261 level += 2;
3262 }
3263 else /* I2 */
3264 {
3265 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
3266 level++;
3267 }
3268
3269 bidi_it->resolved_level = level;
3270 return level;
3271 }
3272
3273 /* Move to the other edge of a level given by LEVEL. If END_FLAG,
3274 we are at the end of a level, and we need to prepare to
3275 resume the scan of the lower level.
3276
3277 If this level's other edge is cached, we simply jump to it, filling
3278 the iterator structure with the iterator state on the other edge.
3279 Otherwise, we walk the buffer or string until we come back to the
3280 same level as LEVEL.
3281
3282 Note: we are not talking here about a ``level run'' in the UAX#9
3283 sense of the term, but rather about a ``level'' which includes
3284 all the levels higher than it. In other words, given the levels
3285 like this:
3286
3287 11111112222222333333334443343222222111111112223322111
3288 A B C
3289
3290 and assuming we are at point A scanning left to right, this
3291 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
3292 at point B. */
3293 static void
3294 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
3295 {
3296 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
3297 ptrdiff_t idx;
3298
3299 /* Try the cache first. */
3300 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
3301 >= bidi_cache_start)
3302 bidi_cache_fetch_state (idx, bidi_it);
3303 else
3304 {
3305 int new_level;
3306
3307 /* If we are at end of level, its edges must be cached. */
3308 if (end_flag)
3309 emacs_abort ();
3310
3311 if (!bidi_cache_iterator_state (bidi_it, 1, 0))
3312 {
3313 /* Can't happen: if the cache needs to grow, it means we
3314 were at base embedding level, so the cache should have
3315 been either empty or already large enough to cover this
3316 character position. */
3317 emacs_abort ();
3318 }
3319 do {
3320 new_level = bidi_level_of_next_char (bidi_it);
3321 /* If the cache is full, perform an emergency return by
3322 pretending that the level ended. */
3323 if (!bidi_cache_iterator_state (bidi_it, 1, 0))
3324 {
3325 new_level = level - 1;
3326 /* Since the cache should only grow when we are scanning
3327 forward looking for the edge of the level that is one
3328 above the base embedding level, we can only have this
3329 contingency when LEVEL - 1 is the base embedding
3330 level. */
3331 eassert (new_level == bidi_it->level_stack[0].level);
3332 /* Plan B, for when the cache overflows: Back up to the
3333 previous character by fetching the last cached state,
3334 and force the resolved level of that character be the
3335 base embedding level. */
3336 bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it);
3337 bidi_it->resolved_level = new_level;
3338 bidi_cache_iterator_state (bidi_it, 1, 1);
3339 }
3340 } while (new_level >= level);
3341 }
3342 }
3343
3344 void
3345 bidi_move_to_visually_next (struct bidi_it *bidi_it)
3346 {
3347 int old_level, new_level, next_level;
3348 struct bidi_it sentinel;
3349 struct gcpro gcpro1;
3350
3351 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
3352 emacs_abort ();
3353
3354 if (bidi_it->scan_dir == 0)
3355 {
3356 bidi_it->scan_dir = 1; /* default to logical order */
3357 }
3358
3359 /* The code below can call eval, and thus cause GC. If we are
3360 iterating a Lisp string, make sure it won't be GCed. */
3361 if (STRINGP (bidi_it->string.lstring))
3362 GCPRO1 (bidi_it->string.lstring);
3363
3364 /* If we just passed a newline, initialize for the next line. */
3365 if (!bidi_it->first_elt
3366 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3367 bidi_line_init (bidi_it);
3368
3369 /* Prepare the sentinel iterator state, and cache it. When we bump
3370 into it, scanning backwards, we'll know that the last non-base
3371 level is exhausted. */
3372 if (bidi_cache_idx == bidi_cache_start)
3373 {
3374 bidi_copy_it (&sentinel, bidi_it);
3375 if (bidi_it->first_elt)
3376 {
3377 sentinel.charpos--; /* cached charpos needs to be monotonic */
3378 sentinel.bytepos--;
3379 sentinel.ch = '\n'; /* doesn't matter, but why not? */
3380 sentinel.ch_len = 1;
3381 sentinel.nchars = 1;
3382 }
3383 bidi_cache_iterator_state (&sentinel, 1, 0);
3384 }
3385
3386 old_level = bidi_it->resolved_level;
3387 new_level = bidi_level_of_next_char (bidi_it);
3388
3389 /* Reordering of resolved levels (clause L2) is implemented by
3390 jumping to the other edge of the level and flipping direction of
3391 scanning the text whenever we find a level change. */
3392 if (new_level != old_level)
3393 {
3394 bool ascending = new_level > old_level;
3395 int level_to_search = ascending ? old_level + 1 : old_level;
3396 int incr = ascending ? 1 : -1;
3397 int expected_next_level = old_level + incr;
3398
3399 /* Jump (or walk) to the other edge of this level. */
3400 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3401 /* Switch scan direction and peek at the next character in the
3402 new direction. */
3403 bidi_it->scan_dir = -bidi_it->scan_dir;
3404
3405 /* The following loop handles the case where the resolved level
3406 jumps by more than one. This is typical for numbers inside a
3407 run of text with left-to-right embedding direction, but can
3408 also happen in other situations. In those cases the decision
3409 where to continue after a level change, and in what direction,
3410 is tricky. For example, given a text like below:
3411
3412 abcdefgh
3413 11336622
3414
3415 (where the numbers below the text show the resolved levels),
3416 the result of reordering according to UAX#9 should be this:
3417
3418 efdcghba
3419
3420 This is implemented by the loop below which flips direction
3421 and jumps to the other edge of the level each time it finds
3422 the new level not to be the expected one. The expected level
3423 is always one more or one less than the previous one. */
3424 next_level = bidi_peek_at_next_level (bidi_it);
3425 while (next_level != expected_next_level)
3426 {
3427 /* If next_level is -1, it means we have an unresolved level
3428 in the cache, which at this point should not happen. If
3429 it does, we will infloop. */
3430 eassert (next_level >= 0);
3431 /* If next_level is not consistent with incr, we might
3432 infloop. */
3433 eassert (incr > 0
3434 ? next_level > expected_next_level
3435 : next_level < expected_next_level);
3436 expected_next_level += incr;
3437 level_to_search += incr;
3438 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3439 bidi_it->scan_dir = -bidi_it->scan_dir;
3440 next_level = bidi_peek_at_next_level (bidi_it);
3441 }
3442
3443 /* Finally, deliver the next character in the new direction. */
3444 next_level = bidi_level_of_next_char (bidi_it);
3445 }
3446
3447 /* Take note when we have just processed the newline that precedes
3448 the end of the paragraph. The next time we are about to be
3449 called, set_iterator_to_next will automatically reinit the
3450 paragraph direction, if needed. We do this at the newline before
3451 the paragraph separator, because the next character might not be
3452 the first character of the next paragraph, due to the bidi
3453 reordering, whereas we _must_ know the paragraph base direction
3454 _before_ we process the paragraph's text, since the base
3455 direction affects the reordering. */
3456 if (bidi_it->scan_dir == 1
3457 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3458 {
3459 /* The paragraph direction of the entire string, once
3460 determined, is in effect for the entire string. Setting the
3461 separator limit to the end of the string prevents
3462 bidi_paragraph_init from being called automatically on this
3463 string. */
3464 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3465 bidi_it->separator_limit = bidi_it->string.schars;
3466 else if (bidi_it->bytepos < ZV_BYTE)
3467 {
3468 ptrdiff_t sep_len
3469 = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
3470 bidi_it->bytepos + bidi_it->ch_len);
3471 if (bidi_it->nchars <= 0)
3472 emacs_abort ();
3473 if (sep_len >= 0)
3474 {
3475 bidi_it->new_paragraph = 1;
3476 /* Record the buffer position of the last character of the
3477 paragraph separator. */
3478 bidi_it->separator_limit
3479 = bidi_it->charpos + bidi_it->nchars + sep_len;
3480 }
3481 }
3482 }
3483
3484 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
3485 {
3486 /* If we are at paragraph's base embedding level and beyond the
3487 last cached position, the cache's job is done and we can
3488 discard it. */
3489 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3490 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
3491 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
3492 bidi_cache_reset ();
3493 /* Also reset the cache if it overflowed and we have just
3494 emergency-exited using Plan B. */
3495 else if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3496 && bidi_cache_idx >= bidi_cache_size
3497 && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos)
3498 bidi_cache_reset ();
3499 /* But as long as we are caching during forward scan, we must
3500 cache each state, or else the cache integrity will be
3501 compromised: it assumes cached states correspond to buffer
3502 positions 1:1. */
3503 else
3504 bidi_cache_iterator_state (bidi_it, 1, 0);
3505 }
3506
3507 eassert (bidi_it->resolved_level >= 0
3508 && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
3509
3510 if (STRINGP (bidi_it->string.lstring))
3511 UNGCPRO;
3512 }
3513
3514 /* Utility function for looking for strong directional characters
3515 whose bidi type was overridden by a directional override. */
3516 ptrdiff_t
3517 bidi_find_first_overridden (struct bidi_it *bidi_it)
3518 {
3519 ptrdiff_t found_pos = ZV;
3520
3521 do
3522 {
3523 /* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
3524 because the directional overrides are applied by the
3525 former. */
3526 bidi_type_t type = bidi_resolve_weak (bidi_it);
3527
3528 if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
3529 || (type == STRONG_L
3530 && (bidi_it->orig_type == STRONG_R
3531 || bidi_it->orig_type == STRONG_AL)))
3532 found_pos = bidi_it->charpos;
3533 } while (found_pos == ZV
3534 && bidi_it->charpos < ZV
3535 && bidi_it->ch != BIDI_EOB
3536 && bidi_it->ch != '\n');
3537
3538 return found_pos;
3539 }
3540
3541 /* This is meant to be called from within the debugger, whenever you
3542 wish to examine the cache contents. */
3543 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
3544 void
3545 bidi_dump_cached_states (void)
3546 {
3547 ptrdiff_t i;
3548 int ndigits = 1;
3549
3550 if (bidi_cache_idx == 0)
3551 {
3552 fprintf (stderr, "The cache is empty.\n");
3553 return;
3554 }
3555 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
3556 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
3557
3558 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
3559 ndigits++;
3560 fputs ("ch ", stderr);
3561 for (i = 0; i < bidi_cache_idx; i++)
3562 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
3563 fputs ("\n", stderr);
3564 fputs ("lvl ", stderr);
3565 for (i = 0; i < bidi_cache_idx; i++)
3566 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
3567 fputs ("\n", stderr);
3568 fputs ("pos ", stderr);
3569 for (i = 0; i < bidi_cache_idx; i++)
3570 fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
3571 fputs ("\n", stderr);
3572 }