]> code.delx.au - gnu-emacs/blob - src/bidi.c
Started work on string reordering. Just compiled, not yet tested.
[gnu-emacs] / src / bidi.c
1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
27 string.
28
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
37
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
52 #include <config.h>
53 #include <stdio.h>
54 #include <setjmp.h>
55
56 #include "lisp.h"
57 #include "buffer.h"
58 #include "character.h"
59 #include "dispextern.h"
60
61 static int bidi_initialized = 0;
62
63 static Lisp_Object bidi_type_table, bidi_mirror_table;
64
65 #define LRM_CHAR 0x200E
66 #define RLM_CHAR 0x200F
67 #define BIDI_EOB -1
68
69 /* Data type for describing the bidirectional character categories. */
70 typedef enum {
71 UNKNOWN_BC,
72 NEUTRAL,
73 WEAK,
74 STRONG
75 } bidi_category_t;
76
77 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
78 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
79
80 static Lisp_Object paragraph_start_re, paragraph_separate_re;
81 static Lisp_Object Qparagraph_start, Qparagraph_separate;
82
83 static void
84 bidi_initialize (void)
85 {
86
87 #include "biditype.h"
88 #include "bidimirror.h"
89
90 int i;
91
92 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
93 staticpro (&bidi_type_table);
94
95 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
96 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
97 make_number (bidi_type[i].type));
98
99 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
100 staticpro (&bidi_mirror_table);
101
102 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
103 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
104 make_number (bidi_mirror[i].to));
105
106 Qparagraph_start = intern ("paragraph-start");
107 staticpro (&Qparagraph_start);
108 paragraph_start_re = Fsymbol_value (Qparagraph_start);
109 if (!STRINGP (paragraph_start_re))
110 paragraph_start_re = build_string ("\f\\|[ \t]*$");
111 staticpro (&paragraph_start_re);
112 Qparagraph_separate = intern ("paragraph-separate");
113 staticpro (&Qparagraph_separate);
114 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
115 if (!STRINGP (paragraph_separate_re))
116 paragraph_separate_re = build_string ("[ \t\f]*$");
117 staticpro (&paragraph_separate_re);
118 bidi_initialized = 1;
119 }
120
121 /* Return the bidi type of a character CH, subject to the current
122 directional OVERRIDE. */
123 static INLINE bidi_type_t
124 bidi_get_type (int ch, bidi_dir_t override)
125 {
126 bidi_type_t default_type;
127
128 if (ch == BIDI_EOB)
129 return NEUTRAL_B;
130 if (ch < 0 || ch > MAX_CHAR)
131 abort ();
132
133 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
134
135 if (override == NEUTRAL_DIR)
136 return default_type;
137
138 switch (default_type)
139 {
140 /* Although UAX#9 does not tell, it doesn't make sense to
141 override NEUTRAL_B and LRM/RLM characters. */
142 case NEUTRAL_B:
143 case LRE:
144 case LRO:
145 case RLE:
146 case RLO:
147 case PDF:
148 return default_type;
149 default:
150 switch (ch)
151 {
152 case LRM_CHAR:
153 case RLM_CHAR:
154 return default_type;
155 default:
156 if (override == L2R) /* X6 */
157 return STRONG_L;
158 else if (override == R2L)
159 return STRONG_R;
160 else
161 abort (); /* can't happen: handled above */
162 }
163 }
164 }
165
166 static void
167 bidi_check_type (bidi_type_t type)
168 {
169 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
170 abort ();
171 }
172
173 /* Given a bidi TYPE of a character, return its category. */
174 static INLINE bidi_category_t
175 bidi_get_category (bidi_type_t type)
176 {
177 switch (type)
178 {
179 case UNKNOWN_BT:
180 return UNKNOWN_BC;
181 case STRONG_L:
182 case STRONG_R:
183 case STRONG_AL:
184 case LRE:
185 case LRO:
186 case RLE:
187 case RLO:
188 return STRONG;
189 case PDF: /* ??? really?? */
190 case WEAK_EN:
191 case WEAK_ES:
192 case WEAK_ET:
193 case WEAK_AN:
194 case WEAK_CS:
195 case WEAK_NSM:
196 case WEAK_BN:
197 return WEAK;
198 case NEUTRAL_B:
199 case NEUTRAL_S:
200 case NEUTRAL_WS:
201 case NEUTRAL_ON:
202 return NEUTRAL;
203 default:
204 abort ();
205 }
206 }
207
208 /* Return the mirrored character of C, if it has one. If C has no
209 mirrored counterpart, return C.
210 Note: The conditions in UAX#9 clause L4 regarding the surrounding
211 context must be tested by the caller. */
212 int
213 bidi_mirror_char (int c)
214 {
215 Lisp_Object val;
216
217 if (c == BIDI_EOB)
218 return c;
219 if (c < 0 || c > MAX_CHAR)
220 abort ();
221
222 val = CHAR_TABLE_REF (bidi_mirror_table, c);
223 if (INTEGERP (val))
224 {
225 int v = XINT (val);
226
227 if (v < 0 || v > MAX_CHAR)
228 abort ();
229
230 return v;
231 }
232
233 return c;
234 }
235
236 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
237 copies the part of the level stack that is actually in use. */
238 static INLINE void
239 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
240 {
241 int i;
242
243 /* Copy everything except the level stack and beyond. */
244 memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
245
246 /* Copy the active part of the level stack. */
247 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
248 for (i = 1; i <= from->stack_idx; i++)
249 to->level_stack[i] = from->level_stack[i];
250 }
251
252 /* Caching the bidi iterator states. */
253
254 #define BIDI_CACHE_CHUNK 200
255 static struct bidi_it *bidi_cache;
256 static size_t bidi_cache_size = 0;
257 static size_t elsz = sizeof (struct bidi_it);
258 static EMACS_INT bidi_cache_idx; /* next unused cache slot */
259 static EMACS_INT bidi_cache_last_idx; /* slot of last cache hit */
260 static EMACS_INT bidi_cache_start = 0; /* start of cache for this
261 "stack" level */
262
263 /* Reset the cache state to the empty state. We only reset the part
264 of the cache relevant to iteration of the current object. Previous
265 objects, which are pushed on the display iterator's stack, are left
266 intact. This is called when the cached information is no more
267 useful for the current iteration, e.g. when we were reseated to a
268 new position on the same object. */
269 static INLINE void
270 bidi_cache_reset (void)
271 {
272 bidi_cache_idx = bidi_cache_start;
273 bidi_cache_last_idx = -1;
274 }
275
276 /* Shrink the cache to its minimal size. Called when we init the bidi
277 iterator for reordering a buffer or a string that does not come
278 from display properties, because that means all the previously
279 cached info is of no further use. */
280 static INLINE void
281 bidi_cache_shrink (void)
282 {
283 if (bidi_cache_size > BIDI_CACHE_CHUNK)
284 {
285 bidi_cache_size = BIDI_CACHE_CHUNK;
286 bidi_cache =
287 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
288 }
289 bidi_cache_reset ();
290 }
291
292 static INLINE void
293 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
294 {
295 int current_scan_dir = bidi_it->scan_dir;
296
297 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
298 abort ();
299
300 bidi_copy_it (bidi_it, &bidi_cache[idx]);
301 bidi_it->scan_dir = current_scan_dir;
302 bidi_cache_last_idx = idx;
303 }
304
305 /* Find a cached state with a given CHARPOS and resolved embedding
306 level less or equal to LEVEL. if LEVEL is -1, disregard the
307 resolved levels in cached states. DIR, if non-zero, means search
308 in that direction from the last cache hit. */
309 static INLINE int
310 bidi_cache_search (EMACS_INT charpos, int level, int dir)
311 {
312 int i, i_start;
313
314 if (bidi_cache_idx)
315 {
316 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
317 {
318 dir = -1;
319 i_start = bidi_cache_last_idx - 1;
320 }
321 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
322 + bidi_cache[bidi_cache_last_idx].nchars - 1))
323 {
324 dir = 1;
325 i_start = bidi_cache_last_idx + 1;
326 }
327 else if (dir)
328 i_start = bidi_cache_last_idx;
329 else
330 {
331 dir = -1;
332 i_start = bidi_cache_idx - 1;
333 }
334
335 if (dir < 0)
336 {
337 /* Linear search for now; FIXME! */
338 for (i = i_start; i >= bidi_cache_start; i--)
339 if (bidi_cache[i].charpos <= charpos
340 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
341 && (level == -1 || bidi_cache[i].resolved_level <= level))
342 return i;
343 }
344 else
345 {
346 for (i = i_start; i < bidi_cache_idx; i++)
347 if (bidi_cache[i].charpos <= charpos
348 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
349 && (level == -1 || bidi_cache[i].resolved_level <= level))
350 return i;
351 }
352 }
353
354 return -1;
355 }
356
357 /* Find a cached state where the resolved level changes to a value
358 that is lower than LEVEL, and return its cache slot index. DIR is
359 the direction to search, starting with the last used cache slot.
360 If DIR is zero, we search backwards from the last occupied cache
361 slot. BEFORE, if non-zero, means return the index of the slot that
362 is ``before'' the level change in the search direction. That is,
363 given the cached levels like this:
364
365 1122333442211
366 AB C
367
368 and assuming we are at the position cached at the slot marked with
369 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
370 index of slot B or A, depending whether BEFORE is, respectively,
371 non-zero or zero. */
372 static int
373 bidi_cache_find_level_change (int level, int dir, int before)
374 {
375 if (bidi_cache_idx)
376 {
377 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
378 int incr = before ? 1 : 0;
379
380 if (!dir)
381 dir = -1;
382 else if (!incr)
383 i += dir;
384
385 if (dir < 0)
386 {
387 while (i >= bidi_cache_start + incr)
388 {
389 if (bidi_cache[i - incr].resolved_level >= 0
390 && bidi_cache[i - incr].resolved_level < level)
391 return i;
392 i--;
393 }
394 }
395 else
396 {
397 while (i < bidi_cache_idx - incr)
398 {
399 if (bidi_cache[i + incr].resolved_level >= 0
400 && bidi_cache[i + incr].resolved_level < level)
401 return i;
402 i++;
403 }
404 }
405 }
406
407 return -1;
408 }
409
410 static INLINE void
411 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
412 {
413 int idx;
414
415 /* We should never cache on backward scans. */
416 if (bidi_it->scan_dir == -1)
417 abort ();
418 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
419
420 if (idx < 0)
421 {
422 idx = bidi_cache_idx;
423 /* Enlarge the cache as needed. */
424 if (idx >= bidi_cache_size)
425 {
426 bidi_cache_size += BIDI_CACHE_CHUNK;
427 bidi_cache =
428 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
429 }
430 /* Character positions should correspond to cache positions 1:1.
431 If we are outside the range of cached positions, the cache is
432 useless and must be reset. */
433 if (idx > bidi_cache_start &&
434 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
435 + bidi_cache[idx - 1].nchars)
436 || bidi_it->charpos < bidi_cache[0].charpos))
437 {
438 bidi_cache_reset ();
439 idx = bidi_cache_start;
440 }
441 if (bidi_it->nchars <= 0)
442 abort ();
443 bidi_copy_it (&bidi_cache[idx], bidi_it);
444 if (!resolved)
445 bidi_cache[idx].resolved_level = -1;
446 }
447 else
448 {
449 /* Copy only the members which could have changed, to avoid
450 costly copying of the entire struct. */
451 bidi_cache[idx].type = bidi_it->type;
452 bidi_check_type (bidi_it->type);
453 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
454 bidi_check_type (bidi_it->type_after_w1);
455 if (resolved)
456 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
457 else
458 bidi_cache[idx].resolved_level = -1;
459 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
460 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
461 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
462 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
463 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
464 }
465
466 bidi_cache_last_idx = idx;
467 if (idx >= bidi_cache_idx)
468 bidi_cache_idx = idx + 1;
469 }
470
471 static INLINE bidi_type_t
472 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
473 {
474 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
475
476 if (i >= bidi_cache_start)
477 {
478 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
479
480 bidi_copy_it (bidi_it, &bidi_cache[i]);
481 bidi_cache_last_idx = i;
482 /* Don't let scan direction from from the cached state override
483 the current scan direction. */
484 bidi_it->scan_dir = current_scan_dir;
485 return bidi_it->type;
486 }
487
488 return UNKNOWN_BT;
489 }
490
491 static INLINE int
492 bidi_peek_at_next_level (struct bidi_it *bidi_it)
493 {
494 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
495 abort ();
496 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
497 }
498
499 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
500 Value is the non-negative length of the paragraph separator
501 following the buffer position, -1 if position is at the beginning
502 of a new paragraph, or -2 if position is neither at beginning nor
503 at end of a paragraph. */
504 static EMACS_INT
505 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
506 {
507 Lisp_Object sep_re;
508 Lisp_Object start_re;
509 EMACS_INT val;
510
511 sep_re = paragraph_separate_re;
512 start_re = paragraph_start_re;
513
514 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
515 if (val < 0)
516 {
517 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
518 val = -1;
519 else
520 val = -2;
521 }
522
523 return val;
524 }
525
526 /* Determine the start-of-run (sor) directional type given the two
527 embedding levels on either side of the run boundary. Also, update
528 the saved info about previously seen characters, since that info is
529 generally valid for a single level run. */
530 static INLINE void
531 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
532 {
533 int higher_level = level_before > level_after ? level_before : level_after;
534
535 /* The prev_was_pdf gork is required for when we have several PDFs
536 in a row. In that case, we want to compute the sor type for the
537 next level run only once: when we see the first PDF. That's
538 because the sor type depends only on the higher of the two levels
539 that we find on the two sides of the level boundary (see UAX#9,
540 clause X10), and so we don't need to know the final embedding
541 level to which we descend after processing all the PDFs. */
542 if (!bidi_it->prev_was_pdf || level_before < level_after)
543 /* FIXME: should the default sor direction be user selectable? */
544 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
545 if (level_before > level_after)
546 bidi_it->prev_was_pdf = 1;
547
548 bidi_it->prev.type = UNKNOWN_BT;
549 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
550 bidi_it->last_strong.orig_type = UNKNOWN_BT;
551 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
552 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
553 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
554 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
555 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
556 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
557 }
558
559 /* Perform initializations for reordering a new line of bidi text. */
560 static void
561 bidi_line_init (struct bidi_it *bidi_it)
562 {
563 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
564 bidi_it->resolved_level = bidi_it->level_stack[0].level;
565 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
566 bidi_it->invalid_levels = 0;
567 bidi_it->invalid_rl_levels = -1;
568 bidi_it->next_en_pos = -1;
569 bidi_it->next_for_ws.type = UNKNOWN_BT;
570 bidi_set_sor_type (bidi_it,
571 bidi_it->paragraph_dir == R2L ? 1 : 0,
572 bidi_it->level_stack[0].level); /* X10 */
573
574 bidi_cache_reset ();
575 }
576
577 /* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG
578 and END are zero-based character positions in S, BEGBYTE is byte
579 position corresponding to BEG. */
580 static inline EMACS_INT
581 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
582 const EMACS_INT begbyte, const EMACS_INT end)
583 {
584 EMACS_INT pos = beg;
585 const unsigned char *p = s + begbyte, *start = p;
586
587 if (!CHAR_HEAD_P (*p))
588 abort ();
589
590 while (pos < end)
591 {
592 p += BYTES_BY_CHAR_HEAD (*p);
593 pos++;
594 }
595
596 return p - start;
597 }
598
599 /* Fetch and returns the character at byte position BYTEPOS. If S is
600 non-NULL, fetch the character from string S; otherwise fetch the
601 character from the current buffer. */
602 static inline int
603 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s)
604 {
605 if (s)
606 return STRING_CHAR (s + bytepos);
607 else
608 return FETCH_MULTIBYTE_CHAR (bytepos);
609 }
610
611 /* Fetch and return the character at BYTEPOS/CHARPOS. If that
612 character is covered by a display string, treat the entire run of
613 covered characters as a single character u+FFFC, and return their
614 combined length in CH_LEN and NCHARS. DISP_POS specifies the
615 character position of the next display string, or -1 if not yet
616 computed. When the next character is at or beyond that position,
617 the function updates DISP_POS with the position of the next display
618 string. STRING->s is the string to iterate, or NULL if iterating over
619 a buffer. */
620 static inline int
621 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
622 struct bidi_string_data *string,
623 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
624 {
625 int ch;
626 EMACS_INT endpos = string->s ? string->schars : ZV;
627
628 /* If we got past the last known position of display string, compute
629 the position of the next one. That position could be at CHARPOS. */
630 if (charpos < endpos && charpos > *disp_pos)
631 *disp_pos = compute_display_string_pos (charpos, string, frame_window_p);
632
633 /* Fetch the character at BYTEPOS. */
634 if (charpos >= endpos)
635 {
636 ch = BIDI_EOB;
637 *ch_len = 1;
638 *nchars = 1;
639 *disp_pos = endpos;
640 }
641 else if (charpos >= *disp_pos)
642 {
643 EMACS_INT disp_end_pos;
644
645 /* We don't expect to find ourselves in the middle of a display
646 property. Hopefully, it will never be needed. */
647 if (charpos > *disp_pos)
648 abort ();
649 /* Return the Unicode Object Replacement Character to represent
650 the entire run of characters covered by the display string. */
651 ch = 0xFFFC;
652 disp_end_pos = compute_display_string_end (*disp_pos, string);
653 *nchars = disp_end_pos - *disp_pos;
654 if (string->s)
655 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
656 disp_end_pos);
657 else
658 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
659 }
660 else
661 {
662 if (string->s)
663 {
664 EMACS_INT len;
665
666 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
667 *ch_len = len;
668 }
669 else
670 {
671 ch = FETCH_MULTIBYTE_CHAR (bytepos);
672 *ch_len = CHAR_BYTES (ch);
673 }
674 *nchars = 1;
675 }
676
677 /* If we just entered a run of characters covered by a display
678 string, compute the position of the next display string. */
679 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos)
680 *disp_pos = compute_display_string_pos (charpos + *nchars, string,
681 frame_window_p);
682
683 return ch;
684 }
685
686 /* Find the beginning of this paragraph by looking back in the buffer.
687 Value is the byte position of the paragraph's beginning. */
688 static EMACS_INT
689 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
690 {
691 Lisp_Object re = paragraph_start_re;
692 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
693
694 while (pos_byte > BEGV_BYTE
695 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
696 {
697 /* FIXME: What if the paragraph beginning is covered by a
698 display string? And what if a display string covering some
699 of the text over which we scan back includes
700 paragraph_start_re? */
701 pos = find_next_newline_no_quit (pos - 1, -1);
702 pos_byte = CHAR_TO_BYTE (pos);
703 }
704 return pos_byte;
705 }
706
707 /* Determine the base direction, a.k.a. base embedding level, of the
708 paragraph we are about to iterate through. If DIR is either L2R or
709 R2L, just use that. Otherwise, determine the paragraph direction
710 from the first strong directional character of the paragraph.
711
712 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
713 has no strong directional characters and both DIR and
714 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
715 in the buffer until a paragraph is found with a strong character,
716 or until hitting BEGV. In the latter case, fall back to L2R. This
717 flag is used in current-bidi-paragraph-direction.
718
719 Note that this function gives the paragraph separator the same
720 direction as the preceding paragraph, even though Emacs generally
721 views the separartor as not belonging to any paragraph. */
722 void
723 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
724 {
725 EMACS_INT bytepos = bidi_it->bytepos;
726 int string_p = bidi_it->string.s != NULL;
727 EMACS_INT pstartbyte;
728 /* Note that begbyte is a byte position, while end is a character
729 position. Yes, this is ugly, but we are trying to avoid costly
730 calls to BYTE_TO_CHAR and its ilk. */
731 EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
732 EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
733
734 /* Special case for an empty buffer. */
735 if (bytepos == begbyte && bidi_it->charpos == end)
736 dir = L2R;
737 /* We should never be called at EOB or before BEGV. */
738 else if (bidi_it->charpos >= end || bytepos < begbyte)
739 abort ();
740
741 if (dir == L2R)
742 {
743 bidi_it->paragraph_dir = L2R;
744 bidi_it->new_paragraph = 0;
745 }
746 else if (dir == R2L)
747 {
748 bidi_it->paragraph_dir = R2L;
749 bidi_it->new_paragraph = 0;
750 }
751 else if (dir == NEUTRAL_DIR) /* P2 */
752 {
753 int ch;
754 EMACS_INT ch_len, nchars;
755 EMACS_INT pos, disp_pos = -1;
756 bidi_type_t type;
757
758 if (!bidi_initialized)
759 bidi_initialize ();
760
761 /* If we are inside a paragraph separator, we are just waiting
762 for the separator to be exhausted; use the previous paragraph
763 direction. But don't do that if we have been just reseated,
764 because we need to reinitialize below in that case. */
765 if (!bidi_it->first_elt
766 && bidi_it->charpos < bidi_it->separator_limit)
767 return;
768
769 /* If we are on a newline, get past it to where the next
770 paragraph might start. But don't do that at BEGV since then
771 we are potentially in a new paragraph that doesn't yet
772 exist. */
773 pos = bidi_it->charpos;
774 if (bytepos > begbyte
775 && bidi_char_at_pos (bytepos, bidi_it->string.s) == '\n')
776 {
777 bytepos++;
778 pos++;
779 }
780
781 /* We are either at the beginning of a paragraph or in the
782 middle of it. Find where this paragraph starts. */
783 if (string_p)
784 {
785 /* We don't support changes of paragraph direction inside a
786 string. It is treated as a single paragraph. */
787 pstartbyte = 0;
788 }
789 else
790 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
791 bidi_it->separator_limit = -1;
792 bidi_it->new_paragraph = 0;
793
794 /* The following loop is run more than once only if NO_DEFAULT_P
795 is non-zero, and only if we are iterating on a buffer. */
796 do {
797 bytepos = pstartbyte;
798 if (!string_p)
799 pos = BYTE_TO_CHAR (bytepos);
800 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
801 bidi_it->frame_window_p, &ch_len, &nchars);
802 type = bidi_get_type (ch, NEUTRAL_DIR);
803
804 for (pos += nchars, bytepos += ch_len;
805 /* NOTE: UAX#9 says to search only for L, AL, or R types
806 of characters, and ignore RLE, RLO, LRE, and LRO.
807 However, I'm not sure it makes sense to omit those 4;
808 should try with and without that to see the effect. */
809 (bidi_get_category (type) != STRONG)
810 || (bidi_ignore_explicit_marks_for_paragraph_level
811 && (type == RLE || type == RLO
812 || type == LRE || type == LRO));
813 type = bidi_get_type (ch, NEUTRAL_DIR))
814 {
815 if (!string_p
816 && type == NEUTRAL_B
817 && bidi_at_paragraph_end (pos, bytepos) >= -1)
818 break;
819 if (pos >= end)
820 {
821 /* Pretend there's a paragraph separator at end of
822 buffer/string. */
823 type = NEUTRAL_B;
824 break;
825 }
826 /* Fetch next character and advance to get past it. */
827 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
828 bidi_it->frame_window_p, &ch_len, &nchars);
829 pos += nchars;
830 bytepos += ch_len;
831 }
832 if (type == STRONG_R || type == STRONG_AL) /* P3 */
833 bidi_it->paragraph_dir = R2L;
834 else if (type == STRONG_L)
835 bidi_it->paragraph_dir = L2R;
836 if (!string_p
837 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
838 {
839 /* If this paragraph is at BEGV, default to L2R. */
840 if (pstartbyte == BEGV_BYTE)
841 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
842 else
843 {
844 EMACS_INT prevpbyte = pstartbyte;
845 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
846
847 /* Find the beginning of the previous paragraph, if any. */
848 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
849 {
850 /* FXIME: What if p is covered by a display
851 string? See also a FIXME inside
852 bidi_find_paragraph_start. */
853 p--;
854 pbyte = CHAR_TO_BYTE (p);
855 prevpbyte = bidi_find_paragraph_start (p, pbyte);
856 }
857 pstartbyte = prevpbyte;
858 }
859 }
860 } while (!string_p
861 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
862 }
863 else
864 abort ();
865
866 /* Contrary to UAX#9 clause P3, we only default the paragraph
867 direction to L2R if we have no previous usable paragraph
868 direction. This is allowed by the HL1 clause. */
869 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
870 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
871 if (bidi_it->paragraph_dir == R2L)
872 bidi_it->level_stack[0].level = 1;
873 else
874 bidi_it->level_stack[0].level = 0;
875
876 bidi_line_init (bidi_it);
877 }
878
879 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
880 end. */
881 static INLINE void
882 bidi_set_paragraph_end (struct bidi_it *bidi_it)
883 {
884 bidi_it->invalid_levels = 0;
885 bidi_it->invalid_rl_levels = -1;
886 bidi_it->stack_idx = 0;
887 bidi_it->resolved_level = bidi_it->level_stack[0].level;
888 }
889
890 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
891 void
892 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
893 struct bidi_it *bidi_it)
894 {
895 if (! bidi_initialized)
896 bidi_initialize ();
897 if (charpos >= 0)
898 bidi_it->charpos = charpos;
899 if (bytepos >= 0)
900 bidi_it->bytepos = bytepos;
901 bidi_it->frame_window_p = frame_window_p;
902 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
903 bidi_it->first_elt = 1;
904 bidi_set_paragraph_end (bidi_it);
905 bidi_it->new_paragraph = 1;
906 bidi_it->separator_limit = -1;
907 bidi_it->type = NEUTRAL_B;
908 bidi_it->type_after_w1 = NEUTRAL_B;
909 bidi_it->orig_type = NEUTRAL_B;
910 bidi_it->prev_was_pdf = 0;
911 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
912 bidi_it->prev.orig_type = UNKNOWN_BT;
913 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
914 bidi_it->last_strong.orig_type = UNKNOWN_BT;
915 bidi_it->next_for_neutral.charpos = -1;
916 bidi_it->next_for_neutral.type =
917 bidi_it->next_for_neutral.type_after_w1 =
918 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
919 bidi_it->prev_for_neutral.charpos = -1;
920 bidi_it->prev_for_neutral.type =
921 bidi_it->prev_for_neutral.type_after_w1 =
922 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
923 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
924 bidi_it->disp_pos = -1; /* invalid/unknown */
925 /* We can only shrink the cache if we are at the bottom level of its
926 "stack". */
927 if (bidi_cache_start == 0)
928 bidi_cache_shrink ();
929 }
930
931 /* Push the current embedding level and override status; reset the
932 current level to LEVEL and the current override status to OVERRIDE. */
933 static INLINE void
934 bidi_push_embedding_level (struct bidi_it *bidi_it,
935 int level, bidi_dir_t override)
936 {
937 bidi_it->stack_idx++;
938 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
939 abort ();
940 bidi_it->level_stack[bidi_it->stack_idx].level = level;
941 bidi_it->level_stack[bidi_it->stack_idx].override = override;
942 }
943
944 /* Pop the embedding level and directional override status from the
945 stack, and return the new level. */
946 static INLINE int
947 bidi_pop_embedding_level (struct bidi_it *bidi_it)
948 {
949 /* UAX#9 says to ignore invalid PDFs. */
950 if (bidi_it->stack_idx > 0)
951 bidi_it->stack_idx--;
952 return bidi_it->level_stack[bidi_it->stack_idx].level;
953 }
954
955 /* Record in SAVED_INFO the information about the current character. */
956 static INLINE void
957 bidi_remember_char (struct bidi_saved_info *saved_info,
958 struct bidi_it *bidi_it)
959 {
960 saved_info->charpos = bidi_it->charpos;
961 saved_info->bytepos = bidi_it->bytepos;
962 saved_info->type = bidi_it->type;
963 bidi_check_type (bidi_it->type);
964 saved_info->type_after_w1 = bidi_it->type_after_w1;
965 bidi_check_type (bidi_it->type_after_w1);
966 saved_info->orig_type = bidi_it->orig_type;
967 bidi_check_type (bidi_it->orig_type);
968 }
969
970 /* Resolve the type of a neutral character according to the type of
971 surrounding strong text and the current embedding level. */
972 static INLINE bidi_type_t
973 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
974 {
975 /* N1: European and Arabic numbers are treated as though they were R. */
976 if (next_type == WEAK_EN || next_type == WEAK_AN)
977 next_type = STRONG_R;
978 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
979 prev_type = STRONG_R;
980
981 if (next_type == prev_type) /* N1 */
982 return next_type;
983 else if ((lev & 1) == 0) /* N2 */
984 return STRONG_L;
985 else
986 return STRONG_R;
987 }
988
989 static INLINE int
990 bidi_explicit_dir_char (int ch)
991 {
992 bidi_type_t ch_type;
993
994 if (!bidi_initialized)
995 abort ();
996 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
997 return (ch_type == LRE || ch_type == LRO
998 || ch_type == RLE || ch_type == RLO
999 || ch_type == PDF);
1000 }
1001
1002 /* A helper function for bidi_resolve_explicit. It advances to the
1003 next character in logical order and determines the new embedding
1004 level and directional override, but does not take into account
1005 empty embeddings. */
1006 static int
1007 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1008 {
1009 int curchar;
1010 bidi_type_t type;
1011 int current_level;
1012 int new_level;
1013 bidi_dir_t override;
1014 int string_p = bidi_it->string.s != NULL;
1015
1016 /* If reseat()'ed, don't advance, so as to start iteration from the
1017 position where we were reseated. bidi_it->bytepos can be less
1018 than BEGV_BYTE after reseat to BEGV. */
1019 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1020 || bidi_it->first_elt)
1021 {
1022 bidi_it->first_elt = 0;
1023 if (string_p)
1024 {
1025 if (bidi_it->charpos < 0)
1026 bidi_it->charpos = 0;
1027 bidi_it->bytepos = bidi_count_bytes (bidi_it->string.s, 0, 0,
1028 bidi_it->charpos);
1029 }
1030 else
1031 {
1032 if (bidi_it->charpos < BEGV)
1033 bidi_it->charpos = BEGV;
1034 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1035 }
1036 }
1037 /* Don't move at end of buffer/string. */
1038 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1039 {
1040 /* Advance to the next character, skipping characters covered by
1041 display strings (nchars > 1). */
1042 if (bidi_it->nchars <= 0)
1043 abort ();
1044 bidi_it->charpos += bidi_it->nchars;
1045 if (bidi_it->ch_len == 0)
1046 abort ();
1047 bidi_it->bytepos += bidi_it->ch_len;
1048 }
1049
1050 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1051 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1052 new_level = current_level;
1053
1054 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1055 {
1056 curchar = BIDI_EOB;
1057 bidi_it->ch_len = 1;
1058 bidi_it->nchars = 1;
1059 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1060 }
1061 else
1062 {
1063 /* Fetch the character at BYTEPOS. If it is covered by a
1064 display string, treat the entire run of covered characters as
1065 a single character u+FFFC. */
1066 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1067 &bidi_it->disp_pos, &bidi_it->string,
1068 bidi_it->frame_window_p,
1069 &bidi_it->ch_len, &bidi_it->nchars);
1070 }
1071 bidi_it->ch = curchar;
1072
1073 /* Don't apply directional override here, as all the types we handle
1074 below will not be affected by the override anyway, and we need
1075 the original type unaltered. The override will be applied in
1076 bidi_resolve_weak. */
1077 type = bidi_get_type (curchar, NEUTRAL_DIR);
1078 bidi_it->orig_type = type;
1079 bidi_check_type (bidi_it->orig_type);
1080
1081 if (type != PDF)
1082 bidi_it->prev_was_pdf = 0;
1083
1084 bidi_it->type_after_w1 = UNKNOWN_BT;
1085
1086 switch (type)
1087 {
1088 case RLE: /* X2 */
1089 case RLO: /* X4 */
1090 bidi_it->type_after_w1 = type;
1091 bidi_check_type (bidi_it->type_after_w1);
1092 type = WEAK_BN; /* X9/Retaining */
1093 if (bidi_it->ignore_bn_limit <= -1)
1094 {
1095 if (current_level <= BIDI_MAXLEVEL - 4)
1096 {
1097 /* Compute the least odd embedding level greater than
1098 the current level. */
1099 new_level = ((current_level + 1) & ~1) + 1;
1100 if (bidi_it->type_after_w1 == RLE)
1101 override = NEUTRAL_DIR;
1102 else
1103 override = R2L;
1104 if (current_level == BIDI_MAXLEVEL - 4)
1105 bidi_it->invalid_rl_levels = 0;
1106 bidi_push_embedding_level (bidi_it, new_level, override);
1107 }
1108 else
1109 {
1110 bidi_it->invalid_levels++;
1111 /* See the commentary about invalid_rl_levels below. */
1112 if (bidi_it->invalid_rl_levels < 0)
1113 bidi_it->invalid_rl_levels = 0;
1114 bidi_it->invalid_rl_levels++;
1115 }
1116 }
1117 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1118 || bidi_it->next_en_pos > bidi_it->charpos)
1119 type = WEAK_EN;
1120 break;
1121 case LRE: /* X3 */
1122 case LRO: /* X5 */
1123 bidi_it->type_after_w1 = type;
1124 bidi_check_type (bidi_it->type_after_w1);
1125 type = WEAK_BN; /* X9/Retaining */
1126 if (bidi_it->ignore_bn_limit <= -1)
1127 {
1128 if (current_level <= BIDI_MAXLEVEL - 5)
1129 {
1130 /* Compute the least even embedding level greater than
1131 the current level. */
1132 new_level = ((current_level + 2) & ~1);
1133 if (bidi_it->type_after_w1 == LRE)
1134 override = NEUTRAL_DIR;
1135 else
1136 override = L2R;
1137 bidi_push_embedding_level (bidi_it, new_level, override);
1138 }
1139 else
1140 {
1141 bidi_it->invalid_levels++;
1142 /* invalid_rl_levels counts invalid levels encountered
1143 while the embedding level was already too high for
1144 LRE/LRO, but not for RLE/RLO. That is because
1145 there may be exactly one PDF which we should not
1146 ignore even though invalid_levels is non-zero.
1147 invalid_rl_levels helps to know what PDF is
1148 that. */
1149 if (bidi_it->invalid_rl_levels >= 0)
1150 bidi_it->invalid_rl_levels++;
1151 }
1152 }
1153 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1154 || bidi_it->next_en_pos > bidi_it->charpos)
1155 type = WEAK_EN;
1156 break;
1157 case PDF: /* X7 */
1158 bidi_it->type_after_w1 = type;
1159 bidi_check_type (bidi_it->type_after_w1);
1160 type = WEAK_BN; /* X9/Retaining */
1161 if (bidi_it->ignore_bn_limit <= -1)
1162 {
1163 if (!bidi_it->invalid_rl_levels)
1164 {
1165 new_level = bidi_pop_embedding_level (bidi_it);
1166 bidi_it->invalid_rl_levels = -1;
1167 if (bidi_it->invalid_levels)
1168 bidi_it->invalid_levels--;
1169 /* else nothing: UAX#9 says to ignore invalid PDFs */
1170 }
1171 if (!bidi_it->invalid_levels)
1172 new_level = bidi_pop_embedding_level (bidi_it);
1173 else
1174 {
1175 bidi_it->invalid_levels--;
1176 bidi_it->invalid_rl_levels--;
1177 }
1178 }
1179 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1180 || bidi_it->next_en_pos > bidi_it->charpos)
1181 type = WEAK_EN;
1182 break;
1183 default:
1184 /* Nothing. */
1185 break;
1186 }
1187
1188 bidi_it->type = type;
1189 bidi_check_type (bidi_it->type);
1190
1191 return new_level;
1192 }
1193
1194 /* Given an iterator state in BIDI_IT, advance one character position
1195 in the buffer/string to the next character (in the logical order),
1196 resolve any explicit embeddings and directional overrides, and
1197 return the embedding level of the character after resolving
1198 explicit directives and ignoring empty embeddings. */
1199 static int
1200 bidi_resolve_explicit (struct bidi_it *bidi_it)
1201 {
1202 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1203 int new_level = bidi_resolve_explicit_1 (bidi_it);
1204 EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1205
1206 if (prev_level < new_level
1207 && bidi_it->type == WEAK_BN
1208 && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1209 && bidi_it->charpos < eob /* not already at EOB */
1210 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1211 + bidi_it->ch_len,
1212 bidi_it->string.s)))
1213 {
1214 /* Avoid pushing and popping embedding levels if the level run
1215 is empty, as this breaks level runs where it shouldn't.
1216 UAX#9 removes all the explicit embedding and override codes,
1217 so empty embeddings disappear without a trace. We need to
1218 behave as if we did the same. */
1219 struct bidi_it saved_it;
1220 int level = prev_level;
1221
1222 bidi_copy_it (&saved_it, bidi_it);
1223
1224 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1225 + bidi_it->ch_len,
1226 bidi_it->string.s)))
1227 {
1228 /* This advances to the next character, skipping any
1229 characters covered by display strings. */
1230 level = bidi_resolve_explicit_1 (bidi_it);
1231 }
1232
1233 if (bidi_it->nchars <= 0)
1234 abort ();
1235 if (level == prev_level) /* empty embedding */
1236 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1237 else /* this embedding is non-empty */
1238 saved_it.ignore_bn_limit = -2;
1239
1240 bidi_copy_it (bidi_it, &saved_it);
1241 if (bidi_it->ignore_bn_limit > -1)
1242 {
1243 /* We pushed a level, but we shouldn't have. Undo that. */
1244 if (!bidi_it->invalid_rl_levels)
1245 {
1246 new_level = bidi_pop_embedding_level (bidi_it);
1247 bidi_it->invalid_rl_levels = -1;
1248 if (bidi_it->invalid_levels)
1249 bidi_it->invalid_levels--;
1250 }
1251 if (!bidi_it->invalid_levels)
1252 new_level = bidi_pop_embedding_level (bidi_it);
1253 else
1254 {
1255 bidi_it->invalid_levels--;
1256 bidi_it->invalid_rl_levels--;
1257 }
1258 }
1259 }
1260
1261 if (bidi_it->type == NEUTRAL_B) /* X8 */
1262 {
1263 bidi_set_paragraph_end (bidi_it);
1264 /* This is needed by bidi_resolve_weak below, and in L1. */
1265 bidi_it->type_after_w1 = bidi_it->type;
1266 bidi_check_type (bidi_it->type_after_w1);
1267 }
1268
1269 return new_level;
1270 }
1271
1272 /* Advance in the buffer/string, resolve weak types and return the
1273 type of the next character after weak type resolution. */
1274 static bidi_type_t
1275 bidi_resolve_weak (struct bidi_it *bidi_it)
1276 {
1277 bidi_type_t type;
1278 bidi_dir_t override;
1279 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1280 int new_level = bidi_resolve_explicit (bidi_it);
1281 int next_char;
1282 bidi_type_t type_of_next;
1283 struct bidi_it saved_it;
1284 EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1285
1286 type = bidi_it->type;
1287 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1288
1289 if (type == UNKNOWN_BT
1290 || type == LRE
1291 || type == LRO
1292 || type == RLE
1293 || type == RLO
1294 || type == PDF)
1295 abort ();
1296
1297 if (new_level != prev_level
1298 || bidi_it->type == NEUTRAL_B)
1299 {
1300 /* We've got a new embedding level run, compute the directional
1301 type of sor and initialize per-run variables (UAX#9, clause
1302 X10). */
1303 bidi_set_sor_type (bidi_it, prev_level, new_level);
1304 }
1305 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1306 || type == WEAK_BN || type == STRONG_AL)
1307 bidi_it->type_after_w1 = type; /* needed in L1 */
1308 bidi_check_type (bidi_it->type_after_w1);
1309
1310 /* Level and directional override status are already recorded in
1311 bidi_it, and do not need any change; see X6. */
1312 if (override == R2L) /* X6 */
1313 type = STRONG_R;
1314 else if (override == L2R)
1315 type = STRONG_L;
1316 else
1317 {
1318 if (type == WEAK_NSM) /* W1 */
1319 {
1320 /* Note that we don't need to consider the case where the
1321 prev character has its type overridden by an RLO or LRO,
1322 because then either the type of this NSM would have been
1323 also overridden, or the previous character is outside the
1324 current level run, and thus not relevant to this NSM.
1325 This is why NSM gets the type_after_w1 of the previous
1326 character. */
1327 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1328 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1329 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1330 type = bidi_it->prev.type_after_w1;
1331 else if (bidi_it->sor == R2L)
1332 type = STRONG_R;
1333 else if (bidi_it->sor == L2R)
1334 type = STRONG_L;
1335 else /* shouldn't happen! */
1336 abort ();
1337 }
1338 if (type == WEAK_EN /* W2 */
1339 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1340 type = WEAK_AN;
1341 else if (type == STRONG_AL) /* W3 */
1342 type = STRONG_R;
1343 else if ((type == WEAK_ES /* W4 */
1344 && bidi_it->prev.type_after_w1 == WEAK_EN
1345 && bidi_it->prev.orig_type == WEAK_EN)
1346 || (type == WEAK_CS
1347 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1348 && bidi_it->prev.orig_type == WEAK_EN)
1349 || bidi_it->prev.type_after_w1 == WEAK_AN)))
1350 {
1351 next_char =
1352 bidi_it->charpos + bidi_it->nchars >= eob
1353 ? BIDI_EOB
1354 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1355 bidi_it->string.s);
1356 type_of_next = bidi_get_type (next_char, override);
1357
1358 if (type_of_next == WEAK_BN
1359 || bidi_explicit_dir_char (next_char))
1360 {
1361 bidi_copy_it (&saved_it, bidi_it);
1362 while (bidi_resolve_explicit (bidi_it) == new_level
1363 && bidi_it->type == WEAK_BN)
1364 ;
1365 type_of_next = bidi_it->type;
1366 bidi_copy_it (bidi_it, &saved_it);
1367 }
1368
1369 /* If the next character is EN, but the last strong-type
1370 character is AL, that next EN will be changed to AN when
1371 we process it in W2 above. So in that case, this ES
1372 should not be changed into EN. */
1373 if (type == WEAK_ES
1374 && type_of_next == WEAK_EN
1375 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1376 type = WEAK_EN;
1377 else if (type == WEAK_CS)
1378 {
1379 if (bidi_it->prev.type_after_w1 == WEAK_AN
1380 && (type_of_next == WEAK_AN
1381 /* If the next character is EN, but the last
1382 strong-type character is AL, EN will be later
1383 changed to AN when we process it in W2 above.
1384 So in that case, this ES should not be
1385 changed into EN. */
1386 || (type_of_next == WEAK_EN
1387 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1388 type = WEAK_AN;
1389 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1390 && type_of_next == WEAK_EN
1391 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1392 type = WEAK_EN;
1393 }
1394 }
1395 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1396 || type == WEAK_BN) /* W5/Retaining */
1397 {
1398 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1399 || bidi_it->next_en_pos > bidi_it->charpos)
1400 type = WEAK_EN;
1401 else /* W5: ET/BN with EN after it. */
1402 {
1403 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1404
1405 if (bidi_it->nchars <= 0)
1406 abort ();
1407 next_char =
1408 bidi_it->charpos + bidi_it->nchars >= eob
1409 ? BIDI_EOB
1410 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1411 bidi_it->string.s);
1412 type_of_next = bidi_get_type (next_char, override);
1413
1414 if (type_of_next == WEAK_ET
1415 || type_of_next == WEAK_BN
1416 || bidi_explicit_dir_char (next_char))
1417 {
1418 bidi_copy_it (&saved_it, bidi_it);
1419 while (bidi_resolve_explicit (bidi_it) == new_level
1420 && (bidi_it->type == WEAK_BN
1421 || bidi_it->type == WEAK_ET))
1422 ;
1423 type_of_next = bidi_it->type;
1424 en_pos = bidi_it->charpos;
1425 bidi_copy_it (bidi_it, &saved_it);
1426 }
1427 if (type_of_next == WEAK_EN)
1428 {
1429 /* If the last strong character is AL, the EN we've
1430 found will become AN when we get to it (W2). */
1431 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1432 {
1433 type = WEAK_EN;
1434 /* Remember this EN position, to speed up processing
1435 of the next ETs. */
1436 bidi_it->next_en_pos = en_pos;
1437 }
1438 else if (type == WEAK_BN)
1439 type = NEUTRAL_ON; /* W6/Retaining */
1440 }
1441 }
1442 }
1443 }
1444
1445 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1446 || (type == WEAK_BN
1447 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1448 || bidi_it->prev.type_after_w1 == WEAK_ES
1449 || bidi_it->prev.type_after_w1 == WEAK_ET)))
1450 type = NEUTRAL_ON;
1451
1452 /* Store the type we've got so far, before we clobber it with strong
1453 types in W7 and while resolving neutral types. But leave alone
1454 the original types that were recorded above, because we will need
1455 them for the L1 clause. */
1456 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1457 bidi_it->type_after_w1 = type;
1458 bidi_check_type (bidi_it->type_after_w1);
1459
1460 if (type == WEAK_EN) /* W7 */
1461 {
1462 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1463 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1464 type = STRONG_L;
1465 }
1466
1467 bidi_it->type = type;
1468 bidi_check_type (bidi_it->type);
1469 return type;
1470 }
1471
1472 static bidi_type_t
1473 bidi_resolve_neutral (struct bidi_it *bidi_it)
1474 {
1475 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1476 bidi_type_t type = bidi_resolve_weak (bidi_it);
1477 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1478
1479 if (!(type == STRONG_R
1480 || type == STRONG_L
1481 || type == WEAK_BN
1482 || type == WEAK_EN
1483 || type == WEAK_AN
1484 || type == NEUTRAL_B
1485 || type == NEUTRAL_S
1486 || type == NEUTRAL_WS
1487 || type == NEUTRAL_ON))
1488 abort ();
1489
1490 if (bidi_get_category (type) == NEUTRAL
1491 || (type == WEAK_BN && prev_level == current_level))
1492 {
1493 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1494 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1495 bidi_it->next_for_neutral.type,
1496 current_level);
1497 else
1498 {
1499 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1500 the assumption of batch-style processing; see clauses W4,
1501 W5, and especially N1, which require to look far forward
1502 (as well as back) in the buffer/string. May the fleas of
1503 a thousand camels infest the armpits of those who design
1504 supposedly general-purpose algorithms by looking at their
1505 own implementations, and fail to consider other possible
1506 implementations! */
1507 struct bidi_it saved_it;
1508 bidi_type_t next_type;
1509
1510 if (bidi_it->scan_dir == -1)
1511 abort ();
1512
1513 bidi_copy_it (&saved_it, bidi_it);
1514 /* Scan the text forward until we find the first non-neutral
1515 character, and then use that to resolve the neutral we
1516 are dealing with now. We also cache the scanned iterator
1517 states, to salvage some of the effort later. */
1518 bidi_cache_iterator_state (bidi_it, 0);
1519 do {
1520 /* Record the info about the previous character, so that
1521 it will be cached below with this state. */
1522 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1523 && bidi_it->type != WEAK_BN)
1524 bidi_remember_char (&bidi_it->prev, bidi_it);
1525 type = bidi_resolve_weak (bidi_it);
1526 /* Paragraph separators have their levels fully resolved
1527 at this point, so cache them as resolved. */
1528 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1529 /* FIXME: implement L1 here, by testing for a newline and
1530 resetting the level for any sequence of whitespace
1531 characters adjacent to it. */
1532 } while (!(type == NEUTRAL_B
1533 || (type != WEAK_BN
1534 && bidi_get_category (type) != NEUTRAL)
1535 /* This is all per level run, so stop when we
1536 reach the end of this level run. */
1537 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1538 current_level));
1539
1540 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1541
1542 switch (type)
1543 {
1544 case STRONG_L:
1545 case STRONG_R:
1546 case STRONG_AL:
1547 next_type = type;
1548 break;
1549 case WEAK_EN:
1550 case WEAK_AN:
1551 /* N1: ``European and Arabic numbers are treated as
1552 though they were R.'' */
1553 next_type = STRONG_R;
1554 saved_it.next_for_neutral.type = STRONG_R;
1555 break;
1556 case WEAK_BN:
1557 if (!bidi_explicit_dir_char (bidi_it->ch))
1558 abort (); /* can't happen: BNs are skipped */
1559 /* FALLTHROUGH */
1560 case NEUTRAL_B:
1561 /* Marched all the way to the end of this level run.
1562 We need to use the eor type, whose information is
1563 stored by bidi_set_sor_type in the prev_for_neutral
1564 member. */
1565 if (saved_it.type != WEAK_BN
1566 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1567 {
1568 next_type = bidi_it->prev_for_neutral.type;
1569 saved_it.next_for_neutral.type = next_type;
1570 bidi_check_type (next_type);
1571 }
1572 else
1573 {
1574 /* This is a BN which does not adjoin neutrals.
1575 Leave its type alone. */
1576 bidi_copy_it (bidi_it, &saved_it);
1577 return bidi_it->type;
1578 }
1579 break;
1580 default:
1581 abort ();
1582 }
1583 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1584 next_type, current_level);
1585 saved_it.type = type;
1586 bidi_check_type (type);
1587 bidi_copy_it (bidi_it, &saved_it);
1588 }
1589 }
1590 return type;
1591 }
1592
1593 /* Given an iterator state in BIDI_IT, advance one character position
1594 in the buffer/string to the next character (in the logical order),
1595 resolve the bidi type of that next character, and return that
1596 type. */
1597 static bidi_type_t
1598 bidi_type_of_next_char (struct bidi_it *bidi_it)
1599 {
1600 bidi_type_t type;
1601
1602 /* This should always be called during a forward scan. */
1603 if (bidi_it->scan_dir != 1)
1604 abort ();
1605
1606 /* Reset the limit until which to ignore BNs if we step out of the
1607 area where we found only empty levels. */
1608 if ((bidi_it->ignore_bn_limit > -1
1609 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1610 || (bidi_it->ignore_bn_limit == -2
1611 && !bidi_explicit_dir_char (bidi_it->ch)))
1612 bidi_it->ignore_bn_limit = -1;
1613
1614 type = bidi_resolve_neutral (bidi_it);
1615
1616 return type;
1617 }
1618
1619 /* Given an iterator state BIDI_IT, advance one character position in
1620 the buffer/string to the next character (in the current scan
1621 direction), resolve the embedding and implicit levels of that next
1622 character, and return the resulting level. */
1623 static int
1624 bidi_level_of_next_char (struct bidi_it *bidi_it)
1625 {
1626 bidi_type_t type;
1627 int level, prev_level = -1;
1628 struct bidi_saved_info next_for_neutral;
1629 EMACS_INT next_char_pos = -1;
1630
1631 if (bidi_it->scan_dir == 1)
1632 {
1633 /* There's no sense in trying to advance if we hit end of text. */
1634 if (bidi_it->charpos >= (bidi_it->string.s ? bidi_it->string.schars : ZV))
1635 return bidi_it->resolved_level;
1636
1637 /* Record the info about the previous character. */
1638 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1639 && bidi_it->type != WEAK_BN)
1640 bidi_remember_char (&bidi_it->prev, bidi_it);
1641 if (bidi_it->type_after_w1 == STRONG_R
1642 || bidi_it->type_after_w1 == STRONG_L
1643 || bidi_it->type_after_w1 == STRONG_AL)
1644 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1645 /* FIXME: it sounds like we don't need both prev and
1646 prev_for_neutral members, but I'm leaving them both for now. */
1647 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1648 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1649 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1650
1651 /* If we overstepped the characters used for resolving neutrals
1652 and whitespace, invalidate their info in the iterator. */
1653 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1654 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1655 if (bidi_it->next_en_pos >= 0
1656 && bidi_it->charpos >= bidi_it->next_en_pos)
1657 bidi_it->next_en_pos = -1;
1658 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1659 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1660 bidi_it->next_for_ws.type = UNKNOWN_BT;
1661
1662 /* This must be taken before we fill the iterator with the info
1663 about the next char. If we scan backwards, the iterator
1664 state must be already cached, so there's no need to know the
1665 embedding level of the previous character, since we will be
1666 returning to our caller shortly. */
1667 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1668 }
1669 next_for_neutral = bidi_it->next_for_neutral;
1670
1671 /* Perhaps the character we want is already cached. If it is, the
1672 call to bidi_cache_find below will return a type other than
1673 UNKNOWN_BT. */
1674 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1675 {
1676 if (bidi_it->scan_dir > 0)
1677 {
1678 if (bidi_it->nchars <= 0)
1679 abort ();
1680 next_char_pos = bidi_it->charpos + bidi_it->nchars;
1681 }
1682 else if (bidi_it->charpos > (bidi_it->string.s ? 0 : 1))
1683 next_char_pos = bidi_it->charpos - 1;
1684 if (next_char_pos >= 0)
1685 type = bidi_cache_find (next_char_pos, -1, bidi_it);
1686 else
1687 type = UNKNOWN_BT;
1688 }
1689 else
1690 type = UNKNOWN_BT;
1691 if (type != UNKNOWN_BT)
1692 {
1693 /* Don't lose the information for resolving neutrals! The
1694 cached states could have been cached before their
1695 next_for_neutral member was computed. If we are on our way
1696 forward, we can simply take the info from the previous
1697 state. */
1698 if (bidi_it->scan_dir == 1
1699 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1700 bidi_it->next_for_neutral = next_for_neutral;
1701
1702 /* If resolved_level is -1, it means this state was cached
1703 before it was completely resolved, so we cannot return
1704 it. */
1705 if (bidi_it->resolved_level != -1)
1706 return bidi_it->resolved_level;
1707 }
1708 if (bidi_it->scan_dir == -1)
1709 /* If we are going backwards, the iterator state is already cached
1710 from previous scans, and should be fully resolved. */
1711 abort ();
1712
1713 if (type == UNKNOWN_BT)
1714 type = bidi_type_of_next_char (bidi_it);
1715
1716 if (type == NEUTRAL_B)
1717 return bidi_it->resolved_level;
1718
1719 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1720 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1721 || (type == WEAK_BN && prev_level == level))
1722 {
1723 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1724 abort ();
1725
1726 /* If the cached state shows a neutral character, it was not
1727 resolved by bidi_resolve_neutral, so do it now. */
1728 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1729 bidi_it->next_for_neutral.type,
1730 level);
1731 }
1732
1733 if (!(type == STRONG_R
1734 || type == STRONG_L
1735 || type == WEAK_BN
1736 || type == WEAK_EN
1737 || type == WEAK_AN))
1738 abort ();
1739 bidi_it->type = type;
1740 bidi_check_type (bidi_it->type);
1741
1742 /* For L1 below, we need to know, for each WS character, whether
1743 it belongs to a sequence of WS characters preceding a newline
1744 or a TAB or a paragraph separator. */
1745 if (bidi_it->orig_type == NEUTRAL_WS
1746 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1747 {
1748 int ch;
1749 EMACS_INT clen = bidi_it->ch_len;
1750 EMACS_INT bpos = bidi_it->bytepos;
1751 EMACS_INT cpos = bidi_it->charpos;
1752 EMACS_INT disp_pos = bidi_it->disp_pos;
1753 EMACS_INT nc = bidi_it->nchars;
1754 struct bidi_string_data bs = bidi_it->string;
1755 bidi_type_t chtype;
1756 int fwp = bidi_it->frame_window_p;
1757
1758 if (bidi_it->nchars <= 0)
1759 abort ();
1760 do {
1761 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &bs, fwp,
1762 &clen, &nc);
1763 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1764 chtype = NEUTRAL_B;
1765 else
1766 chtype = bidi_get_type (ch, NEUTRAL_DIR);
1767 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1768 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1769 bidi_it->next_for_ws.type = chtype;
1770 bidi_check_type (bidi_it->next_for_ws.type);
1771 bidi_it->next_for_ws.charpos = cpos;
1772 bidi_it->next_for_ws.bytepos = bpos;
1773 }
1774
1775 /* Resolve implicit levels, with a twist: PDFs get the embedding
1776 level of the enbedding they terminate. See below for the
1777 reason. */
1778 if (bidi_it->orig_type == PDF
1779 /* Don't do this if this formatting code didn't change the
1780 embedding level due to invalid or empty embeddings. */
1781 && prev_level != level)
1782 {
1783 /* Don't look in UAX#9 for the reason for this: it's our own
1784 private quirk. The reason is that we want the formatting
1785 codes to be delivered so that they bracket the text of their
1786 embedding. For example, given the text
1787
1788 {RLO}teST{PDF}
1789
1790 we want it to be displayed as
1791
1792 {PDF}STet{RLO}
1793
1794 not as
1795
1796 STet{RLO}{PDF}
1797
1798 which will result because we bump up the embedding level as
1799 soon as we see the RLO and pop it as soon as we see the PDF,
1800 so RLO itself has the same embedding level as "teST", and
1801 thus would be normally delivered last, just before the PDF.
1802 The switch below fiddles with the level of PDF so that this
1803 ugly side effect does not happen.
1804
1805 (This is, of course, only important if the formatting codes
1806 are actually displayed, but Emacs does need to display them
1807 if the user wants to.) */
1808 level = prev_level;
1809 }
1810 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1811 || bidi_it->orig_type == NEUTRAL_S
1812 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1813 /* || bidi_it->ch == LINESEP_CHAR */
1814 || (bidi_it->orig_type == NEUTRAL_WS
1815 && (bidi_it->next_for_ws.type == NEUTRAL_B
1816 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1817 level = bidi_it->level_stack[0].level;
1818 else if ((level & 1) == 0) /* I1 */
1819 {
1820 if (type == STRONG_R)
1821 level++;
1822 else if (type == WEAK_EN || type == WEAK_AN)
1823 level += 2;
1824 }
1825 else /* I2 */
1826 {
1827 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1828 level++;
1829 }
1830
1831 bidi_it->resolved_level = level;
1832 return level;
1833 }
1834
1835 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1836 non-zero, we are at the end of a level, and we need to prepare to
1837 resume the scan of the lower level.
1838
1839 If this level's other edge is cached, we simply jump to it, filling
1840 the iterator structure with the iterator state on the other edge.
1841 Otherwise, we walk the buffer or string until we come back to the
1842 same level as LEVEL.
1843
1844 Note: we are not talking here about a ``level run'' in the UAX#9
1845 sense of the term, but rather about a ``level'' which includes
1846 all the levels higher than it. In other words, given the levels
1847 like this:
1848
1849 11111112222222333333334443343222222111111112223322111
1850 A B C
1851
1852 and assuming we are at point A scanning left to right, this
1853 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1854 at point B. */
1855 static void
1856 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1857 {
1858 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1859 int idx;
1860
1861 /* Try the cache first. */
1862 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
1863 >= bidi_cache_start)
1864 bidi_cache_fetch_state (idx, bidi_it);
1865 else
1866 {
1867 int new_level;
1868
1869 if (end_flag)
1870 abort (); /* if we are at end of level, its edges must be cached */
1871
1872 bidi_cache_iterator_state (bidi_it, 1);
1873 do {
1874 new_level = bidi_level_of_next_char (bidi_it);
1875 bidi_cache_iterator_state (bidi_it, 1);
1876 } while (new_level >= level);
1877 }
1878 }
1879
1880 void
1881 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1882 {
1883 int old_level, new_level, next_level;
1884 struct bidi_it sentinel;
1885
1886 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
1887 abort ();
1888
1889 if (bidi_it->scan_dir == 0)
1890 {
1891 bidi_it->scan_dir = 1; /* default to logical order */
1892 }
1893
1894 /* If we just passed a newline, initialize for the next line. */
1895 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1896 bidi_line_init (bidi_it);
1897
1898 /* Prepare the sentinel iterator state, and cache it. When we bump
1899 into it, scanning backwards, we'll know that the last non-base
1900 level is exhausted. */
1901 if (bidi_cache_idx == bidi_cache_start)
1902 {
1903 bidi_copy_it (&sentinel, bidi_it);
1904 if (bidi_it->first_elt)
1905 {
1906 sentinel.charpos--; /* cached charpos needs to be monotonic */
1907 sentinel.bytepos--;
1908 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1909 sentinel.ch_len = 1;
1910 sentinel.nchars = 1;
1911 }
1912 bidi_cache_iterator_state (&sentinel, 1);
1913 }
1914
1915 old_level = bidi_it->resolved_level;
1916 new_level = bidi_level_of_next_char (bidi_it);
1917
1918 /* Reordering of resolved levels (clause L2) is implemented by
1919 jumping to the other edge of the level and flipping direction of
1920 scanning the text whenever we find a level change. */
1921 if (new_level != old_level)
1922 {
1923 int ascending = new_level > old_level;
1924 int level_to_search = ascending ? old_level + 1 : old_level;
1925 int incr = ascending ? 1 : -1;
1926 int expected_next_level = old_level + incr;
1927
1928 /* Jump (or walk) to the other edge of this level. */
1929 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1930 /* Switch scan direction and peek at the next character in the
1931 new direction. */
1932 bidi_it->scan_dir = -bidi_it->scan_dir;
1933
1934 /* The following loop handles the case where the resolved level
1935 jumps by more than one. This is typical for numbers inside a
1936 run of text with left-to-right embedding direction, but can
1937 also happen in other situations. In those cases the decision
1938 where to continue after a level change, and in what direction,
1939 is tricky. For example, given a text like below:
1940
1941 abcdefgh
1942 11336622
1943
1944 (where the numbers below the text show the resolved levels),
1945 the result of reordering according to UAX#9 should be this:
1946
1947 efdcghba
1948
1949 This is implemented by the loop below which flips direction
1950 and jumps to the other edge of the level each time it finds
1951 the new level not to be the expected one. The expected level
1952 is always one more or one less than the previous one. */
1953 next_level = bidi_peek_at_next_level (bidi_it);
1954 while (next_level != expected_next_level)
1955 {
1956 expected_next_level += incr;
1957 level_to_search += incr;
1958 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1959 bidi_it->scan_dir = -bidi_it->scan_dir;
1960 next_level = bidi_peek_at_next_level (bidi_it);
1961 }
1962
1963 /* Finally, deliver the next character in the new direction. */
1964 next_level = bidi_level_of_next_char (bidi_it);
1965 }
1966
1967 /* Take note when we have just processed the newline that precedes
1968 the end of the paragraph. The next time we are about to be
1969 called, set_iterator_to_next will automatically reinit the
1970 paragraph direction, if needed. We do this at the newline before
1971 the paragraph separator, because the next character might not be
1972 the first character of the next paragraph, due to the bidi
1973 reordering, whereas we _must_ know the paragraph base direction
1974 _before_ we process the paragraph's text, since the base
1975 direction affects the reordering. */
1976 if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
1977 {
1978 /* The paragraph direction of the entire string, once
1979 determined, is in effect for the entire string. Setting the
1980 separator limit to the end of the string prevents
1981 bidi_paragraph_init from being called automatically on this
1982 string. */
1983 if (bidi_it->string.s)
1984 bidi_it->separator_limit = bidi_it->string.schars;
1985 else if (bidi_it->bytepos < ZV_BYTE)
1986 {
1987 EMACS_INT sep_len =
1988 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
1989 bidi_it->bytepos + bidi_it->ch_len);
1990 if (bidi_it->nchars <= 0)
1991 abort ();
1992 if (sep_len >= 0)
1993 {
1994 bidi_it->new_paragraph = 1;
1995 /* Record the buffer position of the last character of the
1996 paragraph separator. */
1997 bidi_it->separator_limit =
1998 bidi_it->charpos + bidi_it->nchars + sep_len;
1999 }
2000 }
2001 }
2002
2003 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2004 {
2005 /* If we are at paragraph's base embedding level and beyond the
2006 last cached position, the cache's job is done and we can
2007 discard it. */
2008 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2009 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2010 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2011 bidi_cache_reset ();
2012 /* But as long as we are caching during forward scan, we must
2013 cache each state, or else the cache integrity will be
2014 compromised: it assumes cached states correspond to buffer
2015 positions 1:1. */
2016 else
2017 bidi_cache_iterator_state (bidi_it, 1);
2018 }
2019 }
2020
2021 /* This is meant to be called from within the debugger, whenever you
2022 wish to examine the cache contents. */
2023 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2024 void
2025 bidi_dump_cached_states (void)
2026 {
2027 int i;
2028 int ndigits = 1;
2029
2030 if (bidi_cache_idx == 0)
2031 {
2032 fprintf (stderr, "The cache is empty.\n");
2033 return;
2034 }
2035 fprintf (stderr, "Total of %d state%s in cache:\n",
2036 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2037
2038 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2039 ndigits++;
2040 fputs ("ch ", stderr);
2041 for (i = 0; i < bidi_cache_idx; i++)
2042 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2043 fputs ("\n", stderr);
2044 fputs ("lvl ", stderr);
2045 for (i = 0; i < bidi_cache_idx; i++)
2046 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2047 fputs ("\n", stderr);
2048 fputs ("pos ", stderr);
2049 for (i = 0; i < bidi_cache_idx; i++)
2050 fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2051 fputs ("\n", stderr);
2052 }