1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
38 Two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
43 A few auxiliary entry points are used to initialize the bidi
44 iterator for iterating an object (buffer or string), push and pop
45 the bidi iterator state, and save and restore the state of the bidi
48 If you want to understand the code, you will have to read it
49 together with the relevant portions of UAX#9. The comments include
50 references to UAX#9 rules, for that very reason.
52 A note about references to UAX#9 rules: if the reference says
53 something like "X9/Retaining", it means that you need to refer to
54 rule X9 and to its modifications decribed in the "Implementation
55 Notes" section of UAX#9, under "Retaining Format Codes". */
63 #include "character.h"
64 #include "dispextern.h"
66 static int bidi_initialized
= 0;
68 static Lisp_Object bidi_type_table
, bidi_mirror_table
;
70 #define LRM_CHAR 0x200E
71 #define RLM_CHAR 0x200F
74 /* Data type for describing the bidirectional character categories. */
82 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE
;
83 int bidi_ignore_explicit_marks_for_paragraph_level
= 1;
85 static Lisp_Object paragraph_start_re
, paragraph_separate_re
;
86 static Lisp_Object Qparagraph_start
, Qparagraph_separate
;
89 /***********************************************************************
91 ***********************************************************************/
93 /* Return the bidi type of a character CH, subject to the current
94 directional OVERRIDE. */
95 static inline bidi_type_t
96 bidi_get_type (int ch
, bidi_dir_t override
)
98 bidi_type_t default_type
;
102 if (ch
< 0 || ch
> MAX_CHAR
)
105 default_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
107 if (override
== NEUTRAL_DIR
)
110 switch (default_type
)
112 /* Although UAX#9 does not tell, it doesn't make sense to
113 override NEUTRAL_B and LRM/RLM characters. */
128 if (override
== L2R
) /* X6 */
130 else if (override
== R2L
)
133 abort (); /* can't happen: handled above */
139 bidi_check_type (bidi_type_t type
)
141 if (type
< UNKNOWN_BT
|| type
> NEUTRAL_ON
)
145 /* Given a bidi TYPE of a character, return its category. */
146 static inline bidi_category_t
147 bidi_get_category (bidi_type_t type
)
161 case PDF
: /* ??? really?? */
180 /* Return the mirrored character of C, if it has one. If C has no
181 mirrored counterpart, return C.
182 Note: The conditions in UAX#9 clause L4 regarding the surrounding
183 context must be tested by the caller. */
185 bidi_mirror_char (int c
)
191 if (c
< 0 || c
> MAX_CHAR
)
194 val
= CHAR_TABLE_REF (bidi_mirror_table
, c
);
199 if (v
< 0 || v
> MAX_CHAR
)
208 /* Determine the start-of-run (sor) directional type given the two
209 embedding levels on either side of the run boundary. Also, update
210 the saved info about previously seen characters, since that info is
211 generally valid for a single level run. */
213 bidi_set_sor_type (struct bidi_it
*bidi_it
, int level_before
, int level_after
)
215 int higher_level
= level_before
> level_after
? level_before
: level_after
;
217 /* The prev_was_pdf gork is required for when we have several PDFs
218 in a row. In that case, we want to compute the sor type for the
219 next level run only once: when we see the first PDF. That's
220 because the sor type depends only on the higher of the two levels
221 that we find on the two sides of the level boundary (see UAX#9,
222 clause X10), and so we don't need to know the final embedding
223 level to which we descend after processing all the PDFs. */
224 if (!bidi_it
->prev_was_pdf
|| level_before
< level_after
)
225 /* FIXME: should the default sor direction be user selectable? */
226 bidi_it
->sor
= (higher_level
& 1) != 0 ? R2L
: L2R
;
227 if (level_before
> level_after
)
228 bidi_it
->prev_was_pdf
= 1;
230 bidi_it
->prev
.type
= UNKNOWN_BT
;
231 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
232 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
233 bidi_it
->prev_for_neutral
.type
= bidi_it
->sor
== R2L
? STRONG_R
: STRONG_L
;
234 bidi_it
->prev_for_neutral
.charpos
= bidi_it
->charpos
;
235 bidi_it
->prev_for_neutral
.bytepos
= bidi_it
->bytepos
;
236 bidi_it
->next_for_neutral
.type
= bidi_it
->next_for_neutral
.type_after_w1
=
237 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
238 bidi_it
->ignore_bn_limit
= -1; /* meaning it's unknown */
241 /* Push the current embedding level and override status; reset the
242 current level to LEVEL and the current override status to OVERRIDE. */
244 bidi_push_embedding_level (struct bidi_it
*bidi_it
,
245 int level
, bidi_dir_t override
)
247 bidi_it
->stack_idx
++;
248 xassert (bidi_it
->stack_idx
< BIDI_MAXLEVEL
);
249 bidi_it
->level_stack
[bidi_it
->stack_idx
].level
= level
;
250 bidi_it
->level_stack
[bidi_it
->stack_idx
].override
= override
;
253 /* Pop the embedding level and directional override status from the
254 stack, and return the new level. */
256 bidi_pop_embedding_level (struct bidi_it
*bidi_it
)
258 /* UAX#9 says to ignore invalid PDFs. */
259 if (bidi_it
->stack_idx
> 0)
260 bidi_it
->stack_idx
--;
261 return bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
264 /* Record in SAVED_INFO the information about the current character. */
266 bidi_remember_char (struct bidi_saved_info
*saved_info
,
267 struct bidi_it
*bidi_it
)
269 saved_info
->charpos
= bidi_it
->charpos
;
270 saved_info
->bytepos
= bidi_it
->bytepos
;
271 saved_info
->type
= bidi_it
->type
;
272 bidi_check_type (bidi_it
->type
);
273 saved_info
->type_after_w1
= bidi_it
->type_after_w1
;
274 bidi_check_type (bidi_it
->type_after_w1
);
275 saved_info
->orig_type
= bidi_it
->orig_type
;
276 bidi_check_type (bidi_it
->orig_type
);
279 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
280 copies the part of the level stack that is actually in use. */
282 bidi_copy_it (struct bidi_it
*to
, struct bidi_it
*from
)
286 /* Copy everything except the level stack and beyond. */
287 memcpy (to
, from
, offsetof (struct bidi_it
, level_stack
[0]));
289 /* Copy the active part of the level stack. */
290 to
->level_stack
[0] = from
->level_stack
[0]; /* level zero is always in use */
291 for (i
= 1; i
<= from
->stack_idx
; i
++)
292 to
->level_stack
[i
] = from
->level_stack
[i
];
296 /***********************************************************************
297 Caching the bidi iterator states
298 ***********************************************************************/
300 #define BIDI_CACHE_CHUNK 200
301 static struct bidi_it
*bidi_cache
;
302 static ptrdiff_t bidi_cache_size
= 0;
303 enum { elsz
= sizeof (struct bidi_it
) };
304 static ptrdiff_t bidi_cache_idx
; /* next unused cache slot */
305 static ptrdiff_t bidi_cache_last_idx
; /* slot of last cache hit */
306 static ptrdiff_t bidi_cache_start
= 0; /* start of cache for this
309 /* 5-slot stack for saving the start of the previous level of the
310 cache. xdisp.c maintains a 5-slot stack for its iterator state,
311 and we need the same size of our stack. */
312 static ptrdiff_t bidi_cache_start_stack
[IT_STACK_SIZE
];
313 static int bidi_cache_sp
;
315 /* Size of header used by bidi_shelve_cache. */
318 bidi_shelve_header_size
=
319 (sizeof (bidi_cache_idx
) + sizeof (bidi_cache_start_stack
)
320 + sizeof (bidi_cache_sp
) + sizeof (bidi_cache_start
)
321 + sizeof (bidi_cache_last_idx
))
324 /* Reset the cache state to the empty state. We only reset the part
325 of the cache relevant to iteration of the current object. Previous
326 objects, which are pushed on the display iterator's stack, are left
327 intact. This is called when the cached information is no more
328 useful for the current iteration, e.g. when we were reseated to a
329 new position on the same object. */
331 bidi_cache_reset (void)
333 bidi_cache_idx
= bidi_cache_start
;
334 bidi_cache_last_idx
= -1;
337 /* Shrink the cache to its minimal size. Called when we init the bidi
338 iterator for reordering a buffer or a string that does not come
339 from display properties, because that means all the previously
340 cached info is of no further use. */
342 bidi_cache_shrink (void)
344 if (bidi_cache_size
> BIDI_CACHE_CHUNK
)
346 bidi_cache_size
= BIDI_CACHE_CHUNK
;
348 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
354 bidi_cache_fetch_state (ptrdiff_t idx
, struct bidi_it
*bidi_it
)
356 int current_scan_dir
= bidi_it
->scan_dir
;
358 if (idx
< bidi_cache_start
|| idx
>= bidi_cache_idx
)
361 bidi_copy_it (bidi_it
, &bidi_cache
[idx
]);
362 bidi_it
->scan_dir
= current_scan_dir
;
363 bidi_cache_last_idx
= idx
;
366 /* Find a cached state with a given CHARPOS and resolved embedding
367 level less or equal to LEVEL. if LEVEL is -1, disregard the
368 resolved levels in cached states. DIR, if non-zero, means search
369 in that direction from the last cache hit. */
370 static inline ptrdiff_t
371 bidi_cache_search (EMACS_INT charpos
, int level
, int dir
)
373 ptrdiff_t i
, i_start
;
375 if (bidi_cache_idx
> bidi_cache_start
)
377 if (bidi_cache_last_idx
== -1)
378 bidi_cache_last_idx
= bidi_cache_idx
- 1;
379 if (charpos
< bidi_cache
[bidi_cache_last_idx
].charpos
)
382 i_start
= bidi_cache_last_idx
- 1;
384 else if (charpos
> (bidi_cache
[bidi_cache_last_idx
].charpos
385 + bidi_cache
[bidi_cache_last_idx
].nchars
- 1))
388 i_start
= bidi_cache_last_idx
+ 1;
391 i_start
= bidi_cache_last_idx
;
395 i_start
= bidi_cache_idx
- 1;
400 /* Linear search for now; FIXME! */
401 for (i
= i_start
; i
>= bidi_cache_start
; i
--)
402 if (bidi_cache
[i
].charpos
<= charpos
403 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
404 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
409 for (i
= i_start
; i
< bidi_cache_idx
; i
++)
410 if (bidi_cache
[i
].charpos
<= charpos
411 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
412 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
420 /* Find a cached state where the resolved level changes to a value
421 that is lower than LEVEL, and return its cache slot index. DIR is
422 the direction to search, starting with the last used cache slot.
423 If DIR is zero, we search backwards from the last occupied cache
424 slot. BEFORE, if non-zero, means return the index of the slot that
425 is ``before'' the level change in the search direction. That is,
426 given the cached levels like this:
431 and assuming we are at the position cached at the slot marked with
432 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
433 index of slot B or A, depending whether BEFORE is, respectively,
436 bidi_cache_find_level_change (int level
, int dir
, int before
)
440 ptrdiff_t i
= dir
? bidi_cache_last_idx
: bidi_cache_idx
- 1;
441 int incr
= before
? 1 : 0;
443 xassert (!dir
|| bidi_cache_last_idx
>= 0);
452 while (i
>= bidi_cache_start
+ incr
)
454 if (bidi_cache
[i
- incr
].resolved_level
>= 0
455 && bidi_cache
[i
- incr
].resolved_level
< level
)
462 while (i
< bidi_cache_idx
- incr
)
464 if (bidi_cache
[i
+ incr
].resolved_level
>= 0
465 && bidi_cache
[i
+ incr
].resolved_level
< level
)
476 bidi_cache_ensure_space (ptrdiff_t idx
)
478 /* Enlarge the cache as needed. */
479 if (idx
>= bidi_cache_size
)
483 /* The bidi cache cannot be larger than the largest Lisp string
485 ptrdiff_t string_or_buffer_bound
=
486 max (BUF_BYTES_MAX
, STRING_BYTES_BOUND
);
488 /* Also, it cannot be larger than what C can represent. */
490 (min (PTRDIFF_MAX
, SIZE_MAX
) - bidi_shelve_header_size
) / elsz
;
492 if (min (string_or_buffer_bound
, c_bound
) <= idx
)
493 memory_full (SIZE_MAX
);
494 new_size
= idx
- idx
% BIDI_CACHE_CHUNK
+ BIDI_CACHE_CHUNK
;
495 bidi_cache
= (struct bidi_it
*) xrealloc (bidi_cache
, new_size
* elsz
);
496 bidi_cache_size
= new_size
;
501 bidi_cache_iterator_state (struct bidi_it
*bidi_it
, int resolved
)
505 /* We should never cache on backward scans. */
506 if (bidi_it
->scan_dir
== -1)
508 idx
= bidi_cache_search (bidi_it
->charpos
, -1, 1);
512 idx
= bidi_cache_idx
;
513 bidi_cache_ensure_space (idx
);
514 /* Character positions should correspond to cache positions 1:1.
515 If we are outside the range of cached positions, the cache is
516 useless and must be reset. */
517 if (idx
> bidi_cache_start
&&
518 (bidi_it
->charpos
> (bidi_cache
[idx
- 1].charpos
519 + bidi_cache
[idx
- 1].nchars
)
520 || bidi_it
->charpos
< bidi_cache
[bidi_cache_start
].charpos
))
523 idx
= bidi_cache_start
;
525 if (bidi_it
->nchars
<= 0)
527 bidi_copy_it (&bidi_cache
[idx
], bidi_it
);
529 bidi_cache
[idx
].resolved_level
= -1;
533 /* Copy only the members which could have changed, to avoid
534 costly copying of the entire struct. */
535 bidi_cache
[idx
].type
= bidi_it
->type
;
536 bidi_check_type (bidi_it
->type
);
537 bidi_cache
[idx
].type_after_w1
= bidi_it
->type_after_w1
;
538 bidi_check_type (bidi_it
->type_after_w1
);
540 bidi_cache
[idx
].resolved_level
= bidi_it
->resolved_level
;
542 bidi_cache
[idx
].resolved_level
= -1;
543 bidi_cache
[idx
].invalid_levels
= bidi_it
->invalid_levels
;
544 bidi_cache
[idx
].invalid_rl_levels
= bidi_it
->invalid_rl_levels
;
545 bidi_cache
[idx
].next_for_neutral
= bidi_it
->next_for_neutral
;
546 bidi_cache
[idx
].next_for_ws
= bidi_it
->next_for_ws
;
547 bidi_cache
[idx
].ignore_bn_limit
= bidi_it
->ignore_bn_limit
;
550 bidi_cache_last_idx
= idx
;
551 if (idx
>= bidi_cache_idx
)
552 bidi_cache_idx
= idx
+ 1;
555 static inline bidi_type_t
556 bidi_cache_find (EMACS_INT charpos
, int level
, struct bidi_it
*bidi_it
)
558 ptrdiff_t i
= bidi_cache_search (charpos
, level
, bidi_it
->scan_dir
);
560 if (i
>= bidi_cache_start
)
562 bidi_dir_t current_scan_dir
= bidi_it
->scan_dir
;
564 bidi_copy_it (bidi_it
, &bidi_cache
[i
]);
565 bidi_cache_last_idx
= i
;
566 /* Don't let scan direction from from the cached state override
567 the current scan direction. */
568 bidi_it
->scan_dir
= current_scan_dir
;
569 return bidi_it
->type
;
576 bidi_peek_at_next_level (struct bidi_it
*bidi_it
)
578 if (bidi_cache_idx
== bidi_cache_start
|| bidi_cache_last_idx
== -1)
580 return bidi_cache
[bidi_cache_last_idx
+ bidi_it
->scan_dir
].resolved_level
;
584 /***********************************************************************
585 Pushing and popping the bidi iterator state
586 ***********************************************************************/
588 /* Push the bidi iterator state in preparation for reordering a
589 different object, e.g. display string found at certain buffer
590 position. Pushing the bidi iterator boils down to saving its
591 entire state on the cache and starting a new cache "stacked" on top
592 of the current cache. */
594 bidi_push_it (struct bidi_it
*bidi_it
)
596 /* Save the current iterator state in its entirety after the last
598 bidi_cache_ensure_space (bidi_cache_idx
);
599 memcpy (&bidi_cache
[bidi_cache_idx
++], bidi_it
, sizeof (struct bidi_it
));
601 /* Push the current cache start onto the stack. */
602 xassert (bidi_cache_sp
< IT_STACK_SIZE
);
603 bidi_cache_start_stack
[bidi_cache_sp
++] = bidi_cache_start
;
605 /* Start a new level of cache, and make it empty. */
606 bidi_cache_start
= bidi_cache_idx
;
607 bidi_cache_last_idx
= -1;
610 /* Restore the iterator state saved by bidi_push_it and return the
611 cache to the corresponding state. */
613 bidi_pop_it (struct bidi_it
*bidi_it
)
615 if (bidi_cache_start
<= 0)
618 /* Reset the next free cache slot index to what it was before the
619 call to bidi_push_it. */
620 bidi_cache_idx
= bidi_cache_start
- 1;
622 /* Restore the bidi iterator state saved in the cache. */
623 memcpy (bidi_it
, &bidi_cache
[bidi_cache_idx
], sizeof (struct bidi_it
));
625 /* Pop the previous cache start from the stack. */
626 if (bidi_cache_sp
<= 0)
628 bidi_cache_start
= bidi_cache_start_stack
[--bidi_cache_sp
];
630 /* Invalidate the last-used cache slot data. */
631 bidi_cache_last_idx
= -1;
634 /* Stash away a copy of the cache and its control variables. */
636 bidi_shelve_cache (void)
638 unsigned char *databuf
;
640 if (bidi_cache_idx
== 0)
643 databuf
= xmalloc (bidi_shelve_header_size
644 + bidi_cache_idx
* sizeof (struct bidi_it
));
645 memcpy (databuf
, &bidi_cache_idx
, sizeof (bidi_cache_idx
));
646 memcpy (databuf
+ sizeof (bidi_cache_idx
),
647 bidi_cache
, bidi_cache_idx
* sizeof (struct bidi_it
));
648 memcpy (databuf
+ sizeof (bidi_cache_idx
)
649 + bidi_cache_idx
* sizeof (struct bidi_it
),
650 bidi_cache_start_stack
, sizeof (bidi_cache_start_stack
));
651 memcpy (databuf
+ sizeof (bidi_cache_idx
)
652 + bidi_cache_idx
* sizeof (struct bidi_it
)
653 + sizeof (bidi_cache_start_stack
),
654 &bidi_cache_sp
, sizeof (bidi_cache_sp
));
655 memcpy (databuf
+ sizeof (bidi_cache_idx
)
656 + bidi_cache_idx
* sizeof (struct bidi_it
)
657 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
658 &bidi_cache_start
, sizeof (bidi_cache_start
));
659 memcpy (databuf
+ sizeof (bidi_cache_idx
)
660 + bidi_cache_idx
* sizeof (struct bidi_it
)
661 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
662 + sizeof (bidi_cache_start
),
663 &bidi_cache_last_idx
, sizeof (bidi_cache_last_idx
));
668 /* Restore the cache state from a copy stashed away by bidi_shelve_cache. */
670 bidi_unshelve_cache (void *databuf
)
672 unsigned char *p
= databuf
;
676 /* A NULL pointer means an empty cache. */
677 bidi_cache_start
= 0;
683 memcpy (&bidi_cache_idx
, p
, sizeof (bidi_cache_idx
));
684 bidi_cache_ensure_space (bidi_cache_idx
);
685 memcpy (bidi_cache
, p
+ sizeof (bidi_cache_idx
),
686 bidi_cache_idx
* sizeof (struct bidi_it
));
687 memcpy (bidi_cache_start_stack
,
688 p
+ sizeof (bidi_cache_idx
)
689 + bidi_cache_idx
* sizeof (struct bidi_it
),
690 sizeof (bidi_cache_start_stack
));
691 memcpy (&bidi_cache_sp
,
692 p
+ sizeof (bidi_cache_idx
)
693 + bidi_cache_idx
* sizeof (struct bidi_it
)
694 + sizeof (bidi_cache_start_stack
),
695 sizeof (bidi_cache_sp
));
696 memcpy (&bidi_cache_start
,
697 p
+ sizeof (bidi_cache_idx
)
698 + bidi_cache_idx
* sizeof (struct bidi_it
)
699 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
700 sizeof (bidi_cache_start
));
701 memcpy (&bidi_cache_last_idx
,
702 p
+ sizeof (bidi_cache_idx
)
703 + bidi_cache_idx
* sizeof (struct bidi_it
)
704 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
705 + sizeof (bidi_cache_start
),
706 sizeof (bidi_cache_last_idx
));
713 /***********************************************************************
715 ***********************************************************************/
717 bidi_initialize (void)
720 #include "biditype.h"
721 #include "bidimirror.h"
725 bidi_type_table
= Fmake_char_table (Qnil
, make_number (STRONG_L
));
726 staticpro (&bidi_type_table
);
728 for (i
= 0; i
< sizeof bidi_type
/ sizeof bidi_type
[0]; i
++)
729 char_table_set_range (bidi_type_table
, bidi_type
[i
].from
, bidi_type
[i
].to
,
730 make_number (bidi_type
[i
].type
));
732 bidi_mirror_table
= Fmake_char_table (Qnil
, Qnil
);
733 staticpro (&bidi_mirror_table
);
735 for (i
= 0; i
< sizeof bidi_mirror
/ sizeof bidi_mirror
[0]; i
++)
736 char_table_set (bidi_mirror_table
, bidi_mirror
[i
].from
,
737 make_number (bidi_mirror
[i
].to
));
739 Qparagraph_start
= intern ("paragraph-start");
740 staticpro (&Qparagraph_start
);
741 paragraph_start_re
= Fsymbol_value (Qparagraph_start
);
742 if (!STRINGP (paragraph_start_re
))
743 paragraph_start_re
= build_string ("\f\\|[ \t]*$");
744 staticpro (¶graph_start_re
);
745 Qparagraph_separate
= intern ("paragraph-separate");
746 staticpro (&Qparagraph_separate
);
747 paragraph_separate_re
= Fsymbol_value (Qparagraph_separate
);
748 if (!STRINGP (paragraph_separate_re
))
749 paragraph_separate_re
= build_string ("[ \t\f]*$");
750 staticpro (¶graph_separate_re
);
754 bidi_initialized
= 1;
757 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
760 bidi_set_paragraph_end (struct bidi_it
*bidi_it
)
762 bidi_it
->invalid_levels
= 0;
763 bidi_it
->invalid_rl_levels
= -1;
764 bidi_it
->stack_idx
= 0;
765 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
768 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
770 bidi_init_it (EMACS_INT charpos
, EMACS_INT bytepos
, int frame_window_p
,
771 struct bidi_it
*bidi_it
)
773 if (! bidi_initialized
)
776 bidi_it
->charpos
= charpos
;
778 bidi_it
->bytepos
= bytepos
;
779 bidi_it
->frame_window_p
= frame_window_p
;
780 bidi_it
->nchars
= -1; /* to be computed in bidi_resolve_explicit_1 */
781 bidi_it
->first_elt
= 1;
782 bidi_set_paragraph_end (bidi_it
);
783 bidi_it
->new_paragraph
= 1;
784 bidi_it
->separator_limit
= -1;
785 bidi_it
->type
= NEUTRAL_B
;
786 bidi_it
->type_after_w1
= NEUTRAL_B
;
787 bidi_it
->orig_type
= NEUTRAL_B
;
788 bidi_it
->prev_was_pdf
= 0;
789 bidi_it
->prev
.type
= bidi_it
->prev
.type_after_w1
=
790 bidi_it
->prev
.orig_type
= UNKNOWN_BT
;
791 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
792 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
793 bidi_it
->next_for_neutral
.charpos
= -1;
794 bidi_it
->next_for_neutral
.type
=
795 bidi_it
->next_for_neutral
.type_after_w1
=
796 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
797 bidi_it
->prev_for_neutral
.charpos
= -1;
798 bidi_it
->prev_for_neutral
.type
=
799 bidi_it
->prev_for_neutral
.type_after_w1
=
800 bidi_it
->prev_for_neutral
.orig_type
= UNKNOWN_BT
;
801 bidi_it
->sor
= L2R
; /* FIXME: should it be user-selectable? */
802 bidi_it
->disp_pos
= -1; /* invalid/unknown */
803 /* We can only shrink the cache if we are at the bottom level of its
805 if (bidi_cache_start
== 0)
806 bidi_cache_shrink ();
811 /* Perform initializations for reordering a new line of bidi text. */
813 bidi_line_init (struct bidi_it
*bidi_it
)
815 bidi_it
->scan_dir
= 1; /* FIXME: do we need to have control on this? */
816 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
817 bidi_it
->level_stack
[0].override
= NEUTRAL_DIR
; /* X1 */
818 bidi_it
->invalid_levels
= 0;
819 bidi_it
->invalid_rl_levels
= -1;
820 bidi_it
->next_en_pos
= -1;
821 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
822 bidi_set_sor_type (bidi_it
,
823 bidi_it
->paragraph_dir
== R2L
? 1 : 0,
824 bidi_it
->level_stack
[0].level
); /* X10 */
830 /***********************************************************************
832 ***********************************************************************/
834 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
835 are zero-based character positions in S, BEGBYTE is byte position
836 corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
838 static inline EMACS_INT
839 bidi_count_bytes (const unsigned char *s
, const EMACS_INT beg
,
840 const EMACS_INT begbyte
, const EMACS_INT end
, int unibyte
)
843 const unsigned char *p
= s
+ begbyte
, *start
= p
;
849 if (!CHAR_HEAD_P (*p
))
854 p
+= BYTES_BY_CHAR_HEAD (*p
);
862 /* Fetch and returns the character at byte position BYTEPOS. If S is
863 non-NULL, fetch the character from string S; otherwise fetch the
864 character from the current buffer. UNIBYTE non-zero means S is a
867 bidi_char_at_pos (EMACS_INT bytepos
, const unsigned char *s
, int unibyte
)
874 return STRING_CHAR (s
+ bytepos
);
877 return FETCH_MULTIBYTE_CHAR (bytepos
);
880 /* Fetch and return the character at BYTEPOS/CHARPOS. If that
881 character is covered by a display string, treat the entire run of
882 covered characters as a single character u+FFFC, and return their
883 combined length in CH_LEN and NCHARS. DISP_POS specifies the
884 character position of the next display string, or -1 if not yet
885 computed. When the next character is at or beyond that position,
886 the function updates DISP_POS with the position of the next display
887 string. STRING->s is the C string to iterate, or NULL if iterating
888 over a buffer or a Lisp string; in the latter case, STRING->lstring
889 is the Lisp string. */
891 bidi_fetch_char (EMACS_INT bytepos
, EMACS_INT charpos
, EMACS_INT
*disp_pos
,
892 struct bidi_string_data
*string
,
893 int frame_window_p
, EMACS_INT
*ch_len
, EMACS_INT
*nchars
)
897 (string
->s
|| STRINGP (string
->lstring
)) ? string
->schars
: ZV
;
900 /* If we got past the last known position of display string, compute
901 the position of the next one. That position could be at CHARPOS. */
902 if (charpos
< endpos
&& charpos
> *disp_pos
)
904 SET_TEXT_POS (pos
, charpos
, bytepos
);
905 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
);
908 /* Fetch the character at BYTEPOS. */
909 if (charpos
>= endpos
)
916 else if (charpos
>= *disp_pos
)
918 EMACS_INT disp_end_pos
;
920 /* We don't expect to find ourselves in the middle of a display
921 property. Hopefully, it will never be needed. */
922 if (charpos
> *disp_pos
)
924 /* Return the Unicode Object Replacement Character to represent
925 the entire run of characters covered by the display string. */
927 disp_end_pos
= compute_display_string_end (*disp_pos
, string
);
928 *nchars
= disp_end_pos
- *disp_pos
;
932 *ch_len
= bidi_count_bytes (string
->s
, *disp_pos
, bytepos
,
933 disp_end_pos
, string
->unibyte
);
934 else if (STRINGP (string
->lstring
))
935 *ch_len
= bidi_count_bytes (SDATA (string
->lstring
), *disp_pos
,
936 bytepos
, disp_end_pos
, string
->unibyte
);
938 *ch_len
= CHAR_TO_BYTE (disp_end_pos
) - bytepos
;
946 if (!string
->unibyte
)
948 ch
= STRING_CHAR_AND_LENGTH (string
->s
+ bytepos
, len
);
953 ch
= UNIBYTE_TO_CHAR (string
->s
[bytepos
]);
957 else if (STRINGP (string
->lstring
))
961 if (!string
->unibyte
)
963 ch
= STRING_CHAR_AND_LENGTH (SDATA (string
->lstring
) + bytepos
,
969 ch
= UNIBYTE_TO_CHAR (SREF (string
->lstring
, bytepos
));
975 ch
= FETCH_MULTIBYTE_CHAR (bytepos
);
976 *ch_len
= CHAR_BYTES (ch
);
981 /* If we just entered a run of characters covered by a display
982 string, compute the position of the next display string. */
983 if (charpos
+ *nchars
<= endpos
&& charpos
+ *nchars
> *disp_pos
)
985 SET_TEXT_POS (pos
, charpos
+ *nchars
, bytepos
+ *ch_len
);
986 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
);
993 /***********************************************************************
994 Determining paragraph direction
995 ***********************************************************************/
997 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
998 Value is the non-negative length of the paragraph separator
999 following the buffer position, -1 if position is at the beginning
1000 of a new paragraph, or -2 if position is neither at beginning nor
1001 at end of a paragraph. */
1003 bidi_at_paragraph_end (EMACS_INT charpos
, EMACS_INT bytepos
)
1006 Lisp_Object start_re
;
1009 sep_re
= paragraph_separate_re
;
1010 start_re
= paragraph_start_re
;
1012 val
= fast_looking_at (sep_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
);
1015 if (fast_looking_at (start_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
) >= 0)
1024 /* Find the beginning of this paragraph by looking back in the buffer.
1025 Value is the byte position of the paragraph's beginning. */
1027 bidi_find_paragraph_start (EMACS_INT pos
, EMACS_INT pos_byte
)
1029 Lisp_Object re
= paragraph_start_re
;
1030 EMACS_INT limit
= ZV
, limit_byte
= ZV_BYTE
;
1032 while (pos_byte
> BEGV_BYTE
1033 && fast_looking_at (re
, pos
, pos_byte
, limit
, limit_byte
, Qnil
) < 0)
1035 /* FIXME: What if the paragraph beginning is covered by a
1036 display string? And what if a display string covering some
1037 of the text over which we scan back includes
1038 paragraph_start_re? */
1039 pos
= find_next_newline_no_quit (pos
- 1, -1);
1040 pos_byte
= CHAR_TO_BYTE (pos
);
1045 /* Determine the base direction, a.k.a. base embedding level, of the
1046 paragraph we are about to iterate through. If DIR is either L2R or
1047 R2L, just use that. Otherwise, determine the paragraph direction
1048 from the first strong directional character of the paragraph.
1050 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1051 has no strong directional characters and both DIR and
1052 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1053 in the buffer until a paragraph is found with a strong character,
1054 or until hitting BEGV. In the latter case, fall back to L2R. This
1055 flag is used in current-bidi-paragraph-direction.
1057 Note that this function gives the paragraph separator the same
1058 direction as the preceding paragraph, even though Emacs generally
1059 views the separartor as not belonging to any paragraph. */
1061 bidi_paragraph_init (bidi_dir_t dir
, struct bidi_it
*bidi_it
, int no_default_p
)
1063 EMACS_INT bytepos
= bidi_it
->bytepos
;
1064 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
1065 EMACS_INT pstartbyte
;
1066 /* Note that begbyte is a byte position, while end is a character
1067 position. Yes, this is ugly, but we are trying to avoid costly
1068 calls to BYTE_TO_CHAR and its ilk. */
1069 EMACS_INT begbyte
= string_p
? 0 : BEGV_BYTE
;
1070 EMACS_INT end
= string_p
? bidi_it
->string
.schars
: ZV
;
1072 /* Special case for an empty buffer. */
1073 if (bytepos
== begbyte
&& bidi_it
->charpos
== end
)
1075 /* We should never be called at EOB or before BEGV. */
1076 else if (bidi_it
->charpos
>= end
|| bytepos
< begbyte
)
1081 bidi_it
->paragraph_dir
= L2R
;
1082 bidi_it
->new_paragraph
= 0;
1084 else if (dir
== R2L
)
1086 bidi_it
->paragraph_dir
= R2L
;
1087 bidi_it
->new_paragraph
= 0;
1089 else if (dir
== NEUTRAL_DIR
) /* P2 */
1092 EMACS_INT ch_len
, nchars
;
1093 EMACS_INT pos
, disp_pos
= -1;
1095 const unsigned char *s
;
1097 if (!bidi_initialized
)
1100 /* If we are inside a paragraph separator, we are just waiting
1101 for the separator to be exhausted; use the previous paragraph
1102 direction. But don't do that if we have been just reseated,
1103 because we need to reinitialize below in that case. */
1104 if (!bidi_it
->first_elt
1105 && bidi_it
->charpos
< bidi_it
->separator_limit
)
1108 /* If we are on a newline, get past it to where the next
1109 paragraph might start. But don't do that at BEGV since then
1110 we are potentially in a new paragraph that doesn't yet
1112 pos
= bidi_it
->charpos
;
1113 s
= STRINGP (bidi_it
->string
.lstring
) ?
1114 SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1115 if (bytepos
> begbyte
1116 && bidi_char_at_pos (bytepos
, s
, bidi_it
->string
.unibyte
) == '\n')
1122 /* We are either at the beginning of a paragraph or in the
1123 middle of it. Find where this paragraph starts. */
1126 /* We don't support changes of paragraph direction inside a
1127 string. It is treated as a single paragraph. */
1131 pstartbyte
= bidi_find_paragraph_start (pos
, bytepos
);
1132 bidi_it
->separator_limit
= -1;
1133 bidi_it
->new_paragraph
= 0;
1135 /* The following loop is run more than once only if NO_DEFAULT_P
1136 is non-zero, and only if we are iterating on a buffer. */
1138 bytepos
= pstartbyte
;
1140 pos
= BYTE_TO_CHAR (bytepos
);
1141 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
, &bidi_it
->string
,
1142 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1143 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1145 for (pos
+= nchars
, bytepos
+= ch_len
;
1146 /* NOTE: UAX#9 says to search only for L, AL, or R types
1147 of characters, and ignore RLE, RLO, LRE, and LRO.
1148 However, I'm not sure it makes sense to omit those 4;
1149 should try with and without that to see the effect. */
1150 (bidi_get_category (type
) != STRONG
)
1151 || (bidi_ignore_explicit_marks_for_paragraph_level
1152 && (type
== RLE
|| type
== RLO
1153 || type
== LRE
|| type
== LRO
));
1154 type
= bidi_get_type (ch
, NEUTRAL_DIR
))
1158 /* Pretend there's a paragraph separator at end of
1164 && type
== NEUTRAL_B
1165 && bidi_at_paragraph_end (pos
, bytepos
) >= -1)
1167 /* Fetch next character and advance to get past it. */
1168 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
, &bidi_it
->string
,
1169 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1173 if (type
== STRONG_R
|| type
== STRONG_AL
) /* P3 */
1174 bidi_it
->paragraph_dir
= R2L
;
1175 else if (type
== STRONG_L
)
1176 bidi_it
->paragraph_dir
= L2R
;
1178 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
)
1180 /* If this paragraph is at BEGV, default to L2R. */
1181 if (pstartbyte
== BEGV_BYTE
)
1182 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 */
1185 EMACS_INT prevpbyte
= pstartbyte
;
1186 EMACS_INT p
= BYTE_TO_CHAR (pstartbyte
), pbyte
= pstartbyte
;
1188 /* Find the beginning of the previous paragraph, if any. */
1189 while (pbyte
> BEGV_BYTE
&& prevpbyte
>= pstartbyte
)
1191 /* FXIME: What if p is covered by a display
1192 string? See also a FIXME inside
1193 bidi_find_paragraph_start. */
1195 pbyte
= CHAR_TO_BYTE (p
);
1196 prevpbyte
= bidi_find_paragraph_start (p
, pbyte
);
1198 pstartbyte
= prevpbyte
;
1202 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
);
1207 /* Contrary to UAX#9 clause P3, we only default the paragraph
1208 direction to L2R if we have no previous usable paragraph
1209 direction. This is allowed by the HL1 clause. */
1210 if (bidi_it
->paragraph_dir
!= L2R
&& bidi_it
->paragraph_dir
!= R2L
)
1211 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 ``higher-level protocols'' */
1212 if (bidi_it
->paragraph_dir
== R2L
)
1213 bidi_it
->level_stack
[0].level
= 1;
1215 bidi_it
->level_stack
[0].level
= 0;
1217 bidi_line_init (bidi_it
);
1221 /***********************************************************************
1222 Resolving explicit and implicit levels.
1223 The rest of this file constitutes the core of the UBA implementation.
1224 ***********************************************************************/
1227 bidi_explicit_dir_char (int ch
)
1229 bidi_type_t ch_type
;
1231 if (!bidi_initialized
)
1233 ch_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
1234 return (ch_type
== LRE
|| ch_type
== LRO
1235 || ch_type
== RLE
|| ch_type
== RLO
1239 /* A helper function for bidi_resolve_explicit. It advances to the
1240 next character in logical order and determines the new embedding
1241 level and directional override, but does not take into account
1242 empty embeddings. */
1244 bidi_resolve_explicit_1 (struct bidi_it
*bidi_it
)
1250 bidi_dir_t override
;
1251 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
1253 /* If reseat()'ed, don't advance, so as to start iteration from the
1254 position where we were reseated. bidi_it->bytepos can be less
1255 than BEGV_BYTE after reseat to BEGV. */
1256 if (bidi_it
->bytepos
< (string_p
? 0 : BEGV_BYTE
)
1257 || bidi_it
->first_elt
)
1259 bidi_it
->first_elt
= 0;
1262 const unsigned char *p
=
1263 STRINGP (bidi_it
->string
.lstring
)
1264 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1266 if (bidi_it
->charpos
< 0)
1267 bidi_it
->charpos
= 0;
1268 bidi_it
->bytepos
= bidi_count_bytes (p
, 0, 0, bidi_it
->charpos
,
1269 bidi_it
->string
.unibyte
);
1273 if (bidi_it
->charpos
< BEGV
)
1274 bidi_it
->charpos
= BEGV
;
1275 bidi_it
->bytepos
= CHAR_TO_BYTE (bidi_it
->charpos
);
1278 /* Don't move at end of buffer/string. */
1279 else if (bidi_it
->charpos
< (string_p
? bidi_it
->string
.schars
: ZV
))
1281 /* Advance to the next character, skipping characters covered by
1282 display strings (nchars > 1). */
1283 if (bidi_it
->nchars
<= 0)
1285 bidi_it
->charpos
+= bidi_it
->nchars
;
1286 if (bidi_it
->ch_len
== 0)
1288 bidi_it
->bytepos
+= bidi_it
->ch_len
;
1291 current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
; /* X1 */
1292 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1293 new_level
= current_level
;
1295 if (bidi_it
->charpos
>= (string_p
? bidi_it
->string
.schars
: ZV
))
1298 bidi_it
->ch_len
= 1;
1299 bidi_it
->nchars
= 1;
1300 bidi_it
->disp_pos
= (string_p
? bidi_it
->string
.schars
: ZV
);
1304 /* Fetch the character at BYTEPOS. If it is covered by a
1305 display string, treat the entire run of covered characters as
1306 a single character u+FFFC. */
1307 curchar
= bidi_fetch_char (bidi_it
->bytepos
, bidi_it
->charpos
,
1308 &bidi_it
->disp_pos
, &bidi_it
->string
,
1309 bidi_it
->frame_window_p
,
1310 &bidi_it
->ch_len
, &bidi_it
->nchars
);
1312 bidi_it
->ch
= curchar
;
1314 /* Don't apply directional override here, as all the types we handle
1315 below will not be affected by the override anyway, and we need
1316 the original type unaltered. The override will be applied in
1317 bidi_resolve_weak. */
1318 type
= bidi_get_type (curchar
, NEUTRAL_DIR
);
1319 bidi_it
->orig_type
= type
;
1320 bidi_check_type (bidi_it
->orig_type
);
1323 bidi_it
->prev_was_pdf
= 0;
1325 bidi_it
->type_after_w1
= UNKNOWN_BT
;
1331 bidi_it
->type_after_w1
= type
;
1332 bidi_check_type (bidi_it
->type_after_w1
);
1333 type
= WEAK_BN
; /* X9/Retaining */
1334 if (bidi_it
->ignore_bn_limit
<= -1)
1336 if (current_level
<= BIDI_MAXLEVEL
- 4)
1338 /* Compute the least odd embedding level greater than
1339 the current level. */
1340 new_level
= ((current_level
+ 1) & ~1) + 1;
1341 if (bidi_it
->type_after_w1
== RLE
)
1342 override
= NEUTRAL_DIR
;
1345 if (current_level
== BIDI_MAXLEVEL
- 4)
1346 bidi_it
->invalid_rl_levels
= 0;
1347 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1351 bidi_it
->invalid_levels
++;
1352 /* See the commentary about invalid_rl_levels below. */
1353 if (bidi_it
->invalid_rl_levels
< 0)
1354 bidi_it
->invalid_rl_levels
= 0;
1355 bidi_it
->invalid_rl_levels
++;
1358 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1359 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1364 bidi_it
->type_after_w1
= type
;
1365 bidi_check_type (bidi_it
->type_after_w1
);
1366 type
= WEAK_BN
; /* X9/Retaining */
1367 if (bidi_it
->ignore_bn_limit
<= -1)
1369 if (current_level
<= BIDI_MAXLEVEL
- 5)
1371 /* Compute the least even embedding level greater than
1372 the current level. */
1373 new_level
= ((current_level
+ 2) & ~1);
1374 if (bidi_it
->type_after_w1
== LRE
)
1375 override
= NEUTRAL_DIR
;
1378 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1382 bidi_it
->invalid_levels
++;
1383 /* invalid_rl_levels counts invalid levels encountered
1384 while the embedding level was already too high for
1385 LRE/LRO, but not for RLE/RLO. That is because
1386 there may be exactly one PDF which we should not
1387 ignore even though invalid_levels is non-zero.
1388 invalid_rl_levels helps to know what PDF is
1390 if (bidi_it
->invalid_rl_levels
>= 0)
1391 bidi_it
->invalid_rl_levels
++;
1394 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1395 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1399 bidi_it
->type_after_w1
= type
;
1400 bidi_check_type (bidi_it
->type_after_w1
);
1401 type
= WEAK_BN
; /* X9/Retaining */
1402 if (bidi_it
->ignore_bn_limit
<= -1)
1404 if (!bidi_it
->invalid_rl_levels
)
1406 new_level
= bidi_pop_embedding_level (bidi_it
);
1407 bidi_it
->invalid_rl_levels
= -1;
1408 if (bidi_it
->invalid_levels
)
1409 bidi_it
->invalid_levels
--;
1410 /* else nothing: UAX#9 says to ignore invalid PDFs */
1412 if (!bidi_it
->invalid_levels
)
1413 new_level
= bidi_pop_embedding_level (bidi_it
);
1416 bidi_it
->invalid_levels
--;
1417 bidi_it
->invalid_rl_levels
--;
1420 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1421 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1429 bidi_it
->type
= type
;
1430 bidi_check_type (bidi_it
->type
);
1435 /* Given an iterator state in BIDI_IT, advance one character position
1436 in the buffer/string to the next character (in the logical order),
1437 resolve any explicit embeddings and directional overrides, and
1438 return the embedding level of the character after resolving
1439 explicit directives and ignoring empty embeddings. */
1441 bidi_resolve_explicit (struct bidi_it
*bidi_it
)
1443 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1444 int new_level
= bidi_resolve_explicit_1 (bidi_it
);
1445 EMACS_INT eob
= bidi_it
->string
.s
? bidi_it
->string
.schars
: ZV
;
1446 const unsigned char *s
= STRINGP (bidi_it
->string
.lstring
)
1447 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1449 if (prev_level
< new_level
1450 && bidi_it
->type
== WEAK_BN
1451 && bidi_it
->ignore_bn_limit
== -1 /* only if not already known */
1452 && bidi_it
->charpos
< eob
/* not already at EOB */
1453 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1454 + bidi_it
->ch_len
, s
,
1455 bidi_it
->string
.unibyte
)))
1457 /* Avoid pushing and popping embedding levels if the level run
1458 is empty, as this breaks level runs where it shouldn't.
1459 UAX#9 removes all the explicit embedding and override codes,
1460 so empty embeddings disappear without a trace. We need to
1461 behave as if we did the same. */
1462 struct bidi_it saved_it
;
1463 int level
= prev_level
;
1465 bidi_copy_it (&saved_it
, bidi_it
);
1467 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1468 + bidi_it
->ch_len
, s
,
1469 bidi_it
->string
.unibyte
)))
1471 /* This advances to the next character, skipping any
1472 characters covered by display strings. */
1473 level
= bidi_resolve_explicit_1 (bidi_it
);
1474 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1475 a pointer to its data is no longer valid. */
1476 if (STRINGP (bidi_it
->string
.lstring
))
1477 s
= SDATA (bidi_it
->string
.lstring
);
1480 if (bidi_it
->nchars
<= 0)
1482 if (level
== prev_level
) /* empty embedding */
1483 saved_it
.ignore_bn_limit
= bidi_it
->charpos
+ bidi_it
->nchars
;
1484 else /* this embedding is non-empty */
1485 saved_it
.ignore_bn_limit
= -2;
1487 bidi_copy_it (bidi_it
, &saved_it
);
1488 if (bidi_it
->ignore_bn_limit
> -1)
1490 /* We pushed a level, but we shouldn't have. Undo that. */
1491 if (!bidi_it
->invalid_rl_levels
)
1493 new_level
= bidi_pop_embedding_level (bidi_it
);
1494 bidi_it
->invalid_rl_levels
= -1;
1495 if (bidi_it
->invalid_levels
)
1496 bidi_it
->invalid_levels
--;
1498 if (!bidi_it
->invalid_levels
)
1499 new_level
= bidi_pop_embedding_level (bidi_it
);
1502 bidi_it
->invalid_levels
--;
1503 bidi_it
->invalid_rl_levels
--;
1508 if (bidi_it
->type
== NEUTRAL_B
) /* X8 */
1510 bidi_set_paragraph_end (bidi_it
);
1511 /* This is needed by bidi_resolve_weak below, and in L1. */
1512 bidi_it
->type_after_w1
= bidi_it
->type
;
1513 bidi_check_type (bidi_it
->type_after_w1
);
1519 /* Advance in the buffer/string, resolve weak types and return the
1520 type of the next character after weak type resolution. */
1522 bidi_resolve_weak (struct bidi_it
*bidi_it
)
1525 bidi_dir_t override
;
1526 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1527 int new_level
= bidi_resolve_explicit (bidi_it
);
1529 bidi_type_t type_of_next
;
1530 struct bidi_it saved_it
;
1532 (STRINGP (bidi_it
->string
.lstring
) || bidi_it
->string
.s
)
1533 ? bidi_it
->string
.schars
: ZV
;
1535 type
= bidi_it
->type
;
1536 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1538 if (type
== UNKNOWN_BT
1546 if (new_level
!= prev_level
1547 || bidi_it
->type
== NEUTRAL_B
)
1549 /* We've got a new embedding level run, compute the directional
1550 type of sor and initialize per-run variables (UAX#9, clause
1552 bidi_set_sor_type (bidi_it
, prev_level
, new_level
);
1554 else if (type
== NEUTRAL_S
|| type
== NEUTRAL_WS
1555 || type
== WEAK_BN
|| type
== STRONG_AL
)
1556 bidi_it
->type_after_w1
= type
; /* needed in L1 */
1557 bidi_check_type (bidi_it
->type_after_w1
);
1559 /* Level and directional override status are already recorded in
1560 bidi_it, and do not need any change; see X6. */
1561 if (override
== R2L
) /* X6 */
1563 else if (override
== L2R
)
1567 if (type
== WEAK_NSM
) /* W1 */
1569 /* Note that we don't need to consider the case where the
1570 prev character has its type overridden by an RLO or LRO,
1571 because then either the type of this NSM would have been
1572 also overridden, or the previous character is outside the
1573 current level run, and thus not relevant to this NSM.
1574 This is why NSM gets the type_after_w1 of the previous
1576 if (bidi_it
->prev
.type_after_w1
!= UNKNOWN_BT
1577 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1578 && bidi_it
->prev
.type_after_w1
!= NEUTRAL_B
)
1579 type
= bidi_it
->prev
.type_after_w1
;
1580 else if (bidi_it
->sor
== R2L
)
1582 else if (bidi_it
->sor
== L2R
)
1584 else /* shouldn't happen! */
1587 if (type
== WEAK_EN
/* W2 */
1588 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
1590 else if (type
== STRONG_AL
) /* W3 */
1592 else if ((type
== WEAK_ES
/* W4 */
1593 && bidi_it
->prev
.type_after_w1
== WEAK_EN
1594 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1596 && ((bidi_it
->prev
.type_after_w1
== WEAK_EN
1597 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1598 || bidi_it
->prev
.type_after_w1
== WEAK_AN
)))
1600 const unsigned char *s
=
1601 STRINGP (bidi_it
->string
.lstring
)
1602 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1605 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1607 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
,
1608 bidi_it
->string
.unibyte
);
1609 type_of_next
= bidi_get_type (next_char
, override
);
1611 if (type_of_next
== WEAK_BN
1612 || bidi_explicit_dir_char (next_char
))
1614 bidi_copy_it (&saved_it
, bidi_it
);
1615 while (bidi_resolve_explicit (bidi_it
) == new_level
1616 && bidi_it
->type
== WEAK_BN
)
1618 type_of_next
= bidi_it
->type
;
1619 bidi_copy_it (bidi_it
, &saved_it
);
1622 /* If the next character is EN, but the last strong-type
1623 character is AL, that next EN will be changed to AN when
1624 we process it in W2 above. So in that case, this ES
1625 should not be changed into EN. */
1627 && type_of_next
== WEAK_EN
1628 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1630 else if (type
== WEAK_CS
)
1632 if (bidi_it
->prev
.type_after_w1
== WEAK_AN
1633 && (type_of_next
== WEAK_AN
1634 /* If the next character is EN, but the last
1635 strong-type character is AL, EN will be later
1636 changed to AN when we process it in W2 above.
1637 So in that case, this ES should not be
1639 || (type_of_next
== WEAK_EN
1640 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)))
1642 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
1643 && type_of_next
== WEAK_EN
1644 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1648 else if (type
== WEAK_ET
/* W5: ET with EN before or after it */
1649 || type
== WEAK_BN
) /* W5/Retaining */
1651 if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* ET/BN w/EN before it */
1652 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1654 else /* W5: ET/BN with EN after it. */
1656 EMACS_INT en_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1657 const unsigned char *s
=
1658 STRINGP (bidi_it
->string
.lstring
)
1659 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1661 if (bidi_it
->nchars
<= 0)
1664 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1666 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
,
1667 bidi_it
->string
.unibyte
);
1668 type_of_next
= bidi_get_type (next_char
, override
);
1670 if (type_of_next
== WEAK_ET
1671 || type_of_next
== WEAK_BN
1672 || bidi_explicit_dir_char (next_char
))
1674 bidi_copy_it (&saved_it
, bidi_it
);
1675 while (bidi_resolve_explicit (bidi_it
) == new_level
1676 && (bidi_it
->type
== WEAK_BN
1677 || bidi_it
->type
== WEAK_ET
))
1679 type_of_next
= bidi_it
->type
;
1680 en_pos
= bidi_it
->charpos
;
1681 bidi_copy_it (bidi_it
, &saved_it
);
1683 if (type_of_next
== WEAK_EN
)
1685 /* If the last strong character is AL, the EN we've
1686 found will become AN when we get to it (W2). */
1687 if (bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1690 /* Remember this EN position, to speed up processing
1692 bidi_it
->next_en_pos
= en_pos
;
1694 else if (type
== WEAK_BN
)
1695 type
= NEUTRAL_ON
; /* W6/Retaining */
1701 if (type
== WEAK_ES
|| type
== WEAK_ET
|| type
== WEAK_CS
/* W6 */
1703 && (bidi_it
->prev
.type_after_w1
== WEAK_CS
/* W6/Retaining */
1704 || bidi_it
->prev
.type_after_w1
== WEAK_ES
1705 || bidi_it
->prev
.type_after_w1
== WEAK_ET
)))
1708 /* Store the type we've got so far, before we clobber it with strong
1709 types in W7 and while resolving neutral types. But leave alone
1710 the original types that were recorded above, because we will need
1711 them for the L1 clause. */
1712 if (bidi_it
->type_after_w1
== UNKNOWN_BT
)
1713 bidi_it
->type_after_w1
= type
;
1714 bidi_check_type (bidi_it
->type_after_w1
);
1716 if (type
== WEAK_EN
) /* W7 */
1718 if ((bidi_it
->last_strong
.type_after_w1
== STRONG_L
)
1719 || (bidi_it
->last_strong
.type
== UNKNOWN_BT
&& bidi_it
->sor
== L2R
))
1723 bidi_it
->type
= type
;
1724 bidi_check_type (bidi_it
->type
);
1728 /* Resolve the type of a neutral character according to the type of
1729 surrounding strong text and the current embedding level. */
1730 static inline bidi_type_t
1731 bidi_resolve_neutral_1 (bidi_type_t prev_type
, bidi_type_t next_type
, int lev
)
1733 /* N1: European and Arabic numbers are treated as though they were R. */
1734 if (next_type
== WEAK_EN
|| next_type
== WEAK_AN
)
1735 next_type
= STRONG_R
;
1736 if (prev_type
== WEAK_EN
|| prev_type
== WEAK_AN
)
1737 prev_type
= STRONG_R
;
1739 if (next_type
== prev_type
) /* N1 */
1741 else if ((lev
& 1) == 0) /* N2 */
1748 bidi_resolve_neutral (struct bidi_it
*bidi_it
)
1750 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1751 bidi_type_t type
= bidi_resolve_weak (bidi_it
);
1752 int current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1754 if (!(type
== STRONG_R
1759 || type
== NEUTRAL_B
1760 || type
== NEUTRAL_S
1761 || type
== NEUTRAL_WS
1762 || type
== NEUTRAL_ON
))
1765 if (bidi_get_category (type
) == NEUTRAL
1766 || (type
== WEAK_BN
&& prev_level
== current_level
))
1768 if (bidi_it
->next_for_neutral
.type
!= UNKNOWN_BT
)
1769 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1770 bidi_it
->next_for_neutral
.type
,
1774 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1775 the assumption of batch-style processing; see clauses W4,
1776 W5, and especially N1, which require to look far forward
1777 (as well as back) in the buffer/string. May the fleas of
1778 a thousand camels infest the armpits of those who design
1779 supposedly general-purpose algorithms by looking at their
1780 own implementations, and fail to consider other possible
1782 struct bidi_it saved_it
;
1783 bidi_type_t next_type
;
1785 if (bidi_it
->scan_dir
== -1)
1788 bidi_copy_it (&saved_it
, bidi_it
);
1789 /* Scan the text forward until we find the first non-neutral
1790 character, and then use that to resolve the neutral we
1791 are dealing with now. We also cache the scanned iterator
1792 states, to salvage some of the effort later. */
1793 bidi_cache_iterator_state (bidi_it
, 0);
1795 /* Record the info about the previous character, so that
1796 it will be cached below with this state. */
1797 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1798 && bidi_it
->type
!= WEAK_BN
)
1799 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1800 type
= bidi_resolve_weak (bidi_it
);
1801 /* Paragraph separators have their levels fully resolved
1802 at this point, so cache them as resolved. */
1803 bidi_cache_iterator_state (bidi_it
, type
== NEUTRAL_B
);
1804 /* FIXME: implement L1 here, by testing for a newline and
1805 resetting the level for any sequence of whitespace
1806 characters adjacent to it. */
1807 } while (!(type
== NEUTRAL_B
1809 && bidi_get_category (type
) != NEUTRAL
)
1810 /* This is all per level run, so stop when we
1811 reach the end of this level run. */
1812 || bidi_it
->level_stack
[bidi_it
->stack_idx
].level
!=
1815 bidi_remember_char (&saved_it
.next_for_neutral
, bidi_it
);
1826 /* N1: ``European and Arabic numbers are treated as
1827 though they were R.'' */
1828 next_type
= STRONG_R
;
1829 saved_it
.next_for_neutral
.type
= STRONG_R
;
1832 if (!bidi_explicit_dir_char (bidi_it
->ch
))
1833 abort (); /* can't happen: BNs are skipped */
1836 /* Marched all the way to the end of this level run.
1837 We need to use the eor type, whose information is
1838 stored by bidi_set_sor_type in the prev_for_neutral
1840 if (saved_it
.type
!= WEAK_BN
1841 || bidi_get_category (bidi_it
->prev
.type_after_w1
) == NEUTRAL
)
1843 next_type
= bidi_it
->prev_for_neutral
.type
;
1844 saved_it
.next_for_neutral
.type
= next_type
;
1845 bidi_check_type (next_type
);
1849 /* This is a BN which does not adjoin neutrals.
1850 Leave its type alone. */
1851 bidi_copy_it (bidi_it
, &saved_it
);
1852 return bidi_it
->type
;
1858 type
= bidi_resolve_neutral_1 (saved_it
.prev_for_neutral
.type
,
1859 next_type
, current_level
);
1860 saved_it
.type
= type
;
1861 bidi_check_type (type
);
1862 bidi_copy_it (bidi_it
, &saved_it
);
1868 /* Given an iterator state in BIDI_IT, advance one character position
1869 in the buffer/string to the next character (in the logical order),
1870 resolve the bidi type of that next character, and return that
1873 bidi_type_of_next_char (struct bidi_it
*bidi_it
)
1877 /* This should always be called during a forward scan. */
1878 if (bidi_it
->scan_dir
!= 1)
1881 /* Reset the limit until which to ignore BNs if we step out of the
1882 area where we found only empty levels. */
1883 if ((bidi_it
->ignore_bn_limit
> -1
1884 && bidi_it
->ignore_bn_limit
<= bidi_it
->charpos
)
1885 || (bidi_it
->ignore_bn_limit
== -2
1886 && !bidi_explicit_dir_char (bidi_it
->ch
)))
1887 bidi_it
->ignore_bn_limit
= -1;
1889 type
= bidi_resolve_neutral (bidi_it
);
1894 /* Given an iterator state BIDI_IT, advance one character position in
1895 the buffer/string to the next character (in the current scan
1896 direction), resolve the embedding and implicit levels of that next
1897 character, and return the resulting level. */
1899 bidi_level_of_next_char (struct bidi_it
*bidi_it
)
1902 int level
, prev_level
= -1;
1903 struct bidi_saved_info next_for_neutral
;
1904 EMACS_INT next_char_pos
= -2;
1906 if (bidi_it
->scan_dir
== 1)
1909 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
1910 ? bidi_it
->string
.schars
: ZV
;
1912 /* There's no sense in trying to advance if we hit end of text. */
1913 if (bidi_it
->charpos
>= eob
)
1914 return bidi_it
->resolved_level
;
1916 /* Record the info about the previous character. */
1917 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1918 && bidi_it
->type
!= WEAK_BN
)
1919 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1920 if (bidi_it
->type_after_w1
== STRONG_R
1921 || bidi_it
->type_after_w1
== STRONG_L
1922 || bidi_it
->type_after_w1
== STRONG_AL
)
1923 bidi_remember_char (&bidi_it
->last_strong
, bidi_it
);
1924 /* FIXME: it sounds like we don't need both prev and
1925 prev_for_neutral members, but I'm leaving them both for now. */
1926 if (bidi_it
->type
== STRONG_R
|| bidi_it
->type
== STRONG_L
1927 || bidi_it
->type
== WEAK_EN
|| bidi_it
->type
== WEAK_AN
)
1928 bidi_remember_char (&bidi_it
->prev_for_neutral
, bidi_it
);
1930 /* If we overstepped the characters used for resolving neutrals
1931 and whitespace, invalidate their info in the iterator. */
1932 if (bidi_it
->charpos
>= bidi_it
->next_for_neutral
.charpos
)
1933 bidi_it
->next_for_neutral
.type
= UNKNOWN_BT
;
1934 if (bidi_it
->next_en_pos
>= 0
1935 && bidi_it
->charpos
>= bidi_it
->next_en_pos
)
1936 bidi_it
->next_en_pos
= -1;
1937 if (bidi_it
->next_for_ws
.type
!= UNKNOWN_BT
1938 && bidi_it
->charpos
>= bidi_it
->next_for_ws
.charpos
)
1939 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
1941 /* This must be taken before we fill the iterator with the info
1942 about the next char. If we scan backwards, the iterator
1943 state must be already cached, so there's no need to know the
1944 embedding level of the previous character, since we will be
1945 returning to our caller shortly. */
1946 prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1948 next_for_neutral
= bidi_it
->next_for_neutral
;
1950 /* Perhaps the character we want is already cached. If it is, the
1951 call to bidi_cache_find below will return a type other than
1953 if (bidi_cache_idx
> bidi_cache_start
&& !bidi_it
->first_elt
)
1956 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
)) ? 0 : 1;
1958 if (bidi_it
->scan_dir
> 0)
1960 if (bidi_it
->nchars
<= 0)
1962 next_char_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1964 else if (bidi_it
->charpos
>= bob
)
1965 /* Implementation note: we allow next_char_pos to be as low as
1966 0 for buffers or -1 for strings, and that is okay because
1967 that's the "position" of the sentinel iterator state we
1968 cached at the beginning of the iteration. */
1969 next_char_pos
= bidi_it
->charpos
- 1;
1970 if (next_char_pos
>= bob
- 1)
1971 type
= bidi_cache_find (next_char_pos
, -1, bidi_it
);
1977 if (type
!= UNKNOWN_BT
)
1979 /* Don't lose the information for resolving neutrals! The
1980 cached states could have been cached before their
1981 next_for_neutral member was computed. If we are on our way
1982 forward, we can simply take the info from the previous
1984 if (bidi_it
->scan_dir
== 1
1985 && bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
1986 bidi_it
->next_for_neutral
= next_for_neutral
;
1988 /* If resolved_level is -1, it means this state was cached
1989 before it was completely resolved, so we cannot return
1991 if (bidi_it
->resolved_level
!= -1)
1992 return bidi_it
->resolved_level
;
1994 if (bidi_it
->scan_dir
== -1)
1995 /* If we are going backwards, the iterator state is already cached
1996 from previous scans, and should be fully resolved. */
1999 if (type
== UNKNOWN_BT
)
2000 type
= bidi_type_of_next_char (bidi_it
);
2002 if (type
== NEUTRAL_B
)
2003 return bidi_it
->resolved_level
;
2005 level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
2006 if ((bidi_get_category (type
) == NEUTRAL
/* && type != NEUTRAL_B */)
2007 || (type
== WEAK_BN
&& prev_level
== level
))
2009 if (bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
2012 /* If the cached state shows a neutral character, it was not
2013 resolved by bidi_resolve_neutral, so do it now. */
2014 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2015 bidi_it
->next_for_neutral
.type
,
2019 if (!(type
== STRONG_R
2023 || type
== WEAK_AN
))
2025 bidi_it
->type
= type
;
2026 bidi_check_type (bidi_it
->type
);
2028 /* For L1 below, we need to know, for each WS character, whether
2029 it belongs to a sequence of WS characters preceding a newline
2030 or a TAB or a paragraph separator. */
2031 if (bidi_it
->orig_type
== NEUTRAL_WS
2032 && bidi_it
->next_for_ws
.type
== UNKNOWN_BT
)
2035 EMACS_INT clen
= bidi_it
->ch_len
;
2036 EMACS_INT bpos
= bidi_it
->bytepos
;
2037 EMACS_INT cpos
= bidi_it
->charpos
;
2038 EMACS_INT disp_pos
= bidi_it
->disp_pos
;
2039 EMACS_INT nc
= bidi_it
->nchars
;
2040 struct bidi_string_data bs
= bidi_it
->string
;
2042 int fwp
= bidi_it
->frame_window_p
;
2044 if (bidi_it
->nchars
<= 0)
2047 ch
= bidi_fetch_char (bpos
+= clen
, cpos
+= nc
, &disp_pos
, &bs
, fwp
,
2049 if (ch
== '\n' || ch
== BIDI_EOB
/* || ch == LINESEP_CHAR */)
2052 chtype
= bidi_get_type (ch
, NEUTRAL_DIR
);
2053 } while (chtype
== NEUTRAL_WS
|| chtype
== WEAK_BN
2054 || bidi_explicit_dir_char (ch
)); /* L1/Retaining */
2055 bidi_it
->next_for_ws
.type
= chtype
;
2056 bidi_check_type (bidi_it
->next_for_ws
.type
);
2057 bidi_it
->next_for_ws
.charpos
= cpos
;
2058 bidi_it
->next_for_ws
.bytepos
= bpos
;
2061 /* Resolve implicit levels, with a twist: PDFs get the embedding
2062 level of the enbedding they terminate. See below for the
2064 if (bidi_it
->orig_type
== PDF
2065 /* Don't do this if this formatting code didn't change the
2066 embedding level due to invalid or empty embeddings. */
2067 && prev_level
!= level
)
2069 /* Don't look in UAX#9 for the reason for this: it's our own
2070 private quirk. The reason is that we want the formatting
2071 codes to be delivered so that they bracket the text of their
2072 embedding. For example, given the text
2076 we want it to be displayed as
2084 which will result because we bump up the embedding level as
2085 soon as we see the RLO and pop it as soon as we see the PDF,
2086 so RLO itself has the same embedding level as "teST", and
2087 thus would be normally delivered last, just before the PDF.
2088 The switch below fiddles with the level of PDF so that this
2089 ugly side effect does not happen.
2091 (This is, of course, only important if the formatting codes
2092 are actually displayed, but Emacs does need to display them
2093 if the user wants to.) */
2096 else if (bidi_it
->orig_type
== NEUTRAL_B
/* L1 */
2097 || bidi_it
->orig_type
== NEUTRAL_S
2098 || bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
2099 /* || bidi_it->ch == LINESEP_CHAR */
2100 || (bidi_it
->orig_type
== NEUTRAL_WS
2101 && (bidi_it
->next_for_ws
.type
== NEUTRAL_B
2102 || bidi_it
->next_for_ws
.type
== NEUTRAL_S
)))
2103 level
= bidi_it
->level_stack
[0].level
;
2104 else if ((level
& 1) == 0) /* I1 */
2106 if (type
== STRONG_R
)
2108 else if (type
== WEAK_EN
|| type
== WEAK_AN
)
2113 if (type
== STRONG_L
|| type
== WEAK_EN
|| type
== WEAK_AN
)
2117 bidi_it
->resolved_level
= level
;
2121 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
2122 non-zero, we are at the end of a level, and we need to prepare to
2123 resume the scan of the lower level.
2125 If this level's other edge is cached, we simply jump to it, filling
2126 the iterator structure with the iterator state on the other edge.
2127 Otherwise, we walk the buffer or string until we come back to the
2128 same level as LEVEL.
2130 Note: we are not talking here about a ``level run'' in the UAX#9
2131 sense of the term, but rather about a ``level'' which includes
2132 all the levels higher than it. In other words, given the levels
2135 11111112222222333333334443343222222111111112223322111
2138 and assuming we are at point A scanning left to right, this
2139 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2142 bidi_find_other_level_edge (struct bidi_it
*bidi_it
, int level
, int end_flag
)
2144 int dir
= end_flag
? -bidi_it
->scan_dir
: bidi_it
->scan_dir
;
2147 /* Try the cache first. */
2148 if ((idx
= bidi_cache_find_level_change (level
, dir
, end_flag
))
2149 >= bidi_cache_start
)
2150 bidi_cache_fetch_state (idx
, bidi_it
);
2156 abort (); /* if we are at end of level, its edges must be cached */
2158 bidi_cache_iterator_state (bidi_it
, 1);
2160 new_level
= bidi_level_of_next_char (bidi_it
);
2161 bidi_cache_iterator_state (bidi_it
, 1);
2162 } while (new_level
>= level
);
2167 bidi_move_to_visually_next (struct bidi_it
*bidi_it
)
2169 int old_level
, new_level
, next_level
;
2170 struct bidi_it sentinel
;
2171 struct gcpro gcpro1
;
2173 if (bidi_it
->charpos
< 0 || bidi_it
->bytepos
< 0)
2176 if (bidi_it
->scan_dir
== 0)
2178 bidi_it
->scan_dir
= 1; /* default to logical order */
2181 /* The code below can call eval, and thus cause GC. If we are
2182 iterating a Lisp string, make sure it won't be GCed. */
2183 if (STRINGP (bidi_it
->string
.lstring
))
2184 GCPRO1 (bidi_it
->string
.lstring
);
2186 /* If we just passed a newline, initialize for the next line. */
2187 if (!bidi_it
->first_elt
&& bidi_it
->orig_type
== NEUTRAL_B
)
2188 bidi_line_init (bidi_it
);
2190 /* Prepare the sentinel iterator state, and cache it. When we bump
2191 into it, scanning backwards, we'll know that the last non-base
2192 level is exhausted. */
2193 if (bidi_cache_idx
== bidi_cache_start
)
2195 bidi_copy_it (&sentinel
, bidi_it
);
2196 if (bidi_it
->first_elt
)
2198 sentinel
.charpos
--; /* cached charpos needs to be monotonic */
2200 sentinel
.ch
= '\n'; /* doesn't matter, but why not? */
2201 sentinel
.ch_len
= 1;
2202 sentinel
.nchars
= 1;
2204 bidi_cache_iterator_state (&sentinel
, 1);
2207 old_level
= bidi_it
->resolved_level
;
2208 new_level
= bidi_level_of_next_char (bidi_it
);
2210 /* Reordering of resolved levels (clause L2) is implemented by
2211 jumping to the other edge of the level and flipping direction of
2212 scanning the text whenever we find a level change. */
2213 if (new_level
!= old_level
)
2215 int ascending
= new_level
> old_level
;
2216 int level_to_search
= ascending
? old_level
+ 1 : old_level
;
2217 int incr
= ascending
? 1 : -1;
2218 int expected_next_level
= old_level
+ incr
;
2220 /* Jump (or walk) to the other edge of this level. */
2221 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2222 /* Switch scan direction and peek at the next character in the
2224 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2226 /* The following loop handles the case where the resolved level
2227 jumps by more than one. This is typical for numbers inside a
2228 run of text with left-to-right embedding direction, but can
2229 also happen in other situations. In those cases the decision
2230 where to continue after a level change, and in what direction,
2231 is tricky. For example, given a text like below:
2236 (where the numbers below the text show the resolved levels),
2237 the result of reordering according to UAX#9 should be this:
2241 This is implemented by the loop below which flips direction
2242 and jumps to the other edge of the level each time it finds
2243 the new level not to be the expected one. The expected level
2244 is always one more or one less than the previous one. */
2245 next_level
= bidi_peek_at_next_level (bidi_it
);
2246 while (next_level
!= expected_next_level
)
2248 expected_next_level
+= incr
;
2249 level_to_search
+= incr
;
2250 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2251 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2252 next_level
= bidi_peek_at_next_level (bidi_it
);
2255 /* Finally, deliver the next character in the new direction. */
2256 next_level
= bidi_level_of_next_char (bidi_it
);
2259 /* Take note when we have just processed the newline that precedes
2260 the end of the paragraph. The next time we are about to be
2261 called, set_iterator_to_next will automatically reinit the
2262 paragraph direction, if needed. We do this at the newline before
2263 the paragraph separator, because the next character might not be
2264 the first character of the next paragraph, due to the bidi
2265 reordering, whereas we _must_ know the paragraph base direction
2266 _before_ we process the paragraph's text, since the base
2267 direction affects the reordering. */
2268 if (bidi_it
->scan_dir
== 1 && bidi_it
->orig_type
== NEUTRAL_B
)
2270 /* The paragraph direction of the entire string, once
2271 determined, is in effect for the entire string. Setting the
2272 separator limit to the end of the string prevents
2273 bidi_paragraph_init from being called automatically on this
2275 if (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2276 bidi_it
->separator_limit
= bidi_it
->string
.schars
;
2277 else if (bidi_it
->bytepos
< ZV_BYTE
)
2280 bidi_at_paragraph_end (bidi_it
->charpos
+ bidi_it
->nchars
,
2281 bidi_it
->bytepos
+ bidi_it
->ch_len
);
2282 if (bidi_it
->nchars
<= 0)
2286 bidi_it
->new_paragraph
= 1;
2287 /* Record the buffer position of the last character of the
2288 paragraph separator. */
2289 bidi_it
->separator_limit
=
2290 bidi_it
->charpos
+ bidi_it
->nchars
+ sep_len
;
2295 if (bidi_it
->scan_dir
== 1 && bidi_cache_idx
> bidi_cache_start
)
2297 /* If we are at paragraph's base embedding level and beyond the
2298 last cached position, the cache's job is done and we can
2300 if (bidi_it
->resolved_level
== bidi_it
->level_stack
[0].level
2301 && bidi_it
->charpos
> (bidi_cache
[bidi_cache_idx
- 1].charpos
2302 + bidi_cache
[bidi_cache_idx
- 1].nchars
- 1))
2303 bidi_cache_reset ();
2304 /* But as long as we are caching during forward scan, we must
2305 cache each state, or else the cache integrity will be
2306 compromised: it assumes cached states correspond to buffer
2309 bidi_cache_iterator_state (bidi_it
, 1);
2312 if (STRINGP (bidi_it
->string
.lstring
))
2316 /* This is meant to be called from within the debugger, whenever you
2317 wish to examine the cache contents. */
2318 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE
;
2320 bidi_dump_cached_states (void)
2325 if (bidi_cache_idx
== 0)
2327 fprintf (stderr
, "The cache is empty.\n");
2330 fprintf (stderr
, "Total of %"pD
"d state%s in cache:\n",
2331 bidi_cache_idx
, bidi_cache_idx
== 1 ? "" : "s");
2333 for (i
= bidi_cache
[bidi_cache_idx
- 1].charpos
; i
> 0; i
/= 10)
2335 fputs ("ch ", stderr
);
2336 for (i
= 0; i
< bidi_cache_idx
; i
++)
2337 fprintf (stderr
, "%*c", ndigits
, bidi_cache
[i
].ch
);
2338 fputs ("\n", stderr
);
2339 fputs ("lvl ", stderr
);
2340 for (i
= 0; i
< bidi_cache_idx
; i
++)
2341 fprintf (stderr
, "%*d", ndigits
, bidi_cache
[i
].resolved_level
);
2342 fputs ("\n", stderr
);
2343 fputs ("pos ", stderr
);
2344 for (i
= 0; i
< bidi_cache_idx
; i
++)
2345 fprintf (stderr
, "%*"pI
"d", ndigits
, bidi_cache
[i
].charpos
);
2346 fputs ("\n", stderr
);