code.delx.au - gnu-emacs/blob - src/bidi.c

   1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
   2    Copyright (C) 2000, 2001, 2004, 2005, 2009, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    The two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    If you want to understand the code, you will have to read it
  44    together with the relevant portions of UAX#9.  The comments include
  45    references to UAX#9 rules, for that very reason.
  46
  47    A note about references to UAX#9 rules: if the reference says
  48    something like "X9/Retaining", it means that you need to refer to
  49    rule X9 and to its modifications described in the "Implementation
  50    Notes" section of UAX#9, under "Retaining Format Codes".  */
  51
  52 #include <config.h>
  53 #include <stdio.h>
  54 #include <string.h>
  55 #include <setjmp.h>
  56
  57 #include "lisp.h"
  58 #include "buffer.h"
  59 #include "character.h"
  60 #include "dispextern.h"
  61
  62 static int bidi_initialized = 0;
  63
  64 static Lisp_Object bidi_type_table, bidi_mirror_table;
  65
  66 /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
  67 #define LRM_CHAR   0x200E
  68 #define RLM_CHAR   0x200F
  69 #define LRE_CHAR   0x202A
  70 #define RLE_CHAR   0x202B
  71 #define PDF_CHAR   0x202C
  72 #define LRO_CHAR   0x202D
  73 #define RLO_CHAR   0x202E
  74
  75 #define BIDI_EOB   -1
  76 #define BIDI_BOB   -2           /* FIXME: Is this needed? */
  77
  78 /* Local data structures.  (Look in dispextern.h for the rest.)  */
  79
  80 /* What we need to know about the current paragraph.  */
  81 struct bidi_paragraph_info {
  82   EMACS_INT start_bytepos;      /* byte position where it begins */
  83   EMACS_INT end_bytepos;        /* byte position where it ends */
  84   int       embedding_level;    /* its basic embedding level */
  85   bidi_dir_t base_dir;          /* its base direction */
  86 };
  87
  88 /* Data type for describing the bidirectional character categories.  */
  89 typedef enum {
  90   UNKNOWN_BC,
  91   NEUTRAL,
  92   WEAK,
  93   STRONG
  94 } bidi_category_t;
  95
  96 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  97
  98 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  99 static Lisp_Object Qparagraph_start, Qparagraph_separate;
 100
 101 static void
 102 bidi_initialize (void)
 103 {
 104
 105 #include "biditype.h"
 106 #include "bidimirror.h"
 107
 108   int i;
 109
 110   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
 111   staticpro (&bidi_type_table);
 112
 113   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
 114     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
 115                           make_number (bidi_type[i].type));
 116
 117   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 118   staticpro (&bidi_mirror_table);
 119
 120   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 121     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 122                     make_number (bidi_mirror[i].to));
 123
 124   Qparagraph_start = intern ("paragraph-start");
 125   staticpro (&Qparagraph_start);
 126   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 127   if (!STRINGP (paragraph_start_re))
 128     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 129   staticpro (&paragraph_start_re);
 130   Qparagraph_separate = intern ("paragraph-separate");
 131   staticpro (&Qparagraph_separate);
 132   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 133   if (!STRINGP (paragraph_separate_re))
 134     paragraph_separate_re = build_string ("[ \t\f]*$");
 135   staticpro (&paragraph_separate_re);
 136   bidi_initialized = 1;
 137 }
 138
 139 /* Return the bidi type of a character CH, subject to the current
 140    directional OVERRIDE.  */
 141 static INLINE bidi_type_t
 142 bidi_get_type (int ch, bidi_dir_t override)
 143 {
 144   bidi_type_t default_type;
 145
 146   if (ch == BIDI_EOB)
 147     return NEUTRAL_B;
 148   if (ch < 0 || ch > MAX_CHAR)
 149     abort ();
 150
 151   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 152
 153   if (override == NEUTRAL_DIR)
 154     return default_type;
 155
 156   switch (default_type)
 157     {
 158       /* Although UAX#9 does not tell, it doesn't make sense to
 159          override NEUTRAL_B and LRM/RLM characters.  */
 160       case NEUTRAL_B:
 161       case LRE:
 162       case LRO:
 163       case RLE:
 164       case RLO:
 165       case PDF:
 166         return default_type;
 167       default:
 168         switch (ch)
 169           {
 170             case LRM_CHAR:
 171             case RLM_CHAR:
 172               return default_type;
 173             default:
 174               if (override == L2R) /* X6 */
 175                 return STRONG_L;
 176               else if (override == R2L)
 177                 return STRONG_R;
 178               else
 179                 abort ();       /* can't happen: handled above */
 180           }
 181     }
 182 }
 183
 184 void
 185 bidi_check_type (bidi_type_t type)
 186 {
 187   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 188     abort ();
 189 }
 190
 191 /* Given a bidi TYPE of a character, return its category.  */
 192 static INLINE bidi_category_t
 193 bidi_get_category (bidi_type_t type)
 194 {
 195   switch (type)
 196     {
 197       case UNKNOWN_BT:
 198         return UNKNOWN_BC;
 199       case STRONG_L:
 200       case STRONG_R:
 201       case STRONG_AL:
 202       case LRE:
 203       case LRO:
 204       case RLE:
 205       case RLO:
 206         return STRONG;
 207       case PDF:         /* ??? really?? */
 208       case WEAK_EN:
 209       case WEAK_ES:
 210       case WEAK_ET:
 211       case WEAK_AN:
 212       case WEAK_CS:
 213       case WEAK_NSM:
 214       case WEAK_BN:
 215         return WEAK;
 216       case NEUTRAL_B:
 217       case NEUTRAL_S:
 218       case NEUTRAL_WS:
 219       case NEUTRAL_ON:
 220         return NEUTRAL;
 221       default:
 222         abort ();
 223     }
 224 }
 225
 226 /* Return the mirrored character of C, if it has one.  If C has no
 227    mirrored counterpart, return C.
 228    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 229    context must be tested by the caller.  */
 230 int
 231 bidi_mirror_char (int c)
 232 {
 233   Lisp_Object val;
 234
 235   if (c == BIDI_EOB)
 236     return c;
 237   if (c < 0 || c > MAX_CHAR)
 238     abort ();
 239
 240   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 241   if (INTEGERP (val))
 242     {
 243       int v = XINT (val);
 244
 245       if (v < 0 || v > MAX_CHAR)
 246         abort ();
 247
 248       return v;
 249     }
 250
 251   return c;
 252 }
 253
 254 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 255    copies the part of the level stack that is actually in use.  */
 256 static INLINE void
 257 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 258 {
 259   int i;
 260
 261   /* Copy everything except the level stack and beyond.  */
 262   memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
 263
 264   /* Copy the active part of the level stack.  */
 265   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 266   for (i = 1; i <= from->stack_idx; i++)
 267     to->level_stack[i] = from->level_stack[i];
 268 }
 269
 270 /* Caching the bidi iterator states.  */
 271
 272 #define BIDI_CACHE_CHUNK 200
 273 static struct bidi_it *bidi_cache;
 274 static size_t bidi_cache_size = 0;
 275 static size_t elsz = sizeof (struct bidi_it);
 276 static int bidi_cache_idx;      /* next unused cache slot */
 277 static int bidi_cache_last_idx; /* slot of last cache hit */
 278
 279 static INLINE void
 280 bidi_cache_reset (void)
 281 {
 282   bidi_cache_idx = 0;
 283   bidi_cache_last_idx = -1;
 284 }
 285
 286 static INLINE void
 287 bidi_cache_shrink (void)
 288 {
 289   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 290     {
 291       bidi_cache_size = BIDI_CACHE_CHUNK;
 292       bidi_cache =
 293         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 294     }
 295   bidi_cache_reset ();
 296 }
 297
 298 static INLINE void
 299 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
 300 {
 301   int current_scan_dir = bidi_it->scan_dir;
 302
 303   if (idx < 0 || idx >= bidi_cache_idx)
 304     abort ();
 305
 306   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 307   bidi_it->scan_dir = current_scan_dir;
 308   bidi_cache_last_idx = idx;
 309 }
 310
 311 /* Find a cached state with a given CHARPOS and resolved embedding
 312    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 313    resolved levels in cached states.  DIR, if non-zero, means search
 314    in that direction from the last cache hit.  */
 315 static INLINE int
 316 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 317 {
 318   int i, i_start;
 319
 320   if (bidi_cache_idx)
 321     {
 322       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 323         dir = -1;
 324       else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
 325         dir = 1;
 326       if (dir)
 327         i_start = bidi_cache_last_idx;
 328       else
 329         {
 330           dir = -1;
 331           i_start = bidi_cache_idx - 1;
 332         }
 333
 334       if (dir < 0)
 335         {
 336           /* Linear search for now; FIXME!  */
 337           for (i = i_start; i >= 0; i--)
 338             if (bidi_cache[i].charpos == charpos
 339                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 340               return i;
 341         }
 342       else
 343         {
 344           for (i = i_start; i < bidi_cache_idx; i++)
 345             if (bidi_cache[i].charpos == charpos
 346                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 347               return i;
 348         }
 349     }
 350
 351   return -1;
 352 }
 353
 354 /* Find a cached state where the resolved level changes to a value
 355    that is lower than LEVEL, and return its cache slot index.  DIR is
 356    the direction to search, starting with the last used cache slot.
 357    BEFORE, if non-zero, means return the index of the slot that is
 358    ``before'' the level change in the search direction.  That is,
 359    given the cached levels like this:
 360
 361          1122333442211
 362           AB        C
 363
 364    and assuming we are at the position cached at the slot marked with
 365    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 366    index of slot B or A, depending whether BEFORE is, respectively,
 367    non-zero or zero.  */
 368 static int
 369 bidi_cache_find_level_change (int level, int dir, int before)
 370 {
 371   if (bidi_cache_idx)
 372     {
 373       int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 374       int incr = before ? 1 : 0;
 375
 376       if (!dir)
 377         dir = -1;
 378       else if (!incr)
 379         i += dir;
 380
 381       if (dir < 0)
 382         {
 383           while (i >= incr)
 384             {
 385               if (bidi_cache[i - incr].resolved_level >= 0
 386                   && bidi_cache[i - incr].resolved_level < level)
 387                 return i;
 388               i--;
 389             }
 390         }
 391       else
 392         {
 393           while (i < bidi_cache_idx - incr)
 394             {
 395               if (bidi_cache[i + incr].resolved_level >= 0
 396                   && bidi_cache[i + incr].resolved_level < level)
 397                 return i;
 398               i++;
 399             }
 400         }
 401     }
 402
 403   return -1;
 404 }
 405
 406 static INLINE void
 407 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 408 {
 409   int idx;
 410
 411   /* We should never cache on backward scans.  */
 412   if (bidi_it->scan_dir == -1)
 413     abort ();
 414   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 415
 416   if (idx < 0)
 417     {
 418       idx = bidi_cache_idx;
 419       /* Enlarge the cache as needed.  */
 420       if (idx >= bidi_cache_size)
 421         {
 422           bidi_cache_size += BIDI_CACHE_CHUNK;
 423           bidi_cache =
 424             (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 425         }
 426       /* Character positions should correspond to cache positions 1:1.
 427          If we are outside the range of cached positions, the cache is
 428          useless and must be reset.  */
 429       if (idx > 0 &&
 430           (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
 431            || bidi_it->charpos < bidi_cache[0].charpos))
 432         {
 433           bidi_cache_reset ();
 434           idx = 0;
 435         }
 436       bidi_copy_it (&bidi_cache[idx], bidi_it);
 437       if (!resolved)
 438         bidi_cache[idx].resolved_level = -1;
 439     }
 440   else
 441     {
 442       /* Copy only the members which could have changed, to avoid
 443          costly copying of the entire struct.  */
 444       bidi_cache[idx].type = bidi_it->type;
 445       bidi_check_type (bidi_it->type);
 446       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 447       bidi_check_type (bidi_it->type_after_w1);
 448       if (resolved)
 449         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 450       else
 451         bidi_cache[idx].resolved_level = -1;
 452       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 453       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 454       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 455       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 456       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 457     }
 458
 459   bidi_cache_last_idx = idx;
 460   if (idx >= bidi_cache_idx)
 461     bidi_cache_idx = idx + 1;
 462 }
 463
 464 static INLINE bidi_type_t
 465 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 466 {
 467   int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 468
 469   if (i >= 0)
 470     {
 471       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 472
 473       bidi_copy_it (bidi_it, &bidi_cache[i]);
 474       bidi_cache_last_idx = i;
 475       /* Don't let scan direction from from the cached state override
 476          the current scan direction.  */
 477       bidi_it->scan_dir = current_scan_dir;
 478       return bidi_it->type;
 479     }
 480
 481   return UNKNOWN_BT;
 482 }
 483
 484 static INLINE int
 485 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 486 {
 487   if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
 488     abort ();
 489   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 490 }
 491
 492 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
 493    Value is the non-negative length of the paragraph separator
 494    following the buffer position, -1 if position is at the beginning
 495    of a new paragraph, or -2 if position is neither at beginning nor
 496    at end of a paragraph.  */
 497 static EMACS_INT
 498 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 499 {
 500   Lisp_Object sep_re;
 501   Lisp_Object start_re;
 502   EMACS_INT val;
 503
 504   sep_re = paragraph_separate_re;
 505   start_re = paragraph_start_re;
 506
 507   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
 508   if (val < 0)
 509     {
 510       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
 511         val = -1;
 512       else
 513         val = -2;
 514     }
 515
 516   return val;
 517 }
 518
 519 /* Determine the start-of-run (sor) directional type given the two
 520    embedding levels on either side of the run boundary.  Also, update
 521    the saved info about previously seen characters, since that info is
 522    generally valid for a single level run.  */
 523 static INLINE void
 524 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 525 {
 526   int higher_level = level_before > level_after ? level_before : level_after;
 527
 528   /* The prev_was_pdf gork is required for when we have several PDFs
 529      in a row.  In that case, we want to compute the sor type for the
 530      next level run only once: when we see the first PDF.  That's
 531      because the sor type depends only on the higher of the two levels
 532      that we find on the two sides of the level boundary (see UAX#9,
 533      clause X10), and so we don't need to know the final embedding
 534      level to which we descend after processing all the PDFs.  */
 535   if (!bidi_it->prev_was_pdf || level_before < level_after)
 536     /* FIXME: should the default sor direction be user selectable?  */
 537     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 538   if (level_before > level_after)
 539     bidi_it->prev_was_pdf = 1;
 540
 541   bidi_it->prev.type = UNKNOWN_BT;
 542   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 543     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 544   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 545   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 546   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 547   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 548     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 549   bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
 550 }
 551
 552 static void
 553 bidi_line_init (struct bidi_it *bidi_it)
 554 {
 555   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 556   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 557   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 558   bidi_it->invalid_levels = 0;
 559   bidi_it->invalid_rl_levels = -1;
 560   bidi_it->next_en_pos = -1;
 561   bidi_it->next_for_ws.type = UNKNOWN_BT;
 562   bidi_set_sor_type (bidi_it,
 563                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 564                      bidi_it->level_stack[0].level); /* X10 */
 565
 566   bidi_cache_reset ();
 567 }
 568
 569 /* Find the beginning of this paragraph by looking back in the buffer.
 570    Value is the byte position of the paragraph's beginning.  */
 571 static EMACS_INT
 572 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
 573 {
 574   Lisp_Object re = paragraph_start_re;
 575   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
 576
 577   while (pos_byte > BEGV_BYTE
 578          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
 579     {
 580       pos = find_next_newline_no_quit (pos - 1, -1);
 581       pos_byte = CHAR_TO_BYTE (pos);
 582     }
 583   return pos_byte;
 584 }
 585
 586 /* Determine the base direction, a.k.a. base embedding level, of the
 587    paragraph we are about to iterate through.  If DIR is either L2R or
 588    R2L, just use that.  Otherwise, determine the paragraph direction
 589    from the first strong directional character of the paragraph.
 590
 591    NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
 592    has no strong directional characters and both DIR and
 593    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
 594    in the buffer until a paragraph is found with a strong character,
 595    or until hitting BEGV.  In the latter case, fall back to L2R.  This
 596    flag is used in current-bidi-paragraph-direction.
 597
 598    Note that this function gives the paragraph separator the same
 599    direction as the preceding paragraph, even though Emacs generally
 600    views the separartor as not belonging to any paragraph.  */
 601 void
 602 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 603 {
 604   EMACS_INT bytepos = bidi_it->bytepos;
 605   EMACS_INT pstartbyte;
 606
 607   /* Special case for an empty buffer. */
 608   if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
 609     dir = L2R;
 610   /* We should never be called at EOB or before BEGV.  */
 611   else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
 612     abort ();
 613
 614   if (dir == L2R)
 615     {
 616       bidi_it->paragraph_dir = L2R;
 617       bidi_it->new_paragraph = 0;
 618     }
 619   else if (dir == R2L)
 620     {
 621       bidi_it->paragraph_dir = R2L;
 622       bidi_it->new_paragraph = 0;
 623     }
 624   else if (dir == NEUTRAL_DIR)  /* P2 */
 625     {
 626       int ch, ch_len;
 627       EMACS_INT pos;
 628       bidi_type_t type;
 629
 630       if (!bidi_initialized)
 631         bidi_initialize ();
 632
 633       /* If we are inside a paragraph separator, we are just waiting
 634          for the separator to be exhausted; use the previous paragraph
 635          direction.  But don't do that if we have been just reseated,
 636          because we need to reinitialize below in that case.  */
 637       if (!bidi_it->first_elt
 638           && bidi_it->charpos < bidi_it->separator_limit)
 639         return;
 640
 641       /* If we are on a newline, get past it to where the next
 642          paragraph might start.  But don't do that at BEGV since then
 643          we are potentially in a new paragraph that doesn't yet
 644          exist.  */
 645       pos = bidi_it->charpos;
 646       if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
 647         {
 648           bytepos++;
 649           pos++;
 650         }
 651
 652       /* We are either at the beginning of a paragraph or in the
 653          middle of it.  Find where this paragraph starts.  */
 654       pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 655       bidi_it->separator_limit = -1;
 656       bidi_it->new_paragraph = 0;
 657
 658       /* The following loop is run more than once only if NO_DEFAULT_P
 659          is non-zero.  */
 660       do {
 661         bytepos = pstartbyte;
 662         ch = FETCH_CHAR (bytepos);
 663         ch_len = CHAR_BYTES (ch);
 664         pos = BYTE_TO_CHAR (bytepos);
 665         type = bidi_get_type (ch, NEUTRAL_DIR);
 666
 667         for (pos++, bytepos += ch_len;
 668              /* NOTE: UAX#9 says to search only for L, AL, or R types
 669                 of characters, and ignore RLE, RLO, LRE, and LRO.
 670                 However, I'm not sure it makes sense to omit those 4;
 671                 should try with and without that to see the effect.  */
 672              (bidi_get_category (type) != STRONG)
 673                || (bidi_ignore_explicit_marks_for_paragraph_level
 674                    && (type == RLE || type == RLO
 675                        || type == LRE || type == LRO));
 676              type = bidi_get_type (ch, NEUTRAL_DIR))
 677           {
 678             if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
 679               break;
 680             if (bytepos >= ZV_BYTE)
 681               {
 682                 /* Pretend there's a paragraph separator at end of
 683                    buffer.  */
 684                 type = NEUTRAL_B;
 685                 break;
 686               }
 687             FETCH_CHAR_ADVANCE (ch, pos, bytepos);
 688           }
 689         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 690           bidi_it->paragraph_dir = R2L;
 691         else if (type == STRONG_L)
 692           bidi_it->paragraph_dir = L2R;
 693         if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 694           {
 695             /* If this paragraph is at BEGV, default to L2R.  */
 696             if (pstartbyte == BEGV_BYTE)
 697               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 698             else
 699               {
 700                 EMACS_INT prevpbyte = pstartbyte;
 701                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 702
 703                 /* Find the beginning of the previous paragraph, if any.  */
 704                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 705                   {
 706                     p--;
 707                     pbyte = CHAR_TO_BYTE (p);
 708                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 709                   }
 710                 pstartbyte = prevpbyte;
 711               }
 712           }
 713       } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 714     }
 715   else
 716     abort ();
 717
 718   /* Contrary to UAX#9 clause P3, we only default the paragraph
 719      direction to L2R if we have no previous usable paragraph
 720      direction.  This is allowed by the HL1 clause.  */
 721   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
 722     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
 723   if (bidi_it->paragraph_dir == R2L)
 724     bidi_it->level_stack[0].level = 1;
 725   else
 726     bidi_it->level_stack[0].level = 0;
 727
 728   bidi_line_init (bidi_it);
 729 }
 730
 731 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 732    end.  */
 733 static INLINE void
 734 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 735 {
 736   bidi_it->invalid_levels = 0;
 737   bidi_it->invalid_rl_levels = -1;
 738   bidi_it->stack_idx = 0;
 739   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 740 }
 741
 742 /* Initialize the bidi iterator from buffer position CHARPOS.  */
 743 void
 744 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
 745 {
 746   if (! bidi_initialized)
 747     bidi_initialize ();
 748   bidi_it->charpos = charpos;
 749   bidi_it->bytepos = bytepos;
 750   bidi_it->first_elt = 1;
 751   bidi_set_paragraph_end (bidi_it);
 752   bidi_it->new_paragraph = 1;
 753   bidi_it->separator_limit = -1;
 754   bidi_it->type = NEUTRAL_B;
 755   bidi_it->type_after_w1 = NEUTRAL_B;
 756   bidi_it->orig_type = NEUTRAL_B;
 757   bidi_it->prev_was_pdf = 0;
 758   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 759     bidi_it->prev.orig_type = UNKNOWN_BT;
 760   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 761     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 762   bidi_it->next_for_neutral.charpos = -1;
 763   bidi_it->next_for_neutral.type =
 764     bidi_it->next_for_neutral.type_after_w1 =
 765     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 766   bidi_it->prev_for_neutral.charpos = -1;
 767   bidi_it->prev_for_neutral.type =
 768     bidi_it->prev_for_neutral.type_after_w1 =
 769     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 770   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 771   bidi_cache_shrink ();
 772 }
 773
 774 /* Push the current embedding level and override status; reset the
 775    current level to LEVEL and the current override status to OVERRIDE.  */
 776 static INLINE void
 777 bidi_push_embedding_level (struct bidi_it *bidi_it,
 778                            int level, bidi_dir_t override)
 779 {
 780   bidi_it->stack_idx++;
 781   if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
 782     abort ();
 783   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 784   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 785 }
 786
 787 /* Pop the embedding level and directional override status from the
 788    stack, and return the new level.  */
 789 static INLINE int
 790 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 791 {
 792   /* UAX#9 says to ignore invalid PDFs.  */
 793   if (bidi_it->stack_idx > 0)
 794     bidi_it->stack_idx--;
 795   return bidi_it->level_stack[bidi_it->stack_idx].level;
 796 }
 797
 798 /* Record in SAVED_INFO the information about the current character.  */
 799 static INLINE void
 800 bidi_remember_char (struct bidi_saved_info *saved_info,
 801                     struct bidi_it *bidi_it)
 802 {
 803   saved_info->charpos = bidi_it->charpos;
 804   saved_info->bytepos = bidi_it->bytepos;
 805   saved_info->type = bidi_it->type;
 806   bidi_check_type (bidi_it->type);
 807   saved_info->type_after_w1 = bidi_it->type_after_w1;
 808   bidi_check_type (bidi_it->type_after_w1);
 809   saved_info->orig_type = bidi_it->orig_type;
 810   bidi_check_type (bidi_it->orig_type);
 811 }
 812
 813 /* Resolve the type of a neutral character according to the type of
 814    surrounding strong text and the current embedding level.  */
 815 static INLINE bidi_type_t
 816 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
 817 {
 818   /* N1: European and Arabic numbers are treated as though they were R.  */
 819   if (next_type == WEAK_EN || next_type == WEAK_AN)
 820     next_type = STRONG_R;
 821   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
 822     prev_type = STRONG_R;
 823
 824   if (next_type == prev_type)   /* N1 */
 825     return next_type;
 826   else if ((lev & 1) == 0)      /* N2 */
 827     return STRONG_L;
 828   else
 829     return STRONG_R;
 830 }
 831
 832 static INLINE int
 833 bidi_explicit_dir_char (int c)
 834 {
 835   /* FIXME: this should be replaced with a lookup table with suitable
 836      bits set, like standard C ctype macros do.  */
 837   return (c == LRE_CHAR || c == LRO_CHAR
 838           || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
 839 }
 840
 841 /* A helper function for bidi_resolve_explicit.  It advances BIDI_IT to
 842    the next character in logical order, records its type, and returns
 843    the embedding level in effect after any LRE, LRO, RLE, RLO or PDF at
 844    that position; it does not take empty embeddings into account.  */
 845 static int
 846 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 847 {
 848   int curchar;                  /* character at the new position, or BIDI_EOB */
 849   bidi_type_t type;             /* type stored in bidi_it->type on return */
 850   int current_level;            /* level on top of the stack on entry */
 851   int new_level;                /* the return value */
 852   bidi_dir_t override;          /* override to push along with a new level */
 853
 854   if (bidi_it->bytepos < BEGV_BYTE      /* after reseat to BEGV? */
 855       || bidi_it->first_elt)
 856     {
 857       bidi_it->first_elt = 0;   /* no advance here, only fix up the position */
 858       if (bidi_it->charpos < BEGV)
 859         bidi_it->charpos = BEGV;
 860       bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
 861     }
 862   else if (bidi_it->bytepos < ZV_BYTE)  /* don't move at ZV */
 863     {
 864       bidi_it->charpos++;
 865       if (bidi_it->ch_len == 0) /* bytepos would fall out of step with charpos */
 866         abort ();
 867       bidi_it->bytepos += bidi_it->ch_len;
 868     }
 869
 870   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
 871   override = bidi_it->level_stack[bidi_it->stack_idx].override;
 872   new_level = current_level;    /* unchanged unless a level is pushed or popped */
 873
 874   /* in case it is a unibyte character (not yet implemented) */
 875   /* _fetch_multibyte_char_len = 1; */
 876   if (bidi_it->bytepos >= ZV_BYTE)
 877     {
 878       curchar = BIDI_EOB;
 879       bidi_it->ch_len = 1;
 880     }
 881   else
 882     {
 883       curchar = FETCH_CHAR (bidi_it->bytepos);
 884       bidi_it->ch_len = CHAR_BYTES (curchar);
 885     }
 886   bidi_it->ch = curchar;
 887
 888   /* Don't apply directional override here, as all the types we handle
 889      below will not be affected by the override anyway, and we need
 890      the original type unaltered.  The override will be applied in
 891      bidi_resolve_weak.  */
 892   type = bidi_get_type (curchar, NEUTRAL_DIR);
 893   bidi_it->orig_type = type;
 894   bidi_check_type (bidi_it->orig_type);
 895
 896   if (type != PDF)
 897     bidi_it->prev_was_pdf = 0;
 898
 899   bidi_it->type_after_w1 = UNKNOWN_BT;  /* set below only for formatting codes */
 900
 901   switch (type)
 902     {
 903       case RLE: /* X2 */
 904       case RLO: /* X4 */
 905         bidi_it->type_after_w1 = type;  /* keep the code's own type for later */
 906         bidi_check_type (bidi_it->type_after_w1);
 907         type = WEAK_BN; /* X9/Retaining */
 908         if (bidi_it->ignore_bn_limit <= 0)
 909           {
 910             if (current_level <= BIDI_MAXLEVEL - 4)
 911               {
 912                 /* Compute the least odd embedding level greater than
 913                    the current level.  */
 914                 new_level = ((current_level + 1) & ~1) + 1;
 915                 if (bidi_it->type_after_w1 == RLE)
 916                   override = NEUTRAL_DIR;
 917                 else
 918                   override = R2L;
 919                 if (current_level == BIDI_MAXLEVEL - 4)
 920                   bidi_it->invalid_rl_levels = 0;
 921                 bidi_push_embedding_level (bidi_it, new_level, override);
 922               }
 923             else
 924               {
 925                 bidi_it->invalid_levels++;
 926                 /* See the commentary about invalid_rl_levels below.  */
 927                 if (bidi_it->invalid_rl_levels < 0)
 928                   bidi_it->invalid_rl_levels = 0;
 929                 bidi_it->invalid_rl_levels++;
 930               }
 931           }
 932         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 933                  || bidi_it->next_en_pos > bidi_it->charpos)
 934           type = WEAK_EN;
 935         break;
 936       case LRE: /* X3 */
 937       case LRO: /* X5 */
 938         bidi_it->type_after_w1 = type;  /* keep the code's own type for later */
 939         bidi_check_type (bidi_it->type_after_w1);
 940         type = WEAK_BN; /* X9/Retaining */
 941         if (bidi_it->ignore_bn_limit <= 0)
 942           {
 943             if (current_level <= BIDI_MAXLEVEL - 5)
 944               {
 945                 /* Compute the least even embedding level greater than
 946                    the current level.  */
 947                 new_level = ((current_level + 2) & ~1);
 948                 if (bidi_it->type_after_w1 == LRE)
 949                   override = NEUTRAL_DIR;
 950                 else
 951                   override = L2R;
 952                 bidi_push_embedding_level (bidi_it, new_level, override);
 953               }
 954             else
 955               {
 956                 bidi_it->invalid_levels++;
 957                 /* invalid_rl_levels counts invalid levels encountered
 958                    while the embedding level was already too high for
 959                    LRE/LRO, but not for RLE/RLO.  That is because
 960                    there may be exactly one PDF which we should not
 961                    ignore even though invalid_levels is non-zero.
 962                    invalid_rl_levels helps to know what PDF is
 963                    that.  */
 964                 if (bidi_it->invalid_rl_levels >= 0)
 965                   bidi_it->invalid_rl_levels++;
 966               }
 967           }
 968         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 969                  || bidi_it->next_en_pos > bidi_it->charpos)
 970           type = WEAK_EN;
 971         break;
 972       case PDF: /* X7 */
 973         bidi_it->type_after_w1 = type;  /* keep the code's own type for later */
 974         bidi_check_type (bidi_it->type_after_w1);
 975         type = WEAK_BN; /* X9/Retaining */
 976         if (bidi_it->ignore_bn_limit <= 0)
 977           {
 978             if (!bidi_it->invalid_rl_levels) /* presumably the one PDF not to ignore */
 979               {
 980                 new_level = bidi_pop_embedding_level (bidi_it);
 981                 bidi_it->invalid_rl_levels = -1;
 982                 if (bidi_it->invalid_levels)
 983                   bidi_it->invalid_levels--;
 984                 /* else nothing: UAX#9 says to ignore invalid PDFs */
 985               }
 986             if (!bidi_it->invalid_levels) /* NOTE(review): also runs after the branch above; confirm intended */
 987               new_level = bidi_pop_embedding_level (bidi_it);
 988             else
 989               {
 990                 bidi_it->invalid_levels--;
 991                 bidi_it->invalid_rl_levels--;
 992               }
 993           }
 994         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 995                  || bidi_it->next_en_pos > bidi_it->charpos)
 996           type = WEAK_EN;
 997         break;
 998       default:
 999         /* Nothing.  */
1000         break;
1001     }
1002
1003   bidi_it->type = type;         /* BN or EN for formatting codes, else the original type */
1004   bidi_check_type (bidi_it->type);
1005
1006   return new_level;
1007 }
1008
1009 /* Given an iterator state in BIDI_IT, advance one character position
1010    in the buffer to the next character (in the logical order), resolve
1011    any explicit embeddings and directional overrides, and return the
1012    embedding level of the character after resolving explicit
1013    directives and ignoring empty embeddings.  */
1014 static int
1015 bidi_resolve_explicit (struct bidi_it *bidi_it)
1016 {
1017   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1018   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1019
1020   if (prev_level < new_level
1021       && bidi_it->type == WEAK_BN
1022       && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1023       && bidi_it->bytepos + bidi_it->ch_len < ZV_BYTE /* a next char exists */
1024       && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1025                                              + bidi_it->ch_len)))
1026     {
1027       /* Avoid pushing and popping embedding levels if the level run
1028          is empty, as this breaks level runs where it shouldn't.
1029          UAX#9 removes all the explicit embedding and override codes,
1030          so empty embeddings disappear without a trace.  We need to
1031          behave as if we did the same.  */
1032       struct bidi_it saved_it;
1033       int level = prev_level;
1034
1035       bidi_copy_it (&saved_it, bidi_it);
1036
1037       /* Scan the run of formatting codes, never looking at ZV_BYTE or beyond.  */
1038       while (bidi_it->bytepos + bidi_it->ch_len < ZV_BYTE
1039              && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1040                                                     + bidi_it->ch_len)))
1041         level = bidi_resolve_explicit_1 (bidi_it);
1042
1043       if (level == prev_level)  /* empty embedding */
1044         saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1045       else                      /* this embedding is non-empty */
1046         saved_it.ignore_bn_limit = -1;
1047
1048       bidi_copy_it (bidi_it, &saved_it);       /* rewind to where we started */
1049       if (bidi_it->ignore_bn_limit > 0)
1050         {
1051           /* We pushed a level, but we shouldn't have.  Undo that. */
1052           if (!bidi_it->invalid_rl_levels)
1053             {
1054               new_level = bidi_pop_embedding_level (bidi_it);
1055               bidi_it->invalid_rl_levels = -1;
1056               if (bidi_it->invalid_levels)
1057                 bidi_it->invalid_levels--;
1058             }
1059           if (!bidi_it->invalid_levels)
1060             new_level = bidi_pop_embedding_level (bidi_it);
1061           else
1062             {
1063               bidi_it->invalid_levels--;
1064               bidi_it->invalid_rl_levels--;
1065             }
1066         }
1067     }
1068
1069   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1070     {
1071       bidi_set_paragraph_end (bidi_it);
1072       /* This is needed by bidi_resolve_weak below, and in L1.  */
1073       bidi_it->type_after_w1 = bidi_it->type;
1074       bidi_check_type (bidi_it->type_after_w1);
1075     }
1076
1077   return new_level;
1078 }
1079
1080 /* Advance BIDI_IT to the next character, apply any override (X6) and
1081    the weak-type rules W1-W7 to it, and return the resulting type.  */
1082 static bidi_type_t
1083 bidi_resolve_weak (struct bidi_it *bidi_it)
1084 {
1085   bidi_type_t type;
1086   bidi_dir_t override;          /* override in effect at the new position */
1087   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1088   int new_level  = bidi_resolve_explicit (bidi_it);
1089   int next_char;                /* lookahead character for W4 and W5 */
1090   bidi_type_t type_of_next;
1091   struct bidi_it saved_it;      /* state to return to after a lookahead scan */
1092
1093   type = bidi_it->type;
1094   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1095
1096   if (type == UNKNOWN_BT
1097       || type == LRE
1098       || type == LRO
1099       || type == RLE
1100       || type == RLO
1101       || type == PDF)
1102     abort ();                   /* formatting codes should arrive here as BN or EN */
1103
1104   if (new_level != prev_level
1105       || bidi_it->type == NEUTRAL_B)
1106     {
1107       /* We've got a new embedding level run, compute the directional
1108          type of sor and initialize per-run variables (UAX#9, clause
1109          X10).  */
1110       bidi_set_sor_type (bidi_it, prev_level, new_level);
1111     }
1112   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1113            || type == WEAK_BN || type == STRONG_AL)
1114     bidi_it->type_after_w1 = type;      /* needed in L1 */
1115   bidi_check_type (bidi_it->type_after_w1);
1116
1117   /* Level and directional override status are already recorded in
1118      bidi_it, and do not need any change; see X6.  */
1119   if (override == R2L)          /* X6 */
1120     type = STRONG_R;
1121   else if (override == L2R)
1122     type = STRONG_L;
1123   else
1124     {
1125       if (type == WEAK_NSM)     /* W1 */
1126         {
1127           /* Note that we don't need to consider the case where the
1128              prev character has its type overridden by an RLO or LRO,
1129              because then either the type of this NSM would have been
1130              also overridden, or the previous character is outside the
1131              current level run, and thus not relevant to this NSM.
1132              This is why NSM gets the type_after_w1 of the previous
1133              character.  */
1134           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1135               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1136               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1137             type = bidi_it->prev.type_after_w1;
1138           else if (bidi_it->sor == R2L)
1139             type = STRONG_R;
1140           else if (bidi_it->sor == L2R)
1141             type = STRONG_L;
1142           else /* shouldn't happen! */
1143             abort ();
1144         }
1145       if (type == WEAK_EN       /* W2 */
1146           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1147         type = WEAK_AN;
1148       else if (type == STRONG_AL) /* W3 */
1149         type = STRONG_R;
1150       else if ((type == WEAK_ES /* W4 */
1151                 && bidi_it->prev.type_after_w1 == WEAK_EN
1152                 && bidi_it->prev.orig_type == WEAK_EN)
1153                || (type == WEAK_CS
1154                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1155                         && bidi_it->prev.orig_type == WEAK_EN)
1156                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1157         {
1158           next_char =
1159             bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1160             ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1161           type_of_next = bidi_get_type (next_char, override);
1162
1163           if (type_of_next == WEAK_BN
1164               || bidi_explicit_dir_char (next_char))
1165             {
1166               bidi_copy_it (&saved_it, bidi_it);        /* scan ahead on the live iterator */
1167               while (bidi_resolve_explicit (bidi_it) == new_level
1168                      && bidi_it->type == WEAK_BN)
1169                 ;
1170               type_of_next = bidi_it->type;
1171               bidi_copy_it (bidi_it, &saved_it);        /* then rewind */
1172             }
1173
1174           /* If the next character is EN, but the last strong-type
1175              character is AL, that next EN will be changed to AN when
1176              we process it in W2 above.  So in that case, this ES
1177              should not be changed into EN.  */
1178           if (type == WEAK_ES
1179               && type_of_next == WEAK_EN
1180               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1181             type = WEAK_EN;
1182           else if (type == WEAK_CS)
1183             {
1184               if (bidi_it->prev.type_after_w1 == WEAK_AN
1185                   && (type_of_next == WEAK_AN
1186                       /* If the next character is EN, but the last
1187                          strong-type character is AL, EN will be later
1188                          changed to AN when we process it in W2 above.
1189                          So in that case, this CS sits between two ANs
1190                          and becomes AN as well.  */
1191                       || (type_of_next == WEAK_EN
1192                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1193                 type = WEAK_AN;
1194               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1195                        && type_of_next == WEAK_EN
1196                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1197                 type = WEAK_EN;
1198             }
1199         }
1200       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1201                || type == WEAK_BN)      /* W5/Retaining */
1202         {
1203           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1204               || bidi_it->next_en_pos > bidi_it->charpos)
1205             type = WEAK_EN;
1206           else                  /* W5: ET/BN with EN after it.  */
1207             {
1208               EMACS_INT en_pos = bidi_it->charpos + 1; /* position of the lookahead char */
1209
1210               next_char =
1211                 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1212                 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1213               type_of_next = bidi_get_type (next_char, override);
1214
1215               if (type_of_next == WEAK_ET
1216                   || type_of_next == WEAK_BN
1217                   || bidi_explicit_dir_char (next_char))
1218                 {
1219                   bidi_copy_it (&saved_it, bidi_it);    /* scan ahead on the live iterator */
1220                   while (bidi_resolve_explicit (bidi_it) == new_level
1221                          && (bidi_it->type == WEAK_BN
1222                              || bidi_it->type == WEAK_ET))
1223                     ;
1224                   type_of_next = bidi_it->type;
1225                   en_pos = bidi_it->charpos;            /* where the scan stopped */
1226                   bidi_copy_it (bidi_it, &saved_it);    /* then rewind */
1227                 }
1228               if (type_of_next == WEAK_EN)
1229                 {
1230                   /* If the last strong character is AL, the EN we've
1231                      found will become AN when we get to it (W2). */
1232                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1233                     {
1234                       type = WEAK_EN;
1235                       /* Remember this EN position, to speed up processing
1236                          of the next ETs.  */
1237                       bidi_it->next_en_pos = en_pos;
1238                     }
1239                   else if (type == WEAK_BN)
1240                     type = NEUTRAL_ON; /* W6/Retaining */
1241                 }
1242             }
1243         }
1244     }
1245
1246   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1247       || (type == WEAK_BN
1248           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1249               || bidi_it->prev.type_after_w1 == WEAK_ES
1250               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1251     type = NEUTRAL_ON;
1252
1253   /* Store the type we've got so far, before we clobber it with strong
1254      types in W7 and while resolving neutral types.  But leave alone
1255      the original types that were recorded above, because we will need
1256      them for the L1 clause.  */
1257   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1258     bidi_it->type_after_w1 = type;
1259   bidi_check_type (bidi_it->type_after_w1);
1260
1261   if (type == WEAK_EN)  /* W7 */
1262     {
1263       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1264           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1265         type = STRONG_L;
1266     }
1267
1268   bidi_it->type = type;
1269   bidi_check_type (bidi_it->type);
1270   return type;
1271 }
1272 /* Advance BIDI_IT one character and return its type after the weak and neutral rules.  */
1273 static bidi_type_t
1274 bidi_resolve_neutral (struct bidi_it *bidi_it)
1275 {
1276   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* before advancing */
1277   bidi_type_t type = bidi_resolve_weak (bidi_it);
1278   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* after advancing */
1279
1280   if (!(type == STRONG_R
1281         || type == STRONG_L
1282         || type == WEAK_BN
1283         || type == WEAK_EN
1284         || type == WEAK_AN
1285         || type == NEUTRAL_B
1286         || type == NEUTRAL_S
1287         || type == NEUTRAL_WS
1288         || type == NEUTRAL_ON))
1289     abort ();                   /* any other type from bidi_resolve_weak is unexpected */
1290
1291   if (bidi_get_category (type) == NEUTRAL
1292       || (type == WEAK_BN && prev_level == current_level))
1293     {
1294       if (bidi_it->next_for_neutral.type != UNKNOWN_BT) /* lookahead result already known */
1295         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1296                                        bidi_it->next_for_neutral.type,
1297                                        current_level);
1298       else
1299         {
1300           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1301              the assumption of batch-style processing; see clauses W4,
1302              W5, and especially N1, which require to look far forward
1303              (as well as back) in the buffer.  May the fleas of a
1304              thousand camels infest the armpits of those who design
1305              supposedly general-purpose algorithms by looking at their
1306              own implementations, and fail to consider other possible
1307              implementations!  */
1308           struct bidi_it saved_it;      /* state of the character being resolved */
1309           bidi_type_t next_type;        /* type to use as the text after the neutral */
1310
1311           if (bidi_it->scan_dir == -1)  /* the scan below only goes forward */
1312             abort ();
1313
1314           bidi_copy_it (&saved_it, bidi_it);
1315           /* Scan the text forward until we find the first non-neutral
1316              character, and then use that to resolve the neutral we
1317              are dealing with now.  We also cache the scanned iterator
1318              states, to salvage some of the effort later.  */
1319           bidi_cache_iterator_state (bidi_it, 0);
1320           do {
1321             /* Record the info about the previous character, so that
1322                it will be cached below with this state.  */
1323             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1324                 && bidi_it->type != WEAK_BN)
1325               bidi_remember_char (&bidi_it->prev, bidi_it);
1326             type = bidi_resolve_weak (bidi_it);
1327             /* Paragraph separators have their levels fully resolved
1328                at this point, so cache them as resolved.  */
1329             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1330             /* FIXME: implement L1 here, by testing for a newline and
1331                resetting the level for any sequence of whitespace
1332                characters adjacent to it.  */
1333           } while (!(type == NEUTRAL_B
1334                      || (type != WEAK_BN
1335                          && bidi_get_category (type) != NEUTRAL)
1336                      /* This is all per level run, so stop when we
1337                         reach the end of this level run.  */
1338                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1339                      current_level));
1340
1341           bidi_remember_char (&saved_it.next_for_neutral, bidi_it); /* where the scan stopped */
1342
1343           switch (type)
1344             {
1345               case STRONG_L:
1346               case STRONG_R:
1347               case STRONG_AL:
1348                 next_type = type;
1349                 break;
1350               case WEAK_EN:
1351               case WEAK_AN:
1352                 /* N1: ``European and Arabic numbers are treated as
1353                    though they were R.''  */
1354                 next_type = STRONG_R;
1355                 saved_it.next_for_neutral.type = STRONG_R;
1356                 break;
1357               case WEAK_BN:
1358                 if (!bidi_explicit_dir_char (bidi_it->ch))
1359                   abort ();             /* can't happen: BNs are skipped */
1360                 /* FALLTHROUGH */
1361               case NEUTRAL_B:
1362                 /* Marched all the way to the end of this level run.
1363                    We need to use the eor type, whose information is
1364                    stored by bidi_set_sor_type in the prev_for_neutral
1365                    member.  */
1366                 if (saved_it.type != WEAK_BN
1367                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1368                   {
1369                     next_type = bidi_it->prev_for_neutral.type;
1370                     saved_it.next_for_neutral.type = next_type;
1371                     bidi_check_type (next_type);
1372                   }
1373                 else
1374                   {
1375                     /* This is a BN which does not adjoin neutrals.
1376                        Leave its type alone.  */
1377                     bidi_copy_it (bidi_it, &saved_it);  /* rewind to the BN itself */
1378                     return bidi_it->type;
1379                   }
1380                 break;
1381               default:
1382                 abort ();
1383             }
1384           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1385                                          next_type, current_level);
1386           saved_it.type = type;         /* stored in the copy that is restored below */
1387           bidi_check_type (type);
1388           bidi_copy_it (bidi_it, &saved_it);    /* rewind to the resolved character */
1389         }
1390     }
1391   return type;
1392 }
1393
1394 /* Step BIDI_IT forward by one character in logical order and return
1395    the bidi type of the character it lands on, as computed by
1396    bidi_resolve_neutral.  Aborts unless the scan direction is forward.  */
1397 static bidi_type_t
1398 bidi_type_of_next_char (struct bidi_it *bidi_it)
1399 {
1400   int past_limit, nonempty_run_over;
1401
1402   /* Types are only computed afresh while scanning forward.  */
1403   if (bidi_it->scan_dir != 1)
1404     abort ();
1405
1406   /* Stop ignoring BNs once we have left the stretch that held nothing
1407      but empty embeddings (positive limit), or once the formatting
1408      codes of a non-empty embedding (limit of -1) are behind us.  */
1409   past_limit = (bidi_it->ignore_bn_limit > 0
1410                 && bidi_it->ignore_bn_limit <= bidi_it->charpos);
1411   nonempty_run_over = (bidi_it->ignore_bn_limit == -1
1412                        && !bidi_explicit_dir_char (bidi_it->ch));
1413   if (past_limit || nonempty_run_over)
1414     bidi_it->ignore_bn_limit = 0;
1415
1416   return bidi_resolve_neutral (bidi_it);
1417 }
1418
1419 /* Given an iterator state BIDI_IT, advance one character position in
1420    the buffer to the next character (in the scan direction), resolve
1421    the embedding and implicit levels of that next character, and
1422    return the resulting level.  Backward scans must hit the cache.  */
1423 static int
1424 bidi_level_of_next_char (struct bidi_it *bidi_it)
1425 {
1426   bidi_type_t type;
1427   int level, prev_level = -1;   /* prev_level stays -1 on backward scans */
1428   struct bidi_saved_info next_for_neutral; /* copy taken before the cache lookup */
1429
1430   if (bidi_it->scan_dir == 1)
1431     {
1432       /* There's no sense in trying to advance if we hit end of text.  */
1433       if (bidi_it->bytepos >= ZV_BYTE)
1434         return bidi_it->resolved_level;
1435
1436       /* Record the info about the previous character.  */
1437       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1438           && bidi_it->type != WEAK_BN)
1439         bidi_remember_char (&bidi_it->prev, bidi_it);
1440       if (bidi_it->type_after_w1 == STRONG_R
1441           || bidi_it->type_after_w1 == STRONG_L
1442           || bidi_it->type_after_w1 == STRONG_AL)
1443         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1444       /* FIXME: it sounds like we don't need both prev and
1445          prev_for_neutral members, but I'm leaving them both for now.  */
1446       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1447           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1448         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1449
1450       /* If we overstepped the characters used for resolving neutrals
1451          and whitespace, invalidate their info in the iterator.  */
1452       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1453         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1454       if (bidi_it->next_en_pos >= 0
1455           && bidi_it->charpos >= bidi_it->next_en_pos)
1456         bidi_it->next_en_pos = -1;
1457       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1458           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1459         bidi_it->next_for_ws.type = UNKNOWN_BT;
1460
1461       /* This must be taken before we fill the iterator with the info
1462          about the next char.  If we scan backwards, the iterator
1463          state must be already cached, so there's no need to know the
1464          embedding level of the previous character, since we will be
1465          returning to our caller shortly.  */
1466       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1467     }
1468   next_for_neutral = bidi_it->next_for_neutral;
1469
1470   /* Perhaps it is already cached.  */
1471   type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1472   if (type != UNKNOWN_BT)
1473     {
1474       /* Don't lose the information for resolving neutrals!  The
1475          cached states could have been cached before their
1476          next_for_neutral member was computed.  If we are on our way
1477          forward, we can simply take the info from the previous
1478          state.  */
1479       if (bidi_it->scan_dir == 1
1480           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1481         bidi_it->next_for_neutral = next_for_neutral;
1482
1483       /* If resolved_level is -1, it means this state was cached
1484          before it was completely resolved, so we cannot return
1485          it.  */
1486       if (bidi_it->resolved_level != -1)
1487         return bidi_it->resolved_level;
1488     }
1489   if (bidi_it->scan_dir == -1)
1490     /* If we are going backwards, the iterator state is already cached
1491        from previous scans, and should be fully resolved.  */
1492     abort ();
1493
1494   if (type == UNKNOWN_BT)       /* not cached: compute the type now */
1495     type = bidi_type_of_next_char (bidi_it);
1496
1497   if (type == NEUTRAL_B)        /* NOTE(review): assumes resolved_level is already set for B; confirm */
1498     return bidi_it->resolved_level;
1499
1500   level = bidi_it->level_stack[bidi_it->stack_idx].level;
1501   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1502       || (type == WEAK_BN && prev_level == level))
1503     {
1504       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1505         abort ();
1506
1507       /* If the cached state shows a neutral character, it was not
1508          resolved by bidi_resolve_neutral, so do it now.  */
1509       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1510                                      bidi_it->next_for_neutral.type,
1511                                      level);
1512     }
1513
1514   if (!(type == STRONG_R
1515         || type == STRONG_L
1516         || type == WEAK_BN
1517         || type == WEAK_EN
1518         || type == WEAK_AN))
1519     abort ();                   /* everything else must be resolved by now */
1520   bidi_it->type = type;
1521   bidi_check_type (bidi_it->type);
1522
1523   /* For L1 below, we need to know, for each WS character, whether
1524      it belongs to a sequence of WS characters preceding a newline
1525      or a TAB or a paragraph separator.  */
1526   if (bidi_it->orig_type == NEUTRAL_WS
1527       && bidi_it->next_for_ws.type == UNKNOWN_BT)
1528     {
1529       int ch;
1530       int clen = bidi_it->ch_len;
1531       EMACS_INT bpos = bidi_it->bytepos;
1532       EMACS_INT cpos = bidi_it->charpos;
1533       bidi_type_t chtype;
1534
1535       do {                      /* local scan: the iterator itself is not moved */
1536         /*_fetch_multibyte_char_len = 1;*/
1537         ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
1538         bpos += clen;
1539         cpos++;
1540         clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1541         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1542           chtype = NEUTRAL_B;
1543         else
1544           chtype = bidi_get_type (ch, NEUTRAL_DIR);
1545       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1546                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1547       bidi_it->next_for_ws.type = chtype;
1548       bidi_check_type (bidi_it->next_for_ws.type);
1549       bidi_it->next_for_ws.charpos = cpos;
1550       bidi_it->next_for_ws.bytepos = bpos;
1551     }
1552
1553   /* Resolve implicit levels, with a twist: PDFs get the embedding
1554      level of the embedding they terminate.  See below for the
1555      reason.  */
1556   if (bidi_it->orig_type == PDF
1557       /* Don't do this if this formatting code didn't change the
1558          embedding level due to invalid or empty embeddings.  */
1559       && prev_level != level)
1560     {
1561       /* Don't look in UAX#9 for the reason for this: it's our own
1562          private quirk.  The reason is that we want the formatting
1563          codes to be delivered so that they bracket the text of their
1564          embedding.  For example, given the text
1565
1566              {RLO}teST{PDF}
1567
1568          we want it to be displayed as
1569
1570              {PDF}STet{RLO}
1571
1572          not as
1573
1574              STet{RLO}{PDF}
1575
1576          which will result because we bump up the embedding level as
1577          soon as we see the RLO and pop it as soon as we see the PDF,
1578          so RLO itself has the same embedding level as "teST", and
1579          thus would be normally delivered last, just before the PDF.
1580          The assignment below fiddles with the level of PDF so that
1581          this ugly side effect does not happen.
1582
1583          (This is, of course, only important if the formatting codes
1584          are actually displayed, but Emacs does need to display them
1585          if the user wants to.)  */
1586       level = prev_level;
1587     }
1588   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1589            || bidi_it->orig_type == NEUTRAL_S
1590            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1591            /* || bidi_it->ch == LINESEP_CHAR */
1592            || (bidi_it->orig_type == NEUTRAL_WS
1593                && (bidi_it->next_for_ws.type == NEUTRAL_B
1594                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
1595     level = bidi_it->level_stack[0].level;      /* level at the bottom of the stack */
1596   else if ((level & 1) == 0) /* I1 */
1597     {
1598       if (type == STRONG_R)
1599         level++;
1600       else if (type == WEAK_EN || type == WEAK_AN)
1601         level += 2;
1602     }
1603   else                  /* I2 */
1604     {
1605       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1606         level++;
1607     }
1608
1609   bidi_it->resolved_level = level;
1610   return level;
1611 }
1612
1613 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
1614    non-zero, we are at the end of a level, and we need to prepare to
1615    resume the scan of the lower level.
1616
1617    If this level's other edge is cached, we simply jump to it, filling
1618    the iterator structure with the iterator state on the other edge.
1619    Otherwise, we walk the buffer until we come back to the same level
1620    as LEVEL.
1621
1622    Note: we are not talking here about a ``level run'' in the UAX#9
1623    sense of the term, but rather about a ``level'' which includes
1624    all the levels higher than it.  In other words, given the levels
1625    like this:
1626
1627          11111112222222333333334443343222222111111112223322111
1628                 A      B                    C
1629
1630    and assuming we are at point A scanning left to right, this
1631    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1632    at point B.  */
1633 static void
1634 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1635 {
1636   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1637   int idx;
1638
1639   /* Try the cache first.  */
1640   if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1641     bidi_cache_fetch_state (idx, bidi_it);
1642   else
1643     {
1644       int new_level;
1645
1646       if (end_flag)
1647         abort (); /* if we are at end of level, its edges must be cached */
1648
1649       bidi_cache_iterator_state (bidi_it, 1);
1650       do {
1651         new_level = bidi_level_of_next_char (bidi_it);
1652         bidi_cache_iterator_state (bidi_it, 1);
1653       } while (new_level >= level);
1654     }
1655 }
1656
1657 void
1658 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1659 {
1660   int old_level, new_level, next_level;
1661   struct bidi_it sentinel;
1662
1663   if (bidi_it->scan_dir == 0)
1664     {
1665       bidi_it->scan_dir = 1;    /* default to logical order */
1666     }
1667
1668   /* If we just passed a newline, initialize for the next line.  */
1669   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1670     bidi_line_init (bidi_it);
1671
1672   /* Prepare the sentinel iterator state, and cache it.  When we bump
1673      into it, scanning backwards, we'll know that the last non-base
1674      level is exhausted.  */
1675   if (bidi_cache_idx == 0)
1676     {
1677       bidi_copy_it (&sentinel, bidi_it);
1678       if (bidi_it->first_elt)
1679         {
1680           sentinel.charpos--;   /* cached charpos needs to be monotonic */
1681           sentinel.bytepos--;
1682           sentinel.ch = '\n';   /* doesn't matter, but why not? */
1683           sentinel.ch_len = 1;
1684         }
1685       bidi_cache_iterator_state (&sentinel, 1);
1686     }
1687
1688   old_level = bidi_it->resolved_level;
1689   new_level = bidi_level_of_next_char (bidi_it);
1690
1691   /* Reordering of resolved levels (clause L2) is implemented by
1692      jumping to the other edge of the level and flipping direction of
1693      scanning the text whenever we find a level change.  */
1694   if (new_level != old_level)
1695     {
1696       int ascending = new_level > old_level;
1697       int level_to_search = ascending ? old_level + 1 : old_level;
1698       int incr = ascending ? 1 : -1;
1699       int expected_next_level = old_level + incr;
1700
1701       /* Jump (or walk) to the other edge of this level.  */
1702       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1703       /* Switch scan direction and peek at the next character in the
1704          new direction.  */
1705       bidi_it->scan_dir = -bidi_it->scan_dir;
1706
1707       /* The following loop handles the case where the resolved level
1708          jumps by more than one.  This is typical for numbers inside a
1709          run of text with left-to-right embedding direction, but can
1710          also happen in other situations.  In those cases the decision
1711          where to continue after a level change, and in what direction,
1712          is tricky.  For example, given a text like below:
1713
1714                   abcdefgh
1715                   11336622
1716
1717          (where the numbers below the text show the resolved levels),
1718          the result of reordering according to UAX#9 should be this:
1719
1720                   efdcghba
1721
1722          This is implemented by the loop below which flips direction
1723          and jumps to the other edge of the level each time it finds
1724          the new level not to be the expected one.  The expected level
1725          is always one more or one less than the previous one.  */
1726       next_level = bidi_peek_at_next_level (bidi_it);
1727       while (next_level != expected_next_level)
1728         {
1729           expected_next_level += incr;
1730           level_to_search += incr;
1731           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1732           bidi_it->scan_dir = -bidi_it->scan_dir;
1733           next_level = bidi_peek_at_next_level (bidi_it);
1734         }
1735
1736       /* Finally, deliver the next character in the new direction.  */
1737       next_level = bidi_level_of_next_char (bidi_it);
1738     }
1739
1740   /* Take note when we have just processed the newline that precedes
1741      the end of the paragraph.  The next time we are about to be
1742      called, set_iterator_to_next will automatically reinit the
1743      paragraph direction, if needed.  We do this at the newline before
1744      the paragraph separator, because the next character might not be
1745      the first character of the next paragraph, due to the bidi
1746      reordering, whereas we _must_ know the paragraph base direction
1747      _before_ we process the paragraph's text, since the base
1748      direction affects the reordering.  */
1749   if (bidi_it->scan_dir == 1
1750       && bidi_it->orig_type == NEUTRAL_B
1751       && bidi_it->bytepos < ZV_BYTE)
1752     {
1753       EMACS_INT sep_len =
1754         bidi_at_paragraph_end (bidi_it->charpos + 1,
1755                                bidi_it->bytepos + bidi_it->ch_len);
1756       if (sep_len >= 0)
1757         {
1758           bidi_it->new_paragraph = 1;
1759           /* Record the buffer position of the last character of the
1760              paragraph separator.  */
1761           bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
1762         }
1763     }
1764
1765   if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1766     {
1767       /* If we are at paragraph's base embedding level and beyond the
1768          last cached position, the cache's job is done and we can
1769          discard it.  */
1770       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1771           && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1772         bidi_cache_reset ();
1773         /* But as long as we are caching during forward scan, we must
1774            cache each state, or else the cache integrity will be
1775            compromised: it assumes cached states correspond to buffer
1776            positions 1:1.  */
1777       else
1778         bidi_cache_iterator_state (bidi_it, 1);
1779     }
1780 }
1781
1782 /* This is meant to be called from within the debugger, whenever you
1783    wish to examine the cache contents.  */
1784 void
1785 bidi_dump_cached_states (void)
1786 {
1787   int i;
1788   int ndigits = 1;
1789
1790   if (bidi_cache_idx == 0)
1791     {
1792       fprintf (stderr, "The cache is empty.\n");
1793       return;
1794     }
1795   fprintf (stderr, "Total of %d state%s in cache:\n",
1796            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1797
1798   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1799     ndigits++;
1800   fputs ("ch  ", stderr);
1801   for (i = 0; i < bidi_cache_idx; i++)
1802     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1803   fputs ("\n", stderr);
1804   fputs ("lvl ", stderr);
1805   for (i = 0; i < bidi_cache_idx; i++)
1806     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1807   fputs ("\n", stderr);
1808   fputs ("pos ", stderr);
1809   for (i = 0; i < bidi_cache_idx; i++)
1810     fprintf (stderr, "%*ld", ndigits, (long)bidi_cache[i].charpos);
1811   fputs ("\n", stderr);
1812 }