code.delx.au - gnu-emacs/blob - src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v = XINT (val);
 208
 209       if (v < 0 || v > MAX_CHAR)
 210         abort ();
 211
 212       return v;
 213     }
 214
 215   return c;
 216 }
 217
 218 /* Determine the start-of-run (sor) directional type given the two
 219    embedding levels on either side of the run boundary.  Also, update
 220    the saved info about previously seen characters, since that info is
 221    generally valid for a single level run.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int higher_level = level_before > level_after ? level_before : level_after;
 226
 227   /* The prev_was_pdf gork is required for when we have several PDFs
 228      in a row.  In that case, we want to compute the sor type for the
 229      next level run only once: when we see the first PDF.  That's
 230      because the sor type depends only on the higher of the two levels
 231      that we find on the two sides of the level boundary (see UAX#9,
 232      clause X10), and so we don't need to know the final embedding
 233      level to which we descend after processing all the PDFs.  */
 234   if (!bidi_it->prev_was_pdf || level_before < level_after)
 235     /* FIXME: should the default sor direction be user selectable?  */
 236     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 237   if (level_before > level_after)
 238     bidi_it->prev_was_pdf = 1;
 239
 240   bidi_it->prev.type = UNKNOWN_BT;
 241   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 242     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 243   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 244   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 245   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 246   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 247     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 248   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 249 }
 250
 251 /* Push the current embedding level and override status; reset the
 252    current level to LEVEL and the current override status to OVERRIDE.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   bidi_it->stack_idx++;
 258   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 260   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 261 }
 262
 263 /* Pop the embedding level and directional override status from the
 264    stack, and return the new level.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   /* UAX#9 says to ignore invalid PDFs.  */
 269   if (bidi_it->stack_idx > 0)
 270     bidi_it->stack_idx--;
 271   return bidi_it->level_stack[bidi_it->stack_idx].level;
 272 }
 273
 274 /* Record in SAVED_INFO the information about the current character.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   saved_info->charpos = bidi_it->charpos;
 280   saved_info->bytepos = bidi_it->bytepos;
 281   saved_info->type = bidi_it->type;
 282   bidi_check_type (bidi_it->type);
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   bidi_check_type (bidi_it->type_after_w1);
 285   saved_info->orig_type = bidi_it->orig_type;
 286   bidi_check_type (bidi_it->orig_type);
 287 }
 288
 289 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 290    copies the part of the level stack that is actually in use.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int i;
 295
 296   /* Copy everything except the level stack and beyond.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Copy the active part of the level stack.  */
 300   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 301   for (i = 1; i <= from->stack_idx; i++)
 302     to->level_stack[i] = from->level_stack[i];
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200
 311 static struct bidi_it *bidi_cache;
 312 static ptrdiff_t bidi_cache_size = 0;
 313 enum { elsz = sizeof (struct bidi_it) };
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* Reset the cache state to the empty state.  We only reset the part
 320    of the cache relevant to iteration of the current object.  Previous
 321    objects, which are pushed on the display iterator's stack, are left
 322    intact.  This is called when the cached information is no more
 323    useful for the current iteration, e.g. when we were reseated to a
 324    new position on the same object.  */
 325 static inline void
 326 bidi_cache_reset (void)
 327 {
 328   bidi_cache_idx = bidi_cache_start;
 329   bidi_cache_last_idx = -1;
 330 }
 331
 332 /* Shrink the cache to its minimal size.  Called when we init the bidi
 333    iterator for reordering a buffer or a string that does not come
 334    from display properties, because that means all the previously
 335    cached info is of no further use.  */
 336 static inline void
 337 bidi_cache_shrink (void)
 338 {
 339   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 340     {
 341       bidi_cache_size = BIDI_CACHE_CHUNK;
 342       bidi_cache =
 343         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 344     }
 345   bidi_cache_reset ();
 346 }
 347
 348 static inline void
 349 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 350 {
 351   int current_scan_dir = bidi_it->scan_dir;
 352
 353   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 354     abort ();
 355
 356   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 357   bidi_it->scan_dir = current_scan_dir;
 358   bidi_cache_last_idx = idx;
 359 }
 360
 361 /* Find a cached state with a given CHARPOS and resolved embedding
 362    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 363    resolved levels in cached states.  DIR, if non-zero, means search
 364    in that direction from the last cache hit.  */
 365 static inline ptrdiff_t
 366 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 367 {
 368   ptrdiff_t i, i_start;
 369
 370   if (bidi_cache_idx > bidi_cache_start)
 371     {
 372       if (bidi_cache_last_idx == -1)
 373         bidi_cache_last_idx = bidi_cache_idx - 1;
 374       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 375         {
 376           dir = -1;
 377           i_start = bidi_cache_last_idx - 1;
 378         }
 379       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 380                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 381         {
 382           dir = 1;
 383           i_start = bidi_cache_last_idx + 1;
 384         }
 385       else if (dir)
 386         i_start = bidi_cache_last_idx;
 387       else
 388         {
 389           dir = -1;
 390           i_start = bidi_cache_idx - 1;
 391         }
 392
 393       if (dir < 0)
 394         {
 395           /* Linear search for now; FIXME!  */
 396           for (i = i_start; i >= bidi_cache_start; i--)
 397             if (bidi_cache[i].charpos <= charpos
 398                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 399                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 400               return i;
 401         }
 402       else
 403         {
 404           for (i = i_start; i < bidi_cache_idx; i++)
 405             if (bidi_cache[i].charpos <= charpos
 406                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 407                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 408               return i;
 409         }
 410     }
 411
 412   return -1;
 413 }
 414
 415 /* Find a cached state where the resolved level changes to a value
 416    that is lower than LEVEL, and return its cache slot index.  DIR is
 417    the direction to search, starting with the last used cache slot.
 418    If DIR is zero, we search backwards from the last occupied cache
 419    slot.  BEFORE, if non-zero, means return the index of the slot that
 420    is ``before'' the level change in the search direction.  That is,
 421    given the cached levels like this:
 422
 423          1122333442211
 424           AB        C
 425
 426    and assuming we are at the position cached at the slot marked with
 427    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 428    index of slot B or A, depending whether BEFORE is, respectively,
 429    non-zero or zero.  */
 430 static ptrdiff_t
 431 bidi_cache_find_level_change (int level, int dir, int before)
 432 {
 433   if (bidi_cache_idx)
 434     {
 435       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 436       int incr = before ? 1 : 0;
 437
 438       xassert (!dir || bidi_cache_last_idx >= 0);
 439
 440       if (!dir)
 441         dir = -1;
 442       else if (!incr)
 443         i += dir;
 444
 445       if (dir < 0)
 446         {
 447           while (i >= bidi_cache_start + incr)
 448             {
 449               if (bidi_cache[i - incr].resolved_level >= 0
 450                   && bidi_cache[i - incr].resolved_level < level)
 451                 return i;
 452               i--;
 453             }
 454         }
 455       else
 456         {
 457           while (i < bidi_cache_idx - incr)
 458             {
 459               if (bidi_cache[i + incr].resolved_level >= 0
 460                   && bidi_cache[i + incr].resolved_level < level)
 461                 return i;
 462               i++;
 463             }
 464         }
 465     }
 466
 467   return -1;
 468 }
 469
 470 static inline void
 471 bidi_cache_ensure_space (ptrdiff_t idx)
 472 {
 473   /* Enlarge the cache as needed.  */
 474   if (idx >= bidi_cache_size)
 475     {
 476       ptrdiff_t new_size;
 477
 478       /* The bidi cache cannot be larger than the largest Lisp string
 479          or buffer.  */
 480       ptrdiff_t string_or_buffer_bound =
 481         max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 482
 483       /* Also, it cannot be larger than what C can represent.  */
 484       ptrdiff_t c_bound = min (PTRDIFF_MAX, SIZE_MAX) / elsz;
 485
 486       if (min (string_or_buffer_bound, c_bound) <= idx)
 487         memory_full (SIZE_MAX);
 488       new_size = idx - idx % BIDI_CACHE_CHUNK + BIDI_CACHE_CHUNK;
 489       bidi_cache = (struct bidi_it *) xrealloc (bidi_cache, new_size * elsz);
 490       bidi_cache_size = new_size;
 491     }
 492 }
 493
 494 static inline void
 495 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 496 {
 497   ptrdiff_t idx;
 498
 499   /* We should never cache on backward scans.  */
 500   if (bidi_it->scan_dir == -1)
 501     abort ();
 502   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 503
 504   if (idx < 0)
 505     {
 506       idx = bidi_cache_idx;
 507       bidi_cache_ensure_space (idx);
 508       /* Character positions should correspond to cache positions 1:1.
 509          If we are outside the range of cached positions, the cache is
 510          useless and must be reset.  */
 511       if (idx > bidi_cache_start &&
 512           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 513                                + bidi_cache[idx - 1].nchars)
 514            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 515         {
 516           bidi_cache_reset ();
 517           idx = bidi_cache_start;
 518         }
 519       if (bidi_it->nchars <= 0)
 520         abort ();
 521       bidi_copy_it (&bidi_cache[idx], bidi_it);
 522       if (!resolved)
 523         bidi_cache[idx].resolved_level = -1;
 524     }
 525   else
 526     {
 527       /* Copy only the members which could have changed, to avoid
 528          costly copying of the entire struct.  */
 529       bidi_cache[idx].type = bidi_it->type;
 530       bidi_check_type (bidi_it->type);
 531       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 532       bidi_check_type (bidi_it->type_after_w1);
 533       if (resolved)
 534         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 535       else
 536         bidi_cache[idx].resolved_level = -1;
 537       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 538       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 539       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 540       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 541       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 542       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 543       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 544     }
 545
 546   bidi_cache_last_idx = idx;
 547   if (idx >= bidi_cache_idx)
 548     bidi_cache_idx = idx + 1;
 549 }
 550
 551 static inline bidi_type_t
 552 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 553 {
 554   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 555
 556   if (i >= bidi_cache_start)
 557     {
 558       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 559
 560       bidi_copy_it (bidi_it, &bidi_cache[i]);
 561       bidi_cache_last_idx = i;
 562       /* Don't let scan direction from from the cached state override
 563          the current scan direction.  */
 564       bidi_it->scan_dir = current_scan_dir;
 565       return bidi_it->type;
 566     }
 567
 568   return UNKNOWN_BT;
 569 }
 570
 571 static inline int
 572 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 573 {
 574   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 575     abort ();
 576   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 577 }
 578
 579 \f
 580 /***********************************************************************
 581              Pushing and popping the bidi iterator state
 582  ***********************************************************************/
 583 /* 5-slot stack for saving the start of the previous level of the
 584    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 585    and we need the same size of our stack.  */
 586 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 587 static int bidi_cache_sp;
 588
 589 /* Push the bidi iterator state in preparation for reordering a
 590    different object, e.g. display string found at certain buffer
 591    position.  Pushing the bidi iterator boils down to saving its
 592    entire state on the cache and starting a new cache "stacked" on top
 593    of the current cache.  */
 594 void
 595 bidi_push_it (struct bidi_it *bidi_it)
 596 {
 597   /* Save the current iterator state in its entirety after the last
 598      used cache slot.  */
 599   bidi_cache_ensure_space (bidi_cache_idx);
 600   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 601
 602   /* Push the current cache start onto the stack.  */
 603   xassert (bidi_cache_sp < IT_STACK_SIZE);
 604   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 605
 606   /* Start a new level of cache, and make it empty.  */
 607   bidi_cache_start = bidi_cache_idx;
 608   bidi_cache_last_idx = -1;
 609 }
 610
 611 /* Restore the iterator state saved by bidi_push_it and return the
 612    cache to the corresponding state.  */
 613 void
 614 bidi_pop_it (struct bidi_it *bidi_it)
 615 {
 616   if (bidi_cache_start <= 0)
 617     abort ();
 618
 619   /* Reset the next free cache slot index to what it was before the
 620      call to bidi_push_it.  */
 621   bidi_cache_idx = bidi_cache_start - 1;
 622
 623   /* Restore the bidi iterator state saved in the cache.  */
 624   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 625
 626   /* Pop the previous cache start from the stack.  */
 627   if (bidi_cache_sp <= 0)
 628     abort ();
 629   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 630
 631   /* Invalidate the last-used cache slot data.  */
 632   bidi_cache_last_idx = -1;
 633 }
 634
 635 static ptrdiff_t bidi_cache_total_alloc;
 636
 637 /* Stash away a copy of the cache and its control variables.  */
 638 void *
 639 bidi_shelve_cache (void)
 640 {
 641   unsigned char *databuf;
 642
 643   /* Empty cache.  */
 644   if (bidi_cache_idx == 0)
 645     return NULL;
 646
 647   databuf = xmalloc (sizeof (bidi_cache_idx)
 648                      + bidi_cache_idx * sizeof (struct bidi_it)
 649                      + sizeof (bidi_cache_start_stack)
 650                      + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 651                      + sizeof (bidi_cache_last_idx));
 652   bidi_cache_total_alloc +=
 653     sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
 654     + sizeof (bidi_cache_start_stack)
 655     + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 656     + sizeof (bidi_cache_last_idx);
 657
 658   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 659   memcpy (databuf + sizeof (bidi_cache_idx),
 660           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 661   memcpy (databuf + sizeof (bidi_cache_idx)
 662           + bidi_cache_idx * sizeof (struct bidi_it),
 663           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 664   memcpy (databuf + sizeof (bidi_cache_idx)
 665           + bidi_cache_idx * sizeof (struct bidi_it)
 666           + sizeof (bidi_cache_start_stack),
 667           &bidi_cache_sp, sizeof (bidi_cache_sp));
 668   memcpy (databuf + sizeof (bidi_cache_idx)
 669           + bidi_cache_idx * sizeof (struct bidi_it)
 670           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 671           &bidi_cache_start, sizeof (bidi_cache_start));
 672   memcpy (databuf + sizeof (bidi_cache_idx)
 673           + bidi_cache_idx * sizeof (struct bidi_it)
 674           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 675           + sizeof (bidi_cache_start),
 676           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 677
 678   return databuf;
 679 }
 680
 681 /* Restore the cache state from a copy stashed away by
 682    bidi_shelve_cache, and free the buffer used to stash that copy.
 683    JUST_FREE non-zero means free the buffer, but don't restore the
 684    cache; used when the corresponding iterator is discarded instead of
 685    being restored.  */
 686 void
 687 bidi_unshelve_cache (void *databuf, int just_free)
 688 {
 689   unsigned char *p = databuf;
 690
 691   if (!p)
 692     {
 693       if (!just_free)
 694         {
 695           /* A NULL pointer means an empty cache.  */
 696           bidi_cache_start = 0;
 697           bidi_cache_sp = 0;
 698           bidi_cache_reset ();
 699         }
 700     }
 701   else
 702     {
 703       if (just_free)
 704         {
 705           ptrdiff_t idx;
 706
 707           memcpy (&idx, p, sizeof (bidi_cache_idx));
 708           bidi_cache_total_alloc -=
 709             sizeof (bidi_cache_idx) + idx * sizeof (struct bidi_it)
 710             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 711             + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
 712         }
 713       else
 714         {
 715           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 716           bidi_cache_ensure_space (bidi_cache_idx);
 717           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 718                   bidi_cache_idx * sizeof (struct bidi_it));
 719           memcpy (bidi_cache_start_stack,
 720                   p + sizeof (bidi_cache_idx)
 721                   + bidi_cache_idx * sizeof (struct bidi_it),
 722                   sizeof (bidi_cache_start_stack));
 723           memcpy (&bidi_cache_sp,
 724                   p + sizeof (bidi_cache_idx)
 725                   + bidi_cache_idx * sizeof (struct bidi_it)
 726                   + sizeof (bidi_cache_start_stack),
 727                   sizeof (bidi_cache_sp));
 728           memcpy (&bidi_cache_start,
 729                   p + sizeof (bidi_cache_idx)
 730                   + bidi_cache_idx * sizeof (struct bidi_it)
 731                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 732                   sizeof (bidi_cache_start));
 733           memcpy (&bidi_cache_last_idx,
 734                   p + sizeof (bidi_cache_idx)
 735                   + bidi_cache_idx * sizeof (struct bidi_it)
 736                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 737                   + sizeof (bidi_cache_start),
 738                   sizeof (bidi_cache_last_idx));
 739           bidi_cache_total_alloc -=
 740             sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
 741             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 742             + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
 743         }
 744
 745       xfree (p);
 746     }
 747 }
 748
 749 \f
 750 /***********************************************************************
 751                         Initialization
 752  ***********************************************************************/
 753 static void
 754 bidi_initialize (void)
 755 {
 756   bidi_type_table = uniprop_table (intern ("bidi-class"));
 757   if (NILP (bidi_type_table))
 758     abort ();
 759   staticpro (&bidi_type_table);
 760
 761   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 762   if (NILP (bidi_mirror_table))
 763     abort ();
 764   staticpro (&bidi_mirror_table);
 765
 766   Qparagraph_start = intern ("paragraph-start");
 767   staticpro (&Qparagraph_start);
 768   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 769   if (!STRINGP (paragraph_start_re))
 770     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 771   staticpro (&paragraph_start_re);
 772   Qparagraph_separate = intern ("paragraph-separate");
 773   staticpro (&Qparagraph_separate);
 774   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 775   if (!STRINGP (paragraph_separate_re))
 776     paragraph_separate_re = build_string ("[ \t\f]*$");
 777   staticpro (&paragraph_separate_re);
 778
 779   bidi_cache_sp = 0;
 780   bidi_cache_total_alloc = 0;
 781
 782   bidi_initialized = 1;
 783 }
 784
 785 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 786    end.  */
 787 static inline void
 788 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 789 {
 790   bidi_it->invalid_levels = 0;
 791   bidi_it->invalid_rl_levels = -1;
 792   bidi_it->stack_idx = 0;
 793   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 794 }
 795
 796 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 797 void
 798 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 799               struct bidi_it *bidi_it)
 800 {
 801   if (! bidi_initialized)
 802     bidi_initialize ();
 803   if (charpos >= 0)
 804     bidi_it->charpos = charpos;
 805   if (bytepos >= 0)
 806     bidi_it->bytepos = bytepos;
 807   bidi_it->frame_window_p = frame_window_p;
 808   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 809   bidi_it->first_elt = 1;
 810   bidi_set_paragraph_end (bidi_it);
 811   bidi_it->new_paragraph = 1;
 812   bidi_it->separator_limit = -1;
 813   bidi_it->type = NEUTRAL_B;
 814   bidi_it->type_after_w1 = NEUTRAL_B;
 815   bidi_it->orig_type = NEUTRAL_B;
 816   bidi_it->prev_was_pdf = 0;
 817   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 818     bidi_it->prev.orig_type = UNKNOWN_BT;
 819   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 820     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 821   bidi_it->next_for_neutral.charpos = -1;
 822   bidi_it->next_for_neutral.type =
 823     bidi_it->next_for_neutral.type_after_w1 =
 824     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 825   bidi_it->prev_for_neutral.charpos = -1;
 826   bidi_it->prev_for_neutral.type =
 827     bidi_it->prev_for_neutral.type_after_w1 =
 828     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 829   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 830   bidi_it->disp_pos = -1;       /* invalid/unknown */
 831   bidi_it->disp_prop = 0;
 832   /* We can only shrink the cache if we are at the bottom level of its
 833      "stack".  */
 834   if (bidi_cache_start == 0)
 835     bidi_cache_shrink ();
 836   else
 837     bidi_cache_reset ();
 838 }
 839
 840 /* Perform initializations for reordering a new line of bidi text.  */
 841 static void
 842 bidi_line_init (struct bidi_it *bidi_it)
 843 {
 844   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 845   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 846   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 847   bidi_it->invalid_levels = 0;
 848   bidi_it->invalid_rl_levels = -1;
 849   bidi_it->next_en_pos = -1;
 850   bidi_it->next_for_ws.type = UNKNOWN_BT;
 851   bidi_set_sor_type (bidi_it,
 852                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 853                      bidi_it->level_stack[0].level); /* X10 */
 854
 855   bidi_cache_reset ();
 856 }
 857
 858 \f
 859 /***********************************************************************
 860                         Fetching characters
 861  ***********************************************************************/
 862
 863 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 864    are zero-based character positions in S, BEGBYTE is byte position
 865    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 866    string.  */
 867 static inline EMACS_INT
 868 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 869                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 870 {
 871   EMACS_INT pos = beg;
 872   const unsigned char *p = s + begbyte, *start = p;
 873
 874   if (unibyte)
 875     p = s + end;
 876   else
 877     {
 878       if (!CHAR_HEAD_P (*p))
 879         abort ();
 880
 881       while (pos < end)
 882         {
 883           p += BYTES_BY_CHAR_HEAD (*p);
 884           pos++;
 885         }
 886     }
 887
 888   return p - start;
 889 }
 890
 891 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 892    non-NULL, fetch the character from string S; otherwise fetch the
 893    character from the current buffer.  UNIBYTE non-zero means S is a
 894    unibyte string.  */
 895 static inline int
 896 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 897 {
 898   if (s)
 899     {
 900       if (unibyte)
 901         return s[bytepos];
 902       else
 903         return STRING_CHAR (s + bytepos);
 904     }
 905   else
 906     return FETCH_MULTIBYTE_CHAR (bytepos);
 907 }
 908
/* Fetch and return the character at BYTEPOS/CHARPOS.  If that
   character is covered by a display string, treat the entire run of
   covered characters as a single character, either u+2029 or u+FFFC,
   and return their combined length in CH_LEN and NCHARS.  DISP_POS
   specifies the character position of the next display string, or -1
   if not yet computed.  DISP_PROP non-zero means that there's really
   a display string at DISP_POS, as opposed to when we searched till
   DISP_POS without finding one.  If DISP_PROP is 2, it means the
   display spec is of the form `(space ...)', which is replaced with
   u+2029 to handle it as a paragraph separator.  When the next
   character is at or beyond that position, the function updates
   DISP_POS with the position of the next display string.  STRING->s
   is the C string to iterate, or NULL if iterating over a buffer or a
   Lisp string; in the latter case, STRING->lstring is the Lisp
   string.

   CH_LEN receives the length in bytes, and NCHARS the length in
   characters, of what was fetched.  When CHARPOS is at or beyond the
   end of the buffer or string, the value is BIDI_EOB, CH_LEN and
   NCHARS are both set to 1, DISP_POS is set to the end position, and
   DISP_PROP is reset to zero.  FRAME_WINDOW_P is passed unchanged to
   compute_display_string_pos.  */
static inline int
bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
                 int *disp_prop, struct bidi_string_data *string,
                 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
{
  int ch;
  /* End of the text being iterated, as a character position: the
     string's character count when iterating a C or Lisp string, ZV
     when iterating a buffer.  */
  EMACS_INT endpos =
    (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
  struct text_pos pos;

  /* If we got past the last known position of display string, compute
     the position of the next one.  That position could be at CHARPOS.  */
  if (charpos < endpos && charpos > *disp_pos)
    {
      SET_TEXT_POS (pos, charpos, bytepos);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  /* Fetch the character at BYTEPOS.  */
  if (charpos >= endpos)
    {
      /* At or past the end: report the end-of-text pseudo-character.  */
      ch = BIDI_EOB;
      *ch_len = 1;
      *nchars = 1;
      *disp_pos = endpos;
      *disp_prop = 0;
    }
  else if (charpos >= *disp_pos && *disp_prop)
    {
      EMACS_INT disp_end_pos;

      /* We don't expect to find ourselves in the middle of a display
         property.  Hopefully, it will never be needed.  */
      if (charpos > *disp_pos)
        abort ();
      /* Text covered by `display' properties and overlays with
         display properties or display strings is handled as a single
         character that represents the entire run of characters
         covered by the display property.  */
      if (*disp_prop == 2)
        {
          /* `(space ...)' display specs are handled as paragraph
             separators for the purposes of the reordering; see UAX#9
             section 3 and clause HL1 in section 4.3 there.  */
          ch = 0x2029;
        }
      else
        {
          /* All other display specs are handled as the Unicode Object
             Replacement Character.  */
          ch = 0xFFFC;
        }
      /* The run must cover at least one character.  */
      disp_end_pos = compute_display_string_end (*disp_pos, string);
      *nchars = disp_end_pos - *disp_pos;
      if (*nchars <= 0)
        abort ();
      /* Compute the byte length of the covered run.  Here CHARPOS
         equals *DISP_POS (anything else aborted above), so BYTEPOS is
         the byte position of the start of the run.  */
      if (string->s)
        *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
                                    disp_end_pos, string->unibyte);
      else if (STRINGP (string->lstring))
        *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
                                    bytepos, disp_end_pos, string->unibyte);
      else
        *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
    }
  else
    {
      /* An ordinary character, not covered by a display string.
         Fetch it from the C string, the Lisp string, or the current
         buffer, and record its length in bytes.  */
      if (string->s)
        {
          int len;

          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
              *ch_len = len;
            }
          else
            {
              /* In a unibyte string every character is one byte.  */
              ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
              *ch_len = 1;
            }
        }
      else if (STRINGP (string->lstring))
        {
          int len;

          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
                                           len);
              *ch_len = len;
            }
          else
            {
              /* In a unibyte string every character is one byte.  */
              ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
              *ch_len = 1;
            }
        }
      else
        {
          ch = FETCH_MULTIBYTE_CHAR (bytepos);
          *ch_len = CHAR_BYTES (ch);
        }
      *nchars = 1;
    }

  /* If we just entered a run of characters covered by a display
     string, compute the position of the next display string.  */
  if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
      && *disp_prop)
    {
      /* Start the search right after what we have just fetched.  */
      SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  return ch;
}
1043
1044 \f
1045 /***********************************************************************
1046                         Determining paragraph direction
1047  ***********************************************************************/
1048
1049 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1050    Value is the non-negative length of the paragraph separator
1051    following the buffer position, -1 if position is at the beginning
1052    of a new paragraph, or -2 if position is neither at beginning nor
1053    at end of a paragraph.  */
1054 static EMACS_INT
1055 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1056 {
1057   Lisp_Object sep_re;
1058   Lisp_Object start_re;
1059   EMACS_INT val;
1060
1061   sep_re = paragraph_separate_re;
1062   start_re = paragraph_start_re;
1063
1064   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1065   if (val < 0)
1066     {
1067       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1068         val = -1;
1069       else
1070         val = -2;
1071     }
1072
1073   return val;
1074 }
1075
1076 /* Find the beginning of this paragraph by looking back in the buffer.
1077    Value is the byte position of the paragraph's beginning.  */
1078 static EMACS_INT
1079 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1080 {
1081   Lisp_Object re = paragraph_start_re;
1082   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1083
1084   while (pos_byte > BEGV_BYTE
1085          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1086     {
1087       /* FIXME: What if the paragraph beginning is covered by a
1088          display string?  And what if a display string covering some
1089          of the text over which we scan back includes
1090          paragraph_start_re?  */
1091       pos = find_next_newline_no_quit (pos - 1, -1);
1092       pos_byte = CHAR_TO_BYTE (pos);
1093     }
1094   return pos_byte;
1095 }
1096
/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.

   The result is stored in BIDI_IT: paragraph_dir gets the direction,
   level_stack[0].level gets the base embedding level (1 for R2L, 0
   otherwise), and bidi_line_init is called to prepare for reordering
   the first line.  The one exception is when DIR is NEUTRAL_DIR and
   the iterator is still inside a paragraph separator: then the
   function returns without changing anything.  The function aborts
   if DIR is not one of L2R, R2L and NEUTRAL_DIR, or if BIDI_IT's
   position is outside the text (except for empty text).  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
  EMACS_INT bytepos = bidi_it->bytepos;
  /* Non-zero if we are iterating over a C string or a Lisp string
     rather than over a buffer.  */
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
  EMACS_INT pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
  EMACS_INT end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    abort ();

  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      EMACS_INT ch_len, nchars;
      /* disp_pos and disp_prop are private to this scan; they start
         out as "not yet computed" and are maintained by
         bidi_fetch_char.  */
      EMACS_INT pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      s = STRINGP (bidi_it->string.lstring) ?
        SDATA (bidi_it->string.lstring) : bidi_it->string.s;
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          /* A newline occupies one byte and one character.  */
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P
         is non-zero, and only if we are iterating on a buffer.  */
      do {
        /* Start scanning from the beginning of the paragraph.  For a
           string, POS still holds the iterator's character position
           at this point, not the character position of PSTARTBYTE.  */
        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        /* Scan forward until a character decides the direction: a
           strong character which, when
           bidi_ignore_explicit_marks_for_paragraph_level is non-zero,
           is not one of the explicit embedding/override marks.
           POS/BYTEPOS always point just past the character whose type
           is in TYPE.  */
        for (pos += nchars, bytepos += ch_len;
             (bidi_get_category (type) != STRONG)
               || (bidi_ignore_explicit_marks_for_paragraph_level
                   && (type == RLE || type == RLO
                       || type == LRE || type == LRO));
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, stop at a paragraph separator character
               that is followed by the end of this paragraph or the
               beginning of a new one.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* Map the deciding character's type to a direction.  If the
           scan ended without one, paragraph_dir is left as it was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                EMACS_INT prevpbyte = pstartbyte;
                EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.  */
                /* Step back one character at a time until
                   bidi_find_paragraph_start reports a start that
                   lies before PSTARTBYTE, or until BEGV is reached.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* Rescan from there on the next iteration.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* The base embedding level follows from the direction.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1274
1275 \f
1276 /***********************************************************************
1277                  Resolving explicit and implicit levels.
1278   The rest of this file constitutes the core of the UBA implementation.
1279  ***********************************************************************/
1280
1281 static inline int
1282 bidi_explicit_dir_char (int ch)
1283 {
1284   bidi_type_t ch_type;
1285
1286   if (!bidi_initialized)
1287     abort ();
1288   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1289   return (ch_type == LRE || ch_type == LRO
1290           || ch_type == RLE || ch_type == RLO
1291           || ch_type == PDF);
1292 }
1293
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   Value is the embedding level in effect after the character: the
   level found at the top of BIDI_IT's level stack on entry, unless
   the character pushed or popped an embedding level.  As side
   effects, BIDI_IT's position, ch, ch_len, nchars, orig_type,
   type_after_w1 and type are updated to describe the character, and
   prev_was_pdf is cleared if the character is not a PDF.  The
   explicit formatting characters LRE, LRO, RLE, RLO and PDF keep
   their original type in type_after_w1, but get WEAK_BN (or, in some
   cases inside an embedding known to be empty, WEAK_EN) as their
   type.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  /* Non-zero if we are iterating over a C string or a Lisp string
     rather than over a buffer.  */
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      /* Clamp the character position to the beginning of the text,
         and recompute the byte position from it.  */
      if (string_p)
        {
          const unsigned char *p =
            STRINGP (bidi_it->string.lstring)
            ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;

          if (bidi_it->charpos < 0)
            bidi_it->charpos = 0;
          bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
                                               bidi_it->string.unibyte);
        }
      else
        {
          if (bidi_it->charpos < BEGV)
            bidi_it->charpos = BEGV;
          bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  /* Start from the embedding level and override currently at the top
     of the level stack.  */
  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      /* At the end of the text: report the end-of-text
         pseudo-character, with the same bookkeeping as
         bidi_fetch_char uses for it.  */
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character u+FFFC.  */
      curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  /* In each case below, the embedding level stack is manipulated only
     when ignore_bn_limit is -1 or less.  bidi_resolve_explicit sets
     ignore_bn_limit to a character position (i.e. a value greater
     than -1) when it finds an empty embedding; while that is in
     effect, the formatting character leaves the levels alone.  */
  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* NOTE(review): BIDI_MAXLEVEL - 4 is presumably the
               highest level from which a new odd level may still be
               pushed -- confirm against BIDI_MAXLEVEL's definition.  */
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* Too deep: count the embedding as invalid instead of
                   pushing it.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || bidi_it->next_en_pos > bidi_it->charpos)
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* NOTE(review): BIDI_MAXLEVEL - 5 is presumably the
               highest level from which a new even level may still be
               pushed -- confirm against BIDI_MAXLEVEL's definition.  */
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || bidi_it->next_en_pos > bidi_it->charpos)
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* invalid_rl_levels == 0 identifies the one PDF that must
               not be ignored even though invalid_levels may be
               non-zero; see the commentary in the LRE/LRO case.  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            /* With no invalid levels pending, this PDF terminates a
               valid embedding; otherwise it only cancels one of the
               invalid ones.  */
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || bidi_it->next_en_pos > bidi_it->charpos)
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1490
1491 /* Given an iterator state in BIDI_IT, advance one character position
1492    in the buffer/string to the next character (in the logical order),
1493    resolve any explicit embeddings and directional overrides, and
1494    return the embedding level of the character after resolving
1495    explicit directives and ignoring empty embeddings.  */
1496 static int
1497 bidi_resolve_explicit (struct bidi_it *bidi_it)
1498 {
1499   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1500   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1501   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1502   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1503     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1504
1505   if (prev_level < new_level
1506       && bidi_it->type == WEAK_BN
1507       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1508       && bidi_it->charpos < eob         /* not already at EOB */
1509       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1510                                                    + bidi_it->ch_len, s,
1511                                                    bidi_it->string.unibyte)))
1512     {
1513       /* Avoid pushing and popping embedding levels if the level run
1514          is empty, as this breaks level runs where it shouldn't.
1515          UAX#9 removes all the explicit embedding and override codes,
1516          so empty embeddings disappear without a trace.  We need to
1517          behave as if we did the same.  */
1518       struct bidi_it saved_it;
1519       int level = prev_level;
1520
1521       bidi_copy_it (&saved_it, bidi_it);
1522
1523       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1524                                                        + bidi_it->ch_len, s,
1525                                                        bidi_it->string.unibyte)))
1526         {
1527           /* This advances to the next character, skipping any
1528              characters covered by display strings.  */
1529           level = bidi_resolve_explicit_1 (bidi_it);
1530           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1531              a pointer to its data is no longer valid.  */
1532           if (STRINGP (bidi_it->string.lstring))
1533             s = SDATA (bidi_it->string.lstring);
1534         }
1535
1536       if (bidi_it->nchars <= 0)
1537         abort ();
1538       if (level == prev_level)  /* empty embedding */
1539         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1540       else                      /* this embedding is non-empty */
1541         saved_it.ignore_bn_limit = -2;
1542
1543       bidi_copy_it (bidi_it, &saved_it);
1544       if (bidi_it->ignore_bn_limit > -1)
1545         {
1546           /* We pushed a level, but we shouldn't have.  Undo that. */
1547           if (!bidi_it->invalid_rl_levels)
1548             {
1549               new_level = bidi_pop_embedding_level (bidi_it);
1550               bidi_it->invalid_rl_levels = -1;
1551               if (bidi_it->invalid_levels)
1552                 bidi_it->invalid_levels--;
1553             }
1554           if (!bidi_it->invalid_levels)
1555             new_level = bidi_pop_embedding_level (bidi_it);
1556           else
1557             {
1558               bidi_it->invalid_levels--;
1559               bidi_it->invalid_rl_levels--;
1560             }
1561         }
1562     }
1563
1564   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1565     {
1566       bidi_set_paragraph_end (bidi_it);
1567       /* This is needed by bidi_resolve_weak below, and in L1.  */
1568       bidi_it->type_after_w1 = bidi_it->type;
1569       bidi_check_type (bidi_it->type_after_w1);
1570     }
1571
1572   return new_level;
1573 }
1574
1575 /* Advance in the buffer/string, resolve weak types and return the
1576    type of the next character after weak type resolution.

     bidi_resolve_explicit is called first; it returns the embedding
     level in effect for the character now described by BIDI_IT.  The
     directional override of that level (X6) and the weak-type clauses
     W1 through W7 of UAX#9 are then applied to BIDI_IT->type.

     Side effects on BIDI_IT: `type' is set to the returned value,
     `type_after_w1' is recorded for later use by clause L1, and
     `next_en_pos' may be set by the W5 lookahead.  When the embedding
     level changed, or the character is a paragraph separator,
     bidi_set_sor_type is called to start a new level run.

     Aborts if the type left by bidi_resolve_explicit is UNKNOWN_BT or
     one of the explicit formatting codes (LRE, LRO, RLE, RLO, PDF).  */
1577 static bidi_type_t
1578 bidi_resolve_weak (struct bidi_it *bidi_it)
1579 {
1580   bidi_type_t type;
1581   bidi_dir_t override;
1582   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1583   int new_level  = bidi_resolve_explicit (bidi_it);
1584   int next_char;
1585   bidi_type_t type_of_next;
1586   struct bidi_it saved_it;
1587   EMACS_INT eob =
1588     (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1589     ? bidi_it->string.schars : ZV;
1590
1591   type = bidi_it->type;
1592   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1593
       /* These types must never reach the weak-type clauses.  */
1594   if (type == UNKNOWN_BT
1595       || type == LRE
1596       || type == LRO
1597       || type == RLE
1598       || type == RLO
1599       || type == PDF)
1600     abort ();
1601
1602   if (new_level != prev_level
1603       || bidi_it->type == NEUTRAL_B)
1604     {
1605       /* We've got a new embedding level run, compute the directional
1606          type of sor and initialize per-run variables (UAX#9, clause
1607          X10).  */
1608       bidi_set_sor_type (bidi_it, prev_level, new_level);
1609     }
1610   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1611            || type == WEAK_BN || type == STRONG_AL)
1612     bidi_it->type_after_w1 = type;      /* needed in L1 */
1613   bidi_check_type (bidi_it->type_after_w1);
1614
1615   /* Level and directional override status are already recorded in
1616      bidi_it, and do not need any change; see X6.  */
1617   if (override == R2L)          /* X6 */
1618     type = STRONG_R;
1619   else if (override == L2R)
1620     type = STRONG_L;
1621   else
1622     {
1623       if (type == WEAK_NSM)     /* W1 */
1624         {
1625           /* Note that we don't need to consider the case where the
1626              prev character has its type overridden by an RLO or LRO,
1627              because then either the type of this NSM would have been
1628              also overridden, or the previous character is outside the
1629              current level run, and thus not relevant to this NSM.
1630              This is why NSM gets the type_after_w1 of the previous
1631              character.  */
1632           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1633               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1634               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1635             type = bidi_it->prev.type_after_w1;
1636           else if (bidi_it->sor == R2L)
1637             type = STRONG_R;
1638           else if (bidi_it->sor == L2R)
1639             type = STRONG_L;
1640           else /* shouldn't happen! */
1641             abort ();
1642         }
1643       if (type == WEAK_EN       /* W2 */
1644           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1645         type = WEAK_AN;
1646       else if (type == STRONG_AL) /* W3 */
1647         type = STRONG_R;
1648       else if ((type == WEAK_ES /* W4 */
1649                 && bidi_it->prev.type_after_w1 == WEAK_EN
1650                 && bidi_it->prev.orig_type == WEAK_EN)
1651                || (type == WEAK_CS
1652                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1653                         && bidi_it->prev.orig_type == WEAK_EN)
1654                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1655         {
1656           const unsigned char *s =
1657             STRINGP (bidi_it->string.lstring)
1658             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1659
               /* Peek at the character that follows this separator, or
                  use BIDI_EOB if there is none.  */
1660           next_char =
1661             bidi_it->charpos + bidi_it->nchars >= eob
1662             ? BIDI_EOB
1663             : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1664                                 bidi_it->string.unibyte);
1665           type_of_next = bidi_get_type (next_char, override);
1666
1667           if (type_of_next == WEAK_BN
1668               || bidi_explicit_dir_char (next_char))
1669             {
                   /* Keep looking ahead for as long as the following
                      characters resolve to BN at this same level.  The
                      iterator is saved before and restored after the
                      lookahead, so only type_of_next is affected.  */
1670               bidi_copy_it (&saved_it, bidi_it);
1671               while (bidi_resolve_explicit (bidi_it) == new_level
1672                      && bidi_it->type == WEAK_BN)
1673                 ;
1674               type_of_next = bidi_it->type;
1675               bidi_copy_it (bidi_it, &saved_it);
1676             }
1677
1678           /* If the next character is EN, but the last strong-type
1679              character is AL, that next EN will be changed to AN when
1680              we process it in W2 above.  So in that case, this ES
1681              should not be changed into EN.  */
1682           if (type == WEAK_ES
1683               && type_of_next == WEAK_EN
1684               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1685             type = WEAK_EN;
1686           else if (type == WEAK_CS)
1687             {
1688               if (bidi_it->prev.type_after_w1 == WEAK_AN
1689                   && (type_of_next == WEAK_AN
1690                       /* If the next character is EN, but the last
1691                          strong-type character is AL, EN will be later
1692                          changed to AN when we process it in W2 above.
1693                          So in that case, this CS should be changed
1694                          into AN as well.  */
1695                       || (type_of_next == WEAK_EN
1696                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1697                 type = WEAK_AN;
1698               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1699                        && type_of_next == WEAK_EN
1700                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1701                 type = WEAK_EN;
1702             }
1703         }
1704       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1705                || type == WEAK_BN)      /* W5/Retaining */
1706         {
1707           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1708               || bidi_it->next_en_pos > bidi_it->charpos)
1709             type = WEAK_EN;
1710           else                  /* W5: ET/BN with EN after it.  */
1711             {
1712               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1713               const unsigned char *s =
1714                 STRINGP (bidi_it->string.lstring)
1715                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1716
1717               if (bidi_it->nchars <= 0)
1718                 abort ();
1719               next_char =
1720                 bidi_it->charpos + bidi_it->nchars >= eob
1721                 ? BIDI_EOB
1722                 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1723                                     bidi_it->string.unibyte);
1724               type_of_next = bidi_get_type (next_char, override);
1725
1726               if (type_of_next == WEAK_ET
1727                   || type_of_next == WEAK_BN
1728                   || bidi_explicit_dir_char (next_char))
1729                 {
                       /* Same save/scan/restore lookahead as in W4, but
                          here a run of ETs is skipped as well, and the
                          position where the scan stopped is noted in
                          en_pos.  */
1730                   bidi_copy_it (&saved_it, bidi_it);
1731                   while (bidi_resolve_explicit (bidi_it) == new_level
1732                          && (bidi_it->type == WEAK_BN
1733                              || bidi_it->type == WEAK_ET))
1734                     ;
1735                   type_of_next = bidi_it->type;
1736                   en_pos = bidi_it->charpos;
1737                   bidi_copy_it (bidi_it, &saved_it);
1738                 }
1739               if (type_of_next == WEAK_EN)
1740                 {
1741                   /* If the last strong character is AL, the EN we've
1742                      found will become AN when we get to it (W2). */
1743                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1744                     {
1745                       type = WEAK_EN;
1746                       /* Remember this EN position, to speed up processing
1747                          of the next ETs.  */
1748                       bidi_it->next_en_pos = en_pos;
1749                     }
1750                   else if (type == WEAK_BN)
1751                     type = NEUTRAL_ON; /* W6/Retaining */
1752                 }
1753             }
1754         }
1755     }
1756
1757   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1758       || (type == WEAK_BN
1759           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1760               || bidi_it->prev.type_after_w1 == WEAK_ES
1761               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1762     type = NEUTRAL_ON;
1763
1764   /* Store the type we've got so far, before we clobber it with strong
1765      types in W7 and while resolving neutral types.  But leave alone
1766      the original types that were recorded above, because we will need
1767      them for the L1 clause.  */
1768   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1769     bidi_it->type_after_w1 = type;
1770   bidi_check_type (bidi_it->type_after_w1);
1771
1772   if (type == WEAK_EN)  /* W7 */
1773     {
           /* EN becomes L when the last strong type seen was L, or when
              no strong type was recorded and sor is left-to-right.  */
1774       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1775           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1776         type = STRONG_L;
1777     }
1778
1779   bidi_it->type = type;
1780   bidi_check_type (bidi_it->type);
1781   return type;
1782 }
1783
1784 /* Resolve the type of a neutral character according to the type of
1785    surrounding strong text and the current embedding level.  */
1786 static inline bidi_type_t
1787 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1788 {
1789   /* N1: European and Arabic numbers are treated as though they were R.  */
1790   if (next_type == WEAK_EN || next_type == WEAK_AN)
1791     next_type = STRONG_R;
1792   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1793     prev_type = STRONG_R;
1794
1795   if (next_type == prev_type)   /* N1 */
1796     return next_type;
1797   else if ((lev & 1) == 0)      /* N2 */
1798     return STRONG_L;
1799   else
1800     return STRONG_R;
1801 }
1802
/* Resolve weak types via bidi_resolve_weak, then resolve the result
   if it is a neutral (or a BN that did not change the embedding
   level), and return the resolved type.

   When BIDI_IT->next_for_neutral already holds a known type, it is
   used directly together with prev_for_neutral.  Otherwise the text
   is scanned forward, caching each visited iterator state, until a
   paragraph separator, a non-neutral non-BN type, or a change of
   embedding level is found; that character is recorded as
   next_for_neutral, and BIDI_IT is restored to the position it had
   before the scan.

   Aborts if bidi_resolve_weak returns an unexpected type, or if the
   forward scan is needed while BIDI_IT->scan_dir is -1.  */
1803 static bidi_type_t
1804 bidi_resolve_neutral (struct bidi_it *bidi_it)
1805 {
1806   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1807   bidi_type_t type = bidi_resolve_weak (bidi_it);
1808   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1809
       /* Only these types may come out of weak-type resolution.  */
1810   if (!(type == STRONG_R
1811         || type == STRONG_L
1812         || type == WEAK_BN
1813         || type == WEAK_EN
1814         || type == WEAK_AN
1815         || type == NEUTRAL_B
1816         || type == NEUTRAL_S
1817         || type == NEUTRAL_WS
1818         || type == NEUTRAL_ON))
1819     abort ();
1820
1821   if (bidi_get_category (type) == NEUTRAL
1822       || (type == WEAK_BN && prev_level == current_level))
1823     {
1824       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1825         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1826                                        bidi_it->next_for_neutral.type,
1827                                        current_level);
1828       else
1829         {
1830           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1831              the assumption of batch-style processing; see clauses W4,
1832              W5, and especially N1, which require to look far forward
1833              (as well as back) in the buffer/string.  May the fleas of
1834              a thousand camels infest the armpits of those who design
1835              supposedly general-purpose algorithms by looking at their
1836              own implementations, and fail to consider other possible
1837              implementations!  */
1838           struct bidi_it saved_it;
1839           bidi_type_t next_type;
1840
1841           if (bidi_it->scan_dir == -1)
1842             abort ();
1843
1844           bidi_copy_it (&saved_it, bidi_it);
1845           /* Scan the text forward until we find the first non-neutral
1846              character, and then use that to resolve the neutral we
1847              are dealing with now.  We also cache the scanned iterator
1848              states, to salvage some of the effort later.  */
1849           bidi_cache_iterator_state (bidi_it, 0);
1850           do {
1851             /* Record the info about the previous character, so that
1852                it will be cached below with this state.  */
1853             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1854                 && bidi_it->type != WEAK_BN)
1855               bidi_remember_char (&bidi_it->prev, bidi_it);
1856             type = bidi_resolve_weak (bidi_it);
1857             /* Paragraph separators have their levels fully resolved
1858                at this point, so cache them as resolved.  */
1859             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1860             /* FIXME: implement L1 here, by testing for a newline and
1861                resetting the level for any sequence of whitespace
1862                characters adjacent to it.  */
1863           } while (!(type == NEUTRAL_B
1864                      || (type != WEAK_BN
1865                          && bidi_get_category (type) != NEUTRAL)
1866                      /* This is all per level run, so stop when we
1867                         reach the end of this level run.  */
1868                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1869                      current_level));
1870
               /* The character that stopped the scan is recorded in the
                  saved state, which is the one that gets restored
                  below.  */
1871           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1872
1873           switch (type)
1874             {
1875               case STRONG_L:
1876               case STRONG_R:
1877               case STRONG_AL:
1878                 next_type = type;
1879                 break;
1880               case WEAK_EN:
1881               case WEAK_AN:
1882                 /* N1: ``European and Arabic numbers are treated as
1883                    though they were R.''  */
1884                 next_type = STRONG_R;
1885                 saved_it.next_for_neutral.type = STRONG_R;
1886                 break;
1887               case WEAK_BN:
1888                 if (!bidi_explicit_dir_char (bidi_it->ch))
1889                   abort ();             /* can't happen: BNs are skipped */
1890                 /* FALLTHROUGH */
1891               case NEUTRAL_B:
1892                 /* Marched all the way to the end of this level run.
1893                    We need to use the eor type, whose information is
1894                    stored by bidi_set_sor_type in the prev_for_neutral
1895                    member.  */
1896                 if (saved_it.type != WEAK_BN
1897                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1898                   {
1899                     next_type = bidi_it->prev_for_neutral.type;
1900                     saved_it.next_for_neutral.type = next_type;
1901                     bidi_check_type (next_type);
1902                   }
1903                 else
1904                   {
1905                     /* This is a BN which does not adjoin neutrals.
1906                        Leave its type alone.  */
1907                     bidi_copy_it (bidi_it, &saved_it);
1908                     return bidi_it->type;
1909                   }
1910                 break;
1911               default:
1912                 abort ();
1913             }
               /* Resolve the original character using the type found by
                  the scan, then go back to the saved position.  */
1914           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1915                                          next_type, current_level);
1916           saved_it.type = type;
1917           bidi_check_type (type);
1918           bidi_copy_it (bidi_it, &saved_it);
1919         }
1920     }
1921   return type;
1922 }
1923
1924 /* Given an iterator state in BIDI_IT, advance one character position
1925    in the buffer/string to the next character (in the logical order),
1926    resolve the bidi type of that next character, and return that
1927    type.  */
1928 static bidi_type_t
1929 bidi_type_of_next_char (struct bidi_it *bidi_it)
1930 {
1931   bidi_type_t type;
1932
1933   /* This should always be called during a forward scan.  */
1934   if (bidi_it->scan_dir != 1)
1935     abort ();
1936
1937   /* Reset the limit until which to ignore BNs if we step out of the
1938      area where we found only empty levels.  */
1939   if ((bidi_it->ignore_bn_limit > -1
1940        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1941       || (bidi_it->ignore_bn_limit == -2
1942           && !bidi_explicit_dir_char (bidi_it->ch)))
1943     bidi_it->ignore_bn_limit = -1;
1944
1945   type = bidi_resolve_neutral (bidi_it);
1946
1947   return type;
1948 }
1949
1950 /* Given an iterator state BIDI_IT, advance one character position in
1951    the buffer/string to the next character (in the current scan
1952    direction), resolve the embedding and implicit levels of that next
1953    character, and return the resulting level.

     On a forward scan, the character being left is first recorded in
     the prev, last_strong and prev_for_neutral members as applicable,
     and lookahead info that has been overstepped (next_for_neutral,
     next_en_pos, next_for_ws) is invalidated.  If a forward scan is
     already at end of text, the current resolved_level is returned
     without advancing.

     The state for the next position is taken from the cache when it
     is found there with a resolved level.  Otherwise the type is
     resolved via bidi_type_of_next_char and the level is computed
     according to clauses L1, I1 and I2, and stored in
     BIDI_IT->resolved_level.

     Aborts if a backward scan does not find a fully resolved state
     in the cache.  */
1954 static int
1955 bidi_level_of_next_char (struct bidi_it *bidi_it)
1956 {
1957   bidi_type_t type;
1958   int level, prev_level = -1;
1959   struct bidi_saved_info next_for_neutral;
1960   EMACS_INT next_char_pos = -2;
1961
1962   if (bidi_it->scan_dir == 1)
1963     {
1964       EMACS_INT eob =
1965         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1966         ? bidi_it->string.schars : ZV;
1967
1968       /* There's no sense in trying to advance if we hit end of text.  */
1969       if (bidi_it->charpos >= eob)
1970         return bidi_it->resolved_level;
1971
1972       /* Record the info about the previous character.  */
1973       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1974           && bidi_it->type != WEAK_BN)
1975         bidi_remember_char (&bidi_it->prev, bidi_it);
1976       if (bidi_it->type_after_w1 == STRONG_R
1977           || bidi_it->type_after_w1 == STRONG_L
1978           || bidi_it->type_after_w1 == STRONG_AL)
1979         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1980       /* FIXME: it sounds like we don't need both prev and
1981          prev_for_neutral members, but I'm leaving them both for now.  */
1982       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1983           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1984         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1985
1986       /* If we overstepped the characters used for resolving neutrals
1987          and whitespace, invalidate their info in the iterator.  */
1988       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1989         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1990       if (bidi_it->next_en_pos >= 0
1991           && bidi_it->charpos >= bidi_it->next_en_pos)
1992         bidi_it->next_en_pos = -1;
1993       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1994           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1995         bidi_it->next_for_ws.type = UNKNOWN_BT;
1996
1997       /* This must be taken before we fill the iterator with the info
1998          about the next char.  If we scan backwards, the iterator
1999          state must be already cached, so there's no need to know the
2000          embedding level of the previous character, since we will be
2001          returning to our caller shortly.  */
2002       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2003     }
       /* Keep a copy; it is put back below if the cached state turns
          out to lack this info.  */
2004   next_for_neutral = bidi_it->next_for_neutral;
2005
2006   /* Perhaps the character we want is already cached.  If it is, the
2007      call to bidi_cache_find below will return a type other than
2008      UNKNOWN_BT.  */
2009   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2010     {
           /* First valid position: 0 for strings, 1 for buffers.  */
2011       int bob =
2012         (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
2013
2014       if (bidi_it->scan_dir > 0)
2015         {
2016           if (bidi_it->nchars <= 0)
2017             abort ();
2018           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2019         }
2020       else if (bidi_it->charpos >= bob)
2021         /* Implementation note: we allow next_char_pos to be as low as
2022            0 for buffers or -1 for strings, and that is okay because
2023            that's the "position" of the sentinel iterator state we
2024            cached at the beginning of the iteration.  */
2025         next_char_pos = bidi_it->charpos - 1;
2026       if (next_char_pos >= bob - 1)
2027         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2028       else
2029         type = UNKNOWN_BT;
2030     }
2031   else
2032     type = UNKNOWN_BT;
2033   if (type != UNKNOWN_BT)
2034     {
2035       /* Don't lose the information for resolving neutrals!  The
2036          cached states could have been cached before their
2037          next_for_neutral member was computed.  If we are on our way
2038          forward, we can simply take the info from the previous
2039          state.  */
2040       if (bidi_it->scan_dir == 1
2041           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2042         bidi_it->next_for_neutral = next_for_neutral;
2043
2044       /* If resolved_level is -1, it means this state was cached
2045          before it was completely resolved, so we cannot return
2046          it.  */
2047       if (bidi_it->resolved_level != -1)
2048         return bidi_it->resolved_level;
2049     }
2050   if (bidi_it->scan_dir == -1)
2051     /* If we are going backwards, the iterator state is already cached
2052        from previous scans, and should be fully resolved.  */
2053     abort ();
2054
2055   if (type == UNKNOWN_BT)
2056     type = bidi_type_of_next_char (bidi_it);
2057
2058   if (type == NEUTRAL_B)
2059     return bidi_it->resolved_level;
2060
2061   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2062   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2063       || (type == WEAK_BN && prev_level == level))
2064     {
2065       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2066         abort ();
2067
2068       /* If the cached state shows a neutral character, it was not
2069          resolved by bidi_resolve_neutral, so do it now.  */
2070       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2071                                      bidi_it->next_for_neutral.type,
2072                                      level);
2073     }
2074
       /* By now every neutral must have been resolved.  */
2075   if (!(type == STRONG_R
2076         || type == STRONG_L
2077         || type == WEAK_BN
2078         || type == WEAK_EN
2079         || type == WEAK_AN))
2080     abort ();
2081   bidi_it->type = type;
2082   bidi_check_type (bidi_it->type);
2083
2084   /* For L1 below, we need to know, for each WS character, whether
2085      it belongs to a sequence of WS characters preceding a newline
2086      or a TAB or a paragraph separator.  */
2087   if (bidi_it->orig_type == NEUTRAL_WS
2088       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2089     {
2090       int ch;
2091       EMACS_INT clen = bidi_it->ch_len;
2092       EMACS_INT bpos = bidi_it->bytepos;
2093       EMACS_INT cpos = bidi_it->charpos;
2094       EMACS_INT disp_pos = bidi_it->disp_pos;
2095       EMACS_INT nc = bidi_it->nchars;
2096       struct bidi_string_data bs = bidi_it->string;
2097       bidi_type_t chtype;
2098       int fwp = bidi_it->frame_window_p;
2099       int dpp = bidi_it->disp_prop;
2100
2101       if (bidi_it->nchars <= 0)
2102         abort ();
           /* Fetch characters through local copies of the position
              data, so that BIDI_IT itself does not move, until one is
              found that is not WS, BN or an explicit formatting code.  */
2103       do {
2104         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2105                               fwp, &clen, &nc);
2106         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
2107           chtype = NEUTRAL_B;
2108         else
2109           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2110       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2111                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2112       bidi_it->next_for_ws.type = chtype;
2113       bidi_check_type (bidi_it->next_for_ws.type);
2114       bidi_it->next_for_ws.charpos = cpos;
2115       bidi_it->next_for_ws.bytepos = bpos;
2116     }
2117
2118   /* Resolve implicit levels, with a twist: PDFs get the embedding
2119      level of the embedding they terminate.  See below for the
2120      reason.  */
2121   if (bidi_it->orig_type == PDF
2122       /* Don't do this if this formatting code didn't change the
2123          embedding level due to invalid or empty embeddings.  */
2124       && prev_level != level)
2125     {
2126       /* Don't look in UAX#9 for the reason for this: it's our own
2127          private quirk.  The reason is that we want the formatting
2128          codes to be delivered so that they bracket the text of their
2129          embedding.  For example, given the text
2130
2131              {RLO}teST{PDF}
2132
2133          we want it to be displayed as
2134
2135              {PDF}STet{RLO}
2136
2137          not as
2138
2139              STet{RLO}{PDF}
2140
2141          which will result because we bump up the embedding level as
2142          soon as we see the RLO and pop it as soon as we see the PDF,
2143          so RLO itself has the same embedding level as "teST", and
2144          thus would be normally delivered last, just before the PDF.
2145          The assignment below fiddles with the level of PDF so that
2146          this ugly side effect does not happen.
2147
2148          (This is, of course, only important if the formatting codes
2149          are actually displayed, but Emacs does need to display them
2150          if the user wants to.)  */
2151       level = prev_level;
2152     }
2153   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2154            || bidi_it->orig_type == NEUTRAL_S
2155            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2156            /* || bidi_it->ch == LINESEP_CHAR */
2157            || (bidi_it->orig_type == NEUTRAL_WS
2158                && (bidi_it->next_for_ws.type == NEUTRAL_B
2159                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2160     level = bidi_it->level_stack[0].level;
2161   else if ((level & 1) == 0) /* I1 */
2162     {
2163       if (type == STRONG_R)
2164         level++;
2165       else if (type == WEAK_EN || type == WEAK_AN)
2166         level += 2;
2167     }
2168   else                  /* I2 */
2169     {
2170       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2171         level++;
2172     }
2173
2174   bidi_it->resolved_level = level;
2175   return level;
2176 }
2177
2178 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2179    non-zero, we are at the end of a level, and we need to prepare to
2180    resume the scan of the lower level.
2181
2182    If this level's other edge is cached, we simply jump to it, filling
2183    the iterator structure with the iterator state on the other edge.
2184    Otherwise, we walk the buffer or string until we come back to the
2185    same level as LEVEL.
2186
2187    Note: we are not talking here about a ``level run'' in the UAX#9
2188    sense of the term, but rather about a ``level'' which includes
2189    all the levels higher than it.  In other words, given the levels
2190    like this:
2191
2192          11111112222222333333334443343222222111111112223322111
2193                 A      B                    C
2194
2195    and assuming we are at point A scanning left to right, this
2196    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2197    at point B.  */
2198 static void
2199 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2200 {
2201   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2202   ptrdiff_t idx;
2203
2204   /* Try the cache first.  */
2205   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2206       >= bidi_cache_start)
2207     bidi_cache_fetch_state (idx, bidi_it);
2208   else
2209     {
2210       int new_level;
2211
2212       if (end_flag)
2213         abort (); /* if we are at end of level, its edges must be cached */
2214
2215       bidi_cache_iterator_state (bidi_it, 1);
2216       do {
2217         new_level = bidi_level_of_next_char (bidi_it);
2218         bidi_cache_iterator_state (bidi_it, 1);
2219       } while (new_level >= level);
2220     }
2221 }
2222
2223 void
2224 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2225 {
2226   int old_level, new_level, next_level;
2227   struct bidi_it sentinel;
2228   struct gcpro gcpro1;
2229
2230   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2231     abort ();
2232
2233   if (bidi_it->scan_dir == 0)
2234     {
2235       bidi_it->scan_dir = 1;    /* default to logical order */
2236     }
2237
2238   /* The code below can call eval, and thus cause GC.  If we are
2239      iterating a Lisp string, make sure it won't be GCed.  */
2240   if (STRINGP (bidi_it->string.lstring))
2241     GCPRO1 (bidi_it->string.lstring);
2242
2243   /* If we just passed a newline, initialize for the next line.  */
2244   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
2245     bidi_line_init (bidi_it);
2246
2247   /* Prepare the sentinel iterator state, and cache it.  When we bump
2248      into it, scanning backwards, we'll know that the last non-base
2249      level is exhausted.  */
2250   if (bidi_cache_idx == bidi_cache_start)
2251     {
2252       bidi_copy_it (&sentinel, bidi_it);
2253       if (bidi_it->first_elt)
2254         {
2255           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2256           sentinel.bytepos--;
2257           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2258           sentinel.ch_len = 1;
2259           sentinel.nchars = 1;
2260         }
2261       bidi_cache_iterator_state (&sentinel, 1);
2262     }
2263
2264   old_level = bidi_it->resolved_level;
2265   new_level = bidi_level_of_next_char (bidi_it);
2266
2267   /* Reordering of resolved levels (clause L2) is implemented by
2268      jumping to the other edge of the level and flipping direction of
2269      scanning the text whenever we find a level change.  */
2270   if (new_level != old_level)
2271     {
2272       int ascending = new_level > old_level;
2273       int level_to_search = ascending ? old_level + 1 : old_level;
2274       int incr = ascending ? 1 : -1;
2275       int expected_next_level = old_level + incr;
2276
2277       /* Jump (or walk) to the other edge of this level.  */
2278       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2279       /* Switch scan direction and peek at the next character in the
2280          new direction.  */
2281       bidi_it->scan_dir = -bidi_it->scan_dir;
2282
2283       /* The following loop handles the case where the resolved level
2284          jumps by more than one.  This is typical for numbers inside a
2285          run of text with left-to-right embedding direction, but can
2286          also happen in other situations.  In those cases the decision
2287          where to continue after a level change, and in what direction,
2288          is tricky.  For example, given a text like below:
2289
2290                   abcdefgh
2291                   11336622
2292
2293          (where the numbers below the text show the resolved levels),
2294          the result of reordering according to UAX#9 should be this:
2295
2296                   efdcghba
2297
2298          This is implemented by the loop below which flips direction
2299          and jumps to the other edge of the level each time it finds
2300          the new level not to be the expected one.  The expected level
2301          is always one more or one less than the previous one.  */
2302       next_level = bidi_peek_at_next_level (bidi_it);
2303       while (next_level != expected_next_level)
2304         {
2305           expected_next_level += incr;
2306           level_to_search += incr;
2307           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2308           bidi_it->scan_dir = -bidi_it->scan_dir;
2309           next_level = bidi_peek_at_next_level (bidi_it);
2310         }
2311
2312       /* Finally, deliver the next character in the new direction.  */
2313       next_level = bidi_level_of_next_char (bidi_it);
2314     }
2315
2316   /* Take note when we have just processed the newline that precedes
2317      the end of the paragraph.  The next time we are about to be
2318      called, set_iterator_to_next will automatically reinit the
2319      paragraph direction, if needed.  We do this at the newline before
2320      the paragraph separator, because the next character might not be
2321      the first character of the next paragraph, due to the bidi
2322      reordering, whereas we _must_ know the paragraph base direction
2323      _before_ we process the paragraph's text, since the base
2324      direction affects the reordering.  */
2325   if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
2326     {
2327       /* The paragraph direction of the entire string, once
2328          determined, is in effect for the entire string.  Setting the
2329          separator limit to the end of the string prevents
2330          bidi_paragraph_init from being called automatically on this
2331          string.  */
2332       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2333         bidi_it->separator_limit = bidi_it->string.schars;
2334       else if (bidi_it->bytepos < ZV_BYTE)
2335         {
2336           EMACS_INT sep_len =
2337             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2338                                    bidi_it->bytepos + bidi_it->ch_len);
2339           if (bidi_it->nchars <= 0)
2340             abort ();
2341           if (sep_len >= 0)
2342             {
2343               bidi_it->new_paragraph = 1;
2344               /* Record the buffer position of the last character of the
2345                  paragraph separator.  */
2346               bidi_it->separator_limit =
2347                 bidi_it->charpos + bidi_it->nchars + sep_len;
2348             }
2349         }
2350     }
2351
2352   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2353     {
2354       /* If we are at paragraph's base embedding level and beyond the
2355          last cached position, the cache's job is done and we can
2356          discard it.  */
2357       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2358           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2359                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2360         bidi_cache_reset ();
2361         /* But as long as we are caching during forward scan, we must
2362            cache each state, or else the cache integrity will be
2363            compromised: it assumes cached states correspond to buffer
2364            positions 1:1.  */
2365       else
2366         bidi_cache_iterator_state (bidi_it, 1);
2367     }
2368
2369   if (STRINGP (bidi_it->string.lstring))
2370     UNGCPRO;
2371 }
2372
2373 /* This is meant to be called from within the debugger, whenever you
2374    wish to examine the cache contents.  */
2375 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2376 void
2377 bidi_dump_cached_states (void)
2378 {
2379   ptrdiff_t i;
2380   int ndigits = 1;
2381
2382   if (bidi_cache_idx == 0)
2383     {
2384       fprintf (stderr, "The cache is empty.\n");
2385       return;
2386     }
2387   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2388            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2389
2390   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2391     ndigits++;
2392   fputs ("ch  ", stderr);
2393   for (i = 0; i < bidi_cache_idx; i++)
2394     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2395   fputs ("\n", stderr);
2396   fputs ("lvl ", stderr);
2397   for (i = 0; i < bidi_cache_idx; i++)
2398     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2399   fputs ("\n", stderr);
2400   fputs ("pos ", stderr);
2401   for (i = 0; i < bidi_cache_idx; i++)
2402     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2403   fputs ("\n", stderr);
2404 }