code.delx.au - gnu-emacs/blob - src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    The two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    If you want to understand the code, you will have to read it
  44    together with the relevant portions of UAX#9.  The comments include
  45    references to UAX#9 rules, for that very reason.
  46
  47    A note about references to UAX#9 rules: if the reference says
  48    something like "X9/Retaining", it means that you need to refer to
  49    rule X9 and to its modifications described in the "Implementation
  50    Notes" section of UAX#9, under "Retaining Format Codes".  */
  51
  52 #include <config.h>
  53 #include <stdio.h>
  54 #include <setjmp.h>
  55
  56 #include "lisp.h"
  57 #include "buffer.h"
  58 #include "character.h"
  59 #include "dispextern.h"
  60
/* Non-zero once bidi_initialize has run.  Tested by bidi_init_it and
   bidi_paragraph_init before they rely on the tables and regexps
   defined below.  */
static int bidi_initialized = 0;

/* Char-tables built by bidi_initialize.  bidi_type_table holds the
   bidi type of each character as a Lisp integer (default STRONG_L);
   bidi_mirror_table holds the mirrored counterpart of a character, or
   nil when the character has none.  */
static Lisp_Object bidi_type_table, bidi_mirror_table;

/* LRM_CHAR and RLM_CHAR are the two characters whose type
   bidi_get_type refuses to override.  BIDI_EOB is the pseudo-character
   that bidi_fetch_char returns at or beyond the end of the text.  */
#define LRM_CHAR   0x200E
#define RLM_CHAR   0x200F
#define BIDI_EOB   -1

/* Data type for describing the bidirectional character categories.
   bidi_get_category maps every bidi_type_t value onto one of these.  */
typedef enum {
  UNKNOWN_BC,	/* category of UNKNOWN_BT */
  NEUTRAL,	/* NEUTRAL_B, NEUTRAL_S, NEUTRAL_WS, NEUTRAL_ON */
  WEAK,		/* PDF and the WEAK_* types */
  STRONG	/* STRONG_L/R/AL and LRE, LRO, RLE, RLO */
} bidi_category_t;

/* NOTE(review): this flag is not used in the part of the file shown
   here; judging by its name it controls whether explicit directional
   marks are skipped when determining the paragraph level -- confirm
   against bidi_paragraph_init.  It is non-zero by default.  */
extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
int bidi_ignore_explicit_marks_for_paragraph_level = 1;

/* Regexps used to recognize paragraph starts and separators, and the
   symbols whose values supply them.  All four are set up in
   bidi_initialize.  */
static Lisp_Object paragraph_start_re, paragraph_separate_re;
static Lisp_Object Qparagraph_start, Qparagraph_separate;
  82
  83 \f
  84 /***********************************************************************
  85                         Utilities
  86  ***********************************************************************/
  87
  88 /* Return the bidi type of a character CH, subject to the current
  89    directional OVERRIDE.  */
  90 static INLINE bidi_type_t
  91 bidi_get_type (int ch, bidi_dir_t override)
  92 {
  93   bidi_type_t default_type;
  94
  95   if (ch == BIDI_EOB)
  96     return NEUTRAL_B;
  97   if (ch < 0 || ch > MAX_CHAR)
  98     abort ();
  99
 100   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 101
 102   if (override == NEUTRAL_DIR)
 103     return default_type;
 104
 105   switch (default_type)
 106     {
 107       /* Although UAX#9 does not tell, it doesn't make sense to
 108          override NEUTRAL_B and LRM/RLM characters.  */
 109       case NEUTRAL_B:
 110       case LRE:
 111       case LRO:
 112       case RLE:
 113       case RLO:
 114       case PDF:
 115         return default_type;
 116       default:
 117         switch (ch)
 118           {
 119             case LRM_CHAR:
 120             case RLM_CHAR:
 121               return default_type;
 122             default:
 123               if (override == L2R) /* X6 */
 124                 return STRONG_L;
 125               else if (override == R2L)
 126                 return STRONG_R;
 127               else
 128                 abort ();       /* can't happen: handled above */
 129           }
 130     }
 131 }
 132
 133 static void
 134 bidi_check_type (bidi_type_t type)
 135 {
 136   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 137     abort ();
 138 }
 139
 140 /* Given a bidi TYPE of a character, return its category.  */
 141 static INLINE bidi_category_t
 142 bidi_get_category (bidi_type_t type)
 143 {
 144   switch (type)
 145     {
 146       case UNKNOWN_BT:
 147         return UNKNOWN_BC;
 148       case STRONG_L:
 149       case STRONG_R:
 150       case STRONG_AL:
 151       case LRE:
 152       case LRO:
 153       case RLE:
 154       case RLO:
 155         return STRONG;
 156       case PDF:         /* ??? really?? */
 157       case WEAK_EN:
 158       case WEAK_ES:
 159       case WEAK_ET:
 160       case WEAK_AN:
 161       case WEAK_CS:
 162       case WEAK_NSM:
 163       case WEAK_BN:
 164         return WEAK;
 165       case NEUTRAL_B:
 166       case NEUTRAL_S:
 167       case NEUTRAL_WS:
 168       case NEUTRAL_ON:
 169         return NEUTRAL;
 170       default:
 171         abort ();
 172     }
 173 }
 174
 175 /* Return the mirrored character of C, if it has one.  If C has no
 176    mirrored counterpart, return C.
 177    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 178    context must be tested by the caller.  */
 179 int
 180 bidi_mirror_char (int c)
 181 {
 182   Lisp_Object val;
 183
 184   if (c == BIDI_EOB)
 185     return c;
 186   if (c < 0 || c > MAX_CHAR)
 187     abort ();
 188
 189   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 190   if (INTEGERP (val))
 191     {
 192       int v = XINT (val);
 193
 194       if (v < 0 || v > MAX_CHAR)
 195         abort ();
 196
 197       return v;
 198     }
 199
 200   return c;
 201 }
 202
 203 /* Determine the start-of-run (sor) directional type given the two
 204    embedding levels on either side of the run boundary.  Also, update
 205    the saved info about previously seen characters, since that info is
 206    generally valid for a single level run.  */
 207 static INLINE void
 208 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 209 {
 210   int higher_level = level_before > level_after ? level_before : level_after;
 211
 212   /* The prev_was_pdf gork is required for when we have several PDFs
 213      in a row.  In that case, we want to compute the sor type for the
 214      next level run only once: when we see the first PDF.  That's
 215      because the sor type depends only on the higher of the two levels
 216      that we find on the two sides of the level boundary (see UAX#9,
 217      clause X10), and so we don't need to know the final embedding
 218      level to which we descend after processing all the PDFs.  */
 219   if (!bidi_it->prev_was_pdf || level_before < level_after)
 220     /* FIXME: should the default sor direction be user selectable?  */
 221     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 222   if (level_before > level_after)
 223     bidi_it->prev_was_pdf = 1;
 224
 225   bidi_it->prev.type = UNKNOWN_BT;
 226   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 227     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 228   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 229   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 230   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 231   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 232     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 233   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 234 }
 235
 236 /* Push the current embedding level and override status; reset the
 237    current level to LEVEL and the current override status to OVERRIDE.  */
 238 static INLINE void
 239 bidi_push_embedding_level (struct bidi_it *bidi_it,
 240                            int level, bidi_dir_t override)
 241 {
 242   bidi_it->stack_idx++;
 243   if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
 244     abort ();
 245   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 246   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 247 }
 248
 249 /* Pop the embedding level and directional override status from the
 250    stack, and return the new level.  */
 251 static INLINE int
 252 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 253 {
 254   /* UAX#9 says to ignore invalid PDFs.  */
 255   if (bidi_it->stack_idx > 0)
 256     bidi_it->stack_idx--;
 257   return bidi_it->level_stack[bidi_it->stack_idx].level;
 258 }
 259
 260 /* Record in SAVED_INFO the information about the current character.  */
 261 static INLINE void
 262 bidi_remember_char (struct bidi_saved_info *saved_info,
 263                     struct bidi_it *bidi_it)
 264 {
 265   saved_info->charpos = bidi_it->charpos;
 266   saved_info->bytepos = bidi_it->bytepos;
 267   saved_info->type = bidi_it->type;
 268   bidi_check_type (bidi_it->type);
 269   saved_info->type_after_w1 = bidi_it->type_after_w1;
 270   bidi_check_type (bidi_it->type_after_w1);
 271   saved_info->orig_type = bidi_it->orig_type;
 272   bidi_check_type (bidi_it->orig_type);
 273 }
 274
 275 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 276    copies the part of the level stack that is actually in use.  */
 277 static INLINE void
 278 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 279 {
 280   int i;
 281
 282   /* Copy everything except the level stack and beyond.  */
 283   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 284
 285   /* Copy the active part of the level stack.  */
 286   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 287   for (i = 1; i <= from->stack_idx; i++)
 288     to->level_stack[i] = from->level_stack[i];
 289 }
 290
 291 \f
 292 /***********************************************************************
 293                         Caching the bidi iterator states
 294  ***********************************************************************/
 295
/* The cache is an array of iterator states.  It grows by
   BIDI_CACHE_CHUNK slots at a time (see bidi_cache_iterator_state) and
   is trimmed back to that many slots by bidi_cache_shrink.
   bidi_cache_size counts allocated slots; elsz is the size in bytes
   of one slot.  */
#define BIDI_CACHE_CHUNK 200
static struct bidi_it *bidi_cache;
static size_t bidi_cache_size = 0;
static size_t elsz = sizeof (struct bidi_it);
static EMACS_INT bidi_cache_idx;        /* next unused cache slot */
static EMACS_INT bidi_cache_last_idx;   /* slot of last cache hit, or -1
                                           after bidi_cache_reset */
static EMACS_INT bidi_cache_start = 0;  /* start of cache for this
                                           "stack" level */
 304
 305 /* Reset the cache state to the empty state.  We only reset the part
 306    of the cache relevant to iteration of the current object.  Previous
 307    objects, which are pushed on the display iterator's stack, are left
 308    intact.  This is called when the cached information is no more
 309    useful for the current iteration, e.g. when we were reseated to a
 310    new position on the same object.  */
 311 static INLINE void
 312 bidi_cache_reset (void)
 313 {
 314   bidi_cache_idx = bidi_cache_start;
 315   bidi_cache_last_idx = -1;
 316 }
 317
 318 /* Shrink the cache to its minimal size.  Called when we init the bidi
 319    iterator for reordering a buffer or a string that does not come
 320    from display properties, because that means all the previously
 321    cached info is of no further use.  */
 322 static INLINE void
 323 bidi_cache_shrink (void)
 324 {
 325   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 326     {
 327       bidi_cache_size = BIDI_CACHE_CHUNK;
 328       bidi_cache =
 329         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 330     }
 331   bidi_cache_reset ();
 332 }
 333
 334 static INLINE void
 335 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
 336 {
 337   int current_scan_dir = bidi_it->scan_dir;
 338
 339   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 340     abort ();
 341
 342   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 343   bidi_it->scan_dir = current_scan_dir;
 344   bidi_cache_last_idx = idx;
 345 }
 346
 347 /* Find a cached state with a given CHARPOS and resolved embedding
 348    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 349    resolved levels in cached states.  DIR, if non-zero, means search
 350    in that direction from the last cache hit.  */
 351 static INLINE int
 352 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 353 {
 354   int i, i_start;
 355
 356   if (bidi_cache_idx)
 357     {
 358       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 359         {
 360           dir = -1;
 361           i_start = bidi_cache_last_idx - 1;
 362         }
 363       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 364                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 365         {
 366           dir = 1;
 367           i_start = bidi_cache_last_idx + 1;
 368         }
 369       else if (dir)
 370         i_start = bidi_cache_last_idx;
 371       else
 372         {
 373           dir = -1;
 374           i_start = bidi_cache_idx - 1;
 375         }
 376
 377       if (dir < 0)
 378         {
 379           /* Linear search for now; FIXME!  */
 380           for (i = i_start; i >= bidi_cache_start; i--)
 381             if (bidi_cache[i].charpos <= charpos
 382                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 383                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 384               return i;
 385         }
 386       else
 387         {
 388           for (i = i_start; i < bidi_cache_idx; i++)
 389             if (bidi_cache[i].charpos <= charpos
 390                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 391                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 392               return i;
 393         }
 394     }
 395
 396   return -1;
 397 }
 398
 399 /* Find a cached state where the resolved level changes to a value
 400    that is lower than LEVEL, and return its cache slot index.  DIR is
 401    the direction to search, starting with the last used cache slot.
 402    If DIR is zero, we search backwards from the last occupied cache
 403    slot.  BEFORE, if non-zero, means return the index of the slot that
 404    is ``before'' the level change in the search direction.  That is,
 405    given the cached levels like this:
 406
 407          1122333442211
 408           AB        C
 409
 410    and assuming we are at the position cached at the slot marked with
 411    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 412    index of slot B or A, depending whether BEFORE is, respectively,
 413    non-zero or zero.  */
 414 static int
 415 bidi_cache_find_level_change (int level, int dir, int before)
 416 {
 417   if (bidi_cache_idx)
 418     {
 419       int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 420       int incr = before ? 1 : 0;
 421
 422       if (!dir)
 423         dir = -1;
 424       else if (!incr)
 425         i += dir;
 426
 427       if (dir < 0)
 428         {
 429           while (i >= bidi_cache_start + incr)
 430             {
 431               if (bidi_cache[i - incr].resolved_level >= 0
 432                   && bidi_cache[i - incr].resolved_level < level)
 433                 return i;
 434               i--;
 435             }
 436         }
 437       else
 438         {
 439           while (i < bidi_cache_idx - incr)
 440             {
 441               if (bidi_cache[i + incr].resolved_level >= 0
 442                   && bidi_cache[i + incr].resolved_level < level)
 443                 return i;
 444               i++;
 445             }
 446         }
 447     }
 448
 449   return -1;
 450 }
 451
 452 static INLINE void
 453 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 454 {
 455   int idx;
 456
 457   /* We should never cache on backward scans.  */
 458   if (bidi_it->scan_dir == -1)
 459     abort ();
 460   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 461
 462   if (idx < 0)
 463     {
 464       idx = bidi_cache_idx;
 465       /* Enlarge the cache as needed.  */
 466       if (idx >= bidi_cache_size)
 467         {
 468           bidi_cache_size += BIDI_CACHE_CHUNK;
 469           bidi_cache =
 470             (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 471         }
 472       /* Character positions should correspond to cache positions 1:1.
 473          If we are outside the range of cached positions, the cache is
 474          useless and must be reset.  */
 475       if (idx > bidi_cache_start &&
 476           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 477                                + bidi_cache[idx - 1].nchars)
 478            || bidi_it->charpos < bidi_cache[0].charpos))
 479         {
 480           bidi_cache_reset ();
 481           idx = bidi_cache_start;
 482         }
 483       if (bidi_it->nchars <= 0)
 484         abort ();
 485       bidi_copy_it (&bidi_cache[idx], bidi_it);
 486       if (!resolved)
 487         bidi_cache[idx].resolved_level = -1;
 488     }
 489   else
 490     {
 491       /* Copy only the members which could have changed, to avoid
 492          costly copying of the entire struct.  */
 493       bidi_cache[idx].type = bidi_it->type;
 494       bidi_check_type (bidi_it->type);
 495       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 496       bidi_check_type (bidi_it->type_after_w1);
 497       if (resolved)
 498         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 499       else
 500         bidi_cache[idx].resolved_level = -1;
 501       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 502       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 503       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 504       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 505       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 506     }
 507
 508   bidi_cache_last_idx = idx;
 509   if (idx >= bidi_cache_idx)
 510     bidi_cache_idx = idx + 1;
 511 }
 512
 513 static INLINE bidi_type_t
 514 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 515 {
 516   int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 517
 518   if (i >= bidi_cache_start)
 519     {
 520       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 521
 522       bidi_copy_it (bidi_it, &bidi_cache[i]);
 523       bidi_cache_last_idx = i;
 524       /* Don't let scan direction from from the cached state override
 525          the current scan direction.  */
 526       bidi_it->scan_dir = current_scan_dir;
 527       return bidi_it->type;
 528     }
 529
 530   return UNKNOWN_BT;
 531 }
 532
 533 static INLINE int
 534 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 535 {
 536   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 537     abort ();
 538   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 539 }
 540
 541 \f
 542 /***********************************************************************
 543                         Initialization
 544  ***********************************************************************/
 545 static void
 546 bidi_initialize (void)
 547 {
 548
 549 #include "biditype.h"
 550 #include "bidimirror.h"
 551
 552   int i;
 553
 554   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
 555   staticpro (&bidi_type_table);
 556
 557   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
 558     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
 559                           make_number (bidi_type[i].type));
 560
 561   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 562   staticpro (&bidi_mirror_table);
 563
 564   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 565     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 566                     make_number (bidi_mirror[i].to));
 567
 568   Qparagraph_start = intern ("paragraph-start");
 569   staticpro (&Qparagraph_start);
 570   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 571   if (!STRINGP (paragraph_start_re))
 572     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 573   staticpro (&paragraph_start_re);
 574   Qparagraph_separate = intern ("paragraph-separate");
 575   staticpro (&Qparagraph_separate);
 576   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 577   if (!STRINGP (paragraph_separate_re))
 578     paragraph_separate_re = build_string ("[ \t\f]*$");
 579   staticpro (&paragraph_separate_re);
 580   bidi_initialized = 1;
 581 }
 582
 583 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 584    end.  */
 585 static INLINE void
 586 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 587 {
 588   bidi_it->invalid_levels = 0;
 589   bidi_it->invalid_rl_levels = -1;
 590   bidi_it->stack_idx = 0;
 591   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 592 }
 593
 594 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 595 void
 596 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 597               struct bidi_it *bidi_it)
 598 {
 599   if (! bidi_initialized)
 600     bidi_initialize ();
 601   if (charpos >= 0)
 602     bidi_it->charpos = charpos;
 603   if (bytepos >= 0)
 604     bidi_it->bytepos = bytepos;
 605   bidi_it->frame_window_p = frame_window_p;
 606   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 607   bidi_it->first_elt = 1;
 608   bidi_set_paragraph_end (bidi_it);
 609   bidi_it->new_paragraph = 1;
 610   bidi_it->separator_limit = -1;
 611   bidi_it->type = NEUTRAL_B;
 612   bidi_it->type_after_w1 = NEUTRAL_B;
 613   bidi_it->orig_type = NEUTRAL_B;
 614   bidi_it->prev_was_pdf = 0;
 615   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 616     bidi_it->prev.orig_type = UNKNOWN_BT;
 617   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 618     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 619   bidi_it->next_for_neutral.charpos = -1;
 620   bidi_it->next_for_neutral.type =
 621     bidi_it->next_for_neutral.type_after_w1 =
 622     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 623   bidi_it->prev_for_neutral.charpos = -1;
 624   bidi_it->prev_for_neutral.type =
 625     bidi_it->prev_for_neutral.type_after_w1 =
 626     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 627   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 628   bidi_it->disp_pos = -1;       /* invalid/unknown */
 629   /* We can only shrink the cache if we are at the bottom level of its
 630      "stack".  */
 631   if (bidi_cache_start == 0)
 632     bidi_cache_shrink ();
 633 }
 634
 635 /* Perform initializations for reordering a new line of bidi text.  */
 636 static void
 637 bidi_line_init (struct bidi_it *bidi_it)
 638 {
 639   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 640   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 641   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 642   bidi_it->invalid_levels = 0;
 643   bidi_it->invalid_rl_levels = -1;
 644   bidi_it->next_en_pos = -1;
 645   bidi_it->next_for_ws.type = UNKNOWN_BT;
 646   bidi_set_sor_type (bidi_it,
 647                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 648                      bidi_it->level_stack[0].level); /* X10 */
 649
 650   bidi_cache_reset ();
 651 }
 652
 653 \f
 654 /***********************************************************************
 655                         Fetching characters
 656  ***********************************************************************/
 657
 658 /* Count bytes in multibyte string S between BEG/BEGBYTE and END.  BEG
 659    and END are zero-based character positions in S, BEGBYTE is byte
 660    position corresponding to BEG.  */
 661 static inline EMACS_INT
 662 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 663                   const EMACS_INT begbyte, const EMACS_INT end)
 664 {
 665   EMACS_INT pos = beg;
 666   const unsigned char *p = s + begbyte, *start = p;
 667
 668   if (!CHAR_HEAD_P (*p))
 669     abort ();
 670
 671   while (pos < end)
 672     {
 673       p += BYTES_BY_CHAR_HEAD (*p);
 674       pos++;
 675     }
 676
 677   return p - start;
 678 }
 679
 680 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 681    non-NULL, fetch the character from string S; otherwise fetch the
 682    character from the current buffer.  */
 683 static inline int
 684 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s)
 685 {
 686   if (s)
 687     return STRING_CHAR (s + bytepos);
 688   else
 689     return FETCH_MULTIBYTE_CHAR (bytepos);
 690 }
 691
 692 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 693    character is covered by a display string, treat the entire run of
 694    covered characters as a single character u+FFFC, and return their
 695    combined length in CH_LEN and NCHARS.  DISP_POS specifies the
 696    character position of the next display string, or -1 if not yet
 697    computed.  When the next character is at or beyond that position,
 698    the function updates DISP_POS with the position of the next display
 699    string.  STRING->s is the C string to iterate, or NULL if iterating
 700    over a buffer or a Lisp string; in the latter case, STRING->lstring
 701    is the Lisp string.  */
 702 static inline int
 703 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 704                  struct bidi_string_data *string,
 705                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 706 {
 707   int ch;
 708   EMACS_INT endpos =
 709     (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 710   struct text_pos pos;
 711
 712   /* If we got past the last known position of display string, compute
 713      the position of the next one.  That position could be at CHARPOS.  */
 714   if (charpos < endpos && charpos > *disp_pos)
 715     {
 716       SET_TEXT_POS (pos, charpos, bytepos);
 717       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
 718     }
 719
 720   /* Fetch the character at BYTEPOS.  */
 721   if (charpos >= endpos)
 722     {
 723       ch = BIDI_EOB;
 724       *ch_len = 1;
 725       *nchars = 1;
 726       *disp_pos = endpos;
 727     }
 728   else if (charpos >= *disp_pos)
 729     {
 730       EMACS_INT disp_end_pos;
 731
 732       /* We don't expect to find ourselves in the middle of a display
 733          property.  Hopefully, it will never be needed.  */
 734       if (charpos > *disp_pos)
 735         abort ();
 736       /* Return the Unicode Object Replacement Character to represent
 737          the entire run of characters covered by the display string.  */
 738       ch = 0xFFFC;
 739       disp_end_pos = compute_display_string_end (*disp_pos, string);
 740       *nchars = disp_end_pos - *disp_pos;
 741       if (string->s)
 742         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 743                                     disp_end_pos);
 744       else if (STRINGP (string->lstring))
 745         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 746                                     bytepos, disp_end_pos);
 747       else
 748         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 749     }
 750   else
 751     {
 752       if (string->s)
 753         {
 754           EMACS_INT len;
 755
 756           ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 757           *ch_len = len;
 758         }
 759       else if (STRINGP (string->lstring))
 760         {
 761           EMACS_INT len;
 762
 763           ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos, len);
 764           *ch_len = len;
 765         }
 766       else
 767         {
 768           ch = FETCH_MULTIBYTE_CHAR (bytepos);
 769           *ch_len = CHAR_BYTES (ch);
 770         }
 771       *nchars = 1;
 772     }
 773
 774   /* If we just entered a run of characters covered by a display
 775      string, compute the position of the next display string.  */
 776   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos)
 777     {
 778       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
 779       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
 780     }
 781
 782   return ch;
 783 }
 784
 785 \f
 786 /***********************************************************************
 787                         Determining paragraph direction
 788  ***********************************************************************/
 789
 790 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
 791    Value is the non-negative length of the paragraph separator
 792    following the buffer position, -1 if position is at the beginning
 793    of a new paragraph, or -2 if position is neither at beginning nor
 794    at end of a paragraph.  */
 795 static EMACS_INT
 796 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 797 {
 798   Lisp_Object sep_re;
 799   Lisp_Object start_re;
 800   EMACS_INT val;
 801
 802   sep_re = paragraph_separate_re;
 803   start_re = paragraph_start_re;
 804
 805   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
 806   if (val < 0)
 807     {
 808       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
 809         val = -1;
 810       else
 811         val = -2;
 812     }
 813
 814   return val;
 815 }
 816
 817 /* Find the beginning of this paragraph by looking back in the buffer.
 818    Value is the byte position of the paragraph's beginning.  */
 819 static EMACS_INT
 820 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
 821 {
 822   Lisp_Object re = paragraph_start_re;
 823   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
 824
 825   while (pos_byte > BEGV_BYTE
 826          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
 827     {
 828       /* FIXME: What if the paragraph beginning is covered by a
 829          display string?  And what if a display string covering some
 830          of the text over which we scan back includes
 831          paragraph_start_re?  */
 832       pos = find_next_newline_no_quit (pos - 1, -1);
 833       pos_byte = CHAR_TO_BYTE (pos);
 834     }
 835   return pos_byte;
 836 }
 837
 838 /* Determine the base direction, a.k.a. base embedding level, of the
 839    paragraph we are about to iterate through.  If DIR is either L2R or
 840    R2L, just use that.  Otherwise, determine the paragraph direction
 841    from the first strong directional character of the paragraph.
 842
 843    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
 844    has no strong directional characters and both DIR and
 845    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
 846    in the buffer until a paragraph is found with a strong character,
 847    or until hitting BEGV.  In the latter case, fall back to L2R.  This
 848    flag is used in current-bidi-paragraph-direction.
 849
 850    Note that this function gives the paragraph separator the same
 851    direction as the preceding paragraph, even though Emacs generally
 852    views the separartor as not belonging to any paragraph.  */
 853 void
 854 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 855 {
 856   EMACS_INT bytepos = bidi_it->bytepos;
 857   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
 858   EMACS_INT pstartbyte;
 859   /* Note that begbyte is a byte position, while end is a character
 860      position.  Yes, this is ugly, but we are trying to avoid costly
 861      calls to BYTE_TO_CHAR and its ilk.  */
 862   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
 863   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
 864
 865   /* Special case for an empty buffer. */
 866   if (bytepos == begbyte && bidi_it->charpos == end)
 867     dir = L2R;
 868   /* We should never be called at EOB or before BEGV.  */
 869   else if (bidi_it->charpos >= end || bytepos < begbyte)
 870     abort ();
 871
 872   if (dir == L2R)
 873     {
 874       bidi_it->paragraph_dir = L2R;
 875       bidi_it->new_paragraph = 0;
 876     }
 877   else if (dir == R2L)
 878     {
 879       bidi_it->paragraph_dir = R2L;
 880       bidi_it->new_paragraph = 0;
 881     }
 882   else if (dir == NEUTRAL_DIR)  /* P2 */
 883     {
 884       int ch;
 885       EMACS_INT ch_len, nchars;
 886       EMACS_INT pos, disp_pos = -1;
 887       bidi_type_t type;
 888       const unsigned char *s;
 889
 890       if (!bidi_initialized)
 891         bidi_initialize ();
 892
 893       /* If we are inside a paragraph separator, we are just waiting
 894          for the separator to be exhausted; use the previous paragraph
 895          direction.  But don't do that if we have been just reseated,
 896          because we need to reinitialize below in that case.  */
 897       if (!bidi_it->first_elt
 898           && bidi_it->charpos < bidi_it->separator_limit)
 899         return;
 900
 901       /* If we are on a newline, get past it to where the next
 902          paragraph might start.  But don't do that at BEGV since then
 903          we are potentially in a new paragraph that doesn't yet
 904          exist.  */
 905       pos = bidi_it->charpos;
 906       s = STRINGP (bidi_it->string.lstring) ?
 907         SDATA (bidi_it->string.lstring) : bidi_it->string.s;
 908       if (bytepos > begbyte && bidi_char_at_pos (bytepos, s) == '\n')
 909         {
 910           bytepos++;
 911           pos++;
 912         }
 913
 914       /* We are either at the beginning of a paragraph or in the
 915          middle of it.  Find where this paragraph starts.  */
 916       if (string_p)
 917         {
 918           /* We don't support changes of paragraph direction inside a
 919              string.  It is treated as a single paragraph.  */
 920           pstartbyte = 0;
 921         }
 922       else
 923         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 924       bidi_it->separator_limit = -1;
 925       bidi_it->new_paragraph = 0;
 926
 927       /* The following loop is run more than once only if NO_DEFAULT_P
 928          is non-zero, and only if we are iterating on a buffer.  */
 929       do {
 930         bytepos = pstartbyte;
 931         if (!string_p)
 932           pos = BYTE_TO_CHAR (bytepos);
 933         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
 934                               bidi_it->frame_window_p, &ch_len, &nchars);
 935         type = bidi_get_type (ch, NEUTRAL_DIR);
 936
 937         for (pos += nchars, bytepos += ch_len;
 938              /* NOTE: UAX#9 says to search only for L, AL, or R types
 939                 of characters, and ignore RLE, RLO, LRE, and LRO.
 940                 However, I'm not sure it makes sense to omit those 4;
 941                 should try with and without that to see the effect.  */
 942              (bidi_get_category (type) != STRONG)
 943                || (bidi_ignore_explicit_marks_for_paragraph_level
 944                    && (type == RLE || type == RLO
 945                        || type == LRE || type == LRO));
 946              type = bidi_get_type (ch, NEUTRAL_DIR))
 947           {
 948             if (!string_p
 949                 && type == NEUTRAL_B
 950                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
 951               break;
 952             if (pos >= end)
 953               {
 954                 /* Pretend there's a paragraph separator at end of
 955                    buffer/string.  */
 956                 type = NEUTRAL_B;
 957                 break;
 958               }
 959             /* Fetch next character and advance to get past it.  */
 960             ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
 961                                   bidi_it->frame_window_p, &ch_len, &nchars);
 962             pos += nchars;
 963             bytepos += ch_len;
 964           }
 965         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 966           bidi_it->paragraph_dir = R2L;
 967         else if (type == STRONG_L)
 968           bidi_it->paragraph_dir = L2R;
 969         if (!string_p
 970             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 971           {
 972             /* If this paragraph is at BEGV, default to L2R.  */
 973             if (pstartbyte == BEGV_BYTE)
 974               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 975             else
 976               {
 977                 EMACS_INT prevpbyte = pstartbyte;
 978                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 979
 980                 /* Find the beginning of the previous paragraph, if any.  */
 981                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 982                   {
 983                     /* FXIME: What if p is covered by a display
 984                        string?  See also a FIXME inside
 985                        bidi_find_paragraph_start.  */
 986                     p--;
 987                     pbyte = CHAR_TO_BYTE (p);
 988                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 989                   }
 990                 pstartbyte = prevpbyte;
 991               }
 992           }
 993       } while (!string_p
 994                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 995     }
 996   else
 997     abort ();
 998
 999   /* Contrary to UAX#9 clause P3, we only default the paragraph
1000      direction to L2R if we have no previous usable paragraph
1001      direction.  This is allowed by the HL1 clause.  */
1002   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1003     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1004   if (bidi_it->paragraph_dir == R2L)
1005     bidi_it->level_stack[0].level = 1;
1006   else
1007     bidi_it->level_stack[0].level = 0;
1008
1009   bidi_line_init (bidi_it);
1010 }
1011
1012 \f
1013 /***********************************************************************
1014                  Resolving explicit and implicit levels.
1015                  The rest of the file constitutes the core
1016                  of the UBA implementation.
1017  ***********************************************************************/
1018
1019 static INLINE int
1020 bidi_explicit_dir_char (int ch)
1021 {
1022   bidi_type_t ch_type;
1023
1024   if (!bidi_initialized)
1025     abort ();
1026   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1027   return (ch_type == LRE || ch_type == LRO
1028           || ch_type == RLE || ch_type == RLO
1029           || ch_type == PDF);
1030 }
1031
1032 /* A helper function for bidi_resolve_explicit.  It advances to the
1033    next character in logical order and determines the new embedding
1034    level and directional override, but does not take into account
1035    empty embeddings.  */
1036 static int
1037 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1038 {
1039   int curchar;
1040   bidi_type_t type;
1041   int current_level;
1042   int new_level;
1043   bidi_dir_t override;
1044   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1045
1046   /* If reseat()'ed, don't advance, so as to start iteration from the
1047      position where we were reseated.  bidi_it->bytepos can be less
1048      than BEGV_BYTE after reseat to BEGV.  */
1049   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1050       || bidi_it->first_elt)
1051     {
1052       bidi_it->first_elt = 0;
1053       if (string_p)
1054         {
1055           const unsigned char *p =
1056             STRINGP (bidi_it->string.lstring)
1057             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1058
1059           if (bidi_it->charpos < 0)
1060             bidi_it->charpos = 0;
1061           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos);
1062         }
1063       else
1064         {
1065           if (bidi_it->charpos < BEGV)
1066             bidi_it->charpos = BEGV;
1067           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1068         }
1069     }
1070   /* Don't move at end of buffer/string.  */
1071   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1072     {
1073       /* Advance to the next character, skipping characters covered by
1074          display strings (nchars > 1).  */
1075       if (bidi_it->nchars <= 0)
1076         abort ();
1077       bidi_it->charpos += bidi_it->nchars;
1078       if (bidi_it->ch_len == 0)
1079         abort ();
1080       bidi_it->bytepos += bidi_it->ch_len;
1081     }
1082
1083   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1084   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1085   new_level = current_level;
1086
1087   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1088     {
1089       curchar = BIDI_EOB;
1090       bidi_it->ch_len = 1;
1091       bidi_it->nchars = 1;
1092       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1093     }
1094   else
1095     {
1096       /* Fetch the character at BYTEPOS.  If it is covered by a
1097          display string, treat the entire run of covered characters as
1098          a single character u+FFFC.  */
1099       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1100                                  &bidi_it->disp_pos, &bidi_it->string,
1101                                  bidi_it->frame_window_p,
1102                                  &bidi_it->ch_len, &bidi_it->nchars);
1103     }
1104   bidi_it->ch = curchar;
1105
1106   /* Don't apply directional override here, as all the types we handle
1107      below will not be affected by the override anyway, and we need
1108      the original type unaltered.  The override will be applied in
1109      bidi_resolve_weak.  */
1110   type = bidi_get_type (curchar, NEUTRAL_DIR);
1111   bidi_it->orig_type = type;
1112   bidi_check_type (bidi_it->orig_type);
1113
1114   if (type != PDF)
1115     bidi_it->prev_was_pdf = 0;
1116
1117   bidi_it->type_after_w1 = UNKNOWN_BT;
1118
1119   switch (type)
1120     {
1121       case RLE: /* X2 */
1122       case RLO: /* X4 */
1123         bidi_it->type_after_w1 = type;
1124         bidi_check_type (bidi_it->type_after_w1);
1125         type = WEAK_BN; /* X9/Retaining */
1126         if (bidi_it->ignore_bn_limit <= -1)
1127           {
1128             if (current_level <= BIDI_MAXLEVEL - 4)
1129               {
1130                 /* Compute the least odd embedding level greater than
1131                    the current level.  */
1132                 new_level = ((current_level + 1) & ~1) + 1;
1133                 if (bidi_it->type_after_w1 == RLE)
1134                   override = NEUTRAL_DIR;
1135                 else
1136                   override = R2L;
1137                 if (current_level == BIDI_MAXLEVEL - 4)
1138                   bidi_it->invalid_rl_levels = 0;
1139                 bidi_push_embedding_level (bidi_it, new_level, override);
1140               }
1141             else
1142               {
1143                 bidi_it->invalid_levels++;
1144                 /* See the commentary about invalid_rl_levels below.  */
1145                 if (bidi_it->invalid_rl_levels < 0)
1146                   bidi_it->invalid_rl_levels = 0;
1147                 bidi_it->invalid_rl_levels++;
1148               }
1149           }
1150         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1151                  || bidi_it->next_en_pos > bidi_it->charpos)
1152           type = WEAK_EN;
1153         break;
1154       case LRE: /* X3 */
1155       case LRO: /* X5 */
1156         bidi_it->type_after_w1 = type;
1157         bidi_check_type (bidi_it->type_after_w1);
1158         type = WEAK_BN; /* X9/Retaining */
1159         if (bidi_it->ignore_bn_limit <= -1)
1160           {
1161             if (current_level <= BIDI_MAXLEVEL - 5)
1162               {
1163                 /* Compute the least even embedding level greater than
1164                    the current level.  */
1165                 new_level = ((current_level + 2) & ~1);
1166                 if (bidi_it->type_after_w1 == LRE)
1167                   override = NEUTRAL_DIR;
1168                 else
1169                   override = L2R;
1170                 bidi_push_embedding_level (bidi_it, new_level, override);
1171               }
1172             else
1173               {
1174                 bidi_it->invalid_levels++;
1175                 /* invalid_rl_levels counts invalid levels encountered
1176                    while the embedding level was already too high for
1177                    LRE/LRO, but not for RLE/RLO.  That is because
1178                    there may be exactly one PDF which we should not
1179                    ignore even though invalid_levels is non-zero.
1180                    invalid_rl_levels helps to know what PDF is
1181                    that.  */
1182                 if (bidi_it->invalid_rl_levels >= 0)
1183                   bidi_it->invalid_rl_levels++;
1184               }
1185           }
1186         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1187                  || bidi_it->next_en_pos > bidi_it->charpos)
1188           type = WEAK_EN;
1189         break;
1190       case PDF: /* X7 */
1191         bidi_it->type_after_w1 = type;
1192         bidi_check_type (bidi_it->type_after_w1);
1193         type = WEAK_BN; /* X9/Retaining */
1194         if (bidi_it->ignore_bn_limit <= -1)
1195           {
1196             if (!bidi_it->invalid_rl_levels)
1197               {
1198                 new_level = bidi_pop_embedding_level (bidi_it);
1199                 bidi_it->invalid_rl_levels = -1;
1200                 if (bidi_it->invalid_levels)
1201                   bidi_it->invalid_levels--;
1202                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1203               }
1204             if (!bidi_it->invalid_levels)
1205               new_level = bidi_pop_embedding_level (bidi_it);
1206             else
1207               {
1208                 bidi_it->invalid_levels--;
1209                 bidi_it->invalid_rl_levels--;
1210               }
1211           }
1212         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1213                  || bidi_it->next_en_pos > bidi_it->charpos)
1214           type = WEAK_EN;
1215         break;
1216       default:
1217         /* Nothing.  */
1218         break;
1219     }
1220
1221   bidi_it->type = type;
1222   bidi_check_type (bidi_it->type);
1223
1224   return new_level;
1225 }
1226
1227 /* Given an iterator state in BIDI_IT, advance one character position
1228    in the buffer/string to the next character (in the logical order),
1229    resolve any explicit embeddings and directional overrides, and
1230    return the embedding level of the character after resolving
1231    explicit directives and ignoring empty embeddings.  */
1232 static int
1233 bidi_resolve_explicit (struct bidi_it *bidi_it)
1234 {
1235   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1236   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1237   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1238   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1239     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1240
1241   if (prev_level < new_level
1242       && bidi_it->type == WEAK_BN
1243       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1244       && bidi_it->charpos < eob         /* not already at EOB */
1245       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1246                                                    + bidi_it->ch_len, s)))
1247     {
1248       /* Avoid pushing and popping embedding levels if the level run
1249          is empty, as this breaks level runs where it shouldn't.
1250          UAX#9 removes all the explicit embedding and override codes,
1251          so empty embeddings disappear without a trace.  We need to
1252          behave as if we did the same.  */
1253       struct bidi_it saved_it;
1254       int level = prev_level;
1255
1256       bidi_copy_it (&saved_it, bidi_it);
1257
1258       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1259                                                        + bidi_it->ch_len, s)))
1260         {
1261           /* This advances to the next character, skipping any
1262              characters covered by display strings.  */
1263           level = bidi_resolve_explicit_1 (bidi_it);
1264           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1265              a pointer to its data is no longer valid.  */
1266           if (STRINGP (bidi_it->string.lstring))
1267             s = SDATA (bidi_it->string.lstring);
1268         }
1269
1270       if (bidi_it->nchars <= 0)
1271         abort ();
1272       if (level == prev_level)  /* empty embedding */
1273         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1274       else                      /* this embedding is non-empty */
1275         saved_it.ignore_bn_limit = -2;
1276
1277       bidi_copy_it (bidi_it, &saved_it);
1278       if (bidi_it->ignore_bn_limit > -1)
1279         {
1280           /* We pushed a level, but we shouldn't have.  Undo that. */
1281           if (!bidi_it->invalid_rl_levels)
1282             {
1283               new_level = bidi_pop_embedding_level (bidi_it);
1284               bidi_it->invalid_rl_levels = -1;
1285               if (bidi_it->invalid_levels)
1286                 bidi_it->invalid_levels--;
1287             }
1288           if (!bidi_it->invalid_levels)
1289             new_level = bidi_pop_embedding_level (bidi_it);
1290           else
1291             {
1292               bidi_it->invalid_levels--;
1293               bidi_it->invalid_rl_levels--;
1294             }
1295         }
1296     }
1297
1298   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1299     {
1300       bidi_set_paragraph_end (bidi_it);
1301       /* This is needed by bidi_resolve_weak below, and in L1.  */
1302       bidi_it->type_after_w1 = bidi_it->type;
1303       bidi_check_type (bidi_it->type_after_w1);
1304     }
1305
1306   return new_level;
1307 }
1308
1309 /* Advance in the buffer/string, resolve weak types and return the
1310    type of the next character after weak type resolution.  */
1311 static bidi_type_t
1312 bidi_resolve_weak (struct bidi_it *bidi_it)
1313 {
1314   bidi_type_t type;
1315   bidi_dir_t override;
1316   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1317   int new_level  = bidi_resolve_explicit (bidi_it);
1318   int next_char;
1319   bidi_type_t type_of_next;
1320   struct bidi_it saved_it;
1321   EMACS_INT eob =
1322     (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1323     ? bidi_it->string.schars : ZV;
1324
1325   type = bidi_it->type;
1326   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1327
1328   if (type == UNKNOWN_BT
1329       || type == LRE
1330       || type == LRO
1331       || type == RLE
1332       || type == RLO
1333       || type == PDF)
1334     abort ();
1335
1336   if (new_level != prev_level
1337       || bidi_it->type == NEUTRAL_B)
1338     {
1339       /* We've got a new embedding level run, compute the directional
1340          type of sor and initialize per-run variables (UAX#9, clause
1341          X10).  */
1342       bidi_set_sor_type (bidi_it, prev_level, new_level);
1343     }
1344   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1345            || type == WEAK_BN || type == STRONG_AL)
1346     bidi_it->type_after_w1 = type;      /* needed in L1 */
1347   bidi_check_type (bidi_it->type_after_w1);
1348
1349   /* Level and directional override status are already recorded in
1350      bidi_it, and do not need any change; see X6.  */
1351   if (override == R2L)          /* X6 */
1352     type = STRONG_R;
1353   else if (override == L2R)
1354     type = STRONG_L;
1355   else
1356     {
1357       if (type == WEAK_NSM)     /* W1 */
1358         {
1359           /* Note that we don't need to consider the case where the
1360              prev character has its type overridden by an RLO or LRO,
1361              because then either the type of this NSM would have been
1362              also overridden, or the previous character is outside the
1363              current level run, and thus not relevant to this NSM.
1364              This is why NSM gets the type_after_w1 of the previous
1365              character.  */
1366           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1367               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1368               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1369             type = bidi_it->prev.type_after_w1;
1370           else if (bidi_it->sor == R2L)
1371             type = STRONG_R;
1372           else if (bidi_it->sor == L2R)
1373             type = STRONG_L;
1374           else /* shouldn't happen! */
1375             abort ();
1376         }
1377       if (type == WEAK_EN       /* W2 */
1378           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1379         type = WEAK_AN;
1380       else if (type == STRONG_AL) /* W3 */
1381         type = STRONG_R;
1382       else if ((type == WEAK_ES /* W4 */
1383                 && bidi_it->prev.type_after_w1 == WEAK_EN
1384                 && bidi_it->prev.orig_type == WEAK_EN)
1385                || (type == WEAK_CS
1386                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1387                         && bidi_it->prev.orig_type == WEAK_EN)
1388                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1389         {
1390           const unsigned char *s =
1391             STRINGP (bidi_it->string.lstring)
1392             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1393
1394           next_char =
1395             bidi_it->charpos + bidi_it->nchars >= eob
1396             ? BIDI_EOB
1397             : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
1398           type_of_next = bidi_get_type (next_char, override);
1399
1400           if (type_of_next == WEAK_BN
1401               || bidi_explicit_dir_char (next_char))
1402             {
1403               bidi_copy_it (&saved_it, bidi_it);
1404               while (bidi_resolve_explicit (bidi_it) == new_level
1405                      && bidi_it->type == WEAK_BN)
1406                 ;
1407               type_of_next = bidi_it->type;
1408               bidi_copy_it (bidi_it, &saved_it);
1409             }
1410
1411           /* If the next character is EN, but the last strong-type
1412              character is AL, that next EN will be changed to AN when
1413              we process it in W2 above.  So in that case, this ES
1414              should not be changed into EN.  */
1415           if (type == WEAK_ES
1416               && type_of_next == WEAK_EN
1417               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1418             type = WEAK_EN;
1419           else if (type == WEAK_CS)
1420             {
1421               if (bidi_it->prev.type_after_w1 == WEAK_AN
1422                   && (type_of_next == WEAK_AN
1423                       /* If the next character is EN, but the last
1424                          strong-type character is AL, EN will be later
1425                          changed to AN when we process it in W2 above.
1426                          So in that case, this ES should not be
1427                          changed into EN.  */
1428                       || (type_of_next == WEAK_EN
1429                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1430                 type = WEAK_AN;
1431               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1432                        && type_of_next == WEAK_EN
1433                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1434                 type = WEAK_EN;
1435             }
1436         }
1437       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1438                || type == WEAK_BN)      /* W5/Retaining */
1439         {
1440           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1441               || bidi_it->next_en_pos > bidi_it->charpos)
1442             type = WEAK_EN;
1443           else                  /* W5: ET/BN with EN after it.  */
1444             {
1445               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1446               const unsigned char *s =
1447                 STRINGP (bidi_it->string.lstring)
1448                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1449
1450               if (bidi_it->nchars <= 0)
1451                 abort ();
1452               next_char =
1453                 bidi_it->charpos + bidi_it->nchars >= eob
1454                 ? BIDI_EOB
1455                 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
1456               type_of_next = bidi_get_type (next_char, override);
1457
1458               if (type_of_next == WEAK_ET
1459                   || type_of_next == WEAK_BN
1460                   || bidi_explicit_dir_char (next_char))
1461                 {
1462                   bidi_copy_it (&saved_it, bidi_it);
1463                   while (bidi_resolve_explicit (bidi_it) == new_level
1464                          && (bidi_it->type == WEAK_BN
1465                              || bidi_it->type == WEAK_ET))
1466                     ;
1467                   type_of_next = bidi_it->type;
1468                   en_pos = bidi_it->charpos;
1469                   bidi_copy_it (bidi_it, &saved_it);
1470                 }
1471               if (type_of_next == WEAK_EN)
1472                 {
1473                   /* If the last strong character is AL, the EN we've
1474                      found will become AN when we get to it (W2). */
1475                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1476                     {
1477                       type = WEAK_EN;
1478                       /* Remember this EN position, to speed up processing
1479                          of the next ETs.  */
1480                       bidi_it->next_en_pos = en_pos;
1481                     }
1482                   else if (type == WEAK_BN)
1483                     type = NEUTRAL_ON; /* W6/Retaining */
1484                 }
1485             }
1486         }
1487     }
1488
1489   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1490       || (type == WEAK_BN
1491           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1492               || bidi_it->prev.type_after_w1 == WEAK_ES
1493               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1494     type = NEUTRAL_ON;
1495
1496   /* Store the type we've got so far, before we clobber it with strong
1497      types in W7 and while resolving neutral types.  But leave alone
1498      the original types that were recorded above, because we will need
1499      them for the L1 clause.  */
1500   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1501     bidi_it->type_after_w1 = type;
1502   bidi_check_type (bidi_it->type_after_w1);
1503
1504   if (type == WEAK_EN)  /* W7 */
1505     {
1506       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1507           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1508         type = STRONG_L;
1509     }
1510
1511   bidi_it->type = type;
1512   bidi_check_type (bidi_it->type);
1513   return type;
1514 }
1515
1516 /* Resolve the type of a neutral character according to the type of
1517    surrounding strong text and the current embedding level.  */
1518 static INLINE bidi_type_t
1519 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1520 {
1521   /* N1: European and Arabic numbers are treated as though they were R.  */
1522   if (next_type == WEAK_EN || next_type == WEAK_AN)
1523     next_type = STRONG_R;
1524   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1525     prev_type = STRONG_R;
1526
1527   if (next_type == prev_type)   /* N1 */
1528     return next_type;
1529   else if ((lev & 1) == 0)      /* N2 */
1530     return STRONG_L;
1531   else
1532     return STRONG_R;
1533 }
1534
1535 static bidi_type_t
1536 bidi_resolve_neutral (struct bidi_it *bidi_it)
1537 {
1538   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1539   bidi_type_t type = bidi_resolve_weak (bidi_it);
1540   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1541
1542   if (!(type == STRONG_R
1543         || type == STRONG_L
1544         || type == WEAK_BN
1545         || type == WEAK_EN
1546         || type == WEAK_AN
1547         || type == NEUTRAL_B
1548         || type == NEUTRAL_S
1549         || type == NEUTRAL_WS
1550         || type == NEUTRAL_ON))
1551     abort ();
1552
1553   if (bidi_get_category (type) == NEUTRAL
1554       || (type == WEAK_BN && prev_level == current_level))
1555     {
1556       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1557         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1558                                        bidi_it->next_for_neutral.type,
1559                                        current_level);
1560       else
1561         {
1562           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1563              the assumption of batch-style processing; see clauses W4,
1564              W5, and especially N1, which require to look far forward
1565              (as well as back) in the buffer/string.  May the fleas of
1566              a thousand camels infest the armpits of those who design
1567              supposedly general-purpose algorithms by looking at their
1568              own implementations, and fail to consider other possible
1569              implementations!  */
1570           struct bidi_it saved_it;
1571           bidi_type_t next_type;
1572
1573           if (bidi_it->scan_dir == -1)
1574             abort ();
1575
1576           bidi_copy_it (&saved_it, bidi_it);
1577           /* Scan the text forward until we find the first non-neutral
1578              character, and then use that to resolve the neutral we
1579              are dealing with now.  We also cache the scanned iterator
1580              states, to salvage some of the effort later.  */
1581           bidi_cache_iterator_state (bidi_it, 0);
1582           do {
1583             /* Record the info about the previous character, so that
1584                it will be cached below with this state.  */
1585             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1586                 && bidi_it->type != WEAK_BN)
1587               bidi_remember_char (&bidi_it->prev, bidi_it);
1588             type = bidi_resolve_weak (bidi_it);
1589             /* Paragraph separators have their levels fully resolved
1590                at this point, so cache them as resolved.  */
1591             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1592             /* FIXME: implement L1 here, by testing for a newline and
1593                resetting the level for any sequence of whitespace
1594                characters adjacent to it.  */
1595           } while (!(type == NEUTRAL_B
1596                      || (type != WEAK_BN
1597                          && bidi_get_category (type) != NEUTRAL)
1598                      /* This is all per level run, so stop when we
1599                         reach the end of this level run.  */
1600                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1601                      current_level));
1602
1603           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1604
1605           switch (type)
1606             {
1607               case STRONG_L:
1608               case STRONG_R:
1609               case STRONG_AL:
1610                 next_type = type;
1611                 break;
1612               case WEAK_EN:
1613               case WEAK_AN:
1614                 /* N1: ``European and Arabic numbers are treated as
1615                    though they were R.''  */
1616                 next_type = STRONG_R;
1617                 saved_it.next_for_neutral.type = STRONG_R;
1618                 break;
1619               case WEAK_BN:
1620                 if (!bidi_explicit_dir_char (bidi_it->ch))
1621                   abort ();             /* can't happen: BNs are skipped */
1622                 /* FALLTHROUGH */
1623               case NEUTRAL_B:
1624                 /* Marched all the way to the end of this level run.
1625                    We need to use the eor type, whose information is
1626                    stored by bidi_set_sor_type in the prev_for_neutral
1627                    member.  */
1628                 if (saved_it.type != WEAK_BN
1629                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1630                   {
1631                     next_type = bidi_it->prev_for_neutral.type;
1632                     saved_it.next_for_neutral.type = next_type;
1633                     bidi_check_type (next_type);
1634                   }
1635                 else
1636                   {
1637                     /* This is a BN which does not adjoin neutrals.
1638                        Leave its type alone.  */
1639                     bidi_copy_it (bidi_it, &saved_it);
1640                     return bidi_it->type;
1641                   }
1642                 break;
1643               default:
1644                 abort ();
1645             }
1646           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1647                                          next_type, current_level);
1648           saved_it.type = type;
1649           bidi_check_type (type);
1650           bidi_copy_it (bidi_it, &saved_it);
1651         }
1652     }
1653   return type;
1654 }
1655
1656 /* Given an iterator state in BIDI_IT, advance one character position
1657    in the buffer/string to the next character (in the logical order),
1658    resolve the bidi type of that next character, and return that
1659    type.  */
1660 static bidi_type_t
1661 bidi_type_of_next_char (struct bidi_it *bidi_it)
1662 {
1663   bidi_type_t type;
1664
1665   /* This should always be called during a forward scan.  */
1666   if (bidi_it->scan_dir != 1)
1667     abort ();
1668
1669   /* Reset the limit until which to ignore BNs if we step out of the
1670      area where we found only empty levels.  */
1671   if ((bidi_it->ignore_bn_limit > -1
1672        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1673       || (bidi_it->ignore_bn_limit == -2
1674           && !bidi_explicit_dir_char (bidi_it->ch)))
1675     bidi_it->ignore_bn_limit = -1;
1676
1677   type = bidi_resolve_neutral (bidi_it);
1678
1679   return type;
1680 }
1681
1682 /* Given an iterator state BIDI_IT, advance one character position in
1683    the buffer/string to the next character (in the current scan
1684    direction), resolve the embedding and implicit levels of that next
1685    character, and return the resulting level.

        The returned level is also what BIDI_IT->resolved_level holds on
        return.  During a forward scan (scan_dir == 1) at or past the end
        of the text, the iterator is left alone and its current
        resolved_level is returned.  During a backward scan the next
        state must come from the cache fully resolved; otherwise this
        function aborts.  */
1686 static int
1687 bidi_level_of_next_char (struct bidi_it *bidi_it)
1688 {
1689   bidi_type_t type;
       /* prev_level is the embedding level in effect before advancing;
          it is recorded only on a forward scan and stays -1 otherwise.  */
1690   int level, prev_level = -1;
1691   struct bidi_saved_info next_for_neutral;
       /* -2 is below every position the cache lookup further down
          accepts, so an unset next_char_pos never triggers a lookup.  */
1692   EMACS_INT next_char_pos = -2;
1693
1694   if (bidi_it->scan_dir == 1)
1695     {
           /* End of the text being iterated: the string's character count
              when iterating a string, ZV otherwise.  */
1696       EMACS_INT eob =
1697         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1698         ? bidi_it->string.schars : ZV;
1699
1700       /* There's no sense in trying to advance if we hit end of text.  */
1701       if (bidi_it->charpos >= eob)
1702         return bidi_it->resolved_level;
1703
1704       /* Record the info about the previous character.  */
1705       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1706           && bidi_it->type != WEAK_BN)
1707         bidi_remember_char (&bidi_it->prev, bidi_it);
1708       if (bidi_it->type_after_w1 == STRONG_R
1709           || bidi_it->type_after_w1 == STRONG_L
1710           || bidi_it->type_after_w1 == STRONG_AL)
1711         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1712       /* FIXME: it sounds like we don't need both prev and
1713          prev_for_neutral members, but I'm leaving them both for now.  */
1714       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1715           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1716         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1717
1718       /* If we overstepped the characters used for resolving neutrals
1719          and whitespace, invalidate their info in the iterator.  */
1720       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1721         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1722       if (bidi_it->next_en_pos >= 0
1723           && bidi_it->charpos >= bidi_it->next_en_pos)
1724         bidi_it->next_en_pos = -1;
1725       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1726           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1727         bidi_it->next_for_ws.type = UNKNOWN_BT;
1728
1729       /* This must be taken before we fill the iterator with the info
1730          about the next char.  If we scan backwards, the iterator
1731          state must be already cached, so there's no need to know the
1732          embedding level of the previous character, since we will be
1733          returning to our caller shortly.  */
1734       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1735     }
       /* Keep a copy of the neutral-resolution info; it is put back below
          if a cached state turns out not to carry it.  */
1736   next_for_neutral = bidi_it->next_for_neutral;
1737
1738   /* Perhaps the character we want is already cached.  If it is, the
1739      call to bidi_cache_find below will return a type other than
1740      UNKNOWN_BT.  */
1741   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1742     {
           /* Lowest character position of the text: 0 when iterating a
              string, 1 otherwise.  */
1743       int bob =
1744         (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1745
1746       if (bidi_it->scan_dir > 0)
1747         {
1748           if (bidi_it->nchars <= 0)
1749             abort ();
1750           next_char_pos = bidi_it->charpos + bidi_it->nchars;
1751         }
1752       else if (bidi_it->charpos >= bob)
1753         /* Implementation note: we allow next_char_pos to be as low as
1754            0 for buffers or -1 for strings, and that is okay because
1755            that's the "position" of the sentinel iterator state we
1756            cached at the beginning of the iteration.  */
1757         next_char_pos = bidi_it->charpos - 1;
1758       if (next_char_pos >= bob - 1)
1759         type = bidi_cache_find (next_char_pos, -1, bidi_it);
1760       else
1761         type = UNKNOWN_BT;
1762     }
1763   else
1764     type = UNKNOWN_BT;
1765   if (type != UNKNOWN_BT)
1766     {
1767       /* Don't lose the information for resolving neutrals!  The
1768          cached states could have been cached before their
1769          next_for_neutral member was computed.  If we are on our way
1770          forward, we can simply take the info from the previous
1771          state.  */
1772       if (bidi_it->scan_dir == 1
1773           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1774         bidi_it->next_for_neutral = next_for_neutral;
1775
1776       /* If resolved_level is -1, it means this state was cached
1777          before it was completely resolved, so we cannot return
1778          it.  */
1779       if (bidi_it->resolved_level != -1)
1780         return bidi_it->resolved_level;
1781     }
1782   if (bidi_it->scan_dir == -1)
1783     /* If we are going backwards, the iterator state is already cached
1784        from previous scans, and should be fully resolved.  */
1785     abort ();
1786
       /* Cache miss: compute the type of the next character.  (On a cache
          hit whose level was not yet resolved, the cached type is used
          as is.)  */
1787   if (type == UNKNOWN_BT)
1788     type = bidi_type_of_next_char (bidi_it);
1789
       /* For a paragraph separator, return the level already stored in
          the iterator.  */
1790   if (type == NEUTRAL_B)
1791     return bidi_it->resolved_level;
1792
1793   level = bidi_it->level_stack[bidi_it->stack_idx].level;
1794   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1795       || (type == WEAK_BN && prev_level == level))
1796     {
1797       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1798         abort ();
1799
1800       /* If the cached state shows a neutral character, it was not
1801          resolved by bidi_resolve_neutral, so do it now.  */
1802       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1803                                      bidi_it->next_for_neutral.type,
1804                                      level);
1805     }
1806
       /* Sanity check: after neutral resolution only these types are
          expected.  */
1807   if (!(type == STRONG_R
1808         || type == STRONG_L
1809         || type == WEAK_BN
1810         || type == WEAK_EN
1811         || type == WEAK_AN))
1812     abort ();
1813   bidi_it->type = type;
1814   bidi_check_type (bidi_it->type);
1815
1816   /* For L1 below, we need to know, for each WS character, whether
1817      it belongs to a sequence of WS characters preceding a newline
1818      or a TAB or a paragraph separator.  */
1819   if (bidi_it->orig_type == NEUTRAL_WS
1820       && bidi_it->next_for_ws.type == UNKNOWN_BT)
1821     {
           /* Look ahead using local copies of the position data, so that
              BIDI_IT itself is not moved.  */
1822       int ch;
1823       EMACS_INT clen = bidi_it->ch_len;
1824       EMACS_INT bpos = bidi_it->bytepos;
1825       EMACS_INT cpos = bidi_it->charpos;
1826       EMACS_INT disp_pos = bidi_it->disp_pos;
1827       EMACS_INT nc = bidi_it->nchars;
1828       struct bidi_string_data bs = bidi_it->string;
1829       bidi_type_t chtype;
1830       int fwp = bidi_it->frame_window_p;
1831
1832       if (bidi_it->nchars <= 0)
1833         abort ();
           /* Skip over whitespace, BNs and explicit formatting characters;
              a newline or end of text counts as a paragraph separator.  */
1834       do {
1835         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &bs, fwp,
1836                               &clen, &nc);
1837         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1838           chtype = NEUTRAL_B;
1839         else
1840           chtype = bidi_get_type (ch, NEUTRAL_DIR);
1841       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1842                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
           /* Remember the type and position of the character that ended
              the whitespace sequence.  */
1843       bidi_it->next_for_ws.type = chtype;
1844       bidi_check_type (bidi_it->next_for_ws.type);
1845       bidi_it->next_for_ws.charpos = cpos;
1846       bidi_it->next_for_ws.bytepos = bpos;
1847     }
1848
1849   /* Resolve implicit levels, with a twist: PDFs get the embedding
1850      level of the embedding they terminate.  See below for the
1851      reason.  */
1852   if (bidi_it->orig_type == PDF
1853       /* Don't do this if this formatting code didn't change the
1854          embedding level due to invalid or empty embeddings.  */
1855       && prev_level != level)
1856     {
1857       /* Don't look in UAX#9 for the reason for this: it's our own
1858          private quirk.  The reason is that we want the formatting
1859          codes to be delivered so that they bracket the text of their
1860          embedding.  For example, given the text
1861
1862              {RLO}teST{PDF}
1863
1864          we want it to be displayed as
1865
1866              {PDF}STet{RLO}
1867
1868          not as
1869
1870              STet{RLO}{PDF}
1871
1872          which will result because we bump up the embedding level as
1873          soon as we see the RLO and pop it as soon as we see the PDF,
1874          so RLO itself has the same embedding level as "teST", and
1875          thus would be normally delivered last, just before the PDF.
1876          The assignment below fiddles with the level of PDF so that
1877          this ugly side effect does not happen.
1878
1879          (This is, of course, only important if the formatting codes
1880          are actually displayed, but Emacs does need to display them
1881          if the user wants to.)  */
1882       level = prev_level;
1883     }
1884   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1885            || bidi_it->orig_type == NEUTRAL_S
1886            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1887            /* || bidi_it->ch == LINESEP_CHAR */
1888            || (bidi_it->orig_type == NEUTRAL_WS
1889                && (bidi_it->next_for_ws.type == NEUTRAL_B
1890                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
         /* These characters get the level stored at the bottom of the
            level stack.  */
1891     level = bidi_it->level_stack[0].level;
1892   else if ((level & 1) == 0) /* I1 */
1893     {
           /* At an even level, R is raised by one and EN/AN by two.  */
1894       if (type == STRONG_R)
1895         level++;
1896       else if (type == WEAK_EN || type == WEAK_AN)
1897         level += 2;
1898     }
1899   else                  /* I2 */
1900     {
           /* At an odd level, L, EN and AN are raised by one.  */
1901       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1902         level++;
1903     }
1904
1905   bidi_it->resolved_level = level;
1906   return level;
1907 }
1908
1909 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
1910    non-zero, we are at the end of a level, and we need to prepare to
1911    resume the scan of the lower level.
1912
1913    If this level's other edge is cached, we simply jump to it, filling
1914    the iterator structure with the iterator state on the other edge.
1915    Otherwise, we walk the buffer or string until we come back to the
1916    same level as LEVEL.
1917
1918    Note: we are not talking here about a ``level run'' in the UAX#9
1919    sense of the term, but rather about a ``level'' which includes
1920    all the levels higher than it.  In other words, given the levels
1921    like this:
1922
1923          11111112222222333333334443343222222111111112223322111
1924                 A      B                    C
1925
1926    and assuming we are at point A scanning left to right, this
1927    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1928    at point B.  */
1929 static void
1930 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1931 {
1932   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1933   int idx;
1934
1935   /* Try the cache first.  */
1936   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
1937       >= bidi_cache_start)
1938     bidi_cache_fetch_state (idx, bidi_it);
1939   else
1940     {
1941       int new_level;
1942
1943       if (end_flag)
1944         abort (); /* if we are at end of level, its edges must be cached */
1945
1946       bidi_cache_iterator_state (bidi_it, 1);
1947       do {
1948         new_level = bidi_level_of_next_char (bidi_it);
1949         bidi_cache_iterator_state (bidi_it, 1);
1950       } while (new_level >= level);
1951     }
1952 }
1953
     /* Advance BIDI_IT to the next character in the visual order, leaving
        that character's information in *BIDI_IT.

        The level of the next character in the current scan direction is
        resolved first.  If it differs from the level of the current
        character, reordering (clause L2 of UAX#9) is done by jumping to
        the other edge of the level and flipping the scan direction,
        repeating that for as long as the level seen next is not the
        expected one.

        Aborts if BIDI_IT->charpos or BIDI_IT->bytepos is negative.  A
        scan_dir of zero is treated as 1 (logical order).  */
1954 void
1955 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1956 {
1957   int old_level, new_level, next_level;
1958   struct bidi_it sentinel;
1959   struct gcpro gcpro1;
1960
1961   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
1962     abort ();
1963
1964   if (bidi_it->scan_dir == 0)
1965     {
1966       bidi_it->scan_dir = 1;    /* default to logical order */
1967     }
1968
1969   /* The code below can call eval, and thus cause GC.  If we are
1970      iterating a Lisp string, make sure it won't be GCed.  The
1971      matching UNGCPRO is at the end of this function.  */
1971   if (STRINGP (bidi_it->string.lstring))
1972     GCPRO1 (bidi_it->string.lstring);
1973
1974   /* If we just passed a newline, initialize for the next line.  */
1975   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1976     bidi_line_init (bidi_it);
1977
1978   /* Prepare the sentinel iterator state, and cache it.  When we bump
1979      into it, scanning backwards, we'll know that the last non-base
1980      level is exhausted.  */
1981   if (bidi_cache_idx == bidi_cache_start)
1982     {
1983       bidi_copy_it (&sentinel, bidi_it);
1984       if (bidi_it->first_elt)
1985         {
1986           sentinel.charpos--;   /* cached charpos needs to be monotonic */
1987           sentinel.bytepos--;
1988           sentinel.ch = '\n';   /* doesn't matter, but why not? */
1989           sentinel.ch_len = 1;
1990           sentinel.nchars = 1;
1991         }
1992       bidi_cache_iterator_state (&sentinel, 1);
1993     }
1994
       /* Compare the level of the character we are leaving with the level
          of the next one in the current scan direction.  */
1995   old_level = bidi_it->resolved_level;
1996   new_level = bidi_level_of_next_char (bidi_it);
1997
1998   /* Reordering of resolved levels (clause L2) is implemented by
1999      jumping to the other edge of the level and flipping direction of
2000      scanning the text whenever we find a level change.  */
2001   if (new_level != old_level)
2002     {
           /* When ascending, the level whose other edge we look for is one
              above the old level; when descending, it is the old level
              itself.  */
2003       int ascending = new_level > old_level;
2004       int level_to_search = ascending ? old_level + 1 : old_level;
2005       int incr = ascending ? 1 : -1;
2006       int expected_next_level = old_level + incr;
2007
2008       /* Jump (or walk) to the other edge of this level.  */
2009       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2010       /* Switch scan direction and peek at the next character in the
2011          new direction.  */
2012       bidi_it->scan_dir = -bidi_it->scan_dir;
2013
2014       /* The following loop handles the case where the resolved level
2015          jumps by more than one.  This is typical for numbers inside a
2016          run of text with left-to-right embedding direction, but can
2017          also happen in other situations.  In those cases the decision
2018          where to continue after a level change, and in what direction,
2019          is tricky.  For example, given a text like below:
2020
2021                   abcdefgh
2022                   11336622
2023
2024          (where the numbers below the text show the resolved levels),
2025          the result of reordering according to UAX#9 should be this:
2026
2027                   efdcghba
2028
2029          This is implemented by the loop below which flips direction
2030          and jumps to the other edge of the level each time it finds
2031          the new level not to be the expected one.  The expected level
2032          is always one more or one less than the previous one.  */
2033       next_level = bidi_peek_at_next_level (bidi_it);
2034       while (next_level != expected_next_level)
2035         {
2036           expected_next_level += incr;
2037           level_to_search += incr;
2038           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2039           bidi_it->scan_dir = -bidi_it->scan_dir;
2040           next_level = bidi_peek_at_next_level (bidi_it);
2041         }
2042
2043       /* Finally, deliver the next character in the new direction.  The
2043          level returned here is not examined again in this function.  */
2044       next_level = bidi_level_of_next_char (bidi_it);
2045     }
2046
2047   /* Take note when we have just processed the newline that precedes
2048      the end of the paragraph.  The next time we are about to be
2049      called, set_iterator_to_next will automatically reinit the
2050      paragraph direction, if needed.  We do this at the newline before
2051      the paragraph separator, because the next character might not be
2052      the first character of the next paragraph, due to the bidi
2053      reordering, whereas we _must_ know the paragraph base direction
2054      _before_ we process the paragraph's text, since the base
2055      direction affects the reordering.  */
2056   if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
2057     {
2058       /* The paragraph direction of the entire string, once
2059          determined, is in effect for the entire string.  Setting the
2060          separator limit to the end of the string prevents
2061          bidi_paragraph_init from being called automatically on this
2062          string.  */
2063       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2064         bidi_it->separator_limit = bidi_it->string.schars;
2065       else if (bidi_it->bytepos < ZV_BYTE)
2066         {
               /* Test the position just after the current character; a
                  non-negative result means a paragraph separator starts
                  there.  */
2067           EMACS_INT sep_len =
2068             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2069                                    bidi_it->bytepos + bidi_it->ch_len);
               /* NOTE(review): this sanity check runs only after nchars
                  has already been used in the call above.  */
2070           if (bidi_it->nchars <= 0)
2071             abort ();
2072           if (sep_len >= 0)
2073             {
2074               bidi_it->new_paragraph = 1;
2075               /* Record the buffer position of the last character of the
2076                  paragraph separator.  */
2077               bidi_it->separator_limit =
2078                 bidi_it->charpos + bidi_it->nchars + sep_len;
2079             }
2080         }
2081     }
2082
2083   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2084     {
2085       /* If we are at paragraph's base embedding level and beyond the
2086          last cached position, the cache's job is done and we can
2087          discard it.  */
2088       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2089           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2090                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2091         bidi_cache_reset ();
2092         /* But as long as we are caching during forward scan, we must
2093            cache each state, or else the cache integrity will be
2094            compromised: it assumes cached states correspond to buffer
2095            positions 1:1.  */
2096       else
2097         bidi_cache_iterator_state (bidi_it, 1);
2098     }
2099
       /* Undo the GC protection set up at the top, under the same
          condition.  */
2100   if (STRINGP (bidi_it->string.lstring))
2101     UNGCPRO;
2102 }
2103
2104 /* This is meant to be called from within the debugger, whenever you
2105    wish to examine the cache contents.  */
2106 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2107 void
2108 bidi_dump_cached_states (void)
2109 {
2110   int i;
2111   int ndigits = 1;
2112
2113   if (bidi_cache_idx == 0)
2114     {
2115       fprintf (stderr, "The cache is empty.\n");
2116       return;
2117     }
2118   fprintf (stderr, "Total of %d state%s in cache:\n",
2119            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2120
2121   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2122     ndigits++;
2123   fputs ("ch  ", stderr);
2124   for (i = 0; i < bidi_cache_idx; i++)
2125     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2126   fputs ("\n", stderr);
2127   fputs ("lvl ", stderr);
2128   for (i = 0; i < bidi_cache_idx; i++)
2129     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2130   fputs ("\n", stderr);
2131   fputs ("pos ", stderr);
2132   for (i = 0; i < bidi_cache_idx; i++)
2133     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2134   fputs ("\n", stderr);
2135 }