code.delx.au - gnu-emacs/blob - src/bidi.c

   1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    The two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    If you want to understand the code, you will have to read it
  44    together with the relevant portions of UAX#9.  The comments include
  45    references to UAX#9 rules, for that very reason.
  46
  47    A note about references to UAX#9 rules: if the reference says
  48    something like "X9/Retaining", it means that you need to refer to
  49    rule X9 and to its modifications decribed in the "Implementation
  50    Notes" section of UAX#9, under "Retaining Format Codes".  */
  51
  52 #include <config.h>
  53 #include <stdio.h>
  54 #include <setjmp.h>
  55
  56 #include "lisp.h"
  57 #include "buffer.h"
  58 #include "character.h"
  59 #include "dispextern.h"
  60
  61 static int bidi_initialized = 0;
  62
  63 static Lisp_Object bidi_type_table, bidi_mirror_table;
  64
  65 #define LRM_CHAR   0x200E
  66 #define RLM_CHAR   0x200F
  67 #define BIDI_EOB   -1
  68
  69 /* Data type for describing the bidirectional character categories.  */
  70 typedef enum {
  71   UNKNOWN_BC,
  72   NEUTRAL,
  73   WEAK,
  74   STRONG
  75 } bidi_category_t;
  76
  77 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  78 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  79
  80 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  81 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  82
  83 static void
  84 bidi_initialize (void)
  85 {
  86
  87 #include "biditype.h"
  88 #include "bidimirror.h"
  89
  90   int i;
  91
  92   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
  93   staticpro (&bidi_type_table);
  94
  95   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
  96     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
  97                           make_number (bidi_type[i].type));
  98
  99   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 100   staticpro (&bidi_mirror_table);
 101
 102   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 103     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 104                     make_number (bidi_mirror[i].to));
 105
 106   Qparagraph_start = intern ("paragraph-start");
 107   staticpro (&Qparagraph_start);
 108   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 109   if (!STRINGP (paragraph_start_re))
 110     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 111   staticpro (&paragraph_start_re);
 112   Qparagraph_separate = intern ("paragraph-separate");
 113   staticpro (&Qparagraph_separate);
 114   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 115   if (!STRINGP (paragraph_separate_re))
 116     paragraph_separate_re = build_string ("[ \t\f]*$");
 117   staticpro (&paragraph_separate_re);
 118   bidi_initialized = 1;
 119 }
 120
 121 /* Return the bidi type of a character CH, subject to the current
 122    directional OVERRIDE.  */
 123 static INLINE bidi_type_t
 124 bidi_get_type (int ch, bidi_dir_t override)
 125 {
 126   bidi_type_t default_type;
 127
 128   if (ch == BIDI_EOB)
 129     return NEUTRAL_B;
 130   if (ch < 0 || ch > MAX_CHAR)
 131     abort ();
 132
 133   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 134
 135   if (override == NEUTRAL_DIR)
 136     return default_type;
 137
 138   switch (default_type)
 139     {
 140       /* Although UAX#9 does not tell, it doesn't make sense to
 141          override NEUTRAL_B and LRM/RLM characters.  */
 142       case NEUTRAL_B:
 143       case LRE:
 144       case LRO:
 145       case RLE:
 146       case RLO:
 147       case PDF:
 148         return default_type;
 149       default:
 150         switch (ch)
 151           {
 152             case LRM_CHAR:
 153             case RLM_CHAR:
 154               return default_type;
 155             default:
 156               if (override == L2R) /* X6 */
 157                 return STRONG_L;
 158               else if (override == R2L)
 159                 return STRONG_R;
 160               else
 161                 abort ();       /* can't happen: handled above */
 162           }
 163     }
 164 }
 165
 166 static void
 167 bidi_check_type (bidi_type_t type)
 168 {
 169   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 170     abort ();
 171 }
 172
 173 /* Given a bidi TYPE of a character, return its category.  */
 174 static INLINE bidi_category_t
 175 bidi_get_category (bidi_type_t type)
 176 {
 177   switch (type)
 178     {
 179       case UNKNOWN_BT:
 180         return UNKNOWN_BC;
 181       case STRONG_L:
 182       case STRONG_R:
 183       case STRONG_AL:
 184       case LRE:
 185       case LRO:
 186       case RLE:
 187       case RLO:
 188         return STRONG;
 189       case PDF:         /* ??? really?? */
 190       case WEAK_EN:
 191       case WEAK_ES:
 192       case WEAK_ET:
 193       case WEAK_AN:
 194       case WEAK_CS:
 195       case WEAK_NSM:
 196       case WEAK_BN:
 197         return WEAK;
 198       case NEUTRAL_B:
 199       case NEUTRAL_S:
 200       case NEUTRAL_WS:
 201       case NEUTRAL_ON:
 202         return NEUTRAL;
 203       default:
 204         abort ();
 205     }
 206 }
 207
 208 /* Return the mirrored character of C, if it has one.  If C has no
 209    mirrored counterpart, return C.
 210    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 211    context must be tested by the caller.  */
 212 int
 213 bidi_mirror_char (int c)
 214 {
 215   Lisp_Object val;
 216
 217   if (c == BIDI_EOB)
 218     return c;
 219   if (c < 0 || c > MAX_CHAR)
 220     abort ();
 221
 222   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 223   if (INTEGERP (val))
 224     {
 225       int v = XINT (val);
 226
 227       if (v < 0 || v > MAX_CHAR)
 228         abort ();
 229
 230       return v;
 231     }
 232
 233   return c;
 234 }
 235
 236 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 237    copies the part of the level stack that is actually in use.  */
 238 static INLINE void
 239 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 240 {
 241   int i;
 242
 243   /* Copy everything except the level stack and beyond.  */
 244   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 245
 246   /* Copy the active part of the level stack.  */
 247   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 248   for (i = 1; i <= from->stack_idx; i++)
 249     to->level_stack[i] = from->level_stack[i];
 250 }
 251
 252 /* Caching the bidi iterator states.  */
 253
 254 #define BIDI_CACHE_CHUNK 200
 255 static struct bidi_it *bidi_cache;
 256 static size_t bidi_cache_size = 0;
 257 static size_t elsz = sizeof (struct bidi_it);
 258 static EMACS_INT bidi_cache_idx;        /* next unused cache slot */
 259 static EMACS_INT bidi_cache_last_idx;   /* slot of last cache hit */
 260 static EMACS_INT bidi_cache_start = 0;  /* start of cache for this
 261                                            "stack" level */
 262
 263 /* Reset the cache state to the empty state.  We only reset the part
 264    of the cache relevant to iteration of the current object.  Previous
 265    objects, which are pushed on the display iterator's stack, are left
 266    intact.  This is called when the cached information is no more
 267    useful for the current iteration, e.g. when we were reseated to a
 268    new position on the same object.  */
 269 static INLINE void
 270 bidi_cache_reset (void)
 271 {
 272   bidi_cache_idx = bidi_cache_start;
 273   bidi_cache_last_idx = -1;
 274 }
 275
 276 /* Shrink the cache to its minimal size.  Called when we init the bidi
 277    iterator for reordering a buffer or a string that does not come
 278    from display properties, because that means all the previously
 279    cached info is of no further use.  */
 280 static INLINE void
 281 bidi_cache_shrink (void)
 282 {
 283   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 284     {
 285       bidi_cache_size = BIDI_CACHE_CHUNK;
 286       bidi_cache =
 287         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 288     }
 289   bidi_cache_reset ();
 290 }
 291
 292 static INLINE void
 293 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
 294 {
 295   int current_scan_dir = bidi_it->scan_dir;
 296
 297   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 298     abort ();
 299
 300   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 301   bidi_it->scan_dir = current_scan_dir;
 302   bidi_cache_last_idx = idx;
 303 }
 304
 305 /* Find a cached state with a given CHARPOS and resolved embedding
 306    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 307    resolved levels in cached states.  DIR, if non-zero, means search
 308    in that direction from the last cache hit.  */
 309 static INLINE int
 310 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 311 {
 312   int i, i_start;
 313
 314   if (bidi_cache_idx)
 315     {
 316       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 317         {
 318           dir = -1;
 319           i_start = bidi_cache_last_idx - 1;
 320         }
 321       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 322                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 323         {
 324           dir = 1;
 325           i_start = bidi_cache_last_idx + 1;
 326         }
 327       else if (dir)
 328         i_start = bidi_cache_last_idx;
 329       else
 330         {
 331           dir = -1;
 332           i_start = bidi_cache_idx - 1;
 333         }
 334
 335       if (dir < 0)
 336         {
 337           /* Linear search for now; FIXME!  */
 338           for (i = i_start; i >= bidi_cache_start; i--)
 339             if (bidi_cache[i].charpos <= charpos
 340                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 341                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 342               return i;
 343         }
 344       else
 345         {
 346           for (i = i_start; i < bidi_cache_idx; i++)
 347             if (bidi_cache[i].charpos <= charpos
 348                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 349                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 350               return i;
 351         }
 352     }
 353
 354   return -1;
 355 }
 356
 357 /* Find a cached state where the resolved level changes to a value
 358    that is lower than LEVEL, and return its cache slot index.  DIR is
 359    the direction to search, starting with the last used cache slot.
 360    If DIR is zero, we search backwards from the last occupied cache
 361    slot.  BEFORE, if non-zero, means return the index of the slot that
 362    is ``before'' the level change in the search direction.  That is,
 363    given the cached levels like this:
 364
 365          1122333442211
 366           AB        C
 367
 368    and assuming we are at the position cached at the slot marked with
 369    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 370    index of slot B or A, depending whether BEFORE is, respectively,
 371    non-zero or zero.  */
 372 static int
 373 bidi_cache_find_level_change (int level, int dir, int before)
 374 {
 375   if (bidi_cache_idx)
 376     {
 377       int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 378       int incr = before ? 1 : 0;
 379
 380       if (!dir)
 381         dir = -1;
 382       else if (!incr)
 383         i += dir;
 384
 385       if (dir < 0)
 386         {
 387           while (i >= bidi_cache_start + incr)
 388             {
 389               if (bidi_cache[i - incr].resolved_level >= 0
 390                   && bidi_cache[i - incr].resolved_level < level)
 391                 return i;
 392               i--;
 393             }
 394         }
 395       else
 396         {
 397           while (i < bidi_cache_idx - incr)
 398             {
 399               if (bidi_cache[i + incr].resolved_level >= 0
 400                   && bidi_cache[i + incr].resolved_level < level)
 401                 return i;
 402               i++;
 403             }
 404         }
 405     }
 406
 407   return -1;
 408 }
 409
 410 static INLINE void
 411 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 412 {
 413   int idx;
 414
 415   /* We should never cache on backward scans.  */
 416   if (bidi_it->scan_dir == -1)
 417     abort ();
 418   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 419
 420   if (idx < 0)
 421     {
 422       idx = bidi_cache_idx;
 423       /* Enlarge the cache as needed.  */
 424       if (idx >= bidi_cache_size)
 425         {
 426           bidi_cache_size += BIDI_CACHE_CHUNK;
 427           bidi_cache =
 428             (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 429         }
 430       /* Character positions should correspond to cache positions 1:1.
 431          If we are outside the range of cached positions, the cache is
 432          useless and must be reset.  */
 433       if (idx > bidi_cache_start &&
 434           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 435                                + bidi_cache[idx - 1].nchars)
 436            || bidi_it->charpos < bidi_cache[0].charpos))
 437         {
 438           bidi_cache_reset ();
 439           idx = bidi_cache_start;
 440         }
 441       if (bidi_it->nchars <= 0)
 442         abort ();
 443       bidi_copy_it (&bidi_cache[idx], bidi_it);
 444       if (!resolved)
 445         bidi_cache[idx].resolved_level = -1;
 446     }
 447   else
 448     {
 449       /* Copy only the members which could have changed, to avoid
 450          costly copying of the entire struct.  */
 451       bidi_cache[idx].type = bidi_it->type;
 452       bidi_check_type (bidi_it->type);
 453       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 454       bidi_check_type (bidi_it->type_after_w1);
 455       if (resolved)
 456         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 457       else
 458         bidi_cache[idx].resolved_level = -1;
 459       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 460       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 461       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 462       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 463       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 464     }
 465
 466   bidi_cache_last_idx = idx;
 467   if (idx >= bidi_cache_idx)
 468     bidi_cache_idx = idx + 1;
 469 }
 470
 471 static INLINE bidi_type_t
 472 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 473 {
 474   int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 475
 476   if (i >= bidi_cache_start)
 477     {
 478       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 479
 480       bidi_copy_it (bidi_it, &bidi_cache[i]);
 481       bidi_cache_last_idx = i;
 482       /* Don't let scan direction from from the cached state override
 483          the current scan direction.  */
 484       bidi_it->scan_dir = current_scan_dir;
 485       return bidi_it->type;
 486     }
 487
 488   return UNKNOWN_BT;
 489 }
 490
 491 static INLINE int
 492 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 493 {
 494   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 495     abort ();
 496   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 497 }
 498
 499 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
 500    Value is the non-negative length of the paragraph separator
 501    following the buffer position, -1 if position is at the beginning
 502    of a new paragraph, or -2 if position is neither at beginning nor
 503    at end of a paragraph.  */
 504 static EMACS_INT
 505 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 506 {
 507   Lisp_Object sep_re;
 508   Lisp_Object start_re;
 509   EMACS_INT val;
 510
 511   sep_re = paragraph_separate_re;
 512   start_re = paragraph_start_re;
 513
 514   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
 515   if (val < 0)
 516     {
 517       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
 518         val = -1;
 519       else
 520         val = -2;
 521     }
 522
 523   return val;
 524 }
 525
 526 /* Determine the start-of-run (sor) directional type given the two
 527    embedding levels on either side of the run boundary.  Also, update
 528    the saved info about previously seen characters, since that info is
 529    generally valid for a single level run.  */
 530 static INLINE void
 531 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 532 {
 533   int higher_level = level_before > level_after ? level_before : level_after;
 534
 535   /* The prev_was_pdf gork is required for when we have several PDFs
 536      in a row.  In that case, we want to compute the sor type for the
 537      next level run only once: when we see the first PDF.  That's
 538      because the sor type depends only on the higher of the two levels
 539      that we find on the two sides of the level boundary (see UAX#9,
 540      clause X10), and so we don't need to know the final embedding
 541      level to which we descend after processing all the PDFs.  */
 542   if (!bidi_it->prev_was_pdf || level_before < level_after)
 543     /* FIXME: should the default sor direction be user selectable?  */
 544     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 545   if (level_before > level_after)
 546     bidi_it->prev_was_pdf = 1;
 547
 548   bidi_it->prev.type = UNKNOWN_BT;
 549   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 550     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 551   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 552   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 553   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 554   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 555     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 556   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 557 }
 558
 559 /* Perform initializations for reordering a new line of bidi text.  */
 560 static void
 561 bidi_line_init (struct bidi_it *bidi_it)
 562 {
 563   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 564   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 565   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 566   bidi_it->invalid_levels = 0;
 567   bidi_it->invalid_rl_levels = -1;
 568   bidi_it->next_en_pos = -1;
 569   bidi_it->next_for_ws.type = UNKNOWN_BT;
 570   bidi_set_sor_type (bidi_it,
 571                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 572                      bidi_it->level_stack[0].level); /* X10 */
 573
 574   bidi_cache_reset ();
 575 }
 576
 577 /* Count bytes in multibyte string S between BEG/BEGBYTE and END.  BEG
 578    and END are zero-based character positions in S, BEGBYTE is byte
 579    position corresponding to BEG.  */
 580 static inline EMACS_INT
 581 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 582                   const EMACS_INT begbyte, const EMACS_INT end)
 583 {
 584   EMACS_INT pos = beg;
 585   const unsigned char *p = s + begbyte, *start = p;
 586
 587   if (!CHAR_HEAD_P (*p))
 588     abort ();
 589
 590   while (pos < end)
 591     {
 592       p += BYTES_BY_CHAR_HEAD (*p);
 593       pos++;
 594     }
 595
 596   return p - start;
 597 }
 598
 599 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 600    non-NULL, fetch the character from string S; otherwise fetch the
 601    character from the current buffer.  */
 602 static inline int
 603 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s)
 604 {
 605   if (s)
 606     return STRING_CHAR (s + bytepos);
 607   else
 608     return FETCH_MULTIBYTE_CHAR (bytepos);
 609 }
 610
 611 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 612    character is covered by a display string, treat the entire run of
 613    covered characters as a single character u+FFFC, and return their
 614    combined length in CH_LEN and NCHARS.  DISP_POS specifies the
 615    character position of the next display string, or -1 if not yet
 616    computed.  When the next character is at or beyond that position,
 617    the function updates DISP_POS with the position of the next display
 618    string.  STRING->s is the C string to iterate, or NULL if iterating
 619    over a buffer or a Lisp string; in the latter case, STRING->lstring
 620    is the Lisp string.  */
 621 static inline int
 622 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 623                  struct bidi_string_data *string,
 624                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 625 {
 626   int ch;
 627   EMACS_INT endpos =
 628     (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 629   struct text_pos pos;
 630
 631   /* If we got past the last known position of display string, compute
 632      the position of the next one.  That position could be at CHARPOS.  */
 633   if (charpos < endpos && charpos > *disp_pos)
 634     {
 635       SET_TEXT_POS (pos, charpos, bytepos);
 636       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
 637     }
 638
 639   /* Fetch the character at BYTEPOS.  */
 640   if (charpos >= endpos)
 641     {
 642       ch = BIDI_EOB;
 643       *ch_len = 1;
 644       *nchars = 1;
 645       *disp_pos = endpos;
 646     }
 647   else if (charpos >= *disp_pos)
 648     {
 649       EMACS_INT disp_end_pos;
 650
 651       /* We don't expect to find ourselves in the middle of a display
 652          property.  Hopefully, it will never be needed.  */
 653       if (charpos > *disp_pos)
 654         abort ();
 655       /* Return the Unicode Object Replacement Character to represent
 656          the entire run of characters covered by the display string.  */
 657       ch = 0xFFFC;
 658       disp_end_pos = compute_display_string_end (*disp_pos, string);
 659       *nchars = disp_end_pos - *disp_pos;
 660       if (string->s)
 661         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 662                                     disp_end_pos);
 663       else if (STRINGP (string->lstring))
 664         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 665                                     bytepos, disp_end_pos);
 666       else
 667         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 668     }
 669   else
 670     {
 671       if (string->s)
 672         {
 673           EMACS_INT len;
 674
 675           ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 676           *ch_len = len;
 677         }
 678       else if (STRINGP (string->lstring))
 679         {
 680           EMACS_INT len;
 681
 682           ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos, len);
 683           *ch_len = len;
 684         }
 685       else
 686         {
 687           ch = FETCH_MULTIBYTE_CHAR (bytepos);
 688           *ch_len = CHAR_BYTES (ch);
 689         }
 690       *nchars = 1;
 691     }
 692
 693   /* If we just entered a run of characters covered by a display
 694      string, compute the position of the next display string.  */
 695   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos)
 696     {
 697       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
 698       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
 699     }
 700
 701   return ch;
 702 }
 703
 704 /* Find the beginning of this paragraph by looking back in the buffer.
 705    Value is the byte position of the paragraph's beginning.  */
 706 static EMACS_INT
 707 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
 708 {
 709   Lisp_Object re = paragraph_start_re;
 710   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
 711
 712   while (pos_byte > BEGV_BYTE
 713          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
 714     {
 715       /* FIXME: What if the paragraph beginning is covered by a
 716          display string?  And what if a display string covering some
 717          of the text over which we scan back includes
 718          paragraph_start_re?  */
 719       pos = find_next_newline_no_quit (pos - 1, -1);
 720       pos_byte = CHAR_TO_BYTE (pos);
 721     }
 722   return pos_byte;
 723 }
 724
 725 /* Determine the base direction, a.k.a. base embedding level, of the
 726    paragraph we are about to iterate through.  If DIR is either L2R or
 727    R2L, just use that.  Otherwise, determine the paragraph direction
 728    from the first strong directional character of the paragraph.
 729
 730    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
 731    has no strong directional characters and both DIR and
 732    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
 733    in the buffer until a paragraph is found with a strong character,
 734    or until hitting BEGV.  In the latter case, fall back to L2R.  This
 735    flag is used in current-bidi-paragraph-direction.
 736
 737    Note that this function gives the paragraph separator the same
 738    direction as the preceding paragraph, even though Emacs generally
 739    views the separartor as not belonging to any paragraph.  */
 740 void
 741 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 742 {
 743   EMACS_INT bytepos = bidi_it->bytepos;
 744   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
 745   EMACS_INT pstartbyte;
 746   /* Note that begbyte is a byte position, while end is a character
 747      position.  Yes, this is ugly, but we are trying to avoid costly
 748      calls to BYTE_TO_CHAR and its ilk.  */
 749   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
 750   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
 751
 752   /* Special case for an empty buffer. */
 753   if (bytepos == begbyte && bidi_it->charpos == end)
 754     dir = L2R;
 755   /* We should never be called at EOB or before BEGV.  */
 756   else if (bidi_it->charpos >= end || bytepos < begbyte)
 757     abort ();
 758
 759   if (dir == L2R)
 760     {
 761       bidi_it->paragraph_dir = L2R;
 762       bidi_it->new_paragraph = 0;
 763     }
 764   else if (dir == R2L)
 765     {
 766       bidi_it->paragraph_dir = R2L;
 767       bidi_it->new_paragraph = 0;
 768     }
 769   else if (dir == NEUTRAL_DIR)  /* P2 */
 770     {
 771       int ch;
 772       EMACS_INT ch_len, nchars;
 773       EMACS_INT pos, disp_pos = -1;
 774       bidi_type_t type;
 775       const unsigned char *s;
 776
 777       if (!bidi_initialized)
 778         bidi_initialize ();
 779
 780       /* If we are inside a paragraph separator, we are just waiting
 781          for the separator to be exhausted; use the previous paragraph
 782          direction.  But don't do that if we have been just reseated,
 783          because we need to reinitialize below in that case.  */
 784       if (!bidi_it->first_elt
 785           && bidi_it->charpos < bidi_it->separator_limit)
 786         return;
 787
 788       /* If we are on a newline, get past it to where the next
 789          paragraph might start.  But don't do that at BEGV since then
 790          we are potentially in a new paragraph that doesn't yet
 791          exist.  */
 792       pos = bidi_it->charpos;
 793       s = STRINGP (bidi_it->string.lstring) ?
 794         SDATA (bidi_it->string.lstring) : bidi_it->string.s;
 795       if (bytepos > begbyte && bidi_char_at_pos (bytepos, s) == '\n')
 796         {
 797           bytepos++;
 798           pos++;
 799         }
 800
 801       /* We are either at the beginning of a paragraph or in the
 802          middle of it.  Find where this paragraph starts.  */
 803       if (string_p)
 804         {
 805           /* We don't support changes of paragraph direction inside a
 806              string.  It is treated as a single paragraph.  */
 807           pstartbyte = 0;
 808         }
 809       else
 810         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 811       bidi_it->separator_limit = -1;
 812       bidi_it->new_paragraph = 0;
 813
 814       /* The following loop is run more than once only if NO_DEFAULT_P
 815          is non-zero, and only if we are iterating on a buffer.  */
 816       do {
 817         bytepos = pstartbyte;
 818         if (!string_p)
 819           pos = BYTE_TO_CHAR (bytepos);
 820         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
 821                               bidi_it->frame_window_p, &ch_len, &nchars);
 822         type = bidi_get_type (ch, NEUTRAL_DIR);
 823
 824         for (pos += nchars, bytepos += ch_len;
 825              /* NOTE: UAX#9 says to search only for L, AL, or R types
 826                 of characters, and ignore RLE, RLO, LRE, and LRO.
 827                 However, I'm not sure it makes sense to omit those 4;
 828                 should try with and without that to see the effect.  */
 829              (bidi_get_category (type) != STRONG)
 830                || (bidi_ignore_explicit_marks_for_paragraph_level
 831                    && (type == RLE || type == RLO
 832                        || type == LRE || type == LRO));
 833              type = bidi_get_type (ch, NEUTRAL_DIR))
 834           {
 835             if (!string_p
 836                 && type == NEUTRAL_B
 837                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
 838               break;
 839             if (pos >= end)
 840               {
 841                 /* Pretend there's a paragraph separator at end of
 842                    buffer/string.  */
 843                 type = NEUTRAL_B;
 844                 break;
 845               }
 846             /* Fetch next character and advance to get past it.  */
 847             ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
 848                                   bidi_it->frame_window_p, &ch_len, &nchars);
 849             pos += nchars;
 850             bytepos += ch_len;
 851           }
 852         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 853           bidi_it->paragraph_dir = R2L;
 854         else if (type == STRONG_L)
 855           bidi_it->paragraph_dir = L2R;
 856         if (!string_p
 857             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 858           {
 859             /* If this paragraph is at BEGV, default to L2R.  */
 860             if (pstartbyte == BEGV_BYTE)
 861               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 862             else
 863               {
 864                 EMACS_INT prevpbyte = pstartbyte;
 865                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 866
 867                 /* Find the beginning of the previous paragraph, if any.  */
 868                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 869                   {
 870                     /* FXIME: What if p is covered by a display
 871                        string?  See also a FIXME inside
 872                        bidi_find_paragraph_start.  */
 873                     p--;
 874                     pbyte = CHAR_TO_BYTE (p);
 875                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 876                   }
 877                 pstartbyte = prevpbyte;
 878               }
 879           }
 880       } while (!string_p
 881                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 882     }
 883   else
 884     abort ();
 885
 886   /* Contrary to UAX#9 clause P3, we only default the paragraph
 887      direction to L2R if we have no previous usable paragraph
 888      direction.  This is allowed by the HL1 clause.  */
 889   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
 890     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
 891   if (bidi_it->paragraph_dir == R2L)
 892     bidi_it->level_stack[0].level = 1;
 893   else
 894     bidi_it->level_stack[0].level = 0;
 895
 896   bidi_line_init (bidi_it);
 897 }
 898
 899 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 900    end.  */
 901 static INLINE void
 902 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 903 {
 904   bidi_it->invalid_levels = 0;
 905   bidi_it->invalid_rl_levels = -1;
 906   bidi_it->stack_idx = 0;
 907   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 908 }
 909
 910 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 911 void
 912 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 913               struct bidi_it *bidi_it)
 914 {
 915   if (! bidi_initialized)
 916     bidi_initialize ();
 917   if (charpos >= 0)
 918     bidi_it->charpos = charpos;
 919   if (bytepos >= 0)
 920     bidi_it->bytepos = bytepos;
 921   bidi_it->frame_window_p = frame_window_p;
 922   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 923   bidi_it->first_elt = 1;
 924   bidi_set_paragraph_end (bidi_it);
 925   bidi_it->new_paragraph = 1;
 926   bidi_it->separator_limit = -1;
 927   bidi_it->type = NEUTRAL_B;
 928   bidi_it->type_after_w1 = NEUTRAL_B;
 929   bidi_it->orig_type = NEUTRAL_B;
 930   bidi_it->prev_was_pdf = 0;
 931   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 932     bidi_it->prev.orig_type = UNKNOWN_BT;
 933   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 934     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 935   bidi_it->next_for_neutral.charpos = -1;
 936   bidi_it->next_for_neutral.type =
 937     bidi_it->next_for_neutral.type_after_w1 =
 938     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 939   bidi_it->prev_for_neutral.charpos = -1;
 940   bidi_it->prev_for_neutral.type =
 941     bidi_it->prev_for_neutral.type_after_w1 =
 942     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 943   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 944   bidi_it->disp_pos = -1;       /* invalid/unknown */
 945   /* We can only shrink the cache if we are at the bottom level of its
 946      "stack".  */
 947   if (bidi_cache_start == 0)
 948     bidi_cache_shrink ();
 949 }
 950
 951 /* Push the current embedding level and override status; reset the
 952    current level to LEVEL and the current override status to OVERRIDE.  */
 953 static INLINE void
 954 bidi_push_embedding_level (struct bidi_it *bidi_it,
 955                            int level, bidi_dir_t override)
 956 {
 957   bidi_it->stack_idx++;
 958   if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
 959     abort ();
 960   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 961   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 962 }
 963
 964 /* Pop the embedding level and directional override status from the
 965    stack, and return the new level.  */
 966 static INLINE int
 967 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 968 {
 969   /* UAX#9 says to ignore invalid PDFs.  */
 970   if (bidi_it->stack_idx > 0)
 971     bidi_it->stack_idx--;
 972   return bidi_it->level_stack[bidi_it->stack_idx].level;
 973 }
 974
 975 /* Record in SAVED_INFO the information about the current character.  */
 976 static INLINE void
 977 bidi_remember_char (struct bidi_saved_info *saved_info,
 978                     struct bidi_it *bidi_it)
 979 {
 980   saved_info->charpos = bidi_it->charpos;
 981   saved_info->bytepos = bidi_it->bytepos;
 982   saved_info->type = bidi_it->type;
 983   bidi_check_type (bidi_it->type);
 984   saved_info->type_after_w1 = bidi_it->type_after_w1;
 985   bidi_check_type (bidi_it->type_after_w1);
 986   saved_info->orig_type = bidi_it->orig_type;
 987   bidi_check_type (bidi_it->orig_type);
 988 }
 989
 990 /* Resolve the type of a neutral character according to the type of
 991    surrounding strong text and the current embedding level.  */
 992 static INLINE bidi_type_t
 993 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
 994 {
 995   /* N1: European and Arabic numbers are treated as though they were R.  */
 996   if (next_type == WEAK_EN || next_type == WEAK_AN)
 997     next_type = STRONG_R;
 998   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
 999     prev_type = STRONG_R;
1000
1001   if (next_type == prev_type)   /* N1 */
1002     return next_type;
1003   else if ((lev & 1) == 0)      /* N2 */
1004     return STRONG_L;
1005   else
1006     return STRONG_R;
1007 }
1008
1009 static INLINE int
1010 bidi_explicit_dir_char (int ch)
1011 {
1012   bidi_type_t ch_type;
1013
1014   if (!bidi_initialized)
1015     abort ();
1016   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1017   return (ch_type == LRE || ch_type == LRO
1018           || ch_type == RLE || ch_type == RLO
1019           || ch_type == PDF);
1020 }
1021
1022 /* A helper function for bidi_resolve_explicit.  It advances to the
1023    next character in logical order and determines the new embedding
1024    level and directional override, but does not take into account
1025    empty embeddings.  */
1026 static int
1027 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1028 {
1029   int curchar;
1030   bidi_type_t type;
1031   int current_level;
1032   int new_level;
1033   bidi_dir_t override;
1034   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1035
1036   /* If reseat()'ed, don't advance, so as to start iteration from the
1037      position where we were reseated.  bidi_it->bytepos can be less
1038      than BEGV_BYTE after reseat to BEGV.  */
1039   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1040       || bidi_it->first_elt)
1041     {
1042       bidi_it->first_elt = 0;
1043       if (string_p)
1044         {
1045           const unsigned char *p =
1046             STRINGP (bidi_it->string.lstring)
1047             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1048
1049           if (bidi_it->charpos < 0)
1050             bidi_it->charpos = 0;
1051           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos);
1052         }
1053       else
1054         {
1055           if (bidi_it->charpos < BEGV)
1056             bidi_it->charpos = BEGV;
1057           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1058         }
1059     }
1060   /* Don't move at end of buffer/string.  */
1061   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1062     {
1063       /* Advance to the next character, skipping characters covered by
1064          display strings (nchars > 1).  */
1065       if (bidi_it->nchars <= 0)
1066         abort ();
1067       bidi_it->charpos += bidi_it->nchars;
1068       if (bidi_it->ch_len == 0)
1069         abort ();
1070       bidi_it->bytepos += bidi_it->ch_len;
1071     }
1072
1073   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1074   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1075   new_level = current_level;
1076
1077   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1078     {
1079       curchar = BIDI_EOB;
1080       bidi_it->ch_len = 1;
1081       bidi_it->nchars = 1;
1082       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1083     }
1084   else
1085     {
1086       /* Fetch the character at BYTEPOS.  If it is covered by a
1087          display string, treat the entire run of covered characters as
1088          a single character u+FFFC.  */
1089       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1090                                  &bidi_it->disp_pos, &bidi_it->string,
1091                                  bidi_it->frame_window_p,
1092                                  &bidi_it->ch_len, &bidi_it->nchars);
1093     }
1094   bidi_it->ch = curchar;
1095
1096   /* Don't apply directional override here, as all the types we handle
1097      below will not be affected by the override anyway, and we need
1098      the original type unaltered.  The override will be applied in
1099      bidi_resolve_weak.  */
1100   type = bidi_get_type (curchar, NEUTRAL_DIR);
1101   bidi_it->orig_type = type;
1102   bidi_check_type (bidi_it->orig_type);
1103
1104   if (type != PDF)
1105     bidi_it->prev_was_pdf = 0;
1106
1107   bidi_it->type_after_w1 = UNKNOWN_BT;
1108
1109   switch (type)
1110     {
1111       case RLE: /* X2 */
1112       case RLO: /* X4 */
1113         bidi_it->type_after_w1 = type;
1114         bidi_check_type (bidi_it->type_after_w1);
1115         type = WEAK_BN; /* X9/Retaining */
1116         if (bidi_it->ignore_bn_limit <= -1)
1117           {
1118             if (current_level <= BIDI_MAXLEVEL - 4)
1119               {
1120                 /* Compute the least odd embedding level greater than
1121                    the current level.  */
1122                 new_level = ((current_level + 1) & ~1) + 1;
1123                 if (bidi_it->type_after_w1 == RLE)
1124                   override = NEUTRAL_DIR;
1125                 else
1126                   override = R2L;
1127                 if (current_level == BIDI_MAXLEVEL - 4)
1128                   bidi_it->invalid_rl_levels = 0;
1129                 bidi_push_embedding_level (bidi_it, new_level, override);
1130               }
1131             else
1132               {
1133                 bidi_it->invalid_levels++;
1134                 /* See the commentary about invalid_rl_levels below.  */
1135                 if (bidi_it->invalid_rl_levels < 0)
1136                   bidi_it->invalid_rl_levels = 0;
1137                 bidi_it->invalid_rl_levels++;
1138               }
1139           }
1140         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1141                  || bidi_it->next_en_pos > bidi_it->charpos)
1142           type = WEAK_EN;
1143         break;
1144       case LRE: /* X3 */
1145       case LRO: /* X5 */
1146         bidi_it->type_after_w1 = type;
1147         bidi_check_type (bidi_it->type_after_w1);
1148         type = WEAK_BN; /* X9/Retaining */
1149         if (bidi_it->ignore_bn_limit <= -1)
1150           {
1151             if (current_level <= BIDI_MAXLEVEL - 5)
1152               {
1153                 /* Compute the least even embedding level greater than
1154                    the current level.  */
1155                 new_level = ((current_level + 2) & ~1);
1156                 if (bidi_it->type_after_w1 == LRE)
1157                   override = NEUTRAL_DIR;
1158                 else
1159                   override = L2R;
1160                 bidi_push_embedding_level (bidi_it, new_level, override);
1161               }
1162             else
1163               {
1164                 bidi_it->invalid_levels++;
1165                 /* invalid_rl_levels counts invalid levels encountered
1166                    while the embedding level was already too high for
1167                    LRE/LRO, but not for RLE/RLO.  That is because
1168                    there may be exactly one PDF which we should not
1169                    ignore even though invalid_levels is non-zero.
1170                    invalid_rl_levels helps to know what PDF is
1171                    that.  */
1172                 if (bidi_it->invalid_rl_levels >= 0)
1173                   bidi_it->invalid_rl_levels++;
1174               }
1175           }
1176         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1177                  || bidi_it->next_en_pos > bidi_it->charpos)
1178           type = WEAK_EN;
1179         break;
1180       case PDF: /* X7 */
1181         bidi_it->type_after_w1 = type;
1182         bidi_check_type (bidi_it->type_after_w1);
1183         type = WEAK_BN; /* X9/Retaining */
1184         if (bidi_it->ignore_bn_limit <= -1)
1185           {
1186             if (!bidi_it->invalid_rl_levels)
1187               {
1188                 new_level = bidi_pop_embedding_level (bidi_it);
1189                 bidi_it->invalid_rl_levels = -1;
1190                 if (bidi_it->invalid_levels)
1191                   bidi_it->invalid_levels--;
1192                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1193               }
1194             if (!bidi_it->invalid_levels)
1195               new_level = bidi_pop_embedding_level (bidi_it);
1196             else
1197               {
1198                 bidi_it->invalid_levels--;
1199                 bidi_it->invalid_rl_levels--;
1200               }
1201           }
1202         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1203                  || bidi_it->next_en_pos > bidi_it->charpos)
1204           type = WEAK_EN;
1205         break;
1206       default:
1207         /* Nothing.  */
1208         break;
1209     }
1210
1211   bidi_it->type = type;
1212   bidi_check_type (bidi_it->type);
1213
1214   return new_level;
1215 }
1216
1217 /* Given an iterator state in BIDI_IT, advance one character position
1218    in the buffer/string to the next character (in the logical order),
1219    resolve any explicit embeddings and directional overrides, and
1220    return the embedding level of the character after resolving
1221    explicit directives and ignoring empty embeddings.  */
1222 static int
1223 bidi_resolve_explicit (struct bidi_it *bidi_it)
1224 {
1225   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1226   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1227   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1228   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1229     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1230
1231   if (prev_level < new_level
1232       && bidi_it->type == WEAK_BN
1233       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1234       && bidi_it->charpos < eob         /* not already at EOB */
1235       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1236                                                    + bidi_it->ch_len, s)))
1237     {
1238       /* Avoid pushing and popping embedding levels if the level run
1239          is empty, as this breaks level runs where it shouldn't.
1240          UAX#9 removes all the explicit embedding and override codes,
1241          so empty embeddings disappear without a trace.  We need to
1242          behave as if we did the same.  */
1243       struct bidi_it saved_it;
1244       int level = prev_level;
1245
1246       bidi_copy_it (&saved_it, bidi_it);
1247
1248       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1249                                                        + bidi_it->ch_len, s)))
1250         {
1251           /* This advances to the next character, skipping any
1252              characters covered by display strings.  */
1253           level = bidi_resolve_explicit_1 (bidi_it);
1254           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1255              a pointer to its data is no longer valid.  */
1256           if (STRINGP (bidi_it->string.lstring))
1257             s = SDATA (bidi_it->string.lstring);
1258         }
1259
1260       if (bidi_it->nchars <= 0)
1261         abort ();
1262       if (level == prev_level)  /* empty embedding */
1263         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1264       else                      /* this embedding is non-empty */
1265         saved_it.ignore_bn_limit = -2;
1266
1267       bidi_copy_it (bidi_it, &saved_it);
1268       if (bidi_it->ignore_bn_limit > -1)
1269         {
1270           /* We pushed a level, but we shouldn't have.  Undo that. */
1271           if (!bidi_it->invalid_rl_levels)
1272             {
1273               new_level = bidi_pop_embedding_level (bidi_it);
1274               bidi_it->invalid_rl_levels = -1;
1275               if (bidi_it->invalid_levels)
1276                 bidi_it->invalid_levels--;
1277             }
1278           if (!bidi_it->invalid_levels)
1279             new_level = bidi_pop_embedding_level (bidi_it);
1280           else
1281             {
1282               bidi_it->invalid_levels--;
1283               bidi_it->invalid_rl_levels--;
1284             }
1285         }
1286     }
1287
1288   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1289     {
1290       bidi_set_paragraph_end (bidi_it);
1291       /* This is needed by bidi_resolve_weak below, and in L1.  */
1292       bidi_it->type_after_w1 = bidi_it->type;
1293       bidi_check_type (bidi_it->type_after_w1);
1294     }
1295
1296   return new_level;
1297 }
1298
1299 /* Advance in the buffer/string, resolve weak types and return the
1300    type of the next character after weak type resolution.  */
1301 static bidi_type_t
1302 bidi_resolve_weak (struct bidi_it *bidi_it)
1303 {
1304   bidi_type_t type;
1305   bidi_dir_t override;
1306   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1307   int new_level  = bidi_resolve_explicit (bidi_it);
1308   int next_char;
1309   bidi_type_t type_of_next;
1310   struct bidi_it saved_it;
1311   EMACS_INT eob =
1312     (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1313     ? bidi_it->string.schars : ZV;
1314
1315   type = bidi_it->type;
1316   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1317
1318   if (type == UNKNOWN_BT
1319       || type == LRE
1320       || type == LRO
1321       || type == RLE
1322       || type == RLO
1323       || type == PDF)
1324     abort ();
1325
1326   if (new_level != prev_level
1327       || bidi_it->type == NEUTRAL_B)
1328     {
1329       /* We've got a new embedding level run, compute the directional
1330          type of sor and initialize per-run variables (UAX#9, clause
1331          X10).  */
1332       bidi_set_sor_type (bidi_it, prev_level, new_level);
1333     }
1334   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1335            || type == WEAK_BN || type == STRONG_AL)
1336     bidi_it->type_after_w1 = type;      /* needed in L1 */
1337   bidi_check_type (bidi_it->type_after_w1);
1338
1339   /* Level and directional override status are already recorded in
1340      bidi_it, and do not need any change; see X6.  */
1341   if (override == R2L)          /* X6 */
1342     type = STRONG_R;
1343   else if (override == L2R)
1344     type = STRONG_L;
1345   else
1346     {
1347       if (type == WEAK_NSM)     /* W1 */
1348         {
1349           /* Note that we don't need to consider the case where the
1350              prev character has its type overridden by an RLO or LRO,
1351              because then either the type of this NSM would have been
1352              also overridden, or the previous character is outside the
1353              current level run, and thus not relevant to this NSM.
1354              This is why NSM gets the type_after_w1 of the previous
1355              character.  */
1356           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1357               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1358               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1359             type = bidi_it->prev.type_after_w1;
1360           else if (bidi_it->sor == R2L)
1361             type = STRONG_R;
1362           else if (bidi_it->sor == L2R)
1363             type = STRONG_L;
1364           else /* shouldn't happen! */
1365             abort ();
1366         }
1367       if (type == WEAK_EN       /* W2 */
1368           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1369         type = WEAK_AN;
1370       else if (type == STRONG_AL) /* W3 */
1371         type = STRONG_R;
1372       else if ((type == WEAK_ES /* W4 */
1373                 && bidi_it->prev.type_after_w1 == WEAK_EN
1374                 && bidi_it->prev.orig_type == WEAK_EN)
1375                || (type == WEAK_CS
1376                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1377                         && bidi_it->prev.orig_type == WEAK_EN)
1378                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1379         {
1380           const unsigned char *s =
1381             STRINGP (bidi_it->string.lstring)
1382             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1383
1384           next_char =
1385             bidi_it->charpos + bidi_it->nchars >= eob
1386             ? BIDI_EOB
1387             : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
1388           type_of_next = bidi_get_type (next_char, override);
1389
1390           if (type_of_next == WEAK_BN
1391               || bidi_explicit_dir_char (next_char))
1392             {
1393               bidi_copy_it (&saved_it, bidi_it);
1394               while (bidi_resolve_explicit (bidi_it) == new_level
1395                      && bidi_it->type == WEAK_BN)
1396                 ;
1397               type_of_next = bidi_it->type;
1398               bidi_copy_it (bidi_it, &saved_it);
1399             }
1400
1401           /* If the next character is EN, but the last strong-type
1402              character is AL, that next EN will be changed to AN when
1403              we process it in W2 above.  So in that case, this ES
1404              should not be changed into EN.  */
1405           if (type == WEAK_ES
1406               && type_of_next == WEAK_EN
1407               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1408             type = WEAK_EN;
1409           else if (type == WEAK_CS)
1410             {
1411               if (bidi_it->prev.type_after_w1 == WEAK_AN
1412                   && (type_of_next == WEAK_AN
1413                       /* If the next character is EN, but the last
1414                          strong-type character is AL, EN will be later
1415                          changed to AN when we process it in W2 above.
1416                          So in that case, this ES should not be
1417                          changed into EN.  */
1418                       || (type_of_next == WEAK_EN
1419                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1420                 type = WEAK_AN;
1421               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1422                        && type_of_next == WEAK_EN
1423                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1424                 type = WEAK_EN;
1425             }
1426         }
1427       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1428                || type == WEAK_BN)      /* W5/Retaining */
1429         {
1430           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1431               || bidi_it->next_en_pos > bidi_it->charpos)
1432             type = WEAK_EN;
1433           else                  /* W5: ET/BN with EN after it.  */
1434             {
1435               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1436               const unsigned char *s =
1437                 STRINGP (bidi_it->string.lstring)
1438                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1439
1440               if (bidi_it->nchars <= 0)
1441                 abort ();
1442               next_char =
1443                 bidi_it->charpos + bidi_it->nchars >= eob
1444                 ? BIDI_EOB
1445                 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
1446               type_of_next = bidi_get_type (next_char, override);
1447
1448               if (type_of_next == WEAK_ET
1449                   || type_of_next == WEAK_BN
1450                   || bidi_explicit_dir_char (next_char))
1451                 {
1452                   bidi_copy_it (&saved_it, bidi_it);
1453                   while (bidi_resolve_explicit (bidi_it) == new_level
1454                          && (bidi_it->type == WEAK_BN
1455                              || bidi_it->type == WEAK_ET))
1456                     ;
1457                   type_of_next = bidi_it->type;
1458                   en_pos = bidi_it->charpos;
1459                   bidi_copy_it (bidi_it, &saved_it);
1460                 }
1461               if (type_of_next == WEAK_EN)
1462                 {
1463                   /* If the last strong character is AL, the EN we've
1464                      found will become AN when we get to it (W2). */
1465                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1466                     {
1467                       type = WEAK_EN;
1468                       /* Remember this EN position, to speed up processing
1469                          of the next ETs.  */
1470                       bidi_it->next_en_pos = en_pos;
1471                     }
1472                   else if (type == WEAK_BN)
1473                     type = NEUTRAL_ON; /* W6/Retaining */
1474                 }
1475             }
1476         }
1477     }
1478
1479   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1480       || (type == WEAK_BN
1481           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1482               || bidi_it->prev.type_after_w1 == WEAK_ES
1483               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1484     type = NEUTRAL_ON;
1485
1486   /* Store the type we've got so far, before we clobber it with strong
1487      types in W7 and while resolving neutral types.  But leave alone
1488      the original types that were recorded above, because we will need
1489      them for the L1 clause.  */
1490   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1491     bidi_it->type_after_w1 = type;
1492   bidi_check_type (bidi_it->type_after_w1);
1493
1494   if (type == WEAK_EN)  /* W7 */
1495     {
1496       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1497           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1498         type = STRONG_L;
1499     }
1500
1501   bidi_it->type = type;
1502   bidi_check_type (bidi_it->type);
1503   return type;
1504 }
1505
1506 static bidi_type_t
1507 bidi_resolve_neutral (struct bidi_it *bidi_it)
1508 {
1509   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1510   bidi_type_t type = bidi_resolve_weak (bidi_it);
1511   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1512
1513   if (!(type == STRONG_R
1514         || type == STRONG_L
1515         || type == WEAK_BN
1516         || type == WEAK_EN
1517         || type == WEAK_AN
1518         || type == NEUTRAL_B
1519         || type == NEUTRAL_S
1520         || type == NEUTRAL_WS
1521         || type == NEUTRAL_ON))
1522     abort ();
1523
1524   if (bidi_get_category (type) == NEUTRAL
1525       || (type == WEAK_BN && prev_level == current_level))
1526     {
1527       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1528         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1529                                        bidi_it->next_for_neutral.type,
1530                                        current_level);
1531       else
1532         {
1533           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1534              the assumption of batch-style processing; see clauses W4,
1535              W5, and especially N1, which require to look far forward
1536              (as well as back) in the buffer/string.  May the fleas of
1537              a thousand camels infest the armpits of those who design
1538              supposedly general-purpose algorithms by looking at their
1539              own implementations, and fail to consider other possible
1540              implementations!  */
1541           struct bidi_it saved_it;
1542           bidi_type_t next_type;
1543
1544           if (bidi_it->scan_dir == -1)
1545             abort ();
1546
1547           bidi_copy_it (&saved_it, bidi_it);
1548           /* Scan the text forward until we find the first non-neutral
1549              character, and then use that to resolve the neutral we
1550              are dealing with now.  We also cache the scanned iterator
1551              states, to salvage some of the effort later.  */
1552           bidi_cache_iterator_state (bidi_it, 0);
1553           do {
1554             /* Record the info about the previous character, so that
1555                it will be cached below with this state.  */
1556             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1557                 && bidi_it->type != WEAK_BN)
1558               bidi_remember_char (&bidi_it->prev, bidi_it);
1559             type = bidi_resolve_weak (bidi_it);
1560             /* Paragraph separators have their levels fully resolved
1561                at this point, so cache them as resolved.  */
1562             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1563             /* FIXME: implement L1 here, by testing for a newline and
1564                resetting the level for any sequence of whitespace
1565                characters adjacent to it.  */
1566           } while (!(type == NEUTRAL_B
1567                      || (type != WEAK_BN
1568                          && bidi_get_category (type) != NEUTRAL)
1569                      /* This is all per level run, so stop when we
1570                         reach the end of this level run.  */
1571                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1572                      current_level));
1573
1574           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1575
1576           switch (type)
1577             {
1578               case STRONG_L:
1579               case STRONG_R:
1580               case STRONG_AL:
1581                 next_type = type;
1582                 break;
1583               case WEAK_EN:
1584               case WEAK_AN:
1585                 /* N1: ``European and Arabic numbers are treated as
1586                    though they were R.''  */
1587                 next_type = STRONG_R;
1588                 saved_it.next_for_neutral.type = STRONG_R;
1589                 break;
1590               case WEAK_BN:
1591                 if (!bidi_explicit_dir_char (bidi_it->ch))
1592                   abort ();             /* can't happen: BNs are skipped */
1593                 /* FALLTHROUGH */
1594               case NEUTRAL_B:
1595                 /* Marched all the way to the end of this level run.
1596                    We need to use the eor type, whose information is
1597                    stored by bidi_set_sor_type in the prev_for_neutral
1598                    member.  */
1599                 if (saved_it.type != WEAK_BN
1600                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1601                   {
1602                     next_type = bidi_it->prev_for_neutral.type;
1603                     saved_it.next_for_neutral.type = next_type;
1604                     bidi_check_type (next_type);
1605                   }
1606                 else
1607                   {
1608                     /* This is a BN which does not adjoin neutrals.
1609                        Leave its type alone.  */
1610                     bidi_copy_it (bidi_it, &saved_it);
1611                     return bidi_it->type;
1612                   }
1613                 break;
1614               default:
1615                 abort ();
1616             }
1617           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1618                                          next_type, current_level);
1619           saved_it.type = type;
1620           bidi_check_type (type);
1621           bidi_copy_it (bidi_it, &saved_it);
1622         }
1623     }
1624   return type;
1625 }
1626
1627 /* Given an iterator state in BIDI_IT, advance one character position
1628    in the buffer/string to the next character (in the logical order),
1629    resolve the bidi type of that next character, and return that
1630    type.  */
1631 static bidi_type_t
1632 bidi_type_of_next_char (struct bidi_it *bidi_it)
1633 {
1634   bidi_type_t type;
1635
1636   /* This should always be called during a forward scan.  */
1637   if (bidi_it->scan_dir != 1)
1638     abort ();
1639
1640   /* Reset the limit until which to ignore BNs if we step out of the
1641      area where we found only empty levels.  */
1642   if ((bidi_it->ignore_bn_limit > -1
1643        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1644       || (bidi_it->ignore_bn_limit == -2
1645           && !bidi_explicit_dir_char (bidi_it->ch)))
1646     bidi_it->ignore_bn_limit = -1;
1647
1648   type = bidi_resolve_neutral (bidi_it);
1649
1650   return type;
1651 }
1652
1653 /* Given an iterator state BIDI_IT, advance one character position in
1654    the buffer/string to the next character (in the current scan
1655    direction), resolve the embedding and implicit levels of that next
1656    character, and return the resulting level.  */
1657 static int
1658 bidi_level_of_next_char (struct bidi_it *bidi_it)
1659 {
1660   bidi_type_t type;
1661   int level, prev_level = -1;
1662   struct bidi_saved_info next_for_neutral;
1663   EMACS_INT next_char_pos = -2;
1664
1665   if (bidi_it->scan_dir == 1)
1666     {
1667       EMACS_INT eob =
1668         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1669         ? bidi_it->string.schars : ZV;
1670
1671       /* There's no sense in trying to advance if we hit end of text.  */
1672       if (bidi_it->charpos >= eob)
1673         return bidi_it->resolved_level;
1674
1675       /* Record the info about the previous character.  */
1676       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1677           && bidi_it->type != WEAK_BN)
1678         bidi_remember_char (&bidi_it->prev, bidi_it);
1679       if (bidi_it->type_after_w1 == STRONG_R
1680           || bidi_it->type_after_w1 == STRONG_L
1681           || bidi_it->type_after_w1 == STRONG_AL)
1682         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1683       /* FIXME: it sounds like we don't need both prev and
1684          prev_for_neutral members, but I'm leaving them both for now.  */
1685       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1686           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1687         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1688
1689       /* If we overstepped the characters used for resolving neutrals
1690          and whitespace, invalidate their info in the iterator.  */
1691       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1692         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1693       if (bidi_it->next_en_pos >= 0
1694           && bidi_it->charpos >= bidi_it->next_en_pos)
1695         bidi_it->next_en_pos = -1;
1696       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1697           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1698         bidi_it->next_for_ws.type = UNKNOWN_BT;
1699
1700       /* This must be taken before we fill the iterator with the info
1701          about the next char.  If we scan backwards, the iterator
1702          state must be already cached, so there's no need to know the
1703          embedding level of the previous character, since we will be
1704          returning to our caller shortly.  */
1705       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1706     }
1707   next_for_neutral = bidi_it->next_for_neutral;
1708
1709   /* Perhaps the character we want is already cached.  If it is, the
1710      call to bidi_cache_find below will return a type other than
1711      UNKNOWN_BT.  */
1712   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1713     {
1714       int bob =
1715         (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1716
1717       if (bidi_it->scan_dir > 0)
1718         {
1719           if (bidi_it->nchars <= 0)
1720             abort ();
1721           next_char_pos = bidi_it->charpos + bidi_it->nchars;
1722         }
1723       else if (bidi_it->charpos >= bob)
1724         /* Implementation note: we allow next_char_pos to be as low as
1725            0 for buffers or -1 for strings, and that is okay because
1726            that's the "position" of the sentinel iterator state we
1727            cached at the beginning of the iteration.  */
1728         next_char_pos = bidi_it->charpos - 1;
1729       if (next_char_pos >= bob - 1)
1730         type = bidi_cache_find (next_char_pos, -1, bidi_it);
1731       else
1732         type = UNKNOWN_BT;
1733     }
1734   else
1735     type = UNKNOWN_BT;
1736   if (type != UNKNOWN_BT)
1737     {
1738       /* Don't lose the information for resolving neutrals!  The
1739          cached states could have been cached before their
1740          next_for_neutral member was computed.  If we are on our way
1741          forward, we can simply take the info from the previous
1742          state.  */
1743       if (bidi_it->scan_dir == 1
1744           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1745         bidi_it->next_for_neutral = next_for_neutral;
1746
1747       /* If resolved_level is -1, it means this state was cached
1748          before it was completely resolved, so we cannot return
1749          it.  */
1750       if (bidi_it->resolved_level != -1)
1751         return bidi_it->resolved_level;
1752     }
1753   if (bidi_it->scan_dir == -1)
1754     /* If we are going backwards, the iterator state is already cached
1755        from previous scans, and should be fully resolved.  */
1756     abort ();
1757
1758   if (type == UNKNOWN_BT)
1759     type = bidi_type_of_next_char (bidi_it);
1760
1761   if (type == NEUTRAL_B)
1762     return bidi_it->resolved_level;
1763
1764   level = bidi_it->level_stack[bidi_it->stack_idx].level;
1765   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1766       || (type == WEAK_BN && prev_level == level))
1767     {
1768       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1769         abort ();
1770
1771       /* If the cached state shows a neutral character, it was not
1772          resolved by bidi_resolve_neutral, so do it now.  */
1773       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1774                                      bidi_it->next_for_neutral.type,
1775                                      level);
1776     }
1777
1778   if (!(type == STRONG_R
1779         || type == STRONG_L
1780         || type == WEAK_BN
1781         || type == WEAK_EN
1782         || type == WEAK_AN))
1783     abort ();
1784   bidi_it->type = type;
1785   bidi_check_type (bidi_it->type);
1786
1787   /* For L1 below, we need to know, for each WS character, whether
1788      it belongs to a sequence of WS characters preceding a newline
1789      or a TAB or a paragraph separator.  */
1790   if (bidi_it->orig_type == NEUTRAL_WS
1791       && bidi_it->next_for_ws.type == UNKNOWN_BT)
1792     {
1793       int ch;
1794       EMACS_INT clen = bidi_it->ch_len;
1795       EMACS_INT bpos = bidi_it->bytepos;
1796       EMACS_INT cpos = bidi_it->charpos;
1797       EMACS_INT disp_pos = bidi_it->disp_pos;
1798       EMACS_INT nc = bidi_it->nchars;
1799       struct bidi_string_data bs = bidi_it->string;
1800       bidi_type_t chtype;
1801       int fwp = bidi_it->frame_window_p;
1802
1803       if (bidi_it->nchars <= 0)
1804         abort ();
1805       do {
1806         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &bs, fwp,
1807                               &clen, &nc);
1808         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1809           chtype = NEUTRAL_B;
1810         else
1811           chtype = bidi_get_type (ch, NEUTRAL_DIR);
1812       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1813                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1814       bidi_it->next_for_ws.type = chtype;
1815       bidi_check_type (bidi_it->next_for_ws.type);
1816       bidi_it->next_for_ws.charpos = cpos;
1817       bidi_it->next_for_ws.bytepos = bpos;
1818     }
1819
1820   /* Resolve implicit levels, with a twist: PDFs get the embedding
1821      level of the enbedding they terminate.  See below for the
1822      reason.  */
1823   if (bidi_it->orig_type == PDF
1824       /* Don't do this if this formatting code didn't change the
1825          embedding level due to invalid or empty embeddings.  */
1826       && prev_level != level)
1827     {
1828       /* Don't look in UAX#9 for the reason for this: it's our own
1829          private quirk.  The reason is that we want the formatting
1830          codes to be delivered so that they bracket the text of their
1831          embedding.  For example, given the text
1832
1833              {RLO}teST{PDF}
1834
1835          we want it to be displayed as
1836
1837              {PDF}STet{RLO}
1838
1839          not as
1840
1841              STet{RLO}{PDF}
1842
1843          which will result because we bump up the embedding level as
1844          soon as we see the RLO and pop it as soon as we see the PDF,
1845          so RLO itself has the same embedding level as "teST", and
1846          thus would be normally delivered last, just before the PDF.
1847          The switch below fiddles with the level of PDF so that this
1848          ugly side effect does not happen.
1849
1850          (This is, of course, only important if the formatting codes
1851          are actually displayed, but Emacs does need to display them
1852          if the user wants to.)  */
1853       level = prev_level;
1854     }
1855   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1856            || bidi_it->orig_type == NEUTRAL_S
1857            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1858            /* || bidi_it->ch == LINESEP_CHAR */
1859            || (bidi_it->orig_type == NEUTRAL_WS
1860                && (bidi_it->next_for_ws.type == NEUTRAL_B
1861                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
1862     level = bidi_it->level_stack[0].level;
1863   else if ((level & 1) == 0) /* I1 */
1864     {
1865       if (type == STRONG_R)
1866         level++;
1867       else if (type == WEAK_EN || type == WEAK_AN)
1868         level += 2;
1869     }
1870   else                  /* I2 */
1871     {
1872       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1873         level++;
1874     }
1875
1876   bidi_it->resolved_level = level;
1877   return level;
1878 }
1879
1880 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
1881    non-zero, we are at the end of a level, and we need to prepare to
1882    resume the scan of the lower level.
1883
1884    If this level's other edge is cached, we simply jump to it, filling
1885    the iterator structure with the iterator state on the other edge.
1886    Otherwise, we walk the buffer or string until we come back to the
1887    same level as LEVEL.
1888
1889    Note: we are not talking here about a ``level run'' in the UAX#9
1890    sense of the term, but rather about a ``level'' which includes
1891    all the levels higher than it.  In other words, given the levels
1892    like this:
1893
1894          11111112222222333333334443343222222111111112223322111
1895                 A      B                    C
1896
1897    and assuming we are at point A scanning left to right, this
1898    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1899    at point B.  */
1900 static void
1901 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1902 {
1903   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1904   int idx;
1905
1906   /* Try the cache first.  */
1907   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
1908       >= bidi_cache_start)
1909     bidi_cache_fetch_state (idx, bidi_it);
1910   else
1911     {
1912       int new_level;
1913
1914       if (end_flag)
1915         abort (); /* if we are at end of level, its edges must be cached */
1916
1917       bidi_cache_iterator_state (bidi_it, 1);
1918       do {
1919         new_level = bidi_level_of_next_char (bidi_it);
1920         bidi_cache_iterator_state (bidi_it, 1);
1921       } while (new_level >= level);
1922     }
1923 }
1924
1925 void
1926 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1927 {
1928   int old_level, new_level, next_level;
1929   struct bidi_it sentinel;
1930
1931   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
1932     abort ();
1933
1934   if (bidi_it->scan_dir == 0)
1935     {
1936       bidi_it->scan_dir = 1;    /* default to logical order */
1937     }
1938
1939   /* If we just passed a newline, initialize for the next line.  */
1940   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1941     bidi_line_init (bidi_it);
1942
1943   /* Prepare the sentinel iterator state, and cache it.  When we bump
1944      into it, scanning backwards, we'll know that the last non-base
1945      level is exhausted.  */
1946   if (bidi_cache_idx == bidi_cache_start)
1947     {
1948       bidi_copy_it (&sentinel, bidi_it);
1949       if (bidi_it->first_elt)
1950         {
1951           sentinel.charpos--;   /* cached charpos needs to be monotonic */
1952           sentinel.bytepos--;
1953           sentinel.ch = '\n';   /* doesn't matter, but why not? */
1954           sentinel.ch_len = 1;
1955           sentinel.nchars = 1;
1956         }
1957       bidi_cache_iterator_state (&sentinel, 1);
1958     }
1959
1960   old_level = bidi_it->resolved_level;
1961   new_level = bidi_level_of_next_char (bidi_it);
1962
1963   /* Reordering of resolved levels (clause L2) is implemented by
1964      jumping to the other edge of the level and flipping direction of
1965      scanning the text whenever we find a level change.  */
1966   if (new_level != old_level)
1967     {
1968       int ascending = new_level > old_level;
1969       int level_to_search = ascending ? old_level + 1 : old_level;
1970       int incr = ascending ? 1 : -1;
1971       int expected_next_level = old_level + incr;
1972
1973       /* Jump (or walk) to the other edge of this level.  */
1974       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1975       /* Switch scan direction and peek at the next character in the
1976          new direction.  */
1977       bidi_it->scan_dir = -bidi_it->scan_dir;
1978
1979       /* The following loop handles the case where the resolved level
1980          jumps by more than one.  This is typical for numbers inside a
1981          run of text with left-to-right embedding direction, but can
1982          also happen in other situations.  In those cases the decision
1983          where to continue after a level change, and in what direction,
1984          is tricky.  For example, given a text like below:
1985
1986                   abcdefgh
1987                   11336622
1988
1989          (where the numbers below the text show the resolved levels),
1990          the result of reordering according to UAX#9 should be this:
1991
1992                   efdcghba
1993
1994          This is implemented by the loop below which flips direction
1995          and jumps to the other edge of the level each time it finds
1996          the new level not to be the expected one.  The expected level
1997          is always one more or one less than the previous one.  */
1998       next_level = bidi_peek_at_next_level (bidi_it);
1999       while (next_level != expected_next_level)
2000         {
2001           expected_next_level += incr;
2002           level_to_search += incr;
2003           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2004           bidi_it->scan_dir = -bidi_it->scan_dir;
2005           next_level = bidi_peek_at_next_level (bidi_it);
2006         }
2007
2008       /* Finally, deliver the next character in the new direction.  */
2009       next_level = bidi_level_of_next_char (bidi_it);
2010     }
2011
2012   /* Take note when we have just processed the newline that precedes
2013      the end of the paragraph.  The next time we are about to be
2014      called, set_iterator_to_next will automatically reinit the
2015      paragraph direction, if needed.  We do this at the newline before
2016      the paragraph separator, because the next character might not be
2017      the first character of the next paragraph, due to the bidi
2018      reordering, whereas we _must_ know the paragraph base direction
2019      _before_ we process the paragraph's text, since the base
2020      direction affects the reordering.  */
2021   if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
2022     {
2023       /* The paragraph direction of the entire string, once
2024          determined, is in effect for the entire string.  Setting the
2025          separator limit to the end of the string prevents
2026          bidi_paragraph_init from being called automatically on this
2027          string.  */
2028       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2029         bidi_it->separator_limit = bidi_it->string.schars;
2030       else if (bidi_it->bytepos < ZV_BYTE)
2031         {
2032           EMACS_INT sep_len =
2033             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2034                                    bidi_it->bytepos + bidi_it->ch_len);
2035           if (bidi_it->nchars <= 0)
2036             abort ();
2037           if (sep_len >= 0)
2038             {
2039               bidi_it->new_paragraph = 1;
2040               /* Record the buffer position of the last character of the
2041                  paragraph separator.  */
2042               bidi_it->separator_limit =
2043                 bidi_it->charpos + bidi_it->nchars + sep_len;
2044             }
2045         }
2046     }
2047
2048   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2049     {
2050       /* If we are at paragraph's base embedding level and beyond the
2051          last cached position, the cache's job is done and we can
2052          discard it.  */
2053       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2054           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2055                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2056         bidi_cache_reset ();
2057         /* But as long as we are caching during forward scan, we must
2058            cache each state, or else the cache integrity will be
2059            compromised: it assumes cached states correspond to buffer
2060            positions 1:1.  */
2061       else
2062         bidi_cache_iterator_state (bidi_it, 1);
2063     }
2064 }
2065
2066 /* This is meant to be called from within the debugger, whenever you
2067    wish to examine the cache contents.  */
2068 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2069 void
2070 bidi_dump_cached_states (void)
2071 {
2072   int i;
2073   int ndigits = 1;
2074
2075   if (bidi_cache_idx == 0)
2076     {
2077       fprintf (stderr, "The cache is empty.\n");
2078       return;
2079     }
2080   fprintf (stderr, "Total of %d state%s in cache:\n",
2081            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2082
2083   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2084     ndigits++;
2085   fputs ("ch  ", stderr);
2086   for (i = 0; i < bidi_cache_idx; i++)
2087     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2088   fputs ("\n", stderr);
2089   fputs ("lvl ", stderr);
2090   for (i = 0; i < bidi_cache_idx; i++)
2091     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2092   fputs ("\n", stderr);
2093   fputs ("pos ", stderr);
2094   for (i = 0; i < bidi_cache_idx; i++)
2095     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2096   fputs ("\n", stderr);
2097 }