Merge from trunk.

[gnu-emacs] / src / bidi.c
diff --git a/src/bidi.c b/src/bidi.c

index f499ec37b9e59ad32914d447c91d07b48a938734..cac12854f339fb59e7b299cb95945e87277d9408 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -79,6 +79,11 @@ typedef enum {
    STRONG
  } bidi_category_t;
  
+/* UAX#9 says to search only for L, AL, or R types of characters, and
+   ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
+   level.  Yudit indeed ignores them.  This variable is therefore set
+   by default to ignore them, but setting it to zero will take them
+   into account.  */
  extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  
@@ -103,6 +108,12 @@ bidi_get_type (int ch, bidi_dir_t override)
      abort ();
  
    default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+  /* Every valid character code, even those that are unassigned by the
+     UCD, has some bidi-class property, according to the
+     DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
+     (= zero) code from CHAR_TABLE_REF, that's a bug.  */
+  if (default_type == UNKNOWN_BT)
+    abort ();
  
    if (override == NEUTRAL_DIR)
      return default_type;
@@ -135,11 +146,10 @@ bidi_get_type (int ch, bidi_dir_t override)
      }
  }
  
-static void
+static inline void
  bidi_check_type (bidi_type_t type)
  {
-  if (type < UNKNOWN_BT || type > NEUTRAL_ON)
-    abort ();
+  xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
  }
  
  /* Given a bidi TYPE of a character, return its category.  */
@@ -194,7 +204,7 @@ bidi_mirror_char (int c)
    val = CHAR_TABLE_REF (bidi_mirror_table, c);
    if (INTEGERP (val))
      {
-      int v = XINT (val);
+      EMACS_INT v = XINT (val);
  
        if (v < 0 || v > MAX_CHAR)
         abort ();
@@ -343,9 +353,9 @@ bidi_cache_shrink (void)
  {
    if (bidi_cache_size > BIDI_CACHE_CHUNK)
      {
-      bidi_cache_size = BIDI_CACHE_CHUNK;
        bidi_cache =
-       (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+       (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
+      bidi_cache_size = BIDI_CACHE_CHUNK;
      }
    bidi_cache_reset ();
  }
@@ -368,7 +378,7 @@ bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
     resolved levels in cached states.  DIR, if non-zero, means search
     in that direction from the last cache hit.  */
  static inline ptrdiff_t
-bidi_cache_search (EMACS_INT charpos, int level, int dir)
+bidi_cache_search (ptrdiff_t charpos, int level, int dir)
  {
    ptrdiff_t i, i_start;
  
@@ -542,6 +552,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
        bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
        bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
        bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
+      bidi_cache[idx].disp_pos = bidi_it->disp_pos;
+      bidi_cache[idx].disp_prop = bidi_it->disp_prop;
      }
  
    bidi_cache_last_idx = idx;
@@ -550,7 +562,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
  }
  
  static inline bidi_type_t
-bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
+bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
  {
    ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
  
@@ -628,17 +640,24 @@ bidi_pop_it (struct bidi_it *bidi_it)
    bidi_cache_last_idx = -1;
  }
  
+static ptrdiff_t bidi_cache_total_alloc;
+
  /* Stash away a copy of the cache and its control variables.  */
  void *
  bidi_shelve_cache (void)
  {
    unsigned char *databuf;
+  ptrdiff_t alloc;
  
+  /* Empty cache.  */
    if (bidi_cache_idx == 0)
      return NULL;
  
-  databuf = xmalloc (bidi_shelve_header_size
-                    + bidi_cache_idx * sizeof (struct bidi_it));
+  alloc = (bidi_shelve_header_size
+          + bidi_cache_idx * sizeof (struct bidi_it));
+  databuf = xmalloc (alloc);
+  bidi_cache_total_alloc += alloc;
+
    memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
    memcpy (databuf + sizeof (bidi_cache_idx),
           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
@@ -662,45 +681,65 @@ bidi_shelve_cache (void)
    return databuf;
  }
  
-/* Restore the cache state from a copy stashed away by bidi_shelve_cache.  */
+/* Restore the cache state from a copy stashed away by
+   bidi_shelve_cache, and free the buffer used to stash that copy.
+   JUST_FREE non-zero means free the buffer, but don't restore the
+   cache; used when the corresponding iterator is discarded instead of
+   being restored.  */
  void
-bidi_unshelve_cache (void *databuf)
+bidi_unshelve_cache (void *databuf, int just_free)
  {
    unsigned char *p = databuf;
  
    if (!p)
      {
-      /* A NULL pointer means an empty cache.  */
-      bidi_cache_start = 0;
-      bidi_cache_sp = 0;
-      bidi_cache_reset ();
+      if (!just_free)
+       {
+         /* A NULL pointer means an empty cache.  */
+         bidi_cache_start = 0;
+         bidi_cache_sp = 0;
+         bidi_cache_reset ();
+       }
      }
    else
      {
-      memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
-      bidi_cache_ensure_space (bidi_cache_idx);
-      memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
-             bidi_cache_idx * sizeof (struct bidi_it));
-      memcpy (bidi_cache_start_stack,
-             p + sizeof (bidi_cache_idx)
-             + bidi_cache_idx * sizeof (struct bidi_it),
-             sizeof (bidi_cache_start_stack));
-      memcpy (&bidi_cache_sp,
-             p + sizeof (bidi_cache_idx)
-             + bidi_cache_idx * sizeof (struct bidi_it)
-             + sizeof (bidi_cache_start_stack),
-             sizeof (bidi_cache_sp));
-      memcpy (&bidi_cache_start,
-             p + sizeof (bidi_cache_idx)
-             + bidi_cache_idx * sizeof (struct bidi_it)
-             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
-             sizeof (bidi_cache_start));
-      memcpy (&bidi_cache_last_idx,
-             p + sizeof (bidi_cache_idx)
-             + bidi_cache_idx * sizeof (struct bidi_it)
-             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
-             + sizeof (bidi_cache_start),
-             sizeof (bidi_cache_last_idx));
+      if (just_free)
+       {
+         ptrdiff_t idx;
+
+         memcpy (&idx, p, sizeof (bidi_cache_idx));
+         bidi_cache_total_alloc -=
+           bidi_shelve_header_size + idx * sizeof (struct bidi_it);
+       }
+      else
+       {
+         memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
+         bidi_cache_ensure_space (bidi_cache_idx);
+         memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
+                 bidi_cache_idx * sizeof (struct bidi_it));
+         memcpy (bidi_cache_start_stack,
+                 p + sizeof (bidi_cache_idx)
+                 + bidi_cache_idx * sizeof (struct bidi_it),
+                 sizeof (bidi_cache_start_stack));
+         memcpy (&bidi_cache_sp,
+                 p + sizeof (bidi_cache_idx)
+                 + bidi_cache_idx * sizeof (struct bidi_it)
+                 + sizeof (bidi_cache_start_stack),
+                 sizeof (bidi_cache_sp));
+         memcpy (&bidi_cache_start,
+                 p + sizeof (bidi_cache_idx)
+                 + bidi_cache_idx * sizeof (struct bidi_it)
+                 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
+                 sizeof (bidi_cache_start));
+         memcpy (&bidi_cache_last_idx,
+                 p + sizeof (bidi_cache_idx)
+                 + bidi_cache_idx * sizeof (struct bidi_it)
+                 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+                 + sizeof (bidi_cache_start),
+                 sizeof (bidi_cache_last_idx));
+         bidi_cache_total_alloc -=
+           bidi_shelve_header_size + bidi_cache_idx * sizeof (struct bidi_it);
+       }
  
        xfree (p);
      }
@@ -713,26 +752,16 @@ bidi_unshelve_cache (void *databuf)
  static void
  bidi_initialize (void)
  {
-
-#include "biditype.h"
-#include "bidimirror.h"
-
-  int i;
-
-  bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
+  bidi_type_table = uniprop_table (intern ("bidi-class"));
+  if (NILP (bidi_type_table))
+    abort ();
    staticpro (&bidi_type_table);
  
-  for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
-    char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
-                         make_number (bidi_type[i].type));
-
-  bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
+  bidi_mirror_table = uniprop_table (intern ("mirroring"));
+  if (NILP (bidi_mirror_table))
+    abort ();
    staticpro (&bidi_mirror_table);
  
-  for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
-    char_table_set (bidi_mirror_table, bidi_mirror[i].from,
-                   make_number (bidi_mirror[i].to));
-
    Qparagraph_start = intern ("paragraph-start");
    staticpro (&Qparagraph_start);
    paragraph_start_re = Fsymbol_value (Qparagraph_start);
@@ -747,6 +776,7 @@ bidi_initialize (void)
    staticpro (&paragraph_separate_re);
  
    bidi_cache_sp = 0;
+  bidi_cache_total_alloc = 0;
  
    bidi_initialized = 1;
  }
@@ -764,7 +794,7 @@ bidi_set_paragraph_end (struct bidi_it *bidi_it)
  
  /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
  void
-bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
+bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, int frame_window_p,
               struct bidi_it *bidi_it)
  {
    if (! bidi_initialized)
@@ -797,7 +827,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
      bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
    bidi_it->sor = L2R;   /* FIXME: should it be user-selectable? */
    bidi_it->disp_pos = -1;      /* invalid/unknown */
-  bidi_it->disp_prop_p = 0;
+  bidi_it->disp_prop = 0;
    /* We can only shrink the cache if we are at the bottom level of its
       "stack".  */
    if (bidi_cache_start == 0)
@@ -833,11 +863,11 @@ bidi_line_init (struct bidi_it *bidi_it)
     are zero-based character positions in S, BEGBYTE is byte position
     corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
     string.  */
-static inline EMACS_INT
-bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
-                 const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
+static inline ptrdiff_t
+bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
+                 const ptrdiff_t begbyte, const ptrdiff_t end, int unibyte)
  {
-  EMACS_INT pos = beg;
+  ptrdiff_t pos = beg;
    const unsigned char *p = s + begbyte, *start = p;
  
    if (unibyte)
@@ -862,7 +892,7 @@ bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
     character from the current buffer.  UNIBYTE non-zero means S is a
     unibyte string.  */
  static inline int
-bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
+bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, int unibyte)
  {
    if (s)
      {
@@ -877,23 +907,25 @@ bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
  
  /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
     character is covered by a display string, treat the entire run of
-   covered characters as a single character u+FFFC, and return their
-   combined length in CH_LEN and NCHARS.  DISP_POS specifies the
-   character position of the next display string, or -1 if not yet
-   computed.  DISP_PROP_P non-zero means that there's really a display
-   string at DISP_POS, as opposed to when we searched till DISP_POS
-   without findingone.  When the next character is at or beyond that
+   covered characters as a single character, either u+2029 or u+FFFC,
+   and return their combined length in CH_LEN and NCHARS.  DISP_POS
+   specifies the character position of the next display string, or -1
+   if not yet computed.  When the next character is at or beyond that
     position, the function updates DISP_POS with the position of the
-   next display string.  STRING->s is the C string to iterate, or NULL
-   if iterating over a buffer or a Lisp string; in the latter case,
-   STRING->lstring is the Lisp string.  */
+   next display string.  DISP_PROP non-zero means that there's really
+   a display string at DISP_POS, as opposed to when we searched till
+   DISP_POS without finding one.  If DISP_PROP is 2, it means the
+   display spec is of the form `(space ...)', which is replaced with
+   u+2029 to handle it as a paragraph separator.  STRING->s is the C
+   string to iterate, or NULL if iterating over a buffer or a Lisp
+   string; in the latter case, STRING->lstring is the Lisp string.  */
  static inline int
-bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
-                int *disp_prop_p, struct bidi_string_data *string,
-                int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
+bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
+                int *disp_prop, struct bidi_string_data *string,
+                int frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
  {
    int ch;
-  EMACS_INT endpos =
+  ptrdiff_t endpos =
      (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
    struct text_pos pos;
  
@@ -903,7 +935,7 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
      {
        SET_TEXT_POS (pos, charpos, bytepos);
        *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
-                                             disp_prop_p);
+                                             disp_prop);
      }
  
    /* Fetch the character at BYTEPOS.  */
@@ -913,19 +945,33 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
        *ch_len = 1;
        *nchars = 1;
        *disp_pos = endpos;
-      *disp_prop_p = 0;
+      *disp_prop = 0;
      }
-  else if (charpos >= *disp_pos && *disp_prop_p)
+  else if (charpos >= *disp_pos && *disp_prop)
      {
-      EMACS_INT disp_end_pos;
+      ptrdiff_t disp_end_pos;
  
        /* We don't expect to find ourselves in the middle of a display
          property.  Hopefully, it will never be needed.  */
        if (charpos > *disp_pos)
         abort ();
-      /* Return the Unicode Object Replacement Character to represent
-        the entire run of characters covered by the display string.  */
-      ch = 0xFFFC;
+      /* Text covered by `display' properties and overlays with
+        display properties or display strings is handled as a single
+        character that represents the entire run of characters
+        covered by the display property.  */
+      if (*disp_prop == 2)
+       {
+         /* `(space ...)' display specs are handled as paragraph
+            separators for the purposes of the reordering; see UAX#9
+            section 3 and clause HL1 in section 4.3 there.  */
+         ch = 0x2029;
+       }
+      else
+       {
+         /* All other display specs are handled as the Unicode Object
+            Replacement Character.  */
+         ch = 0xFFFC;
+       }
        disp_end_pos = compute_display_string_end (*disp_pos, string);
        *nchars = disp_end_pos - *disp_pos;
        if (*nchars <= 0)
@@ -983,11 +1029,11 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
    /* If we just entered a run of characters covered by a display
       string, compute the position of the next display string.  */
    if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
-      && *disp_prop_p)
+      && *disp_prop)
      {
        SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
        *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
-                                             disp_prop_p);
+                                             disp_prop);
      }
  
    return ch;
@@ -1003,12 +1049,12 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
     following the buffer position, -1 if position is at the beginning
     of a new paragraph, or -2 if position is neither at beginning nor
     at end of a paragraph.  */
-static EMACS_INT
-bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
+static ptrdiff_t
+bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
  {
    Lisp_Object sep_re;
    Lisp_Object start_re;
-  EMACS_INT val;
+  ptrdiff_t val;
  
    sep_re = paragraph_separate_re;
    start_re = paragraph_start_re;
@@ -1025,15 +1071,25 @@ bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
    return val;
  }
  
+/* On my 2005-vintage machine, searching back for paragraph start
+   takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
+   when user types C-p.  The number below limits each call to
+   bidi_paragraph_init to about 10 ms.  */
+#define MAX_PARAGRAPH_SEARCH 7500
+
  /* Find the beginning of this paragraph by looking back in the buffer.
-   Value is the byte position of the paragraph's beginning.  */
-static EMACS_INT
-bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
+   Value is the byte position of the paragraph's beginning, or
+   BEGV_BYTE if paragraph_start_re is still not found after looking
+   back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
+static ptrdiff_t
+bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
  {
    Lisp_Object re = paragraph_start_re;
-  EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
+  ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
+  ptrdiff_t n = 0;
  
    while (pos_byte > BEGV_BYTE
+        && n++ < MAX_PARAGRAPH_SEARCH
          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
      {
        /* FIXME: What if the paragraph beginning is covered by a
@@ -1043,6 +1099,8 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
        pos = find_next_newline_no_quit (pos - 1, -1);
        pos_byte = CHAR_TO_BYTE (pos);
      }
+  if (n >= MAX_PARAGRAPH_SEARCH)
+    pos_byte = BEGV_BYTE;
    return pos_byte;
  }
  
@@ -1064,14 +1122,14 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
  void
  bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
  {
-  EMACS_INT bytepos = bidi_it->bytepos;
+  ptrdiff_t bytepos = bidi_it->bytepos;
    int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
-  EMACS_INT pstartbyte;
+  ptrdiff_t pstartbyte;
    /* Note that begbyte is a byte position, while end is a character
       position.  Yes, this is ugly, but we are trying to avoid costly
       calls to BYTE_TO_CHAR and its ilk.  */
-  EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
-  EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
+  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
+  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
  
    /* Special case for an empty buffer. */
    if (bytepos == begbyte && bidi_it->charpos == end)
@@ -1093,9 +1151,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
    else if (dir == NEUTRAL_DIR) /* P2 */
      {
        int ch;
-      EMACS_INT ch_len, nchars;
-      EMACS_INT pos, disp_pos = -1;
-      int disp_prop_p = 0;
+      ptrdiff_t ch_len, nchars;
+      ptrdiff_t pos, disp_pos = -1;
+      int disp_prop = 0;
        bidi_type_t type;
        const unsigned char *s;
  
@@ -1143,16 +1201,12 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
         bytepos = pstartbyte;
         if (!string_p)
           pos = BYTE_TO_CHAR (bytepos);
-       ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
+       ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                               &bidi_it->string,
                               bidi_it->frame_window_p, &ch_len, &nchars);
         type = bidi_get_type (ch, NEUTRAL_DIR);
  
         for (pos += nchars, bytepos += ch_len;
-            /* NOTE: UAX#9 says to search only for L, AL, or R types
-               of characters, and ignore RLE, RLO, LRE, and LRO.
-               However, I'm not sure it makes sense to omit those 4;
-               should try with and without that to see the effect.  */
              (bidi_get_category (type) != STRONG)
                || (bidi_ignore_explicit_marks_for_paragraph_level
                    && (type == RLE || type == RLO
@@ -1172,14 +1226,18 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
               break;
             /* Fetch next character and advance to get past it.  */
             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
-                                 &disp_prop_p, &bidi_it->string,
+                                 &disp_prop, &bidi_it->string,
                                   bidi_it->frame_window_p, &ch_len, &nchars);
             pos += nchars;
             bytepos += ch_len;
           }
-       if (type == STRONG_R || type == STRONG_AL) /* P3 */
+       if ((type == STRONG_R || type == STRONG_AL) /* P3 */
+           || (!bidi_ignore_explicit_marks_for_paragraph_level
+               && (type == RLO || type == RLE)))
           bidi_it->paragraph_dir = R2L;
-       else if (type == STRONG_L)
+       else if (type == STRONG_L
+                || (!bidi_ignore_explicit_marks_for_paragraph_level
+                    && (type == LRO || type == LRE)))
           bidi_it->paragraph_dir = L2R;
         if (!string_p
             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
@@ -1189,8 +1247,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
             else
               {
-               EMACS_INT prevpbyte = pstartbyte;
-               EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
+               ptrdiff_t prevpbyte = pstartbyte;
+               ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
  
                 /* Find the beginning of the previous paragraph, if any.  */
                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
@@ -1305,7 +1363,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
        bidi_it->ch_len = 1;
        bidi_it->nchars = 1;
        bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
-      bidi_it->disp_prop_p = 0;
+      bidi_it->disp_prop = 0;
      }
    else
      {
@@ -1313,7 +1371,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
          display string, treat the entire run of covered characters as
          a single character u+FFFC.  */
        curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
-                                &bidi_it->disp_pos, &bidi_it->disp_prop_p,
+                                &bidi_it->disp_pos, &bidi_it->disp_prop,
                                  &bidi_it->string, bidi_it->frame_window_p,
                                  &bidi_it->ch_len, &bidi_it->nchars);
      }
@@ -1450,7 +1508,7 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
  {
    int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
    int new_level  = bidi_resolve_explicit_1 (bidi_it);
-  EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
+  ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
    const unsigned char *s = STRINGP (bidi_it->string.lstring)
      ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
  
@@ -1536,7 +1594,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
    int next_char;
    bidi_type_t type_of_next;
    struct bidi_it saved_it;
-  EMACS_INT eob =
+  ptrdiff_t eob =
      (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
      ? bidi_it->string.schars : ZV;
  
@@ -1661,7 +1719,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
             type = WEAK_EN;
           else                  /* W5: ET/BN with EN after it.  */
             {
-             EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
+             ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
               const unsigned char *s =
                 STRINGP (bidi_it->string.lstring)
                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
@@ -1909,11 +1967,11 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
    bidi_type_t type;
    int level, prev_level = -1;
    struct bidi_saved_info next_for_neutral;
-  EMACS_INT next_char_pos = -2;
+  ptrdiff_t next_char_pos = -2;
  
    if (bidi_it->scan_dir == 1)
      {
-      EMACS_INT eob =
+      ptrdiff_t eob =
         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
         ? bidi_it->string.schars : ZV;
  
@@ -2040,15 +2098,15 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
        && bidi_it->next_for_ws.type == UNKNOWN_BT)
      {
        int ch;
-      EMACS_INT clen = bidi_it->ch_len;
-      EMACS_INT bpos = bidi_it->bytepos;
-      EMACS_INT cpos = bidi_it->charpos;
-      EMACS_INT disp_pos = bidi_it->disp_pos;
-      EMACS_INT nc = bidi_it->nchars;
+      ptrdiff_t clen = bidi_it->ch_len;
+      ptrdiff_t bpos = bidi_it->bytepos;
+      ptrdiff_t cpos = bidi_it->charpos;
+      ptrdiff_t disp_pos = bidi_it->disp_pos;
+      ptrdiff_t nc = bidi_it->nchars;
        struct bidi_string_data bs = bidi_it->string;
        bidi_type_t chtype;
        int fwp = bidi_it->frame_window_p;
-      int dpp = bidi_it->disp_prop_p;
+      int dpp = bidi_it->disp_prop;
  
        if (bidi_it->nchars <= 0)
         abort ();
@@ -2193,7 +2251,8 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
      GCPRO1 (bidi_it->string.lstring);
  
    /* If we just passed a newline, initialize for the next line.  */
-  if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
+  if (!bidi_it->first_elt
+      && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
      bidi_line_init (bidi_it);
  
    /* Prepare the sentinel iterator state, and cache it.  When we bump
@@ -2274,7 +2333,8 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
       reordering, whereas we _must_ know the paragraph base direction
       _before_ we process the paragraph's text, since the base
       direction affects the reordering.  */
-  if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
+  if (bidi_it->scan_dir == 1
+      && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
      {
        /* The paragraph direction of the entire string, once
          determined, is in effect for the entire string.  Setting the
@@ -2285,7 +2345,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
         bidi_it->separator_limit = bidi_it->string.schars;
        else if (bidi_it->bytepos < ZV_BYTE)
         {
-         EMACS_INT sep_len =
+         ptrdiff_t sep_len =
             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
                                    bidi_it->bytepos + bidi_it->ch_len);
           if (bidi_it->nchars <= 0)