* src/dispnew.c: Remove obsolete comment.

[gnu-emacs] / src / bidi.c
diff --git a/src/bidi.c b/src/bidi.c

index ea47cd33fecf97e9d0e6ed11bf36a2681ed940da..a25976dcc015e36154f4e4846ead7433807e0594 100644 (file)
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -23,17 +23,23 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
     as per UAX#9, a part of the Unicode Standard.
  
     Unlike the reference and most other implementations, this one is
-   designed to be called once for every character in the buffer.
+   designed to be called once for every character in the buffer or
+   string.
  
-   The main entry point is bidi_get_next_char_visually.  Each time it
+   The main entry point is bidi_move_to_visually_next.  Each time it
     is called, it finds the next character in the visual order, and
     returns its information in a special structure.  The caller is then
     expected to process this character for display or any other
-   purposes, and call bidi_get_next_char_visually for the next
-   character.  See the comments in bidi_get_next_char_visually for
-   more details about its algorithm that finds the next visual-order
+   purposes, and call bidi_move_to_visually_next for the next
+   character.  See the comments in bidi_move_to_visually_next for more
+   details about its algorithm that finds the next visual-order
     character by resolving their levels on the fly.
  
+   The two other entry points are bidi_paragraph_init and
+   bidi_mirror_char.  The first determines the base direction of a
+   paragraph, while the second returns the mirrored version of its
+   argument character.
+
     If you want to understand the code, you will have to read it
     together with the relevant portions of UAX#9.  The comments include
     references to UAX#9 rules, for that very reason.
@@ -96,7 +102,7 @@ typedef enum {
  
  int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  
-static Lisp_Object fallback_paragraph_start_re, fallback_paragraph_separate_re;
+static Lisp_Object paragraph_start_re, paragraph_separate_re;
  static Lisp_Object Qparagraph_start, Qparagraph_separate;
  
  static void
@@ -393,26 +399,24 @@ bidi_initialize ()
                           bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
                           make_number (bidi_type[i].type));
  
-  fallback_paragraph_start_re =
-    XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
-  if (!STRINGP (fallback_paragraph_start_re))
-    fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
-  staticpro (&fallback_paragraph_start_re);
    Qparagraph_start = intern ("paragraph-start");
    staticpro (&Qparagraph_start);
-  fallback_paragraph_separate_re =
-    XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
-  if (!STRINGP (fallback_paragraph_separate_re))
-    fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
-  staticpro (&fallback_paragraph_separate_re);
+  paragraph_start_re = Fsymbol_value (Qparagraph_start);
+  if (!STRINGP (paragraph_start_re))
+    paragraph_start_re = build_string ("\f\\|[ \t]*$");
+  staticpro (&paragraph_start_re);
    Qparagraph_separate = intern ("paragraph-separate");
    staticpro (&Qparagraph_separate);
+  paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
+  if (!STRINGP (paragraph_separate_re))
+    paragraph_separate_re = build_string ("[ \t\f]*$");
+  staticpro (&paragraph_separate_re);
    bidi_initialized = 1;
  }
  
  /* Return the bidi type of a character CH, subject to the current
     directional OVERRIDE.  */
-bidi_type_t
+static INLINE bidi_type_t
  bidi_get_type (int ch, bidi_dir_t override)
  {
    bidi_type_t default_type;
@@ -463,7 +467,7 @@ bidi_check_type (bidi_type_t type)
  }
  
  /* Given a bidi TYPE of a character, return its category.  */
-bidi_category_t
+static INLINE bidi_category_t
  bidi_get_category (bidi_type_t type)
  {
    switch (type)
@@ -520,7 +524,7 @@ bidi_mirror_char (int c)
  
  /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
     copies the part of the level stack that is actually in use.  */
-static inline void
+static INLINE void
  bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
  {
    int i;
@@ -536,18 +540,33 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
  
  /* Caching the bidi iterator states.  */
  
-static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
-static int bidi_cache_idx;
-static int bidi_cache_last_idx;
+#define BIDI_CACHE_CHUNK 200
+static struct bidi_it *bidi_cache;
+static size_t bidi_cache_size = 0;
+static size_t elsz = sizeof (struct bidi_it);
+static int bidi_cache_idx;     /* next unused cache slot */
+static int bidi_cache_last_idx;        /* slot of last cache hit */
  
-static inline void
+static INLINE void
  bidi_cache_reset (void)
  {
    bidi_cache_idx = 0;
    bidi_cache_last_idx = -1;
  }
  
-static inline void
+static INLINE void
+bidi_cache_shrink (void)
+{
+  if (bidi_cache_size > BIDI_CACHE_CHUNK)
+    {
+      bidi_cache_size = BIDI_CACHE_CHUNK;
+      bidi_cache =
+       (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+    }
+  bidi_cache_reset ();
+}
+
+static INLINE void
  bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
  {
    int current_scan_dir = bidi_it->scan_dir;
@@ -564,7 +583,7 @@ bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
     level less or equal to LEVEL.  if LEVEL is -1, disregard the
     resolved levels in cached states.  DIR, if non-zero, means search
     in that direction from the last cache hit.  */
-static inline int
+static INLINE int
  bidi_cache_search (int charpos, int level, int dir)
  {
    int i, i_start;
@@ -655,7 +674,7 @@ bidi_cache_find_level_change (int level, int dir, int before)
    return -1;
  }
  
-static inline void
+static INLINE void
  bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
  {
    int idx;
@@ -668,9 +687,13 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
    if (idx < 0)
      {
        idx = bidi_cache_idx;
-      /* Don't overrun the cache limit.  */
-      if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
-       abort ();
+      /* Enlarge the cache as needed.  */
+      if (idx >= bidi_cache_size)
+       {
+         bidi_cache_size += BIDI_CACHE_CHUNK;
+         bidi_cache =
+           (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+       }
        /* Character positions should correspond to cache positions 1:1.
          If we are outside the range of cached positions, the cache is
          useless and must be reset.  */
@@ -684,7 +707,6 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
        bidi_copy_it (&bidi_cache[idx], bidi_it);
        if (!resolved)
         bidi_cache[idx].resolved_level = -1;
-      bidi_cache[idx].new_paragraph = 0;
      }
    else
      {
@@ -710,7 +732,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
      bidi_cache_idx = idx + 1;
  }
  
-static inline bidi_type_t
+static INLINE bidi_type_t
  bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
  {
    int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
@@ -730,7 +752,7 @@ bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
    return UNKNOWN_BT;
  }
  
-static inline int
+static INLINE int
  bidi_peek_at_next_level (struct bidi_it *bidi_it)
  {
    if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
@@ -743,19 +765,16 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it)
     following the buffer position, -1 if position is at the beginning
     of a new paragraph, or -2 if position is neither at beginning nor
     at end of a paragraph.  */
-EMACS_INT
+static EMACS_INT
  bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
  {
-  Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
-                                           Fcurrent_buffer ());
-  Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start,
-                                             Fcurrent_buffer ());
+  /* FIXME: Uses regexps cached at init time, not buffer-local values.  */
+  Lisp_Object sep_re;
+  Lisp_Object start_re;
    EMACS_INT val;
  
-  if (!STRINGP (sep_re))
-    sep_re = fallback_paragraph_separate_re;
-  if (!STRINGP (start_re))
-    start_re = fallback_paragraph_start_re;
+  sep_re = paragraph_separate_re;
+  start_re = paragraph_start_re;
  
    val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
    if (val < 0)
@@ -773,7 +792,7 @@ bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
     embedding levels on either side of the run boundary.  Also, update
     the saved info about previously seen characters, since that info is
     generally valid for a single level run.  */
-static inline void
+static INLINE void
  bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
  {
    int higher_level = level_before > level_after ? level_before : level_after;
@@ -824,11 +843,9 @@ bidi_line_init (struct bidi_it *bidi_it)
  static EMACS_INT
  bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
  {
-  Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
+  Lisp_Object re = paragraph_start_re;
    EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
  
-  if (!STRINGP (re))
-    re = fallback_paragraph_start_re;
    while (pos_byte > BEGV_BYTE
          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
      {
@@ -873,7 +890,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
        int ch, ch_len;
        EMACS_INT pos;
        bidi_type_t type;
-      EMACS_INT sep_len;
+
+      if (!bidi_initialized)
+       bidi_initialize ();
  
        /* If we are inside a paragraph separator, we are just waiting
          for the separator to be exhausted; use the previous paragraph
@@ -898,11 +917,6 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
          middle of it.  Find where this paragraph starts.  */
        bytepos = bidi_find_paragraph_start (pos, bytepos);
  
-      /* We should always be at the beginning of a new line at this
-        point.  */
-      if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n'))
-       abort ();
-
        bidi_it->separator_limit = -1;
        bidi_it->new_paragraph = 0;
        ch = FETCH_CHAR (bytepos);
@@ -942,7 +956,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
    /* Contrary to UAX#9 clause P3, we only default the paragraph
       direction to L2R if we have no previous usable paragraph
       direction.  */
-  if (bidi_it->paragraph_dir == NEUTRAL_DIR)
+  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
      bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
    if (bidi_it->paragraph_dir == R2L)
      bidi_it->level_stack[0].level = 1;
@@ -954,7 +968,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
  
  /* Do whatever UAX#9 clause X8 says should be done at paragraph's
     end.  */
-static inline void
+static INLINE void
  bidi_set_paragraph_end (struct bidi_it *bidi_it)
  {
    bidi_it->invalid_levels = 0;
@@ -976,10 +990,11 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
    bidi_it->new_paragraph = 1;
    bidi_it->separator_limit = -1;
    bidi_it->type = NEUTRAL_B;
-  bidi_it->type_after_w1 = UNKNOWN_BT;
-  bidi_it->orig_type = UNKNOWN_BT;
+  bidi_it->type_after_w1 = NEUTRAL_B;
+  bidi_it->orig_type = NEUTRAL_B;
    bidi_it->prev_was_pdf = 0;
-  bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
+  bidi_it->prev.type = bidi_it->prev.type_after_w1 =
+    bidi_it->prev.orig_type = UNKNOWN_BT;
    bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
      bidi_it->last_strong.orig_type = UNKNOWN_BT;
    bidi_it->next_for_neutral.charpos = -1;
@@ -991,11 +1006,12 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
      bidi_it->prev_for_neutral.type_after_w1 =
      bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
    bidi_it->sor = L2R;   /* FIXME: should it be user-selectable? */
+  bidi_cache_shrink ();
  }
  
  /* Push the current embedding level and override status; reset the
     current level to LEVEL and the current override status to OVERRIDE.  */
-static inline void
+static INLINE void
  bidi_push_embedding_level (struct bidi_it *bidi_it,
                            int level, bidi_dir_t override)
  {
@@ -1008,7 +1024,7 @@ bidi_push_embedding_level (struct bidi_it *bidi_it,
  
  /* Pop the embedding level and directional override status from the
     stack, and return the new level.  */
-static inline int
+static INLINE int
  bidi_pop_embedding_level (struct bidi_it *bidi_it)
  {
    /* UAX#9 says to ignore invalid PDFs.  */
@@ -1018,7 +1034,7 @@ bidi_pop_embedding_level (struct bidi_it *bidi_it)
  }
  
  /* Record in SAVED_INFO the information about the current character.  */
-static inline void
+static INLINE void
  bidi_remember_char (struct bidi_saved_info *saved_info,
                     struct bidi_it *bidi_it)
  {
@@ -1034,7 +1050,7 @@ bidi_remember_char (struct bidi_saved_info *saved_info,
  
  /* Resolve the type of a neutral character according to the type of
     surrounding strong text and the current embedding level.  */
-static inline bidi_type_t
+static INLINE bidi_type_t
  bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
  {
    /* N1: European and Arabic numbers are treated as though they were R.  */
@@ -1051,7 +1067,7 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
      return STRONG_R;
  }
  
-static inline int
+static INLINE int
  bidi_explicit_dir_char (int c)
  {
    /* FIXME: this should be replaced with a lookup table with suitable
@@ -1242,7 +1258,7 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
    if (prev_level < new_level
        && bidi_it->type == WEAK_BN
        && bidi_it->ignore_bn_limit == 0 /* only if not already known */
-      && bidi_it->ch != BIDI_EOB       /* not already at EOB */
+      && bidi_it->bytepos < ZV_BYTE    /* not already at EOB */
        && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
                                              + bidi_it->ch_len)))
      {
@@ -1301,7 +1317,7 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
  
  /* Advance in the buffer, resolve weak types and return the type of
     the next character after weak type resolution.  */
-bidi_type_t
+static bidi_type_t
  bidi_resolve_weak (struct bidi_it *bidi_it)
  {
    bidi_type_t type;
@@ -1347,12 +1363,16 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
        if (type == WEAK_NSM)    /* W1 */
         {
           /* Note that we don't need to consider the case where the
-            prev character has its type overridden by an RLO or LRO:
-            such characters are outside the current level run, and
-            thus not relevant to this NSM.  Thus, NSM gets the
-            orig_type of the previous character.  */
-         if (bidi_it->prev.type != UNKNOWN_BT)
-           type = bidi_it->prev.orig_type;
+            prev character has its type overridden by an RLO or LRO,
+            because then either the type of this NSM would have been
+            also overridden, or the previous character is outside the
+            current level run, and thus not relevant to this NSM.
+            This is why NSM gets the type_after_w1 of the previous
+            character.  */
+         if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
+             /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
+             && bidi_it->prev.type_after_w1 != NEUTRAL_B)
+           type = bidi_it->prev.type_after_w1;
           else if (bidi_it->sor == R2L)
             type = STRONG_R;
           else if (bidi_it->sor == L2R)
@@ -1488,7 +1508,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
    return type;
  }
  
-bidi_type_t
+static bidi_type_t
  bidi_resolve_neutral (struct bidi_it *bidi_it)
  {
    int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
@@ -1612,7 +1632,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
  /* Given an iterator state in BIDI_IT, advance one character position
     in the buffer to the next character (in the logical order), resolve
     the bidi type of that next character, and return that type.  */
-bidi_type_t
+static bidi_type_t
  bidi_type_of_next_char (struct bidi_it *bidi_it)
  {
    bidi_type_t type;
@@ -1638,7 +1658,7 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
     the buffer to the next character (in the logical order), resolve
     the embedding and implicit levels of that next character, and
     return the resulting level.  */
-int
+static int
  bidi_level_of_next_char (struct bidi_it *bidi_it)
  {
    bidi_type_t type;
@@ -1648,7 +1668,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
    if (bidi_it->scan_dir == 1)
      {
        /* There's no sense in trying to advance if we hit end of text.  */
-      if (bidi_it->ch == BIDI_EOB)
+      if (bidi_it->bytepos >= ZV_BYTE)
         return bidi_it->resolved_level;
  
        /* Record the info about the previous character.  */
@@ -1873,7 +1893,7 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
  }
  
  void
-bidi_get_next_char_visually (struct bidi_it *bidi_it)
+bidi_move_to_visually_next (struct bidi_it *bidi_it)
  {
    int old_level, new_level, next_level;
    struct bidi_it sentinel;
@@ -1887,7 +1907,9 @@ bidi_get_next_char_visually (struct bidi_it *bidi_it)
    if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
      bidi_line_init (bidi_it);
  
-  /* Prepare the sentinel iterator state.  */
+  /* Prepare the sentinel iterator state, and cache it.  When we bump
+     into it, scanning backwards, we'll know that the last non-base
+     level is exhausted.  */
    if (bidi_cache_idx == 0)
      {
        bidi_copy_it (&sentinel, bidi_it);
@@ -1898,6 +1920,7 @@ bidi_get_next_char_visually (struct bidi_it *bidi_it)
           sentinel.ch = '\n';   /* doesn't matter, but why not? */
           sentinel.ch_len = 1;
         }
+      bidi_cache_iterator_state (&sentinel, 1);
      }
  
    old_level = bidi_it->resolved_level;
@@ -1913,11 +1936,6 @@ bidi_get_next_char_visually (struct bidi_it *bidi_it)
        int incr = ascending ? 1 : -1;
        int expected_next_level = old_level + incr;
  
-      /* If we don't have anything cached yet, we need to cache the
-        sentinel state, since we'll need it to record where to jump
-        when the last non-base level is exhausted.  */
-      if (bidi_cache_idx == 0)
-       bidi_cache_iterator_state (&sentinel, 1);
        /* Jump (or walk) to the other edge of this level.  */
        bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
        /* Switch scan direction and peek at the next character in the