X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7e09ef09a479731d01b1ca46e94ddadd73ac98e3..7c1802f6ffc2704ba8042c7c1c6faa73dfa210d1:/src/bidi.c

diff --git a/src/bidi.c b/src/bidi.c
index ef0092f3d9..c23ff95435 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -1,13 +1,13 @@
 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
-   Copyright (C) 2000-2001, 2004-2005, 2009-2015 Free Software
+   Copyright (C) 2000-2001, 2004-2005, 2009-2016 Free Software
    Foundation, Inc.
 
 This file is part of GNU Emacs.
 
 GNU Emacs is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
 
 GNU Emacs is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -262,7 +262,6 @@ typedef enum {
 } bidi_category_t;
 
 static Lisp_Object paragraph_start_re, paragraph_separate_re;
-static Lisp_Object Qparagraph_start, Qparagraph_separate;
 
 
 /***********************************************************************
@@ -533,7 +532,7 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
   /* Copy everything from the start through the active part of
      the level stack.  */
   memcpy (to, from,
-	  (offsetof (struct bidi_it, level_stack[1])
+	  (offsetof (struct bidi_it, level_stack) + sizeof from->level_stack[0]
 	   + from->stack_idx * sizeof from->level_stack[0]));
 }
 
@@ -1314,13 +1313,13 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
 	  /* `(space ...)' display specs are handled as paragraph
 	     separators for the purposes of the reordering; see UAX#9
 	     section 3 and clause HL1 in section 4.3 there.  */
-	  ch = 0x2029;
+	  ch = PARAGRAPH_SEPARATOR;
 	}
       else
 	{
 	  /* All other display specs are handled as the Unicode Object
 	     Replacement Character.  */
-	  ch = 0xFFFC;
+	  ch = OBJECT_REPLACEMENT_CHARACTER;
 	}
       disp_end_pos = compute_display_string_end (*disp_pos, string);
       if (disp_end_pos < 0)
@@ -1800,6 +1799,11 @@ bidi_explicit_dir_char (int ch)
 
   if (!bidi_initialized)
     emacs_abort ();
+  if (ch < 0)
+    {
+      eassert (ch == BIDI_EOB);
+      return false;
+    }
   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
   return (ch_type == LRE || ch_type == LRO
 	  || ch_type == RLE || ch_type == RLO
@@ -1915,8 +1919,6 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
 	{
 	  eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
 	  prev_type = bidi_it->prev.orig_type;
-	  if (prev_type == FSI)
-	    prev_type = bidi_it->type_after_wn;
 	}
     }
   /* Don't move at end of buffer/string.  */
@@ -1931,8 +1933,6 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
 	emacs_abort ();
       bidi_it->bytepos += bidi_it->ch_len;
       prev_type = bidi_it->orig_type;
-      if (prev_type == FSI)
-	prev_type = bidi_it->type_after_wn;
     }
   else	/* EOB or end of string */
     prev_type = NEUTRAL_B;
@@ -2087,10 +2087,17 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
       if (typ1 != STRONG_R && typ1 != STRONG_AL)
 	{
 	  type = LRI;
+	  /* Override orig_type with the resolved type: the next
+	     character, the first one inside the isolate, will read
+	     it as the type of its preceding character.  */
+	  bidi_it->orig_type = type;
 	  goto fsi_as_lri;
 	}
       else
-	type = RLI;
+	{
+	  type = RLI;
+	  bidi_it->orig_type = type;
+	}
       /* FALLTHROUGH */
     case RLI:	/* X5a */
       if (override == NEUTRAL_DIR)
@@ -2311,7 +2318,31 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
 	      if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
 		type = WEAK_EN;
 	    }
-	  else if (bidi_it->next_en_pos >=0)
+	  else if (type == WEAK_BN
+		   /* This condition is for the following important case:
+
+		      . we are at level zero
+		      . either previous strong character was L,
+			 or we've seen no strong characters since sos
+			 and the base paragraph direction is L2R
+		      . this BN is NOT a bidi directional control
+
+		      For such a situation, either this BN will be
+		      converted to EN per W5, and then to L by virtue
+		      of W7; or it will become ON per W6, and then L
+		      because of N1/N2.  So we take a shortcut here
+		      and make it L right away, to avoid the
+		      potentially costly loop below.  This is
+		      important when the buffer has a long series of
+		      control characters, like binary nulls, and no
+		      R2L characters at all.  */
+		   && new_level == 0
+		   && !bidi_explicit_dir_char (bidi_it->ch)
+		   && ((bidi_it->last_strong.type == STRONG_L)
+		       || (bidi_it->last_strong.type == UNKNOWN_BT
+			   && bidi_it->sos == L2R)))
+	    type = STRONG_L;
+	  else if (bidi_it->next_en_pos >= 0)
 	    {
 	      /* We overstepped the last known position for ET
 		 resolution but there could be other such characters
@@ -2448,7 +2479,7 @@ typedef struct bpa_stack_entry {
 #define MAX_BPA_STACK ((int)max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
 
 /* UAX#9 says to match opening brackets with the matching closing
-   brackets or their canonical equivalents.  As of Unicode 7.0, there
+   brackets or their canonical equivalents.  As of Unicode 8.0, there
    are only 2 bracket characters that have canonical equivalence
    decompositions: u+2329 and u+232A.  So instead of accessing the
    table in uni-decomposition.el, we just handle these 2 characters
@@ -2478,8 +2509,8 @@ typedef struct bpa_stack_entry {
 
 #define CANONICAL_EQU(c)					\
   ( ASCII_CHAR_P (c) ? c					\
-    : (c) == 0x2329 ? 0x3008					\
-    : (c) == 0x232a ? 0x3009					\
+    : (c) == LEFT_POINTING_ANGLE_BRACKET ? LEFT_ANGLE_BRACKET	\
+    : (c) == RIGHT_POINTING_ANGLE_BRACKET ? RIGHT_ANGLE_BRACKET	\
     : c )
 
 #ifdef ENABLE_CHECKING
@@ -2944,7 +2975,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 			    we are already at paragraph end.  */
        && (is_neutral || bidi_isolate_fmt_char (type)))
       /* N1-N2/Retaining */
-      || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
+      || type == WEAK_BN)
     {
       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
 	{
@@ -2974,8 +3005,10 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	 entering the expensive loop in the "else" clause.  */
       else if (current_level == 0
 	       && bidi_it->prev_for_neutral.type == STRONG_L
-	       && !bidi_explicit_dir_char (bidi_it->ch)
-	       && !bidi_isolate_fmt_char (type))
+	       && (ASCII_CHAR_P (bidi_it->ch)
+		   || (type != WEAK_BN
+		       && !bidi_explicit_dir_char (bidi_it->ch)
+		       && !bidi_isolate_fmt_char (type))))
 	type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
 				       STRONG_L, current_level);
       else if (/* current level is 1 */
@@ -2987,6 +3020,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	       && (bidi_it->prev_for_neutral.type == STRONG_R
 		   || bidi_it->prev_for_neutral.type == WEAK_EN
 		   || bidi_it->prev_for_neutral.type == WEAK_AN)
+	       && type != WEAK_BN
 	       && !bidi_explicit_dir_char (bidi_it->ch)
 	       && !bidi_isolate_fmt_char (type))
 	type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
@@ -2995,7 +3029,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	{
 	  /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
 	     the assumption of batch-style processing; see clauses W4,
-	     W5, and especially N1, which require to look far forward
+	     W5, and especially N1, which require looking far forward
 	     (as well as back) in the buffer/string.  May the fleas of
 	     a thousand camels infest the armpits of those who design
 	     supposedly general-purpose algorithms by looking at their
@@ -3154,7 +3188,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
 	}
     }
 
-  /* Perhaps the character we want is already cached s fully resolved.
+  /* Perhaps the character we want is already cached as fully resolved.
      If it is, the call to bidi_cache_find below will return a type
      other than UNKNOWN_BT.  */
   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
@@ -3212,8 +3246,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
      it belongs to a sequence of WS characters preceding a newline
      or a TAB or a paragraph separator.  */
   if ((bidi_it->orig_type == NEUTRAL_WS
+       || bidi_it->orig_type == WEAK_BN
        || bidi_isolate_fmt_char (bidi_it->orig_type))
-      && bidi_it->next_for_ws.charpos < bidi_it->charpos)
+      && bidi_it->next_for_ws.charpos < bidi_it->charpos
+      /* If this character is already at base level, we don't need to
+	 reset it, so avoid the potentially costly loop below.  */
+      && level != bidi_it->level_stack[0].level)
     {
       int ch;
       ptrdiff_t clen = bidi_it->ch_len;
@@ -3245,11 +3283,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
 
   /* Resolve implicit levels.  */
   if (bidi_it->orig_type == NEUTRAL_B /* L1 */
-	   || bidi_it->orig_type == NEUTRAL_S
-	   || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
-	   || (bidi_it->orig_type == NEUTRAL_WS
-	       && (bidi_it->next_for_ws.type == NEUTRAL_B
-		   || bidi_it->next_for_ws.type == NEUTRAL_S)))
+      || bidi_it->orig_type == NEUTRAL_S
+      || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
+      || ((bidi_it->orig_type == NEUTRAL_WS
+	   || bidi_it->orig_type == WEAK_BN
+	   || bidi_isolate_fmt_char (bidi_it->orig_type)
+	   || bidi_explicit_dir_char (bidi_it->ch))
+	  && (bidi_it->next_for_ws.type == NEUTRAL_B
+	      || bidi_it->next_for_ws.type == NEUTRAL_S)))
     level = bidi_it->level_stack[0].level;
   else if ((level & 1) == 0) /* I1 */
     {
@@ -3344,7 +3385,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
 {
   int old_level, new_level, next_level;
   struct bidi_it sentinel;
-  struct gcpro gcpro1;
 
   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
     emacs_abort ();
@@ -3354,11 +3394,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
       bidi_it->scan_dir = 1;	/* default to logical order */
     }
 
-  /* The code below can call eval, and thus cause GC.  If we are
-     iterating a Lisp string, make sure it won't be GCed.  */
-  if (STRINGP (bidi_it->string.lstring))
-    GCPRO1 (bidi_it->string.lstring);
-
   /* If we just passed a newline, initialize for the next line.  */
   if (!bidi_it->first_elt
       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
@@ -3504,9 +3539,6 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
 
   eassert (bidi_it->resolved_level >= 0
 	   && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
-
-  if (STRINGP (bidi_it->string.lstring))
-    UNGCPRO;
 }
 
 /* Utility function for looking for strong directional characters