/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2015 Free Software
+ Copyright (C) 2000-2001, 2004-2005, 2009-2016 Free Software
Foundation, Inc.
This file is part of GNU Emacs.
GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
/* Copy everything from the start through the active part of
the level stack. */
memcpy (to, from,
- (offsetof (struct bidi_it, level_stack[1])
+ (offsetof (struct bidi_it, level_stack) + sizeof from->level_stack[0]
+ from->stack_idx * sizeof from->level_stack[0]));
}
emacs_abort ();
staticpro (&bidi_brackets_table);
- DEFSYM (Qparagraph_start, "paragraph-start");
- paragraph_start_re = Fsymbol_value (Qparagraph_start);
- if (!STRINGP (paragraph_start_re))
- paragraph_start_re = build_string ("\f\\|[ \t]*$");
+ paragraph_start_re = build_string ("^\\(\f\\|[ \t]*\\)$");
staticpro (&paragraph_start_re);
- DEFSYM (Qparagraph_separate, "paragraph-separate");
- paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
- if (!STRINGP (paragraph_separate_re))
- paragraph_separate_re = build_string ("[ \t\f]*$");
+ paragraph_separate_re = build_string ("^[ \t\f]*$");
staticpro (&paragraph_separate_re);
bidi_cache_sp = 0;
{
eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
prev_type = bidi_it->prev.orig_type;
- if (prev_type == FSI)
- prev_type = bidi_it->type_after_wn;
}
}
/* Don't move at end of buffer/string. */
emacs_abort ();
bidi_it->bytepos += bidi_it->ch_len;
prev_type = bidi_it->orig_type;
- if (prev_type == FSI)
- prev_type = bidi_it->type_after_wn;
}
else /* EOB or end of string */
prev_type = NEUTRAL_B;
if (typ1 != STRONG_R && typ1 != STRONG_AL)
{
type = LRI;
+ /* Override orig_type, which will be needed when we come to
+ examine the next character, which is the first character
+ inside the isolate. */
+ bidi_it->orig_type = type;
goto fsi_as_lri;
}
else
- type = RLI;
+ {
+ type = RLI;
+ bidi_it->orig_type = type;
+ }
/* FALLTHROUGH */
case RLI: /* X5a */
if (override == NEUTRAL_DIR)
if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
type = WEAK_EN;
}
- else if (bidi_it->next_en_pos >=0)
+ else if (type == WEAK_BN
+ /* This condition is for the following important case:
+
+ . we are at level zero
+ . either previous strong character was L,
+ or we've seen no strong characters since sos
+ and the base paragraph direction is L2R
+ . this BN is NOT a bidi directional control
+
+ For such a situation, either this BN will be
+ converted to EN per W5, and then to L by virtue
+ of W7; or it will become ON per W6, and then L
+ because of N1/N2. So we take a shortcut here
+ and make it L right away, to avoid the
+ potentially costly loop below. This is
+ important when the buffer has a long series of
+ control characters, like binary nulls, and no
+ R2L characters at all. */
+ && new_level == 0
+ && !bidi_explicit_dir_char (bidi_it->ch)
+ && ((bidi_it->last_strong.type == STRONG_L)
+ || (bidi_it->last_strong.type == UNKNOWN_BT
+ && bidi_it->sos == L2R)))
+ type = STRONG_L;
+ else if (bidi_it->next_en_pos >= 0)
{
/* We overstepped the last known position for ET
resolution but there could be other such characters
And finally, cross-reference these two:
- fgrep -w -f brackets.txt decompositions.txt
+ grep -Fw -f brackets.txt decompositions.txt
where "decompositions.txt" was produced by the 1st script, and
- "brackets.txt" by the 2nd script. In the output of fgrep, look
+ "brackets.txt" by the 2nd script. In the output of grep, look
only for decompositions that don't begin with some compatibility
formatting tag, such as "<compat>". Only decompositions that
consist solely of character codepoints are relevant to bidi
entering the expensive loop in the "else" clause. */
else if (current_level == 0
&& bidi_it->prev_for_neutral.type == STRONG_L
- && type != WEAK_BN
- && !bidi_explicit_dir_char (bidi_it->ch)
- && !bidi_isolate_fmt_char (type))
+ && (ASCII_CHAR_P (bidi_it->ch)
+ || (type != WEAK_BN
+ && !bidi_explicit_dir_char (bidi_it->ch)
+ && !bidi_isolate_fmt_char (type))))
type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
STRONG_L, current_level);
else if (/* current level is 1 */
{
/* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
the assumption of batch-style processing; see clauses W4,
- W5, and especially N1, which require to look far forward
+ W5, and especially N1, which require looking far forward
(as well as back) in the buffer/string. May the fleas of
a thousand camels infest the armpits of those who design
supposedly general-purpose algorithms by looking at their
}
}
- /* Perhaps the character we want is already cached s fully resolved.
+ /* Perhaps the character we want is already cached as fully resolved.
If it is, the call to bidi_cache_find below will return a type
other than UNKNOWN_BT. */
if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
if ((bidi_it->orig_type == NEUTRAL_WS
|| bidi_it->orig_type == WEAK_BN
|| bidi_isolate_fmt_char (bidi_it->orig_type))
- && bidi_it->next_for_ws.charpos < bidi_it->charpos)
+ && bidi_it->next_for_ws.charpos < bidi_it->charpos
+ /* If this character is already at base level, we don't need to
+ reset it, so avoid the potentially costly loop below. */
+ && level != bidi_it->level_stack[0].level)
{
int ch;
ptrdiff_t clen = bidi_it->ch_len;