+#define FLAG_EMBEDDING_INSIDE 1
+#define FLAG_OPPOSITE_INSIDE 2
+
+/* A data type used in the stack maintained by
+ bidi_find_bracket_pairs below. */
+typedef struct bpa_stack_entry {
+ int close_bracket_char;
+ int open_bracket_idx;
+#ifdef ENABLE_CHECKING
+ ptrdiff_t open_bracket_pos;
+#endif
+ unsigned flags : 2;
+} bpa_stack_entry;
+
+/* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
+ BPA stack, which should be more than enough for actual bidi text. */
+#define MAX_BPA_STACK ((int)max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
+
+/* UAX#9 says to match opening brackets with the matching closing
+ brackets or their canonical equivalents. As of Unicode 7.0, there
+ are only 2 bracket characters that have canonical equivalence
+ decompositions: u+2329 and u+232A. So instead of accessing the
+ table in uni-decomposition.el, we just handle these 2 characters
+ with this simple macro. Note that ASCII characters don't have
+ canonical equivalents by definition. */
+
+/* To find all the characters that need to be processed by
+ CANONICAL_EQU, first find all the characters which have
+ decompositions in UnicodeData.txt, with this Awk script:
+
+ awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt
+
+ Then produce a list of all the bracket characters in BidiBrackets.txt:
+
+ awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt
+
+ And finally, cross-reference these two:
+
+ fgrep -w -f brackets.txt decompositions.txt
+
+ where "decompositions.txt" was produced by the 1st script, and
+ "brackets.txt" by the 2nd script. In the output of fgrep, look
+ only for decompositions that don't begin with some compatibility
+ formatting tag, such as "<compat>". Only decompositions that
+ consist solely of character codepoints are relevant to bidi
+ brackets processing. */
+
+#define CANONICAL_EQU(c) \
+ ( ASCII_CHAR_P (c) ? c \
+ : (c) == 0x2329 ? 0x3008 \
+ : (c) == 0x232a ? 0x3009 \
+ : c )
+
+#ifdef ENABLE_CHECKING
+# define STORE_BRACKET_CHARPOS \
+ bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
+#else
+# define STORE_BRACKET_CHARPOS /* nothing */
+#endif
+
+#define PUSH_BPA_STACK \
+ do { \
+ int ch; \
+ if (bpa_sp < MAX_BPA_STACK - 1) \
+ { \
+ bpa_sp++; \
+ ch = CANONICAL_EQU (bidi_it->ch); \
+ bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \
+ bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
+ bpa_stack[bpa_sp].flags = 0; \
+ STORE_BRACKET_CHARPOS; \
+ } \
+ } while (0)
+
+
+/* This function implements BPA, the Bidi Parenthesis Algorithm,
+ described in BD16 and N0 of UAX#9. It finds all the bracket pairs
+ in the current isolating sequence, and records the enclosed type
+ and the position of the matching bracket in the cache. It returns
+ non-zero if called with the iterator on the opening bracket which
+ has a matching closing bracket in the current isolating sequence,
+ zero otherwise. */
+static bool
+bidi_find_bracket_pairs (struct bidi_it *bidi_it)
+{
+ bidi_bracket_type_t btype;
+ bidi_type_t type = bidi_it->type;
+ bool retval = false;
+
+ /* When scanning backwards, we don't expect any unresolved bidi
+ bracket characters. */
+ if (bidi_it->scan_dir != 1)
+ emacs_abort ();
+
+ btype = bidi_paired_bracket_type (bidi_it->ch);
+ if (btype == BIDI_BRACKET_OPEN)
+ {
+ bpa_stack_entry bpa_stack[MAX_BPA_STACK];
+ int bpa_sp = -1;
+ struct bidi_it saved_it;
+ int base_level = bidi_it->level_stack[0].level;
+ int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+ int maxlevel = embedding_level;
+ bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
+ struct bidi_it tem_it;
+ bool l2r_seen = false, r2l_seen = false;
+ ptrdiff_t pairing_pos;
+ int idx_at_entry = bidi_cache_idx;
+
+ eassert (MAX_BPA_STACK >= 100);
+ bidi_copy_it (&saved_it, bidi_it);
+ /* bidi_cache_iterator_state refuses to cache on backward scans,
+ and bidi_cache_fetch_state doesn't bring scan_dir from the
+ cache, so we must initialize this explicitly. */
+ tem_it.scan_dir = 1;
+
+ while (1)
+ {
+ int old_sidx, new_sidx;
+ int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+
+ if (maxlevel < current_level)
+ maxlevel = current_level;
+ /* Mark every opening bracket character we've traversed by
+ putting its own position into bracket_pairing_pos. This
+ is examined in bidi_resolve_brackets to distinguish
+ brackets that were already resolved to stay NEUTRAL_ON,
+ and those that were not yet processed by this function
+ (because they were skipped when we skip higher embedding
+ levels below). */
+ if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1)
+ bidi_it->bracket_pairing_pos = bidi_it->charpos;
+ if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0))
+ {
+ /* No more space in cache -- give up and let the opening
+ bracket that started this be processed as a
+ NEUTRAL_ON. */
+ bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
+ bidi_copy_it (bidi_it, &saved_it);
+ goto give_up;
+ }
+ if (btype == BIDI_BRACKET_OPEN)
+ PUSH_BPA_STACK;
+ else if (btype == BIDI_BRACKET_CLOSE)
+ {
+ int sp = bpa_sp;
+ int curchar = CANONICAL_EQU (bidi_it->ch);
+
+ eassert (sp >= 0);
+ while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
+ sp--;
+ if (sp >= 0)
+ {
+ /* Update and cache the corresponding opening bracket. */
+ bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
+ &tem_it);
+#ifdef ENABLE_CHECKING
+ eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
+#endif
+ /* Determine the enclosed type for this bracket
+ pair's type resolution according to N0. */
+ if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
+ tem_it.bracket_enclosed_type = embedding_type; /* N0b */
+ else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
+ tem_it.bracket_enclosed_type /* N0c */
+ = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
+ else /* N0d */
+ tem_it.bracket_enclosed_type = UNKNOWN_BT;
+
+ /* Record the position of the matching closing
+ bracket, and update the cache. */
+ tem_it.bracket_pairing_pos = bidi_it->charpos;
+ bidi_cache_iterator_state (&tem_it, 0, 1);
+
+ /* Pop the BPA stack. */
+ bpa_sp = sp - 1;
+ }
+ if (bpa_sp < 0)
+ {
+ retval = true;
+ break;
+ }
+ }
+ else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
+ {
+ unsigned flag = 0;
+ int sp;
+
+ /* Whenever we see a strong type, update the flags of
+ all the slots on the stack. */
+ switch (bidi_it->type)
+ {
+ case STRONG_L:
+ flag = ((embedding_level & 1) == 0
+ ? FLAG_EMBEDDING_INSIDE
+ : FLAG_OPPOSITE_INSIDE);
+ l2r_seen = true;
+ break;
+ case STRONG_R:
+ case WEAK_EN:
+ case WEAK_AN:
+ flag = ((embedding_level & 1) == 1
+ ? FLAG_EMBEDDING_INSIDE
+ : FLAG_OPPOSITE_INSIDE);
+ r2l_seen = true;
+ break;
+ default:
+ break;
+ }
+ if (flag)
+ {
+ for (sp = bpa_sp; sp >= 0; sp--)
+ bpa_stack[sp].flags |= flag;
+ }
+ }
+ old_sidx = bidi_it->stack_idx;
+ type = bidi_resolve_weak (bidi_it);
+ /* Skip level runs excluded from this isolating run sequence. */
+ new_sidx = bidi_it->stack_idx;
+ if (bidi_it->level_stack[new_sidx].level > current_level
+ && (ISOLATE_STATUS (bidi_it, new_sidx)
+ || (new_sidx > old_sidx + 1
+ && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
+ {
+ while (bidi_it->level_stack[bidi_it->stack_idx].level
+ > current_level)
+ {
+ if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level)
+ maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level;
+ if (!bidi_cache_iterator_state (bidi_it,
+ type == NEUTRAL_B, 0))
+ {
+ /* No more space in cache -- give up and let the
+ opening bracket that started this be
+ processed as any other NEUTRAL_ON. */
+ bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
+ bidi_copy_it (bidi_it, &saved_it);
+ goto give_up;
+ }
+ type = bidi_resolve_weak (bidi_it);
+ }
+ }
+ if (type == NEUTRAL_B
+ || (bidi_it->level_stack[bidi_it->stack_idx].level
+ != current_level))
+ {
+ /* We've marched all the way to the end of this
+ isolating run sequence, and didn't find matching
+ closing brackets for some opening brackets. Leave
+ their type unchanged. */
+ pairing_pos = bidi_it->charpos;
+ break;
+ }
+ if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
+ btype = bidi_paired_bracket_type (bidi_it->ch);
+ else
+ btype = BIDI_BRACKET_NONE;
+ }
+
+ /* Restore bidi_it from the cache, which should have the bracket
+ resolution members set as determined by the above loop. */
+ type = bidi_cache_find (saved_it.charpos, 0, bidi_it);
+ eassert (type == NEUTRAL_ON);
+
+ /* The following is an optimization for bracketed text that has
+ only one level which is equal to the paragraph's base
+ embedding level. That is, only L2R and weak/neutral
+ characters in a L2R paragraph, or only R2L and weak/neutral
+ characters in a R2L paragraph. Such brackets can be resolved
+ by bidi_resolve_neutral, which has a further shortcut for
+ this case. So we pretend we did not resolve the brackets in
+ this case, set up next_for_neutral for the entire bracketed
+ text, and reset the cache to the character before the opening
+ bracket. The upshot is to allow bidi_move_to_visually_next
+ reset the cache when it returns this opening bracket, thus
+ cutting significantly on the size of the cache, which is
+ important with long lines, especially if word-wrap is non-nil
+ (which requires the display engine to copy the cache back and
+ forth many times). */
+ if (maxlevel == base_level
+ && ((base_level == 0 && !r2l_seen)
+ || (base_level == 1 && !l2r_seen)))
+ {
+ ptrdiff_t eob
+ = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
+ ? bidi_it->string.schars : ZV);
+
+ if (retval)
+ pairing_pos = bidi_it->bracket_pairing_pos;
+
+ /* This special value (which cannot possibly happen when
+ brackets are resolved, since there's no character at ZV)
+ will be noticed by bidi_resolve_explicit, and will be
+ copied to the following iterator states, instead of being
+ reset to -1. */
+ bidi_it->bracket_pairing_pos = eob;
+ /* This type value will be used for resolving the outermost
+ closing bracket in bidi_resolve_brackets. */
+ bidi_it->bracket_enclosed_type = embedding_type;
+ /* bidi_cache_last_idx is set to the index of the current
+ state, because we just called bidi_cache_find above.
+ That state describes the outermost opening bracket, the
+ one with which we entered this function. Force the cache
+ to "forget" all the cached states starting from that state. */
+ bidi_cache_reset_to (bidi_cache_last_idx - bidi_cache_start);
+ /* Set up the next_for_neutral member, to help
+ bidi_resolve_neutral. */
+ bidi_it->next_for_neutral.type = embedding_type;
+ bidi_it->next_for_neutral.charpos = pairing_pos;
+ /* Pretend we didn't resolve this bracket. */
+ retval = false;
+ }
+ }
+
+ give_up:
+ return retval;
+}
+
+static void
+bidi_record_type_for_neutral (struct bidi_saved_info *info, int level,
+ bool nextp)
+{
+ int idx;
+
+ for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++)
+ {
+ int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level;
+
+ if (lev <= level)
+ {
+ eassert (lev == level);
+ if (nextp)
+ bidi_cache[idx].next_for_neutral = *info;
+ else
+ bidi_cache[idx].prev_for_neutral = *info;
+ break;
+ }
+ }
+}
+