as per UAX#9, a part of the Unicode Standard.
Unlike the reference and most other implementations, this one is
- designed to be called once for every character in the buffer.
+ designed to be called once for every character in the buffer or
+ string.
- The main entry point is bidi_get_next_char_visually. Each time it
+ The main entry point is bidi_move_to_visually_next. Each time it
is called, it finds the next character in the visual order, and
returns its information in a special structure. The caller is then
expected to process this character for display or any other
- purposes, and call bidi_get_next_char_visually for the next
- character. See the comments in bidi_get_next_char_visually for
- more details about its algorithm that finds the next visual-order
+ purposes, and call bidi_move_to_visually_next for the next
+ character. See the comments in bidi_move_to_visually_next for more
+ details about its algorithm that finds the next visual-order
character by resolving their levels on the fly.
+ The two other entry points are bidi_paragraph_init and
+ bidi_mirror_char. The first determines the base direction of a
+ paragraph, while the second returns the mirrored version of its
+ argument character.
+
If you want to understand the code, you will have to read it
together with the relevant portions of UAX#9. The comments include
references to UAX#9 rules, for that very reason.
int bidi_ignore_explicit_marks_for_paragraph_level = 1;
-static Lisp_Object fallback_paragraph_start_re, fallback_paragraph_separate_re;
+static Lisp_Object paragraph_start_re, paragraph_separate_re;
static Lisp_Object Qparagraph_start, Qparagraph_separate;
static void
bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
make_number (bidi_type[i].type));
- fallback_paragraph_start_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
- if (!STRINGP (fallback_paragraph_start_re))
- fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
- staticpro (&fallback_paragraph_start_re);
Qparagraph_start = intern ("paragraph-start");
staticpro (&Qparagraph_start);
- fallback_paragraph_separate_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
- if (!STRINGP (fallback_paragraph_separate_re))
- fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
- staticpro (&fallback_paragraph_separate_re);
+ paragraph_start_re = Fsymbol_value (Qparagraph_start);
+ if (!STRINGP (paragraph_start_re))
+ paragraph_start_re = build_string ("\f\\|[ \t]*$");
+ staticpro (&paragraph_start_re);
Qparagraph_separate = intern ("paragraph-separate");
staticpro (&Qparagraph_separate);
+ paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
+ if (!STRINGP (paragraph_separate_re))
+ paragraph_separate_re = build_string ("[ \t\f]*$");
+ staticpro (&paragraph_separate_re);
bidi_initialized = 1;
}
/* Return the bidi type of a character CH, subject to the current
directional OVERRIDE. */
-bidi_type_t
+static INLINE bidi_type_t
bidi_get_type (int ch, bidi_dir_t override)
{
bidi_type_t default_type;
}
/* Given a bidi TYPE of a character, return its category. */
-bidi_category_t
+static INLINE bidi_category_t
bidi_get_category (bidi_type_t type)
{
switch (type)
/* Copy the bidi iterator from FROM to TO. To save cycles, this only
copies the part of the level stack that is actually in use. */
-static inline void
+static INLINE void
bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
{
int i;
/* Caching the bidi iterator states. */
-static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
-static int bidi_cache_idx;
-static int bidi_cache_last_idx;
+#define BIDI_CACHE_CHUNK 200
+static struct bidi_it *bidi_cache;
+static size_t bidi_cache_size = 0;
+static size_t elsz = sizeof (struct bidi_it);
+static int bidi_cache_idx; /* next unused cache slot */
+static int bidi_cache_last_idx; /* slot of last cache hit */
-static inline void
+static INLINE void
bidi_cache_reset (void)
{
bidi_cache_idx = 0;
bidi_cache_last_idx = -1;
}
-static inline void
+static INLINE void
+bidi_cache_shrink (void)
+{
+ if (bidi_cache_size > BIDI_CACHE_CHUNK)
+ {
+ bidi_cache_size = BIDI_CACHE_CHUNK;
+ bidi_cache =
+ (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+ }
+ bidi_cache_reset ();
+}
+
+static INLINE void
bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
level less than or equal to LEVEL. If LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline int
+static INLINE int
bidi_cache_search (int charpos, int level, int dir)
{
int i, i_start;
return -1;
}
-static inline void
+static INLINE void
bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
{
int idx;
if (idx < 0)
{
idx = bidi_cache_idx;
- /* Don't overrun the cache limit. */
- if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
- abort ();
+ /* Enlarge the cache as needed. */
+ if (idx >= bidi_cache_size)
+ {
+ bidi_cache_size += BIDI_CACHE_CHUNK;
+ bidi_cache =
+ (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+ }
/* Character positions should correspond to cache positions 1:1.
If we are outside the range of cached positions, the cache is
useless and must be reset. */
bidi_copy_it (&bidi_cache[idx], bidi_it);
if (!resolved)
bidi_cache[idx].resolved_level = -1;
- bidi_cache[idx].new_paragraph = 0;
}
else
{
bidi_cache_idx = idx + 1;
}
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
{
int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
return UNKNOWN_BT;
}
-static inline int
+static INLINE int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
following the buffer position, -1 if position is at the beginning
of a new paragraph, or -2 if position is neither at beginning nor
at end of a paragraph. */
-EMACS_INT
+static EMACS_INT
bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
{
- Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
- Fcurrent_buffer ());
- Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start,
- Fcurrent_buffer ());
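+ /* FIXME: This now uses the regexps cached in paragraph_separate_re
+ and paragraph_start_re; the buffer-local values that used to be
+ fetched with Fbuffer_local_value are no longer consulted. Should
+ they be, or is the global symbol value sufficient? */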
+ Lisp_Object sep_re;
+ Lisp_Object start_re;
EMACS_INT val;
- if (!STRINGP (sep_re))
- sep_re = fallback_paragraph_separate_re;
- if (!STRINGP (start_re))
- start_re = fallback_paragraph_start_re;
+ sep_re = paragraph_separate_re;
+ start_re = paragraph_start_re;
val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
if (val < 0)
embedding levels on either side of the run boundary. Also, update
the saved info about previously seen characters, since that info is
generally valid for a single level run. */
-static inline void
+static INLINE void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
int higher_level = level_before > level_after ? level_before : level_after;
static EMACS_INT
bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
{
- Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
+ Lisp_Object re = paragraph_start_re;
EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
- if (!STRINGP (re))
- re = fallback_paragraph_start_re;
while (pos_byte > BEGV_BYTE
&& fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
{
int ch, ch_len;
EMACS_INT pos;
bidi_type_t type;
- EMACS_INT sep_len;
+
+ if (!bidi_initialized)
+ bidi_initialize ();
/* If we are inside a paragraph separator, we are just waiting
for the separator to be exhausted; use the previous paragraph
middle of it. Find where this paragraph starts. */
bytepos = bidi_find_paragraph_start (pos, bytepos);
- /* We should always be at the beginning of a new line at this
- point. */
- if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n'))
- abort ();
-
bidi_it->separator_limit = -1;
bidi_it->new_paragraph = 0;
ch = FETCH_CHAR (bytepos);
/* Contrary to UAX#9 clause P3, we only default the paragraph
direction to L2R if we have no previous usable paragraph
direction. */
- if (bidi_it->paragraph_dir == NEUTRAL_DIR)
+ if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
if (bidi_it->paragraph_dir == R2L)
bidi_it->level_stack[0].level = 1;
/* Do whatever UAX#9 clause X8 says should be done at paragraph's
end. */
-static inline void
+static INLINE void
bidi_set_paragraph_end (struct bidi_it *bidi_it)
{
bidi_it->invalid_levels = 0;
bidi_it->new_paragraph = 1;
bidi_it->separator_limit = -1;
bidi_it->type = NEUTRAL_B;
- bidi_it->type_after_w1 = UNKNOWN_BT;
- bidi_it->orig_type = UNKNOWN_BT;
+ bidi_it->type_after_w1 = NEUTRAL_B;
+ bidi_it->orig_type = NEUTRAL_B;
bidi_it->prev_was_pdf = 0;
- bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
+ bidi_it->prev.type = bidi_it->prev.type_after_w1 =
+ bidi_it->prev.orig_type = UNKNOWN_BT;
bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
bidi_it->last_strong.orig_type = UNKNOWN_BT;
bidi_it->next_for_neutral.charpos = -1;
bidi_it->prev_for_neutral.type_after_w1 =
bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
+ bidi_cache_shrink ();
}
/* Push the current embedding level and override status; reset the
current level to LEVEL and the current override status to OVERRIDE. */
-static inline void
+static INLINE void
bidi_push_embedding_level (struct bidi_it *bidi_it,
int level, bidi_dir_t override)
{
/* Pop the embedding level and directional override status from the
stack, and return the new level. */
-static inline int
+static INLINE int
bidi_pop_embedding_level (struct bidi_it *bidi_it)
{
/* UAX#9 says to ignore invalid PDFs. */
}
/* Record in SAVED_INFO the information about the current character. */
-static inline void
+static INLINE void
bidi_remember_char (struct bidi_saved_info *saved_info,
struct bidi_it *bidi_it)
{
/* Resolve the type of a neutral character according to the type of
surrounding strong text and the current embedding level. */
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
{
/* N1: European and Arabic numbers are treated as though they were R. */
return STRONG_R;
}
-static inline int
+static INLINE int
bidi_explicit_dir_char (int c)
{
/* FIXME: this should be replaced with a lookup table with suitable
if (prev_level < new_level
&& bidi_it->type == WEAK_BN
&& bidi_it->ignore_bn_limit == 0 /* only if not already known */
- && bidi_it->ch != BIDI_EOB /* not already at EOB */
+ && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
&& bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
+ bidi_it->ch_len)))
{
/* Advance in the buffer, resolve weak types and return the type of
the next character after weak type resolution. */
-bidi_type_t
+static bidi_type_t
bidi_resolve_weak (struct bidi_it *bidi_it)
{
bidi_type_t type;
if (type == WEAK_NSM) /* W1 */
{
/* Note that we don't need to consider the case where the
- prev character has its type overridden by an RLO or LRO:
- such characters are outside the current level run, and
- thus not relevant to this NSM. Thus, NSM gets the
- orig_type of the previous character. */
- if (bidi_it->prev.type != UNKNOWN_BT)
- type = bidi_it->prev.orig_type;
+ prev character has its type overridden by an RLO or LRO,
+ because then either the type of this NSM would have been
+ also overridden, or the previous character is outside the
+ current level run, and thus not relevant to this NSM.
+ This is why NSM gets the type_after_w1 of the previous
+ character. */
+ if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
+ /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
+ && bidi_it->prev.type_after_w1 != NEUTRAL_B)
+ type = bidi_it->prev.type_after_w1;
else if (bidi_it->sor == R2L)
type = STRONG_R;
else if (bidi_it->sor == L2R)
return type;
}
-bidi_type_t
+static bidi_type_t
bidi_resolve_neutral (struct bidi_it *bidi_it)
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
/* Given an iterator state in BIDI_IT, advance one character position
in the buffer to the next character (in the logical order), resolve
the bidi type of that next character, and return that type. */
-bidi_type_t
+static bidi_type_t
bidi_type_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
the buffer to the next character (in the logical order), resolve
the embedding and implicit levels of that next character, and
return the resulting level. */
-int
+static int
bidi_level_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
if (bidi_it->scan_dir == 1)
{
/* There's no sense in trying to advance if we hit end of text. */
- if (bidi_it->ch == BIDI_EOB)
+ if (bidi_it->bytepos >= ZV_BYTE)
return bidi_it->resolved_level;
/* Record the info about the previous character. */
}
void
-bidi_get_next_char_visually (struct bidi_it *bidi_it)
+bidi_move_to_visually_next (struct bidi_it *bidi_it)
{
int old_level, new_level, next_level;
struct bidi_it sentinel;
if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
bidi_line_init (bidi_it);
- /* Prepare the sentinel iterator state. */
+ /* Prepare the sentinel iterator state, and cache it. When we bump
+ into it, scanning backwards, we'll know that the last non-base
+ level is exhausted. */
if (bidi_cache_idx == 0)
{
bidi_copy_it (&sentinel, bidi_it);
sentinel.ch = '\n'; /* doesn't matter, but why not? */
sentinel.ch_len = 1;
}
+ bidi_cache_iterator_state (&sentinel, 1);
}
old_level = bidi_it->resolved_level;
int incr = ascending ? 1 : -1;
int expected_next_level = old_level + incr;
- /* If we don't have anything cached yet, we need to cache the
- sentinel state, since we'll need it to record where to jump
- when the last non-base level is exhausted. */
- if (bidi_cache_idx == 0)
- bidi_cache_iterator_state (&sentinel, 1);
/* Jump (or walk) to the other edge of this level. */
bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
/* Switch scan direction and peek at the next character in the