/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2012
- Free Software Foundation, Inc.
+ Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
+ Foundation, Inc.
This file is part of GNU Emacs.
#include <config.h>
#include <stdio.h>
-#include <setjmp.h>
#include "lisp.h"
#include "character.h"
#include "buffer.h"
#include "dispextern.h"
+#include "region-cache.h"
static bool bidi_initialized = 0;
/* Return the bidi type of a character CH, subject to the current
directional OVERRIDE. */
-static inline bidi_type_t
+static bidi_type_t
bidi_get_type (int ch, bidi_dir_t override)
{
bidi_type_t default_type;
if (ch == BIDI_EOB)
return NEUTRAL_B;
if (ch < 0 || ch > MAX_CHAR)
- abort ();
+ emacs_abort ();
default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
/* Every valid character code, even those that are unassigned by the
DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
(= zero) code from CHAR_TABLE_REF, that's a bug. */
if (default_type == UNKNOWN_BT)
- abort ();
+ emacs_abort ();
if (override == NEUTRAL_DIR)
return default_type;
else if (override == R2L)
return STRONG_R;
else
- abort (); /* can't happen: handled above */
+ emacs_abort (); /* can't happen: handled above */
}
}
}
-static inline void
+static void
bidi_check_type (bidi_type_t type)
{
eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
}
/* Given a bidi TYPE of a character, return its category. */
-static inline bidi_category_t
+static bidi_category_t
bidi_get_category (bidi_type_t type)
{
switch (type)
case NEUTRAL_ON:
return NEUTRAL;
default:
- abort ();
+ emacs_abort ();
}
}
if (c == BIDI_EOB)
return c;
if (c < 0 || c > MAX_CHAR)
- abort ();
+ emacs_abort ();
val = CHAR_TABLE_REF (bidi_mirror_table, c);
if (INTEGERP (val))
/* Minimal test we must do in optimized builds, to prevent weird
crashes further down the road. */
if (v < 0 || v > MAX_CHAR)
- abort ();
+ emacs_abort ();
return v;
}
embedding levels on either side of the run boundary. Also, update
the saved info about previously seen characters, since that info is
generally valid for a single level run. */
-static inline void
+static void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
int higher_level = (level_before > level_after ? level_before : level_after);
/* Push the current embedding level and override status; reset the
current level to LEVEL and the current override status to OVERRIDE. */
-static inline void
+static void
bidi_push_embedding_level (struct bidi_it *bidi_it,
int level, bidi_dir_t override)
{
/* Pop the embedding level and directional override status from the
stack, and return the new level. */
-static inline int
+static int
bidi_pop_embedding_level (struct bidi_it *bidi_it)
{
/* UAX#9 says to ignore invalid PDFs. */
}
/* Record in SAVED_INFO the information about the current character. */
-static inline void
+static void
bidi_remember_char (struct bidi_saved_info *saved_info,
struct bidi_it *bidi_it)
{
/* Copy the bidi iterator from FROM to TO. To save cycles, this only
copies the part of the level stack that is actually in use. */
-static inline void
+static void
bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
{
- int i;
-
- /* Copy everything except the level stack and beyond. */
- memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
-
- /* Copy the active part of the level stack. */
- to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
- for (i = 1; i <= from->stack_idx; i++)
- to->level_stack[i] = from->level_stack[i];
+ /* Copy everything from the start through the active part of
+ the level stack. */
+ memcpy (to, from,
+ (offsetof (struct bidi_it, level_stack[1])
+ + from->stack_idx * sizeof from->level_stack[0]));
}
\f
intact. This is called when the cached information is no more
useful for the current iteration, e.g. when we were reseated to a
new position on the same object. */
-static inline void
+static void
bidi_cache_reset (void)
{
bidi_cache_idx = bidi_cache_start;
iterator for reordering a buffer or a string that does not come
from display properties, because that means all the previously
cached info is of no further use. */
-static inline void
+static void
bidi_cache_shrink (void)
{
if (bidi_cache_size > BIDI_CACHE_CHUNK)
bidi_cache_reset ();
}
-static inline void
+static void
bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
if (idx < bidi_cache_start || idx >= bidi_cache_idx)
- abort ();
+ emacs_abort ();
bidi_copy_it (bidi_it, &bidi_cache[idx]);
bidi_it->scan_dir = current_scan_dir;
level less or equal to LEVEL. if LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline ptrdiff_t
+static ptrdiff_t
bidi_cache_search (ptrdiff_t charpos, int level, int dir)
{
ptrdiff_t i, i_start;
return -1;
}
-static inline void
+static void
bidi_cache_ensure_space (ptrdiff_t idx)
{
/* Enlarge the cache as needed. */
}
}
-static inline void
+static void
bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
{
ptrdiff_t idx;
/* We should never cache on backward scans. */
if (bidi_it->scan_dir == -1)
- abort ();
+ emacs_abort ();
idx = bidi_cache_search (bidi_it->charpos, -1, 1);
if (idx < 0)
idx = bidi_cache_start;
}
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
bidi_copy_it (&bidi_cache[idx], bidi_it);
if (!resolved)
bidi_cache[idx].resolved_level = -1;
bidi_cache_idx = idx + 1;
}
-static inline bidi_type_t
+static bidi_type_t
bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
{
ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
return UNKNOWN_BT;
}
-static inline int
+static int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
- abort ();
+ emacs_abort ();
return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
}
/* Save the current iterator state in its entirety after the last
used cache slot. */
bidi_cache_ensure_space (bidi_cache_idx);
- memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
+ bidi_cache[bidi_cache_idx++] = *bidi_it;
/* Push the current cache start onto the stack. */
eassert (bidi_cache_sp < IT_STACK_SIZE);
bidi_pop_it (struct bidi_it *bidi_it)
{
if (bidi_cache_start <= 0)
- abort ();
+ emacs_abort ();
/* Reset the next free cache slot index to what it was before the
call to bidi_push_it. */
bidi_cache_idx = bidi_cache_start - 1;
/* Restore the bidi iterator state saved in the cache. */
- memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
+ *bidi_it = bidi_cache[bidi_cache_idx];
/* Pop the previous cache start from the stack. */
if (bidi_cache_sp <= 0)
- abort ();
+ emacs_abort ();
bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
/* Invalidate the last-used cache slot data. */
{
bidi_type_table = uniprop_table (intern ("bidi-class"));
if (NILP (bidi_type_table))
- abort ();
+ emacs_abort ();
staticpro (&bidi_type_table);
bidi_mirror_table = uniprop_table (intern ("mirroring"));
if (NILP (bidi_mirror_table))
- abort ();
+ emacs_abort ();
staticpro (&bidi_mirror_table);
Qparagraph_start = intern ("paragraph-start");
/* Do whatever UAX#9 clause X8 says should be done at paragraph's
end. */
-static inline void
+static void
bidi_set_paragraph_end (struct bidi_it *bidi_it)
{
bidi_it->invalid_levels = 0;
/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
are zero-based character positions in S, BEGBYTE is byte position
corresponding to BEG. UNIBYTE means S is a unibyte string. */
-static inline ptrdiff_t
-bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
- const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
+static ptrdiff_t
+bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
+ ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
{
ptrdiff_t pos = beg;
const unsigned char *p = s + begbyte, *start = p;
else
{
if (!CHAR_HEAD_P (*p))
- abort ();
+ emacs_abort ();
while (pos < end)
{
return p - start;
}
-/* Fetch and returns the character at byte position BYTEPOS. If S is
+/* Fetch and return the character at byte position BYTEPOS. If S is
non-NULL, fetch the character from string S; otherwise fetch the
character from the current buffer. UNIBYTE means S is a
unibyte string. */
-static inline int
+static int
bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
{
if (s)
{
+ s += bytepos;
if (unibyte)
- return s[bytepos];
- else
- return STRING_CHAR (s + bytepos);
+ return *s;
}
else
- return FETCH_MULTIBYTE_CHAR (bytepos);
+ s = BYTE_POS_ADDR (bytepos);
+ return STRING_CHAR (s);
}
-/* Fetch and return the character at BYTEPOS/CHARPOS. If that
+/* Fetch and return the character at CHARPOS/BYTEPOS. If that
character is covered by a display string, treat the entire run of
covered characters as a single character, either u+2029 or u+FFFC,
and return their combined length in CH_LEN and NCHARS. DISP_POS
u+2029 to handle it as a paragraph separator. STRING->s is the C
string to iterate, or NULL if iterating over a buffer or a Lisp
string; in the latter case, STRING->lstring is the Lisp string. */
-static inline int
-bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
+static int
+bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
int *disp_prop, struct bidi_string_data *string,
+ struct window *w,
bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
int ch;
if (charpos < endpos && charpos > *disp_pos)
{
SET_TEXT_POS (pos, charpos, bytepos);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
/* We don't expect to find ourselves in the middle of a display
property. Hopefully, it will never be needed. */
if (charpos > *disp_pos)
- abort ();
+ emacs_abort ();
/* Text covered by `display' properties and overlays with
display properties or display strings is handled as a single
character that represents the entire run of characters
}
*nchars = disp_end_pos - *disp_pos;
if (*nchars <= 0)
- abort ();
+ emacs_abort ();
if (string->s)
*ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
disp_end_pos, string->unibyte);
&& *disp_prop)
{
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
return val;
}
+/* If the user has requested the long scans caching, make sure that
+ BIDI cache is enabled. Otherwise, make sure it's disabled. */
+
+static struct region_cache *
+bidi_paragraph_cache_on_off (void)
+{
+ if (NILP (BVAR (current_buffer, cache_long_scans)))
+ {
+ if (current_buffer->bidi_paragraph_cache)
+ {
+ free_region_cache (current_buffer->bidi_paragraph_cache);
+ current_buffer->bidi_paragraph_cache = 0;
+ }
+ return NULL;
+ }
+ else
+ {
+ if (!current_buffer->bidi_paragraph_cache)
+ current_buffer->bidi_paragraph_cache = new_region_cache ();
+ return current_buffer->bidi_paragraph_cache;
+ }
+}
+
/* On my 2005-vintage machine, searching back for paragraph start
takes ~1 ms per line. And bidi_paragraph_init is called 4 times
when user types C-p. The number below limits each call to
{
Lisp_Object re = paragraph_start_re;
ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
- ptrdiff_t n = 0;
+ struct region_cache *bpc = bidi_paragraph_cache_on_off ();
+ ptrdiff_t n = 0, oldpos = pos, next;
while (pos_byte > BEGV_BYTE
&& n++ < MAX_PARAGRAPH_SEARCH
display string? And what if a display string covering some
of the text over which we scan back includes
paragraph_start_re? */
- pos = find_next_newline_no_quit (pos - 1, -1);
- pos_byte = CHAR_TO_BYTE (pos);
+ DEC_BOTH (pos, pos_byte);
+ if (bpc && region_cache_backward (current_buffer, bpc, pos, &next))
+ {
+ pos = next, pos_byte = CHAR_TO_BYTE (pos);
+ break;
+ }
+ else
+ pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
}
if (n >= MAX_PARAGRAPH_SEARCH)
- pos_byte = BEGV_BYTE;
+ pos = BEGV, pos_byte = BEGV_BYTE;
+ if (bpc)
+ know_region_cache (current_buffer, bpc, pos, oldpos);
return pos_byte;
}
dir = L2R;
/* We should never be called at EOB or before BEGV. */
else if (bidi_it->charpos >= end || bytepos < begbyte)
- abort ();
+ emacs_abort ();
if (dir == L2R)
{
bytepos = pstartbyte;
if (!string_p)
pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
- &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop,
+ &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
&& bidi_at_paragraph_end (pos, bytepos) >= -1)
break;
/* Fetch next character and advance to get past it. */
- ch = bidi_fetch_char (bytepos, pos, &disp_pos,
- &disp_prop, &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos,
+ &disp_prop, &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
/* FXIME: What if p is covered by a display
string? See also a FIXME inside
bidi_find_paragraph_start. */
- p--;
- pbyte = CHAR_TO_BYTE (p);
+ DEC_BOTH (p, pbyte);
prevpbyte = bidi_find_paragraph_start (p, pbyte);
}
pstartbyte = prevpbyte;
&& no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
}
else
- abort ();
+ emacs_abort ();
/* Contrary to UAX#9 clause P3, we only default the paragraph
direction to L2R if we have no previous usable paragraph
The rest of this file constitutes the core of the UBA implementation.
***********************************************************************/
-static inline bool
+static bool
bidi_explicit_dir_char (int ch)
{
bidi_type_t ch_type;
if (!bidi_initialized)
- abort ();
+ emacs_abort ();
ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
return (ch_type == LRE || ch_type == LRO
|| ch_type == RLE || ch_type == RLO
: bidi_it->string.s);
if (bidi_it->charpos < 0)
- bidi_it->charpos = 0;
- bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
- bidi_it->string.unibyte);
+ bidi_it->charpos = bidi_it->bytepos = 0;
+ eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
+ bidi_it->charpos,
+ bidi_it->string.unibyte));
}
else
{
if (bidi_it->charpos < BEGV)
- bidi_it->charpos = BEGV;
- bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+ {
+ bidi_it->charpos = BEGV;
+ bidi_it->bytepos = BEGV_BYTE;
+ }
+ eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
}
}
/* Don't move at end of buffer/string. */
/* Advance to the next character, skipping characters covered by
display strings (nchars > 1). */
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
bidi_it->charpos += bidi_it->nchars;
if (bidi_it->ch_len == 0)
- abort ();
+ emacs_abort ();
bidi_it->bytepos += bidi_it->ch_len;
}
/* Fetch the character at BYTEPOS. If it is covered by a
display string, treat the entire run of covered characters as
a single character u+FFFC. */
- curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
+ curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
&bidi_it->disp_pos, &bidi_it->disp_prop,
- &bidi_it->string, bidi_it->frame_window_p,
+ &bidi_it->string, bidi_it->w,
+ bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
bidi_it->ch = curchar;
}
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
if (level == prev_level) /* empty embedding */
saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
else /* this embedding is non-empty */
|| type == RLE
|| type == RLO
|| type == PDF)
- abort ();
+ emacs_abort ();
if (new_level != prev_level
|| bidi_it->type == NEUTRAL_B)
else if (bidi_it->sor == L2R)
type = STRONG_L;
else /* shouldn't happen! */
- abort ();
+ emacs_abort ();
}
if (type == WEAK_EN /* W2 */
&& bidi_it->last_strong.type_after_w1 == STRONG_AL)
: bidi_it->string.s);
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
next_char
= (bidi_it->charpos + bidi_it->nchars >= eob
? BIDI_EOB
/* Resolve the type of a neutral character according to the type of
surrounding strong text and the current embedding level. */
-static inline bidi_type_t
+static bidi_type_t
bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
{
/* N1: European and Arabic numbers are treated as though they were R. */
|| type == NEUTRAL_S
|| type == NEUTRAL_WS
|| type == NEUTRAL_ON))
- abort ();
+ emacs_abort ();
if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
we are already at paragraph end. */
bidi_type_t next_type;
if (bidi_it->scan_dir == -1)
- abort ();
+ emacs_abort ();
bidi_copy_it (&saved_it, bidi_it);
/* Scan the text forward until we find the first non-neutral
next_type = STRONG_R;
break;
case WEAK_BN:
+ case NEUTRAL_ON: /* W6/Retaining */
if (!bidi_explicit_dir_char (bidi_it->ch))
- abort (); /* can't happen: BNs are skipped */
+ emacs_abort (); /* can't happen: BNs are skipped */
/* FALLTHROUGH */
case NEUTRAL_B:
/* Marched all the way to the end of this level run.
}
break;
default:
- abort ();
+ emacs_abort ();
}
type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
next_type, current_level);
/* This should always be called during a forward scan. */
if (bidi_it->scan_dir != 1)
- abort ();
+ emacs_abort ();
/* Reset the limit until which to ignore BNs if we step out of the
area where we found only empty levels. */
if (bidi_it->scan_dir > 0)
{
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
next_char_pos = bidi_it->charpos + bidi_it->nchars;
}
else if (bidi_it->charpos >= bob)
if (bidi_it->scan_dir == -1)
/* If we are going backwards, the iterator state is already cached
from previous scans, and should be fully resolved. */
- abort ();
+ emacs_abort ();
if (type == UNKNOWN_BT)
type = bidi_type_of_next_char (bidi_it);
|| (type == WEAK_BN && prev_level == level))
{
if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
- abort ();
+ emacs_abort ();
/* If the cached state shows a neutral character, it was not
resolved by bidi_resolve_neutral, so do it now. */
|| type == WEAK_BN
|| type == WEAK_EN
|| type == WEAK_AN))
- abort ();
+ emacs_abort ();
bidi_it->type = type;
bidi_check_type (bidi_it->type);
int dpp = bidi_it->disp_prop;
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
do {
- ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
- fwp, &clen, &nc);
+ ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
+ bidi_it->w, fwp, &clen, &nc);
if (ch == '\n' || ch == BIDI_EOB)
chtype = NEUTRAL_B;
else
{
int new_level;
+ /* If we are at end of level, its edges must be cached. */
if (end_flag)
- abort (); /* if we are at end of level, its edges must be cached */
+ emacs_abort ();
bidi_cache_iterator_state (bidi_it, 1);
do {
struct gcpro gcpro1;
if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
- abort ();
+ emacs_abort ();
if (bidi_it->scan_dir == 0)
{
next_level = bidi_peek_at_next_level (bidi_it);
while (next_level != expected_next_level)
{
+ /* If next_level is -1, it means we have an unresolved level
+ in the cache, which at this point should not happen. If
+ it does, we will infloop. */
+ eassert (next_level >= 0);
expected_next_level += incr;
level_to_search += incr;
bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
= bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
bidi_it->bytepos + bidi_it->ch_len);
if (bidi_it->nchars <= 0)
- abort ();
+ emacs_abort ();
if (sep_len >= 0)
{
bidi_it->new_paragraph = 1;