abort ();
default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
- if (default_type == 0)
- default_type = STRONG_L;
+ /* Every valid character code, even those that are unassigned by the
+ UCD, have some bidi-class property, according to
+ DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
+ (= zero) code from CHAR_TABLE_REF, that's a bug. */
+ if (default_type == UNKNOWN_BT)
+ abort ();
if (override == NEUTRAL_DIR)
return default_type;
}
}
-static void
+static inline void
bidi_check_type (bidi_type_t type)
{
- if (type < UNKNOWN_BT || type > NEUTRAL_ON)
- abort ();
+ xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
}
/* Given a bidi TYPE of a character, return its category. */
val = CHAR_TABLE_REF (bidi_mirror_table, c);
if (INTEGERP (val))
{
- int v = XINT (val);
+ EMACS_INT v = XINT (val);
if (v < 0 || v > MAX_CHAR)
abort ();
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
static inline ptrdiff_t
-bidi_cache_search (EMACS_INT charpos, int level, int dir)
+bidi_cache_search (ptrdiff_t charpos, int level, int dir)
{
ptrdiff_t i, i_start;
bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
+ bidi_cache[idx].disp_pos = bidi_it->disp_pos;
+ bidi_cache[idx].disp_prop = bidi_it->disp_prop;
}
bidi_cache_last_idx = idx;
}
static inline bidi_type_t
-bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
+bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
{
ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
/* Initialize the bidi iterator from buffer/string position CHARPOS. */
void
-bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
+bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, int frame_window_p,
struct bidi_it *bidi_it)
{
if (! bidi_initialized)
bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
bidi_it->disp_pos = -1; /* invalid/unknown */
- bidi_it->disp_prop_p = 0;
+ bidi_it->disp_prop = 0;
/* We can only shrink the cache if we are at the bottom level of its
"stack". */
if (bidi_cache_start == 0)
are zero-based character positions in S, BEGBYTE is byte position
corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
string. */
-static inline EMACS_INT
-bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
- const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
+static inline ptrdiff_t
+bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
+ const ptrdiff_t begbyte, const ptrdiff_t end, int unibyte)
{
- EMACS_INT pos = beg;
+ ptrdiff_t pos = beg;
const unsigned char *p = s + begbyte, *start = p;
if (unibyte)
character from the current buffer. UNIBYTE non-zero means S is a
unibyte string. */
static inline int
-bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
+bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, int unibyte)
{
if (s)
{
/* Fetch and return the character at BYTEPOS/CHARPOS. If that
character is covered by a display string, treat the entire run of
- covered characters as a single character u+FFFC, and return their
- combined length in CH_LEN and NCHARS. DISP_POS specifies the
- character position of the next display string, or -1 if not yet
- computed. DISP_PROP_P non-zero means that there's really a display
- string at DISP_POS, as opposed to when we searched till DISP_POS
- without findingone. When the next character is at or beyond that
+ covered characters as a single character, either u+2029 or u+FFFC,
+ and return their combined length in CH_LEN and NCHARS. DISP_POS
+ specifies the character position of the next display string, or -1
+ if not yet computed. When the next character is at or beyond that
position, the function updates DISP_POS with the position of the
- next display string. STRING->s is the C string to iterate, or NULL
- if iterating over a buffer or a Lisp string; in the latter case,
- STRING->lstring is the Lisp string. */
+ next display string. DISP_PROP non-zero means that there's really
+ a display string at DISP_POS, as opposed to when we searched till
+ DISP_POS without finding one. If DISP_PROP is 2, it means the
+ display spec is of the form `(space ...)', which is replaced with
+ u+2029 to handle it as a paragraph separator. STRING->s is the C
+ string to iterate, or NULL if iterating over a buffer or a Lisp
+ string; in the latter case, STRING->lstring is the Lisp string. */
static inline int
-bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
- int *disp_prop_p, struct bidi_string_data *string,
- int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
+bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
+ int *disp_prop, struct bidi_string_data *string,
+ int frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
int ch;
- EMACS_INT endpos =
+ ptrdiff_t endpos =
(string->s || STRINGP (string->lstring)) ? string->schars : ZV;
struct text_pos pos;
{
SET_TEXT_POS (pos, charpos, bytepos);
*disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
- disp_prop_p);
+ disp_prop);
}
/* Fetch the character at BYTEPOS. */
*ch_len = 1;
*nchars = 1;
*disp_pos = endpos;
- *disp_prop_p = 0;
+ *disp_prop = 0;
}
- else if (charpos >= *disp_pos && *disp_prop_p)
+ else if (charpos >= *disp_pos && *disp_prop)
{
- EMACS_INT disp_end_pos;
+ ptrdiff_t disp_end_pos;
/* We don't expect to find ourselves in the middle of a display
property. Hopefully, it will never be needed. */
if (charpos > *disp_pos)
abort ();
- /* Return the Unicode Object Replacement Character to represent
- the entire run of characters covered by the display string. */
- ch = 0xFFFC;
+ /* Text covered by `display' properties and overlays with
+ display properties or display strings is handled as a single
+ character that represents the entire run of characters
+ covered by the display property. */
+ if (*disp_prop == 2)
+ {
+ /* `(space ...)' display specs are handled as paragraph
+ separators for the purposes of the reordering; see UAX#9
+ section 3 and clause HL1 in section 4.3 there. */
+ ch = 0x2029;
+ }
+ else
+ {
+ /* All other display specs are handled as the Unicode Object
+ Replacement Character. */
+ ch = 0xFFFC;
+ }
disp_end_pos = compute_display_string_end (*disp_pos, string);
*nchars = disp_end_pos - *disp_pos;
if (*nchars <= 0)
/* If we just entered a run of characters covered by a display
string, compute the position of the next display string. */
if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
- && *disp_prop_p)
+ && *disp_prop)
{
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
*disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
- disp_prop_p);
+ disp_prop);
}
return ch;
following the buffer position, -1 if position is at the beginning
of a new paragraph, or -2 if position is neither at beginning nor
at end of a paragraph. */
-static EMACS_INT
-bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
+static ptrdiff_t
+bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
{
Lisp_Object sep_re;
Lisp_Object start_re;
- EMACS_INT val;
+ ptrdiff_t val;
sep_re = paragraph_separate_re;
start_re = paragraph_start_re;
return val;
}
+/* On my 2005-vintage machine, searching back for paragraph start
+ takes ~1 ms per line. And bidi_paragraph_init is called 4 times
+ when user types C-p. The number below limits each call to
+ bidi_paragraph_init to about 10 ms. */
+#define MAX_PARAGRAPH_SEARCH 7500
+
/* Find the beginning of this paragraph by looking back in the buffer.
- Value is the byte position of the paragraph's beginning. */
-static EMACS_INT
-bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
+ Value is the byte position of the paragraph's beginning, or
+ BEGV_BYTE if paragraph_start_re is still not found after looking
+ back MAX_PARAGRAPH_SEARCH lines in the buffer. */
+static ptrdiff_t
+bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
{
Lisp_Object re = paragraph_start_re;
- EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
+ ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
+ ptrdiff_t n = 0;
while (pos_byte > BEGV_BYTE
+ && n++ < MAX_PARAGRAPH_SEARCH
&& fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
{
/* FIXME: What if the paragraph beginning is covered by a
pos = find_next_newline_no_quit (pos - 1, -1);
pos_byte = CHAR_TO_BYTE (pos);
}
+ if (n >= MAX_PARAGRAPH_SEARCH)
+ pos_byte = BEGV_BYTE;
return pos_byte;
}
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
- EMACS_INT bytepos = bidi_it->bytepos;
+ ptrdiff_t bytepos = bidi_it->bytepos;
int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
- EMACS_INT pstartbyte;
+ ptrdiff_t pstartbyte;
/* Note that begbyte is a byte position, while end is a character
position. Yes, this is ugly, but we are trying to avoid costly
calls to BYTE_TO_CHAR and its ilk. */
- EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
- EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
+ ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
+ ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
/* Special case for an empty buffer. */
if (bytepos == begbyte && bidi_it->charpos == end)
else if (dir == NEUTRAL_DIR) /* P2 */
{
int ch;
- EMACS_INT ch_len, nchars;
- EMACS_INT pos, disp_pos = -1;
- int disp_prop_p = 0;
+ ptrdiff_t ch_len, nchars;
+ ptrdiff_t pos, disp_pos = -1;
+ int disp_prop = 0;
bidi_type_t type;
const unsigned char *s;
bytepos = pstartbyte;
if (!string_p)
pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
+ ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
&bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
break;
/* Fetch next character and advance to get past it. */
ch = bidi_fetch_char (bytepos, pos, &disp_pos,
- &disp_prop_p, &bidi_it->string,
+ &disp_prop, &bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
else
{
- EMACS_INT prevpbyte = pstartbyte;
- EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
+ ptrdiff_t prevpbyte = pstartbyte;
+ ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
/* Find the beginning of the previous paragraph, if any. */
while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
bidi_it->ch_len = 1;
bidi_it->nchars = 1;
bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
- bidi_it->disp_prop_p = 0;
+ bidi_it->disp_prop = 0;
}
else
{
display string, treat the entire run of covered characters as
a single character u+FFFC. */
curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
- &bidi_it->disp_pos, &bidi_it->disp_prop_p,
+ &bidi_it->disp_pos, &bidi_it->disp_prop,
&bidi_it->string, bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
int new_level = bidi_resolve_explicit_1 (bidi_it);
- EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
+ ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
const unsigned char *s = STRINGP (bidi_it->string.lstring)
? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
int next_char;
bidi_type_t type_of_next;
struct bidi_it saved_it;
- EMACS_INT eob =
+ ptrdiff_t eob =
(STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
? bidi_it->string.schars : ZV;
type = WEAK_EN;
else /* W5: ET/BN with EN after it. */
{
- EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
+ ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
const unsigned char *s =
STRINGP (bidi_it->string.lstring)
? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
bidi_type_t type;
int level, prev_level = -1;
struct bidi_saved_info next_for_neutral;
- EMACS_INT next_char_pos = -2;
+ ptrdiff_t next_char_pos = -2;
if (bidi_it->scan_dir == 1)
{
- EMACS_INT eob =
+ ptrdiff_t eob =
(bidi_it->string.s || STRINGP (bidi_it->string.lstring))
? bidi_it->string.schars : ZV;
&& bidi_it->next_for_ws.type == UNKNOWN_BT)
{
int ch;
- EMACS_INT clen = bidi_it->ch_len;
- EMACS_INT bpos = bidi_it->bytepos;
- EMACS_INT cpos = bidi_it->charpos;
- EMACS_INT disp_pos = bidi_it->disp_pos;
- EMACS_INT nc = bidi_it->nchars;
+ ptrdiff_t clen = bidi_it->ch_len;
+ ptrdiff_t bpos = bidi_it->bytepos;
+ ptrdiff_t cpos = bidi_it->charpos;
+ ptrdiff_t disp_pos = bidi_it->disp_pos;
+ ptrdiff_t nc = bidi_it->nchars;
struct bidi_string_data bs = bidi_it->string;
bidi_type_t chtype;
int fwp = bidi_it->frame_window_p;
- int dpp = bidi_it->disp_prop_p;
+ int dpp = bidi_it->disp_prop;
if (bidi_it->nchars <= 0)
abort ();
GCPRO1 (bidi_it->string.lstring);
/* If we just passed a newline, initialize for the next line. */
- if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
+ if (!bidi_it->first_elt
+ && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
bidi_line_init (bidi_it);
/* Prepare the sentinel iterator state, and cache it. When we bump
reordering, whereas we _must_ know the paragraph base direction
_before_ we process the paragraph's text, since the base
direction affects the reordering. */
- if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
+ if (bidi_it->scan_dir == 1
+ && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
{
/* The paragraph direction of the entire string, once
determined, is in effect for the entire string. Setting the
bidi_it->separator_limit = bidi_it->string.schars;
else if (bidi_it->bytepos < ZV_BYTE)
{
- EMACS_INT sep_len =
+ ptrdiff_t sep_len =
bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
bidi_it->bytepos + bidi_it->ch_len);
if (bidi_it->nchars <= 0)