STRONG
} bidi_category_t;
+/* UAX#9 says to search only for L, AL, or R types of characters, and
+ ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
+ level. Yudit indeed ignores them. This variable is therefore
+ non-zero by default, which ignores them; setting it to zero takes
+ them into account. */
extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
int bidi_ignore_explicit_marks_for_paragraph_level = 1;
abort ();
default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+ /* Every valid character code, even those that are unassigned by the
+ UCD, has some bidi-class property, according to the
+ DerivedBidiClass.txt file. Therefore, if we ever get the UNKNOWN_BT
+ (= zero) code from CHAR_TABLE_REF, that's a bug. */
+ if (default_type == UNKNOWN_BT)
+ abort ();
if (override == NEUTRAL_DIR)
return default_type;
}
}
/* Sanity-check TYPE, a bidi character type: it must lie in the
   inclusive range UNKNOWN_BT..NEUTRAL_ON.  Returns nothing; a value
   outside that range is reported through the range check below.  */
-static void
+static inline void
bidi_check_type (bidi_type_t type)
{
- if (type < UNKNOWN_BT || type > NEUTRAL_ON)
- abort ();
+ xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
}
/* Given a bidi TYPE of a character, return its category. */
{
if (bidi_cache_size > BIDI_CACHE_CHUNK)
{
- bidi_cache_size = BIDI_CACHE_CHUNK;
bidi_cache =
- (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
+ (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
+ bidi_cache_size = BIDI_CACHE_CHUNK;
}
bidi_cache_reset ();
}
bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
+ bidi_cache[idx].disp_pos = bidi_it->disp_pos;
+ bidi_cache[idx].disp_prop = bidi_it->disp_prop;
}
bidi_cache_last_idx = idx;
static void
bidi_initialize (void)
{
-
-#include "biditype.h"
-#include "bidimirror.h"
-
- int i;
-
- bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
+ bidi_type_table = uniprop_table (intern ("bidi-class"));
+ if (NILP (bidi_type_table))
+ abort ();
staticpro (&bidi_type_table);
- for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
- char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
- make_number (bidi_type[i].type));
-
- bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
+ bidi_mirror_table = uniprop_table (intern ("mirroring"));
+ if (NILP (bidi_mirror_table))
+ abort ();
staticpro (&bidi_mirror_table);
- for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
- char_table_set (bidi_mirror_table, bidi_mirror[i].from,
- make_number (bidi_mirror[i].to));
-
Qparagraph_start = intern ("paragraph-start");
staticpro (&Qparagraph_start);
paragraph_start_re = Fsymbol_value (Qparagraph_start);
bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
bidi_it->disp_pos = -1; /* invalid/unknown */
- bidi_it->disp_prop_p = 0;
+ bidi_it->disp_prop = 0;
/* We can only shrink the cache if we are at the bottom level of its
"stack". */
if (bidi_cache_start == 0)
/* Fetch and return the character at BYTEPOS/CHARPOS. If that
character is covered by a display string, treat the entire run of
- covered characters as a single character u+FFFC, and return their
- combined length in CH_LEN and NCHARS. DISP_POS specifies the
- character position of the next display string, or -1 if not yet
- computed. DISP_PROP_P non-zero means that there's really a display
- string at DISP_POS, as opposed to when we searched till DISP_POS
- without findingone. When the next character is at or beyond that
+ covered characters as a single character, either u+2029 or u+FFFC,
+ and return their combined length in CH_LEN and NCHARS. DISP_POS
+ specifies the character position of the next display string, or -1
+ if not yet computed. When the next character is at or beyond that
position, the function updates DISP_POS with the position of the
- next display string. STRING->s is the C string to iterate, or NULL
- if iterating over a buffer or a Lisp string; in the latter case,
- STRING->lstring is the Lisp string. */
+ next display string. DISP_PROP non-zero means that there's really
+ a display string at DISP_POS, as opposed to when we searched till
+ DISP_POS without finding one. If DISP_PROP is 2, it means the
+ display spec is of the form `(space ...)', which is replaced with
+ u+2029 to handle it as a paragraph separator. STRING->s is the C
+ string to iterate, or NULL if iterating over a buffer or a Lisp
+ string; in the latter case, STRING->lstring is the Lisp string. */
static inline int
bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
- int *disp_prop_p, struct bidi_string_data *string,
+ int *disp_prop, struct bidi_string_data *string,
int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
{
int ch;
{
SET_TEXT_POS (pos, charpos, bytepos);
*disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
- disp_prop_p);
+ disp_prop);
}
/* Fetch the character at BYTEPOS. */
*ch_len = 1;
*nchars = 1;
*disp_pos = endpos;
- *disp_prop_p = 0;
+ *disp_prop = 0;
}
- else if (charpos >= *disp_pos && *disp_prop_p)
+ else if (charpos >= *disp_pos && *disp_prop)
{
EMACS_INT disp_end_pos;
property. Hopefully, it will never be needed. */
if (charpos > *disp_pos)
abort ();
- /* Return the Unicode Object Replacement Character to represent
- the entire run of characters covered by the display string. */
- ch = 0xFFFC;
+ /* Text covered by `display' properties and overlays with
+ display properties or display strings is handled as a single
+ character that represents the entire run of characters
+ covered by the display property. */
+ if (*disp_prop == 2)
+ {
+ /* `(space ...)' display specs are handled as paragraph
+ separators for the purposes of the reordering; see UAX#9
+ section 3 and clause HL1 in section 4.3 there. */
+ ch = 0x2029;
+ }
+ else
+ {
+ /* All other display specs are handled as the Unicode Object
+ Replacement Character. */
+ ch = 0xFFFC;
+ }
disp_end_pos = compute_display_string_end (*disp_pos, string);
*nchars = disp_end_pos - *disp_pos;
if (*nchars <= 0)
/* If we just entered a run of characters covered by a display
string, compute the position of the next display string. */
if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
- && *disp_prop_p)
+ && *disp_prop)
{
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
*disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
- disp_prop_p);
+ disp_prop);
}
return ch;
int ch;
EMACS_INT ch_len, nchars;
EMACS_INT pos, disp_pos = -1;
- int disp_prop_p = 0;
+ int disp_prop = 0;
bidi_type_t type;
const unsigned char *s;
bytepos = pstartbyte;
if (!string_p)
pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
+ ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
&bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
for (pos += nchars, bytepos += ch_len;
- /* NOTE: UAX#9 says to search only for L, AL, or R types
- of characters, and ignore RLE, RLO, LRE, and LRO.
- However, I'm not sure it makes sense to omit those 4;
- should try with and without that to see the effect. */
(bidi_get_category (type) != STRONG)
|| (bidi_ignore_explicit_marks_for_paragraph_level
&& (type == RLE || type == RLO
break;
/* Fetch next character and advance to get past it. */
ch = bidi_fetch_char (bytepos, pos, &disp_pos,
- &disp_prop_p, &bidi_it->string,
+ &disp_prop, &bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
}
- if (type == STRONG_R || type == STRONG_AL) /* P3 */
+ if ((type == STRONG_R || type == STRONG_AL) /* P3 */
+ || (!bidi_ignore_explicit_marks_for_paragraph_level
+ && (type == RLO || type == RLE)))
bidi_it->paragraph_dir = R2L;
- else if (type == STRONG_L)
+ else if (type == STRONG_L
+ || (!bidi_ignore_explicit_marks_for_paragraph_level
+ && (type == LRO || type == LRE)))
bidi_it->paragraph_dir = L2R;
if (!string_p
&& no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
bidi_it->ch_len = 1;
bidi_it->nchars = 1;
bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
- bidi_it->disp_prop_p = 0;
+ bidi_it->disp_prop = 0;
}
else
{
display string, treat the entire run of covered characters as
a single character, either u+2029 or u+FFFC. */
curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
- &bidi_it->disp_pos, &bidi_it->disp_prop_p,
+ &bidi_it->disp_pos, &bidi_it->disp_prop,
&bidi_it->string, bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
struct bidi_string_data bs = bidi_it->string;
bidi_type_t chtype;
int fwp = bidi_it->frame_window_p;
- int dpp = bidi_it->disp_prop_p;
+ int dpp = bidi_it->disp_prop;
if (bidi_it->nchars <= 0)
abort ();