X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/ffc65beee98a7408c1420c9c55dcb7b31a6114c8..ec6e26b8112dd5eea3558a5bb7e0b44f3122dea0:/src/bidi.c diff --git a/src/bidi.c b/src/bidi.c index feb422cc5d..18dce1931d 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -61,21 +61,21 @@ along with GNU Emacs. If not, see . */ #include "character.h" #include "buffer.h" #include "dispextern.h" +#include "region-cache.h" static bool bidi_initialized = 0; static Lisp_Object bidi_type_table, bidi_mirror_table; -#define LRM_CHAR 0x200E -#define RLM_CHAR 0x200F -#define BIDI_EOB -1 +#define BIDI_EOB (-1) /* Data type for describing the bidirectional character categories. */ typedef enum { UNKNOWN_BC, NEUTRAL, WEAK, - STRONG + STRONG, + EXPLICIT_FORMATTING } bidi_category_t; /* UAX#9 says to search only for L, AL, or R types of characters, and @@ -114,13 +114,9 @@ bidi_get_type (int ch, bidi_dir_t override) if (default_type == UNKNOWN_BT) emacs_abort (); - if (override == NEUTRAL_DIR) - return default_type; - switch (default_type) { - /* Although UAX#9 does not tell, it doesn't make sense to - override NEUTRAL_B and LRM/RLM characters. */ + case WEAK_BN: case NEUTRAL_B: case LRE: case LRO: @@ -128,20 +124,20 @@ bidi_get_type (int ch, bidi_dir_t override) case RLO: case PDF: return default_type; + /* FIXME: The isolate controls are treated as BN until we add + support for UBA v6.3. */ + case LRI: + case RLI: + case FSI: + case PDI: + return WEAK_BN; default: - switch (ch) - { - case LRM_CHAR: - case RLM_CHAR: - return default_type; - default: - if (override == L2R) /* X6 */ - return STRONG_L; - else if (override == R2L) - return STRONG_R; - else - emacs_abort (); /* can't happen: handled above */ - } + if (override == L2R) + return STRONG_L; + else if (override == R2L) + return STRONG_R; + else + return default_type; } } @@ -162,12 +158,7 @@ bidi_get_category (bidi_type_t type) case STRONG_L: case STRONG_R: case STRONG_AL: - case LRE: - case LRO: - case RLE: - case RLO: return STRONG; - case PDF: /* ??? really?? */ case WEAK_EN: case WEAK_ES: case WEAK_ET: @@ -175,12 +166,30 @@ bidi_get_category (bidi_type_t type) case WEAK_CS: case WEAK_NSM: case WEAK_BN: + /* FIXME */ + case LRI: + case RLI: + case FSI: + case PDI: return WEAK; case NEUTRAL_B: case NEUTRAL_S: case NEUTRAL_WS: case NEUTRAL_ON: return NEUTRAL; + case LRE: + case LRO: + case RLE: + case RLO: + case PDF: +#if 0 + /* FIXME: This awaits implementation of isolate support. */ + case LRI: + case RLI: + case FSI: + case PDI: +#endif + return EXPLICIT_FORMATTING; default: emacs_abort (); } @@ -869,8 +878,8 @@ bidi_line_init (struct bidi_it *bidi_it) are zero-based character positions in S, BEGBYTE is byte position corresponding to BEG. UNIBYTE means S is a unibyte string. */ static ptrdiff_t -bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg, - const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte) +bidi_count_bytes (const unsigned char *s, ptrdiff_t beg, + ptrdiff_t begbyte, ptrdiff_t end, bool unibyte) { ptrdiff_t pos = beg; const unsigned char *p = s + begbyte, *start = p; @@ -910,7 +919,7 @@ bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte) return STRING_CHAR (s); } -/* Fetch and return the character at BYTEPOS/CHARPOS. If that +/* Fetch and return the character at CHARPOS/BYTEPOS. If that character is covered by a display string, treat the entire run of covered characters as a single character, either u+2029 or u+FFFC, and return their combined length in CH_LEN and NCHARS. DISP_POS @@ -925,8 +934,9 @@ bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte) string to iterate, or NULL if iterating over a buffer or a Lisp string; in the latter case, STRING->lstring is the Lisp string. */ static int -bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, +bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos, int *disp_prop, struct bidi_string_data *string, + struct window *w, bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars) { int ch; @@ -940,7 +950,7 @@ bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, if (charpos < endpos && charpos > *disp_pos) { SET_TEXT_POS (pos, charpos, bytepos); - *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, + *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p, disp_prop); } @@ -1045,7 +1055,7 @@ bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, && *disp_prop) { SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len); - *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, + *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p, disp_prop); } @@ -1084,6 +1094,29 @@ bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos) return val; } +/* If the user has requested the long scans caching, make sure that + BIDI cache is enabled. Otherwise, make sure it's disabled. */ + +static struct region_cache * +bidi_paragraph_cache_on_off (void) +{ + if (NILP (BVAR (current_buffer, cache_long_scans))) + { + if (current_buffer->bidi_paragraph_cache) + { + free_region_cache (current_buffer->bidi_paragraph_cache); + current_buffer->bidi_paragraph_cache = 0; + } + return NULL; + } + else + { + if (!current_buffer->bidi_paragraph_cache) + current_buffer->bidi_paragraph_cache = new_region_cache (); + return current_buffer->bidi_paragraph_cache; + } +} + /* On my 2005-vintage machine, searching back for paragraph start takes ~1 ms per line. And bidi_paragraph_init is called 4 times when user types C-p. The number below limits each call to @@ -1099,18 +1132,33 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte) { Lisp_Object re = paragraph_start_re; ptrdiff_t limit = ZV, limit_byte = ZV_BYTE; - ptrdiff_t n = 0; + struct region_cache *bpc = bidi_paragraph_cache_on_off (); + ptrdiff_t n = 0, oldpos = pos, next; while (pos_byte > BEGV_BYTE && n++ < MAX_PARAGRAPH_SEARCH && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) - /* FIXME: What if the paragraph beginning is covered by a - display string? And what if a display string covering some - of the text over which we scan back includes - paragraph_start_re? */ - pos = find_next_newline (pos - 1, -1, &pos_byte); + { + /* FIXME: What if the paragraph beginning is covered by a + display string? And what if a display string covering some + of the text over which we scan back includes + paragraph_start_re? */ + DEC_BOTH (pos, pos_byte); + if (bpc && region_cache_backward (current_buffer, bpc, pos, &next)) + { + pos = next, pos_byte = CHAR_TO_BYTE (pos); + break; + } + else + pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte); + } if (n >= MAX_PARAGRAPH_SEARCH) - pos_byte = BEGV_BYTE; + pos = BEGV, pos_byte = BEGV_BYTE; + if (bpc) + know_region_cache (current_buffer, bpc, pos, oldpos); + /* Positions returned by the region cache are not limited to + BEGV..ZV range, so we limit them here. */ + pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE); return pos_byte; } @@ -1220,8 +1268,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) bytepos = pstartbyte; if (!string_p) pos = BYTE_TO_CHAR (bytepos); - ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop, - &bidi_it->string, + ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop, + &bidi_it->string, bidi_it->w, bidi_it->frame_window_p, &ch_len, &nchars); type = bidi_get_type (ch, NEUTRAL_DIR); @@ -1248,8 +1296,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) && bidi_at_paragraph_end (pos, bytepos) >= -1) break; /* Fetch next character and advance to get past it. */ - ch = bidi_fetch_char (bytepos, pos, &disp_pos, - &disp_prop, &bidi_it->string, + ch = bidi_fetch_char (pos, bytepos, &disp_pos, + &disp_prop, &bidi_it->string, bidi_it->w, bidi_it->frame_window_p, &ch_len, &nchars); pos += nchars; bytepos += ch_len; @@ -1279,8 +1327,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) /* FXIME: What if p is covered by a display string? See also a FIXME inside bidi_find_paragraph_start. */ - p--; - pbyte = CHAR_TO_BYTE (p); + DEC_BOTH (p, pbyte); prevpbyte = bidi_find_paragraph_start (p, pbyte); } pstartbyte = prevpbyte; @@ -1398,9 +1445,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) /* Fetch the character at BYTEPOS. If it is covered by a display string, treat the entire run of covered characters as a single character u+FFFC. */ - curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos, + curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos, &bidi_it->disp_pos, &bidi_it->disp_prop, - &bidi_it->string, bidi_it->frame_window_p, + &bidi_it->string, bidi_it->w, + bidi_it->frame_window_p, &bidi_it->ch_len, &bidi_it->nchars); } bidi_it->ch = curchar; @@ -2191,8 +2239,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) if (bidi_it->nchars <= 0) emacs_abort (); do { - ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs, - fwp, &clen, &nc); + ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs, + bidi_it->w, fwp, &clen, &nc); if (ch == '\n' || ch == BIDI_EOB) chtype = NEUTRAL_B; else