X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/0d7b2c96d388f5a9b539df3cb7f4ef115e7010b7..e2ae1c5a40e2802fcd9f5ee26b4906be97c8b878:/src/bidi.c diff --git a/src/bidi.c b/src/bidi.c index cc70d08f01..e5e08c6a25 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -1,5 +1,5 @@ /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs. - Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software + Copyright (C) 2000-2001, 2004-2005, 2009-2015 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -262,7 +262,6 @@ typedef enum { } bidi_category_t; static Lisp_Object paragraph_start_re, paragraph_separate_re; -static Lisp_Object Qparagraph_start, Qparagraph_separate; /*********************************************************************** @@ -546,6 +545,30 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from) characters). 200 was chosen as an upper limit for reasonably-long lines in a text file/buffer. */ #define BIDI_CACHE_CHUNK 200 +/* Maximum size we allow the cache to become, per iterator stack slot, + in units of struct bidi_it size. If we allow unlimited growth, we + could run out of memory for pathologically long bracketed text or + very long text lines that need to be reordered. This is aggravated + when word-wrap is in effect, since then functions display_line and + move_it_in_display_line_to need to keep up to 4 copies of the + cache. + + This limitation means there can be no more than that amount of + contiguous RTL text on any single physical line in a LTR paragraph, + and similarly with contiguous LTR + numeric text in a RTL + paragraph. (LTR text in a LTR paragraph and RTL text in a RTL + paragraph are not reordered, and so don't need the cache, and + cannot hit this limit.) More importantly, no single line can have + text longer than this inside paired brackets (because bracket pairs + resolution uses the cache). If the limit is exceeded, the fallback + code will produce visual order that will be incorrect if there are + RTL characters in the offending line of text. */ +/* Do we need to allow customization of this limit? */ +#define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000 +#if BIDI_CACHE_CHUNK >= BIDI_CACHE_MAX_ELTS_PER_SLOT +# error BIDI_CACHE_CHUNK must be less than BIDI_CACHE_MAX_ELTS_PER_SLOT +#endif +static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; static struct bidi_it *bidi_cache; static ptrdiff_t bidi_cache_size = 0; enum { elsz = sizeof (struct bidi_it) }; @@ -566,7 +589,7 @@ enum bidi_shelve_header_size = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start) - + sizeof (bidi_cache_last_idx)) + + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts)) }; /* Effectively remove the cached states beyond the Nth state from the @@ -604,6 +627,7 @@ bidi_cache_shrink (void) bidi_cache_size = BIDI_CACHE_CHUNK; } bidi_cache_reset (); + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; } static void @@ -622,7 +646,9 @@ bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it) /* Find a cached state with a given CHARPOS and resolved embedding level less or equal to LEVEL. If LEVEL is -1, disregard the resolved levels in cached states. DIR, if non-zero, means search - in that direction from the last cache hit. */ + in that direction from the last cache hit. + + Value is the index of the cached state, or -1 if not found. */ static ptrdiff_t bidi_cache_search (ptrdiff_t charpos, int level, int dir) { @@ -696,7 +722,8 @@ bidi_cache_find_level_change (int level, int dir, bool before) ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; int incr = before ? 1 : 0; - eassert (!dir || bidi_cache_last_idx >= 0); + if (i < 0) /* cache overflowed? */ + i = 0; if (!dir) dir = -1; @@ -734,23 +761,37 @@ bidi_cache_ensure_space (ptrdiff_t idx) /* Enlarge the cache as needed. */ if (idx >= bidi_cache_size) { - /* The bidi cache cannot be larger than the largest Lisp string - or buffer. */ - ptrdiff_t string_or_buffer_bound - = max (BUF_BYTES_MAX, STRING_BYTES_BOUND); + ptrdiff_t chunk_size = BIDI_CACHE_CHUNK; - /* Also, it cannot be larger than what C can represent. */ - ptrdiff_t c_bound - = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz; + if (bidi_cache_size > bidi_cache_max_elts - chunk_size) + chunk_size = bidi_cache_max_elts - bidi_cache_size; - bidi_cache - = xpalloc (bidi_cache, &bidi_cache_size, - max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1), - min (string_or_buffer_bound, c_bound), elsz); + if (max (idx + 1, + bidi_cache_size + chunk_size) <= bidi_cache_max_elts) + { + /* The bidi cache cannot be larger than the largest Lisp + string or buffer. */ + ptrdiff_t string_or_buffer_bound + = max (BUF_BYTES_MAX, STRING_BYTES_BOUND); + + /* Also, it cannot be larger than what C can represent. */ + ptrdiff_t c_bound + = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz; + ptrdiff_t max_elts = bidi_cache_max_elts; + + max_elts = min (max_elts, min (string_or_buffer_bound, c_bound)); + + /* Force xpalloc not to over-allocate by passing it MAX_ELTS + as its 4th argument. */ + bidi_cache = xpalloc (bidi_cache, &bidi_cache_size, + max (chunk_size, idx - bidi_cache_size + 1), + max_elts, elsz); + eassert (bidi_cache_size > idx); + } } } -static void +static int bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, bool update_only) { @@ -762,7 +803,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, idx = bidi_cache_search (bidi_it->charpos, -1, 1); if (idx < 0 && update_only) - return; + return 0; if (idx < 0) { @@ -771,19 +812,23 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, /* Character positions should correspond to cache positions 1:1. If we are outside the range of cached positions, the cache is useless and must be reset. */ - if (idx > bidi_cache_start && - (bidi_it->charpos > (bidi_cache[idx - 1].charpos - + bidi_cache[idx - 1].nchars) - || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos)) + if (bidi_cache_start < idx && idx < bidi_cache_size + && (bidi_it->charpos > (bidi_cache[idx - 1].charpos + + bidi_cache[idx - 1].nchars) + || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos)) { bidi_cache_reset (); idx = bidi_cache_start; } if (bidi_it->nchars <= 0) emacs_abort (); - bidi_copy_it (&bidi_cache[idx], bidi_it); - if (!resolved) - bidi_cache[idx].resolved_level = -1; + /* Don't cache if no available space in the cache. */ + if (bidi_cache_size > idx) + { + bidi_copy_it (&bidi_cache[idx], bidi_it); + if (!resolved) + bidi_cache[idx].resolved_level = -1; + } } else { @@ -806,9 +851,19 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type; } - bidi_cache_last_idx = idx; - if (idx >= bidi_cache_idx) - bidi_cache_idx = idx + 1; + if (bidi_cache_size > idx) + { + bidi_cache_last_idx = idx; + if (idx >= bidi_cache_idx) + bidi_cache_idx = idx + 1; + return 1; + } + else + { + /* The cache overflowed. */ + bidi_cache_last_idx = -1; + return 0; + } } /* Look for a cached iterator state that corresponds to CHARPOS. If @@ -846,8 +901,13 @@ bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it) static int bidi_peek_at_next_level (struct bidi_it *bidi_it) { - if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1) + if (bidi_cache_idx == bidi_cache_start) emacs_abort (); + /* If the cache overflowed, return the level of the last cached + character. */ + if (bidi_cache_last_idx == -1 + || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0)) + return bidi_cache[bidi_cache_idx - 1].resolved_level; return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; } @@ -864,6 +924,8 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it) void bidi_push_it (struct bidi_it *bidi_it) { + /* Give this stack slot its cache room. */ + bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT; /* Save the current iterator state in its entirety after the last used cache slot. */ bidi_cache_ensure_space (bidi_cache_idx); @@ -900,6 +962,9 @@ bidi_pop_it (struct bidi_it *bidi_it) /* Invalidate the last-used cache slot data. */ bidi_cache_last_idx = -1; + + bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT; + eassert (bidi_cache_max_elts > 0); } static ptrdiff_t bidi_cache_total_alloc; @@ -939,6 +1004,11 @@ bidi_shelve_cache (void) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start), &bidi_cache_last_idx, sizeof (bidi_cache_last_idx)); + memcpy (databuf + sizeof (bidi_cache_idx) + + bidi_cache_idx * sizeof (struct bidi_it) + + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx), + &bidi_cache_max_elts, sizeof (bidi_cache_max_elts)); return databuf; } @@ -960,6 +1030,7 @@ bidi_unshelve_cache (void *databuf, bool just_free) /* A NULL pointer means an empty cache. */ bidi_cache_start = 0; bidi_cache_sp = 0; + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; bidi_cache_reset (); } } @@ -999,6 +1070,12 @@ bidi_unshelve_cache (void *databuf, bool just_free) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start), sizeof (bidi_cache_last_idx)); + memcpy (&bidi_cache_max_elts, + p + sizeof (bidi_cache_idx) + + bidi_cache_idx * sizeof (struct bidi_it) + + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx), + sizeof (bidi_cache_max_elts)); bidi_cache_total_alloc -= (bidi_shelve_header_size + bidi_cache_idx * sizeof (struct bidi_it)); @@ -1030,14 +1107,12 @@ bidi_initialize (void) emacs_abort (); staticpro (&bidi_brackets_table); - Qparagraph_start = intern ("paragraph-start"); - staticpro (&Qparagraph_start); + DEFSYM (Qparagraph_start, "paragraph-start"); paragraph_start_re = Fsymbol_value (Qparagraph_start); if (!STRINGP (paragraph_start_re)) paragraph_start_re = build_string ("\f\\|[ \t]*$"); staticpro (¶graph_start_re); - Qparagraph_separate = intern ("paragraph-separate"); - staticpro (&Qparagraph_separate); + DEFSYM (Qparagraph_separate, "paragraph-separate"); paragraph_separate_re = Fsymbol_value (Qparagraph_separate); if (!STRINGP (paragraph_separate_re)) paragraph_separate_re = build_string ("[ \t\f]*$"); @@ -1045,6 +1120,7 @@ bidi_initialize (void) bidi_cache_sp = 0; bidi_cache_total_alloc = 0; + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; bidi_initialized = 1; } @@ -1723,6 +1799,11 @@ bidi_explicit_dir_char (int ch) if (!bidi_initialized) emacs_abort (); + if (ch < 0) + { + eassert (ch == BIDI_EOB); + return false; + } ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); return (ch_type == LRE || ch_type == LRO || ch_type == RLE || ch_type == RLO @@ -2459,6 +2540,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) struct bidi_it tem_it; bool l2r_seen = false, r2l_seen = false; ptrdiff_t pairing_pos; + int idx_at_entry = bidi_cache_idx; eassert (MAX_BPA_STACK >= 100); bidi_copy_it (&saved_it, bidi_it); @@ -2483,7 +2565,15 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) levels below). */ if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1) bidi_it->bracket_pairing_pos = bidi_it->charpos; - bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); + if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0)) + { + /* No more space in cache -- give up and let the opening + bracket that started this be processed as a + NEUTRAL_ON. */ + bidi_cache_reset_to (idx_at_entry - bidi_cache_start); + bidi_copy_it (bidi_it, &saved_it); + goto give_up; + } if (btype == BIDI_BRACKET_OPEN) PUSH_BPA_STACK; else if (btype == BIDI_BRACKET_CLOSE) @@ -2572,7 +2662,16 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) { if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level) maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level; - bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); + if (!bidi_cache_iterator_state (bidi_it, + type == NEUTRAL_B, 0)) + { + /* No more space in cache -- give up and let the + opening bracket that started this be + processed as any other NEUTRAL_ON. */ + bidi_cache_reset_to (idx_at_entry - bidi_cache_start); + bidi_copy_it (bidi_it, &saved_it); + goto give_up; + } type = bidi_resolve_weak (bidi_it); } } @@ -2648,6 +2747,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) } } + give_up: return retval; } @@ -3210,10 +3310,35 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag) if (end_flag) emacs_abort (); - bidi_cache_iterator_state (bidi_it, 1, 0); + if (!bidi_cache_iterator_state (bidi_it, 1, 0)) + { + /* Can't happen: if the cache needs to grow, it means we + were at base embedding level, so the cache should have + been either empty or already large enough to cover this + character position. */ + emacs_abort (); + } do { new_level = bidi_level_of_next_char (bidi_it); - bidi_cache_iterator_state (bidi_it, 1, 0); + /* If the cache is full, perform an emergency return by + pretending that the level ended. */ + if (!bidi_cache_iterator_state (bidi_it, 1, 0)) + { + new_level = level - 1; + /* Since the cache should only grow when we are scanning + forward looking for the edge of the level that is one + above the base embedding level, we can only have this + contingency when LEVEL - 1 is the base embedding + level. */ + eassert (new_level == bidi_it->level_stack[0].level); + /* Plan B, for when the cache overflows: Back up to the + previous character by fetching the last cached state, + and force the resolved level of that character be the + base embedding level. */ + bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it); + bidi_it->resolved_level = new_level; + bidi_cache_iterator_state (bidi_it, 1, 1); + } } while (new_level >= level); } } @@ -3367,6 +3492,12 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos + bidi_cache[bidi_cache_idx - 1].nchars - 1)) bidi_cache_reset (); + /* Also reset the cache if it overflowed and we have just + emergency-exited using Plan B. */ + else if (bidi_it->resolved_level == bidi_it->level_stack[0].level + && bidi_cache_idx >= bidi_cache_size + && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos) + bidi_cache_reset (); /* But as long as we are caching during forward scan, we must cache each state, or else the cache integrity will be compromised: it assumes cached states correspond to buffer