Merge from mainline.

author Eli Zaretskii <eliz@gnu.org>

Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)

committer Eli Zaretskii <eliz@gnu.org>

Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)
author Eli Zaretskii <eliz@gnu.org>
Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)
committer Eli Zaretskii <eliz@gnu.org>
Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)
diff --git a/src/.gdbinit b/src/.gdbinit

index e8a64f5dfe419e5be586830448ced2f384b95fe4..8949e66134c75c0e90e55cb7379507118f91db39 100644 (file)
--- a/src/.gdbinit
+++ b/src/.gdbinit
@@ -271,6 +271,9 @@ define pitx
      end
    end
    printf "\n"
+  if ($it->bidi_p)
+    printf "BIDI: base_stop=%d prev_stop=%d level=%d\n", $it->base_level_stop, $it->prev_stop, $it->bidi_it.resolved_level
+  end
    if ($it->region_beg_charpos >= 0)
      printf "reg=%d-%d ", $it->region_beg_charpos, $it->region_end_charpos
    end
@@ -447,6 +450,36 @@ document pwin
  Pretty print window structure w.
  end
  
+define pbiditype
+  if ($arg0 == 0)
+    printf "UNDEF"
+  end
+  if ($arg0 == 1)
+    printf "L"
+  end
+  if ($arg0 == 2)
+    printf "R"
+  end
+  if ($arg0 == 3)
+    printf "EN"
+  end
+  if ($arg0 == 4)
+    printf "AN"
+  end
+  if ($arg0 == 5)
+    printf "BN"
+  end
+  if ($arg0 == 6)
+    printf "B"
+  end
+  if ($arg0 < 0 || $arg0 > 6)
+    printf "%d??", $arg0
+  end
+end
+document pbiditype
+Print textual description of bidi type given as first argument.
+end
+
  define pgx
    set $g = $arg0
    # CHAR_GLYPH
@@ -475,6 +508,11 @@ define pgx
    else
      printf " pos=%d", $g->charpos
    end
+  # For characters, print their resolved level and bidi type
+  if ($g->type == 0)
+    printf " blev=%d,btyp=", $g->resolved_level
+    pbiditype $g->bidi_type
+  end
    printf " w=%d a+d=%d+%d", $g->pixel_width, $g->ascent, $g->descent
    # If not DEFAULT_FACE_ID
    if ($g->face_id != 0)
diff --git a/src/ChangeLog.bidi b/src/ChangeLog.bidi

new file mode 100644 (file)

index 0000000..234723b
--- /dev/null
+++ b/src/ChangeLog.bidi
@@ -0,0 +1,513 @@
+2010-02-20  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Compare candidate cursor
+       positions only in rows whose buffer positions occlude point.
+       (display_line): Fix computation of row->start and row->end for
+       empty lines.
+
+       * dispnew.c (row_equal_p): Compare the reversed_p attributes as
+       well.
+
+2010-02-13  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Don't overwrite cursor position
+       if it is not a better candidate than what we already have.
+       (display_line): Keep calling set_cursor_from_row for
+       bidi-reordered rows even if we already have a possible candidate
+       for cursor position.  Undo the row_end setting throughout the
+       code, and instead do it after all the row's glyphs have been
+       produced, by looping over the glyphs.
+
+2010-02-06  Eli Zaretskii  <eliz@gnu.org>
+
+       Start working on cursor movement in continuation lines.
+       * xdisp.c (move_it_in_display_line_to): New variables prev_method
+       and prev_pos.  Compare for strict equality in
+       BUFFER_POS_REACHED_P.
+
+2010-01-30  Eli Zaretskii  <eliz@gnu.org>
+
+       Fix fallout from changes to managing glyph row reversed_p flag.
+       * xdisp.c (init_iterator): Initialize it->glyph_row->reversed_p
+       flag.
+
+2010-01-25  Eli Zaretskii  <eliz@gnu.org>
+
+       Clean up the mess with setting the glyph row reversed_p flag.
+       * dispnew.c (prepare_desired_row): Preserve the reversed_p flag.
+
+       * bidi.c (bidi_cache_find): Use bidi_copy_it instead of copying
+       the whole struct (which includes uninitialized parts).
+       (bidi_init_it): Don't initialize bidi_it->paragraph_dir.
+
+       * xdisp.c (display_line): Remove misplaced setting of
+       row->reversed_p flags.  Copy the reversed_p flag to the next glyph
+       row.
+       (next_element_from_buffer): Check bidi_it.paragraph_dir rather
+       than level_stack[0].level.  Reset the reversed_p flag for non-R2L
+       paragraphs.
+
+       Fix display of invisible text.
+       * xdisp.c (handle_invisible_prop): If we are `reseat'ed, init the
+       paragraph direction and set the `reversed_p' flag in the IT's
+       glyph row.  Fix exit conditions of the loop that skips invisible
+       text.  Update IT->prev_stop after skipping invisible text.  Check
+       for additional overlays at IT->stop_charpos, not at start_pos.
+
+2010-01-16  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (handle_invisible_prop): Under bidi iteration, skip
+       invisible text using bidi_get_next_char_visually.
+       (handle_stop_backwards): Restore it->bidi_p, it->current and
+       it->position before calling handle_stop.
+       (next_element_from_buffer): Fix formatting.
+
+2010-01-09  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (handle_stop_backwards): Add a prototype.
+       (reseat): Call handle_stop_backwards to recompute prev_stop and
+       base_level_stop for the new position.  Solves the crash when
+       scrolling backwards.
+
+2010-01-02  Eli Zaretskii  <eliz@gnu.org>
+
+       * .gdbinit (pitx): Display some bidi information about the
+       iterator.
+
+       * dispextern.h (BIDI_AT_BASE_LEVEL): Enclose definition in
+       parentheses.
+
+       * xdisp.c (handle_stop_backwards): Save and restore it->current
+       and it->position, instead of expecting the caller to do that.
+       (next_element_from_buffer): When moving across stop_charpos,
+       record it in prev_stop.  When IT_CHARPOS backs up, call
+       handle_stop_backwards only if above the base embedding level.
+       This solves the crash while displaying etc/HELLO in bidi mode.
+
+2009-12-26  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (handle_stop_backwards): Call compute_stop_pos in the
+       loop, instead of calling handle_stop.  Call handle_stop only once,
+       after the loop.
+       (next_element_from_buffer): Don't call handle_stop_backwards if at
+       stop position.  If base_level_stop is zero, set it to 1.
+
+       * term.c (append_glyph): Fill resolved_level and bidi_type slots
+       of struct glyph for unidirectional display.
+
+       * xdisp.c (set_cursor_from_row): Handle zero-width characters.
+
+       * bidi.c (bidi_mirror_char): More efficient code (suggested by
+       Ehud Karni <ehud@unix.mvs.co.il>).  Don't even try to mirror
+       non-ASCII characters.
+
+2009-12-19  Eli Zaretskii  <eliz@gnu.org>
+
+       * buffer.c (Fbuffer_swap_text): Swap the values of
+       bidi_display_reordering and bidi_paragraph_direction.
+
+       * bidi.c (bidi_resolve_weak): Fix nesting of conditions for Wn
+       processing.  Move W3 after W1 and W2.  Simplify W4 because it is
+       now always after W1.
+
+       * .gdbinit (pbiditype): New command.
+       (pgx): Use it to display bidi level and type of the glyph.
+
+2009-12-12  Eli Zaretskii  <eliz@gnu.org>
+
+       * dispextern.h (struct it): New members prev_stop and
+       base_level_stop.
+
+       * xdisp.c (handle_stop_backwards): New function.
+       (next_element_from_buffer): Handle the situation where we
+       overstepped stop_charpos due to non-linearity of the bidi
+       iteration.  Likewise for when we back up beyond the previous
+       stop_charpos.
+       (reseat_1, pop_it, push_it): Set prev_stop and base_level_stop.
+
+       * dispextern.h (BIDI_AT_BASE_LEVEL): New macro.
+
+       * bidi.c (bidi_copy_it): Fix compiler warning due to cast of a
+       pointer to `int'.  Don't preserve the first_elt member, as it is
+       no longer copied, because its position in the structure was
+       changed, see below.
+
+       * dispextern.h (struct bidi_it): Move first_elt, new_paragraph,
+       separator_limit, and paragraph_dir to after bidi_stack.  Add a
+       note that anything beyond the level stack is not preserved when
+       the bidi iterator state is copied/saved.
+
+2009-11-21  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Fix cursor positioning on empty
+       lines when integer values of `cursor' property are used on display
+       strings.
+
+2009-11-14  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (init_iterator, text_outside_line_unchanged_p)
+       (try_window_id): Rename paragraph_direction to
+       bidi_paragraph_direction.
+       (set_cursor_from_row): Handle integer values of `cursor' property
+       on display strings.
+
+       * buffer.c (init_buffer_once, syms_of_buffer): Rename
+       paragraph_direction to bidi_paragraph_direction.
+
+       * buffer.h (struct buffer): Rename paragraph_direction to
+       bidi_paragraph_direction.
+
+2009-11-07  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_paragraph_init): Don't overstep end of buffer.
+       Treat end of buffer as a NEUTRAL_B character.
+       (bidi_resolve_explicit): Don't special-case ZV when bidi_it->type
+       is NEUTRAL_B, since bidi_set_paragraph_end no longer sets the
+       new_paragraph flag.
+
+2009-10-31  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (display_line): Always extend reversed_p rows to the end
+       of line.
+       (set_cursor_from_row): In R2L rows that don't display text, put
+       the cursor on the rightmost glyph.
+
+2009-10-24  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Fix off-by-one error when
+       skipping over non-character glyphs at end of a reversed row.
+
+       * dispextern.h (struct glyph): The `resolved_level' member needs
+       only 5 bits, not 6.  The `bidi_type' member needs only 3 bits.
+       (bidi_type_t): Rearrange so that types that can appear in the
+       resolved type are at the beginning and have values less than 8.
+
+2009-10-23  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c: Include setjmp.h.
+
+2009-10-17  Eli Zaretskii  <eliz@gnu.org>
+
+       * dispextern.h (struct glyph): New members resolved_level and
+       bidi_type.
+
+       * xdisp.c (append_glyph, append_composite_glyph)
+       (produce_image_glyph, append_stretch_glyph): Set them.
+
+       * term.c (append_glyph): Ditto.
+
+       * xdisp.c (display_line, next_element_from_buffer): Set the glyph
+       row's reversed_p flag if the paragraph base direction is odd.
+       (extend_face_to_end_of_line): Don't reverse the glyphs here.
+
+       * term.c (append_glyph): Reverse glyphs here.
+
+       * bidi.c (bidi_get_next_char_visually): Don't exit early when at
+       ZV.
+       (bidi_paragraph_init): Don't step over a newline if at BEGV.
+
+2009-10-16  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_paragraph_init): Handle empty buffers.
+
+2009-10-10  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Skip over glyphs near end of row
+       with integer OBJECT even if their CHARPOS is zero.
+
+       * bidi.c (bidi_cache_iterator_state): Don't cache NEW_PARAGRAPH.
+       Abort if someone tries to add a cached state whose position is not
+       the immediate successor to that of the last cached state.
+       (bidi_paragraph_init): Don't bail out too early after a reseat.
+
+2009-10-09  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (text_outside_line_unchanged_p, try_window_id): Disable
+       optimizations if we are reordering bidirectional text and the
+       paragraph direction can be affected by the change.
+
+2009-10-08  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (string_buffer_position_lim): New function.
+       (string_buffer_position): Most of code moved to
+       string_buffer_position_lim.  Last argument and return value are
+       now EMACS_INT; all callers changed.
+       (set_cursor_from_row): Rewritten to support bidirectional text and
+       reversed glyph rows.
+
+       * dispextern.h <string_buffer_position>: Update prototype.
+
+2009-10-07  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_paragraph_init): Fix initialization of POS.
+
+       * dispextern.h (struct glyph_row): New member reversed_p.
+
+2009-10-06  Eli Zaretskii  <eliz@gnu.org>
+
+       * buffer.c (syms_of_buffer): Remove DEFVAR_LISP_NOPRO for
+       default-direction-reversed, default-bidi-display-reordering, and
+       default-paragraph-direction.
+
+2009-10-05  Eli Zaretskii  <eliz@gnu.org>
+
+       * buffer.h (struct buffer): New member paragraph_direction.
+       * buffer.c (init_buffer_once): Initialize it.
+       (syms_of_buffer): Declare Lisp variables
+       default-paragraph-direction and paragraph-direction.
+
+       * dispextern.h (struct it): New member paragraph_embedding.
+       * xdisp.c (init_iterator): Initialize it from the buffer's value
+       of paragraph-direction.
+       <Qright_to_left, Qleft_to_right>: New variables.
+       (syms_of_xdisp): Initialize and staticpro them.
+       (set_iterator_to_next, next_element_from_buffer): Use the value of
+       paragraph_embedding to determine the paragraph direction.
+
+       * bidi.c (bidi_line_init): Fix second argument to
+       bidi_set_sor_type.
+       (bidi_init_it): Initialize paragraph_dir to NEUTRAL_DIR.
+       (bidi_get_next_char_visually): Record the last character of the
+       separator in separator_limit, not the character after that.
+       (bidi_find_paragraph_start): Accept character and byte positions
+       instead of the whole iterator structure.  All callers changed.
+
+2009-10-04  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_at_paragraph_end): Check for paragraph-start if
+       paragraph-separate failed to match.  Return the length of the
+       matched separator.
+       (bidi_line_init): New function.
+       (bidi_paragraph_init): Use bidi_line_init.  Do nothing if in the
+       middle of a paragraph-separate sequence.  Don't override existing
+       paragraph direction if no strong characters found in this
+       paragraph.  Set separator_limit according to what
+       bidi_at_paragraph_end returns.  Reset new_paragraph flag when a
+       new paragraph is found.
+       (bidi_init_it): Reset separator_limit.
+
+       * dispextern.h (struct bidi_it): New member separator_limit.
+
+       * bidi.c (bidi_find_paragraph_start): Return the byte position of
+       the paragraph beginning.
+
+       * xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the
+       new_paragraph flag is set in the bidi iterator.
+
+       * bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use
+       the buffer-local value of paragraph-start and paragraph-separate.
+
+2009-10-03  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_set_paragraph_end): Don't set the new_paragraph
+       flag in the iterator.
+       (bidi_init_it): Set the new_paragraph flag.
+       (bidi_at_paragraph_end): Arguments are now character and byte
+       position of the next character.  All callers changed.
+       (bidi_resolve_explicit): Don't call bidi_at_paragraph_end, and
+       don't behave as if at paragraph end if it returns true.
+       (bidi_get_next_char_visually): Don't call bidi_paragraph_init if
+       new_paragraph flag is set.  Set new_paragraph flag when at end of
+       a paragraph.
+       <fallback_paragraph_start_re, fallback_paragraph_separate_re>: New
+       variables.
+       <Qparagraph_start, Qparagraph_separate>: New variables.
+       (bidi_initialize): Initialize and staticpro them.
+
+       * dispextern.h <struct bidi_it>: New element paragraph_dir.  Make
+       positional elements EMACS_INT.
+
+       * bidi.c <bidi_overriding_paragraph_direction>: Delete.
+
+2009-09-28  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_init_it): Initialize charpos, bytepos, and
+       first_elt before calling bidi_set_paragraph_end.
+       (bidi_resolve_explicit): Don't call bidi_set_paragraph_end at
+       EOB.
+       (bidi_at_paragraph_end): Don't set new_paragraph flag at EOB.
+       (bidi_get_type): Accept an additional argument OVERRIDE, per UAX#9
+       "Explicit Overrides".  All callers changed.
+
+2009-09-27  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (next_element_from_buffer): If called not at line
+       beginning, start bidi iteration from line beginning.
+
+       * bidi.c (bidi_paragraph_init): Use
+       bidi_overriding_paragraph_direction instead of a literal zero.
+       (bidi_initialize): Fix some character types, per Unicode 5.x.
+       (bidi_get_type): Abort if called with invalid character code.
+
+       * dispextern.h: Add prototype of bidi_mirror_char.
+
+       * xdisp.c (get_next_display_element): Mirror characters whose
+       resolved type is STRONG_R.
+
+2009-09-26  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len.  Abort
+       if called not at beginning of a new paragraph.
+       (bidi_get_next_char_visually): Prepare and use a sentinel iterator
+       state when first_elt flag is set.
+
+       * dispextern.h (struct bidi_it): New struct member first_elt.
+
+       * bidi.c (bidi_init_it): Initialize bidi_it->first_elt.
+       (bidi_copy_it): Don't copy the first_elt flag.
+
+       * xdisp.c (reseat_1): Initialize bidi_it.first_elt.  Move bidi
+       scan start code from here...
+       (next_element_from_buffer): ...to here.  Use bidi_it.first_elt
+       flag.
+
+2009-09-20  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (reseat_1): Handle position < BEGV.
+
+       * bidi.c (bidi_paragraph_init): Set bidi_it->ch_len.  Handle ZV.
+       (bidi_init_it): Don't initialize bidi_it->ch_len.
+       (bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not
+       initialized.
+       (bidi_at_paragraph_end, bidi_resolve_explicit_1)
+       (bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at
+       ZV_BYTE.
+       (bidi_resolve_explicit_1): Handle position < BEGV.
+
+2009-09-19  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (init_iterator): Call bidi_init_it.  Set
+       bidi_it->bytepos if buffer position specified.
+       (reseat_1): Don't call bidi_init_it.  Call bidi_paragraph_init
+       instead.  Move back to preceding character before the call to
+       bidi_get_next_char_visually.
+
+       * bidi.c: Remove all STANDALONE parts.
+       (bidi_init_it): Init bidi_it->charpos and bidi_it->bytepos to -1.
+       Don't call bidi_paragraph_init.  Change arguments.
+       (bidi_paragraph_init): Remove code for negative pos.
+
+       * dispextern.h <bidi_it>: Rename orig_type to type_after_w1 and
+       pristine_type to orig_type.
+
+2009-09-12  Eli Zaretskii  <eliz@gnu.org>
+
+       * dispnew.c (direct_output_for_insert): Give up if we are
+       reordering bidirectional text.
+
+       * dispextern.h (IT_STACK_SIZE): Enlarge to 5.
+
+       * xdisp.c (display_line): Set row->end and it->start for the next
+       row to the next character in logical order.  If we are reordering
+       bidi text, push and pop the iterator before and after momentarily
+       iterating in logical order.
+
+2009-09-11  Eli Zaretskii  <eliz@gnu.org>
+
+       Note: The following changes were undone on 2009-09-12.
+
+       * xdisp.c (set_iterator_to_next, reseat, reseat_1)
+       (reseat_at_next_visible_line_start): Accept additional argument
+       force_logical_p; all callers changed.  If force_logical_p is
+       non-zero, force iteration in buffer's logical order even in bidi
+       buffers.
+
+       * dispnew.c (direct_output_for_insert): Call set_iterator_to_next
+       with additional argument zero.
+
+       * dispextern.h (set_iterator_to_next): Now accepts an additional
+       argument.
+
+2009-08-29  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (set_cursor_from_row): Don't assume glyph->charpos
+       increments linearly.
+       (try_window_reusing_current_matrix): Don't assume glyph->charpos
+       increments linearly.
+
+2009-08-28  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c <bidi_overriding_paragraph_direction>: Default to L2R,
+       for now.
+
+2009-08-22  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_initialize): staticpro bidi_char_table.
+       (bidi_check_type): New function.
+       (bidi_cache_iterator_state, bidi_remember_char)
+       (bidi_resolve_explicit_1, bidi_resolve_explicit)
+       (bidi_resolve_weak, bidi_resolve_neutral)
+       (bidi_level_of_next_char): Use it to validate the bidi type
+       assigned to the iterator.
+
+2009-08-15  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_initialize): Fix initialization of bidi_type_table.
+
+       * xdisp.c (set_iterator_to_next): Fix position setting after call
+       to bidi_get_next_char_visually.
+
+2005-12-03  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c: Include stdio.h unconditionally.  Fix and elaborate
+       commentary.  Add Copyright blurb.
+
+2004-03-08  Kenichi Handa  <handa@m17n.org>
+
+       * xdisp.c (reseat_1): Call bidi_init_it with a previous position.
+
+       * bidi.c (bidi_init_it): Set bidi_it->ch_len even if POS > 0.
+
+2004-03-04  Kenichi Handa  <handa@m17n.org>
+
+       The following changes are to support bidirectional text display.
+
+       * Makefile.in (obj): Include bidi.o.
+       (bidi.o): New target.
+
+       * bidi.c: New file.
+
+       * buffer.h (struct buffer): New member bidi_display_reordering.
+
+       * buffer.c (init_buffer_once): Initialize bidi_display_reordering.
+       (syms_of_buffer): Declarations of Lisp variables
+       default-bidi-display-reordering and bidi-display-reordering.
+
+       * dispextern.h (BIDI_MAXLEVEL): New macro.
+       (bidi_type_t, bidi_dir_t): New types.
+       (bidi_saved_info, bidi_stack, bidi_it): New structs.
+       (struct it): New members bidi_p and bidi_it.
+       (bidi_init_it): Extern it.
+       (bidi_get_next_char_visually): Extern it.
+
+       * dispnew.c (direct_output_forward_char): Give up if we need bidi
+       processing or buffer's direction is right-to-left.
+
+       * xdisp.c (init_iterator): Initialize it->bidi_p.
+       (reseat_1): Call bidi_init_it and bidi_get_next_char_visually if
+       necessary.
+       (set_iterator_to_next): Call bidi_get_next_char_visually if
+       necessary.
+
+
+;; Local Variables:
+;; coding: utf-8
+;; add-log-time-zone-rule: t
+;; End:
+
+    Copyright (C) 2007, 2008, 2009  Free Software Foundation, Inc.
+
+  This file is part of GNU Emacs.
+
+  GNU Emacs is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  GNU Emacs is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
diff --git a/src/Makefile.in b/src/Makefile.in

index 11facec602398e026338850213c808a4642980e9..d64c0459607154d5f85eb5a6e8f0645d9f33531f 100644 (file)
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -560,7 +560,7 @@ FONT_DRIVERS = xfont.o
  /* lastfile must follow all files
     whose initialized data areas should be dumped as pure by dump-emacs.  */
  obj=    dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
-       charset.o coding.o category.o ccl.o character.o chartab.o \
+       charset.o coding.o category.o ccl.o character.o chartab.o bidi.o \
         cm.o term.o terminal.o xfaces.o $(XOBJ) $(GTK_OBJ) $(DBUS_OBJ) \
         emacs.o keyboard.o macros.o keymap.o sysdep.o \
         buffer.o filelock.o insdel.o marker.o \
@@ -1052,6 +1052,7 @@ doc.o: buildobj.h
  
  atimer.o: atimer.c atimer.h syssignal.h systime.h lisp.h blockinput.h \
   $(config_h)
+bidi.o: bidi.c buffer.h character.h dispextern.h lisp.h $(config_h)
  buffer.o: buffer.c buffer.h region-cache.h commands.h window.h \
     $(INTERVALS_H) blockinput.h atimer.h systime.h character.h \
     indent.h keyboard.h coding.h keymap.h frame.h lisp.h $(config_h)
diff --git a/src/bidi.c b/src/bidi.c

new file mode 100644 (file)

index 0000000..5c01690
--- /dev/null
+++ b/src/bidi.c
@@ -0,0 +1,2026 @@
+/* Low-level bidirectional buffer-scanning functions for GNU Emacs.
+   Copyright (C) 2000, 2001, 2004, 2005, 2009  Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING.  If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+/* Written by Eli Zaretskii <eliz@gnu.org>.
+
+   A sequential implementation of the Unicode Bidirectional algorithm,
+   as per UAX#9, a part of the Unicode Standard.
+
+   Unlike the reference and most other implementations, this one is
+   designed to be called once for every character in the buffer.
+
+   The main entry point is bidi_get_next_char_visually.  Each time it
+   is called, it finds the next character in the visual order, and
+   returns its information in a special structure.  The caller is then
+   expected to process this character for display or any other
+   purposes, and call bidi_get_next_char_visually for the next
+   character.  See the comments in bidi_get_next_char_visually for
+   more details about its algorithm that finds the next visual-order
+   character by resolving their levels on the fly.
+
+   If you want to understand the code, you will have to read it
+   together with the relevant portions of UAX#9.  The comments include
+   references to UAX#9 rules, for that very reason.
+
+   A note about references to UAX#9 rules: if the reference says
+   something like "X9/Retaining", it means that you need to refer to
+   rule X9 and to its modifications described in the "Implementation
+   Notes" section of UAX#9, under "Retaining Format Codes".  */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include <setjmp.h>
+
+#include "lisp.h"
+#include "buffer.h"
+#include "character.h"
+#include "dispextern.h"
+
+static int bidi_initialized = 0;
+
+static Lisp_Object bidi_type_table;
+
+/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
+#define LRM_CHAR   0x200E
+#define RLM_CHAR   0x200F
+#define LRE_CHAR   0x202A
+#define RLE_CHAR   0x202B
+#define PDF_CHAR   0x202C
+#define LRO_CHAR   0x202D
+#define RLO_CHAR   0x202E
+
+#define BIDI_EOB   -1
+#define BIDI_BOB   -2          /* FIXME: Is this needed? */
+
+/* Local data structures.  (Look in dispextern.h for the rest.)  */
+
+/* What we need to know about the current paragraph.  */
+struct bidi_paragraph_info {
+  int start_bytepos;   /* byte position where it begins */
+  int end_bytepos;     /* byte position where it ends */
+  int embedding_level; /* its basic embedding level */
+  bidi_dir_t base_dir; /* its base direction */
+};
+
+/* Data type for describing the bidirectional character categories.  */
+typedef enum {
+  UNKNOWN_BC,
+  NEUTRAL,
+  WEAK,
+  STRONG
+} bidi_category_t;
+
+int bidi_ignore_explicit_marks_for_paragraph_level = 1;
+
+static Lisp_Object fallback_paragraph_start_re, fallback_paragraph_separate_re;
+static Lisp_Object Qparagraph_start, Qparagraph_separate;
+
+static void
+bidi_initialize ()
+{
+  /* FIXME: This should come from the Unicode Database.  */
+  struct {
+    int from, to;
+    bidi_type_t type;
+  } bidi_type[] =
+      { { 0x0000, 0x0008, WEAK_BN },
+       { 0x0009, 0x0000, NEUTRAL_S },
+       { 0x000A, 0x0000, NEUTRAL_B },
+       { 0x000B, 0x0000, NEUTRAL_S },
+       { 0x000C, 0x0000, NEUTRAL_WS },
+       { 0x000D, 0x0000, NEUTRAL_B },
+       { 0x000E, 0x001B, WEAK_BN },
+       { 0x001C, 0x001E, NEUTRAL_B },
+       { 0x001F, 0x0000, NEUTRAL_S },
+       { 0x0020, 0x0000, NEUTRAL_WS },
+       { 0x0021, 0x0022, NEUTRAL_ON },
+       { 0x0023, 0x0025, WEAK_ET },
+       { 0x0026, 0x002A, NEUTRAL_ON },
+       { 0x002B, 0x0000, WEAK_ES },
+       { 0x002C, 0x0000, WEAK_CS },
+       { 0x002D, 0x0000, WEAK_ES },
+       { 0x002E, 0x002F, WEAK_CS },
+       { 0x0030, 0x0039, WEAK_EN },
+       { 0x003A, 0x0000, WEAK_CS },
+       { 0x003B, 0x0040, NEUTRAL_ON },
+       { 0x005B, 0x0060, NEUTRAL_ON },
+       { 0x007B, 0x007E, NEUTRAL_ON },
+       { 0x007F, 0x0084, WEAK_BN },
+       { 0x0085, 0x0000, NEUTRAL_B },
+       { 0x0086, 0x009F, WEAK_BN },
+       { 0x00A0, 0x0000, WEAK_CS },
+       { 0x00A1, 0x0000, NEUTRAL_ON },
+       { 0x00A2, 0x00A5, WEAK_ET },
+       { 0x00A6, 0x00A9, NEUTRAL_ON },
+       { 0x00AB, 0x00AC, NEUTRAL_ON },
+       { 0x00AD, 0x0000, WEAK_BN },
+       { 0x00AE, 0x00Af, NEUTRAL_ON },
+       { 0x00B0, 0x00B1, WEAK_ET },
+       { 0x00B2, 0x00B3, WEAK_EN },
+       { 0x00B4, 0x0000, NEUTRAL_ON },
+       { 0x00B6, 0x00B8, NEUTRAL_ON },
+       { 0x00B9, 0x0000, WEAK_EN },
+       { 0x00BB, 0x00BF, NEUTRAL_ON },
+       { 0x00D7, 0x0000, NEUTRAL_ON },
+       { 0x00F7, 0x0000, NEUTRAL_ON },
+       { 0x02B9, 0x02BA, NEUTRAL_ON },
+       { 0x02C2, 0x02CF, NEUTRAL_ON },
+       { 0x02D2, 0x02DF, NEUTRAL_ON },
+       { 0x02E5, 0x02ED, NEUTRAL_ON },
+       { 0x0300, 0x036F, WEAK_NSM },
+       { 0x0374, 0x0375, NEUTRAL_ON },
+       { 0x037E, 0x0385, NEUTRAL_ON },
+       { 0x0387, 0x0000, NEUTRAL_ON },
+       { 0x03F6, 0x0000, NEUTRAL_ON },
+       { 0x0483, 0x0489, WEAK_NSM },
+       { 0x058A, 0x0000, NEUTRAL_ON },
+       { 0x0591, 0x05BD, WEAK_NSM },
+       { 0x05BE, 0x0000, STRONG_R },
+       { 0x05BF, 0x0000, WEAK_NSM },
+       { 0x05C0, 0x0000, STRONG_R },
+       { 0x05C1, 0x05C2, WEAK_NSM },
+       { 0x05C3, 0x0000, STRONG_R },
+       { 0x05C4, 0x05C5, WEAK_NSM },
+       { 0x05C6, 0x0000, STRONG_R },
+       { 0x05C7, 0x0000, WEAK_NSM },
+       { 0x05D0, 0x05F4, STRONG_R },
+       { 0x060C, 0x0000, WEAK_CS },
+       { 0x061B, 0x064A, STRONG_AL },
+       { 0x064B, 0x0655, WEAK_NSM },
+       { 0x0660, 0x0669, WEAK_AN },
+       { 0x066A, 0x0000, WEAK_ET },
+       { 0x066B, 0x066C, WEAK_AN },
+       { 0x066D, 0x066F, STRONG_AL },
+       { 0x0670, 0x0000, WEAK_NSM },
+       { 0x0671, 0x06D5, STRONG_AL },
+       { 0x06D6, 0x06DC, WEAK_NSM },
+       { 0x06DD, 0x0000, STRONG_AL },
+       { 0x06DE, 0x06E4, WEAK_NSM },
+       { 0x06E5, 0x06E6, STRONG_AL },
+       { 0x06E7, 0x06E8, WEAK_NSM },
+       { 0x06E9, 0x0000, NEUTRAL_ON },
+       { 0x06EA, 0x06ED, WEAK_NSM },
+       { 0x06F0, 0x06F9, WEAK_EN },
+       { 0x06FA, 0x070D, STRONG_AL },
+       { 0x070F, 0x0000, WEAK_BN },
+       { 0x0710, 0x0000, STRONG_AL },
+       { 0x0711, 0x0000, WEAK_NSM },
+       { 0x0712, 0x072C, STRONG_AL },
+       { 0x0730, 0x074A, WEAK_NSM },
+       { 0x0780, 0x07A5, STRONG_AL },
+       { 0x07A6, 0x07B0, WEAK_NSM },
+       { 0x07B1, 0x0000, STRONG_AL },
+       { 0x0901, 0x0902, WEAK_NSM },
+       { 0x093C, 0x0000, WEAK_NSM },
+       { 0x0941, 0x0948, WEAK_NSM },
+       { 0x094D, 0x0000, WEAK_NSM },
+       { 0x0951, 0x0954, WEAK_NSM },
+       { 0x0962, 0x0963, WEAK_NSM },
+       { 0x0981, 0x0000, WEAK_NSM },
+       { 0x09BC, 0x0000, WEAK_NSM },
+       { 0x09C1, 0x09C4, WEAK_NSM },
+       { 0x09CD, 0x0000, WEAK_NSM },
+       { 0x09E2, 0x09E3, WEAK_NSM },
+       { 0x09F2, 0x09F3, WEAK_ET },
+       { 0x0A02, 0x0000, WEAK_NSM },
+       { 0x0A3C, 0x0000, WEAK_NSM },
+       { 0x0A41, 0x0A4D, WEAK_NSM },
+       { 0x0A70, 0x0A71, WEAK_NSM },
+       { 0x0A81, 0x0A82, WEAK_NSM },
+       { 0x0ABC, 0x0000, WEAK_NSM },
+       { 0x0AC1, 0x0AC8, WEAK_NSM },
+       { 0x0ACD, 0x0000, WEAK_NSM },
+       { 0x0B01, 0x0000, WEAK_NSM },
+       { 0x0B3C, 0x0000, WEAK_NSM },
+       { 0x0B3F, 0x0000, WEAK_NSM },
+       { 0x0B41, 0x0B43, WEAK_NSM },
+       { 0x0B4D, 0x0B56, WEAK_NSM },
+       { 0x0B82, 0x0000, WEAK_NSM },
+       { 0x0BC0, 0x0000, WEAK_NSM },
+       { 0x0BCD, 0x0000, WEAK_NSM },
+       { 0x0C3E, 0x0C40, WEAK_NSM },
+       { 0x0C46, 0x0C56, WEAK_NSM },
+       { 0x0CBF, 0x0000, WEAK_NSM },
+       { 0x0CC6, 0x0000, WEAK_NSM },
+       { 0x0CCC, 0x0CCD, WEAK_NSM },
+       { 0x0D41, 0x0D43, WEAK_NSM },
+       { 0x0D4D, 0x0000, WEAK_NSM },
+       { 0x0DCA, 0x0000, WEAK_NSM },
+       { 0x0DD2, 0x0DD6, WEAK_NSM },
+       { 0x0E31, 0x0000, WEAK_NSM },
+       { 0x0E34, 0x0E3A, WEAK_NSM },
+       { 0x0E3F, 0x0000, WEAK_ET },
+       { 0x0E47, 0x0E4E, WEAK_NSM },
+       { 0x0EB1, 0x0000, WEAK_NSM },
+       { 0x0EB4, 0x0EBC, WEAK_NSM },
+       { 0x0EC8, 0x0ECD, WEAK_NSM },
+       { 0x0F18, 0x0F19, WEAK_NSM },
+       { 0x0F35, 0x0000, WEAK_NSM },
+       { 0x0F37, 0x0000, WEAK_NSM },
+       { 0x0F39, 0x0000, WEAK_NSM },
+       { 0x0F3A, 0x0F3D, NEUTRAL_ON },
+       { 0x0F71, 0x0F7E, WEAK_NSM },
+       { 0x0F80, 0x0F84, WEAK_NSM },
+       { 0x0F86, 0x0F87, WEAK_NSM },
+       { 0x0F90, 0x0FBC, WEAK_NSM },
+       { 0x0FC6, 0x0000, WEAK_NSM },
+       { 0x102D, 0x1030, WEAK_NSM },
+       { 0x1032, 0x1037, WEAK_NSM },
+       { 0x1039, 0x0000, WEAK_NSM },
+       { 0x1058, 0x1059, WEAK_NSM },
+       { 0x1680, 0x0000, NEUTRAL_WS },
+       { 0x169B, 0x169C, NEUTRAL_ON },
+       { 0x1712, 0x1714, WEAK_NSM },
+       { 0x1732, 0x1734, WEAK_NSM },
+       { 0x1752, 0x1753, WEAK_NSM },
+       { 0x1772, 0x1773, WEAK_NSM },
+       { 0x17B7, 0x17BD, WEAK_NSM },
+       { 0x17C6, 0x0000, WEAK_NSM },
+       { 0x17C9, 0x17D3, WEAK_NSM },
+       { 0x17DB, 0x0000, WEAK_ET },
+       { 0x1800, 0x180A, NEUTRAL_ON },
+       { 0x180B, 0x180D, WEAK_NSM },
+       { 0x180E, 0x0000, WEAK_BN },
+       { 0x18A9, 0x0000, WEAK_NSM },
+       { 0x1FBD, 0x0000, NEUTRAL_ON },
+       { 0x1FBF, 0x1FC1, NEUTRAL_ON },
+       { 0x1FCD, 0x1FCF, NEUTRAL_ON },
+       { 0x1FDD, 0x1FDF, NEUTRAL_ON },
+       { 0x1FED, 0x1FEF, NEUTRAL_ON },
+       { 0x1FFD, 0x1FFE, NEUTRAL_ON },
+       { 0x2000, 0x200A, NEUTRAL_WS },
+       { 0x200B, 0x200D, WEAK_BN },
+       { 0x200F, 0x0000, STRONG_R },
+       { 0x2010, 0x2027, NEUTRAL_ON },
+       { 0x2028, 0x0000, NEUTRAL_WS },
+       { 0x2029, 0x0000, NEUTRAL_B },
+       { 0x202A, 0x0000, LRE },
+       { 0x202B, 0x0000, RLE },
+       { 0x202C, 0x0000, PDF },
+       { 0x202D, 0x0000, LRO },
+       { 0x202E, 0x0000, RLO },
+       { 0x202F, 0x0000, NEUTRAL_WS },
+       { 0x2030, 0x2034, WEAK_ET },
+       { 0x2035, 0x2057, NEUTRAL_ON },
+       { 0x205F, 0x0000, NEUTRAL_WS },
+       { 0x2060, 0x206F, WEAK_BN },
+       { 0x2070, 0x0000, WEAK_EN },
+       { 0x2074, 0x2079, WEAK_EN },
+       { 0x207A, 0x207B, WEAK_ET },
+       { 0x207C, 0x207E, NEUTRAL_ON },
+       { 0x2080, 0x2089, WEAK_EN },
+       { 0x208A, 0x208B, WEAK_ET },
+       { 0x208C, 0x208E, NEUTRAL_ON },
+       { 0x20A0, 0x20B1, WEAK_ET },
+       { 0x20D0, 0x20EA, WEAK_NSM },
+       { 0x2100, 0x2101, NEUTRAL_ON },
+       { 0x2103, 0x2106, NEUTRAL_ON },
+       { 0x2108, 0x2109, NEUTRAL_ON },
+       { 0x2114, 0x0000, NEUTRAL_ON },
+       { 0x2116, 0x2118, NEUTRAL_ON },
+       { 0x211E, 0x2123, NEUTRAL_ON },
+       { 0x2125, 0x0000, NEUTRAL_ON },
+       { 0x2127, 0x0000, NEUTRAL_ON },
+       { 0x2129, 0x0000, NEUTRAL_ON },
+       { 0x212E, 0x0000, WEAK_ET },
+       { 0x2132, 0x0000, NEUTRAL_ON },
+       { 0x213A, 0x0000, NEUTRAL_ON },
+       { 0x2140, 0x2144, NEUTRAL_ON },
+       { 0x214A, 0x215F, NEUTRAL_ON },
+       { 0x2190, 0x2211, NEUTRAL_ON },
+       { 0x2212, 0x2213, WEAK_ET },
+       { 0x2214, 0x2335, NEUTRAL_ON },
+       { 0x237B, 0x2394, NEUTRAL_ON },
+       { 0x2396, 0x244A, NEUTRAL_ON },
+       { 0x2460, 0x249B, WEAK_EN },
+       { 0x24EA, 0x0000, WEAK_EN },
+       { 0x24EB, 0x2FFB, NEUTRAL_ON },
+       { 0x3000, 0x0000, NEUTRAL_WS },
+       { 0x3001, 0x3004, NEUTRAL_ON },
+       { 0x3008, 0x3020, NEUTRAL_ON },
+       { 0x302A, 0x302F, WEAK_NSM },
+       { 0x3030, 0x0000, NEUTRAL_ON },
+       { 0x3036, 0x3037, NEUTRAL_ON },
+       { 0x303D, 0x303F, NEUTRAL_ON },
+       { 0x3099, 0x309A, WEAK_NSM },
+       { 0x309B, 0x309C, NEUTRAL_ON },
+       { 0x30A0, 0x0000, NEUTRAL_ON },
+       { 0x30FB, 0x0000, NEUTRAL_ON },
+       { 0x3251, 0x325F, NEUTRAL_ON },
+       { 0x32B1, 0x32BF, NEUTRAL_ON },
+       { 0xA490, 0xA4C6, NEUTRAL_ON },
+       { 0xFB1D, 0x0000, STRONG_R },
+       { 0xFB1E, 0x0000, WEAK_NSM },
+       { 0xFB1F, 0xFB28, STRONG_R },
+       { 0xFB29, 0x0000, WEAK_ET },
+       { 0xFB2A, 0xFB4F, STRONG_R },
+       { 0xFB50, 0xFD3D, STRONG_AL },
+       { 0xFD3E, 0xFD3F, NEUTRAL_ON },
+       { 0xFD50, 0xFDFC, STRONG_AL },
+       { 0xFE00, 0xFE23, WEAK_NSM },
+       { 0xFE30, 0xFE4F, NEUTRAL_ON },
+       { 0xFE50, 0x0000, WEAK_CS },
+       { 0xFE51, 0x0000, NEUTRAL_ON },
+       { 0xFE52, 0x0000, WEAK_CS },
+       { 0xFE54, 0x0000, NEUTRAL_ON },
+       { 0xFE55, 0x0000, WEAK_CS },
+       { 0xFE56, 0xFE5E, NEUTRAL_ON },
+       { 0xFE5F, 0x0000, WEAK_ET },
+       { 0xFE60, 0xFE61, NEUTRAL_ON },
+       { 0xFE62, 0xFE63, WEAK_ET },
+       { 0xFE64, 0xFE68, NEUTRAL_ON },
+       { 0xFE69, 0xFE6A, WEAK_ET },
+       { 0xFE6B, 0x0000, NEUTRAL_ON },
+       { 0xFE70, 0xFEFC, STRONG_AL },
+       { 0xFEFF, 0x0000, WEAK_BN },
+       { 0xFF01, 0xFF02, NEUTRAL_ON },
+       { 0xFF03, 0xFF05, WEAK_ET },
+       { 0xFF06, 0xFF0A, NEUTRAL_ON },
+       { 0xFF0B, 0x0000, WEAK_ET },
+       { 0xFF0C, 0x0000, WEAK_CS },
+       { 0xFF0D, 0x0000, WEAK_ET },
+       { 0xFF0E, 0x0000, WEAK_CS },
+       { 0xFF0F, 0x0000, WEAK_ES },
+       { 0xFF10, 0xFF19, WEAK_EN },
+       { 0xFF1A, 0x0000, WEAK_CS },
+       { 0xFF1B, 0xFF20, NEUTRAL_ON },
+       { 0xFF3B, 0xFF40, NEUTRAL_ON },
+       { 0xFF5B, 0xFF65, NEUTRAL_ON },
+       { 0xFFE0, 0xFFE1, WEAK_ET },
+       { 0xFFE2, 0xFFE4, NEUTRAL_ON },
+       { 0xFFE5, 0xFFE6, WEAK_ET },
+       { 0xFFE8, 0xFFEE, NEUTRAL_ON },
+       { 0xFFF9, 0xFFFB, WEAK_BN },
+       { 0xFFFC, 0xFFFD, NEUTRAL_ON },
+       { 0x1D167, 0x1D169, WEAK_NSM },
+       { 0x1D173, 0x1D17A, WEAK_BN },
+       { 0x1D17B, 0x1D182, WEAK_NSM },
+       { 0x1D185, 0x1D18B, WEAK_NSM },
+       { 0x1D1AA, 0x1D1AD, WEAK_NSM },
+       { 0x1D7CE, 0x1D7FF, WEAK_EN },
+       { 0xE0001, 0xE007F, WEAK_BN } };
+  int i;
+
+  bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
+  staticpro (&bidi_type_table);
+
+  for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
+    char_table_set_range (bidi_type_table, bidi_type[i].from,
+                         bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
+                         make_number (bidi_type[i].type));
+
+  fallback_paragraph_start_re =
+    XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
+  if (!STRINGP (fallback_paragraph_start_re))
+    fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
+  staticpro (&fallback_paragraph_start_re);
+  Qparagraph_start = intern ("paragraph-start");
+  staticpro (&Qparagraph_start);
+  fallback_paragraph_separate_re =
+    XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
+  if (!STRINGP (fallback_paragraph_separate_re))
+    fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
+  staticpro (&fallback_paragraph_separate_re);
+  Qparagraph_separate = intern ("paragraph-separate");
+  staticpro (&Qparagraph_separate);
+  bidi_initialized = 1;
+}
+
+/* Look up the bidi type of character CH and apply the directional
+   OVERRIDE to it.  BIDI_EOB maps to NEUTRAL_B; any other CH outside
+   0..MAX_CHAR aborts.  When OVERRIDE is NEUTRAL_DIR, the type stored
+   in bidi_type_table is returned unchanged.  */
+bidi_type_t
+bidi_get_type (int ch, bidi_dir_t override)
+{
+  bidi_type_t table_type;
+
+  if (ch == BIDI_EOB)
+    return NEUTRAL_B;
+  if (ch < 0 || ch > MAX_CHAR)
+    abort ();
+
+  table_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+
+  if (override == NEUTRAL_DIR)
+    return table_type;
+
+  /* Although UAX#9 does not tell, it doesn't make sense to override
+     NEUTRAL_B, the explicit embedding/override codes and PDF, or the
+     LRM/RLM characters; those keep the type from the table.  */
+  if (table_type == NEUTRAL_B
+      || table_type == LRE || table_type == LRO
+      || table_type == RLE || table_type == RLO
+      || table_type == PDF
+      || ch == LRM_CHAR || ch == RLM_CHAR)
+    return table_type;
+
+  if (override == L2R)         /* X6 */
+    return STRONG_L;
+  if (override == R2L)
+    return STRONG_R;
+  abort ();                    /* can't happen: handled above */
+}
+
+/* Sanity check: abort unless TYPE lies within the range of bidi type
+   values, UNKNOWN_BT through NEUTRAL_ON inclusive.  */
+void
+bidi_check_type (bidi_type_t type)
+{
+  if (!(type >= UNKNOWN_BT && type <= NEUTRAL_ON))
+    abort ();
+}
+
+/* Map the bidi TYPE of a character to its category: UNKNOWN_BC,
+   STRONG, WEAK or NEUTRAL.  Aborts on a TYPE that is not listed.  */
+bidi_category_t
+bidi_get_category (bidi_type_t type)
+{
+  bidi_category_t category;
+
+  switch (type)
+    {
+    case UNKNOWN_BT:
+      category = UNKNOWN_BC;
+      break;
+
+    case STRONG_L:
+    case STRONG_R:
+    case STRONG_AL:
+    case LRE:
+    case LRO:
+    case RLE:
+    case RLO:
+      category = STRONG;
+      break;
+
+    case PDF:                  /* ??? really?? */
+    case WEAK_EN:
+    case WEAK_ES:
+    case WEAK_ET:
+    case WEAK_AN:
+    case WEAK_CS:
+    case WEAK_NSM:
+    case WEAK_BN:
+      category = WEAK;
+      break;
+
+    case NEUTRAL_B:
+    case NEUTRAL_S:
+    case NEUTRAL_WS:
+    case NEUTRAL_ON:
+      category = NEUTRAL;
+      break;
+
+    default:
+      abort ();
+    }
+
+  return category;
+}
+
+/* Return the mirror image of character C, or C itself when it has
+   none.  Only the ASCII pairs (), <>, [] and {} are known.
+
+   Note: The conditions in UAX#9 clause L4 must be tested by the
+   caller.  */
+/* FIXME: exceedingly temporary!  Should consult the Unicode database
+   of character properties.  */
+int
+bidi_mirror_char (int c)
+{
+  /* Each character sits next to its mirror, so flipping the low bit
+     of its index yields the partner.  */
+  static const char mirrored_pairs[] = "()<>[]{}";
+  const char *hit;
+
+  /* Non-ASCII characters, and NUL (which strchr would match at the
+     terminator), have no entry.  */
+  if (c <= 0 || c >= 128)
+    return c;
+
+  hit = strchr (mirrored_pairs, c);
+  if (hit == NULL)
+    return c;
+
+  return mirrored_pairs[(size_t) (hit - mirrored_pairs) ^ 1];
+}
+
+/* Copy the bidi iterator state FROM into TO.  Everything that
+   precedes the level stack in the struct is copied wholesale; of the
+   stack itself only slots 0..FROM->stack_idx are copied, to save
+   cycles.  Members located after the level stack are not touched.  */
+static inline void
+bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
+{
+  /* Byte offset of the level stack within the struct.  */
+  const size_t prefix_len
+    = (size_t) &((struct bidi_it *) 0)->level_stack[0];
+  int lvl = 0;
+
+  memcpy (to, from, prefix_len);
+
+  /* Level zero is always in use, hence at least one iteration.  */
+  do
+    {
+      to->level_stack[lvl] = from->level_stack[lvl];
+      lvl++;
+    }
+  while (lvl <= from->stack_idx);
+}
+
+/* Caching the bidi iterator states.  The cache is a fixed-size array
+   of iterator states; bidi_cache_iterator_state aborts rather than
+   store past its end.  */
+
+static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
+static int bidi_cache_idx; /* first unused slot; 0 means the cache is empty */
+static int bidi_cache_last_idx; /* slot most recently stored or fetched; -1 after a reset */
+
+/* Empty the cache: forget the last used slot and mark slot zero as
+   the first unused one.  The slots themselves are not cleared.  */
+static inline void
+bidi_cache_reset (void)
+{
+  bidi_cache_last_idx = -1;
+  bidi_cache_idx = 0;
+}
+
+/* Load the state cached in slot IDX into BIDI_IT, keeping BIDI_IT's
+   own scan direction, and make IDX the last used slot.  Aborts if IDX
+   is not a slot currently in use.  */
+static inline void
+bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
+{
+  const int saved_scan_dir = bidi_it->scan_dir;
+
+  if (!(idx >= 0 && idx < bidi_cache_idx))
+    abort ();
+
+  bidi_copy_it (bidi_it, &bidi_cache[idx]);
+  /* The copy brought in the cached scan direction; put ours back.  */
+  bidi_it->scan_dir = saved_scan_dir;
+  bidi_cache_last_idx = idx;
+}
+
+/* Return the index of a cache slot whose character position is
+   CHARPOS and whose resolved embedding level is less or equal to
+   LEVEL, or -1 if there is none.  If LEVEL is -1, the resolved levels
+   of the cached states are disregarded.  DIR, if non-zero, asks for a
+   search in that direction starting at the last used slot; it is
+   overridden whenever CHARPOS differs from the position cached in
+   that slot.  With DIR zero and CHARPOS equal to that position, the
+   search runs backwards from the last slot in use.  */
+static inline int
+bidi_cache_search (int charpos, int level, int dir)
+{
+  int slot;
+
+  if (bidi_cache_idx == 0)
+    return -1;
+
+  /* Head towards CHARPOS relative to the last used slot.  */
+  if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
+    dir = -1;
+  else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
+    dir = 1;
+
+  if (dir != 0)
+    slot = bidi_cache_last_idx;
+  else
+    {
+      dir = -1;
+      slot = bidi_cache_idx - 1;
+    }
+
+  /* Linear search for now; FIXME!  */
+  if (dir < 0)
+    {
+      while (slot >= 0)
+        {
+          if (bidi_cache[slot].charpos == charpos
+              && (level == -1 || bidi_cache[slot].resolved_level <= level))
+            return slot;
+          slot--;
+        }
+    }
+  else
+    {
+      while (slot < bidi_cache_idx)
+        {
+          if (bidi_cache[slot].charpos == charpos
+              && (level == -1 || bidi_cache[slot].resolved_level <= level))
+            return slot;
+          slot++;
+        }
+    }
+
+  return -1;
+}
+
+/* Find a cached state where the resolved level changes to a value
+   that is lower than LEVEL, and return its cache slot index.  DIR is
+   the direction to search, starting with the last used cache slot.
+   BEFORE, if non-zero, means return the index of the slot that is
+   ``before'' the level change in the search direction.  That is,
+   given the cached levels like this:
+
+        1122333442211
+         AB        C
+
+   and assuming we are at the position cached at the slot marked with
+   C, searching backwards (DIR = -1) for LEVEL = 2 will return the
+   index of slot B or A, depending whether BEFORE is, respectively,
+   non-zero or zero.
+
+   If DIR is zero, the search runs backwards from the last slot in
+   use.  Slots whose resolved level is negative (i.e. not yet
+   resolved) never match.  Value is -1 if the cache is empty or no
+   such slot is found.  */
+static int
+bidi_cache_find_level_change (int level, int dir, int before)
+{
+  if (bidi_cache_idx)
+    {
+      int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
+      int incr = before ? 1 : 0; /* offset of the slot whose level is tested */
+
+      if (!dir)
+       dir = -1;
+      else if (!incr)
+       i += dir; /* skip the last used slot itself */
+
+      if (dir < 0)
+       {
+         while (i >= incr) /* keeps i - incr within the cache */
+           {
+             if (bidi_cache[i - incr].resolved_level >= 0
+                 && bidi_cache[i - incr].resolved_level < level)
+               return i;
+             i--;
+           }
+       }
+      else
+       {
+         while (i < bidi_cache_idx - incr) /* keeps i + incr within the used slots */
+           {
+             if (bidi_cache[i + incr].resolved_level >= 0
+                 && bidi_cache[i + incr].resolved_level < level)
+               return i;
+             i++;
+           }
+       }
+    }
+
+  return -1;
+}
+
+static inline void
+bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
+{
+  int idx;
+
+  /* Record the state of BIDI_IT in the cache slot that corresponds to
+     its character position, appending a new slot if that position is
+     not cached yet.  If RESOLVED is zero, the slot's resolved level
+     is recorded as -1.  We should never cache on backward scans.  */
+  if (bidi_it->scan_dir == -1)
+    abort ();
+  idx = bidi_cache_search (bidi_it->charpos, -1, 1);
+
+  if (idx < 0)
+    {
+      idx = bidi_cache_idx; /* append at the first unused slot */
+      /* Don't overrun the cache limit.  */
+      if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
+       abort ();
+      /* Don't violate cache integrity: character positions should
+        correspond to cache positions 1:1.  */
+      if (idx > 0 && bidi_it->charpos != bidi_cache[idx - 1].charpos + 1)
+       abort ();
+      bidi_copy_it (&bidi_cache[idx], bidi_it);
+      if (!resolved)
+       bidi_cache[idx].resolved_level = -1;
+      bidi_cache[idx].new_paragraph = 0;
+    }
+  else
+    {
+      /* Copy only the members which could have changed, to avoid
+        costly copying of the entire struct.  */
+      bidi_cache[idx].type = bidi_it->type;
+      bidi_check_type (bidi_it->type);
+      bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
+      bidi_check_type (bidi_it->type_after_w1);
+      if (resolved)
+       bidi_cache[idx].resolved_level = bidi_it->resolved_level;
+      else
+       bidi_cache[idx].resolved_level = -1;
+      bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
+      bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
+      bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
+      bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
+      bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
+    }
+
+  bidi_cache_last_idx = idx;
+  if (idx >= bidi_cache_idx) /* a slot was appended: grow the used part */
+    bidi_cache_idx = idx + 1;
+}
+
+/* Look for a cached state at CHARPOS whose resolved level is less or
+   equal to LEVEL (any level if LEVEL is -1), searching in BIDI_IT's
+   scan direction.  If one is found, load it into BIDI_IT, make its
+   slot the last used one, and return the bidi type it holds.
+   Otherwise return UNKNOWN_BT and leave BIDI_IT alone.  */
+static inline bidi_type_t
+bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
+{
+  int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
+
+  if (i < 0)
+    return UNKNOWN_BT;
+
+  /* bidi_cache_fetch_state copies the slot and records it as the last
+     used one.  It also doesn't let the scan direction from the cached
+     state override the current scan direction, which it saves as a
+     plain int, the way scan_dir is used elsewhere in this file.  */
+  bidi_cache_fetch_state (i, bidi_it);
+  return bidi_it->type;
+}
+
+/* Return the resolved level cached in the slot adjacent to the last
+   used one, in BIDI_IT's scan direction.  Aborts if the cache is
+   empty or no slot has been used yet.  The adjacent slot index is not
+   range-checked here.  */
+static inline int
+bidi_peek_at_next_level (struct bidi_it *bidi_it)
+{
+  int neighbor;
+
+  if (!(bidi_cache_idx != 0 && bidi_cache_last_idx != -1))
+    abort ();
+
+  neighbor = bidi_cache_last_idx + bidi_it->scan_dir;
+  return bidi_cache[neighbor].resolved_level;
+}
+
+/* Classify buffer position CHARPOS/BYTEPOS with respect to paragraph
+   boundaries.  If the paragraph-separate regexp matches there, value
+   is the (non-negative) result of that match, i.e. the length of the
+   paragraph separator following the position.  Otherwise value is -1
+   if the paragraph-start regexp matches (beginning of a new
+   paragraph), or -2 if neither matches.  Buffer-local values of the
+   two regexps are used when they are strings, the fallback regexps
+   otherwise.  */
+EMACS_INT
+bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
+{
+  Lisp_Object separate_re = Fbuffer_local_value (Qparagraph_separate,
+                                                 Fcurrent_buffer ());
+  Lisp_Object para_start_re = Fbuffer_local_value (Qparagraph_start,
+                                                   Fcurrent_buffer ());
+  EMACS_INT sep_len;
+
+  if (!STRINGP (separate_re))
+    separate_re = fallback_paragraph_separate_re;
+  if (!STRINGP (para_start_re))
+    para_start_re = fallback_paragraph_start_re;
+
+  sep_len = fast_looking_at (separate_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
+  if (sep_len >= 0)
+    return sep_len;
+
+  return (fast_looking_at (para_start_re, charpos, bytepos,
+                           ZV, ZV_BYTE, Qnil) >= 0
+          ? -1 : -2);
+}
+
+/* Compute the start-of-run (sor) direction for a level run bounded
+   by embedding levels LEVEL_BEFORE and LEVEL_AFTER, and store it in
+   BIDI_IT.  Also reset the remembered previous, last strong and
+   surrounding-neutral characters, since that info is generally valid
+   only within a single level run.  */
+static inline void
+bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
+{
+  int higher_level = level_after;
+
+  if (level_before > level_after)
+    higher_level = level_before;
+
+  /* The prev_was_pdf gork is required for when we have several PDFs
+     in a row.  In that case, we want to compute the sor type for the
+     next level run only once: when we see the first PDF.  That's
+     because the sor type depends only on the higher of the two levels
+     that we find on the two sides of the level boundary (see UAX#9,
+     clause X10), and so we don't need to know the final embedding
+     level to which we descend after processing all the PDFs.  */
+  if (level_before < level_after || !bidi_it->prev_was_pdf)
+    {
+      /* FIXME: should the default sor direction be user selectable?  */
+      if (higher_level & 1)
+        bidi_it->sor = R2L;
+      else
+        bidi_it->sor = L2R;
+    }
+  if (level_before > level_after)
+    bidi_it->prev_was_pdf = 1;
+
+  bidi_it->prev.type = UNKNOWN_BT;
+
+  bidi_it->last_strong.orig_type = UNKNOWN_BT;
+  bidi_it->last_strong.type_after_w1 = UNKNOWN_BT;
+  bidi_it->last_strong.type = UNKNOWN_BT;
+
+  /* The run starts as if preceded by a strong character of the sor
+     direction, located at the current position.  */
+  if (bidi_it->sor == R2L)
+    bidi_it->prev_for_neutral.type = STRONG_R;
+  else
+    bidi_it->prev_for_neutral.type = STRONG_L;
+  bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
+  bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
+
+  bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
+  bidi_it->next_for_neutral.type_after_w1 = UNKNOWN_BT;
+  bidi_it->next_for_neutral.type = UNKNOWN_BT;
+
+  bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
+}
+
+/* Prepare BIDI_IT for iterating over a new line: scan forward, start
+   at the base embedding level with no override and no invalid levels,
+   forget lookahead info, compute the sor type, and empty the cache.  */
+static void
+bidi_line_init (struct bidi_it *bidi_it)
+{
+  const int para_level = bidi_it->paragraph_dir == R2L ? 1 : 0;
+
+  bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
+  bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
+  bidi_it->resolved_level = bidi_it->level_stack[0].level;
+  bidi_it->next_for_ws.type = UNKNOWN_BT;
+  bidi_it->next_en_pos = -1;
+  bidi_it->invalid_rl_levels = -1;
+  bidi_it->invalid_levels = 0;
+
+  /* X10 */
+  bidi_set_sor_type (bidi_it, para_level, bidi_it->level_stack[0].level);
+
+  bidi_cache_reset ();
+}
+
+/* Starting at character position POS (byte position POS_BYTE), step
+   back through the buffer with find_next_newline_no_quit until the
+   paragraph-start regexp matches or BEGV_BYTE is reached.  Value is
+   the byte position of the paragraph's beginning.  */
+static EMACS_INT
+bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
+{
+  Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start,
+                                              Fcurrent_buffer ());
+  const EMACS_INT end = ZV, end_byte = ZV_BYTE;
+
+  if (!STRINGP (start_re))
+    start_re = fallback_paragraph_start_re;
+
+  for (;;)
+    {
+      if (pos_byte <= BEGV_BYTE)
+        break;
+      if (fast_looking_at (start_re, pos, pos_byte, end, end_byte, Qnil) >= 0)
+        break;
+      pos = find_next_newline_no_quit (pos - 1, -1);
+      pos_byte = CHAR_TO_BYTE (pos);
+    }
+
+  return pos_byte;
+}
+
+/* Determine the direction, a.k.a. base embedding level, of the
+   paragraph we are about to iterate through.  If DIR is either L2R or
+   R2L, just use that.  Otherwise, determine the paragraph direction
+   from the first strong character of the paragraph.
+
+   Note that this gives the paragraph separator the same direction as
+   the preceding paragraph, even though Emacs generally views the
+   separator as not belonging to any paragraph.
+
+   An empty buffer is treated as L2R.  Otherwise this aborts if
+   BIDI_IT's byte position is at or beyond ZV_BYTE or before
+   BEGV_BYTE, or if DIR is none of L2R, R2L and NEUTRAL_DIR.  Unless
+   it returns early because the iterator is still inside a paragraph
+   separator, the function finishes by setting the base level in
+   level_stack[0] and calling bidi_line_init.  */
+void
+bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
+{
+  EMACS_INT bytepos = bidi_it->bytepos;
+
+  /* Special case for an empty buffer. */
+  if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
+    dir = L2R;
+  /* We should never be called at EOB or before BEGV.  */
+  else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
+    abort ();
+
+  if (dir == L2R)
+    {
+      bidi_it->paragraph_dir = L2R;
+      bidi_it->new_paragraph = 0;
+    }
+  else if (dir == R2L)
+    {
+      bidi_it->paragraph_dir = R2L;
+      bidi_it->new_paragraph = 0;
+    }
+  else if (dir == NEUTRAL_DIR) /* P2 */
+    {
+      int ch, ch_len;
+      EMACS_INT pos;
+      bidi_type_t type;
+      EMACS_INT sep_len;
+
+      /* If we are inside a paragraph separator, we are just waiting
+        for the separator to be exhausted; use the previous paragraph
+        direction.  But don't do that if we have been just reseated,
+        because we need to reinitialize below in that case.  */
+      if (!bidi_it->first_elt
+         && bidi_it->charpos < bidi_it->separator_limit)
+       return;
+
+      /* If we are on a newline, get past it to where the next
+        paragraph might start.  But don't do that at BEGV since then
+        we are potentially in a new paragraph that doesn't yet
+        exist.  */
+      pos = bidi_it->charpos;
+      if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
+       {
+         bytepos++;
+         pos++;
+       }
+
+      /* We are either at the beginning of a paragraph or in the
+        middle of it.  Find where this paragraph starts.  */
+      bytepos = bidi_find_paragraph_start (pos, bytepos);
+
+      /* We should always be at the beginning of a new line at this
+        point.  */
+      if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n'))
+       abort ();
+
+      bidi_it->separator_limit = -1;
+      bidi_it->new_paragraph = 0;
+      ch = FETCH_CHAR (bytepos);
+      ch_len = CHAR_BYTES (ch);
+      pos = BYTE_TO_CHAR (bytepos);
+      type = bidi_get_type (ch, NEUTRAL_DIR);
+
+      for (pos++, bytepos += ch_len;
+          /* NOTE: UAX#9 says to search only for L, AL, or R types of
+             characters, and ignore RLE, RLO, LRE, and LRO.  However,
+             I'm not sure it makes sense to omit those 4; should try
+             with and without that to see the effect.  */
+          (bidi_get_category (type) != STRONG)
+            || (bidi_ignore_explicit_marks_for_paragraph_level
+                && (type == RLE || type == RLO
+                    || type == LRE || type == LRO));
+          type = bidi_get_type (ch, NEUTRAL_DIR))
+       {
+         /* POS/BYTEPOS are already one character past the one whose
+            TYPE is being examined.  */
+         if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
+           break;
+         if (bytepos >= ZV_BYTE)
+           {
+             /* Pretend there's a paragraph separator at end of buffer.  */
+             type = NEUTRAL_B;
+             break;
+           }
+         FETCH_CHAR_ADVANCE (ch, pos, bytepos);
+       }
+      /* Any other TYPE leaves paragraph_dir as it was.  */
+      if (type == STRONG_R || type == STRONG_AL) /* P3 */
+       bidi_it->paragraph_dir = R2L;
+      else if (type == STRONG_L)
+       bidi_it->paragraph_dir = L2R;
+    }
+  else
+    abort ();
+
+  /* Contrary to UAX#9 clause P3, we only default the paragraph
+     direction to L2R if we have no previous usable paragraph
+     direction.  */
+  if (bidi_it->paragraph_dir == NEUTRAL_DIR)
+    bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
+  if (bidi_it->paragraph_dir == R2L)
+    bidi_it->level_stack[0].level = 1;
+  else
+    bidi_it->level_stack[0].level = 0;
+
+  bidi_line_init (bidi_it);
+}
+
+/* Apply UAX#9 clause X8 at the end of a paragraph: drop all explicit
+   embeddings by returning to the bottom of the level stack, clear the
+   invalid-level counters, and fall back to the base level.  */
+static inline void
+bidi_set_paragraph_end (struct bidi_it *bidi_it)
+{
+  bidi_it->stack_idx = 0;
+  bidi_it->resolved_level = bidi_it->level_stack[0].level;
+  bidi_it->invalid_rl_levels = -1;
+  bidi_it->invalid_levels = 0;
+}
+
+/* Set up BIDI_IT to start iterating at buffer position
+   CHARPOS/BYTEPOS, as the first element of a new paragraph with no
+   remembered context.  Runs the one-time bidi initialization first if
+   that has not happened yet.  */
+void
+bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
+{
+  if (! bidi_initialized)
+    bidi_initialize ();
+
+  /* Position.  */
+  bidi_it->charpos = charpos;
+  bidi_it->bytepos = bytepos;
+  bidi_it->first_elt = 1;
+
+  /* Paragraph state.  */
+  bidi_set_paragraph_end (bidi_it);
+  bidi_it->new_paragraph = 1;
+  bidi_it->separator_limit = -1;
+
+  /* Types of the current character.  */
+  bidi_it->type = NEUTRAL_B;
+  bidi_it->type_after_w1 = UNKNOWN_BT;
+  bidi_it->orig_type = UNKNOWN_BT;
+  bidi_it->prev_was_pdf = 0;
+
+  /* Nothing has been seen yet.  */
+  bidi_it->prev.type_after_w1 = UNKNOWN_BT;
+  bidi_it->prev.type = UNKNOWN_BT;
+
+  bidi_it->last_strong.orig_type = UNKNOWN_BT;
+  bidi_it->last_strong.type_after_w1 = UNKNOWN_BT;
+  bidi_it->last_strong.type = UNKNOWN_BT;
+
+  bidi_it->next_for_neutral.charpos = -1;
+  bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
+  bidi_it->next_for_neutral.type_after_w1 = UNKNOWN_BT;
+  bidi_it->next_for_neutral.type = UNKNOWN_BT;
+
+  bidi_it->prev_for_neutral.charpos = -1;
+  bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
+  bidi_it->prev_for_neutral.type_after_w1 = UNKNOWN_BT;
+  bidi_it->prev_for_neutral.type = UNKNOWN_BT;
+
+  bidi_it->sor = L2R;   /* FIXME: should it be user-selectable? */
+}
+
+/* Open a new entry on top of BIDI_IT's level stack and record LEVEL
+   and OVERRIDE in it, making them the current embedding level and
+   override status.  Aborts if the stack would reach BIDI_MAXLEVEL
+   entries.  */
+static inline void
+bidi_push_embedding_level (struct bidi_it *bidi_it,
+                           int level, bidi_dir_t override)
+{
+  if (++bidi_it->stack_idx >= BIDI_MAXLEVEL)
+    abort ();
+
+  bidi_it->level_stack[bidi_it->stack_idx].override = override;
+  bidi_it->level_stack[bidi_it->stack_idx].level = level;
+}
+
+/* Discard the top entry of BIDI_IT's level stack, unless only the
+   bottom entry is left, and return the embedding level of the entry
+   that is on top afterwards.  */
+static inline int
+bidi_pop_embedding_level (struct bidi_it *bidi_it)
+{
+  /* UAX#9 says to ignore invalid PDFs, so never pop the base entry.  */
+  if (bidi_it->stack_idx >= 1)
+    bidi_it->stack_idx -= 1;
+
+  return bidi_it->level_stack[bidi_it->stack_idx].level;
+}
+
+/* Store in SAVED_INFO the position and the three bidi types (type,
+   type_after_w1, orig_type) of the character BIDI_IT is at.  Aborts
+   if any of those types is out of range.  */
+static inline void
+bidi_remember_char (struct bidi_saved_info *saved_info,
+                    struct bidi_it *bidi_it)
+{
+  /* Validate first; a bad type aborts before anything is stored.  */
+  bidi_check_type (bidi_it->type);
+  bidi_check_type (bidi_it->type_after_w1);
+  bidi_check_type (bidi_it->orig_type);
+
+  saved_info->charpos = bidi_it->charpos;
+  saved_info->bytepos = bidi_it->bytepos;
+  saved_info->type = bidi_it->type;
+  saved_info->type_after_w1 = bidi_it->type_after_w1;
+  saved_info->orig_type = bidi_it->orig_type;
+}
+
+/* Return the strong type a neutral character resolves to, given the
+   types PREV_TYPE and NEXT_TYPE of the text on its two sides and the
+   embedding level LEV.  */
+static inline bidi_type_t
+bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
+{
+  /* N1: European and Arabic numbers are treated as though they were R.  */
+  const bidi_type_t after
+    = (next_type == WEAK_EN || next_type == WEAK_AN) ? STRONG_R : next_type;
+  const bidi_type_t before
+    = (prev_type == WEAK_EN || prev_type == WEAK_AN) ? STRONG_R : prev_type;
+
+  /* N1: both sides agree, so the neutral takes their type.  */
+  if (after == before)
+    return after;
+
+  /* N2: otherwise it takes the direction of the embedding level.  */
+  return (lev & 1) != 0 ? STRONG_R : STRONG_L;
+}
+
+/* Return 1 if C is one of the explicit directional formatting
+   characters LRE, LRO, RLE, RLO or PDF, and 0 otherwise.  */
+static inline int
+bidi_explicit_dir_char (int c)
+{
+  /* FIXME: this should be replaced with a lookup table with suitable
+     bits set, like standard C ctype macros do.  */
+  if (c == LRE_CHAR || c == LRO_CHAR)
+    return 1;
+  if (c == RLE_CHAR || c == RLO_CHAR)
+    return 1;
+  return c == PDF_CHAR;
+}
+
+/* A helper function for bidi_resolve_explicit.  It advances to the
+   next character in logical order and determines the new embedding
+   level and directional override, but does not take into account
+   empty embeddings.  */
+static int
+bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
+{
+  int curchar;
+  bidi_type_t type;
+  int current_level;
+  int new_level;
+  bidi_dir_t override;
+
+  if (bidi_it->bytepos < BEGV_BYTE     /* after reseat to BEGV? */
+      || bidi_it->first_elt)
+    {
+      bidi_it->first_elt = 0;
+      if (bidi_it->charpos < BEGV)
+       bidi_it->charpos = BEGV;
+      bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+    }
+  else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
+    {
+      bidi_it->charpos++;
+      if (bidi_it->ch_len == 0)
+       abort ();
+      bidi_it->bytepos += bidi_it->ch_len;
+    }
+
+  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
+  override = bidi_it->level_stack[bidi_it->stack_idx].override;
+  new_level = current_level;
+
+  /* in case it is a unibyte character (not yet implemented) */
+  /* _fetch_multibyte_char_len = 1; */
+  if (bidi_it->bytepos >= ZV_BYTE)
+    {
+      curchar = BIDI_EOB;
+      bidi_it->ch_len = 1;
+    }
+  else
+    {
+      curchar = FETCH_CHAR (bidi_it->bytepos);
+      bidi_it->ch_len = CHAR_BYTES (curchar);
+    }
+  bidi_it->ch = curchar;
+
+  /* Don't apply directional override here, as all the types we handle
+     below will not be affected by the override anyway, and we need
+     the original type unaltered.  The override will be applied in
+     bidi_resolve_weak.  */
+  type = bidi_get_type (curchar, NEUTRAL_DIR);
+  bidi_it->orig_type = type;
+  bidi_check_type (bidi_it->orig_type);
+
+  if (type != PDF)
+    bidi_it->prev_was_pdf = 0;
+
+  bidi_it->type_after_w1 = UNKNOWN_BT;
+
+  switch (type)
+    {
+      case RLE:        /* X2 */
+      case RLO:        /* X4 */
+       bidi_it->type_after_w1 = type;
+       bidi_check_type (bidi_it->type_after_w1);
+       type = WEAK_BN; /* X9/Retaining */
+       if (bidi_it->ignore_bn_limit <= 0)
+         {
+           if (current_level <= BIDI_MAXLEVEL - 4)
+             {
+               /* Compute the least odd embedding level greater than
+                  the current level.  */
+               new_level = ((current_level + 1) & ~1) + 1;
+               if (bidi_it->type_after_w1 == RLE)
+                 override = NEUTRAL_DIR;
+               else
+                 override = R2L;
+               if (current_level == BIDI_MAXLEVEL - 4)
+                 bidi_it->invalid_rl_levels = 0;
+               bidi_push_embedding_level (bidi_it, new_level, override);
+             }
+           else
+             {
+               bidi_it->invalid_levels++;
+               /* See the commentary about invalid_rl_levels below.  */
+               if (bidi_it->invalid_rl_levels < 0)
+                 bidi_it->invalid_rl_levels = 0;
+               bidi_it->invalid_rl_levels++;
+             }
+         }
+       else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
+                || bidi_it->next_en_pos > bidi_it->charpos)
+         type = WEAK_EN;
+       break;
+      case LRE:        /* X3 */
+      case LRO:        /* X5 */
+       bidi_it->type_after_w1 = type;
+       bidi_check_type (bidi_it->type_after_w1);
+       type = WEAK_BN; /* X9/Retaining */
+       if (bidi_it->ignore_bn_limit <= 0)
+         {
+           if (current_level <= BIDI_MAXLEVEL - 5)
+             {
+               /* Compute the least even embedding level greater than
+                  the current level.  */
+               new_level = ((current_level + 2) & ~1);
+               if (bidi_it->type_after_w1 == LRE)
+                 override = NEUTRAL_DIR;
+               else
+                 override = L2R;
+               bidi_push_embedding_level (bidi_it, new_level, override);
+             }
+           else
+             {
+               bidi_it->invalid_levels++;
+               /* invalid_rl_levels counts invalid levels encountered
+                  while the embedding level was already too high for
+                  LRE/LRO, but not for RLE/RLO.  That is because
+                  there may be exactly one PDF which we should not
+                  ignore even though invalid_levels is non-zero.
+                  invalid_rl_levels helps to know what PDF is
+                  that.  */
+               if (bidi_it->invalid_rl_levels >= 0)
+                 bidi_it->invalid_rl_levels++;
+             }
+         }
+       else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
+                || bidi_it->next_en_pos > bidi_it->charpos)
+         type = WEAK_EN;
+       break;
+      case PDF:        /* X7 */
+       bidi_it->type_after_w1 = type;
+       bidi_check_type (bidi_it->type_after_w1);
+       type = WEAK_BN; /* X9/Retaining */
+       if (bidi_it->ignore_bn_limit <= 0)
+         {
+           if (!bidi_it->invalid_rl_levels)
+             {
+               new_level = bidi_pop_embedding_level (bidi_it);
+               bidi_it->invalid_rl_levels = -1;
+               if (bidi_it->invalid_levels)
+                 bidi_it->invalid_levels--;
+               /* else nothing: UAX#9 says to ignore invalid PDFs */
+             }
+           if (!bidi_it->invalid_levels)
+             new_level = bidi_pop_embedding_level (bidi_it);
+           else
+             {
+               bidi_it->invalid_levels--;
+               bidi_it->invalid_rl_levels--;
+             }
+         }
+       else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
+                || bidi_it->next_en_pos > bidi_it->charpos)
+         type = WEAK_EN;
+       break;
+      default:
+       /* Nothing.  */
+       break;
+    }
+
+  bidi_it->type = type;
+  bidi_check_type (bidi_it->type);
+
+  return new_level;
+}
+
+/* Given an iterator state in BIDI_IT, advance one character position
+   in the buffer to the next character (in the logical order), resolve
+   any explicit embeddings and directional overrides, and return the
+   embedding level of the character after resolving explicit
+   directives and ignoring empty embeddings.
+
+   When the character raises the embedding level and is immediately
+   followed by more explicit directional characters, this looks ahead
+   over them and then restores BIDI_IT to the character it started
+   on.  The outcome of the lookahead is recorded in
+   BIDI_IT->ignore_bn_limit: a positive character position if the
+   level came back to where it started (an empty embedding), or -1
+   if it did not.  In the empty case the level pushed for this
+   character is undone before returning.  */
+static int
+bidi_resolve_explicit (struct bidi_it *bidi_it)
+{
+  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+  int new_level  = bidi_resolve_explicit_1 (bidi_it);
+
+  if (prev_level < new_level
+      && bidi_it->type == WEAK_BN
+      && bidi_it->ignore_bn_limit == 0 /* only if not already known */
+      && bidi_it->ch != BIDI_EOB       /* not already at EOB */
+      && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
+                                            + bidi_it->ch_len)))
+    {
+      /* Avoid pushing and popping embedding levels if the level run
+        is empty, as this breaks level runs where it shouldn't.
+        UAX#9 removes all the explicit embedding and override codes,
+        so empty embeddings disappear without a trace.  We need to
+        behave as if we did the same.  */
+      struct bidi_it saved_it;
+      int level = prev_level;
+
+      /* Keep a copy of the current state: the lookahead below works
+         directly on BIDI_IT.  */
+      bidi_copy_it (&saved_it, bidi_it);
+
+      /* Process the explicit directional characters that follow,
+         tracking the level in effect after each one.  */
+      while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
+                                                + bidi_it->ch_len)))
+       {
+         level = bidi_resolve_explicit_1 (bidi_it);
+       }
+
+      if (level == prev_level) /* empty embedding */
+       saved_it.ignore_bn_limit = bidi_it->charpos + 1;
+      else                     /* this embedding is non-empty */
+       saved_it.ignore_bn_limit = -1;
+
+      /* Go back to the saved state, which now carries the
+         ignore_bn_limit verdict computed above.  */
+      bidi_copy_it (bidi_it, &saved_it);
+      if (bidi_it->ignore_bn_limit > 0)
+       {
+         /* We pushed a level, but we shouldn't have.  Undo that. */
+         if (!bidi_it->invalid_rl_levels)
+           {
+             new_level = bidi_pop_embedding_level (bidi_it);
+             bidi_it->invalid_rl_levels = -1;
+             if (bidi_it->invalid_levels)
+               bidi_it->invalid_levels--;
+           }
+         if (!bidi_it->invalid_levels)
+           new_level = bidi_pop_embedding_level (bidi_it);
+         else
+           {
+             bidi_it->invalid_levels--;
+             bidi_it->invalid_rl_levels--;
+           }
+       }
+    }
+
+  if (bidi_it->type == NEUTRAL_B)      /* X8 */
+    {
+      bidi_set_paragraph_end (bidi_it);
+      /* This is needed by bidi_resolve_weak below, and in L1.  */
+      bidi_it->type_after_w1 = bidi_it->type;
+      bidi_check_type (bidi_it->type_after_w1);
+    }
+
+  return new_level;
+}
+
+/* Advance in the buffer, resolve weak types and return the type of
+   the next character after weak type resolution.
+
+   The advancing is done by bidi_resolve_explicit.  The returned type
+   is also stored in BIDI_IT->type, and BIDI_IT->type_after_w1 is set
+   to the type reached after W6 unless it was already set to
+   something other than UNKNOWN_BT.  The W1..W7 and X6 tags in the
+   comments below name the UAX#9 clauses being implemented.  */
+bidi_type_t
+bidi_resolve_weak (struct bidi_it *bidi_it)
+{
+  bidi_type_t type;
+  bidi_dir_t override;
+  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+  int new_level  = bidi_resolve_explicit (bidi_it);
+  int next_char;
+  bidi_type_t type_of_next;
+  struct bidi_it saved_it;
+
+  type = bidi_it->type;
+  override = bidi_it->level_stack[bidi_it->stack_idx].override;
+
+  /* None of these types is expected to come out of
+     bidi_resolve_explicit; seeing one is treated as a fatal internal
+     error.  */
+  if (type == UNKNOWN_BT
+      || type == LRE
+      || type == LRO
+      || type == RLE
+      || type == RLO
+      || type == PDF)
+    abort ();
+
+  if (new_level != prev_level
+      || bidi_it->type == NEUTRAL_B)
+    {
+      /* We've got a new embedding level run, compute the directional
+         type of sor and initialize per-run variables (UAX#9, clause
+         X10).  */
+      bidi_set_sor_type (bidi_it, prev_level, new_level);
+    }
+  else if (type == NEUTRAL_S || type == NEUTRAL_WS
+          || type == WEAK_BN || type == STRONG_AL)
+    bidi_it->type_after_w1 = type;     /* needed in L1 */
+  bidi_check_type (bidi_it->type_after_w1);
+
+  /* Level and directional override status are already recorded in
+     bidi_it, and do not need any change; see X6.  */
+  if (override == R2L)         /* X6 */
+    type = STRONG_R;
+  else if (override == L2R)
+    type = STRONG_L;
+  else
+    {
+      if (type == WEAK_NSM)    /* W1 */
+       {
+         /* Note that we don't need to consider the case where the
+            prev character has its type overridden by an RLO or LRO:
+            such characters are outside the current level run, and
+            thus not relevant to this NSM.  Thus, NSM gets the
+            orig_type of the previous character.  */
+         if (bidi_it->prev.type != UNKNOWN_BT)
+           type = bidi_it->prev.orig_type;
+         else if (bidi_it->sor == R2L)
+           type = STRONG_R;
+         else if (bidi_it->sor == L2R)
+           type = STRONG_L;
+         else /* shouldn't happen! */
+           abort ();
+       }
+      if (type == WEAK_EN      /* W2 */
+         && bidi_it->last_strong.type_after_w1 == STRONG_AL)
+       type = WEAK_AN;
+      else if (type == STRONG_AL) /* W3 */
+       type = STRONG_R;
+      else if ((type == WEAK_ES        /* W4 */
+               && bidi_it->prev.type_after_w1 == WEAK_EN
+               && bidi_it->prev.orig_type == WEAK_EN)
+              || (type == WEAK_CS
+                  && ((bidi_it->prev.type_after_w1 == WEAK_EN
+                       && bidi_it->prev.orig_type == WEAK_EN)
+                      || bidi_it->prev.type_after_w1 == WEAK_AN)))
+       {
+         /* W4 depends on what follows this separator: fetch the next
+            character, or use BIDI_EOB if its byte position is at or
+            past ZV_BYTE.  */
+         next_char =
+           bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
+           ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+         type_of_next = bidi_get_type (next_char, override);
+
+         if (type_of_next == WEAK_BN
+             || bidi_explicit_dir_char (next_char))
+           {
+             /* Step over BNs that stay at this level to find the
+                type that follows them, then restore the state saved
+                here.  */
+             bidi_copy_it (&saved_it, bidi_it);
+             while (bidi_resolve_explicit (bidi_it) == new_level
+                    && bidi_it->type == WEAK_BN)
+               ;
+             type_of_next = bidi_it->type;
+             bidi_copy_it (bidi_it, &saved_it);
+           }
+
+         /* If the next character is EN, but the last strong-type
+            character is AL, that next EN will be changed to AN when
+            we process it in W2 above.  So in that case, this ES
+            should not be changed into EN.  */
+         if (type == WEAK_ES
+             && type_of_next == WEAK_EN
+             && bidi_it->last_strong.type_after_w1 != STRONG_AL)
+           type = WEAK_EN;
+         else if (type == WEAK_CS)
+           {
+             if (bidi_it->prev.type_after_w1 == WEAK_AN
+                 && (type_of_next == WEAK_AN
+                     /* If the next character is EN, but the last
+                        strong-type character is AL, EN will be later
+                        changed to AN when we process it in W2 above.
+                        So in that case, this ES should not be
+                        changed into EN.  */
+                     || (type_of_next == WEAK_EN
+                         && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
+               type = WEAK_AN;
+             else if (bidi_it->prev.type_after_w1 == WEAK_EN
+                      && type_of_next == WEAK_EN
+                      && bidi_it->last_strong.type_after_w1 != STRONG_AL)
+               type = WEAK_EN;
+           }
+       }
+      else if (type == WEAK_ET /* W5: ET with EN before or after it */
+              || type == WEAK_BN)      /* W5/Retaining */
+       {
+         if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
+             || bidi_it->next_en_pos > bidi_it->charpos)
+           type = WEAK_EN;
+         else                  /* W5: ET/BN with EN after it.  */
+           {
+             EMACS_INT en_pos = bidi_it->charpos + 1;
+
+             next_char =
+               bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
+               ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+             type_of_next = bidi_get_type (next_char, override);
+
+             if (type_of_next == WEAK_ET
+                 || type_of_next == WEAK_BN
+                 || bidi_explicit_dir_char (next_char))
+               {
+                 /* Step over the following ETs and BNs at this level
+                    to see what terminates them, noting its position,
+                    then restore the state saved here.  */
+                 bidi_copy_it (&saved_it, bidi_it);
+                 while (bidi_resolve_explicit (bidi_it) == new_level
+                        && (bidi_it->type == WEAK_BN
+                            || bidi_it->type == WEAK_ET))
+                   ;
+                 type_of_next = bidi_it->type;
+                 en_pos = bidi_it->charpos;
+                 bidi_copy_it (bidi_it, &saved_it);
+               }
+             if (type_of_next == WEAK_EN)
+               {
+                 /* If the last strong character is AL, the EN we've
+                    found will become AN when we get to it (W2). */
+                 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
+                   {
+                     type = WEAK_EN;
+                     /* Remember this EN position, to speed up processing
+                        of the next ETs.  */
+                     bidi_it->next_en_pos = en_pos;
+                   }
+                 else if (type == WEAK_BN)
+                   type = NEUTRAL_ON; /* W6/Retaining */
+               }
+           }
+       }
+    }
+
+  if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
+      || (type == WEAK_BN
+         && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
+             || bidi_it->prev.type_after_w1 == WEAK_ES
+             || bidi_it->prev.type_after_w1 == WEAK_ET)))
+    type = NEUTRAL_ON;
+
+  /* Store the type we've got so far, before we clobber it with strong
+     types in W7 and while resolving neutral types.  But leave alone
+     the original types that were recorded above, because we will need
+     them for the L1 clause.  */
+  if (bidi_it->type_after_w1 == UNKNOWN_BT)
+    bidi_it->type_after_w1 = type;
+  bidi_check_type (bidi_it->type_after_w1);
+
+  if (type == WEAK_EN) /* W7 */
+    {
+      if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
+         || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
+       type = STRONG_L;
+    }
+
+  bidi_it->type = type;
+  bidi_check_type (bidi_it->type);
+  return type;
+}
+
+/* Advance in the buffer via bidi_resolve_weak, then resolve the type
+   of the next character if it is a neutral (or a BN that left the
+   embedding level unchanged), and return the resulting type.
+
+   If BIDI_IT->next_for_neutral is already known, it is used together
+   with BIDI_IT->prev_for_neutral.  Otherwise the text is scanned
+   forward, caching the iterator states on the way, until a character
+   that settles the neutral is found or the level changes; BIDI_IT is
+   then restored to the character being resolved.  That forward scan
+   aborts if BIDI_IT->scan_dir is -1.  */
+bidi_type_t
+bidi_resolve_neutral (struct bidi_it *bidi_it)
+{
+  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+  bidi_type_t type = bidi_resolve_weak (bidi_it);
+  int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+
+  /* Only these types are acceptable results of bidi_resolve_weak;
+     anything else is treated as a fatal internal error.  */
+  if (!(type == STRONG_R
+       || type == STRONG_L
+       || type == WEAK_BN
+       || type == WEAK_EN
+       || type == WEAK_AN
+       || type == NEUTRAL_B
+       || type == NEUTRAL_S
+       || type == NEUTRAL_WS
+       || type == NEUTRAL_ON))
+    abort ();
+
+  if (bidi_get_category (type) == NEUTRAL
+      || (type == WEAK_BN && prev_level == current_level))
+    {
+      if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
+       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+                                      bidi_it->next_for_neutral.type,
+                                      current_level);
+      else
+       {
+         /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
+            the assumption of batch-style processing; see clauses W4,
+            W5, and especially N1, which require to look far forward
+            (as well as back) in the buffer.  May the fleas of a
+            thousand camels infest the armpits of those who design
+            supposedly general-purpose algorithms by looking at their
+            own implementations, and fail to consider other possible
+            implementations!  */
+         struct bidi_it saved_it;
+         bidi_type_t next_type;
+
+         if (bidi_it->scan_dir == -1)
+           abort ();
+
+         bidi_copy_it (&saved_it, bidi_it);
+         /* Scan the text forward until we find the first non-neutral
+            character, and then use that to resolve the neutral we
+            are dealing with now.  We also cache the scanned iterator
+            states, to salvage some of the effort later.  */
+         bidi_cache_iterator_state (bidi_it, 0);
+         do {
+           /* Record the info about the previous character, so that
+              it will be cached below with this state.  */
+           if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
+               && bidi_it->type != WEAK_BN)
+             bidi_remember_char (&bidi_it->prev, bidi_it);
+           type = bidi_resolve_weak (bidi_it);
+           /* Paragraph separators have their levels fully resolved
+              at this point, so cache them as resolved.  */
+           bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
+           /* FIXME: implement L1 here, by testing for a newline and
+              resetting the level for any sequence of whitespace
+              characters adjacent to it.  */
+         } while (!(type == NEUTRAL_B
+                    || (type != WEAK_BN
+                        && bidi_get_category (type) != NEUTRAL)
+                    /* This is all per level run, so stop when we
+                       reach the end of this level run.  */
+                    || bidi_it->level_stack[bidi_it->stack_idx].level !=
+                    current_level));
+
+         /* Record the character that ended the scan in the saved
+            state, which is the one restored into BIDI_IT below.  */
+         bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
+
+         switch (type)
+           {
+             case STRONG_L:
+             case STRONG_R:
+             case STRONG_AL:
+               next_type = type;
+               break;
+             case WEAK_EN:
+             case WEAK_AN:
+               /* N1: ``European and Arabic numbers are treated as
+                  though they were R.''  */
+               next_type = STRONG_R;
+               saved_it.next_for_neutral.type = STRONG_R;
+               break;
+             case WEAK_BN:
+               if (!bidi_explicit_dir_char (bidi_it->ch))
+                 abort ();             /* can't happen: BNs are skipped */
+               /* FALLTHROUGH */
+             case NEUTRAL_B:
+               /* Marched all the way to the end of this level run.
+                  We need to use the eor type, whose information is
+                  stored by bidi_set_sor_type in the prev_for_neutral
+                  member.  */
+               if (saved_it.type != WEAK_BN
+                   || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
+                 {
+                   next_type = bidi_it->prev_for_neutral.type;
+                   saved_it.next_for_neutral.type = next_type;
+                   bidi_check_type (next_type);
+                 }
+               else
+                 {
+                   /* This is a BN which does not adjoin neutrals.
+                      Leave its type alone.  */
+                   bidi_copy_it (bidi_it, &saved_it);
+                   return bidi_it->type;
+                 }
+               break;
+             default:
+               abort ();
+           }
+         type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
+                                        next_type, current_level);
+         saved_it.type = type;
+         bidi_check_type (type);
+         bidi_copy_it (bidi_it, &saved_it);
+       }
+    }
+  return type;
+}
+
+/* Step BIDI_IT forward by one character position (in the logical
+   order), resolve the bidi type of the character found there, and
+   return that type.  Aborts unless BIDI_IT is scanning forward.  */
+bidi_type_t
+bidi_type_of_next_char (struct bidi_it *bidi_it)
+{
+  /* Only a forward scan makes sense here.  */
+  if (bidi_it->scan_dir != 1)
+    abort ();
+
+  /* Once we have stepped out of the area in which only empty levels
+     were found, stop ignoring BNs.  A positive limit expires when the
+     current position reaches it; a limit of -1 expires as soon as the
+     current character is not an explicit directional one.  */
+  if (bidi_it->ignore_bn_limit > 0)
+    {
+      if (bidi_it->ignore_bn_limit <= bidi_it->charpos)
+        bidi_it->ignore_bn_limit = 0;
+    }
+  else if (bidi_it->ignore_bn_limit == -1
+           && !bidi_explicit_dir_char (bidi_it->ch))
+    bidi_it->ignore_bn_limit = 0;
+
+  return bidi_resolve_neutral (bidi_it);
+}
+
+/* Given an iterator state BIDI_IT, advance one character position in
+   the buffer to the next character (in the logical order), resolve
+   the embedding and implicit levels of that next character, and
+   return the resulting level.
+
+   The position looked up is BIDI_IT->charpos + BIDI_IT->scan_dir.  A
+   cached state for it is used if one exists and is fully resolved
+   (resolved_level != -1).  When scanning backwards (scan_dir == -1)
+   such a cached state is required, and its absence is a fatal
+   error.  When scanning forward and already at BIDI_EOB, the
+   iterator is not moved and the current resolved_level is returned.
+   Unless the next character is a paragraph separator (NEUTRAL_B),
+   a freshly computed level is also stored in
+   BIDI_IT->resolved_level.  */
+int
+bidi_level_of_next_char (struct bidi_it *bidi_it)
+{
+  bidi_type_t type;
+  int level, prev_level = -1;
+  struct bidi_saved_info next_for_neutral;
+
+  if (bidi_it->scan_dir == 1)
+    {
+      /* There's no sense in trying to advance if we hit end of text.  */
+      if (bidi_it->ch == BIDI_EOB)
+       return bidi_it->resolved_level;
+
+      /* Record the info about the previous character.  */
+      if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
+         && bidi_it->type != WEAK_BN)
+       bidi_remember_char (&bidi_it->prev, bidi_it);
+      if (bidi_it->type_after_w1 == STRONG_R
+         || bidi_it->type_after_w1 == STRONG_L
+         || bidi_it->type_after_w1 == STRONG_AL)
+       bidi_remember_char (&bidi_it->last_strong, bidi_it);
+      /* FIXME: it sounds like we don't need both prev and
+        prev_for_neutral members, but I'm leaving them both for now.  */
+      if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
+         || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
+       bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
+
+      /* If we overstepped the characters used for resolving neutrals
+        and whitespace, invalidate their info in the iterator.  */
+      if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
+       bidi_it->next_for_neutral.type = UNKNOWN_BT;
+      if (bidi_it->next_en_pos >= 0
+         && bidi_it->charpos >= bidi_it->next_en_pos)
+       bidi_it->next_en_pos = -1;
+      if (bidi_it->next_for_ws.type != UNKNOWN_BT
+         && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
+       bidi_it->next_for_ws.type = UNKNOWN_BT;
+
+      /* This must be taken before we fill the iterator with the info
+        about the next char.  If we scan backwards, the iterator
+        state must be already cached, so there's no need to know the
+        embedding level of the previous character, since we will be
+        returning to our caller shortly.  */
+      prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
+    }
+  /* Keep the current next_for_neutral info: a cached state fetched
+     below may have been stored without it.  */
+  next_for_neutral = bidi_it->next_for_neutral;
+
+  /* Perhaps it is already cached.  */
+  type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
+  if (type != UNKNOWN_BT)
+    {
+      /* Don't lose the information for resolving neutrals!  The
+        cached states could have been cached before their
+        next_for_neutral member was computed.  If we are on our way
+        forward, we can simply take the info from the previous
+        state.  */
+      if (bidi_it->scan_dir == 1
+         && bidi_it->next_for_neutral.type == UNKNOWN_BT)
+       bidi_it->next_for_neutral = next_for_neutral;
+
+      /* If resolved_level is -1, it means this state was cached
+        before it was completely resolved, so we cannot return
+        it.  */
+      if (bidi_it->resolved_level != -1)
+       return bidi_it->resolved_level;
+    }
+  if (bidi_it->scan_dir == -1)
+    /* If we are going backwards, the iterator state is already cached
+       from previous scans, and should be fully resolved.  */
+    abort ();
+
+  if (type == UNKNOWN_BT)
+    type = bidi_type_of_next_char (bidi_it);
+
+  /* For a paragraph separator, return the level already recorded in
+     the iterator, without going through the resolution below.  */
+  if (type == NEUTRAL_B)
+    return bidi_it->resolved_level;
+
+  level = bidi_it->level_stack[bidi_it->stack_idx].level;
+  if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
+      || (type == WEAK_BN && prev_level == level))
+    {
+      if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
+       abort ();
+
+      /* If the cached state shows a neutral character, it was not
+        resolved by bidi_resolve_neutral, so do it now.  */
+      type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+                                    bidi_it->next_for_neutral.type,
+                                    level);
+    }
+
+  /* By now the type must be one of those the implicit-level rules
+     below know how to handle.  */
+  if (!(type == STRONG_R
+       || type == STRONG_L
+       || type == WEAK_BN
+       || type == WEAK_EN
+       || type == WEAK_AN))
+    abort ();
+  bidi_it->type = type;
+  bidi_check_type (bidi_it->type);
+
+  /* For L1 below, we need to know, for each WS character, whether
+     it belongs to a sequence of WS characters preceding a newline
+     or a TAB or a paragraph separator.  */
+  if (bidi_it->orig_type == NEUTRAL_WS
+      && bidi_it->next_for_ws.type == UNKNOWN_BT)
+    {
+      int ch;
+      int clen = bidi_it->ch_len;
+      EMACS_INT bpos = bidi_it->bytepos;
+      EMACS_INT cpos = bidi_it->charpos;
+      bidi_type_t chtype;
+
+      /* Walk forward over whitespace, BNs and explicit directional
+        characters, without touching the iterator itself, and record
+        the type and position of the first character beyond them.  */
+      do {
+       /*_fetch_multibyte_char_len = 1;*/
+       ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
+       bpos += clen;
+       cpos++;
+       clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
+       if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
+         chtype = NEUTRAL_B;
+       else
+         chtype = bidi_get_type (ch, NEUTRAL_DIR);
+      } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
+              || bidi_explicit_dir_char (ch)); /* L1/Retaining */
+      bidi_it->next_for_ws.type = chtype;
+      bidi_check_type (bidi_it->next_for_ws.type);
+      bidi_it->next_for_ws.charpos = cpos;
+      bidi_it->next_for_ws.bytepos = bpos;
+    }
+
+  /* Resolve implicit levels, with a twist: PDFs get the embedding
+     level of the embedding they terminate.  See below for the
+     reason.  */
+  if (bidi_it->orig_type == PDF
+      /* Don't do this if this formatting code didn't change the
+        embedding level due to invalid or empty embeddings.  */
+      && prev_level != level)
+    {
+      /* Don't look in UAX#9 for the reason for this: it's our own
+        private quirk.  The reason is that we want the formatting
+        codes to be delivered so that they bracket the text of their
+        embedding.  For example, given the text
+
+            {RLO}teST{PDF}
+
+        we want it to be displayed as
+
+            {RLO}STet{PDF}
+
+        not as
+
+            STet{RLO}{PDF}
+
+        which will result because we bump up the embedding level as
+        soon as we see the RLO and pop it as soon as we see the PDF,
+        so RLO itself has the same embedding level as "teST", and
+        thus would be normally delivered last, just before the PDF.
+        The switch below fiddles with the level of PDF so that this
+        ugly side effect does not happen.
+
+        (This is, of course, only important if the formatting codes
+        are actually displayed, but Emacs does need to display them
+        if the user wants to.)  */
+      level = prev_level;
+    }
+  else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
+          || bidi_it->orig_type == NEUTRAL_S
+          || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
+          /* || bidi_it->ch == LINESEP_CHAR */
+          || (bidi_it->orig_type == NEUTRAL_WS
+              && (bidi_it->next_for_ws.type == NEUTRAL_B
+                  || bidi_it->next_for_ws.type == NEUTRAL_S)))
+    level = bidi_it->level_stack[0].level;
+  else if ((level & 1) == 0) /* I1 */
+    {
+      if (type == STRONG_R)
+       level++;
+      else if (type == WEAK_EN || type == WEAK_AN)
+       level += 2;
+    }
+  else                 /* I2 */
+    {
+      if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
+       level++;
+    }
+
+  bidi_it->resolved_level = level;
+  return level;
+}
+
+/* Position BIDI_IT at the opposite edge of the level LEVEL.  A
+   non-zero END_FLAG says we are standing at the end of that level
+   and are about to resume scanning the lower level; in that case the
+   search runs against the current scan direction.
+
+   A cached edge is used when there is one: the iterator is simply
+   loaded with the state stored for it.  Failing that, the buffer is
+   walked, caching every state passed, until the resolved level drops
+   below LEVEL.  With END_FLAG non-zero the edge is required to be in
+   the cache, and a miss aborts.
+
+   A ``level'' here is not a ``level run'' as UAX#9 uses the term: it
+   also covers every higher level nested inside it.  Given the levels
+
+         11111112222222333333334443343222222111111112223322111
+                A      B                    C
+
+   a left-to-right scan starting at point A is taken to point C by
+   this function, although the UAX#9 ``level 2 run'' is over at
+   point B.  */
+static void
+bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
+{
+  int search_dir = bidi_it->scan_dir;
+  int cached_at;
+
+  if (end_flag)
+    search_dir = -search_dir;
+
+  /* A cache hit settles it at once.  */
+  cached_at = bidi_cache_find_level_change (level, search_dir, end_flag);
+  if (cached_at >= 0)
+    {
+      bidi_cache_fetch_state (cached_at, bidi_it);
+      return;
+    }
+
+  /* At the end of a level both of its edges must have been cached.  */
+  if (end_flag)
+    abort ();
+
+  /* Walk on, caching each state, and stop at the first character
+     whose level is lower than LEVEL.  */
+  bidi_cache_iterator_state (bidi_it, 1);
+  for (;;)
+    {
+      int level_here = bidi_level_of_next_char (bidi_it);
+
+      bidi_cache_iterator_state (bidi_it, 1);
+      if (level_here < level)
+        break;
+    }
+}
+
+/* Move BIDI_IT to the next character to be delivered, following the
+   reordering of resolved levels (UAX#9 clause L2) instead of plain
+   logical order.  Whenever the resolved level changes, the iterator
+   jumps to the other edge of the level and reverses its scan
+   direction; see the comments in the body.
+
+   A scan_dir of 0 is treated as 1 (logical order).  May set
+   BIDI_IT->new_paragraph and BIDI_IT->separator_limit, and adds to
+   or resets the cache of iterator states as it goes.  */
+void
+bidi_get_next_char_visually (struct bidi_it *bidi_it)
+{
+  int old_level, new_level, next_level;
+  struct bidi_it sentinel;
+
+  if (bidi_it->scan_dir == 0)
+    {
+      bidi_it->scan_dir = 1;   /* default to logical order */
+    }
+
+  /* If we just passed a newline, initialize for the next line.  */
+  if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
+    bidi_line_init (bidi_it);
+
+  /* Prepare the sentinel iterator state.  */
+  if (bidi_cache_idx == 0)
+    {
+      bidi_copy_it (&sentinel, bidi_it);
+      if (bidi_it->first_elt)
+       {
+         sentinel.charpos--;   /* cached charpos needs to be monotonic */
+         sentinel.bytepos--;
+         sentinel.ch = '\n';   /* doesn't matter, but why not? */
+         sentinel.ch_len = 1;
+       }
+    }
+
+  old_level = bidi_it->resolved_level;
+  new_level = bidi_level_of_next_char (bidi_it);
+
+  /* Reordering of resolved levels (clause L2) is implemented by
+     jumping to the other edge of the level and flipping direction of
+     scanning the buffer whenever we find a level change.  */
+  if (new_level != old_level)
+    {
+      int ascending = new_level > old_level;
+      int level_to_search = ascending ? old_level + 1 : old_level;
+      int incr = ascending ? 1 : -1;
+      int expected_next_level = old_level + incr;
+
+      /* If we don't have anything cached yet, we need to cache the
+        sentinel state, since we'll need it to record where to jump
+        when the last non-base level is exhausted.  */
+      if (bidi_cache_idx == 0)
+       bidi_cache_iterator_state (&sentinel, 1);
+      /* Jump (or walk) to the other edge of this level.  */
+      bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
+      /* Switch scan direction and peek at the next character in the
+        new direction.  */
+      bidi_it->scan_dir = -bidi_it->scan_dir;
+
+      /* The following loop handles the case where the resolved level
+        jumps by more than one.  This is typical for numbers inside a
+        run of text with left-to-right embedding direction, but can
+        also happen in other situations.  In those cases the decision
+        where to continue after a level change, and in what direction,
+        is tricky.  For example, given a text like below:
+
+                 abcdefgh
+                 11336622
+
+        (where the numbers below the text show the resolved levels),
+        the result of reordering according to UAX#9 should be this:
+
+                 efdcghba
+
+        This is implemented by the loop below which flips direction
+        and jumps to the other edge of the level each time it finds
+        the new level not to be the expected one.  The expected level
+        is always one more or one less than the previous one.  */
+      next_level = bidi_peek_at_next_level (bidi_it);
+      while (next_level != expected_next_level)
+       {
+         expected_next_level += incr;
+         level_to_search += incr;
+         bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
+         bidi_it->scan_dir = -bidi_it->scan_dir;
+         next_level = bidi_peek_at_next_level (bidi_it);
+       }
+
+      /* Finally, deliver the next character in the new direction.  */
+      next_level = bidi_level_of_next_char (bidi_it);
+    }
+
+  /* Take note when we have just processed the newline that precedes
+     the end of the paragraph.  The next time we are about to be
+     called, set_iterator_to_next will automatically reinit the
+     paragraph direction, if needed.  We do this at the newline before
+     the paragraph separator, because the next character might not be
+     the first character of the next paragraph, due to the bidi
+     reordering.  */
+  if (bidi_it->scan_dir == 1
+      && bidi_it->orig_type == NEUTRAL_B
+      && bidi_it->bytepos < ZV_BYTE)
+    {
+      EMACS_INT sep_len =
+       bidi_at_paragraph_end (bidi_it->charpos + 1,
+                              bidi_it->bytepos + bidi_it->ch_len);
+      /* A negative value is taken to mean that no paragraph ends
+        here.  */
+      if (sep_len >= 0)
+       {
+         bidi_it->new_paragraph = 1;
+         /* Record the buffer position of the last character of the
+            paragraph separator.  */
+         bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
+       }
+    }
+
+  if (bidi_it->scan_dir == 1 && bidi_cache_idx)
+    {
+      /* If we are at paragraph's base embedding level and beyond the
+        last cached position, the cache's job is done and we can
+        discard it.  */
+      if (bidi_it->resolved_level == bidi_it->level_stack[0].level
+         && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
+       bidi_cache_reset ();
+       /* But as long as we are caching during forward scan, we must
+          cache each state, or else the cache integrity will be
+          compromised: it assumes cached states correspond to buffer
+          positions 1:1.  */
+      else
+       bidi_cache_iterator_state (bidi_it, 1);
+    }
+}
+
+/* This is meant to be called from within the debugger, whenever you
+   wish to examine the cache contents.  It writes to stderr a count
+   of the cached states followed by three rows (character, resolved
+   level, character position) with one column per cached state, or
+   a one-line notice when the cache is empty.  */
+void
+bidi_dump_cached_states (void)
+{
+  int i;
+  int ndigits = 1;
+  EMACS_INT pos;
+
+  if (bidi_cache_idx == 0)
+    {
+      fprintf (stderr, "The cache is empty.\n");
+      return;
+    }
+  fprintf (stderr, "Total of %d state%s in cache:\n",
+          bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
+
+  /* Column width: one more than the number of decimal digits in the
+     position of the last cached state, so that adjacent columns stay
+     separated.  Count in an EMACS_INT rather than an int, so that a
+     position too large for an int is not truncated.  */
+  for (pos = bidi_cache[bidi_cache_idx - 1].charpos; pos > 0; pos /= 10)
+    ndigits++;
+  fputs ("ch  ", stderr);
+  for (i = 0; i < bidi_cache_idx; i++)
+    fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
+  fputs ("\n", stderr);
+  fputs ("lvl ", stderr);
+  for (i = 0; i < bidi_cache_idx; i++)
+    fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
+  fputs ("\n", stderr);
+  fputs ("pos ", stderr);
+  /* Character positions are handled as EMACS_INT elsewhere in this
+     file, and that type may be wider than int; passing one to %d
+     would then be a format/argument mismatch.  Convert explicitly
+     and print with the matching length modifier.  */
+  for (i = 0; i < bidi_cache_idx; i++)
+    fprintf (stderr, "%*ld", ndigits, (long) bidi_cache[i].charpos);
+  fputs ("\n", stderr);
+}
diff --git a/src/buffer.c b/src/buffer.c

index a0acad309af557c58fd40b49c55ae9d32e0612d4..0c6e57d45be9446ddc85712cbbd295006f8e329f 100644 (file)
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -2279,6 +2279,8 @@ DEFUN ("buffer-swap-text", Fbuffer_swap_text, Sbuffer_swap_text,
    swapfield (undo_list, Lisp_Object);
    swapfield (mark, Lisp_Object);
    swapfield (enable_multibyte_characters, Lisp_Object);
+  swapfield (bidi_display_reordering, Lisp_Object);
+  swapfield (bidi_paragraph_direction, Lisp_Object);
    /* FIXME: Not sure what we should do with these *_marker fields.
       Hopefully they're just nil anyway.  */
    swapfield (pt_marker, Lisp_Object);
@@ -5206,7 +5208,9 @@ init_buffer_once ()
    buffer_defaults.truncate_lines = Qnil;
    buffer_defaults.word_wrap = Qnil;
    buffer_defaults.ctl_arrow = Qt;
+  buffer_defaults.bidi_display_reordering = Qnil;
    buffer_defaults.direction_reversed = Qnil;
+  buffer_defaults.bidi_paragraph_direction = Qnil;
    buffer_defaults.cursor_type = Qt;
    buffer_defaults.extra_line_spacing = Qnil;
    buffer_defaults.cursor_in_non_selected_windows = Qt;
@@ -5291,7 +5295,9 @@ init_buffer_once ()
    XSETFASTINT (buffer_local_flags.syntax_table, idx); ++idx;
    XSETFASTINT (buffer_local_flags.cache_long_line_scans, idx); ++idx;
    XSETFASTINT (buffer_local_flags.category_table, idx); ++idx;
+  XSETFASTINT (buffer_local_flags.bidi_display_reordering, idx); ++idx;
    XSETFASTINT (buffer_local_flags.direction_reversed, idx); ++idx;
+  XSETFASTINT (buffer_local_flags.bidi_paragraph_direction, idx); ++idx;
    XSETFASTINT (buffer_local_flags.buffer_file_coding_system, idx);
    /* Make this one a permanent local.  */
    buffer_permanent_local_flags[idx++] = 1;
@@ -5548,11 +5554,6 @@ This is the same as (default-value 'abbrev-mode).  */);
                      doc: /* Default value of `ctl-arrow' for buffers that do not override it.
  This is the same as (default-value 'ctl-arrow).  */);
  
-  DEFVAR_LISP_NOPRO ("default-direction-reversed",
-                     &buffer_defaults.direction_reversed,
-                     doc: /* Default value of `direction-reversed' for buffers that do not override it.
-This is the same as (default-value 'direction-reversed).  */);
-
    DEFVAR_LISP_NOPRO ("default-enable-multibyte-characters",
                       &buffer_defaults.enable_multibyte_characters,
                       doc: /* *Default value of `enable-multibyte-characters' for buffers not overriding it.
@@ -5809,11 +5810,29 @@ The variable `coding-system-for-write', if non-nil, overrides this variable.
  
  This variable is never applied to a way of decoding a file while reading it.  */);
  
-  DEFVAR_PER_BUFFER ("direction-reversed", &current_buffer->direction_reversed,
-                    Qnil,
-                    doc: /* *Non-nil means lines in the buffer are displayed right to left.  */);
-
-  DEFVAR_PER_BUFFER ("truncate-lines", &current_buffer->truncate_lines, Qnil,
+  DEFVAR_PER_BUFFER ("direction-reversed",
+                    &current_buffer->direction_reversed, Qnil,
+                    doc: /* Non-nil means set beginning of lines at the right edge of the window.
+See also the variable `bidi-display-reordering'.  */);
+
+  DEFVAR_PER_BUFFER ("bidi-display-reordering",
+                    &current_buffer->bidi_display_reordering, Qnil,
+                    doc: /* Non-nil means reorder bidirectional text for display in the visual order.
+See also the variable `direction-reversed'.  */);
+
+  DEFVAR_PER_BUFFER ("bidi-paragraph-direction",
+                    &current_buffer->bidi_paragraph_direction, Qnil,
+                    doc: /* *If non-nil, forces directionality of text paragraphs in the buffer.
+                            
+If this is nil (the default), the direction of each paragraph is
+determined by the first strong directional character of its text.
+The values of `right-to-left' and `left-to-right' override that.
+Any other value is treated as nil.
+                            
+This variable has no effect unless the buffer's value of
+\`bidi-display-reordering' is non-nil.  */);
+
+ DEFVAR_PER_BUFFER ("truncate-lines", &current_buffer->truncate_lines, Qnil,
                      doc: /* *Non-nil means do not display continuation lines.
  Instead, give each line of text just one screen line.
  
diff --git a/src/buffer.h b/src/buffer.h

index 5217c6d7298311b79d2b61e01f97e07a57679102..40f03daca902a09a1afbee0004fcb546b1b709cc 100644 (file)
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -662,8 +662,16 @@ struct buffer
    Lisp_Object word_wrap;
    /* Non-nil means display ctl chars with uparrow.  */
    Lisp_Object ctl_arrow;
-  /* Non-nil means display text from right to left.  */
+  /* Non-nil means reorder bidirectional text for display in the
+     visual order.  */
+  Lisp_Object bidi_display_reordering;
+  /* Non-nil means set beginning of lines at the right edge of
+     windows.  */
    Lisp_Object direction_reversed;
+  /* If non-nil, specifies which direction of text to force in all the
+     paragraphs of the buffer.  Nil means determine paragraph
+     direction dynamically for each paragraph.  */
+  Lisp_Object bidi_paragraph_direction;
    /* Non-nil means do selective display;
       see doc string in syms_of_buffer (buffer.c) for details.  */
    Lisp_Object selective_display;
diff --git a/src/dispextern.h b/src/dispextern.h

index 01ba3f54181ac456ea6907703a608d908689d0dd..2b581fd0eaad15bb993367552d0a99268ff4aeba 100644 (file)
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -370,6 +370,16 @@ struct glyph
    /* Non-zero means don't display cursor here.  */
    unsigned avoid_cursor_p : 1;
  
+  /* Resolved bidi level [0..63].  NOTE(review): 5 bits only hold 0..31.  */
+  unsigned resolved_level : 5;
+
+  /* Resolved bidirectional type of this character, see enum
+     bidi_type_t below.  Note that according to UAX#9, only some
+     values (STRONG_L, STRONG_R, WEAK_AN, WEAK_EN, WEAK_BN, and
+     NEUTRAL_B) can appear in the resolved type, so we only reserve
+     space for those that can.  */
+  unsigned bidi_type : 3;
+
  #define FACE_ID_BITS   20
  
    /* Face of the glyph.  This is a realized face ID,
@@ -739,14 +749,18 @@ struct glyph_row
    /* First position in this row.  This is the text position, including
       overlay position information etc, where the display of this row
       started, and can thus be less the position of the first glyph
-     (e.g. due to invisible text or horizontal scrolling).  */
+     (e.g. due to invisible text or horizontal scrolling).  BIDI Note:
+     This is the smallest character position in the row, but not
+     necessarily the character that is the leftmost on the display.  */
    struct display_pos start;
  
    /* Text position at the end of this row.  This is the position after
       the last glyph on this row.  It can be greater than the last
       glyph position + 1, due to truncation, invisible text etc.  In an
       up-to-date display, this should always be equal to the start
-     position of the next row.  */
+     position of the next row.  BIDI Note: this is the character whose
+     buffer position is the largest, but not necessarily the rightmost
+     one on the display.  */
    struct display_pos end;
  
    /* Non-zero means the overlay arrow bitmap is on this line.
@@ -872,6 +886,10 @@ struct glyph_row
       the bottom line of the window, but not end of the buffer.  */
    unsigned indicate_bottom_line_p : 1;
  
+  /* Non-zero means the row was reversed to display text in a
+     right-to-left paragraph.  */
+  unsigned reversed_p : 1;
+
    /* Continuation lines width at the start of the row.  */
    int continuation_lines_width;
  
@@ -924,12 +942,18 @@ struct glyph_row *matrix_row P_ ((struct glyph_matrix *, int));
       (MATRIX_ROW ((MATRIX), (ROW))->used[TEXT_AREA])
  
  /* Return the character/ byte position at which the display of ROW
-   starts.  */
+   starts.  BIDI Note: this is the smallest character/byte position
+   among characters in ROW, i.e. the first logical-order character
+   displayed by ROW, which is not necessarily the smallest horizontal
+   position.  */
  
  #define MATRIX_ROW_START_CHARPOS(ROW) ((ROW)->start.pos.charpos)
  #define MATRIX_ROW_START_BYTEPOS(ROW) ((ROW)->start.pos.bytepos)
  
-/* Return the character/ byte position at which ROW ends.  */
+/* Return the character/ byte position at which ROW ends.  BIDI Note:
+   this is the largest character/byte position among characters in
+   ROW, i.e. the last logical-order character displayed by ROW, which
+   is not necessarily the largest horizontal position.  */
  
  #define MATRIX_ROW_END_CHARPOS(ROW) ((ROW)->end.pos.charpos)
  #define MATRIX_ROW_END_BYTEPOS(ROW) ((ROW)->end.pos.bytepos)
@@ -1702,7 +1726,93 @@ struct face_cache
  
  extern int face_change_count;
  
+/* For reordering of bidirectional text.  */
+#define BIDI_MAXLEVEL 64
+
+/* Data type for describing the bidirectional character types.  The first
+   7 enumerators (values 0..6) must stay first, in this order: only they
+   are valid in the 3-bit `bidi_type' member of `struct glyph', and the
+   pbiditype command in src/.gdbinit hard-codes their numeric values.  */
+typedef enum {
+  UNKNOWN_BT = 0,
+  STRONG_L,    /* strong left-to-right */
+  STRONG_R,    /* strong right-to-left */
+  WEAK_EN,     /* european number */
+  WEAK_AN,     /* arabic number */
+  WEAK_BN,     /* boundary neutral */
+  NEUTRAL_B,   /* paragraph separator */
+  STRONG_AL,   /* arabic right-to-left letter */
+  LRE,         /* left-to-right embedding */
+  LRO,         /* left-to-right override */
+  RLE,         /* right-to-left embedding */
+  RLO,         /* right-to-left override */
+  PDF,         /* pop directional format */
+  WEAK_ES,     /* european number separator */
+  WEAK_ET,     /* european number terminator */
+  WEAK_CS,     /* common separator */
+  WEAK_NSM,    /* non-spacing mark */
+  NEUTRAL_S,   /* segment separator */
+  NEUTRAL_WS,  /* whitespace */
+  NEUTRAL_ON   /* other neutrals */
+} bidi_type_t;
+
+/* The basic directionality data type: L2R is left-to-right, R2L the reverse.  */
+typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; /* NEUTRAL_DIR: not forced */
+
+/* Data type for storing information about characters we need to
+   remember.  Positions are EMACS_INT, as in struct bidi_it.  */
+struct bidi_saved_info {
+  EMACS_INT bytepos, charpos;  /* character's buffer position */
+  bidi_type_t type;            /* character's resolved bidi type */
+  bidi_type_t type_after_w1;   /* original type of the character, after W1 */
+  bidi_type_t orig_type;       /* type as we found it in the buffer */
+};
+
+/* Data type for keeping track of saved embedding levels and override
+   status information.  One element of `level_stack' in struct bidi_it.  */
+struct bidi_stack {
+  int level;                   /* embedding level saved in this slot */
+  bidi_dir_t override;         /* override status saved with that level */
+};
  
+/* Data type for iterating over bidi text; see the note before level_stack.  */
+struct bidi_it {
+  EMACS_INT bytepos;           /* iterator's byte position in buffer */
+  EMACS_INT charpos;           /* iterator's character position in buffer */
+  int ch;                      /* character itself */
+  int ch_len;                  /* length of its multibyte sequence */
+  bidi_type_t type;            /* bidi type of this character, after
+                                  resolving weak and neutral types */
+  bidi_type_t type_after_w1;   /* original type, after overrides and W1 */
+  bidi_type_t orig_type;       /* original type, as found in the buffer */
+  int resolved_level;          /* final resolved level of this character */
+  int invalid_levels;          /* how many PDFs to ignore */
+  int invalid_rl_levels;       /* how many PDFs from RLE/RLO to ignore */
+  int prev_was_pdf;            /* if non-zero, previous char was PDF */
+  struct bidi_saved_info prev; /* info about previous character */
+  struct bidi_saved_info last_strong; /* last-seen strong directional char */
+  struct bidi_saved_info next_for_neutral; /* surrounding characters for... */
+  struct bidi_saved_info prev_for_neutral; /* ...resolving neutrals */
+  struct bidi_saved_info next_for_ws; /* character after sequence of ws */
+  EMACS_INT next_en_pos;       /* position of next EN char for ET */
+  EMACS_INT ignore_bn_limit;   /* position until which to ignore BNs */
+  bidi_dir_t sor;              /* direction of start-of-run in effect */
+  int scan_dir;                        /* direction of text scan (1 = forward) */
+  int stack_idx;               /* index of current data on the stack */
+  /* Note: Everything from here on is not copied/saved when the bidi
+     iterator state is saved, pushed, or popped.  So only put here
+     stuff that is not part of the bidi iterator's state!  */
+  struct bidi_stack level_stack[BIDI_MAXLEVEL]; /* stack of embedding levels */
+  int first_elt;               /* if non-zero, examine current char first */
+  bidi_dir_t paragraph_dir;    /* current paragraph direction */
+  int new_paragraph;           /* if non-zero, we expect a new paragraph */
+  EMACS_INT separator_limit;   /* where paragraph separator should end */
+};
+
+/* Non-zero when BIDI_IT (a struct bidi_it object, not a pointer; evaluated
+   twice) is at the paragraph's base embedding level, level_stack[0].level.  */
+#define BIDI_AT_BASE_LEVEL(BIDI_IT) \
+  ((BIDI_IT).resolved_level == (BIDI_IT).level_stack[0].level)
  
  \f
  /***********************************************************************
@@ -1854,7 +1964,7 @@ enum it_method {
    NUM_IT_METHODS
  };
  
-#define IT_STACK_SIZE 4
+#define IT_STACK_SIZE 5
  
  /* Iterator for composition (both for static and automatic).  */
  struct composition_it
@@ -1902,6 +2012,14 @@ struct it
       text, overlay strings, end of text etc., which see.  */
    EMACS_INT stop_charpos;
  
+  /* Previous stop position, i.e. the last one before the current
+     iterator position in `current'.  */
+  EMACS_INT prev_stop;
+
+  /* Last stop position iterated across whose embedding level is equal
+     to the current paragraph's embedding level.  */
+  EMACS_INT base_level_stop;
+
    /* Maximum string or buffer position + 1.  ZV when iterating over
       current_buffer.  */
    EMACS_INT end_charpos;
@@ -2008,6 +2126,8 @@ struct it
      int string_nchars;
      EMACS_INT end_charpos;
      EMACS_INT stop_charpos;
+    EMACS_INT prev_stop;
+    EMACS_INT base_level_stop;
      struct composition_it cmp_it;
      int face_id;
  
@@ -2233,6 +2353,14 @@ struct it
  
    /* Face of the right fringe glyph.  */
    unsigned right_user_fringe_face_id : FACE_ID_BITS;
+
+  /* Non-zero means we need to reorder bidirectional text for display
+     in the visual order.  */
+  int bidi_p;
+
+  /* For iterating over bidirectional text.  */
+  struct bidi_it bidi_it;
+  bidi_dir_t paragraph_embedding;
  };
  
  
@@ -2704,12 +2832,20 @@ extern EMACS_INT tool_bar_button_relief;
                           Function Prototypes
   ***********************************************************************/
  
+/* Defined in bidi.c */
+
+extern void bidi_init_it P_ ((EMACS_INT, EMACS_INT, struct bidi_it *));
+extern void bidi_get_next_char_visually P_ ((struct bidi_it *));
+extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *));
+extern int  bidi_mirror_char P_ ((int));
+
  /* Defined in xdisp.c */
  
  struct glyph_row *row_containing_pos P_ ((struct window *, int,
                                           struct glyph_row *,
                                           struct glyph_row *, int));
-int string_buffer_position P_ ((struct window *, Lisp_Object, int));
+EMACS_INT string_buffer_position P_ ((struct window *, Lisp_Object,
+                                     EMACS_INT));
  int line_bottom_y P_ ((struct it *));
  int display_prop_intangible_p P_ ((Lisp_Object));
  void resize_echo_area_exactly P_ ((void));
diff --git a/src/dispnew.c b/src/dispnew.c

index d32ce48cce6a190619ee1a2b1cccac864daba3cf..fd470491f785a9765c4a43ad905c46a75c3870d7 100644 (file)
--- a/src/dispnew.c
+++ b/src/dispnew.c
@@ -1388,8 +1388,11 @@ prepare_desired_row (row)
  {
    if (!row->enabled_p)
      {
+      unsigned rp = row->reversed_p;
+
        clear_glyph_row (row);
        row->enabled_p = 1;
+      row->reversed_p = rp;
      }
  }
  
@@ -1540,6 +1543,7 @@ row_equal_p (w, a, b, mouse_face_p)
           || a->overlapped_p != b->overlapped_p
           || (MATRIX_ROW_CONTINUATION_LINE_P (a)
               != MATRIX_ROW_CONTINUATION_LINE_P (b))
+         || a->reversed_p != b->reversed_p
           /* Different partially visible characters on left margin.  */
           || a->x != b->x
           /* Different height.  */
@@ -3500,6 +3504,8 @@ direct_output_for_insert (g)
        || !display_completed
        /* Give up if buffer appears in two places.  */
        || buffer_shared > 1
+      /* Give up if we need to reorder bidirectional text.  */
+      || !NILP (current_buffer->bidi_display_reordering)
        /* Give up if currently displaying a message instead of the
          minibuffer contents.  */
        || (EQ (selected_window, minibuf_window)
@@ -3776,6 +3782,10 @@ direct_output_forward_char (n)
    if (!display_completed || cursor_in_echo_area)
      return 0;
  
+  /* Give up if we need to reorder bidirectional text.  */
+  if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering))
+    return 0;
+
    /* Give up if the buffer's direction is reversed.  */
    if (!NILP (XBUFFER (w->buffer)->direction_reversed))
      return 0;
diff --git a/src/term.c b/src/term.c

index 7adaeeac1ae4069496865b307d019faa682f9cb5..dbfdf80d9058a0f6cd92f57e391beccc3ff7af5a 100644 (file)
--- a/src/term.c
+++ b/src/term.c
@@ -1545,6 +1545,26 @@ append_glyph (it)
            + it->glyph_row->used[it->area]);
    end = it->glyph_row->glyphs[1 + it->area];
  
+  /* If the glyph row is reversed, we need to prepend the glyph rather
+     than append it.  */
+  if (it->glyph_row->reversed_p && it->area == TEXT_AREA)
+    {
+      struct glyph *g;
+      int move_by = it->pixel_width;
+
+      /* Make room for the new glyphs.  */
+      if (move_by > end - glyph) /* don't overstep end of this area */
+       move_by = end - glyph;
+      for (g = glyph - 1; g >= it->glyph_row->glyphs[it->area]; g--)
+       g[move_by] = *g;
+      glyph = it->glyph_row->glyphs[it->area];
+      end = glyph + move_by;
+    }
+
+  /* BIDI Note: we put the glyphs of a "multi-pixel" character left to
+     right, even in the REVERSED_P case, since (a) all of its u.ch are
+     identical, and (b) the PADDING_P flag needs to be set for the
+     leftmost one, because we write to the terminal left-to-right.  */
    for (i = 0;
         i < it->pixel_width && glyph < end;
         ++i)
@@ -1556,6 +1576,18 @@ append_glyph (it)
        glyph->padding_p = i > 0;
        glyph->charpos = CHARPOS (it->position);
        glyph->object = it->object;
+      if (it->bidi_p)
+       {
+         glyph->resolved_level = it->bidi_it.resolved_level;
+         if ((it->bidi_it.type & 7) != it->bidi_it.type)
+           abort ();
+         glyph->bidi_type = it->bidi_it.type;
+       }
+      else
+       {
+         glyph->resolved_level = 0;
+         glyph->bidi_type = UNKNOWN_BT;
+       }
  
        ++it->glyph_row->used[it->area];
        ++glyph;
diff --git a/src/window.h b/src/window.h

index 05c1eb18c89839a7fa3df0b69642032d3f1f9bcc..17332f0af20ae38db238547b4ac95bd63b53b179 100644 (file)
--- a/src/window.h
+++ b/src/window.h
@@ -117,7 +117,10 @@ struct window
      /* The buffer displayed in this window */
      /* Of the fields vchild, hchild and buffer, only one is non-nil.  */
      Lisp_Object buffer;
-    /* A marker pointing to where in the text to start displaying */
+    /* A marker pointing to where in the text to start displaying.
+       BIDI Note: This is the _logical-order_ start, i.e. the smallest
+       buffer position visible in the window, not necessarily the
+       character displayed in the top left corner of the window.  */
      Lisp_Object start;
      /* A marker pointing to where in the text point is in this window,
         used only when the window is not selected.
diff --git a/src/xdisp.c b/src/xdisp.c

index 9ece458e77eb3cb1feea31f88016bbaf049693c6..056a009a7d96864bd73108f9c61367433d8979f9 100644 (file)
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -249,6 +249,7 @@ Lisp_Object Qfontified;
  Lisp_Object Qgrow_only;
  Lisp_Object Qinhibit_eval_during_redisplay;
  Lisp_Object Qbuffer_position, Qposition, Qobject;
+Lisp_Object Qright_to_left, Qleft_to_right;
  
  /* Cursor shapes */
  Lisp_Object Qbar, Qhbar, Qbox, Qhollow;
@@ -904,6 +905,7 @@ static void store_mode_line_noprop_char P_ ((char));
  static int store_mode_line_noprop P_ ((const unsigned char *, int, int));
  static void x_consider_frame_title P_ ((Lisp_Object));
  static void handle_stop P_ ((struct it *));
+static void handle_stop_backwards P_ ((struct it *, EMACS_INT));
  static int tool_bar_lines_needed P_ ((struct frame *, int *));
  static int single_display_spec_intangible_p P_ ((Lisp_Object));
  static void ensure_echo_area_buffers P_ ((void));
@@ -2654,6 +2656,9 @@ init_iterator (it, w, charpos, bytepos, row, base_face_id)
    /* Are multibyte characters enabled in current_buffer?  */
    it->multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
  
+  /* Do we need to reorder bidirectional text?  */
+  it->bidi_p = !NILP (current_buffer->bidi_display_reordering);
+
    /* Non-zero if we should highlight the region.  */
    highlight_region_p
      = (!NILP (Vtransient_mark_mode)
@@ -2744,6 +2749,10 @@ init_iterator (it, w, charpos, bytepos, row, base_face_id)
    it->glyph_row = row;
    it->area = TEXT_AREA;
  
+  /* Forget any previous info about this row being reversed.  */
+  if (it->glyph_row)
+    it->glyph_row->reversed_p = 0;
+
    /* Get the dimensions of the display area.  The display area
       consists of the visible window area plus a horizontally scrolled
       part to the left of the window.  All x-values are relative to the
@@ -2799,6 +2808,21 @@ init_iterator (it, w, charpos, bytepos, row, base_face_id)
         it->start_of_box_run_p = 1;
      }
  
+  /* If we are to reorder bidirectional text, init the bidi
+     iterator.  */
+  if (it->bidi_p)
+    {
+      /* Note the paragraph direction that this buffer wants to
+        use.  */
+      if (EQ (current_buffer->bidi_paragraph_direction, Qleft_to_right))
+       it->paragraph_embedding = L2R;
+      else if (EQ (current_buffer->bidi_paragraph_direction, Qright_to_left))
+       it->paragraph_embedding = R2L;
+      else
+       it->paragraph_embedding = NEUTRAL_DIR;
+      bidi_init_it (charpos, bytepos, &it->bidi_it);
+    }
+
    /* If a buffer position was specified, set the iterator there,
       getting overlays and face properties from that position.  */
    if (charpos >= BUF_BEG (current_buffer))
@@ -3764,18 +3788,18 @@ handle_invisible_prop (it)
    else
      {
        int invis_p;
-      EMACS_INT newpos, next_stop, start_charpos;
+      EMACS_INT newpos, next_stop, start_charpos, tem;
        Lisp_Object pos, prop, overlay;
  
        /* First of all, is there invisible text at this position?  */
-      start_charpos = IT_CHARPOS (*it);
-      pos = make_number (IT_CHARPOS (*it));
+      tem = start_charpos = IT_CHARPOS (*it);
+      pos = make_number (tem);
        prop = get_char_property_and_overlay (pos, Qinvisible, it->window,
                                             &overlay);
        invis_p = TEXT_PROP_MEANS_INVISIBLE (prop);
  
        /* If we are on invisible text, skip over it.  */
-      if (invis_p && IT_CHARPOS (*it) < it->end_charpos)
+      if (invis_p && start_charpos < it->end_charpos)
         {
           /* Record whether we have to display an ellipsis for the
              invisible text.  */
@@ -3788,17 +3812,16 @@ handle_invisible_prop (it)
           do
             {
               /* Try to skip some invisible text.  Return value is the
-                position reached which can be equal to IT's position
-                if there is nothing invisible here.  This skips both
+                position reached which can be equal to where we start
+                if there is nothing invisible there.  This skips both
                  over invisible text properties and overlays with
                  invisible property.  */
-             newpos = skip_invisible (IT_CHARPOS (*it),
-                                      &next_stop, ZV, it->window);
+             newpos = skip_invisible (tem, &next_stop, ZV, it->window);
  
               /* If we skipped nothing at all we weren't at invisible
                  text in the first place.  If everything to the end of
                  the buffer was skipped, end the loop.  */
-             if (newpos == IT_CHARPOS (*it) || newpos >= ZV)
+             if (newpos == tem || newpos >= ZV)
                 invis_p = 0;
               else
                 {
@@ -3816,7 +3839,7 @@ handle_invisible_prop (it)
               /* If we ended up on invisible text, proceed to
                  skip starting with next_stop.  */
               if (invis_p)
-               IT_CHARPOS (*it) = next_stop;
+               tem = next_stop;
  
                /* If there are adjacent invisible texts, don't lose the
                   second one's ellipsis. */
@@ -3826,8 +3849,56 @@ handle_invisible_prop (it)
           while (invis_p);
  
           /* The position newpos is now either ZV or on visible text.  */
-         IT_CHARPOS (*it) = newpos;
-         IT_BYTEPOS (*it) = CHAR_TO_BYTE (newpos);
+         if (it->bidi_p && newpos < ZV)
+           {
+             /* With bidi iteration, the region of invisible text
+                could start and/or end in the middle of a non-base
+                embedding level.  Therefore, we need to skip
+                invisible text using the bidi iterator, starting at
+                IT's current position, until we find ourselves
+                outside the invisible text.  Skipping invisible text
+                _after_ bidi iteration avoids affecting the visual
+                order of the displayed text when invisible properties
+                are added or removed.  */
+             if (it->bidi_it.first_elt)
+               {
+                 /* If we were `reseat'ed to a new paragraph,
+                    determine the paragraph base direction.  We need
+                    to do it now because next_element_from_buffer may
+                    not have a chance to do it, if we are going to
+                    skip any text at the beginning, which resets the
+                    FIRST_ELT flag.  */
+                 bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+                 /* If the paragraph base direction is R2L, its
+                    glyphs should be reversed.  */
+                 if (it->glyph_row)
+                   {
+                     if (it->bidi_it.paragraph_dir == R2L)
+                       it->glyph_row->reversed_p = 1;
+                     else
+                       it->glyph_row->reversed_p = 0;
+                   }
+               }
+             do
+               {
+                 bidi_get_next_char_visually (&it->bidi_it);
+               }
+             while (it->stop_charpos <= it->bidi_it.charpos
+                    && it->bidi_it.charpos < newpos);
+             IT_CHARPOS (*it) = it->bidi_it.charpos;
+             IT_BYTEPOS (*it) = it->bidi_it.bytepos;
+             /* If we overstepped NEWPOS, record its position in the
+                iterator, so that we skip invisible text if later the
+                bidi iteration lands us in the invisible region
+                again. */
+             if (IT_CHARPOS (*it) >= newpos)
+               it->prev_stop = newpos;
+           }
+         else
+           {
+             IT_CHARPOS (*it) = newpos;
+             IT_BYTEPOS (*it) = CHAR_TO_BYTE (newpos);
+           }
  
           /* If there are before-strings at the start of invisible
              text, and the text is invisible because of a text
@@ -3836,7 +3907,7 @@ handle_invisible_prop (it)
              overlay property instead of a text property, this is
              already handled in the overlay code.)  */
           if (NILP (overlay)
-             && get_overlay_strings (it, start_charpos))
+             && get_overlay_strings (it, it->stop_charpos))
             {
               handled = HANDLED_RECOMPUTE_PROPS;
               it->stack[it->sp - 1].display_ellipsis_p = display_ellipsis_p;
@@ -3857,7 +3928,7 @@ handle_invisible_prop (it)
                   first invisible character.  */
               if (!STRINGP (it->object))
                 {
-                 it->position.charpos = IT_CHARPOS (*it) - 1;
+                 it->position.charpos = newpos - 1;
                   it->position.bytepos = CHAR_TO_BYTE (it->position.charpos);
                 }
               it->ellipsis_p = 1;
@@ -4571,43 +4642,46 @@ display_prop_string_p (prop, string)
    return 0;
  }
  
-
-/* Determine which buffer position in W's buffer STRING comes from.
-   AROUND_CHARPOS is an approximate position where it could come from.
-   Value is the buffer position or 0 if it couldn't be determined.
+/* Look for STRING in overlays and text properties in W's buffer,
+   between character positions FROM and TO (excluding TO).
+   BACK_P non-zero means look back (in this case, TO is supposed to be
+   less than FROM).
+   Value is the first character position where STRING was found, or
+   zero if it wasn't found before hitting TO.
  
     W's buffer must be current.
  
-   This function is necessary because we don't record buffer positions
-   in glyphs generated from strings (to keep struct glyph small).
     This function may only use code that doesn't eval because it is
     called asynchronously from note_mouse_highlight.  */
  
-int
-string_buffer_position (w, string, around_charpos)
+static EMACS_INT
+string_buffer_position_lim (w, string, from, to, back_p)
       struct window *w;
       Lisp_Object string;
-     int around_charpos;
+     EMACS_INT from, to;
+     int back_p;
  {
    Lisp_Object limit, prop, pos;
-  const int MAX_DISTANCE = 1000;
    int found = 0;
  
-  pos = make_number (around_charpos);
-  limit = make_number (min (XINT (pos) + MAX_DISTANCE, ZV));
-  while (!found && !EQ (pos, limit))
+  pos = make_number (from);
+
+  if (!back_p) /* looking forward */
      {
-      prop = Fget_char_property (pos, Qdisplay, Qnil);
-      if (!NILP (prop) && display_prop_string_p (prop, string))
-       found = 1;
-      else
-       pos = Fnext_single_char_property_change (pos, Qdisplay, Qnil, limit);
+      limit = make_number (min (to, ZV));
+      while (!found && !EQ (pos, limit))
+       {
+         prop = Fget_char_property (pos, Qdisplay, Qnil);
+         if (!NILP (prop) && display_prop_string_p (prop, string))
+           found = 1;
+         else
+           pos = Fnext_single_char_property_change (pos, Qdisplay, Qnil,
+                                                    limit);
+       }
      }
-
-  if (!found)
+  else         /* looking back */
      {
-      pos = make_number (around_charpos);
-      limit = make_number (max (XINT (pos) - MAX_DISTANCE, BEGV));
+      limit = make_number (max (to, BEGV));
        while (!found && !EQ (pos, limit))
         {
           prop = Fget_char_property (pos, Qdisplay, Qnil);
@@ -4622,6 +4696,35 @@ string_buffer_position (w, string, around_charpos)
    return found ? XINT (pos) : 0;
  }
  
+/* Determine which buffer position in W's buffer STRING comes from.
+   AROUND_CHARPOS is an approximate position where it could come from.
+   The search goes forward from AROUND_CHARPOS first and, if that
+   fails, backward, covering at most MAX_DISTANCE characters in each
+   direction (string_buffer_position_lim clamps the limits to ZV and
+   BEGV).
+   Value is the buffer position or 0 if it couldn't be determined.
+
+   W's buffer must be current.
+
+   This function is necessary because we don't record buffer positions
+   in glyphs generated from strings (to keep struct glyph small).
+   This function may only use code that doesn't eval because it is
+   called asynchronously from note_mouse_highlight.  */
+
+EMACS_INT
+string_buffer_position (w, string, around_charpos)
+     struct window *w;
+     Lisp_Object string;
+     EMACS_INT around_charpos;
+{
+  const int MAX_DISTANCE = 1000;
+  /* Look forward first; the helper returns 0 when STRING is not found.  */
+  EMACS_INT found = string_buffer_position_lim (w, string, around_charpos,
+                                               around_charpos + MAX_DISTANCE,
+                                               0);
+
+  /* Nothing ahead of AROUND_CHARPOS: try looking back instead.  */
+  if (!found)
+    found = string_buffer_position_lim (w, string, around_charpos,
+                                       around_charpos - MAX_DISTANCE, 1);
+  return found;
+}
+
  
  \f
  /***********************************************************************
@@ -5088,6 +5191,8 @@ push_it (it)
    p = it->stack + it->sp;
  
    p->stop_charpos = it->stop_charpos;
+  p->prev_stop = it->prev_stop;
+  p->base_level_stop = it->base_level_stop;
    p->cmp_it = it->cmp_it;
    xassert (it->face_id >= 0);
    p->face_id = it->face_id;
@@ -5138,6 +5243,8 @@ pop_it (it)
    --it->sp;
    p = it->stack + it->sp;
    it->stop_charpos = p->stop_charpos;
+  it->prev_stop = p->prev_stop;
+  it->base_level_stop = p->base_level_stop;
    it->cmp_it = p->cmp_it;
    it->face_id = p->face_id;
    it->current = p->current;
@@ -5315,8 +5422,8 @@ back_to_previous_visible_line_start (it)
        if (IT_CHARPOS (*it) <= BEGV)
         break;
  
-      /* If selective > 0, then lines indented more than that values
-        are invisible.  */
+      /* If selective > 0, then lines indented more than its value are
+        invisible.  */
        if (it->selective > 0
           && indented_beyond_p (IT_CHARPOS (*it), IT_BYTEPOS (*it),
                                 (double) it->selective)) /* iftc */
@@ -5473,7 +5580,30 @@ reseat (it, pos, force_p)
    if (force_p
        || CHARPOS (pos) > it->stop_charpos
        || CHARPOS (pos) < original_pos)
-    handle_stop (it);
+    {
+      if (it->bidi_p)
+       {
+         /* For bidi iteration, we need to prime prev_stop and
+            base_level_stop with our best estimations.  */
+         if (CHARPOS (pos) < it->prev_stop)
+           {
+             handle_stop_backwards (it, BEGV);
+             if (CHARPOS (pos) < it->base_level_stop)
+               it->base_level_stop = 0;
+           }
+         else if (CHARPOS (pos) > it->stop_charpos
+                  && it->stop_charpos >= BEGV)
+           handle_stop_backwards (it, it->stop_charpos);
+         else  /* force_p */
+           handle_stop (it);
+       }
+      else
+       {
+         handle_stop (it);
+         it->prev_stop = it->base_level_stop = 0;
+       }
+
+    }
  
    CHECK_IT (it);
  }
@@ -5510,9 +5640,14 @@ reseat_1 (it, pos, set_stop_p)
    it->sp = 0;
    it->string_from_display_prop_p = 0;
    it->face_before_selective_p = 0;
+  if (it->bidi_p)
+    it->bidi_it.first_elt = 1;
  
    if (set_stop_p)
-    it->stop_charpos = CHARPOS (pos);
+    {
+      it->stop_charpos = CHARPOS (pos);
+      it->base_level_stop = CHARPOS (pos);
+    }
  }
  
  
@@ -5624,7 +5759,7 @@ reseat_to_string (it, s, string, charpos, precision, field_width, multibyte)
  \f
  /***********************************************************************
                               Iteration
- ***********************************************************************/
+***********************************************************************/
  
  /* Map enum it_method value to corresponding next_element_from_* function.  */
  
@@ -5676,6 +5811,13 @@ get_next_display_element (it)
  
    if (it->what == IT_CHARACTER)
      {
+      /* UAX#9, L4: "A character is depicted by a mirrored glyph if
+        and only if (a) the resolved directionality of that character
+        is R..."  */
+      /* FIXME: Do we need an exception for characters from display
+        tables?  */
+      if (it->bidi_p && it->bidi_it.type == STRONG_R)
+       it->c = bidi_mirror_char (it->c);
        /* Map via display table or translate control characters.
          IT->c, IT->len etc. have been set to the next character by
          the function call above.  If we have a display table, and it
@@ -5690,7 +5832,7 @@ get_next_display_element (it)
           Lisp_Object dv;
           struct charset *unibyte = CHARSET_FROM_ID (charset_unibyte);
           enum { char_is_other = 0, char_is_nbsp, char_is_soft_hyphen }
-              nbsp_or_shy = char_is_other;
+         nbsp_or_shy = char_is_other;
           int decoded = it->c;
  
           if (it->dp
@@ -5908,12 +6050,12 @@ get_next_display_element (it)
                        happen actually, but due to bugs it may
                        happen.  Let's print the char as is, there's
                        not much meaningful we can do with it.  */
-                     str[0] = it->c;
-                     str[1] = it->c >> 8;
-                     str[2] = it->c >> 16;
-                     str[3] = it->c >> 24;
-                     len = 4;
-                   }
+                   str[0] = it->c;
+                   str[1] = it->c >> 8;
+                   str[2] = it->c >> 16;
+                   str[3] = it->c >> 24;
+                   len = 4;
+                 }
  
                 for (i = 0; i < len; i++)
                   {
@@ -6082,8 +6224,31 @@ set_iterator_to_next (it, reseat_p)
        else
         {
           xassert (it->len != 0);
-         IT_BYTEPOS (*it) += it->len;
-         IT_CHARPOS (*it) += 1;
+
+         if (!it->bidi_p)
+           {
+             IT_BYTEPOS (*it) += it->len;
+             IT_CHARPOS (*it) += 1;
+           }
+         else
+           {
+             /* If this is a new paragraph, determine its base
+                direction (a.k.a. its base embedding level).  */
+             if (it->bidi_it.new_paragraph)
+               {
+                 bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+                 if (it->glyph_row)
+                   {
+                     if (it->bidi_it.paragraph_dir == R2L)
+                       it->glyph_row->reversed_p = 1;
+                     else
+                       it->glyph_row->reversed_p = 0;
+                   }
+               }
+             bidi_get_next_char_visually (&it->bidi_it);
+             IT_BYTEPOS (*it) = it->bidi_it.bytepos;
+             IT_CHARPOS (*it) = it->bidi_it.charpos;
+           }
           xassert (IT_BYTEPOS (*it) == CHAR_TO_BYTE (IT_CHARPOS (*it)));
         }
        break;
@@ -6236,7 +6401,7 @@ next_element_from_display_vector (it)
    it->face_id = it->saved_face_id;
  
    /* KFS: This code used to check ip->dpvec[0] instead of the current element.
-          That seemed totally bogus - so I changed it...  */
+     That seemed totally bogus - so I changed it...  */
    gc = it->dpvec[it->current.dpvec_index];
  
    if (GLYPH_CODE_P (gc) && GLYPH_CODE_CHAR_VALID_P (gc))
@@ -6471,6 +6636,45 @@ next_element_from_stretch (it)
    return 1;
  }
  
+/* Scan forward from CHARPOS in the current buffer until we find a stop
+   position > IT's current position, then handle the stop position before
+   that, leaving it in IT->prev_stop and the one found in IT->stop_charpos.
+   Called when we bump into a stop position while reordering bidi text.  */
+
+static void
+handle_stop_backwards (it, charpos)
+     struct it *it;
+     EMACS_INT charpos;
+{
+  EMACS_INT where_we_are = IT_CHARPOS (*it);
+  struct display_pos save_current = it->current;
+  struct text_pos save_position = it->position;
+  struct text_pos pos1;
+  EMACS_INT next_stop;
+
+  /* Scan in strict logical order: bidi iteration stays off for the scan.  */
+  it->bidi_p = 0;
+  do
+    {
+      it->prev_stop = charpos;
+      SET_TEXT_POS (pos1, charpos, CHAR_TO_BYTE (charpos));
+      reseat_1 (it, pos1, 0);  /* 0: don't let reseat_1 set stop_charpos */
+      compute_stop_pos (it);
+      /* Each pass must move the stop position forward; otherwise give up.  */
+      if (it->stop_charpos <= it->prev_stop)
+       abort ();
+      charpos = it->stop_charpos;
+    }
+  while (charpos <= where_we_are);
+
+  next_stop = it->stop_charpos;  /* first stop found beyond where_we_are */
+  it->stop_charpos = it->prev_stop;  /* the stop handle_stop should process */
+  it->bidi_p = 1;  /* NOTE(review): assumes bidi_p was set on entry -- confirm */
+  it->current = save_current;  /* put back the position saved on entry */
+  it->position = save_position;
+  handle_stop (it);
+  it->stop_charpos = next_stop;  /* resume with the next stop ahead of us */
+}
  
  /* Load IT with the next display element from current_buffer.  Value
     is zero if end of buffer reached.  IT->stop_charpos is the next
@@ -6485,6 +6689,69 @@ next_element_from_buffer (it)
  
    xassert (IT_CHARPOS (*it) >= BEGV);
  
+  /* With bidi reordering, the character to display might not be the
+     character at IT_CHARPOS.  BIDI_IT.FIRST_ELT non-zero means that
+     we were reseat()ed to a new buffer position, which is potentially
+     a different paragraph.  */
+  if (it->bidi_p && it->bidi_it.first_elt)
+    {
+      it->bidi_it.charpos = IT_CHARPOS (*it);
+      it->bidi_it.bytepos = IT_BYTEPOS (*it);
+      /* If we are at the beginning of a line, we can produce the next
+        element right away.  */
+      if (it->bidi_it.bytepos == BEGV_BYTE
+         /* FIXME: Should support all Unicode line separators.  */
+         || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n'
+         || FETCH_CHAR (it->bidi_it.bytepos) == '\n')
+       {
+         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+         /* If the paragraph base direction is R2L, its glyphs should
+            be reversed.  */
+         if (it->glyph_row)
+           {
+             if (it->bidi_it.paragraph_dir == R2L)
+               it->glyph_row->reversed_p = 1;
+             else
+               it->glyph_row->reversed_p = 0;
+           }
+         bidi_get_next_char_visually (&it->bidi_it);
+       }
+      else
+       {
+         int orig_bytepos = IT_BYTEPOS (*it);
+
+         /* We need to prime the bidi iterator starting at the line's
+            beginning, before we will be able to produce the next
+            element.  */
+         IT_CHARPOS (*it) = find_next_newline_no_quit (IT_CHARPOS (*it), -1);
+         IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it));
+         it->bidi_it.charpos = IT_CHARPOS (*it);
+         it->bidi_it.bytepos = IT_BYTEPOS (*it);
+         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+         if (it->glyph_row)
+           {
+             if (it->bidi_it.paragraph_dir == R2L)
+               it->glyph_row->reversed_p = 1;
+             else
+               it->glyph_row->reversed_p = 0;
+           }
+         do
+           {
+             /* Now return to buffer position where we were asked to
+                get the next display element, and produce that.  */
+             bidi_get_next_char_visually (&it->bidi_it);
+           }
+         while (it->bidi_it.bytepos != orig_bytepos
+                && it->bidi_it.bytepos < ZV_BYTE);
+       }
+
+      it->bidi_it.first_elt = 0; /* paranoia: bidi.c does this */
+      /*  Adjust IT's position information to where we ended up.  */
+      IT_CHARPOS (*it) = it->bidi_it.charpos;
+      IT_BYTEPOS (*it) = it->bidi_it.bytepos;
+      SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it));
+    }
+
    if (IT_CHARPOS (*it) >= it->stop_charpos)
      {
        if (IT_CHARPOS (*it) >= it->end_charpos)
@@ -6510,12 +6777,51 @@ next_element_from_buffer (it)
               success_p = 0;
             }
         }
+      else if (!(!it->bidi_p
+                || BIDI_AT_BASE_LEVEL (it->bidi_it)
+                || IT_CHARPOS (*it) == it->stop_charpos))
+       {
+         /* With bidi non-linear iteration, we could find ourselves
+            far beyond the last computed stop_charpos, with several
+            other stop positions in between that we missed.  Scan
+            them all now, in buffer's logical order, until we find
+            and handle the last stop_charpos that precedes our
+            current position.  */
+         handle_stop_backwards (it, it->stop_charpos);
+         return GET_NEXT_DISPLAY_ELEMENT (it);
+       }
        else
         {
+         if (it->bidi_p)
+           {
+             /* Take note of the stop position we just moved across,
+                for when we will move back across it.  */
+             it->prev_stop = it->stop_charpos;
+             /* If we are at base paragraph embedding level, take
+                note of the last stop position seen at this
+                level.  */
+             if (BIDI_AT_BASE_LEVEL (it->bidi_it))
+               it->base_level_stop = it->stop_charpos;
+           }
           handle_stop (it);
           return GET_NEXT_DISPLAY_ELEMENT (it);
         }
      }
+  else if (it->bidi_p
+          /* We can sometimes back up for reasons that have nothing
+             to do with bidi reordering.  E.g., compositions.  The
+             code below is only needed when we are above the base
+             embedding level, so test for that explicitly.  */
+          && !BIDI_AT_BASE_LEVEL (it->bidi_it)
+          && IT_CHARPOS (*it) < it->prev_stop)
+    {
+      if (it->base_level_stop <= 0)
+       it->base_level_stop = BEGV;
+      if (IT_CHARPOS (*it) < it->base_level_stop)
+       abort ();
+      handle_stop_backwards (it, it->base_level_stop);
+      return GET_NEXT_DISPLAY_ELEMENT (it);
+    }
    else
      {
        /* No face changes, overlays etc. in sight, so just return a
@@ -6670,9 +6976,9 @@ next_element_from_composition (it)
     line on the display without producing glyphs.
  
     OP should be a bit mask including some or all of these bits:
-    MOVE_TO_X: Stop on reaching x-position TO_X.
-    MOVE_TO_POS: Stop on reaching buffer or string position TO_CHARPOS.
-   Regardless of OP's value, stop in reaching the end of the display line.
+    MOVE_TO_X: Stop upon reaching x-position TO_X.
+    MOVE_TO_POS: Stop upon reaching buffer or string position TO_CHARPOS.
+   Regardless of OP's value, stop upon reaching the end of the display line.
  
     TO_X is normally a value 0 <= TO_X <= IT->last_visible_x.
     This means, in particular, that TO_X includes window's horizontal
@@ -6708,6 +7014,8 @@ move_it_in_display_line_to (struct it *it,
    struct glyph_row *saved_glyph_row;
    struct it wrap_it, atpos_it, atx_it;
    int may_wrap = 0;
+  enum it_method prev_method = it->method;
+  EMACS_INT prev_pos = IT_CHARPOS (*it);
  
    /* Don't produce glyphs in produce_glyphs.  */
    saved_glyph_row = it->glyph_row;
@@ -6725,7 +7033,7 @@ move_it_in_display_line_to (struct it *it,
  #define BUFFER_POS_REACHED_P()                                 \
    ((op & MOVE_TO_POS) != 0                                     \
     && BUFFERP (it->object)                                     \
-   && IT_CHARPOS (*it) >= to_charpos                           \
+   && IT_CHARPOS (*it) == to_charpos                           \
     && (it->method == GET_FROM_BUFFER                           \
         || (it->method == GET_FROM_DISPLAY_VECTOR               \
            && it->dpvec + it->current.dpvec_index + 1 >= it->dpend)))
@@ -6749,7 +7057,14 @@ move_it_in_display_line_to (struct it *it,
        if ((op & MOVE_TO_POS) != 0
           && BUFFERP (it->object)
           && it->method == GET_FROM_BUFFER
-         && IT_CHARPOS (*it) > to_charpos)
+         && (prev_method == GET_FROM_IMAGE
+             || prev_method == GET_FROM_STRETCH)
+         /* Passed TO_CHARPOS from left to right.  */
+         && ((prev_pos < to_charpos
+              && IT_CHARPOS (*it) > to_charpos)
+             /* Passed TO_CHARPOS from right to left.  */
+             || (prev_pos > to_charpos)
+                 && IT_CHARPOS (*it) < to_charpos))
         {
           if (it->line_wrap != WORD_WRAP || wrap_it.sp < 0)
             {
@@ -6763,6 +7078,9 @@ move_it_in_display_line_to (struct it *it,
             atpos_it = *it;
         }
  
+      prev_method = it->method;
+      if (it->method == GET_FROM_BUFFER)
+       prev_pos = IT_CHARPOS (*it);
        /* Stop when ZV reached.
           We used to stop here when TO_CHARPOS reached as well, but that is
           too soon if this glyph does not fit on this line.  So we handle it
@@ -7028,6 +7346,8 @@ move_it_in_display_line_to (struct it *it,
           break;
         }
  
+      if (it->method == GET_FROM_BUFFER)
+       prev_pos = IT_CHARPOS (*it);
        /* The current display element has been consumed.  Advance
          to the next.  */
        set_iterator_to_next (it, 1);
@@ -11033,6 +11353,17 @@ text_outside_line_unchanged_p (w, start, end)
               && overlay_touches_p (Z - end))
             unchanged_p = 0;
         }
+
+      /* Under bidi reordering, adding or deleting a character in the
+        beginning of a paragraph, before the first strong directional
+        character, can change the base direction of the paragraph (unless
+        the buffer specifies a fixed paragraph direction), which will
+        require redisplaying the whole paragraph.  It might be worthwhile
+        to find the paragraph limits and widen the range of redisplayed
+        lines to that, but for now just give up this optimization.  */
+      if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering)
+         && NILP (XBUFFER (w->buffer)->bidi_paragraph_direction))
+       unchanged_p = 0;
      }
  
    return unchanged_p;
@@ -12323,162 +12654,397 @@ set_cursor_from_row (w, row, matrix, delta, delta_bytes, dy, dvpos)
    struct glyph *glyph = row->glyphs[TEXT_AREA];
    struct glyph *end = glyph + row->used[TEXT_AREA];
    struct glyph *cursor = NULL;
-  /* The first glyph that starts a sequence of glyphs from a string
-     that is a value of a display property.  */
-  struct glyph *string_start;
-  /* The X coordinate of string_start.  */
-  int string_start_x;
    /* The last known character position in row.  */
    int last_pos = MATRIX_ROW_START_CHARPOS (row) + delta;
-  /* The last known character position before string_start.  */
-  int string_before_pos;
    int x = row->x;
    int cursor_x = x;
-  /* Last buffer position covered by an overlay.  */
-  int cursor_from_overlay_pos = 0;
-  int pt_old = PT - delta;
-
-  /* Skip over glyphs not having an object at the start of the row.
-     These are special glyphs like truncation marks on terminal
-     frames.  */
+  EMACS_INT pt_old = PT - delta;
+  EMACS_INT pos_before = MATRIX_ROW_START_CHARPOS (row) + delta;
+  EMACS_INT pos_after = MATRIX_ROW_END_CHARPOS (row) + delta;
+  struct glyph *glyph_before = glyph - 1, *glyph_after = end;
+  /* Non-zero means we've found a match for cursor position, but that
+     glyph has the avoid_cursor_p flag set.  */
+  int match_with_avoid_cursor = 0;
+  /* Non-zero means we've seen at least one glyph that came from a
+     display string.  */
+  int string_seen = 0;
+  /* Largest buffer position seen so far during scan of glyph row.  */
+  EMACS_INT bpos_max = last_pos;
+  /* Last buffer position covered by an overlay string with an integer
+     `cursor' property.  */
+  EMACS_INT bpos_covered = 0;
+
+  /* Skip over glyphs not having an object at the start and the end of
+     the row.  These are special glyphs like truncation marks on
+     terminal frames.  */
    if (row->displays_text_p)
-    while (glyph < end
-          && INTEGERP (glyph->object)
-          && glyph->charpos < 0)
+    {
+      if (!row->reversed_p)
+       {
+         while (glyph < end
+                && INTEGERP (glyph->object)
+                && glyph->charpos < 0)
+           {
+             x += glyph->pixel_width;
+             ++glyph;
+           }
+         while (end > glyph
+                && INTEGERP ((end - 1)->object)
+                /* CHARPOS is zero for blanks inserted by
+                   extend_face_to_end_of_line.  */
+                && (end - 1)->charpos <= 0)
+           --end;
+         glyph_before = glyph - 1;
+         glyph_after = end;
+       }
+      else
+       {
+         struct glyph *g;
+
+         /* If the glyph row is reversed, we need to process it from back
+            to front, so swap the edge pointers.  */
+         end = glyph - 1;
+         glyph += row->used[TEXT_AREA] - 1;
+         /* Reverse the known positions in the row.  */
+         last_pos = pos_after = MATRIX_ROW_START_CHARPOS (row) + delta;
+         pos_before = MATRIX_ROW_END_CHARPOS (row) + delta;
+
+         while (glyph > end + 1
+                && INTEGERP (glyph->object)
+                && glyph->charpos < 0)
+           {
+             --glyph;
+             x -= glyph->pixel_width;
+           }
+         if (INTEGERP (glyph->object) && glyph->charpos < 0)
+           --glyph;
+         /* By default, put the cursor on the rightmost glyph.  */
+         for (g = end + 1; g < glyph; g++)
+           x += g->pixel_width;
+         cursor_x = x;
+         while (end < glyph
+                && INTEGERP ((end + 1)->object)
+                && (end + 1)->charpos <= 0)
+           ++end;
+         glyph_before = glyph + 1;
+         glyph_after = end;
+       }
+    }
+  else if (row->reversed_p)
+    {
+      /* In R2L rows that don't display text, put the cursor on the
+        rightmost glyph.  Case in point: an empty last line that is
+        part of an R2L paragraph.  */
+      cursor = end - 1;
+      x = -1;  /* will be computed below, at label compute_x */
+    }
+
+  /* Step 1: Try to find the glyph whose character position
+     corresponds to point.  If that's not possible, find 2 glyphs
+     whose character positions are the closest to point, one before
+     point, the other after it.  */
+  if (!row->reversed_p)
+    while (/* not marched to end of glyph row */
+          glyph < end
+          /* glyph was not inserted by redisplay for internal purposes */
+          && !INTEGERP (glyph->object))
        {
+       if (BUFFERP (glyph->object))
+         {
+           EMACS_INT dpos = glyph->charpos - pt_old;
+
+           if (glyph->charpos > bpos_max)
+             bpos_max = glyph->charpos;
+           if (!glyph->avoid_cursor_p)
+             {
+               /* If we hit point, we've found the glyph on which to
+                  display the cursor.  */
+               if (dpos == 0)
+                 {
+                   match_with_avoid_cursor = 0;
+                   break;
+                 }
+               /* See if we've found a better approximation to
+                  POS_BEFORE or to POS_AFTER.  Note that we want the
+                  first (leftmost) glyph of all those that are the
+                  closest from below, and the last (rightmost) of all
+                  those from above.  */
+               if (0 > dpos && dpos > pos_before - pt_old)
+                 {
+                   pos_before = glyph->charpos;
+                   glyph_before = glyph;
+                 }
+               else if (0 < dpos && dpos <= pos_after - pt_old)
+                 {
+                   pos_after = glyph->charpos;
+                   glyph_after = glyph;
+                 }
+             }
+           else if (dpos == 0)
+             match_with_avoid_cursor = 1;
+         }
+       else if (STRINGP (glyph->object))
+         {
+           Lisp_Object chprop;
+           int glyph_pos = glyph->charpos;
+
+           chprop = Fget_char_property (make_number (glyph_pos), Qcursor,
+                                        glyph->object);
+           if (INTEGERP (chprop))
+             {
+               bpos_covered = bpos_max + XINT (chprop);
+               /* If the `cursor' property covers buffer positions up
+                  to and including point, we should display cursor on
+                  this glyph.  */
+               /* Implementation note: bpos_max == pt_old when, e.g.,
+                  we are in an empty line, where bpos_max is set to
+                  MATRIX_ROW_START_CHARPOS, see above.  */
+               if (bpos_max <= pt_old && bpos_covered >= pt_old)
+                 {
+                   cursor = glyph;
+                   break;
+                 }
+             }
+
+           string_seen = 1;
+         }
         x += glyph->pixel_width;
         ++glyph;
        }
+  else if (glyph > end)        /* row is reversed */
+    while (!INTEGERP (glyph->object))
+      {
+       if (BUFFERP (glyph->object))
+         {
+           EMACS_INT dpos = glyph->charpos - pt_old;
  
-  string_start = NULL;
-  while (glyph < end
-        && !INTEGERP (glyph->object)
-        && (!BUFFERP (glyph->object)
-            || (last_pos = glyph->charpos) < pt_old
-            || glyph->avoid_cursor_p))
+           if (glyph->charpos > bpos_max)
+             bpos_max = glyph->charpos;
+           if (!glyph->avoid_cursor_p)
+             {
+               if (dpos == 0)
+                 {
+                   match_with_avoid_cursor = 0;
+                   break;
+                 }
+               if (0 > dpos && dpos > pos_before - pt_old)
+                 {
+                   pos_before = glyph->charpos;
+                   glyph_before = glyph;
+                 }
+               else if (0 < dpos && dpos <= pos_after - pt_old)
+                 {
+                   pos_after = glyph->charpos;
+                   glyph_after = glyph;
+                 }
+             }
+           else if (dpos == 0)
+             match_with_avoid_cursor = 1;
+         }
+       else if (STRINGP (glyph->object))
+         {
+           Lisp_Object chprop;
+           int glyph_pos = glyph->charpos;
+
+           chprop = Fget_char_property (make_number (glyph_pos), Qcursor,
+                                        glyph->object);
+           if (INTEGERP (chprop))
+             {
+               bpos_covered = bpos_max + XINT (chprop);
+               /* If the `cursor' property covers buffer positions up
+                  to and including point, we should display cursor on
+                  this glyph.  */
+               if (bpos_max <= pt_old && bpos_covered >= pt_old)
+                 {
+                   cursor = glyph;
+                   break;
+                 }
+             }
+           string_seen = 1;
+         }
+       --glyph;
+       if (glyph == end)
+         break;
+       x -= glyph->pixel_width;
+    }
+
+  /* Step 2: If we didn't find an exact match for point, we need to
+     look for a proper place to put the cursor among glyphs between
+     GLYPH_BEFORE and GLYPH_AFTER.  */
+  if (!(BUFFERP (glyph->object) && glyph->charpos == pt_old)
+      && bpos_covered < pt_old)
      {
-      if (! STRINGP (glyph->object))
+      if (row->ends_in_ellipsis_p && pos_after == last_pos)
         {
-         string_start = NULL;
-         x += glyph->pixel_width;
-         ++glyph;
-         /* If we are beyond the cursor position computed from the
-            last overlay seen, that overlay is not in effect for
-            current cursor position.  Reset the cursor information
-            computed from that overlay.  */
-         if (cursor_from_overlay_pos
-             && last_pos >= cursor_from_overlay_pos)
+         EMACS_INT ellipsis_pos;
+
+         /* Scan back over the ellipsis glyphs.  */
+         if (!row->reversed_p)
             {
-             cursor_from_overlay_pos = 0;
-             cursor = NULL;
+             ellipsis_pos = (glyph - 1)->charpos;
+             while (glyph > row->glyphs[TEXT_AREA]
+                    && (glyph - 1)->charpos == ellipsis_pos)
+               glyph--, x -= glyph->pixel_width;
+             /* That loop always goes one position too far, including
+                the glyph before the ellipsis.  So scan forward over
+                that one.  */
+             x += glyph->pixel_width;
+             glyph++;
             }
-       }
-      else
-       {
-         if (string_start == NULL)
+         else  /* row is reversed */
             {
-             string_before_pos = last_pos;
-             string_start = glyph;
-             string_start_x = x;
+             ellipsis_pos = (glyph + 1)->charpos;
+             while (glyph < row->glyphs[TEXT_AREA] + row->used[TEXT_AREA] - 1
+                    && (glyph + 1)->charpos == ellipsis_pos)
+               glyph++, x += glyph->pixel_width;
+             x -= glyph->pixel_width;
+             glyph--;
             }
-         /* Skip all glyphs from a string.  */
-         do
+       }
+      else if (match_with_avoid_cursor
+              /* zero-width characters produce no glyphs */
+              || eabs (glyph_after - glyph_before) == 1)
+       {
+         cursor = glyph_after;
+         x = -1;
+       }
+      else if (string_seen)
+       {
+         int incr = row->reversed_p ? -1 : +1;
+
+         /* Need to find the glyph that came out of a string which is
+            present at point.  That glyph is somewhere between
+            GLYPH_BEFORE and GLYPH_AFTER, and it came from a string
+            positioned between POS_BEFORE and POS_AFTER in the
+            buffer.  */
+         struct glyph *stop = glyph_after;
+         EMACS_INT pos = pos_before;
+
+         x = -1;
+         for (glyph = glyph_before + incr;
+              row->reversed_p ? glyph > stop : glyph < stop; )
             {
-             Lisp_Object cprop;
-             int pos;
-             if ((cursor == NULL || glyph > cursor)
-                 && (cprop = Fget_char_property (make_number ((glyph)->charpos),
-                                                 Qcursor, (glyph)->object),
-                     !NILP (cprop))
-                 && (pos = string_buffer_position (w, glyph->object,
-                                                   string_before_pos),
-                     (pos == 0   /* from overlay */
-                      || pos == pt_old)))
+
+             /* Any glyphs that come from the buffer are here because
+                of bidi reordering.  Skip them, and only pay
+                attention to glyphs that came from some string.  */
+             if (STRINGP (glyph->object))
                 {
-                 /* Compute the first buffer position after the overlay.
-                    If the `cursor' property tells us how  many positions
-                    are associated with the overlay, use that.  Otherwise,
-                    estimate from the buffer positions of the glyphs
-                    before and after the overlay.  */
-                 cursor_from_overlay_pos = (pos ? 0 : last_pos
-                                            + (INTEGERP (cprop) ? XINT (cprop) : 0));
-                 cursor = glyph;
-                 cursor_x = x;
+                 Lisp_Object str;
+                 EMACS_INT tem;
+
+                 str = glyph->object;
+                 tem = string_buffer_position_lim (w, str, pos, pos_after, 0);
+                 if (pos <= tem)
+                   {
+                     /* If the string from which this glyph came is
+                        found in the buffer at point, then we've
+                        found the glyph we've been looking for.  */
+                     if (tem == pt_old)
+                       {
+                         /* The glyphs from this string could have
+                            been reordered.  Find the one with the
+                            smallest string position.  Or there could
+                            be a character in the string with the
+                            `cursor' property, which means display
+                            cursor on that character's glyph.  */
+                         int strpos = glyph->charpos;
+
+                         cursor = glyph;
+                         for (glyph += incr;
+                              EQ (glyph->object, str);
+                              glyph += incr)
+                           {
+                             Lisp_Object cprop;
+                             int gpos = glyph->charpos;
+
+                             cprop = Fget_char_property (make_number (gpos),
+                                                         Qcursor,
+                                                         glyph->object);
+                             if (!NILP (cprop))
+                               {
+                                 cursor = glyph;
+                                 break;
+                               }
+                             if (glyph->charpos < strpos)
+                               {
+                                 strpos = glyph->charpos;
+                                 cursor = glyph;
+                               }
+                           }
+
+                         goto compute_x;
+                       }
+                     pos = tem + 1; /* don't find previous instances */
+                   }
+                 /* This string is not what we want; skip all of the
+                    glyphs that came from it.  */
+                 do
+                   glyph += incr;
+                 while ((row->reversed_p ? glyph > stop : glyph < stop)
+                        && EQ (glyph->object, str));
                 }
-             x += glyph->pixel_width;
-             ++glyph;
+             else
+               glyph += incr;
             }
-         while (glyph < end && EQ (glyph->object, string_start->object));
+
+         /* If we reached the end of the line, and END was from a string,
+            the cursor is not on this line.  */
+         if (glyph == end
+             && STRINGP ((glyph - incr)->object)
+             && row->continued_p)
+           return 0;
         }
      }
  
+ compute_x:
    if (cursor != NULL)
+    glyph = cursor;
+  if (x < 0)
      {
-      glyph = cursor;
-      x = cursor_x;
-    }
-  else if (row->ends_in_ellipsis_p && glyph == end)
-    {
-      /* Scan back over the ellipsis glyphs, decrementing positions.  */
-      while (glyph > row->glyphs[TEXT_AREA]
-            && (glyph - 1)->charpos == last_pos)
-       glyph--, x -= glyph->pixel_width;
-      /* That loop always goes one position too far, including the
-        glyph before the ellipsis.  So scan forward over that one.  */
-      x += glyph->pixel_width;
-      glyph++;
-    }
-  else if (string_start
-          && (glyph == end || !BUFFERP (glyph->object) || last_pos > pt_old))
-    {
-      /* We may have skipped over point because the previous glyphs
-        are from string.  As there's no easy way to know the
-        character position of the current glyph, find the correct
-        glyph on point by scanning from string_start again.  */
-      Lisp_Object limit;
-      Lisp_Object string;
-      struct glyph *stop = glyph;
-      int pos;
+      struct glyph *g;
  
-      limit = make_number (pt_old + 1);
-      glyph = string_start;
-      x = string_start_x;
-      string = glyph->object;
-      pos = string_buffer_position (w, string, string_before_pos);
-      /* If POS == 0, STRING is from overlay.  We skip such glyphs
-        because we always put the cursor after overlay strings.  */
-      while (pos == 0 && glyph < stop)
+      /* Need to compute x that corresponds to GLYPH.  */
+      for (g = row->glyphs[TEXT_AREA], x = row->x; g < glyph; g++)
         {
-         string = glyph->object;
-         SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string));
-         if (glyph < stop)
-           pos = string_buffer_position (w, glyph->object, string_before_pos);
-       }
-
-      while (glyph < stop)
-       {
-         pos = XINT (Fnext_single_char_property_change
-                     (make_number (pos), Qdisplay, Qnil, limit));
-         if (pos > pt_old)
-           break;
-         /* Skip glyphs from the same string.  */
-         string = glyph->object;
-         SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string));
-         /* Skip glyphs from an overlay.  */
-         while (glyph < stop
-                && ! string_buffer_position (w, glyph->object, pos))
-           {
-             string = glyph->object;
-             SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string));
-           }
-       }
-
-      /* If we reached the end of the line, and END was from a string,
-        the cursor is not on this line.  */
-      if (glyph == end && row->continued_p)
+         if (g >= row->glyphs[TEXT_AREA] + row->used[TEXT_AREA])
+           abort ();
+         x += g->pixel_width;
+       }
+    }
+
+  /* ROW could be part of a continued line, which might have other
+     rows whose start and end charpos occlude point.  Only set
+     w->cursor if we found a better approximation to the cursor
+     position than we have from previously examined rows.  */
+  if (w->cursor.vpos >= 0
+      /* Make sure cursor.vpos specifies a row whose start and end
+        charpos occlude point.  This is because some callers of this
+        function leave cursor.vpos at the row where the cursor was
+        displayed during the last redisplay cycle.  */
+      && MATRIX_ROW_START_CHARPOS (MATRIX_ROW (matrix, w->cursor.vpos)) <= pt_old
+      && pt_old < MATRIX_ROW_END_CHARPOS (MATRIX_ROW (matrix, w->cursor.vpos)))
+    {
+      struct glyph *g1 =
+       MATRIX_ROW_GLYPH_START (matrix, w->cursor.vpos) + w->cursor.hpos;
+
+      /* Keep the candidate whose buffer position is the closest to
+        point.  */
+      if (BUFFERP (g1->object)
+         && (g1->charpos == pt_old /* an exact match always wins */
+             || (BUFFERP (glyph->object)
+                 && eabs (g1->charpos - pt_old)
+                  < eabs (glyph->charpos - pt_old))))
+       return 0;
+      /* Keep the candidate that comes from a row spanning fewer buffer
+        positions.  This may win when one or both candidate positions
+        are on glyphs that came from display strings, for which we
+        cannot compare buffer positions.  */
+      if (MATRIX_ROW_END_CHARPOS (MATRIX_ROW (matrix, w->cursor.vpos))
+         - MATRIX_ROW_START_CHARPOS (MATRIX_ROW (matrix, w->cursor.vpos))
+         < MATRIX_ROW_END_CHARPOS (row) - MATRIX_ROW_START_CHARPOS (row))
         return 0;
      }
-
    w->cursor.hpos = glyph - row->glyphs[TEXT_AREA];
    w->cursor.x = x;
    w->cursor.vpos = MATRIX_ROW_VPOS (row, matrix) + dvpos;
@@ -14474,15 +15040,39 @@ try_window_reusing_current_matrix (w)
             {
               struct glyph *glyph = row->glyphs[TEXT_AREA] + w->cursor.hpos;
               struct glyph *end = glyph + row->used[TEXT_AREA];
+             struct glyph *orig_glyph = glyph;
+             struct cursor_pos orig_cursor = w->cursor;
  
               for (; glyph < end
                      && (!BUFFERP (glyph->object)
-                        || glyph->charpos < PT);
+                        || glyph->charpos != PT);
                    glyph++)
                 {
                   w->cursor.hpos++;
                   w->cursor.x += glyph->pixel_width;
                 }
+             /* With bidi reordering, charpos changes non-linearly
+                with hpos, so the right glyph could be to the
+                left.  */
+             if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering)
+                 && (!BUFFERP (glyph->object) || glyph->charpos != PT))
+               {
+                 struct glyph *start_glyph = row->glyphs[TEXT_AREA];
+
+                 glyph = orig_glyph - 1;
+                 orig_cursor.hpos--;
+                 orig_cursor.x -= glyph->pixel_width;
+                 for (; glyph >= start_glyph
+                        && (!BUFFERP (glyph->object)
+                            || glyph->charpos != PT);
+                      glyph--)
+                   {
+                     w->cursor.hpos--;
+                     w->cursor.x -= glyph->pixel_width;
+                   }
+                 if (BUFFERP (glyph->object) && glyph->charpos == PT)
+                   w->cursor = orig_cursor;
+               }
             }
         }
  
@@ -14926,6 +15516,18 @@ try_window_id (w)
    if (!NILP (XBUFFER (w->buffer)->word_wrap))
      GIVE_UP (21);
  
+  /* Under bidi reordering, adding or deleting a character in the
+     beginning of a paragraph, before the first strong directional
+     character, can change the base direction of the paragraph (unless
+     the buffer specifies a fixed paragraph direction), which will
+     require to redisplay the whole paragraph.  It might be worthwhile
+     to find the paragraph limits and widen the range of redisplayed
+     lines to that, but for now just give up this optimization and
+     redisplay from scratch.  */
+  if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering)
+      && NILP (XBUFFER (w->buffer)->bidi_paragraph_direction))
+    GIVE_UP (22);
+
    /* Make sure beg_unchanged and end_unchanged are up to date.  Do it
       only if buffer has really changed.  The reason is that the gap is
       initially at Z for freshly visited files.  The code below would
@@ -16501,6 +17103,7 @@ display_line (it)
    int wrap_row_used = -1, wrap_row_ascent, wrap_row_height;
    int wrap_row_phys_ascent, wrap_row_phys_height;
    int wrap_row_extra_line_spacing;
+  struct display_pos row_end;
  
    /* We always start displaying at hpos zero even if hscrolled.  */
    xassert (it->hpos == 0 && it->current_x == 0);
@@ -16589,6 +17192,11 @@ display_line (it)
  
           it->continuation_lines_width = 0;
           row->ends_at_zv_p = 1;
+         /* A row that displays right-to-left text must always have
+            its last face extended all the way to the end of line,
+            even if this row ends in ZV.  */
+         if (row->reversed_p)
+           extend_face_to_end_of_line (it);
           break;
         }
  
@@ -16996,7 +17604,60 @@ display_line (it)
    compute_line_metrics (it);
  
    /* Remember the position at which this line ends.  */
-  row->end = it->current;
+  if (!it->bidi_p)
+    row->end = row_end = it->current;
+  else
+    {
+      EMACS_INT min_pos = row->start.pos.charpos, max_pos = 0;
+      struct glyph *g;
+      struct it save_it;
+      struct text_pos tpos;
+
+      /* ROW->start and ROW->end must be the smallest and largest
+        buffer positions in ROW.  But if ROW was bidi-reordered,
+        these two positions can be anywhere in the row, so we must
+        rescan all of the ROW's glyphs to find them.  */
+      for (g = row->glyphs[TEXT_AREA];
+          g < row->glyphs[TEXT_AREA] + row->used[TEXT_AREA];
+          g++)
+       {
+         if (BUFFERP (g->object))
+           {
+             if (g->charpos && g->charpos < min_pos)
+               min_pos = g->charpos;
+             if (g->charpos > max_pos)
+               max_pos = g->charpos;
+           }
+       }
+      if (min_pos < row->start.pos.charpos)
+       {
+         row->start.pos.charpos = min_pos;
+         row->start.pos.bytepos = CHAR_TO_BYTE (min_pos);
+       }
+      if (max_pos == 0)
+       max_pos = min_pos;
+      /* For ROW->end, we need the position that is _after_ max_pos,
+        in the logical order.  */
+      SET_TEXT_POS (tpos, max_pos + 1, CHAR_TO_BYTE (max_pos + 1));
+      /* If the character at max_pos+1 is a newline, skip that as
+        well.  Note that this may skip some invisible text.  */
+      if (FETCH_CHAR (tpos.bytepos) == '\n'
+         || (FETCH_CHAR (tpos.bytepos) == '\r' && it->selective))
+       {
+         save_it = *it;
+         it->bidi_p = 0;
+         reseat_1 (it, tpos, 0);
+         set_iterator_to_next (it, 1);
+         row_end = it->current;
+         *it = save_it;
+       }
+      else
+       {
+         row_end = it->current;
+         row_end.pos = tpos;
+       }
+      row->end = row_end;
+    }
  
    /* Record whether this row ends inside an ellipsis.  */
    row->ends_in_ellipsis_p
@@ -17015,7 +17676,12 @@ display_line (it)
    it->right_user_fringe_face_id = 0;
  
    /* Maybe set the cursor.  */
-  if (it->w->cursor.vpos < 0
+  if ((it->w->cursor.vpos < 0
+       /* In bidi-reordered rows, keep checking for proper cursor
+         position even if one has been found already, because buffer
+         positions in such rows change non-linearly with ROW->VPOS,
+         when a line is continued.  */
+       || it->bidi_p)
        && PT >= MATRIX_ROW_START_CHARPOS (row)
        && PT <= MATRIX_ROW_END_CHARPOS (row)
        && cursor_row_p (it->w, row))
@@ -17033,7 +17699,11 @@ display_line (it)
    it->current_y += row->height;
    ++it->vpos;
    ++it->glyph_row;
-  it->start = it->current;
+  /* The next row should use the same value of the reversed_p flag as this
+     one.  set_iterator_to_next decides when it's a new paragraph and
+     recomputes the value of the flag accordingly.  */
+  it->glyph_row->reversed_p = row->reversed_p;
+  it->start = row_end;
    return row->displays_text_p;
  }
  
@@ -20591,6 +21261,13 @@ append_glyph (it)
        glyph->u.ch = it->char_to_display;
        glyph->slice = null_glyph_slice;
        glyph->font_type = FONT_TYPE_UNKNOWN;
+      if (it->bidi_p)
+       {
+         glyph->resolved_level = it->bidi_it.resolved_level;
+         if ((it->bidi_it.type & 7) != it->bidi_it.type)
+           abort ();
+         glyph->bidi_type = it->bidi_it.type;
+       }
        ++it->glyph_row->used[area];
      }
    else
@@ -20643,6 +21320,13 @@ append_composite_glyph (it)
        glyph->face_id = it->face_id;
        glyph->slice = null_glyph_slice;
        glyph->font_type = FONT_TYPE_UNKNOWN;
+      if (it->bidi_p)
+       {
+         glyph->resolved_level = it->bidi_it.resolved_level;
+         if ((it->bidi_it.type & 7) != it->bidi_it.type)
+           abort ();
+         glyph->bidi_type = it->bidi_it.type;
+       }
        ++it->glyph_row->used[area];
      }
    else
@@ -20817,6 +21501,13 @@ produce_image_glyph (it)
           glyph->u.img_id = img->id;
           glyph->slice = slice;
           glyph->font_type = FONT_TYPE_UNKNOWN;
+         if (it->bidi_p)
+           {
+             glyph->resolved_level = it->bidi_it.resolved_level;
+             if ((it->bidi_it.type & 7) != it->bidi_it.type)
+               abort ();
+             glyph->bidi_type = it->bidi_it.type;
+           }
           ++it->glyph_row->used[area];
         }
        else
@@ -20863,6 +21554,13 @@ append_stretch_glyph (it, object, width, height, ascent)
        glyph->u.stretch.height = height;
        glyph->slice = null_glyph_slice;
        glyph->font_type = FONT_TYPE_UNKNOWN;
+      if (it->bidi_p)
+       {
+         glyph->resolved_level = it->bidi_it.resolved_level;
+         if ((it->bidi_it.type & 7) != it->bidi_it.type)
+           abort ();
+         glyph->bidi_type = it->bidi_it.type;
+       }
        ++it->glyph_row->used[area];
      }
    else
@@ -23040,7 +23738,7 @@ mouse_face_from_buffer_pos (Lisp_Object window,
          associated with the end position, which must not be
          highlighted.  */
        Lisp_Object prev_object;
-      int pos;
+      EMACS_INT pos;
  
        while (glyph > row->glyphs[TEXT_AREA])
         {
@@ -23672,7 +24370,8 @@ note_mouse_highlight (f, x, y)
        && XFASTINT (w->last_modified) == BUF_MODIFF (b)
        && XFASTINT (w->last_overlay_modified) == BUF_OVERLAY_MODIFF (b))
      {
-      int hpos, vpos, pos, i, dx, dy, area;
+      int hpos, vpos, i, dx, dy, area;
+      EMACS_INT pos;
        struct glyph *glyph;
        Lisp_Object object;
        Lisp_Object mouse_face = Qnil, overlay = Qnil, position;
@@ -23960,7 +24659,7 @@ note_mouse_highlight (f, x, y)
                     struct glyph_row *r
                       = MATRIX_ROW (w->current_matrix, vpos);
                     int start = MATRIX_ROW_START_CHARPOS (r);
-                   int pos = string_buffer_position (w, object, start);
+                   EMACS_INT pos = string_buffer_position (w, object, start);
                     if (pos > 0)
                       {
                         help = Fget_char_property (make_number (pos),
@@ -24015,7 +24714,8 @@ note_mouse_highlight (f, x, y)
                       struct glyph_row *r
                         = MATRIX_ROW (w->current_matrix, vpos);
                       int start = MATRIX_ROW_START_CHARPOS (r);
-                     int pos = string_buffer_position (w, object, start);
+                     EMACS_INT pos = string_buffer_position (w, object,
+                                                             start);
                       if (pos > 0)
                         pointer = Fget_char_property (make_number (pos),
                                                       Qpointer, w->buffer);
@@ -24824,6 +25524,11 @@ syms_of_xdisp ()
    staticpro (&previous_help_echo_string);
    help_echo_pos = -1;
  
+  Qright_to_left = intern ("right-to-left");
+  staticpro (&Qright_to_left);
+  Qleft_to_right = intern ("left-to-right");
+  staticpro (&Qleft_to_right);
+
  #ifdef HAVE_WINDOW_SYSTEM
    DEFVAR_BOOL ("x-stretch-cursor", &x_stretch_cursor_p,
      doc: /* *Non-nil means draw block cursor as wide as the glyph under it.
author	Eli Zaretskii <eliz@gnu.org>
	Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)
committer	Eli Zaretskii <eliz@gnu.org>
	Sat, 6 Mar 2010 10:16:27 +0000 (05:16 -0500)
src/.gdbinit		patch \| blob \| history
src/ChangeLog.bidi	[new file with mode: 0644]	patch \| blob
src/Makefile.in		patch \| blob \| history
src/bidi.c	[new file with mode: 0644]	patch \| blob
src/buffer.c		patch \| blob \| history
src/buffer.h		patch \| blob \| history
src/dispextern.h		patch \| blob \| history
src/dispnew.c		patch \| blob \| history
src/term.c		patch \| blob \| history
src/window.h		patch \| blob \| history
src/xdisp.c		patch \| blob \| history