Merge from origin/emacs-24

[gnu-emacs] / doc / lispref / text.texi
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi

index 4c3286adbfc92a522d79b34b877a0730fbf481dc..a7cfb22e889059757326325e19d50f7041cc0f37 100644 (file)
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -1,6 +1,6 @@
  @c -*-texinfo-*-
  @c This is part of the GNU Emacs Lisp Reference Manual.
-@c Copyright (C) 1990-1995, 1998-2014 Free Software Foundation, Inc.
+@c Copyright (C) 1990-1995, 1998-2015 Free Software Foundation, Inc.
  @c See the file elisp.texi for copying conditions.
  @node Text
  @chapter Text
@@ -162,6 +162,7 @@ the end of a line.
  
  @node Buffer Contents
  @section Examining Buffer Contents
+@cindex buffer portion as string
  
    This section describes functions that allow a Lisp program to
  convert any portion of the text in the buffer into a string.
@@ -219,6 +220,12 @@ This function returns the contents of the entire accessible portion of
  the current buffer, as a string.
  @end defun
  
+  If you need to make sure the resulting string, when copied to a
+different location, will not change its visual appearance due to
+reordering of bidirectional text, use the
+@code{buffer-substring-with-bidi-context} function
+(@pxref{Bidirectional Display, buffer-substring-with-bidi-context}).
+
  @defun filter-buffer-substring start end &optional delete
  This function filters the buffer text between @var{start} and @var{end}
  using a function specified by the variable
@@ -343,10 +350,10 @@ This function ignores case when comparing characters
  if @code{case-fold-search} is non-@code{nil}.  It always ignores
  text properties.
  
-Suppose the current buffer contains the text @samp{foobarbar
-haha!rara!}; then in this example the two substrings are @samp{rbar }
-and @samp{rara!}.  The value is 2 because the first substring is greater
-at the second character.
+Suppose you have the text @w{@samp{foobarbar haha!rara!}} in the
+current buffer; then in this example the two substrings are @samp{rbar
+} and @samp{rara!}.  The value is 2 because the first substring is
+greater at the second character.
  
  @example
  (compare-buffer-substrings nil 6 11 nil 16 21)
@@ -806,7 +813,7 @@ non-whitespace character in each line in the region.
  
  If this command acts on the entire buffer (i.e. if called
  interactively with the mark inactive, or called from Lisp with
-@var{end} nil), it also deletes all trailing lines at the end of the
+@var{end} @code{nil}), it also deletes all trailing lines at the end of the
  buffer if the variable @code{delete-trailing-lines} is non-@code{nil}.
  @end deffn
  
@@ -2673,6 +2680,8 @@ along with the characters; this includes such diverse functions as
  
  @node Examining Properties
  @subsection Examining Text Properties
+@cindex examining text properties
+@cindex text properties, examining
  
    The simplest way to examine text properties is to ask for the value of
  a particular property of a particular character.  For that, use
@@ -2764,6 +2773,8 @@ used instead.  Here is an example:
  
  @node Changing Properties
  @subsection Changing Text Properties
+@cindex changing text properties
+@cindex text properties, changing
  
    The primitives for changing properties apply to a specified range of
  text in a buffer or string.  The function @code{set-text-properties}
@@ -2865,7 +2876,7 @@ adding the face @var{face} to the @code{face} text property.
  (@pxref{Special Properties}), such as a face name or an anonymous face
  (@pxref{Faces}).
  
-If any text in the region already has a non-nil @code{face} property,
+If any text in the region already has a non-@code{nil} @code{face} property,
  those face(s) are retained.  This function sets the @code{face}
  property to a list of faces, with @var{face} as the first element (by
  default) and the pre-existing faces as the remaining elements.  If the
@@ -2927,6 +2938,8 @@ buffer but does not copy its properties.
  
  @node Property Search
  @subsection Text Property Search Functions
+@cindex searching text properties
+@cindex text properties, searching
  
    In typical use of text properties, most of the time several or many
  consecutive characters have the same value for a property.  Rather than
@@ -3241,6 +3254,11 @@ possible to remove a @code{read-only} property unless you know the
  special trick: bind @code{inhibit-read-only} to a non-@code{nil} value
  and then remove the property.  @xref{Read Only Buffers}.
  
+@item inhibit-read-only
+@kindex inhibit-read-only @r{(text property)}
+If a character has the property @code{inhibit-read-only}, and the
+buffer is read-only, editing the character in question is allowed.
+
  @item invisible
  @kindex invisible @r{(text property)}
  A non-@code{nil} @code{invisible} property can make a character invisible
@@ -3713,6 +3731,7 @@ clicks on the link quickly without moving the mouse.  This behavior is
  controlled by the user option @code{mouse-1-click-follows-link}.
  @xref{Mouse References,,, emacs, The GNU Emacs Manual}.
  
+@cindex follow-link (text or overlay property)
    To set up the link so that it obeys
  @code{mouse-1-click-follows-link}, you must either (1) apply a
  @code{follow-link} text or overlay property to the link text, or (2)
@@ -3980,6 +3999,8 @@ coalesced whenever possible.  @xref{Property Search}.
  
  @node Substitution
  @section Substituting for a Character Code
+@cindex replace characters in region
+@cindex substitute characters
  
    The following functions replace characters within a specified region
  based on their character codes.
@@ -4114,8 +4135,9 @@ buffer.
  Normally, this command puts point before the inserted text, and the
  mark after it.  However, if the optional second argument @var{beforep}
  is non-@code{nil}, it puts the mark before and point after.
-You can pass a non-@code{nil} second argument @var{beforep} to this
-function interactively by supplying any prefix argument.
+
+When called interactively, the command defaults to putting point after
+the inserted text, and a prefix argument inverts this behavior.
  
  If the register contains a rectangle, then the rectangle is inserted
  with its upper left corner at point.  This means that text is inserted
@@ -4318,7 +4340,7 @@ coding instead.
  When Emacs is compiled with libxml2 support, the following functions
  are available to parse HTML or XML text into Lisp object trees.
  
-@defun libxml-parse-html-region start end &optional base-url
+@defun libxml-parse-html-region start end &optional base-url discard-comments
  This function parses the text between @var{start} and @var{end} as
  HTML, and returns a list representing the HTML @dfn{parse tree}.  It
  attempts to handle ``real world'' HTML by robustly coping with syntax
@@ -4327,6 +4349,9 @@ mistakes.
  The optional argument @var{base-url}, if non-@code{nil}, should be a
  string specifying the base URL for relative URLs occurring in links.
  
+If the optional argument @var{discard-comments} is non-@code{nil},
+then the parse tree is created without any comments.
+
  In the parse tree, each HTML node is represented by a list in which
  the first element is a symbol representing the node name, the second
  element is an alist of node attributes, and the remaining elements are
@@ -4340,16 +4365,17 @@ document:
  @end example
  
  @noindent
-A call to @code{libxml-parse-html-region} returns this:
+A call to @code{libxml-parse-html-region} returns this @acronym{DOM}
+(document object model):
  
  @example
-(html ()
-  (head ())
-  (body ((width . "101"))
-   (div ((class . "thing"))
-    "Foo"
-    (div ()
-      "Yes"))))
+(html nil
+ (head nil)
+ (body ((width . "101"))
+  (div ((class . "thing"))
+   "Foo"
+   (div nil
+    "Yes"))))
  @end example
  @end defun
  
@@ -4362,12 +4388,140 @@ buffer.  The argument @var{dom} should be a list as generated by
  @end defun
  
  @cindex parsing xml
-@defun libxml-parse-xml-region start end &optional base-url
+@defun libxml-parse-xml-region start end &optional base-url discard-comments
  This function is the same as @code{libxml-parse-html-region}, except
  that it parses the text as XML rather than HTML (so it is stricter
  about syntax).
  @end defun
  
+@menu
+* Document Object Model:: Access, manipulate and search the @acronym{DOM}.
+@end menu
+
+@node Document Object Model
+@subsection Document Object Model
+@cindex HTML DOM
+@cindex XML DOM
+@cindex DOM
+@cindex Document Object Model
+
+The @acronym{DOM} returned by @code{libxml-parse-html-region} (and the
+other @acronym{XML} parsing functions) is a tree structure where each
+node has a node name (called a @dfn{tag}), an optional key/value
+@dfn{attribute} list, and then a list of @dfn{child nodes}.  The child
+nodes are either strings or @acronym{DOM} objects.
+
+@example
+(body ((width . "101"))
+ (div ((class . "thing"))
+  "Foo"
+  (div nil
+   "Yes")))
+@end example
+
+@defun dom-node tag &optional attributes &rest children
+This function creates a @acronym{DOM} node of type @var{tag}.  If
+given, @var{attributes} should be a key/value pair list.
+If given, @var{children} should be @acronym{DOM} nodes.
+@end defun
+
+The following functions can be used to work with this structure.  Each
+function takes a @acronym{DOM} node, or a list of nodes.  In the
+latter case, only the first node in the list is used.
+
+Simple accessors:
+
+@table @code
+@item dom-tag @var{node}
+Return the @dfn{tag} (also called ``node name'') of the node.
+
+@item dom-attr @var{node} @var{attribute}
+Return the value of @var{attribute} in the node.  A common usage
+would be:
+
+@lisp
+(dom-attr img 'src)
+     @result{} "http://fsf.org/logo.png"
+@end lisp
+
+@item dom-children @var{node}
+Return all the children of the node.
+
+@item dom-non-text-children @var{node}
+Return all the non-string children of the node.
+
+@item dom-attributes @var{node}
+Return the key/value pair list of attributes of the node.
+
+@item dom-text @var{node}
+Return all the textual elements of the node as a concatenated string.
+
+@item dom-texts @var{node} &optional @var{separator}
+Return all the textual elements of the node, as well as the textual
+elements of all the children of the node, recursively, as a
+concatenated string.  The optional argument @var{separator} is a
+string to be inserted between the textual elements.
+
+@item dom-parent @var{dom} @var{node}
+Return the parent of @var{node} in @var{dom}.
+@end table
+
+The following are functions for altering the @acronym{DOM}.
+
+@table @code
+@item dom-set-attribute @var{node} @var{attribute} @var{value}
+Set the @var{attribute} of the node to @var{value}.
+
+@item dom-append-child @var{node} @var{child}
+Append @var{child} as the last child of @var{node}.
+
+@item dom-add-child-before @var{node} @var{child} @var{before}
+Add @var{child} to @var{node}'s child list before the @var{before}
+node.  If @var{before} is @code{nil}, make @var{child} the first child.
+
+@item dom-set-attributes @var{node} @var{attributes}
+Replace all the attributes of the node with a new key/value list.
+@end table
+
+The following are functions for searching for elements in the
+@acronym{DOM}.  They all return lists of matching nodes.
+
+@table @code
+@item dom-by-tag @var{dom} @var{tag}
+Return all nodes in @var{dom} that are of type @var{tag}.  A typical
+use would be:
+
+@lisp
+(dom-by-tag dom 'td)
+     @result{} ((td ...) (td ...) (td ...))
+@end lisp
+
+@item dom-by-class @var{dom} @var{match}
+Return all nodes in @var{dom} that have class names that match
+@var{match}, which is a regular expression.
+
+@item dom-by-style @var{dom} @var{match}
+Return all nodes in @var{dom} that have styles that match @var{match},
+which is a regular expression.
+
+@item dom-by-id @var{dom} @var{match}
+Return all nodes in @var{dom} that have IDs that match @var{match},
+which is a regular expression.
+
+@item dom-strings @var{dom}
+Return all strings in @var{dom}.
+
+@end table
+
+Utility functions:
+
+@table @code
+@item dom-pp @var{dom} &optional @var{remove-empty}
+Pretty-print @var{dom} at point.  If @var{remove-empty} is
+non-@code{nil}, don't print textual nodes that just contain whitespace.
+@end table
+
+
  @node Atomic Changes
  @section Atomic Change Groups
  @cindex atomic changes