Merge from origin/emacs-25

[gnu-emacs] / doc / lispref / strings.texi
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi

index aca6189d7bf48db572778a47a81b271c05ca1e7e..ce629aa8259e9883d84c4af064dbd1b3a9cb5c78 100644 (file)
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -1,6 +1,6 @@
-@c -*-texinfo-*-
+@c -*- mode: texinfo; coding: utf-8 -*-
  @c This is part of the GNU Emacs Lisp Reference Manual.
-@c Copyright (C) 1990-1995, 1998-1999, 2001-2015 Free Software
+@c Copyright (C) 1990-1995, 1998-1999, 2001-2016 Free Software
  @c Foundation, Inc.
  @c See the file elisp.texi for copying conditions.
  @node Strings and Characters
@@ -145,11 +145,12 @@ This returns a string containing the characters @var{characters}.
  @end example
  @end defun
  
-@defun substring string start &optional end
+@defun substring string &optional start end
  This function returns a new string which consists of those characters
  from @var{string} in the range from (and including) the character at the
  index @var{start} up to (but excluding) the character at the index
-@var{end}.  The first character is at index zero.
+@var{end}.  The first character is at index zero.  With one argument,
+this function just copies @var{string}.
  
  @example
  @group
@@ -465,6 +466,7 @@ Representations}.
  @code{string-equal} is another name for @code{string=}.
  @end defun
  
+@cindex locale-dependent string equivalence
  @defun string-collate-equalp string1 string2 &optional locale ignore-case
  This function returns @code{t} if @var{string1} and @var{string2} are
  equal with respect to collation rules.  A collation rule is not only
@@ -486,16 +488,17 @@ accent Unicode characters:
  
  The optional argument @var{locale}, a string, overrides the setting of
  your current locale identifier for collation.  The value is system
-dependent; a @var{locale} "en_US.UTF-8" is applicable on POSIX
-systems, while it would be, e.g., "enu_USA.1252" on MS-Windows
+dependent; a @var{locale} @code{"en_US.UTF-8"} is applicable on POSIX
+systems, while it would be, e.g., @code{"enu_USA.1252"} on MS-Windows
  systems.
  
  If @var{ignore-case} is non-@code{nil}, characters are converted to lower-case
  before comparing them.
  
+@vindex w32-collate-ignore-punctuation
  To emulate Unicode-compliant collation on MS-Windows systems,
  bind @code{w32-collate-ignore-punctuation} to a non-@code{nil} value, since
-the codeset part of the locale cannot be "UTF-8" on MS-Windows.
+the codeset part of the locale cannot be @code{"UTF-8"} on MS-Windows.
  
  If your system does not support a locale environment, this function
  behaves like @code{string-equal}.
@@ -518,7 +521,7 @@ optional argument @var{ignore-case} is non-@code{nil}, the comparison
  ignores case differences.
  @end defun
  
-@cindex lexical comparison
+@cindex lexical comparison of strings
  @defun string< string1 string2
  @c (findex string< causes problems for permuted index!!)
  This function compares two strings a character at a time.  It
@@ -569,13 +572,20 @@ no characters is less than any other string.
  @end example
  
  Symbols are also allowed as arguments, in which case their print names
-are used.
+are compared.
  @end defun
  
  @defun string-lessp string1 string2
  @code{string-lessp} is another name for @code{string<}.
  @end defun
  
+@defun string-greaterp string1 string2
+This function returns the result of comparing @var{string1} and
+@var{string2} in the opposite order, i.e., it is equivalent to calling
+@code{(string-lessp @var{string2} @var{string1})}.
+@end defun
+
+@cindex locale-dependent string comparison
  @defun string-collate-lessp string1 string2 &optional locale ignore-case
  This function returns @code{t} if @var{string1} is less than
  @var{string2} in collation order.  A collation order is not only
@@ -584,8 +594,8 @@ determined by the lexicographic order of the characters contained in
  relations between these characters.  Usually, it is defined by the
  @var{locale} environment Emacs is running with.
  
-For example, punctuation and whitespace characters might be considered
-less significant for @ref{Sorting,,sorting}.
+For example, punctuation and whitespace characters might be ignored
+for sorting (@pxref{Sequence Functions}):
  
  @example
  @group
@@ -594,12 +604,15 @@ less significant for @ref{Sorting,,sorting}.
  @end group
  @end example
  
+This behavior is system-dependent; e.g., punctuation and whitespace
+are never ignored on Cygwin, regardless of locale.
+
  The optional argument @var{locale}, a string, overrides the setting of
  your current locale identifier for collation.  The value is system
-dependent; a @var{locale} "en_US.UTF-8" is applicable on POSIX
-systems, while it would be, e.g., "enu_USA.1252" on MS-Windows
-systems.  The @var{locale} "POSIX" lets @code{string-collate-lessp}
-behave like @code{string-lessp}:
+dependent; a @var{locale} @code{"en_US.UTF-8"} is applicable on POSIX
+systems, while it would be, e.g., @code{"enu_USA.1252"} on MS-Windows
+systems.  The @var{locale} value of @code{"POSIX"} or @code{"C"} lets
+@code{string-collate-lessp} behave like @code{string-lessp}:
  
  @example
  @group
@@ -614,12 +627,20 @@ before comparing them.
  
  To emulate Unicode-compliant collation on MS-Windows systems,
  bind @code{w32-collate-ignore-punctuation} to a non-@code{nil} value, since
-the codeset part of the locale cannot be "UTF-8" on MS-Windows.
+the codeset part of the locale cannot be @code{"UTF-8"} on MS-Windows.
  
  If your system does not support a locale environment, this function
  behaves like @code{string-lessp}.
  @end defun
  
+@defun string-version-lessp string1 string2
+This function compares strings lexicographically, except it treats
+sequences of numerical characters as if they comprised a base-ten
+number, and then compares the numbers.  So @samp{foo2.png} is
+``smaller'' than @samp{foo12.png} according to this predicate, even
+though @samp{12} is lexicographically ``smaller'' than @samp{2}.
+@end defun
+
  @defun string-prefix-p string1 string2 &optional ignore-case
  This function returns non-@code{nil} if @var{string1} is a prefix of
  @var{string2}; i.e., if @var{string2} starts with @var{string1}.  If
@@ -644,7 +665,7 @@ string.  Likewise, the specified part of @var{string2} runs from index
  @var{start2} up to index @var{end2}.
  
  The strings are compared by the numeric values of their characters.
-For instance, @var{str1} is considered ``smaller than'' @var{str2} if
+For instance, @var{str1} is considered less than @var{str2} if
  its first differing character has a smaller numeric value.  If
  @var{ignore-case} is non-@code{nil}, characters are converted to
  lower-case before comparing them.  Unibyte strings are converted to
@@ -685,7 +706,7 @@ against a string, can be used for a kind of string comparison; see
  strings and integers.  @code{format} (@pxref{Formatting Strings}) and
  @code{prin1-to-string} (@pxref{Output Functions}) can also convert
  Lisp objects into strings.  @code{read-from-string} (@pxref{Input
-Functions}) can ``convert'' a string representation of a Lisp object
+Functions}) can convert a string representation of a Lisp object
  into an object.  The functions @code{string-to-multibyte} and
  @code{string-to-unibyte} convert the text representation of a string
  (@pxref{Converting Representations}).
@@ -801,7 +822,7 @@ they appear; it is called a @dfn{format string}.
  
    Formatting is often useful for computing messages to be displayed.  In
  fact, the functions @code{message} and @code{error} provide the same
-formatting feature described here; they differ from @code{format} only
+formatting feature described here; they differ from @code{format-message} only
  in how they use the result of formatting.
  
  @defun format string &rest objects
@@ -815,6 +836,16 @@ are copied directly into the output, including their text properties,
  if any.
  @end defun
  
+@defun format-message string &rest objects
+@cindex curved quotes
+@cindex curly quotes
+This function acts like @code{format}, except it also converts any
+curved single quotes in @var{string} as per the value of
+@code{text-quoting-style}, and treats grave accent (@t{`}) and
+apostrophe (@t{'}) as if they were curved single quotes.  @xref{Keys
+in Documentation}.
+@end defun
+
  @cindex @samp{%} in format
  @cindex format specification
    A format specification is a sequence of characters beginning with a
@@ -913,20 +944,23 @@ specification is unusual in that it does not use a value.  For example,
    Any other format character results in an @samp{Invalid format
  operation} error.
  
-  Here are several examples:
+  Here are several examples, which assume the typical
+@code{text-quoting-style} settings:
  
  @example
  @group
-(format "The name of this buffer is %s." (buffer-name))
-     @result{} "The name of this buffer is strings.texi."
-
-(format "The buffer object prints as %s." (current-buffer))
-     @result{} "The buffer object prints as strings.texi."
-
  (format "The octal value of %d is %o,
           and the hex value is %x." 18 18 18)
       @result{} "The octal value of 18 is 22,
           and the hex value is 12."
+
+(format-message
+ "The name of this buffer is ‘%s’." (buffer-name))
+     @result{} "The name of this buffer is ‘strings.texi’."
+
+(format-message
+ "The buffer object prints as `%s'." (current-buffer))
+     @result{} "The buffer object prints as ‘strings.texi’."
  @end group
  @end example
  
@@ -948,7 +982,7 @@ the width specifier normally consists of spaces inserted on the left:
  If the width is too small, @code{format} does not truncate the
  object's printed representation.  Thus, you can use a width to specify
  a minimum spacing between columns with no risk of losing information.
-In the following three examples, @samp{%7s} specifies a minimum width
+In the following two examples, @samp{%7s} specifies a minimum width
  of 7.  In the first case, the string inserted in place of @samp{%7s}
  has only 3 letters, and needs 4 blank spaces as padding.  In the
  second case, the string @code{"specification"} is 13 letters wide but
@@ -956,12 +990,12 @@ is not truncated.
  
  @example
  @group
-(format "The word `%7s' has %d letters in it."
+(format "The word '%7s' has %d letters in it."
          "foo" (length "foo"))
-     @result{} "The word `    foo' has 3 letters in it."
-(format "The word `%7s' has %d letters in it."
+     @result{} "The word '    foo' has 3 letters in it."
+(format "The word '%7s' has %d letters in it."
          "specification" (length "specification"))
-     @result{} "The word `specification' has 13 letters in it."
+     @result{} "The word 'specification' has 13 letters in it."
  @end group
  @end example
  
@@ -977,7 +1011,7 @@ numbers and negative numbers use the same number of columns.  They are
  ignored except for @samp{%d}, @samp{%e}, @samp{%f}, and @samp{%g}; if
  both flags are used, @samp{+} takes precedence.
  
-  The flag @samp{#} specifies an ``alternate form'' which depends on
+  The flag @samp{#} specifies an alternate form which depends on
  the format in use.  For @samp{%o}, it ensures that the result begins
  with a @samp{0}.  For @samp{%x} and @samp{%X}, it prefixes the result
  with @samp{0x} or @samp{0X}.  For @samp{%e}, @samp{%f}, and @samp{%g},
@@ -1000,12 +1034,12 @@ ignored.
  (format "%06d is padded on the left with zeros" 123)
       @result{} "000123 is padded on the left with zeros"
  
-(format "%-6d is padded on the right" 123)
-     @result{} "123    is padded on the right"
+(format "'%-6d' is padded on the right" 123)
+     @result{} "'123   ' is padded on the right"
  
-(format "The word `%-7s' actually has %d letters in it."
+(format "The word '%-7s' actually has %d letters in it."
          "foo" (length "foo"))
-     @result{} "The word `foo    ' actually has 3 letters in it."
+     @result{} "The word 'foo    ' actually has 3 letters in it."
  @end group
  @end example
  
@@ -1232,8 +1266,8 @@ Exits}).
  
    Some language environments modify the case conversions of
  @acronym{ASCII} characters; for example, in the Turkish language
-environment, the @acronym{ASCII} character @samp{I} is downcased into
-a Turkish ``dotless i''.  This can interfere with code that requires
+environment, the @acronym{ASCII} capital I is downcased into
+a Turkish dotless i (@samp{ı}).  This can interfere with code that requires
  ordinary @acronym{ASCII} case conversion, such as implementations of
  @acronym{ASCII}-based network protocols.  In that case, use the
  @code{with-case-table} macro with the variable @code{ascii-case-table},