#!/bin/sh
# Check the format of GNU Emacs change log entries.
-# Copyright 2014 Free Software Foundation, Inc.
+# Copyright 2014-2016 Free Software Foundation, Inc.
# This file is part of GNU Emacs.
# Written by Paul Eggert.
+# Choose the awk implementation that the rest of this script runs as $awk.
+# Prefer gawk if available, as it handles NUL bytes properly;
+# otherwise fall back to plain awk.
+if type gawk >/dev/null 2>&1; then
+ awk=gawk
+else
+ awk=awk
+fi
+
# Use a UTF-8 locale if available, so that the UTF-8 check works.
# Use U+00A2 CENT SIGN to test whether the locale works.
-cent_sign_utf8_octal='\302\242'
-at_sign=`
- printf "${cent_sign_utf8_octal}@" |
- awk '{print substr($0, 2)}' 2>/dev/null
-`
+# cent_sign holds the two bytes \302\242 (the UTF-8 encoding of U+00A2);
+# command substitution strips the trailing newline that the format adds.
+cent_sign_utf8_format='\302\242\n'
+cent_sign=`printf "$cent_sign_utf8_format"`
+# The probe is an awk program that prints everything after the first
+# character of the cent sign followed by "@".  It prints exactly "@"
+# only when awk treats the two cent-sign bytes as a single character.
+print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
+at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
+# If the current locale fails the probe, retry under en_US.UTF-8.
+# Export that locale when it works; otherwise export the C locale.
if test "$at_sign" != @; then
- at_sign=`
- printf "${cent_sign_utf8_octal}@" |
- LC_ALL=en_US.utf8 awk '{print substr($0, 2)}' 2>/dev/null
- `
+ at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null`
if test "$at_sign" = @; then
- LC_ALL=en_US.utf8; export LC_ALL
+ LC_ALL=en_US.UTF-8
+ else
+ LC_ALL=C
fi
+ export LC_ALL
fi
# Check the log entry.
-exec awk '
- /^#/ { next }
+# Replace this shell with awk, so that the exit status of awk becomes
+# the exit status of this script.  The file named by $1 is the input.
+exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" '
+ BEGIN {
+ # Set up the patterns (space, non_space, non_print) that the rules
+ # below match against each line.
+
+ # These regular expressions assume traditional Unix unibyte behavior.
+ # They are needed for old or broken versions of awk, e.g.,
+ # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that
+ # cannot use UTF-8 as the codeset for the locale.
+ space = "[ \f\n\r\t\v]"
+ non_space = "[^ \f\n\r\t\v]"
+ # The non_print below matches the UTF-8 encodings of control
+ # characters and surrogates, i.e., of the code points
+ # 0x01-0x1f 0x7f 0x80-0x9f 0xd800-0xdbff 0xdc00-0xdfff
+ non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]"
+
+ # Prefer POSIX regular expressions if available, as they do a
+ # better job of checking. Similarly, prefer POSIX negated
+ # expressions if UTF-8 also works.
+ if (" " ~ /[[:space:]]/) {
+ space = "[[:space:]]"
+ # at_sign is "@" only if awk passed the UTF-8 probe run by the shell;
+ # cent_sign must also count as a single printable character.
+ if (at_sign == "@" && cent_sign ~ /^[[:print:]]$/) {
+ non_space = "[^[:space:]]"
+ non_print = "[^[:print:]]"
+ }
+ }
+ }
+
+ /^#/ {
+ # Ignore every line after a scissors line, i.e., a comment line made
+ # of two or more dashes, then ">8", ">%", "8<" or "%<", then two or
+ # more dashes, with optional spaces between the parts.  The exit
+ # statement stops reading input and goes straight to the END rule.
+ if (/^# *---* *(>[8%]|[8%]<) *---* *$/) { exit }
+
+ # Ignore comment lines.
+ next
+ }
+ # A line that /^.*$/ cannot match is reported as badly encoded.
+ # NOTE(review): presumably awk refuses to match "." against bytes that
+ # are invalid in the current locale -- confirm for the awk in use.
!/^.*$/ {
- print "Invalid character (not UTF-8)"
+ print "Invalid character (not UTF-8) in commit message"
status = 1
}
- nlines == 0 && !/[^[:space:]]/ { next }
+ # Skip blank lines that precede the first line with content, then
+ # count every remaining line in nlines.
+ nlines == 0 && $0 !~ non_space { next }
{ nlines++ }
- nlines == 1 && /^[[:space:]]/ {
- print "White space at start of first line"
- status = 1
+ # First line: strip one leading "fixup! " or "squash! " marker from $0,
+ # then require that what remains does not start with white space.
+ # The stripped $0 is also what the later rules see for this line.
+ nlines == 1 {
+ # Ignore special markers used by "git rebase --autosquash".
+ if (! sub(/^fixup! /, ""))
+ sub(/^squash! /, "")
+
+ if ($0 ~ "^" space) {
+ print "White space at start of commit message'\''s first line"
+ status = 1
+ }
}
- nlines == 2 && /[^[:space:]]/ {
- print "Nonempty second line"
+ # Second line: it must be empty or contain only white space.
+ nlines == 2 && $0 ~ non_space {
+ print "Nonempty second line in commit message"
status = 1
}
- /[[:cntrl:]]/ {
- print "Text contains control character; please use spaces instead of tabs"
- status = 1
+ {
+ # Expand tabs to spaces for length calculations etc.
+ # Each pass replaces the leftmost tab with enough spaces to reach
+ # the next multiple of 8 columns (the "%*s" pads an empty string
+ # to the computed width).
+ while (match($0, /\t/)) {
+ before_tab = substr($0, 1, RSTART - 1)
+ after_tab = substr($0, RSTART + 1)
+ $0 = sprintf("%s%*s%s", before_tab, 8 - (RSTART - 1) % 8, "", after_tab)
+ }
}
- 72 < length && /[[:space:]]/ {
- print "Line longer than 72 characters"
+ # Lines that contain white space may be at most 78 characters long.
+ # A line without white space is not caught by this rule.
+ 78 < length && $0 ~ space {
+ print "Line longer than 78 characters in commit message"
status = 1
}
+ # Any line longer than 140 characters is rejected, with or without
+ # white space.
140 < length {
- print "Word longer than 140 characters"
+ print "Word longer than 140 characters in commit message"
status = 1
}
+ # Reject any line that starts with a Signed-off-by: trailer.
/^Signed-off-by: / {
- print "'Signed-off-by:' present"
+ print "'\''Signed-off-by:'\'' in commit message"
+ status = 1
+ }
+
+ # Reject any line containing a character matched by non_print.
+ # Tabs were already expanded to spaces by an earlier rule.
+ $0 ~ non_print {
+ print "Unprintable character in commit message"
status = 1
}
+ # Final verdict: an input with no counted lines is an error, and any
+ # failure adds a pointer to CONTRIBUTE before exiting with status.
+ # status is unset, and so exits as 0, when no check failed.
END {
if (nlines == 0) {
- print "Empty change log entry"
+ print "Empty commit message"
status = 1
}
+ if (status != 0) {
+ # The quote marks around CONTRIBUTE are escaped for the shell, as
+ # this whole awk program is one single-quoted shell word; left
+ # unescaped, the shell would remove them from the message.
+ print "Commit aborted; please see the file '\''CONTRIBUTE'\''"
+ }
exit status
}
' <"$1"