#!/bin/sh
# Check the format of GNU Emacs change log entries.
-# Copyright 2014-2015 Free Software Foundation, Inc.
+# Copyright 2014-2016 Free Software Foundation, Inc.
# This file is part of GNU Emacs.
# Use U+00A2 CENT SIGN to test whether the locale works.
# The octal escapes \302\242 are the two-byte UTF-8 encoding of U+00A2;
# printf turns them into raw bytes, and the command substitution drops
# the trailing newline.
cent_sign_utf8_format='\302\242\n'
cent_sign=`printf "$cent_sign_utf8_format"`
# The awk probe prints the cent sign followed by @, starting at
# character 2.  An awk that treats the cent sign as a single character
# prints just @; one that counts bytes also prints the second byte of
# the cent sign, so the comparison with @ below fails.
# The - form is a bare action, which awk runs once per input record;
# the + form uses BEGIN and reads from /dev/null, so the probe runs
# without consuming any input.
# NOTE(review): $awk is set outside this excerpt -- presumably the awk
# command to run; confirm against the rest of the script.
-print_at_sign='{print substr("'$cent_sign'@", 2)}'
-at_sign=`$awk "$print_at_sign" 2>/dev/null`
+print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
+at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
# If the current locale fails the probe, retry it under en_US.UTF-8.
if test "$at_sign" != @; then
- at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" 2>/dev/null`
+ at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null`
# Adopt en_US.UTF-8 when it passes the probe.  The - form exports
# LC_ALL only in that case; the + form falls back to the C locale
# otherwise and exports LC_ALL either way.
if test "$at_sign" = @; then
- LC_ALL=en_US.UTF-8; export LC_ALL
+ LC_ALL=en_US.UTF-8
+ else
+ LC_ALL=C
fi
+ export LC_ALL
fi
# Check the log entry.
BEGIN {
# These regular expressions assume traditional Unix unibyte behavior.
# They are needed for old or broken versions of awk, e.g.,
- # mawk 1.3.3 (1996), Gawk 3.0.4 (1999).
+ # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that
+ # cannot use UTF-8 as the codeset for the locale.
space = "[ \f\n\r\t\v]"
non_space = "[^ \f\n\r\t\v]"
- non_print = "[\1-\37\177]"
+ # The non_print below rejects the UTF-8 encodings of control
+ # characters and surrogates: 0x01-0x1f 0x7f 0x80-0x9f 0xd800-0xdbff 0xdc00-0xdfff
+ non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]"
# Prefer POSIX regular expressions if available, as they do a
# better job of checking. Similarly, prefer POSIX negated
}
}
# Rule for lines that start with #.
# The - form simply skips such lines.  The + form first checks for a
# scissors line: #, optional spaces, a run of dashes, optional spaces,
# one of >8 >% 8< %<, optional spaces, another run of dashes, and
# optional trailing spaces (for example: # ------ >8 ------).  On a
# match, exit stops reading further input, so nothing after the
# scissors line is checked.  Any other # line is skipped with next, so
# the rules that follow never see it.
- /^#/ { next }
+ /^#/ {
+ # Ignore every line after a scissors line.
+ if (/^# *---* *(>[8%]|[8%]<) *---* *$/) { exit }
+
+ # Ignore comment lines.
+ next
+ }
!/^.*$/ {
print "Invalid character (not UTF-8) in commit message"