;;; thai-word.el --- find Thai word boundaries
-;; Copyright (C) 2000, 2001, 2002, 2003, 2004
-;; Electrotechnical Laboratory, JAPAN.
+;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H14PRO021
;; Author: Kenichi HANDA <handa@etl.go.jp>
;; Keywords: thai, word break, emacs
-;; This program is free software; you can redistribute it and/or modify
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
-;; This program is distributed in the hope that it will be useful,
+;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to
-;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;; The Thai word list used here has been taken from IBM's ICU4J project
;; (file `thai6.ucs', version 1.4, converted to TIS encoding), with
;; written authorization of the copyright holder.
+;;; Commentary:
+
;; This file implements an algorithm to find Thai word breaks using a
;; dictionary. It is based on the C program `cttex' written by
;; Vuthichai Ampornaramveth <vuthi@nii.ac.jp>.
;; which means that you can easily index the list character by
;; character.
-(defconst thai-word-table nil)
-
-
-;; Set up `thai-word-table'.
-
-(let
- ((l
+(defvar thai-word-table
+ (let ((table (list 'thai-words)))
+ (dolist (elt
+ ;;; The following list is indented like this to minimize the file size.
'("¡¡"
"¡¡Å"
"¡¡Ø¸Àѳ±ì"
"äÎâ¡ÃÁÔàµÍÃì"
"äÎâ´Ã¤ÒÃìºÍ¹"
"äÎâÅ"
- )))
- (setq thai-word-table (list 'thai-words))
- (while l
- (set-nested-alist (car l) 1 thai-word-table)
- (setq l (cdr l))))
+ ))
+ (set-nested-alist elt 1 table))
+ table)
+ "Nested alist of Thai words.")
(defun thai-update-word-table (file &optional append)
(coding-system-for-read 'thai-tis620)
(table (if append thai-word-table (list 'thai-words))))
(unwind-protect
- (save-excursion
- (set-buffer buf)
+ (with-current-buffer buf
(insert-file-contents file)
(goto-char (point-min))
(while (re-search-forward "\\ct+" nil t)
;; character by character.
(while this
(setq pos (1+ pos)
- char (char-after pos)
+ char (or (char-after pos) 0)
category-set (char-category-set char))
;; If the current sequence is recorded in `thai-word-table'
;; (i.e. (car THIS) is 1) and the following Thai character is
;; Move point forward to the end of Thai word which follows point and
-;; update VEC. VEC is a vector of three elements used to cache word
+;; update VEC. VEC is a vector of three elements used to cache word
;; end positions. The Nth element, if non-nil, is a list of end
;; points of the Nth word, or t indicating that there is no Thai
;; character. LIMIT limits the point movement.
nil)
(progn
;; We found four succeeding Thai words (or LIMIT has been
- ;; reached). Move to the end of the first word.
+ ;; reached). Move to the end of the first word.
(goto-char (car v0))
;; Update VEC for the next function call. If no larger word
;; positions have been found, set the corresponding vector
(aset vec 1 v2)
(aset vec 2 v3)))) ; exit function successfully
- ;; We didn't find four consecutive words. If we have found a
+ ;; We didn't find four consecutive words. If we have found a
;; `second best' solution and the length of those two words is
;; longer than the longest word we can see at the current point,
;; adopt the second best solution. This decision is based on
(thai-forward-word (- count)))
+(defun thai-kill-word (arg)
+  "Like `kill-word' but pay attention to Thai word boundaries.
+With argument ARG, do this that many times.
+The killed region extends from point to wherever
+`thai-forward-word' moves when called with ARG."
+  (interactive "p")
+  ;; Remember where we started, move by Thai words, then kill
+  ;; everything between the two positions.
+  (let ((start (point)))
+    (thai-forward-word arg)
+    (kill-region start (point))))
+
+
+(defun thai-backward-kill-word (arg)
+ "Like `backward-kill-word' but pay attention to Thai word boundaries.
+ARG is negated and passed to `thai-kill-word'."
+ (interactive "p")
+ (thai-kill-word (- arg)))
+
+
+(defun thai-transpose-words (arg)
+ "Like `transpose-words' but pay attention to Thai word boundaries.
+ARG and the symbol `thai-forward-word' are passed to `transpose-subr'."
+ ;; The "*" in the interactive spec signals an error if the buffer
+ ;; is read-only; "p" supplies the numeric prefix argument as ARG.
+ (interactive "*p")
+ (transpose-subr 'thai-forward-word arg))
+
+(defun thai-fill-find-break-point (linebeg)
+  "Go to a line breaking position near point considering Thai word boundaries.
+First move backward by one Thai word.  If that leaves point at or
+before LINEBEG, return to the original position and move forward by
+one Thai word instead.  Finally call `kinsoku' with LINEBEG."
+  (let ((origin (point)))
+    (thai-forward-word -1)
+    ;; Moving backward did not stay after LINEBEG, so fall back to the
+    ;; word boundary following the original position.
+    (unless (> (point) linebeg)
+      (goto-char origin)
+      (thai-forward-word 1))
+    (kinsoku linebeg)))
+
(provide 'thai-word)
;; End:
;; end of thai-word.el
+
+;; arch-tag: 29927f02-e177-4224-a270-7e67210b038a