+(defun vc-rcs-parse (&optional buffer)
+ "Parse current buffer, presumed to be in RCS-style masterfile format.
+Optional arg BUFFER specifies another buffer to parse. Return an alist
+of two elements, w/ keys `headers' and `revisions' and values in turn
+sub-alists. For `headers', the values unless otherwise specified are
+strings and the keys are:
+
+ desc -- description
+ head -- latest revision
+ branch -- the branch the \"head revision\" lies on;
+ absent if the head revision lies on the trunk
+ access -- ???
+ symbols -- sub-alist of (SYMBOL . REVISION) elements
+ locks -- if file is checked out, something like \"ttn:1.7\"
+ strict -- t if \"strict locking\" is in effect, otherwise nil
+ comment -- may be absent; typically something like \"# \" or \"; \"
+ expand -- may be absent; ???
+
+For `revisions', the car is REVISION (string), the cdr a sub-alist,
+with string values (unless otherwise specified) and keys:
+
+ date -- a time value (like that returned by `encode-time'); as a
+ special case, a year value less than 100 is augmented by 1900
+ author -- username
+ state -- typically \"Exp\" or \"Rel\"
+ branches -- list of revisions that begin branches from this revision
+ next -- on the trunk: the chronologically-preceding revision, or \"\";
+ on a branch: the chronologically-following revision, or \"\"
+ log -- change log entry
+ text -- for the head revision on the trunk, the body of the file;
+ other revisions have `:insn' instead
+ :insn -- for non-head revisions, a list of parsed instructions
+ in one of two forms, in both cases START meaning \"first
+ go to line START\":
+ - `(START k COUNT)' -- kill COUNT lines
+ - `(START i TEXT)' -- insert TEXT (a string)
+ The list is in descending order by START.
+
+The `:insn' key is a keyword to distinguish it as a vc-rcs.el extension."
+ (setq buffer (get-buffer (or buffer (current-buffer))))
+ (set-buffer buffer)
+ ;; An RCS masterfile can be viewed as containing four regular (for the
+ ;; most part) sections: (a) the "headers", (b) the "rev headers", (c)
+ ;; the "description" and (d) the "rev bodies", in that order. In the
+ ;; returned alist (see docstring), elements from (b) and (d) are
+ ;; combined pairwise to form the "revisions", while those from (a) and
+ ;; (c) are simply combined to form the "headers".
+ ;;
+ ;; Loosely speaking, each section contains a series of alternating
+ ;; "tags" and "printed representations". In the (b) and (d), many
+ ;; such series can appear, and a revision number on a line by itself
+ ;; precedes the series of tags and printed representations associated
+ ;; with it.
+ ;;
+ ;; In (a) and (b), the printed representations (with the exception of
+ ;; the `comment' tag in the headers) terminate with a semicolon, which
+ ;; is NOT part of the "value" finally associated with the tag. All
+ ;; other printed representations are in "@@-format"; there is an "@",
+ ;; the middle part (to be translated into the value), another "@" and
+ ;; a newline. Each "@@" in the middle part indicates the position of
+ ;; a single "@" (and consequently the requirement of an additional
+ ;; initial step when translating to the value).
+ ;;
+ ;; Parser state includes vars that collect parts of the return value...
+ (let ((desc nil) (headers nil) (revs nil)
+ ;; ... as well as vars that support a single-pass, tag-assisted,
+ ;; minimal-data-copying scan. Basically -- skirting around the
+ ;; grouping by revision required in (b) and (d) -- we repeatedly
+ ;; and context-sensitively read a tag (that MUST be present),
+ ;; determine the bounds of the printed representation, translate
+ ;; it into a value, and push the tag plus value onto one of the
+ ;; collection vars. Finally, we return the parse tree
+ ;; incorporating the values of the collection vars (see "rv").
+ ;;
+ ;; A symbol or string to keep track of context (for error messages).
+ context
+ ;; A symbol, the current tag.
+ tok
+ ;; Region (begin and end buffer positions) of the printed
+ ;; representation for the current tag.
+ b e
+ ;; A list of buffer positions where "@@" can be found within the
+ ;; printed representation region. For each location, we push two
+ ;; elements onto the list, 1+ and 2+ the location, respectively,
+ ;; with the 2+ appearing at the head. In this way, the expression
+ ;; `(,e ,@@-holes ,b)
+ ;; describes regions that can be concatenated (in reverse order)
+ ;; to "de-@@-format" the printed representation as the first step
+ ;; to translating it into some value. See internal func `gather'.
+ @-holes)
+ (flet ((sw () (skip-chars-forward " \t\n")) ; i.e., `[:space:]'
+ (at (tag) (save-excursion (eq tag (read buffer))))
+ (to-eol () (buffer-substring-no-properties
+ (point) (progn (forward-line 1)
+ (1- (point)))))
+ (to-semi () (setq b (point)
+ e (progn (search-forward ";")
+ (1- (point)))))
+ (to-one@ () (setq @-holes nil
+ b (progn (search-forward "@") (point))
+ e (progn (while (and (search-forward "@")
+ (= ?@ (char-after))
+ (progn
+ (push (point) @-holes)
+ (forward-char 1)
+ (push (point) @-holes))))
+ (1- (point)))))
+ (tok+val (set-b+e name &optional proc)
+ (unless (eq name (setq tok (read buffer)))
+ (error "Missing `%s' while parsing %s" name context))
+ (sw)
+ (funcall set-b+e)
+ (cons tok (if proc
+ (funcall proc)
+ (buffer-substring-no-properties b e))))
+ (k-semi (name &optional proc) (tok+val 'to-semi name proc))
+ (gather () (let ((pairs `(,e ,@@-holes ,b))
+ acc)
+ (while pairs
+ (push (buffer-substring-no-properties
+ (cadr pairs) (car pairs))
+ acc)
+ (setq pairs (cddr pairs)))
+ (apply 'concat acc)))
+ (k-one@ (name &optional later) (tok+val 'to-one@ name
+ (if later
+ (lambda () t)
+ 'gather))))
+ (save-excursion
+ (goto-char (point-min))
+ ;; headers
+ (setq context 'headers)
+ (flet ((hpush (name &optional proc)
+ (push (k-semi name proc) headers)))
+ (hpush 'head)
+ (when (at 'branch)
+ (hpush 'branch))
+ (hpush 'access)
+ (hpush 'symbols
+ (lambda ()
+ (mapcar (lambda (together)
+ (let ((two (split-string together ":")))
+ (setcar two (intern (car two)))
+ (setcdr two (cadr two))
+ two))
+ (split-string
+ (buffer-substring-no-properties b e)))))
+ (hpush 'locks))
+ (push `(strict . ,(when (at 'strict)
+ (search-forward ";")
+ t))
+ headers)
+ (when (at 'comment)
+ (push (k-one@ 'comment) headers)
+ (search-forward ";"))
+ (when (at 'expand)
+ (push (k-one@ 'expand) headers)
+ (search-forward ";"))
+ (setq headers (nreverse headers))
+ ;; rev headers
+ (sw) (setq context 'rev-headers)
+ (while (looking-at "[0-9]")
+ (push `(,(to-eol)
+ ,(k-semi 'date
+ (lambda ()
+ (let ((ls (mapcar 'string-to-number
+ (split-string
+ (buffer-substring-no-properties
+ b e)
+ "\\."))))
+ ;; Hack the year -- verified to be the
+ ;; same algorithm used in RCS 5.7.
+ (when (< (car ls) 100)
+ (setcar ls (+ 1900 (car ls))))
+ (apply 'encode-time (nreverse ls)))))
+ ,@(mapcar 'k-semi '(author state))
+ ,(k-semi 'branches
+ (lambda ()
+ (split-string
+ (buffer-substring-no-properties b e))))
+ ,(k-semi 'next))
+ revs)
+ (sw))
+ (setq revs (nreverse revs))
+ ;; desc
+ (sw) (setq context 'desc
+ desc (k-one@ 'desc))
+ ;; rev bodies
+ (let (acc
+ ;; Element of `revs' that initially holds only header info.
+ ;; "Pairwise combination" occurs when we add body info.
+ rev
+ ;; Components of the editing commands (aside from the actual
+ ;; text) that comprise the `text' printed representations
+ ;; (not including the "head" revision).
+ cmd start act
+ ;; Ascending (reversed) `@-holes' which the internal func
+ ;; `incg' pops to effect incremental gathering.
+ asc
+ ;; Function to extract text (for the `a' command), either
+ ;; `incg' or `buffer-substring-no-properties'. (This is
+ ;; for speed; strictly speaking, it is sufficient to use
+ ;; only the former since it behaves identically to the
+ ;; latter in the absense of "@@".)
+ sub)
+ (flet ((incg (beg end) (let ((b beg) (e end) @-holes)
+ (while (and asc (< (car asc) e))
+ (push (pop asc) @-holes))
+ ;; Self-deprecate when work is done.
+ ;; Folding many dimensions into one.
+ ;; Thanks B.Mandelbrot, for complex sum.
+ ;; O beauteous math! --the Unvexed Bum
+ (unless asc
+ (setq sub 'buffer-substring-no-properties))
+ (gather))))
+ (while (and (sw)
+ (not (eobp))
+ (setq context (to-eol)
+ rev (or (assoc context revs)
+ (error "Rev `%s' has body but no head"
+ context))))
+ (push (k-one@ 'log) (cdr rev))
+ ;; For rev body `text' tags, delay translation slightly...
+ (push (k-one@ 'text t) (cdr rev))
+ ;; ... until we decide which tag and value is appropriate to
+ ;; collect. For the "head" revision, compute the value of the
+ ;; `text' printed representation by simple `gather'. For all
+ ;; other revisions, replace the `text' tag+value with `:insn'
+ ;; plus value, always scanning in-place.
+ (if (string= context (cdr (assq 'head headers)))
+ (setcdr (cadr rev) (gather))
+ (if @-holes
+ (setq asc (nreverse @-holes)
+ sub 'incg)
+ (setq sub 'buffer-substring-no-properties))
+ (goto-char b)
+ (setq acc nil)
+ (while (< (point) e)
+ (forward-char 1)
+ (setq cmd (char-before)
+ start (read (current-buffer))
+ act (read (current-buffer)))
+ (forward-char 1)
+ (push (case cmd
+ (?d
+ ;; `d' means "delete lines".
+ ;; For Emacs spirit, we use `k' for "kill".
+ `(,start k ,act))
+ (?a
+ ;; `a' means "append after this line" but
+ ;; internally we normalize it so that START
+ ;; specifies the actual line for insert, thus
+ ;; requiring less hair in the realization algs.
+ ;; For Emacs spirit, we use `i' for "insert".
+ `(,(1+ start) i
+ ,(funcall sub (point) (progn (forward-line act)
+ (point)))))
+ (t (error "Bad command `%c' in `text' for rev `%s'"
+ cmd context)))
+ acc))
+ (goto-char (1+ e))
+ (setcar (cdr rev) (cons :insn acc)))))))
+ ;; rv
+ `((headers ,desc ,@headers)
+ (revisions ,@revs)))))
+