;;; transcribe.el --- Package for audio transcriptions
-;; Copyright 2014-2015 Free Software Foundation, Inc.
+;; Copyright 2014-2016 Free Software Foundation, Inc.
;; Author: David Gonzalez Gandara <dggandara@member.fsf.org>
-;; Version: 1.0.1
+;; Version: 1.5.0
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; REQUIRES:
;; -----------------------------
-;; In order to use the audio functions of transcribe, you need to install
-;; emms and mpg321.
+;; This module works without any requires, but in order to use the audio
+;; functions, you need to install the Emacs package "emms", by Joe Drew,
+;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen,
+;; both under GPL licenses.
;;
;; USAGE:
;; -------------------------
-;; Transcribe is a tool to make audio transcriptions. It allows the
-;; transcriber to control the audio easily while typing, as well as
+;; Transcribe is a tool to make audio transcriptions for discourse analysis
+;; in the classroom.
+;; It allows the transcriber to control the audio easily while typing, as well as
;; automate the insertion of xml tags, in case the transcription protocol
;; include them.
-;; The analyse function will search for a specific structure
+;; The analysis functions will search for a specific structure
;; of episodes that can be automatically added with the macro NewEpisode.
-;; The function expects the utterances to be transcribed inside a xml tag
-;; with the identifier of the speaker, with the tags <l1> or <l2>, depending
+;; The function expects the speech acts to be transcribed inside a turn xml
+;; tag with the identifier of the speaker with optional move attribute.
+;; Each speech act is spected inside a <l1> or <l2> tag, depending
;; on the language used by the person. The attributes expected are the
-;; number of clauses that form the utterance and the number of errors the
-;; transcriber observes.
+;; number of clauses that form the utterance, the number of errors the
+;; transcriber observes, and the function of the speech act. The parser will
+;; work even if some attributes are missing.
;;
;;
;; AUDIO COMMANDS
;;
;; XML TAGGING COMMANDS
;; --------------------------------------------------
-;; C-x C-n --> Create new episode structure. This is useful in case your
-;; xml file structure requires it. You can customize the text
-;; inserted manipulating the realted function.
-;; <f6> -----> Interactively insert new tag. You will be prompted for the
-;; content of the tag. The starting tag and the end tag will be
-;; inserted automatically and the cursor placed in the proper
-;; place to type.
+;; C-x C-n ------> Create new episode structure. This is useful in case your
+;; xml file structure requires it.
+;; <f2> ---------> Interactively insert a function attribute in a speech act
+;; (l1 or l2) tag.
+;; <f3> ---------> Interactively insert a move attribute in a turn (person) tag
+;; <f4> ---------> Interactively insert an attribute (any kind)
+;; <f9> ---------> Insert turn (person) tag. Inserts a move attribute.
+;; <f10> --------> Insert a custom tag. Edit the function to adapt to your needs.
+;; <f11> --------> Insert speech act tag in L1, with clauses, errors and function
+;; attributes.
+;; <f12> --------> Insert speech act tag in L2, with clauses, errors and function
+;; attributes.
;;
-;;
-;;
-;; SPECIFIC COMMANDS I USE, THAT YOU MAY FIND USEFUL
-;; ------------------------------------------------
+;; AUTOMATIC PARSING
+;; -----------------------------------------------------
;; C-x C-a ------> Analyses the text for measurments of performance.
-;; <f11> --------> Customised tag 1. Edit the function to adapt to your needs.
-;; <f12> --------> Customised tag 2. Edit the function to adapt to your needs.
-;; <f7> ---------> Break tag. This command "breaks" a tag in two, that is
-;; it inserts an ending tag and then a starting tag.
-;; <f4> ---------> Insert atributes. This function insert custom xml attributes.
-;; Edit the function to suit you needs.
;;; Code:
-(if t (require 'emms-setup))
-;(require 'emms-player-mpd)
-;(setq emms-player-mpd-server-name "localhost")
-;(setq emms-player-mpd-server-port "6600")
+(require 'xml)
+
+;; (if t (require 'emms-setup))
+;; (require 'emms-player-mpd)
+;; (setq emms-player-mpd-server-name "localhost")
+;; (setq emms-player-mpd-server-port "6600")
(emms-standard)
(emms-default-players)
(if t (require 'emms-playing-time))
(emms-playing-time 1)
+(defvar transcribe-function-list '("initiating" "responding" "control" "expresive" "interpersonal"))
+(defvar transcribe-move-list '("initiation" "response" "follow-up"))
+(defvar transcribe-attribute-list '("clauses" "errors" "function" "move"))
+;; (append transcribe-attribute-list transcribe-function-list transcribe-move-list)
+
(defun transcribe-analyze-episode (episode person)
"This calls the external python package analyze_episodes2.py. The new
function transcribe-analyze implements its role now."
(shell-command (concat (expand-file-name "analyze_episodes2.py")
" -e " episode " -p " person " -i " buffer-file-name )))
+(defun transcribe-raw-to-buffer ()
+ "EXPERIMENTAL - Convert the xml tagged transcription to raw transcription, with the names
+ and the persons and the utterances only. The raw transcription will be send to buffer called
+ 'Raw Output'"
+ (interactive)
+ (let* ((xml (xml-parse-region (point-min) (point-max)))
+ (results (car xml))
+ (episodes (xml-get-children results 'episode)))
+
+ (dolist (episode episodes)
+ (let* ((transcription (xml-get-children episode 'transcription)))
+
+ (dolist (turn transcription)
+ (dolist (intervention (xml-node-children turn))
+ (if (listp intervention)
+ (progn
+ (with-current-buffer "Raw Output"
+ (insert (format "%s\t" (line-number-at-pos)))
+ (insert (format "%s:\t" (car intervention)))
+ (dolist (utterance (nthcdr 2 intervention))
+ (if (listp utterance)
+ (progn
+ (insert (format "%s " (nth 2 utterance))))
+
+ (insert (format "%s" utterance))))))
+
+ (with-current-buffer "Raw Output"
+ (insert (format "%s" (line-number-at-pos)))
+ (insert (format "%s" intervention))))))))))
+
(defun transcribe-analyze (episodenumber personid)
"Extract from a given episode and person the number of asunits per
- second produced, and the number of clauses per asunits, for L2 and L1."
+ second produced, and the number of clauses per asunits, for L2 and L1.
+ It writes two output files, one for L2 utterances and one for L1
+ utterances, so that they can be used with external programs. Output will
+ be inserted in 'Statistics Output' buffer"
(interactive "sepisodenumber: \nspersonid:")
(let* ((interventionsl2 '())
(interventionsl1 '())
(episodes (xml-get-children results 'episode))
(asunitsl2 0.0000)
(asunitsl1 0.0000)
- (shifts nil)
+ ;; (shifts 0.0000);; TODO implement
+ (initiating 0.0000);; TODO implement
+ (responding 0.0000);; TODO implement
+ (control 0.0000);; TODO implement
+ (expressive 0.0000);; TODO implement
+ (interpersonal 0.0000);; TODO implement
(clausesl1 0.0000)
- (errorsl1 0.0000)
+ ;; (errorsl1 0.0000);; TODO implement
(clausesl2 0.0000)
(errorsl2 0.0000)
(duration nil)
+ (role nil)
+ (context nil)
+ (demand nil)
+ ;; (clausesmessage nil)
(number nil))
(dolist (episode episodes)
- (let*((numbernode (xml-get-children episode 'number)))
+ (let*((numbernode (xml-get-children episode 'number))
+ (tasknode (xml-get-children episode 'task)))
(setq number (nth 2 (car numbernode)))
(when (equal episodenumber number)
(transcription (xml-get-children episode 'transcription)))
(setq duration (nth 2 (car durationnode)))
+
+ (dolist (task tasknode)
+ (let* ((rolenode (xml-get-children task 'role))
+ (contextnode (xml-get-children task 'context))
+ (demandnode (xml-get-children task 'demand)))
+
+ (setq role (nth 2 (car rolenode)))
+ (setq context (nth 2 (car contextnode)))
+ (setq demand (nth 2 (car demandnode)))
+ ;; (with-current-buffer "Statistics Output"
+ ;; (insert (format "role: %s; context: %s; demand: %s\n" role context demand)))
+ ))
+
(dolist (turn transcription)
(let* ((interventionnode (xml-get-children turn
(intern personid))))
(dolist (l2turn l2node)
(let* ((l2 (nth 2 l2turn))
- (clausesl2node (nth 1 l2turn))
- (clausesl2nodeinc (cdr (car clausesl2node))))
+ (attrs (nth 1 l2turn))
+ (clausesl2nodeinc (cdr (assq 'clauses attrs)))
+ (errorsl2inc (cdr (assq 'errors attrs)))
+ (function (cdr (assq 'function attrs))))
- (when (not (equal clausesl2node nil))
+ (when (string-equal function "initiating")
+ (setq initiating (+ initiating 1)))
+ (when (string-equal function "responding")
+ (setq responding (+ responding 1)))
+ (when (string-equal function "control")
+ (setq control (+ control 1)))
+ (when (string-equal function "expressive")
+ (setq expressive (+ expressive 1)))
+ (when (string-equal function "interpersonal")
+ (setq interpersonal (+ interpersonal 1)))
+ (when attrs
(setq clausesl2 (+ clausesl2 (string-to-number
- clausesl2nodeinc))))
- (when (not (equal l2 nil))
- (add-to-list 'interventionsl2 l2)
+ clausesl2nodeinc)))
+ (setq errorsl2 (+ errorsl2 (string-to-number
+ errorsl2inc))))
+ (when l2
+ ;; (add-to-list 'interventionsl2 l2)
+ (cl-pushnew l2 interventionsl2 :test #'equal)
(setq asunitsl2 (1+ asunitsl2)))))
(dolist (l1turn l1node)
(let*((l1 (nth 2 l1turn))
(when (not (equal clausesl1node nil))
(setq clausesl1 (+ clausesl1 (string-to-number
clausesl1nodeinc))))
- (when (not (equal l1 nil))
- (add-to-list 'interventionsl1 l1)
+ (when l1
+ ;; (add-to-list 'interventionsl1 l1)
+ (cl-pushnew l1 interventionsl1 :test #'equal)
(setq asunitsl1 (1+ asunitsl1)))))))))))))
(reverse interventionsl2)
+ ;; (write-region (format "%s" interventionsl2) nil (format "transcribe-output-%s-%s-l2.txt" episodenumber personid))
+ ;; Write raw interventions to file will be supported by a different function
(reverse interventionsl1)
- ;(print interventions) ;uncomment to display all the interventions on screen
- (setq asunitspersecondl2 (/ asunitsl2 (string-to-number duration)))
- (setq clausesperasunitl2 (/ clausesl2 asunitsl2))
- (setq asunitspersecondl1 (/ asunitsl1 (string-to-number duration)))
- (setq clausesperasunitl1 (/ clausesl1 asunitsl1))
- (princ (format "episode: %s, duration: %s, person: %s\n" number duration personid))
- (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L1(Asunits/second): %s"
- asunitspersecondl2 clausesperasunitl2 asunitspersecondl1)))
-)
+ ;; (write-region (format "%s" interventionsl1) nil (format "transcribe-output-%s-%s-l1.txt" episodenumber personid))
+ ;; (print interventionsl2) ;uncomment to display all the interventions on screen
+ (let((asunitspersecondl2 (/ asunitsl2 (string-to-number duration)))
+ (clausesperasunitl2 (/ clausesl2 asunitsl2))
+ (errorsperasunitl2 (/ errorsl2 asunitsl2))
+ (asunitspersecondl1 (/ asunitsl1 (string-to-number duration)))
+ ;; (clausesperasunitl1 (/ clausesl1 asunitsl1))
+ (initiatingperasunitl2 (/ initiating asunitsl2))
+ (respondingperasunitl2 (/ responding asunitsl2))
+ (controlperasunitl2 (/ control asunitsl2))
+ (expressiveperasunitl2 (/ expressive asunitsl2))
+ (interpersonalperasunitl2 (/ interpersonal asunitsl2)))
+
+ ;; (princ clausesmessage)
+ (princ (format "episode: %s, duration: %s, person: %s\n" episodenumber duration personid))
+ (with-current-buffer "Statistics Output"
+ (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n" personid episodenumber duration role context demand asunitspersecondl2 asunitspersecondl1)))
+ (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n"
+ asunitspersecondl2 clausesperasunitl2 errorsperasunitl2 asunitspersecondl1))
+ (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s" initiatingperasunitl2 respondingperasunitl2 controlperasunitl2 expressiveperasunitl2 interpersonalperasunitl2)))))
+
+(defun transcribe-analyze-all ()
+ "Analyze all file and output to 'Statistics Output' buffer. The buffer will
+ lost all previous data. The data in the buffer can be saved to a file and be
+ passed to 'R' for statistical analysis."
+ (interactive)
+ (let* ((xml (xml-parse-region (point-min) (point-max)))
+ (results (car xml))
+ (episodes (xml-get-children results 'episode)))
+
+ (with-current-buffer "Statistics Output"
+ (erase-buffer)
+ (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n"))
+ (dolist (episode episodes)
+ (let* ((numbernode (xml-get-children episode 'number))
+ (participantsnode (xml-get-children episode 'participants))
+ ;; (transcription (xml-get-children episode 'transcription))
+ (number (nth 2 (car numbernode)))
+ (participantsstring (nth 2 (car participantsnode)))
+ (participants (split-string participantsstring)))
+
+ (dolist (participant participants)
+ (transcribe-analyze number participant))))))
+
+
+(defun transcribe-xml-tag-person (xmltag)
+ "This function allows the automatic insetion of a speaker xml tag and places the cursor."
+ (interactive "stag:")
+ (insert (format "<%s move=\"\"></%s>" xmltag xmltag))
+ (backward-char 3)
+ (backward-char (string-width xmltag)))
-(defun transcribe-define-xml-tag (xmltag)
- "This function allows the automatic insetion of a xml tag and places the cursor."
+(defun transcribe-xml-tag (xmltag)
+ "This function allows the automatic insetion of a custom xml tag and places the cursor."
(interactive "stag:")
(insert (format "<%s></%s>" xmltag xmltag))
(backward-char 3)
(backward-char (string-width xmltag)))
+(defun transcribe-region-xml-tag (xmltag)
+ "This function encapsulates the marked region in the given tag."
+ (interactive "stag:")
+ (let ((beginning (region-beginning))
+ (end (region-end)))
+ (goto-char beginning)
+ (insert (format "<%s>" xmltag))
+ (goto-char end)
+ (insert (format "</%s>" xmltag))))
+
+(defun transcribe-add-attribute (att val)
+ "Adds a xml attribute at cursor with the name and value specified (autocompletion possible)"
+ (interactive (list(completing-read "attibute name:" transcribe-attribute-list)(read-string "value:")))
+ (insert (format "%s=\"%s\"" att val)))
+
+(defun transcribe-add-attribute-function (val)
+ "Adds the xml attribute 'function' at cursor with the name specified (autocompletion possible)"
+ (interactive (list(completing-read "function name:" transcribe-function-list)))
+ (insert (format "function=\"%s\"" val)))
+
+(defun transcribe-add-attribute-move (val)
+ "Adds the xml attribute 'move' at cursor with the name specified (autocompletion possible"
+ (interactive (list(completing-read "move name:" transcribe-move-list)))
+ (insert (format "move=\"%s\"" val)))
+
(defun transcribe-xml-tag-l1 ()
"Inserts a l1 tag and places the cursor"
(interactive)
- (insert "<l1></l1>")
- (backward-char 3)
- (backward-char 2))
+ (insert "<l1 clauses=\"1\" errors=\"0\" function=\"\"></l1>")
+ (backward-char 5))
(defun transcribe-xml-tag-l2 ()
"Inserts a l2 tag and places the cursor"
(interactive)
- (insert "<l2 clauses=\"1\" errors=\"0\"></l2>")
- (backward-char 3)
- (backward-char 2))
+ (insert "<l2 clauses=\"1\" errors=\"0\" function=\"\"></l2>")
+ (backward-char 5))
-(fset 'transcribe-xml-tag-l2-break "</l2><l2 clauses=\"1\" errors=\"0\">")
- ;inserts a break inside a l2 tag
-(fset 'transcribe-set-attributes "clauses=\"1\" errors=\"0\"")
- ;inserts the attributes where they are missing
+(defun transcribe-xml-tag-break (xmltag)
+ "This function breaks an unit into two. That is, insert a closing and an opening equal tags"
+ (interactive "stag:")
+ (insert (format "</%s><%s>" xmltag xmltag)))
(defun transcribe-display-audio-info ()
(interactive)
(fset 'NewEpisode
- "<episode>\n<number>DATE-NUMBER</number>\n<duration></duration>\n<comment></comment>\n<subject>Subject (level)</subject>\n<task>\n\t<role>low or high</role>\n<context>low or high</context>\n<demand>low or high</demand>\r</task>\n<auxiliar>Yes/no</auxiliar>\n<transcription>\n</transcription>\n</episode>");Inserts a new episode structure
+ "<episode>\n<number>DATE-NUMBER</number>\n<duration></duration>\n<comment></comment>\n<subject>Subject (level)</subject>\n<participants></participants>\n<task>\n\t<role>low or high</role>\n<context>low or high</context>\n<demand>low or high</demand>\r</task>\n<auxiliar>Yes/no</auxiliar>\n<transcription>\n</transcription>\n</episode>");Inserts a new episode structure
+
+
+(defvar transcribe-mode-map
+ (let ((map (make-sparse-keymap)))
+ (define-key map (kbd "C-x C-p") 'emms-play-file)
+ (define-key map (kbd "C-x C-a") 'transcribe-analyze)
+ (define-key map (kbd "C-x C-n") 'NewEpisode)
+ (define-key map (kbd "C-x <down>") 'emms-stop)
+ (define-key map (kbd "C-x <right>") 'emms-seek-forward)
+ (define-key map (kbd "C-x <left>") 'emms-seek-backward)
+ (define-key map (kbd "<f2>") 'transcribe-add-attribute-move)
+ (define-key map (kbd "<f3>") 'transcribe-add-attribute-function)
+ (define-key map (kbd "<f4>") 'transcribe-add-attribute)
+ (define-key map (kbd "<f5>") 'emms-pause)
+ (define-key map (kbd "<f8>") 'emms-seek)
+ (define-key map (kbd "<f9>") 'transcribe-xml-tag)
+ (define-key map (kbd "<f10>") 'transcribe-xml-tag-person)
+ (define-key map (kbd "<f11>") 'transcribe-xml-tag-l1)
+ (define-key map (kbd "<f12>") 'transcribe-xml-tag-l2)
+ map)
+ "Keymap for Transcribe minor mode.")
+
+
+(easy-menu-define transcribe-mode-menu transcribe-mode-map
+ "Menu for Transcribe mode"
+ '("Transcribe"
+ ["Raw Output" transcribe-raw-to-buffer]
+ "---"
+ ["Analyze" transcribe-analyze]
+ ["Analyze all" arbitools-analyze-all]
+ "---"
+ ["Add transcription header" NewEpisode]
+ ["Add move attribute" transcribe-add-attribute-move]
+ ["Add function attribute" transcribe-add-attribute-function]
+ ["Add L1 intervention" transcribe-xml-tag-l1]
+ ["Add L2 intervention" transcribe-xml-tag-l2]
+ ["Add move" transcribe-xml-tag-person]
+ "---"
+ ["Play audio file" emms-play-file]
+ ))
+
;;;###autoload
(define-minor-mode transcribe-mode
"Toggle transcribe-mode"
nil
" Trans"
- '(([?\C-x ?\C-p] . emms-play-file)
- ([?\C-x ?\C-a] . transcribe-analyze)
- ([?\C-x ?\C-n] . NewEpisode)
- ([?\C-x down] . emms-stop)
- ([?\C-x right] . emms-seek-forward)
- ([?\C-x left] . emms-seek-backward)
- ([f5] . emms-pause)
- ([f6] . transcribe-define-xml-tag)
- ([f7] . transcribe-xml-tag-l2-break)
- ([f8] . emms-seek)
- ([f4] . transcribe-set-atributes)
- ([f11] . transcribe-xml-tag-l1)
- ([f12] . transcribe-xml-tag-l2))
+ transcribe-mode-map
+ (generate-new-buffer "Statistics Output")
+ (generate-new-buffer "Raw Output")
+;; (with-current-buffer "Raw Output"
+;; (linum-mode t)
+;; (setq linum-format "%d "))
+ (with-current-buffer "Statistics Output"
+ ;; (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n")
+ )
+ ;; TODO: save the students present in transcription in list so that we can use that list for transcribe-analyze-all
)
(provide 'transcribe)