X-Git-Url: https://code.delx.au/gnu-emacs-elpa/blobdiff_plain/78ed9a1eaa41f759ba025306172210d784e1d721..f10533854f4c7bb54247a11981191bf37b70cb36:/packages/transcribe/transcribe.el diff --git a/packages/transcribe/transcribe.el b/packages/transcribe/transcribe.el index c751bc04b..48497b4cc 100644 --- a/packages/transcribe/transcribe.el +++ b/packages/transcribe/transcribe.el @@ -1,9 +1,9 @@ ;;; transcribe.el --- Package for audio transcriptions -;; Copyright 2014-2015 Free Software Foundation, Inc. +;; Copyright 2014-2016 Free Software Foundation, Inc. ;; Author: David Gonzalez Gandara -;; Version: 1.0.0 +;; Version: 1.5.0 ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -22,62 +22,66 @@ ;; REQUIRES: ;; ----------------------------- -;; In order to use the audio functions of transcribe, you need to install -;; emms and mpg321. +;; This module works without any requires, but in order to use the audio +;; functions, you need to install the Emacs package "emms", by Joe Drew, +;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen, +;; both under GPL licenses. ;; ;; USAGE: ;; ------------------------- -;; Transcribe is a tool to make audio transcriptions. It allows the -;; transcriber to control the audio easily while typing, as well as -;; automate the insertion of xml tags, in case the transcription protocol +;; Transcribe is a tool to make audio transcriptions for discourse analysis +;; in the classroom. +;; It allows the transcriber to control the audio easily while typing, as well as +;; automate the insertion of xml tags, in case the transcription protocol ;; include them. -;; The analyse function will search for a specific structure -;; of episodes that can be automatically added with the macro NewEpisode. -;; The function expects the utterances to be transcribed inside a xml tag -;; with the identifier of the speaker, with the tags or , depending -;; on the language used by the person. The attributes expected are the -;; number of clauses that form the utterance and the number of errors the -;; transcriber observes. -;; -;; +;; The analysis functions will search for a specific structure +;; of episodes that can be automatically added with the macro NewEpisode. +;; The function expects the speech acts to be transcribed inside a turn xml +;; tag with the identifier of the speaker with optional move attribute. +;; Each speech act is spected inside a or tag, depending +;; on the language used by the person. The attributes expected are the +;; number of clauses that form the utterance, the number of errors the +;; transcriber observes, and the function of the speech act. The parser will +;; work even if some attributes are missing. +;; +;; ;; AUDIO COMMANDS ;; ------------------------------ -;; C-x C-p ------> Play audio file. You will be prompted for the name +;; C-x C-p ------> Play audio file. You will be prompted for the name ;; of the file. The recommended format is mp2. ;; ---------> Pause or play audio. ;; C-x --> seek audio 10 seconds forward. ;; C-x --->seek audio 10 seconds backward. -;; ---------> seek interactively: positive seconds go forward and +;; ---------> seek interactively: positive seconds go forward and ;; negative seconds go backward ;; ;; XML TAGGING COMMANDS ;; -------------------------------------------------- -;; C-x C-n --> Create new episode structure. This is useful in case your -;; xml file structure requires it. You can customize the text -;; inserted manipulating the realted function. -;; -----> Interactively insert new tag. You will be prompted for the -;; content of the tag. The starting tag and the end tag will be -;; inserted automatically and the cursor placed in the proper -;; place to type. -;; +;; C-x C-n ------> Create new episode structure. This is useful in case your +;; xml file structure requires it. +;; ---------> Interactively insert a function attribute in a speech act +;; (l1 or l2) tag. +;; ---------> Interactively insert a move attribute in a turn (person) tag +;; ---------> Interactively insert an attribute (any kind) +;; ---------> Insert turn (person) tag. Inserts a move attribute. +;; --------> Insert a custom tag. Edit the function to adapt to your needs. +;; --------> Insert speech act tag in L1, with clauses, errors and function +;; attributes. +;; --------> Insert speech act tag in L2, with clauses, errors and function +;; attributes. ;; -;; -;; SPECIFIC COMMANDS I USE, THAT YOU MAY FIND USEFUL -;; ------------------------------------------------ +;; AUTOMATIC PARSING +;; ----------------------------------------------------- ;; C-x C-a ------> Analyses the text for measurments of performance. -;; --------> Customised tag 1. Edit the function to adapt to your needs. -;; --------> Customised tag 2. Edit the function to adapt to your needs. -;; ---------> Break tag. This command "breaks" a tag in two, that is -;; it inserts an ending tag and then a starting tag. -;; ---------> Insert atributes. This function insert custom xml attributes. -;; Edit the function to suit you needs. ;;; Code: -(if t (require 'emms-setup)) -;(require 'emms-player-mpd) -;(setq emms-player-mpd-server-name "localhost") -;(setq emms-player-mpd-server-port "6600") +(require 'xml) + +;; (if t (require 'emms-setup)) +;; (require 'emms-player-mpd) +;; (setq emms-player-mpd-server-name "localhost") +;; (setq emms-player-mpd-server-port "6600") (emms-standard) (emms-default-players) @@ -90,98 +94,252 @@ (if t (require 'emms-playing-time)) (emms-playing-time 1) +(defvar transcribe-function-list '("initiating" "responding" "control" "expresive" "interpersonal")) +(defvar transcribe-move-list '("initiation" "response" "follow-up")) +(defvar transcribe-attribute-list '("clauses" "errors" "function" "move")) +;; (append transcribe-attribute-list transcribe-function-list transcribe-move-list) + (defun transcribe-analyze-episode (episode person) - "This calls the external python package analyze_episodes2.py. The new + "This calls the external python package analyze_episodes2.py. The new function transcribe-analyze implements its role now." (interactive "sepisode: \nsperson:") - (shell-command (concat (expand-file-name "analyze_episodes2.py") + (shell-command (concat (expand-file-name "analyze_episodes2.py") " -e " episode " -p " person " -i " buffer-file-name ))) +(defun transcribe-raw-to-buffer () + "EXPERIMENTAL - Convert the xml tagged transcription to raw transcription, with the names + and the persons and the utterances only. The raw transcription will be send to buffer called + `Raw Output'." + (interactive) + (let* ((xml (xml-parse-region (point-min) (point-max))) + (results (car xml)) + (episodes (xml-get-children results 'episode))) + + (dolist (episode episodes) + (let* ((transcription (xml-get-children episode 'transcription))) + + (dolist (turn transcription) + (dolist (intervention (xml-node-children turn)) + (if (listp intervention) + (progn + (with-current-buffer "Raw Output" + (insert (format "%s\t" (line-number-at-pos))) + (insert (format "%s:\t" (car intervention))) + (dolist (utterance (nthcdr 2 intervention)) + (if (listp utterance) + (progn + (insert (format "%s " (nth 2 utterance)))) + + (insert (format "%s" utterance)))))) + + (with-current-buffer "Raw Output" + (insert (format "%s" (line-number-at-pos))) + (insert (format "%s" intervention)))))))))) + (defun transcribe-analyze (episodenumber personid) - "Extract from a given episode and person the number of asunits per - second produced, and the number of clauses per asunits, for L2 and L1." + "Extract from a given episode and person the number of asunits per + second produced, and the number of clauses per asunits, for L2 and L1. + It writes two output files, one for L2 utterances and one for L1 + utterances, so that they can be used with external programs. Output will + be inserted in `Statistics Output' buffer." (interactive "sepisodenumber: \nspersonid:") - (setq interventionsl2 '()) - (setq interventionsl1 '()) - (setq xml (xml-parse-region (point-min) (point-max))) - (setq results (car xml)) - (setq episodes (xml-get-children results 'episode)) - (setq asunitsl2 0.0000) - (setq asunitsl1 0.0000) - (setq shifts) - (setq clausesl1 0.0000) - (setq errorsl1 0.0000) - (setq clausesl2 0.0000) - (setq errorsl2 0.0000) - (dolist (episode episodes) - (setq numbernode (xml-get-children episode 'number)) - (setq number (nth 2 (car numbernode))) - (when (equal episodenumber number) - (setq durationnode (xml-get-children episode 'duration)) - (setq duration (nth 2 (car durationnode))) - (setq transcription (xml-get-children episode 'transcription)) - (dolist (turn transcription) - (setq interventionnode (xml-get-children turn (intern personid))) - (dolist (intervention interventionnode) - (setq l2node (xml-get-children intervention 'l2)) - (dolist (l2turn l2node) - (setq l2 (nth 2 l2turn)) - (setq clausesl2node (nth 1 l2turn)) - (setq clausesl2nodeinc (cdr (car clausesl2node))) - (when (not (equal clausesl2node nil)) - (setq clausesl2 (+ clausesl2 - (string-to-number clausesl2nodeinc)))) - (when (not (equal l2 nil)) - (add-to-list 'interventionsl2 l2) - (setq asunitsl2 (1+ asunitsl2)))) - (setq l1node (xml-get-children intervention 'l1)) - (dolist (l1turn l1node) - (setq l1 (nth 2 l1turn)) - (setq clausesl1node (nth 1 l1turn)) - (setq clausesl1nodeinc (cdr (car clausesl1node))) - (when (not (equal clausesl1node nil)) - (setq clausesl1 (+ clausesl1 - (string-to-number clausesl1nodeinc)))) - (when (not (equal l1 nil)) - (add-to-list 'interventionsl1 l1) - (setq asunitsl1 (1+ asunitsl1)))))))) + (let* ((interventionsl2 '()) + (interventionsl1 '()) + (xml (xml-parse-region (point-min) (point-max))) + (results (car xml)) + (episodes (xml-get-children results 'episode)) + (asunitsl2 0.0000) + (asunitsl1 0.0000) + ;; (shifts 0.0000);; TODO implement + (initiating 0.0000);; TODO implement + (responding 0.0000);; TODO implement + (control 0.0000);; TODO implement + (expressive 0.0000);; TODO implement + (interpersonal 0.0000);; TODO implement + (clausesl1 0.0000) + ;; (errorsl1 0.0000);; TODO implement + (clausesl2 0.0000) + (errorsl2 0.0000) + (duration nil) + (role nil) + (context nil) + (demand nil) + ;; (clausesmessage nil) + (number nil)) + + (dolist (episode episodes) + (let*((numbernode (xml-get-children episode 'number)) + (tasknode (xml-get-children episode 'task))) + + (setq number (nth 2 (car numbernode))) + (when (equal episodenumber number) + (let* ((durationnode (xml-get-children episode 'duration)) + (transcription (xml-get-children episode 'transcription))) + + (setq duration (nth 2 (car durationnode))) + + (dolist (task tasknode) + (let* ((rolenode (xml-get-children task 'role)) + (contextnode (xml-get-children task 'context)) + (demandnode (xml-get-children task 'demand))) + + (setq role (nth 2 (car rolenode))) + (setq context (nth 2 (car contextnode))) + (setq demand (nth 2 (car demandnode))) + ;; (with-current-buffer "Statistics Output" + ;; (insert (format "role: %s; context: %s; demand: %s\n" role context demand))) + )) + + (dolist (turn transcription) + (let* ((interventionnode (xml-get-children turn + (intern personid)))) + + (dolist (intervention interventionnode) + (let* ((l2node (xml-get-children intervention 'l2)) + (l1node (xml-get-children intervention 'l1))) + + (dolist (l2turn l2node) + (let* ((l2 (nth 2 l2turn)) + (attrs (nth 1 l2turn)) + (clausesl2nodeinc (cdr (assq 'clauses attrs))) + (errorsl2inc (cdr (assq 'errors attrs))) + (function (cdr (assq 'function attrs)))) + + (when (string-equal function "initiating") + (setq initiating (+ initiating 1))) + (when (string-equal function "responding") + (setq responding (+ responding 1))) + (when (string-equal function "control") + (setq control (+ control 1))) + (when (string-equal function "expressive") + (setq expressive (+ expressive 1))) + (when (string-equal function "interpersonal") + (setq interpersonal (+ interpersonal 1))) + (when attrs + (setq clausesl2 (+ clausesl2 (string-to-number + clausesl2nodeinc))) + (setq errorsl2 (+ errorsl2 (string-to-number + errorsl2inc)))) + (when l2 + ;; (add-to-list 'interventionsl2 l2) + (cl-pushnew l2 interventionsl2 :test #'equal) + (setq asunitsl2 (1+ asunitsl2))))) + (dolist (l1turn l1node) + (let*((l1 (nth 2 l1turn)) + (clausesl1node (nth 1 l1turn)) + (clausesl1nodeinc (cdr (car clausesl1node)))) + + (when (not (equal clausesl1node nil)) + (setq clausesl1 (+ clausesl1 (string-to-number + clausesl1nodeinc)))) + (when l1 + ;; (add-to-list 'interventionsl1 l1) + (cl-pushnew l1 interventionsl1 :test #'equal) + (setq asunitsl1 (1+ asunitsl1))))))))))))) (reverse interventionsl2) + ;; (write-region (format "%s" interventionsl2) nil (format "transcribe-output-%s-%s-l2.txt" episodenumber personid)) + ;; Write raw interventions to file will be supported by a different function (reverse interventionsl1) - ;(print interventions) ;uncomment to display all the interventions on screen - (setq asunitspersecondl2 (/ asunitsl2 (string-to-number duration))) - (setq clausesperasunitl2 (/ clausesl2 asunitsl2)) - (setq asunitspersecondl1 (/ asunitsl1 (string-to-number duration))) - (setq clausesperasunitl1 (/ clausesl1 asunitsl1)) - (princ (format "episode: %s, duration: %s, person: %s\n" number duration personid)) - (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L1(Asunits/second): %s" - asunitspersecondl2 clausesperasunitl2 asunitspersecondl1)) -) + ;; (write-region (format "%s" interventionsl1) nil (format "transcribe-output-%s-%s-l1.txt" episodenumber personid)) + ;; (print interventionsl2) ;uncomment to display all the interventions on screen + (let((asunitspersecondl2 (/ asunitsl2 (string-to-number duration))) + (clausesperasunitl2 (/ clausesl2 asunitsl2)) + (errorsperasunitl2 (/ errorsl2 asunitsl2)) + (asunitspersecondl1 (/ asunitsl1 (string-to-number duration))) + ;; (clausesperasunitl1 (/ clausesl1 asunitsl1)) + (initiatingperasunitl2 (/ initiating asunitsl2)) + (respondingperasunitl2 (/ responding asunitsl2)) + (controlperasunitl2 (/ control asunitsl2)) + (expressiveperasunitl2 (/ expressive asunitsl2)) + (interpersonalperasunitl2 (/ interpersonal asunitsl2))) -(defun transcribe-define-xml-tag (xmltag) - "This function allows the automatic insetion of a xml tag and places the cursor." + ;; (princ clausesmessage) + (princ (format "episode: %s, duration: %s, person: %s\n" episodenumber duration personid)) + (with-current-buffer "Statistics Output" + (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n" personid episodenumber duration role context demand asunitspersecondl2 asunitspersecondl1))) + (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n" + asunitspersecondl2 clausesperasunitl2 errorsperasunitl2 asunitspersecondl1)) + (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s" initiatingperasunitl2 respondingperasunitl2 controlperasunitl2 expressiveperasunitl2 interpersonalperasunitl2))))) + +(defun transcribe-analyze-all () + "Analyze all file and output to `Statistics Output' buffer. The buffer will + lost all previous data. The data in the buffer can be saved to a file and be + passed to R for statistical analysis." + (interactive) + (let* ((xml (xml-parse-region (point-min) (point-max))) + (results (car xml)) + (episodes (xml-get-children results 'episode))) + + (with-current-buffer "Statistics Output" + (erase-buffer) + (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n")) + (dolist (episode episodes) + (let* ((numbernode (xml-get-children episode 'number)) + (participantsnode (xml-get-children episode 'participants)) + ;; (transcription (xml-get-children episode 'transcription)) + (number (nth 2 (car numbernode))) + (participantsstring (nth 2 (car participantsnode))) + (participants (split-string participantsstring))) + + (dolist (participant participants) + (transcribe-analyze number participant)))))) + + +(defun transcribe-xml-tag-person (xmltag) + "This function allows the automatic insetion of a speaker xml tag and places the cursor." + (interactive "stag:") + (insert (format "<%s move=\"\">" xmltag xmltag)) + (backward-char 3) + (backward-char (string-width xmltag))) + +(defun transcribe-xml-tag (xmltag) + "This function allows the automatic insetion of a custom xml tag and places the cursor." (interactive "stag:") (insert (format "<%s>" xmltag xmltag)) (backward-char 3) (backward-char (string-width xmltag))) +(defun transcribe-region-xml-tag (xmltag) + "This function encapsulates the marked region in the given tag." + (interactive "stag:") + (let ((beginning (region-beginning)) + (end (region-end))) + (goto-char beginning) + (insert (format "<%s>" xmltag)) + (goto-char end) + (insert (format "" xmltag)))) + +(defun transcribe-add-attribute (att val) + "Adds a xml attribute at cursor with the name and value specified (autocompletion possible)" + (interactive (list(completing-read "attibute name:" transcribe-attribute-list)(read-string "value:"))) + (insert (format "%s=\"%s\"" att val))) + +(defun transcribe-add-attribute-function (val) + "Adds the xml attribute `function' at cursor with the name specified (autocompletion possible)" + (interactive (list(completing-read "function name:" transcribe-function-list))) + (insert (format "function=\"%s\"" val))) + +(defun transcribe-add-attribute-move (val) + "Adds the xml attribute `move' at cursor with the name specified (autocompletion possible" + (interactive (list(completing-read "move name:" transcribe-move-list))) + (insert (format "move=\"%s\"" val))) + (defun transcribe-xml-tag-l1 () "Inserts a l1 tag and places the cursor" (interactive) - (insert "") - (backward-char 3) - (backward-char 2)) + (insert "") + (backward-char 5)) (defun transcribe-xml-tag-l2 () "Inserts a l2 tag and places the cursor" (interactive) - (insert "") - (backward-char 3) - (backward-char 2)) + (insert "") + (backward-char 5)) -(fset 'transcribe-xml-tag-l2-break "") - ;inserts a break inside a l2 tag -(fset 'transcribe-set-attributes "clauses=\"1\" errors=\"0\"") - ;inserts the attributes where they are missing +(defun transcribe-xml-tag-break (xmltag) + "This function breaks an unit into two. That is, insert a closing and an opening equal tags" + (interactive "stag:") + (insert (format "<%s>" xmltag xmltag))) (defun transcribe-display-audio-info () (interactive) @@ -190,26 +348,64 @@ (fset 'NewEpisode - "\nDATE-NUMBER\n\n\nSubject (level)\n\n\tlow or high\nlow or high\nlow or high\r\nYes/no\n\n\n");Inserts a new episode structure + "\nDATE-NUMBER\n\n\nSubject (level)\n\n\n\tlow or high\nlow or high\nlow or high\r\nYes/no\n\n\n");Inserts a new episode structure + + +(defvar transcribe-mode-map + (let ((map (make-sparse-keymap))) + (define-key map (kbd "C-x C-p") 'emms-play-file) + (define-key map (kbd "C-x C-a") 'transcribe-analyze) + (define-key map (kbd "C-x C-n") 'NewEpisode) + (define-key map (kbd "C-x ") 'emms-stop) + (define-key map (kbd "C-x ") 'emms-seek-forward) + (define-key map (kbd "C-x ") 'emms-seek-backward) + (define-key map (kbd "") 'transcribe-add-attribute-move) + (define-key map (kbd "") 'transcribe-add-attribute-function) + (define-key map (kbd "") 'transcribe-add-attribute) + (define-key map (kbd "") 'emms-pause) + (define-key map (kbd "") 'emms-seek) + (define-key map (kbd "") 'transcribe-xml-tag) + (define-key map (kbd "") 'transcribe-xml-tag-person) + (define-key map (kbd "") 'transcribe-xml-tag-l1) + (define-key map (kbd "") 'transcribe-xml-tag-l2) + map) + "Keymap for Transcribe minor mode.") + + +(easy-menu-define transcribe-mode-menu transcribe-mode-map + "Menu for Transcribe mode" + '("Transcribe" + ["Raw Output" transcribe-raw-to-buffer] + "---" + ["Analyze" transcribe-analyze] + ["Analyze all" arbitools-analyze-all] + "---" + ["Add transcription header" NewEpisode] + ["Add move attribute" transcribe-add-attribute-move] + ["Add function attribute" transcribe-add-attribute-function] + ["Add L1 intervention" transcribe-xml-tag-l1] + ["Add L2 intervention" transcribe-xml-tag-l2] + ["Add move" transcribe-xml-tag-person] + "---" + ["Play audio file" emms-play-file] + )) + ;;;###autoload (define-minor-mode transcribe-mode "Toggle transcribe-mode" nil " Trans" - '(([?\C-x ?\C-p] . emms-play-file) - ([?\C-x ?\C-a] . transcribe-analyze) - ([?\C-x ?\C-n] . NewEpisode) - ([?\C-x down] . emms-stop) - ([?\C-x right] . emms-seek-forward) - ([?\C-x left] . emms-seek-backward) - ([f5] . emms-pause) - ([f6] . transcribe-define-xml-tag) - ([f7] . transcribe-xml-tag-l2-break) - ([f8] . emms-seek) - ([f4] . transcribe-set-atributes) - ([f11] . transcribe-xml-tag-l1) - ([f12] . transcribe-xml-tag-l2)) + transcribe-mode-map + (generate-new-buffer "Statistics Output") + (generate-new-buffer "Raw Output") +;; (with-current-buffer "Raw Output" +;; (linum-mode t) +;; (setq linum-format "%d ")) + (with-current-buffer "Statistics Output" + ;; (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n") + ) + ;; TODO: save the students present in transcription in list so that we can use that list for transcribe-analyze-all ) (provide 'transcribe)