1 ;;; transcribe.el --- Package for audio transcriptions
3 ;; Copyright 2014-2016 Free Software Foundation, Inc.
5 ;; Author: David Gonzalez Gandara <dggandara@member.fsf.org>
8 ;; This program is free software: you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
24 ;; -----------------------------
25 ;; This module works without any requires, but in order to use the audio
26 ;; functions, you need to install the Emacs package "emms", by Joe Drew,
27 ;; and the external program "mpg321", by Jorgen Schafer and Ulrik Jensen,
28 ;; both under GPL licenses.
31 ;; -------------------------
;; Transcribe is a tool to make audio transcriptions for discourse analysis.
;; It allows the transcriber to control the audio easily while typing, as well as
;; automate the insertion of xml tags, in case the transcription protocol
;; requires them.
;; The analysis functions will search for a specific structure
;; of episodes that can be automatically added with the macro NewEpisode.
;; The function expects the speech acts to be transcribed inside a turn xml
;; tag with the identifier of the speaker with optional move attribute.
;; Each speech act is expected inside a <l1> or <l2> tag, depending
;; on the language used by the person.  The attributes expected are the
;; number of clauses that form the utterance, the number of errors the
;; transcriber observes, and the function of the speech act.  The parser will
;; work even if some attributes are missing.
49 ;; ------------------------------
50 ;; C-x C-p ------> Play audio file. You will be prompted for the name
51 ;; of the file. The recommended format is mp2.
52 ;; <f5> ---------> Pause or play audio.
53 ;; C-x <right> --> seek audio 10 seconds forward.
;; C-x <left> ---> seek audio 10 seconds backward.
55 ;; <f8> ---------> seek interactively: positive seconds go forward and
56 ;; negative seconds go backward
58 ;; XML TAGGING COMMANDS
59 ;; --------------------------------------------------
60 ;; C-x C-n ------> Create new episode structure. This is useful in case your
61 ;; xml file structure requires it.
62 ;; <f2> ---------> Interactively insert a function attribute in a speech act
64 ;; <f3> ---------> Interactively insert a move attribute in a turn (person) tag
65 ;; <f4> ---------> Interactively insert an attribute (any kind)
66 ;; <f9> ---------> Insert turn (person) tag. Inserts a move attribute.
67 ;; <f10> --------> Insert a custom tag. Edit the function to adapt to your needs.
68 ;; <f11> --------> Insert speech act tag in L1, with clauses, errors and function
70 ;; <f12> --------> Insert speech act tag in L2, with clauses, errors and function
74 ;; -----------------------------------------------------
;; C-x C-a ------> Analyses the text for measurements of performance.
81 ;; (if t (require 'emms-setup))
82 ;; (require 'emms-player-mpd)
83 ;; (setq emms-player-mpd-server-name "localhost")
84 ;; (setq emms-player-mpd-server-port "6600")
87 (emms-default-players)
88 (if t (require 'emms-player-mpg321-remote))
89 (defvar emms-player-list)
90 (push 'emms-player-mpg321-remote emms-player-list)
92 (if t (require 'emms-mode-line))
94 (if t (require 'emms-playing-time))
(defvar transcribe-function-list
  '("initiating" "responding" "control" "expressive" "interpersonal")
  "Completion candidates for the `function' attribute of a speech act.
The spelling of each value must match the strings that
`transcribe-analyze' compares against when it counts functions.")

(defvar transcribe-move-list '("initiation" "response" "follow-up")
  "Completion candidates for the `move' attribute of a turn tag.")

(defvar transcribe-attribute-list '("clauses" "errors" "function" "move")
  "Completion candidates for attribute names in `transcribe-add-attribute'.")
100 ;; (append transcribe-attribute-list transcribe-function-list transcribe-move-list)
(defun transcribe-analyze-episode (episode person)
  "Run the external script analyze_episodes2.py on the visited file.
EPISODE and PERSON are passed to the script as the values of its -e and
-p options, and the file visited by the current buffer as the value of
its -i option.  The script name is expanded against `default-directory'.

The function `transcribe-analyze' implements this role now."
  (interactive "sepisode: \nsperson:")
  (unless buffer-file-name
    (user-error "Current buffer is not visiting a file"))
  ;; Quote every argument: the values come from minibuffer prompts and
  ;; from the file name, so spaces or shell metacharacters in them must
  ;; not be able to split or alter the command line.
  (shell-command
   (mapconcat #'identity
              (list (shell-quote-argument
                     (expand-file-name "analyze_episodes2.py"))
                    "-e" (shell-quote-argument episode)
                    "-p" (shell-quote-argument person)
                    "-i" (shell-quote-argument buffer-file-name))
              " ")))
(defun transcribe-raw-to-buffer ()
  "EXPERIMENTAL - Convert the xml tagged transcription to raw transcription.
Only the speaker tags and their utterances are kept.  The current buffer
is parsed as XML; for every speaker tag found inside a <transcription>
element of an <episode>, one line of the form

    SPEAKER: utterance utterance ...

is inserted in the buffer called \"Raw Output\", which is created if it
does not exist yet."
  (interactive)
  (let* ((xml (xml-parse-region (point-min) (point-max)))
         ;; The document root is the first node returned by the parser.
         (results (car xml))
         (episodes (xml-get-children results 'episode))
         (output (get-buffer-create "Raw Output")))
    (dolist (episode episodes)
      (dolist (turn (xml-get-children episode 'transcription))
        (dolist (intervention (xml-node-children turn))
          ;; Text between tags is returned as strings; only element
          ;; nodes (lists) are speaker tags.
          (when (listp intervention)
            (with-current-buffer output
              (insert (format "%s: " (car intervention)))
              ;; Skip the node name and attribute list, keep the children.
              (dolist (utterance (nthcdr 2 intervention))
                (when (listp utterance)
                  (insert (format "%s " (nth 2 utterance)))))
              (insert "\n"))))))))
(defun transcribe--attribute-number (attrs name)
  "Return the attribute NAME of the alist ATTRS as a number, or 0 if absent."
  (string-to-number (or (cdr (assq name attrs)) "0")))

(defun transcribe-analyze (episodenumber personid)
  "Compute performance measures for PERSONID in episode EPISODENUMBER.
Both arguments are strings.  The current buffer is parsed as XML and the
<episode> whose <number> equals EPISODENUMBER is examined.  Inside its
<transcription>, the turns tagged PERSONID are scanned: every <l2> and
<l1> child counts as one asunit, the `clauses' and `errors' attributes
of the <l2> tags are summed, and the `function' attribute of each <l2>
tag is tallied.  Missing attributes count as 0.

A CSV row is inserted in the \"Statistics Output\" buffer, which is
created if it does not exist, and a summary of the ratios is printed
with `princ'."
  (interactive "sepisodenumber: \nspersonid:")
  (let* ((xml (xml-parse-region (point-min) (point-max)))
         ;; The document root is the first node returned by the parser.
         (results (car xml))
         (episodes (xml-get-children results 'episode))
         ;; Counters are floats so that the ratios below never use
         ;; integer division (and never signal on a zero divisor).
         (asunitsl2 0.0)
         (asunitsl1 0.0)
         (clausesl2 0.0)
         (errorsl2 0.0)
         ;; TODO: shifts and errors in L1 are not implemented.
         (initiating 0.0)
         (responding 0.0)
         (control 0.0)
         (expressive 0.0)
         (interpersonal 0.0)
         duration role context demand)
    (dolist (episode episodes)
      (when (equal episodenumber
                   (nth 2 (car (xml-get-children episode 'number))))
        (setq duration (nth 2 (car (xml-get-children episode 'duration))))
        (dolist (task (xml-get-children episode 'task))
          (setq role (nth 2 (car (xml-get-children task 'role)))
                context (nth 2 (car (xml-get-children task 'context)))
                demand (nth 2 (car (xml-get-children task 'demand)))))
        (dolist (turn (xml-get-children episode 'transcription))
          ;; Node names are symbols, so the person id has to be interned.
          (dolist (intervention (xml-get-children turn (intern personid)))
            (dolist (l2turn (xml-get-children intervention 'l2))
              (let* ((attrs (nth 1 l2turn))
                     (act (cdr (assq 'function attrs))))
                (cond ((equal act "initiating")
                       (setq initiating (1+ initiating)))
                      ((equal act "responding")
                       (setq responding (1+ responding)))
                      ((equal act "control")
                       (setq control (1+ control)))
                      ((equal act "expressive")
                       (setq expressive (1+ expressive)))
                      ((equal act "interpersonal")
                       (setq interpersonal (1+ interpersonal))))
                (setq clausesl2
                      (+ clausesl2 (transcribe--attribute-number attrs 'clauses))
                      errorsl2
                      (+ errorsl2 (transcribe--attribute-number attrs 'errors))
                      asunitsl2 (1+ asunitsl2))))
            ;; Only the number of L1 speech acts is reported.
            (setq asunitsl1
                  (+ asunitsl1
                     (length (xml-get-children intervention 'l1))))))))
    ;; Writing the raw interventions to a file will be supported by a
    ;; different function.
    (let* ((seconds (string-to-number (or duration "0")))
           (asunitspersecondl2 (/ asunitsl2 seconds))
           (clausesperasunitl2 (/ clausesl2 asunitsl2))
           (errorsperasunitl2 (/ errorsl2 asunitsl2))
           (asunitspersecondl1 (/ asunitsl1 seconds))
           (initiatingperasunitl2 (/ initiating asunitsl2))
           (respondingperasunitl2 (/ responding asunitsl2))
           (controlperasunitl2 (/ control asunitsl2))
           (expressiveperasunitl2 (/ expressive asunitsl2))
           (interpersonalperasunitl2 (/ interpersonal asunitsl2)))
      (princ (format "episode: %s, duration: %s, person: %s\n"
                     episodenumber duration personid))
      (with-current-buffer (get-buffer-create "Statistics Output")
        (insert (format "%s,%s,%s,0,0,%s,%s,%s,%s,%s,QUAN-L2,segmented,aux,level,subject,yearofclil,month\n"
                        personid episodenumber duration role context demand
                        asunitspersecondl2 asunitspersecondl1)))
      (princ (format "L2(Asunits/second): %s, L2(clauses/Asunit): %s, L2(errors/Asunit):%s, L1(Asunits/second): %s\n"
                     asunitspersecondl2 clausesperasunitl2
                     errorsperasunitl2 asunitspersecondl1))
      (princ (format "Functions/unit: Initiating: %s, Responding: %s, Control: %s, Expressive: %s, Interpersonal: %s"
                     initiatingperasunitl2 respondingperasunitl2
                     controlperasunitl2 expressiveperasunitl2
                     interpersonalperasunitl2)))))
(defun transcribe-analyze-all ()
  "Analyze the whole file and output to the \"Statistics Output\" buffer.
The buffer is created if needed and loses all previous data: it is
erased and a CSV header line is written.  Then, for every <episode> in
the current buffer, `transcribe-analyze' is called once for each
whitespace-separated name listed in the episode's <participants> tag.
The data in the buffer can be saved to a file and be passed to R for
statistical analysis."
  (interactive)
  (let* ((xml (xml-parse-region (point-min) (point-max)))
         ;; The document root is the first node returned by the parser.
         (results (car xml))
         (episodes (xml-get-children results 'episode)))
    (with-current-buffer (get-buffer-create "Statistics Output")
      (erase-buffer)
      (insert "person,episode,duration,C-UNITS(L2),C-UNITS(L1),role,context,demand,QUAN-L2,QUAN-L1,QUAL-L2,segmented,aux,level,subjects,yearofCLIL,month\n"))
    (dolist (episode episodes)
      (let ((number (nth 2 (car (xml-get-children episode 'number))))
            (participantsstring
             (nth 2 (car (xml-get-children episode 'participants)))))
        ;; An episode with an empty or missing <participants> tag has
        ;; nobody to analyze; `split-string' would signal on nil.
        (when participantsstring
          (dolist (participant (split-string participantsstring))
            (transcribe-analyze number participant)))))))
(defun transcribe-xml-tag-person (xmltag)
  "Insert a speaker xml tag named XMLTAG with an empty move attribute.
The text <XMLTAG move=\"\"></XMLTAG> is inserted at point, and point is
left between the opening and the closing tag."
  (interactive "stag:")
  (let ((closing (format "</%s>" xmltag)))
    (insert (format "<%s move=\"\">" xmltag) closing)
    ;; Step back over the whole closing tag.  `backward-char' counts
    ;; characters, so use `length' rather than the display width.
    (backward-char (length closing))))
(defun transcribe-xml-tag (xmltag)
  "Insert a custom xml tag named XMLTAG and place the cursor inside it.
The text <XMLTAG></XMLTAG> is inserted at point, and point is left
between the opening and the closing tag."
  (interactive "stag:")
  (let ((closing (format "</%s>" xmltag)))
    (insert (format "<%s>" xmltag) closing)
    ;; Step back over the whole closing tag.  `backward-char' counts
    ;; characters, so use `length' rather than the display width.
    (backward-char (length closing))))
(defun transcribe-region-xml-tag (xmltag)
  "Enclose the marked region in an xml tag named XMLTAG.
<XMLTAG> is inserted at the beginning of the region and </XMLTAG> at its
end.  Point is left after the closing tag."
  (interactive "stag:")
  (let ((beginning (region-beginning))
        ;; A marker keeps pointing at the end of the region after the
        ;; opening tag has shifted the text.  It advances on insertion
        ;; so that an empty region still yields <XMLTAG></XMLTAG>.
        (end (copy-marker (region-end) t)))
    (goto-char beginning)
    (insert (format "<%s>" xmltag))
    (goto-char end)
    (insert (format "</%s>" xmltag))
    ;; Detach the marker so it does not slow down later edits.
    (set-marker end nil)))
(defun transcribe-add-attribute (att val)
  "Insert the xml attribute ATT=\"VAL\" at point.
Interactively, ATT is read with completion over
`transcribe-attribute-list' and VAL is read as free text."
  (interactive
   (let* ((name (completing-read "attibute name:" transcribe-attribute-list))
          (value (read-string "value:")))
     (list name value)))
  (let ((attribute-text (format "%s=\"%s\"" att val)))
    (insert attribute-text)))
(defun transcribe-add-attribute-function (val)
  "Insert the xml attribute function=\"VAL\" at point.
Interactively, VAL is read with completion over
`transcribe-function-list'."
  (interactive
   (let ((choice (completing-read "function name:" transcribe-function-list)))
     (list choice)))
  (let ((attribute-text (format "function=\"%s\"" val)))
    (insert attribute-text)))
(defun transcribe-add-attribute-move (val)
  "Insert the xml attribute move=\"VAL\" at point.
Interactively, VAL is read with completion over `transcribe-move-list'."
  (interactive
   (let ((choice (completing-read "move name:" transcribe-move-list)))
     (list choice)))
  (let ((attribute-text (format "move=\"%s\"" val)))
    (insert attribute-text)))
(defun transcribe-xml-tag-l1 ()
  "Insert an l1 speech act tag and place the cursor inside it.
The tag is inserted with the attributes clauses=\"1\", errors=\"0\" and
an empty function attribute; point is left between <l1 ...> and </l1>."
  (interactive)
  (insert "<l1 clauses=\"1\" errors=\"0\" function=\"\"></l1>")
  ;; Step back over the closing tag so the utterance can be typed next.
  (backward-char (length "</l1>")))
(defun transcribe-xml-tag-l2 ()
  "Insert an l2 speech act tag and place the cursor inside it.
The tag is inserted with the attributes clauses=\"1\", errors=\"0\" and
an empty function attribute; point is left between <l2 ...> and </l2>."
  (interactive)
  (insert "<l2 clauses=\"1\" errors=\"0\" function=\"\"></l2>")
  ;; Step back over the closing tag so the utterance can be typed next.
  (backward-char (length "</l2>")))
(defun transcribe-xml-tag-break (xmltag)
  "Break a unit in two at point.
A closing tag </XMLTAG> immediately followed by an opening tag <XMLTAG>
is inserted at point."
  (interactive "stag:")
  (let ((closing (format "</%s>" xmltag))
        (opening (format "<%s>" xmltag)))
    (insert closing opening)))
(defun transcribe-display-audio-info ()
  "Call `emms-player-mpg321-remote-proc', then start mpg321 from a shell.
The command line \"/usr/bin/mpg321 -R - &\" is handed to `shell-command',
whose value is returned."
  (emms-player-mpg321-remote-proc)
  (let ((command "/usr/bin/mpg321 -R - &"))
    (shell-command command)))
;; Keyboard macro that inserts a new episode structure.  The string is
;; replayed as keystrokes when the command NewEpisode is invoked.
(fset 'NewEpisode
      "<episode>\n<number>DATE-NUMBER</number>\n<duration></duration>\n<comment></comment>\n<subject>Subject (level)</subject>\n<participants></participants>\n<task>\n\t<role>low or high</role>\n<context>low or high</context>\n<demand>low or high</demand>\r</task>\n<auxiliar>Yes/no</auxiliar>\n<transcription>\n</transcription>\n</episode>")
(defvar transcribe-mode-map
  (let ((map (make-sparse-keymap)))
    ;; Audio control.
    ;; NOTE(review): `transcribe-play-file' is not defined in the visible
    ;; part of this file -- confirm that it exists, or bind the emms
    ;; command that prompts for a file instead.
    (define-key map (kbd "C-x C-p") 'transcribe-play-file)
    (define-key map (kbd "C-x <down>") 'emms-stop)
    (define-key map (kbd "C-x <right>") 'emms-seek-forward)
    (define-key map (kbd "C-x <left>") 'emms-seek-backward)
    (define-key map (kbd "<f5>") 'emms-pause)
    (define-key map (kbd "<f8>") 'emms-seek)
    ;; Analysis and episode structure.
    (define-key map (kbd "C-x C-a") 'transcribe-analyze)
    (define-key map (kbd "C-x C-n") 'NewEpisode)
    ;; Attributes.
    ;; NOTE(review): the Commentary lists <f2> as the function attribute
    ;; and <f3> as the move attribute, and <f9>/<f10> as the person and
    ;; the custom tag -- the reverse of these bindings.  The bindings
    ;; are kept as they were; confirm which side is intended.
    (define-key map (kbd "<f2>") 'transcribe-add-attribute-move)
    (define-key map (kbd "<f3>") 'transcribe-add-attribute-function)
    (define-key map (kbd "<f4>") 'transcribe-add-attribute)
    ;; Tags.
    (define-key map (kbd "<f9>") 'transcribe-xml-tag)
    (define-key map (kbd "<f10>") 'transcribe-xml-tag-person)
    ;; The speech act commands are named transcribe-xml-tag-l1/-l2.
    (define-key map (kbd "<f11>") 'transcribe-xml-tag-l1)
    (define-key map (kbd "<f12>") 'transcribe-xml-tag-l2)
    ;; The value of the `let' form must be the keymap itself.
    map)
  "Keymap for Transcribe minor mode.")
(easy-menu-define transcribe-mode-menu transcribe-mode-map
  "Menu for Transcribe mode"
  ;; NOTE(review): the menu title and the separators are reconstructed;
  ;; confirm them against the intended layout.
  '("Transcribe"
    ["Raw Output" transcribe-raw-to-buffer]
    "---"
    ["Analyze" transcribe-analyze]
    ;; The command defined in this file is `transcribe-analyze-all'.
    ["Analyze all" transcribe-analyze-all]
    "---"
    ["Add transcription header" NewEpisode]
    ["Add move attribute" transcribe-add-attribute-move]
    ["Add function attribute" transcribe-add-attribute-function]
    ;; The speech act commands are named transcribe-xml-tag-l1/-l2.
    ["Add L1 intervention" transcribe-xml-tag-l1]
    ["Add L2 intervention" transcribe-xml-tag-l2]
    ["Add move" transcribe-xml-tag-person]
    "---"
    ;; NOTE(review): `transcribe-play-file' is not defined in the visible
    ;; part of this file -- confirm that it exists.
    ["Play audio file" transcribe-play-file]))
;; The keymap `transcribe-mode-map' is picked up by `define-minor-mode'
;; through its name.
;; NOTE(review): the mode line lighter is reconstructed -- confirm it.
(define-minor-mode transcribe-mode
  "Toggle transcribe-mode.
The body makes sure that the buffers \"Statistics Output\" and
\"Raw Output\" exist."
  :lighter " Transcribe"
  ;; The body runs every time the mode is toggled, so reuse the buffers
  ;; instead of generating \"Statistics Output<2>\", \"Raw Output<2>\"...
  (get-buffer-create "Statistics Output")
  (get-buffer-create "Raw Output"))
;; TODO: save the students present in transcription in list so that we can use that list for transcribe-analyze-all
400 (provide 'transcribe)
402 ;;; transcribe.el ends here