]> code.delx.au - gnu-emacs/blob - lib-src/make-docfile.c
* make-docfile.c (scan_lisp_file): Add bounds checking.
[gnu-emacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for GNU Emacs from source files.
2
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2012
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
27
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
35 */
36
37 #include <config.h>
38
39 #include <stdio.h>
40 #include <stdlib.h> /* config.h unconditionally includes this anyway */
41 #ifdef MSDOS
42 #include <fcntl.h>
43 #endif /* MSDOS */
44 #ifdef WINDOWSNT
45 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
46 is really just insurance. */
47 #undef fopen
48 #include <fcntl.h>
49 #include <direct.h>
50 #endif /* WINDOWSNT */
51
52 #ifdef DOS_NT
53 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
54 is really just insurance.
55
56 Similarly, msdos defines this as sys_chdir, but we're not linking with the
57 file where that function is defined. */
58 #undef chdir
59 #define READ_TEXT "rt"
60 #define READ_BINARY "rb"
61 #else /* not DOS_NT */
62 #define READ_TEXT "r"
63 #define READ_BINARY "r"
64 #endif /* not DOS_NT */
65
66 static int scan_file (char *filename);
67 static int scan_lisp_file (const char *filename, const char *mode);
68 static int scan_c_file (char *filename, const char *mode);
69 static void start_globals (void);
70 static void write_globals (void);
71
72 #include <unistd.h>
73
74 /* Stdio stream for output to the DOC file. */
75 FILE *outfile;
76
77 /* Name this program was invoked with. */
78 char *progname;
79
80 /* Nonzero if this invocation is generating globals.h. */
81 int generate_globals;
82
83 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
84
85 /* VARARGS1 */
86 static void
87 error (const char *s1, const char *s2)
88 {
89 fprintf (stderr, "%s: ", progname);
90 fprintf (stderr, s1, s2);
91 fprintf (stderr, "\n");
92 }
93
94 /* Print error message and exit. */
95
96 /* VARARGS1 */
97 static _Noreturn void
98 fatal (const char *s1, const char *s2)
99 {
100 error (s1, s2);
101 exit (EXIT_FAILURE);
102 }
103
104 /* Like malloc but get fatal error if memory is exhausted. */
105
106 static void *
107 xmalloc (unsigned int size)
108 {
109 void *result = (void *) malloc (size);
110 if (result == NULL)
111 fatal ("virtual memory exhausted", 0);
112 return result;
113 }
114
115 /* Like realloc but get fatal error if memory is exhausted. */
116
117 static void *
118 xrealloc (void *arg, unsigned int size)
119 {
120 void *result = (void *) realloc (arg, size);
121 if (result == NULL)
122 fatal ("virtual memory exhausted", 0);
123 return result;
124 }
125
126 \f
127 int
128 main (int argc, char **argv)
129 {
130 int i;
131 int err_count = 0;
132 int first_infile;
133
134 progname = argv[0];
135
136 outfile = stdout;
137
138 /* Don't put CRs in the DOC file. */
139 #ifdef MSDOS
140 _fmode = O_BINARY;
141 #if 0 /* Suspicion is that this causes hanging.
142 So instead we require people to use -o on MSDOS. */
143 (stdout)->_flag &= ~_IOTEXT;
144 _setmode (fileno (stdout), O_BINARY);
145 #endif
146 outfile = 0;
147 #endif /* MSDOS */
148 #ifdef WINDOWSNT
149 _fmode = O_BINARY;
150 _setmode (fileno (stdout), O_BINARY);
151 #endif /* WINDOWSNT */
152
153 /* If first two args are -o FILE, output to FILE. */
154 i = 1;
155 if (argc > i + 1 && !strcmp (argv[i], "-o"))
156 {
157 outfile = fopen (argv[i + 1], "w");
158 i += 2;
159 }
160 if (argc > i + 1 && !strcmp (argv[i], "-a"))
161 {
162 outfile = fopen (argv[i + 1], "a");
163 i += 2;
164 }
165 if (argc > i + 1 && !strcmp (argv[i], "-d"))
166 {
167 if (chdir (argv[i + 1]) != 0)
168 {
169 perror (argv[i + 1]);
170 return EXIT_FAILURE;
171 }
172 i += 2;
173 }
174 if (argc > i && !strcmp (argv[i], "-g"))
175 {
176 generate_globals = 1;
177 ++i;
178 }
179
180 if (outfile == 0)
181 fatal ("No output file specified", "");
182
183 if (generate_globals)
184 start_globals ();
185
186 first_infile = i;
187 for (; i < argc; i++)
188 {
189 int j;
190 /* Don't process one file twice. */
191 for (j = first_infile; j < i; j++)
192 if (! strcmp (argv[i], argv[j]))
193 break;
194 if (j == i)
195 err_count += scan_file (argv[i]);
196 }
197
198 if (err_count == 0 && generate_globals)
199 write_globals ();
200
201 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
202 }
203
204 /* Add a source file name boundary marker in the output file. */
205 static void
206 put_filename (char *filename)
207 {
208 char *tmp;
209
210 for (tmp = filename; *tmp; tmp++)
211 {
212 if (IS_DIRECTORY_SEP (*tmp))
213 filename = tmp + 1;
214 }
215
216 putc (037, outfile);
217 putc ('S', outfile);
218 fprintf (outfile, "%s\n", filename);
219 }
220
221 /* Read file FILENAME and output its doc strings to outfile. */
222 /* Return 1 if file is not found, 0 if it is found. */
223
224 static int
225 scan_file (char *filename)
226 {
227
228 size_t len = strlen (filename);
229
230 if (!generate_globals)
231 put_filename (filename);
232 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
233 return scan_lisp_file (filename, READ_BINARY);
234 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
235 return scan_lisp_file (filename, READ_TEXT);
236 else
237 return scan_c_file (filename, READ_TEXT);
238 }
239
240 static void
241 start_globals (void)
242 {
243 fprintf (outfile, "/* This file was auto-generated by make-docfile. */\n");
244 fprintf (outfile, "/* DO NOT EDIT. */\n");
245 fprintf (outfile, "struct emacs_globals {\n");
246 }
247 \f
248 static char input_buffer[128];
249
250 /* Some state during the execution of `read_c_string_or_comment'. */
251 struct rcsoc_state
252 {
253 /* A count of spaces and newlines that have been read, but not output. */
254 unsigned pending_spaces, pending_newlines;
255
256 /* Where we're reading from. */
257 FILE *in_file;
258
259 /* If non-zero, a buffer into which to copy characters. */
260 char *buf_ptr;
261 /* If non-zero, a file into which to copy characters. */
262 FILE *out_file;
263
264 /* A keyword we look for at the beginning of lines. If found, it is
265 not copied, and SAW_KEYWORD is set to true. */
266 const char *keyword;
267 /* The current point we've reached in an occurrence of KEYWORD in
268 the input stream. */
269 const char *cur_keyword_ptr;
270 /* Set to true if we saw an occurrence of KEYWORD. */
271 int saw_keyword;
272 };
273
274 /* Output CH to the file or buffer in STATE. Any pending newlines or
275 spaces are output first. */
276
277 static inline void
278 put_char (int ch, struct rcsoc_state *state)
279 {
280 int out_ch;
281 do
282 {
283 if (state->pending_newlines > 0)
284 {
285 state->pending_newlines--;
286 out_ch = '\n';
287 }
288 else if (state->pending_spaces > 0)
289 {
290 state->pending_spaces--;
291 out_ch = ' ';
292 }
293 else
294 out_ch = ch;
295
296 if (state->out_file)
297 putc (out_ch, state->out_file);
298 if (state->buf_ptr)
299 *state->buf_ptr++ = out_ch;
300 }
301 while (out_ch != ch);
302 }
303
304 /* If in the middle of scanning a keyword, continue scanning with
305 character CH, otherwise output CH to the file or buffer in STATE.
306 Any pending newlines or spaces are output first, as well as any
307 previously scanned characters that were thought to be part of a
308 keyword, but were in fact not. */
309
310 static void
311 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
312 {
313 if (state->keyword
314 && *state->cur_keyword_ptr == ch
315 && (state->cur_keyword_ptr > state->keyword
316 || state->pending_newlines > 0))
317 /* We might be looking at STATE->keyword at some point.
318 Keep looking until we know for sure. */
319 {
320 if (*++state->cur_keyword_ptr == '\0')
321 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
322 {
323 state->saw_keyword = 1;
324
325 /* Reset the scanning pointer. */
326 state->cur_keyword_ptr = state->keyword;
327
328 /* Canonicalize whitespace preceding a usage string. */
329 state->pending_newlines = 2;
330 state->pending_spaces = 0;
331
332 /* Skip any whitespace between the keyword and the
333 usage string. */
334 do
335 ch = getc (state->in_file);
336 while (ch == ' ' || ch == '\n');
337
338 /* Output the open-paren we just read. */
339 put_char (ch, state);
340
341 /* Skip the function name and replace it with `fn'. */
342 do
343 ch = getc (state->in_file);
344 while (ch != ' ' && ch != ')');
345 put_char ('f', state);
346 put_char ('n', state);
347
348 /* Put back the last character. */
349 ungetc (ch, state->in_file);
350 }
351 }
352 else
353 {
354 if (state->keyword && state->cur_keyword_ptr > state->keyword)
355 /* We scanned the beginning of a potential usage
356 keyword, but it was a false alarm. Output the
357 part we scanned. */
358 {
359 const char *p;
360
361 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
362 put_char (*p, state);
363
364 state->cur_keyword_ptr = state->keyword;
365 }
366
367 put_char (ch, state);
368 }
369 }
370
371
372 /* Skip a C string or C-style comment from INFILE, and return the
373 character that follows. COMMENT non-zero means skip a comment. If
374 PRINTFLAG is positive, output string contents to outfile. If it is
375 negative, store contents in buf. Convert escape sequences \n and
376 \t to newline and tab; discard \ followed by newline.
377 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
378 at the beginning of a line will be removed, and *SAW_USAGE set to
379 true if any were encountered. */
380
381 static int
382 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
383 {
384 register int c;
385 struct rcsoc_state state;
386
387 state.in_file = infile;
388 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
389 state.out_file = (printflag > 0 ? outfile : 0);
390 state.pending_spaces = 0;
391 state.pending_newlines = 0;
392 state.keyword = (saw_usage ? "usage:" : 0);
393 state.cur_keyword_ptr = state.keyword;
394 state.saw_keyword = 0;
395
396 c = getc (infile);
397 if (comment)
398 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
399 c = getc (infile);
400
401 while (c != EOF)
402 {
403 while (c != EOF && (comment ? c != '*' : c != '"'))
404 {
405 if (c == '\\')
406 {
407 c = getc (infile);
408 if (c == '\n' || c == '\r')
409 {
410 c = getc (infile);
411 continue;
412 }
413 if (c == 'n')
414 c = '\n';
415 if (c == 't')
416 c = '\t';
417 }
418
419 if (c == ' ')
420 state.pending_spaces++;
421 else if (c == '\n')
422 {
423 state.pending_newlines++;
424 state.pending_spaces = 0;
425 }
426 else
427 scan_keyword_or_put_char (c, &state);
428
429 c = getc (infile);
430 }
431
432 if (c != EOF)
433 c = getc (infile);
434
435 if (comment)
436 {
437 if (c == '/')
438 {
439 c = getc (infile);
440 break;
441 }
442
443 scan_keyword_or_put_char ('*', &state);
444 }
445 else
446 {
447 if (c != '"')
448 break;
449
450 /* If we had a "", concatenate the two strings. */
451 c = getc (infile);
452 }
453 }
454
455 if (printflag < 0)
456 *state.buf_ptr = 0;
457
458 if (saw_usage)
459 *saw_usage = state.saw_keyword;
460
461 return c;
462 }
463
464
465 \f
466 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
467 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
468
469 static void
470 write_c_args (FILE *out, char *func, char *buf, int minargs, int maxargs)
471 {
472 register char *p;
473 int in_ident = 0;
474 char *ident_start IF_LINT (= NULL);
475 size_t ident_length = 0;
476
477 fprintf (out, "(fn");
478
479 if (*buf == '(')
480 ++buf;
481
482 for (p = buf; *p; p++)
483 {
484 char c = *p;
485
486 /* Notice when a new identifier starts. */
487 if ((('A' <= c && c <= 'Z')
488 || ('a' <= c && c <= 'z')
489 || ('0' <= c && c <= '9')
490 || c == '_')
491 != in_ident)
492 {
493 if (!in_ident)
494 {
495 in_ident = 1;
496 ident_start = p;
497 }
498 else
499 {
500 in_ident = 0;
501 ident_length = p - ident_start;
502 }
503 }
504
505 /* Found the end of an argument, write out the last seen
506 identifier. */
507 if (c == ',' || c == ')')
508 {
509 if (ident_length == 0)
510 {
511 error ("empty arg list for `%s' should be (void), not ()", func);
512 continue;
513 }
514
515 if (strncmp (ident_start, "void", ident_length) == 0)
516 continue;
517
518 putc (' ', out);
519
520 if (minargs == 0 && maxargs > 0)
521 fprintf (out, "&optional ");
522
523 minargs--;
524 maxargs--;
525
526 /* In C code, `default' is a reserved word, so we spell it
527 `defalt'; demangle that here. */
528 if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
529 fprintf (out, "DEFAULT");
530 else
531 while (ident_length-- > 0)
532 {
533 c = *ident_start++;
534 if (c >= 'a' && c <= 'z')
535 /* Upcase the letter. */
536 c += 'A' - 'a';
537 else if (c == '_')
538 /* Print underscore as hyphen. */
539 c = '-';
540 putc (c, out);
541 }
542 }
543 }
544
545 putc (')', out);
546 }
547 \f
548 /* The types of globals. These are sorted roughly in decreasing alignment
549 order to avoid allocation gaps, except that functions are last. */
550 enum global_type
551 {
552 INVALID,
553 LISP_OBJECT,
554 EMACS_INTEGER,
555 BOOLEAN,
556 FUNCTION,
557 };
558
559 /* A single global. */
560 struct global
561 {
562 enum global_type type;
563 char *name;
564 int value;
565 };
566
567 /* All the variable names we saw while scanning C sources in `-g'
568 mode. */
569 int num_globals;
570 int num_globals_allocated;
571 struct global *globals;
572
573 static void
574 add_global (enum global_type type, char *name, int value)
575 {
576 /* Ignore the one non-symbol that can occur. */
577 if (strcmp (name, "..."))
578 {
579 ++num_globals;
580
581 if (num_globals_allocated == 0)
582 {
583 num_globals_allocated = 100;
584 globals = xmalloc (num_globals_allocated * sizeof (struct global));
585 }
586 else if (num_globals == num_globals_allocated)
587 {
588 num_globals_allocated *= 2;
589 globals = xrealloc (globals,
590 num_globals_allocated * sizeof (struct global));
591 }
592
593 globals[num_globals - 1].type = type;
594 globals[num_globals - 1].name = name;
595 globals[num_globals - 1].value = value;
596 }
597 }
598
599 static int
600 compare_globals (const void *a, const void *b)
601 {
602 const struct global *ga = a;
603 const struct global *gb = b;
604
605 if (ga->type != gb->type)
606 return ga->type - gb->type;
607
608 return strcmp (ga->name, gb->name);
609 }
610
611 static void
612 close_emacs_globals (void)
613 {
614 fprintf (outfile, "};\n");
615 fprintf (outfile, "extern struct emacs_globals globals;\n");
616 }
617
618 static void
619 write_globals (void)
620 {
621 int i, seen_defun = 0;
622 qsort (globals, num_globals, sizeof (struct global), compare_globals);
623 for (i = 0; i < num_globals; ++i)
624 {
625 char const *type;
626
627 switch (globals[i].type)
628 {
629 case EMACS_INTEGER:
630 type = "EMACS_INT";
631 break;
632 case BOOLEAN:
633 type = "bool";
634 break;
635 case LISP_OBJECT:
636 type = "Lisp_Object";
637 break;
638 case FUNCTION:
639 if (!seen_defun)
640 {
641 close_emacs_globals ();
642 fprintf (outfile, "\n");
643 seen_defun = 1;
644 }
645 break;
646 default:
647 fatal ("not a recognized DEFVAR_", 0);
648 }
649
650 if (globals[i].type != FUNCTION)
651 {
652 fprintf (outfile, " %s f_%s;\n", type, globals[i].name);
653 fprintf (outfile, "#define %s globals.f_%s\n",
654 globals[i].name, globals[i].name);
655 }
656 else
657 {
658 /* It would be nice to have a cleaner way to deal with these
659 special hacks. */
660 if (strcmp (globals[i].name, "Fthrow") == 0
661 || strcmp (globals[i].name, "Ftop_level") == 0
662 || strcmp (globals[i].name, "Fkill_emacs") == 0
663 || strcmp (globals[i].name, "Fexit_recursive_edit") == 0
664 || strcmp (globals[i].name, "Fabort_recursive_edit") == 0)
665 fprintf (outfile, "_Noreturn ");
666 fprintf (outfile, "EXFUN (%s, ", globals[i].name);
667 if (globals[i].value == -1)
668 fprintf (outfile, "MANY");
669 else if (globals[i].value == -2)
670 fprintf (outfile, "UNEVALLED");
671 else
672 fprintf (outfile, "%d", globals[i].value);
673 fprintf (outfile, ");\n");
674 }
675
676 while (i + 1 < num_globals
677 && !strcmp (globals[i].name, globals[i + 1].name))
678 {
679 if (globals[i].type == FUNCTION
680 && globals[i].value != globals[i + 1].value)
681 error ("function '%s' defined twice with differing signatures",
682 globals[i].name);
683 ++i;
684 }
685 }
686
687 if (!seen_defun)
688 close_emacs_globals ();
689 }
690
691 \f
692 /* Read through a c file. If a .o file is named,
693 the corresponding .c or .m file is read instead.
694 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
695 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
696
697 static int
698 scan_c_file (char *filename, const char *mode)
699 {
700 FILE *infile;
701 register int c;
702 register int commas;
703 int minargs, maxargs;
704 int extension = filename[strlen (filename) - 1];
705
706 if (extension == 'o')
707 filename[strlen (filename) - 1] = 'c';
708
709 infile = fopen (filename, mode);
710
711 if (infile == NULL && extension == 'o')
712 {
713 /* Try .m. */
714 filename[strlen (filename) - 1] = 'm';
715 infile = fopen (filename, mode);
716 if (infile == NULL)
717 filename[strlen (filename) - 1] = 'c'; /* Don't confuse people. */
718 }
719
720 /* No error if non-ex input file. */
721 if (infile == NULL)
722 {
723 perror (filename);
724 return 0;
725 }
726
727 /* Reset extension to be able to detect duplicate files. */
728 filename[strlen (filename) - 1] = extension;
729
730 c = '\n';
731 while (!feof (infile))
732 {
733 int doc_keyword = 0;
734 int defunflag = 0;
735 int defvarperbufferflag = 0;
736 int defvarflag = 0;
737 enum global_type type = INVALID;
738 char *name IF_LINT (= 0);
739
740 if (c != '\n' && c != '\r')
741 {
742 c = getc (infile);
743 continue;
744 }
745 c = getc (infile);
746 if (c == ' ')
747 {
748 while (c == ' ')
749 c = getc (infile);
750 if (c != 'D')
751 continue;
752 c = getc (infile);
753 if (c != 'E')
754 continue;
755 c = getc (infile);
756 if (c != 'F')
757 continue;
758 c = getc (infile);
759 if (c != 'V')
760 continue;
761 c = getc (infile);
762 if (c != 'A')
763 continue;
764 c = getc (infile);
765 if (c != 'R')
766 continue;
767 c = getc (infile);
768 if (c != '_')
769 continue;
770
771 defvarflag = 1;
772
773 c = getc (infile);
774 defvarperbufferflag = (c == 'P');
775 if (generate_globals)
776 {
777 if (c == 'I')
778 type = EMACS_INTEGER;
779 else if (c == 'L')
780 type = LISP_OBJECT;
781 else if (c == 'B')
782 type = BOOLEAN;
783 }
784
785 c = getc (infile);
786 /* We need to distinguish between DEFVAR_BOOL and
787 DEFVAR_BUFFER_DEFAULTS. */
788 if (generate_globals && type == BOOLEAN && c != 'O')
789 type = INVALID;
790 }
791 else if (c == 'D')
792 {
793 c = getc (infile);
794 if (c != 'E')
795 continue;
796 c = getc (infile);
797 if (c != 'F')
798 continue;
799 c = getc (infile);
800 defunflag = c == 'U';
801 }
802 else continue;
803
804 if (generate_globals
805 && (!defvarflag || defvarperbufferflag || type == INVALID)
806 && !defunflag)
807 continue;
808
809 while (c != '(')
810 {
811 if (c < 0)
812 goto eof;
813 c = getc (infile);
814 }
815
816 /* Lisp variable or function name. */
817 c = getc (infile);
818 if (c != '"')
819 continue;
820 c = read_c_string_or_comment (infile, -1, 0, 0);
821
822 if (generate_globals)
823 {
824 int i = 0;
825
826 /* Skip "," and whitespace. */
827 do
828 {
829 c = getc (infile);
830 }
831 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
832
833 /* Read in the identifier. */
834 do
835 {
836 input_buffer[i++] = c;
837 c = getc (infile);
838 }
839 while (! (c == ',' || c == ' ' || c == '\t'
840 || c == '\n' || c == '\r'));
841 input_buffer[i] = '\0';
842
843 name = xmalloc (i + 1);
844 memcpy (name, input_buffer, i + 1);
845
846 if (!defunflag)
847 {
848 add_global (type, name, 0);
849 continue;
850 }
851 }
852
853 /* DEFVAR_LISP ("name", addr, "doc")
854 DEFVAR_LISP ("name", addr /\* doc *\/)
855 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
856
857 if (defunflag)
858 commas = generate_globals ? 4 : 5;
859 else if (defvarperbufferflag)
860 commas = 3;
861 else if (defvarflag)
862 commas = 1;
863 else /* For DEFSIMPLE and DEFPRED. */
864 commas = 2;
865
866 while (commas)
867 {
868 if (c == ',')
869 {
870 commas--;
871
872 if (defunflag && (commas == 1 || commas == 2))
873 {
874 int scanned = 0;
875 do
876 c = getc (infile);
877 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
878 if (c < 0)
879 goto eof;
880 ungetc (c, infile);
881 if (commas == 2) /* Pick up minargs. */
882 scanned = fscanf (infile, "%d", &minargs);
883 else /* Pick up maxargs. */
884 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
885 {
886 if (generate_globals)
887 maxargs = (c == 'M') ? -1 : -2;
888 else
889 maxargs = -1;
890 }
891 else
892 scanned = fscanf (infile, "%d", &maxargs);
893 if (scanned < 0)
894 goto eof;
895 }
896 }
897
898 if (c == EOF)
899 goto eof;
900 c = getc (infile);
901 }
902
903 if (generate_globals)
904 {
905 add_global (FUNCTION, name, maxargs);
906 continue;
907 }
908
909 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
910 c = getc (infile);
911
912 if (c == '"')
913 c = read_c_string_or_comment (infile, 0, 0, 0);
914
915 while (c != EOF && c != ',' && c != '/')
916 c = getc (infile);
917 if (c == ',')
918 {
919 c = getc (infile);
920 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
921 c = getc (infile);
922 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
923 c = getc (infile);
924 if (c == ':')
925 {
926 doc_keyword = 1;
927 c = getc (infile);
928 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
929 c = getc (infile);
930 }
931 }
932
933 if (c == '"'
934 || (c == '/'
935 && (c = getc (infile),
936 ungetc (c, infile),
937 c == '*')))
938 {
939 int comment = c != '"';
940 int saw_usage;
941
942 putc (037, outfile);
943 putc (defvarflag ? 'V' : 'F', outfile);
944 fprintf (outfile, "%s\n", input_buffer);
945
946 if (comment)
947 getc (infile); /* Skip past `*'. */
948 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
949
950 /* If this is a defun, find the arguments and print them. If
951 this function takes MANY or UNEVALLED args, then the C source
952 won't give the names of the arguments, so we shouldn't bother
953 trying to find them.
954
955 Various doc-string styles:
956 0: DEFUN (..., "DOC") (args) [!comment]
957 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
958 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
959 */
960 if (defunflag && maxargs != -1 && !saw_usage)
961 {
962 char argbuf[1024], *p = argbuf;
963
964 if (!comment || doc_keyword)
965 while (c != ')')
966 {
967 if (c < 0)
968 goto eof;
969 c = getc (infile);
970 }
971
972 /* Skip into arguments. */
973 while (c != '(')
974 {
975 if (c < 0)
976 goto eof;
977 c = getc (infile);
978 }
979 /* Copy arguments into ARGBUF. */
980 *p++ = c;
981 do
982 *p++ = c = getc (infile);
983 while (c != ')');
984 *p = '\0';
985 /* Output them. */
986 fprintf (outfile, "\n\n");
987 write_c_args (outfile, input_buffer, argbuf, minargs, maxargs);
988 }
989 else if (defunflag && maxargs == -1 && !saw_usage)
990 /* The DOC should provide the usage form. */
991 fprintf (stderr, "Missing `usage' for function `%s'.\n",
992 input_buffer);
993 }
994 }
995 eof:
996 fclose (infile);
997 return 0;
998 }
999 \f
1000 /* Read a file of Lisp code, compiled or interpreted.
1001 Looks for
1002 (defun NAME ARGS DOCSTRING ...)
1003 (defmacro NAME ARGS DOCSTRING ...)
1004 (defsubst NAME ARGS DOCSTRING ...)
1005 (autoload (quote NAME) FILE DOCSTRING ...)
1006 (defvar NAME VALUE DOCSTRING)
1007 (defconst NAME VALUE DOCSTRING)
1008 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1009 (fset (quote NAME) #[... DOCSTRING ...])
1010 (defalias (quote NAME) #[... DOCSTRING ...])
1011 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1012 starting in column zero.
1013 (quote NAME) may appear as 'NAME as well.
1014
1015 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1016 When we find that, we save it for the following defining-form,
1017 and we use that instead of reading a doc string within that defining-form.
1018
1019 For defvar, defconst, and fset we skip to the docstring with a kludgy
1020 formatting convention: all docstrings must appear on the same line as the
1021 initial open-paren (the one in column zero) and must contain a backslash
1022 and a newline immediately after the initial double-quote. No newlines
1023 must appear between the beginning of the form and the first double-quote.
1024 For defun, defmacro, and autoload, we know how to skip over the
1025 arglist, but the doc string must still have a backslash and newline
1026 immediately after the double quote.
1027 The only source files that must follow this convention are preloaded
1028 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1029 file that we should look at, and they are no problem because byte-compiler
1030 output follows this convention.
1031 The NAME and DOCSTRING are output.
1032 NAME is preceded by `F' for a function or `V' for a variable.
1033 An entry is output only if DOCSTRING has \ newline just after the opening ".
1034 */
1035
1036 static void
1037 skip_white (FILE *infile)
1038 {
1039 char c = ' ';
1040 while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
1041 c = getc (infile);
1042 ungetc (c, infile);
1043 }
1044
1045 static void
1046 read_lisp_symbol (FILE *infile, char *buffer)
1047 {
1048 char c;
1049 char *fillp = buffer;
1050
1051 skip_white (infile);
1052 while (1)
1053 {
1054 c = getc (infile);
1055 if (c == '\\')
1056 *(++fillp) = getc (infile);
1057 else if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '(' || c == ')')
1058 {
1059 ungetc (c, infile);
1060 *fillp = 0;
1061 break;
1062 }
1063 else
1064 *fillp++ = c;
1065 }
1066
1067 if (! buffer[0])
1068 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
1069
1070 skip_white (infile);
1071 }
1072
1073 static int
1074 search_lisp_doc_at_eol (FILE *infile)
1075 {
1076 char c = 0, c1 = 0, c2 = 0;
1077
1078 /* Skip until the end of line; remember two previous chars. */
1079 while (c != '\n' && c != '\r' && c != EOF)
1080 {
1081 c2 = c1;
1082 c1 = c;
1083 c = getc (infile);
1084 }
1085
1086 /* If two previous characters were " and \,
1087 this is a doc string. Otherwise, there is none. */
1088 if (c2 != '"' || c1 != '\\')
1089 {
1090 #ifdef DEBUG
1091 fprintf (stderr, "## non-docstring in %s (%s)\n",
1092 buffer, filename);
1093 #endif
1094 if (c != EOF)
1095 ungetc (c, infile);
1096 return 0;
1097 }
1098 return 1;
1099 }
1100
1101 static int
1102 scan_lisp_file (const char *filename, const char *mode)
1103 {
1104 FILE *infile;
1105 register int c;
1106 char *saved_string = 0;
1107 /* These are the only files that are loaded uncompiled, and must
1108 follow the conventions of the doc strings expected by this
1109 function. These conventions are automatically followed by the
1110 byte compiler when it produces the .elc files. */
1111 static const char *const uncompiled[] =
1112 {
1113 "loaddefs.el",
1114 "loadup.el",
1115 "charprop.el"
1116 };
1117 int i, match;
1118 size_t flen = strlen (filename);
1119
1120 if (generate_globals)
1121 fatal ("scanning lisp file when -g specified", 0);
1122 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1123 {
1124 for (i = 0, match = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]);
1125 i++)
1126 {
1127 if (strlen (uncompiled[i]) <= flen
1128 && !strcmp (filename + flen - strlen (uncompiled[i]),
1129 uncompiled[i]))
1130 {
1131 match = 1;
1132 break;
1133 }
1134 }
1135 if (!match)
1136 fatal ("uncompiled lisp file %s is not supported", filename);
1137 }
1138
1139 infile = fopen (filename, mode);
1140 if (infile == NULL)
1141 {
1142 perror (filename);
1143 return 0; /* No error. */
1144 }
1145
1146 c = '\n';
1147 while (!feof (infile))
1148 {
1149 char buffer[BUFSIZ];
1150 char type;
1151
1152 /* If not at end of line, skip till we get to one. */
1153 if (c != '\n' && c != '\r')
1154 {
1155 c = getc (infile);
1156 continue;
1157 }
1158 /* Skip the line break. */
1159 while (c == '\n' || c == '\r')
1160 c = getc (infile);
1161 /* Detect a dynamic doc string and save it for the next expression. */
1162 if (c == '#')
1163 {
1164 c = getc (infile);
1165 if (c == '@')
1166 {
1167 size_t length = 0;
1168 size_t i;
1169
1170 /* Read the length. */
1171 while ((c = getc (infile),
1172 c >= '0' && c <= '9'))
1173 {
1174 length *= 10;
1175 length += c - '0';
1176 }
1177
1178 if (length <= 1)
1179 fatal ("invalid dynamic doc string length", "");
1180
1181 if (c != ' ')
1182 fatal ("space not found after dynamic doc string length", "");
1183
1184 /* The next character is a space that is counted in the length
1185 but not part of the doc string.
1186 We already read it, so just ignore it. */
1187 length--;
1188
1189 /* Read in the contents. */
1190 free (saved_string);
1191 saved_string = (char *) xmalloc (length);
1192 for (i = 0; i < length; i++)
1193 saved_string[i] = getc (infile);
1194 /* The last character is a ^_.
1195 That is needed in the .elc file
1196 but it is redundant in DOC. So get rid of it here. */
1197 saved_string[length - 1] = 0;
1198 /* Skip the line break. */
1199 while (c == '\n' || c == '\r')
1200 c = getc (infile);
1201 /* Skip the following line. */
1202 while (c != '\n' && c != '\r')
1203 c = getc (infile);
1204 }
1205 continue;
1206 }
1207
1208 if (c != '(')
1209 continue;
1210
1211 read_lisp_symbol (infile, buffer);
1212
1213 if (! strcmp (buffer, "defun")
1214 || ! strcmp (buffer, "defmacro")
1215 || ! strcmp (buffer, "defsubst"))
1216 {
1217 type = 'F';
1218 read_lisp_symbol (infile, buffer);
1219
1220 /* Skip the arguments: either "nil" or a list in parens. */
1221
1222 c = getc (infile);
1223 if (c == 'n') /* nil */
1224 {
1225 if ((c = getc (infile)) != 'i'
1226 || (c = getc (infile)) != 'l')
1227 {
1228 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1229 buffer, filename);
1230 continue;
1231 }
1232 }
1233 else if (c != '(')
1234 {
1235 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1236 buffer, filename);
1237 continue;
1238 }
1239 else
1240 while (c != ')')
1241 c = getc (infile);
1242 skip_white (infile);
1243
1244 /* If the next three characters aren't `dquote bslash newline'
1245 then we're not reading a docstring.
1246 */
1247 if ((c = getc (infile)) != '"'
1248 || (c = getc (infile)) != '\\'
1249 || ((c = getc (infile)) != '\n' && c != '\r'))
1250 {
1251 #ifdef DEBUG
1252 fprintf (stderr, "## non-docstring in %s (%s)\n",
1253 buffer, filename);
1254 #endif
1255 continue;
1256 }
1257 }
1258
1259 /* defcustom can only occur in uncompiled Lisp files. */
1260 else if (! strcmp (buffer, "defvar")
1261 || ! strcmp (buffer, "defconst")
1262 || ! strcmp (buffer, "defcustom"))
1263 {
1264 type = 'V';
1265 read_lisp_symbol (infile, buffer);
1266
1267 if (saved_string == 0)
1268 if (!search_lisp_doc_at_eol (infile))
1269 continue;
1270 }
1271
1272 else if (! strcmp (buffer, "custom-declare-variable")
1273 || ! strcmp (buffer, "defvaralias")
1274 )
1275 {
1276 type = 'V';
1277
1278 c = getc (infile);
1279 if (c == '\'')
1280 read_lisp_symbol (infile, buffer);
1281 else
1282 {
1283 if (c != '(')
1284 {
1285 fprintf (stderr,
1286 "## unparsable name in custom-declare-variable in %s\n",
1287 filename);
1288 continue;
1289 }
1290 read_lisp_symbol (infile, buffer);
1291 if (strcmp (buffer, "quote"))
1292 {
1293 fprintf (stderr,
1294 "## unparsable name in custom-declare-variable in %s\n",
1295 filename);
1296 continue;
1297 }
1298 read_lisp_symbol (infile, buffer);
1299 c = getc (infile);
1300 if (c != ')')
1301 {
1302 fprintf (stderr,
1303 "## unparsable quoted name in custom-declare-variable in %s\n",
1304 filename);
1305 continue;
1306 }
1307 }
1308
1309 if (saved_string == 0)
1310 if (!search_lisp_doc_at_eol (infile))
1311 continue;
1312 }
1313
1314 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1315 {
1316 type = 'F';
1317
1318 c = getc (infile);
1319 if (c == '\'')
1320 read_lisp_symbol (infile, buffer);
1321 else
1322 {
1323 if (c != '(')
1324 {
1325 fprintf (stderr, "## unparsable name in fset in %s\n",
1326 filename);
1327 continue;
1328 }
1329 read_lisp_symbol (infile, buffer);
1330 if (strcmp (buffer, "quote"))
1331 {
1332 fprintf (stderr, "## unparsable name in fset in %s\n",
1333 filename);
1334 continue;
1335 }
1336 read_lisp_symbol (infile, buffer);
1337 c = getc (infile);
1338 if (c != ')')
1339 {
1340 fprintf (stderr,
1341 "## unparsable quoted name in fset in %s\n",
1342 filename);
1343 continue;
1344 }
1345 }
1346
1347 if (saved_string == 0)
1348 if (!search_lisp_doc_at_eol (infile))
1349 continue;
1350 }
1351
1352 else if (! strcmp (buffer, "autoload"))
1353 {
1354 type = 'F';
1355 c = getc (infile);
1356 if (c == '\'')
1357 read_lisp_symbol (infile, buffer);
1358 else
1359 {
1360 if (c != '(')
1361 {
1362 fprintf (stderr, "## unparsable name in autoload in %s\n",
1363 filename);
1364 continue;
1365 }
1366 read_lisp_symbol (infile, buffer);
1367 if (strcmp (buffer, "quote"))
1368 {
1369 fprintf (stderr, "## unparsable name in autoload in %s\n",
1370 filename);
1371 continue;
1372 }
1373 read_lisp_symbol (infile, buffer);
1374 c = getc (infile);
1375 if (c != ')')
1376 {
1377 fprintf (stderr,
1378 "## unparsable quoted name in autoload in %s\n",
1379 filename);
1380 continue;
1381 }
1382 }
1383 skip_white (infile);
1384 if ((c = getc (infile)) != '\"')
1385 {
1386 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1387 buffer, filename);
1388 continue;
1389 }
1390 read_c_string_or_comment (infile, 0, 0, 0);
1391
1392 if (saved_string == 0)
1393 if (!search_lisp_doc_at_eol (infile))
1394 continue;
1395 }
1396
1397 #ifdef DEBUG
1398 else if (! strcmp (buffer, "if")
1399 || ! strcmp (buffer, "byte-code"))
1400 continue;
1401 #endif
1402
1403 else
1404 {
1405 #ifdef DEBUG
1406 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1407 buffer, filename);
1408 #endif
1409 continue;
1410 }
1411
1412 /* At this point, we should either use the previous dynamic doc string in
1413 saved_string or gobble a doc string from the input file.
1414 In the latter case, the opening quote (and leading backslash-newline)
1415 have already been read. */
1416
1417 putc (037, outfile);
1418 putc (type, outfile);
1419 fprintf (outfile, "%s\n", buffer);
1420 if (saved_string)
1421 {
1422 fputs (saved_string, outfile);
1423 /* Don't use one dynamic doc string twice. */
1424 free (saved_string);
1425 saved_string = 0;
1426 }
1427 else
1428 read_c_string_or_comment (infile, 1, 0, 0);
1429 }
1430 fclose (infile);
1431 return 0;
1432 }
1433
1434
1435 /* make-docfile.c ends here */