]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
ea337d4100822c87cff1ddaae9f9cbafef14d8b0
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
32 Foundation, Inc.
33
34 This file is not considered part of GNU Emacs.
35
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
40
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
45
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
48
49
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
53
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
57
58
59 /*
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
73 */
74
75 /*
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
79 */
80
/* Revision string of this etags.  It is the only object here defined
   without `static'; NOTE(review): presumably so that the string stays
   embedded in the binary -- confirm.  The number matches the fallback
   value of VERSION used by print_version.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
82
/* Normalize DEBUG to a boolean value so that it can be tested in
   ordinary expressions.  When not debugging, NDEBUG is also defined,
   which turns assert into a no-op.  */
#ifdef DEBUG
# undef DEBUG
# define DEBUG true
#else
# define DEBUG  false
# define NDEBUG		/* disable assert */
#endif
90
91 #include <config.h>
92
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
96
/* WIN32_NATIVE is for XEmacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* Map the XEmacs macro onto the Emacs one: WIN32_NATIVE implies
   WINDOWSNT and excludes MSDOS.  */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* Normalize MSDOS to a boolean value usable in ordinary expressions.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */

/* On WINDOWSNT: take MAXPATHLEN from _MAX_PATH, force DOS_NT on and
   HAVE_NTGUI off, and map O_CLOEXEC onto O_NOINHERIT.  */
#ifdef WINDOWSNT
# include <direct.h>
# define MAXPATHLEN _MAX_PATH
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
# define O_CLOEXEC O_NOINHERIT
#endif /* WINDOWSNT */
121
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <sys/types.h>
131 #include <sys/stat.h>
132 #include <binary-io.h>
133 #include <c-ctype.h>
134 #include <c-strcase.h>
135
136 #include <assert.h>
137 #ifdef NDEBUG
138 # undef assert /* some systems have a buggy assert.h */
139 # define assert(x) ((void) 0)
140 #endif
141
142 #include <getopt.h>
143 #include <regex.h>
144
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default.

   Whatever the builder chose, CTAGS ends up defined as a boolean
   (true or false), so it can be used both in #if and in ordinary
   expressions.  */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
154
/* String equality helpers; each one yields true when the strings match.
     streq       - whole strings, case-sensitive
     strcaseeq   - whole strings, ignoring case (c_strcasecmp)
     strneq      - first N characters, case-sensitive
     strncaseeq  - first N characters, ignoring case (c_strncasecmp)
   Both S and T must be non-null, because handing a null pointer to the
   underlying comparison function is undefined behavior.  The assertion
   therefore requires BOTH pointers to be non-null; it is only active
   when assert is enabled.  When it is active the arguments are
   evaluated twice, so they must not have side effects.  */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
159
/* Return true if C cannot be part of a name, i.e. if it is NUL, a tab,
   a newline, a form feed, a carriage return, a space, or one of the
   characters `(', `)', `,', `;' and `='.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
171
/* Return true if C can start a token: an ASCII letter of either case
   or one of `$', `@', `_' and `~'.  */
static bool
begtoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '@': case '_': case '~':

    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
191
/* Return true if C can appear in the middle of a token: an ASCII
   letter of either case, a decimal digit, `$' or `_'.  */
static bool
intoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '_':

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':

    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
212
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed below.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':

    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
226
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a pointer to storage for N objects of type Type,
 * obtained from xmalloc.  xrnew resizes the storage OP points to so
 * that it holds N objects of type Type, using xrealloc, and assigns
 * the result back to OP; OP must therefore be a modifiable lvalue and
 * is evaluated twice.  The byte count is the plain product
 * N * sizeof (Type); the macros themselves do no overflow check.
 */
#define xnew(n, Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
235
/* Type of a per-language parse function: it takes the input stream and
   returns nothing.  Pointers to such functions are stored in the
   `function' member of `language'.  */
typedef void Lang_function (FILE *);
237
/* Associates a file name suffix with the command that decompresses
   files carrying that suffix.  */
typedef struct
{
  const char *suffix;		/* file name suffix for this compressor */
  const char *command;		/* takes one arg and decompresses to stdout */
} compressor;
243
/* Description of one supported language.  The `lang_names' table is an
   array of these, terminated by an entry whose NAME is NULL.  Members
   left out of an initializer are zero, so the three string lists and
   FUNCTION may be NULL.  */
typedef struct
{
  const char *name;		/* language name */
  const char *help;		/* detailed help for the language */
  Lang_function *function;	/* parse function */
  const char **suffixes;	/* name suffixes of this language's files */
  const char **filenames;	/* names of this language's files */
  const char **interpreters;	/* interpreters for this language */
  bool metasource;		/* source used to generate other sources */
} language;
254
/* Description of one input file.  Descriptions are chained through
   NEXT into a singly linked list.  */
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
  bool written;			/* entry written in the tags file */
} fdesc;
267
/* One tag.  Nodes are linked through LEFT and RIGHT into a binary
   tree, and each node points back to the description of the file the
   tag belongs to.  */
typedef struct node_st
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
  char *regex;			/* search regexp */
  bool valid;			/* write this tag on the tag file */
  bool is_func;			/* function tag: use regexp in CTAGS mode */
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
  long cno;			/* character number line starts on */
} node;
280
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
294
/* Used to support mixing of --lang and file names.
   Each element records one command-line item together with its kind;
   a list of them ends with an element of type `at_end'.  */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
    at_filename,		/* a file name */
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;
308
/* Structure defining a regular expression.
   Regexps are chained through P_NEXT into a singly linked list.  */
typedef struct regexp
{
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
  bool force_explicit_name;	/* do not allow implicit tag name */
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
323
324
/* Many compilers barf on this:
	Lang_function Ada_funcs;
   so let's write it this way */
/* Parse functions.  All of them take the input stream; C_entries
   additionally takes a C_EXT argument and therefore does not have the
   Lang_function signature.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);

/* Language lookup and line-oriented input helpers.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);

/* Regexp handling, diagnostics and tag tree insertion.  `error' is
   checked as a printf-style function; `fatal' is the only function
   declared here without `static'.  */
static void analyze_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void suggest_asking_for_help (void);
_Noreturn void fatal (const char *, const char *);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

/* Input file processing and tag output.  */
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

/* String, file name, line buffer and memory utilities.  */
static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, int);
static char *savestr (const char *);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static char *etags_mktmp (void);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static void *xmalloc (size_t);
static void *xrealloc (void *, size_t);
397
398 \f
/* Global state.  */
static char searchar = '/';	/* use /.../ searches */

static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */
static ptrdiff_t whatlen_max;	/* maximum length of any 'what' member */

static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */

/* NOTE(review): presumably a marker meaning "no valid character
   number" -- confirm against its uses.  */
static const int invalidcharno = -1;

static node *nodehead;		/* the head of the binary tree of tags */
static node *last_node;		/* the last node created */

static linebuffer lb;		/* the current line */
static linebuffer filebuf;	/* a buffer containing the whole file */
static linebuffer token_name;	/* a buffer containing a tag name */
423
/* Command-line option state.  The flags declared `int' rather than
   `bool' are the ones whose address is stored in the `longopts' table,
   where getopt_long needs an `int *' to store the option value.  */
static bool append_to_tagfile;	/* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
				/* 0 struct/enum/union decls, and C++ */
				/* member functions. */
static bool constantypedefs;	/* -d: create tags for C #define, enum */
				/* constants and variables. */
				/* -D: opposite of -d.  Default under ctags. */
static int globals;		/* create tags for global variables */
static int members;		/* create tags for C member variables */
static int declarations;	/* --declarations: tag them and extern in C&Co*/
static int no_line_directive;	/* ignore #line directives (undocumented) */
static int no_duplicates;	/* no duplicate tags for ctags (undocumented) */
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
static bool cxref_style;	/* -x: create cxref style output */
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;	/* -I: ignore indentation in C */
static int packages_only;	/* --packages-only: in Ada, only tag packages*/

/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* The value lies above the range of `unsigned char', so it cannot
   collide with a single-character option code.  */
#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

static regexp *p_head;		/* list of all regexps */
static bool need_filebuf;	/* some regexes are multi-line */
456
/* Long options, in the format expected by getopt_long.  An entry with
   a NULL flag pointer makes getopt_long return the value in the last
   column; an entry with a flag pointer makes it store that value into
   the pointed-to `int' instead.  The set of options differs between
   the ctags and the etags build.  The table ends with an all-zero
   entry.

   NOTE(review): "help" is listed twice, once for 'h' and once for 'H';
   the second entry looks unreachable through an exact match on the
   long name -- confirm before relying on or removing it.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
496
/* Known compressed-file suffixes and the command that decompresses
   each of them to standard output.  Upper- and lower-case spellings
   are separate entries.  The table ends with a NULL suffix.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
507
508 /*
509 * Language stuff.
510 */
511
/* Ada code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  */
static const char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static const char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";
531
/* Assembly code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Asm_suffixes [] =
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
static const char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
547
548
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static const char *default_C_suffixes [] =
  { "c", "h", NULL };
/* The help text for C depends on the build, because ctags and etags
   tag different things by default and accept different options.  */
#if CTAGS				/* C help for Ctags */
static const char default_C_help [] =
"In C code, any C function is a tag. Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
static const char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'. `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'. Global variables are tags unless you specify\n\
`--no-globals' and so are struct members unless you specify\n\
`--no-members'. Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using `--declarations'.";
#endif	/* C help for Ctags and Etags */
574
/* C++ code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  */
static const char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* PostScript with C syntax */
    NULL };
static const char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the `--no-members' option.\n\
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
`operator+'.";
588
/* Java code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  The help text is never
   modified, so it is `const' like the help texts of the other
   languages.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
594
595
/* Cobol code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  The help text is never
   modified, so it is `const' like the help texts of the other
   languages.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
601
/* C* code: file name suffixes only.  There is no dedicated help text;
   the language table uses `no_lang_help' for it.  */
static const char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

/* Erlang code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static const char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
610
/* Forth code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  The suffix list has
   internal linkage like every other suffix list in this file, so that
   it does not add a symbol to the global namespace.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
616
/* Fortran code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";

/* HTML input: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static const char HTML_help [] =
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers. Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";
628
/* Lisp code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  */
static const char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static const char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.\n\
The `--declarations' option tags \"(defvar foo)\" constructs too.";

/* Lua scripts: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static const char Lua_help [] =
"In Lua scripts, all functions are tags.";
642
/* Makefiles: these are listed by complete file name rather than by
   suffix; the language table gives this list as `filenames' and has
   no suffix list for makefiles.  */
static const char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static const char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";

/* Objective C code: NULL-terminated list of file name suffixes, and
   the text printed as detailed help for the language.  */
static const char *Objc_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
static const char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";
658
/* Pascal code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static const char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */

/* Perl code: file name suffixes, interpreter names and help text.
   NOTE(review): "@PERL@" looks like a placeholder meant to be
   substituted at configure time -- confirm.  */
static const char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static const char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static const char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is `main::SUB'.";

/* PHP code: NULL-terminated list of file name suffixes, and the text
   printed as detailed help for the language.  */
static const char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static const char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the `--no-members' option, vars are tags too.";
682
/* Suffixes of the files handled by the "proc" entry of the language
   table, which has no dedicated help text.  */
static const char *plain_C_suffixes [] =
  { "pc",			/* Pro*C file */
     NULL };

/* PostScript code: NULL-terminated list of file name suffixes, and
   the text printed as detailed help for the language.  */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Prolog_suffixes [] =
  { "prolog", NULL };
static const char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

/* Python code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Python_suffixes [] =
  { "py", NULL };
static const char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";
703
/* Scheme code: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
/* Can't do the `SCM' or `scm' prefix with a version number. */
static const char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!' at top level in the file.";
711
/* TeX and LaTeX text: NULL-terminated list of file name suffixes, and
   the text printed as detailed help for the language.  */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo files: NULL-terminated list of file name suffixes, and the
   text printed as detailed help for the language.  */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";
730
/* Bison and Yacc input: NULL-terminated list of file name suffixes,
   and the text printed as detailed help for the language.  */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";

/* Help text for the pseudo-language `auto'.  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files base on file name suffix and file contents.";

/* Help text for the pseudo-language `none'.  */
static const char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Help text shared by the languages that have no text of their own.  */
static const char no_lang_help [] =
"No detailed help available for this language.";
749
750
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * The initializers are positional and follow the member order of
 * `language': name, help, function, suffixes, filenames,
 * interpreters, metasource.  Trailing members that are left out are
 * zero.  The table ends with an entry whose name is NULL.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
790
791 \f
792 static void
793 print_language_names (void)
794 {
795 language *lang;
796 const char **name, **ext;
797
798 puts ("\nThese are the currently supported languages, along with the\n\
799 default file names and dot suffixes:");
800 for (lang = lang_names; lang->name != NULL; lang++)
801 {
802 printf (" %-*s", 10, lang->name);
803 if (lang->filenames != NULL)
804 for (name = lang->filenames; *name != NULL; name++)
805 printf (" %s", *name);
806 if (lang->suffixes != NULL)
807 for (ext = lang->suffixes; *ext != NULL; ext++)
808 printf (" .%s", *ext);
809 puts ("");
810 }
811 puts ("where `auto' means use default language for files based on file\n\
812 name suffix, and `none' means only do regexp processing on files.\n\
813 If no language is specified and no matching suffix is found,\n\
814 the first line of the file is read for a sharp-bang (#!) sequence\n\
815 followed by the name of an interpreter. If no such sequence is found,\n\
816 Fortran is tried first; if no tags are found, C is tried next.\n\
817 When parsing any C file, a \"class\" or \"template\" keyword\n\
818 switches to C++.");
819 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
820 \n\
821 For detailed help on a given language use, for example,\n\
822 etags --help --lang=ada.");
823 }
824
825 #ifndef EMACS_NAME
826 # define EMACS_NAME "standalone"
827 #endif
828 #ifndef VERSION
829 # define VERSION "17.38.1.4"
830 #endif
831 static _Noreturn void
832 print_version (void)
833 {
834 char emacs_copyright[] = COPYRIGHT;
835
836 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
837 puts (emacs_copyright);
838 puts ("This program is distributed under the terms in ETAGS.README");
839
840 exit (EXIT_SUCCESS);
841 }
842
/* When true, print_help also describes the options that are normally
   left undocumented (--no-line-directive, -w).  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP false
#endif

/* Print help and exit successfully; never returns.

   ARGBUFFER is the parsed argument list, terminated by an entry whose
   arg_type is at_end.  If it contains any at_language entries, only
   the help text of those languages is printed (separated by blank
   lines).  Otherwise the general usage message is printed, followed
   by the list of supported languages.  Which options are described
   depends on whether the program is built as ctags or etags (CTAGS).  */
static _Noreturn void
print_help (argument *argbuffer)
{
  bool help_for_lang = false;

  /* Language-specific help: one help text per --language argument.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = true;
      }

  /* If any language help was printed, that is all the user gets.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  /* General help.  */
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  puts ("You may use unambiguous abbreviations for the long option names.");
  puts (" A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
Append tag entries to existing tags file.");

  puts ("--packages-only\n\
For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++. Retained for backward compatibility and for debugging and
     experimentation. In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* ctags and etags have opposite defaults here, hence the two
     differently named options.  */
  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for C #define constants and enum constants.\n\
This makes the tags file smaller.");

  /* --include is described for etags only.  */
  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");

  puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");

  if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
    puts ("--no-line-directive\n\
Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
Do not create tag entries for members of structures\n\
in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
Make a tag for each line matching a regular expression pattern\n\
in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
files only. REGEXFILE is a file containing one REGEXP per line.\n\
REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
MODS are optional one-letter modifiers: `i' means to ignore case,\n\
`m' means to allow multi-line matches, `s' implies `m' and\n\
causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
In C and C++ do not assume that a closing brace in the first\n\
column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to --version, are printed
     for ctags only.  */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
Print on the standard output an index of items intended for\n\
human consumption, similar to the output of vgrind. The index\n\
is sorted, and gives the page number of each item.");

      /* NOTE(review): both undocumented entries below are listed
         under the same short option -w -- confirm which is intended.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
Do not create duplicate tag entries, for compatibility with\n\
traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.\n\
Followed by one or more `--language' options prints detailed\n\
help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1018
1019 \f
1020 int
1021 main (int argc, char **argv)
1022 {
1023 int i;
1024 unsigned int nincluded_files;
1025 char **included_files;
1026 argument *argbuffer;
1027 int current_arg, file_count;
1028 linebuffer filename_lb;
1029 bool help_asked = false;
1030 ptrdiff_t len;
1031 char *optstring;
1032 int opt;
1033
1034 progname = argv[0];
1035 nincluded_files = 0;
1036 included_files = xnew (argc, char *);
1037 current_arg = 0;
1038 file_count = 0;
1039
1040 /* Allocate enough no matter what happens. Overkill, but each one
1041 is small. */
1042 argbuffer = xnew (argc, argument);
1043
1044 /*
1045 * Always find typedefs and structure tags.
1046 * Also default to find macro constants, enum constants, struct
1047 * members and global variables. Do it for both etags and ctags.
1048 */
1049 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1050 globals = members = true;
1051
1052 /* When the optstring begins with a '-' getopt_long does not rearrange the
1053 non-options arguments to be at the end, but leaves them alone. */
1054 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1055 (CTAGS) ? "BxdtTuvw" : "Di:",
1056 "");
1057
1058 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1059 switch (opt)
1060 {
1061 case 0:
1062 /* If getopt returns 0, then it has already processed a
1063 long-named option. We should do nothing. */
1064 break;
1065
1066 case 1:
1067 /* This means that a file name has been seen. Record it. */
1068 argbuffer[current_arg].arg_type = at_filename;
1069 argbuffer[current_arg].what = optarg;
1070 len = strlen (optarg);
1071 if (whatlen_max < len)
1072 whatlen_max = len;
1073 ++current_arg;
1074 ++file_count;
1075 break;
1076
1077 case STDIN:
1078 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1079 argbuffer[current_arg].arg_type = at_stdin;
1080 argbuffer[current_arg].what = optarg;
1081 len = strlen (optarg);
1082 if (whatlen_max < len)
1083 whatlen_max = len;
1084 ++current_arg;
1085 ++file_count;
1086 if (parsing_stdin)
1087 fatal ("cannot parse standard input more than once", (char *)NULL);
1088 parsing_stdin = true;
1089 break;
1090
1091 /* Common options. */
1092 case 'a': append_to_tagfile = true; break;
1093 case 'C': cplusplus = true; break;
1094 case 'f': /* for compatibility with old makefiles */
1095 case 'o':
1096 if (tagfile)
1097 {
1098 error ("-o option may only be given once.");
1099 suggest_asking_for_help ();
1100 /* NOTREACHED */
1101 }
1102 tagfile = optarg;
1103 break;
1104 case 'I':
1105 case 'S': /* for backward compatibility */
1106 ignoreindent = true;
1107 break;
1108 case 'l':
1109 {
1110 language *lang = get_language_from_langname (optarg);
1111 if (lang != NULL)
1112 {
1113 argbuffer[current_arg].lang = lang;
1114 argbuffer[current_arg].arg_type = at_language;
1115 ++current_arg;
1116 }
1117 }
1118 break;
1119 case 'c':
1120 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1121 optarg = concat (optarg, "i", ""); /* memory leak here */
1122 /* FALLTHRU */
1123 case 'r':
1124 argbuffer[current_arg].arg_type = at_regexp;
1125 argbuffer[current_arg].what = optarg;
1126 len = strlen (optarg);
1127 if (whatlen_max < len)
1128 whatlen_max = len;
1129 ++current_arg;
1130 break;
1131 case 'R':
1132 argbuffer[current_arg].arg_type = at_regexp;
1133 argbuffer[current_arg].what = NULL;
1134 ++current_arg;
1135 break;
1136 case 'V':
1137 print_version ();
1138 break;
1139 case 'h':
1140 case 'H':
1141 help_asked = true;
1142 break;
1143
1144 /* Etags options */
1145 case 'D': constantypedefs = false; break;
1146 case 'i': included_files[nincluded_files++] = optarg; break;
1147
1148 /* Ctags options. */
1149 case 'B': searchar = '?'; break;
1150 case 'd': constantypedefs = true; break;
1151 case 't': typedefs = true; break;
1152 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1153 case 'u': update = true; break;
1154 case 'v': vgrind_style = true; /*FALLTHRU*/
1155 case 'x': cxref_style = true; break;
1156 case 'w': no_warnings = true; break;
1157 default:
1158 suggest_asking_for_help ();
1159 /* NOTREACHED */
1160 }
1161
1162 /* No more options. Store the rest of arguments. */
1163 for (; optind < argc; optind++)
1164 {
1165 argbuffer[current_arg].arg_type = at_filename;
1166 argbuffer[current_arg].what = argv[optind];
1167 len = strlen (argv[optind]);
1168 if (whatlen_max < len)
1169 whatlen_max = len;
1170 ++current_arg;
1171 ++file_count;
1172 }
1173
1174 argbuffer[current_arg].arg_type = at_end;
1175
1176 if (help_asked)
1177 print_help (argbuffer);
1178 /* NOTREACHED */
1179
1180 if (nincluded_files == 0 && file_count == 0)
1181 {
1182 error ("no input files specified.");
1183 suggest_asking_for_help ();
1184 /* NOTREACHED */
1185 }
1186
1187 if (tagfile == NULL)
1188 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1189 cwd = etags_getcwd (); /* the current working directory */
1190 if (cwd[strlen (cwd) - 1] != '/')
1191 {
1192 char *oldcwd = cwd;
1193 cwd = concat (oldcwd, "/", "");
1194 free (oldcwd);
1195 }
1196
1197 /* Compute base directory for relative file names. */
1198 if (streq (tagfile, "-")
1199 || strneq (tagfile, "/dev/", 5))
1200 tagfiledir = cwd; /* relative file names are relative to cwd */
1201 else
1202 {
1203 canonicalize_filename (tagfile);
1204 tagfiledir = absolute_dirname (tagfile, cwd);
1205 }
1206
1207 linebuffer_init (&lb);
1208 linebuffer_init (&filename_lb);
1209 linebuffer_init (&filebuf);
1210 linebuffer_init (&token_name);
1211
1212 if (!CTAGS)
1213 {
1214 if (streq (tagfile, "-"))
1215 {
1216 tagf = stdout;
1217 SET_BINARY (fileno (stdout));
1218 }
1219 else
1220 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1221 if (tagf == NULL)
1222 pfatal (tagfile);
1223 }
1224
1225 /*
1226 * Loop through files finding functions.
1227 */
1228 for (i = 0; i < current_arg; i++)
1229 {
1230 static language *lang; /* non-NULL if language is forced */
1231 char *this_file;
1232
1233 switch (argbuffer[i].arg_type)
1234 {
1235 case at_language:
1236 lang = argbuffer[i].lang;
1237 break;
1238 case at_regexp:
1239 analyze_regex (argbuffer[i].what);
1240 break;
1241 case at_filename:
1242 this_file = argbuffer[i].what;
1243 /* Input file named "-" means read file names from stdin
1244 (one per line) and use them. */
1245 if (streq (this_file, "-"))
1246 {
1247 if (parsing_stdin)
1248 fatal ("cannot parse standard input AND read file names from it",
1249 (char *)NULL);
1250 while (readline_internal (&filename_lb, stdin) > 0)
1251 process_file_name (filename_lb.buffer, lang);
1252 }
1253 else
1254 process_file_name (this_file, lang);
1255 break;
1256 case at_stdin:
1257 this_file = argbuffer[i].what;
1258 process_file (stdin, this_file, lang);
1259 break;
1260 }
1261 }
1262
1263 free_regexps ();
1264 free (lb.buffer);
1265 free (filebuf.buffer);
1266 free (token_name.buffer);
1267
1268 if (!CTAGS || cxref_style)
1269 {
1270 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1271 put_entries (nodehead);
1272 free_tree (nodehead);
1273 nodehead = NULL;
1274 if (!CTAGS)
1275 {
1276 fdesc *fdp;
1277
1278 /* Output file entries that have no tags. */
1279 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1280 if (!fdp->written)
1281 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1282
1283 while (nincluded_files-- > 0)
1284 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1285
1286 if (fclose (tagf) == EOF)
1287 pfatal (tagfile);
1288 }
1289
1290 exit (EXIT_SUCCESS);
1291 }
1292
1293 /* From here on, we are in (CTAGS && !cxref_style) */
1294 if (update)
1295 {
1296 char *cmd =
1297 xmalloc (strlen (tagfile) + whatlen_max +
1298 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1299 for (i = 0; i < current_arg; ++i)
1300 {
1301 switch (argbuffer[i].arg_type)
1302 {
1303 case at_filename:
1304 case at_stdin:
1305 break;
1306 default:
1307 continue; /* the for loop */
1308 }
1309 char *z = stpcpy (cmd, "mv ");
1310 z = stpcpy (z, tagfile);
1311 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1312 z = stpcpy (z, argbuffer[i].what);
1313 z = stpcpy (z, "\t' OTAGS >");
1314 z = stpcpy (z, tagfile);
1315 strcpy (z, ";rm OTAGS");
1316 if (system (cmd) != EXIT_SUCCESS)
1317 fatal ("failed to execute shell command", (char *)NULL);
1318 }
1319 free (cmd);
1320 append_to_tagfile = true;
1321 }
1322
1323 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1324 if (tagf == NULL)
1325 pfatal (tagfile);
1326 put_entries (nodehead); /* write all the tags (CTAGS) */
1327 free_tree (nodehead);
1328 nodehead = NULL;
1329 if (fclose (tagf) == EOF)
1330 pfatal (tagfile);
1331
1332 if (CTAGS)
1333 if (append_to_tagfile || update)
1334 {
1335 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1336 /* Maybe these should be used:
1337 setenv ("LC_COLLATE", "C", 1);
1338 setenv ("LC_ALL", "C", 1); */
1339 char *z = stpcpy (cmd, "sort -u -o ");
1340 z = stpcpy (z, tagfile);
1341 *z++ = ' ';
1342 strcpy (z, tagfile);
1343 exit (system (cmd));
1344 }
1345 return EXIT_SUCCESS;
1346 }
1347
1348
1349 /*
1350 * Return a compressor given the file name. If EXTPTR is non-zero,
1351 * return a pointer into FILE where the compressor-specific
1352 * extension begins. If no compressor is found, NULL is returned
1353 * and EXTPTR is not significant.
1354 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1355 */
1356 static compressor *
1357 get_compressor_from_suffix (char *file, char **extptr)
1358 {
1359 compressor *compr;
1360 char *slash, *suffix;
1361
1362 /* File has been processed by canonicalize_filename,
1363 so we don't need to consider backslashes on DOS_NT. */
1364 slash = strrchr (file, '/');
1365 suffix = strrchr (file, '.');
1366 if (suffix == NULL || suffix < slash)
1367 return NULL;
1368 if (extptr != NULL)
1369 *extptr = suffix;
1370 suffix += 1;
1371 /* Let those poor souls who live with DOS 8+3 file name limits get
1372 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1373 Only the first do loop is run if not MSDOS */
1374 do
1375 {
1376 for (compr = compressors; compr->suffix != NULL; compr++)
1377 if (streq (compr->suffix, suffix))
1378 return compr;
1379 if (!MSDOS)
1380 break; /* do it only once: not really a loop */
1381 if (extptr != NULL)
1382 *extptr = ++suffix;
1383 } while (*suffix != '\0');
1384 return NULL;
1385 }
1386
1387
1388
1389 /*
1390 * Return a language given the name.
1391 */
1392 static language *
1393 get_language_from_langname (const char *name)
1394 {
1395 language *lang;
1396
1397 if (name == NULL)
1398 error ("empty language name");
1399 else
1400 {
1401 for (lang = lang_names; lang->name != NULL; lang++)
1402 if (streq (name, lang->name))
1403 return lang;
1404 error ("unknown language \"%s\"", name);
1405 }
1406
1407 return NULL;
1408 }
1409
1410
1411 /*
1412 * Return a language given the interpreter name.
1413 */
1414 static language *
1415 get_language_from_interpreter (char *interpreter)
1416 {
1417 language *lang;
1418 const char **iname;
1419
1420 if (interpreter == NULL)
1421 return NULL;
1422 for (lang = lang_names; lang->name != NULL; lang++)
1423 if (lang->interpreters != NULL)
1424 for (iname = lang->interpreters; *iname != NULL; iname++)
1425 if (streq (*iname, interpreter))
1426 return lang;
1427
1428 return NULL;
1429 }
1430
1431
1432
1433 /*
1434 * Return a language given the file name.
1435 */
1436 static language *
1437 get_language_from_filename (char *file, int case_sensitive)
1438 {
1439 language *lang;
1440 const char **name, **ext, *suffix;
1441
1442 /* Try whole file name first. */
1443 for (lang = lang_names; lang->name != NULL; lang++)
1444 if (lang->filenames != NULL)
1445 for (name = lang->filenames; *name != NULL; name++)
1446 if ((case_sensitive)
1447 ? streq (*name, file)
1448 : strcaseeq (*name, file))
1449 return lang;
1450
1451 /* If not found, try suffix after last dot. */
1452 suffix = strrchr (file, '.');
1453 if (suffix == NULL)
1454 return NULL;
1455 suffix += 1;
1456 for (lang = lang_names; lang->name != NULL; lang++)
1457 if (lang->suffixes != NULL)
1458 for (ext = lang->suffixes; *ext != NULL; ext++)
1459 if ((case_sensitive)
1460 ? streq (*ext, suffix)
1461 : strcaseeq (*ext, suffix))
1462 return lang;
1463 return NULL;
1464 }
1465
1466 \f
1467 /*
1468 * This routine is called on each file argument.
1469 */
1470 static void
1471 process_file_name (char *file, language *lang)
1472 {
1473 struct stat stat_buf;
1474 FILE *inf;
1475 fdesc *fdp;
1476 compressor *compr;
1477 char *compressed_name, *uncompressed_name;
1478 char *ext, *real_name, *tmp_name;
1479 int retval;
1480
1481 canonicalize_filename (file);
1482 if (streq (file, tagfile) && !streq (tagfile, "-"))
1483 {
1484 error ("skipping inclusion of %s in self.", file);
1485 return;
1486 }
1487 compr = get_compressor_from_suffix (file, &ext);
1488 if (compr)
1489 {
1490 real_name = compressed_name = savestr (file);
1491 uncompressed_name = savenstr (file, ext - file);
1492 }
1493 else
1494 {
1495 compressed_name = NULL;
1496 real_name = uncompressed_name = savestr (file);
1497 }
1498
1499 /* If the canonicalized uncompressed name
1500 has already been dealt with, skip it silently. */
1501 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1502 {
1503 assert (fdp->infname != NULL);
1504 if (streq (uncompressed_name, fdp->infname))
1505 goto cleanup;
1506 }
1507
1508 if (stat (real_name, &stat_buf) != 0)
1509 {
1510 /* Reset real_name and try with a different name. */
1511 real_name = NULL;
1512 if (compressed_name != NULL) /* try with the given suffix */
1513 {
1514 if (stat (uncompressed_name, &stat_buf) == 0)
1515 real_name = uncompressed_name;
1516 }
1517 else /* try all possible suffixes */
1518 {
1519 for (compr = compressors; compr->suffix != NULL; compr++)
1520 {
1521 compressed_name = concat (file, ".", compr->suffix);
1522 if (stat (compressed_name, &stat_buf) != 0)
1523 {
1524 if (MSDOS)
1525 {
1526 char *suf = compressed_name + strlen (file);
1527 size_t suflen = strlen (compr->suffix) + 1;
1528 for ( ; suf[1]; suf++, suflen--)
1529 {
1530 memmove (suf, suf + 1, suflen);
1531 if (stat (compressed_name, &stat_buf) == 0)
1532 {
1533 real_name = compressed_name;
1534 break;
1535 }
1536 }
1537 if (real_name != NULL)
1538 break;
1539 } /* MSDOS */
1540 free (compressed_name);
1541 compressed_name = NULL;
1542 }
1543 else
1544 {
1545 real_name = compressed_name;
1546 break;
1547 }
1548 }
1549 }
1550 if (real_name == NULL)
1551 {
1552 perror (file);
1553 goto cleanup;
1554 }
1555 } /* try with a different name */
1556
1557 if (!S_ISREG (stat_buf.st_mode))
1558 {
1559 error ("skipping %s: it is not a regular file.", real_name);
1560 goto cleanup;
1561 }
1562 if (real_name == compressed_name)
1563 {
1564 tmp_name = etags_mktmp ();
1565 if (!tmp_name)
1566 inf = NULL;
1567 else
1568 {
1569 char *cmd1 = concat (compr->command, " ", real_name);
1570 char *cmd = concat (cmd1, " > ", tmp_name);
1571 free (cmd1);
1572 if (system (cmd) == -1)
1573 inf = NULL;
1574 else
1575 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1576 free (cmd);
1577 }
1578 }
1579 else
1580 inf = fopen (real_name, "r" FOPEN_BINARY);
1581 if (inf == NULL)
1582 {
1583 perror (real_name);
1584 goto cleanup;
1585 }
1586
1587 process_file (inf, uncompressed_name, lang);
1588
1589 retval = fclose (inf);
1590 if (real_name == compressed_name)
1591 {
1592 remove (tmp_name);
1593 free (tmp_name);
1594 }
1595 if (retval < 0)
1596 pfatal (file);
1597
1598 cleanup:
1599 free (compressed_name);
1600 free (uncompressed_name);
1601 last_node = NULL;
1602 curfdp = NULL;
1603 return;
1604 }
1605
1606 static void
1607 process_file (FILE *fh, char *fn, language *lang)
1608 {
1609 static const fdesc emptyfdesc;
1610 fdesc *fdp;
1611
1612 /* Create a new input file description entry. */
1613 fdp = xnew (1, fdesc);
1614 *fdp = emptyfdesc;
1615 fdp->next = fdhead;
1616 fdp->infname = savestr (fn);
1617 fdp->lang = lang;
1618 fdp->infabsname = absolute_filename (fn, cwd);
1619 fdp->infabsdir = absolute_dirname (fn, cwd);
1620 if (filename_is_absolute (fn))
1621 {
1622 /* An absolute file name. Canonicalize it. */
1623 fdp->taggedfname = absolute_filename (fn, NULL);
1624 }
1625 else
1626 {
1627 /* A file name relative to cwd. Make it relative
1628 to the directory of the tags file. */
1629 fdp->taggedfname = relative_filename (fn, tagfiledir);
1630 }
1631 fdp->usecharno = true; /* use char position when making tags */
1632 fdp->prop = NULL;
1633 fdp->written = false; /* not written on tags file yet */
1634
1635 fdhead = fdp;
1636 curfdp = fdhead; /* the current file description */
1637
1638 find_entries (fh);
1639
1640 /* If not Ctags, and if this is not metasource and if it contained no #line
1641 directives, we can write the tags and free all nodes pointing to
1642 curfdp. */
1643 if (!CTAGS
1644 && curfdp->usecharno /* no #line directives in this file */
1645 && !curfdp->lang->metasource)
1646 {
1647 node *np, *prev;
1648
1649 /* Look for the head of the sublist relative to this file. See add_node
1650 for the structure of the node tree. */
1651 prev = NULL;
1652 for (np = nodehead; np != NULL; prev = np, np = np->left)
1653 if (np->fdp == curfdp)
1654 break;
1655
1656 /* If we generated tags for this file, write and delete them. */
1657 if (np != NULL)
1658 {
1659 /* This is the head of the last sublist, if any. The following
1660 instructions depend on this being true. */
1661 assert (np->left == NULL);
1662
1663 assert (fdhead == curfdp);
1664 assert (last_node->fdp == curfdp);
1665 put_entries (np); /* write tags for file curfdp->taggedfname */
1666 free_tree (np); /* remove the written nodes */
1667 if (prev == NULL)
1668 nodehead = NULL; /* no nodes left */
1669 else
1670 prev->left = NULL; /* delete the pointer to the sublist */
1671 }
1672 }
1673 }
1674
1675 /*
1676 * This routine opens the specified file and calls the function
1677 * which finds the function and type definitions.
1678 */
1679 static void
1680 find_entries (FILE *inf)
1681 {
1682 char *cp;
1683 language *lang = curfdp->lang;
1684 Lang_function *parser = NULL;
1685
1686 /* If user specified a language, use it. */
1687 if (lang != NULL && lang->function != NULL)
1688 {
1689 parser = lang->function;
1690 }
1691
1692 /* Else try to guess the language given the file name. */
1693 if (parser == NULL)
1694 {
1695 lang = get_language_from_filename (curfdp->infname, true);
1696 if (lang != NULL && lang->function != NULL)
1697 {
1698 curfdp->lang = lang;
1699 parser = lang->function;
1700 }
1701 }
1702
1703 /* Else look for sharp-bang as the first two characters. */
1704 if (parser == NULL
1705 && readline_internal (&lb, inf) > 0
1706 && lb.len >= 2
1707 && lb.buffer[0] == '#'
1708 && lb.buffer[1] == '!')
1709 {
1710 char *lp;
1711
1712 /* Set lp to point at the first char after the last slash in the
1713 line or, if no slashes, at the first nonblank. Then set cp to
1714 the first successive blank and terminate the string. */
1715 lp = strrchr (lb.buffer+2, '/');
1716 if (lp != NULL)
1717 lp += 1;
1718 else
1719 lp = skip_spaces (lb.buffer + 2);
1720 cp = skip_non_spaces (lp);
1721 *cp = '\0';
1722
1723 if (strlen (lp) > 0)
1724 {
1725 lang = get_language_from_interpreter (lp);
1726 if (lang != NULL && lang->function != NULL)
1727 {
1728 curfdp->lang = lang;
1729 parser = lang->function;
1730 }
1731 }
1732 }
1733
1734 rewind (inf);
1735
1736 /* Else try to guess the language given the case insensitive file name. */
1737 if (parser == NULL)
1738 {
1739 lang = get_language_from_filename (curfdp->infname, false);
1740 if (lang != NULL && lang->function != NULL)
1741 {
1742 curfdp->lang = lang;
1743 parser = lang->function;
1744 }
1745 }
1746
1747 /* Else try Fortran or C. */
1748 if (parser == NULL)
1749 {
1750 node *old_last_node = last_node;
1751
1752 curfdp->lang = get_language_from_langname ("fortran");
1753 find_entries (inf);
1754
1755 if (old_last_node == last_node)
1756 /* No Fortran entries found. Try C. */
1757 {
1758 rewind (inf);
1759 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1760 find_entries (inf);
1761 }
1762 return;
1763 }
1764
1765 if (!no_line_directive
1766 && curfdp->lang != NULL && curfdp->lang->metasource)
1767 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1768 file, or anyway we parsed a file that is automatically generated from
1769 this one. If this is the case, the bingo.c file contained #line
1770 directives that generated tags pointing to this file. Let's delete
1771 them all before parsing this file, which is the real source. */
1772 {
1773 fdesc **fdpp = &fdhead;
1774 while (*fdpp != NULL)
1775 if (*fdpp != curfdp
1776 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1777 /* We found one of those! We must delete both the file description
1778 and all tags referring to it. */
1779 {
1780 fdesc *badfdp = *fdpp;
1781
1782 /* Delete the tags referring to badfdp->taggedfname
1783 that were obtained from badfdp->infname. */
1784 invalidate_nodes (badfdp, &nodehead);
1785
1786 *fdpp = badfdp->next; /* remove the bad description from the list */
1787 free_fdesc (badfdp);
1788 }
1789 else
1790 fdpp = &(*fdpp)->next; /* advance the list pointer */
1791 }
1792
1793 assert (parser != NULL);
1794
1795 /* Generic initializations before reading from file. */
1796 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1797
1798 /* Generic initializations before parsing file with readline. */
1799 lineno = 0; /* reset global line number */
1800 charno = 0; /* reset global char number */
1801 linecharno = 0; /* reset global char number of line start */
1802
1803 parser (inf);
1804
1805 regex_tag_multiline ();
1806 }
1807
1808 \f
1809 /*
1810 * Check whether an implicitly named tag should be created,
1811 * then call `pfnote'.
1812 * NAME is a string that is internally copied by this function.
1813 *
1814 * TAGS format specification
1815 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1816 * The following is explained in some more detail in etc/ETAGS.EBNF.
1817 *
1818 * make_tag creates tags with "implicit tag names" (unnamed tags)
1819 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1820 * 1. NAME does not contain any of the characters in NONAM;
1821 * 2. LINESTART contains name as either a rightmost, or rightmost but
1822 * one character, substring;
1823 * 3. the character, if any, immediately before NAME in LINESTART must
1824 * be a character in NONAM;
1825 * 4. the character, if any, immediately after NAME in LINESTART must
1826 * also be a character in NONAM.
1827 *
1828 * The implementation uses the notinname() macro, which recognizes the
1829 * characters stored in the string `nonam'.
1830 * etags.el needs to use the same characters that are in NONAM.
1831 */
1832 static void
1833 make_tag (const char *name, /* tag name, or NULL if unnamed */
1834 int namelen, /* tag length */
1835 bool is_func, /* tag is a function */
1836 char *linestart, /* start of the line where tag is */
1837 int linelen, /* length of the line where tag is */
1838 int lno, /* line number */
1839 long int cno) /* character number */
1840 {
1841 bool named = (name != NULL && namelen > 0);
1842 char *nname = NULL;
1843
1844 if (!CTAGS && named) /* maybe set named to false */
1845 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1846 such that etags.el can guess a name from it. */
1847 {
1848 int i;
1849 register const char *cp = name;
1850
1851 for (i = 0; i < namelen; i++)
1852 if (notinname (*cp++))
1853 break;
1854 if (i == namelen) /* rule #1 */
1855 {
1856 cp = linestart + linelen - namelen;
1857 if (notinname (linestart[linelen-1]))
1858 cp -= 1; /* rule #4 */
1859 if (cp >= linestart /* rule #2 */
1860 && (cp == linestart
1861 || notinname (cp[-1])) /* rule #3 */
1862 && strneq (name, cp, namelen)) /* rule #2 */
1863 named = false; /* use implicit tag name */
1864 }
1865 }
1866
1867 if (named)
1868 nname = savenstr (name, namelen);
1869
1870 pfnote (nname, is_func, linestart, linelen, lno, cno);
1871 }
1872
1873 /* Record a tag. */
1874 static void
1875 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1876 long int cno)
1877 /* tag name, or NULL if unnamed */
1878 /* tag is a function */
1879 /* start of the line where tag is */
1880 /* length of the line where tag is */
1881 /* line number */
1882 /* character number */
1883 {
1884 register node *np;
1885
1886 assert (name == NULL || name[0] != '\0');
1887 if (CTAGS && name == NULL)
1888 return;
1889
1890 np = xnew (1, node);
1891
1892 /* If ctags mode, change name "main" to M<thisfilename>. */
1893 if (CTAGS && !cxref_style && streq (name, "main"))
1894 {
1895 char *fp = strrchr (curfdp->taggedfname, '/');
1896 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1897 fp = strrchr (np->name, '.');
1898 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1899 fp[0] = '\0';
1900 }
1901 else
1902 np->name = name;
1903 np->valid = true;
1904 np->been_warned = false;
1905 np->fdp = curfdp;
1906 np->is_func = is_func;
1907 np->lno = lno;
1908 if (np->fdp->usecharno)
1909 /* Our char numbers are 0-base, because of C language tradition?
1910 ctags compatibility? old versions compatibility? I don't know.
1911 Anyway, since emacs's are 1-base we expect etags.el to take care
1912 of the difference. If we wanted to have 1-based numbers, we would
1913 uncomment the +1 below. */
1914 np->cno = cno /* + 1 */ ;
1915 else
1916 np->cno = invalidcharno;
1917 np->left = np->right = NULL;
1918 if (CTAGS && !cxref_style)
1919 {
1920 if (strlen (linestart) < 50)
1921 np->regex = concat (linestart, "$", "");
1922 else
1923 np->regex = savenstr (linestart, 50);
1924 }
1925 else
1926 np->regex = savenstr (linestart, linelen);
1927
1928 add_node (np, &nodehead);
1929 }
1930
1931 /*
1932 * free_tree ()
1933 * recurse on left children, iterate on right children.
1934 */
1935 static void
1936 free_tree (register node *np)
1937 {
1938 while (np)
1939 {
1940 register node *node_right = np->right;
1941 free_tree (np->left);
1942 free (np->name);
1943 free (np->regex);
1944 free (np);
1945 np = node_right;
1946 }
1947 }
1948
1949 /*
1950 * free_fdesc ()
1951 * delete a file description
1952 */
1953 static void
1954 free_fdesc (register fdesc *fdp)
1955 {
1956 free (fdp->infname);
1957 free (fdp->infabsname);
1958 free (fdp->infabsdir);
1959 free (fdp->taggedfname);
1960 free (fdp->prop);
1961 free (fdp);
1962 }
1963
1964 /*
1965 * add_node ()
1966 * Adds a node to the tree of nodes. In etags mode, sort by file
1967 * name. In ctags mode, sort by tag name. Make no attempt at
1968 * balancing.
1969 *
1970 * add_node is the only function allowed to add nodes, so it can
1971 * maintain state.
1972 */
1973 static void
1974 add_node (node *np, node **cur_node_p)
1975 {
1976 register int dif;
1977 register node *cur_node = *cur_node_p;
1978
1979 if (cur_node == NULL)
1980 {
1981 *cur_node_p = np;
1982 last_node = np;
1983 return;
1984 }
1985
1986 if (!CTAGS)
1987 /* Etags Mode */
1988 {
1989 /* For each file name, tags are in a linked sublist on the right
1990 pointer. The first tags of different files are a linked list
1991 on the left pointer. last_node points to the end of the last
1992 used sublist. */
1993 if (last_node != NULL && last_node->fdp == np->fdp)
1994 {
1995 /* Let's use the same sublist as the last added node. */
1996 assert (last_node->right == NULL);
1997 last_node->right = np;
1998 last_node = np;
1999 }
2000 else if (cur_node->fdp == np->fdp)
2001 {
2002 /* Scanning the list we found the head of a sublist which is
2003 good for us. Let's scan this sublist. */
2004 add_node (np, &cur_node->right);
2005 }
2006 else
2007 /* The head of this sublist is not good for us. Let's try the
2008 next one. */
2009 add_node (np, &cur_node->left);
2010 } /* if ETAGS mode */
2011
2012 else
2013 {
2014 /* Ctags Mode */
2015 dif = strcmp (np->name, cur_node->name);
2016
2017 /*
2018 * If this tag name matches an existing one, then
2019 * do not add the node, but maybe print a warning.
2020 */
2021 if (no_duplicates && !dif)
2022 {
2023 if (np->fdp == cur_node->fdp)
2024 {
2025 if (!no_warnings)
2026 {
2027 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2028 np->fdp->infname, lineno, np->name);
2029 fprintf (stderr, "Second entry ignored\n");
2030 }
2031 }
2032 else if (!cur_node->been_warned && !no_warnings)
2033 {
2034 fprintf
2035 (stderr,
2036 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2037 np->fdp->infname, cur_node->fdp->infname, np->name);
2038 cur_node->been_warned = true;
2039 }
2040 return;
2041 }
2042
2043 /* Actually add the node */
2044 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2045 } /* if CTAGS mode */
2046 }
2047
2048 /*
2049 * invalidate_nodes ()
2050 * Scan the node tree and invalidate all nodes pointing to the
2051 * given file description (CTAGS case) or free them (ETAGS case).
2052 */
2053 static void
2054 invalidate_nodes (fdesc *badfdp, node **npp)
2055 {
2056 node *np = *npp;
2057
2058 if (np == NULL)
2059 return;
2060
2061 if (CTAGS)
2062 {
2063 if (np->left != NULL)
2064 invalidate_nodes (badfdp, &np->left);
2065 if (np->fdp == badfdp)
2066 np->valid = false;
2067 if (np->right != NULL)
2068 invalidate_nodes (badfdp, &np->right);
2069 }
2070 else
2071 {
2072 assert (np->fdp != NULL);
2073 if (np->fdp == badfdp)
2074 {
2075 *npp = np->left; /* detach the sublist from the list */
2076 np->left = NULL; /* isolate it */
2077 free_tree (np); /* free it */
2078 invalidate_nodes (badfdp, npp);
2079 }
2080 else
2081 invalidate_nodes (badfdp, &np->left);
2082 }
2083 }
2084
2085 \f
2086 static int total_size_of_entries (node *);
2087 static int number_len (long) ATTRIBUTE_CONST;
2088
/* Count the decimal digits needed to print the non-negative number NUM;
   zero counts as one digit.  */
static int
number_len (long int num)
{
  int digits = 0;

  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2098
2099 /*
2100 * Return total number of characters that put_entries will output for
2101 * the nodes in the linked list at the right of the specified node.
2102 * This count is irrelevant with etags.el since emacs 19.34 at least,
2103 * but is still supplied for backward compatibility.
2104 */
2105 static int
2106 total_size_of_entries (register node *np)
2107 {
2108 register int total = 0;
2109
2110 for (; np != NULL; np = np->right)
2111 if (np->valid)
2112 {
2113 total += strlen (np->regex) + 1; /* pat\177 */
2114 if (np->name != NULL)
2115 total += strlen (np->name) + 1; /* name\001 */
2116 total += number_len ((long) np->lno) + 1; /* lno, */
2117 if (np->cno != invalidcharno) /* cno */
2118 total += number_len (np->cno);
2119 total += 1; /* newline */
2120 }
2121
2122 return total;
2123 }
2124
2125 static void
2126 put_entries (register node *np)
2127 {
2128 register char *sp;
2129 static fdesc *fdp = NULL;
2130
2131 if (np == NULL)
2132 return;
2133
2134 /* Output subentries that precede this one */
2135 if (CTAGS)
2136 put_entries (np->left);
2137
2138 /* Output this entry */
2139 if (np->valid)
2140 {
2141 if (!CTAGS)
2142 {
2143 /* Etags mode */
2144 if (fdp != np->fdp)
2145 {
2146 fdp = np->fdp;
2147 fprintf (tagf, "\f\n%s,%d\n",
2148 fdp->taggedfname, total_size_of_entries (np));
2149 fdp->written = true;
2150 }
2151 fputs (np->regex, tagf);
2152 fputc ('\177', tagf);
2153 if (np->name != NULL)
2154 {
2155 fputs (np->name, tagf);
2156 fputc ('\001', tagf);
2157 }
2158 fprintf (tagf, "%d,", np->lno);
2159 if (np->cno != invalidcharno)
2160 fprintf (tagf, "%ld", np->cno);
2161 fputs ("\n", tagf);
2162 }
2163 else
2164 {
2165 /* Ctags mode */
2166 if (np->name == NULL)
2167 error ("internal error: NULL name in ctags mode.");
2168
2169 if (cxref_style)
2170 {
2171 if (vgrind_style)
2172 fprintf (stdout, "%s %s %d\n",
2173 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2174 else
2175 fprintf (stdout, "%-16s %3d %-16s %s\n",
2176 np->name, np->lno, np->fdp->taggedfname, np->regex);
2177 }
2178 else
2179 {
2180 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2181
2182 if (np->is_func)
2183 { /* function or #define macro with args */
2184 putc (searchar, tagf);
2185 putc ('^', tagf);
2186
2187 for (sp = np->regex; *sp; sp++)
2188 {
2189 if (*sp == '\\' || *sp == searchar)
2190 putc ('\\', tagf);
2191 putc (*sp, tagf);
2192 }
2193 putc (searchar, tagf);
2194 }
2195 else
2196 { /* anything else; text pattern inadequate */
2197 fprintf (tagf, "%d", np->lno);
2198 }
2199 putc ('\n', tagf);
2200 }
2201 }
2202 } /* if this node contains a valid tag */
2203
2204 /* Output subentries that follow this one */
2205 put_entries (np->right);
2206 if (!CTAGS)
2207 put_entries (np->left);
2208 }
2209
2210 \f
/* Bit flags describing the C dialect being parsed.  C_EXT masks the low
   12 bits, which hold the dialect; C_AUTO and YACC are separate flag
   bits above that mask.  C_STAR (0x3) and C_JAVA (0x5) both contain the
   C_PLPL bit (0x1), so a test of the form (c_ext & C_PLPL) is also true
   for them; the keyword table below uses (C_JAVA & ~C_PLPL) to select
   the Java-only bit.  */
2211 /* C extensions. */
2212 #define C_EXT 0x00fff /* C extensions */
2213 #define C_PLAIN 0x00000 /* C */
2214 #define C_PLPL 0x00001 /* C++ */
2215 #define C_STAR 0x00003 /* C* */
2216 #define C_JAVA 0x00005 /* JAVA */
2217 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2218 #define YACC 0x10000 /* yacc file */
2219
2220 /*
2221 * The C symbol tables.
 *
 * enum sym_type is the class of a token as reported by C_symtype ():
 * st_none means the token is not in the keyword table, or its table
 * entry is not enabled for the current dialect mask; every other value
 * is the third field of the matching keyword table entry.
2222 */
2223 enum sym_type
2224 {
2225 st_none,
2226 st_C_objprot, st_C_objimpl, st_C_objend,
2227 st_C_gnumacro,
2228 st_C_ignore, st_C_attribute,
2229 st_C_javastruct,
2230 st_C_operator,
2231 st_C_class, st_C_template,
2232 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2233 };
2234
2235 /* Feed stuff between (but not including) %[ and %] lines to:
2236 gperf -m 5
2237 %[
2238 %compare-strncmp
2239 %enum
2240 %struct-type
2241 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2242 %%
2243 if, 0, st_C_ignore
2244 for, 0, st_C_ignore
2245 while, 0, st_C_ignore
2246 switch, 0, st_C_ignore
2247 return, 0, st_C_ignore
2248 __attribute__, 0, st_C_attribute
2249 GTY, 0, st_C_attribute
2250 @interface, 0, st_C_objprot
2251 @protocol, 0, st_C_objprot
2252 @implementation,0, st_C_objimpl
2253 @end, 0, st_C_objend
2254 import, (C_JAVA & ~C_PLPL), st_C_ignore
2255 package, (C_JAVA & ~C_PLPL), st_C_ignore
2256 friend, C_PLPL, st_C_ignore
2257 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2258 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2259 interface, (C_JAVA & ~C_PLPL), st_C_struct
2260 class, 0, st_C_class
2261 namespace, C_PLPL, st_C_struct
2262 domain, C_STAR, st_C_struct
2263 union, 0, st_C_struct
2264 struct, 0, st_C_struct
2265 extern, 0, st_C_extern
2266 enum, 0, st_C_enum
2267 typedef, 0, st_C_typedef
2268 define, 0, st_C_define
2269 undef, 0, st_C_define
2270 operator, C_PLPL, st_C_operator
2271 template, 0, st_C_template
2272 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2273 DEFUN, 0, st_C_gnumacro
2274 SYSCALL, 0, st_C_gnumacro
2275 ENTRY, 0, st_C_gnumacro
2276 PSEUDO, 0, st_C_gnumacro
2277 # These are defined inside C functions, so currently they are not met.
2278 # EXFUN used in glibc, DEFVAR_* in emacs.
2279 #EXFUN, 0, st_C_gnumacro
2280 #DEFVAR_, 0, st_C_gnumacro
2281 %]
2282 and replace lines between %< and %> with its output, then:
2283 - remove the #if characterset check
2284 - make in_word_set static and not inline. */
2285 /*%<*/
2286 /* C code produced by gperf version 3.0.1 */
2287 /* Command-line: gperf -m 5 */
2288 /* Computed positions: -k'2-3' */
2289
2290 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2291 /* maximum key range = 33, duplicates = 0 */
2292
/* Hash function for the C keyword table (originally produced by gperf,
   key positions 2-3).  The hash is LEN plus the association value of the
   second character, plus, for every length other than 2, the association
   value of the third character.  STR must therefore have at least two
   characters when LEN is 2 and at least three otherwise.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Two-character keys contribute only their second character.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2338
2339 static struct C_stab_entry *
2340 in_word_set (register const char *str, register unsigned int len)
2341 {
2342 enum
2343 {
2344 TOTAL_KEYWORDS = 33,
2345 MIN_WORD_LENGTH = 2,
2346 MAX_WORD_LENGTH = 15,
2347 MIN_HASH_VALUE = 2,
2348 MAX_HASH_VALUE = 34
2349 };
2350
2351 static struct C_stab_entry wordlist[] =
2352 {
2353 {""}, {""},
2354 {"if", 0, st_C_ignore},
2355 {"GTY", 0, st_C_attribute},
2356 {"@end", 0, st_C_objend},
2357 {"union", 0, st_C_struct},
2358 {"define", 0, st_C_define},
2359 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2360 {"template", 0, st_C_template},
2361 {"operator", C_PLPL, st_C_operator},
2362 {"@interface", 0, st_C_objprot},
2363 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2364 {"friend", C_PLPL, st_C_ignore},
2365 {"typedef", 0, st_C_typedef},
2366 {"return", 0, st_C_ignore},
2367 {"@implementation",0, st_C_objimpl},
2368 {"@protocol", 0, st_C_objprot},
2369 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2370 {"extern", 0, st_C_extern},
2371 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2372 {"struct", 0, st_C_struct},
2373 {"domain", C_STAR, st_C_struct},
2374 {"switch", 0, st_C_ignore},
2375 {"enum", 0, st_C_enum},
2376 {"for", 0, st_C_ignore},
2377 {"namespace", C_PLPL, st_C_struct},
2378 {"class", 0, st_C_class},
2379 {"while", 0, st_C_ignore},
2380 {"undef", 0, st_C_define},
2381 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2382 {"__attribute__", 0, st_C_attribute},
2383 {"SYSCALL", 0, st_C_gnumacro},
2384 {"ENTRY", 0, st_C_gnumacro},
2385 {"PSEUDO", 0, st_C_gnumacro},
2386 {"DEFUN", 0, st_C_gnumacro}
2387 };
2388
2389 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2390 {
2391 int key = hash (str, len);
2392
2393 if (key <= MAX_HASH_VALUE && key >= 0)
2394 {
2395 const char *s = wordlist[key].name;
2396
2397 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2398 return &wordlist[key];
2399 }
2400 }
2401 return 0;
2402 }
2403 /*%>*/
2404
2405 static enum sym_type
2406 C_symtype (char *str, int len, int c_ext)
2407 {
2408 register struct C_stab_entry *se = in_word_set (str, len);
2409
2410 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2411 return st_none;
2412 return se->type;
2413 }
2414
2415 \f
2416 /*
2417 * Ignoring __attribute__ ((list))
 * consider_token sets this flag when it meets a token classified as
 * st_C_attribute.
2418 */
2419 static bool inattribute; /* looking at an __attribute__ construct */
2420
2421 /*
2422 * C functions and variables are recognized using a simple
2423 * finite automaton. fvdef is its state variable.
 * C_entries resets it to fvnone before it starts scanning.
2424 */
2425 static enum
2426 {
2427 fvnone, /* nothing seen */
2428 fdefunkey, /* Emacs DEFUN keyword seen */
2429 fdefunname, /* Emacs DEFUN name seen */
2430 foperator, /* func: operator keyword seen (cplpl) */
2431 fvnameseen, /* function or variable name seen */
2432 fstartlist, /* func: just after open parenthesis */
2433 finlist, /* func: in parameter list */
2434 flistseen, /* func: after parameter list */
2435 fignore, /* func: before open brace */
2436 vignore /* var-like: ignore until ';' */
2437 } fvdef;
2438
2439 static bool fvextern; /* func or var: extern keyword seen; */
2440
2441 /*
2442 * typedefs are recognized using a simple finite automaton.
2443 * typdef is its state variable.
 * C_entries resets it to tnone before it starts scanning.
2444 */
2445 static enum
2446 {
2447 tnone, /* nothing seen */
2448 tkeyseen, /* typedef keyword seen */
2449 ttypeseen, /* defined type seen */
2450 tinbody, /* inside typedef body */
2451 tend, /* just before typedef tag */
2452 tignore /* junk after typedef tag */
2453 } typdef;
2454
2455 /*
2456 * struct-like structures (enum, struct and union) are recognized
2457 * using another simple finite automaton. `structdef' is its state
2458 * variable.
 * C_entries resets it to snone before it starts scanning.
2459 */
2460 static enum
2461 {
2462 snone, /* nothing seen yet,
2463 or in struct body if bracelev > 0 */
2464 skeyseen, /* struct-like keyword seen */
2465 stagseen, /* struct-like tag seen */
2466 scolonseen /* colon seen after struct-like tag */
2467 } structdef;
2468
2469 /*
2470 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a freshly allocated copy of the class name here
 * and never frees the previous one.
2471 */
2472 static const char *objtag = "<uninited>";
2473
2474 /*
2475 * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro resets it to dnone each time a new line is read.
2476 */
2477 static enum
2478 {
2479 dnone, /* nothing seen */
2480 dsharpseen, /* '#' seen as first char on line */
2481 ddefineseen, /* '#' and 'define' seen */
2482 dignorerest /* ignore rest of line */
2483 } definedef;
2484
2485 /*
2486 * State machine for Objective C protocols and implementations.
2487 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2488 */
2489 static enum
2490 {
2491 onone, /* nothing seen */
2492 oprotocol, /* @interface or @protocol seen */
2493 oimplementation, /* @implementation seen */
2494 otagseen, /* class name seen */
2495 oparenseen, /* parenthesis before category seen */
2496 ocatseen, /* category name seen */
2497 oinbody, /* in @implementation body */
2498 omethodsign, /* in @implementation body, after +/- */
2499 omethodtag, /* after method name */
2500 omethodcolon, /* after method colon */
2501 omethodparm, /* after method parameter */
2502 oignore /* wait for @end */
2503 } objdef;
2504
2505
2506 /*
2507 * Use this structure to keep info about the token read, and how it
2508 * should be tagged. Used by the make_C_tag function to build a tag.
2509 */
2510 static struct tok
2511 {
2512 char *line; /* string containing the token */
2513 int offset; /* where the token starts in LINE */
2514 int length; /* token length */
2515 /*
2516 The previous members can be used to pass strings around for generic
2517 purposes. The following ones specifically refer to creating tags. In this
2518 case the token contained here is the pattern that will be used to create a
2519 tag.
2520 */
2521 bool valid; /* a tag may be created from this token; when
2522 false no tag is made, so the token should be
2523 invalidated whenever a state machine is
 reset prematurely */
2524 bool named; /* create a named tag */
2525 int lineno; /* source line number of tag */
2526 long linepos; /* source char number of tag */
2527 } token; /* latest token read */
2528
2529 /*
2530 * Variables and functions for dealing with nested structures.
2531 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2532 */
2533 static void pushclass_above (int, char *, int);
2534 static void popclass_above (int);
2535 static void write_classname (linebuffer *, const char *qualifier);
2536
/* The two arrays are parallel: entry I holds the name (NULL when
   pushclass_above was given no name) and the brace level of the I-th
   enclosing declaration.  Both are allocated in C_entries and doubled
   in pushclass_above when full.  */
2537 static struct {
2538 char **cname; /* nested class names */
2539 int *bracelev; /* nested class brace level */
2540 int nl; /* class nesting level (elements used) */
2541 int size; /* length of the array */
2542 } cstack; /* stack for nested declaration tags */
2543 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2544 #define nestlev (cstack.nl)
2545 /* After struct keyword or in struct body, not inside a nested function. */
/* Note that this macro reads a variable named bracelev from the scope in
   which it is expanded.  */
2546 #define instruct (structdef == snone && nestlev > 0 \
2547 && bracelev == cstack.bracelev[nestlev-1] + 1)
2548
2549 static void
2550 pushclass_above (int bracelev, char *str, int len)
2551 {
2552 int nl;
2553
2554 popclass_above (bracelev);
2555 nl = cstack.nl;
2556 if (nl >= cstack.size)
2557 {
2558 int size = cstack.size *= 2;
2559 xrnew (cstack.cname, size, char *);
2560 xrnew (cstack.bracelev, size, int);
2561 }
2562 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2563 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2564 cstack.bracelev[nl] = bracelev;
2565 cstack.nl = nl + 1;
2566 }
2567
2568 static void
2569 popclass_above (int bracelev)
2570 {
2571 int nl;
2572
2573 for (nl = cstack.nl - 1;
2574 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2575 nl--)
2576 {
2577 free (cstack.cname[nl]);
2578 cstack.nl = nl;
2579 }
2580 }
2581
2582 static void
2583 write_classname (linebuffer *cn, const char *qualifier)
2584 {
2585 int i, len;
2586 int qlen = strlen (qualifier);
2587
2588 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2589 {
2590 len = 0;
2591 cn->len = 0;
2592 cn->buffer[0] = '\0';
2593 }
2594 else
2595 {
2596 len = strlen (cstack.cname[0]);
2597 linebuffer_setlen (cn, len);
2598 strcpy (cn->buffer, cstack.cname[0]);
2599 }
2600 for (i = 1; i < cstack.nl; i++)
2601 {
2602 char *s = cstack.cname[i];
2603 if (s == NULL)
2604 continue;
2605 linebuffer_setlen (cn, len + qlen + strlen (s));
2606 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2607 }
2608 }
2609
2610 \f
2611 static bool consider_token (char *, int, int, int *, int, int, bool *);
2612 static void make_C_tag (bool);
2613
2614 /*
2615 * consider_token ()
2616 * checks to see if the current token is at the start of a
2617 * function or variable, or corresponds to a typedef, or
2618 * is a struct/union/enum tag, or #define, or an enum constant.
2619 *
2620 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2621 * with args. C_EXTP points to which language we are looking at.
2622 *
2623 * Globals
2624 * fvdef IN OUT
2625 * structdef IN OUT
2626 * definedef IN OUT
2627 * typdef IN OUT
2628 * objdef IN OUT
2629 */
2630
/* Returns true when the caller should make a tag for the token, false
   otherwise.  The checks below run in a fixed order -- __attribute__,
   preprocessor state, typedef state, struct-like keywords, Objective C
   state, then functions/variables/enum constants -- and most of them
   return as soon as they have classified the token.  */
2631 static bool
2632 consider_token (char *str, int len, int c, int *c_extp,
2633 int bracelev, int parlev, bool *is_func_or_var)
2634 /* IN: token pointer */
2635 /* IN: token length */
2636 /* IN: first char after the token */
2637 /* IN, OUT: C extensions mask */
2638 /* IN: brace level */
2639 /* IN: parenthesis level */
2640 /* OUT: function or variable found */
2641 {
2642 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2643 structtype is the type of the preceding struct-like keyword, and
2644 structbracelev is the brace level where it has been seen. */
2645 static enum sym_type structtype;
2646 static int structbracelev;
2647 static enum sym_type toktype;
2648
2649
/* Classify the token against the keyword table for the current dialect;
   st_none means it is not a keyword we know about. */
2650 toktype = C_symtype (str, len, *c_extp);
2651
2652 /*
2653 * Skip __attribute__
2654 */
2655 if (toktype == st_C_attribute)
2656 {
2657 inattribute = true;
2658 return false;
2659 }
2660
2661 /*
2662 * Advance the definedef state machine.
2663 */
2664 switch (definedef)
2665 {
2666 case dnone:
2667 /* We're not on a preprocessor line. */
2668 if (toktype == st_C_gnumacro)
2669 {
2670 fvdef = fdefunkey;
2671 return false;
2672 }
2673 break;
2674 case dsharpseen:
/* First token after '#': only the keywords classified as st_C_define
   keep the rest of the line interesting. */
2675 if (toktype == st_C_define)
2676 {
2677 definedef = ddefineseen;
2678 }
2679 else
2680 {
2681 definedef = dignorerest;
2682 }
2683 return false;
2684 case ddefineseen:
2685 /*
2686 * Make a tag for any macro, unless it is a constant
2687 * and constantypedefs is false.
2688 */
2689 definedef = dignorerest;
/* The macro takes arguments when '(' immediately follows its name. */
2690 *is_func_or_var = (c == '(');
2691 if (!*is_func_or_var && !constantypedefs)
2692 return false;
2693 else
2694 return true;
2695 case dignorerest:
2696 return false;
2697 default:
2698 error ("internal error: definedef value.");
2699 }
2700
2701 /*
2702 * Now typedefs
2703 */
2704 switch (typdef)
2705 {
2706 case tnone:
2707 if (toktype == st_C_typedef)
2708 {
/* The typedef automaton is started only when the typedefs option is
   set; the function/variable automaton is reset either way. */
2709 if (typedefs)
2710 typdef = tkeyseen;
2711 fvextern = false;
2712 fvdef = fvnone;
2713 return false;
2714 }
2715 break;
2716 case tkeyseen:
/* Other keyword classes leave typdef unchanged. */
2717 switch (toktype)
2718 {
2719 case st_none:
2720 case st_C_class:
2721 case st_C_struct:
2722 case st_C_enum:
2723 typdef = ttypeseen;
2724 }
2725 break;
2726 case ttypeseen:
2727 if (structdef == snone && fvdef == fvnone)
2728 {
2729 fvdef = fvnameseen;
2730 return true;
2731 }
2732 break;
2733 case tend:
/* Struct-like keywords are not tagged here; any other token is. */
2734 switch (toktype)
2735 {
2736 case st_C_class:
2737 case st_C_struct:
2738 case st_C_enum:
2739 return false;
2740 }
2741 return true;
2742 }
2743
2744 switch (toktype)
2745 {
2746 case st_C_javastruct:
2747 if (structdef == stagseen)
2748 structdef = scolonseen;
2749 return false;
2750 case st_C_template:
2751 case st_C_class:
2752 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2753 && bracelev == 0
2754 && definedef == dnone && structdef == snone
2755 && typdef == tnone && fvdef == fvnone)
2756 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
/* `template' only takes part in the C++ detection above. */
2757 if (toktype == st_C_template)
2758 break;
2759 /* FALLTHRU */
2760 case st_C_struct:
2761 case st_C_enum:
2762 if (parlev == 0
2763 && fvdef != vignore
2764 && (typdef == tkeyseen
2765 || (typedefs_or_cplusplus && structdef == snone)))
2766 {
/* Remember which struct-like keyword was seen and at which brace
   level. */
2767 structdef = skeyseen;
2768 structtype = toktype;
2769 structbracelev = bracelev;
2770 if (fvdef == fvnameseen)
2771 fvdef = fvnone;
2772 }
2773 return false;
2774 }
2775
/* The token right after a struct-like keyword is its tag. */
2776 if (structdef == skeyseen)
2777 {
2778 structdef = stagseen;
2779 return true;
2780 }
2781
2782 if (typdef != tnone)
2783 definedef = dnone;
2784
2785 /* Detect Objective C constructs. */
2786 switch (objdef)
2787 {
2788 case onone:
2789 switch (toktype)
2790 {
2791 case st_C_objprot:
2792 objdef = oprotocol;
2793 return false;
2794 case st_C_objimpl:
2795 objdef = oimplementation;
2796 return false;
2797 }
2798 break;
2799 case oimplementation:
2800 /* Save the class tag for functions or variables defined inside. */
2801 objtag = savenstr (str, len);
2802 objdef = oinbody;
2803 return false;
2804 case oprotocol:
2805 /* Save the class tag for categories. */
2806 objtag = savenstr (str, len);
2807 objdef = otagseen;
2808 *is_func_or_var = true;
2809 return true;
2810 case oparenseen:
2811 objdef = ocatseen;
2812 *is_func_or_var = true;
2813 return true;
2814 case oinbody:
2815 break;
2816 case omethodsign:
2817 if (parlev == 0)
2818 {
2819 fvdef = fvnone;
2820 objdef = omethodtag;
/* Start the method name in token_name with this token. */
2821 linebuffer_setlen (&token_name, len);
2822 memcpy (token_name.buffer, str, len);
2823 token_name.buffer[len] = '\0';
2824 return true;
2825 }
2826 return false;
2827 case omethodcolon:
2828 if (parlev == 0)
2829 objdef = omethodparm;
2830 return false;
2831 case omethodparm:
2832 if (parlev == 0)
2833 {
2834 int oldlen = token_name.len;
2835 fvdef = fvnone;
2836 objdef = omethodtag;
/* Append this token to the method name collected so far. */
2837 linebuffer_setlen (&token_name, oldlen + len);
2838 memcpy (token_name.buffer + oldlen, str, len);
2839 token_name.buffer[oldlen + len] = '\0';
2840 return true;
2841 }
2842 return false;
2843 case oignore:
2844 if (toktype == st_C_objend)
2845 {
2846 /* Memory leakage here: the string pointed by objtag is
2847 never released, because many tests would be needed to
2848 avoid breaking on incorrect input code. The amount of
2849 memory leaked here is the sum of the lengths of the
2850 class tags.
2851 free (objtag); */
2852 objdef = onone;
2853 }
2854 return false;
2855 }
2856
2857 /* A function, variable or enum constant? */
2858 switch (toktype)
2859 {
2860 case st_C_extern:
2861 fvextern = true;
2862 switch (fvdef)
2863 {
2864 case finlist:
2865 case flistseen:
2866 case fignore:
2867 case vignore:
2868 break;
2869 default:
2870 fvdef = fvnone;
2871 }
2872 return false;
2873 case st_C_ignore:
2874 fvextern = false;
2875 fvdef = vignore;
2876 return false;
2877 case st_C_operator:
2878 fvdef = foperator;
2879 *is_func_or_var = true;
2880 return true;
2881 case st_none:
2882 if (constantypedefs
2883 && structdef == snone
2884 && structtype == st_C_enum && bracelev > structbracelev
2885 /* Don't tag tokens in expressions that assign values to enum
2886 constants. */
2887 && fvdef != vignore)
2888 return true; /* enum constant */
2889 switch (fvdef)
2890 {
2891 case fdefunkey:
2892 if (bracelev > 0)
2893 break;
2894 fvdef = fdefunname; /* GNU macro */
2895 *is_func_or_var = true;
2896 return true;
2897 case fvnone:
2898 switch (typdef)
2899 {
2900 case ttypeseen:
2901 return false;
2902 case tnone:
/* asm and __asm__ are not in the keyword table, so they are checked
   for explicitly here. */
2903 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2904 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2905 {
2906 fvdef = vignore;
2907 return false;
2908 }
2909 break;
2910 }
2911 /* FALLTHRU */
2912 case fvnameseen:
2913 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2914 {
2915 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2916 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2917 fvdef = foperator;
2918 *is_func_or_var = true;
2919 return true;
2920 }
/* Inside braces, only names directly in a struct-like body count. */
2921 if (bracelev > 0 && !instruct)
2922 break;
2923 fvdef = fvnameseen; /* function or variable */
2924 *is_func_or_var = true;
2925 return true;
2926 }
2927 break;
2928 }
2929
2930 return false;
2931 }
2932
2933 \f
2934 /*
2935 * C_entries often keeps pointers to tokens or lines which are older than
2936 * the line currently read. By keeping two line buffers, and switching
2937 * them at end of line, it is possible to use those pointers.
2938 */
2939 static struct
2940 {
2941 long linepos;
2942 linebuffer lb;
2943 } lbs[2];
2944
/* The macros below are written for use inside C_entries: they refer to
   names such as curndx, newndx, c_ext, lp, inf, quotednl and savetoken
   that must be visible where they are expanded. */
2945 #define current_lb_is_new (newndx == curndx)
2946 #define switch_line_buffers() (curndx = 1 - curndx)
2947
2948 #define curlb (lbs[curndx].lb)
2949 #define newlb (lbs[newndx].lb)
2950 #define curlinepos (lbs[curndx].linepos)
2951 #define newlinepos (lbs[newndx].linepos)
2952
2953 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2954 #define cplpl (c_ext & C_PLPL)
2955 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2956
/* Read the next input line into the current buffer and point lp at its
   start, leaving the definedef state untouched. */
2957 #define CNL_SAVE_DEFINEDEF() \
2958 do { \
2959 curlinepos = charno; \
2960 readline (&curlb, inf); \
2961 lp = curlb.buffer; \
2962 quotednl = false; \
2963 newndx = curndx; \
2964 } while (0)
2965
/* Like CNL_SAVE_DEFINEDEF, but also bring back a token saved in
   savetoken, if it is valid, and reset definedef to dnone. */
2966 #define CNL() \
2967 do { \
2968 CNL_SAVE_DEFINEDEF(); \
2969 if (savetoken.valid) \
2970 { \
2971 token = savetoken; \
2972 savetoken.valid = false; \
2973 } \
2974 definedef = dnone; \
2975 } while (0)
2976
2977
2978 static void
2979 make_C_tag (bool isfun)
2980 {
2981 /* This function is never called when token.valid is false, but
2982 we must protect against invalid input or internal errors. */
2983 if (token.valid)
2984 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2985 token.offset+token.length+1, token.lineno, token.linepos);
2986 else if (DEBUG)
2987 { /* this branch is optimized away if !DEBUG */
2988 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2989 token_name.len + 17, isfun, token.line,
2990 token.offset+token.length+1, token.lineno, token.linepos);
2991 error ("INVALID TOKEN");
2992 }
2993
2994 token.valid = false;
2995 }
2996
2997
2998 /*
2999 * C_entries ()
3000 * This routine finds functions, variables, typedefs,
3001 * #define's, enum constants and struct/union/enum definitions in
3002 * C syntax and adds them to the list.
3003 */
3004 static void
3005 C_entries (int c_ext, FILE *inf)
3006 /* extension of C */
3007 /* input file */
3008 {
3009 register char c; /* latest char read; '\0' for end of line */
3010 register char *lp; /* pointer one beyond the character `c' */
3011 int curndx, newndx; /* indices for current and new lb */
3012 register int tokoff; /* offset in line of start of current token */
3013 register int toklen; /* length of current token */
3014 const char *qualifier; /* string used to qualify names */
3015 int qlen; /* length of qualifier */
3016 int bracelev; /* current brace level */
3017 int bracketlev; /* current bracket level */
3018 int parlev; /* current parenthesis level */
3019 int attrparlev; /* __attribute__ parenthesis level */
3020 int templatelev; /* current template level */
3021 int typdefbracelev; /* bracelev where a typedef struct body begun */
3022 bool incomm, inquote, inchar, quotednl, midtoken;
3023 bool yacc_rules; /* in the rules part of a yacc file */
3024 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3025
3026
3027 linebuffer_init (&lbs[0].lb);
3028 linebuffer_init (&lbs[1].lb);
3029 if (cstack.size == 0)
3030 {
3031 cstack.size = (DEBUG) ? 1 : 4;
3032 cstack.nl = 0;
3033 cstack.cname = xnew (cstack.size, char *);
3034 cstack.bracelev = xnew (cstack.size, int);
3035 }
3036
3037 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3038 curndx = newndx = 0;
3039 lp = curlb.buffer;
3040 *lp = 0;
3041
3042 fvdef = fvnone; fvextern = false; typdef = tnone;
3043 structdef = snone; definedef = dnone; objdef = onone;
3044 yacc_rules = false;
3045 midtoken = inquote = inchar = incomm = quotednl = false;
3046 token.valid = savetoken.valid = false;
3047 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3048 if (cjava)
3049 { qualifier = "."; qlen = 1; }
3050 else
3051 { qualifier = "::"; qlen = 2; }
3052
3053
3054 while (!feof (inf))
3055 {
3056 c = *lp++;
3057 if (c == '\\')
3058 {
3059 /* If we are at the end of the line, the next character is a
3060 '\0'; do not skip it, because it is what tells us
3061 to read the next line. */
3062 if (*lp == '\0')
3063 {
3064 quotednl = true;
3065 continue;
3066 }
3067 lp++;
3068 c = ' ';
3069 }
3070 else if (incomm)
3071 {
3072 switch (c)
3073 {
3074 case '*':
3075 if (*lp == '/')
3076 {
3077 c = *lp++;
3078 incomm = false;
3079 }
3080 break;
3081 case '\0':
3082 /* Newlines inside comments do not end macro definitions in
3083 traditional cpp. */
3084 CNL_SAVE_DEFINEDEF ();
3085 break;
3086 }
3087 continue;
3088 }
3089 else if (inquote)
3090 {
3091 switch (c)
3092 {
3093 case '"':
3094 inquote = false;
3095 break;
3096 case '\0':
3097 /* Newlines inside strings do not end macro definitions
3098 in traditional cpp, even though compilers don't
3099 usually accept them. */
3100 CNL_SAVE_DEFINEDEF ();
3101 break;
3102 }
3103 continue;
3104 }
3105 else if (inchar)
3106 {
3107 switch (c)
3108 {
3109 case '\0':
3110 /* Hmmm, something went wrong. */
3111 CNL ();
3112 /* FALLTHRU */
3113 case '\'':
3114 inchar = false;
3115 break;
3116 }
3117 continue;
3118 }
3119 else switch (c)
3120 {
3121 case '"':
3122 inquote = true;
3123 if (bracketlev > 0)
3124 continue;
3125 if (inattribute)
3126 break;
3127 switch (fvdef)
3128 {
3129 case fdefunkey:
3130 case fstartlist:
3131 case finlist:
3132 case fignore:
3133 case vignore:
3134 break;
3135 default:
3136 fvextern = false;
3137 fvdef = fvnone;
3138 }
3139 continue;
3140 case '\'':
3141 inchar = true;
3142 if (bracketlev > 0)
3143 continue;
3144 if (inattribute)
3145 break;
3146 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3147 {
3148 fvextern = false;
3149 fvdef = fvnone;
3150 }
3151 continue;
3152 case '/':
3153 if (*lp == '*')
3154 {
3155 incomm = true;
3156 lp++;
3157 c = ' ';
3158 if (bracketlev > 0)
3159 continue;
3160 }
3161 else if (/* cplpl && */ *lp == '/')
3162 {
3163 c = '\0';
3164 }
3165 break;
3166 case '%':
3167 if ((c_ext & YACC) && *lp == '%')
3168 {
3169 /* Entering or exiting rules section in yacc file. */
3170 lp++;
3171 definedef = dnone; fvdef = fvnone; fvextern = false;
3172 typdef = tnone; structdef = snone;
3173 midtoken = inquote = inchar = incomm = quotednl = false;
3174 bracelev = 0;
3175 yacc_rules = !yacc_rules;
3176 continue;
3177 }
3178 else
3179 break;
3180 case '#':
3181 if (definedef == dnone)
3182 {
3183 char *cp;
3184 bool cpptoken = true;
3185
3186 /* Look back on this line. If all blanks, or nonblanks
3187 followed by an end of comment, this is a preprocessor
3188 token. */
3189 for (cp = newlb.buffer; cp < lp-1; cp++)
3190 if (!c_isspace (*cp))
3191 {
3192 if (*cp == '*' && cp[1] == '/')
3193 {
3194 cp++;
3195 cpptoken = true;
3196 }
3197 else
3198 cpptoken = false;
3199 }
3200 if (cpptoken)
3201 {
3202 definedef = dsharpseen;
3203 /* This is needed for tagging enum values: when there are
3204 preprocessor conditionals inside the enum, we need to
3205 reset the value of fvdef so that the next enum value is
3206 tagged even though the one before it did not end in a
3207 comma. */
3208 if (fvdef == vignore && instruct && parlev == 0)
3209 {
3210 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3211 fvdef = fvnone;
3212 }
3213 }
3214 } /* if (definedef == dnone) */
3215 continue;
3216 case '[':
3217 bracketlev++;
3218 continue;
3219 default:
3220 if (bracketlev > 0)
3221 {
3222 if (c == ']')
3223 --bracketlev;
3224 else if (c == '\0')
3225 CNL_SAVE_DEFINEDEF ();
3226 continue;
3227 }
3228 break;
3229 } /* switch (c) */
3230
3231
3232 /* Consider token only if some involved conditions are satisfied. */
3233 if (typdef != tignore
3234 && definedef != dignorerest
3235 && fvdef != finlist
3236 && templatelev == 0
3237 && (definedef != dnone
3238 || structdef != scolonseen)
3239 && !inattribute)
3240 {
3241 if (midtoken)
3242 {
3243 if (endtoken (c))
3244 {
3245 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3246 /* This handles :: in the middle,
3247 but not at the beginning of an identifier.
3248 Also, space-separated :: is not recognized. */
3249 {
3250 if (c_ext & C_AUTO) /* automatic detection of C++ */
3251 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3252 lp += 2;
3253 toklen += 2;
3254 c = lp[-1];
3255 goto still_in_token;
3256 }
3257 else
3258 {
3259 bool funorvar = false;
3260
3261 if (yacc_rules
3262 || consider_token (newlb.buffer + tokoff, toklen, c,
3263 &c_ext, bracelev, parlev,
3264 &funorvar))
3265 {
3266 if (fvdef == foperator)
3267 {
3268 char *oldlp = lp;
3269 lp = skip_spaces (lp-1);
3270 if (*lp != '\0')
3271 lp += 1;
3272 while (*lp != '\0'
3273 && !c_isspace (*lp) && *lp != '(')
3274 lp += 1;
3275 c = *lp++;
3276 toklen += lp - oldlp;
3277 }
3278 token.named = false;
3279 if (!plainc
3280 && nestlev > 0 && definedef == dnone)
3281 /* in struct body */
3282 {
3283 int len;
3284 write_classname (&token_name, qualifier);
3285 len = token_name.len;
3286 linebuffer_setlen (&token_name, len+qlen+toklen);
3287 sprintf (token_name.buffer + len, "%s%.*s",
3288 qualifier, toklen, newlb.buffer + tokoff);
3289 token.named = true;
3290 }
3291 else if (objdef == ocatseen)
3292 /* Objective C category */
3293 {
3294 int len = strlen (objtag) + 2 + toklen;
3295 linebuffer_setlen (&token_name, len);
3296 sprintf (token_name.buffer, "%s(%.*s)",
3297 objtag, toklen, newlb.buffer + tokoff);
3298 token.named = true;
3299 }
3300 else if (objdef == omethodtag
3301 || objdef == omethodparm)
3302 /* Objective C method */
3303 {
3304 token.named = true;
3305 }
3306 else if (fvdef == fdefunname)
3307 /* GNU DEFUN and similar macros */
3308 {
3309 bool defun = (newlb.buffer[tokoff] == 'F');
3310 int off = tokoff;
3311 int len = toklen;
3312
3313 /* Rewrite the tag so that emacs lisp DEFUNs
3314 can be found by their elisp name */
3315 if (defun)
3316 {
3317 off += 1;
3318 len -= 1;
3319 }
3320 linebuffer_setlen (&token_name, len);
3321 memcpy (token_name.buffer,
3322 newlb.buffer + off, len);
3323 token_name.buffer[len] = '\0';
3324 if (defun)
3325 while (--len >= 0)
3326 if (token_name.buffer[len] == '_')
3327 token_name.buffer[len] = '-';
3328 token.named = defun;
3329 }
3330 else
3331 {
3332 linebuffer_setlen (&token_name, toklen);
3333 memcpy (token_name.buffer,
3334 newlb.buffer + tokoff, toklen);
3335 token_name.buffer[toklen] = '\0';
3336 /* Name macros and members. */
3337 token.named = (structdef == stagseen
3338 || typdef == ttypeseen
3339 || typdef == tend
3340 || (funorvar
3341 && definedef == dignorerest)
3342 || (funorvar
3343 && definedef == dnone
3344 && structdef == snone
3345 && bracelev > 0));
3346 }
3347 token.lineno = lineno;
3348 token.offset = tokoff;
3349 token.length = toklen;
3350 token.line = newlb.buffer;
3351 token.linepos = newlinepos;
3352 token.valid = true;
3353
3354 if (definedef == dnone
3355 && (fvdef == fvnameseen
3356 || fvdef == foperator
3357 || structdef == stagseen
3358 || typdef == tend
3359 || typdef == ttypeseen
3360 || objdef != onone))
3361 {
3362 if (current_lb_is_new)
3363 switch_line_buffers ();
3364 }
3365 else if (definedef != dnone
3366 || fvdef == fdefunname
3367 || instruct)
3368 make_C_tag (funorvar);
3369 }
3370 else /* not yacc and consider_token failed */
3371 {
3372 if (inattribute && fvdef == fignore)
3373 {
3374 /* We have just met __attribute__ after a
3375 function parameter list: do not tag the
3376 function again. */
3377 fvdef = fvnone;
3378 }
3379 }
3380 midtoken = false;
3381 }
3382 } /* if (endtoken (c)) */
3383 else if (intoken (c))
3384 still_in_token:
3385 {
3386 toklen++;
3387 continue;
3388 }
3389 } /* if (midtoken) */
3390 else if (begtoken (c))
3391 {
3392 switch (definedef)
3393 {
3394 case dnone:
3395 switch (fvdef)
3396 {
3397 case fstartlist:
3398 /* This prevents tagging fb in
3399 void (__attribute__((noreturn)) *fb) (void);
3400 Fixing this is not easy and not very important. */
3401 fvdef = finlist;
3402 continue;
3403 case flistseen:
3404 if (plainc || declarations)
3405 {
3406 make_C_tag (true); /* a function */
3407 fvdef = fignore;
3408 }
3409 break;
3410 }
3411 if (structdef == stagseen && !cjava)
3412 {
3413 popclass_above (bracelev);
3414 structdef = snone;
3415 }
3416 break;
3417 case dsharpseen:
3418 savetoken = token;
3419 break;
3420 }
3421 if (!yacc_rules || lp == newlb.buffer + 1)
3422 {
3423 tokoff = lp - 1 - newlb.buffer;
3424 toklen = 1;
3425 midtoken = true;
3426 }
3427 continue;
3428 } /* if (begtoken) */
3429 } /* if must look at token */
3430
3431
3432 /* Detect end of line, colon, comma, semicolon and various braces
3433 after having handled a token.*/
3434 switch (c)
3435 {
3436 case ':':
3437 if (inattribute)
3438 break;
3439 if (yacc_rules && token.offset == 0 && token.valid)
3440 {
3441 make_C_tag (false); /* a yacc function */
3442 break;
3443 }
3444 if (definedef != dnone)
3445 break;
3446 switch (objdef)
3447 {
3448 case otagseen:
3449 objdef = oignore;
3450 make_C_tag (true); /* an Objective C class */
3451 break;
3452 case omethodtag:
3453 case omethodparm:
3454 objdef = omethodcolon;
3455 int toklen = token_name.len;
3456 linebuffer_setlen (&token_name, toklen + 1);
3457 strcpy (token_name.buffer + toklen, ":");
3458 break;
3459 }
3460 if (structdef == stagseen)
3461 {
3462 structdef = scolonseen;
3463 break;
3464 }
3465 /* Should be useless, but may be work as a safety net. */
3466 if (cplpl && fvdef == flistseen)
3467 {
3468 make_C_tag (true); /* a function */
3469 fvdef = fignore;
3470 break;
3471 }
3472 break;
3473 case ';':
3474 if (definedef != dnone || inattribute)
3475 break;
3476 switch (typdef)
3477 {
3478 case tend:
3479 case ttypeseen:
3480 make_C_tag (false); /* a typedef */
3481 typdef = tnone;
3482 fvdef = fvnone;
3483 break;
3484 case tnone:
3485 case tinbody:
3486 case tignore:
3487 switch (fvdef)
3488 {
3489 case fignore:
3490 if (typdef == tignore || cplpl)
3491 fvdef = fvnone;
3492 break;
3493 case fvnameseen:
3494 if ((globals && bracelev == 0 && (!fvextern || declarations))
3495 || (members && instruct))
3496 make_C_tag (false); /* a variable */
3497 fvextern = false;
3498 fvdef = fvnone;
3499 token.valid = false;
3500 break;
3501 case flistseen:
3502 if ((declarations
3503 && (cplpl || !instruct)
3504 && (typdef == tnone || (typdef != tignore && instruct)))
3505 || (members
3506 && plainc && instruct))
3507 make_C_tag (true); /* a function */
3508 /* FALLTHRU */
3509 default:
3510 fvextern = false;
3511 fvdef = fvnone;
3512 if (declarations
3513 && cplpl && structdef == stagseen)
3514 make_C_tag (false); /* forward declaration */
3515 else
3516 token.valid = false;
3517 } /* switch (fvdef) */
3518 /* FALLTHRU */
3519 default:
3520 if (!instruct)
3521 typdef = tnone;
3522 }
3523 if (structdef == stagseen)
3524 structdef = snone;
3525 break;
3526 case ',':
3527 if (definedef != dnone || inattribute)
3528 break;
3529 switch (objdef)
3530 {
3531 case omethodtag:
3532 case omethodparm:
3533 make_C_tag (true); /* an Objective C method */
3534 objdef = oinbody;
3535 break;
3536 }
3537 switch (fvdef)
3538 {
3539 case fdefunkey:
3540 case foperator:
3541 case fstartlist:
3542 case finlist:
3543 case fignore:
3544 break;
3545 case vignore:
3546 if (instruct && parlev == 0)
3547 fvdef = fvnone;
3548 break;
3549 case fdefunname:
3550 fvdef = fignore;
3551 break;
3552 case fvnameseen:
3553 if (parlev == 0
3554 && ((globals
3555 && bracelev == 0
3556 && templatelev == 0
3557 && (!fvextern || declarations))
3558 || (members && instruct)))
3559 make_C_tag (false); /* a variable */
3560 break;
3561 case flistseen:
3562 if ((declarations && typdef == tnone && !instruct)
3563 || (members && typdef != tignore && instruct))
3564 {
3565 make_C_tag (true); /* a function */
3566 fvdef = fvnameseen;
3567 }
3568 else if (!declarations)
3569 fvdef = fvnone;
3570 token.valid = false;
3571 break;
3572 default:
3573 fvdef = fvnone;
3574 }
3575 if (structdef == stagseen)
3576 structdef = snone;
3577 break;
3578 case ']':
3579 if (definedef != dnone || inattribute)
3580 break;
3581 if (structdef == stagseen)
3582 structdef = snone;
3583 switch (typdef)
3584 {
3585 case ttypeseen:
3586 case tend:
3587 typdef = tignore;
3588 make_C_tag (false); /* a typedef */
3589 break;
3590 case tnone:
3591 case tinbody:
3592 switch (fvdef)
3593 {
3594 case foperator:
3595 case finlist:
3596 case fignore:
3597 case vignore:
3598 break;
3599 case fvnameseen:
3600 if ((members && bracelev == 1)
3601 || (globals && bracelev == 0
3602 && (!fvextern || declarations)))
3603 make_C_tag (false); /* a variable */
3604 /* FALLTHRU */
3605 default:
3606 fvdef = fvnone;
3607 }
3608 break;
3609 }
3610 break;
3611 case '(':
3612 if (inattribute)
3613 {
3614 attrparlev++;
3615 break;
3616 }
3617 if (definedef != dnone)
3618 break;
3619 if (objdef == otagseen && parlev == 0)
3620 objdef = oparenseen;
3621 switch (fvdef)
3622 {
3623 case fvnameseen:
3624 if (typdef == ttypeseen
3625 && *lp != '*'
3626 && !instruct)
3627 {
3628 /* This handles constructs like:
3629 typedef void OperatorFun (int fun); */
3630 make_C_tag (false);
3631 typdef = tignore;
3632 fvdef = fignore;
3633 break;
3634 }
3635 /* FALLTHRU */
3636 case foperator:
3637 fvdef = fstartlist;
3638 break;
3639 case flistseen:
3640 fvdef = finlist;
3641 break;
3642 }
3643 parlev++;
3644 break;
3645 case ')':
3646 if (inattribute)
3647 {
3648 if (--attrparlev == 0)
3649 inattribute = false;
3650 break;
3651 }
3652 if (definedef != dnone)
3653 break;
3654 if (objdef == ocatseen && parlev == 1)
3655 {
3656 make_C_tag (true); /* an Objective C category */
3657 objdef = oignore;
3658 }
3659 if (--parlev == 0)
3660 {
3661 switch (fvdef)
3662 {
3663 case fstartlist:
3664 case finlist:
3665 fvdef = flistseen;
3666 break;
3667 }
3668 if (!instruct
3669 && (typdef == tend
3670 || typdef == ttypeseen))
3671 {
3672 typdef = tignore;
3673 make_C_tag (false); /* a typedef */
3674 }
3675 }
3676 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3677 parlev = 0;
3678 break;
3679 case '{':
3680 if (definedef != dnone)
3681 break;
3682 if (typdef == ttypeseen)
3683 {
3684 /* Whenever typdef is set to tinbody (currently only
3685 here), typdefbracelev should be set to bracelev. */
3686 typdef = tinbody;
3687 typdefbracelev = bracelev;
3688 }
3689 switch (fvdef)
3690 {
3691 case flistseen:
3692 make_C_tag (true); /* a function */
3693 /* FALLTHRU */
3694 case fignore:
3695 fvdef = fvnone;
3696 break;
3697 case fvnone:
3698 switch (objdef)
3699 {
3700 case otagseen:
3701 make_C_tag (true); /* an Objective C class */
3702 objdef = oignore;
3703 break;
3704 case omethodtag:
3705 case omethodparm:
3706 make_C_tag (true); /* an Objective C method */
3707 objdef = oinbody;
3708 break;
3709 default:
3710 /* Neutralize `extern "C" {' grot. */
3711 if (bracelev == 0 && structdef == snone && nestlev == 0
3712 && typdef == tnone)
3713 bracelev = -1;
3714 }
3715 break;
3716 }
3717 switch (structdef)
3718 {
3719 case skeyseen: /* unnamed struct */
3720 pushclass_above (bracelev, NULL, 0);
3721 structdef = snone;
3722 break;
3723 case stagseen: /* named struct or enum */
3724 case scolonseen: /* a class */
3725 pushclass_above (bracelev,token.line+token.offset, token.length);
3726 structdef = snone;
3727 make_C_tag (false); /* a struct or enum */
3728 break;
3729 }
3730 bracelev += 1;
3731 break;
3732 case '*':
3733 if (definedef != dnone)
3734 break;
3735 if (fvdef == fstartlist)
3736 {
3737 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3738 token.valid = false;
3739 }
3740 break;
3741 case '}':
3742 if (definedef != dnone)
3743 break;
3744 bracelev -= 1;
3745 if (!ignoreindent && lp == newlb.buffer + 1)
3746 {
3747 if (bracelev != 0)
3748 token.valid = false; /* unexpected value, token unreliable */
3749 bracelev = 0; /* reset brace level if first column */
3750 parlev = 0; /* also reset paren level, just in case... */
3751 }
3752 else if (bracelev < 0)
3753 {
3754 token.valid = false; /* something gone amiss, token unreliable */
3755 bracelev = 0;
3756 }
3757 if (bracelev == 0 && fvdef == vignore)
3758 fvdef = fvnone; /* end of function */
3759 popclass_above (bracelev);
3760 structdef = snone;
3761 /* Only if typdef == tinbody is typdefbracelev significant. */
3762 if (typdef == tinbody && bracelev <= typdefbracelev)
3763 {
3764 assert (bracelev == typdefbracelev);
3765 typdef = tend;
3766 }
3767 break;
3768 case '=':
3769 if (definedef != dnone)
3770 break;
3771 switch (fvdef)
3772 {
3773 case foperator:
3774 case finlist:
3775 case fignore:
3776 case vignore:
3777 break;
3778 case fvnameseen:
3779 if ((members && bracelev == 1)
3780 || (globals && bracelev == 0 && (!fvextern || declarations)))
3781 make_C_tag (false); /* a variable */
3782 /* FALLTHRU */
3783 default:
3784 fvdef = vignore;
3785 }
3786 break;
3787 case '<':
3788 if (cplpl
3789 && (structdef == stagseen || fvdef == fvnameseen))
3790 {
3791 templatelev++;
3792 break;
3793 }
3794 goto resetfvdef;
3795 case '>':
3796 if (templatelev > 0)
3797 {
3798 templatelev--;
3799 break;
3800 }
3801 goto resetfvdef;
3802 case '+':
3803 case '-':
3804 if (objdef == oinbody && bracelev == 0)
3805 {
3806 objdef = omethodsign;
3807 break;
3808 }
3809 /* FALLTHRU */
3810 resetfvdef:
3811 case '#': case '~': case '&': case '%': case '/':
3812 case '|': case '^': case '!': case '.': case '?':
3813 if (definedef != dnone)
3814 break;
3815 /* These surely cannot follow a function tag in C. */
3816 switch (fvdef)
3817 {
3818 case foperator:
3819 case finlist:
3820 case fignore:
3821 case vignore:
3822 break;
3823 default:
3824 fvdef = fvnone;
3825 }
3826 break;
3827 case '\0':
3828 if (objdef == otagseen)
3829 {
3830 make_C_tag (true); /* an Objective C class */
3831 objdef = oignore;
3832 }
3833 /* If a macro spans multiple lines don't reset its state. */
3834 if (quotednl)
3835 CNL_SAVE_DEFINEDEF ();
3836 else
3837 CNL ();
3838 break;
3839 } /* switch (c) */
3840
3841 } /* while not eof */
3842
3843 free (lbs[0].lb.buffer);
3844 free (lbs[1].lb.buffer);
3845 }
3846
3847 /*
3848 * Process either a C++ file or a C file depending on the setting
3849 * of a global flag.
3850 */
3851 static void
3852 default_C_entries (FILE *inf)
3853 {
3854 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3855 }
3856
/* Tag INF as plain C: no extension flag is passed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extension = 0;

  C_entries (no_extension, inf);
}
3863
3864 /* Always do C++. */
3865 static void
3866 Cplusplus_entries (FILE *inf)
3867 {
3868 C_entries (C_PLPL, inf);
3869 }
3870
3871 /* Always do Java. */
3872 static void
3873 Cjava_entries (FILE *inf)
3874 {
3875 C_entries (C_JAVA, inf);
3876 }
3877
3878 /* Always do C*. */
3879 static void
3880 Cstar_entries (FILE *inf)
3881 {
3882 C_entries (C_STAR, inf);
3883 }
3884
3885 /* Always do Yacc. */
3886 static void
3887 Yacc_entries (FILE *inf)
3888 {
3889 C_entries (YACC, inf);
3890 }
3891
3892 \f
/* Useful macros.  */

/* Loop header that runs the following statement once per line read
   from FILE_POINTER.  Before each iteration it calls readline on
   LINE_BUFFER and sets CHAR_POINTER to the start of that buffer.  The
   loop stops as soon as feof reports end of file.  Every argument is
   parenthesized so that arbitrary expressions can be passed safely.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
       (readline (&(line_buffer), (file_pointer)),			\
        (char_pointer) = (line_buffer).buffer,				\
        true);								\
      )
3902
/* True if CP points at the keyword KW followed by a character for
   which notinname holds.  On success CP is moved past the keyword and
   any spaces after it; on failure CP is left untouched.  KW must be a
   literal string: the `"" kw' concatenation is a syntax error
   otherwise.  */
#define LOOKING_AT(cp, kw)						\
  ((void) (assert ("" kw), true),	/* KW must be a literal */	\
   (strneq ((cp), kw, sizeof (kw) - 1)	/* CP points at KW */		\
    && notinname ((cp)[sizeof (kw) - 1]) /* KW ends here */		\
    && ((cp) = skip_spaces ((cp) + (sizeof (kw) - 1))))) /* skip it */
3908
/* Similar to LOOKING_AT, but the comparison ignores case, the
   character after the keyword is not checked with notinname, and
   only the keyword itself is skipped: spaces after it are not.  KW
   must be a literal string.  */
#define LOOKING_AT_NOCASE(cp, kw)					\
  ((void) (assert ("" kw), true),	/* KW must be a literal */	\
   (strncaseeq ((cp), kw, sizeof (kw) - 1) /* CP points at KW */	\
    && ((cp) += sizeof (kw) - 1)))	/* step over KW only */
3914
3915 /*
3916 * Read a file, but do no processing. This is used to do regexp
3917 * matching on files that have no language defined.
3918 */
3919 static void
3920 just_read_file (FILE *inf)
3921 {
3922 while (!feof (inf))
3923 readline (&lb, inf);
3924 }
3925
3926 \f
3927 /* Fortran parsing */
3928
3929 static void F_takeprec (void);
3930 static void F_getit (FILE *);
3931
3932 static void
3933 F_takeprec (void)
3934 {
3935 dbp = skip_spaces (dbp);
3936 if (*dbp != '*')
3937 return;
3938 dbp++;
3939 dbp = skip_spaces (dbp);
3940 if (strneq (dbp, "(*)", 3))
3941 {
3942 dbp += 3;
3943 return;
3944 }
3945 if (!c_isdigit (*dbp))
3946 {
3947 --dbp; /* force failure */
3948 return;
3949 }
3950 do
3951 dbp++;
3952 while (c_isdigit (*dbp));
3953 }
3954
3955 static void
3956 F_getit (FILE *inf)
3957 {
3958 register char *cp;
3959
3960 dbp = skip_spaces (dbp);
3961 if (*dbp == '\0')
3962 {
3963 readline (&lb, inf);
3964 dbp = lb.buffer;
3965 if (dbp[5] != '&')
3966 return;
3967 dbp += 6;
3968 dbp = skip_spaces (dbp);
3969 }
3970 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3971 return;
3972 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3973 continue;
3974 make_tag (dbp, cp-dbp, true,
3975 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3976 }
3977
3978
3979 static void
3980 Fortran_functions (FILE *inf)
3981 {
3982 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3983 {
3984 if (*dbp == '%')
3985 dbp++; /* Ratfor escape to fortran */
3986 dbp = skip_spaces (dbp);
3987 if (*dbp == '\0')
3988 continue;
3989
3990 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3991 dbp = skip_spaces (dbp);
3992
3993 if (LOOKING_AT_NOCASE (dbp, "pure"))
3994 dbp = skip_spaces (dbp);
3995
3996 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3997 dbp = skip_spaces (dbp);
3998
3999 switch (c_tolower (*dbp))
4000 {
4001 case 'i':
4002 if (nocase_tail ("integer"))
4003 F_takeprec ();
4004 break;
4005 case 'r':
4006 if (nocase_tail ("real"))
4007 F_takeprec ();
4008 break;
4009 case 'l':
4010 if (nocase_tail ("logical"))
4011 F_takeprec ();
4012 break;
4013 case 'c':
4014 if (nocase_tail ("complex") || nocase_tail ("character"))
4015 F_takeprec ();
4016 break;
4017 case 'd':
4018 if (nocase_tail ("double"))
4019 {
4020 dbp = skip_spaces (dbp);
4021 if (*dbp == '\0')
4022 continue;
4023 if (nocase_tail ("precision"))
4024 break;
4025 continue;
4026 }
4027 break;
4028 }
4029 dbp = skip_spaces (dbp);
4030 if (*dbp == '\0')
4031 continue;
4032 switch (c_tolower (*dbp))
4033 {
4034 case 'f':
4035 if (nocase_tail ("function"))
4036 F_getit (inf);
4037 continue;
4038 case 's':
4039 if (nocase_tail ("subroutine"))
4040 F_getit (inf);
4041 continue;
4042 case 'e':
4043 if (nocase_tail ("entry"))
4044 F_getit (inf);
4045 continue;
4046 case 'b':
4047 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4048 {
4049 dbp = skip_spaces (dbp);
4050 if (*dbp == '\0') /* assume un-named */
4051 make_tag ("blockdata", 9, true,
4052 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4053 else
4054 F_getit (inf); /* look for name */
4055 }
4056 continue;
4057 }
4058 }
4059 }
4060
4061 \f
4062 /*
4063 * Ada parsing
4064 * Original code by
4065 * Philippe Waroquiers (1998)
4066 */
4067
4068 /* Once we are positioned after an "interesting" keyword, let's get
4069 the real tag value necessary. */
4070 static void
4071 Ada_getit (FILE *inf, const char *name_qualifier)
4072 {
4073 register char *cp;
4074 char *name;
4075 char c;
4076
4077 while (!feof (inf))
4078 {
4079 dbp = skip_spaces (dbp);
4080 if (*dbp == '\0'
4081 || (dbp[0] == '-' && dbp[1] == '-'))
4082 {
4083 readline (&lb, inf);
4084 dbp = lb.buffer;
4085 }
4086 switch (c_tolower (*dbp))
4087 {
4088 case 'b':
4089 if (nocase_tail ("body"))
4090 {
4091 /* Skipping body of procedure body or package body or ....
4092 resetting qualifier to body instead of spec. */
4093 name_qualifier = "/b";
4094 continue;
4095 }
4096 break;
4097 case 't':
4098 /* Skipping type of task type or protected type ... */
4099 if (nocase_tail ("type"))
4100 continue;
4101 break;
4102 }
4103 if (*dbp == '"')
4104 {
4105 dbp += 1;
4106 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4107 continue;
4108 }
4109 else
4110 {
4111 dbp = skip_spaces (dbp);
4112 for (cp = dbp;
4113 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4114 cp++)
4115 continue;
4116 if (cp == dbp)
4117 return;
4118 }
4119 c = *cp;
4120 *cp = '\0';
4121 name = concat (dbp, name_qualifier, "");
4122 *cp = c;
4123 make_tag (name, strlen (name), true,
4124 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4125 free (name);
4126 if (c == '"')
4127 dbp = cp + 1;
4128 return;
4129 }
4130 }
4131
4132 static void
4133 Ada_funcs (FILE *inf)
4134 {
4135 bool inquote = false;
4136 bool skip_till_semicolumn = false;
4137
4138 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4139 {
4140 while (*dbp != '\0')
4141 {
4142 /* Skip a string i.e. "abcd". */
4143 if (inquote || (*dbp == '"'))
4144 {
4145 dbp = strchr (dbp + !inquote, '"');
4146 if (dbp != NULL)
4147 {
4148 inquote = false;
4149 dbp += 1;
4150 continue; /* advance char */
4151 }
4152 else
4153 {
4154 inquote = true;
4155 break; /* advance line */
4156 }
4157 }
4158
4159 /* Skip comments. */
4160 if (dbp[0] == '-' && dbp[1] == '-')
4161 break; /* advance line */
4162
4163 /* Skip character enclosed in single quote i.e. 'a'
4164 and skip single quote starting an attribute i.e. 'Image. */
4165 if (*dbp == '\'')
4166 {
4167 dbp++ ;
4168 if (*dbp != '\0')
4169 dbp++;
4170 continue;
4171 }
4172
4173 if (skip_till_semicolumn)
4174 {
4175 if (*dbp == ';')
4176 skip_till_semicolumn = false;
4177 dbp++;
4178 continue; /* advance char */
4179 }
4180
4181 /* Search for beginning of a token. */
4182 if (!begtoken (*dbp))
4183 {
4184 dbp++;
4185 continue; /* advance char */
4186 }
4187
4188 /* We are at the beginning of a token. */
4189 switch (c_tolower (*dbp))
4190 {
4191 case 'f':
4192 if (!packages_only && nocase_tail ("function"))
4193 Ada_getit (inf, "/f");
4194 else
4195 break; /* from switch */
4196 continue; /* advance char */
4197 case 'p':
4198 if (!packages_only && nocase_tail ("procedure"))
4199 Ada_getit (inf, "/p");
4200 else if (nocase_tail ("package"))
4201 Ada_getit (inf, "/s");
4202 else if (nocase_tail ("protected")) /* protected type */
4203 Ada_getit (inf, "/t");
4204 else
4205 break; /* from switch */
4206 continue; /* advance char */
4207
4208 case 'u':
4209 if (typedefs && !packages_only && nocase_tail ("use"))
4210 {
4211 /* when tagging types, avoid tagging use type Pack.Typename;
4212 for this, we will skip everything till a ; */
4213 skip_till_semicolumn = true;
4214 continue; /* advance char */
4215 }
4216
4217 case 't':
4218 if (!packages_only && nocase_tail ("task"))
4219 Ada_getit (inf, "/k");
4220 else if (typedefs && !packages_only && nocase_tail ("type"))
4221 {
4222 Ada_getit (inf, "/t");
4223 while (*dbp != '\0')
4224 dbp += 1;
4225 }
4226 else
4227 break; /* from switch */
4228 continue; /* advance char */
4229 }
4230
4231 /* Look for the end of the token. */
4232 while (!endtoken (*dbp))
4233 dbp++;
4234
4235 } /* advance char */
4236 } /* advance line */
4237 }
4238
4239 \f
4240 /*
4241 * Unix and microcontroller assembly tag handling
4242 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4243 * Idea by Bob Weiner, Motorola Inc. (1994)
4244 */
4245 static void
4246 Asm_labels (FILE *inf)
4247 {
4248 register char *cp;
4249
4250 LOOP_ON_INPUT_LINES (inf, lb, cp)
4251 {
4252 /* If first char is alphabetic or one of [_.$], test for colon
4253 following identifier. */
4254 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4255 {
4256 /* Read past label. */
4257 cp++;
4258 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4259 cp++;
4260 if (*cp == ':' || c_isspace (*cp))
4261 /* Found end of label, so copy it and add it to the table. */
4262 make_tag (lb.buffer, cp - lb.buffer, true,
4263 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4264 }
4265 }
4266 }
4267
4268 \f
4269 /*
4270 * Perl support
4271 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4272 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4273 * Perl variable names: /^(my|local).../
4274 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4275 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4276 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4277 */
4278 static void
4279 Perl_functions (FILE *inf)
4280 {
4281 char *package = savestr ("main"); /* current package name */
4282 register char *cp;
4283
4284 LOOP_ON_INPUT_LINES (inf, lb, cp)
4285 {
4286 cp = skip_spaces (cp);
4287
4288 if (LOOKING_AT (cp, "package"))
4289 {
4290 free (package);
4291 get_tag (cp, &package);
4292 }
4293 else if (LOOKING_AT (cp, "sub"))
4294 {
4295 char *pos, *sp;
4296
4297 subr:
4298 sp = cp;
4299 while (!notinname (*cp))
4300 cp++;
4301 if (cp == sp)
4302 continue; /* nothing found */
4303 pos = strchr (sp, ':');
4304 if (pos && pos < cp && pos[1] == ':')
4305 /* The name is already qualified. */
4306 make_tag (sp, cp - sp, true,
4307 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308 else
4309 /* Qualify it. */
4310 {
4311 char savechar, *name;
4312
4313 savechar = *cp;
4314 *cp = '\0';
4315 name = concat (package, "::", sp);
4316 *cp = savechar;
4317 make_tag (name, strlen (name), true,
4318 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4319 free (name);
4320 }
4321 }
4322 else if (LOOKING_AT (cp, "use constant")
4323 || LOOKING_AT (cp, "use constant::defer"))
4324 {
4325 /* For hash style multi-constant like
4326 use constant { FOO => 123,
4327 BAR => 456 };
4328 only the first FOO is picked up. Parsing across the value
4329 expressions would be difficult in general, due to possible nested
4330 hashes, here-documents, etc. */
4331 if (*cp == '{')
4332 cp = skip_spaces (cp+1);
4333 goto subr;
4334 }
4335 else if (globals) /* only if we are tagging global vars */
4336 {
4337 /* Skip a qualifier, if any. */
4338 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4339 /* After "my" or "local", but before any following paren or space. */
4340 char *varstart = cp;
4341
4342 if (qual /* should this be removed? If yes, how? */
4343 && (*cp == '$' || *cp == '@' || *cp == '%'))
4344 {
4345 varstart += 1;
4346 do
4347 cp++;
4348 while (c_isalnum (*cp) || *cp == '_');
4349 }
4350 else if (qual)
4351 {
4352 /* Should be examining a variable list at this point;
4353 could insist on seeing an open parenthesis. */
4354 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4355 cp++;
4356 }
4357 else
4358 continue;
4359
4360 make_tag (varstart, cp - varstart, false,
4361 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4362 }
4363 }
4364 free (package);
4365 }
4366
4367
4368 /*
4369 * Python support
4370 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4371 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4372 * More ideas by seb bacon <seb@jamkit.com> (2002)
4373 */
4374 static void
4375 Python_functions (FILE *inf)
4376 {
4377 register char *cp;
4378
4379 LOOP_ON_INPUT_LINES (inf, lb, cp)
4380 {
4381 cp = skip_spaces (cp);
4382 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4383 {
4384 char *name = cp;
4385 while (!notinname (*cp) && *cp != ':')
4386 cp++;
4387 make_tag (name, cp - name, true,
4388 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4389 }
4390 }
4391 }
4392
4393 \f
4394 /*
4395 * PHP support
4396 * Look for:
4397 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4398 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4399 * - /^[ \t]*define\(\"[^\"]+/
4400 * Only with --members:
4401 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4402 * Idea by Diez B. Roggisch (2001)
4403 */
4404 static void
4405 PHP_functions (FILE *inf)
4406 {
4407 char *cp, *name;
4408 bool search_identifier = false;
4409
4410 LOOP_ON_INPUT_LINES (inf, lb, cp)
4411 {
4412 cp = skip_spaces (cp);
4413 name = cp;
4414 if (search_identifier
4415 && *cp != '\0')
4416 {
4417 while (!notinname (*cp))
4418 cp++;
4419 make_tag (name, cp - name, true,
4420 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4421 search_identifier = false;
4422 }
4423 else if (LOOKING_AT (cp, "function"))
4424 {
4425 if (*cp == '&')
4426 cp = skip_spaces (cp+1);
4427 if (*cp != '\0')
4428 {
4429 name = cp;
4430 while (!notinname (*cp))
4431 cp++;
4432 make_tag (name, cp - name, true,
4433 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4434 }
4435 else
4436 search_identifier = true;
4437 }
4438 else if (LOOKING_AT (cp, "class"))
4439 {
4440 if (*cp != '\0')
4441 {
4442 name = cp;
4443 while (*cp != '\0' && !c_isspace (*cp))
4444 cp++;
4445 make_tag (name, cp - name, false,
4446 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4447 }
4448 else
4449 search_identifier = true;
4450 }
4451 else if (strneq (cp, "define", 6)
4452 && (cp = skip_spaces (cp+6))
4453 && *cp++ == '('
4454 && (*cp == '"' || *cp == '\''))
4455 {
4456 char quote = *cp++;
4457 name = cp;
4458 while (*cp != quote && *cp != '\0')
4459 cp++;
4460 make_tag (name, cp - name, false,
4461 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4462 }
4463 else if (members
4464 && LOOKING_AT (cp, "var")
4465 && *cp == '$')
4466 {
4467 name = cp;
4468 while (!notinname (*cp))
4469 cp++;
4470 make_tag (name, cp - name, false,
4471 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4472 }
4473 }
4474 }
4475
4476 \f
4477 /*
4478 * Cobol tag functions
4479 * We could look for anything that could be a paragraph name.
4480 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4481 * Idea by Corny de Souza (1993)
4482 */
4483 static void
4484 Cobol_paragraphs (FILE *inf)
4485 {
4486 register char *bp, *ep;
4487
4488 LOOP_ON_INPUT_LINES (inf, lb, bp)
4489 {
4490 if (lb.len < 9)
4491 continue;
4492 bp += 8;
4493
4494 /* If eoln, compiler option or comment ignore whole line. */
4495 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4496 continue;
4497
4498 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4499 continue;
4500 if (*ep++ == '.')
4501 make_tag (bp, ep - bp, true,
4502 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4503 }
4504 }
4505
4506 \f
4507 /*
4508 * Makefile support
4509 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4510 */
4511 static void
4512 Makefile_targets (FILE *inf)
4513 {
4514 register char *bp;
4515
4516 LOOP_ON_INPUT_LINES (inf, lb, bp)
4517 {
4518 if (*bp == '\t' || *bp == '#')
4519 continue;
4520 while (*bp != '\0' && *bp != '=' && *bp != ':')
4521 bp++;
4522 if (*bp == ':' || (globals && *bp == '='))
4523 {
4524 /* We should detect if there is more than one tag, but we do not.
4525 We just skip initial and final spaces. */
4526 char * namestart = skip_spaces (lb.buffer);
4527 while (--bp > namestart)
4528 if (!notinname (*bp))
4529 break;
4530 make_tag (namestart, bp - namestart + 1, true,
4531 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4532 }
4533 }
4534 }
4535
4536 \f
4537 /*
4538 * Pascal parsing
4539 * Original code by Mosur K. Mohan (1989)
4540 *
4541 * Locates tags for procedures & functions. Doesn't do any type- or
4542 * var-definitions. It does look for the keyword "extern" or
4543 * "forward" immediately following the procedure statement; if found,
4544 * the tag is skipped.
4545 */
4546 static void
4547 Pascal_functions (FILE *inf)
4548 {
4549 linebuffer tline; /* mostly copied from C_entries */
4550 long save_lcno;
4551 int save_lineno, namelen, taglen;
4552 char c, *name;
4553
4554 bool /* each of these flags is true if: */
4555 incomment, /* point is inside a comment */
4556 inquote, /* point is inside '..' string */
4557 get_tagname, /* point is after PROCEDURE/FUNCTION
4558 keyword, so next item = potential tag */
4559 found_tag, /* point is after a potential tag */
4560 inparms, /* point is within parameter-list */
4561 verify_tag; /* point has passed the parm-list, so the
4562 next token will determine whether this
4563 is a FORWARD/EXTERN to be ignored, or
4564 whether it is a real tag */
4565
4566 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4567 name = NULL; /* keep compiler quiet */
4568 dbp = lb.buffer;
4569 *dbp = '\0';
4570 linebuffer_init (&tline);
4571
4572 incomment = inquote = false;
4573 found_tag = false; /* have a proc name; check if extern */
4574 get_tagname = false; /* found "procedure" keyword */
4575 inparms = false; /* found '(' after "proc" */
4576 verify_tag = false; /* check if "extern" is ahead */
4577
4578
4579 while (!feof (inf)) /* long main loop to get next char */
4580 {
4581 c = *dbp++;
4582 if (c == '\0') /* if end of line */
4583 {
4584 readline (&lb, inf);
4585 dbp = lb.buffer;
4586 if (*dbp == '\0')
4587 continue;
4588 if (!((found_tag && verify_tag)
4589 || get_tagname))
4590 c = *dbp++; /* only if don't need *dbp pointing
4591 to the beginning of the name of
4592 the procedure or function */
4593 }
4594 if (incomment)
4595 {
4596 if (c == '}') /* within { } comments */
4597 incomment = false;
4598 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4599 {
4600 dbp++;
4601 incomment = false;
4602 }
4603 continue;
4604 }
4605 else if (inquote)
4606 {
4607 if (c == '\'')
4608 inquote = false;
4609 continue;
4610 }
4611 else
4612 switch (c)
4613 {
4614 case '\'':
4615 inquote = true; /* found first quote */
4616 continue;
4617 case '{': /* found open { comment */
4618 incomment = true;
4619 continue;
4620 case '(':
4621 if (*dbp == '*') /* found open (* comment */
4622 {
4623 incomment = true;
4624 dbp++;
4625 }
4626 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4627 inparms = true;
4628 continue;
4629 case ')': /* end of parms list */
4630 if (inparms)
4631 inparms = false;
4632 continue;
4633 case ';':
4634 if (found_tag && !inparms) /* end of proc or fn stmt */
4635 {
4636 verify_tag = true;
4637 break;
4638 }
4639 continue;
4640 }
4641 if (found_tag && verify_tag && (*dbp != ' '))
4642 {
4643 /* Check if this is an "extern" declaration. */
4644 if (*dbp == '\0')
4645 continue;
4646 if (c_tolower (*dbp) == 'e')
4647 {
4648 if (nocase_tail ("extern")) /* superfluous, really! */
4649 {
4650 found_tag = false;
4651 verify_tag = false;
4652 }
4653 }
4654 else if (c_tolower (*dbp) == 'f')
4655 {
4656 if (nocase_tail ("forward")) /* check for forward reference */
4657 {
4658 found_tag = false;
4659 verify_tag = false;
4660 }
4661 }
4662 if (found_tag && verify_tag) /* not external proc, so make tag */
4663 {
4664 found_tag = false;
4665 verify_tag = false;
4666 make_tag (name, namelen, true,
4667 tline.buffer, taglen, save_lineno, save_lcno);
4668 continue;
4669 }
4670 }
4671 if (get_tagname) /* grab name of proc or fn */
4672 {
4673 char *cp;
4674
4675 if (*dbp == '\0')
4676 continue;
4677
4678 /* Find block name. */
4679 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4680 continue;
4681
4682 /* Save all values for later tagging. */
4683 linebuffer_setlen (&tline, lb.len);
4684 strcpy (tline.buffer, lb.buffer);
4685 save_lineno = lineno;
4686 save_lcno = linecharno;
4687 name = tline.buffer + (dbp - lb.buffer);
4688 namelen = cp - dbp;
4689 taglen = cp - lb.buffer + 1;
4690
4691 dbp = cp; /* set dbp to e-o-token */
4692 get_tagname = false;
4693 found_tag = true;
4694 continue;
4695
4696 /* And proceed to check for "extern". */
4697 }
4698 else if (!incomment && !inquote && !found_tag)
4699 {
4700 /* Check for proc/fn keywords. */
4701 switch (c_tolower (c))
4702 {
4703 case 'p':
4704 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4705 get_tagname = true;
4706 continue;
4707 case 'f':
4708 if (nocase_tail ("unction"))
4709 get_tagname = true;
4710 continue;
4711 }
4712 }
4713 } /* while not eof */
4714
4715 free (tline.buffer);
4716 }
4717
4718 \f
4719 /*
4720 * Lisp tag functions
4721 * look for (def or (DEF, quote or QUOTE
4722 */
4723
4724 static void L_getit (void);
4725
4726 static void
4727 L_getit (void)
4728 {
4729 if (*dbp == '\'') /* Skip prefix quote */
4730 dbp++;
4731 else if (*dbp == '(')
4732 {
4733 dbp++;
4734 /* Try to skip "(quote " */
4735 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4736 /* Ok, then skip "(" before name in (defstruct (foo)) */
4737 dbp = skip_spaces (dbp);
4738 }
4739 get_tag (dbp, NULL);
4740 }
4741
4742 static void
4743 Lisp_functions (FILE *inf)
4744 {
4745 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4746 {
4747 if (dbp[0] != '(')
4748 continue;
4749
4750 /* "(defvar foo)" is a declaration rather than a definition. */
4751 if (! declarations)
4752 {
4753 char *p = dbp + 1;
4754 if (LOOKING_AT (p, "defvar"))
4755 {
4756 p = skip_name (p); /* past var name */
4757 p = skip_spaces (p);
4758 if (*p == ')')
4759 continue;
4760 }
4761 }
4762
4763 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4764 dbp += 3;
4765
4766 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4767 {
4768 dbp = skip_non_spaces (dbp);
4769 dbp = skip_spaces (dbp);
4770 L_getit ();
4771 }
4772 else
4773 {
4774 /* Check for (foo::defmumble name-defined ... */
4775 do
4776 dbp++;
4777 while (!notinname (*dbp) && *dbp != ':');
4778 if (*dbp == ':')
4779 {
4780 do
4781 dbp++;
4782 while (*dbp == ':');
4783
4784 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4785 {
4786 dbp = skip_non_spaces (dbp);
4787 dbp = skip_spaces (dbp);
4788 L_getit ();
4789 }
4790 }
4791 }
4792 }
4793 }
4794
4795 \f
4796 /*
4797 * Lua script language parsing
4798 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4799 *
4800 * "function" and "local function" are tags if they start at column 1.
4801 */
4802 static void
4803 Lua_functions (FILE *inf)
4804 {
4805 register char *bp;
4806
4807 LOOP_ON_INPUT_LINES (inf, lb, bp)
4808 {
4809 if (bp[0] != 'f' && bp[0] != 'l')
4810 continue;
4811
4812 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4813
4814 if (LOOKING_AT (bp, "function"))
4815 get_tag (bp, NULL);
4816 }
4817 }
4818
4819 \f
4820 /*
4821 * PostScript tags
4822 * Just look for lines where the first character is '/'
4823 * Also look at "defineps" for PSWrap
4824 * Ideas by:
4825 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4826 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4827 */
4828 static void
4829 PS_functions (FILE *inf)
4830 {
4831 register char *bp, *ep;
4832
4833 LOOP_ON_INPUT_LINES (inf, lb, bp)
4834 {
4835 if (bp[0] == '/')
4836 {
4837 for (ep = bp+1;
4838 *ep != '\0' && *ep != ' ' && *ep != '{';
4839 ep++)
4840 continue;
4841 make_tag (bp, ep - bp, true,
4842 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4843 }
4844 else if (LOOKING_AT (bp, "defineps"))
4845 get_tag (bp, NULL);
4846 }
4847 }
4848
4849 \f
4850 /*
4851 * Forth tags
4852 * Ignore anything after \ followed by space or in ( )
4853 * Look for words defined by :
4854 * Look for constant, code, create, defer, value, and variable
4855 * OBP extensions: Look for buffer:, field,
4856 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4857 */
4858 static void
4859 Forth_words (FILE *inf)
4860 {
4861 register char *bp;
4862
4863 LOOP_ON_INPUT_LINES (inf, lb, bp)
4864 while ((bp = skip_spaces (bp))[0] != '\0')
4865 if (bp[0] == '\\' && c_isspace (bp[1]))
4866 break; /* read next line */
4867 else if (bp[0] == '(' && c_isspace (bp[1]))
4868 do /* skip to ) or eol */
4869 bp++;
4870 while (*bp != ')' && *bp != '\0');
4871 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
4872 || LOOKING_AT_NOCASE (bp, "constant")
4873 || LOOKING_AT_NOCASE (bp, "code")
4874 || LOOKING_AT_NOCASE (bp, "create")
4875 || LOOKING_AT_NOCASE (bp, "defer")
4876 || LOOKING_AT_NOCASE (bp, "value")
4877 || LOOKING_AT_NOCASE (bp, "variable")
4878 || LOOKING_AT_NOCASE (bp, "buffer:")
4879 || LOOKING_AT_NOCASE (bp, "field"))
4880 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4881 else
4882 bp = skip_non_spaces (bp);
4883 }
4884
4885 \f
4886 /*
4887 * Scheme tag functions
4888 * look for (def... xyzzy
4889 * (def... (xyzzy
4890 * (def ... ((...(xyzzy ....
4891 * (set! xyzzy
4892 * Original code by Ken Haase (1985?)
4893 */
4894 static void
4895 Scheme_functions (FILE *inf)
4896 {
4897 register char *bp;
4898
4899 LOOP_ON_INPUT_LINES (inf, lb, bp)
4900 {
4901 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4902 {
4903 bp = skip_non_spaces (bp+4);
4904 /* Skip over open parens and white space. Don't continue past
4905 '\0'. */
4906 while (*bp && notinname (*bp))
4907 bp++;
4908 get_tag (bp, NULL);
4909 }
4910 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4911 get_tag (bp, NULL);
4912 }
4913 }
4914
4915 \f
4916 /* Find tags in TeX and LaTeX input files. */
4917
4918 /* TEX_toktab is a table of TeX control sequences that define tags.
4919 * Each entry records one such control sequence.
4920 *
4921 * Original code from who knows whom.
4922 * Ideas by:
4923 * Stefan Monnier (2002)
4924 */
4925
4926 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4927
4928 /* Default set of control sequences to put into TEX_toktab.
4929 The value of environment var TEXTAGS is prepended to this. */
4930 static const char *TEX_defenv = "\
4931 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4932 :part:appendix:entry:index:def\
4933 :newcommand:renewcommand:newenvironment:renewenvironment";
4934
4935 static void TEX_mode (FILE *);
4936 static void TEX_decode_env (const char *, const char *);
4937
4938 static char TEX_esc = '\\';
4939 static char TEX_opgrp = '{';
4940 static char TEX_clgrp = '}';
4941
4942 /*
4943 * TeX/LaTeX scanning loop.
4944 */
4945 static void
4946 TeX_commands (FILE *inf)
4947 {
4948 char *cp;
4949 linebuffer *key;
4950
4951 /* Select either \ or ! as escape character. */
4952 TEX_mode (inf);
4953
4954 /* Initialize token table once from environment. */
4955 if (TEX_toktab == NULL)
4956 TEX_decode_env ("TEXTAGS", TEX_defenv);
4957
4958 LOOP_ON_INPUT_LINES (inf, lb, cp)
4959 {
4960 /* Look at each TEX keyword in line. */
4961 for (;;)
4962 {
4963 /* Look for a TEX escape. */
4964 while (*cp++ != TEX_esc)
4965 if (cp[-1] == '\0' || cp[-1] == '%')
4966 goto tex_next_line;
4967
4968 for (key = TEX_toktab; key->buffer != NULL; key++)
4969 if (strneq (cp, key->buffer, key->len))
4970 {
4971 char *p;
4972 int namelen, linelen;
4973 bool opgrp = false;
4974
4975 cp = skip_spaces (cp + key->len);
4976 if (*cp == TEX_opgrp)
4977 {
4978 opgrp = true;
4979 cp++;
4980 }
4981 for (p = cp;
4982 (!c_isspace (*p) && *p != '#' &&
4983 *p != TEX_opgrp && *p != TEX_clgrp);
4984 p++)
4985 continue;
4986 namelen = p - cp;
4987 linelen = lb.len;
4988 if (!opgrp || *p == TEX_clgrp)
4989 {
4990 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4991 p++;
4992 linelen = p - lb.buffer + 1;
4993 }
4994 make_tag (cp, namelen, true,
4995 lb.buffer, linelen, lineno, linecharno);
4996 goto tex_next_line; /* We only tag a line once */
4997 }
4998 }
4999 tex_next_line:
5000 ;
5001 }
5002 }
5003
5004 #define TEX_LESC '\\'
5005 #define TEX_SESC '!'
5006
5007 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5008 chars accordingly. */
5009 static void
5010 TEX_mode (FILE *inf)
5011 {
5012 int c;
5013
5014 while ((c = getc (inf)) != EOF)
5015 {
5016 /* Skip to next line if we hit the TeX comment char. */
5017 if (c == '%')
5018 while (c != '\n' && c != EOF)
5019 c = getc (inf);
5020 else if (c == TEX_LESC || c == TEX_SESC )
5021 break;
5022 }
5023
5024 if (c == TEX_LESC)
5025 {
5026 TEX_esc = TEX_LESC;
5027 TEX_opgrp = '{';
5028 TEX_clgrp = '}';
5029 }
5030 else
5031 {
5032 TEX_esc = TEX_SESC;
5033 TEX_opgrp = '<';
5034 TEX_clgrp = '>';
5035 }
5036 rewind (inf);
5037 }
5038
5039 /* Read environment and prepend it to the default string.
5040 Build token table. */
5041 static void
5042 TEX_decode_env (const char *evarname, const char *defenv)
5043 {
5044 register const char *env, *p;
5045 int i, len;
5046
5047 /* Append default string to environment. */
5048 env = getenv (evarname);
5049 if (!env)
5050 env = defenv;
5051 else
5052 env = concat (env, defenv, "");
5053
5054 /* Allocate a token table */
5055 for (len = 1, p = env; (p = strchr (p, ':')); )
5056 if (*++p)
5057 len++;
5058 TEX_toktab = xnew (len, linebuffer);
5059
5060 /* Unpack environment string into token table. Be careful about */
5061 /* zero-length strings (leading ':', "::" and trailing ':') */
5062 for (i = 0; *env != '\0';)
5063 {
5064 p = strchr (env, ':');
5065 if (!p) /* End of environment string. */
5066 p = env + strlen (env);
5067 if (p - env > 0)
5068 { /* Only non-zero strings. */
5069 TEX_toktab[i].buffer = savenstr (env, p - env);
5070 TEX_toktab[i].len = p - env;
5071 i++;
5072 }
5073 if (*p)
5074 env = p + 1;
5075 else
5076 {
5077 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5078 TEX_toktab[i].len = 0;
5079 break;
5080 }
5081 }
5082 }
5083
5084 \f
5085 /* Texinfo support. Dave Love, Mar. 2000. */
5086 static void
5087 Texinfo_nodes (FILE *inf)
5088 {
5089 char *cp, *start;
5090 LOOP_ON_INPUT_LINES (inf, lb, cp)
5091 if (LOOKING_AT (cp, "@node"))
5092 {
5093 start = cp;
5094 while (*cp != '\0' && *cp != ',')
5095 cp++;
5096 make_tag (start, cp - start, true,
5097 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5098 }
5099 }
5100
5101 \f
5102 /*
5103 * HTML support.
5104 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5105 * Contents of <a name=xxx> are tags with name xxx.
5106 *
5107 * Francesco Potortì, 2002.
5108 */
5109 static void
5110 HTML_labels (FILE *inf)
5111 {
5112 bool getnext = false; /* next text outside of HTML tags is a tag */
5113 bool skiptag = false; /* skip to the end of the current HTML tag */
5114 bool intag = false; /* inside an html tag, looking for ID= */
5115 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5116 char *end;
5117
5118
5119 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5120
5121 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5122 for (;;) /* loop on the same line */
5123 {
5124 if (skiptag) /* skip HTML tag */
5125 {
5126 while (*dbp != '\0' && *dbp != '>')
5127 dbp++;
5128 if (*dbp == '>')
5129 {
5130 dbp += 1;
5131 skiptag = false;
5132 continue; /* look on the same line */
5133 }
5134 break; /* go to next line */
5135 }
5136
5137 else if (intag) /* look for "name=" or "id=" */
5138 {
5139 while (*dbp != '\0' && *dbp != '>'
5140 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5141 dbp++;
5142 if (*dbp == '\0')
5143 break; /* go to next line */
5144 if (*dbp == '>')
5145 {
5146 dbp += 1;
5147 intag = false;
5148 continue; /* look on the same line */
5149 }
5150 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5151 || LOOKING_AT_NOCASE (dbp, "id="))
5152 {
5153 bool quoted = (dbp[0] == '"');
5154
5155 if (quoted)
5156 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5157 continue;
5158 else
5159 for (end = dbp; *end != '\0' && intoken (*end); end++)
5160 continue;
5161 linebuffer_setlen (&token_name, end - dbp);
5162 memcpy (token_name.buffer, dbp, end - dbp);
5163 token_name.buffer[end - dbp] = '\0';
5164
5165 dbp = end;
5166 intag = false; /* we found what we looked for */
5167 skiptag = true; /* skip to the end of the tag */
5168 getnext = true; /* then grab the text */
5169 continue; /* look on the same line */
5170 }
5171 dbp += 1;
5172 }
5173
5174 else if (getnext) /* grab next tokens and tag them */
5175 {
5176 dbp = skip_spaces (dbp);
5177 if (*dbp == '\0')
5178 break; /* go to next line */
5179 if (*dbp == '<')
5180 {
5181 intag = true;
5182 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5183 continue; /* look on the same line */
5184 }
5185
5186 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5187 continue;
5188 make_tag (token_name.buffer, token_name.len, true,
5189 dbp, end - dbp, lineno, linecharno);
5190 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5191 getnext = false;
5192 break; /* go to next line */
5193 }
5194
5195 else /* look for an interesting HTML tag */
5196 {
5197 while (*dbp != '\0' && *dbp != '<')
5198 dbp++;
5199 if (*dbp == '\0')
5200 break; /* go to next line */
5201 intag = true;
5202 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5203 {
5204 inanchor = true;
5205 continue; /* look on the same line */
5206 }
5207 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5208 || LOOKING_AT_NOCASE (dbp, "<h1>")
5209 || LOOKING_AT_NOCASE (dbp, "<h2>")
5210 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5211 {
5212 intag = false;
5213 getnext = true;
5214 continue; /* look on the same line */
5215 }
5216 dbp += 1;
5217 }
5218 }
5219 }
5220
5221 \f
5222 /*
5223 * Prolog support
5224 *
5225 * Assumes that the predicate or rule starts at column 0.
5226 * Only the first clause of a predicate or rule is added.
5227 * Original code by Sunichirou Sugou (1989)
5228 * Rewritten by Anders Lindgren (1996)
5229 */
5230 static size_t prolog_pr (char *, char *);
5231 static void prolog_skip_comment (linebuffer *, FILE *);
5232 static size_t prolog_atom (char *, size_t);
5233
5234 static void
5235 Prolog_functions (FILE *inf)
5236 {
5237 char *cp, *last;
5238 size_t len;
5239 size_t allocated;
5240
5241 allocated = 0;
5242 len = 0;
5243 last = NULL;
5244
5245 LOOP_ON_INPUT_LINES (inf, lb, cp)
5246 {
5247 if (cp[0] == '\0') /* Empty line */
5248 continue;
5249 else if (c_isspace (cp[0])) /* Not a predicate */
5250 continue;
5251 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5252 prolog_skip_comment (&lb, inf);
5253 else if ((len = prolog_pr (cp, last)) > 0)
5254 {
5255 /* Predicate or rule. Store the function name so that we
5256 only generate a tag for the first clause. */
5257 if (last == NULL)
5258 last = xnew (len + 1, char);
5259 else if (len + 1 > allocated)
5260 xrnew (last, len + 1, char);
5261 allocated = len + 1;
5262 memcpy (last, cp, len);
5263 last[len] = '\0';
5264 }
5265 }
5266 free (last);
5267 }
5268
5269
5270 static void
5271 prolog_skip_comment (linebuffer *plb, FILE *inf)
5272 {
5273 char *cp;
5274
5275 do
5276 {
5277 for (cp = plb->buffer; *cp != '\0'; cp++)
5278 if (cp[0] == '*' && cp[1] == '/')
5279 return;
5280 readline (plb, inf);
5281 }
5282 while (!feof (inf));
5283 }
5284
5285 /*
5286 * A predicate or rule definition is added if it matches:
5287 * <beginning of line><Prolog Atom><whitespace>(
5288 * or <beginning of line><Prolog Atom><whitespace>:-
5289 *
5290 * It is added to the tags database if it doesn't match the
5291 * name of the previous clause header.
5292 *
5293 * Return the size of the name of the predicate or rule, or 0 if no
5294 * header was found.
5295 */
5296 static size_t
5297 prolog_pr (char *s, char *last)
5298
5299 /* Name of last clause. */
5300 {
5301 size_t pos;
5302 size_t len;
5303
5304 pos = prolog_atom (s, 0);
5305 if (! pos)
5306 return 0;
5307
5308 len = pos;
5309 pos = skip_spaces (s + pos) - s;
5310
5311 if ((s[pos] == '.'
5312 || (s[pos] == '(' && (pos += 1))
5313 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5314 && (last == NULL /* save only the first clause */
5315 || len != strlen (last)
5316 || !strneq (s, last, len)))
5317 {
5318 make_tag (s, len, true, s, pos, lineno, linecharno);
5319 return len;
5320 }
5321 else
5322 return 0;
5323 }
5324
/*
 * Consume a Prolog atom starting at offset POS of S.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted atom: scan for the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        /* Backslash quotes the next character, which must exist.  */
        if (s[pos+1] == '\0')
          return 0;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* A double quote */
        break;

      default:
        pos++;
        break;
      }
}
5381
5382 \f
5383 /*
5384 * Support for Erlang
5385 *
5386 * Generates tags for functions, defines, and records.
5387 * Assumes that Erlang functions start at column 0.
5388 * Original code by Anders Lindgren (1996)
5389 */
5390 static int erlang_func (char *, char *);
5391 static void erlang_attribute (char *);
5392 static int erlang_atom (char *);
5393
5394 static void
5395 Erlang_functions (FILE *inf)
5396 {
5397 char *cp, *last;
5398 int len;
5399 int allocated;
5400
5401 allocated = 0;
5402 len = 0;
5403 last = NULL;
5404
5405 LOOP_ON_INPUT_LINES (inf, lb, cp)
5406 {
5407 if (cp[0] == '\0') /* Empty line */
5408 continue;
5409 else if (c_isspace (cp[0])) /* Not function nor attribute */
5410 continue;
5411 else if (cp[0] == '%') /* comment */
5412 continue;
5413 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5414 continue;
5415 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5416 {
5417 erlang_attribute (cp);
5418 if (last != NULL)
5419 {
5420 free (last);
5421 last = NULL;
5422 }
5423 }
5424 else if ((len = erlang_func (cp, last)) > 0)
5425 {
5426 /*
5427 * Function. Store the function name so that we only
5428 * generates a tag for the first clause.
5429 */
5430 if (last == NULL)
5431 last = xnew (len + 1, char);
5432 else if (len + 1 > allocated)
5433 xrnew (last, len + 1, char);
5434 allocated = len + 1;
5435 memcpy (last, cp, len);
5436 last[len] = '\0';
5437 }
5438 }
5439 free (last);
5440 }
5441
5442
5443 /*
5444 * A function definition is added if it matches:
5445 * <beginning of line><Erlang Atom><whitespace>(
5446 *
5447 * It is added to the tags database if it doesn't match the
5448 * name of the previous clause header.
5449 *
5450 * Return the size of the name of the function, or 0 if no function
5451 * was found.
5452 */
5453 static int
5454 erlang_func (char *s, char *last)
5455
5456 /* Name of last clause. */
5457 {
5458 int pos;
5459 int len;
5460
5461 pos = erlang_atom (s);
5462 if (pos < 1)
5463 return 0;
5464
5465 len = pos;
5466 pos = skip_spaces (s + pos) - s;
5467
5468 /* Save only the first clause. */
5469 if (s[pos++] == '('
5470 && (last == NULL
5471 || len != (int)strlen (last)
5472 || !strneq (s, last, len)))
5473 {
5474 make_tag (s, len, true, s, pos, lineno, linecharno);
5475 return len;
5476 }
5477
5478 return 0;
5479 }
5480
5481
5482 /*
5483 * Handle attributes. Currently, tags are generated for defines
5484 * and records.
5485 *
5486 * They are on the form:
5487 * -define(foo, bar).
5488 * -define(Foo(M, N), M+N).
5489 * -record(graph, {vtab = notable, cyclic = true}).
5490 */
5491 static void
5492 erlang_attribute (char *s)
5493 {
5494 char *cp = s;
5495
5496 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5497 && *cp++ == '(')
5498 {
5499 int len = erlang_atom (skip_spaces (cp));
5500 if (len > 0)
5501 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5502 }
5503 return;
5504 }
5505
5506
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes, inside which a backslash quotes the
 * following character.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (pos = 1; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }
  else if (s[0] == '\'')
    {
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* Step onto the quoted character, which must exist.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      pos++;                    /* include the closing quote */
    }

  return pos;
}
5534
5535 \f
5536 static char *scan_separators (char *);
5537 static void add_regex (char *, language *);
5538 static char *substitute (char *, char *, struct re_registers *);
5539
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The recognized escapes are:
 *   \a BEL (bell)          \b BS (back space)   \d DEL (delete)
 *   \e ESC (escape)        \f FF (form feed)    \n NL (new line)
 *   \r CR (carriage ret.)  \t TAB (horiz. tab)  \v VT (vertical tab)
 * A quoted separator stands for the separator itself; any other
 * quoted character is copied together with its backslash.
 */
static char *
scan_separators (char *name)
{
  /* Escape letters and, at the same index, the character each one
     stands for.  */
  static const char esc_letters[] = "abdefnrtv";
  static const char esc_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  const char sep = name[0];
  char *out = name;             /* where the next character is stored */
  char *in;                     /* character being examined */
  bool escaped = false;         /* previous character was a backslash */

  for (in = name + 1; *in != '\0'; in++)
    {
      const char ch = *in;
      int k;

      if (!escaped)
        {
          if (ch == '\\')
            escaped = true;
          else if (ch == sep)
            break;              /* unquoted separator: end of string */
          else
            *out++ = ch;
          continue;
        }

      escaped = false;

      /* Escape letters take precedence over a quoted separator.  */
      for (k = 0; esc_letters[k] != '\0' && esc_letters[k] != ch; k++)
        continue;

      if (esc_letters[k] != '\0')
        *out++ = esc_values[k];
      else if (ch == sep)
        *out++ = sep;
      else
        {
          /* Something else is quoted, so preserve the quote.  */
          *out++ = '\\';
          *out++ = ch;
        }
    }

  if (*in != sep)
    in = NULL;                  /* signal unterminated regexp */

  /* Terminate copied string.  */
  *out = '\0';
  return in;
}
5598
5599 /* Look at the argument of --regex or --no-regex and do the right
5600 thing. Same for each line of a regexp file. */
5601 static void
5602 analyze_regex (char *regex_arg)
5603 {
5604 if (regex_arg == NULL)
5605 {
5606 free_regexps (); /* --no-regex: remove existing regexps */
5607 return;
5608 }
5609
5610 /* A real --regexp option or a line in a regexp file. */
5611 switch (regex_arg[0])
5612 {
5613 /* Comments in regexp file or null arg to --regex. */
5614 case '\0':
5615 case ' ':
5616 case '\t':
5617 break;
5618
5619 /* Read a regex file. This is recursive and may result in a
5620 loop, which will stop when the file descriptors are exhausted. */
5621 case '@':
5622 {
5623 FILE *regexfp;
5624 linebuffer regexbuf;
5625 char *regexfile = regex_arg + 1;
5626
5627 /* regexfile is a file containing regexps, one per line. */
5628 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5629 if (regexfp == NULL)
5630 pfatal (regexfile);
5631 linebuffer_init (&regexbuf);
5632 while (readline_internal (&regexbuf, regexfp) > 0)
5633 analyze_regex (regexbuf.buffer);
5634 free (regexbuf.buffer);
5635 fclose (regexfp);
5636 }
5637 break;
5638
5639 /* Regexp to be used for a specific language only. */
5640 case '{':
5641 {
5642 language *lang;
5643 char *lang_name = regex_arg + 1;
5644 char *cp;
5645
5646 for (cp = lang_name; *cp != '}'; cp++)
5647 if (*cp == '\0')
5648 {
5649 error ("unterminated language name in regex: %s", regex_arg);
5650 return;
5651 }
5652 *cp++ = '\0';
5653 lang = get_language_from_langname (lang_name);
5654 if (lang == NULL)
5655 return;
5656 add_regex (cp, lang);
5657 }
5658 break;
5659
5660 /* Regexp to be used for any language. */
5661 default:
5662 add_regex (regex_arg, NULL);
5663 break;
5664 }
5665 }
5666
5667 /* Separate the regexp pattern, compile it,
5668 and care for optional name and modifiers. */
5669 static void
5670 add_regex (char *regexp_pattern, language *lang)
5671 {
5672 static struct re_pattern_buffer zeropattern;
5673 char sep, *pat, *name, *modifiers;
5674 char empty = '\0';
5675 const char *err;
5676 struct re_pattern_buffer *patbuf;
5677 regexp *rp;
5678 bool
5679 force_explicit_name = true, /* do not use implicit tag names */
5680 ignore_case = false, /* case is significant */
5681 multi_line = false, /* matches are done one line at a time */
5682 single_line = false; /* dot does not match newline */
5683
5684
5685 if (strlen (regexp_pattern) < 3)
5686 {
5687 error ("null regexp");
5688 return;
5689 }
5690 sep = regexp_pattern[0];
5691 name = scan_separators (regexp_pattern);
5692 if (name == NULL)
5693 {
5694 error ("%s: unterminated regexp", regexp_pattern);
5695 return;
5696 }
5697 if (name[1] == sep)
5698 {
5699 error ("null name for regexp \"%s\"", regexp_pattern);
5700 return;
5701 }
5702 modifiers = scan_separators (name);
5703 if (modifiers == NULL) /* no terminating separator --> no name */
5704 {
5705 modifiers = name;
5706 name = &empty;
5707 }
5708 else
5709 modifiers += 1; /* skip separator */
5710
5711 /* Parse regex modifiers. */
5712 for (; modifiers[0] != '\0'; modifiers++)
5713 switch (modifiers[0])
5714 {
5715 case 'N':
5716 if (modifiers == name)
5717 error ("forcing explicit tag name but no name, ignoring");
5718 force_explicit_name = true;
5719 break;
5720 case 'i':
5721 ignore_case = true;
5722 break;
5723 case 's':
5724 single_line = true;
5725 /* FALLTHRU */
5726 case 'm':
5727 multi_line = true;
5728 need_filebuf = true;
5729 break;
5730 default:
5731 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5732 break;
5733 }
5734
5735 patbuf = xnew (1, struct re_pattern_buffer);
5736 *patbuf = zeropattern;
5737 if (ignore_case)
5738 {
5739 static char lc_trans[UCHAR_MAX + 1];
5740 int i;
5741 for (i = 0; i < UCHAR_MAX + 1; i++)
5742 lc_trans[i] = c_tolower (i);
5743 patbuf->translate = lc_trans; /* translation table to fold case */
5744 }
5745
5746 if (multi_line)
5747 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5748 else
5749 pat = regexp_pattern;
5750
5751 if (single_line)
5752 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5753 else
5754 re_set_syntax (RE_SYNTAX_EMACS);
5755
5756 err = re_compile_pattern (pat, strlen (pat), patbuf);
5757 if (multi_line)
5758 free (pat);
5759 if (err != NULL)
5760 {
5761 error ("%s while compiling pattern", err);
5762 return;
5763 }
5764
5765 rp = p_head;
5766 p_head = xnew (1, regexp);
5767 p_head->pattern = savestr (regexp_pattern);
5768 p_head->p_next = rp;
5769 p_head->lang = lang;
5770 p_head->pat = patbuf;
5771 p_head->name = savestr (name);
5772 p_head->error_signaled = false;
5773 p_head->force_explicit_name = force_explicit_name;
5774 p_head->ignore_case = ignore_case;
5775 p_head->multi_line = multi_line;
5776 }
5777
5778 /*
5779 * Do the substitutions indicated by the regular expression and
5780 * arguments.
5781 */
5782 static char *
5783 substitute (char *in, char *out, struct re_registers *regs)
5784 {
5785 char *result, *t;
5786 int size, dig, diglen;
5787
5788 result = NULL;
5789 size = strlen (out);
5790
5791 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5792 if (out[size - 1] == '\\')
5793 fatal ("pattern error in \"%s\"", out);
5794 for (t = strchr (out, '\\');
5795 t != NULL;
5796 t = strchr (t + 2, '\\'))
5797 if (c_isdigit (t[1]))
5798 {
5799 dig = t[1] - '0';
5800 diglen = regs->end[dig] - regs->start[dig];
5801 size += diglen - 2;
5802 }
5803 else
5804 size -= 1;
5805
5806 /* Allocate space and do the substitutions. */
5807 assert (size >= 0);
5808 result = xnew (size + 1, char);
5809
5810 for (t = result; *out != '\0'; out++)
5811 if (*out == '\\' && c_isdigit (*++out))
5812 {
5813 dig = *out - '0';
5814 diglen = regs->end[dig] - regs->start[dig];
5815 memcpy (t, in + regs->start[dig], diglen);
5816 t += diglen;
5817 }
5818 else
5819 *t++ = *out;
5820 *t = '\0';
5821
5822 assert (t <= result + size);
5823 assert (t - result == (int)strlen (result));
5824
5825 return result;
5826 }
5827
5828 /* Deallocate all regexps. */
5829 static void
5830 free_regexps (void)
5831 {
5832 regexp *rp;
5833 while (p_head != NULL)
5834 {
5835 rp = p_head->p_next;
5836 free (p_head->pattern);
5837 free (p_head->name);
5838 free (p_head);
5839 p_head = rp;
5840 }
5841 return;
5842 }
5843
5844 /*
5845 * Reads the whole file as a single string from `filebuf' and looks for
5846 * multi-line regular expressions, creating tags on matches.
5847 * readline already dealt with normal regexps.
5848 *
5849 * Idea by Ben Wing <ben@666.com> (2002).
5850 */
5851 static void
5852 regex_tag_multiline (void)
5853 {
5854 char *buffer = filebuf.buffer;
5855 regexp *rp;
5856 char *name;
5857
5858 for (rp = p_head; rp != NULL; rp = rp->p_next)
5859 {
5860 int match = 0;
5861
5862 if (!rp->multi_line)
5863 continue; /* skip normal regexps */
5864
5865 /* Generic initializations before parsing file from memory. */
5866 lineno = 1; /* reset global line number */
5867 charno = 0; /* reset global char number */
5868 linecharno = 0; /* reset global char number of line start */
5869
5870 /* Only use generic regexps or those for the current language. */
5871 if (rp->lang != NULL && rp->lang != curfdp->lang)
5872 continue;
5873
5874 while (match >= 0 && match < filebuf.len)
5875 {
5876 match = re_search (rp->pat, buffer, filebuf.len, charno,
5877 filebuf.len - match, &rp->regs);
5878 switch (match)
5879 {
5880 case -2:
5881 /* Some error. */
5882 if (!rp->error_signaled)
5883 {
5884 error ("regexp stack overflow while matching \"%s\"",
5885 rp->pattern);
5886 rp->error_signaled = true;
5887 }
5888 break;
5889 case -1:
5890 /* No match. */
5891 break;
5892 default:
5893 if (match == rp->regs.end[0])
5894 {
5895 if (!rp->error_signaled)
5896 {
5897 error ("regexp matches the empty string: \"%s\"",
5898 rp->pattern);
5899 rp->error_signaled = true;
5900 }
5901 match = -3; /* exit from while loop */
5902 break;
5903 }
5904
5905 /* Match occurred. Construct a tag. */
5906 while (charno < rp->regs.end[0])
5907 if (buffer[charno++] == '\n')
5908 lineno++, linecharno = charno;
5909 name = rp->name;
5910 if (name[0] == '\0')
5911 name = NULL;
5912 else /* make a named tag */
5913 name = substitute (buffer, rp->name, &rp->regs);
5914 if (rp->force_explicit_name)
5915 /* Force explicit tag name, if a name is there. */
5916 pfnote (name, true, buffer + linecharno,
5917 charno - linecharno + 1, lineno, linecharno);
5918 else
5919 make_tag (name, strlen (name), true, buffer + linecharno,
5920 charno - linecharno + 1, lineno, linecharno);
5921 break;
5922 }
5923 }
5924 }
5925 }
5926
5927 \f
5928 static bool
5929 nocase_tail (const char *cp)
5930 {
5931 int len = 0;
5932
5933 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
5934 cp++, len++;
5935 if (*cp == '\0' && !intoken (dbp[len]))
5936 {
5937 dbp += len;
5938 return true;
5939 }
5940 return false;
5941 }
5942
5943 static void
5944 get_tag (register char *bp, char **namepp)
5945 {
5946 register char *cp = bp;
5947
5948 if (*bp != '\0')
5949 {
5950 /* Go till you get to white space or a syntactic break */
5951 for (cp = bp + 1; !notinname (*cp); cp++)
5952 continue;
5953 make_tag (bp, cp - bp, true,
5954 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5955 }
5956
5957 if (namepp != NULL)
5958 *namepp = savenstr (bp, cp - bp);
5959 }
5960
5961 /*
5962 * Read a line of text from `stream' into `lbp', excluding the
5963 * newline or CR-NL, if any. Return the number of characters read from
5964 * `stream', which is the length of the line including the newline.
5965 *
5966 * On DOS or Windows we do not count the CR character, if any before the
5967 * NL, in the returned length; this mirrors the behavior of Emacs on those
5968 * platforms (for text files, it translates CR-NL to NL as it reads in the
5969 * file).
5970 *
5971 * If multi-line regular expressions are requested, each line read is
5972 * appended to `filebuf'.
5973 */
5974 static long
5975 readline_internal (linebuffer *lbp, register FILE *stream)
5976 {
5977 char *buffer = lbp->buffer;
5978 register char *p = lbp->buffer;
5979 register char *pend;
5980 int chars_deleted;
5981
5982 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5983
5984 for (;;)
5985 {
5986 register int c = getc (stream);
5987 if (p == pend)
5988 {
5989 /* We're at the end of linebuffer: expand it. */
5990 lbp->size *= 2;
5991 xrnew (buffer, lbp->size, char);
5992 p += buffer - lbp->buffer;
5993 pend = buffer + lbp->size;
5994 lbp->buffer = buffer;
5995 }
5996 if (c == EOF)
5997 {
5998 *p = '\0';
5999 chars_deleted = 0;
6000 break;
6001 }
6002 if (c == '\n')
6003 {
6004 if (p > buffer && p[-1] == '\r')
6005 {
6006 p -= 1;
6007 #ifdef DOS_NT
6008 /* Assume CRLF->LF translation will be performed by Emacs
6009 when loading this file, so CRs won't appear in the buffer.
6010 It would be cleaner to compensate within Emacs;
6011 however, Emacs does not know how many CRs were deleted
6012 before any given point in the file. */
6013 chars_deleted = 1;
6014 #else
6015 chars_deleted = 2;
6016 #endif
6017 }
6018 else
6019 {
6020 chars_deleted = 1;
6021 }
6022 *p = '\0';
6023 break;
6024 }
6025 *p++ = c;
6026 }
6027 lbp->len = p - buffer;
6028
6029 if (need_filebuf /* we need filebuf for multi-line regexps */
6030 && chars_deleted > 0) /* not at EOF */
6031 {
6032 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6033 {
6034 /* Expand filebuf. */
6035 filebuf.size *= 2;
6036 xrnew (filebuf.buffer, filebuf.size, char);
6037 }
6038 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6039 filebuf.len += lbp->len;
6040 filebuf.buffer[filebuf.len++] = '\n';
6041 filebuf.buffer[filebuf.len] = '\0';
6042 }
6043
6044 return lbp->len + chars_deleted;
6045 }
6046
6047 /*
6048 * Like readline_internal, above, but in addition try to match the
6049 * input line against relevant regular expressions and manage #line
6050 * directives.
6051 */
6052 static void
6053 readline (linebuffer *lbp, FILE *stream)
6054 {
6055 long result;
6056
6057 linecharno = charno; /* update global char number of line start */
6058 result = readline_internal (lbp, stream); /* read line */
6059 lineno += 1; /* increment global line number */
6060 charno += result; /* increment global char number */
6061
6062 /* Honor #line directives. */
6063 if (!no_line_directive)
6064 {
6065 static bool discard_until_line_directive;
6066
6067 /* Check whether this is a #line directive. */
6068 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6069 {
6070 unsigned int lno;
6071 int start = 0;
6072
6073 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6074 && start > 0) /* double quote character found */
6075 {
6076 char *endp = lbp->buffer + start;
6077
6078 while ((endp = strchr (endp, '"')) != NULL
6079 && endp[-1] == '\\')
6080 endp++;
6081 if (endp != NULL)
6082 /* Ok, this is a real #line directive. Let's deal with it. */
6083 {
6084 char *taggedabsname; /* absolute name of original file */
6085 char *taggedfname; /* name of original file as given */
6086 char *name; /* temp var */
6087
6088 discard_until_line_directive = false; /* found it */
6089 name = lbp->buffer + start;
6090 *endp = '\0';
6091 canonicalize_filename (name);
6092 taggedabsname = absolute_filename (name, tagfiledir);
6093 if (filename_is_absolute (name)
6094 || filename_is_absolute (curfdp->infname))
6095 taggedfname = savestr (taggedabsname);
6096 else
6097 taggedfname = relative_filename (taggedabsname,tagfiledir);
6098
6099 if (streq (curfdp->taggedfname, taggedfname))
6100 /* The #line directive is only a line number change. We
6101 deal with this afterwards. */
6102 free (taggedfname);
6103 else
6104 /* The tags following this #line directive should be
6105 attributed to taggedfname. In order to do this, set
6106 curfdp accordingly. */
6107 {
6108 fdesc *fdp; /* file description pointer */
6109
6110 /* Go look for a file description already set up for the
6111 file indicated in the #line directive. If there is
6112 one, use it from now until the next #line
6113 directive. */
6114 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6115 if (streq (fdp->infname, curfdp->infname)
6116 && streq (fdp->taggedfname, taggedfname))
6117 /* If we remove the second test above (after the &&)
6118 then all entries pertaining to the same file are
6119 coalesced in the tags file. If we use it, then
6120 entries pertaining to the same file but generated
6121 from different files (via #line directives) will
6122 go into separate sections in the tags file. These
6123 alternatives look equivalent. The first one
6124 destroys some apparently useless information. */
6125 {
6126 curfdp = fdp;
6127 free (taggedfname);
6128 break;
6129 }
6130 /* Else, if we already tagged the real file, skip all
6131 input lines until the next #line directive. */
6132 if (fdp == NULL) /* not found */
6133 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6134 if (streq (fdp->infabsname, taggedabsname))
6135 {
6136 discard_until_line_directive = true;
6137 free (taggedfname);
6138 break;
6139 }
6140 /* Else create a new file description and use that from
6141 now on, until the next #line directive. */
6142 if (fdp == NULL) /* not found */
6143 {
6144 fdp = fdhead;
6145 fdhead = xnew (1, fdesc);
6146 *fdhead = *curfdp; /* copy curr. file description */
6147 fdhead->next = fdp;
6148 fdhead->infname = savestr (curfdp->infname);
6149 fdhead->infabsname = savestr (curfdp->infabsname);
6150 fdhead->infabsdir = savestr (curfdp->infabsdir);
6151 fdhead->taggedfname = taggedfname;
6152 fdhead->usecharno = false;
6153 fdhead->prop = NULL;
6154 fdhead->written = false;
6155 curfdp = fdhead;
6156 }
6157 }
6158 free (taggedabsname);
6159 lineno = lno - 1;
6160 readline (lbp, stream);
6161 return;
6162 } /* if a real #line directive */
6163 } /* if #line is followed by a number */
6164 } /* if line begins with "#line " */
6165
6166 /* If we are here, no #line directive was found. */
6167 if (discard_until_line_directive)
6168 {
6169 if (result > 0)
6170 {
6171 /* Do a tail recursion on ourselves, thus discarding the contents
6172 of the line buffer. */
6173 readline (lbp, stream);
6174 return;
6175 }
6176 /* End of file. */
6177 discard_until_line_directive = false;
6178 return;
6179 }
6180 } /* if #line directives should be considered */
6181
6182 {
6183 int match;
6184 regexp *rp;
6185 char *name;
6186
6187 /* Match against relevant regexps. */
6188 if (lbp->len > 0)
6189 for (rp = p_head; rp != NULL; rp = rp->p_next)
6190 {
6191 /* Only use generic regexps or those for the current language.
6192 Also do not use multiline regexps, which is the job of
6193 regex_tag_multiline. */
6194 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6195 || rp->multi_line)
6196 continue;
6197
6198 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6199 switch (match)
6200 {
6201 case -2:
6202 /* Some error. */
6203 if (!rp->error_signaled)
6204 {
6205 error ("regexp stack overflow while matching \"%s\"",
6206 rp->pattern);
6207 rp->error_signaled = true;
6208 }
6209 break;
6210 case -1:
6211 /* No match. */
6212 break;
6213 case 0:
6214 /* Empty string matched. */
6215 if (!rp->error_signaled)
6216 {
6217 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6218 rp->error_signaled = true;
6219 }
6220 break;
6221 default:
6222 /* Match occurred. Construct a tag. */
6223 name = rp->name;
6224 if (name[0] == '\0')
6225 name = NULL;
6226 else /* make a named tag */
6227 name = substitute (lbp->buffer, rp->name, &rp->regs);
6228 if (rp->force_explicit_name)
6229 /* Force explicit tag name, if a name is there. */
6230 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6231 else
6232 make_tag (name, strlen (name), true,
6233 lbp->buffer, match, lineno, linecharno);
6234 break;
6235 }
6236 }
6237 }
6238 }
6239
6240 \f
/*
 * Duplicate the string CP: return a pointer to newly allocated space
 * of size strlen(cp)+1 that holds a copy of it.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6250
6251 /*
6252 * Return a pointer to a space of size LEN+1 allocated with xnew where
6253 * the string CP has been copied for at most the first LEN characters.
6254 */
6255 static char *
6256 savenstr (const char *cp, int len)
6257 {
6258 char *dp = xnew (len + 1, char);
6259 dp[len] = '\0';
6260 return memcpy (dp, cp, len);
6261 }
6262
/* Return a pointer to the first character of CP that is not a space.
   The end of the string does not count as a space, so the scan stops
   there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6271
/* Return a pointer to the first space in CP, or to the terminating
   NUL if CP contains no space.  */
static char *
skip_non_spaces (char *cp)
{
  for (;; cp++)
    if (*cp == '\0' || c_isspace (*cp))
      break;
  return cp;
}
6280
/* Return a pointer to the first character of CP that does not belong
   to the "name" class.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6290
/* Report an error and terminate the program with a failure status.
   S1 and S2 are handed to error unchanged, S1 in the position of the
   format.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6298
/* Report the current errno condition through perror, with S1 as the
   prefix, and terminate the program with a failure status.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6305
6306 static void
6307 suggest_asking_for_help (void)
6308 {
6309 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6310 progname);
6311 exit (EXIT_FAILURE);
6312 }
6313
6314 /* Output a diagnostic with printf-style FORMAT and args. */
6315 static void
6316 error (const char *format, ...)
6317 {
6318 va_list ap;
6319 va_start (ap, format);
6320 fprintf (stderr, "%s: ", progname);
6321 vfprintf (stderr, format, ap);
6322 fprintf (stderr, "\n");
6323 va_end (ap);
6324 }
6325
6326 /* Return a newly-allocated string whose contents
6327 concatenate those of s1, s2, s3. */
6328 static char *
6329 concat (const char *s1, const char *s2, const char *s3)
6330 {
6331 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6332 char *result = xnew (len1 + len2 + len3 + 1, char);
6333
6334 strcpy (result, s1);
6335 strcpy (result + len1, s2);
6336 strcpy (result + len1 + len2, s3);
6337
6338 return result;
6339 }
6340
6341 \f
6342 /* Does the same work as the system V getcwd, but does not need to
6343 guess the buffer size in advance. */
6344 static char *
6345 etags_getcwd (void)
6346 {
6347 int bufsize = 200;
6348 char *path = xnew (bufsize, char);
6349
6350 while (getcwd (path, bufsize) == NULL)
6351 {
6352 if (errno != ERANGE)
6353 pfatal ("getcwd");
6354 bufsize *= 2;
6355 free (path);
6356 path = xnew (bufsize, char);
6357 }
6358
6359 canonicalize_filename (path);
6360 return path;
6361 }
6362
6363 /* Return a newly allocated string containing a name of a temporary file. */
6364 static char *
6365 etags_mktmp (void)
6366 {
6367 const char *tmpdir = getenv ("TMPDIR");
6368 const char *slash = "/";
6369
6370 #if MSDOS || defined (DOS_NT)
6371 if (!tmpdir)
6372 tmpdir = getenv ("TEMP");
6373 if (!tmpdir)
6374 tmpdir = getenv ("TMP");
6375 if (!tmpdir)
6376 tmpdir = ".";
6377 if (tmpdir[strlen (tmpdir) - 1] == '/'
6378 || tmpdir[strlen (tmpdir) - 1] == '\\')
6379 slash = "";
6380 #else
6381 if (!tmpdir)
6382 tmpdir = "/tmp";
6383 if (tmpdir[strlen (tmpdir) - 1] == '/')
6384 slash = "";
6385 #endif
6386
6387 char *templt = concat (tmpdir, slash, "etXXXXXX");
6388 int fd = mkostemp (templt, O_CLOEXEC);
6389 if (fd < 0)
6390 {
6391 free (templt);
6392 templt = NULL;
6393 }
6394 else
6395 close (fd);
6396
6397 #if defined (DOS_NT)
6398 /* The file name will be used in shell redirection, so it needs to have
6399 DOS-style backslashes, or else the Windows shell will barf. */
6400 char *p;
6401 for (p = templt; *p; p++)
6402 if (*p == '/')
6403 *p = '\\';
6404 #endif
6405 return templt;
6406 }
6407
6408 /* Return a newly allocated string containing the file name of FILE
6409 relative to the absolute directory DIR (which should end with a slash). */
6410 static char *
6411 relative_filename (char *file, char *dir)
6412 {
6413 char *fp, *dp, *afn, *res;
6414 int i;
6415
6416 /* Find the common root of file and dir (with a trailing slash). */
6417 afn = absolute_filename (file, cwd);
6418 fp = afn;
6419 dp = dir;
6420 while (*fp++ == *dp++)
6421 continue;
6422 fp--, dp--; /* back to the first differing char */
6423 #ifdef DOS_NT
6424 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6425 return afn;
6426 #endif
6427 do /* look at the equal chars until '/' */
6428 fp--, dp--;
6429 while (*fp != '/');
6430
6431 /* Build a sequence of "../" strings for the resulting relative file name. */
6432 i = 0;
6433 while ((dp = strchr (dp + 1, '/')) != NULL)
6434 i += 1;
6435 res = xnew (3*i + strlen (fp + 1) + 1, char);
6436 char *z = res;
6437 while (i-- > 0)
6438 z = stpcpy (z, "../");
6439
6440 /* Add the file name relative to the common root of file and dir. */
6441 strcpy (z, fp + 1);
6442 free (afn);
6443
6444 return res;
6445 }
6446
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An absolute FILE
   is used as it is, any other FILE is appended to DIR.  The "/." and
   "/dirname/.." components of the result are then removed in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *prev, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slash = strchr (res, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      bool dot = (slash[1] == '.');

      if (dot && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": walk back to the start of the previous component.  */
          prev = slash;
          do
            prev--;
          while (prev >= res && !filename_is_absolute (prev));
          if (prev < res)
            prev = slash;       /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = slash;
#endif
          /* The length includes the terminating NUL of the tail.  */
          memmove (prev, slash + 3, strlen (slash + 2));
          /* Examine again from the place where the tail now starts.  */
          slash = prev;
        }
      else if (dot && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/.": drop it and look at the same position again.  */
          memmove (slash, slash + 2, strlen (slash + 1));
        }
      else
        slash = strchr (slash + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* just a safety net: should never happen */
  free (res);
  return savestr ("/");
}
6509
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a copy
   of DIR.  FILE is cut after its last slash for the duration of the
   call and restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *after, saved, *result;

  if (last_slash == NULL)
    return savestr (dir);

  /* Keep only the directory part, trailing slash included.  */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  result = absolute_filename (file, dir);
  *after = saved;

  return result;
}
6529
/* Return true if FN is an absolute file name: it starts with a slash
   or, on DOS_NT, with a drive letter, a colon and a slash.  The
   argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6541
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place: FN is rewritten and can only get shorter.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *fn++ = '/';
          do
            src++;
          while (*src == '/' || *src == '\\');
        }
      else
        *fn++ = *src++;
    }

#else /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *fn++ = '/';
          do
            src++;
          while (*src == '/');
        }
      else
        *fn++ = *src++;
    }

#endif /* !DOS_NT */

  *fn = '\0';
}
6583
6584 \f
6585 /* Initialize a linebuffer for use. */
6586 static void
6587 linebuffer_init (linebuffer *lbp)
6588 {
6589 lbp->size = (DEBUG) ? 3 : 200;
6590 lbp->buffer = xnew (lbp->size, char);
6591 lbp->buffer[0] = '\0';
6592 lbp->len = 0;
6593 }
6594
6595 /* Set the minimum size of a string contained in a linebuffer. */
6596 static void
6597 linebuffer_setlen (linebuffer *lbp, int toksize)
6598 {
6599 while (lbp->size <= toksize)
6600 {
6601 lbp->size *= 2;
6602 xrnew (lbp->buffer, lbp->size, char);
6603 }
6604 lbp->len = toksize;
6605 }
6606
/* Allocate SIZE bytes with malloc and return the block.  A NULL
   result from malloc is reported through fatal as "virtual memory
   exhausted".  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6616
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  A NULL result from realloc is reported through
   fatal as "virtual memory exhausted".  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6625
6626 /*
6627 * Local Variables:
6628 * indent-tabs-mode: t
6629 * tab-width: 8
6630 * fill-column: 79
6631 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6632 * c-file-style: "gnu"
6633 * End:
6634 */
6635
6636 /* etags.c ends here */