]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
Merge from origin/emacs-24
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2014 Free Software
32 Foundation, Inc.
33
34 This file is not considered part of GNU Emacs.
35
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
40
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
45
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
48
49
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
53
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
57
58
59 /*
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 *
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
73 */
74
75 /*
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
79 */
80
/* Revision identifier compiled into the program.  The leading "@(#)"
   looks like the marker used by what(1)-style tools -- TODO confirm.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
82
/* Turn DEBUG into a boolean that is always defined, so that it can be
   tested with `#if DEBUG' and used in ordinary expressions.  When
   debugging is off, NDEBUG is defined as well to disable assert.  */
#if defined DEBUG
# undef DEBUG
# define DEBUG true
#else
# define NDEBUG                 /* disable assert */
# define DEBUG false
#endif
90
91 #include <config.h>
92
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
96
/* Platform macros.
   WIN32_NATIVE is the XEmacs spelling; MSDOS, WINDOWSNT and DOS_NT are
   the Emacs ones.  A WIN32_NATIVE build is mapped onto WINDOWSNT.  */
#if defined WIN32_NATIVE
# undef MSDOS
# undef WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* MSDOS always ends up defined, as a boolean.  */
#if defined MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */

/* WINDOWSNT implies DOS_NT and takes MAXPATHLEN from _MAX_PATH.  */
#if defined WINDOWSNT
# include <direct.h>
# define MAXPATHLEN _MAX_PATH
# undef HAVE_NTGUI
# undef DOS_NT
# define DOS_NT
#endif /* WINDOWSNT */
120
121 #include <unistd.h>
122 #include <stdarg.h>
123 #include <stdlib.h>
124 #include <string.h>
125 #include <stdio.h>
126 #include <ctype.h>
127 #include <errno.h>
128 #include <sys/types.h>
129 #include <sys/stat.h>
130 #include <binary-io.h>
131 #include <c-strcase.h>
132
133 #include <assert.h>
134 #ifdef NDEBUG
135 # undef assert /* some systems have a buggy assert.h */
136 # define assert(x) ((void) 0)
137 #endif
138
139 #include <getopt.h>
140 #include <regex.h>
141
/* Building with CTAGS defined yields a program compatible with the
   usual "ctags".  Without it the program is "etags", which writes
   emacs-style tag tables and, by default, tags typedefs, #defines and
   struct/union/enum.  Either way CTAGS ends up defined as a boolean.  */
#if defined CTAGS
# undef CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
151
/* String comparison helpers yielding true on equality.
   Each one asserts (when assert is enabled) that BOTH arguments are
   non-null before passing them to the comparison function: handing a
   null pointer to strcmp and friends is undefined behavior, so the
   check must fail when either argument is null, not only when both are.
   The `case' variants compare ignoring case; the `n' variants compare
   at most N characters.  */
#define streq(s,t)        (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)    (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)     (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
156
/* Size of the character-class tables.  CHAR masks its argument with
   CHARS - 1, so any char value, signed or unsigned, is mapped to an
   index in the range 0 .. CHARS - 1.  */
#define CHARS 256
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))

/* Character-class predicates, implemented as table lookups.  */
#define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that first reduce their argument with CHAR.  */
#define ISALNUM(c)      isalnum (CHAR (c))
#define ISALPHA(c)      isalpha (CHAR (c))
#define ISDIGIT(c)      isdigit (CHAR (c))
#define ISLOWER(c)      islower (CHAR (c))

#define lowcase(c)      tolower (CHAR (c))
171
172
/*
 * xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields room for N objects of type Type; xrnew resizes the
 * storage behind OldPointer to N such objects and stores the result
 * back into OldPointer.  In DEBUG builds both go through the tracing
 * allocator of chkmalloc.h, which also records file and line.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type) \
    ((Type *) trace_malloc (__FILE__, __LINE__, (n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                    (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type) \
    ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) xrealloc ((char *) (op), (n) * sizeof (Type)))
#endif
190
/* Signature shared by all the language parse functions.  */
typedef void Lang_function (FILE *);

/* Describes how to read files compressed with a given tool.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;

/* Describes one supported language.  */
typedef struct
{
  /* Language name.  */
  const char *name;
  /* Detailed help for the language.  */
  const char *help;
  /* Parse function.  */
  Lang_function *function;
  /* Name suffixes of this language's files.  */
  const char **suffixes;
  /* Names of this language's files.  */
  const char **filenames;
  /* Interpreters for this language.  */
  const char **interpreters;
  /* Source used to generate other sources.  */
  bool metasource;
} language;
209
210 typedef struct fdesc
211 {
212 struct fdesc *next; /* for the linked list */
213 char *infname; /* uncompressed input file name */
214 char *infabsname; /* absolute uncompressed input file name */
215 char *infabsdir; /* absolute dir of input file */
216 char *taggedfname; /* file name to write in tagfile */
217 language *lang; /* language of file */
218 char *prop; /* file properties to write in tagfile */
219 bool usecharno; /* etags tags shall contain char number */
220 bool written; /* entry written in the tags file */
221 } fdesc;
222
223 typedef struct node_st
224 { /* sorting structure */
225 struct node_st *left, *right; /* left and right sons */
226 fdesc *fdp; /* description of file to whom tag belongs */
227 char *name; /* tag name */
228 char *regex; /* search regexp */
229 bool valid; /* write this tag on the tag file */
230 bool is_func; /* function tag: use regexp in CTAGS mode */
231 bool been_warned; /* warning already given for duplicated tag */
232 int lno; /* line number tag is on */
233 long cno; /* character number line starts on */
234 } node;
235
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills a
 * linebuffer from a stream, whatever the length of the line.
 */
typedef struct
{
  /* Size of BUFFER.  */
  long size;
  /* Length of the string in BUFFER after readline reads it.  */
  int len;
  /* The text itself.  */
  char *buffer;
} linebuffer;
249
250 /* Used to support mixing of --lang and file names. */
251 typedef struct
252 {
253 enum {
254 at_language, /* a language specification */
255 at_regexp, /* a regular expression */
256 at_filename, /* a file name */
257 at_stdin, /* read from stdin here */
258 at_end /* stop parsing the list */
259 } arg_type; /* argument type */
260 language *lang; /* language associated with the argument */
261 char *what; /* the argument itself */
262 } argument;
263
264 /* Structure defining a regular expression. */
265 typedef struct regexp
266 {
267 struct regexp *p_next; /* pointer to next in list */
268 language *lang; /* if set, use only for this language */
269 char *pattern; /* the regexp pattern */
270 char *name; /* tag name */
271 struct re_pattern_buffer *pat; /* the compiled pattern */
272 struct re_registers regs; /* re registers */
273 bool error_signaled; /* already signaled for this regexp */
274 bool force_explicit_name; /* do not allow implicit tag name */
275 bool ignore_case; /* ignore case when matching */
276 bool multi_line; /* do a multi-line match on the whole file */
277 } regexp;
278
279
/* Forward declarations of the language parse functions.
   Declaring them through the typedef, as in
     Lang_function Ada_funcs;
   makes many compilers barf, so each prototype is written in full.  */

/* C and its relatives.  */
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Yacc_entries (FILE *);

/* All the other languages.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void Cobol_paragraphs (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);

/* Parse function used by the `none' language.  */
static void just_read_file (FILE *);
310
311 static language *get_language_from_langname (const char *);
312 static void readline (linebuffer *, FILE *);
313 static long readline_internal (linebuffer *, FILE *);
314 static bool nocase_tail (const char *);
315 static void get_tag (char *, char **);
316
317 static void analyse_regex (char *);
318 static void free_regexps (void);
319 static void regex_tag_multiline (void);
320 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
321 static _Noreturn void suggest_asking_for_help (void);
322 _Noreturn void fatal (const char *, const char *);
323 static _Noreturn void pfatal (const char *);
324 static void add_node (node *, node **);
325
326 static void init (void);
327 static void process_file_name (char *, language *);
328 static void process_file (FILE *, char *, language *);
329 static void find_entries (FILE *);
330 static void free_tree (node *);
331 static void free_fdesc (fdesc *);
332 static void pfnote (char *, bool, char *, int, int, long);
333 static void invalidate_nodes (fdesc *, node **);
334 static void put_entries (node *);
335
336 static char *concat (const char *, const char *, const char *);
337 static char *skip_spaces (char *);
338 static char *skip_non_spaces (char *);
339 static char *skip_name (char *);
340 static char *savenstr (const char *, int);
341 static char *savestr (const char *);
342 static char *etags_getcwd (void);
343 static char *relative_filename (char *, char *);
344 static char *absolute_filename (char *, char *);
345 static char *absolute_dirname (char *, char *);
346 static bool filename_is_absolute (char *f);
347 static void canonicalize_filename (char *);
348 static void linebuffer_init (linebuffer *);
349 static void linebuffer_setlen (linebuffer *, int);
350 static void *xmalloc (size_t);
351 static void *xrealloc (char *, size_t);
352
353 \f
354 static char searchar = '/'; /* use /.../ searches */
355
356 static char *tagfile; /* output file */
357 static char *progname; /* name this program was invoked with */
358 static char *cwd; /* current working directory */
359 static char *tagfiledir; /* directory of tagfile */
360 static FILE *tagf; /* ioptr for tags file */
361 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
362
363 static fdesc *fdhead; /* head of file description list */
364 static fdesc *curfdp; /* current file description */
365 static int lineno; /* line number of current line */
366 static long charno; /* current character number */
367 static long linecharno; /* charno of start of current line */
368 static char *dbp; /* pointer to start of current tag */
369
370 static const int invalidcharno = -1;
371
372 static node *nodehead; /* the head of the binary tree of tags */
373 static node *last_node; /* the last node created */
374
375 static linebuffer lb; /* the current line */
376 static linebuffer filebuf; /* a buffer containing the whole file */
377 static linebuffer token_name; /* a buffer containing a tag name */
378
379 /* boolean "functions" (see init) */
380 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
381 static const char
382 /* white chars */
383 *white = " \f\t\n\r\v",
384 /* not in a name */
385 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
386 /* token ending chars */
387 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
388 /* token starting chars */
389 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
390 /* valid in-token chars */
391 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
392
/* Command-line option flags.  The ones declared as int are the
   targets of `flag' pointers in the long options table.  */

/* -a: append to tags.  */
static bool append_to_tagfile;

/* The next five default to true in C and derived languages.  */

/* -t: create tags for C and Ada typedefs.  */
static bool typedefs;
/* -T: create tags for C typedefs, level 0 struct/enum/union decls,
   and C++ member functions.  */
static bool typedefs_or_cplusplus;
/* -d: create tags for C #define, enum constants and variables.
   -D: opposite of -d.  Default under ctags.  */
static bool constantypedefs;
/* Create tags for global variables.  */
static int globals;
/* Create tags for C member variables.  */
static int members;

/* --declarations: tag them and extern in C&Co.  */
static int declarations;
/* Ignore #line directives (undocumented).  */
static int no_line_directive;
/* No duplicate tags for ctags (undocumented).  */
static int no_duplicates;
/* -u: update tags.  */
static bool update;
/* -v: create vgrind style index output.  */
static bool vgrind_style;
/* -w: suppress warnings (undocumented).  */
static bool no_warnings;
/* -x: create cxref style output.  */
static bool cxref_style;
/* .[hc] means C++, not C (undocumented).  */
static bool cplusplus;
/* -I: ignore indentation in C.  */
static bool ignoreindent;
/* --packages-only: in Ada, only tag packages.  */
static int packages_only;
414
/* LynxOS system headers define STDIN too: drop that definition
   before installing ours.  */
#if defined STDIN
# undef STDIN
#endif

/* Returned by getopt_long on --parse-stdin.  */
#define STDIN 0x1001
/* --parse-stdin used.  */
static bool parsing_stdin;
422
423 static regexp *p_head; /* list of all regexps */
424 static bool need_filebuf; /* some regexes are multi-line */
425
426 static struct option longopts[] =
427 {
428 { "append", no_argument, NULL, 'a' },
429 { "packages-only", no_argument, &packages_only, 1 },
430 { "c++", no_argument, NULL, 'C' },
431 { "declarations", no_argument, &declarations, 1 },
432 { "no-line-directive", no_argument, &no_line_directive, 1 },
433 { "no-duplicates", no_argument, &no_duplicates, 1 },
434 { "help", no_argument, NULL, 'h' },
435 { "help", no_argument, NULL, 'H' },
436 { "ignore-indentation", no_argument, NULL, 'I' },
437 { "language", required_argument, NULL, 'l' },
438 { "members", no_argument, &members, 1 },
439 { "no-members", no_argument, &members, 0 },
440 { "output", required_argument, NULL, 'o' },
441 { "regex", required_argument, NULL, 'r' },
442 { "no-regex", no_argument, NULL, 'R' },
443 { "ignore-case-regex", required_argument, NULL, 'c' },
444 { "parse-stdin", required_argument, NULL, STDIN },
445 { "version", no_argument, NULL, 'V' },
446
447 #if CTAGS /* Ctags options */
448 { "backward-search", no_argument, NULL, 'B' },
449 { "cxref", no_argument, NULL, 'x' },
450 { "defines", no_argument, NULL, 'd' },
451 { "globals", no_argument, &globals, 1 },
452 { "typedefs", no_argument, NULL, 't' },
453 { "typedefs-and-c++", no_argument, NULL, 'T' },
454 { "update", no_argument, NULL, 'u' },
455 { "vgrind", no_argument, NULL, 'v' },
456 { "no-warn", no_argument, NULL, 'w' },
457
458 #else /* Etags options */
459 { "no-defines", no_argument, NULL, 'D' },
460 { "no-globals", no_argument, &globals, 0 },
461 { "include", required_argument, NULL, 'i' },
462 #endif
463 { NULL }
464 };
465
466 static compressor compressors[] =
467 {
468 { "z", "gzip -d -c"},
469 { "Z", "gzip -d -c"},
470 { "gz", "gzip -d -c"},
471 { "GZ", "gzip -d -c"},
472 { "bz2", "bzip2 -d -c" },
473 { "xz", "xz -d -c" },
474 { NULL }
475 };
476
477 /*
478 * Language stuff.
479 */
480
/* Ada: file name suffixes and help text.  */
static const char *Ada_suffixes [] =
  {
    "ads",
    "adb",
    "ada",
    NULL
  };
static const char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
500
/* Assembly code: file name suffixes and help text.  */
static const char *Asm_suffixes [] =
  {
    /* Unix assembler */
    "a",
    /* Microcontroller assembly */
    "asm",
    /* BSO/Tasking definition includes */
    "def",
    /* Microcontroller include files */
    "inc",
    "ins",
    /* Unix assembler */
    "s", "sa",
    /* cpp-processed Unix assembler */
    "S",
    /* BSO/Tasking C compiler output */
    "src",
    NULL
  };
static const char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
516
517
/* Files ending in .c and .h are handled by default_C_entries because
   they can turn out to be C++: either the --c++ flag was given, or
   the `class' or `template' keywords are met inside the file.  */
static const char *default_C_suffixes [] =
  {
    "c",
    "h",
    NULL
  };

/* Help text for C; the wording depends on whether this is ctags or
   etags.  */
static const char default_C_help [] =
#if CTAGS
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'."
#else
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'."
#endif
  ;
543
/* C++: file name suffixes and help text.  */
static const char *Cplusplus_suffixes [] =
  {
    "C", "c++", "cc", "cpp", "cxx",
    "H", "h++", "hh", "hpp", "hxx",
    /* Objective C++ */
    "M",
    /* PostScript with C syntax */
    "pdb",
    NULL
  };
static const char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
557
/* Java: file name suffixes and help text.  The help text is never
   modified, so it is const like all the other help strings.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
563
564
/* Cobol: file name suffixes and help text.  The help text is never
   modified, so it is const like all the other help strings.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
570
/* C*: file name suffixes only.  */
static const char *Cstar_suffixes [] =
  {
    "cs",
    "hs",
    NULL
  };

/* Erlang: file name suffixes and help text.  */
static const char *Erlang_suffixes [] =
  {
    "erl",
    "hrl",
    NULL
  };
static const char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
579
/* Forth: file name suffixes and help text.  Like every other language
   table in this file, the suffix list has internal linkage.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
585
/* Fortran: file name suffixes and help text.  */
static const char *Fortran_suffixes [] =
  {
    "F",
    "f",
    "f90",
    "for",
    NULL
  };
static const char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML: file name suffixes and help text.  */
static const char *HTML_suffixes [] =
  {
    "htm",
    "html",
    "shtml",
    NULL
  };
static const char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
597
/* Lisp: file name suffixes and help text.  */
static const char *Lisp_suffixes [] =
  {
    "cl", "clisp", "el", "l", "lisp",
    "LSP", "lsp", "ml",
    NULL
  };
static const char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.\n"
  "The `--declarations' option tags \"(defvar foo)\" constructs too.";

/* Lua: file name suffixes and help text.  */
static const char *Lua_suffixes [] =
  {
    "lua",
    "LUA",
    NULL
  };
static const char Lua_help [] =
  "In Lua scripts, all functions are tags.";
611
/* Makefiles: recognized by whole file name rather than by suffix.  */
static const char *Makefile_filenames [] =
  {
    "Makefile",
    "makefile",
    "GNUMakefile",
    "Makefile.in",
    "Makefile.am",
    NULL
  };
static const char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C: file name suffixes and help text.  */
static const char *Objc_suffixes [] =
  {
    /* Objective lex file */
    "lm",
    /* Objective C file */
    "m",
    NULL
  };
static const char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
627
/* Pascal: file name suffixes and help text.  */
static const char *Pascal_suffixes [] =
  {
    "p",
    "pas",
    NULL
  };
static const char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */

/* Perl: file name suffixes, interpreter names and help text.  */
static const char *Perl_suffixes [] =
  {
    "pl",
    "pm",
    NULL
  };
static const char *Perl_interpreters [] =
  {
    "perl",
    "@PERL@",
    NULL
  };
static const char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
645
/* PHP: file name suffixes and help text.  */
static const char *PHP_suffixes [] =
  {
    "php",
    "php3",
    "php4",
    NULL
  };
static const char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Files always parsed as plain C.  */
static const char *plain_C_suffixes [] =
  {
    /* Pro*C file */
    "pc",
    NULL
  };

/* PostScript: file name suffixes and help text.  .psw is for PSWrap.  */
static const char *PS_suffixes [] =
  {
    "ps",
    "psw",
    NULL
  };
static const char PS_help [] =
  "In PostScript code, the tags are the functions.";
660
/* Prolog: file name suffixes and help text.  */
static const char *Prolog_suffixes [] =
  {
    "prolog",
    NULL
  };
static const char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python: file name suffixes and help text.  */
static const char *Python_suffixes [] =
  {
    "py",
    NULL
  };
static const char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme: file name suffixes and help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static const char *Scheme_suffixes [] =
  {
    "oak", "sch", "scheme",
    "SCM", "scm", "SM", "sm",
    "ss", "t",
    NULL
  };
static const char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
680
/* TeX and LaTeX: file name suffixes and help text.  */
static const char *TeX_suffixes [] =
  {
    "bib", "clo", "cls", "ltx", "sty",
    "TeX", "tex",
    NULL
  };
static const char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo: file name suffixes and help text.  */
static const char *Texinfo_suffixes [] =
  {
    "texi",
    "texinfo",
    "txi",
    NULL
  };
static const char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";
699
/* Yacc and Bison: file name suffixes and help text.  */
static const char *Yacc_suffixes [] =
  {
    "y",
    "y++",
    /* Objective yacc file */
    "ym",
    "yxx",
    "yy",
    NULL
  };
static const char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
707
/* Help text for the `auto' pseudo-language.  */
static const char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";

/* Help text for the `none' pseudo-language.  */
static const char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text for languages that have no detailed help of their own.  */
static const char no_lang_help [] =
  "No detailed help available for this language.";
718
719
720 /*
721 * Table of languages.
722 *
723 * It is ok for a given function to be listed under more than one
724 * name. I just didn't.
725 */
726
727 static language lang_names [] =
728 {
729 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
730 { "asm", Asm_help, Asm_labels, Asm_suffixes },
731 { "c", default_C_help, default_C_entries, default_C_suffixes },
732 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
733 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
734 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
735 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
736 { "forth", Forth_help, Forth_words, Forth_suffixes },
737 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
738 { "html", HTML_help, HTML_labels, HTML_suffixes },
739 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
740 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
741 { "lua", Lua_help, Lua_functions, Lua_suffixes },
742 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
743 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
744 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
745 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
746 { "php", PHP_help, PHP_functions, PHP_suffixes },
747 { "postscript",PS_help, PS_functions, PS_suffixes },
748 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
749 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
750 { "python", Python_help, Python_functions, Python_suffixes },
751 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
752 { "tex", TeX_help, TeX_commands, TeX_suffixes },
753 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
754 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
755 { "auto", auto_help }, /* default guessing scheme */
756 { "none", none_help, just_read_file }, /* regexp matching only */
757 { NULL } /* end of list */
758 };
759
760 \f
761 static void
762 print_language_names (void)
763 {
764 language *lang;
765 const char **name, **ext;
766
767 puts ("\nThese are the currently supported languages, along with the\n\
768 default file names and dot suffixes:");
769 for (lang = lang_names; lang->name != NULL; lang++)
770 {
771 printf (" %-*s", 10, lang->name);
772 if (lang->filenames != NULL)
773 for (name = lang->filenames; *name != NULL; name++)
774 printf (" %s", *name);
775 if (lang->suffixes != NULL)
776 for (ext = lang->suffixes; *ext != NULL; ext++)
777 printf (" .%s", *ext);
778 puts ("");
779 }
780 puts ("where `auto' means use default language for files based on file\n\
781 name suffix, and `none' means only do regexp processing on files.\n\
782 If no language is specified and no matching suffix is found,\n\
783 the first line of the file is read for a sharp-bang (#!) sequence\n\
784 followed by the name of an interpreter. If no such sequence is found,\n\
785 Fortran is tried first; if no tags are found, C is tried next.\n\
786 When parsing any C file, a \"class\" or \"template\" keyword\n\
787 switches to C++.");
788 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
789 \n\
790 For detailed help on a given language use, for example,\n\
791 etags --help --lang=ada.");
792 }
793
794 #ifndef EMACS_NAME
795 # define EMACS_NAME "standalone"
796 #endif
797 #ifndef VERSION
798 # define VERSION "17.38.1.4"
799 #endif
800 static _Noreturn void
801 print_version (void)
802 {
803 char emacs_copyright[] = COPYRIGHT;
804
805 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
806 puts (emacs_copyright);
807 puts ("This program is distributed under the terms in ETAGS.README");
808
809 exit (EXIT_SUCCESS);
810 }
811
812 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
813 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
814 #endif
815
816 static _Noreturn void
817 print_help (argument *argbuffer)
818 {
819 bool help_for_lang = false;
820
821 for (; argbuffer->arg_type != at_end; argbuffer++)
822 if (argbuffer->arg_type == at_language)
823 {
824 if (help_for_lang)
825 puts ("");
826 puts (argbuffer->lang->help);
827 help_for_lang = true;
828 }
829
830 if (help_for_lang)
831 exit (EXIT_SUCCESS);
832
833 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
834 \n\
835 These are the options accepted by %s.\n", progname, progname);
836 puts ("You may use unambiguous abbreviations for the long option names.");
837 puts (" A - as file name means read names from stdin (one per line).\n\
838 Absolute names are stored in the output file as they are.\n\
839 Relative ones are stored relative to the output file's directory.\n");
840
841 puts ("-a, --append\n\
842 Append tag entries to existing tags file.");
843
844 puts ("--packages-only\n\
845 For Ada files, only generate tags for packages.");
846
847 if (CTAGS)
848 puts ("-B, --backward-search\n\
849 Write the search commands for the tag entries using '?', the\n\
850 backward-search command instead of '/', the forward-search command.");
851
852 /* This option is mostly obsolete, because etags can now automatically
853 detect C++. Retained for backward compatibility and for debugging and
854 experimentation. In principle, we could want to tag as C++ even
855 before any "class" or "template" keyword.
856 puts ("-C, --c++\n\
857 Treat files whose name suffix defaults to C language as C++ files.");
858 */
859
860 puts ("--declarations\n\
861 In C and derived languages, create tags for function declarations,");
862 if (CTAGS)
863 puts ("\tand create tags for extern variables if --globals is used.");
864 else
865 puts
866 ("\tand create tags for extern variables unless --no-globals is used.");
867
868 if (CTAGS)
869 puts ("-d, --defines\n\
870 Create tag entries for C #define constants and enum constants, too.");
871 else
872 puts ("-D, --no-defines\n\
873 Don't create tag entries for C #define constants and enum constants.\n\
874 This makes the tags file smaller.");
875
876 if (!CTAGS)
877 puts ("-i FILE, --include=FILE\n\
878 Include a note in tag file indicating that, when searching for\n\
879 a tag, one should also consult the tags file FILE after\n\
880 checking the current file.");
881
882 puts ("-l LANG, --language=LANG\n\
883 Force the following files to be considered as written in the\n\
884 named language up to the next --language=LANG option.");
885
886 if (CTAGS)
887 puts ("--globals\n\
888 Create tag entries for global variables in some languages.");
889 else
890 puts ("--no-globals\n\
891 Do not create tag entries for global variables in some\n\
892 languages. This makes the tags file smaller.");
893
894 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
895 puts ("--no-line-directive\n\
896 Ignore #line preprocessor directives in C and derived languages.");
897
898 if (CTAGS)
899 puts ("--members\n\
900 Create tag entries for members of structures in some languages.");
901 else
902 puts ("--no-members\n\
903 Do not create tag entries for members of structures\n\
904 in some languages.");
905
906 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
907 Make a tag for each line matching a regular expression pattern\n\
908 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
909 files only. REGEXFILE is a file containing one REGEXP per line.\n\
910 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
911 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
912 puts (" If TAGNAME/ is present, the tags created are named.\n\
913 For example Tcl named tags can be created with:\n\
914 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
915 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
916 `m' means to allow multi-line matches, `s' implies `m' and\n\
917 causes dot to match any character, including newline.");
918
919 puts ("-R, --no-regex\n\
920 Don't create tags from regexps for the following files.");
921
922 puts ("-I, --ignore-indentation\n\
923 In C and C++ do not assume that a closing brace in the first\n\
924 column is the final brace of a function or structure definition.");
925
926 puts ("-o FILE, --output=FILE\n\
927 Write the tags to FILE.");
928
929 puts ("--parse-stdin=NAME\n\
930 Read from standard input and record tags as belonging to file NAME.");
931
932 if (CTAGS)
933 {
934 puts ("-t, --typedefs\n\
935 Generate tag entries for C and Ada typedefs.");
936 puts ("-T, --typedefs-and-c++\n\
937 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
938 and C++ member functions.");
939 }
940
941 if (CTAGS)
942 puts ("-u, --update\n\
943 Update the tag entries for the given files, leaving tag\n\
944 entries for other files in place. Currently, this is\n\
945 implemented by deleting the existing entries for the given\n\
946 files and then rewriting the new entries at the end of the\n\
947 tags file. It is often faster to simply rebuild the entire\n\
948 tag file than to use this.");
949
950 if (CTAGS)
951 {
952 puts ("-v, --vgrind\n\
953 Print on the standard output an index of items intended for\n\
954 human consumption, similar to the output of vgrind. The index\n\
955 is sorted, and gives the page number of each item.");
956
957 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
958 puts ("-w, --no-duplicates\n\
959 Do not create duplicate tag entries, for compatibility with\n\
960 traditional ctags.");
961
962 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
963 puts ("-w, --no-warn\n\
964 Suppress warning messages about duplicate tag entries.");
965
966 puts ("-x, --cxref\n\
967 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
968 The output uses line numbers instead of page numbers, but\n\
969 beyond that the differences are cosmetic; try both to see\n\
970 which you like.");
971 }
972
973 puts ("-V, --version\n\
974 Print the version of the program.\n\
975 -h, --help\n\
976 Print this help message.\n\
977 Followed by one or more `--language' options prints detailed\n\
978 help about tag generation for the specified languages.");
979
980 print_language_names ();
981
982 puts ("");
983 puts ("Report bugs to bug-gnu-emacs@gnu.org");
984
985 exit (EXIT_SUCCESS);
986 }
987
988 \f
989 int
990 main (int argc, char **argv)
991 {
992 int i;
993 unsigned int nincluded_files;
994 char **included_files;
995 argument *argbuffer;
996 int current_arg, file_count;
997 linebuffer filename_lb;
998 bool help_asked = false;
999 ptrdiff_t len;
1000 char *optstring;
1001 int opt;
1002
1003 progname = argv[0];
1004 nincluded_files = 0;
1005 included_files = xnew (argc, char *);
1006 current_arg = 0;
1007 file_count = 0;
1008
1009 /* Allocate enough no matter what happens. Overkill, but each one
1010 is small. */
1011 argbuffer = xnew (argc, argument);
1012
1013 /*
1014 * Always find typedefs and structure tags.
1015 * Also default to find macro constants, enum constants, struct
1016 * members and global variables. Do it for both etags and ctags.
1017 */
1018 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1019 globals = members = true;
1020
1021 /* When the optstring begins with a '-' getopt_long does not rearrange the
1022 non-options arguments to be at the end, but leaves them alone. */
1023 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1024 (CTAGS) ? "BxdtTuvw" : "Di:",
1025 "");
1026
1027 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1028 switch (opt)
1029 {
1030 case 0:
1031 /* If getopt returns 0, then it has already processed a
1032 long-named option. We should do nothing. */
1033 break;
1034
1035 case 1:
1036 /* This means that a file name has been seen. Record it. */
1037 argbuffer[current_arg].arg_type = at_filename;
1038 argbuffer[current_arg].what = optarg;
1039 len = strlen (optarg);
1040 if (whatlen_max < len)
1041 whatlen_max = len;
1042 ++current_arg;
1043 ++file_count;
1044 break;
1045
1046 case STDIN:
1047 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1048 argbuffer[current_arg].arg_type = at_stdin;
1049 argbuffer[current_arg].what = optarg;
1050 len = strlen (optarg);
1051 if (whatlen_max < len)
1052 whatlen_max = len;
1053 ++current_arg;
1054 ++file_count;
1055 if (parsing_stdin)
1056 fatal ("cannot parse standard input more than once", (char *)NULL);
1057 parsing_stdin = true;
1058 break;
1059
1060 /* Common options. */
1061 case 'a': append_to_tagfile = true; break;
1062 case 'C': cplusplus = true; break;
1063 case 'f': /* for compatibility with old makefiles */
1064 case 'o':
1065 if (tagfile)
1066 {
1067 error ("-o option may only be given once.");
1068 suggest_asking_for_help ();
1069 /* NOTREACHED */
1070 }
1071 tagfile = optarg;
1072 break;
1073 case 'I':
1074 case 'S': /* for backward compatibility */
1075 ignoreindent = true;
1076 break;
1077 case 'l':
1078 {
1079 language *lang = get_language_from_langname (optarg);
1080 if (lang != NULL)
1081 {
1082 argbuffer[current_arg].lang = lang;
1083 argbuffer[current_arg].arg_type = at_language;
1084 ++current_arg;
1085 }
1086 }
1087 break;
1088 case 'c':
1089 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1090 optarg = concat (optarg, "i", ""); /* memory leak here */
1091 /* FALLTHRU */
1092 case 'r':
1093 argbuffer[current_arg].arg_type = at_regexp;
1094 argbuffer[current_arg].what = optarg;
1095 len = strlen (optarg);
1096 if (whatlen_max < len)
1097 whatlen_max = len;
1098 ++current_arg;
1099 break;
1100 case 'R':
1101 argbuffer[current_arg].arg_type = at_regexp;
1102 argbuffer[current_arg].what = NULL;
1103 ++current_arg;
1104 break;
1105 case 'V':
1106 print_version ();
1107 break;
1108 case 'h':
1109 case 'H':
1110 help_asked = true;
1111 break;
1112
1113 /* Etags options */
1114 case 'D': constantypedefs = false; break;
1115 case 'i': included_files[nincluded_files++] = optarg; break;
1116
1117 /* Ctags options. */
1118 case 'B': searchar = '?'; break;
1119 case 'd': constantypedefs = true; break;
1120 case 't': typedefs = true; break;
1121 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1122 case 'u': update = true; break;
1123 case 'v': vgrind_style = true; /*FALLTHRU*/
1124 case 'x': cxref_style = true; break;
1125 case 'w': no_warnings = true; break;
1126 default:
1127 suggest_asking_for_help ();
1128 /* NOTREACHED */
1129 }
1130
1131 /* No more options. Store the rest of arguments. */
1132 for (; optind < argc; optind++)
1133 {
1134 argbuffer[current_arg].arg_type = at_filename;
1135 argbuffer[current_arg].what = argv[optind];
1136 len = strlen (argv[optind]);
1137 if (whatlen_max < len)
1138 whatlen_max = len;
1139 ++current_arg;
1140 ++file_count;
1141 }
1142
1143 argbuffer[current_arg].arg_type = at_end;
1144
1145 if (help_asked)
1146 print_help (argbuffer);
1147 /* NOTREACHED */
1148
1149 if (nincluded_files == 0 && file_count == 0)
1150 {
1151 error ("no input files specified.");
1152 suggest_asking_for_help ();
1153 /* NOTREACHED */
1154 }
1155
1156 if (tagfile == NULL)
1157 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1158 cwd = etags_getcwd (); /* the current working directory */
1159 if (cwd[strlen (cwd) - 1] != '/')
1160 {
1161 char *oldcwd = cwd;
1162 cwd = concat (oldcwd, "/", "");
1163 free (oldcwd);
1164 }
1165
1166 /* Compute base directory for relative file names. */
1167 if (streq (tagfile, "-")
1168 || strneq (tagfile, "/dev/", 5))
1169 tagfiledir = cwd; /* relative file names are relative to cwd */
1170 else
1171 {
1172 canonicalize_filename (tagfile);
1173 tagfiledir = absolute_dirname (tagfile, cwd);
1174 }
1175
1176 init (); /* set up boolean "functions" */
1177
1178 linebuffer_init (&lb);
1179 linebuffer_init (&filename_lb);
1180 linebuffer_init (&filebuf);
1181 linebuffer_init (&token_name);
1182
1183 if (!CTAGS)
1184 {
1185 if (streq (tagfile, "-"))
1186 {
1187 tagf = stdout;
1188 SET_BINARY (fileno (stdout));
1189 }
1190 else
1191 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1192 if (tagf == NULL)
1193 pfatal (tagfile);
1194 }
1195
1196 /*
1197 * Loop through files finding functions.
1198 */
1199 for (i = 0; i < current_arg; i++)
1200 {
1201 static language *lang; /* non-NULL if language is forced */
1202 char *this_file;
1203
1204 switch (argbuffer[i].arg_type)
1205 {
1206 case at_language:
1207 lang = argbuffer[i].lang;
1208 break;
1209 case at_regexp:
1210 analyse_regex (argbuffer[i].what);
1211 break;
1212 case at_filename:
1213 this_file = argbuffer[i].what;
1214 /* Input file named "-" means read file names from stdin
1215 (one per line) and use them. */
1216 if (streq (this_file, "-"))
1217 {
1218 if (parsing_stdin)
1219 fatal ("cannot parse standard input AND read file names from it",
1220 (char *)NULL);
1221 while (readline_internal (&filename_lb, stdin) > 0)
1222 process_file_name (filename_lb.buffer, lang);
1223 }
1224 else
1225 process_file_name (this_file, lang);
1226 break;
1227 case at_stdin:
1228 this_file = argbuffer[i].what;
1229 process_file (stdin, this_file, lang);
1230 break;
1231 }
1232 }
1233
1234 free_regexps ();
1235 free (lb.buffer);
1236 free (filebuf.buffer);
1237 free (token_name.buffer);
1238
1239 if (!CTAGS || cxref_style)
1240 {
1241 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1242 put_entries (nodehead);
1243 free_tree (nodehead);
1244 nodehead = NULL;
1245 if (!CTAGS)
1246 {
1247 fdesc *fdp;
1248
1249 /* Output file entries that have no tags. */
1250 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1251 if (!fdp->written)
1252 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1253
1254 while (nincluded_files-- > 0)
1255 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1256
1257 if (fclose (tagf) == EOF)
1258 pfatal (tagfile);
1259 }
1260
1261 exit (EXIT_SUCCESS);
1262 }
1263
1264 /* From here on, we are in (CTAGS && !cxref_style) */
1265 if (update)
1266 {
1267 char *cmd =
1268 xmalloc (strlen (tagfile) + whatlen_max +
1269 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1270 for (i = 0; i < current_arg; ++i)
1271 {
1272 switch (argbuffer[i].arg_type)
1273 {
1274 case at_filename:
1275 case at_stdin:
1276 break;
1277 default:
1278 continue; /* the for loop */
1279 }
1280 strcpy (cmd, "mv ");
1281 strcat (cmd, tagfile);
1282 strcat (cmd, " OTAGS;fgrep -v '\t");
1283 strcat (cmd, argbuffer[i].what);
1284 strcat (cmd, "\t' OTAGS >");
1285 strcat (cmd, tagfile);
1286 strcat (cmd, ";rm OTAGS");
1287 if (system (cmd) != EXIT_SUCCESS)
1288 fatal ("failed to execute shell command", (char *)NULL);
1289 }
1290 free (cmd);
1291 append_to_tagfile = true;
1292 }
1293
1294 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1295 if (tagf == NULL)
1296 pfatal (tagfile);
1297 put_entries (nodehead); /* write all the tags (CTAGS) */
1298 free_tree (nodehead);
1299 nodehead = NULL;
1300 if (fclose (tagf) == EOF)
1301 pfatal (tagfile);
1302
1303 if (CTAGS)
1304 if (append_to_tagfile || update)
1305 {
1306 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1307 /* Maybe these should be used:
1308 setenv ("LC_COLLATE", "C", 1);
1309 setenv ("LC_ALL", "C", 1); */
1310 strcpy (cmd, "sort -u -o ");
1311 strcat (cmd, tagfile);
1312 strcat (cmd, " ");
1313 strcat (cmd, tagfile);
1314 exit (system (cmd));
1315 }
1316 return EXIT_SUCCESS;
1317 }
1318
1319
1320 /*
1321 * Return a compressor given the file name. If EXTPTR is non-zero,
1322 * return a pointer into FILE where the compressor-specific
1323 * extension begins. If no compressor is found, NULL is returned
1324 * and EXTPTR is not significant.
1325 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1326 */
1327 static compressor *
1328 get_compressor_from_suffix (char *file, char **extptr)
1329 {
1330 compressor *compr;
1331 char *slash, *suffix;
1332
1333 /* File has been processed by canonicalize_filename,
1334 so we don't need to consider backslashes on DOS_NT. */
1335 slash = strrchr (file, '/');
1336 suffix = strrchr (file, '.');
1337 if (suffix == NULL || suffix < slash)
1338 return NULL;
1339 if (extptr != NULL)
1340 *extptr = suffix;
1341 suffix += 1;
1342 /* Let those poor souls who live with DOS 8+3 file name limits get
1343 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1344 Only the first do loop is run if not MSDOS */
1345 do
1346 {
1347 for (compr = compressors; compr->suffix != NULL; compr++)
1348 if (streq (compr->suffix, suffix))
1349 return compr;
1350 if (!MSDOS)
1351 break; /* do it only once: not really a loop */
1352 if (extptr != NULL)
1353 *extptr = ++suffix;
1354 } while (*suffix != '\0');
1355 return NULL;
1356 }
1357
1358
1359
1360 /*
1361 * Return a language given the name.
1362 */
1363 static language *
1364 get_language_from_langname (const char *name)
1365 {
1366 language *lang;
1367
1368 if (name == NULL)
1369 error ("empty language name");
1370 else
1371 {
1372 for (lang = lang_names; lang->name != NULL; lang++)
1373 if (streq (name, lang->name))
1374 return lang;
1375 error ("unknown language \"%s\"", name);
1376 }
1377
1378 return NULL;
1379 }
1380
1381
1382 /*
1383 * Return a language given the interpreter name.
1384 */
1385 static language *
1386 get_language_from_interpreter (char *interpreter)
1387 {
1388 language *lang;
1389 const char **iname;
1390
1391 if (interpreter == NULL)
1392 return NULL;
1393 for (lang = lang_names; lang->name != NULL; lang++)
1394 if (lang->interpreters != NULL)
1395 for (iname = lang->interpreters; *iname != NULL; iname++)
1396 if (streq (*iname, interpreter))
1397 return lang;
1398
1399 return NULL;
1400 }
1401
1402
1403
1404 /*
1405 * Return a language given the file name.
1406 */
1407 static language *
1408 get_language_from_filename (char *file, int case_sensitive)
1409 {
1410 language *lang;
1411 const char **name, **ext, *suffix;
1412
1413 /* Try whole file name first. */
1414 for (lang = lang_names; lang->name != NULL; lang++)
1415 if (lang->filenames != NULL)
1416 for (name = lang->filenames; *name != NULL; name++)
1417 if ((case_sensitive)
1418 ? streq (*name, file)
1419 : strcaseeq (*name, file))
1420 return lang;
1421
1422 /* If not found, try suffix after last dot. */
1423 suffix = strrchr (file, '.');
1424 if (suffix == NULL)
1425 return NULL;
1426 suffix += 1;
1427 for (lang = lang_names; lang->name != NULL; lang++)
1428 if (lang->suffixes != NULL)
1429 for (ext = lang->suffixes; *ext != NULL; ext++)
1430 if ((case_sensitive)
1431 ? streq (*ext, suffix)
1432 : strcaseeq (*ext, suffix))
1433 return lang;
1434 return NULL;
1435 }
1436
1437 \f
1438 /*
1439 * This routine is called on each file argument.
1440 */
1441 static void
1442 process_file_name (char *file, language *lang)
1443 {
1444 struct stat stat_buf;
1445 FILE *inf;
1446 fdesc *fdp;
1447 compressor *compr;
1448 char *compressed_name, *uncompressed_name;
1449 char *ext, *real_name;
1450 int retval;
1451
1452 canonicalize_filename (file);
1453 if (streq (file, tagfile) && !streq (tagfile, "-"))
1454 {
1455 error ("skipping inclusion of %s in self.", file);
1456 return;
1457 }
1458 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1459 {
1460 compressed_name = NULL;
1461 real_name = uncompressed_name = savestr (file);
1462 }
1463 else
1464 {
1465 real_name = compressed_name = savestr (file);
1466 uncompressed_name = savenstr (file, ext - file);
1467 }
1468
1469 /* If the canonicalized uncompressed name
1470 has already been dealt with, skip it silently. */
1471 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1472 {
1473 assert (fdp->infname != NULL);
1474 if (streq (uncompressed_name, fdp->infname))
1475 goto cleanup;
1476 }
1477
1478 if (stat (real_name, &stat_buf) != 0)
1479 {
1480 /* Reset real_name and try with a different name. */
1481 real_name = NULL;
1482 if (compressed_name != NULL) /* try with the given suffix */
1483 {
1484 if (stat (uncompressed_name, &stat_buf) == 0)
1485 real_name = uncompressed_name;
1486 }
1487 else /* try all possible suffixes */
1488 {
1489 for (compr = compressors; compr->suffix != NULL; compr++)
1490 {
1491 compressed_name = concat (file, ".", compr->suffix);
1492 if (stat (compressed_name, &stat_buf) != 0)
1493 {
1494 if (MSDOS)
1495 {
1496 char *suf = compressed_name + strlen (file);
1497 size_t suflen = strlen (compr->suffix) + 1;
1498 for ( ; suf[1]; suf++, suflen--)
1499 {
1500 memmove (suf, suf + 1, suflen);
1501 if (stat (compressed_name, &stat_buf) == 0)
1502 {
1503 real_name = compressed_name;
1504 break;
1505 }
1506 }
1507 if (real_name != NULL)
1508 break;
1509 } /* MSDOS */
1510 free (compressed_name);
1511 compressed_name = NULL;
1512 }
1513 else
1514 {
1515 real_name = compressed_name;
1516 break;
1517 }
1518 }
1519 }
1520 if (real_name == NULL)
1521 {
1522 perror (file);
1523 goto cleanup;
1524 }
1525 } /* try with a different name */
1526
1527 if (!S_ISREG (stat_buf.st_mode))
1528 {
1529 error ("skipping %s: it is not a regular file.", real_name);
1530 goto cleanup;
1531 }
1532 if (real_name == compressed_name)
1533 {
1534 char *cmd = concat (compr->command, " ", real_name);
1535 inf = popen (cmd, "rb");
1536 free (cmd);
1537 }
1538 else
1539 inf = fopen (real_name, "rb");
1540 if (inf == NULL)
1541 {
1542 perror (real_name);
1543 goto cleanup;
1544 }
1545
1546 process_file (inf, uncompressed_name, lang);
1547
1548 if (real_name == compressed_name)
1549 retval = pclose (inf);
1550 else
1551 retval = fclose (inf);
1552 if (retval < 0)
1553 pfatal (file);
1554
1555 cleanup:
1556 free (compressed_name);
1557 free (uncompressed_name);
1558 last_node = NULL;
1559 curfdp = NULL;
1560 return;
1561 }
1562
1563 static void
1564 process_file (FILE *fh, char *fn, language *lang)
1565 {
1566 static const fdesc emptyfdesc;
1567 fdesc *fdp;
1568
1569 /* Create a new input file description entry. */
1570 fdp = xnew (1, fdesc);
1571 *fdp = emptyfdesc;
1572 fdp->next = fdhead;
1573 fdp->infname = savestr (fn);
1574 fdp->lang = lang;
1575 fdp->infabsname = absolute_filename (fn, cwd);
1576 fdp->infabsdir = absolute_dirname (fn, cwd);
1577 if (filename_is_absolute (fn))
1578 {
1579 /* An absolute file name. Canonicalize it. */
1580 fdp->taggedfname = absolute_filename (fn, NULL);
1581 }
1582 else
1583 {
1584 /* A file name relative to cwd. Make it relative
1585 to the directory of the tags file. */
1586 fdp->taggedfname = relative_filename (fn, tagfiledir);
1587 }
1588 fdp->usecharno = true; /* use char position when making tags */
1589 fdp->prop = NULL;
1590 fdp->written = false; /* not written on tags file yet */
1591
1592 fdhead = fdp;
1593 curfdp = fdhead; /* the current file description */
1594
1595 find_entries (fh);
1596
1597 /* If not Ctags, and if this is not metasource and if it contained no #line
1598 directives, we can write the tags and free all nodes pointing to
1599 curfdp. */
1600 if (!CTAGS
1601 && curfdp->usecharno /* no #line directives in this file */
1602 && !curfdp->lang->metasource)
1603 {
1604 node *np, *prev;
1605
1606 /* Look for the head of the sublist relative to this file. See add_node
1607 for the structure of the node tree. */
1608 prev = NULL;
1609 for (np = nodehead; np != NULL; prev = np, np = np->left)
1610 if (np->fdp == curfdp)
1611 break;
1612
1613 /* If we generated tags for this file, write and delete them. */
1614 if (np != NULL)
1615 {
1616 /* This is the head of the last sublist, if any. The following
1617 instructions depend on this being true. */
1618 assert (np->left == NULL);
1619
1620 assert (fdhead == curfdp);
1621 assert (last_node->fdp == curfdp);
1622 put_entries (np); /* write tags for file curfdp->taggedfname */
1623 free_tree (np); /* remove the written nodes */
1624 if (prev == NULL)
1625 nodehead = NULL; /* no nodes left */
1626 else
1627 prev->left = NULL; /* delete the pointer to the sublist */
1628 }
1629 }
1630 }
1631
1632 /*
1633 * This routine sets up the boolean pseudo-functions which work
1634 * by setting boolean flags dependent upon the corresponding character.
1635 * Every char which is NOT in that string is not a white char. Therefore,
1636 * all of the array "_wht" is set to false, and then the elements
1637 * subscripted by the chars in "white" are set to true. Thus "_wht"
1638 * of a char is true if it is the string "white", else false.
1639 */
1640 static void
1641 init (void)
1642 {
1643 const char *sp;
1644 int i;
1645
1646 for (i = 0; i < CHARS; i++)
1647 iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i)
1648 = false;
1649 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = true;
1650 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = true;
1651 notinname ('\0') = notinname ('\n');
1652 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = true;
1653 begtoken ('\0') = begtoken ('\n');
1654 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = true;
1655 intoken ('\0') = intoken ('\n');
1656 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = true;
1657 endtoken ('\0') = endtoken ('\n');
1658 }
1659
1660 /*
1661 * This routine opens the specified file and calls the function
1662 * which finds the function and type definitions.
1663 */
1664 static void
1665 find_entries (FILE *inf)
1666 {
1667 char *cp;
1668 language *lang = curfdp->lang;
1669 Lang_function *parser = NULL;
1670
1671 /* If user specified a language, use it. */
1672 if (lang != NULL && lang->function != NULL)
1673 {
1674 parser = lang->function;
1675 }
1676
1677 /* Else try to guess the language given the file name. */
1678 if (parser == NULL)
1679 {
1680 lang = get_language_from_filename (curfdp->infname, true);
1681 if (lang != NULL && lang->function != NULL)
1682 {
1683 curfdp->lang = lang;
1684 parser = lang->function;
1685 }
1686 }
1687
1688 /* Else look for sharp-bang as the first two characters. */
1689 if (parser == NULL
1690 && readline_internal (&lb, inf) > 0
1691 && lb.len >= 2
1692 && lb.buffer[0] == '#'
1693 && lb.buffer[1] == '!')
1694 {
1695 char *lp;
1696
1697 /* Set lp to point at the first char after the last slash in the
1698 line or, if no slashes, at the first nonblank. Then set cp to
1699 the first successive blank and terminate the string. */
1700 lp = strrchr (lb.buffer+2, '/');
1701 if (lp != NULL)
1702 lp += 1;
1703 else
1704 lp = skip_spaces (lb.buffer + 2);
1705 cp = skip_non_spaces (lp);
1706 *cp = '\0';
1707
1708 if (strlen (lp) > 0)
1709 {
1710 lang = get_language_from_interpreter (lp);
1711 if (lang != NULL && lang->function != NULL)
1712 {
1713 curfdp->lang = lang;
1714 parser = lang->function;
1715 }
1716 }
1717 }
1718
1719 /* We rewind here, even if inf may be a pipe. We fail if the
1720 length of the first line is longer than the pipe block size,
1721 which is unlikely. */
1722 rewind (inf);
1723
1724 /* Else try to guess the language given the case insensitive file name. */
1725 if (parser == NULL)
1726 {
1727 lang = get_language_from_filename (curfdp->infname, false);
1728 if (lang != NULL && lang->function != NULL)
1729 {
1730 curfdp->lang = lang;
1731 parser = lang->function;
1732 }
1733 }
1734
1735 /* Else try Fortran or C. */
1736 if (parser == NULL)
1737 {
1738 node *old_last_node = last_node;
1739
1740 curfdp->lang = get_language_from_langname ("fortran");
1741 find_entries (inf);
1742
1743 if (old_last_node == last_node)
1744 /* No Fortran entries found. Try C. */
1745 {
1746 /* We do not tag if rewind fails.
1747 Only the file name will be recorded in the tags file. */
1748 rewind (inf);
1749 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1750 find_entries (inf);
1751 }
1752 return;
1753 }
1754
1755 if (!no_line_directive
1756 && curfdp->lang != NULL && curfdp->lang->metasource)
1757 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1758 file, or anyway we parsed a file that is automatically generated from
1759 this one. If this is the case, the bingo.c file contained #line
1760 directives that generated tags pointing to this file. Let's delete
1761 them all before parsing this file, which is the real source. */
1762 {
1763 fdesc **fdpp = &fdhead;
1764 while (*fdpp != NULL)
1765 if (*fdpp != curfdp
1766 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1767 /* We found one of those! We must delete both the file description
1768 and all tags referring to it. */
1769 {
1770 fdesc *badfdp = *fdpp;
1771
1772 /* Delete the tags referring to badfdp->taggedfname
1773 that were obtained from badfdp->infname. */
1774 invalidate_nodes (badfdp, &nodehead);
1775
1776 *fdpp = badfdp->next; /* remove the bad description from the list */
1777 free_fdesc (badfdp);
1778 }
1779 else
1780 fdpp = &(*fdpp)->next; /* advance the list pointer */
1781 }
1782
1783 assert (parser != NULL);
1784
1785 /* Generic initializations before reading from file. */
1786 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1787
1788 /* Generic initializations before parsing file with readline. */
1789 lineno = 0; /* reset global line number */
1790 charno = 0; /* reset global char number */
1791 linecharno = 0; /* reset global char number of line start */
1792
1793 parser (inf);
1794
1795 regex_tag_multiline ();
1796 }
1797
1798 \f
1799 /*
1800 * Check whether an implicitly named tag should be created,
1801 * then call `pfnote'.
1802 * NAME is a string that is internally copied by this function.
1803 *
1804 * TAGS format specification
1805 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1806 * The following is explained in some more detail in etc/ETAGS.EBNF.
1807 *
1808 * make_tag creates tags with "implicit tag names" (unnamed tags)
1809 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1810 * 1. NAME does not contain any of the characters in NONAM;
1811 * 2. LINESTART contains name as either a rightmost, or rightmost but
1812 * one character, substring;
1813 * 3. the character, if any, immediately before NAME in LINESTART must
1814 * be a character in NONAM;
1815 * 4. the character, if any, immediately after NAME in LINESTART must
1816 * also be a character in NONAM.
1817 *
1818 * The implementation uses the notinname() macro, which recognizes the
1819 * characters stored in the string `nonam'.
1820 * etags.el needs to use the same characters that are in NONAM.
1821 */
1822 static void
1823 make_tag (const char *name, /* tag name, or NULL if unnamed */
1824 int namelen, /* tag length */
1825 bool is_func, /* tag is a function */
1826 char *linestart, /* start of the line where tag is */
1827 int linelen, /* length of the line where tag is */
1828 int lno, /* line number */
1829 long int cno) /* character number */
1830 {
1831 bool named = (name != NULL && namelen > 0);
1832 char *nname = NULL;
1833
1834 if (!CTAGS && named) /* maybe set named to false */
1835 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1836 such that etags.el can guess a name from it. */
1837 {
1838 int i;
1839 register const char *cp = name;
1840
1841 for (i = 0; i < namelen; i++)
1842 if (notinname (*cp++))
1843 break;
1844 if (i == namelen) /* rule #1 */
1845 {
1846 cp = linestart + linelen - namelen;
1847 if (notinname (linestart[linelen-1]))
1848 cp -= 1; /* rule #4 */
1849 if (cp >= linestart /* rule #2 */
1850 && (cp == linestart
1851 || notinname (cp[-1])) /* rule #3 */
1852 && strneq (name, cp, namelen)) /* rule #2 */
1853 named = false; /* use implicit tag name */
1854 }
1855 }
1856
1857 if (named)
1858 nname = savenstr (name, namelen);
1859
1860 pfnote (nname, is_func, linestart, linelen, lno, cno);
1861 }
1862
1863 /* Record a tag. */
1864 static void
1865 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1866 long int cno)
1867 /* tag name, or NULL if unnamed */
1868 /* tag is a function */
1869 /* start of the line where tag is */
1870 /* length of the line where tag is */
1871 /* line number */
1872 /* character number */
1873 {
1874 register node *np;
1875
1876 assert (name == NULL || name[0] != '\0');
1877 if (CTAGS && name == NULL)
1878 return;
1879
1880 np = xnew (1, node);
1881
1882 /* If ctags mode, change name "main" to M<thisfilename>. */
1883 if (CTAGS && !cxref_style && streq (name, "main"))
1884 {
1885 char *fp = strrchr (curfdp->taggedfname, '/');
1886 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1887 fp = strrchr (np->name, '.');
1888 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1889 fp[0] = '\0';
1890 }
1891 else
1892 np->name = name;
1893 np->valid = true;
1894 np->been_warned = false;
1895 np->fdp = curfdp;
1896 np->is_func = is_func;
1897 np->lno = lno;
1898 if (np->fdp->usecharno)
1899 /* Our char numbers are 0-base, because of C language tradition?
1900 ctags compatibility? old versions compatibility? I don't know.
1901 Anyway, since emacs's are 1-base we expect etags.el to take care
1902 of the difference. If we wanted to have 1-based numbers, we would
1903 uncomment the +1 below. */
1904 np->cno = cno /* + 1 */ ;
1905 else
1906 np->cno = invalidcharno;
1907 np->left = np->right = NULL;
1908 if (CTAGS && !cxref_style)
1909 {
1910 if (strlen (linestart) < 50)
1911 np->regex = concat (linestart, "$", "");
1912 else
1913 np->regex = savenstr (linestart, 50);
1914 }
1915 else
1916 np->regex = savenstr (linestart, linelen);
1917
1918 add_node (np, &nodehead);
1919 }
1920
1921 /*
1922 * free_tree ()
1923 * recurse on left children, iterate on right children.
1924 */
1925 static void
1926 free_tree (register node *np)
1927 {
1928 while (np)
1929 {
1930 register node *node_right = np->right;
1931 free_tree (np->left);
1932 free (np->name);
1933 free (np->regex);
1934 free (np);
1935 np = node_right;
1936 }
1937 }
1938
1939 /*
1940 * free_fdesc ()
1941 * delete a file description
1942 */
1943 static void
1944 free_fdesc (register fdesc *fdp)
1945 {
1946 free (fdp->infname);
1947 free (fdp->infabsname);
1948 free (fdp->infabsdir);
1949 free (fdp->taggedfname);
1950 free (fdp->prop);
1951 free (fdp);
1952 }
1953
1954 /*
1955 * add_node ()
1956 * Adds a node to the tree of nodes. In etags mode, sort by file
1957 * name. In ctags mode, sort by tag name. Make no attempt at
1958 * balancing.
1959 *
1960 * add_node is the only function allowed to add nodes, so it can
1961 * maintain state.
1962 */
1963 static void
1964 add_node (node *np, node **cur_node_p)
1965 {
1966 register int dif;
1967 register node *cur_node = *cur_node_p;
1968
1969 if (cur_node == NULL)
1970 {
1971 *cur_node_p = np;
1972 last_node = np;
1973 return;
1974 }
1975
1976 if (!CTAGS)
1977 /* Etags Mode */
1978 {
1979 /* For each file name, tags are in a linked sublist on the right
1980 pointer. The first tags of different files are a linked list
1981 on the left pointer. last_node points to the end of the last
1982 used sublist. */
1983 if (last_node != NULL && last_node->fdp == np->fdp)
1984 {
1985 /* Let's use the same sublist as the last added node. */
1986 assert (last_node->right == NULL);
1987 last_node->right = np;
1988 last_node = np;
1989 }
1990 else if (cur_node->fdp == np->fdp)
1991 {
1992 /* Scanning the list we found the head of a sublist which is
1993 good for us. Let's scan this sublist. */
1994 add_node (np, &cur_node->right);
1995 }
1996 else
1997 /* The head of this sublist is not good for us. Let's try the
1998 next one. */
1999 add_node (np, &cur_node->left);
2000 } /* if ETAGS mode */
2001
2002 else
2003 {
2004 /* Ctags Mode */
2005 dif = strcmp (np->name, cur_node->name);
2006
2007 /*
2008 * If this tag name matches an existing one, then
2009 * do not add the node, but maybe print a warning.
2010 */
2011 if (no_duplicates && !dif)
2012 {
2013 if (np->fdp == cur_node->fdp)
2014 {
2015 if (!no_warnings)
2016 {
2017 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2018 np->fdp->infname, lineno, np->name);
2019 fprintf (stderr, "Second entry ignored\n");
2020 }
2021 }
2022 else if (!cur_node->been_warned && !no_warnings)
2023 {
2024 fprintf
2025 (stderr,
2026 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2027 np->fdp->infname, cur_node->fdp->infname, np->name);
2028 cur_node->been_warned = true;
2029 }
2030 return;
2031 }
2032
2033 /* Actually add the node */
2034 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2035 } /* if CTAGS mode */
2036 }
2037
2038 /*
2039 * invalidate_nodes ()
2040 * Scan the node tree and invalidate all nodes pointing to the
2041 * given file description (CTAGS case) or free them (ETAGS case).
2042 */
2043 static void
2044 invalidate_nodes (fdesc *badfdp, node **npp)
2045 {
2046 node *np = *npp;
2047
2048 if (np == NULL)
2049 return;
2050
2051 if (CTAGS)
2052 {
2053 if (np->left != NULL)
2054 invalidate_nodes (badfdp, &np->left);
2055 if (np->fdp == badfdp)
2056 np->valid = false;
2057 if (np->right != NULL)
2058 invalidate_nodes (badfdp, &np->right);
2059 }
2060 else
2061 {
2062 assert (np->fdp != NULL);
2063 if (np->fdp == badfdp)
2064 {
2065 *npp = np->left; /* detach the sublist from the list */
2066 np->left = NULL; /* isolate it */
2067 free_tree (np); /* free it */
2068 invalidate_nodes (badfdp, npp);
2069 }
2070 else
2071 invalidate_nodes (badfdp, &np->left);
2072 }
2073 }
2074
2075 \f
/* Forward declarations of the two size-computing helpers defined just
   below. */
2076 static int total_size_of_entries (node *);
2077 static int number_len (long) ATTRIBUTE_CONST;
2078
/* Length of a non-negative number's decimal representation.
   Every division by ten that still leaves a positive quotient
   accounts for one more digit.  */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2088
2089 /*
2090 * Return total number of characters that put_entries will output for
2091 * the nodes in the linked list at the right of the specified node.
2092 * This count is irrelevant with etags.el since emacs 19.34 at least,
2093 * but is still supplied for backward compatibility.
2094 */
2095 static int
2096 total_size_of_entries (register node *np)
2097 {
2098 register int total = 0;
2099
2100 for (; np != NULL; np = np->right)
2101 if (np->valid)
2102 {
2103 total += strlen (np->regex) + 1; /* pat\177 */
2104 if (np->name != NULL)
2105 total += strlen (np->name) + 1; /* name\001 */
2106 total += number_len ((long) np->lno) + 1; /* lno, */
2107 if (np->cno != invalidcharno) /* cno */
2108 total += number_len (np->cno);
2109 total += 1; /* newline */
2110 }
2111
2112 return total;
2113 }
2114
2115 static void
2116 put_entries (register node *np)
2117 {
2118 register char *sp;
2119 static fdesc *fdp = NULL;
2120
2121 if (np == NULL)
2122 return;
2123
2124 /* Output subentries that precede this one */
2125 if (CTAGS)
2126 put_entries (np->left);
2127
2128 /* Output this entry */
2129 if (np->valid)
2130 {
2131 if (!CTAGS)
2132 {
2133 /* Etags mode */
2134 if (fdp != np->fdp)
2135 {
2136 fdp = np->fdp;
2137 fprintf (tagf, "\f\n%s,%d\n",
2138 fdp->taggedfname, total_size_of_entries (np));
2139 fdp->written = true;
2140 }
2141 fputs (np->regex, tagf);
2142 fputc ('\177', tagf);
2143 if (np->name != NULL)
2144 {
2145 fputs (np->name, tagf);
2146 fputc ('\001', tagf);
2147 }
2148 fprintf (tagf, "%d,", np->lno);
2149 if (np->cno != invalidcharno)
2150 fprintf (tagf, "%ld", np->cno);
2151 fputs ("\n", tagf);
2152 }
2153 else
2154 {
2155 /* Ctags mode */
2156 if (np->name == NULL)
2157 error ("internal error: NULL name in ctags mode.");
2158
2159 if (cxref_style)
2160 {
2161 if (vgrind_style)
2162 fprintf (stdout, "%s %s %d\n",
2163 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2164 else
2165 fprintf (stdout, "%-16s %3d %-16s %s\n",
2166 np->name, np->lno, np->fdp->taggedfname, np->regex);
2167 }
2168 else
2169 {
2170 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2171
2172 if (np->is_func)
2173 { /* function or #define macro with args */
2174 putc (searchar, tagf);
2175 putc ('^', tagf);
2176
2177 for (sp = np->regex; *sp; sp++)
2178 {
2179 if (*sp == '\\' || *sp == searchar)
2180 putc ('\\', tagf);
2181 putc (*sp, tagf);
2182 }
2183 putc (searchar, tagf);
2184 }
2185 else
2186 { /* anything else; text pattern inadequate */
2187 fprintf (tagf, "%d", np->lno);
2188 }
2189 putc ('\n', tagf);
2190 }
2191 }
2192 } /* if this node contains a valid tag */
2193
2194 /* Output subentries that follow this one */
2195 put_entries (np->right);
2196 if (!CTAGS)
2197 put_entries (np->left);
2198 }
2199
2200 \f
2201 /* C extensions. */
/* Bit masks describing which C-like dialect is being parsed.  The
   dialect proper lives in the bits selected by C_EXT; C_AUTO and YACC
   are separate flag bits above that mask.  Note that the values of
   C_STAR and C_JAVA both include the C_PLPL bit, which is why the
   keyword table uses (C_JAVA & ~C_PLPL) for Java-only keywords. */
2202 #define C_EXT 0x00fff /* mask selecting the dialect bits */
2203 #define C_PLAIN 0x00000 /* C */
2204 #define C_PLPL 0x00001 /* C++ */
2205 #define C_STAR 0x00003 /* C* */
2206 #define C_JAVA 0x00005 /* JAVA */
2207 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2208 #define YACC 0x10000 /* yacc file */
2209
2210 /*
2211 * The C symbol tables.
2212 */
/* Classification of a token as returned by C_symtype.  st_none is the
   value used for any token that is not a keyword of the dialect being
   parsed; every other value is attached to one or more keywords in the
   gperf word list below. */
2213 enum sym_type
2214 {
2215 st_none,
2216 st_C_objprot, st_C_objimpl, st_C_objend,
2217 st_C_gnumacro,
2218 st_C_ignore, st_C_attribute,
2219 st_C_javastruct,
2220 st_C_operator,
2221 st_C_class, st_C_template,
2222 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2223 };
2224
2225 /* Feed stuff between (but not including) %[ and %] lines to:
2226 gperf -m 5
2227 %[
2228 %compare-strncmp
2229 %enum
2230 %struct-type
2231 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2232 %%
2233 if, 0, st_C_ignore
2234 for, 0, st_C_ignore
2235 while, 0, st_C_ignore
2236 switch, 0, st_C_ignore
2237 return, 0, st_C_ignore
2238 __attribute__, 0, st_C_attribute
2239 GTY, 0, st_C_attribute
2240 @interface, 0, st_C_objprot
2241 @protocol, 0, st_C_objprot
2242 @implementation,0, st_C_objimpl
2243 @end, 0, st_C_objend
2244 import, (C_JAVA & ~C_PLPL), st_C_ignore
2245 package, (C_JAVA & ~C_PLPL), st_C_ignore
2246 friend, C_PLPL, st_C_ignore
2247 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2248 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2249 interface, (C_JAVA & ~C_PLPL), st_C_struct
2250 class, 0, st_C_class
2251 namespace, C_PLPL, st_C_struct
2252 domain, C_STAR, st_C_struct
2253 union, 0, st_C_struct
2254 struct, 0, st_C_struct
2255 extern, 0, st_C_extern
2256 enum, 0, st_C_enum
2257 typedef, 0, st_C_typedef
2258 define, 0, st_C_define
2259 undef, 0, st_C_define
2260 operator, C_PLPL, st_C_operator
2261 template, 0, st_C_template
2262 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2263 DEFUN, 0, st_C_gnumacro
2264 SYSCALL, 0, st_C_gnumacro
2265 ENTRY, 0, st_C_gnumacro
2266 PSEUDO, 0, st_C_gnumacro
2267 # These are defined inside C functions, so currently they are not met.
2268 # EXFUN used in glibc, DEFVAR_* in emacs.
2269 #EXFUN, 0, st_C_gnumacro
2270 #DEFVAR_, 0, st_C_gnumacro
2271 %]
2272 and replace lines between %< and %> with its output, then:
2273 - remove the #if characterset check
2274 - make in_word_set static and not inline. */
2275 /*%<*/
2276 /* C code produced by gperf version 3.0.1 */
2277 /* Command-line: gperf -m 5 */
2278 /* Computed positions: -k'2-3' */
2279
/* One keyword of the table: its spelling, the dialect mask it is
   restricted to (0 when there is no restriction), and its class. */
2280 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2281 /* maximum key range = 33, duplicates = 0 */
2282
/* gperf-generated hash: LEN plus the association value of STR[1] and,
   unless LEN is 2, that of STR[2] as well.  The caller must make sure
   that LEN is at least 2 (in_word_set does so), otherwise characters
   past the end of the token would be read.  Do not edit by hand;
   regenerate with gperf as described in the comment above. */
2283 static int
2284 hash (const char *str, int len)
2285 {
2286 static char const asso_values[] =
2287 {
2288 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2289 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2290 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2291 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2292 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2293 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2294 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2295 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2296 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2297 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2298 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2299 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2300 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2301 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2302 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2303 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2304 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2305 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2306 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2307 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2308 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2309 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2310 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2311 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2312 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2313 35, 35, 35, 35, 35, 35
2314 };
2315 int hval = len;
2316
2317 switch (hval)
2318 {
2319 default:
2320 hval += asso_values[(unsigned char) str[2]];
2321 /*FALLTHROUGH*/
2322 case 2:
2323 hval += asso_values[(unsigned char) str[1]];
2324 break;
2325 }
2326 return hval;
2327 }
2328
/* gperf-generated lookup: return the table entry whose name is exactly
   the first LEN characters of STR, or 0 when there is none.  The word
   list is indexed by the value of hash, so the order of its entries
   must not be changed by hand; regenerate with gperf instead. */
2329 static struct C_stab_entry *
2330 in_word_set (register const char *str, register unsigned int len)
2331 {
2332 enum
2333 {
2334 TOTAL_KEYWORDS = 33,
2335 MIN_WORD_LENGTH = 2,
2336 MAX_WORD_LENGTH = 15,
2337 MIN_HASH_VALUE = 2,
2338 MAX_HASH_VALUE = 34
2339 };
2340
2341 static struct C_stab_entry wordlist[] =
2342 {
2343 {""}, {""},
2344 {"if", 0, st_C_ignore},
2345 {"GTY", 0, st_C_attribute},
2346 {"@end", 0, st_C_objend},
2347 {"union", 0, st_C_struct},
2348 {"define", 0, st_C_define},
2349 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2350 {"template", 0, st_C_template},
2351 {"operator", C_PLPL, st_C_operator},
2352 {"@interface", 0, st_C_objprot},
2353 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2354 {"friend", C_PLPL, st_C_ignore},
2355 {"typedef", 0, st_C_typedef},
2356 {"return", 0, st_C_ignore},
2357 {"@implementation",0, st_C_objimpl},
2358 {"@protocol", 0, st_C_objprot},
2359 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2360 {"extern", 0, st_C_extern},
2361 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2362 {"struct", 0, st_C_struct},
2363 {"domain", C_STAR, st_C_struct},
2364 {"switch", 0, st_C_ignore},
2365 {"enum", 0, st_C_enum},
2366 {"for", 0, st_C_ignore},
2367 {"namespace", C_PLPL, st_C_struct},
2368 {"class", 0, st_C_class},
2369 {"while", 0, st_C_ignore},
2370 {"undef", 0, st_C_define},
2371 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2372 {"__attribute__", 0, st_C_attribute},
2373 {"SYSCALL", 0, st_C_gnumacro},
2374 {"ENTRY", 0, st_C_gnumacro},
2375 {"PSEUDO", 0, st_C_gnumacro},
2376 {"DEFUN", 0, st_C_gnumacro}
2377 };
2378
/* The length check also protects hash, which reads str[1] and possibly
   str[2]. */
2379 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2380 {
2381 int key = hash (str, len);
2382
2383 if (key <= MAX_HASH_VALUE && key >= 0)
2384 {
2385 const char *s = wordlist[key].name;
2386
/* STR is not NUL-terminated at LEN, hence the explicit test that the
   candidate keyword ends exactly there. */
2387 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2388 return &wordlist[key];
2389 }
2390 }
2391 return 0;
2392 }
2393 /*%>*/
2394
2395 static enum sym_type
2396 C_symtype (char *str, int len, int c_ext)
2397 {
2398 register struct C_stab_entry *se = in_word_set (str, len);
2399
2400 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2401 return st_none;
2402 return se->type;
2403 }
2404
2405 \f
2406 /*
2407 * Ignoring __attribute__ ((list))
2408 */
/* Set by consider_token when an st_C_attribute keyword is met. */
2409 static bool inattribute; /* looking at an __attribute__ construct */
2410
2411 /*
2412 * C functions and variables are recognized using a simple
2413 * finite automaton. fvdef is its state variable.
2414 */
2415 static enum
2416 {
2417 fvnone, /* nothing seen */
2418 fdefunkey, /* Emacs DEFUN keyword seen */
2419 fdefunname, /* Emacs DEFUN name seen */
2420 foperator, /* func: operator keyword seen (cplpl) */
2421 fvnameseen, /* function or variable name seen */
2422 fstartlist, /* func: just after open parenthesis */
2423 finlist, /* func: in parameter list */
2424 flistseen, /* func: after parameter list */
2425 fignore, /* func: before open brace */
2426 vignore /* var-like: ignore until ';' */
2427 } fvdef;
2428
2429 static bool fvextern; /* func or var: extern keyword seen; */
2430
2431 /*
2432 * typedefs are recognized using a simple finite automaton.
2433 * typdef is its state variable.
2434 */
2435 static enum
2436 {
2437 tnone, /* nothing seen */
2438 tkeyseen, /* typedef keyword seen */
2439 ttypeseen, /* defined type seen */
2440 tinbody, /* inside typedef body */
2441 tend, /* just before typedef tag */
2442 tignore /* junk after typedef tag */
2443 } typdef;
2444
2445 /*
2446 * struct-like structures (enum, struct and union) are recognized
2447 * using another simple finite automaton. `structdef' is its state
2448 * variable.
2449 */
2450 static enum
2451 {
2452 snone, /* nothing seen yet,
2453 or in struct body if bracelev > 0 */
2454 skeyseen, /* struct-like keyword seen */
2455 stagseen, /* struct-like tag seen */
2456 scolonseen /* colon seen after struct-like tag */
2457 } structdef;
2458
2459 /*
2460 * When objdef is different from onone, objtag is the name of the class.
2461 */
/* consider_token replaces the initial placeholder with a heap copy of
   the class name made by savenstr; that copy is never freed. */
2462 static const char *objtag = "<uninited>";
2463
2464 /*
2465 * Yet another little state machine to deal with preprocessor lines.
2466 */
2467 static enum
2468 {
2469 dnone, /* nothing seen */
2470 dsharpseen, /* '#' seen as first char on line */
2471 ddefineseen, /* '#' and 'define' seen */
2472 dignorerest /* ignore rest of line */
2473 } definedef;
2474
2475 /*
2476 * State machine for Objective C protocols and implementations.
2477 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2478 */
2479 static enum
2480 {
2481 onone, /* nothing seen */
2482 oprotocol, /* @interface or @protocol seen */
2483 oimplementation, /* @implementation seen */
2484 otagseen, /* class name seen */
2485 oparenseen, /* parenthesis before category seen */
2486 ocatseen, /* category name seen */
2487 oinbody, /* in @implementation body */
2488 omethodsign, /* in @implementation body, after +/- */
2489 omethodtag, /* after method name */
2490 omethodcolon, /* after method colon */
2491 omethodparm, /* after method parameter */
2492 oignore /* wait for @end */
2493 } objdef;
2494
2495
2496 /*
2497 * Use this structure to keep info about the token read, and how it
2498 * should be tagged. Used by the make_C_tag function to build a tag.
2499 */
2500 static struct tok
2501 {
2502 char *line; /* string containing the token */
2503 int offset; /* where the token starts in LINE */
2504 int length; /* token length */
2505 /*
2506 The previous members can be used to pass strings around for generic
2507 purposes. The following ones specifically refer to creating tags. In this
2508 case the token contained here is the pattern that will be used to create a
2509 tag.
2510 */
2511 bool valid; /* a tag may be created from this token only
2512 while this is true; the token should be
2513 invalidated whenever a state machine is
reset prematurely */
2514 bool named; /* create a named tag */
2515 int lineno; /* source line number of tag */
2516 long linepos; /* source char number of tag */
2517 } token; /* latest token read */
2518
2519 /*
2520 * Variables and functions for dealing with nested structures.
2521 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2522 */
2523 static void pushclass_above (int, char *, int);
2524 static void popclass_above (int);
2525 static void write_classname (linebuffer *, const char *qualifier);
2526
/* cname and bracelev are parallel arrays: entry I describes the I-th
   enclosing declaration.  A cname entry may be NULL, which stands for
   a declaration without a name. */
2527 static struct {
2528 char **cname; /* nested class names */
2529 int *bracelev; /* nested class brace level */
2530 int nl; /* class nesting level (elements used) */
2531 int size; /* length of the array */
2532 } cstack; /* stack for nested declaration tags */
2533 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2534 #define nestlev (cstack.nl)
2535 /* After struct keyword or in struct body, not inside a nested function. */
/* This macro refers to a variable called bracelev, so it can only be
   used where such a variable is in scope. */
2536 #define instruct (structdef == snone && nestlev > 0 \
2537 && bracelev == cstack.bracelev[nestlev-1] + 1)
2538
2539 static void
2540 pushclass_above (int bracelev, char *str, int len)
2541 {
2542 int nl;
2543
2544 popclass_above (bracelev);
2545 nl = cstack.nl;
2546 if (nl >= cstack.size)
2547 {
2548 int size = cstack.size *= 2;
2549 xrnew (cstack.cname, size, char *);
2550 xrnew (cstack.bracelev, size, int);
2551 }
2552 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2553 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2554 cstack.bracelev[nl] = bracelev;
2555 cstack.nl = nl + 1;
2556 }
2557
2558 static void
2559 popclass_above (int bracelev)
2560 {
2561 int nl;
2562
2563 for (nl = cstack.nl - 1;
2564 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2565 nl--)
2566 {
2567 free (cstack.cname[nl]);
2568 cstack.nl = nl;
2569 }
2570 }
2571
2572 static void
2573 write_classname (linebuffer *cn, const char *qualifier)
2574 {
2575 int i, len;
2576 int qlen = strlen (qualifier);
2577
2578 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2579 {
2580 len = 0;
2581 cn->len = 0;
2582 cn->buffer[0] = '\0';
2583 }
2584 else
2585 {
2586 len = strlen (cstack.cname[0]);
2587 linebuffer_setlen (cn, len);
2588 strcpy (cn->buffer, cstack.cname[0]);
2589 }
2590 for (i = 1; i < cstack.nl; i++)
2591 {
2592 char *s = cstack.cname[i];
2593 if (s == NULL)
2594 continue;
2595 linebuffer_setlen (cn, len + qlen + strlen (s));
2596 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2597 }
2598 }
2599
2600 \f
2601 static bool consider_token (char *, int, int, int *, int, int, bool *);
2602 static void make_C_tag (bool);
2603
2604 /*
2605 * consider_token ()
2606 * checks to see if the current token is at the start of a
2607 * function or variable, or corresponds to a typedef, or
2608 * is a struct/union/enum tag, or #define, or an enum constant.
2609 *
2610 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2611 * with args. C_EXTP points to which language we are looking at.
2612 *
2613 * Globals
2614 * fvdef IN OUT
2615 * structdef IN OUT
2616 * definedef IN OUT
2617 * typdef IN OUT
2618 * objdef IN OUT
2619 */
2620
/* The result is true when the token was recognized as one of the
   constructs listed above, false otherwise.  NOTE(review): what the
   caller does with the result is outside this excerpt -- presumably a
   tag is made for the token; confirm in C_entries.

   The state machines are advanced in a fixed order: __attribute__,
   preprocessor lines (definedef), typedefs (typdef), struct-like
   keywords (structdef), Objective C (objdef), and finally functions
   and variables (fvdef).  Many branches return early, so the order of
   the sections matters.

   The comments that follow the parameter list describe the seven
   parameters, one per line, in declaration order. */
2621 static bool
2622 consider_token (char *str, int len, int c, int *c_extp,
2623 int bracelev, int parlev, bool *is_func_or_var)
2624 /* IN: token pointer */
2625 /* IN: token length */
2626 /* IN: first char after the token */
2627 /* IN, OUT: C extensions mask */
2628 /* IN: brace level */
2629 /* IN: parenthesis level */
2630 /* OUT: function or variable found */
2631 {
2632 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2633 structtype is the type of the preceding struct-like keyword, and
2634 structbracelev is the brace level where it has been seen. */
2635 static enum sym_type structtype;
2636 static int structbracelev;
2637 static enum sym_type toktype;
2638
2639
2640 toktype = C_symtype (str, len, *c_extp);
2641
2642 /*
2643 * Skip __attribute__
2644 */
2645 if (toktype == st_C_attribute)
2646 {
2647 inattribute = true;
2648 return false;
2649 }
2650
2651 /*
2652 * Advance the definedef state machine.
2653 */
2654 switch (definedef)
2655 {
2656 case dnone:
2657 /* We're not on a preprocessor line. */
2658 if (toktype == st_C_gnumacro)
2659 {
2660 fvdef = fdefunkey;
2661 return false;
2662 }
2663 break;
2664 case dsharpseen:
2665 if (toktype == st_C_define)
2666 {
2667 definedef = ddefineseen;
2668 }
2669 else
2670 {
2671 definedef = dignorerest;
2672 }
2673 return false;
2674 case ddefineseen:
2675 /*
2676 * Make a tag for any macro, unless it is a constant
2677 * and constantypedefs is false.
2678 */
2679 definedef = dignorerest;
2680 *is_func_or_var = (c == '(');
2681 if (!*is_func_or_var && !constantypedefs)
2682 return false;
2683 else
2684 return true;
2685 case dignorerest:
2686 return false;
2687 default:
2688 error ("internal error: definedef value.");
2689 }
2690
2691 /*
2692 * Now typedefs
2693 */
2694 switch (typdef)
2695 {
2696 case tnone:
2697 if (toktype == st_C_typedef)
2698 {
2699 if (typedefs)
2700 typdef = tkeyseen;
2701 fvextern = false;
2702 fvdef = fvnone;
2703 return false;
2704 }
2705 break;
2706 case tkeyseen:
2707 switch (toktype)
2708 {
2709 case st_none:
2710 case st_C_class:
2711 case st_C_struct:
2712 case st_C_enum:
2713 typdef = ttypeseen;
2714 }
2715 break;
2716 case ttypeseen:
2717 if (structdef == snone && fvdef == fvnone)
2718 {
2719 fvdef = fvnameseen;
2720 return true;
2721 }
2722 break;
2723 case tend:
2724 switch (toktype)
2725 {
2726 case st_C_class:
2727 case st_C_struct:
2728 case st_C_enum:
2729 return false;
2730 }
2731 return true;
2732 }
2733
/* Struct-like keywords, plus automatic detection of C++. */
2734 switch (toktype)
2735 {
2736 case st_C_javastruct:
2737 if (structdef == stagseen)
2738 structdef = scolonseen;
2739 return false;
2740 case st_C_template:
2741 case st_C_class:
2742 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2743 && bracelev == 0
2744 && definedef == dnone && structdef == snone
2745 && typdef == tnone && fvdef == fvnone)
2746 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2747 if (toktype == st_C_template)
2748 break;
2749 /* FALLTHRU */
2750 case st_C_struct:
2751 case st_C_enum:
2752 if (parlev == 0
2753 && fvdef != vignore
2754 && (typdef == tkeyseen
2755 || (typedefs_or_cplusplus && structdef == snone)))
2756 {
2757 structdef = skeyseen;
2758 structtype = toktype;
2759 structbracelev = bracelev;
2760 if (fvdef == fvnameseen)
2761 fvdef = fvnone;
2762 }
2763 return false;
2764 }
2765
/* The first token after a struct-like keyword is taken as its tag. */
2766 if (structdef == skeyseen)
2767 {
2768 structdef = stagseen;
2769 return true;
2770 }
2771
2772 if (typdef != tnone)
2773 definedef = dnone;
2774
2775 /* Detect Objective C constructs. */
2776 switch (objdef)
2777 {
2778 case onone:
2779 switch (toktype)
2780 {
2781 case st_C_objprot:
2782 objdef = oprotocol;
2783 return false;
2784 case st_C_objimpl:
2785 objdef = oimplementation;
2786 return false;
2787 }
2788 break;
2789 case oimplementation:
2790 /* Save the class tag for functions or variables defined inside. */
2791 objtag = savenstr (str, len);
2792 objdef = oinbody;
2793 return false;
2794 case oprotocol:
2795 /* Save the class tag for categories. */
2796 objtag = savenstr (str, len);
2797 objdef = otagseen;
2798 *is_func_or_var = true;
2799 return true;
2800 case oparenseen:
2801 objdef = ocatseen;
2802 *is_func_or_var = true;
2803 return true;
2804 case oinbody:
2805 break;
2806 case omethodsign:
2807 if (parlev == 0)
2808 {
2809 fvdef = fvnone;
2810 objdef = omethodtag;
/* Start the method name afresh in token_name. */
2811 linebuffer_setlen (&token_name, len);
2812 memcpy (token_name.buffer, str, len);
2813 token_name.buffer[len] = '\0';
2814 return true;
2815 }
2816 return false;
2817 case omethodcolon:
2818 if (parlev == 0)
2819 objdef = omethodparm;
2820 return false;
2821 case omethodparm:
2822 if (parlev == 0)
2823 {
2824 int oldlen = token_name.len;
2825 fvdef = fvnone;
2826 objdef = omethodtag;
/* Append this token to the name collected so far in token_name. */
2827 linebuffer_setlen (&token_name, oldlen + len);
2828 memcpy (token_name.buffer + oldlen, str, len);
2829 token_name.buffer[oldlen + len] = '\0';
2830 return true;
2831 }
2832 return false;
2833 case oignore:
2834 if (toktype == st_C_objend)
2835 {
2836 /* Memory leakage here: the string pointed by objtag is
2837 never released, because many tests would be needed to
2838 avoid breaking on incorrect input code. The amount of
2839 memory leaked here is the sum of the lengths of the
2840 class tags.
2841 free (objtag); */
2842 objdef = onone;
2843 }
2844 return false;
2845 }
2846
2847 /* A function, variable or enum constant? */
2848 switch (toktype)
2849 {
2850 case st_C_extern:
2851 fvextern = true;
2852 switch (fvdef)
2853 {
2854 case finlist:
2855 case flistseen:
2856 case fignore:
2857 case vignore:
2858 break;
2859 default:
2860 fvdef = fvnone;
2861 }
2862 return false;
2863 case st_C_ignore:
2864 fvextern = false;
2865 fvdef = vignore;
2866 return false;
2867 case st_C_operator:
2868 fvdef = foperator;
2869 *is_func_or_var = true;
2870 return true;
2871 case st_none:
2872 if (constantypedefs
2873 && structdef == snone
2874 && structtype == st_C_enum && bracelev > structbracelev)
2875 return true; /* enum constant */
2876 switch (fvdef)
2877 {
2878 case fdefunkey:
2879 if (bracelev > 0)
2880 break;
2881 fvdef = fdefunname; /* GNU macro */
2882 *is_func_or_var = true;
2883 return true;
2884 case fvnone:
2885 switch (typdef)
2886 {
2887 case ttypeseen:
2888 return false;
2889 case tnone:
2890 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2891 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2892 {
2893 fvdef = vignore;
2894 return false;
2895 }
2896 break;
2897 }
2898 /* FALLTHRU */
2899 case fvnameseen:
/* 10 is the length of "::operator". */
2900 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2901 {
2902 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2903 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2904 fvdef = foperator;
2905 *is_func_or_var = true;
2906 return true;
2907 }
2908 if (bracelev > 0 && !instruct)
2909 break;
2910 fvdef = fvnameseen; /* function or variable */
2911 *is_func_or_var = true;
2912 return true;
2913 }
2914 break;
2915 }
2916
2917 return false;
2918 }
2919
2920 \f
2921 /*
2922 * C_entries often keeps pointers to tokens or lines which are older than
2923 * the line currently read. By keeping two line buffers, and switching
2924 * them at end of line, it is possible to use those pointers.
2925 */
2926 static struct
2927 {
2928 long linepos;
2929 linebuffer lb;
2930 } lbs[2];
2931
/* The macros below refer to variables that are not file-level
   (curndx, newndx, c_ext, charno, inf, lp, quotednl, savetoken), so
   they can only be used where variables with those names are in
   scope. */
2932 #define current_lb_is_new (newndx == curndx)
2933 #define switch_line_buffers() (curndx = 1 - curndx)
2934
2935 #define curlb (lbs[curndx].lb)
2936 #define newlb (lbs[newndx].lb)
2937 #define curlinepos (lbs[curndx].linepos)
2938 #define newlinepos (lbs[newndx].linepos)
2939
2940 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2941 #define cplpl (c_ext & C_PLPL)
2942 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2943
/* Read the next input line into the current line buffer, point lp at
   its beginning, clear quotednl and mark the current buffer as the
   new one.  The definedef state is left untouched. */
2944 #define CNL_SAVE_DEFINEDEF() \
2945 do { \
2946 curlinepos = charno; \
2947 readline (&curlb, inf); \
2948 lp = curlb.buffer; \
2949 quotednl = false; \
2950 newndx = curndx; \
2951 } while (0)
2952
/* Like CNL_SAVE_DEFINEDEF, but in addition restore the token saved in
   savetoken, if that is valid, and reset the definedef state machine
   to dnone. */
2953 #define CNL() \
2954 do { \
2955 CNL_SAVE_DEFINEDEF(); \
2956 if (savetoken.valid) \
2957 { \
2958 token = savetoken; \
2959 savetoken.valid = false; \
2960 } \
2961 definedef = dnone; \
2962 } while (0)
2963
2964
2965 static void
2966 make_C_tag (bool isfun)
2967 {
2968 /* This function is never called when token.valid is false, but
2969 we must protect against invalid input or internal errors. */
2970 if (token.valid)
2971 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2972 token.offset+token.length+1, token.lineno, token.linepos);
2973 else if (DEBUG)
2974 { /* this branch is optimized away if !DEBUG */
2975 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2976 token_name.len + 17, isfun, token.line,
2977 token.offset+token.length+1, token.lineno, token.linepos);
2978 error ("INVALID TOKEN");
2979 }
2980
2981 token.valid = false;
2982 }
2983
2984
/*
 * C_entries ()
 *	This routine finds functions, variables, typedefs,
 *	#define's, enum constants and struct/union/enum definitions in
 *	C syntax and adds them to the list.
 *
 *	C_EXT is a set of flag bits selecting the extension of C to parse
 *	(the body tests YACC, C_AUTO and C_PLPL on it); INF is the input
 *	file.
 *
 *	The scanner walks the current line one character at a time.  A
 *	'\0' character stands for end of line; at that point the CNL or
 *	CNL_SAVE_DEFINEDEF macro is invoked (defined elsewhere in this
 *	file -- presumably they fetch the next line; confirm there).
 *	Comments, strings, character constants and bracketed text are
 *	skipped first, then tokens are collected and finally the
 *	punctuation following a token decides whether a tag is made.
 *	The two line buffers lbs[0] and lbs[1] are initialized on entry
 *	and freed on exit.
 */
static void
C_entries (int c_ext, FILE *inf)
{
  register char c;		/* latest char read; '\0' for end of line */
  register char *lp;		/* pointer one beyond the character `c' */
  int curndx, newndx;		/* indices for current and new lb */
  register int tokoff;		/* offset in line of start of current token */
  register int toklen;		/* length of current token */
  const char *qualifier;	/* string used to qualify names */
  int qlen;			/* length of qualifier */
  int bracelev;			/* current brace level */
  int bracketlev;		/* current bracket level */
  int parlev;			/* current parenthesis level */
  int attrparlev;		/* __attribute__ parenthesis level */
  int templatelev;		/* current template level */
  int typdefbracelev;		/* bracelev where a typedef struct body begun */
  bool incomm, inquote, inchar, quotednl, midtoken;
  bool yacc_rules;		/* in the rules part of a yacc file */
  struct tok savetoken = {0};	/* token saved during preprocessor handling */


  linebuffer_init (&lbs[0].lb);
  linebuffer_init (&lbs[1].lb);
  /* The class stack is allocated only once, the first time through.  */
  if (cstack.size == 0)
    {
      cstack.size = (DEBUG) ? 1 : 4;
      cstack.nl = 0;
      cstack.cname = xnew (cstack.size, char *);
      cstack.bracelev = xnew (cstack.size, int);
    }

  tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
  curndx = newndx = 0;
  lp = curlb.buffer;
  *lp = 0;

  /* Reset the whole parser state before scanning a new file.  */
  fvdef = fvnone; fvextern = false; typdef = tnone;
  structdef = snone; definedef = dnone; objdef = onone;
  yacc_rules = false;
  midtoken = inquote = inchar = incomm = quotednl = false;
  token.valid = savetoken.valid = false;
  bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
  /* Nested names are joined with "." when cjava is set, "::" otherwise.  */
  if (cjava)
    { qualifier = "."; qlen = 1; }
  else
    { qualifier = "::"; qlen = 2; }


  while (!feof (inf))
    {
      c = *lp++;
      if (c == '\\')
	{
	  /* If we are at the end of the line, the next character is a
	     '\0'; do not skip it, because it is what tells us
	     to read the next line.  */
	  if (*lp == '\0')
	    {
	      quotednl = true;
	      continue;
	    }
	  lp++;
	  c = ' ';
	}
      else if (incomm)
	{
	  switch (c)
	    {
	    case '*':
	      if (*lp == '/')
		{
		  c = *lp++;
		  incomm = false;
		}
	      break;
	    case '\0':
	      /* Newlines inside comments do not end macro definitions in
		 traditional cpp. */
	      CNL_SAVE_DEFINEDEF ();
	      break;
	    }
	  continue;
	}
      else if (inquote)
	{
	  switch (c)
	    {
	    case '"':
	      inquote = false;
	      break;
	    case '\0':
	      /* Newlines inside strings do not end macro definitions
		 in traditional cpp, even though compilers don't
		 usually accept them. */
	      CNL_SAVE_DEFINEDEF ();
	      break;
	    }
	  continue;
	}
      else if (inchar)
	{
	  switch (c)
	    {
	    case '\0':
	      /* Hmmm, something went wrong. */
	      CNL ();
	      /* FALLTHRU */
	    case '\'':
	      inchar = false;
	      break;
	    }
	  continue;
	}
      else switch (c)
	{
	case '"':
	  inquote = true;
	  if (bracketlev > 0)
	    continue;
	  if (inattribute)
	    break;
	  switch (fvdef)
	    {
	    case fdefunkey:
	    case fstartlist:
	    case finlist:
	    case fignore:
	    case vignore:
	      break;
	    default:
	      fvextern = false;
	      fvdef = fvnone;
	    }
	  continue;
	case '\'':
	  inchar = true;
	  if (bracketlev > 0)
	    continue;
	  if (inattribute)
	    break;
	  if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
	    {
	      fvextern = false;
	      fvdef = fvnone;
	    }
	  continue;
	case '/':
	  if (*lp == '*')
	    {
	      incomm = true;
	      lp++;
	      c = ' ';
	      if (bracketlev > 0)
		continue;
	    }
	  else if (/* cplpl && */ *lp == '/')
	    {
	      /* A // comment: treat the rest of the line as absent.  */
	      c = '\0';
	    }
	  break;
	case '%':
	  if ((c_ext & YACC) && *lp == '%')
	    {
	      /* Entering or exiting rules section in yacc file. */
	      lp++;
	      definedef = dnone; fvdef = fvnone; fvextern = false;
	      typdef = tnone; structdef = snone;
	      midtoken = inquote = inchar = incomm = quotednl = false;
	      bracelev = 0;
	      yacc_rules = !yacc_rules;
	      continue;
	    }
	  else
	    break;
	case '#':
	  if (definedef == dnone)
	    {
	      char *cp;
	      bool cpptoken = true;

	      /* Look back on this line.  If all blanks, or nonblanks
		 followed by an end of comment, this is a preprocessor
		 token. */
	      for (cp = newlb.buffer; cp < lp-1; cp++)
		if (!iswhite (*cp))
		  {
		    if (*cp == '*' && cp[1] == '/')
		      {
			cp++;
			cpptoken = true;
		      }
		    else
		      cpptoken = false;
		  }
	      if (cpptoken)
		definedef = dsharpseen;
	    } /* if (definedef == dnone) */
	  continue;
	case '[':
	  bracketlev++;
	  continue;
	default:
	  /* Everything between brackets is skipped.  */
	  if (bracketlev > 0)
	    {
	      if (c == ']')
		--bracketlev;
	      else if (c == '\0')
		CNL_SAVE_DEFINEDEF ();
	      continue;
	    }
	  break;
	} /* switch (c) */


      /* Consider token only if some involved conditions are satisfied. */
      if (typdef != tignore
	  && definedef != dignorerest
	  && fvdef != finlist
	  && templatelev == 0
	  && (definedef != dnone
	      || structdef != scolonseen)
	  && !inattribute)
	{
	  if (midtoken)
	    {
	      if (endtoken (c))
		{
		  if (c == ':' && *lp == ':' && begtoken (lp[1]))
		    /* This handles :: in the middle,
		       but not at the beginning of an identifier.
		       Also, space-separated :: is not recognized. */
		    {
		      if (c_ext & C_AUTO) /* automatic detection of C++ */
			c_ext = (c_ext | C_PLPL) & ~C_AUTO;
		      lp += 2;
		      toklen += 2;
		      c = lp[-1];
		      goto still_in_token;
		    }
		  else
		    {
		      bool funorvar = false;

		      if (yacc_rules
			  || consider_token (newlb.buffer + tokoff, toklen, c,
					     &c_ext, bracelev, parlev,
					     &funorvar))
			{
			  if (fvdef == foperator)
			    {
			      /* Extend the token over what follows the
				 `operator' keyword, up to white space
				 or an open parenthesis.  */
			      char *oldlp = lp;
			      lp = skip_spaces (lp-1);
			      if (*lp != '\0')
				lp += 1;
			      while (*lp != '\0'
				     && !iswhite (*lp) && *lp != '(')
				lp += 1;
			      c = *lp++;
			      toklen += lp - oldlp;
			    }
			  token.named = false;
			  if (!plainc
			      && nestlev > 0 && definedef == dnone)
			    /* in struct body */
			    {
			      int len;
			      write_classname (&token_name, qualifier);
			      len = token_name.len;
			      linebuffer_setlen (&token_name, len+qlen+toklen);
			      sprintf (token_name.buffer + len, "%s%.*s",
				       qualifier, toklen, newlb.buffer + tokoff);
			      token.named = true;
			    }
			  else if (objdef == ocatseen)
			    /* Objective C category */
			    {
			      int len = strlen (objtag) + 2 + toklen;
			      linebuffer_setlen (&token_name, len);
			      sprintf (token_name.buffer, "%s(%.*s)",
				       objtag, toklen, newlb.buffer + tokoff);
			      token.named = true;
			    }
			  else if (objdef == omethodtag
				   || objdef == omethodparm)
			    /* Objective C method */
			    {
			      token.named = true;
			    }
			  else if (fvdef == fdefunname)
			    /* GNU DEFUN and similar macros */
			    {
			      bool defun = (newlb.buffer[tokoff] == 'F');
			      int off = tokoff;
			      int len = toklen;

			      /* Rewrite the tag so that emacs lisp DEFUNs
				 can be found by their elisp name */
			      if (defun)
				{
				  off += 1;
				  len -= 1;
				}
			      linebuffer_setlen (&token_name, len);
			      memcpy (token_name.buffer,
				      newlb.buffer + off, len);
			      token_name.buffer[len] = '\0';
			      if (defun)
				while (--len >= 0)
				  if (token_name.buffer[len] == '_')
				    token_name.buffer[len] = '-';
			      token.named = defun;
			    }
			  else
			    {
			      linebuffer_setlen (&token_name, toklen);
			      memcpy (token_name.buffer,
				      newlb.buffer + tokoff, toklen);
			      token_name.buffer[toklen] = '\0';
			      /* Name macros and members. */
			      token.named = (structdef == stagseen
					     || typdef == ttypeseen
					     || typdef == tend
					     || (funorvar
						 && definedef == dignorerest)
					     || (funorvar
						 && definedef == dnone
						 && structdef == snone
						 && bracelev > 0));
			    }
			  /* Remember where the token was found.  */
			  token.lineno = lineno;
			  token.offset = tokoff;
			  token.length = toklen;
			  token.line = newlb.buffer;
			  token.linepos = newlinepos;
			  token.valid = true;

			  if (definedef == dnone
			      && (fvdef == fvnameseen
				  || fvdef == foperator
				  || structdef == stagseen
				  || typdef == tend
				  || typdef == ttypeseen
				  || objdef != onone))
			    {
			      if (current_lb_is_new)
				switch_line_buffers ();
			    }
			  else if (definedef != dnone
				   || fvdef == fdefunname
				   || instruct)
			    make_C_tag (funorvar);
			}
		      else /* not yacc and consider_token failed */
			{
			  if (inattribute && fvdef == fignore)
			    {
			      /* We have just met __attribute__ after a
				 function parameter list: do not tag the
				 function again. */
			      fvdef = fvnone;
			    }
			}
		      midtoken = false;
		    }
		} /* if (endtoken (c)) */
	      else if (intoken (c))
		still_in_token:
		{
		  toklen++;
		  continue;
		}
	    } /* if (midtoken) */
	  else if (begtoken (c))
	    {
	      switch (definedef)
		{
		case dnone:
		  switch (fvdef)
		    {
		    case fstartlist:
		      /* This prevents tagging fb in
			 void (__attribute__((noreturn)) *fb) (void);
			 Fixing this is not easy and not very important. */
		      fvdef = finlist;
		      continue;
		    case flistseen:
		      if (plainc || declarations)
			{
			  make_C_tag (true); /* a function */
			  fvdef = fignore;
			}
		      break;
		    }
		  if (structdef == stagseen && !cjava)
		    {
		      popclass_above (bracelev);
		      structdef = snone;
		    }
		  break;
		case dsharpseen:
		  savetoken = token;
		  break;
		}
	      /* In yacc rules only a token in the first column starts
		 a new token.  */
	      if (!yacc_rules || lp == newlb.buffer + 1)
		{
		  tokoff = lp - 1 - newlb.buffer;
		  toklen = 1;
		  midtoken = true;
		}
	      continue;
	    } /* if (begtoken) */
	} /* if must look at token */


      /* Detect end of line, colon, comma, semicolon and various braces
	 after having handled a token.*/
      switch (c)
	{
	case ':':
	  if (inattribute)
	    break;
	  if (yacc_rules && token.offset == 0 && token.valid)
	    {
	      make_C_tag (false); /* a yacc function */
	      break;
	    }
	  if (definedef != dnone)
	    break;
	  switch (objdef)
	    {
	    case otagseen:
	      objdef = oignore;
	      make_C_tag (true); /* an Objective C class */
	      break;
	    case omethodtag:
	    case omethodparm:
	      objdef = omethodcolon;
	      linebuffer_setlen (&token_name, token_name.len + 1);
	      strcat (token_name.buffer, ":");
	      break;
	    }
	  if (structdef == stagseen)
	    {
	      structdef = scolonseen;
	      break;
	    }
	  /* Should be useless, but may work as a safety net. */
	  if (cplpl && fvdef == flistseen)
	    {
	      make_C_tag (true); /* a function */
	      fvdef = fignore;
	      break;
	    }
	  break;
	case ';':
	  if (definedef != dnone || inattribute)
	    break;
	  switch (typdef)
	    {
	    case tend:
	    case ttypeseen:
	      make_C_tag (false); /* a typedef */
	      typdef = tnone;
	      fvdef = fvnone;
	      break;
	    case tnone:
	    case tinbody:
	    case tignore:
	      switch (fvdef)
		{
		case fignore:
		  if (typdef == tignore || cplpl)
		    fvdef = fvnone;
		  break;
		case fvnameseen:
		  if ((globals && bracelev == 0 && (!fvextern || declarations))
		      || (members && instruct))
		    make_C_tag (false); /* a variable */
		  fvextern = false;
		  fvdef = fvnone;
		  token.valid = false;
		  break;
		case flistseen:
		  if ((declarations
		       && (cplpl || !instruct)
		       && (typdef == tnone || (typdef != tignore && instruct)))
		      || (members
			  && plainc && instruct))
		    make_C_tag (true); /* a function */
		  /* FALLTHRU */
		default:
		  fvextern = false;
		  fvdef = fvnone;
		  if (declarations
		      && cplpl && structdef == stagseen)
		    make_C_tag (false);	/* forward declaration */
		  else
		    token.valid = false;
		} /* switch (fvdef) */
	      /* FALLTHRU */
	    default:
	      if (!instruct)
		typdef = tnone;
	    }
	  if (structdef == stagseen)
	    structdef = snone;
	  break;
	case ',':
	  if (definedef != dnone || inattribute)
	    break;
	  switch (objdef)
	    {
	    case omethodtag:
	    case omethodparm:
	      make_C_tag (true); /* an Objective C method */
	      objdef = oinbody;
	      break;
	    }
	  switch (fvdef)
	    {
	    case fdefunkey:
	    case foperator:
	    case fstartlist:
	    case finlist:
	    case fignore:
	    case vignore:
	      break;
	    case fdefunname:
	      fvdef = fignore;
	      break;
	    case fvnameseen:
	      if (parlev == 0
		  && ((globals
		       && bracelev == 0
		       && templatelev == 0
		       && (!fvextern || declarations))
		      || (members && instruct)))
		make_C_tag (false); /* a variable */
	      break;
	    case flistseen:
	      if ((declarations && typdef == tnone && !instruct)
		  || (members && typdef != tignore && instruct))
		{
		  make_C_tag (true); /* a function */
		  fvdef = fvnameseen;
		}
	      else if (!declarations)
		fvdef = fvnone;
	      token.valid = false;
	      break;
	    default:
	      fvdef = fvnone;
	    }
	  if (structdef == stagseen)
	    structdef = snone;
	  break;
	case ']':
	  if (definedef != dnone || inattribute)
	    break;
	  if (structdef == stagseen)
	    structdef = snone;
	  switch (typdef)
	    {
	    case ttypeseen:
	    case tend:
	      typdef = tignore;
	      make_C_tag (false);	/* a typedef */
	      break;
	    case tnone:
	    case tinbody:
	      switch (fvdef)
		{
		case foperator:
		case finlist:
		case fignore:
		case vignore:
		  break;
		case fvnameseen:
		  if ((members && bracelev == 1)
		      || (globals && bracelev == 0
			  && (!fvextern || declarations)))
		    make_C_tag (false); /* a variable */
		  /* FALLTHRU */
		default:
		  fvdef = fvnone;
		}
	      break;
	    }
	  break;
	case '(':
	  if (inattribute)
	    {
	      attrparlev++;
	      break;
	    }
	  if (definedef != dnone)
	    break;
	  if (objdef == otagseen && parlev == 0)
	    objdef = oparenseen;
	  switch (fvdef)
	    {
	    case fvnameseen:
	      if (typdef == ttypeseen
		  && *lp != '*'
		  && !instruct)
		{
		  /* This handles constructs like:
		     typedef void OperatorFun (int fun); */
		  make_C_tag (false);
		  typdef = tignore;
		  fvdef = fignore;
		  break;
		}
	      /* FALLTHRU */
	    case foperator:
	      fvdef = fstartlist;
	      break;
	    case flistseen:
	      fvdef = finlist;
	      break;
	    }
	  parlev++;
	  break;
	case ')':
	  if (inattribute)
	    {
	      if (--attrparlev == 0)
		inattribute = false;
	      break;
	    }
	  if (definedef != dnone)
	    break;
	  if (objdef == ocatseen && parlev == 1)
	    {
	      make_C_tag (true); /* an Objective C category */
	      objdef = oignore;
	    }
	  if (--parlev == 0)
	    {
	      switch (fvdef)
		{
		case fstartlist:
		case finlist:
		  fvdef = flistseen;
		  break;
		}
	      if (!instruct
		  && (typdef == tend
		      || typdef == ttypeseen))
		{
		  typdef = tignore;
		  make_C_tag (false); /* a typedef */
		}
	    }
	  else if (parlev < 0)	/* can happen due to ill-conceived #if's. */
	    parlev = 0;
	  break;
	case '{':
	  if (definedef != dnone)
	    break;
	  if (typdef == ttypeseen)
	    {
	      /* Whenever typdef is set to tinbody (currently only
		 here), typdefbracelev should be set to bracelev. */
	      typdef = tinbody;
	      typdefbracelev = bracelev;
	    }
	  switch (fvdef)
	    {
	    case flistseen:
	      make_C_tag (true);    /* a function */
	      /* FALLTHRU */
	    case fignore:
	      fvdef = fvnone;
	      break;
	    case fvnone:
	      switch (objdef)
		{
		case otagseen:
		  make_C_tag (true); /* an Objective C class */
		  objdef = oignore;
		  break;
		case omethodtag:
		case omethodparm:
		  make_C_tag (true); /* an Objective C method */
		  objdef = oinbody;
		  break;
		default:
		  /* Neutralize `extern "C" {' grot. */
		  if (bracelev == 0 && structdef == snone && nestlev == 0
		      && typdef == tnone)
		    bracelev = -1;
		}
	      break;
	    }
	  switch (structdef)
	    {
	    case skeyseen:	   /* unnamed struct */
	      pushclass_above (bracelev, NULL, 0);
	      structdef = snone;
	      break;
	    case stagseen:	   /* named struct or enum */
	    case scolonseen:	   /* a class */
	      pushclass_above (bracelev,token.line+token.offset, token.length);
	      structdef = snone;
	      make_C_tag (false);  /* a struct or enum */
	      break;
	    }
	  bracelev += 1;
	  break;
	case '*':
	  if (definedef != dnone)
	    break;
	  if (fvdef == fstartlist)
	    {
	      fvdef = fvnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
	      token.valid = false;
	    }
	  break;
	case '}':
	  if (definedef != dnone)
	    break;
	  bracelev -= 1;
	  if (!ignoreindent && lp == newlb.buffer + 1)
	    {
	      if (bracelev != 0)
		token.valid = false; /* unexpected value, token unreliable */
	      bracelev = 0;	/* reset brace level if first column */
	      parlev = 0;	/* also reset paren level, just in case... */
	    }
	  else if (bracelev < 0)
	    {
	      token.valid = false; /* something gone amiss, token unreliable */
	      bracelev = 0;
	    }
	  if (bracelev == 0 && fvdef == vignore)
	    fvdef = fvnone;		/* end of function */
	  popclass_above (bracelev);
	  structdef = snone;
	  /* Only if typdef == tinbody is typdefbracelev significant. */
	  if (typdef == tinbody && bracelev <= typdefbracelev)
	    {
	      assert (bracelev == typdefbracelev);
	      typdef = tend;
	    }
	  break;
	case '=':
	  if (definedef != dnone)
	    break;
	  switch (fvdef)
	    {
	    case foperator:
	    case finlist:
	    case fignore:
	    case vignore:
	      break;
	    case fvnameseen:
	      if ((members && bracelev == 1)
		  || (globals && bracelev == 0 && (!fvextern || declarations)))
		make_C_tag (false); /* a variable */
	      /* FALLTHRU */
	    default:
	      fvdef = vignore;
	    }
	  break;
	case '<':
	  if (cplpl
	      && (structdef == stagseen || fvdef == fvnameseen))
	    {
	      templatelev++;
	      break;
	    }
	  goto resetfvdef;
	case '>':
	  if (templatelev > 0)
	    {
	      templatelev--;
	      break;
	    }
	  goto resetfvdef;
	case '+':
	case '-':
	  if (objdef == oinbody && bracelev == 0)
	    {
	      objdef = omethodsign;
	      break;
	    }
	  /* FALLTHRU */
	resetfvdef:
	case '#': case '~': case '&': case '%': case '/':
	case '|': case '^': case '!': case '.': case '?':
	  if (definedef != dnone)
	    break;
	  /* These surely cannot follow a function tag in C. */
	  switch (fvdef)
	    {
	    case foperator:
	    case finlist:
	    case fignore:
	    case vignore:
	      break;
	    default:
	      fvdef = fvnone;
	    }
	  break;
	case '\0':
	  if (objdef == otagseen)
	    {
	      make_C_tag (true); /* an Objective C class */
	      objdef = oignore;
	    }
	  /* If a macro spans multiple lines don't reset its state. */
	  if (quotednl)
	    CNL_SAVE_DEFINEDEF ();
	  else
	    CNL ();
	  break;
	} /* switch (c) */

    } /* while not eof */

  free (lbs[0].lb.buffer);
  free (lbs[1].lb.buffer);
}
3817
3818 /*
3819 * Process either a C++ file or a C file depending on the setting
3820 * of a global flag.
3821 */
3822 static void
3823 default_C_entries (FILE *inf)
3824 {
3825 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3826 }
3827
/* Parse INF as plain C: no extension flag is passed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extension = 0;

  C_entries (no_extension, inf);
}
3834
3835 /* Always do C++. */
3836 static void
3837 Cplusplus_entries (FILE *inf)
3838 {
3839 C_entries (C_PLPL, inf);
3840 }
3841
3842 /* Always do Java. */
3843 static void
3844 Cjava_entries (FILE *inf)
3845 {
3846 C_entries (C_JAVA, inf);
3847 }
3848
3849 /* Always do C*. */
3850 static void
3851 Cstar_entries (FILE *inf)
3852 {
3853 C_entries (C_STAR, inf);
3854 }
3855
3856 /* Always do Yacc. */
3857 static void
3858 Yacc_entries (FILE *inf)
3859 {
3860 C_entries (YACC, inf);
3861 }
3862
3863 \f
/* Useful macros. */

/* Loop header that runs the statement following it once per input line.
   Before each iteration, and only while FILE_POINTER is not at end of
   file, it calls readline to fill LINE_BUFFER and points CHAR_POINTER
   at the start of LINE_BUFFER's text.  The `for' has no init and no
   increment clause: all the work is done in the loop test, so a
   `continue' in the body simply moves on to the next line.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&line_buffer, file_pointer),			\
	   char_pointer = line_buffer.buffer,				\
	   true);							\
      )
3873
/* True if CP points at the keyword KW and the character right after
   the keyword satisfies notinname.  On success CP is advanced past the
   keyword and past any spaces that follow it (via skip_spaces); on
   failure CP is left alone.  CP must be an lvalue and is evaluated
   several times.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert ("" kw), true)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof (kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof (kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3879
/* Similar to LOOKING_AT, but the comparison ignores case, the character
   after the keyword is not checked with notinname, and following spaces
   are not skipped: on success CP is advanced just past the keyword.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert ("" kw), true) /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof (kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof (kw)-1))			/* skip the keyword */
3885
3886 /*
3887 * Read a file, but do no processing. This is used to do regexp
3888 * matching on files that have no language defined.
3889 */
3890 static void
3891 just_read_file (FILE *inf)
3892 {
3893 while (!feof (inf))
3894 readline (&lb, inf);
3895 }
3896
3897 \f
3898 /* Fortran parsing */
3899
3900 static void F_takeprec (void);
3901 static void F_getit (FILE *);
3902
3903 static void
3904 F_takeprec (void)
3905 {
3906 dbp = skip_spaces (dbp);
3907 if (*dbp != '*')
3908 return;
3909 dbp++;
3910 dbp = skip_spaces (dbp);
3911 if (strneq (dbp, "(*)", 3))
3912 {
3913 dbp += 3;
3914 return;
3915 }
3916 if (!ISDIGIT (*dbp))
3917 {
3918 --dbp; /* force failure */
3919 return;
3920 }
3921 do
3922 dbp++;
3923 while (ISDIGIT (*dbp));
3924 }
3925
3926 static void
3927 F_getit (FILE *inf)
3928 {
3929 register char *cp;
3930
3931 dbp = skip_spaces (dbp);
3932 if (*dbp == '\0')
3933 {
3934 readline (&lb, inf);
3935 dbp = lb.buffer;
3936 if (dbp[5] != '&')
3937 return;
3938 dbp += 6;
3939 dbp = skip_spaces (dbp);
3940 }
3941 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3942 return;
3943 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3944 continue;
3945 make_tag (dbp, cp-dbp, true,
3946 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3947 }
3948
3949
3950 static void
3951 Fortran_functions (FILE *inf)
3952 {
3953 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3954 {
3955 if (*dbp == '%')
3956 dbp++; /* Ratfor escape to fortran */
3957 dbp = skip_spaces (dbp);
3958 if (*dbp == '\0')
3959 continue;
3960
3961 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3962 dbp = skip_spaces (dbp);
3963
3964 if (LOOKING_AT_NOCASE (dbp, "pure"))
3965 dbp = skip_spaces (dbp);
3966
3967 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3968 dbp = skip_spaces (dbp);
3969
3970 switch (lowcase (*dbp))
3971 {
3972 case 'i':
3973 if (nocase_tail ("integer"))
3974 F_takeprec ();
3975 break;
3976 case 'r':
3977 if (nocase_tail ("real"))
3978 F_takeprec ();
3979 break;
3980 case 'l':
3981 if (nocase_tail ("logical"))
3982 F_takeprec ();
3983 break;
3984 case 'c':
3985 if (nocase_tail ("complex") || nocase_tail ("character"))
3986 F_takeprec ();
3987 break;
3988 case 'd':
3989 if (nocase_tail ("double"))
3990 {
3991 dbp = skip_spaces (dbp);
3992 if (*dbp == '\0')
3993 continue;
3994 if (nocase_tail ("precision"))
3995 break;
3996 continue;
3997 }
3998 break;
3999 }
4000 dbp = skip_spaces (dbp);
4001 if (*dbp == '\0')
4002 continue;
4003 switch (lowcase (*dbp))
4004 {
4005 case 'f':
4006 if (nocase_tail ("function"))
4007 F_getit (inf);
4008 continue;
4009 case 's':
4010 if (nocase_tail ("subroutine"))
4011 F_getit (inf);
4012 continue;
4013 case 'e':
4014 if (nocase_tail ("entry"))
4015 F_getit (inf);
4016 continue;
4017 case 'b':
4018 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4019 {
4020 dbp = skip_spaces (dbp);
4021 if (*dbp == '\0') /* assume un-named */
4022 make_tag ("blockdata", 9, true,
4023 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4024 else
4025 F_getit (inf); /* look for name */
4026 }
4027 continue;
4028 }
4029 }
4030 }
4031
4032 \f
4033 /*
4034 * Ada parsing
4035 * Original code by
4036 * Philippe Waroquiers (1998)
4037 */
4038
4039 /* Once we are positioned after an "interesting" keyword, let's get
4040 the real tag value necessary. */
4041 static void
4042 Ada_getit (FILE *inf, const char *name_qualifier)
4043 {
4044 register char *cp;
4045 char *name;
4046 char c;
4047
4048 while (!feof (inf))
4049 {
4050 dbp = skip_spaces (dbp);
4051 if (*dbp == '\0'
4052 || (dbp[0] == '-' && dbp[1] == '-'))
4053 {
4054 readline (&lb, inf);
4055 dbp = lb.buffer;
4056 }
4057 switch (lowcase (*dbp))
4058 {
4059 case 'b':
4060 if (nocase_tail ("body"))
4061 {
4062 /* Skipping body of procedure body or package body or ....
4063 resetting qualifier to body instead of spec. */
4064 name_qualifier = "/b";
4065 continue;
4066 }
4067 break;
4068 case 't':
4069 /* Skipping type of task type or protected type ... */
4070 if (nocase_tail ("type"))
4071 continue;
4072 break;
4073 }
4074 if (*dbp == '"')
4075 {
4076 dbp += 1;
4077 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4078 continue;
4079 }
4080 else
4081 {
4082 dbp = skip_spaces (dbp);
4083 for (cp = dbp;
4084 (*cp != '\0'
4085 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4086 cp++)
4087 continue;
4088 if (cp == dbp)
4089 return;
4090 }
4091 c = *cp;
4092 *cp = '\0';
4093 name = concat (dbp, name_qualifier, "");
4094 *cp = c;
4095 make_tag (name, strlen (name), true,
4096 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4097 free (name);
4098 if (c == '"')
4099 dbp = cp + 1;
4100 return;
4101 }
4102 }
4103
4104 static void
4105 Ada_funcs (FILE *inf)
4106 {
4107 bool inquote = false;
4108 bool skip_till_semicolumn = false;
4109
4110 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4111 {
4112 while (*dbp != '\0')
4113 {
4114 /* Skip a string i.e. "abcd". */
4115 if (inquote || (*dbp == '"'))
4116 {
4117 dbp = strchr (dbp + !inquote, '"');
4118 if (dbp != NULL)
4119 {
4120 inquote = false;
4121 dbp += 1;
4122 continue; /* advance char */
4123 }
4124 else
4125 {
4126 inquote = true;
4127 break; /* advance line */
4128 }
4129 }
4130
4131 /* Skip comments. */
4132 if (dbp[0] == '-' && dbp[1] == '-')
4133 break; /* advance line */
4134
4135 /* Skip character enclosed in single quote i.e. 'a'
4136 and skip single quote starting an attribute i.e. 'Image. */
4137 if (*dbp == '\'')
4138 {
4139 dbp++ ;
4140 if (*dbp != '\0')
4141 dbp++;
4142 continue;
4143 }
4144
4145 if (skip_till_semicolumn)
4146 {
4147 if (*dbp == ';')
4148 skip_till_semicolumn = false;
4149 dbp++;
4150 continue; /* advance char */
4151 }
4152
4153 /* Search for beginning of a token. */
4154 if (!begtoken (*dbp))
4155 {
4156 dbp++;
4157 continue; /* advance char */
4158 }
4159
4160 /* We are at the beginning of a token. */
4161 switch (lowcase (*dbp))
4162 {
4163 case 'f':
4164 if (!packages_only && nocase_tail ("function"))
4165 Ada_getit (inf, "/f");
4166 else
4167 break; /* from switch */
4168 continue; /* advance char */
4169 case 'p':
4170 if (!packages_only && nocase_tail ("procedure"))
4171 Ada_getit (inf, "/p");
4172 else if (nocase_tail ("package"))
4173 Ada_getit (inf, "/s");
4174 else if (nocase_tail ("protected")) /* protected type */
4175 Ada_getit (inf, "/t");
4176 else
4177 break; /* from switch */
4178 continue; /* advance char */
4179
4180 case 'u':
4181 if (typedefs && !packages_only && nocase_tail ("use"))
4182 {
4183 /* when tagging types, avoid tagging use type Pack.Typename;
4184 for this, we will skip everything till a ; */
4185 skip_till_semicolumn = true;
4186 continue; /* advance char */
4187 }
4188
4189 case 't':
4190 if (!packages_only && nocase_tail ("task"))
4191 Ada_getit (inf, "/k");
4192 else if (typedefs && !packages_only && nocase_tail ("type"))
4193 {
4194 Ada_getit (inf, "/t");
4195 while (*dbp != '\0')
4196 dbp += 1;
4197 }
4198 else
4199 break; /* from switch */
4200 continue; /* advance char */
4201 }
4202
4203 /* Look for the end of the token. */
4204 while (!endtoken (*dbp))
4205 dbp++;
4206
4207 } /* advance char */
4208 } /* advance line */
4209 }
4210
4211 \f
4212 /*
4213 * Unix and microcontroller assembly tag handling
4214 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4215 * Idea by Bob Weiner, Motorola Inc. (1994)
4216 */
4217 static void
4218 Asm_labels (FILE *inf)
4219 {
4220 register char *cp;
4221
4222 LOOP_ON_INPUT_LINES (inf, lb, cp)
4223 {
4224 /* If first char is alphabetic or one of [_.$], test for colon
4225 following identifier. */
4226 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4227 {
4228 /* Read past label. */
4229 cp++;
4230 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4231 cp++;
4232 if (*cp == ':' || iswhite (*cp))
4233 /* Found end of label, so copy it and add it to the table. */
4234 make_tag (lb.buffer, cp - lb.buffer, true,
4235 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4236 }
4237 }
4238 }
4239
4240 \f
4241 /*
4242 * Perl support
4243 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4244 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4245 * Perl variable names: /^(my|local).../
4246 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4247 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4248 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4249 */
4250 static void
4251 Perl_functions (FILE *inf)
4252 {
4253 char *package = savestr ("main"); /* current package name */
4254 register char *cp;
4255
4256 LOOP_ON_INPUT_LINES (inf, lb, cp)
4257 {
4258 cp = skip_spaces (cp);
4259
4260 if (LOOKING_AT (cp, "package"))
4261 {
4262 free (package);
4263 get_tag (cp, &package);
4264 }
4265 else if (LOOKING_AT (cp, "sub"))
4266 {
4267 char *pos, *sp;
4268
4269 subr:
4270 sp = cp;
4271 while (!notinname (*cp))
4272 cp++;
4273 if (cp == sp)
4274 continue; /* nothing found */
4275 if ((pos = strchr (sp, ':')) != NULL
4276 && pos < cp && pos[1] == ':')
4277 /* The name is already qualified. */
4278 make_tag (sp, cp - sp, true,
4279 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4280 else
4281 /* Qualify it. */
4282 {
4283 char savechar, *name;
4284
4285 savechar = *cp;
4286 *cp = '\0';
4287 name = concat (package, "::", sp);
4288 *cp = savechar;
4289 make_tag (name, strlen (name), true,
4290 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4291 free (name);
4292 }
4293 }
4294 else if (LOOKING_AT (cp, "use constant")
4295 || LOOKING_AT (cp, "use constant::defer"))
4296 {
4297 /* For hash style multi-constant like
4298 use constant { FOO => 123,
4299 BAR => 456 };
4300 only the first FOO is picked up. Parsing across the value
4301 expressions would be difficult in general, due to possible nested
4302 hashes, here-documents, etc. */
4303 if (*cp == '{')
4304 cp = skip_spaces (cp+1);
4305 goto subr;
4306 }
4307 else if (globals) /* only if we are tagging global vars */
4308 {
4309 /* Skip a qualifier, if any. */
4310 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4311 /* After "my" or "local", but before any following paren or space. */
4312 char *varstart = cp;
4313
4314 if (qual /* should this be removed? If yes, how? */
4315 && (*cp == '$' || *cp == '@' || *cp == '%'))
4316 {
4317 varstart += 1;
4318 do
4319 cp++;
4320 while (ISALNUM (*cp) || *cp == '_');
4321 }
4322 else if (qual)
4323 {
4324 /* Should be examining a variable list at this point;
4325 could insist on seeing an open parenthesis. */
4326 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4327 cp++;
4328 }
4329 else
4330 continue;
4331
4332 make_tag (varstart, cp - varstart, false,
4333 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4334 }
4335 }
4336 free (package);
4337 }
4338
4339
4340 /*
4341 * Python support
4342 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4343 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4344 * More ideas by seb bacon <seb@jamkit.com> (2002)
4345 */
4346 static void
4347 Python_functions (FILE *inf)
4348 {
4349 register char *cp;
4350
4351 LOOP_ON_INPUT_LINES (inf, lb, cp)
4352 {
4353 cp = skip_spaces (cp);
4354 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4355 {
4356 char *name = cp;
4357 while (!notinname (*cp) && *cp != ':')
4358 cp++;
4359 make_tag (name, cp - name, true,
4360 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4361 }
4362 }
4363 }
4364
4365 \f
4366 /*
4367 * PHP support
4368 * Look for:
4369 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4370 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4371 * - /^[ \t]*define\(\"[^\"]+/
4372 * Only with --members:
4373 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4374 * Idea by Diez B. Roggisch (2001)
4375 */
4376 static void
4377 PHP_functions (FILE *inf)
4378 {
4379 char *cp, *name;
4380 bool search_identifier = false;
4381
4382 LOOP_ON_INPUT_LINES (inf, lb, cp)
4383 {
4384 cp = skip_spaces (cp);
4385 name = cp;
4386 if (search_identifier
4387 && *cp != '\0')
4388 {
4389 while (!notinname (*cp))
4390 cp++;
4391 make_tag (name, cp - name, true,
4392 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4393 search_identifier = false;
4394 }
4395 else if (LOOKING_AT (cp, "function"))
4396 {
4397 if (*cp == '&')
4398 cp = skip_spaces (cp+1);
4399 if (*cp != '\0')
4400 {
4401 name = cp;
4402 while (!notinname (*cp))
4403 cp++;
4404 make_tag (name, cp - name, true,
4405 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4406 }
4407 else
4408 search_identifier = true;
4409 }
4410 else if (LOOKING_AT (cp, "class"))
4411 {
4412 if (*cp != '\0')
4413 {
4414 name = cp;
4415 while (*cp != '\0' && !iswhite (*cp))
4416 cp++;
4417 make_tag (name, cp - name, false,
4418 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4419 }
4420 else
4421 search_identifier = true;
4422 }
4423 else if (strneq (cp, "define", 6)
4424 && (cp = skip_spaces (cp+6))
4425 && *cp++ == '('
4426 && (*cp == '"' || *cp == '\''))
4427 {
4428 char quote = *cp++;
4429 name = cp;
4430 while (*cp != quote && *cp != '\0')
4431 cp++;
4432 make_tag (name, cp - name, false,
4433 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4434 }
4435 else if (members
4436 && LOOKING_AT (cp, "var")
4437 && *cp == '$')
4438 {
4439 name = cp;
4440 while (!notinname (*cp))
4441 cp++;
4442 make_tag (name, cp - name, false,
4443 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4444 }
4445 }
4446 }
4447
4448 \f
4449 /*
4450 * Cobol tag functions
4451 * We could look for anything that could be a paragraph name.
4452 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4453 * Idea by Corny de Souza (1993)
4454 */
4455 static void
4456 Cobol_paragraphs (FILE *inf)
4457 {
4458 register char *bp, *ep;
4459
4460 LOOP_ON_INPUT_LINES (inf, lb, bp)
4461 {
4462 if (lb.len < 9)
4463 continue;
4464 bp += 8;
4465
4466 /* If eoln, compiler option or comment ignore whole line. */
4467 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4468 continue;
4469
4470 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4471 continue;
4472 if (*ep++ == '.')
4473 make_tag (bp, ep - bp, true,
4474 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4475 }
4476 }
4477
4478 \f
4479 /*
4480 * Makefile support
4481 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4482 */
4483 static void
4484 Makefile_targets (FILE *inf)
4485 {
4486 register char *bp;
4487
4488 LOOP_ON_INPUT_LINES (inf, lb, bp)
4489 {
4490 if (*bp == '\t' || *bp == '#')
4491 continue;
4492 while (*bp != '\0' && *bp != '=' && *bp != ':')
4493 bp++;
4494 if (*bp == ':' || (globals && *bp == '='))
4495 {
4496 /* We should detect if there is more than one tag, but we do not.
4497 We just skip initial and final spaces. */
4498 char * namestart = skip_spaces (lb.buffer);
4499 while (--bp > namestart)
4500 if (!notinname (*bp))
4501 break;
4502 make_tag (namestart, bp - namestart + 1, true,
4503 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4504 }
4505 }
4506 }
4507
4508 \f
/*
 * Pascal parsing
 * Original code by Mosur K. Mohan (1989)
 *
 * Locates tags for procedures & functions.  Doesn't do any type- or
 * var-definitions.  It does look for the keyword "extern" or
 * "forward" immediately following the procedure statement; if found,
 * the tag is skipped.
 *
 * The input is walked one character at a time through the global
 * pointer dbp, refilling lb with readline at every end of line.  A tag
 * is not emitted when its name is first seen: the line text and
 * position are saved in TLINE and the save_* variables, and make_tag
 * is called only once the text after the ';' that ends the heading
 * has been checked against "extern" and "forward".
 */
static void
Pascal_functions (FILE *inf)
{
  linebuffer tline;             /* mostly copied from C_entries */
  long save_lcno;
  int save_lineno, namelen, taglen;
  char c, *name;

  bool                          /* each of these flags is true if: */
    incomment,                  /* point is inside a comment */
    inquote,                    /* point is inside '..' string */
    get_tagname,                /* point is after PROCEDURE/FUNCTION
                                   keyword, so next item = potential tag */
    found_tag,                  /* point is after a potential tag */
    inparms,                    /* point is within parameter-list */
    verify_tag;                 /* point has passed the parm-list, so the
                                   next token will determine whether this
                                   is a FORWARD/EXTERN to be ignored, or
                                   whether it is a real tag */

  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  name = NULL;                  /* keep compiler quiet */
  /* Start with an empty line in lb so that the first pass through the
     loop below immediately reads a real line.  */
  dbp = lb.buffer;
  *dbp = '\0';
  linebuffer_init (&tline);

  incomment = inquote = false;
  found_tag = false;            /* have a proc name; check if extern */
  get_tagname = false;          /* found "procedure" keyword */
  inparms = false;              /* found '(' after "proc" */
  verify_tag = false;           /* check if "extern" is ahead */


  while (!feof (inf))           /* long main loop to get next char */
    {
      c = *dbp++;
      if (c == '\0')            /* if end of line */
        {
          readline (&lb, inf);
          dbp = lb.buffer;
          if (*dbp == '\0')
            continue;
          if (!((found_tag && verify_tag)
                || get_tagname))
            c = *dbp++;         /* only if don't need *dbp pointing
                                   to the beginning of the name of
                                   the procedure or function */
        }
      if (incomment)
        {
          if (c == '}')         /* within { } comments */
            incomment = false;
          else if (c == '*' && *dbp == ')') /* within (* *) comments */
            {
              dbp++;
              incomment = false;
            }
          continue;
        }
      else if (inquote)
        {
          if (c == '\'')
            inquote = false;
          continue;
        }
      else
        switch (c)
          {
          case '\'':
            inquote = true;     /* found first quote */
            continue;
          case '{':             /* found open { comment */
            incomment = true;
            continue;
          case '(':
            if (*dbp == '*')    /* found open (* comment */
              {
                incomment = true;
                dbp++;
              }
            else if (found_tag) /* found '(' after tag, i.e., parm-list */
              inparms = true;
            continue;
          case ')':             /* end of parms list */
            if (inparms)
              inparms = false;
            continue;
          case ';':
            if (found_tag && !inparms) /* end of proc or fn stmt */
              {
                verify_tag = true;
                /* Leave the switch, not the loop, so that the checks
                   below run on the text following this ';'.  */
                break;
              }
            continue;
          }
      if (found_tag && verify_tag && (*dbp != ' '))
        {
          /* Check if this is an "extern" declaration.  */
          if (*dbp == '\0')
            continue;
          if (lowcase (*dbp) == 'e')
            {
              if (nocase_tail ("extern")) /* superfluous, really! */
                {
                  found_tag = false;
                  verify_tag = false;
                }
            }
          else if (lowcase (*dbp) == 'f')
            {
              if (nocase_tail ("forward")) /* check for forward reference */
                {
                  found_tag = false;
                  verify_tag = false;
                }
            }
          if (found_tag && verify_tag) /* not external proc, so make tag */
            {
              found_tag = false;
              verify_tag = false;
              /* NAME points into TLINE, the copy of the heading line
                 saved when the name was found.  */
              make_tag (name, namelen, true,
                        tline.buffer, taglen, save_lineno, save_lcno);
              continue;
            }
        }
      if (get_tagname)          /* grab name of proc or fn */
        {
          char *cp;

          if (*dbp == '\0')
            continue;

          /* Find block name.  */
          for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
            continue;

          /* Save all values for later tagging.  */
          linebuffer_setlen (&tline, lb.len);
          strcpy (tline.buffer, lb.buffer);
          save_lineno = lineno;
          save_lcno = linecharno;
          name = tline.buffer + (dbp - lb.buffer);
          namelen = cp - dbp;
          taglen = cp - lb.buffer + 1;

          dbp = cp;             /* set dbp to e-o-token */
          get_tagname = false;
          found_tag = true;
          continue;

          /* And proceed to check for "extern".  */
        }
      else if (!incomment && !inquote && !found_tag)
        {
          /* Check for proc/fn keywords.  */
          /* NOTE(review): nocase_tail is defined elsewhere; from its use
             here it presumably matches the rest of the keyword at dbp
             without regard to case -- confirm.  */
          switch (lowcase (c))
            {
            case 'p':
              if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
                get_tagname = true;
              continue;
            case 'f':
              if (nocase_tail ("unction"))
                get_tagname = true;
              continue;
            }
        }
    } /* while not eof */

  free (tline.buffer);
}
4689
4690 \f
4691 /*
4692 * Lisp tag functions
4693 * look for (def or (DEF, quote or QUOTE
4694 */
4695
4696 static void L_getit (void);
4697
4698 static void
4699 L_getit (void)
4700 {
4701 if (*dbp == '\'') /* Skip prefix quote */
4702 dbp++;
4703 else if (*dbp == '(')
4704 {
4705 dbp++;
4706 /* Try to skip "(quote " */
4707 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4708 /* Ok, then skip "(" before name in (defstruct (foo)) */
4709 dbp = skip_spaces (dbp);
4710 }
4711 get_tag (dbp, NULL);
4712 }
4713
4714 static void
4715 Lisp_functions (FILE *inf)
4716 {
4717 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4718 {
4719 if (dbp[0] != '(')
4720 continue;
4721
4722 /* "(defvar foo)" is a declaration rather than a definition. */
4723 if (! declarations)
4724 {
4725 char *p = dbp + 1;
4726 if (LOOKING_AT (p, "defvar"))
4727 {
4728 p = skip_name (p); /* past var name */
4729 p = skip_spaces (p);
4730 if (*p == ')')
4731 continue;
4732 }
4733 }
4734
4735 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4736 dbp += 3;
4737
4738 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4739 {
4740 dbp = skip_non_spaces (dbp);
4741 dbp = skip_spaces (dbp);
4742 L_getit ();
4743 }
4744 else
4745 {
4746 /* Check for (foo::defmumble name-defined ... */
4747 do
4748 dbp++;
4749 while (!notinname (*dbp) && *dbp != ':');
4750 if (*dbp == ':')
4751 {
4752 do
4753 dbp++;
4754 while (*dbp == ':');
4755
4756 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4757 {
4758 dbp = skip_non_spaces (dbp);
4759 dbp = skip_spaces (dbp);
4760 L_getit ();
4761 }
4762 }
4763 }
4764 }
4765 }
4766
4767 \f
4768 /*
4769 * Lua script language parsing
4770 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4771 *
4772 * "function" and "local function" are tags if they start at column 1.
4773 */
4774 static void
4775 Lua_functions (FILE *inf)
4776 {
4777 register char *bp;
4778
4779 LOOP_ON_INPUT_LINES (inf, lb, bp)
4780 {
4781 if (bp[0] != 'f' && bp[0] != 'l')
4782 continue;
4783
4784 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4785
4786 if (LOOKING_AT (bp, "function"))
4787 get_tag (bp, NULL);
4788 }
4789 }
4790
4791 \f
4792 /*
4793 * PostScript tags
4794 * Just look for lines where the first character is '/'
4795 * Also look at "defineps" for PSWrap
4796 * Ideas by:
4797 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4798 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4799 */
4800 static void
4801 PS_functions (FILE *inf)
4802 {
4803 register char *bp, *ep;
4804
4805 LOOP_ON_INPUT_LINES (inf, lb, bp)
4806 {
4807 if (bp[0] == '/')
4808 {
4809 for (ep = bp+1;
4810 *ep != '\0' && *ep != ' ' && *ep != '{';
4811 ep++)
4812 continue;
4813 make_tag (bp, ep - bp, true,
4814 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4815 }
4816 else if (LOOKING_AT (bp, "defineps"))
4817 get_tag (bp, NULL);
4818 }
4819 }
4820
4821 \f
4822 /*
4823 * Forth tags
4824 * Ignore anything after \ followed by space or in ( )
4825 * Look for words defined by :
4826 * Look for constant, code, create, defer, value, and variable
4827 * OBP extensions: Look for buffer:, field,
4828 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4829 */
4830 static void
4831 Forth_words (FILE *inf)
4832 {
4833 register char *bp;
4834
4835 LOOP_ON_INPUT_LINES (inf, lb, bp)
4836 while ((bp = skip_spaces (bp))[0] != '\0')
4837 if (bp[0] == '\\' && iswhite (bp[1]))
4838 break; /* read next line */
4839 else if (bp[0] == '(' && iswhite (bp[1]))
4840 do /* skip to ) or eol */
4841 bp++;
4842 while (*bp != ')' && *bp != '\0');
4843 else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4844 || LOOKING_AT_NOCASE (bp, "constant")
4845 || LOOKING_AT_NOCASE (bp, "code")
4846 || LOOKING_AT_NOCASE (bp, "create")
4847 || LOOKING_AT_NOCASE (bp, "defer")
4848 || LOOKING_AT_NOCASE (bp, "value")
4849 || LOOKING_AT_NOCASE (bp, "variable")
4850 || LOOKING_AT_NOCASE (bp, "buffer:")
4851 || LOOKING_AT_NOCASE (bp, "field"))
4852 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4853 else
4854 bp = skip_non_spaces (bp);
4855 }
4856
4857 \f
4858 /*
4859 * Scheme tag functions
4860 * look for (def... xyzzy
4861 * (def... (xyzzy
4862 * (def ... ((...(xyzzy ....
4863 * (set! xyzzy
4864 * Original code by Ken Haase (1985?)
4865 */
4866 static void
4867 Scheme_functions (FILE *inf)
4868 {
4869 register char *bp;
4870
4871 LOOP_ON_INPUT_LINES (inf, lb, bp)
4872 {
4873 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4874 {
4875 bp = skip_non_spaces (bp+4);
4876 /* Skip over open parens and white space. Don't continue past
4877 '\0'. */
4878 while (*bp && notinname (*bp))
4879 bp++;
4880 get_tag (bp, NULL);
4881 }
4882 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4883 get_tag (bp, NULL);
4884 }
4885 }
4886
4887 \f
4888 /* Find tags in TeX and LaTeX input files. */
4889
4890 /* TEX_toktab is a table of TeX control sequences that define tags.
4891 * Each entry records one such control sequence.
4892 *
4893 * Original code from who knows whom.
4894 * Ideas by:
4895 * Stefan Monnier (2002)
4896 */
4897
/* Table with tag tokens.  NULL until TEX_decode_env builds it; the
   last entry has a NULL buffer.  */
static linebuffer *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by ':' and written without the escape
   character.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters of the file being scanned; TEX_mode
   sets all three for each file.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4913
4914 /*
4915 * TeX/LaTeX scanning loop.
4916 */
4917 static void
4918 TeX_commands (FILE *inf)
4919 {
4920 char *cp;
4921 linebuffer *key;
4922
4923 /* Select either \ or ! as escape character. */
4924 TEX_mode (inf);
4925
4926 /* Initialize token table once from environment. */
4927 if (TEX_toktab == NULL)
4928 TEX_decode_env ("TEXTAGS", TEX_defenv);
4929
4930 LOOP_ON_INPUT_LINES (inf, lb, cp)
4931 {
4932 /* Look at each TEX keyword in line. */
4933 for (;;)
4934 {
4935 /* Look for a TEX escape. */
4936 while (*cp++ != TEX_esc)
4937 if (cp[-1] == '\0' || cp[-1] == '%')
4938 goto tex_next_line;
4939
4940 for (key = TEX_toktab; key->buffer != NULL; key++)
4941 if (strneq (cp, key->buffer, key->len))
4942 {
4943 char *p;
4944 int namelen, linelen;
4945 bool opgrp = false;
4946
4947 cp = skip_spaces (cp + key->len);
4948 if (*cp == TEX_opgrp)
4949 {
4950 opgrp = true;
4951 cp++;
4952 }
4953 for (p = cp;
4954 (!iswhite (*p) && *p != '#' &&
4955 *p != TEX_opgrp && *p != TEX_clgrp);
4956 p++)
4957 continue;
4958 namelen = p - cp;
4959 linelen = lb.len;
4960 if (!opgrp || *p == TEX_clgrp)
4961 {
4962 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4963 p++;
4964 linelen = p - lb.buffer + 1;
4965 }
4966 make_tag (cp, namelen, true,
4967 lb.buffer, linelen, lineno, linecharno);
4968 goto tex_next_line; /* We only tag a line once */
4969 }
4970 }
4971 tex_next_line:
4972 ;
4973 }
4974 }
4975
4976 #define TEX_LESC '\\'
4977 #define TEX_SESC '!'
4978
4979 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4980 chars accordingly. */
4981 static void
4982 TEX_mode (FILE *inf)
4983 {
4984 int c;
4985
4986 while ((c = getc (inf)) != EOF)
4987 {
4988 /* Skip to next line if we hit the TeX comment char. */
4989 if (c == '%')
4990 while (c != '\n' && c != EOF)
4991 c = getc (inf);
4992 else if (c == TEX_LESC || c == TEX_SESC )
4993 break;
4994 }
4995
4996 if (c == TEX_LESC)
4997 {
4998 TEX_esc = TEX_LESC;
4999 TEX_opgrp = '{';
5000 TEX_clgrp = '}';
5001 }
5002 else
5003 {
5004 TEX_esc = TEX_SESC;
5005 TEX_opgrp = '<';
5006 TEX_clgrp = '>';
5007 }
5008 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5009 No attempt is made to correct the situation. */
5010 rewind (inf);
5011 }
5012
5013 /* Read environment and prepend it to the default string.
5014 Build token table. */
5015 static void
5016 TEX_decode_env (const char *evarname, const char *defenv)
5017 {
5018 register const char *env, *p;
5019 int i, len;
5020
5021 /* Append default string to environment. */
5022 env = getenv (evarname);
5023 if (!env)
5024 env = defenv;
5025 else
5026 env = concat (env, defenv, "");
5027
5028 /* Allocate a token table */
5029 for (len = 1, p = env; p;)
5030 if ((p = strchr (p, ':')) && *++p != '\0')
5031 len++;
5032 TEX_toktab = xnew (len, linebuffer);
5033
5034 /* Unpack environment string into token table. Be careful about */
5035 /* zero-length strings (leading ':', "::" and trailing ':') */
5036 for (i = 0; *env != '\0';)
5037 {
5038 p = strchr (env, ':');
5039 if (!p) /* End of environment string. */
5040 p = env + strlen (env);
5041 if (p - env > 0)
5042 { /* Only non-zero strings. */
5043 TEX_toktab[i].buffer = savenstr (env, p - env);
5044 TEX_toktab[i].len = p - env;
5045 i++;
5046 }
5047 if (*p)
5048 env = p + 1;
5049 else
5050 {
5051 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5052 TEX_toktab[i].len = 0;
5053 break;
5054 }
5055 }
5056 }
5057
5058 \f
5059 /* Texinfo support. Dave Love, Mar. 2000. */
5060 static void
5061 Texinfo_nodes (FILE *inf)
5062 {
5063 char *cp, *start;
5064 LOOP_ON_INPUT_LINES (inf, lb, cp)
5065 if (LOOKING_AT (cp, "@node"))
5066 {
5067 start = cp;
5068 while (*cp != '\0' && *cp != ',')
5069 cp++;
5070 make_tag (start, cp - start, true,
5071 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5072 }
5073 }
5074
5075 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 *
 * Francesco Potortì, 2002.
 *
 * The scanner is a small state machine driven by the four flags
 * declared below.  The flags are kept from one line to the next, so an
 * HTML tag or the text to grab may start on one line and continue on
 * a later one.  The global dbp is the scanning position.
 */
static void
HTML_labels (FILE *inf)
{
  bool getnext = false;         /* next text outside of HTML tags is a tag */
  bool skiptag = false;         /* skip to the end of the current HTML tag */
  bool intag = false;           /* inside an html tag, looking for ID= */
  bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = false;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag)         /* look for "name=" or "id=" */
          {
            /* Stop at the end of the tag or at any letter that could
               start one of the two attribute names.  */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = false;
                continue;       /* look on the same line */
              }
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                bool quoted = (dbp[0] == '"');

                /* A quoted value ends at the closing quote, an unquoted
                   one at the first character that is not intoken.  */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Remember the value; it becomes the name of the tag
                   made from the text that follows.  */
                linebuffer_setlen (&token_name, end - dbp);
                memcpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = false;  /* we found what we looked for */
                skiptag = true; /* skip to the end of the tag */
                getnext = true; /* then grab the text */
                continue;       /* look on the same line */
              }
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: examine it
                   first, GETNEXT stays set.  */
                intag = true;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next '<' or the end of the line is
               the tag text; the tag name is whatever was stored in
               token_name.  */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, true,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = false;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = true;
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = true;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = false;
                getnext = true;
                continue;       /* look on the same line */
              }
            dbp += 1;
          }
      }
}
5194
5195 \f
5196 /*
5197 * Prolog support
5198 *
5199 * Assumes that the predicate or rule starts at column 0.
5200 * Only the first clause of a predicate or rule is added.
5201 * Original code by Sunichirou Sugou (1989)
5202 * Rewritten by Anders Lindgren (1996)
5203 */
5204 static size_t prolog_pr (char *, char *);
5205 static void prolog_skip_comment (linebuffer *, FILE *);
5206 static size_t prolog_atom (char *, size_t);
5207
5208 static void
5209 Prolog_functions (FILE *inf)
5210 {
5211 char *cp, *last;
5212 size_t len;
5213 size_t allocated;
5214
5215 allocated = 0;
5216 len = 0;
5217 last = NULL;
5218
5219 LOOP_ON_INPUT_LINES (inf, lb, cp)
5220 {
5221 if (cp[0] == '\0') /* Empty line */
5222 continue;
5223 else if (iswhite (cp[0])) /* Not a predicate */
5224 continue;
5225 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5226 prolog_skip_comment (&lb, inf);
5227 else if ((len = prolog_pr (cp, last)) > 0)
5228 {
5229 /* Predicate or rule. Store the function name so that we
5230 only generate a tag for the first clause. */
5231 if (last == NULL)
5232 last = xnew (len + 1, char);
5233 else if (len + 1 > allocated)
5234 xrnew (last, len + 1, char);
5235 allocated = len + 1;
5236 memcpy (last, cp, len);
5237 last[len] = '\0';
5238 }
5239 }
5240 free (last);
5241 }
5242
5243
5244 static void
5245 prolog_skip_comment (linebuffer *plb, FILE *inf)
5246 {
5247 char *cp;
5248
5249 do
5250 {
5251 for (cp = plb->buffer; *cp != '\0'; cp++)
5252 if (cp[0] == '*' && cp[1] == '/')
5253 return;
5254 readline (plb, inf);
5255 }
5256 while (!feof (inf));
5257 }
5258
5259 /*
5260 * A predicate or rule definition is added if it matches:
5261 * <beginning of line><Prolog Atom><whitespace>(
5262 * or <beginning of line><Prolog Atom><whitespace>:-
5263 *
5264 * It is added to the tags database if it doesn't match the
5265 * name of the previous clause header.
5266 *
5267 * Return the size of the name of the predicate or rule, or 0 if no
5268 * header was found.
5269 */
5270 static size_t
5271 prolog_pr (char *s, char *last)
5272
5273 /* Name of last clause. */
5274 {
5275 size_t pos;
5276 size_t len;
5277
5278 pos = prolog_atom (s, 0);
5279 if (! pos)
5280 return 0;
5281
5282 len = pos;
5283 pos = skip_spaces (s + pos) - s;
5284
5285 if ((s[pos] == '.'
5286 || (s[pos] == '(' && (pos += 1))
5287 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5288 && (last == NULL /* save only the first clause */
5289 || len != strlen (last)
5290 || !strneq (s, last, len)))
5291 {
5292 make_tag (s, len, true, s, pos, lineno, linecharno);
5293 return len;
5294 }
5295 else
5296 return 0;
5297 }
5298
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t origpos = pos;

  if (ISLOWER (s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted.  */
      do
        pos++;
      while (ISALNUM (s[pos]) || (s[pos] == '_'));
      return pos - origpos;
    }

  if (s[pos] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote.  */
  for (pos++; ; )
    switch (s[pos])
      {
      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - origpos; /* that was the closing quote */
        pos++;                  /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        if (s[pos + 1] == '\0')
          return 0;
        pos += 2;
        break;

      default:
        pos++;
        break;
      }
}
5355
5356 \f
5357 /*
5358 * Support for Erlang
5359 *
5360 * Generates tags for functions, defines, and records.
5361 * Assumes that Erlang functions start at column 0.
5362 * Original code by Anders Lindgren (1996)
5363 */
5364 static int erlang_func (char *, char *);
5365 static void erlang_attribute (char *);
5366 static int erlang_atom (char *);
5367
5368 static void
5369 Erlang_functions (FILE *inf)
5370 {
5371 char *cp, *last;
5372 int len;
5373 int allocated;
5374
5375 allocated = 0;
5376 len = 0;
5377 last = NULL;
5378
5379 LOOP_ON_INPUT_LINES (inf, lb, cp)
5380 {
5381 if (cp[0] == '\0') /* Empty line */
5382 continue;
5383 else if (iswhite (cp[0])) /* Not function nor attribute */
5384 continue;
5385 else if (cp[0] == '%') /* comment */
5386 continue;
5387 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5388 continue;
5389 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5390 {
5391 erlang_attribute (cp);
5392 if (last != NULL)
5393 {
5394 free (last);
5395 last = NULL;
5396 }
5397 }
5398 else if ((len = erlang_func (cp, last)) > 0)
5399 {
5400 /*
5401 * Function. Store the function name so that we only
5402 * generates a tag for the first clause.
5403 */
5404 if (last == NULL)
5405 last = xnew (len + 1, char);
5406 else if (len + 1 > allocated)
5407 xrnew (last, len + 1, char);
5408 allocated = len + 1;
5409 memcpy (last, cp, len);
5410 last[len] = '\0';
5411 }
5412 }
5413 free (last);
5414 }
5415
5416
5417 /*
5418 * A function definition is added if it matches:
5419 * <beginning of line><Erlang Atom><whitespace>(
5420 *
5421 * It is added to the tags database if it doesn't match the
5422 * name of the previous clause header.
5423 *
5424 * Return the size of the name of the function, or 0 if no function
5425 * was found.
5426 */
5427 static int
5428 erlang_func (char *s, char *last)
5429
5430 /* Name of last clause. */
5431 {
5432 int pos;
5433 int len;
5434
5435 pos = erlang_atom (s);
5436 if (pos < 1)
5437 return 0;
5438
5439 len = pos;
5440 pos = skip_spaces (s + pos) - s;
5441
5442 /* Save only the first clause. */
5443 if (s[pos++] == '('
5444 && (last == NULL
5445 || len != (int)strlen (last)
5446 || !strneq (s, last, len)))
5447 {
5448 make_tag (s, len, true, s, pos, lineno, linecharno);
5449 return len;
5450 }
5451
5452 return 0;
5453 }
5454
5455
5456 /*
5457 * Handle attributes. Currently, tags are generated for defines
5458 * and records.
5459 *
5460 * They are on the form:
5461 * -define(foo, bar).
5462 * -define(Foo(M, N), M+N).
5463 * -record(graph, {vtab = notable, cyclic = true}).
5464 */
5465 static void
5466 erlang_attribute (char *s)
5467 {
5468 char *cp = s;
5469
5470 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5471 && *cp++ == '(')
5472 {
5473 int len = erlang_atom (skip_spaces (cp));
5474 if (len > 0)
5475 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5476 }
5477 return;
5478 }
5479
5480
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* The atom is quoted; a backslash quotes the next character.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\' && s[++pos] == '\0')
            return 0;
          pos++;
        }
      pos++;                    /* count the closing quote */
    }
  else if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5508
5509 \f
5510 static char *scan_separators (char *);
5511 static void add_regex (char *, language *);
5512 static char *substitute (char *, char *, struct re_registers *);
5513
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The result is written over NAME starting at its first character,
 * i.e. on top of the opening separator.  A backslash followed by a
 * character that is neither a known escape letter nor the separator
 * is copied unchanged together with that character.
 */
static char *
scan_separators (char *name)
{
  /* Escape letters and, at the same index, the characters they denote:
     BEL, BS, DEL, ESC, FF, NL, CR, TAB and VT.  */
  static const char escape_letters[] = "abdefnrtv";
  static const char escape_values[] = "\007\b\177\033\f\n\r\t\v";

  const char sep = name[0];
  char *out = name;
  char *in = name + 1;
  bool after_backslash = false;

  while (*in != '\0')
    {
      const char c = *in;

      if (after_backslash)
        {
          const char *hit = strchr (escape_letters, c);

          if (hit != NULL)
            *out++ = escape_values[hit - escape_letters];
          else if (c == sep)
            *out++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote.  */
              *out++ = '\\';
              *out++ = c;
            }
          after_backslash = false;
        }
      else if (c == '\\')
        after_backslash = true;
      else if (c == sep)
        break;
      else
        *out++ = c;

      in++;
    }

  if (*in != sep)
    in = NULL;                  /* signal unterminated regexp */

  /* Terminate copied string.  */
  *out = '\0';
  return in;
}
5572
5573 /* Look at the argument of --regex or --no-regex and do the right
5574 thing. Same for each line of a regexp file. */
5575 static void
5576 analyse_regex (char *regex_arg)
5577 {
5578 if (regex_arg == NULL)
5579 {
5580 free_regexps (); /* --no-regex: remove existing regexps */
5581 return;
5582 }
5583
5584 /* A real --regexp option or a line in a regexp file. */
5585 switch (regex_arg[0])
5586 {
5587 /* Comments in regexp file or null arg to --regex. */
5588 case '\0':
5589 case ' ':
5590 case '\t':
5591 break;
5592
5593 /* Read a regex file. This is recursive and may result in a
5594 loop, which will stop when the file descriptors are exhausted. */
5595 case '@':
5596 {
5597 FILE *regexfp;
5598 linebuffer regexbuf;
5599 char *regexfile = regex_arg + 1;
5600
5601 /* regexfile is a file containing regexps, one per line. */
5602 regexfp = fopen (regexfile, "rb");
5603 if (regexfp == NULL)
5604 pfatal (regexfile);
5605 linebuffer_init (&regexbuf);
5606 while (readline_internal (&regexbuf, regexfp) > 0)
5607 analyse_regex (regexbuf.buffer);
5608 free (regexbuf.buffer);
5609 fclose (regexfp);
5610 }
5611 break;
5612
5613 /* Regexp to be used for a specific language only. */
5614 case '{':
5615 {
5616 language *lang;
5617 char *lang_name = regex_arg + 1;
5618 char *cp;
5619
5620 for (cp = lang_name; *cp != '}'; cp++)
5621 if (*cp == '\0')
5622 {
5623 error ("unterminated language name in regex: %s", regex_arg);
5624 return;
5625 }
5626 *cp++ = '\0';
5627 lang = get_language_from_langname (lang_name);
5628 if (lang == NULL)
5629 return;
5630 add_regex (cp, lang);
5631 }
5632 break;
5633
5634 /* Regexp to be used for any language. */
5635 default:
5636 add_regex (regex_arg, NULL);
5637 break;
5638 }
5639 }
5640
5641 /* Separate the regexp pattern, compile it,
5642 and care for optional name and modifiers. */
5643 static void
5644 add_regex (char *regexp_pattern, language *lang)
5645 {
5646 static struct re_pattern_buffer zeropattern;
5647 char sep, *pat, *name, *modifiers;
5648 char empty = '\0';
5649 const char *err;
5650 struct re_pattern_buffer *patbuf;
5651 regexp *rp;
5652 bool
5653 force_explicit_name = true, /* do not use implicit tag names */
5654 ignore_case = false, /* case is significant */
5655 multi_line = false, /* matches are done one line at a time */
5656 single_line = false; /* dot does not match newline */
5657
5658
5659 if (strlen (regexp_pattern) < 3)
5660 {
5661 error ("null regexp");
5662 return;
5663 }
5664 sep = regexp_pattern[0];
5665 name = scan_separators (regexp_pattern);
5666 if (name == NULL)
5667 {
5668 error ("%s: unterminated regexp", regexp_pattern);
5669 return;
5670 }
5671 if (name[1] == sep)
5672 {
5673 error ("null name for regexp \"%s\"", regexp_pattern);
5674 return;
5675 }
5676 modifiers = scan_separators (name);
5677 if (modifiers == NULL) /* no terminating separator --> no name */
5678 {
5679 modifiers = name;
5680 name = &empty;
5681 }
5682 else
5683 modifiers += 1; /* skip separator */
5684
5685 /* Parse regex modifiers. */
5686 for (; modifiers[0] != '\0'; modifiers++)
5687 switch (modifiers[0])
5688 {
5689 case 'N':
5690 if (modifiers == name)
5691 error ("forcing explicit tag name but no name, ignoring");
5692 force_explicit_name = true;
5693 break;
5694 case 'i':
5695 ignore_case = true;
5696 break;
5697 case 's':
5698 single_line = true;
5699 /* FALLTHRU */
5700 case 'm':
5701 multi_line = true;
5702 need_filebuf = true;
5703 break;
5704 default:
5705 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5706 break;
5707 }
5708
5709 patbuf = xnew (1, struct re_pattern_buffer);
5710 *patbuf = zeropattern;
5711 if (ignore_case)
5712 {
5713 static char lc_trans[CHARS];
5714 int i;
5715 for (i = 0; i < CHARS; i++)
5716 lc_trans[i] = lowcase (i);
5717 patbuf->translate = lc_trans; /* translation table to fold case */
5718 }
5719
5720 if (multi_line)
5721 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5722 else
5723 pat = regexp_pattern;
5724
5725 if (single_line)
5726 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5727 else
5728 re_set_syntax (RE_SYNTAX_EMACS);
5729
5730 err = re_compile_pattern (pat, strlen (pat), patbuf);
5731 if (multi_line)
5732 free (pat);
5733 if (err != NULL)
5734 {
5735 error ("%s while compiling pattern", err);
5736 return;
5737 }
5738
5739 rp = p_head;
5740 p_head = xnew (1, regexp);
5741 p_head->pattern = savestr (regexp_pattern);
5742 p_head->p_next = rp;
5743 p_head->lang = lang;
5744 p_head->pat = patbuf;
5745 p_head->name = savestr (name);
5746 p_head->error_signaled = false;
5747 p_head->force_explicit_name = force_explicit_name;
5748 p_head->ignore_case = ignore_case;
5749 p_head->multi_line = multi_line;
5750 }
5751
5752 /*
5753 * Do the substitutions indicated by the regular expression and
5754 * arguments.
5755 */
5756 static char *
5757 substitute (char *in, char *out, struct re_registers *regs)
5758 {
5759 char *result, *t;
5760 int size, dig, diglen;
5761
5762 result = NULL;
5763 size = strlen (out);
5764
5765 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5766 if (out[size - 1] == '\\')
5767 fatal ("pattern error in \"%s\"", out);
5768 for (t = strchr (out, '\\');
5769 t != NULL;
5770 t = strchr (t + 2, '\\'))
5771 if (ISDIGIT (t[1]))
5772 {
5773 dig = t[1] - '0';
5774 diglen = regs->end[dig] - regs->start[dig];
5775 size += diglen - 2;
5776 }
5777 else
5778 size -= 1;
5779
5780 /* Allocate space and do the substitutions. */
5781 assert (size >= 0);
5782 result = xnew (size + 1, char);
5783
5784 for (t = result; *out != '\0'; out++)
5785 if (*out == '\\' && ISDIGIT (*++out))
5786 {
5787 dig = *out - '0';
5788 diglen = regs->end[dig] - regs->start[dig];
5789 memcpy (t, in + regs->start[dig], diglen);
5790 t += diglen;
5791 }
5792 else
5793 *t++ = *out;
5794 *t = '\0';
5795
5796 assert (t <= result + size);
5797 assert (t - result == (int)strlen (result));
5798
5799 return result;
5800 }
5801
5802 /* Deallocate all regexps. */
5803 static void
5804 free_regexps (void)
5805 {
5806 regexp *rp;
5807 while (p_head != NULL)
5808 {
5809 rp = p_head->p_next;
5810 free (p_head->pattern);
5811 free (p_head->name);
5812 free (p_head);
5813 p_head = rp;
5814 }
5815 return;
5816 }
5817
5818 /*
5819 * Reads the whole file as a single string from `filebuf' and looks for
5820 * multi-line regular expressions, creating tags on matches.
5821 * readline already dealt with normal regexps.
5822 *
5823 * Idea by Ben Wing <ben@666.com> (2002).
5824 */
5825 static void
5826 regex_tag_multiline (void)
5827 {
5828 char *buffer = filebuf.buffer;
5829 regexp *rp;
5830 char *name;
5831
5832 for (rp = p_head; rp != NULL; rp = rp->p_next)
5833 {
5834 int match = 0;
5835
5836 if (!rp->multi_line)
5837 continue; /* skip normal regexps */
5838
5839 /* Generic initializations before parsing file from memory. */
5840 lineno = 1; /* reset global line number */
5841 charno = 0; /* reset global char number */
5842 linecharno = 0; /* reset global char number of line start */
5843
5844 /* Only use generic regexps or those for the current language. */
5845 if (rp->lang != NULL && rp->lang != curfdp->lang)
5846 continue;
5847
5848 while (match >= 0 && match < filebuf.len)
5849 {
5850 match = re_search (rp->pat, buffer, filebuf.len, charno,
5851 filebuf.len - match, &rp->regs);
5852 switch (match)
5853 {
5854 case -2:
5855 /* Some error. */
5856 if (!rp->error_signaled)
5857 {
5858 error ("regexp stack overflow while matching \"%s\"",
5859 rp->pattern);
5860 rp->error_signaled = true;
5861 }
5862 break;
5863 case -1:
5864 /* No match. */
5865 break;
5866 default:
5867 if (match == rp->regs.end[0])
5868 {
5869 if (!rp->error_signaled)
5870 {
5871 error ("regexp matches the empty string: \"%s\"",
5872 rp->pattern);
5873 rp->error_signaled = true;
5874 }
5875 match = -3; /* exit from while loop */
5876 break;
5877 }
5878
5879 /* Match occurred. Construct a tag. */
5880 while (charno < rp->regs.end[0])
5881 if (buffer[charno++] == '\n')
5882 lineno++, linecharno = charno;
5883 name = rp->name;
5884 if (name[0] == '\0')
5885 name = NULL;
5886 else /* make a named tag */
5887 name = substitute (buffer, rp->name, &rp->regs);
5888 if (rp->force_explicit_name)
5889 /* Force explicit tag name, if a name is there. */
5890 pfnote (name, true, buffer + linecharno,
5891 charno - linecharno + 1, lineno, linecharno);
5892 else
5893 make_tag (name, strlen (name), true, buffer + linecharno,
5894 charno - linecharno + 1, lineno, linecharno);
5895 break;
5896 }
5897 }
5898 }
5899 }
5900
5901 \f
5902 static bool
5903 nocase_tail (const char *cp)
5904 {
5905 register int len = 0;
5906
5907 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5908 cp++, len++;
5909 if (*cp == '\0' && !intoken (dbp[len]))
5910 {
5911 dbp += len;
5912 return true;
5913 }
5914 return false;
5915 }
5916
5917 static void
5918 get_tag (register char *bp, char **namepp)
5919 {
5920 register char *cp = bp;
5921
5922 if (*bp != '\0')
5923 {
5924 /* Go till you get to white space or a syntactic break */
5925 for (cp = bp + 1; !notinname (*cp); cp++)
5926 continue;
5927 make_tag (bp, cp - bp, true,
5928 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5929 }
5930
5931 if (namepp != NULL)
5932 *namepp = savenstr (bp, cp - bp);
5933 }
5934
5935 /*
5936 * Read a line of text from `stream' into `lbp', excluding the
5937 * newline or CR-NL, if any. Return the number of characters read from
5938 * `stream', which is the length of the line including the newline.
5939 *
5940 * On DOS or Windows we do not count the CR character, if any before the
5941 * NL, in the returned length; this mirrors the behavior of Emacs on those
5942 * platforms (for text files, it translates CR-NL to NL as it reads in the
5943 * file).
5944 *
5945 * If multi-line regular expressions are requested, each line read is
5946 * appended to `filebuf'.
5947 */
5948 static long
5949 readline_internal (linebuffer *lbp, register FILE *stream)
5950 {
5951 char *buffer = lbp->buffer;
5952 register char *p = lbp->buffer;
5953 register char *pend;
5954 int chars_deleted;
5955
5956 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5957
5958 for (;;)
5959 {
5960 register int c = getc (stream);
5961 if (p == pend)
5962 {
5963 /* We're at the end of linebuffer: expand it. */
5964 lbp->size *= 2;
5965 xrnew (buffer, lbp->size, char);
5966 p += buffer - lbp->buffer;
5967 pend = buffer + lbp->size;
5968 lbp->buffer = buffer;
5969 }
5970 if (c == EOF)
5971 {
5972 *p = '\0';
5973 chars_deleted = 0;
5974 break;
5975 }
5976 if (c == '\n')
5977 {
5978 if (p > buffer && p[-1] == '\r')
5979 {
5980 p -= 1;
5981 #ifdef DOS_NT
5982 /* Assume CRLF->LF translation will be performed by Emacs
5983 when loading this file, so CRs won't appear in the buffer.
5984 It would be cleaner to compensate within Emacs;
5985 however, Emacs does not know how many CRs were deleted
5986 before any given point in the file. */
5987 chars_deleted = 1;
5988 #else
5989 chars_deleted = 2;
5990 #endif
5991 }
5992 else
5993 {
5994 chars_deleted = 1;
5995 }
5996 *p = '\0';
5997 break;
5998 }
5999 *p++ = c;
6000 }
6001 lbp->len = p - buffer;
6002
6003 if (need_filebuf /* we need filebuf for multi-line regexps */
6004 && chars_deleted > 0) /* not at EOF */
6005 {
6006 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6007 {
6008 /* Expand filebuf. */
6009 filebuf.size *= 2;
6010 xrnew (filebuf.buffer, filebuf.size, char);
6011 }
6012 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6013 filebuf.len += lbp->len;
6014 filebuf.buffer[filebuf.len++] = '\n';
6015 filebuf.buffer[filebuf.len] = '\0';
6016 }
6017
6018 return lbp->len + chars_deleted;
6019 }
6020
6021 /*
6022 * Like readline_internal, above, but in addition try to match the
6023 * input line against relevant regular expressions and manage #line
6024 * directives.
6025 */
6026 static void
6027 readline (linebuffer *lbp, FILE *stream)
6028 {
6029 long result;
6030
6031 linecharno = charno; /* update global char number of line start */
6032 result = readline_internal (lbp, stream); /* read line */
6033 lineno += 1; /* increment global line number */
6034 charno += result; /* increment global char number */
6035
6036 /* Honor #line directives. */
6037 if (!no_line_directive)
6038 {
6039 static bool discard_until_line_directive;
6040
6041 /* Check whether this is a #line directive. */
6042 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6043 {
6044 unsigned int lno;
6045 int start = 0;
6046
6047 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6048 && start > 0) /* double quote character found */
6049 {
6050 char *endp = lbp->buffer + start;
6051
6052 while ((endp = strchr (endp, '"')) != NULL
6053 && endp[-1] == '\\')
6054 endp++;
6055 if (endp != NULL)
6056 /* Ok, this is a real #line directive. Let's deal with it. */
6057 {
6058 char *taggedabsname; /* absolute name of original file */
6059 char *taggedfname; /* name of original file as given */
6060 char *name; /* temp var */
6061
6062 discard_until_line_directive = false; /* found it */
6063 name = lbp->buffer + start;
6064 *endp = '\0';
6065 canonicalize_filename (name);
6066 taggedabsname = absolute_filename (name, tagfiledir);
6067 if (filename_is_absolute (name)
6068 || filename_is_absolute (curfdp->infname))
6069 taggedfname = savestr (taggedabsname);
6070 else
6071 taggedfname = relative_filename (taggedabsname,tagfiledir);
6072
6073 if (streq (curfdp->taggedfname, taggedfname))
6074 /* The #line directive is only a line number change. We
6075 deal with this afterwards. */
6076 free (taggedfname);
6077 else
6078 /* The tags following this #line directive should be
6079 attributed to taggedfname. In order to do this, set
6080 curfdp accordingly. */
6081 {
6082 fdesc *fdp; /* file description pointer */
6083
6084 /* Go look for a file description already set up for the
6085 file indicated in the #line directive. If there is
6086 one, use it from now until the next #line
6087 directive. */
6088 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6089 if (streq (fdp->infname, curfdp->infname)
6090 && streq (fdp->taggedfname, taggedfname))
6091 /* If we remove the second test above (after the &&)
6092 then all entries pertaining to the same file are
6093 coalesced in the tags file. If we use it, then
6094 entries pertaining to the same file but generated
6095 from different files (via #line directives) will
6096 go into separate sections in the tags file. These
6097 alternatives look equivalent. The first one
6098 destroys some apparently useless information. */
6099 {
6100 curfdp = fdp;
6101 free (taggedfname);
6102 break;
6103 }
6104 /* Else, if we already tagged the real file, skip all
6105 input lines until the next #line directive. */
6106 if (fdp == NULL) /* not found */
6107 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6108 if (streq (fdp->infabsname, taggedabsname))
6109 {
6110 discard_until_line_directive = true;
6111 free (taggedfname);
6112 break;
6113 }
6114 /* Else create a new file description and use that from
6115 now on, until the next #line directive. */
6116 if (fdp == NULL) /* not found */
6117 {
6118 fdp = fdhead;
6119 fdhead = xnew (1, fdesc);
6120 *fdhead = *curfdp; /* copy curr. file description */
6121 fdhead->next = fdp;
6122 fdhead->infname = savestr (curfdp->infname);
6123 fdhead->infabsname = savestr (curfdp->infabsname);
6124 fdhead->infabsdir = savestr (curfdp->infabsdir);
6125 fdhead->taggedfname = taggedfname;
6126 fdhead->usecharno = false;
6127 fdhead->prop = NULL;
6128 fdhead->written = false;
6129 curfdp = fdhead;
6130 }
6131 }
6132 free (taggedabsname);
6133 lineno = lno - 1;
6134 readline (lbp, stream);
6135 return;
6136 } /* if a real #line directive */
6137 } /* if #line is followed by a number */
6138 } /* if line begins with "#line " */
6139
6140 /* If we are here, no #line directive was found. */
6141 if (discard_until_line_directive)
6142 {
6143 if (result > 0)
6144 {
6145 /* Do a tail recursion on ourselves, thus discarding the contents
6146 of the line buffer. */
6147 readline (lbp, stream);
6148 return;
6149 }
6150 /* End of file. */
6151 discard_until_line_directive = false;
6152 return;
6153 }
6154 } /* if #line directives should be considered */
6155
6156 {
6157 int match;
6158 regexp *rp;
6159 char *name;
6160
6161 /* Match against relevant regexps. */
6162 if (lbp->len > 0)
6163 for (rp = p_head; rp != NULL; rp = rp->p_next)
6164 {
6165 /* Only use generic regexps or those for the current language.
6166 Also do not use multiline regexps, which is the job of
6167 regex_tag_multiline. */
6168 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6169 || rp->multi_line)
6170 continue;
6171
6172 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6173 switch (match)
6174 {
6175 case -2:
6176 /* Some error. */
6177 if (!rp->error_signaled)
6178 {
6179 error ("regexp stack overflow while matching \"%s\"",
6180 rp->pattern);
6181 rp->error_signaled = true;
6182 }
6183 break;
6184 case -1:
6185 /* No match. */
6186 break;
6187 case 0:
6188 /* Empty string matched. */
6189 if (!rp->error_signaled)
6190 {
6191 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6192 rp->error_signaled = true;
6193 }
6194 break;
6195 default:
6196 /* Match occurred. Construct a tag. */
6197 name = rp->name;
6198 if (name[0] == '\0')
6199 name = NULL;
6200 else /* make a named tag */
6201 name = substitute (lbp->buffer, rp->name, &rp->regs);
6202 if (rp->force_explicit_name)
6203 /* Force explicit tag name, if a name is there. */
6204 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6205 else
6206 make_tag (name, strlen (name), true,
6207 lbp->buffer, match, lineno, linecharno);
6208 break;
6209 }
6210 }
6211 }
6212 }
6213
6214 \f
/*
 * Return a newly allocated copy of the NUL-terminated string CP.
 * The copy is made by savenstr over the full length of CP.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6224
6225 /*
6226 * Return a pointer to a space of size LEN+1 allocated with xnew where
6227 * the string CP has been copied for at most the first LEN characters.
6228 */
6229 static char *
6230 savenstr (const char *cp, int len)
6231 {
6232 char *dp = xnew (len + 1, char);
6233 dp[len] = '\0';
6234 return memcpy (dp, cp, len);
6235 }
6236
/* Return the address of the first character at or after CP for which
   iswhite is false.  The end of the string is not white space, so the
   scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6245
/* Return the address of the first character at or after CP that is
   either white space (per iswhite) or the end of the string.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6254
/* Return the address of the first character at or after CP that does
   not belong to the "name" class, i.e. for which notinname holds.  */
static char *
skip_name (char *cp)
{
  /* notinname is true for '\0', so the scan ends there at the latest.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6264
/* Issue a diagnostic through error, with S1 as the format and S2 as
   its only argument, then terminate the program with EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6272
/* Describe the current errno value with perror, using S1 as the
   message prefix, then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6279
6280 static void
6281 suggest_asking_for_help (void)
6282 {
6283 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6284 progname);
6285 exit (EXIT_FAILURE);
6286 }
6287
6288 /* Output a diagnostic with printf-style FORMAT and args. */
6289 static void
6290 error (const char *format, ...)
6291 {
6292 va_list ap;
6293 va_start (ap, format);
6294 fprintf (stderr, "%s: ", progname);
6295 vfprintf (stderr, format, ap);
6296 fprintf (stderr, "\n");
6297 va_end (ap);
6298 }
6299
6300 /* Return a newly-allocated string whose contents
6301 concatenate those of s1, s2, s3. */
6302 static char *
6303 concat (const char *s1, const char *s2, const char *s3)
6304 {
6305 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6306 char *result = xnew (len1 + len2 + len3 + 1, char);
6307
6308 strcpy (result, s1);
6309 strcpy (result + len1, s2);
6310 strcpy (result + len1 + len2, s3);
6311
6312 return result;
6313 }
6314
6315 \f
6316 /* Does the same work as the system V getcwd, but does not need to
6317 guess the buffer size in advance. */
6318 static char *
6319 etags_getcwd (void)
6320 {
6321 int bufsize = 200;
6322 char *path = xnew (bufsize, char);
6323
6324 while (getcwd (path, bufsize) == NULL)
6325 {
6326 if (errno != ERANGE)
6327 pfatal ("getcwd");
6328 bufsize *= 2;
6329 free (path);
6330 path = xnew (bufsize, char);
6331 }
6332
6333 canonicalize_filename (path);
6334 return path;
6335 }
6336
6337 /* Return a newly allocated string containing the file name of FILE
6338 relative to the absolute directory DIR (which should end with a slash). */
6339 static char *
6340 relative_filename (char *file, char *dir)
6341 {
6342 char *fp, *dp, *afn, *res;
6343 int i;
6344
6345 /* Find the common root of file and dir (with a trailing slash). */
6346 afn = absolute_filename (file, cwd);
6347 fp = afn;
6348 dp = dir;
6349 while (*fp++ == *dp++)
6350 continue;
6351 fp--, dp--; /* back to the first differing char */
6352 #ifdef DOS_NT
6353 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6354 return afn;
6355 #endif
6356 do /* look at the equal chars until '/' */
6357 fp--, dp--;
6358 while (*fp != '/');
6359
6360 /* Build a sequence of "../" strings for the resulting relative file name. */
6361 i = 0;
6362 while ((dp = strchr (dp + 1, '/')) != NULL)
6363 i += 1;
6364 res = xnew (3*i + strlen (fp + 1) + 1, char);
6365 res[0] = '\0';
6366 while (i-- > 0)
6367 strcat (res, "../");
6368
6369 /* Add the file name relative to the common root of file and dir. */
6370 strcat (res, fp + 1);
6371 free (afn);
6372
6373 return res;
6374 }
6375
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   A FILE that is already absolute is copied; otherwise it is appended
   to DIR.  "/." components are then removed, and each "/.." component
   is removed together with the component preceding it, all in place
   in the new string.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back from SLASHP to the start of the preceding
                 component, without ever stepping in front of RES.  */
              bool found = false;

              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      found = true;
                      break;
                    }
                }
              if (!found)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) counts the tail after "/.." plus
                 its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." and look at the same position again.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6438
/* Return a newly allocated string containing the absolute name of
   the directory in which FILE resides, given DIR (which should end
   with a slash).  A FILE without any slash yields a copy of DIR.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the
     call, then put the overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6458
/* Return true if FN is an absolute file name, that is if it begins
   with a slash or, when DOS_NT is defined, with a drive letter
   followed by ":/".  FN must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6470
/* Canonicalize the file name FN in place: every run of separators
   becomes a single slash.  The separator is '/', or '\\' when DOS_NT
   is defined, in which case an upper case drive letter is also
   converted to lower case.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;
  register char *dst = fn;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c) isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Copy the name over itself, emitting one slash per separator run.  */
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6500
6501 \f
6502 /* Initialize a linebuffer for use. */
6503 static void
6504 linebuffer_init (linebuffer *lbp)
6505 {
6506 lbp->size = (DEBUG) ? 3 : 200;
6507 lbp->buffer = xnew (lbp->size, char);
6508 lbp->buffer[0] = '\0';
6509 lbp->len = 0;
6510 }
6511
6512 /* Set the minimum size of a string contained in a linebuffer. */
6513 static void
6514 linebuffer_setlen (linebuffer *lbp, int toksize)
6515 {
6516 while (lbp->size <= toksize)
6517 {
6518 lbp->size *= 2;
6519 xrnew (lbp->buffer, lbp->size, char);
6520 }
6521 lbp->len = toksize;
6522 }
6523
/* Allocate SIZE bytes with malloc and return them; if malloc returns
   NULL, report "virtual memory exhausted" through fatal.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);

  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
6533
/* Resize the block PTR to SIZE bytes with realloc and return the
   result; if realloc returns NULL, report "virtual memory exhausted"
   through fatal.  */
static void *
xrealloc (char *ptr, size_t size)
{
  void *result = realloc (ptr, size);

  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
6542
6543 /*
6544 * Local Variables:
6545 * indent-tabs-mode: t
6546 * tab-width: 8
6547 * fill-column: 79
6548 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6549 * c-file-style: "gnu"
6550 * End:
6551 */
6552
6553 /* etags.c ends here */