]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
(main, consider_token, C_entries): Add misc switch
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Line-by-line regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
33 *
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
35 */
36
37 /*
38 * If you want to add support for a new language, start by looking at the LUA
39 * language, which is the simplest. Alternatively, consider shipping a
40 * configuration file containing regexp definitions for etags.
41 */
42
43 char pot_etags_version[] = "@(#) pot revision number is 17.5";
44
45 #define TRUE 1
46 #define FALSE 0
47
48 #ifdef DEBUG
49 # undef DEBUG
50 # define DEBUG TRUE
51 #else
52 # define DEBUG FALSE
53 # define NDEBUG /* disable assert */
54 #endif
55
56 #ifdef HAVE_CONFIG_H
57 # include <config.h>
58 /* On some systems, Emacs defines static as nothing for the sake
59 of unexec. We don't want that here since we don't use unexec. */
60 # undef static
61 # define ETAGS_REGEXPS /* use the regexp features */
62 # define LONG_OPTIONS /* accept long options */
63 # ifndef PTR /* for Xemacs */
64 # define PTR void *
65 # endif
66 # ifndef __P /* for Xemacs */
67 # define __P(args) args
68 # endif
69 #else /* no config.h */
70 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
71 # define __P(args) args /* use prototypes */
72 # define PTR void * /* for generic pointers */
73 # else /* not standard C */
74 # define __P(args) () /* no prototypes */
75 # define const /* remove const for old compilers' sake */
76 # define PTR long * /* don't use void* */
77 # endif
78 #endif /* !HAVE_CONFIG_H */
79
80 #ifndef _GNU_SOURCE
81 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
82 #endif
83
84 #ifdef LONG_OPTIONS
85 # undef LONG_OPTIONS
86 # define LONG_OPTIONS TRUE
87 #else
88 # define LONG_OPTIONS FALSE
89 #endif
90
91 /* WIN32_NATIVE is for Xemacs.
92 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
93 #ifdef WIN32_NATIVE
94 # undef MSDOS
95 # undef WINDOWSNT
96 # define WINDOWSNT
97 #endif /* WIN32_NATIVE */
98
99 #ifdef MSDOS
100 # undef MSDOS
101 # define MSDOS TRUE
102 # include <fcntl.h>
103 # include <sys/param.h>
104 # include <io.h>
105 # ifndef HAVE_CONFIG_H
106 # define DOS_NT
107 # include <sys/config.h>
108 # endif
109 #else
110 # define MSDOS FALSE
111 #endif /* MSDOS */
112
113 #ifdef WINDOWSNT
114 # include <stdlib.h>
115 # include <fcntl.h>
116 # include <string.h>
117 # include <direct.h>
118 # include <io.h>
119 # define MAXPATHLEN _MAX_PATH
120 # undef HAVE_NTGUI
121 # undef DOS_NT
122 # define DOS_NT
123 # ifndef HAVE_GETCWD
124 # define HAVE_GETCWD
125 # endif /* undef HAVE_GETCWD */
126 #else /* not WINDOWSNT */
127 # ifdef STDC_HEADERS
128 # include <stdlib.h>
129 # include <string.h>
130 # else /* no standard C headers */
131 extern char *getenv ();
132 # ifdef VMS
133 # define EXIT_SUCCESS 1
134 # define EXIT_FAILURE 0
135 # else /* no VMS */
136 # define EXIT_SUCCESS 0
137 # define EXIT_FAILURE 1
138 # endif
139 # endif
140 #endif /* !WINDOWSNT */
141
142 #ifdef HAVE_UNISTD_H
143 # include <unistd.h>
144 #else
145 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
146 extern char *getcwd (char *buf, size_t size);
147 # endif
148 #endif /* HAVE_UNISTD_H */
149
150 #include <stdio.h>
151 #include <ctype.h>
152 #include <errno.h>
153 #ifndef errno
154 extern int errno;
155 #endif
156 #include <sys/types.h>
157 #include <sys/stat.h>
158
159 #include <assert.h>
160 #ifdef NDEBUG
161 # undef assert /* some systems have a buggy assert.h */
162 # define assert(x) ((void) 0)
163 #endif
164
165 #if !defined (S_ISREG) && defined (S_IFREG)
166 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
167 #endif
168
169 #if LONG_OPTIONS
170 # include <getopt.h>
171 #else
172 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
173 extern char *optarg;
174 extern int optind, opterr;
175 #endif /* LONG_OPTIONS */
176
177 #ifdef ETAGS_REGEXPS
178 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
179 # ifdef __CYGWIN__ /* compiling on Cygwin */
180 !!! NOTICE !!!
181 the regex.h distributed with Cygwin is not compatible with etags, alas!
182 If you want regular expression support, you should delete this notice and
183 arrange to use the GNU regex.h and regex.c.
184 # endif
185 # endif
186 # include <regex.h>
187 #endif /* ETAGS_REGEXPS */
188
189 /* Define CTAGS to make the program "ctags" compatible with the usual one.
190 Leave it undefined to make the program "etags", which makes emacs-style
191 tag tables and tags typedefs, #defines and struct/union/enum by default. */
192 #ifdef CTAGS
193 # undef CTAGS
194 # define CTAGS TRUE
195 #else
196 # define CTAGS FALSE
197 #endif
198
/*
 * String equality helpers.  Each expands to a nonzero value when the two
 * strings compare equal and to zero otherwise:
 *   streq (s, t)          case-sensitive, whole string (strcmp)
 *   strcaseeq (s, t)      case-insensitive, whole string (etags_strcasecmp)
 *   strneq (s, t, n)      case-sensitive, at most N characters (strncmp)
 *   strncaseeq (s, t, n)  case-insensitive, at most N characters
 *
 * BOTH arguments must be non-NULL, because they are handed unchanged to
 * the comparison function; this is checked by assert when assertions are
 * enabled.  The arguments are evaluated more than once, so they must not
 * have side effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
203
/*
 * Character helpers.
 *
 * CHARS is the number of entries in each per-character lookup table, and
 * CHAR(ch) masks its argument down to an index in the range 0 .. CHARS-1,
 * so that a negative (signed) char still yields a valid index.
 */
#define CHARS		256
#define CHAR(ch)	((unsigned int)(ch) & (CHARS - 1))

/* Table-driven predicates; each one indexes the table named in its body. */
#define iswhite(ch)	(_wht[CHAR(ch)]) /* ch is white (see white) */
#define notinname(ch)	(_nin[CHAR(ch)]) /* ch is not in a name (see nonam) */
#define begtoken(ch)	(_btk[CHAR(ch)]) /* ch can start token (see begtk) */
#define intoken(ch)	(_itk[CHAR(ch)]) /* ch can be in token (see midtk) */
#define endtoken(ch)	(_etk[CHAR(ch)]) /* ch ends tokens (see endtk) */

/* <ctype.h> wrappers that pass the masked value to the library. */
#define ISALNUM(ch)	isalnum (CHAR(ch))
#define ISALPHA(ch)	isalpha (CHAR(ch))
#define ISDIGIT(ch)	isdigit (CHAR(ch))
#define ISLOWER(ch)	islower (CHAR(ch))

/* Case conversion through the same mask. */
#define lowcase(ch)	tolower (CHAR(ch))
#define upcase(ch)	toupper (CHAR(ch))

219
220
221 /*
222 * xnew, xrnew -- allocate, reallocate storage
223 *
224 * SYNOPSIS: Type *xnew (int n, Type);
225 * void xrnew (OldPointer, int n, Type);
226 */
227 #if DEBUG
228 # include "chkmalloc.h"
229 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
230 (n) * sizeof (Type)))
231 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
232 (char *) (op), (n) * sizeof (Type)))
233 #else
234 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
235 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
236 (char *) (op), (n) * sizeof (Type)))
237 #endif
238
239 #define bool int
240
241 typedef void Lang_function __P((FILE *));
242
/* One entry of the table of supported compression formats. */
typedef struct
{
  /* file name suffix for this compressor */
  char *suffix;
  /* takes one arg and decompresses to stdout */
  char *command;
} compressor;
248
249 typedef struct
250 {
251 char *name; /* language name */
252 char *help; /* detailed help for the language */
253 Lang_function *function; /* parse function */
254 char **suffixes; /* name suffixes of this language's files */
255 char **filenames; /* names of this language's files */
256 char **interpreters; /* interpreters for this language */
257 bool metasource; /* source used to generate other sources */
258 } language;
259
260 typedef struct fdesc
261 {
262 struct fdesc *next; /* for the linked list */
263 char *infname; /* uncompressed input file name */
264 char *infabsname; /* absolute uncompressed input file name */
265 char *infabsdir; /* absolute dir of input file */
266 char *taggedfname; /* file name to write in tagfile */
267 language *lang; /* language of file */
268 char *prop; /* file properties to write in tagfile */
269 bool usecharno; /* etags tags shall contain char number */
270 bool written; /* entry written in the tags file */
271 } fdesc;
272
273 typedef struct node_st
274 { /* sorting structure */
275 struct node_st *left, *right; /* left and right sons */
276 fdesc *fdp; /* description of file to whom tag belongs */
277 char *name; /* tag name */
278 char *regex; /* search regexp */
279 bool valid; /* write this tag on the tag file */
280 bool is_func; /* function tag: use regexp in CTAGS mode */
281 bool been_warned; /* warning already given for duplicated tag */
282 int lno; /* line number tag is on */
283 long cno; /* character number line starts on */
284 } node;
285
/*
 * A `linebuffer' holds one line of text.
 * `readline_internal' reads a line from a stream into a linebuffer and
 * works regardless of the length of the line.
 */
typedef struct
{
  /* SIZE is the size of BUFFER */
  long size;
  /* LEN is the length of the string in BUFFER after readline reads it */
  int len;
  /* the text itself */
  char *buffer;
} linebuffer;
299
300 /* Used to support mixing of --lang and file names. */
301 typedef struct
302 {
303 enum {
304 at_language, /* a language specification */
305 at_regexp, /* a regular expression */
306 at_filename, /* a file name */
307 at_stdin, /* read from stdin here */
308 at_end /* stop parsing the list */
309 } arg_type; /* argument type */
310 language *lang; /* language associated with the argument */
311 char *what; /* the argument itself */
312 } argument;
313
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression; instances form a linked list. */
typedef struct regexp
{
  /* pointer to next in list */
  struct regexp *p_next;
  /* if set, use only for this language */
  language *lang;
  /* the regexp pattern */
  char *pattern;
  /* tag name */
  char *name;
  /* the compiled pattern */
  struct re_pattern_buffer *pat;
  /* re registers */
  struct re_registers regs;
  /* already signaled for this regexp */
  bool error_signaled;
  /* do not allow implicit tag name */
  bool force_explicit_name;
  /* ignore case when matching */
  bool ignore_case;
  /* do a multi-line match on the whole file */
  bool multi_line;
} regexp;
#endif /* ETAGS_REGEXPS */
330
331
332 /* Many compilers barf on this:
333 Lang_function Ada_funcs;
334 so let's write it this way */
335 static void Ada_funcs __P((FILE *));
336 static void Asm_labels __P((FILE *));
337 static void C_entries __P((int c_ext, FILE *));
338 static void default_C_entries __P((FILE *));
339 static void plain_C_entries __P((FILE *));
340 static void Cjava_entries __P((FILE *));
341 static void Cobol_paragraphs __P((FILE *));
342 static void Cplusplus_entries __P((FILE *));
343 static void Cstar_entries __P((FILE *));
344 static void Erlang_functions __P((FILE *));
345 static void Fortran_functions __P((FILE *));
346 static void HTML_labels __P((FILE *));
347 static void Lisp_functions __P((FILE *));
348 static void Lua_functions __P((FILE *));
349 static void Makefile_targets __P((FILE *));
350 static void Pascal_functions __P((FILE *));
351 static void Perl_functions __P((FILE *));
352 static void PHP_functions __P((FILE *));
353 static void PS_functions __P((FILE *));
354 static void Prolog_functions __P((FILE *));
355 static void Python_functions __P((FILE *));
356 static void Scheme_functions __P((FILE *));
357 static void TeX_commands __P((FILE *));
358 static void Texinfo_nodes __P((FILE *));
359 static void Yacc_entries __P((FILE *));
360 static void just_read_file __P((FILE *));
361
362 static void print_language_names __P((void));
363 static void print_version __P((void));
364 static void print_help __P((argument *));
365 int main __P((int, char **));
366
367 static compressor *get_compressor_from_suffix __P((char *, char **));
368 static language *get_language_from_langname __P((const char *));
369 static language *get_language_from_interpreter __P((char *));
370 static language *get_language_from_filename __P((char *, bool));
371 static void readline __P((linebuffer *, FILE *));
372 static long readline_internal __P((linebuffer *, FILE *));
373 static bool nocase_tail __P((char *));
374 static void get_tag __P((char *, char **));
375
376 #ifdef ETAGS_REGEXPS
377 static void analyse_regex __P((char *));
378 static void free_regexps __P((void));
379 static void regex_tag_multiline __P((void));
380 #endif /* ETAGS_REGEXPS */
381 static void error __P((const char *, const char *));
382 static void suggest_asking_for_help __P((void));
383 void fatal __P((char *, char *));
384 static void pfatal __P((char *));
385 static void add_node __P((node *, node **));
386
387 static void init __P((void));
388 static void process_file_name __P((char *, language *));
389 static void process_file __P((FILE *, char *, language *));
390 static void find_entries __P((FILE *));
391 static void free_tree __P((node *));
392 static void free_fdesc __P((fdesc *));
393 static void pfnote __P((char *, bool, char *, int, int, long));
394 static void make_tag __P((char *, int, bool, char *, int, int, long));
395 static void invalidate_nodes __P((fdesc *, node **));
396 static void put_entries __P((node *));
397
398 static char *concat __P((char *, char *, char *));
399 static char *skip_spaces __P((char *));
400 static char *skip_non_spaces __P((char *));
401 static char *savenstr __P((char *, int));
402 static char *savestr __P((char *));
403 static char *etags_strchr __P((const char *, int));
404 static char *etags_strrchr __P((const char *, int));
405 static int etags_strcasecmp __P((const char *, const char *));
406 static int etags_strncasecmp __P((const char *, const char *, int));
407 static char *etags_getcwd __P((void));
408 static char *relative_filename __P((char *, char *));
409 static char *absolute_filename __P((char *, char *));
410 static char *absolute_dirname __P((char *, char *));
411 static bool filename_is_absolute __P((char *f));
412 static void canonicalize_filename __P((char *));
413 static void linebuffer_init __P((linebuffer *));
414 static void linebuffer_setlen __P((linebuffer *, int));
415 static PTR xmalloc __P((unsigned int));
416 static PTR xrealloc __P((char *, unsigned int));
417
418 \f
419 static char searchar = '/'; /* use /.../ searches */
420
421 static char *tagfile; /* output file */
422 static char *progname; /* name this program was invoked with */
423 static char *cwd; /* current working directory */
424 static char *tagfiledir; /* directory of tagfile */
425 static FILE *tagf; /* ioptr for tags file */
426
427 static fdesc *fdhead; /* head of file description list */
428 static fdesc *curfdp; /* current file description */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
433
434 static const int invalidcharno = -1;
435
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
438
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
442
443 /* boolean "functions" (see init) */
444 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
445 static char
446 /* white chars */
447 *white = " \f\t\n\r\v",
448 /* not in a name */
449 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
450 /* token ending chars */
451 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
452 /* token starting chars */
453 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
454 /* valid in-token chars */
455 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
456
457 static bool append_to_tagfile; /* -a: append to tags */
458 /* The next four default to TRUE for etags, but to FALSE for ctags. */
459 static bool typedefs; /* -t: create tags for C and Ada typedefs */
460 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
461 /* 0 struct/enum/union decls, and C++ */
462 /* member functions. */
463 static bool constantypedefs; /* -d: create tags for C #define, enum */
464 /* constants and variables. */
465 /* -D: opposite of -d. Default under ctags. */
466 static bool globals; /* create tags for global variables */
467 static bool declarations; /* --declarations: tag them and extern in C&Co*/
468 static bool members; /* create tags for C member variables */
469 static bool no_line_directive; /* ignore #line directives (undocumented) */
470 static bool update; /* -u: update tags */
471 static bool vgrind_style; /* -v: create vgrind style index output */
472 static bool no_warnings; /* -w: suppress warnings */
473 static bool cxref_style; /* -x: create cxref style output */
474 static bool cplusplus; /* .[hc] means C++, not C */
475 static bool ignoreindent; /* -I: ignore indentation in C */
476 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
477
478 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
479 static bool parsing_stdin; /* --parse-stdin used */
480
481 #ifdef ETAGS_REGEXPS
482 static regexp *p_head; /* list of all regexps */
483 static bool need_filebuf; /* some regexes are multi-line */
484 #else
485 # define need_filebuf FALSE
486 #endif /* ETAGS_REGEXPS */
487
#if LONG_OPTIONS
/*
 * Table of long options handed to getopt_long.
 *
 * Entries whose third member is a variable address make getopt_long store
 * the fourth member into that variable; entries with NULL there make
 * getopt_long return the fourth member as the option code.
 *
 * "help" is listed exactly once.  A second "help" entry carrying a
 * different value can never be selected by an exact match, and GNU
 * getopt_long reports abbreviations such as `--hel' as ambiguous when
 * two matching entries differ.
 */
static struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
#endif /* ETAGS_REGEXPS */
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "append",             no_argument,       NULL,               'a'   },
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }			/* end of list */
};
#endif /* LONG_OPTIONS */
530
531 static compressor compressors[] =
532 {
533 { "z", "gzip -d -c"},
534 { "Z", "gzip -d -c"},
535 { "gz", "gzip -d -c"},
536 { "GZ", "gzip -d -c"},
537 { "bz2", "bzip2 -d -c" },
538 { NULL }
539 };
540
541 /*
542 * Language stuff.
543 */
544
545 /* Ada code */
546 static char *Ada_suffixes [] =
547 { "ads", "adb", "ada", NULL };
548 static char Ada_help [] =
549 "In Ada code, functions, procedures, packages, tasks and types are\n\
550 tags. Use the `--packages-only' option to create tags for\n\
551 packages only.\n\
552 Ada tag names have suffixes indicating the type of entity:\n\
553 Entity type: Qualifier:\n\
554 ------------ ----------\n\
555 function /f\n\
556 procedure /p\n\
557 package spec /s\n\
558 package body /b\n\
559 type /t\n\
560 task /k\n\
561 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
562 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
563 will just search for any tag `bidule'.";
564
565 /* Assembly code */
566 static char *Asm_suffixes [] =
567 { "a", /* Unix assembler */
568 "asm", /* Microcontroller assembly */
569 "def", /* BSO/Tasking definition includes */
570 "inc", /* Microcontroller include files */
571 "ins", /* Microcontroller include files */
572 "s", "sa", /* Unix assembler */
573 "S", /* cpp-processed Unix assembler */
574 "src", /* BSO/Tasking C compiler output */
575 NULL
576 };
577 static char Asm_help [] =
578 "In assembler code, labels appearing at the beginning of a line,\n\
579 followed by a colon, are tags.";
580
581
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
585 static char *default_C_suffixes [] =
586 { "c", "h", NULL };
587 static char default_C_help [] =
588 "In C code, any C function or typedef is a tag, and so are\n\
589 definitions of `struct', `union' and `enum'. `#define' macro\n\
590 definitions and `enum' constants are tags unless you specify\n\
591 `--no-defines'. Global variables are tags unless you specify\n\
592 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
593 can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using `--declarations', and struct members by using `--members'.";
596
597 static char *Cplusplus_suffixes [] =
598 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
599 "M", /* Objective C++ */
600 "pdb", /* Postscript with C syntax */
601 NULL };
602 static char Cplusplus_help [] =
603 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
604 --help --lang=c --lang=c++ for full help.)\n\
605 In addition to C tags, member functions are also recognized, and\n\
606 optionally member variables if you use the `--members' option.\n\
607 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
608 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
609 `operator+'.";
610
611 static char *Cjava_suffixes [] =
612 { "java", NULL };
/* Help text printed for `--help --lang=java'. */
static char Cjava_help [] =
"In Java code, all the tag constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
616
617
618 static char *Cobol_suffixes [] =
619 { "COB", "cob", NULL };
620 static char Cobol_help [] =
621 "In Cobol code, tags are paragraph names; that is, any word\n\
622 starting in column 8 and followed by a period.";
623
624 static char *Cstar_suffixes [] =
625 { "cs", "hs", NULL };
626
627 static char *Erlang_suffixes [] =
628 { "erl", "hrl", NULL };
629 static char Erlang_help [] =
630 "In Erlang code, the tags are the functions, records and macros\n\
631 defined in the file.";
632
633 static char *Fortran_suffixes [] =
634 { "F", "f", "f90", "for", NULL };
635 static char Fortran_help [] =
636 "In Fortran code, functions, subroutines and block data are tags.";
637
638 static char *HTML_suffixes [] =
639 { "htm", "html", "shtml", NULL };
640 static char HTML_help [] =
641 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
642 `h3' headers. Also, tags are `name=' in anchors and all\n\
643 occurrences of `id='.";
644
645 static char *Lisp_suffixes [] =
646 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
647 static char Lisp_help [] =
648 "In Lisp code, any function defined with `defun', any variable\n\
649 defined with `defvar' or `defconst', and in general the first\n\
650 argument of any expression that starts with `(def' in column zero\n\
651 is a tag.";
652
653 static char *Lua_suffixes [] =
654 { "lua", "LUA", NULL };
655 static char Lua_help [] =
656 "In Lua scripts, all functions are tags.";
657
658 static char *Makefile_filenames [] =
659 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
660 static char Makefile_help [] =
661 "In makefiles, targets are tags; additionally, variables are tags\n\
662 unless you specify `--no-globals'.";
663
664 static char *Objc_suffixes [] =
665 { "lm", /* Objective lex file */
666 "m", /* Objective C file */
667 NULL };
668 static char Objc_help [] =
669 "In Objective C code, tags include Objective C definitions for classes,\n\
670 class categories, methods and protocols. Tags for variables and\n\
671 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
672
673 static char *Pascal_suffixes [] =
674 { "p", "pas", NULL };
675 static char Pascal_help [] =
676 "In Pascal code, the tags are the functions and procedures defined\n\
677 in the file.";
678
679 static char *Perl_suffixes [] =
680 { "pl", "pm", NULL };
681 static char *Perl_interpreters [] =
682 { "perl", "@PERL@", NULL };
683 static char Perl_help [] =
684 "In Perl code, the tags are the packages, subroutines and variables\n\
685 defined by the `package', `sub', `my' and `local' keywords. Use\n\
686 `--globals' if you want to tag global variables. Tags for\n\
687 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
688 defined in the default package is `main::SUB'.";
689
690 static char *PHP_suffixes [] =
691 { "php", "php3", "php4", NULL };
692 static char PHP_help [] =
693 "In PHP code, tags are functions, classes and defines. When using\n\
694 the `--members' option, vars are tags too.";
695
696 static char *plain_C_suffixes [] =
697 { "pc", /* Pro*C file */
698 NULL };
699
700 static char *PS_suffixes [] =
701 { "ps", "psw", NULL }; /* .psw is for PSWrap */
702 static char PS_help [] =
703 "In PostScript code, the tags are the functions.";
704
705 static char *Prolog_suffixes [] =
706 { "prolog", NULL };
707 static char Prolog_help [] =
708 "In Prolog code, tags are predicates and rules at the beginning of\n\
709 line.";
710
711 static char *Python_suffixes [] =
712 { "py", NULL };
713 static char Python_help [] =
714 "In Python code, `def' or `class' at the beginning of a line\n\
715 generate a tag.";
716
717 /* Can't do the `SCM' or `scm' prefix with a version number. */
718 static char *Scheme_suffixes [] =
719 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
720 static char Scheme_help [] =
721 "In Scheme code, tags include anything defined with `def' or with a\n\
722 construct whose name starts with `def'. They also include\n\
723 variables set with `set!' at top level in the file.";
724
725 static char *TeX_suffixes [] =
726 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
727 static char TeX_help [] =
728 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
729 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
730 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
731 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
732 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
733 \n\
734 Other commands can be specified by setting the environment variable\n\
735 `TEXTAGS' to a colon-separated list like, for example,\n\
736 TEXTAGS=\"mycommand:myothercommand\".";
737
738
739 static char *Texinfo_suffixes [] =
740 { "texi", "texinfo", "txi", NULL };
741 static char Texinfo_help [] =
742 "for texinfo files, lines starting with @node are tagged.";
743
744 static char *Yacc_suffixes [] =
745 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
746 static char Yacc_help [] =
747 "In Bison or Yacc input files, each rule defines as a tag the\n\
748 nonterminal it constructs. The portions of the file that contain\n\
749 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
750 for full help).";
751
/* Help text printed for the pseudo-language `auto'. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
755
756 static char none_help [] =
757 "`none' is not a real language, it indicates to only do\n\
758 regexp processing on files.";
759
760 static char no_lang_help [] =
761 "No detailed help available for this language.";
762
763
764 /*
765 * Table of languages.
766 *
767 * It is ok for a given function to be listed under more than one
768 * name. I just didn't.
769 */
770
771 static language lang_names [] =
772 {
773 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
774 { "asm", Asm_help, Asm_labels, Asm_suffixes },
775 { "c", default_C_help, default_C_entries, default_C_suffixes },
776 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
777 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
778 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
779 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
780 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
781 { "html", HTML_help, HTML_labels, HTML_suffixes },
782 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
783 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
784 { "lua", Lua_help, Lua_functions, Lua_suffixes },
785 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
786 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
787 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
788 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
789 { "php", PHP_help, PHP_functions, PHP_suffixes },
790 { "postscript",PS_help, PS_functions, PS_suffixes },
791 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
792 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
793 { "python", Python_help, Python_functions, Python_suffixes },
794 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
795 { "tex", TeX_help, TeX_commands, TeX_suffixes },
796 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
797 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
798 { "auto", auto_help }, /* default guessing scheme */
799 { "none", none_help, just_read_file }, /* regexp matching only */
800 { NULL } /* end of list */
801 };
802
803 \f
804 static void
805 print_language_names ()
806 {
807 language *lang;
808 char **name, **ext;
809
810 puts ("\nThese are the currently supported languages, along with the\n\
811 default file names and dot suffixes:");
812 for (lang = lang_names; lang->name != NULL; lang++)
813 {
814 printf (" %-*s", 10, lang->name);
815 if (lang->filenames != NULL)
816 for (name = lang->filenames; *name != NULL; name++)
817 printf (" %s", *name);
818 if (lang->suffixes != NULL)
819 for (ext = lang->suffixes; *ext != NULL; ext++)
820 printf (" .%s", *ext);
821 puts ("");
822 }
823 puts ("where `auto' means use default language for files based on file\n\
824 name suffix, and `none' means only do regexp processing on files.\n\
825 If no language is specified and no matching suffix is found,\n\
826 the first line of the file is read for a sharp-bang (#!) sequence\n\
827 followed by the name of an interpreter. If no such sequence is found,\n\
828 Fortran is tried first; if no tags are found, C is tried next.\n\
829 When parsing any C file, a \"class\" or \"template\" keyword\n\
830 switches to C++.");
831 puts ("Compressed files are supported using gzip and bzip2.\n\
832 \n\
833 For detailed help on a given language use, for example,\n\
834 etags --help --lang=ada.");
835 }
836
837 #ifndef EMACS_NAME
838 # define EMACS_NAME "standalone"
839 #endif
840 #ifndef VERSION
841 # define VERSION "version"
842 #endif
843 static void
844 print_version ()
845 {
846 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
847 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
848 puts ("This program is distributed under the same terms as Emacs");
849
850 exit (EXIT_SUCCESS);
851 }
852
/*
 * Print help on standard output and exit with EXIT_SUCCESS; this
 * function never returns.
 *
 * ARGBUFFER is the parsed argument list, terminated by an entry whose
 * arg_type is at_end.  If it contains at_language entries, only the
 * help text of each of those languages is printed, separated by blank
 * lines.  Otherwise the general usage message is printed: the option
 * descriptions shown depend on CTAGS, LONG_OPTIONS and ETAGS_REGEXPS,
 * and are followed by the list of supported languages.
 */
853 static void
854 print_help (argbuffer)
855 argument *argbuffer;
856 {
857 bool help_for_lang = FALSE;
858
/* Language-specific help: print the help text of every language entry. */
859 for (; argbuffer->arg_type != at_end; argbuffer++)
860 if (argbuffer->arg_type == at_language)
861 {
862 if (help_for_lang)
863 puts ("");
864 puts (argbuffer->lang->help);
865 help_for_lang = TRUE;
866 }
867
/* If any language help was printed, skip the general usage text. */
868 if (help_for_lang)
869 exit (EXIT_SUCCESS);
870
871 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
872 \n\
873 These are the options accepted by %s.\n", progname, progname);
874 if (LONG_OPTIONS)
875 puts ("You may use unambiguous abbreviations for the long option names.");
876 else
877 puts ("Long option names do not work with this executable, as it is not\n\
878 linked with GNU getopt.");
879 puts (" A - as file name means read names from stdin (one per line).\n\
880 Absolute names are stored in the output file as they are.\n\
881 Relative ones are stored relative to the output file's directory.\n");
882
/* From here on, options are listed in alphabetical order; each one is
   shown only for the personality (ctags or etags) that accepts it. */
883 if (!CTAGS)
884 puts ("-a, --append\n\
885 Append tag entries to existing tags file.");
886
887 puts ("--packages-only\n\
888 For Ada files, only generate tags for packages.");
889
890 if (CTAGS)
891 puts ("-B, --backward-search\n\
892 Write the search commands for the tag entries using '?', the\n\
893 backward-search command instead of '/', the forward-search command.");
894
895 /* This option is mostly obsolete, because etags can now automatically
896 detect C++. Retained for backward compatibility and for debugging and
897 experimentation. In principle, we could want to tag as C++ even
898 before any "class" or "template" keyword.
899 puts ("-C, --c++\n\
900 Treat files whose name suffix defaults to C language as C++ files.");
901 */
902
903 puts ("--declarations\n\
904 In C and derived languages, create tags for function declarations,");
905 if (CTAGS)
906 puts ("\tand create tags for extern variables if --globals is used.");
907 else
908 puts
909 ("\tand create tags for extern variables unless --no-globals is used.");
910
911 if (CTAGS)
912 puts ("-d, --defines\n\
913 Create tag entries for C #define constants and enum constants, too.");
914 else
915 puts ("-D, --no-defines\n\
916 Don't create tag entries for C #define constants and enum constants.\n\
917 This makes the tags file smaller.");
918
919 if (!CTAGS)
920 puts ("-i FILE, --include=FILE\n\
921 Include a note in tag file indicating that, when searching for\n\
922 a tag, one should also consult the tags file FILE after\n\
923 checking the current file.");
924
925 puts ("-l LANG, --language=LANG\n\
926 Force the following files to be considered as written in the\n\
927 named language up to the next --language=LANG option.");
928
929 if (CTAGS)
930 puts ("--globals\n\
931 Create tag entries for global variables in some languages.");
932 else
933 puts ("--no-globals\n\
934 Do not create tag entries for global variables in some\n\
935 languages. This makes the tags file smaller.");
936 puts ("--members\n\
937 Create tag entries for members of structures in some languages.");
938
/* The regexp options are described only when regexp support is compiled in. */
939 #ifdef ETAGS_REGEXPS
940 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
941 Make a tag for each line matching a regular expression pattern\n\
942 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
943 files only. REGEXFILE is a file containing one REGEXP per line.\n\
944 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
945 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
946 puts (" If TAGNAME/ is present, the tags created are named.\n\
947 For example Tcl named tags can be created with:\n\
948 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
949 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
950 `m' means to allow multi-line matches, `s' implies `m' and\n\
951 causes dot to match any character, including newline.");
952 puts ("-R, --no-regex\n\
953 Don't create tags from regexps for the following files.");
954 #endif /* ETAGS_REGEXPS */
955 puts ("-I, --ignore-indentation\n\
956 In C and C++ do not assume that a closing brace in the first\n\
957 column is the final brace of a function or structure definition.");
958 puts ("-o FILE, --output=FILE\n\
959 Write the tags to FILE.");
960 puts ("--parse-stdin=NAME\n\
961 Read from standard input and record tags as belonging to file NAME.");
962
963 if (CTAGS)
964 {
965 puts ("-t, --typedefs\n\
966 Generate tag entries for C and Ada typedefs.");
967 puts ("-T, --typedefs-and-c++\n\
968 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
969 and C++ member functions.");
970 }
971
972 if (CTAGS)
973 puts ("-u, --update\n\
974 Update the tag entries for the given files, leaving tag\n\
975 entries for other files in place. Currently, this is\n\
976 implemented by deleting the existing entries for the given\n\
977 files and then rewriting the new entries at the end of the\n\
978 tags file. It is often faster to simply rebuild the entire\n\
979 tag file than to use this.");
980
981 if (CTAGS)
982 {
983 puts ("-v, --vgrind\n\
984 Generates an index of items intended for human consumption,\n\
985 similar to the output of vgrind. The index is sorted, and\n\
986 gives the page number of each item.");
987 puts ("-w, --no-warn\n\
988 Suppress warning messages about entries defined in multiple\n\
989 files.");
990 puts ("-x, --cxref\n\
991 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
992 The output uses line numbers instead of page numbers, but\n\
993 beyond that the differences are cosmetic; try both to see\n\
994 which you like.");
995 }
996
997 puts ("-V, --version\n\
998 Print the version of the program.\n\
999 -h, --help\n\
1000 Print this help message.\n\
1001 Followed by one or more `--language' options prints detailed\n\
1002 help about tag generation for the specified languages.");
1003
/* Finish with the list of supported languages and the bug address. */
1004 print_language_names ();
1005
1006 puts ("");
1007 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1008
1009 exit (EXIT_SUCCESS);
1010 }
1011
1012 \f
1013 #ifdef VMS /* VMS specific functions */
1014
#define EOS                     '\0'    /* string terminator */

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* Buffer for one file specification: a length word followed by the
   characters of the name, with room for a terminating EOS.  */
typedef struct
{
  short curlen;                         /* length of the name in body */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the name itself, plus EOS */
} vspec;
1024
1025 /*
1026 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1027 returning in each successive call the next file name matching the input
1028 spec. The function expects that each in_spec passed
1029 to it will be processed to completion; in particular, up to and
1030 including the call following that in which the last matching name
1031 is returned, the function ignores the value of in_spec, and will
1032 only start processing a new spec with the following call.
1033 If an error occurs, on return out_spec contains the value
1034 of in_spec when the error occurred.
1035
1036 With each successive file name returned in out_spec, the
1037 function's return value is one. When there are no more matching
1038 names the function returns zero. If on the first call no file
1039 matches in_spec, or there is any other error, -1 is returned.
1040 */
1041
1042 #include <rmsdef.h>
1043 #include <descrip.h>
1044 #define OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047 vspec *out;
1048 char *in;
1049 {
1050 static long context = 0;
1051 static struct dsc$descriptor_s o;
1052 static struct dsc$descriptor_s i;
1053 static bool pass1 = TRUE;
1054 long status;
1055 short retval;
1056
1057 if (pass1)
1058 {
1059 pass1 = FALSE;
1060 o.dsc$a_pointer = (char *) out;
1061 o.dsc$w_length = (short)OUTSIZE;
1062 i.dsc$a_pointer = in;
1063 i.dsc$w_length = (short)strlen(in);
1064 i.dsc$b_dtype = DSC$K_DTYPE_T;
1065 i.dsc$b_class = DSC$K_CLASS_S;
1066 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067 o.dsc$b_class = DSC$K_CLASS_VS;
1068 }
1069 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1070 {
1071 out->body[out->curlen] = EOS;
1072 return 1;
1073 }
1074 else if (status == RMS$_NMF)
1075 retval = 0;
1076 else
1077 {
1078 strcpy(out->body, in);
1079 retval = -1;
1080 }
1081 lib$find_file_end(&context);
1082 pass1 = TRUE;
1083 return retval;
1084 }
1085
1086 /*
1087 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088 name of each file specified by the provided arg expanding wildcards.
1089 */
1090 static char *
1091 gfnames (arg, p_error)
1092 char *arg;
1093 bool *p_error;
1094 {
1095 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1096
1097 switch (fn_exp (&filename, arg))
1098 {
1099 case 1:
1100 *p_error = FALSE;
1101 return filename.body;
1102 case 0:
1103 *p_error = FALSE;
1104 return NULL;
1105 default:
1106 *p_error = TRUE;
1107 return filename.body;
1108 }
1109 }
1110
1111 #ifndef OLD /* Newer versions of VMS do provide `system'. */
1112 system (cmd)
1113 char *cmd;
1114 {
1115 error ("%s", "system() function not implemented under VMS");
1116 }
1117 #endif
1118
1119 #define VERSION_DELIM ';'
1120 char *massage_name (s)
1121 char *s;
1122 {
1123 char *start = s;
1124
1125 for ( ; *s; s++)
1126 if (*s == VERSION_DELIM)
1127 {
1128 *s = EOS;
1129 break;
1130 }
1131 else
1132 *s = lowcase (*s);
1133 return start;
1134 }
1135 #endif /* VMS */
1136
1137 \f
1138 int
1139 main (argc, argv)
1140 int argc;
1141 char *argv[];
1142 {
1143 int i;
1144 unsigned int nincluded_files;
1145 char **included_files;
1146 argument *argbuffer;
1147 int current_arg, file_count;
1148 linebuffer filename_lb;
1149 bool help_asked = FALSE;
1150 #ifdef VMS
1151 bool got_err;
1152 #endif
1153 char *optstring;
1154 int opt;
1155
1156
1157 #ifdef DOS_NT
1158 _fmode = O_BINARY; /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1160
1161 progname = argv[0];
1162 nincluded_files = 0;
1163 included_files = xnew (argc, char *);
1164 current_arg = 0;
1165 file_count = 0;
1166
1167 /* Allocate enough no matter what happens. Overkill, but each one
1168 is small. */
1169 argbuffer = xnew (argc, argument);
1170
1171 /*
1172 * If etags, always find typedefs and structure tags. Why not?
1173 * Also default to find macro constants, enum constants and
1174 * global variables.
1175 */
1176 if (!CTAGS)
1177 {
1178 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179 globals = TRUE;
1180 }
1181
1182 optstring = "-";
1183 #ifdef ETAGS_REGEXPS
1184 optstring = "-r:Rc:";
1185 #endif /* ETAGS_REGEXPS */
1186 if (LONG_OPTIONS)
1187 optstring += 1;
1188 optstring = concat (optstring,
1189 "Cf:Il:o:SVhH",
1190 (CTAGS) ? "BxdtTuvw" : "aDi:");
1191
1192 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1193 switch (opt)
1194 {
1195 case 0:
1196 /* If getopt returns 0, then it has already processed a
1197 long-named option. We should do nothing. */
1198 break;
1199
1200 case 1:
1201 /* This means that a file name has been seen. Record it. */
1202 argbuffer[current_arg].arg_type = at_filename;
1203 argbuffer[current_arg].what = optarg;
1204 ++current_arg;
1205 ++file_count;
1206 break;
1207
1208 case STDIN:
1209 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1210 argbuffer[current_arg].arg_type = at_stdin;
1211 argbuffer[current_arg].what = optarg;
1212 ++current_arg;
1213 ++file_count;
1214 if (parsing_stdin)
1215 fatal ("cannot parse standard input more than once", (char *)NULL);
1216 parsing_stdin = TRUE;
1217 break;
1218
1219 /* Common options. */
1220 case 'C': cplusplus = TRUE; break;
1221 case 'f': /* for compatibility with old makefiles */
1222 case 'o':
1223 if (tagfile)
1224 {
1225 error ("-o option may only be given once.", (char *)NULL);
1226 suggest_asking_for_help ();
1227 /* NOTREACHED */
1228 }
1229 tagfile = optarg;
1230 break;
1231 case 'I':
1232 case 'S': /* for backward compatibility */
1233 ignoreindent = TRUE;
1234 break;
1235 case 'l':
1236 {
1237 language *lang = get_language_from_langname (optarg);
1238 if (lang != NULL)
1239 {
1240 argbuffer[current_arg].lang = lang;
1241 argbuffer[current_arg].arg_type = at_language;
1242 ++current_arg;
1243 }
1244 }
1245 break;
1246 case 'c':
1247 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1248 optarg = concat (optarg, "i", ""); /* memory leak here */
1249 /* FALLTHRU */
1250 case 'r':
1251 argbuffer[current_arg].arg_type = at_regexp;
1252 argbuffer[current_arg].what = optarg;
1253 ++current_arg;
1254 break;
1255 case 'R':
1256 argbuffer[current_arg].arg_type = at_regexp;
1257 argbuffer[current_arg].what = NULL;
1258 ++current_arg;
1259 break;
1260 case 'V':
1261 print_version ();
1262 break;
1263 case 'h':
1264 case 'H':
1265 help_asked = TRUE;
1266 break;
1267
1268 /* Etags options */
1269 case 'a': append_to_tagfile = TRUE; break;
1270 case 'D': constantypedefs = FALSE; break;
1271 case 'i': included_files[nincluded_files++] = optarg; break;
1272
1273 /* Ctags options. */
1274 case 'B': searchar = '?'; break;
1275 case 'd': constantypedefs = TRUE; break;
1276 case 't': typedefs = TRUE; break;
1277 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1278 case 'u': update = TRUE; break;
1279 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1280 case 'x': cxref_style = TRUE; break;
1281 case 'w': no_warnings = TRUE; break;
1282 default:
1283 suggest_asking_for_help ();
1284 /* NOTREACHED */
1285 }
1286
1287 for (; optind < argc; optind++)
1288 {
1289 argbuffer[current_arg].arg_type = at_filename;
1290 argbuffer[current_arg].what = argv[optind];
1291 ++current_arg;
1292 ++file_count;
1293 }
1294
1295 argbuffer[current_arg].arg_type = at_end;
1296
1297 if (help_asked)
1298 print_help (argbuffer);
1299 /* NOTREACHED */
1300
1301 if (nincluded_files == 0 && file_count == 0)
1302 {
1303 error ("no input files specified.", (char *)NULL);
1304 suggest_asking_for_help ();
1305 /* NOTREACHED */
1306 }
1307
1308 if (tagfile == NULL)
1309 tagfile = CTAGS ? "tags" : "TAGS";
1310 cwd = etags_getcwd (); /* the current working directory */
1311 if (cwd[strlen (cwd) - 1] != '/')
1312 {
1313 char *oldcwd = cwd;
1314 cwd = concat (oldcwd, "/", "");
1315 free (oldcwd);
1316 }
1317 /* Relative file names are made relative to the current directory. */
1318 if (streq (tagfile, "-")
1319 || strneq (tagfile, "/dev/", 5))
1320 tagfiledir = cwd;
1321 else
1322 tagfiledir = absolute_dirname (tagfile, cwd);
1323
1324 init (); /* set up boolean "functions" */
1325
1326 linebuffer_init (&lb);
1327 linebuffer_init (&filename_lb);
1328 linebuffer_init (&filebuf);
1329 linebuffer_init (&token_name);
1330
1331 if (!CTAGS)
1332 {
1333 if (streq (tagfile, "-"))
1334 {
1335 tagf = stdout;
1336 #ifdef DOS_NT
1337 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1338 doesn't take effect until after `stdout' is already open). */
1339 if (!isatty (fileno (stdout)))
1340 setmode (fileno (stdout), O_BINARY);
1341 #endif /* DOS_NT */
1342 }
1343 else
1344 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1345 if (tagf == NULL)
1346 pfatal (tagfile);
1347 }
1348
1349 /*
1350 * Loop through files finding functions.
1351 */
1352 for (i = 0; i < current_arg; i++)
1353 {
1354 static language *lang; /* non-NULL if language is forced */
1355 char *this_file;
1356
1357 switch (argbuffer[i].arg_type)
1358 {
1359 case at_language:
1360 lang = argbuffer[i].lang;
1361 break;
1362 #ifdef ETAGS_REGEXPS
1363 case at_regexp:
1364 analyse_regex (argbuffer[i].what);
1365 break;
1366 #endif
1367 case at_filename:
1368 #ifdef VMS
1369 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1370 {
1371 if (got_err)
1372 {
1373 error ("can't find file %s\n", this_file);
1374 argc--, argv++;
1375 }
1376 else
1377 {
1378 this_file = massage_name (this_file);
1379 }
1380 #else
1381 this_file = argbuffer[i].what;
1382 #endif
1383 /* Input file named "-" means read file names from stdin
1384 (one per line) and use them. */
1385 if (streq (this_file, "-"))
1386 {
1387 if (parsing_stdin)
1388 fatal ("cannot parse standard input AND read file names from it",
1389 (char *)NULL);
1390 while (readline_internal (&filename_lb, stdin) > 0)
1391 process_file_name (filename_lb.buffer, lang);
1392 }
1393 else
1394 process_file_name (this_file, lang);
1395 #ifdef VMS
1396 }
1397 #endif
1398 break;
1399 case at_stdin:
1400 this_file = argbuffer[i].what;
1401 process_file (stdin, this_file, lang);
1402 break;
1403 case at_end:
1404 break;
1405 }
1406 }
1407
1408 #ifdef ETAGS_REGEXPS
1409 free_regexps ();
1410 #endif /* ETAGS_REGEXPS */
1411 free (lb.buffer);
1412 free (filebuf.buffer);
1413 free (token_name.buffer);
1414
1415 if (!CTAGS || cxref_style)
1416 {
1417 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1418 free_tree (nodehead);
1419 nodehead = NULL;
1420 if (!CTAGS)
1421 {
1422 fdesc *fdp;
1423
1424 /* Output file entries that have no tags. */
1425 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1426 if (!fdp->written)
1427 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1428
1429 while (nincluded_files-- > 0)
1430 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1431 }
1432
1433 if (fclose (tagf) == EOF)
1434 pfatal (tagfile);
1435 exit (EXIT_SUCCESS);
1436 }
1437
1438 if (update)
1439 {
1440 char cmd[BUFSIZ];
1441 for (i = 0; i < current_arg; ++i)
1442 {
1443 switch (argbuffer[i].arg_type)
1444 {
1445 case at_filename:
1446 case at_stdin:
1447 break;
1448 default:
1449 continue; /* the for loop */
1450 }
1451 sprintf (cmd,
1452 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1453 tagfile, argbuffer[i].what, tagfile);
1454 if (system (cmd) != EXIT_SUCCESS)
1455 fatal ("failed to execute shell command", (char *)NULL);
1456 }
1457 append_to_tagfile = TRUE;
1458 }
1459
1460 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1461 if (tagf == NULL)
1462 pfatal (tagfile);
1463 put_entries (nodehead); /* write all the tags (CTAGS) */
1464 free_tree (nodehead);
1465 nodehead = NULL;
1466 if (fclose (tagf) == EOF)
1467 pfatal (tagfile);
1468
1469 if (update)
1470 {
1471 char cmd[2*BUFSIZ+10];
1472 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1473 exit (system (cmd));
1474 }
1475 return EXIT_SUCCESS;
1476 }
1477
1478
1479 /*
1480 * Return a compressor given the file name. If EXTPTR is non-zero,
1481 * return a pointer into FILE where the compressor-specific
1482 * extension begins. If no compressor is found, NULL is returned
1483 * and EXTPTR is not significant.
1484 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1485 */
1486 static compressor *
1487 get_compressor_from_suffix (file, extptr)
1488 char *file;
1489 char **extptr;
1490 {
1491 compressor *compr;
1492 char *slash, *suffix;
1493
1494 /* This relies on FN to be after canonicalize_filename,
1495 so we don't need to consider backslashes on DOS_NT. */
1496 slash = etags_strrchr (file, '/');
1497 suffix = etags_strrchr (file, '.');
1498 if (suffix == NULL || suffix < slash)
1499 return NULL;
1500 if (extptr != NULL)
1501 *extptr = suffix;
1502 suffix += 1;
1503 /* Let those poor souls who live with DOS 8+3 file name limits get
1504 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1505 Only the first do loop is run if not MSDOS */
1506 do
1507 {
1508 for (compr = compressors; compr->suffix != NULL; compr++)
1509 if (streq (compr->suffix, suffix))
1510 return compr;
1511 if (!MSDOS)
1512 break; /* do it only once: not really a loop */
1513 if (extptr != NULL)
1514 *extptr = ++suffix;
1515 } while (*suffix != '\0');
1516 return NULL;
1517 }
1518
1519
1520
1521 /*
1522 * Return a language given the name.
1523 */
1524 static language *
1525 get_language_from_langname (name)
1526 const char *name;
1527 {
1528 language *lang;
1529
1530 if (name == NULL)
1531 error ("empty language name", (char *)NULL);
1532 else
1533 {
1534 for (lang = lang_names; lang->name != NULL; lang++)
1535 if (streq (name, lang->name))
1536 return lang;
1537 error ("unknown language \"%s\"", name);
1538 }
1539
1540 return NULL;
1541 }
1542
1543
1544 /*
1545 * Return a language given the interpreter name.
1546 */
1547 static language *
1548 get_language_from_interpreter (interpreter)
1549 char *interpreter;
1550 {
1551 language *lang;
1552 char **iname;
1553
1554 if (interpreter == NULL)
1555 return NULL;
1556 for (lang = lang_names; lang->name != NULL; lang++)
1557 if (lang->interpreters != NULL)
1558 for (iname = lang->interpreters; *iname != NULL; iname++)
1559 if (streq (*iname, interpreter))
1560 return lang;
1561
1562 return NULL;
1563 }
1564
1565
1566
1567 /*
1568 * Return a language given the file name.
1569 */
1570 static language *
1571 get_language_from_filename (file, case_sensitive)
1572 char *file;
1573 bool case_sensitive;
1574 {
1575 language *lang;
1576 char **name, **ext, *suffix;
1577
1578 /* Try whole file name first. */
1579 for (lang = lang_names; lang->name != NULL; lang++)
1580 if (lang->filenames != NULL)
1581 for (name = lang->filenames; *name != NULL; name++)
1582 if ((case_sensitive)
1583 ? streq (*name, file)
1584 : strcaseeq (*name, file))
1585 return lang;
1586
1587 /* If not found, try suffix after last dot. */
1588 suffix = etags_strrchr (file, '.');
1589 if (suffix == NULL)
1590 return NULL;
1591 suffix += 1;
1592 for (lang = lang_names; lang->name != NULL; lang++)
1593 if (lang->suffixes != NULL)
1594 for (ext = lang->suffixes; *ext != NULL; ext++)
1595 if ((case_sensitive)
1596 ? streq (*ext, suffix)
1597 : strcaseeq (*ext, suffix))
1598 return lang;
1599 return NULL;
1600 }
1601
1602 \f
1603 /*
1604 * This routine is called on each file argument.
1605 */
1606 static void
1607 process_file_name (file, lang)
1608 char *file;
1609 language *lang;
1610 {
1611 struct stat stat_buf;
1612 FILE *inf;
1613 fdesc *fdp;
1614 compressor *compr;
1615 char *compressed_name, *uncompressed_name;
1616 char *ext, *real_name;
1617 int retval;
1618
1619 canonicalize_filename (file);
1620 if (streq (file, tagfile) && !streq (tagfile, "-"))
1621 {
1622 error ("skipping inclusion of %s in self.", file);
1623 return;
1624 }
1625 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1626 {
1627 compressed_name = NULL;
1628 real_name = uncompressed_name = savestr (file);
1629 }
1630 else
1631 {
1632 real_name = compressed_name = savestr (file);
1633 uncompressed_name = savenstr (file, ext - file);
1634 }
1635
1636 /* If the canonicalized uncompressed name
1637 has already been dealt with, skip it silently. */
1638 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1639 {
1640 assert (fdp->infname != NULL);
1641 if (streq (uncompressed_name, fdp->infname))
1642 goto cleanup;
1643 }
1644
1645 if (stat (real_name, &stat_buf) != 0)
1646 {
1647 /* Reset real_name and try with a different name. */
1648 real_name = NULL;
1649 if (compressed_name != NULL) /* try with the given suffix */
1650 {
1651 if (stat (uncompressed_name, &stat_buf) == 0)
1652 real_name = uncompressed_name;
1653 }
1654 else /* try all possible suffixes */
1655 {
1656 for (compr = compressors; compr->suffix != NULL; compr++)
1657 {
1658 compressed_name = concat (file, ".", compr->suffix);
1659 if (stat (compressed_name, &stat_buf) != 0)
1660 {
1661 if (MSDOS)
1662 {
1663 char *suf = compressed_name + strlen (file);
1664 size_t suflen = strlen (compr->suffix) + 1;
1665 for ( ; suf[1]; suf++, suflen--)
1666 {
1667 memmove (suf, suf + 1, suflen);
1668 if (stat (compressed_name, &stat_buf) == 0)
1669 {
1670 real_name = compressed_name;
1671 break;
1672 }
1673 }
1674 if (real_name != NULL)
1675 break;
1676 } /* MSDOS */
1677 free (compressed_name);
1678 compressed_name = NULL;
1679 }
1680 else
1681 {
1682 real_name = compressed_name;
1683 break;
1684 }
1685 }
1686 }
1687 if (real_name == NULL)
1688 {
1689 perror (file);
1690 goto cleanup;
1691 }
1692 } /* try with a different name */
1693
1694 if (!S_ISREG (stat_buf.st_mode))
1695 {
1696 error ("skipping %s: it is not a regular file.", real_name);
1697 goto cleanup;
1698 }
1699 if (real_name == compressed_name)
1700 {
1701 char *cmd = concat (compr->command, " ", real_name);
1702 inf = (FILE *) popen (cmd, "r");
1703 free (cmd);
1704 }
1705 else
1706 inf = fopen (real_name, "r");
1707 if (inf == NULL)
1708 {
1709 perror (real_name);
1710 goto cleanup;
1711 }
1712
1713 process_file (inf, uncompressed_name, lang);
1714
1715 if (real_name == compressed_name)
1716 retval = pclose (inf);
1717 else
1718 retval = fclose (inf);
1719 if (retval < 0)
1720 pfatal (file);
1721
1722 cleanup:
1723 if (compressed_name) free (compressed_name);
1724 if (uncompressed_name) free (uncompressed_name);
1725 last_node = NULL;
1726 curfdp = NULL;
1727 return;
1728 }
1729
1730 static void
1731 process_file (fh, fn, lang)
1732 FILE *fh;
1733 char *fn;
1734 language *lang;
1735 {
1736 static const fdesc emptyfdesc;
1737 fdesc *fdp;
1738
1739 /* Create a new input file description entry. */
1740 fdp = xnew (1, fdesc);
1741 *fdp = emptyfdesc;
1742 fdp->next = fdhead;
1743 fdp->infname = savestr (fn);
1744 fdp->lang = lang;
1745 fdp->infabsname = absolute_filename (fn, cwd);
1746 fdp->infabsdir = absolute_dirname (fn, cwd);
1747 if (filename_is_absolute (fn))
1748 {
1749 /* An absolute file name. Canonicalize it. */
1750 fdp->taggedfname = absolute_filename (fn, NULL);
1751 }
1752 else
1753 {
1754 /* A file name relative to cwd. Make it relative
1755 to the directory of the tags file. */
1756 fdp->taggedfname = relative_filename (fn, tagfiledir);
1757 }
1758 fdp->usecharno = TRUE; /* use char position when making tags */
1759 fdp->prop = NULL;
1760 fdp->written = FALSE; /* not written on tags file yet */
1761
1762 fdhead = fdp;
1763 curfdp = fdhead; /* the current file description */
1764
1765 find_entries (fh);
1766
1767 /* If not Ctags, and if this is not metasource and if it contained no #line
1768 directives, we can write the tags and free all nodes pointing to
1769 curfdp. */
1770 if (!CTAGS
1771 && curfdp->usecharno /* no #line directives in this file */
1772 && !curfdp->lang->metasource)
1773 {
1774 node *np, *prev;
1775
1776 /* Look for the head of the sublist relative to this file. See add_node
1777 for the structure of the node tree. */
1778 prev = NULL;
1779 for (np = nodehead; np != NULL; prev = np, np = np->left)
1780 if (np->fdp == curfdp)
1781 break;
1782
1783 /* If we generated tags for this file, write and delete them. */
1784 if (np != NULL)
1785 {
1786 /* This is the head of the last sublist, if any. The following
1787 instructions depend on this being true. */
1788 assert (np->left == NULL);
1789
1790 assert (fdhead == curfdp);
1791 assert (last_node->fdp == curfdp);
1792 put_entries (np); /* write tags for file curfdp->taggedfname */
1793 free_tree (np); /* remove the written nodes */
1794 if (prev == NULL)
1795 nodehead = NULL; /* no nodes left */
1796 else
1797 prev->left = NULL; /* delete the pointer to the sublist */
1798 }
1799 }
1800 }
1801
1802 /*
1803 * This routine sets up the boolean pseudo-functions which work
1804 * by setting boolean flags dependent upon the corresponding character.
1805 * Every char which is NOT in that string is not a white char. Therefore,
1806 * all of the array "_wht" is set to FALSE, and then the elements
1807 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1808 * of a char is TRUE if it is the string "white", else FALSE.
1809 */
1810 static void
1811 init ()
1812 {
1813 register char *sp;
1814 register int i;
1815
1816 for (i = 0; i < CHARS; i++)
1817 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1818 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1819 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1820 notinname('\0') = notinname('\n');
1821 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1822 begtoken('\0') = begtoken('\n');
1823 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1824 intoken('\0') = intoken('\n');
1825 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1826 endtoken('\0') = endtoken('\n');
1827 }
1828
1829 /*
1830 * This routine opens the specified file and calls the function
1831 * which finds the function and type definitions.
1832 */
1833 static void
1834 find_entries (inf)
1835 FILE *inf;
1836 {
1837 char *cp;
1838 language *lang = curfdp->lang;
1839 Lang_function *parser = NULL;
1840
1841 /* If user specified a language, use it. */
1842 if (lang != NULL && lang->function != NULL)
1843 {
1844 parser = lang->function;
1845 }
1846
1847 /* Else try to guess the language given the file name. */
1848 if (parser == NULL)
1849 {
1850 lang = get_language_from_filename (curfdp->infname, TRUE);
1851 if (lang != NULL && lang->function != NULL)
1852 {
1853 curfdp->lang = lang;
1854 parser = lang->function;
1855 }
1856 }
1857
1858 /* Else look for sharp-bang as the first two characters. */
1859 if (parser == NULL
1860 && readline_internal (&lb, inf) > 0
1861 && lb.len >= 2
1862 && lb.buffer[0] == '#'
1863 && lb.buffer[1] == '!')
1864 {
1865 char *lp;
1866
1867 /* Set lp to point at the first char after the last slash in the
1868 line or, if no slashes, at the first nonblank. Then set cp to
1869 the first successive blank and terminate the string. */
1870 lp = etags_strrchr (lb.buffer+2, '/');
1871 if (lp != NULL)
1872 lp += 1;
1873 else
1874 lp = skip_spaces (lb.buffer + 2);
1875 cp = skip_non_spaces (lp);
1876 *cp = '\0';
1877
1878 if (strlen (lp) > 0)
1879 {
1880 lang = get_language_from_interpreter (lp);
1881 if (lang != NULL && lang->function != NULL)
1882 {
1883 curfdp->lang = lang;
1884 parser = lang->function;
1885 }
1886 }
1887 }
1888
1889 /* We rewind here, even if inf may be a pipe. We fail if the
1890 length of the first line is longer than the pipe block size,
1891 which is unlikely. */
1892 rewind (inf);
1893
1894 /* Else try to guess the language given the case insensitive file name. */
1895 if (parser == NULL)
1896 {
1897 lang = get_language_from_filename (curfdp->infname, FALSE);
1898 if (lang != NULL && lang->function != NULL)
1899 {
1900 curfdp->lang = lang;
1901 parser = lang->function;
1902 }
1903 }
1904
1905 /* Else try Fortran or C. */
1906 if (parser == NULL)
1907 {
1908 node *old_last_node = last_node;
1909
1910 curfdp->lang = get_language_from_langname ("fortran");
1911 find_entries (inf);
1912
1913 if (old_last_node == last_node)
1914 /* No Fortran entries found. Try C. */
1915 {
1916 /* We do not tag if rewind fails.
1917 Only the file name will be recorded in the tags file. */
1918 rewind (inf);
1919 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1920 find_entries (inf);
1921 }
1922 return;
1923 }
1924
1925 if (!no_line_directive
1926 && curfdp->lang != NULL && curfdp->lang->metasource)
1927 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1928 file, or anyway we parsed a file that is automatically generated from
1929 this one. If this is the case, the bingo.c file contained #line
1930 directives that generated tags pointing to this file. Let's delete
1931 them all before parsing this file, which is the real source. */
1932 {
1933 fdesc **fdpp = &fdhead;
1934 while (*fdpp != NULL)
1935 if (*fdpp != curfdp
1936 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1937 /* We found one of those! We must delete both the file description
1938 and all tags referring to it. */
1939 {
1940 fdesc *badfdp = *fdpp;
1941
1942 /* Delete the tags referring to badfdp->taggedfname
1943 that were obtained from badfdp->infname. */
1944 invalidate_nodes (badfdp, &nodehead);
1945
1946 *fdpp = badfdp->next; /* remove the bad description from the list */
1947 free_fdesc (badfdp);
1948 }
1949 else
1950 fdpp = &(*fdpp)->next; /* advance the list pointer */
1951 }
1952
1953 assert (parser != NULL);
1954
1955 /* Generic initialisations before reading from file. */
1956 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1957
1958 /* Generic initialisations before parsing file with readline. */
1959 lineno = 0; /* reset global line number */
1960 charno = 0; /* reset global char number */
1961 linecharno = 0; /* reset global char number of line start */
1962
1963 parser (inf);
1964
1965 #ifdef ETAGS_REGEXPS
1966 regex_tag_multiline ();
1967 #endif /* ETAGS_REGEXPS */
1968 }
1969
1970 \f
1971 /*
1972 * Check whether an implicitly named tag should be created,
1973 * then call `pfnote'.
1974 * NAME is a string that is internally copied by this function.
1975 *
1976 * TAGS format specification
1977 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1978 * The following is explained in some more detail in etc/ETAGS.EBNF.
1979 *
1980 * make_tag creates tags with "implicit tag names" (unnamed tags)
1981 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1982 * 1. NAME does not contain any of the characters in NONAM;
1983 * 2. LINESTART contains name as either a rightmost, or rightmost but
1984 * one character, substring;
1985 * 3. the character, if any, immediately before NAME in LINESTART must
1986 * be a character in NONAM;
1987 * 4. the character, if any, immediately after NAME in LINESTART must
1988 * also be a character in NONAM.
1989 *
1990 * The implementation uses the notinname() macro, which recognises the
1991 * characters stored in the string `nonam'.
1992 * etags.el needs to use the same characters that are in NONAM.
1993 */
1994 static void
1995 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1996 char *name; /* tag name, or NULL if unnamed */
1997 int namelen; /* tag length */
1998 bool is_func; /* tag is a function */
1999 char *linestart; /* start of the line where tag is */
2000 int linelen; /* length of the line where tag is */
2001 int lno; /* line number */
2002 long cno; /* character number */
2003 {
2004 bool named = (name != NULL && namelen > 0);
2005
2006 if (!CTAGS && named) /* maybe set named to false */
2007 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2008 such that etags.el can guess a name from it. */
2009 {
2010 int i;
2011 register char *cp = name;
2012
2013 for (i = 0; i < namelen; i++)
2014 if (notinname (*cp++))
2015 break;
2016 if (i == namelen) /* rule #1 */
2017 {
2018 cp = linestart + linelen - namelen;
2019 if (notinname (linestart[linelen-1]))
2020 cp -= 1; /* rule #4 */
2021 if (cp >= linestart /* rule #2 */
2022 && (cp == linestart
2023 || notinname (cp[-1])) /* rule #3 */
2024 && strneq (name, cp, namelen)) /* rule #2 */
2025 named = FALSE; /* use implicit tag name */
2026 }
2027 }
2028
2029 if (named)
2030 name = savenstr (name, namelen);
2031 else
2032 name = NULL;
2033 pfnote (name, is_func, linestart, linelen, lno, cno);
2034 }
2035
2036 /* Record a tag. */
2037 static void
2038 pfnote (name, is_func, linestart, linelen, lno, cno)
2039 char *name; /* tag name, or NULL if unnamed */
2040 bool is_func; /* tag is a function */
2041 char *linestart; /* start of the line where tag is */
2042 int linelen; /* length of the line where tag is */
2043 int lno; /* line number */
2044 long cno; /* character number */
2045 {
2046 register node *np;
2047
2048 assert (name == NULL || name[0] != '\0');
2049 if (CTAGS && name == NULL)
2050 return;
2051
2052 np = xnew (1, node);
2053
2054 /* If ctags mode, change name "main" to M<thisfilename>. */
2055 if (CTAGS && !cxref_style && streq (name, "main"))
2056 {
2057 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2058 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2059 fp = etags_strrchr (np->name, '.');
2060 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2061 fp[0] = '\0';
2062 }
2063 else
2064 np->name = name;
2065 np->valid = TRUE;
2066 np->been_warned = FALSE;
2067 np->fdp = curfdp;
2068 np->is_func = is_func;
2069 np->lno = lno;
2070 if (np->fdp->usecharno)
2071 /* Our char numbers are 0-base, because of C language tradition?
2072 ctags compatibility? old versions compatibility? I don't know.
2073 Anyway, since emacs's are 1-base we expect etags.el to take care
2074 of the difference. If we wanted to have 1-based numbers, we would
2075 uncomment the +1 below. */
2076 np->cno = cno /* + 1 */ ;
2077 else
2078 np->cno = invalidcharno;
2079 np->left = np->right = NULL;
2080 if (CTAGS && !cxref_style)
2081 {
2082 if (strlen (linestart) < 50)
2083 np->regex = concat (linestart, "$", "");
2084 else
2085 np->regex = savenstr (linestart, 50);
2086 }
2087 else
2088 np->regex = savenstr (linestart, linelen);
2089
2090 add_node (np, &nodehead);
2091 }
2092
2093 /*
2094 * free_tree ()
2095 * recurse on left children, iterate on right children.
2096 */
2097 static void
2098 free_tree (np)
2099 register node *np;
2100 {
2101 while (np)
2102 {
2103 register node *node_right = np->right;
2104 free_tree (np->left);
2105 if (np->name != NULL)
2106 free (np->name);
2107 free (np->regex);
2108 free (np);
2109 np = node_right;
2110 }
2111 }
2112
2113 /*
2114 * free_fdesc ()
2115 * delete a file description
2116 */
2117 static void
2118 free_fdesc (fdp)
2119 register fdesc *fdp;
2120 {
2121 if (fdp->infname != NULL) free (fdp->infname);
2122 if (fdp->infabsname != NULL) free (fdp->infabsname);
2123 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2124 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2125 if (fdp->prop != NULL) free (fdp->prop);
2126 free (fdp);
2127 }
2128
2129 /*
2130 * add_node ()
2131 * Adds a node to the tree of nodes. In etags mode, sort by file
2132 * name. In ctags mode, sort by tag name. Make no attempt at
2133 * balancing.
2134 *
2135 * add_node is the only function allowed to add nodes, so it can
2136 * maintain state.
2137 */
2138 static void
2139 add_node (np, cur_node_p)
2140 node *np, **cur_node_p;
2141 {
2142 register int dif;
2143 register node *cur_node = *cur_node_p;
2144
2145 if (cur_node == NULL)
2146 {
2147 *cur_node_p = np;
2148 last_node = np;
2149 return;
2150 }
2151
2152 if (!CTAGS)
2153 /* Etags Mode */
2154 {
2155 /* For each file name, tags are in a linked sublist on the right
2156 pointer. The first tags of different files are a linked list
2157 on the left pointer. last_node points to the end of the last
2158 used sublist. */
2159 if (last_node != NULL && last_node->fdp == np->fdp)
2160 {
2161 /* Let's use the same sublist as the last added node. */
2162 assert (last_node->right == NULL);
2163 last_node->right = np;
2164 last_node = np;
2165 }
2166 else if (cur_node->fdp == np->fdp)
2167 {
2168 /* Scanning the list we found the head of a sublist which is
2169 good for us. Let's scan this sublist. */
2170 add_node (np, &cur_node->right);
2171 }
2172 else
2173 /* The head of this sublist is not good for us. Let's try the
2174 next one. */
2175 add_node (np, &cur_node->left);
2176 } /* if ETAGS mode */
2177
2178 else
2179 {
2180 /* Ctags Mode */
2181 dif = strcmp (np->name, cur_node->name);
2182
2183 /*
2184 * If this tag name matches an existing one, then
2185 * do not add the node, but maybe print a warning.
2186 */
2187 if (!dif)
2188 {
2189 if (np->fdp == cur_node->fdp)
2190 {
2191 if (!no_warnings)
2192 {
2193 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2194 np->fdp->infname, lineno, np->name);
2195 fprintf (stderr, "Second entry ignored\n");
2196 }
2197 }
2198 else if (!cur_node->been_warned && !no_warnings)
2199 {
2200 fprintf
2201 (stderr,
2202 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2203 np->fdp->infname, cur_node->fdp->infname, np->name);
2204 cur_node->been_warned = TRUE;
2205 }
2206 return;
2207 }
2208
2209 /* Actually add the node */
2210 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2211 } /* if CTAGS mode */
2212 }
2213
2214 /*
2215 * invalidate_nodes ()
2216 * Scan the node tree and invalidate all nodes pointing to the
2217 * given file description (CTAGS case) or free them (ETAGS case).
2218 */
2219 static void
2220 invalidate_nodes (badfdp, npp)
2221 fdesc *badfdp;
2222 node **npp;
2223 {
2224 node *np = *npp;
2225
2226 if (np == NULL)
2227 return;
2228
2229 if (CTAGS)
2230 {
2231 if (np->left != NULL)
2232 invalidate_nodes (badfdp, &np->left);
2233 if (np->fdp == badfdp)
2234 np->valid = FALSE;
2235 if (np->right != NULL)
2236 invalidate_nodes (badfdp, &np->right);
2237 }
2238 else
2239 {
2240 assert (np->fdp != NULL);
2241 if (np->fdp == badfdp)
2242 {
2243 *npp = np->left; /* detach the sublist from the list */
2244 np->left = NULL; /* isolate it */
2245 free_tree (np); /* free it */
2246 invalidate_nodes (badfdp, npp);
2247 }
2248 else
2249 invalidate_nodes (badfdp, &np->left);
2250 }
2251 }
2252
2253 \f
/* Forward declarations of the two size helpers defined just below;
   put_entries calls total_size_of_entries, which in turn calls number_len. */
2254 static int total_size_of_entries __P((node *));
2255 static int number_len __P((long));
2256
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero counts as one digit. */
static int
number_len (num)
     long num;
{
  int digits = 0;

  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2267
2268 /*
2269 * Return total number of characters that put_entries will output for
2270 * the nodes in the linked list at the right of the specified node.
2271 * This count is irrelevant with etags.el since emacs 19.34 at least,
2272 * but is still supplied for backward compatibility.
2273 */
2274 static int
2275 total_size_of_entries (np)
2276 register node *np;
2277 {
2278 register int total = 0;
2279
2280 for (; np != NULL; np = np->right)
2281 if (np->valid)
2282 {
2283 total += strlen (np->regex) + 1; /* pat\177 */
2284 if (np->name != NULL)
2285 total += strlen (np->name) + 1; /* name\001 */
2286 total += number_len ((long) np->lno) + 1; /* lno, */
2287 if (np->cno != invalidcharno) /* cno */
2288 total += number_len (np->cno);
2289 total += 1; /* newline */
2290 }
2291
2292 return total;
2293 }
2294
2295 static void
2296 put_entries (np)
2297 register node *np;
2298 {
2299 register char *sp;
2300 static fdesc *fdp = NULL;
2301
2302 if (np == NULL)
2303 return;
2304
2305 /* Output subentries that precede this one */
2306 if (CTAGS)
2307 put_entries (np->left);
2308
2309 /* Output this entry */
2310 if (np->valid)
2311 {
2312 if (!CTAGS)
2313 {
2314 /* Etags mode */
2315 if (fdp != np->fdp)
2316 {
2317 fdp = np->fdp;
2318 fprintf (tagf, "\f\n%s,%d\n",
2319 fdp->taggedfname, total_size_of_entries (np));
2320 fdp->written = TRUE;
2321 }
2322 fputs (np->regex, tagf);
2323 fputc ('\177', tagf);
2324 if (np->name != NULL)
2325 {
2326 fputs (np->name, tagf);
2327 fputc ('\001', tagf);
2328 }
2329 fprintf (tagf, "%d,", np->lno);
2330 if (np->cno != invalidcharno)
2331 fprintf (tagf, "%ld", np->cno);
2332 fputs ("\n", tagf);
2333 }
2334 else
2335 {
2336 /* Ctags mode */
2337 if (np->name == NULL)
2338 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2339
2340 if (cxref_style)
2341 {
2342 if (vgrind_style)
2343 fprintf (stdout, "%s %s %d\n",
2344 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2345 else
2346 fprintf (stdout, "%-16s %3d %-16s %s\n",
2347 np->name, np->lno, np->fdp->taggedfname, np->regex);
2348 }
2349 else
2350 {
2351 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2352
2353 if (np->is_func)
2354 { /* function or #define macro with args */
2355 putc (searchar, tagf);
2356 putc ('^', tagf);
2357
2358 for (sp = np->regex; *sp; sp++)
2359 {
2360 if (*sp == '\\' || *sp == searchar)
2361 putc ('\\', tagf);
2362 putc (*sp, tagf);
2363 }
2364 putc (searchar, tagf);
2365 }
2366 else
2367 { /* anything else; text pattern inadequate */
2368 fprintf (tagf, "%d", np->lno);
2369 }
2370 putc ('\n', tagf);
2371 }
2372 }
2373 } /* if this node contains a valid tag */
2374
2375 /* Output subentries that follow this one */
2376 put_entries (np->right);
2377 if (!CTAGS)
2378 put_entries (np->left);
2379 }
2380
2381 \f
/* C extensions.
   Language masks used to decide which keywords apply.  The low bits
   (C_EXT) identify the dialect; C_PLPL is bit 0 and is also set in
   C_STAR and C_JAVA, so a keyword that must be Java-only needs the
   mask (C_JAVA & ~C_PLPL), i.e. the C_JAVA bits without the C++ bit.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};

static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));

/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline.

Note that the Java-only masks must use the bitwise complement `~'.
Writing `!C_PLPL' yields 0, which makes the whole mask 0 and therefore
turns the keyword on for every language.  */
/*%<*/
/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -m 5  */
/* Computed positions: -k'1-2' */

struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 31, duplicates = 0 */

#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Perfect hash of a keyword candidate: its length plus the table values
   of its first two characters.  STR must hold at least two characters. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}

/* Look up the LEN characters at STR in the keyword table.  Return the
   matching entry, or 0 when the text is not a keyword. */
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  enum
    {
      TOTAL_KEYWORDS = 31,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 3,
      MAX_HASH_VALUE = 33
    };

  static struct C_stab_entry wordlist[] =
    {
      {""}, {""}, {""},
      {"if",            0,                      st_C_ignore},
      {"enum",          0,                      st_C_enum},
      {"@end",          0,                      st_C_objend},
      {"extern",        0,                      st_C_extern},
      {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"for",           0,                      st_C_ignore},
      {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
      {"@protocol",     0,                      st_C_objprot},
      {"@interface",    0,                      st_C_objprot},
      {"operator",      C_PLPL,                 st_C_operator},
      {"return",        0,                      st_C_ignore},
      {"friend",        C_PLPL,                 st_C_ignore},
      {"import",        (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"@implementation",0,                     st_C_objimpl},
      {"define",        0,                      st_C_define},
      {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"namespace",     C_PLPL,                 st_C_struct},
      {"domain",        C_STAR,                 st_C_struct},
      {"template",      0,                      st_C_template},
      {"typedef",       0,                      st_C_typedef},
      {"struct",        0,                      st_C_struct},
      {"switch",        0,                      st_C_ignore},
      {"union",         0,                      st_C_struct},
      {"while",         0,                      st_C_ignore},
      {"class",         0,                      st_C_class},
      {"__attribute__", 0,                      st_C_attribute},
      {"SYSCALL",       0,                      st_C_gnumacro},
      {"PSEUDO",        0,                      st_C_gnumacro},
      {"ENTRY",         0,                      st_C_gnumacro},
      {"DEFUN",         0,                      st_C_gnumacro}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
/*%>*/

/*
 * C_symtype ()
 *      Classify the LEN characters at STR as a keyword of the language
 *      described by the mask C_EXT.  Return st_none when the text is not
 *      in the keyword table, or when the entry is restricted to languages
 *      (non-zero c_ext) that share no bit with C_EXT.  An entry whose
 *      c_ext is 0 applies to every language.
 */
static enum sym_type
C_symtype (str, len, c_ext)
     char *str;
     int len;
     int c_ext;
{
  register struct C_stab_entry *se = in_word_set (str, len);

  if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
    return st_none;
  return se->type;
}
2588
2589 \f
2590 /*
2591 * Ignoring __attribute__ ((list))
 * consider_token sets this flag when it meets the `__attribute__' keyword.
2592 */
2593 static bool inattribute; /* looking at an __attribute__ construct */
2594
2595 /*
2596 * C functions and variables are recognized using a simple
2597 * finite automaton. fvdef is its state variable.
2598 */
2599 static enum
2600 {
2601 fvnone, /* nothing seen */
2602 fdefunkey, /* Emacs DEFUN keyword seen */
2603 fdefunname, /* Emacs DEFUN name seen */
2604 foperator, /* func: operator keyword seen (cplpl) */
2605 fvnameseen, /* function or variable name seen */
2606 fstartlist, /* func: just after open parenthesis */
2607 finlist, /* func: in parameter list */
2608 flistseen, /* func: after parameter list */
2609 fignore, /* func: before open brace */
2610 vignore /* var-like: ignore until ';' */
2611 } fvdef;
2612
/* Set by consider_token on an `extern' keyword; cleared there on a
   `typedef' keyword or on an ignored keyword (st_C_ignore). */
2613 static bool fvextern; /* func or var: extern keyword seen; */
2614
2615 /*
2616 * typedefs are recognized using a simple finite automaton.
2617 * typdef is its state variable.
2618 */
2619 static enum
2620 {
2621 tnone, /* nothing seen */
2622 tkeyseen, /* typedef keyword seen */
2623 ttypeseen, /* defined type seen */
2624 tinbody, /* inside typedef body */
2625 tend, /* just before typedef tag */
2626 tignore /* junk after typedef tag */
2627 } typdef;
2628
2629 /*
2630 * struct-like structures (enum, struct and union) are recognized
2631 * using another simple finite automaton. `structdef' is its state
2632 * variable.
2633 */
2634 static enum
2635 {
2636 snone, /* nothing seen yet,
2637 or in struct body if bracelev > 0 */
2638 skeyseen, /* struct-like keyword seen */
2639 stagseen, /* struct-like tag seen */
2640 scolonseen /* colon seen after struct-like tag */
2641 } structdef;
2642
2643 /*
2644 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a freshly allocated copy here (via savenstr)
 * in the oprotocol and oimplementation states; it is never freed.
2645 */
2646 static char *objtag = "<uninited>";
2647
2648 /*
2649 * Yet another little state machine to deal with preprocessor lines.
2650 */
2651 static enum
2652 {
2653 dnone, /* nothing seen */
2654 dsharpseen, /* '#' seen as first char on line */
2655 ddefineseen, /* '#' and 'define' seen */
2656 dignorerest /* ignore rest of line */
2657 } definedef;
2658
2659 /*
2660 * State machine for Objective C protocols and implementations.
2661 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2662 */
2663 static enum
2664 {
2665 onone, /* nothing seen */
2666 oprotocol, /* @interface or @protocol seen */
2667 oimplementation, /* @implementation seen */
2668 otagseen, /* class name seen */
2669 oparenseen, /* parenthesis before category seen */
2670 ocatseen, /* category name seen */
2671 oinbody, /* in @implementation body */
2672 omethodsign, /* in @implementation body, after +/- */
2673 omethodtag, /* after method name */
2674 omethodcolon, /* after method colon */
2675 omethodparm, /* after method parameter */
2676 oignore /* wait for @end */
2677 } objdef;
2678
2679
2680 /*
2681 * Use this structure to keep info about the token read, and how it
2682 * should be tagged. Used by the make_C_tag function to build a tag.
2683 */
2684 static struct tok
2685 {
2686 char *line; /* string containing the token */
2687 int offset; /* where the token starts in LINE */
2688 int length; /* token length */
2689 /*
2690 The previous members can be used to pass strings around for generic
2691 purposes. The following ones specifically refer to creating tags. In this
2692 case the token contained here is the pattern that will be used to create a
2693 tag.
2694 */
/* NOTE(review): the wording of the `valid' comment below looks inverted.
   The CNL macro copies savetoken into token only when savetoken.valid is
   set, so `valid' presumably means "this token may be turned into a tag";
   confirm against make_C_tag. */
2695 bool valid; /* do not create a tag; the token should be
2696 invalidated whenever a state machine is
2697 reset prematurely */
2698 bool named; /* create a named tag */
2699 int lineno; /* source line number of tag */
2700 long linepos; /* source char number of tag */
2701 } token; /* latest token read */
2702
2703 /*
2704 * Variables and functions for dealing with nested structures.
2705 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2706 */
2707 static void pushclass_above __P((int, char *, int));
2708 static void popclass_above __P((int));
2709 static void write_classname __P((linebuffer *, char *qualifier));
2710
/* cname[i] and bracelev[i] describe nesting level i, for 0 <= i < nl.
   A cname entry may be NULL (pushclass_above stores NULL for a NULL name).
   pushclass_above doubles `size' when the arrays are full, so `size'
   must already be non-zero before the first push. */
2711 static struct {
2712 char **cname; /* nested class names */
2713 int *bracelev; /* nested class brace level */
2714 int nl; /* class nesting level (elements used) */
2715 int size; /* length of the array */
2716 } cstack; /* stack for nested declaration tags */
2717 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2718 #define nestlev (cstack.nl)
2719 /* After struct keyword or in struct body, not inside a nested function. */
/* Expands to an expression using a variable named `bracelev' that must be
   in scope at the point of use: true when that brace level is exactly one
   deeper than the level recorded for the innermost stacked class. */
2720 #define instruct (structdef == snone && nestlev > 0 \
2721 && bracelev == cstack.bracelev[nestlev-1] + 1)
2722
2723 static void
2724 pushclass_above (bracelev, str, len)
2725 int bracelev;
2726 char *str;
2727 int len;
2728 {
2729 int nl;
2730
2731 popclass_above (bracelev);
2732 nl = cstack.nl;
2733 if (nl >= cstack.size)
2734 {
2735 int size = cstack.size *= 2;
2736 xrnew (cstack.cname, size, char *);
2737 xrnew (cstack.bracelev, size, int);
2738 }
2739 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2740 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2741 cstack.bracelev[nl] = bracelev;
2742 cstack.nl = nl + 1;
2743 }
2744
2745 static void
2746 popclass_above (bracelev)
2747 int bracelev;
2748 {
2749 int nl;
2750
2751 for (nl = cstack.nl - 1;
2752 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2753 nl--)
2754 {
2755 if (cstack.cname[nl] != NULL)
2756 free (cstack.cname[nl]);
2757 cstack.nl = nl;
2758 }
2759 }
2760
2761 static void
2762 write_classname (cn, qualifier)
2763 linebuffer *cn;
2764 char *qualifier;
2765 {
2766 int i, len;
2767 int qlen = strlen (qualifier);
2768
2769 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2770 {
2771 len = 0;
2772 cn->len = 0;
2773 cn->buffer[0] = '\0';
2774 }
2775 else
2776 {
2777 len = strlen (cstack.cname[0]);
2778 linebuffer_setlen (cn, len);
2779 strcpy (cn->buffer, cstack.cname[0]);
2780 }
2781 for (i = 1; i < cstack.nl; i++)
2782 {
2783 char *s;
2784 int slen;
2785
2786 s = cstack.cname[i];
2787 if (s == NULL)
2788 continue;
2789 slen = strlen (s);
2790 len += slen + qlen;
2791 linebuffer_setlen (cn, len);
2792 strncat (cn->buffer, qualifier, qlen);
2793 strncat (cn->buffer, s, slen);
2794 }
2795 }
2796
2797 \f
2798 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2799 static void make_C_tag __P((bool));
2800
2801 /*
2802 * consider_token ()
2803 * checks to see if the current token is at the start of a
2804 * function or variable, or corresponds to a typedef, or
2805 * is a struct/union/enum tag, or #define, or an enum constant.
2806 *
 * Returns TRUE when the token should be tagged, FALSE otherwise.
 *
2807 * *IS_FUNC_OR_VAR gets TRUE iff the token is a function or #define macro
2808 * with args. C_EXTP points to which language we are looking at.
 * *IS_FUNC_OR_VAR is written only on some of the paths that return TRUE
 * (macros, Objective C class and category names, operators, GNU macro
 * names, function or variable names); the other paths leave it untouched.
 * *C_EXTP may be switched from auto-detection to C++ by this function.
2809 *
2810 * Globals
2811 * fvdef IN OUT
2812 * structdef IN OUT
2813 * definedef IN OUT
2814 * typdef IN OUT
2815 * objdef IN OUT
2816 */
2817
2818 static bool
2819 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2820 register char *str; /* IN: token pointer */
2821 register int len; /* IN: token length */
2822 register int c; /* IN: first char after the token */
2823 int *c_extp; /* IN, OUT: C extensions mask */
2824 int bracelev; /* IN: brace level */
2825 int parlev; /* IN: parenthesis level */
2826 bool *is_func_or_var; /* OUT: function or variable found */
2827 {
2828 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2829 structtype is the type of the preceding struct-like keyword, and
2830 structbracelev is the brace level where it has been seen. */
2831 static enum sym_type structtype;
2832 static int structbracelev;
2833 static enum sym_type toktype;
2834
2835
/* Classify the token against the keyword table for the current
   language mask. */
2836 toktype = C_symtype (str, len, *c_extp);
2837
2838 /*
2839 * Skip __attribute__
2840 */
2841 if (toktype == st_C_attribute)
2842 {
2843 inattribute = TRUE;
2844 return FALSE;
2845 }
2846
2847 /*
2848 * Advance the definedef state machine.
2849 */
2850 switch (definedef)
2851 {
2852 case dnone:
2853 /* We're not on a preprocessor line. */
2854 if (toktype == st_C_gnumacro)
2855 {
2856 fvdef = fdefunkey;
2857 return FALSE;
2858 }
2859 break;
2860 case dsharpseen:
/* First token after `#': only `define' is of interest, any other
   directive makes the rest of the line be ignored. */
2861 if (toktype == st_C_define)
2862 {
2863 definedef = ddefineseen;
2864 }
2865 else
2866 {
2867 definedef = dignorerest;
2868 }
2869 return FALSE;
2870 case ddefineseen:
2871 /*
2872 * Make a tag for any macro, unless it is a constant
2873 * and constantypedefs is FALSE.
2874 */
2875 definedef = dignorerest;
/* A macro takes arguments when `(' immediately follows its name. */
2876 *is_func_or_var = (c == '(');
2877 if (!*is_func_or_var && !constantypedefs)
2878 return FALSE;
2879 else
2880 return TRUE;
2881 case dignorerest:
2882 return FALSE;
2883 default:
2884 error ("internal error: definedef value.", (char *)NULL);
2885 }
2886
2887 /*
2888 * Now typedefs
2889 */
2890 switch (typdef)
2891 {
2892 case tnone:
2893 if (toktype == st_C_typedef)
2894 {
/* The typedef automaton is started only when `typedefs' is set; in
   either case any pending function or variable state is dropped. */
2895 if (typedefs)
2896 typdef = tkeyseen;
2897 fvextern = FALSE;
2898 fvdef = fvnone;
2899 return FALSE;
2900 }
2901 break;
2902 case tkeyseen:
2903 switch (toktype)
2904 {
2905 default:
2906 break;
2907 case st_none:
2908 case st_C_class:
2909 case st_C_struct:
2910 case st_C_enum:
2911 typdef = ttypeseen;
2912 }
2913 break;
2914 case ttypeseen:
2915 if (structdef == snone && fvdef == fvnone)
2916 {
2917 fvdef = fvnameseen;
2918 return TRUE;
2919 }
2920 break;
2921 case tend:
/* At the end of a typedef any token is tagged, except a struct-like
   keyword. */
2922 switch (toktype)
2923 {
2924 default:
2925 break;
2926 case st_C_class:
2927 case st_C_struct:
2928 case st_C_enum:
2929 return FALSE;
2930 }
2931 return TRUE;
2932 default:
2933 break;
2934 }
2935
2936 /*
2937 * This structdef business is NOT invoked when we are ctags and the
2938 * file is plain C. This is because a struct tag may have the same
2939 * name as another tag, and this loses with ctags.
2940 */
2941 switch (toktype)
2942 {
2943 case st_C_javastruct:
2944 if (structdef == stagseen)
2945 structdef = scolonseen;
2946 return FALSE;
2947 case st_C_template:
2948 case st_C_class:
/* A `class' or `template' keyword met at top level, with every state
   machine idle, turns auto-detection into C++. */
2949 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2950 && bracelev == 0
2951 && definedef == dnone && structdef == snone
2952 && typdef == tnone && fvdef == fvnone)
2953 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2954 if (toktype == st_C_template)
2955 break;
2956 /* FALLTHRU */
2957 case st_C_struct:
2958 case st_C_enum:
2959 if (parlev == 0
2960 && fvdef != vignore
2961 && (typdef == tkeyseen
2962 || (typedefs_or_cplusplus && structdef == snone)))
2963 {
/* Remember which keyword was seen and at which brace level. */
2964 structdef = skeyseen;
2965 structtype = toktype;
2966 structbracelev = bracelev;
2967 if (fvdef == fvnameseen)
2968 fvdef = fvnone;
2969 }
2970 return FALSE;
2971 default:
2972 break;
2973 }
2974
/* The token that follows a struct-like keyword is the struct tag. */
2975 if (structdef == skeyseen)
2976 {
2977 structdef = stagseen;
2978 return TRUE;
2979 }
2980
2981 if (typdef != tnone)
2982 definedef = dnone;
2983
2984 /* Detect Objective C constructs. */
2985 switch (objdef)
2986 {
2987 case onone:
2988 switch (toktype)
2989 {
2990 case st_C_objprot:
2991 objdef = oprotocol;
2992 return FALSE;
2993 case st_C_objimpl:
2994 objdef = oimplementation;
2995 return FALSE;
2996 default:
2997 break;
2998 }
2999 break;
3000 case oimplementation:
3001 /* Save the class tag for functions or variables defined inside. */
3002 objtag = savenstr (str, len);
3003 objdef = oinbody;
3004 return FALSE;
3005 case oprotocol:
3006 /* Save the class tag for categories. */
3007 objtag = savenstr (str, len);
3008 objdef = otagseen;
3009 *is_func_or_var = TRUE;
3010 return TRUE;
3011 case oparenseen:
3012 objdef = ocatseen;
3013 *is_func_or_var = TRUE;
3014 return TRUE;
3015 case oinbody:
3016 break;
3017 case omethodsign:
3018 if (parlev == 0)
3019 {
/* First part of a method name: start token_name afresh with it. */
3020 fvdef = fvnone;
3021 objdef = omethodtag;
3022 linebuffer_setlen (&token_name, len);
3023 strncpy (token_name.buffer, str, len);
3024 token_name.buffer[len] = '\0';
3025 return TRUE;
3026 }
3027 return FALSE;
3028 case omethodcolon:
3029 if (parlev == 0)
3030 objdef = omethodparm;
3031 return FALSE;
3032 case omethodparm:
3033 if (parlev == 0)
3034 {
/* Further part of a method name: append it to token_name. */
3035 fvdef = fvnone;
3036 objdef = omethodtag;
3037 linebuffer_setlen (&token_name, token_name.len + len);
3038 strncat (token_name.buffer, str, len);
3039 return TRUE;
3040 }
3041 return FALSE;
3042 case oignore:
3043 if (toktype == st_C_objend)
3044 {
3045 /* Memory leakage here: the string pointed by objtag is
3046 never released, because many tests would be needed to
3047 avoid breaking on incorrect input code. The amount of
3048 memory leaked here is the sum of the lengths of the
3049 class tags.
3050 free (objtag); */
3051 objdef = onone;
3052 }
3053 return FALSE;
3054 default:
3055 break;
3056 }
3057
3058 /* A function, variable or enum constant? */
3059 switch (toktype)
3060 {
3061 case st_C_extern:
3062 fvextern = TRUE;
/* Inside or after a parameter list, or while ignoring, the current
   fvdef state is kept; otherwise it is reset. */
3063 switch (fvdef)
3064 {
3065 case finlist:
3066 case flistseen:
3067 case fignore:
3068 case vignore:
3069 break;
3070 default:
3071 fvdef = fvnone;
3072 }
3073 return FALSE;
3074 case st_C_ignore:
3075 fvextern = FALSE;
3076 fvdef = vignore;
3077 return FALSE;
3078 case st_C_operator:
3079 fvdef = foperator;
3080 *is_func_or_var = TRUE;
3081 return TRUE;
3082 case st_none:
/* A plain identifier found deeper than the brace level of the last
   `enum' keyword is taken as an enum constant. */
3083 if (constantypedefs
3084 && structdef == snone
3085 && structtype == st_C_enum && bracelev > structbracelev)
3086 return TRUE; /* enum constant */
3087 switch (fvdef)
3088 {
3089 case fdefunkey:
3090 if (bracelev > 0)
3091 break;
3092 fvdef = fdefunname; /* GNU macro */
3093 *is_func_or_var = TRUE;
3094 return TRUE;
3095 case fvnone:
3096 switch (typdef)
3097 {
3098 case ttypeseen:
3099 return FALSE;
3100 case tnone:
/* `asm' and `__asm__' start a construct that is skipped. */
3101 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3102 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3103 {
3104 fvdef = vignore;
3105 return FALSE;
3106 }
3107 break;
3108 default:
3109 break;
3110 }
3111 /* FALLTHRU */
3112 case fvnameseen:
/* A token ending in "::operator" starts a C++ operator definition. */
3113 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3114 {
3115 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3116 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3117 fvdef = foperator;
3118 *is_func_or_var = TRUE;
3119 return TRUE;
3120 }
/* Inside braces only names directly in a struct body are candidates. */
3121 if (bracelev > 0 && !instruct)
3122 break;
3123 fvdef = fvnameseen; /* function or variable */
3124 *is_func_or_var = TRUE;
3125 return TRUE;
3126 default:
3127 break;
3128 }
3129 break;
3130 default:
3131 break;
3132 }
3133
3134 return FALSE;
3135 }
3136
3137 \f
3138 /*
3139 * C_entries often keeps pointers to tokens or lines which are older than
3140 * the line currently read. By keeping two line buffers, and switching
3141 * them at end of line, it is possible to use those pointers.
3142 */
3143 static struct
3144 {
3145 long linepos;
3146 linebuffer lb;
3147 } lbs[2];
3148
3149 #define current_lb_is_new (newndx == curndx)
3150 #define switch_line_buffers() (curndx = 1 - curndx)
3151
3152 #define curlb (lbs[curndx].lb)
3153 #define newlb (lbs[newndx].lb)
3154 #define curlinepos (lbs[curndx].linepos)
3155 #define newlinepos (lbs[newndx].linepos)
3156
3157 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3158 #define cplpl (c_ext & C_PLPL)
3159 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3160
3161 #define CNL_SAVE_DEFINEDEF() \
3162 do { \
3163 curlinepos = charno; \
3164 readline (&curlb, inf); \
3165 lp = curlb.buffer; \
3166 quotednl = FALSE; \
3167 newndx = curndx; \
3168 } while (0)
3169
3170 #define CNL() \
3171 do { \
3172 CNL_SAVE_DEFINEDEF(); \
3173 if (savetoken.valid) \
3174 { \
3175 token = savetoken; \
3176 savetoken.valid = FALSE; \
3177 } \
3178 definedef = dnone; \
3179 } while (0)
3180
3181
3182 static void
3183 make_C_tag (isfun)
3184 bool isfun;
3185 {
3186 /* This function should never be called when token.valid is FALSE, but
3187 we must protect against invalid input or internal errors. */
3188 if (!DEBUG && !token.valid)
3189 return;
3190
3191 if (token.valid)
3192 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3193 token.offset+token.length+1, token.lineno, token.linepos);
3194 else /* this case is optimised away if !DEBUG */
3195 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3196 token_name.len + 17, isfun, token.line,
3197 token.offset+token.length+1, token.lineno, token.linepos);
3198
3199 token.valid = FALSE;
3200 }
3201
3202
3203 /*
3204 * C_entries ()
3205 * This routine finds functions, variables, typedefs,
3206 * #define's, enum constants and struct/union/enum definitions in
3207 * C syntax and adds them to the list.
3208 */
3209 static void
3210 C_entries (c_ext, inf)
3211 int c_ext; /* extension of C */
3212 FILE *inf; /* input file */
3213 {
3214 register char c; /* latest char read; '\0' for end of line */
3215 register char *lp; /* pointer one beyond the character `c' */
3216 int curndx, newndx; /* indices for current and new lb */
3217 register int tokoff; /* offset in line of start of current token */
3218 register int toklen; /* length of current token */
3219 char *qualifier; /* string used to qualify names */
3220 int qlen; /* length of qualifier */
3221 int bracelev; /* current brace level */
3222 int bracketlev; /* current bracket level */
3223 int parlev; /* current parenthesis level */
3224 int attrparlev; /* __attribute__ parenthesis level */
3225 int templatelev; /* current template level */
3226 int typdefbracelev; /* bracelev where a typedef struct body begun */
3227 bool incomm, inquote, inchar, quotednl, midtoken;
3228 bool yacc_rules; /* in the rules part of a yacc file */
3229 struct tok savetoken; /* token saved during preprocessor handling */
3230
3231
3232 linebuffer_init (&lbs[0].lb);
3233 linebuffer_init (&lbs[1].lb);
3234 if (cstack.size == 0)
3235 {
3236 cstack.size = (DEBUG) ? 1 : 4;
3237 cstack.nl = 0;
3238 cstack.cname = xnew (cstack.size, char *);
3239 cstack.bracelev = xnew (cstack.size, int);
3240 }
3241
3242 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3243 curndx = newndx = 0;
3244 lp = curlb.buffer;
3245 *lp = 0;
3246
3247 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3248 structdef = snone; definedef = dnone; objdef = onone;
3249 yacc_rules = FALSE;
3250 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3251 token.valid = savetoken.valid = FALSE;
3252 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3253 if (cjava)
3254 { qualifier = "."; qlen = 1; }
3255 else
3256 { qualifier = "::"; qlen = 2; }
3257
3258
3259 while (!feof (inf))
3260 {
3261 c = *lp++;
3262 if (c == '\\')
3263 {
3264 /* If we are at the end of the line, the next character is a
3265 '\0'; do not skip it, because it is what tells us
3266 to read the next line. */
3267 if (*lp == '\0')
3268 {
3269 quotednl = TRUE;
3270 continue;
3271 }
3272 lp++;
3273 c = ' ';
3274 }
3275 else if (incomm)
3276 {
3277 switch (c)
3278 {
3279 case '*':
3280 if (*lp == '/')
3281 {
3282 c = *lp++;
3283 incomm = FALSE;
3284 }
3285 break;
3286 case '\0':
3287 /* Newlines inside comments do not end macro definitions in
3288 traditional cpp. */
3289 CNL_SAVE_DEFINEDEF ();
3290 break;
3291 }
3292 continue;
3293 }
3294 else if (inquote)
3295 {
3296 switch (c)
3297 {
3298 case '"':
3299 inquote = FALSE;
3300 break;
3301 case '\0':
3302 /* Newlines inside strings do not end macro definitions
3303 in traditional cpp, even though compilers don't
3304 usually accept them. */
3305 CNL_SAVE_DEFINEDEF ();
3306 break;
3307 }
3308 continue;
3309 }
3310 else if (inchar)
3311 {
3312 switch (c)
3313 {
3314 case '\0':
3315 /* Hmmm, something went wrong. */
3316 CNL ();
3317 /* FALLTHRU */
3318 case '\'':
3319 inchar = FALSE;
3320 break;
3321 }
3322 continue;
3323 }
3324 else if (bracketlev > 0)
3325 {
3326 switch (c)
3327 {
3328 case ']':
3329 if (--bracketlev > 0)
3330 continue;
3331 break;
3332 case '\0':
3333 CNL_SAVE_DEFINEDEF ();
3334 break;
3335 }
3336 continue;
3337 }
3338 else switch (c)
3339 {
3340 case '"':
3341 inquote = TRUE;
3342 if (inattribute)
3343 break;
3344 switch (fvdef)
3345 {
3346 case fdefunkey:
3347 case fstartlist:
3348 case finlist:
3349 case fignore:
3350 case vignore:
3351 break;
3352 default:
3353 fvextern = FALSE;
3354 fvdef = fvnone;
3355 }
3356 continue;
3357 case '\'':
3358 inchar = TRUE;
3359 if (inattribute)
3360 break;
3361 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3362 {
3363 fvextern = FALSE;
3364 fvdef = fvnone;
3365 }
3366 continue;
3367 case '/':
3368 if (*lp == '*')
3369 {
3370 lp++;
3371 incomm = TRUE;
3372 continue;
3373 }
3374 else if (/* cplpl && */ *lp == '/')
3375 {
3376 c = '\0';
3377 break;
3378 }
3379 else
3380 break;
3381 case '%':
3382 if ((c_ext & YACC) && *lp == '%')
3383 {
3384 /* Entering or exiting rules section in yacc file. */
3385 lp++;
3386 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3387 typdef = tnone; structdef = snone;
3388 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3389 bracelev = 0;
3390 yacc_rules = !yacc_rules;
3391 continue;
3392 }
3393 else
3394 break;
3395 case '#':
3396 if (definedef == dnone)
3397 {
3398 char *cp;
3399 bool cpptoken = TRUE;
3400
3401 /* Look back on this line. If all blanks, or nonblanks
3402 followed by an end of comment, this is a preprocessor
3403 token. */
3404 for (cp = newlb.buffer; cp < lp-1; cp++)
3405 if (!iswhite (*cp))
3406 {
3407 if (*cp == '*' && *(cp+1) == '/')
3408 {
3409 cp++;
3410 cpptoken = TRUE;
3411 }
3412 else
3413 cpptoken = FALSE;
3414 }
3415 if (cpptoken)
3416 definedef = dsharpseen;
3417 } /* if (definedef == dnone) */
3418 continue;
3419 case '[':
3420 bracketlev++;
3421 continue;
3422 } /* switch (c) */
3423
3424
3425 /* Consider token only if some involved conditions are satisfied. */
3426 if (typdef != tignore
3427 && definedef != dignorerest
3428 && fvdef != finlist
3429 && templatelev == 0
3430 && (definedef != dnone
3431 || structdef != scolonseen)
3432 && !inattribute)
3433 {
3434 if (midtoken)
3435 {
3436 if (endtoken (c))
3437 {
3438 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3439 /* This handles :: in the middle,
3440 but not at the beginning of an identifier.
3441 Also, space-separated :: is not recognised. */
3442 {
3443 if (c_ext & C_AUTO) /* automatic detection of C++ */
3444 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3445 lp += 2;
3446 toklen += 2;
3447 c = lp[-1];
3448 goto still_in_token;
3449 }
3450 else
3451 {
3452 bool funorvar = FALSE;
3453
3454 if (yacc_rules
3455 || consider_token (newlb.buffer + tokoff, toklen, c,
3456 &c_ext, bracelev, parlev,
3457 &funorvar))
3458 {
3459 if (fvdef == foperator)
3460 {
3461 char *oldlp = lp;
3462 lp = skip_spaces (lp-1);
3463 if (*lp != '\0')
3464 lp += 1;
3465 while (*lp != '\0'
3466 && !iswhite (*lp) && *lp != '(')
3467 lp += 1;
3468 c = *lp++;
3469 toklen += lp - oldlp;
3470 }
3471 token.named = FALSE;
3472 if (!plainc
3473 && nestlev > 0 && definedef == dnone)
3474 /* in struct body */
3475 {
3476 write_classname (&token_name, qualifier);
3477 linebuffer_setlen (&token_name,
3478 token_name.len+qlen+toklen);
3479 strcat (token_name.buffer, qualifier);
3480 strncat (token_name.buffer,
3481 newlb.buffer + tokoff, toklen);
3482 token.named = TRUE;
3483 }
3484 else if (objdef == ocatseen)
3485 /* Objective C category */
3486 {
3487 int len = strlen (objtag) + 2 + toklen;
3488 linebuffer_setlen (&token_name, len);
3489 strcpy (token_name.buffer, objtag);
3490 strcat (token_name.buffer, "(");
3491 strncat (token_name.buffer,
3492 newlb.buffer + tokoff, toklen);
3493 strcat (token_name.buffer, ")");
3494 token.named = TRUE;
3495 }
3496 else if (objdef == omethodtag
3497 || objdef == omethodparm)
3498 /* Objective C method */
3499 {
3500 token.named = TRUE;
3501 }
3502 else if (fvdef == fdefunname)
3503 /* GNU DEFUN and similar macros */
3504 {
3505 bool defun = (newlb.buffer[tokoff] == 'F');
3506 int off = tokoff;
3507 int len = toklen;
3508
3509 /* Rewrite the tag so that emacs lisp DEFUNs
3510 can be found by their elisp name */
3511 if (defun)
3512 {
3513 off += 1;
3514 len -= 1;
3515 }
3516 len = toklen;
3517 linebuffer_setlen (&token_name, len);
3518 strncpy (token_name.buffer,
3519 newlb.buffer + off, len);
3520 token_name.buffer[len] = '\0';
3521 if (defun)
3522 while (--len >= 0)
3523 if (token_name.buffer[len] == '_')
3524 token_name.buffer[len] = '-';
3525 token.named = defun;
3526 }
3527 else
3528 {
3529 linebuffer_setlen (&token_name, toklen);
3530 strncpy (token_name.buffer,
3531 newlb.buffer + tokoff, toklen);
3532 token_name.buffer[toklen] = '\0';
3533 /* Name macros and members. */
3534 token.named = (structdef == stagseen
3535 || typdef == ttypeseen
3536 || typdef == tend
3537 || (funorvar
3538 && definedef == dignorerest)
3539 || (funorvar
3540 && definedef == dnone
3541 && structdef == snone
3542 && bracelev > 0));
3543 }
3544 token.lineno = lineno;
3545 token.offset = tokoff;
3546 token.length = toklen;
3547 token.line = newlb.buffer;
3548 token.linepos = newlinepos;
3549 token.valid = TRUE;
3550
3551 if (definedef == dnone
3552 && (fvdef == fvnameseen
3553 || fvdef == foperator
3554 || structdef == stagseen
3555 || typdef == tend
3556 || typdef == ttypeseen
3557 || objdef != onone))
3558 {
3559 if (current_lb_is_new)
3560 switch_line_buffers ();
3561 }
3562 else if (definedef != dnone
3563 || fvdef == fdefunname
3564 || instruct)
3565 make_C_tag (funorvar);
3566 }
3567 else /* not yacc and consider_token failed */
3568 {
3569 if (inattribute && fvdef == fignore)
3570 {
3571 /* We have just met __attribute__ after a
3572 function parameter list: do not tag the
3573 function again. */
3574 fvdef = fvnone;
3575 }
3576 }
3577 midtoken = FALSE;
3578 }
3579 } /* if (endtoken (c)) */
3580 else if (intoken (c))
3581 still_in_token:
3582 {
3583 toklen++;
3584 continue;
3585 }
3586 } /* if (midtoken) */
3587 else if (begtoken (c))
3588 {
3589 switch (definedef)
3590 {
3591 case dnone:
3592 switch (fvdef)
3593 {
3594 case fstartlist:
3595 /* This prevents tagging fb in
3596 void (__attribute__((noreturn)) *fb) (void);
3597 Fixing this is not easy and not very important. */
3598 fvdef = finlist;
3599 continue;
3600 case flistseen:
3601 if (plainc || declarations)
3602 {
3603 make_C_tag (TRUE); /* a function */
3604 fvdef = fignore;
3605 }
3606 break;
3607 default:
3608 break;
3609 }
3610 if (structdef == stagseen && !cjava)
3611 {
3612 popclass_above (bracelev);
3613 structdef = snone;
3614 }
3615 break;
3616 case dsharpseen:
3617 savetoken = token;
3618 break;
3619 default:
3620 break;
3621 }
3622 if (!yacc_rules || lp == newlb.buffer + 1)
3623 {
3624 tokoff = lp - 1 - newlb.buffer;
3625 toklen = 1;
3626 midtoken = TRUE;
3627 }
3628 continue;
3629 } /* if (begtoken) */
3630 } /* if must look at token */
3631
3632
3633 /* Detect end of line, colon, comma, semicolon and various braces
3634 after having handled a token.*/
3635 switch (c)
3636 {
3637 case ':':
3638 if (inattribute)
3639 break;
3640 if (yacc_rules && token.offset == 0 && token.valid)
3641 {
3642 make_C_tag (FALSE); /* a yacc function */
3643 break;
3644 }
3645 if (definedef != dnone)
3646 break;
3647 switch (objdef)
3648 {
3649 case otagseen:
3650 objdef = oignore;
3651 make_C_tag (TRUE); /* an Objective C class */
3652 break;
3653 case omethodtag:
3654 case omethodparm:
3655 objdef = omethodcolon;
3656 linebuffer_setlen (&token_name, token_name.len + 1);
3657 strcat (token_name.buffer, ":");
3658 break;
3659 default:
3660 break;
3661 }
3662 if (structdef == stagseen)
3663 {
3664 structdef = scolonseen;
3665 break;
3666 }
3667 /* Should be useless, but may be work as a safety net. */
3668 if (cplpl && fvdef == flistseen)
3669 {
3670 make_C_tag (TRUE); /* a function */
3671 fvdef = fignore;
3672 break;
3673 }
3674 break;
3675 case ';':
3676 if (definedef != dnone || inattribute)
3677 break;
3678 switch (typdef)
3679 {
3680 case tend:
3681 case ttypeseen:
3682 make_C_tag (FALSE); /* a typedef */
3683 typdef = tnone;
3684 fvdef = fvnone;
3685 break;
3686 case tnone:
3687 case tinbody:
3688 case tignore:
3689 switch (fvdef)
3690 {
3691 case fignore:
3692 if (typdef == tignore || cplpl)
3693 fvdef = fvnone;
3694 break;
3695 case fvnameseen:
3696 if ((globals && bracelev == 0 && (!fvextern || declarations))
3697 || (members && instruct))
3698 make_C_tag (FALSE); /* a variable */
3699 fvextern = FALSE;
3700 fvdef = fvnone;
3701 token.valid = FALSE;
3702 break;
3703 case flistseen:
3704 if ((declarations
3705 && (cplpl || !instruct)
3706 && (typdef == tnone || (typdef != tignore && instruct)))
3707 || (members
3708 && plainc && instruct))
3709 make_C_tag (TRUE); /* a function */
3710 /* FALLTHRU */
3711 default:
3712 fvextern = FALSE;
3713 fvdef = fvnone;
3714 if (declarations
3715 && cplpl && structdef == stagseen)
3716 make_C_tag (FALSE); /* forward declaration */
3717 else
3718 token.valid = FALSE;
3719 } /* switch (fvdef) */
3720 /* FALLTHRU */
3721 default:
3722 if (!instruct)
3723 typdef = tnone;
3724 }
3725 if (structdef == stagseen)
3726 structdef = snone;
3727 break;
3728 case ',':
3729 if (definedef != dnone || inattribute)
3730 break;
3731 switch (objdef)
3732 {
3733 case omethodtag:
3734 case omethodparm:
3735 make_C_tag (TRUE); /* an Objective C method */
3736 objdef = oinbody;
3737 break;
3738 default:
3739 break;
3740 }
3741 switch (fvdef)
3742 {
3743 case fdefunkey:
3744 case foperator:
3745 case fstartlist:
3746 case finlist:
3747 case fignore:
3748 case vignore:
3749 break;
3750 case fdefunname:
3751 fvdef = fignore;
3752 break;
3753 case fvnameseen:
3754 if (parlev == 0
3755 && ((globals
3756 && bracelev == 0
3757 && templatelev == 0
3758 && (!fvextern || declarations))
3759 || (members && instruct)))
3760 make_C_tag (FALSE); /* a variable */
3761 break;
3762 case flistseen:
3763 if ((declarations && typdef == tnone && !instruct)
3764 || (members && typdef != tignore && instruct))
3765 {
3766 make_C_tag (TRUE); /* a function */
3767 fvdef = fvnameseen;
3768 }
3769 else if (!declarations)
3770 fvdef = fvnone;
3771 token.valid = FALSE;
3772 break;
3773 default:
3774 fvdef = fvnone;
3775 }
3776 if (structdef == stagseen)
3777 structdef = snone;
3778 break;
3779 case ']':
3780 if (definedef != dnone || inattribute)
3781 break;
3782 if (structdef == stagseen)
3783 structdef = snone;
3784 switch (typdef)
3785 {
3786 case ttypeseen:
3787 case tend:
3788 typdef = tignore;
3789 make_C_tag (FALSE); /* a typedef */
3790 break;
3791 case tnone:
3792 case tinbody:
3793 switch (fvdef)
3794 {
3795 case foperator:
3796 case finlist:
3797 case fignore:
3798 case vignore:
3799 break;
3800 case fvnameseen:
3801 if ((members && bracelev == 1)
3802 || (globals && bracelev == 0
3803 && (!fvextern || declarations)))
3804 make_C_tag (FALSE); /* a variable */
3805 /* FALLTHRU */
3806 default:
3807 fvdef = fvnone;
3808 }
3809 break;
3810 default:
3811 break;
3812 }
3813 break;
3814 case '(':
3815 if (inattribute)
3816 {
3817 attrparlev++;
3818 break;
3819 }
3820 if (definedef != dnone)
3821 break;
3822 if (objdef == otagseen && parlev == 0)
3823 objdef = oparenseen;
3824 switch (fvdef)
3825 {
3826 case fvnameseen:
3827 if (typdef == ttypeseen
3828 && *lp != '*'
3829 && !instruct)
3830 {
3831 /* This handles constructs like:
3832 typedef void OperatorFun (int fun); */
3833 make_C_tag (FALSE);
3834 typdef = tignore;
3835 fvdef = fignore;
3836 break;
3837 }
3838 /* FALLTHRU */
3839 case foperator:
3840 fvdef = fstartlist;
3841 break;
3842 case flistseen:
3843 fvdef = finlist;
3844 break;
3845 default:
3846 break;
3847 }
3848 parlev++;
3849 break;
3850 case ')':
3851 if (inattribute)
3852 {
3853 if (--attrparlev == 0)
3854 inattribute = FALSE;
3855 break;
3856 }
3857 if (definedef != dnone)
3858 break;
3859 if (objdef == ocatseen && parlev == 1)
3860 {
3861 make_C_tag (TRUE); /* an Objective C category */
3862 objdef = oignore;
3863 }
3864 if (--parlev == 0)
3865 {
3866 switch (fvdef)
3867 {
3868 case fstartlist:
3869 case finlist:
3870 fvdef = flistseen;
3871 break;
3872 default:
3873 break;
3874 }
3875 if (!instruct
3876 && (typdef == tend
3877 || typdef == ttypeseen))
3878 {
3879 typdef = tignore;
3880 make_C_tag (FALSE); /* a typedef */
3881 }
3882 }
3883 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3884 parlev = 0;
3885 break;
3886 case '{':
3887 if (definedef != dnone)
3888 break;
3889 if (typdef == ttypeseen)
3890 {
3891 /* Whenever typdef is set to tinbody (currently only
3892 here), typdefbracelev should be set to bracelev. */
3893 typdef = tinbody;
3894 typdefbracelev = bracelev;
3895 }
3896 switch (fvdef)
3897 {
3898 case flistseen:
3899 make_C_tag (TRUE); /* a function */
3900 /* FALLTHRU */
3901 case fignore:
3902 fvdef = fvnone;
3903 break;
3904 case fvnone:
3905 switch (objdef)
3906 {
3907 case otagseen:
3908 make_C_tag (TRUE); /* an Objective C class */
3909 objdef = oignore;
3910 break;
3911 case omethodtag:
3912 case omethodparm:
3913 make_C_tag (TRUE); /* an Objective C method */
3914 objdef = oinbody;
3915 break;
3916 default:
3917 /* Neutralize `extern "C" {' grot. */
3918 if (bracelev == 0 && structdef == snone && nestlev == 0
3919 && typdef == tnone)
3920 bracelev = -1;
3921 }
3922 break;
3923 default:
3924 break;
3925 }
3926 switch (structdef)
3927 {
3928 case skeyseen: /* unnamed struct */
3929 pushclass_above (bracelev, NULL, 0);
3930 structdef = snone;
3931 break;
3932 case stagseen: /* named struct or enum */
3933 case scolonseen: /* a class */
3934 pushclass_above (bracelev,token.line+token.offset, token.length);
3935 structdef = snone;
3936 make_C_tag (FALSE); /* a struct or enum */
3937 break;
3938 default:
3939 break;
3940 }
3941 bracelev++;
3942 break;
3943 case '*':
3944 if (definedef != dnone)
3945 break;
3946 if (fvdef == fstartlist)
3947 {
3948 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3949 token.valid = FALSE;
3950 }
3951 break;
3952 case '}':
3953 if (definedef != dnone)
3954 break;
3955 if (!ignoreindent && lp == newlb.buffer + 1)
3956 {
3957 if (bracelev != 0)
3958 token.valid = FALSE;
3959 bracelev = 0; /* reset brace level if first column */
3960 parlev = 0; /* also reset paren level, just in case... */
3961 }
3962 else if (bracelev > 0)
3963 bracelev--;
3964 else
3965 token.valid = FALSE; /* something gone amiss, token unreliable */
3966 popclass_above (bracelev);
3967 structdef = snone;
3968 /* Only if typdef == tinbody is typdefbracelev significant. */
3969 if (typdef == tinbody && bracelev <= typdefbracelev)
3970 {
3971 assert (bracelev == typdefbracelev);
3972 typdef = tend;
3973 }
3974 break;
3975 case '=':
3976 if (definedef != dnone)
3977 break;
3978 switch (fvdef)
3979 {
3980 case foperator:
3981 case finlist:
3982 case fignore:
3983 case vignore:
3984 break;
3985 case fvnameseen:
3986 if ((members && bracelev == 1)
3987 || (globals && bracelev == 0 && (!fvextern || declarations)))
3988 make_C_tag (FALSE); /* a variable */
3989 /* FALLTHRU */
3990 default:
3991 fvdef = vignore;
3992 }
3993 break;
3994 case '<':
3995 if (cplpl
3996 && (structdef == stagseen || fvdef == fvnameseen))
3997 {
3998 templatelev++;
3999 break;
4000 }
4001 goto resetfvdef;
4002 case '>':
4003 if (templatelev > 0)
4004 {
4005 templatelev--;
4006 break;
4007 }
4008 goto resetfvdef;
4009 case '+':
4010 case '-':
4011 if (objdef == oinbody && bracelev == 0)
4012 {
4013 objdef = omethodsign;
4014 break;
4015 }
4016 /* FALLTHRU */
4017 resetfvdef:
4018 case '#': case '~': case '&': case '%': case '/':
4019 case '|': case '^': case '!': case '.': case '?':
4020 if (definedef != dnone)
4021 break;
4022 /* These surely cannot follow a function tag in C. */
4023 switch (fvdef)
4024 {
4025 case foperator:
4026 case finlist:
4027 case fignore:
4028 case vignore:
4029 break;
4030 default:
4031 fvdef = fvnone;
4032 }
4033 break;
4034 case '\0':
4035 if (objdef == otagseen)
4036 {
4037 make_C_tag (TRUE); /* an Objective C class */
4038 objdef = oignore;
4039 }
4040 /* If a macro spans multiple lines don't reset its state. */
4041 if (quotednl)
4042 CNL_SAVE_DEFINEDEF ();
4043 else
4044 CNL ();
4045 break;
4046 } /* switch (c) */
4047
4048 } /* while not eof */
4049
4050 free (lbs[0].lb.buffer);
4051 free (lbs[1].lb.buffer);
4052 }
4053
4054 /*
4055 * Process either a C++ file or a C file depending on the setting
4056 * of a global flag.
4057 */
4058 static void
4059 default_C_entries (inf)
4060 FILE *inf;
4061 {
4062 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4063 }
4064
/* Scan INF as plain C: no language-extension bit is passed on.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4072
4073 /* Always do C++. */
4074 static void
4075 Cplusplus_entries (inf)
4076 FILE *inf;
4077 {
4078 C_entries (C_PLPL, inf);
4079 }
4080
4081 /* Always do Java. */
4082 static void
4083 Cjava_entries (inf)
4084 FILE *inf;
4085 {
4086 C_entries (C_JAVA, inf);
4087 }
4088
4089 /* Always do C*. */
4090 static void
4091 Cstar_entries (inf)
4092 FILE *inf;
4093 {
4094 C_entries (C_STAR, inf);
4095 }
4096
4097 /* Always do Yacc. */
4098 static void
4099 Yacc_entries (inf)
4100 FILE *inf;
4101 {
4102 C_entries (YACC, inf);
4103 }
4104
4105 \f
/* Useful macros. */

/* Expands to the header of a `for' loop whose body runs once per input
   line.  Before each iteration the next line of FILE_POINTER is read
   into LINE_BUFFER and CHAR_POINTER is set to the start of its text;
   the loop stops once feof reports end of file.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&line_buffer, file_pointer),			\
	   char_pointer = line_buffer.buffer,				\
	   TRUE);							\
      )

/* True when CP points at KEYWORD followed by a character for which
   notinname holds.  On success CP is moved past the keyword and past
   the spaces that follow it, so CP must be a modifiable lvalue.  */
#define LOOKING_AT(cp, keyword)	/* keyword is a constant string */	\
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */	\
   && notinname ((cp)[sizeof(keyword)-1])	/* end of keyword */	\
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
4119
4120 /*
4121 * Read a file, but do no processing. This is used to do regexp
4122 * matching on files that have no language defined.
4123 */
4124 static void
4125 just_read_file (inf)
4126 FILE *inf;
4127 {
4128 register char *dummy;
4129
4130 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4131 continue;
4132 }
4133
4134 \f
4135 /* Fortran parsing */
4136
4137 static void F_takeprec __P((void));
4138 static void F_getit __P((FILE *));
4139
4140 static void
4141 F_takeprec ()
4142 {
4143 dbp = skip_spaces (dbp);
4144 if (*dbp != '*')
4145 return;
4146 dbp++;
4147 dbp = skip_spaces (dbp);
4148 if (strneq (dbp, "(*)", 3))
4149 {
4150 dbp += 3;
4151 return;
4152 }
4153 if (!ISDIGIT (*dbp))
4154 {
4155 --dbp; /* force failure */
4156 return;
4157 }
4158 do
4159 dbp++;
4160 while (ISDIGIT (*dbp));
4161 }
4162
4163 static void
4164 F_getit (inf)
4165 FILE *inf;
4166 {
4167 register char *cp;
4168
4169 dbp = skip_spaces (dbp);
4170 if (*dbp == '\0')
4171 {
4172 readline (&lb, inf);
4173 dbp = lb.buffer;
4174 if (dbp[5] != '&')
4175 return;
4176 dbp += 6;
4177 dbp = skip_spaces (dbp);
4178 }
4179 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4180 return;
4181 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4182 continue;
4183 make_tag (dbp, cp-dbp, TRUE,
4184 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4185 }
4186
4187
4188 static void
4189 Fortran_functions (inf)
4190 FILE *inf;
4191 {
4192 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4193 {
4194 if (*dbp == '%')
4195 dbp++; /* Ratfor escape to fortran */
4196 dbp = skip_spaces (dbp);
4197 if (*dbp == '\0')
4198 continue;
4199 switch (lowcase (*dbp))
4200 {
4201 case 'i':
4202 if (nocase_tail ("integer"))
4203 F_takeprec ();
4204 break;
4205 case 'r':
4206 if (nocase_tail ("real"))
4207 F_takeprec ();
4208 break;
4209 case 'l':
4210 if (nocase_tail ("logical"))
4211 F_takeprec ();
4212 break;
4213 case 'c':
4214 if (nocase_tail ("complex") || nocase_tail ("character"))
4215 F_takeprec ();
4216 break;
4217 case 'd':
4218 if (nocase_tail ("double"))
4219 {
4220 dbp = skip_spaces (dbp);
4221 if (*dbp == '\0')
4222 continue;
4223 if (nocase_tail ("precision"))
4224 break;
4225 continue;
4226 }
4227 break;
4228 }
4229 dbp = skip_spaces (dbp);
4230 if (*dbp == '\0')
4231 continue;
4232 switch (lowcase (*dbp))
4233 {
4234 case 'f':
4235 if (nocase_tail ("function"))
4236 F_getit (inf);
4237 continue;
4238 case 's':
4239 if (nocase_tail ("subroutine"))
4240 F_getit (inf);
4241 continue;
4242 case 'e':
4243 if (nocase_tail ("entry"))
4244 F_getit (inf);
4245 continue;
4246 case 'b':
4247 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4248 {
4249 dbp = skip_spaces (dbp);
4250 if (*dbp == '\0') /* assume un-named */
4251 make_tag ("blockdata", 9, TRUE,
4252 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4253 else
4254 F_getit (inf); /* look for name */
4255 }
4256 continue;
4257 }
4258 }
4259 }
4260
4261 \f
4262 /*
4263 * Ada parsing
4264 * Original code by
4265 * Philippe Waroquiers (1998)
4266 */
4267
4268 static void Ada_getit __P((FILE *, char *));
4269
4270 /* Once we are positioned after an "interesting" keyword, let's get
4271 the real tag value necessary. */
4272 static void
4273 Ada_getit (inf, name_qualifier)
4274 FILE *inf;
4275 char *name_qualifier;
4276 {
4277 register char *cp;
4278 char *name;
4279 char c;
4280
4281 while (!feof (inf))
4282 {
4283 dbp = skip_spaces (dbp);
4284 if (*dbp == '\0'
4285 || (dbp[0] == '-' && dbp[1] == '-'))
4286 {
4287 readline (&lb, inf);
4288 dbp = lb.buffer;
4289 }
4290 switch (lowcase(*dbp))
4291 {
4292 case 'b':
4293 if (nocase_tail ("body"))
4294 {
4295 /* Skipping body of procedure body or package body or ....
4296 resetting qualifier to body instead of spec. */
4297 name_qualifier = "/b";
4298 continue;
4299 }
4300 break;
4301 case 't':
4302 /* Skipping type of task type or protected type ... */
4303 if (nocase_tail ("type"))
4304 continue;
4305 break;
4306 }
4307 if (*dbp == '"')
4308 {
4309 dbp += 1;
4310 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4311 continue;
4312 }
4313 else
4314 {
4315 dbp = skip_spaces (dbp);
4316 for (cp = dbp;
4317 (*cp != '\0'
4318 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4319 cp++)
4320 continue;
4321 if (cp == dbp)
4322 return;
4323 }
4324 c = *cp;
4325 *cp = '\0';
4326 name = concat (dbp, name_qualifier, "");
4327 *cp = c;
4328 make_tag (name, strlen (name), TRUE,
4329 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4330 free (name);
4331 if (c == '"')
4332 dbp = cp + 1;
4333 return;
4334 }
4335 }
4336
4337 static void
4338 Ada_funcs (inf)
4339 FILE *inf;
4340 {
4341 bool inquote = FALSE;
4342 bool skip_till_semicolumn = FALSE;
4343
4344 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4345 {
4346 while (*dbp != '\0')
4347 {
4348 /* Skip a string i.e. "abcd". */
4349 if (inquote || (*dbp == '"'))
4350 {
4351 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4352 if (dbp != NULL)
4353 {
4354 inquote = FALSE;
4355 dbp += 1;
4356 continue; /* advance char */
4357 }
4358 else
4359 {
4360 inquote = TRUE;
4361 break; /* advance line */
4362 }
4363 }
4364
4365 /* Skip comments. */
4366 if (dbp[0] == '-' && dbp[1] == '-')
4367 break; /* advance line */
4368
4369 /* Skip character enclosed in single quote i.e. 'a'
4370 and skip single quote starting an attribute i.e. 'Image. */
4371 if (*dbp == '\'')
4372 {
4373 dbp++ ;
4374 if (*dbp != '\0')
4375 dbp++;
4376 continue;
4377 }
4378
4379 if (skip_till_semicolumn)
4380 {
4381 if (*dbp == ';')
4382 skip_till_semicolumn = FALSE;
4383 dbp++;
4384 continue; /* advance char */
4385 }
4386
4387 /* Search for beginning of a token. */
4388 if (!begtoken (*dbp))
4389 {
4390 dbp++;
4391 continue; /* advance char */
4392 }
4393
4394 /* We are at the beginning of a token. */
4395 switch (lowcase(*dbp))
4396 {
4397 case 'f':
4398 if (!packages_only && nocase_tail ("function"))
4399 Ada_getit (inf, "/f");
4400 else
4401 break; /* from switch */
4402 continue; /* advance char */
4403 case 'p':
4404 if (!packages_only && nocase_tail ("procedure"))
4405 Ada_getit (inf, "/p");
4406 else if (nocase_tail ("package"))
4407 Ada_getit (inf, "/s");
4408 else if (nocase_tail ("protected")) /* protected type */
4409 Ada_getit (inf, "/t");
4410 else
4411 break; /* from switch */
4412 continue; /* advance char */
4413
4414 case 'u':
4415 if (typedefs && !packages_only && nocase_tail ("use"))
4416 {
4417 /* when tagging types, avoid tagging use type Pack.Typename;
4418 for this, we will skip everything till a ; */
4419 skip_till_semicolumn = TRUE;
4420 continue; /* advance char */
4421 }
4422
4423 case 't':
4424 if (!packages_only && nocase_tail ("task"))
4425 Ada_getit (inf, "/k");
4426 else if (typedefs && !packages_only && nocase_tail ("type"))
4427 {
4428 Ada_getit (inf, "/t");
4429 while (*dbp != '\0')
4430 dbp += 1;
4431 }
4432 else
4433 break; /* from switch */
4434 continue; /* advance char */
4435 }
4436
4437 /* Look for the end of the token. */
4438 while (!endtoken (*dbp))
4439 dbp++;
4440
4441 } /* advance char */
4442 } /* advance line */
4443 }
4444
4445 \f
4446 /*
4447 * Unix and microcontroller assembly tag handling
4448 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4449 * Idea by Bob Weiner, Motorola Inc. (1994)
4450 */
4451 static void
4452 Asm_labels (inf)
4453 FILE *inf;
4454 {
4455 register char *cp;
4456
4457 LOOP_ON_INPUT_LINES (inf, lb, cp)
4458 {
4459 /* If first char is alphabetic or one of [_.$], test for colon
4460 following identifier. */
4461 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4462 {
4463 /* Read past label. */
4464 cp++;
4465 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4466 cp++;
4467 if (*cp == ':' || iswhite (*cp))
4468 /* Found end of label, so copy it and add it to the table. */
4469 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4470 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4471 }
4472 }
4473 }
4474
4475 \f
4476 /*
4477 * Perl support
4478 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4479 * Perl variable names: /^(my|local).../
4480 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4481 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4482 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4483 */
4484 static void
4485 Perl_functions (inf)
4486 FILE *inf;
4487 {
4488 char *package = savestr ("main"); /* current package name */
4489 register char *cp;
4490
4491 LOOP_ON_INPUT_LINES (inf, lb, cp)
4492 {
4493 skip_spaces(cp);
4494
4495 if (LOOKING_AT (cp, "package"))
4496 {
4497 free (package);
4498 get_tag (cp, &package);
4499 }
4500 else if (LOOKING_AT (cp, "sub"))
4501 {
4502 char *pos;
4503 char *sp = cp;
4504
4505 while (!notinname (*cp))
4506 cp++;
4507 if (cp == sp)
4508 continue; /* nothing found */
4509 if ((pos = etags_strchr (sp, ':')) != NULL
4510 && pos < cp && pos[1] == ':')
4511 /* The name is already qualified. */
4512 make_tag (sp, cp - sp, TRUE,
4513 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4514 else
4515 /* Qualify it. */
4516 {
4517 char savechar, *name;
4518
4519 savechar = *cp;
4520 *cp = '\0';
4521 name = concat (package, "::", sp);
4522 *cp = savechar;
4523 make_tag (name, strlen(name), TRUE,
4524 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4525 free (name);
4526 }
4527 }
4528 else if (globals) /* only if we are tagging global vars */
4529 {
4530 /* Skip a qualifier, if any. */
4531 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4532 /* After "my" or "local", but before any following paren or space. */
4533 char *varstart = cp;
4534
4535 if (qual /* should this be removed? If yes, how? */
4536 && (*cp == '$' || *cp == '@' || *cp == '%'))
4537 {
4538 varstart += 1;
4539 do
4540 cp++;
4541 while (ISALNUM (*cp) || *cp == '_');
4542 }
4543 else if (qual)
4544 {
4545 /* Should be examining a variable list at this point;
4546 could insist on seeing an open parenthesis. */
4547 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4548 cp++;
4549 }
4550 else
4551 continue;
4552
4553 make_tag (varstart, cp - varstart, FALSE,
4554 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4555 }
4556 }
4557 }
4558
4559
4560 /*
4561 * Python support
4562 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4563 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4564 * More ideas by seb bacon <seb@jamkit.com> (2002)
4565 */
4566 static void
4567 Python_functions (inf)
4568 FILE *inf;
4569 {
4570 register char *cp;
4571
4572 LOOP_ON_INPUT_LINES (inf, lb, cp)
4573 {
4574 cp = skip_spaces (cp);
4575 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4576 {
4577 char *name = cp;
4578 while (!notinname (*cp) && *cp != ':')
4579 cp++;
4580 make_tag (name, cp - name, TRUE,
4581 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4582 }
4583 }
4584 }
4585
4586 \f
4587 /*
4588 * PHP support
4589 * Look for:
4590 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4591 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4592 * - /^[ \t]*define\(\"[^\"]+/
4593 * Only with --members:
4594 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4595 * Idea by Diez B. Roggisch (2001)
4596 */
4597 static void
4598 PHP_functions (inf)
4599 FILE *inf;
4600 {
4601 register char *cp, *name;
4602 bool search_identifier = FALSE;
4603
4604 LOOP_ON_INPUT_LINES (inf, lb, cp)
4605 {
4606 cp = skip_spaces (cp);
4607 name = cp;
4608 if (search_identifier
4609 && *cp != '\0')
4610 {
4611 while (!notinname (*cp))
4612 cp++;
4613 make_tag (name, cp - name, TRUE,
4614 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4615 search_identifier = FALSE;
4616 }
4617 else if (LOOKING_AT (cp, "function"))
4618 {
4619 if(*cp == '&')
4620 cp = skip_spaces (cp+1);
4621 if(*cp != '\0')
4622 {
4623 name = cp;
4624 while (!notinname (*cp))
4625 cp++;
4626 make_tag (name, cp - name, TRUE,
4627 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4628 }
4629 else
4630 search_identifier = TRUE;
4631 }
4632 else if (LOOKING_AT (cp, "class"))
4633 {
4634 if (*cp != '\0')
4635 {
4636 name = cp;
4637 while (*cp != '\0' && !iswhite (*cp))
4638 cp++;
4639 make_tag (name, cp - name, FALSE,
4640 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4641 }
4642 else
4643 search_identifier = TRUE;
4644 }
4645 else if (strneq (cp, "define", 6)
4646 && (cp = skip_spaces (cp+6))
4647 && *cp++ == '('
4648 && (*cp == '"' || *cp == '\''))
4649 {
4650 char quote = *cp++;
4651 name = cp;
4652 while (*cp != quote && *cp != '\0')
4653 cp++;
4654 make_tag (name, cp - name, FALSE,
4655 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656 }
4657 else if (members
4658 && LOOKING_AT (cp, "var")
4659 && *cp == '$')
4660 {
4661 name = cp;
4662 while (!notinname(*cp))
4663 cp++;
4664 make_tag (name, cp - name, FALSE,
4665 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4666 }
4667 }
4668 }
4669
4670 \f
4671 /*
4672 * Cobol tag functions
4673 * We could look for anything that could be a paragraph name.
4674 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4675 * Idea by Corny de Souza (1993)
4676 */
4677 static void
4678 Cobol_paragraphs (inf)
4679 FILE *inf;
4680 {
4681 register char *bp, *ep;
4682
4683 LOOP_ON_INPUT_LINES (inf, lb, bp)
4684 {
4685 if (lb.len < 9)
4686 continue;
4687 bp += 8;
4688
4689 /* If eoln, compiler option or comment ignore whole line. */
4690 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4691 continue;
4692
4693 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4694 continue;
4695 if (*ep++ == '.')
4696 make_tag (bp, ep - bp, TRUE,
4697 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4698 }
4699 }
4700
4701 \f
4702 /*
4703 * Makefile support
4704 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4705 */
4706 static void
4707 Makefile_targets (inf)
4708 FILE *inf;
4709 {
4710 register char *bp;
4711
4712 LOOP_ON_INPUT_LINES (inf, lb, bp)
4713 {
4714 if (*bp == '\t' || *bp == '#')
4715 continue;
4716 while (*bp != '\0' && *bp != '=' && *bp != ':')
4717 bp++;
4718 if (*bp == ':' || (globals && *bp == '='))
4719 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4720 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4721 }
4722 }
4723
4724 \f
4725 /*
4726 * Pascal parsing
4727 * Original code by Mosur K. Mohan (1989)
4728 *
4729 * Locates tags for procedures & functions. Doesn't do any type- or
4730 * var-definitions. It does look for the keyword "extern" or
4731 * "forward" immediately following the procedure statement; if found,
4732 * the tag is skipped.
4733 */
4734 static void
4735 Pascal_functions (inf)
4736 FILE *inf;
4737 {
4738 linebuffer tline; /* mostly copied from C_entries */
4739 long save_lcno;
4740 int save_lineno, namelen, taglen;
4741 char c, *name;
4742
4743 bool /* each of these flags is TRUE iff: */
4744 incomment, /* point is inside a comment */
4745 inquote, /* point is inside '..' string */
4746 get_tagname, /* point is after PROCEDURE/FUNCTION
4747 keyword, so next item = potential tag */
4748 found_tag, /* point is after a potential tag */
4749 inparms, /* point is within parameter-list */
4750 verify_tag; /* point has passed the parm-list, so the
4751 next token will determine whether this
4752 is a FORWARD/EXTERN to be ignored, or
4753 whether it is a real tag */
4754
4755 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4756 name = NULL; /* keep compiler quiet */
4757 dbp = lb.buffer;
4758 *dbp = '\0';
4759 linebuffer_init (&tline);
4760
4761 incomment = inquote = FALSE;
4762 found_tag = FALSE; /* have a proc name; check if extern */
4763 get_tagname = FALSE; /* found "procedure" keyword */
4764 inparms = FALSE; /* found '(' after "proc" */
4765 verify_tag = FALSE; /* check if "extern" is ahead */
4766
4767
4768 while (!feof (inf)) /* long main loop to get next char */
4769 {
4770 c = *dbp++;
4771 if (c == '\0') /* if end of line */
4772 {
4773 readline (&lb, inf);
4774 dbp = lb.buffer;
4775 if (*dbp == '\0')
4776 continue;
4777 if (!((found_tag && verify_tag)
4778 || get_tagname))
4779 c = *dbp++; /* only if don't need *dbp pointing
4780 to the beginning of the name of
4781 the procedure or function */
4782 }
4783 if (incomment)
4784 {
4785 if (c == '}') /* within { } comments */
4786 incomment = FALSE;
4787 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4788 {
4789 dbp++;
4790 incomment = FALSE;
4791 }
4792 continue;
4793 }
4794 else if (inquote)
4795 {
4796 if (c == '\'')
4797 inquote = FALSE;
4798 continue;
4799 }
4800 else
4801 switch (c)
4802 {
4803 case '\'':
4804 inquote = TRUE; /* found first quote */
4805 continue;
4806 case '{': /* found open { comment */
4807 incomment = TRUE;
4808 continue;
4809 case '(':
4810 if (*dbp == '*') /* found open (* comment */
4811 {
4812 incomment = TRUE;
4813 dbp++;
4814 }
4815 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4816 inparms = TRUE;
4817 continue;
4818 case ')': /* end of parms list */
4819 if (inparms)
4820 inparms = FALSE;
4821 continue;
4822 case ';':
4823 if (found_tag && !inparms) /* end of proc or fn stmt */
4824 {
4825 verify_tag = TRUE;
4826 break;
4827 }
4828 continue;
4829 }
4830 if (found_tag && verify_tag && (*dbp != ' '))
4831 {
4832 /* Check if this is an "extern" declaration. */
4833 if (*dbp == '\0')
4834 continue;
4835 if (lowcase (*dbp == 'e'))
4836 {
4837 if (nocase_tail ("extern")) /* superfluous, really! */
4838 {
4839 found_tag = FALSE;
4840 verify_tag = FALSE;
4841 }
4842 }
4843 else if (lowcase (*dbp) == 'f')
4844 {
4845 if (nocase_tail ("forward")) /* check for forward reference */
4846 {
4847 found_tag = FALSE;
4848 verify_tag = FALSE;
4849 }
4850 }
4851 if (found_tag && verify_tag) /* not external proc, so make tag */
4852 {
4853 found_tag = FALSE;
4854 verify_tag = FALSE;
4855 make_tag (name, namelen, TRUE,
4856 tline.buffer, taglen, save_lineno, save_lcno);
4857 continue;
4858 }
4859 }
4860 if (get_tagname) /* grab name of proc or fn */
4861 {
4862 char *cp;
4863
4864 if (*dbp == '\0')
4865 continue;
4866
4867 /* Find block name. */
4868 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4869 continue;
4870
4871 /* Save all values for later tagging. */
4872 linebuffer_setlen (&tline, lb.len);
4873 strcpy (tline.buffer, lb.buffer);
4874 save_lineno = lineno;
4875 save_lcno = linecharno;
4876 name = tline.buffer + (dbp - lb.buffer);
4877 namelen = cp - dbp;
4878 taglen = cp - lb.buffer + 1;
4879
4880 dbp = cp; /* set dbp to e-o-token */
4881 get_tagname = FALSE;
4882 found_tag = TRUE;
4883 continue;
4884
4885 /* And proceed to check for "extern". */
4886 }
4887 else if (!incomment && !inquote && !found_tag)
4888 {
4889 /* Check for proc/fn keywords. */
4890 switch (lowcase (c))
4891 {
4892 case 'p':
4893 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4894 get_tagname = TRUE;
4895 continue;
4896 case 'f':
4897 if (nocase_tail ("unction"))
4898 get_tagname = TRUE;
4899 continue;
4900 }
4901 }
4902 } /* while not eof */
4903
4904 free (tline.buffer);
4905 }
4906
4907 \f
4908 /*
4909 * Lisp tag functions
4910 * look for (def or (DEF, quote or QUOTE
4911 */
4912
4913 static void L_getit __P((void));
4914
4915 static void
4916 L_getit ()
4917 {
4918 if (*dbp == '\'') /* Skip prefix quote */
4919 dbp++;
4920 else if (*dbp == '(')
4921 {
4922 dbp++;
4923 /* Try to skip "(quote " */
4924 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4925 /* Ok, then skip "(" before name in (defstruct (foo)) */
4926 dbp = skip_spaces (dbp);
4927 }
4928 get_tag (dbp, NULL);
4929 }
4930
4931 static void
4932 Lisp_functions (inf)
4933 FILE *inf;
4934 {
4935 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4936 {
4937 if (dbp[0] != '(')
4938 continue;
4939
4940 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4941 {
4942 dbp = skip_non_spaces (dbp);
4943 dbp = skip_spaces (dbp);
4944 L_getit ();
4945 }
4946 else
4947 {
4948 /* Check for (foo::defmumble name-defined ... */
4949 do
4950 dbp++;
4951 while (!notinname (*dbp) && *dbp != ':');
4952 if (*dbp == ':')
4953 {
4954 do
4955 dbp++;
4956 while (*dbp == ':');
4957
4958 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4959 {
4960 dbp = skip_non_spaces (dbp);
4961 dbp = skip_spaces (dbp);
4962 L_getit ();
4963 }
4964 }
4965 }
4966 }
4967 }
4968
4969 \f
4970 /*
4971 * Lua script language parsing
4972 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4973 *
4974 * "function" and "local function" are tags if they start at column 1.
4975 */
4976 static void
4977 Lua_functions (inf)
4978 FILE *inf;
4979 {
4980 register char *bp;
4981
4982 LOOP_ON_INPUT_LINES (inf, lb, bp)
4983 {
4984 if (bp[0] != 'f' && bp[0] != 'l')
4985 continue;
4986
4987 LOOKING_AT (bp, "local"); /* skip possible "local" */
4988
4989 if (LOOKING_AT (bp, "function"))
4990 get_tag (bp, NULL);
4991 }
4992 }
4993
4994 \f
4995 /*
4996 * Postscript tag functions
4997 * Just look for lines where the first character is '/'
4998 * Also look at "defineps" for PSWrap
4999 * Ideas by:
5000 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5001 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5002 */
5003 static void
5004 PS_functions (inf)
5005 FILE *inf;
5006 {
5007 register char *bp, *ep;
5008
5009 LOOP_ON_INPUT_LINES (inf, lb, bp)
5010 {
5011 if (bp[0] == '/')
5012 {
5013 for (ep = bp+1;
5014 *ep != '\0' && *ep != ' ' && *ep != '{';
5015 ep++)
5016 continue;
5017 make_tag (bp, ep - bp, TRUE,
5018 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5019 }
5020 else if (LOOKING_AT (bp, "defineps"))
5021 get_tag (bp, NULL);
5022 }
5023 }
5024
5025 \f
5026 /*
5027 * Scheme tag functions
5028 * look for (def... xyzzy
5029 * (def... (xyzzy
5030 * (def ... ((...(xyzzy ....
5031 * (set! xyzzy
5032 * Original code by Ken Haase (1985?)
5033 */
5034
5035 static void
5036 Scheme_functions (inf)
5037 FILE *inf;
5038 {
5039 register char *bp;
5040
5041 LOOP_ON_INPUT_LINES (inf, lb, bp)
5042 {
5043 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5044 {
5045 bp = skip_non_spaces (bp+4);
5046 /* Skip over open parens and white space */
5047 while (notinname (*bp))
5048 bp++;
5049 get_tag (bp, NULL);
5050 }
5051 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5052 get_tag (bp, NULL);
5053 }
5054 }
5055
5056 \f
5057 /* Find tags in TeX and LaTeX input files. */
5058
5059 /* TEX_toktab is a table of TeX control sequences that define tags.
5060 * Each entry records one such control sequence.
5061 *
5062 * Original code from who knows whom.
5063 * Ideas by:
5064 * Stefan Monnier (2002)
5065 */
5066
5067 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5068
5069 /* Default set of control sequences to put into TEX_toktab.
5070 The value of environment var TEXTAGS is prepended to this. */
5071 static char *TEX_defenv = "\
5072 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5073 :part:appendix:entry:index:def\
5074 :newcommand:renewcommand:newenvironment:renewenvironment";
5075
5076 static void TEX_mode __P((FILE *));
5077 static void TEX_decode_env __P((char *, char *));
5078
5079 static char TEX_esc = '\\';
5080 static char TEX_opgrp = '{';
5081 static char TEX_clgrp = '}';
5082
5083 /*
5084 * TeX/LaTeX scanning loop.
5085 */
5086 static void
5087 TeX_commands (inf)
5088 FILE *inf;
5089 {
5090 char *cp;
5091 linebuffer *key;
5092
5093 /* Select either \ or ! as escape character. */
5094 TEX_mode (inf);
5095
5096 /* Initialize token table once from environment. */
5097 if (TEX_toktab == NULL)
5098 TEX_decode_env ("TEXTAGS", TEX_defenv);
5099
5100 LOOP_ON_INPUT_LINES (inf, lb, cp)
5101 {
5102 /* Look at each TEX keyword in line. */
5103 for (;;)
5104 {
5105 /* Look for a TEX escape. */
5106 while (*cp++ != TEX_esc)
5107 if (cp[-1] == '\0' || cp[-1] == '%')
5108 goto tex_next_line;
5109
5110 for (key = TEX_toktab; key->buffer != NULL; key++)
5111 if (strneq (cp, key->buffer, key->len))
5112 {
5113 register char *p;
5114 int namelen, linelen;
5115 bool opgrp = FALSE;
5116
5117 cp = skip_spaces (cp + key->len);
5118 if (*cp == TEX_opgrp)
5119 {
5120 opgrp = TRUE;
5121 cp++;
5122 }
5123 for (p = cp;
5124 (!iswhite (*p) && *p != '#' &&
5125 *p != TEX_opgrp && *p != TEX_clgrp);
5126 p++)
5127 continue;
5128 namelen = p - cp;
5129 linelen = lb.len;
5130 if (!opgrp || *p == TEX_clgrp)
5131 {
5132 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5133 *p++;
5134 linelen = p - lb.buffer + 1;
5135 }
5136 make_tag (cp, namelen, TRUE,
5137 lb.buffer, linelen, lineno, linecharno);
5138 goto tex_next_line; /* We only tag a line once */
5139 }
5140 }
5141 tex_next_line:
5142 ;
5143 }
5144 }
5145
5146 #define TEX_LESC '\\'
5147 #define TEX_SESC '!'
5148
5149 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5150 chars accordingly. */
5151 static void
5152 TEX_mode (inf)
5153 FILE *inf;
5154 {
5155 int c;
5156
5157 while ((c = getc (inf)) != EOF)
5158 {
5159 /* Skip to next line if we hit the TeX comment char. */
5160 if (c == '%')
5161 while (c != '\n')
5162 c = getc (inf);
5163 else if (c == TEX_LESC || c == TEX_SESC )
5164 break;
5165 }
5166
5167 if (c == TEX_LESC)
5168 {
5169 TEX_esc = TEX_LESC;
5170 TEX_opgrp = '{';
5171 TEX_clgrp = '}';
5172 }
5173 else
5174 {
5175 TEX_esc = TEX_SESC;
5176 TEX_opgrp = '<';
5177 TEX_clgrp = '>';
5178 }
5179 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5180 No attempt is made to correct the situation. */
5181 rewind (inf);
5182 }
5183
5184 /* Read environment and prepend it to the default string.
5185 Build token table. */
5186 static void
5187 TEX_decode_env (evarname, defenv)
5188 char *evarname;
5189 char *defenv;
5190 {
5191 register char *env, *p;
5192 int i, len;
5193
5194 /* Append default string to environment. */
5195 env = getenv (evarname);
5196 if (!env)
5197 env = defenv;
5198 else
5199 {
5200 char *oldenv = env;
5201 env = concat (oldenv, defenv, "");
5202 }
5203
5204 /* Allocate a token table */
5205 for (len = 1, p = env; p;)
5206 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5207 len++;
5208 TEX_toktab = xnew (len, linebuffer);
5209
5210 /* Unpack environment string into token table. Be careful about */
5211 /* zero-length strings (leading ':', "::" and trailing ':') */
5212 for (i = 0; *env != '\0';)
5213 {
5214 p = etags_strchr (env, ':');
5215 if (!p) /* End of environment string. */
5216 p = env + strlen (env);
5217 if (p - env > 0)
5218 { /* Only non-zero strings. */
5219 TEX_toktab[i].buffer = savenstr (env, p - env);
5220 TEX_toktab[i].len = p - env;
5221 i++;
5222 }
5223 if (*p)
5224 env = p + 1;
5225 else
5226 {
5227 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5228 TEX_toktab[i].len = 0;
5229 break;
5230 }
5231 }
5232 }
5233
5234 \f
5235 /* Texinfo support. Dave Love, Mar. 2000. */
5236 static void
5237 Texinfo_nodes (inf)
5238 FILE * inf;
5239 {
5240 char *cp, *start;
5241 LOOP_ON_INPUT_LINES (inf, lb, cp)
5242 if (LOOKING_AT (cp, "@node"))
5243 {
5244 start = cp;
5245 while (*cp != '\0' && *cp != ',')
5246 cp++;
5247 make_tag (start, cp - start, TRUE,
5248 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5249 }
5250 }
5251
5252 \f
/* Similar to LOOKING_AT but compares ignoring case, does not use
   notinname and does not skip anything after the keyword: when CP
   points at KW, CP is advanced just past it and the value is true.  */
#define LOOKING_AT_NOCASE(cp, kw)       /* kw is a constant string */   \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   ? ((cp) += sizeof(kw)-1, TRUE)       /* step over kw */              \
   : FALSE)
5257
5258 /*
5259 * HTML support.
5260 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5261 * Contents of <a name=xxx> are tags with name xxx.
5262 *
5263 * Francesco Potortì, 2002.
5264 */
5265 static void
5266 HTML_labels (inf)
5267 FILE * inf;
5268 {
5269 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5270 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5271 bool intag = FALSE; /* inside an html tag, looking for ID= */
5272 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5273 char *end;
5274
5275
5276 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5277
5278 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5279 for (;;) /* loop on the same line */
5280 {
5281 if (skiptag) /* skip HTML tag */
5282 {
5283 while (*dbp != '\0' && *dbp != '>')
5284 dbp++;
5285 if (*dbp == '>')
5286 {
5287 dbp += 1;
5288 skiptag = FALSE;
5289 continue; /* look on the same line */
5290 }
5291 break; /* go to next line */
5292 }
5293
5294 else if (intag) /* look for "name=" or "id=" */
5295 {
5296 while (*dbp != '\0' && *dbp != '>'
5297 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5298 dbp++;
5299 if (*dbp == '\0')
5300 break; /* go to next line */
5301 if (*dbp == '>')
5302 {
5303 dbp += 1;
5304 intag = FALSE;
5305 continue; /* look on the same line */
5306 }
5307 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5308 || LOOKING_AT_NOCASE (dbp, "id="))
5309 {
5310 bool quoted = (dbp[0] == '"');
5311
5312 if (quoted)
5313 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5314 continue;
5315 else
5316 for (end = dbp; *end != '\0' && intoken (*end); end++)
5317 continue;
5318 linebuffer_setlen (&token_name, end - dbp);
5319 strncpy (token_name.buffer, dbp, end - dbp);
5320 token_name.buffer[end - dbp] = '\0';
5321
5322 dbp = end;
5323 intag = FALSE; /* we found what we looked for */
5324 skiptag = TRUE; /* skip to the end of the tag */
5325 getnext = TRUE; /* then grab the text */
5326 continue; /* look on the same line */
5327 }
5328 dbp += 1;
5329 }
5330
5331 else if (getnext) /* grab next tokens and tag them */
5332 {
5333 dbp = skip_spaces (dbp);
5334 if (*dbp == '\0')
5335 break; /* go to next line */
5336 if (*dbp == '<')
5337 {
5338 intag = TRUE;
5339 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5340 continue; /* look on the same line */
5341 }
5342
5343 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5344 continue;
5345 make_tag (token_name.buffer, token_name.len, TRUE,
5346 dbp, end - dbp, lineno, linecharno);
5347 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5348 getnext = FALSE;
5349 break; /* go to next line */
5350 }
5351
5352 else /* look for an interesting HTML tag */
5353 {
5354 while (*dbp != '\0' && *dbp != '<')
5355 dbp++;
5356 if (*dbp == '\0')
5357 break; /* go to next line */
5358 intag = TRUE;
5359 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5360 {
5361 inanchor = TRUE;
5362 continue; /* look on the same line */
5363 }
5364 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5365 || LOOKING_AT_NOCASE (dbp, "<h1>")
5366 || LOOKING_AT_NOCASE (dbp, "<h2>")
5367 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5368 {
5369 intag = FALSE;
5370 getnext = TRUE;
5371 continue; /* look on the same line */
5372 }
5373 dbp += 1;
5374 }
5375 }
5376 }
5377
5378 \f
5379 /*
5380 * Prolog support
5381 *
5382 * Assumes that the predicate or rule starts at column 0.
5383 * Only the first clause of a predicate or rule is added.
5384 * Original code by Sunichirou Sugou (1989)
5385 * Rewritten by Anders Lindgren (1996)
5386 */
5387 static int prolog_pr __P((char *, char *));
5388 static void prolog_skip_comment __P((linebuffer *, FILE *));
5389 static int prolog_atom __P((char *, int));
5390
5391 static void
5392 Prolog_functions (inf)
5393 FILE *inf;
5394 {
5395 char *cp, *last;
5396 int len;
5397 int allocated;
5398
5399 allocated = 0;
5400 len = 0;
5401 last = NULL;
5402
5403 LOOP_ON_INPUT_LINES (inf, lb, cp)
5404 {
5405 if (cp[0] == '\0') /* Empty line */
5406 continue;
5407 else if (iswhite (cp[0])) /* Not a predicate */
5408 continue;
5409 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5410 prolog_skip_comment (&lb, inf);
5411 else if ((len = prolog_pr (cp, last)) > 0)
5412 {
5413 /* Predicate or rule. Store the function name so that we
5414 only generate a tag for the first clause. */
5415 if (last == NULL)
5416 last = xnew(len + 1, char);
5417 else if (len + 1 > allocated)
5418 xrnew (last, len + 1, char);
5419 allocated = len + 1;
5420 strncpy (last, cp, len);
5421 last[len] = '\0';
5422 }
5423 }
5424 }
5425
5426
5427 static void
5428 prolog_skip_comment (plb, inf)
5429 linebuffer *plb;
5430 FILE *inf;
5431 {
5432 char *cp;
5433
5434 do
5435 {
5436 for (cp = plb->buffer; *cp != '\0'; cp++)
5437 if (cp[0] == '*' && cp[1] == '/')
5438 return;
5439 readline (plb, inf);
5440 }
5441 while (!feof(inf));
5442 }
5443
5444 /*
5445 * A predicate or rule definition is added if it matches:
5446 * <beginning of line><Prolog Atom><whitespace>(
5447 * or <beginning of line><Prolog Atom><whitespace>:-
5448 *
5449 * It is added to the tags database if it doesn't match the
5450 * name of the previous clause header.
5451 *
5452 * Return the size of the name of the predicate or rule, or 0 if no
5453 * header was found.
5454 */
5455 static int
5456 prolog_pr (s, last)
5457 char *s;
5458 char *last; /* Name of last clause. */
5459 {
5460 int pos;
5461 int len;
5462
5463 pos = prolog_atom (s, 0);
5464 if (pos < 1)
5465 return 0;
5466
5467 len = pos;
5468 pos = skip_spaces (s + pos) - s;
5469
5470 if ((s[pos] == '.'
5471 || (s[pos] == '(' && (pos += 1))
5472 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5473 && (last == NULL /* save only the first clause */
5474 || len != strlen (last)
5475 || !strneq (s, last, len)))
5476 {
5477 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5478 return len;
5479 }
5480 else
5481 return 0;
5482 }
5483
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   with an underscore; underscores may also appear inside it.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int i = pos;

  if (ISLOWER (s[i]) || s[i] == '_')
    {
      /* The atom is unquoted. */
      do
        i++;
      while (ISALNUM (s[i]) || s[i] == '_');
      return i - pos;
    }

  if (s[i] != '\'')
    return -1;

  /* The atom is quoted: scan for the closing quote.  */
  i++;
  for (;;)
    switch (s[i])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* A backslash quotes the next character, which must exist.  */
        if (s[i + 1] == '\0')
          return -1;
        i += 2;
        break;

      case '\'':
        /* A single quote closes the atom, a doubled one is a literal
           quote inside it.  */
        if (s[i + 1] != '\'')
          return i + 1 - pos;
        i += 2;
        break;

      default:
        i++;
        break;
      }
}
5542
5543 \f
5544 /*
5545 * Support for Erlang
5546 *
5547 * Generates tags for functions, defines, and records.
5548 * Assumes that Erlang functions start at column 0.
5549 * Original code by Anders Lindgren (1996)
5550 */
5551 static int erlang_func __P((char *, char *));
5552 static void erlang_attribute __P((char *));
5553 static int erlang_atom __P((char *));
5554
5555 static void
5556 Erlang_functions (inf)
5557 FILE *inf;
5558 {
5559 char *cp, *last;
5560 int len;
5561 int allocated;
5562
5563 allocated = 0;
5564 len = 0;
5565 last = NULL;
5566
5567 LOOP_ON_INPUT_LINES (inf, lb, cp)
5568 {
5569 if (cp[0] == '\0') /* Empty line */
5570 continue;
5571 else if (iswhite (cp[0])) /* Not function nor attribute */
5572 continue;
5573 else if (cp[0] == '%') /* comment */
5574 continue;
5575 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5576 continue;
5577 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5578 {
5579 erlang_attribute (cp);
5580 last = NULL;
5581 }
5582 else if ((len = erlang_func (cp, last)) > 0)
5583 {
5584 /*
5585 * Function. Store the function name so that we only
5586 * generates a tag for the first clause.
5587 */
5588 if (last == NULL)
5589 last = xnew (len + 1, char);
5590 else if (len + 1 > allocated)
5591 xrnew (last, len + 1, char);
5592 allocated = len + 1;
5593 strncpy (last, cp, len);
5594 last[len] = '\0';
5595 }
5596 }
5597 }
5598
5599
5600 /*
5601 * A function definition is added if it matches:
5602 * <beginning of line><Erlang Atom><whitespace>(
5603 *
5604 * It is added to the tags database if it doesn't match the
5605 * name of the previous clause header.
5606 *
5607 * Return the size of the name of the function, or 0 if no function
5608 * was found.
5609 */
5610 static int
5611 erlang_func (s, last)
5612 char *s;
5613 char *last; /* Name of last clause. */
5614 {
5615 int pos;
5616 int len;
5617
5618 pos = erlang_atom (s);
5619 if (pos < 1)
5620 return 0;
5621
5622 len = pos;
5623 pos = skip_spaces (s + pos) - s;
5624
5625 /* Save only the first clause. */
5626 if (s[pos++] == '('
5627 && (last == NULL
5628 || len != (int)strlen (last)
5629 || !strneq (s, last, len)))
5630 {
5631 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5632 return len;
5633 }
5634
5635 return 0;
5636 }
5637
5638
5639 /*
5640 * Handle attributes. Currently, tags are generated for defines
5641 * and records.
5642 *
5643 * They are on the form:
5644 * -define(foo, bar).
5645 * -define(Foo(M, N), M+N).
5646 * -record(graph, {vtab = notable, cyclic = true}).
5647 */
5648 static void
5649 erlang_attribute (s)
5650 char *s;
5651 {
5652 char *cp = s;
5653
5654 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5655 && *cp++ == '(')
5656 {
5657 int len = erlang_atom (skip_spaces (cp));
5658 if (len > 0)
5659 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5660 }
5661 return;
5662 }
5663
5664
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does not
 * start with an atom and when a quoted atom is not closed on the line.
 */
static int
erlang_atom (s)
     char *s;
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* The atom is quoted: find the closing quote.  */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\\')     /* a backslash quotes the next char */
            n++;
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          n++;
        }
      return n + 1;             /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (n = 1; ISALNUM (s[n]) || s[n] == '_'; n++)
        continue;
    }

  return n;
}
5693
5694 \f
5695 #ifdef ETAGS_REGEXPS
5696
5697 static char *scan_separators __P((char *));
5698 static void add_regex __P((char *, language *));
5699 static char *substitute __P((char *, char *, struct re_registers *));
5700
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];           /* the separator is the first character */
  char *out = name;             /* where the next decoded char goes */
  char *in;                     /* scanning position */
  char *result;

  for (in = name + 1; *in != '\0'; in++)
    {
      if (*in == '\\')
        {
          /* Escape sequence: decide on the character after the
             backslash.  A backslash that ends the string is dropped.  */
          if (*++in == '\0')
            break;
          switch (*in)
            {
            case 'a': *out++ = '\007'; break; /* BEL (bell)             */
            case 'b': *out++ = '\b'; break;   /* BS (back space)        */
            case 'd': *out++ = 0177; break;   /* DEL (delete)           */
            case 'e': *out++ = 033; break;    /* ESC (escape)           */
            case 'f': *out++ = '\f'; break;   /* FF (form feed)         */
            case 'n': *out++ = '\n'; break;   /* NL (new line)          */
            case 'r': *out++ = '\r'; break;   /* CR (carriage return)   */
            case 't': *out++ = '\t'; break;   /* TAB (horizontal tab)   */
            case 'v': *out++ = '\v'; break;   /* VT (vertical tab)      */
            default:
              /* A quoted separator stands for itself; when something
                 else is quoted, the quote is preserved.  */
              if (*in != sep)
                *out++ = '\\';
              *out++ = *in;
              break;
            }
        }
      else if (*in == sep)
        break;                  /* unquoted separator: end of pattern */
      else
        *out++ = *in;
    }

  /* Anything but a separator here signals an unterminated regexp.  */
  result = (*in == sep) ? in : NULL;

  /* Terminate copied string. */
  *out = '\0';
  return result;
}
5760
5761 /* Look at the argument of --regex or --no-regex and do the right
5762 thing. Same for each line of a regexp file. */
5763 static void
5764 analyse_regex (regex_arg)
5765 char *regex_arg;
5766 {
5767 if (regex_arg == NULL)
5768 {
5769 free_regexps (); /* --no-regex: remove existing regexps */
5770 return;
5771 }
5772
5773 /* A real --regexp option or a line in a regexp file. */
5774 switch (regex_arg[0])
5775 {
5776 /* Comments in regexp file or null arg to --regex. */
5777 case '\0':
5778 case ' ':
5779 case '\t':
5780 break;
5781
5782 /* Read a regex file. This is recursive and may result in a
5783 loop, which will stop when the file descriptors are exhausted. */
5784 case '@':
5785 {
5786 FILE *regexfp;
5787 linebuffer regexbuf;
5788 char *regexfile = regex_arg + 1;
5789
5790 /* regexfile is a file containing regexps, one per line. */
5791 regexfp = fopen (regexfile, "r");
5792 if (regexfp == NULL)
5793 {
5794 pfatal (regexfile);
5795 return;
5796 }
5797 linebuffer_init (&regexbuf);
5798 while (readline_internal (&regexbuf, regexfp) > 0)
5799 analyse_regex (regexbuf.buffer);
5800 free (regexbuf.buffer);
5801 fclose (regexfp);
5802 }
5803 break;
5804
5805 /* Regexp to be used for a specific language only. */
5806 case '{':
5807 {
5808 language *lang;
5809 char *lang_name = regex_arg + 1;
5810 char *cp;
5811
5812 for (cp = lang_name; *cp != '}'; cp++)
5813 if (*cp == '\0')
5814 {
5815 error ("unterminated language name in regex: %s", regex_arg);
5816 return;
5817 }
5818 *cp++ = '\0';
5819 lang = get_language_from_langname (lang_name);
5820 if (lang == NULL)
5821 return;
5822 add_regex (cp, lang);
5823 }
5824 break;
5825
5826 /* Regexp to be used for any language. */
5827 default:
5828 add_regex (regex_arg, NULL);
5829 break;
5830 }
5831 }
5832
5833 /* Separate the regexp pattern, compile it,
5834 and care for optional name and modifiers. */
5835 static void
5836 add_regex (regexp_pattern, lang)
5837 char *regexp_pattern;
5838 language *lang;
5839 {
5840 static struct re_pattern_buffer zeropattern;
5841 char sep, *pat, *name, *modifiers;
5842 const char *err;
5843 struct re_pattern_buffer *patbuf;
5844 regexp *rp;
5845 bool
5846 force_explicit_name = TRUE, /* do not use implicit tag names */
5847 ignore_case = FALSE, /* case is significant */
5848 multi_line = FALSE, /* matches are done one line at a time */
5849 single_line = FALSE; /* dot does not match newline */
5850
5851
5852 if (strlen(regexp_pattern) < 3)
5853 {
5854 error ("null regexp", (char *)NULL);
5855 return;
5856 }
5857 sep = regexp_pattern[0];
5858 name = scan_separators (regexp_pattern);
5859 if (name == NULL)
5860 {
5861 error ("%s: unterminated regexp", regexp_pattern);
5862 return;
5863 }
5864 if (name[1] == sep)
5865 {
5866 error ("null name for regexp \"%s\"", regexp_pattern);
5867 return;
5868 }
5869 modifiers = scan_separators (name);
5870 if (modifiers == NULL) /* no terminating separator --> no name */
5871 {
5872 modifiers = name;
5873 name = "";
5874 }
5875 else
5876 modifiers += 1; /* skip separator */
5877
5878 /* Parse regex modifiers. */
5879 for (; modifiers[0] != '\0'; modifiers++)
5880 switch (modifiers[0])
5881 {
5882 case 'N':
5883 if (modifiers == name)
5884 error ("forcing explicit tag name but no name, ignoring", NULL);
5885 force_explicit_name = TRUE;
5886 break;
5887 case 'i':
5888 ignore_case = TRUE;
5889 break;
5890 case 's':
5891 single_line = TRUE;
5892 /* FALLTHRU */
5893 case 'm':
5894 multi_line = TRUE;
5895 need_filebuf = TRUE;
5896 break;
5897 default:
5898 {
5899 char wrongmod [2];
5900 wrongmod[0] = modifiers[0];
5901 wrongmod[1] = '\0';
5902 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5903 }
5904 break;
5905 }
5906
5907 patbuf = xnew (1, struct re_pattern_buffer);
5908 *patbuf = zeropattern;
5909 if (ignore_case)
5910 {
5911 static char lc_trans[CHARS];
5912 int i;
5913 for (i = 0; i < CHARS; i++)
5914 lc_trans[i] = lowcase (i);
5915 patbuf->translate = lc_trans; /* translation table to fold case */
5916 }
5917
5918 if (multi_line)
5919 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5920 else
5921 pat = regexp_pattern;
5922
5923 if (single_line)
5924 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5925 else
5926 re_set_syntax (RE_SYNTAX_EMACS);
5927
5928 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5929 if (multi_line)
5930 free (pat);
5931 if (err != NULL)
5932 {
5933 error ("%s while compiling pattern", err);
5934 return;
5935 }
5936
5937 rp = p_head;
5938 p_head = xnew (1, regexp);
5939 p_head->pattern = savestr (regexp_pattern);
5940 p_head->p_next = rp;
5941 p_head->lang = lang;
5942 p_head->pat = patbuf;
5943 p_head->name = savestr (name);
5944 p_head->error_signaled = FALSE;
5945 p_head->force_explicit_name = force_explicit_name;
5946 p_head->ignore_case = ignore_case;
5947 p_head->multi_line = multi_line;
5948 }
5949
5950 /*
5951 * Do the substitutions indicated by the regular expression and
5952 * arguments.
5953 */
5954 static char *
5955 substitute (in, out, regs)
5956 char *in, *out;
5957 struct re_registers *regs;
5958 {
5959 char *result, *t;
5960 int size, dig, diglen;
5961
5962 result = NULL;
5963 size = strlen (out);
5964
5965 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5966 if (out[size - 1] == '\\')
5967 fatal ("pattern error in \"%s\"", out);
5968 for (t = etags_strchr (out, '\\');
5969 t != NULL;
5970 t = etags_strchr (t + 2, '\\'))
5971 if (ISDIGIT (t[1]))
5972 {
5973 dig = t[1] - '0';
5974 diglen = regs->end[dig] - regs->start[dig];
5975 size += diglen - 2;
5976 }
5977 else
5978 size -= 1;
5979
5980 /* Allocate space and do the substitutions. */
5981 assert (size >= 0);
5982 result = xnew (size + 1, char);
5983
5984 for (t = result; *out != '\0'; out++)
5985 if (*out == '\\' && ISDIGIT (*++out))
5986 {
5987 dig = *out - '0';
5988 diglen = regs->end[dig] - regs->start[dig];
5989 strncpy (t, in + regs->start[dig], diglen);
5990 t += diglen;
5991 }
5992 else
5993 *t++ = *out;
5994 *t = '\0';
5995
5996 assert (t <= result + size);
5997 assert (t - result == (int)strlen (result));
5998
5999 return result;
6000 }
6001
6002 /* Deallocate all regexps. */
6003 static void
6004 free_regexps ()
6005 {
6006 regexp *rp;
6007 while (p_head != NULL)
6008 {
6009 rp = p_head->p_next;
6010 free (p_head->pattern);
6011 free (p_head->name);
6012 free (p_head);
6013 p_head = rp;
6014 }
6015 return;
6016 }
6017
6018 /*
6019 * Reads the whole file as a single string from `filebuf' and looks for
6020 * multi-line regular expressions, creating tags on matches.
6021 * readline already dealt with normal regexps.
6022 *
6023 * Idea by Ben Wing <ben@666.com> (2002).
6024 */
6025 static void
6026 regex_tag_multiline ()
6027 {
6028 char *buffer = filebuf.buffer;
6029 regexp *rp;
6030 char *name;
6031
6032 for (rp = p_head; rp != NULL; rp = rp->p_next)
6033 {
6034 int match = 0;
6035
6036 if (!rp->multi_line)
6037 continue; /* skip normal regexps */
6038
6039 /* Generic initialisations before parsing file from memory. */
6040 lineno = 1; /* reset global line number */
6041 charno = 0; /* reset global char number */
6042 linecharno = 0; /* reset global char number of line start */
6043
6044 /* Only use generic regexps or those for the current language. */
6045 if (rp->lang != NULL && rp->lang != curfdp->lang)
6046 continue;
6047
6048 while (match >= 0 && match < filebuf.len)
6049 {
6050 match = re_search (rp->pat, buffer, filebuf.len, charno,
6051 filebuf.len - match, &rp->regs);
6052 switch (match)
6053 {
6054 case -2:
6055 /* Some error. */
6056 if (!rp->error_signaled)
6057 {
6058 error ("regexp stack overflow while matching \"%s\"",
6059 rp->pattern);
6060 rp->error_signaled = TRUE;
6061 }
6062 break;
6063 case -1:
6064 /* No match. */
6065 break;
6066 default:
6067 if (match == rp->regs.end[0])
6068 {
6069 if (!rp->error_signaled)
6070 {
6071 error ("regexp matches the empty string: \"%s\"",
6072 rp->pattern);
6073 rp->error_signaled = TRUE;
6074 }
6075 match = -3; /* exit from while loop */
6076 break;
6077 }
6078
6079 /* Match occurred. Construct a tag. */
6080 while (charno < rp->regs.end[0])
6081 if (buffer[charno++] == '\n')
6082 lineno++, linecharno = charno;
6083 name = rp->name;
6084 if (name[0] == '\0')
6085 name = NULL;
6086 else /* make a named tag */
6087 name = substitute (buffer, rp->name, &rp->regs);
6088 if (rp->force_explicit_name)
6089 /* Force explicit tag name, if a name is there. */
6090 pfnote (name, TRUE, buffer + linecharno,
6091 charno - linecharno + 1, lineno, linecharno);
6092 else
6093 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6094 charno - linecharno + 1, lineno, linecharno);
6095 break;
6096 }
6097 }
6098 }
6099 }
6100
6101 #endif /* ETAGS_REGEXPS */
6102
6103 \f
6104 static bool
6105 nocase_tail (cp)
6106 char *cp;
6107 {
6108 register int len = 0;
6109
6110 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6111 cp++, len++;
6112 if (*cp == '\0' && !intoken (dbp[len]))
6113 {
6114 dbp += len;
6115 return TRUE;
6116 }
6117 return FALSE;
6118 }
6119
6120 static void
6121 get_tag (bp, namepp)
6122 register char *bp;
6123 char **namepp;
6124 {
6125 register char *cp = bp;
6126
6127 if (*bp != '\0')
6128 {
6129 /* Go till you get to white space or a syntactic break */
6130 for (cp = bp + 1; !notinname (*cp); cp++)
6131 continue;
6132 make_tag (bp, cp - bp, TRUE,
6133 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6134 }
6135
6136 if (namepp != NULL)
6137 *namepp = savenstr (bp, cp - bp);
6138 }
6139
6140 /*
6141 * Read a line of text from `stream' into `lbp', excluding the
6142 * newline or CR-NL, if any. Return the number of characters read from
6143 * `stream', which is the length of the line including the newline.
6144 *
6145 * On DOS or Windows we do not count the CR character, if any before the
6146 * NL, in the returned length; this mirrors the behavior of Emacs on those
6147 * platforms (for text files, it translates CR-NL to NL as it reads in the
6148 * file).
6149 *
6150 * If multi-line regular expressions are requested, each line read is
6151 * appended to `filebuf'.
6152 */
6153 static long
6154 readline_internal (lbp, stream)
6155 linebuffer *lbp;
6156 register FILE *stream;
6157 {
6158 char *buffer = lbp->buffer;
6159 register char *p = lbp->buffer;
6160 register char *pend;
6161 int chars_deleted;
6162
6163 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6164
6165 for (;;)
6166 {
6167 register int c = getc (stream);
6168 if (p == pend)
6169 {
6170 /* We're at the end of linebuffer: expand it. */
6171 lbp->size *= 2;
6172 xrnew (buffer, lbp->size, char);
6173 p += buffer - lbp->buffer;
6174 pend = buffer + lbp->size;
6175 lbp->buffer = buffer;
6176 }
6177 if (c == EOF)
6178 {
6179 *p = '\0';
6180 chars_deleted = 0;
6181 break;
6182 }
6183 if (c == '\n')
6184 {
6185 if (p > buffer && p[-1] == '\r')
6186 {
6187 p -= 1;
6188 #ifdef DOS_NT
6189 /* Assume CRLF->LF translation will be performed by Emacs
6190 when loading this file, so CRs won't appear in the buffer.
6191 It would be cleaner to compensate within Emacs;
6192 however, Emacs does not know how many CRs were deleted
6193 before any given point in the file. */
6194 chars_deleted = 1;
6195 #else
6196 chars_deleted = 2;
6197 #endif
6198 }
6199 else
6200 {
6201 chars_deleted = 1;
6202 }
6203 *p = '\0';
6204 break;
6205 }
6206 *p++ = c;
6207 }
6208 lbp->len = p - buffer;
6209
6210 if (need_filebuf /* we need filebuf for multi-line regexps */
6211 && chars_deleted > 0) /* not at EOF */
6212 {
6213 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6214 {
6215 /* Expand filebuf. */
6216 filebuf.size *= 2;
6217 xrnew (filebuf.buffer, filebuf.size, char);
6218 }
6219 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6220 filebuf.len += lbp->len;
6221 filebuf.buffer[filebuf.len++] = '\n';
6222 filebuf.buffer[filebuf.len] = '\0';
6223 }
6224
6225 return lbp->len + chars_deleted;
6226 }
6227
6228 /*
6229 * Like readline_internal, above, but in addition try to match the
6230 * input line against relevant regular expressions and manage #line
6231 * directives.
6232 */
6233 static void
6234 readline (lbp, stream)
6235 linebuffer *lbp;
6236 FILE *stream;
6237 {
6238 long result;
6239
6240 linecharno = charno; /* update global char number of line start */
6241 result = readline_internal (lbp, stream); /* read line */
6242 lineno += 1; /* increment global line number */
6243 charno += result; /* increment global char number */
6244
6245 /* Honour #line directives. */
6246 if (!no_line_directive)
6247 {
6248 static bool discard_until_line_directive;
6249
6250 /* Check whether this is a #line directive. */
6251 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6252 {
6253 int start, lno;
6254
6255 if (DEBUG) start = 0; /* shut up the compiler */
6256 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6257 {
6258 char *endp = lbp->buffer + start;
6259
6260 assert (start > 0);
6261 while ((endp = etags_strchr (endp, '"')) != NULL
6262 && endp[-1] == '\\')
6263 endp++;
6264 if (endp != NULL)
6265 /* Ok, this is a real #line directive. Let's deal with it. */
6266 {
6267 char *taggedabsname; /* absolute name of original file */
6268 char *taggedfname; /* name of original file as given */
6269 char *name; /* temp var */
6270
6271 discard_until_line_directive = FALSE; /* found it */
6272 name = lbp->buffer + start;
6273 *endp = '\0';
6274 canonicalize_filename (name); /* for DOS */
6275 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6276 if (filename_is_absolute (name)
6277 || filename_is_absolute (curfdp->infname))
6278 taggedfname = savestr (taggedabsname);
6279 else
6280 taggedfname = relative_filename (taggedabsname,tagfiledir);
6281
6282 if (streq (curfdp->taggedfname, taggedfname))
6283 /* The #line directive is only a line number change. We
6284 deal with this afterwards. */
6285 free (taggedfname);
6286 else
6287 /* The tags following this #line directive should be
6288 attributed to taggedfname. In order to do this, set
6289 curfdp accordingly. */
6290 {
6291 fdesc *fdp; /* file description pointer */
6292
6293 /* Go look for a file description already set up for the
6294 file indicated in the #line directive. If there is
6295 one, use it from now until the next #line
6296 directive. */
6297 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6298 if (streq (fdp->infname, curfdp->infname)
6299 && streq (fdp->taggedfname, taggedfname))
6300 /* If we remove the second test above (after the &&)
6301 then all entries pertaining to the same file are
6302 coalesced in the tags file. If we use it, then
6303 entries pertaining to the same file but generated
6304 from different files (via #line directives) will
6305 go into separate sections in the tags file. These
6306 alternatives look equivalent. The first one
6307 destroys some apparently useless information. */
6308 {
6309 curfdp = fdp;
6310 free (taggedfname);
6311 break;
6312 }
6313 /* Else, if we already tagged the real file, skip all
6314 input lines until the next #line directive. */
6315 if (fdp == NULL) /* not found */
6316 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6317 if (streq (fdp->infabsname, taggedabsname))
6318 {
6319 discard_until_line_directive = TRUE;
6320 free (taggedfname);
6321 break;
6322 }
6323 /* Else create a new file description and use that from
6324 now on, until the next #line directive. */
6325 if (fdp == NULL) /* not found */
6326 {
6327 fdp = fdhead;
6328 fdhead = xnew (1, fdesc);
6329 *fdhead = *curfdp; /* copy curr. file description */
6330 fdhead->next = fdp;
6331 fdhead->infname = savestr (curfdp->infname);
6332 fdhead->infabsname = savestr (curfdp->infabsname);
6333 fdhead->infabsdir = savestr (curfdp->infabsdir);
6334 fdhead->taggedfname = taggedfname;
6335 fdhead->usecharno = FALSE;
6336 fdhead->prop = NULL;
6337 fdhead->written = FALSE;
6338 curfdp = fdhead;
6339 }
6340 }
6341 free (taggedabsname);
6342 lineno = lno - 1;
6343 readline (lbp, stream);
6344 return;
6345 } /* if a real #line directive */
6346 } /* if #line is followed by a a number */
6347 } /* if line begins with "#line " */
6348
6349 /* If we are here, no #line directive was found. */
6350 if (discard_until_line_directive)
6351 {
6352 if (result > 0)
6353 {
6354 /* Do a tail recursion on ourselves, thus discarding the contents
6355 of the line buffer. */
6356 readline (lbp, stream);
6357 return;
6358 }
6359 /* End of file. */
6360 discard_until_line_directive = FALSE;
6361 return;
6362 }
6363 } /* if #line directives should be considered */
6364
6365 #ifdef ETAGS_REGEXPS
6366 {
6367 int match;
6368 regexp *rp;
6369 char *name;
6370
6371 /* Match against relevant regexps. */
6372 if (lbp->len > 0)
6373 for (rp = p_head; rp != NULL; rp = rp->p_next)
6374 {
6375 /* Only use generic regexps or those for the current language.
6376 Also do not use multiline regexps, which is the job of
6377 regex_tag_multiline. */
6378 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6379 || rp->multi_line)
6380 continue;
6381
6382 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6383 switch (match)
6384 {
6385 case -2:
6386 /* Some error. */
6387 if (!rp->error_signaled)
6388 {
6389 error ("regexp stack overflow while matching \"%s\"",
6390 rp->pattern);
6391 rp->error_signaled = TRUE;
6392 }
6393 break;
6394 case -1:
6395 /* No match. */
6396 break;
6397 case 0:
6398 /* Empty string matched. */
6399 if (!rp->error_signaled)
6400 {
6401 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6402 rp->error_signaled = TRUE;
6403 }
6404 break;
6405 default:
6406 /* Match occurred. Construct a tag. */
6407 name = rp->name;
6408 if (name[0] == '\0')
6409 name = NULL;
6410 else /* make a named tag */
6411 name = substitute (lbp->buffer, rp->name, &rp->regs);
6412 if (rp->force_explicit_name)
6413 /* Force explicit tag name, if a name is there. */
6414 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6415 else
6416 make_tag (name, strlen (name), TRUE,
6417 lbp->buffer, match, lineno, linecharno);
6418 break;
6419 }
6420 }
6421 }
6422 #endif /* ETAGS_REGEXPS */
6423 }
6424
6425 \f
/*
 * Duplicate the null-terminated string CP.  The copy, terminator
 * included, lives in storage obtained through savenstr, which gets it
 * from xnew.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6436
6437 /*
6438 * Return a pointer to a space of size LEN+1 allocated with xnew where
6439 * the string CP has been copied for at most the first LEN characters.
6440 */
6441 static char *
6442 savenstr (cp, len)
6443 char *cp;
6444 int len;
6445 {
6446 register char *dp;
6447
6448 dp = xnew (len + 1, char);
6449 strncpy (dp, cp, len);
6450 dp[len] = '\0';
6451 return dp;
6452 }
6453
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if there is none.  The terminating
 * null character counts as part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	last = sp;		/* remember the rightmost hit so far */
      if (*sp == '\0')
	break;
    }
  return (char *)last;
}
6475
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if there is none.  The terminating
 * null character counts as part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
	return (char *)sp;
      if (*sp == '\0')
	return NULL;		/* reached the end without a hit */
    }
}
6494
/*
 * Compare the strings S1 and S2, treating upper and lower case letters
 * as equal.  Two characters are case-folded only if both of them are
 * alphabetic; otherwise they are compared as they are.  The result is
 * zero if the strings are equal, otherwise the difference between the
 * first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings in step until S1 ends or a pair differs. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
	{
	  if (lowcase (*s1) != lowcase (*s2))
	    break;
	}
      else if (*s1 != *s2)
	break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6515
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * treating upper and lower case letters as equal.  Two characters are
 * case-folded only if both of them are alphabetic; otherwise they are
 * compared as they are.
 *
 * Returns zero if the first N characters are equal (in particular if N
 * is zero or negative), otherwise the difference between the first pair
 * of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is tested first
     and decremented only after a pair has matched, so that when S1 ends
     exactly after N characters the character past the limit is never
     taken into account. */
  while (n > 0
	 && *s1 != '\0'
	 && (ISALPHA (*s1) && ISALPHA (*s2)
	     ? lowcase (*s1) == lowcase (*s2)
	     : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;			/* all N characters matched */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
	    ? lowcase (*s1) - lowcase (*s2)
	    : *s1 - *s2);
}
6541
/* Return a pointer to the first character at or after CP for which
   iswhite is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6551
/* Return a pointer to the first character at or after CP that is
   either white space (according to iswhite) or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6561
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  S1 is a printf control string and S2 the single
   argument for it, exactly as for `error'.  Does not return. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  /* Message first, then leave. */
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6570
/* Report the system error described by errno, with S1 as the prefix of
   the message (see perror), and terminate the program with
   EXIT_FAILURE.  Does not return. */
static void
pfatal (s1)
     char *s1;
{
  /* Let the C library describe what errno says. */
  perror (s1);

  exit (EXIT_FAILURE);
}
6578
6579 static void
6580 suggest_asking_for_help ()
6581 {
6582 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6583 progname, LONG_OPTIONS ? "--help" : "-h");
6584 exit (EXIT_FAILURE);
6585 }
6586
6587 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6588 static void
6589 error (s1, s2)
6590 const char *s1, *s2;
6591 {
6592 fprintf (stderr, "%s: ", progname);
6593 fprintf (stderr, s1, s2);
6594 fprintf (stderr, "\n");
6595 }
6596
6597 /* Return a newly-allocated string whose contents
6598 concatenate those of s1, s2, s3. */
6599 static char *
6600 concat (s1, s2, s3)
6601 char *s1, *s2, *s3;
6602 {
6603 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6604 char *result = xnew (len1 + len2 + len3 + 1, char);
6605
6606 strcpy (result, s1);
6607 strcpy (result + len1, s2);
6608 strcpy (result + len1 + len2, s3);
6609 result[len1 + len2 + len3] = '\0';
6610
6611 return result;
6612 }
6613
6614 \f
6615 /* Does the same work as the system V getcwd, but does not need to
6616 guess the buffer size in advance. */
6617 static char *
6618 etags_getcwd ()
6619 {
6620 #ifdef HAVE_GETCWD
6621 int bufsize = 200;
6622 char *path = xnew (bufsize, char);
6623
6624 while (getcwd (path, bufsize) == NULL)
6625 {
6626 if (errno != ERANGE)
6627 pfatal ("getcwd");
6628 bufsize *= 2;
6629 free (path);
6630 path = xnew (bufsize, char);
6631 }
6632
6633 canonicalize_filename (path);
6634 return path;
6635
6636 #else /* not HAVE_GETCWD */
6637 #if MSDOS
6638
6639 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6640
6641 getwd (path);
6642
6643 for (p = path; *p != '\0'; p++)
6644 if (*p == '\\')
6645 *p = '/';
6646 else
6647 *p = lowcase (*p);
6648
6649 return strdup (path);
6650 #else /* not MSDOS */
6651 linebuffer path;
6652 FILE *pipe;
6653
6654 linebuffer_init (&path);
6655 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6656 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6657 pfatal ("pwd");
6658 pclose (pipe);
6659
6660 return path.buffer;
6661 #endif /* not MSDOS */
6662 #endif /* not HAVE_GETCWD */
6663 }
6664
6665 /* Return a newly allocated string containing the file name of FILE
6666 relative to the absolute directory DIR (which should end with a slash). */
6667 static char *
6668 relative_filename (file, dir)
6669 char *file, *dir;
6670 {
6671 char *fp, *dp, *afn, *res;
6672 int i;
6673
6674 /* Find the common root of file and dir (with a trailing slash). */
6675 afn = absolute_filename (file, cwd);
6676 fp = afn;
6677 dp = dir;
6678 while (*fp++ == *dp++)
6679 continue;
6680 fp--, dp--; /* back to the first differing char */
6681 #ifdef DOS_NT
6682 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6683 return afn;
6684 #endif
6685 do /* look at the equal chars until '/' */
6686 fp--, dp--;
6687 while (*fp != '/');
6688
6689 /* Build a sequence of "../" strings for the resulting relative file name. */
6690 i = 0;
6691 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6692 i += 1;
6693 res = xnew (3*i + strlen (fp + 1) + 1, char);
6694 res[0] = '\0';
6695 while (i-- > 0)
6696 strcat (res, "../");
6697
6698 /* Add the file name relative to the common root of file and dir. */
6699 strcat (res, fp + 1);
6700 free (afn);
6701
6702 return res;
6703 }
6704
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not absolute already, it is appended to DIR.  Then every
   "/dirname/.." and "/." component is removed from the result.  If
   nothing is left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Go back to the start of the component before "/..". */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Close the gap.  Source and destination overlap, which
		 strcpy does not allow; copying forward by hand is safe
		 because the destination comes before the source. */
	      {
		char *dst = cp;
		char *src = slashp + 3;
		while ((*dst++ = *src++) != '\0')
		  continue;
	      }
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Drop "/." with the same overlap-safe forward copy. */
	      char *dst = slashp;
	      char *src = slashp + 2;
	      while ((*dst++ = *src++) != '\0')
		continue;
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')		/* just a slash */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6765
6766 /* Return a newly allocated string containing the absolute
6767 file name of dir where FILE resides given DIR (which should
6768 end with a slash). */
6769 static char *
6770 absolute_dirname (file, dir)
6771 char *file, *dir;
6772 {
6773 char *slashp, *res;
6774 char save;
6775
6776 canonicalize_filename (file);
6777 slashp = etags_strrchr (file, '/');
6778 if (slashp == NULL)
6779 return savestr (dir);
6780 save = slashp[1];
6781 slashp[1] = '\0';
6782 res = absolute_filename (file, dir);
6783 slashp[1] = save;
6784
6785 return res;
6786 }
6787
/* Whether the argument string is an absolute file name, that is, one
   beginning with a slash or, under DOS_NT, with a drive letter followed
   by a colon and a slash.  The argument string must have been
   canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return TRUE;
#endif
  return fn[0] == '/';
}
6800
/* Bring the file name FN to canonical form.  Works in place.  Under
   DOS_NT a lower case drive letter is made upper case and backslashes
   are translated into slashes.  Elsewhere nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
6819
6820 \f
6821 /* Initialize a linebuffer for use */
6822 static void
6823 linebuffer_init (lbp)
6824 linebuffer *lbp;
6825 {
6826 lbp->size = (DEBUG) ? 3 : 200;
6827 lbp->buffer = xnew (lbp->size, char);
6828 lbp->buffer[0] = '\0';
6829 lbp->len = 0;
6830 }
6831
6832 /* Set the minimum size of a string contained in a linebuffer. */
6833 static void
6834 linebuffer_setlen (lbp, toksize)
6835 linebuffer *lbp;
6836 int toksize;
6837 {
6838 while (lbp->size <= toksize)
6839 {
6840 lbp->size *= 2;
6841 xrnew (lbp->buffer, lbp->size, char);
6842 }
6843 lbp->len = toksize;
6844 }
6845
6846 /* Like malloc but get fatal error if memory is exhausted. */
6847 static PTR
6848 xmalloc (size)
6849 unsigned int size;
6850 {
6851 PTR result = (PTR) malloc (size);
6852 if (result == NULL)
6853 fatal ("virtual memory exhausted", (char *)NULL);
6854 return result;
6855 }
6856
6857 static PTR
6858 xrealloc (ptr, size)
6859 char *ptr;
6860 unsigned int size;
6861 {
6862 PTR result = (PTR) realloc (ptr, size);
6863 if (result == NULL)
6864 fatal ("virtual memory exhausted", (char *)NULL);
6865 return result;
6866 }
6867
6868 /*
6869 * Local Variables:
6870 * c-indentation-style: gnu
6871 * indent-tabs-mode: t
6872 * tab-width: 8
6873 * fill-column: 79
6874 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6875 * End:
6876 */
6877
6878 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6879 (do not change this comment) */
6880
6881 /* etags.c ends here */