]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
b5ff33c8b4c9701e130d0cff8a56e453067f80ef
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006 Free Software Foundation, Inc. and Ken Arnold
5
6 This file is not considered part of GNU Emacs.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 /*
23 * Authors:
24 * Ctags originally by Ken Arnold.
25 * Fortran added by Jim Kleckner.
26 * Ed Pelegri-Llopart added C typedefs.
27 * Gnu Emacs TAGS format and modifications by RMS?
28 * 1989 Sam Kendall added C++.
29 * 1992 Joseph B. Wells improved C and C++ parsing.
30 * 1993 Francesco Potortì reorganised C and C++.
31 * 1994 Line-by-line regexp tags by Tom Tromey.
32 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
33 * 2002 #line directives by Francesco Potortì.
34 *
35 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
36 */
37
38 /*
39 * If you want to add support for a new language, start by looking at the LUA
40 * language, which is the simplest. Alternatively, consider shipping a
41 * configuration file containing regexp definitions for etags.
42 */
43
44 char pot_etags_version[] = "@(#) pot revision number is 17.18";
45
46 #define TRUE 1
47 #define FALSE 0
48
49 #ifdef DEBUG
50 # undef DEBUG
51 # define DEBUG TRUE
52 #else
53 # define DEBUG FALSE
54 # define NDEBUG /* disable assert */
55 #endif
56
57 #ifdef HAVE_CONFIG_H
58 # include <config.h>
59 /* On some systems, Emacs defines static as nothing for the sake
60 of unexec. We don't want that here since we don't use unexec. */
61 # undef static
62 # define ETAGS_REGEXPS /* use the regexp features */
63 # define LONG_OPTIONS /* accept long options */
64 # ifndef PTR /* for Xemacs */
65 # define PTR void *
66 # endif
67 # ifndef __P /* for Xemacs */
68 # define __P(args) args
69 # endif
70 #else /* no config.h */
71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
72 # define __P(args) args /* use prototypes */
73 # define PTR void * /* for generic pointers */
74 # else /* not standard C */
75 # define __P(args) () /* no prototypes */
76 # define const /* remove const for old compilers' sake */
77 # define PTR long * /* don't use void* */
78 # endif
79 #endif /* !HAVE_CONFIG_H */
80
81 #ifndef _GNU_SOURCE
82 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
83 #endif
84
85 #ifdef LONG_OPTIONS
86 # undef LONG_OPTIONS
87 # define LONG_OPTIONS TRUE
88 #else
89 # define LONG_OPTIONS FALSE
90 #endif
91
92 /* WIN32_NATIVE is for Xemacs.
93 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
94 #ifdef WIN32_NATIVE
95 # undef MSDOS
96 # undef WINDOWSNT
97 # define WINDOWSNT
98 #endif /* WIN32_NATIVE */
99
100 #ifdef MSDOS
101 # undef MSDOS
102 # define MSDOS TRUE
103 # include <fcntl.h>
104 # include <sys/param.h>
105 # include <io.h>
106 # ifndef HAVE_CONFIG_H
107 # define DOS_NT
108 # include <sys/config.h>
109 # endif
110 #else
111 # define MSDOS FALSE
112 #endif /* MSDOS */
113
114 #ifdef WINDOWSNT
115 # include <stdlib.h>
116 # include <fcntl.h>
117 # include <string.h>
118 # include <direct.h>
119 # include <io.h>
120 # define MAXPATHLEN _MAX_PATH
121 # undef HAVE_NTGUI
122 # undef DOS_NT
123 # define DOS_NT
124 # ifndef HAVE_GETCWD
125 # define HAVE_GETCWD
126 # endif /* undef HAVE_GETCWD */
127 #else /* not WINDOWSNT */
128 # ifdef STDC_HEADERS
129 # include <stdlib.h>
130 # include <string.h>
131 # else /* no standard C headers */
132 extern char *getenv ();
133 # ifdef VMS
134 # define EXIT_SUCCESS 1
135 # define EXIT_FAILURE 0
136 # else /* no VMS */
137 # define EXIT_SUCCESS 0
138 # define EXIT_FAILURE 1
139 # endif
140 # endif
141 #endif /* !WINDOWSNT */
142
143 #ifdef HAVE_UNISTD_H
144 # include <unistd.h>
145 #else
146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
147 extern char *getcwd (char *buf, size_t size);
148 # endif
149 #endif /* HAVE_UNISTD_H */
150
151 #include <stdio.h>
152 #include <ctype.h>
153 #include <errno.h>
154 #ifndef errno
155 extern int errno;
156 #endif
157 #include <sys/types.h>
158 #include <sys/stat.h>
159
160 #include <assert.h>
161 #ifdef NDEBUG
162 # undef assert /* some systems have a buggy assert.h */
163 # define assert(x) ((void) 0)
164 #endif
165
166 #if !defined (S_ISREG) && defined (S_IFREG)
167 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
168 #endif
169
170 #if LONG_OPTIONS
171 # include <getopt.h>
172 #else
173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
174 extern char *optarg;
175 extern int optind, opterr;
176 #endif /* LONG_OPTIONS */
177
178 #ifdef ETAGS_REGEXPS
179 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
180 # ifdef __CYGWIN__ /* compiling on Cygwin */
181 !!! NOTICE !!!
182 the regex.h distributed with Cygwin is not compatible with etags, alas!
183 If you want regular expression support, you should delete this notice and
184 arrange to use the GNU regex.h and regex.c.
185 # endif
186 # endif
187 # include <regex.h>
188 #endif /* ETAGS_REGEXPS */
189
190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
191 Leave it undefined to make the program "etags", which makes emacs-style
192 tag tables and tags typedefs, #defines and struct/union/enum by default. */
193 #ifdef CTAGS
194 # undef CTAGS
195 # define CTAGS TRUE
196 #else
197 # define CTAGS FALSE
198 #endif
199
/*
 * String equality helpers.  Each one evaluates to non-zero when the
 * two strings compare equal and to zero otherwise:
 *   streq       -- whole strings, compared with strcmp
 *   strcaseeq   -- whole strings, compared with etags_strcasecmp
 *   strneq      -- at most N characters, compared with strncmp
 *   strncaseeq  -- at most N characters, compared with etags_strncasecmp
 * When assertions are enabled, BOTH arguments are checked to be
 * non-NULL, because the comparison functions read through both of
 * them.  The arguments appear more than once in each expansion, so
 * they must not have side effects.
 */
#define streq(s,t) (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
204
/*
 * Character classification helpers.  CHAR converts its argument to
 * unsigned int and masks it with CHARS - 1, so the result is always a
 * valid index in [0, CHARS) even when the argument is a negative char.
 * The five lookup macros index the tables _wht, _nin, _btk, _itk and
 * _etk with that value; the ISxxx, lowcase and upcase wrappers pass the
 * same masked value to the <ctype.h> functions.
 */
205 #define CHARS 256 /* 2^8, one slot per 8-bit char value; must stay a power of two because CHAR masks with CHARS - 1 */
206 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
207 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
208 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
209 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
210 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
211 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
212
213 #define ISALNUM(c) isalnum (CHAR(c))
214 #define ISALPHA(c) isalpha (CHAR(c))
215 #define ISDIGIT(c) isdigit (CHAR(c))
216 #define ISLOWER(c) islower (CHAR(c))
217
218 #define lowcase(c) tolower (CHAR(c))
219 #define upcase(c) toupper (CHAR(c))
220
221
222 /*
223 * xnew, xrnew -- allocate, reallocate storage
224 *
225 * SYNOPSIS: Type *xnew (int n, Type);
226 * void xrnew (OldPointer, int n, Type);
227 */
228 #if DEBUG
229 # include "chkmalloc.h"
230 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
231 (n) * sizeof (Type)))
232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
233 (char *) (op), (n) * sizeof (Type)))
234 #else
235 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
237 (char *) (op), (n) * sizeof (Type)))
238 #endif
239
240 #define bool int
241
242 typedef void Lang_function __P((FILE *));
243
/* Pairs a compressed-file name suffix with the command that decompresses
   such a file to standard output. */
244 typedef struct
245 {
246 char *suffix; /* file name suffix for this compressor */
247 char *command; /* takes one arg and decompresses to stdout */
248 } compressor;
249
/* Describes one language: its name and help text, the function that
   parses its files, and the three NULL-able lists (suffixes, file names,
   interpreters) that associate files with it. */
250 typedef struct
251 {
252 char *name; /* language name */
253 char *help; /* detailed help for the language */
254 Lang_function *function; /* parse function */
255 char **suffixes; /* name suffixes of this language's files */
256 char **filenames; /* names of this language's files */
257 char **interpreters; /* interpreters for this language */
258 bool metasource; /* source used to generate other sources */
259 } language;
260
/* Description of one input file.  Descriptions are chained through
   NEXT into a singly linked list. */
261 typedef struct fdesc
262 {
263 struct fdesc *next; /* for the linked list */
264 char *infname; /* uncompressed input file name */
265 char *infabsname; /* absolute uncompressed input file name */
266 char *infabsdir; /* absolute dir of input file */
267 char *taggedfname; /* file name to write in tagfile */
268 language *lang; /* language of file */
269 char *prop; /* file properties to write in tagfile */
270 bool usecharno; /* etags tags shall contain char number */
271 bool written; /* entry written in the tags file */
272 } fdesc;
273
/* One tag.  Nodes link to each other through LEFT and RIGHT, and each
   node points back, through FDP, to the description of the file the
   tag was found in. */
274 typedef struct node_st
275 { /* sorting structure */
276 struct node_st *left, *right; /* left and right sons */
277 fdesc *fdp; /* description of file to whom tag belongs */
278 char *name; /* tag name */
279 char *regex; /* search regexp */
280 bool valid; /* write this tag on the tag file */
281 bool is_func; /* function tag: use regexp in CTAGS mode */
282 bool been_warned; /* warning already given for duplicated tag */
283 int lno; /* line number tag is on */
284 long cno; /* character number line starts on */
285 } node;
286
287 /*
288 * A `linebuffer' is a structure which holds a line of text.
289 * `readline_internal' reads a line from a stream into a linebuffer
290 * and works regardless of the length of the line.
291 * SIZE is the size of BUFFER, LEN is the length of the string in
292 * BUFFER after readline reads it.
293 */
/* Holds one line of text of arbitrary length. */
294 typedef struct
295 {
296 long size; /* size of BUFFER */
297 int len; /* length of the string currently held in BUFFER */
298 char *buffer; /* the text itself */
299 } linebuffer;
300
301 /* Used to support mixing of --lang and file names. */
/* Each element records one command-line item together with its kind.
   A list of these ends with an element whose arg_type is at_end;
   print_help stops iterating when it reaches that element. */
302 typedef struct
303 {
304 enum {
305 at_language, /* a language specification */
306 at_regexp, /* a regular expression */
307 at_filename, /* a file name */
308 at_stdin, /* read from stdin here */
309 at_end /* stop parsing the list */
310 } arg_type; /* argument type */
311 language *lang; /* language associated with the argument */
312 char *what; /* the argument itself */
313 } argument;
314
315 #ifdef ETAGS_REGEXPS
316 /* Structure defining a regular expression. */
/* Entries are chained through P_NEXT into a singly linked list. */
317 typedef struct regexp
318 {
319 struct regexp *p_next; /* pointer to next in list */
320 language *lang; /* if set, use only for this language */
321 char *pattern; /* the regexp pattern */
322 char *name; /* tag name */
323 struct re_pattern_buffer *pat; /* the compiled pattern */
324 struct re_registers regs; /* re registers */
325 bool error_signaled; /* already signaled for this regexp */
326 bool force_explicit_name; /* do not allow implicit tag name */
327 bool ignore_case; /* ignore case when matching */
328 bool multi_line; /* do a multi-line match on the whole file */
329 } regexp;
330 #endif /* ETAGS_REGEXPS */
331
332
333 /* Many compilers barf on this:
334 Lang_function Ada_funcs;
335 so let's write it this way */
336 static void Ada_funcs __P((FILE *));
337 static void Asm_labels __P((FILE *));
338 static void C_entries __P((int c_ext, FILE *));
339 static void default_C_entries __P((FILE *));
340 static void plain_C_entries __P((FILE *));
341 static void Cjava_entries __P((FILE *));
342 static void Cobol_paragraphs __P((FILE *));
343 static void Cplusplus_entries __P((FILE *));
344 static void Cstar_entries __P((FILE *));
345 static void Erlang_functions __P((FILE *));
346 static void Forth_words __P((FILE *));
347 static void Fortran_functions __P((FILE *));
348 static void HTML_labels __P((FILE *));
349 static void Lisp_functions __P((FILE *));
350 static void Lua_functions __P((FILE *));
351 static void Makefile_targets __P((FILE *));
352 static void Pascal_functions __P((FILE *));
353 static void Perl_functions __P((FILE *));
354 static void PHP_functions __P((FILE *));
355 static void PS_functions __P((FILE *));
356 static void Prolog_functions __P((FILE *));
357 static void Python_functions __P((FILE *));
358 static void Scheme_functions __P((FILE *));
359 static void TeX_commands __P((FILE *));
360 static void Texinfo_nodes __P((FILE *));
361 static void Yacc_entries __P((FILE *));
362 static void just_read_file __P((FILE *));
363
364 static void print_language_names __P((void));
365 static void print_version __P((void));
366 static void print_help __P((argument *));
367 int main __P((int, char **));
368
369 static compressor *get_compressor_from_suffix __P((char *, char **));
370 static language *get_language_from_langname __P((const char *));
371 static language *get_language_from_interpreter __P((char *));
372 static language *get_language_from_filename __P((char *, bool));
373 static void readline __P((linebuffer *, FILE *));
374 static long readline_internal __P((linebuffer *, FILE *));
375 static bool nocase_tail __P((char *));
376 static void get_tag __P((char *, char **));
377
378 #ifdef ETAGS_REGEXPS
379 static void analyse_regex __P((char *));
380 static void free_regexps __P((void));
381 static void regex_tag_multiline __P((void));
382 #endif /* ETAGS_REGEXPS */
383 static void error __P((const char *, const char *));
384 static void suggest_asking_for_help __P((void));
385 void fatal __P((char *, char *));
386 static void pfatal __P((char *));
387 static void add_node __P((node *, node **));
388
389 static void init __P((void));
390 static void process_file_name __P((char *, language *));
391 static void process_file __P((FILE *, char *, language *));
392 static void find_entries __P((FILE *));
393 static void free_tree __P((node *));
394 static void free_fdesc __P((fdesc *));
395 static void pfnote __P((char *, bool, char *, int, int, long));
396 static void make_tag __P((char *, int, bool, char *, int, int, long));
397 static void invalidate_nodes __P((fdesc *, node **));
398 static void put_entries __P((node *));
399
400 static char *concat __P((char *, char *, char *));
401 static char *skip_spaces __P((char *));
402 static char *skip_non_spaces __P((char *));
403 static char *savenstr __P((char *, int));
404 static char *savestr __P((char *));
405 static char *etags_strchr __P((const char *, int));
406 static char *etags_strrchr __P((const char *, int));
407 static int etags_strcasecmp __P((const char *, const char *));
408 static int etags_strncasecmp __P((const char *, const char *, int));
409 static char *etags_getcwd __P((void));
410 static char *relative_filename __P((char *, char *));
411 static char *absolute_filename __P((char *, char *));
412 static char *absolute_dirname __P((char *, char *));
413 static bool filename_is_absolute __P((char *f));
414 static void canonicalize_filename __P((char *));
415 static void linebuffer_init __P((linebuffer *));
416 static void linebuffer_setlen __P((linebuffer *, int));
417 static PTR xmalloc __P((unsigned int));
418 static PTR xrealloc __P((char *, unsigned int));
419
420 \f
421 static char searchar = '/'; /* use /.../ searches */
422
423 static char *tagfile; /* output file */
424 static char *progname; /* name this program was invoked with */
425 static char *cwd; /* current working directory */
426 static char *tagfiledir; /* directory of tagfile */
427 static FILE *tagf; /* ioptr for tags file */
428
429 static fdesc *fdhead; /* head of file description list */
430 static fdesc *curfdp; /* current file description */
431 static int lineno; /* line number of current line */
432 static long charno; /* current character number */
433 static long linecharno; /* charno of start of current line */
434 static char *dbp; /* pointer to start of current tag */
435
436 static const int invalidcharno = -1;
437
438 static node *nodehead; /* the head of the binary tree of tags */
439 static node *last_node; /* the last node created */
440
441 static linebuffer lb; /* the current line */
442 static linebuffer filebuf; /* a buffer containing the whole file */
443 static linebuffer token_name; /* a buffer containing a tag name */
444
445 /* boolean "functions" (see init) */
446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
447 static char
448 /* white chars */
449 *white = " \f\t\n\r\v",
450 /* not in a name */
451 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
452 /* token ending chars */
453 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
454 /* token starting chars */
455 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
456 /* valid in-token chars */
457 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
458
459 static bool append_to_tagfile; /* -a: append to tags */
460 /* The next four default to TRUE for etags, but to FALSE for ctags. */
461 static bool typedefs; /* -t: create tags for C and Ada typedefs */
462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
463 /* 0 struct/enum/union decls, and C++ */
464 /* member functions. */
465 static bool constantypedefs; /* -d: create tags for C #define, enum */
466 /* constants and variables. */
467 /* -D: opposite of -d. Default under ctags. */
468 static bool globals; /* create tags for global variables */
469 static bool declarations; /* --declarations: tag them and extern in C&Co*/
470 static bool members; /* create tags for C member variables */
471 static bool no_line_directive; /* ignore #line directives (undocumented) */
472 static bool update; /* -u: update tags */
473 static bool vgrind_style; /* -v: create vgrind style index output */
474 static bool no_warnings; /* -w: suppress warnings */
475 static bool cxref_style; /* -x: create cxref style output */
476 static bool cplusplus; /* .[hc] means C++, not C */
477 static bool ignoreindent; /* -I: ignore indentation in C */
478 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
479
480 /* STDIN is defined in LynxOS system headers */
481 #ifdef STDIN
482 # undef STDIN
483 #endif
484
485 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
486 static bool parsing_stdin; /* --parse-stdin used */
487
488 #ifdef ETAGS_REGEXPS
489 static regexp *p_head; /* list of all regexps */
490 static bool need_filebuf; /* some regexes are multi-line */
491 #else
492 # define need_filebuf FALSE
493 #endif /* ETAGS_REGEXPS */
494
#if LONG_OPTIONS
/*
 * Long options accepted by getopt_long.  Entries whose third field is
 * non-NULL store the fourth field into the pointed-to flag variable;
 * the others make getopt_long return the fourth field.  The list is
 * terminated by the all-zero entry { NULL }.
 *
 * `--help' is listed exactly once.  A second entry with the same name
 * but a different return value would make getopt_long treat
 * abbreviations such as `--he' as ambiguous, since both entries would
 * match the prefix without being equivalent.
 */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
#endif /* ETAGS_REGEXPS */
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
#endif /* LONG_OPTIONS */
537
/* Table of known compressors: each entry gives a file name suffix and
   the command used for files with that suffix.  The table ends with an
   entry whose suffix is NULL. */
538 static compressor compressors[] =
539 {
540 { "z", "gzip -d -c"},
541 { "Z", "gzip -d -c"},
542 { "gz", "gzip -d -c"},
543 { "GZ", "gzip -d -c"},
544 { "bz2", "bzip2 -d -c" },
545 { NULL }
546 };
547
548 /*
549 * Language stuff.
550 */
551
552 /* Ada code */
553 static char *Ada_suffixes [] =
554 { "ads", "adb", "ada", NULL };
555 static char Ada_help [] =
556 "In Ada code, functions, procedures, packages, tasks and types are\n\
557 tags. Use the `--packages-only' option to create tags for\n\
558 packages only.\n\
559 Ada tag names have suffixes indicating the type of entity:\n\
560 Entity type: Qualifier:\n\
561 ------------ ----------\n\
562 function /f\n\
563 procedure /p\n\
564 package spec /s\n\
565 package body /b\n\
566 type /t\n\
567 task /k\n\
568 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
569 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
570 will just search for any tag `bidule'.";
571
572 /* Assembly code */
573 static char *Asm_suffixes [] =
574 { "a", /* Unix assembler */
575 "asm", /* Microcontroller assembly */
576 "def", /* BSO/Tasking definition includes */
577 "inc", /* Microcontroller include files */
578 "ins", /* Microcontroller include files */
579 "s", "sa", /* Unix assembler */
580 "S", /* cpp-processed Unix assembler */
581 "src", /* BSO/Tasking C compiler output */
582 NULL
583 };
584 static char Asm_help [] =
585 "In assembler code, labels appearing at the beginning of a line,\n\
586 followed by a colon, are tags.";
587
588
589 /* Note that .c and .h can be considered C++, if the --c++ flag was
590 given, or if the `class' or `template' keywords are met inside the file.
591 That is why default_C_entries is called for these. */
592 static char *default_C_suffixes [] =
593 { "c", "h", NULL };
594 static char default_C_help [] =
595 "In C code, any C function or typedef is a tag, and so are\n\
596 definitions of `struct', `union' and `enum'. `#define' macro\n\
597 definitions and `enum' constants are tags unless you specify\n\
598 `--no-defines'. Global variables are tags unless you specify\n\
599 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
600 can make the tags table file much smaller.\n\
601 You can tag function declarations and external variables by\n\
602 using `--declarations', and struct members by using `--members'.";
603
604 static char *Cplusplus_suffixes [] =
605 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
606 "M", /* Objective C++ */
607 "pdb", /* Postscript with C syntax */
608 NULL };
609 static char Cplusplus_help [] =
610 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
611 --help --lang=c --lang=c++ for full help.)\n\
612 In addition to C tags, member functions are also recognized, and\n\
613 optionally member variables if you use the `--members' option.\n\
614 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
615 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
616 `operator+'.";
617
618 static char *Cjava_suffixes [] =
619 { "java", NULL };
620 static char Cjava_help [] =
621 "In Java code, all the tags constructs of C and C++ code are\n\
622 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
623
624
625 static char *Cobol_suffixes [] =
626 { "COB", "cob", NULL };
627 static char Cobol_help [] =
628 "In Cobol code, tags are paragraph names; that is, any word\n\
629 starting in column 8 and followed by a period.";
630
631 static char *Cstar_suffixes [] =
632 { "cs", "hs", NULL };
633
634 static char *Erlang_suffixes [] =
635 { "erl", "hrl", NULL };
636 static char Erlang_help [] =
637 "In Erlang code, the tags are the functions, records and macros\n\
638 defined in the file.";
639
/* File name suffixes of Forth sources; NULL-terminated.  Declared
   static like every other suffix table in this file, so that it does
   not export a global symbol. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
642 static char Forth_help [] =
643 "In Forth code, tags are words defined by `:',\n\
644 constant, code, create, defer, value, variable, buffer:, field.";
645
646 static char *Fortran_suffixes [] =
647 { "F", "f", "f90", "for", NULL };
648 static char Fortran_help [] =
649 "In Fortran code, functions, subroutines and block data are tags.";
650
651 static char *HTML_suffixes [] =
652 { "htm", "html", "shtml", NULL };
653 static char HTML_help [] =
654 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
655 `h3' headers. Also, tags are `name=' in anchors and all\n\
656 occurrences of `id='.";
657
658 static char *Lisp_suffixes [] =
659 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
660 static char Lisp_help [] =
661 "In Lisp code, any function defined with `defun', any variable\n\
662 defined with `defvar' or `defconst', and in general the first\n\
663 argument of any expression that starts with `(def' in column zero\n\
664 is a tag.";
665
666 static char *Lua_suffixes [] =
667 { "lua", "LUA", NULL };
668 static char Lua_help [] =
669 "In Lua scripts, all functions are tags.";
670
671 static char *Makefile_filenames [] =
672 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
673 static char Makefile_help [] =
674 "In makefiles, targets are tags; additionally, variables are tags\n\
675 unless you specify `--no-globals'.";
676
677 static char *Objc_suffixes [] =
678 { "lm", /* Objective lex file */
679 "m", /* Objective C file */
680 NULL };
681 static char Objc_help [] =
682 "In Objective C code, tags include Objective C definitions for classes,\n\
683 class categories, methods and protocols. Tags for variables and\n\
684 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
685
686 static char *Pascal_suffixes [] =
687 { "p", "pas", NULL };
688 static char Pascal_help [] =
689 "In Pascal code, the tags are the functions and procedures defined\n\
690 in the file.";
691
692 static char *Perl_suffixes [] =
693 { "pl", "pm", NULL };
694 static char *Perl_interpreters [] =
695 { "perl", "@PERL@", NULL };
696 static char Perl_help [] =
697 "In Perl code, the tags are the packages, subroutines and variables\n\
698 defined by the `package', `sub', `my' and `local' keywords. Use\n\
699 `--globals' if you want to tag global variables. Tags for\n\
700 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
701 defined in the default package is `main::SUB'.";
702
703 static char *PHP_suffixes [] =
704 { "php", "php3", "php4", NULL };
705 static char PHP_help [] =
706 "In PHP code, tags are functions, classes and defines. When using\n\
707 the `--members' option, vars are tags too.";
708
709 static char *plain_C_suffixes [] =
710 { "pc", /* Pro*C file */
711 NULL };
712
713 static char *PS_suffixes [] =
714 { "ps", "psw", NULL }; /* .psw is for PSWrap */
715 static char PS_help [] =
716 "In PostScript code, the tags are the functions.";
717
718 static char *Prolog_suffixes [] =
719 { "prolog", NULL };
720 static char Prolog_help [] =
721 "In Prolog code, tags are predicates and rules at the beginning of\n\
722 line.";
723
724 static char *Python_suffixes [] =
725 { "py", NULL };
726 static char Python_help [] =
727 "In Python code, `def' or `class' at the beginning of a line\n\
728 generate a tag.";
729
730 /* Can't do the `SCM' or `scm' prefix with a version number. */
731 static char *Scheme_suffixes [] =
732 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
733 static char Scheme_help [] =
734 "In Scheme code, tags include anything defined with `def' or with a\n\
735 construct whose name starts with `def'. They also include\n\
736 variables set with `set!' at top level in the file.";
737
738 static char *TeX_suffixes [] =
739 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
740 static char TeX_help [] =
741 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
742 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
743 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
744 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
745 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
746 \n\
747 Other commands can be specified by setting the environment variable\n\
748 `TEXTAGS' to a colon-separated list like, for example,\n\
749 TEXTAGS=\"mycommand:myothercommand\".";
750
751
752 static char *Texinfo_suffixes [] =
753 { "texi", "texinfo", "txi", NULL };
754 static char Texinfo_help [] =
755 "for texinfo files, lines starting with @node are tagged.";
756
757 static char *Yacc_suffixes [] =
758 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
759 static char Yacc_help [] =
760 "In Bison or Yacc input files, each rule defines as a tag the\n\
761 nonterminal it constructs. The portions of the file that contain\n\
762 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
763 for full help).";
764
765 static char auto_help [] =
766 "`auto' is not a real language, it indicates to use\n\
767 a default language for files base on file name suffix and file contents.";
768
769 static char none_help [] =
770 "`none' is not a real language, it indicates to only do\n\
771 regexp processing on files.";
772
773 static char no_lang_help [] =
774 "No detailed help available for this language.";
775
776
777 /*
778 * Table of languages.
779 *
780 * It is ok for a given function to be listed under more than one
781 * name. I just didn't.
782 */
783
/* Columns follow the field order of `language': name, help text, parse
   function, suffixes, file names, interpreters, metasource.  Trailing
   fields that an entry leaves out are zero-initialized, i.e. NULL or
   FALSE.  The entry whose name is NULL terminates the table. */
784 static language lang_names [] =
785 {
786 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
787 { "asm", Asm_help, Asm_labels, Asm_suffixes },
788 { "c", default_C_help, default_C_entries, default_C_suffixes },
789 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
790 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
791 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
792 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
793 { "forth", Forth_help, Forth_words, Forth_suffixes },
794 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
795 { "html", HTML_help, HTML_labels, HTML_suffixes },
796 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
797 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
798 { "lua", Lua_help, Lua_functions, Lua_suffixes },
799 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
800 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
801 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
802 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
803 { "php", PHP_help, PHP_functions, PHP_suffixes },
804 { "postscript",PS_help, PS_functions, PS_suffixes },
805 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
806 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
807 { "python", Python_help, Python_functions, Python_suffixes },
808 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
809 { "tex", TeX_help, TeX_commands, TeX_suffixes },
810 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
811 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
812 { "auto", auto_help }, /* default guessing scheme */
813 { "none", none_help, just_read_file }, /* regexp matching only */
814 { NULL } /* end of list */
815 };
816
817 \f
818 static void
819 print_language_names ()
820 {
821 language *lang;
822 char **name, **ext;
823
824 puts ("\nThese are the currently supported languages, along with the\n\
825 default file names and dot suffixes:");
826 for (lang = lang_names; lang->name != NULL; lang++)
827 {
828 printf (" %-*s", 10, lang->name);
829 if (lang->filenames != NULL)
830 for (name = lang->filenames; *name != NULL; name++)
831 printf (" %s", *name);
832 if (lang->suffixes != NULL)
833 for (ext = lang->suffixes; *ext != NULL; ext++)
834 printf (" .%s", *ext);
835 puts ("");
836 }
837 puts ("where `auto' means use default language for files based on file\n\
838 name suffix, and `none' means only do regexp processing on files.\n\
839 If no language is specified and no matching suffix is found,\n\
840 the first line of the file is read for a sharp-bang (#!) sequence\n\
841 followed by the name of an interpreter. If no such sequence is found,\n\
842 Fortran is tried first; if no tags are found, C is tried next.\n\
843 When parsing any C file, a \"class\" or \"template\" keyword\n\
844 switches to C++.");
845 puts ("Compressed files are supported using gzip and bzip2.\n\
846 \n\
847 For detailed help on a given language use, for example,\n\
848 etags --help --lang=ada.");
849 }
850
851 #ifndef EMACS_NAME
852 # define EMACS_NAME "standalone"
853 #endif
854 #ifndef VERSION
855 # define VERSION "version"
856 #endif
857 static void
858 print_version ()
859 {
860 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
861 puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
862 puts ("This program is distributed under the same terms as Emacs");
863
864 exit (EXIT_SUCCESS);
865 }
866
/*
 * Print help and exit successfully; this function does not return.
 *
 * ARGBUFFER is the list of parsed arguments, terminated by an entry
 * whose arg_type is at_end.  If it contains any at_language entries,
 * only the help text of those languages is printed (separated by blank
 * lines).  Otherwise the general usage message is printed, followed by
 * the table of supported languages.  Which options are described
 * depends on whether the program is built as ctags or etags (CTAGS).
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one text per --language option seen. */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, the general help is skipped. */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (LONG_OPTIONS)
    puts ("You may use unambiguous abbreviations for the long option names.");
  else
    puts ("Long option names do not work with this executable, as it is not\n\
linked with GNU getopt.");
  puts (" A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
Append tag entries to existing tags file.");

  puts ("--packages-only\n\
For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++. Retained for backward compatibility and for debugging and
     experimentation. In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* ctags and etags have opposite defaults here, hence opposite options. */
  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for C #define constants and enum constants.\n\
This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");

  puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");
  puts ("--members\n\
Create tag entries for members of structures in some languages.");

#ifdef ETAGS_REGEXPS
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
Make a tag for each line matching a regular expression pattern\n\
in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
files only. REGEXFILE is a file containing one REGEXP per line.\n\
REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
MODS are optional one-letter modifiers: `i' means to ignore case,\n\
`m' means to allow multi-line matches, `s' implies `m' and\n\
causes dot to match any character, including newline.");
  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
  puts ("-I, --ignore-indentation\n\
In C and C++ do not assume that a closing brace in the first\n\
column is the final brace of a function or structure definition.");
  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions apply to ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
Print on the standard output an index of items intended for\n\
human consumption, similar to the output of vgrind. The index\n\
is sorted, and gives the page number of each item.");
      puts ("-w, --no-warn\n\
Suppress warning messages about entries defined in multiple\n\
files.");
      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.\n\
Followed by one or more `--language' options prints detailed\n\
help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1024
1025 \f
1026 #ifdef VMS /* VMS specific functions */
1027
/* End-of-string marker used by the VMS helpers below. */
#define EOS '\0'

/* This is a BUG! ANY arbitrary limit is a BUG!
   Won't someone please fix this? */
/* Maximum number of characters of a file specification kept in a vspec. */
#define MAX_FILE_SPEC_LEN 255
/* A length-prefixed file specification buffer. */
typedef struct {
  short curlen;                         /* number of characters stored in body */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the text; one extra byte for EOS */
} vspec;
1037
1038 /*
1039 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1040 returning in each successive call the next file name matching the input
1041 spec. The function expects that each in_spec passed
1042 to it will be processed to completion; in particular, up to and
1043 including the call following that in which the last matching name
1044 is returned, the function ignores the value of in_spec, and will
1045 only start processing a new spec with the following call.
1046 If an error occurs, on return out_spec contains the value
1047 of in_spec when the error occurred.
1048
1049 With each successive file name returned in out_spec, the
1050 function's return value is one. When there are no more matching
1051 names the function returns zero. If on the first call no file
1052 matches in_spec, or there is any other error, -1 is returned.
1053 */
1054
1055 #include <rmsdef.h>
1056 #include <descrip.h>
1057 #define OUTSIZE MAX_FILE_SPEC_LEN
1058 static short
1059 fn_exp (out, in)
1060 vspec *out;
1061 char *in;
1062 {
1063 static long context = 0;
1064 static struct dsc$descriptor_s o;
1065 static struct dsc$descriptor_s i;
1066 static bool pass1 = TRUE;
1067 long status;
1068 short retval;
1069
1070 if (pass1)
1071 {
1072 pass1 = FALSE;
1073 o.dsc$a_pointer = (char *) out;
1074 o.dsc$w_length = (short)OUTSIZE;
1075 i.dsc$a_pointer = in;
1076 i.dsc$w_length = (short)strlen(in);
1077 i.dsc$b_dtype = DSC$K_DTYPE_T;
1078 i.dsc$b_class = DSC$K_CLASS_S;
1079 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1080 o.dsc$b_class = DSC$K_CLASS_VS;
1081 }
1082 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1083 {
1084 out->body[out->curlen] = EOS;
1085 return 1;
1086 }
1087 else if (status == RMS$_NMF)
1088 retval = 0;
1089 else
1090 {
1091 strcpy(out->body, in);
1092 retval = -1;
1093 }
1094 lib$find_file_end(&context);
1095 pass1 = TRUE;
1096 return retval;
1097 }
1098
1099 /*
1100 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1101 name of each file specified by the provided arg expanding wildcards.
1102 */
1103 static char *
1104 gfnames (arg, p_error)
1105 char *arg;
1106 bool *p_error;
1107 {
1108 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1109
1110 switch (fn_exp (&filename, arg))
1111 {
1112 case 1:
1113 *p_error = FALSE;
1114 return filename.body;
1115 case 0:
1116 *p_error = FALSE;
1117 return NULL;
1118 default:
1119 *p_error = TRUE;
1120 return filename.body;
1121 }
1122 }
1123
#ifndef OLD     /* Newer versions of VMS do provide `system'. */
/*
 * Stand-in for system(): report that running CMD is not supported and
 * return a failure status.  The explicit return matters because main
 * compares the result of system against EXIT_SUCCESS; falling off the
 * end of the function would hand it an indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1131
1132 #define VERSION_DELIM ';'
1133 char *massage_name (s)
1134 char *s;
1135 {
1136 char *start = s;
1137
1138 for ( ; *s; s++)
1139 if (*s == VERSION_DELIM)
1140 {
1141 *s = EOS;
1142 break;
1143 }
1144 else
1145 *s = lowcase (*s);
1146 return start;
1147 }
1148 #endif /* VMS */
1149
1150 \f
1151 int
1152 main (argc, argv)
1153 int argc;
1154 char *argv[];
1155 {
1156 int i;
1157 unsigned int nincluded_files;
1158 char **included_files;
1159 argument *argbuffer;
1160 int current_arg, file_count;
1161 linebuffer filename_lb;
1162 bool help_asked = FALSE;
1163 #ifdef VMS
1164 bool got_err;
1165 #endif
1166 char *optstring;
1167 int opt;
1168
1169
1170 #ifdef DOS_NT
1171 _fmode = O_BINARY; /* all of files are treated as binary files */
1172 #endif /* DOS_NT */
1173
1174 progname = argv[0];
1175 nincluded_files = 0;
1176 included_files = xnew (argc, char *);
1177 current_arg = 0;
1178 file_count = 0;
1179
1180 /* Allocate enough no matter what happens. Overkill, but each one
1181 is small. */
1182 argbuffer = xnew (argc, argument);
1183
1184 /*
1185 * If etags, always find typedefs and structure tags. Why not?
1186 * Also default to find macro constants, enum constants and
1187 * global variables.
1188 */
1189 if (!CTAGS)
1190 {
1191 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1192 globals = TRUE;
1193 }
1194
1195 /* When the optstring begins with a '-' getopt_long does not rearrange the
1196 non-options arguments to be at the end, but leaves them alone. */
1197 optstring = "-";
1198 #ifdef ETAGS_REGEXPS
1199 optstring = "-r:Rc:";
1200 #endif /* ETAGS_REGEXPS */
1201 if (!LONG_OPTIONS)
1202 optstring += 1; /* remove the initial '-' */
1203 optstring = concat (optstring,
1204 "aCf:Il:o:SVhH",
1205 (CTAGS) ? "BxdtTuvw" : "Di:");
1206
1207 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1208 switch (opt)
1209 {
1210 case 0:
1211 /* If getopt returns 0, then it has already processed a
1212 long-named option. We should do nothing. */
1213 break;
1214
1215 case 1:
1216 /* This means that a file name has been seen. Record it. */
1217 argbuffer[current_arg].arg_type = at_filename;
1218 argbuffer[current_arg].what = optarg;
1219 ++current_arg;
1220 ++file_count;
1221 break;
1222
1223 case STDIN:
1224 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1225 argbuffer[current_arg].arg_type = at_stdin;
1226 argbuffer[current_arg].what = optarg;
1227 ++current_arg;
1228 ++file_count;
1229 if (parsing_stdin)
1230 fatal ("cannot parse standard input more than once", (char *)NULL);
1231 parsing_stdin = TRUE;
1232 break;
1233
1234 /* Common options. */
1235 case 'a': append_to_tagfile = TRUE; break;
1236 case 'C': cplusplus = TRUE; break;
1237 case 'f': /* for compatibility with old makefiles */
1238 case 'o':
1239 if (tagfile)
1240 {
1241 error ("-o option may only be given once.", (char *)NULL);
1242 suggest_asking_for_help ();
1243 /* NOTREACHED */
1244 }
1245 tagfile = optarg;
1246 break;
1247 case 'I':
1248 case 'S': /* for backward compatibility */
1249 ignoreindent = TRUE;
1250 break;
1251 case 'l':
1252 {
1253 language *lang = get_language_from_langname (optarg);
1254 if (lang != NULL)
1255 {
1256 argbuffer[current_arg].lang = lang;
1257 argbuffer[current_arg].arg_type = at_language;
1258 ++current_arg;
1259 }
1260 }
1261 break;
1262 case 'c':
1263 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1264 optarg = concat (optarg, "i", ""); /* memory leak here */
1265 /* FALLTHRU */
1266 case 'r':
1267 argbuffer[current_arg].arg_type = at_regexp;
1268 argbuffer[current_arg].what = optarg;
1269 ++current_arg;
1270 break;
1271 case 'R':
1272 argbuffer[current_arg].arg_type = at_regexp;
1273 argbuffer[current_arg].what = NULL;
1274 ++current_arg;
1275 break;
1276 case 'V':
1277 print_version ();
1278 break;
1279 case 'h':
1280 case 'H':
1281 help_asked = TRUE;
1282 break;
1283
1284 /* Etags options */
1285 case 'D': constantypedefs = FALSE; break;
1286 case 'i': included_files[nincluded_files++] = optarg; break;
1287
1288 /* Ctags options. */
1289 case 'B': searchar = '?'; break;
1290 case 'd': constantypedefs = TRUE; break;
1291 case 't': typedefs = TRUE; break;
1292 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1293 case 'u': update = TRUE; break;
1294 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1295 case 'x': cxref_style = TRUE; break;
1296 case 'w': no_warnings = TRUE; break;
1297 default:
1298 suggest_asking_for_help ();
1299 /* NOTREACHED */
1300 }
1301
1302 /* No more options. Store the rest of arguments. */
1303 for (; optind < argc; optind++)
1304 {
1305 argbuffer[current_arg].arg_type = at_filename;
1306 argbuffer[current_arg].what = argv[optind];
1307 ++current_arg;
1308 ++file_count;
1309 }
1310
1311 argbuffer[current_arg].arg_type = at_end;
1312
1313 if (help_asked)
1314 print_help (argbuffer);
1315 /* NOTREACHED */
1316
1317 if (nincluded_files == 0 && file_count == 0)
1318 {
1319 error ("no input files specified.", (char *)NULL);
1320 suggest_asking_for_help ();
1321 /* NOTREACHED */
1322 }
1323
1324 if (tagfile == NULL)
1325 tagfile = CTAGS ? "tags" : "TAGS";
1326 cwd = etags_getcwd (); /* the current working directory */
1327 if (cwd[strlen (cwd) - 1] != '/')
1328 {
1329 char *oldcwd = cwd;
1330 cwd = concat (oldcwd, "/", "");
1331 free (oldcwd);
1332 }
1333 /* Relative file names are made relative to the current directory. */
1334 if (streq (tagfile, "-")
1335 || strneq (tagfile, "/dev/", 5))
1336 tagfiledir = cwd;
1337 else
1338 tagfiledir = absolute_dirname (tagfile, cwd);
1339
1340 init (); /* set up boolean "functions" */
1341
1342 linebuffer_init (&lb);
1343 linebuffer_init (&filename_lb);
1344 linebuffer_init (&filebuf);
1345 linebuffer_init (&token_name);
1346
1347 if (!CTAGS)
1348 {
1349 if (streq (tagfile, "-"))
1350 {
1351 tagf = stdout;
1352 #ifdef DOS_NT
1353 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1354 doesn't take effect until after `stdout' is already open). */
1355 if (!isatty (fileno (stdout)))
1356 setmode (fileno (stdout), O_BINARY);
1357 #endif /* DOS_NT */
1358 }
1359 else
1360 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1361 if (tagf == NULL)
1362 pfatal (tagfile);
1363 }
1364
1365 /*
1366 * Loop through files finding functions.
1367 */
1368 for (i = 0; i < current_arg; i++)
1369 {
1370 static language *lang; /* non-NULL if language is forced */
1371 char *this_file;
1372
1373 switch (argbuffer[i].arg_type)
1374 {
1375 case at_language:
1376 lang = argbuffer[i].lang;
1377 break;
1378 #ifdef ETAGS_REGEXPS
1379 case at_regexp:
1380 analyse_regex (argbuffer[i].what);
1381 break;
1382 #endif
1383 case at_filename:
1384 #ifdef VMS
1385 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1386 {
1387 if (got_err)
1388 {
1389 error ("can't find file %s\n", this_file);
1390 argc--, argv++;
1391 }
1392 else
1393 {
1394 this_file = massage_name (this_file);
1395 }
1396 #else
1397 this_file = argbuffer[i].what;
1398 #endif
1399 /* Input file named "-" means read file names from stdin
1400 (one per line) and use them. */
1401 if (streq (this_file, "-"))
1402 {
1403 if (parsing_stdin)
1404 fatal ("cannot parse standard input AND read file names from it",
1405 (char *)NULL);
1406 while (readline_internal (&filename_lb, stdin) > 0)
1407 process_file_name (filename_lb.buffer, lang);
1408 }
1409 else
1410 process_file_name (this_file, lang);
1411 #ifdef VMS
1412 }
1413 #endif
1414 break;
1415 case at_stdin:
1416 this_file = argbuffer[i].what;
1417 process_file (stdin, this_file, lang);
1418 break;
1419 }
1420 }
1421
1422 #ifdef ETAGS_REGEXPS
1423 free_regexps ();
1424 #endif /* ETAGS_REGEXPS */
1425 free (lb.buffer);
1426 free (filebuf.buffer);
1427 free (token_name.buffer);
1428
1429 if (!CTAGS || cxref_style)
1430 {
1431 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1432 put_entries (nodehead);
1433 free_tree (nodehead);
1434 nodehead = NULL;
1435 if (!CTAGS)
1436 {
1437 fdesc *fdp;
1438
1439 /* Output file entries that have no tags. */
1440 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1441 if (!fdp->written)
1442 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1443
1444 while (nincluded_files-- > 0)
1445 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1446
1447 if (fclose (tagf) == EOF)
1448 pfatal (tagfile);
1449 }
1450
1451 exit (EXIT_SUCCESS);
1452 }
1453
1454 if (update)
1455 {
1456 char cmd[BUFSIZ];
1457 for (i = 0; i < current_arg; ++i)
1458 {
1459 switch (argbuffer[i].arg_type)
1460 {
1461 case at_filename:
1462 case at_stdin:
1463 break;
1464 default:
1465 continue; /* the for loop */
1466 }
1467 sprintf (cmd,
1468 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1469 tagfile, argbuffer[i].what, tagfile);
1470 if (system (cmd) != EXIT_SUCCESS)
1471 fatal ("failed to execute shell command", (char *)NULL);
1472 }
1473 append_to_tagfile = TRUE;
1474 }
1475
1476 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1477 if (tagf == NULL)
1478 pfatal (tagfile);
1479 put_entries (nodehead); /* write all the tags (CTAGS) */
1480 free_tree (nodehead);
1481 nodehead = NULL;
1482 if (fclose (tagf) == EOF)
1483 pfatal (tagfile);
1484
1485 if (CTAGS)
1486 if (append_to_tagfile || update)
1487 {
1488 char cmd[2*BUFSIZ+10];
1489 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1490 exit (system (cmd));
1491 }
1492 return EXIT_SUCCESS;
1493 }
1494
1495
1496 /*
1497 * Return a compressor given the file name. If EXTPTR is non-zero,
1498 * return a pointer into FILE where the compressor-specific
1499 * extension begins. If no compressor is found, NULL is returned
1500 * and EXTPTR is not significant.
1501 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1502 */
1503 static compressor *
1504 get_compressor_from_suffix (file, extptr)
1505 char *file;
1506 char **extptr;
1507 {
1508 compressor *compr;
1509 char *slash, *suffix;
1510
1511 /* This relies on FN to be after canonicalize_filename,
1512 so we don't need to consider backslashes on DOS_NT. */
1513 slash = etags_strrchr (file, '/');
1514 suffix = etags_strrchr (file, '.');
1515 if (suffix == NULL || suffix < slash)
1516 return NULL;
1517 if (extptr != NULL)
1518 *extptr = suffix;
1519 suffix += 1;
1520 /* Let those poor souls who live with DOS 8+3 file name limits get
1521 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1522 Only the first do loop is run if not MSDOS */
1523 do
1524 {
1525 for (compr = compressors; compr->suffix != NULL; compr++)
1526 if (streq (compr->suffix, suffix))
1527 return compr;
1528 if (!MSDOS)
1529 break; /* do it only once: not really a loop */
1530 if (extptr != NULL)
1531 *extptr = ++suffix;
1532 } while (*suffix != '\0');
1533 return NULL;
1534 }
1535
1536
1537
1538 /*
1539 * Return a language given the name.
1540 */
1541 static language *
1542 get_language_from_langname (name)
1543 const char *name;
1544 {
1545 language *lang;
1546
1547 if (name == NULL)
1548 error ("empty language name", (char *)NULL);
1549 else
1550 {
1551 for (lang = lang_names; lang->name != NULL; lang++)
1552 if (streq (name, lang->name))
1553 return lang;
1554 error ("unknown language \"%s\"", name);
1555 }
1556
1557 return NULL;
1558 }
1559
1560
1561 /*
1562 * Return a language given the interpreter name.
1563 */
1564 static language *
1565 get_language_from_interpreter (interpreter)
1566 char *interpreter;
1567 {
1568 language *lang;
1569 char **iname;
1570
1571 if (interpreter == NULL)
1572 return NULL;
1573 for (lang = lang_names; lang->name != NULL; lang++)
1574 if (lang->interpreters != NULL)
1575 for (iname = lang->interpreters; *iname != NULL; iname++)
1576 if (streq (*iname, interpreter))
1577 return lang;
1578
1579 return NULL;
1580 }
1581
1582
1583
1584 /*
1585 * Return a language given the file name.
1586 */
1587 static language *
1588 get_language_from_filename (file, case_sensitive)
1589 char *file;
1590 bool case_sensitive;
1591 {
1592 language *lang;
1593 char **name, **ext, *suffix;
1594
1595 /* Try whole file name first. */
1596 for (lang = lang_names; lang->name != NULL; lang++)
1597 if (lang->filenames != NULL)
1598 for (name = lang->filenames; *name != NULL; name++)
1599 if ((case_sensitive)
1600 ? streq (*name, file)
1601 : strcaseeq (*name, file))
1602 return lang;
1603
1604 /* If not found, try suffix after last dot. */
1605 suffix = etags_strrchr (file, '.');
1606 if (suffix == NULL)
1607 return NULL;
1608 suffix += 1;
1609 for (lang = lang_names; lang->name != NULL; lang++)
1610 if (lang->suffixes != NULL)
1611 for (ext = lang->suffixes; *ext != NULL; ext++)
1612 if ((case_sensitive)
1613 ? streq (*ext, suffix)
1614 : strcaseeq (*ext, suffix))
1615 return lang;
1616 return NULL;
1617 }
1618
1619 \f
1620 /*
1621 * This routine is called on each file argument.
1622 */
1623 static void
1624 process_file_name (file, lang)
1625 char *file;
1626 language *lang;
1627 {
1628 struct stat stat_buf;
1629 FILE *inf;
1630 fdesc *fdp;
1631 compressor *compr;
1632 char *compressed_name, *uncompressed_name;
1633 char *ext, *real_name;
1634 int retval;
1635
1636 canonicalize_filename (file);
1637 if (streq (file, tagfile) && !streq (tagfile, "-"))
1638 {
1639 error ("skipping inclusion of %s in self.", file);
1640 return;
1641 }
1642 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1643 {
1644 compressed_name = NULL;
1645 real_name = uncompressed_name = savestr (file);
1646 }
1647 else
1648 {
1649 real_name = compressed_name = savestr (file);
1650 uncompressed_name = savenstr (file, ext - file);
1651 }
1652
1653 /* If the canonicalized uncompressed name
1654 has already been dealt with, skip it silently. */
1655 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1656 {
1657 assert (fdp->infname != NULL);
1658 if (streq (uncompressed_name, fdp->infname))
1659 goto cleanup;
1660 }
1661
1662 if (stat (real_name, &stat_buf) != 0)
1663 {
1664 /* Reset real_name and try with a different name. */
1665 real_name = NULL;
1666 if (compressed_name != NULL) /* try with the given suffix */
1667 {
1668 if (stat (uncompressed_name, &stat_buf) == 0)
1669 real_name = uncompressed_name;
1670 }
1671 else /* try all possible suffixes */
1672 {
1673 for (compr = compressors; compr->suffix != NULL; compr++)
1674 {
1675 compressed_name = concat (file, ".", compr->suffix);
1676 if (stat (compressed_name, &stat_buf) != 0)
1677 {
1678 if (MSDOS)
1679 {
1680 char *suf = compressed_name + strlen (file);
1681 size_t suflen = strlen (compr->suffix) + 1;
1682 for ( ; suf[1]; suf++, suflen--)
1683 {
1684 memmove (suf, suf + 1, suflen);
1685 if (stat (compressed_name, &stat_buf) == 0)
1686 {
1687 real_name = compressed_name;
1688 break;
1689 }
1690 }
1691 if (real_name != NULL)
1692 break;
1693 } /* MSDOS */
1694 free (compressed_name);
1695 compressed_name = NULL;
1696 }
1697 else
1698 {
1699 real_name = compressed_name;
1700 break;
1701 }
1702 }
1703 }
1704 if (real_name == NULL)
1705 {
1706 perror (file);
1707 goto cleanup;
1708 }
1709 } /* try with a different name */
1710
1711 if (!S_ISREG (stat_buf.st_mode))
1712 {
1713 error ("skipping %s: it is not a regular file.", real_name);
1714 goto cleanup;
1715 }
1716 if (real_name == compressed_name)
1717 {
1718 char *cmd = concat (compr->command, " ", real_name);
1719 inf = (FILE *) popen (cmd, "r");
1720 free (cmd);
1721 }
1722 else
1723 inf = fopen (real_name, "r");
1724 if (inf == NULL)
1725 {
1726 perror (real_name);
1727 goto cleanup;
1728 }
1729
1730 process_file (inf, uncompressed_name, lang);
1731
1732 if (real_name == compressed_name)
1733 retval = pclose (inf);
1734 else
1735 retval = fclose (inf);
1736 if (retval < 0)
1737 pfatal (file);
1738
1739 cleanup:
1740 if (compressed_name) free (compressed_name);
1741 if (uncompressed_name) free (uncompressed_name);
1742 last_node = NULL;
1743 curfdp = NULL;
1744 return;
1745 }
1746
1747 static void
1748 process_file (fh, fn, lang)
1749 FILE *fh;
1750 char *fn;
1751 language *lang;
1752 {
1753 static const fdesc emptyfdesc;
1754 fdesc *fdp;
1755
1756 /* Create a new input file description entry. */
1757 fdp = xnew (1, fdesc);
1758 *fdp = emptyfdesc;
1759 fdp->next = fdhead;
1760 fdp->infname = savestr (fn);
1761 fdp->lang = lang;
1762 fdp->infabsname = absolute_filename (fn, cwd);
1763 fdp->infabsdir = absolute_dirname (fn, cwd);
1764 if (filename_is_absolute (fn))
1765 {
1766 /* An absolute file name. Canonicalize it. */
1767 fdp->taggedfname = absolute_filename (fn, NULL);
1768 }
1769 else
1770 {
1771 /* A file name relative to cwd. Make it relative
1772 to the directory of the tags file. */
1773 fdp->taggedfname = relative_filename (fn, tagfiledir);
1774 }
1775 fdp->usecharno = TRUE; /* use char position when making tags */
1776 fdp->prop = NULL;
1777 fdp->written = FALSE; /* not written on tags file yet */
1778
1779 fdhead = fdp;
1780 curfdp = fdhead; /* the current file description */
1781
1782 find_entries (fh);
1783
1784 /* If not Ctags, and if this is not metasource and if it contained no #line
1785 directives, we can write the tags and free all nodes pointing to
1786 curfdp. */
1787 if (!CTAGS
1788 && curfdp->usecharno /* no #line directives in this file */
1789 && !curfdp->lang->metasource)
1790 {
1791 node *np, *prev;
1792
1793 /* Look for the head of the sublist relative to this file. See add_node
1794 for the structure of the node tree. */
1795 prev = NULL;
1796 for (np = nodehead; np != NULL; prev = np, np = np->left)
1797 if (np->fdp == curfdp)
1798 break;
1799
1800 /* If we generated tags for this file, write and delete them. */
1801 if (np != NULL)
1802 {
1803 /* This is the head of the last sublist, if any. The following
1804 instructions depend on this being true. */
1805 assert (np->left == NULL);
1806
1807 assert (fdhead == curfdp);
1808 assert (last_node->fdp == curfdp);
1809 put_entries (np); /* write tags for file curfdp->taggedfname */
1810 free_tree (np); /* remove the written nodes */
1811 if (prev == NULL)
1812 nodehead = NULL; /* no nodes left */
1813 else
1814 prev->left = NULL; /* delete the pointer to the sublist */
1815 }
1816 }
1817 }
1818
1819 /*
1820 * This routine sets up the boolean pseudo-functions which work
1821 * by setting boolean flags dependent upon the corresponding character.
1822 * Every char which is NOT in that string is not a white char. Therefore,
1823 * all of the array "_wht" is set to FALSE, and then the elements
1824 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1825 * of a char is TRUE if it is the string "white", else FALSE.
1826 */
1827 static void
1828 init ()
1829 {
1830 register char *sp;
1831 register int i;
1832
1833 for (i = 0; i < CHARS; i++)
1834 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1835 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1836 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1837 notinname('\0') = notinname('\n');
1838 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1839 begtoken('\0') = begtoken('\n');
1840 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1841 intoken('\0') = intoken('\n');
1842 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1843 endtoken('\0') = endtoken('\n');
1844 }
1845
1846 /*
1847 * This routine opens the specified file and calls the function
1848 * which finds the function and type definitions.
1849 */
1850 static void
1851 find_entries (inf)
1852 FILE *inf;
1853 {
1854 char *cp;
1855 language *lang = curfdp->lang;
1856 Lang_function *parser = NULL;
1857
1858 /* If user specified a language, use it. */
1859 if (lang != NULL && lang->function != NULL)
1860 {
1861 parser = lang->function;
1862 }
1863
1864 /* Else try to guess the language given the file name. */
1865 if (parser == NULL)
1866 {
1867 lang = get_language_from_filename (curfdp->infname, TRUE);
1868 if (lang != NULL && lang->function != NULL)
1869 {
1870 curfdp->lang = lang;
1871 parser = lang->function;
1872 }
1873 }
1874
1875 /* Else look for sharp-bang as the first two characters. */
1876 if (parser == NULL
1877 && readline_internal (&lb, inf) > 0
1878 && lb.len >= 2
1879 && lb.buffer[0] == '#'
1880 && lb.buffer[1] == '!')
1881 {
1882 char *lp;
1883
1884 /* Set lp to point at the first char after the last slash in the
1885 line or, if no slashes, at the first nonblank. Then set cp to
1886 the first successive blank and terminate the string. */
1887 lp = etags_strrchr (lb.buffer+2, '/');
1888 if (lp != NULL)
1889 lp += 1;
1890 else
1891 lp = skip_spaces (lb.buffer + 2);
1892 cp = skip_non_spaces (lp);
1893 *cp = '\0';
1894
1895 if (strlen (lp) > 0)
1896 {
1897 lang = get_language_from_interpreter (lp);
1898 if (lang != NULL && lang->function != NULL)
1899 {
1900 curfdp->lang = lang;
1901 parser = lang->function;
1902 }
1903 }
1904 }
1905
1906 /* We rewind here, even if inf may be a pipe. We fail if the
1907 length of the first line is longer than the pipe block size,
1908 which is unlikely. */
1909 rewind (inf);
1910
1911 /* Else try to guess the language given the case insensitive file name. */
1912 if (parser == NULL)
1913 {
1914 lang = get_language_from_filename (curfdp->infname, FALSE);
1915 if (lang != NULL && lang->function != NULL)
1916 {
1917 curfdp->lang = lang;
1918 parser = lang->function;
1919 }
1920 }
1921
1922 /* Else try Fortran or C. */
1923 if (parser == NULL)
1924 {
1925 node *old_last_node = last_node;
1926
1927 curfdp->lang = get_language_from_langname ("fortran");
1928 find_entries (inf);
1929
1930 if (old_last_node == last_node)
1931 /* No Fortran entries found. Try C. */
1932 {
1933 /* We do not tag if rewind fails.
1934 Only the file name will be recorded in the tags file. */
1935 rewind (inf);
1936 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1937 find_entries (inf);
1938 }
1939 return;
1940 }
1941
1942 if (!no_line_directive
1943 && curfdp->lang != NULL && curfdp->lang->metasource)
1944 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1945 file, or anyway we parsed a file that is automatically generated from
1946 this one. If this is the case, the bingo.c file contained #line
1947 directives that generated tags pointing to this file. Let's delete
1948 them all before parsing this file, which is the real source. */
1949 {
1950 fdesc **fdpp = &fdhead;
1951 while (*fdpp != NULL)
1952 if (*fdpp != curfdp
1953 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1954 /* We found one of those! We must delete both the file description
1955 and all tags referring to it. */
1956 {
1957 fdesc *badfdp = *fdpp;
1958
1959 /* Delete the tags referring to badfdp->taggedfname
1960 that were obtained from badfdp->infname. */
1961 invalidate_nodes (badfdp, &nodehead);
1962
1963 *fdpp = badfdp->next; /* remove the bad description from the list */
1964 free_fdesc (badfdp);
1965 }
1966 else
1967 fdpp = &(*fdpp)->next; /* advance the list pointer */
1968 }
1969
1970 assert (parser != NULL);
1971
1972 /* Generic initialisations before reading from file. */
1973 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1974
1975 /* Generic initialisations before parsing file with readline. */
1976 lineno = 0; /* reset global line number */
1977 charno = 0; /* reset global char number */
1978 linecharno = 0; /* reset global char number of line start */
1979
1980 parser (inf);
1981
1982 #ifdef ETAGS_REGEXPS
1983 regex_tag_multiline ();
1984 #endif /* ETAGS_REGEXPS */
1985 }
1986
1987 \f
1988 /*
1989 * Check whether an implicitly named tag should be created,
1990 * then call `pfnote'.
1991 * NAME is a string that is internally copied by this function.
1992 *
1993 * TAGS format specification
1994 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1995 * The following is explained in some more detail in etc/ETAGS.EBNF.
1996 *
1997 * make_tag creates tags with "implicit tag names" (unnamed tags)
1998 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1999 * 1. NAME does not contain any of the characters in NONAM;
2000 * 2. LINESTART contains name as either a rightmost, or rightmost but
2001 * one character, substring;
2002 * 3. the character, if any, immediately before NAME in LINESTART must
2003 * be a character in NONAM;
2004 * 4. the character, if any, immediately after NAME in LINESTART must
2005 * also be a character in NONAM.
2006 *
2007 * The implementation uses the notinname() macro, which recognises the
2008 * characters stored in the string `nonam'.
2009 * etags.el needs to use the same characters that are in NONAM.
2010 */
2011 static void
2012 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2013 char *name; /* tag name, or NULL if unnamed */
2014 int namelen; /* tag length */
2015 bool is_func; /* tag is a function */
2016 char *linestart; /* start of the line where tag is */
2017 int linelen; /* length of the line where tag is */
2018 int lno; /* line number */
2019 long cno; /* character number */
2020 {
2021 bool named = (name != NULL && namelen > 0);
2022
2023 if (!CTAGS && named) /* maybe set named to false */
2024 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2025 such that etags.el can guess a name from it. */
2026 {
2027 int i;
2028 register char *cp = name;
2029
2030 for (i = 0; i < namelen; i++)
2031 if (notinname (*cp++))
2032 break;
2033 if (i == namelen) /* rule #1 */
2034 {
2035 cp = linestart + linelen - namelen;
2036 if (notinname (linestart[linelen-1]))
2037 cp -= 1; /* rule #4 */
2038 if (cp >= linestart /* rule #2 */
2039 && (cp == linestart
2040 || notinname (cp[-1])) /* rule #3 */
2041 && strneq (name, cp, namelen)) /* rule #2 */
2042 named = FALSE; /* use implicit tag name */
2043 }
2044 }
2045
2046 if (named)
2047 name = savenstr (name, namelen);
2048 else
2049 name = NULL;
2050 pfnote (name, is_func, linestart, linelen, lno, cno);
2051 }
2052
2053 /* Record a tag. */
2054 static void
2055 pfnote (name, is_func, linestart, linelen, lno, cno)
2056 char *name; /* tag name, or NULL if unnamed */
2057 bool is_func; /* tag is a function */
2058 char *linestart; /* start of the line where tag is */
2059 int linelen; /* length of the line where tag is */
2060 int lno; /* line number */
2061 long cno; /* character number */
2062 {
2063 register node *np;
2064
2065 assert (name == NULL || name[0] != '\0');
2066 if (CTAGS && name == NULL)
2067 return;
2068
2069 np = xnew (1, node);
2070
2071 /* If ctags mode, change name "main" to M<thisfilename>. */
2072 if (CTAGS && !cxref_style && streq (name, "main"))
2073 {
2074 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2075 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2076 fp = etags_strrchr (np->name, '.');
2077 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2078 fp[0] = '\0';
2079 }
2080 else
2081 np->name = name;
2082 np->valid = TRUE;
2083 np->been_warned = FALSE;
2084 np->fdp = curfdp;
2085 np->is_func = is_func;
2086 np->lno = lno;
2087 if (np->fdp->usecharno)
2088 /* Our char numbers are 0-base, because of C language tradition?
2089 ctags compatibility? old versions compatibility? I don't know.
2090 Anyway, since emacs's are 1-base we expect etags.el to take care
2091 of the difference. If we wanted to have 1-based numbers, we would
2092 uncomment the +1 below. */
2093 np->cno = cno /* + 1 */ ;
2094 else
2095 np->cno = invalidcharno;
2096 np->left = np->right = NULL;
2097 if (CTAGS && !cxref_style)
2098 {
2099 if (strlen (linestart) < 50)
2100 np->regex = concat (linestart, "$", "");
2101 else
2102 np->regex = savenstr (linestart, 50);
2103 }
2104 else
2105 np->regex = savenstr (linestart, linelen);
2106
2107 add_node (np, &nodehead);
2108 }
2109
2110 /*
2111 * free_tree ()
2112 * recurse on left children, iterate on right children.
2113 */
2114 static void
2115 free_tree (np)
2116 register node *np;
2117 {
2118 while (np)
2119 {
2120 register node *node_right = np->right;
2121 free_tree (np->left);
2122 if (np->name != NULL)
2123 free (np->name);
2124 free (np->regex);
2125 free (np);
2126 np = node_right;
2127 }
2128 }
2129
2130 /*
2131 * free_fdesc ()
2132 * delete a file description
2133 */
2134 static void
2135 free_fdesc (fdp)
2136 register fdesc *fdp;
2137 {
2138 if (fdp->infname != NULL) free (fdp->infname);
2139 if (fdp->infabsname != NULL) free (fdp->infabsname);
2140 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2141 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2142 if (fdp->prop != NULL) free (fdp->prop);
2143 free (fdp);
2144 }
2145
2146 /*
2147 * add_node ()
2148 * Adds a node to the tree of nodes. In etags mode, sort by file
2149 * name. In ctags mode, sort by tag name. Make no attempt at
2150 * balancing.
2151 *
2152 * add_node is the only function allowed to add nodes, so it can
2153 * maintain state.
2154 */
2155 static void
2156 add_node (np, cur_node_p)
2157 node *np, **cur_node_p;
2158 {
2159 register int dif;
2160 register node *cur_node = *cur_node_p;
2161
2162 if (cur_node == NULL)
2163 {
2164 *cur_node_p = np;
2165 last_node = np;
2166 return;
2167 }
2168
2169 if (!CTAGS)
2170 /* Etags Mode */
2171 {
2172 /* For each file name, tags are in a linked sublist on the right
2173 pointer. The first tags of different files are a linked list
2174 on the left pointer. last_node points to the end of the last
2175 used sublist. */
2176 if (last_node != NULL && last_node->fdp == np->fdp)
2177 {
2178 /* Let's use the same sublist as the last added node. */
2179 assert (last_node->right == NULL);
2180 last_node->right = np;
2181 last_node = np;
2182 }
2183 else if (cur_node->fdp == np->fdp)
2184 {
2185 /* Scanning the list we found the head of a sublist which is
2186 good for us. Let's scan this sublist. */
2187 add_node (np, &cur_node->right);
2188 }
2189 else
2190 /* The head of this sublist is not good for us. Let's try the
2191 next one. */
2192 add_node (np, &cur_node->left);
2193 } /* if ETAGS mode */
2194
2195 else
2196 {
2197 /* Ctags Mode */
2198 dif = strcmp (np->name, cur_node->name);
2199
2200 /*
2201 * If this tag name matches an existing one, then
2202 * do not add the node, but maybe print a warning.
2203 */
2204 if (!dif)
2205 {
2206 if (np->fdp == cur_node->fdp)
2207 {
2208 if (!no_warnings)
2209 {
2210 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2211 np->fdp->infname, lineno, np->name);
2212 fprintf (stderr, "Second entry ignored\n");
2213 }
2214 }
2215 else if (!cur_node->been_warned && !no_warnings)
2216 {
2217 fprintf
2218 (stderr,
2219 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2220 np->fdp->infname, cur_node->fdp->infname, np->name);
2221 cur_node->been_warned = TRUE;
2222 }
2223 return;
2224 }
2225
2226 /* Actually add the node */
2227 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2228 } /* if CTAGS mode */
2229 }
2230
2231 /*
2232 * invalidate_nodes ()
2233 * Scan the node tree and invalidate all nodes pointing to the
2234 * given file description (CTAGS case) or free them (ETAGS case).
2235 */
2236 static void
2237 invalidate_nodes (badfdp, npp)
2238 fdesc *badfdp;
2239 node **npp;
2240 {
2241 node *np = *npp;
2242
2243 if (np == NULL)
2244 return;
2245
2246 if (CTAGS)
2247 {
2248 if (np->left != NULL)
2249 invalidate_nodes (badfdp, &np->left);
2250 if (np->fdp == badfdp)
2251 np->valid = FALSE;
2252 if (np->right != NULL)
2253 invalidate_nodes (badfdp, &np->right);
2254 }
2255 else
2256 {
2257 assert (np->fdp != NULL);
2258 if (np->fdp == badfdp)
2259 {
2260 *npp = np->left; /* detach the sublist from the list */
2261 np->left = NULL; /* isolate it */
2262 free_tree (np); /* free it */
2263 invalidate_nodes (badfdp, npp);
2264 }
2265 else
2266 invalidate_nodes (badfdp, &np->left);
2267 }
2268 }
2269
2270 \f
2271 static int total_size_of_entries __P((node *));
2272 static int number_len __P((long));
2273
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for each division by ten that
     still leaves something.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2284
2285 /*
2286 * Return total number of characters that put_entries will output for
2287 * the nodes in the linked list at the right of the specified node.
2288 * This count is irrelevant with etags.el since emacs 19.34 at least,
2289 * but is still supplied for backward compatibility.
2290 */
2291 static int
2292 total_size_of_entries (np)
2293 register node *np;
2294 {
2295 register int total = 0;
2296
2297 for (; np != NULL; np = np->right)
2298 if (np->valid)
2299 {
2300 total += strlen (np->regex) + 1; /* pat\177 */
2301 if (np->name != NULL)
2302 total += strlen (np->name) + 1; /* name\001 */
2303 total += number_len ((long) np->lno) + 1; /* lno, */
2304 if (np->cno != invalidcharno) /* cno */
2305 total += number_len (np->cno);
2306 total += 1; /* newline */
2307 }
2308
2309 return total;
2310 }
2311
2312 static void
2313 put_entries (np)
2314 register node *np;
2315 {
2316 register char *sp;
2317 static fdesc *fdp = NULL;
2318
2319 if (np == NULL)
2320 return;
2321
2322 /* Output subentries that precede this one */
2323 if (CTAGS)
2324 put_entries (np->left);
2325
2326 /* Output this entry */
2327 if (np->valid)
2328 {
2329 if (!CTAGS)
2330 {
2331 /* Etags mode */
2332 if (fdp != np->fdp)
2333 {
2334 fdp = np->fdp;
2335 fprintf (tagf, "\f\n%s,%d\n",
2336 fdp->taggedfname, total_size_of_entries (np));
2337 fdp->written = TRUE;
2338 }
2339 fputs (np->regex, tagf);
2340 fputc ('\177', tagf);
2341 if (np->name != NULL)
2342 {
2343 fputs (np->name, tagf);
2344 fputc ('\001', tagf);
2345 }
2346 fprintf (tagf, "%d,", np->lno);
2347 if (np->cno != invalidcharno)
2348 fprintf (tagf, "%ld", np->cno);
2349 fputs ("\n", tagf);
2350 }
2351 else
2352 {
2353 /* Ctags mode */
2354 if (np->name == NULL)
2355 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2356
2357 if (cxref_style)
2358 {
2359 if (vgrind_style)
2360 fprintf (stdout, "%s %s %d\n",
2361 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2362 else
2363 fprintf (stdout, "%-16s %3d %-16s %s\n",
2364 np->name, np->lno, np->fdp->taggedfname, np->regex);
2365 }
2366 else
2367 {
2368 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2369
2370 if (np->is_func)
2371 { /* function or #define macro with args */
2372 putc (searchar, tagf);
2373 putc ('^', tagf);
2374
2375 for (sp = np->regex; *sp; sp++)
2376 {
2377 if (*sp == '\\' || *sp == searchar)
2378 putc ('\\', tagf);
2379 putc (*sp, tagf);
2380 }
2381 putc (searchar, tagf);
2382 }
2383 else
2384 { /* anything else; text pattern inadequate */
2385 fprintf (tagf, "%d", np->lno);
2386 }
2387 putc ('\n', tagf);
2388 }
2389 }
2390 } /* if this node contains a valid tag */
2391
2392 /* Output subentries that follow this one */
2393 put_entries (np->right);
2394 if (!CTAGS)
2395 put_entries (np->left);
2396 }
2397
2398 \f
2399 /* C extensions. */
/* These values are used as bit masks.  C_symtype accepts a keyword whose
   table entry has a non-zero c_ext only when that c_ext shares at least
   one bit with the mask of the language being parsed; a zero c_ext makes
   the keyword valid for every language.  Note that C_STAR (0x3) and
   C_JAVA (0x5) both contain the C_PLPL bit (0x1), so a keyword marked
   C_PLPL is also accepted for C* and Java.  */
2400 #define C_EXT 0x00fff /* C extensions */
2401 #define C_PLAIN 0x00000 /* C */
2402 #define C_PLPL 0x00001 /* C++ */
2403 #define C_STAR 0x00003 /* C* */
2404 #define C_JAVA 0x00005 /* JAVA */
2405 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2406 #define YACC 0x10000 /* yacc file */
2407
2408 /*
2409 * The C symbol tables.
2410 */
/* Classification of a token, as returned by C_symtype.  st_none is the
   answer for any token that is not a keyword of the language being
   parsed; every other value is the type field of an entry of the
   keyword table (see the gperf input further down in this file).  */
2411 enum sym_type
2412 {
2413 st_none,
/* Objective C: @interface and @protocol, @implementation, @end. */
2414 st_C_objprot, st_C_objimpl, st_C_objend,
/* DEFUN, SYSCALL, ENTRY, PSEUDO. */
2415 st_C_gnumacro,
/* Keywords such as if, for, while, switch, return; and __attribute__. */
2416 st_C_ignore, st_C_attribute,
/* Java extends and implements. */
2417 st_C_javastruct,
2418 st_C_operator,
2419 st_C_class, st_C_template,
/* st_C_struct also covers union, namespace, domain and interface;
   st_C_define covers both define and undef. */
2420 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2421 };
2422
2423 static unsigned int hash __P((const char *, unsigned int));
2424 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2425 static enum sym_type C_symtype __P((char *, int, int));
2426
2427 /* Feed stuff between (but not including) %[ and %] lines to:
2428 gperf -m 5
2429 %[
2430 %compare-strncmp
2431 %enum
2432 %struct-type
2433 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2434 %%
2435 if, 0, st_C_ignore
2436 for, 0, st_C_ignore
2437 while, 0, st_C_ignore
2438 switch, 0, st_C_ignore
2439 return, 0, st_C_ignore
2440 __attribute__, 0, st_C_attribute
2441 @interface, 0, st_C_objprot
2442 @protocol, 0, st_C_objprot
2443 @implementation,0, st_C_objimpl
2444 @end, 0, st_C_objend
2445 import, (C_JAVA & ~C_PLPL), st_C_ignore
2446 package, (C_JAVA & ~C_PLPL), st_C_ignore
2447 friend, C_PLPL, st_C_ignore
2448 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2449 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2450 interface, (C_JAVA & ~C_PLPL), st_C_struct
2451 class, 0, st_C_class
2452 namespace, C_PLPL, st_C_struct
2453 domain, C_STAR, st_C_struct
2454 union, 0, st_C_struct
2455 struct, 0, st_C_struct
2456 extern, 0, st_C_extern
2457 enum, 0, st_C_enum
2458 typedef, 0, st_C_typedef
2459 define, 0, st_C_define
2460 undef, 0, st_C_define
2461 operator, C_PLPL, st_C_operator
2462 template, 0, st_C_template
2463 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2464 DEFUN, 0, st_C_gnumacro
2465 SYSCALL, 0, st_C_gnumacro
2466 ENTRY, 0, st_C_gnumacro
2467 PSEUDO, 0, st_C_gnumacro
2468 # These are defined inside C functions, so currently they are not met.
2469 # EXFUN used in glibc, DEFVAR_* in emacs.
2470 #EXFUN, 0, st_C_gnumacro
2471 #DEFVAR_, 0, st_C_gnumacro
2472 %]
2473 and replace lines between %< and %> with its output, then:
2474 - remove the #if characterset check
2475 - make in_word_set static and not inline. */
2476 /*%<*/
2477 /* C code produced by gperf version 3.0.1 */
2478 /* Command-line: gperf -m 5 */
2479 /* Computed positions: -k'2-3' */
2480
/* One entry of the keyword table: the keyword text, the mask of the
   languages it belongs to (0 meaning all of them, see C_symtype) and
   the token type it stands for.  */
2481 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2482 /* maximum key range = 33, duplicates = 0 */
2483
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function generated by gperf for the keyword table.  The value is
   LEN plus the association value of the second character of STR and,
   unless LEN is exactly 2, that of the third character too.  STR must
   therefore hold at least two characters, and at least three when LEN
   is not 2.  */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only a two-character word has no third character to add. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2538
2539 static struct C_stab_entry *
2540 in_word_set (str, len)
2541 register const char *str;
2542 register unsigned int len;
2543 {
2544 enum
2545 {
2546 TOTAL_KEYWORDS = 32,
2547 MIN_WORD_LENGTH = 2,
2548 MAX_WORD_LENGTH = 15,
2549 MIN_HASH_VALUE = 2,
2550 MAX_HASH_VALUE = 34
2551 };
2552
2553 static struct C_stab_entry wordlist[] =
2554 {
2555 {""}, {""},
2556 {"if", 0, st_C_ignore},
2557 {""},
2558 {"@end", 0, st_C_objend},
2559 {"union", 0, st_C_struct},
2560 {"define", 0, st_C_define},
2561 {"import", (C_JAVA & !C_PLPL), st_C_ignore},
2562 {"template", 0, st_C_template},
2563 {"operator", C_PLPL, st_C_operator},
2564 {"@interface", 0, st_C_objprot},
2565 {"implements", (C_JAVA & !C_PLPL), st_C_javastruct},
2566 {"friend", C_PLPL, st_C_ignore},
2567 {"typedef", 0, st_C_typedef},
2568 {"return", 0, st_C_ignore},
2569 {"@implementation",0, st_C_objimpl},
2570 {"@protocol", 0, st_C_objprot},
2571 {"interface", (C_JAVA & !C_PLPL), st_C_struct},
2572 {"extern", 0, st_C_extern},
2573 {"extends", (C_JAVA & !C_PLPL), st_C_javastruct},
2574 {"struct", 0, st_C_struct},
2575 {"domain", C_STAR, st_C_struct},
2576 {"switch", 0, st_C_ignore},
2577 {"enum", 0, st_C_enum},
2578 {"for", 0, st_C_ignore},
2579 {"namespace", C_PLPL, st_C_struct},
2580 {"class", 0, st_C_class},
2581 {"while", 0, st_C_ignore},
2582 {"undef", 0, st_C_define},
2583 {"package", (C_JAVA & !C_PLPL), st_C_ignore},
2584 {"__attribute__", 0, st_C_attribute},
2585 {"SYSCALL", 0, st_C_gnumacro},
2586 {"ENTRY", 0, st_C_gnumacro},
2587 {"PSEUDO", 0, st_C_gnumacro},
2588 {"DEFUN", 0, st_C_gnumacro}
2589 };
2590
2591 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2592 {
2593 register int key = hash (str, len);
2594
2595 if (key <= MAX_HASH_VALUE && key >= 0)
2596 {
2597 register const char *s = wordlist[key].name;
2598
2599 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2600 return &wordlist[key];
2601 }
2602 }
2603 return 0;
2604 }
2605 /*%>*/
2606
2607 static enum sym_type
2608 C_symtype (str, len, c_ext)
2609 char *str;
2610 int len;
2611 int c_ext;
2612 {
2613 register struct C_stab_entry *se = in_word_set (str, len);
2614
2615 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2616 return st_none;
2617 return se->type;
2618 }
2619
2620 \f
2621 /*
2622 * Ignoring __attribute__ ((list))
2623 */
2624 static bool inattribute; /* looking at an __attribute__ construct */
2625
2626 /*
2627 * C functions and variables are recognized using a simple
2628 * finite automaton. fvdef is its state variable.
 *
 * consider_token enters fdefunkey when it meets a GNU macro keyword
 * outside a preprocessor line, and vignore when it meets a keyword of
 * type st_C_ignore or an `asm' / `__asm__' token.
2629 */
2630 static enum
2631 {
2632 fvnone, /* nothing seen */
2633 fdefunkey, /* Emacs DEFUN keyword seen */
2634 fdefunname, /* Emacs DEFUN name seen */
2635 foperator, /* func: operator keyword seen (cplpl) */
2636 fvnameseen, /* function or variable name seen */
2637 fstartlist, /* func: just after open parenthesis */
2638 finlist, /* func: in parameter list */
2639 flistseen, /* func: after parameter list */
2640 fignore, /* func: before open brace */
2641 vignore /* var-like: ignore until ';' */
2642 } fvdef;
2643
/* Set by consider_token on `extern', cleared by it on `typedef' and on
   keywords of type st_C_ignore. */
2644 static bool fvextern; /* func or var: extern keyword seen; */
2645
2646 /*
2647 * typedefs are recognized using a simple finite automaton.
2648 * typdef is its state variable.
 *
 * consider_token leaves tnone for tkeyseen only when the `typedefs'
 * option is set, and moves from tkeyseen to ttypeseen on a plain
 * identifier or on a class, struct or enum keyword.
2649 */
2650 static enum
2651 {
2652 tnone, /* nothing seen */
2653 tkeyseen, /* typedef keyword seen */
2654 ttypeseen, /* defined type seen */
2655 tinbody, /* inside typedef body */
2656 tend, /* just before typedef tag */
2657 tignore /* junk after typedef tag */
2658 } typdef;
2659
2660 /*
2661 * struct-like structures (enum, struct and union) are recognized
2662 * using another simple finite automaton. `structdef' is its state
2663 * variable.
 *
 * In consider_token the token that follows a struct-like keyword moves
 * the state from skeyseen to stagseen, and a Java `extends' or
 * `implements' keyword moves it from stagseen to scolonseen.
2664 */
2665 static enum
2666 {
2667 snone, /* nothing seen yet,
2668 or in struct body if bracelev > 0 */
2669 skeyseen, /* struct-like keyword seen */
2670 stagseen, /* struct-like tag seen */
2671 scolonseen /* colon seen after struct-like tag */
2672 } structdef;
2673
2674 /*
2675 * When objdef is different from onone, objtag is the name of the class.
2676 */
2677 static char *objtag = "<uninited>";
2678
2679 /*
2680 * Yet another little state machine to deal with preprocessor lines.
 *
 * In consider_token the first token after the `#' selects ddefineseen
 * if it is `define' or `undef' (both have type st_C_define) and
 * dignorerest otherwise; the token seen in state ddefineseen is the
 * macro name, after which the state becomes dignorerest.
2681 */
2682 static enum
2683 {
2684 dnone, /* nothing seen */
2685 dsharpseen, /* '#' seen as first char on line */
2686 ddefineseen, /* '#' and 'define' seen */
2687 dignorerest /* ignore rest of line */
2688 } definedef;
2689
2690 /*
2691 * State machine for Objective C protocols and implementations.
2692 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 *
 * In consider_token the token that follows @interface or @protocol is
 * saved in objtag and moves the state to otagseen; the token that
 * follows @implementation is saved in objtag and moves the state to
 * oinbody; an @end seen in state oignore brings the state back to onone.
2693 */
2694 static enum
2695 {
2696 onone, /* nothing seen */
2697 oprotocol, /* @interface or @protocol seen */
2698 oimplementation, /* @implementation seen */
2699 otagseen, /* class name seen */
2700 oparenseen, /* parenthesis before category seen */
2701 ocatseen, /* category name seen */
2702 oinbody, /* in @implementation body */
2703 omethodsign, /* in @implementation body, after +/- */
2704 omethodtag, /* after method name */
2705 omethodcolon, /* after method colon */
2706 omethodparm, /* after method parameter */
2707 oignore /* wait for @end */
2708 } objdef;
2709
2710
2711 /*
2712 * Use this structure to keep info about the token read, and how it
2713 * should be tagged. Used by the make_C_tag function to build a tag.
2714 */
2715 static struct tok
2716 {
2717 char *line; /* string containing the token */
2718 int offset; /* where the token starts in LINE */
2719 int length; /* token length */
2720 /*
2721 The previous members can be used to pass strings around for generic
2722 purposes. The following ones specifically refer to creating tags. In this
2723 case the token contained here is the pattern that will be used to create a
2724 tag.
2725 */
/* NOTE(review): the wording of the comment on `valid' reads inverted
   with respect to the member name; presumably a tag is created only
   while `valid' is true -- confirm against make_C_tag. */
2726 bool valid; /* do not create a tag; the token should be
2727 invalidated whenever a state machine is
2728 reset prematurely */
2729 bool named; /* create a named tag */
2730 int lineno; /* source line number of tag */
2731 long linepos; /* source char number of tag */
2732 } token; /* latest token read */
2733
2734 /*
2735 * Variables and functions for dealing with nested structures.
2736 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2737 */
2738 static void pushclass_above __P((int, char *, int));
2739 static void popclass_above __P((int));
2740 static void write_classname __P((linebuffer *, char *qualifier));
2741
/* Stack of the enclosing class-like declarations.  cname and bracelev
   are parallel arrays of `size' elements, of which the first `nl' are
   in use.  An element of cname is NULL when pushclass_above was called
   with a NULL name.  pushclass_above doubles `size' when the arrays are
   full, so `size' must be non-zero before the first push. */
2742 static struct {
2743 char **cname; /* nested class names */
2744 int *bracelev; /* nested class brace level */
2745 int nl; /* class nesting level (elements used) */
2746 int size; /* length of the array */
2747 } cstack; /* stack for nested declaration tags */
2748 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2749 #define nestlev (cstack.nl)
2750 /* After struct keyword or in struct body, not inside a nested function. */
/* This macro reads a variable named `bracelev' that must be in scope
   where it is expanded. */
2751 #define instruct (structdef == snone && nestlev > 0 \
2752 && bracelev == cstack.bracelev[nestlev-1] + 1)
2753
2754 static void
2755 pushclass_above (bracelev, str, len)
2756 int bracelev;
2757 char *str;
2758 int len;
2759 {
2760 int nl;
2761
2762 popclass_above (bracelev);
2763 nl = cstack.nl;
2764 if (nl >= cstack.size)
2765 {
2766 int size = cstack.size *= 2;
2767 xrnew (cstack.cname, size, char *);
2768 xrnew (cstack.bracelev, size, int);
2769 }
2770 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2771 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2772 cstack.bracelev[nl] = bracelev;
2773 cstack.nl = nl + 1;
2774 }
2775
2776 static void
2777 popclass_above (bracelev)
2778 int bracelev;
2779 {
2780 int nl;
2781
2782 for (nl = cstack.nl - 1;
2783 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2784 nl--)
2785 {
2786 if (cstack.cname[nl] != NULL)
2787 free (cstack.cname[nl]);
2788 cstack.nl = nl;
2789 }
2790 }
2791
2792 static void
2793 write_classname (cn, qualifier)
2794 linebuffer *cn;
2795 char *qualifier;
2796 {
2797 int i, len;
2798 int qlen = strlen (qualifier);
2799
2800 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2801 {
2802 len = 0;
2803 cn->len = 0;
2804 cn->buffer[0] = '\0';
2805 }
2806 else
2807 {
2808 len = strlen (cstack.cname[0]);
2809 linebuffer_setlen (cn, len);
2810 strcpy (cn->buffer, cstack.cname[0]);
2811 }
2812 for (i = 1; i < cstack.nl; i++)
2813 {
2814 char *s;
2815 int slen;
2816
2817 s = cstack.cname[i];
2818 if (s == NULL)
2819 continue;
2820 slen = strlen (s);
2821 len += slen + qlen;
2822 linebuffer_setlen (cn, len);
2823 strncat (cn->buffer, qualifier, qlen);
2824 strncat (cn->buffer, s, slen);
2825 }
2826 }
2827
2828 \f
2829 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2830 static void make_C_tag __P((bool));
2831
2832 /*
2833 * consider_token ()
2834 * checks to see if the current token is at the start of a
2835 * function or variable, or corresponds to a typedef, or
2836 * is a struct/union/enum tag, or #define, or an enum constant.
2837 *
2838 * *IS_FUNC_OR_VAR is written only on some of the paths that return
 * TRUE: it gets (C == '(') for a macro name, and TRUE for Objective C
 * class and category names, for `operator', for GNU macro names and
 * for function or variable names.
2839 * C_EXTP points to which language we are looking at; the mask it
 * points to is switched from C_AUTO to C_PLPL when C++ is detected.
2840 *
2841 * Globals
2842 * fvdef IN OUT
2843 * structdef IN OUT
2844 * definedef IN OUT
2845 * typdef IN OUT
2846 * objdef IN OUT
 * fvextern OUT
 * inattribute OUT
 * objtag OUT
 * token_name OUT (Objective C method names)
 *
 * The static locals structtype, structbracelev and toktype keep their
 * values between calls.
2847 */
2848
2849 static bool
2850 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2851 register char *str; /* IN: token pointer */
2852 register int len; /* IN: token length */
2853 register int c; /* IN: first char after the token */
2854 int *c_extp; /* IN, OUT: C extensions mask */
2855 int bracelev; /* IN: brace level */
2856 int parlev; /* IN: parenthesis level */
2857 bool *is_func_or_var; /* OUT: function or variable found */
2858 {
2859 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2860 structtype is the type of the preceding struct-like keyword, and
2861 structbracelev is the brace level where it has been seen. */
2862 static enum sym_type structtype;
2863 static int structbracelev;
2864 static enum sym_type toktype;
2865
2866
/* Classify the token: a keyword type, or st_none if it is not a keyword
   of the current language. */
2867 toktype = C_symtype (str, len, *c_extp);
2868
2869 /*
2870 * Skip __attribute__
2871 */
2872 if (toktype == st_C_attribute)
2873 {
2874 inattribute = TRUE;
2875 return FALSE;
2876 }
2877
2878 /*
2879 * Advance the definedef state machine.
2880 */
2881 switch (definedef)
2882 {
2883 case dnone:
2884 /* We're not on a preprocessor line. */
2885 if (toktype == st_C_gnumacro)
2886 {
2887 fvdef = fdefunkey;
2888 return FALSE;
2889 }
2890 break;
2891 case dsharpseen:
/* Both `define' and `undef' have type st_C_define in the keyword table. */
2892 if (toktype == st_C_define)
2893 {
2894 definedef = ddefineseen;
2895 }
2896 else
2897 {
2898 definedef = dignorerest;
2899 }
2900 return FALSE;
2901 case ddefineseen:
2902 /*
2903 * Make a tag for any macro, unless it is a constant
2904 * and constantypedefs is FALSE.
2905 */
2906 definedef = dignorerest;
/* An open parenthesis right after the name means a macro with args. */
2907 *is_func_or_var = (c == '(');
2908 if (!*is_func_or_var && !constantypedefs)
2909 return FALSE;
2910 else
2911 return TRUE;
2912 case dignorerest:
2913 return FALSE;
2914 default:
2915 error ("internal error: definedef value.", (char *)NULL);
2916 }
2917
2918 /*
2919 * Now typedefs
2920 */
2921 switch (typdef)
2922 {
2923 case tnone:
2924 if (toktype == st_C_typedef)
2925 {
/* Without the `typedefs' option the keyword is still consumed here, but
   the typedef state machine stays in tnone. */
2926 if (typedefs)
2927 typdef = tkeyseen;
2928 fvextern = FALSE;
2929 fvdef = fvnone;
2930 return FALSE;
2931 }
2932 break;
2933 case tkeyseen:
2934 switch (toktype)
2935 {
2936 case st_none:
2937 case st_C_class:
2938 case st_C_struct:
2939 case st_C_enum:
2940 typdef = ttypeseen;
2941 }
2942 break;
2943 case ttypeseen:
2944 if (structdef == snone && fvdef == fvnone)
2945 {
2946 fvdef = fvnameseen;
2947 return TRUE;
2948 }
2949 break;
2950 case tend:
/* Any token other than a class, struct or enum keyword is accepted. */
2951 switch (toktype)
2952 {
2953 case st_C_class:
2954 case st_C_struct:
2955 case st_C_enum:
2956 return FALSE;
2957 }
2958 return TRUE;
2959 }
2960
2961 /*
2962 * This structdef business is NOT invoked when we are ctags and the
2963 * file is plain C. This is because a struct tag may have the same
2964 * name as another tag, and this loses with ctags.
2965 */
2966 switch (toktype)
2967 {
2968 case st_C_javastruct:
2969 if (structdef == stagseen)
2970 structdef = scolonseen;
2971 return FALSE;
2972 case st_C_template:
2973 case st_C_class:
2974 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2975 && bracelev == 0
2976 && definedef == dnone && structdef == snone
2977 && typdef == tnone && fvdef == fvnone)
2978 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
/* `template' only takes part in the C++ detection above; `class' goes on
   to be handled like the other struct-like keywords. */
2979 if (toktype == st_C_template)
2980 break;
2981 /* FALLTHRU */
2982 case st_C_struct:
2983 case st_C_enum:
2984 if (parlev == 0
2985 && fvdef != vignore
2986 && (typdef == tkeyseen
2987 || (typedefs_or_cplusplus && structdef == snone)))
2988 {
/* Remember which keyword was seen and at which brace level: the enum
   constant test further down relies on these two values. */
2989 structdef = skeyseen;
2990 structtype = toktype;
2991 structbracelev = bracelev;
2992 if (fvdef == fvnameseen)
2993 fvdef = fvnone;
2994 }
2995 return FALSE;
2996 }
2997
/* The first token after a struct-like keyword is taken as its tag. */
2998 if (structdef == skeyseen)
2999 {
3000 structdef = stagseen;
3001 return TRUE;
3002 }
3003
3004 if (typdef != tnone)
3005 definedef = dnone;
3006
3007 /* Detect Objective C constructs. */
3008 switch (objdef)
3009 {
3010 case onone:
3011 switch (toktype)
3012 {
3013 case st_C_objprot:
3014 objdef = oprotocol;
3015 return FALSE;
3016 case st_C_objimpl:
3017 objdef = oimplementation;
3018 return FALSE;
3019 }
3020 break;
3021 case oimplementation:
3022 /* Save the class tag for functions or variables defined inside. */
3023 objtag = savenstr (str, len);
3024 objdef = oinbody;
3025 return FALSE;
3026 case oprotocol:
3027 /* Save the class tag for categories. */
3028 objtag = savenstr (str, len);
3029 objdef = otagseen;
3030 *is_func_or_var = TRUE;
3031 return TRUE;
3032 case oparenseen:
3033 objdef = ocatseen;
3034 *is_func_or_var = TRUE;
3035 return TRUE;
3036 case oinbody:
3037 break;
3038 case omethodsign:
3039 if (parlev == 0)
3040 {
3041 fvdef = fvnone;
3042 objdef = omethodtag;
/* Start the method name in token_name with this token. */
3043 linebuffer_setlen (&token_name, len);
3044 strncpy (token_name.buffer, str, len);
3045 token_name.buffer[len] = '\0';
3046 return TRUE;
3047 }
3048 return FALSE;
3049 case omethodcolon:
3050 if (parlev == 0)
3051 objdef = omethodparm;
3052 return FALSE;
3053 case omethodparm:
3054 if (parlev == 0)
3055 {
3056 fvdef = fvnone;
3057 objdef = omethodtag;
/* Append this token to the method name collected so far. */
3058 linebuffer_setlen (&token_name, token_name.len + len);
3059 strncat (token_name.buffer, str, len);
3060 return TRUE;
3061 }
3062 return FALSE;
3063 case oignore:
3064 if (toktype == st_C_objend)
3065 {
3066 /* Memory leakage here: the string pointed by objtag is
3067 never released, because many tests would be needed to
3068 avoid breaking on incorrect input code. The amount of
3069 memory leaked here is the sum of the lengths of the
3070 class tags.
3071 free (objtag); */
3072 objdef = onone;
3073 }
3074 return FALSE;
3075 }
3076
3077 /* A function, variable or enum constant? */
3078 switch (toktype)
3079 {
3080 case st_C_extern:
3081 fvextern = TRUE;
/* Inside or after a parameter list, and while ignoring, `extern' does
   not reset the function/variable state. */
3082 switch (fvdef)
3083 {
3084 case finlist:
3085 case flistseen:
3086 case fignore:
3087 case vignore:
3088 break;
3089 default:
3090 fvdef = fvnone;
3091 }
3092 return FALSE;
3093 case st_C_ignore:
3094 fvextern = FALSE;
3095 fvdef = vignore;
3096 return FALSE;
3097 case st_C_operator:
3098 fvdef = foperator;
3099 *is_func_or_var = TRUE;
3100 return TRUE;
3101 case st_none:
3102 if (constantypedefs
3103 && structdef == snone
3104 && structtype == st_C_enum && bracelev > structbracelev)
3105 return TRUE; /* enum constant */
3106 switch (fvdef)
3107 {
3108 case fdefunkey:
3109 if (bracelev > 0)
3110 break;
3111 fvdef = fdefunname; /* GNU macro */
3112 *is_func_or_var = TRUE;
3113 return TRUE;
3114 case fvnone:
3115 switch (typdef)
3116 {
3117 case ttypeseen:
3118 return FALSE;
3119 case tnone:
3120 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3121 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3122 {
3123 fvdef = vignore;
3124 return FALSE;
3125 }
3126 break;
3127 }
3128 /* FALLTHRU */
3129 case fvnameseen:
/* A token ending in "::operator" starts a C++ operator definition. */
3130 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3131 {
3132 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3133 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3134 fvdef = foperator;
3135 *is_func_or_var = TRUE;
3136 return TRUE;
3137 }
/* Inside braces only names met directly in a struct body are accepted
   (see the instruct macro). */
3138 if (bracelev > 0 && !instruct)
3139 break;
3140 fvdef = fvnameseen; /* function or variable */
3141 *is_func_or_var = TRUE;
3142 return TRUE;
3143 }
3144 break;
3145 }
3146
3147 return FALSE;
3148 }
3149
3150 \f
3151 /*
3152 * C_entries often keeps pointers to tokens or lines which are older than
3153 * the line currently read. By keeping two line buffers, and switching
3154 * them at end of line, it is possible to use those pointers.
3155 */
3156 static struct
3157 {
3158 long linepos;
3159 linebuffer lb;
3160 } lbs[2];
3161
3162 #define current_lb_is_new (newndx == curndx)
3163 #define switch_line_buffers() (curndx = 1 - curndx)
3164
3165 #define curlb (lbs[curndx].lb)
3166 #define newlb (lbs[newndx].lb)
3167 #define curlinepos (lbs[curndx].linepos)
3168 #define newlinepos (lbs[newndx].linepos)
3169
3170 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3171 #define cplpl (c_ext & C_PLPL)
3172 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3173
3174 #define CNL_SAVE_DEFINEDEF() \
3175 do { \
3176 curlinepos = charno; \
3177 readline (&curlb, inf); \
3178 lp = curlb.buffer; \
3179 quotednl = FALSE; \
3180 newndx = curndx; \
3181 } while (0)
3182
3183 #define CNL() \
3184 do { \
3185 CNL_SAVE_DEFINEDEF(); \
3186 if (savetoken.valid) \
3187 { \
3188 token = savetoken; \
3189 savetoken.valid = FALSE; \
3190 } \
3191 definedef = dnone; \
3192 } while (0)
3193
3194
3195 static void
3196 make_C_tag (isfun)
3197 bool isfun;
3198 {
3199 /* This function should never be called when token.valid is FALSE, but
3200 we must protect against invalid input or internal errors. */
3201 if (!DEBUG && !token.valid)
3202 return;
3203
3204 if (token.valid)
3205 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3206 token.offset+token.length+1, token.lineno, token.linepos);
3207 else /* this case is optimised away if !DEBUG */
3208 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3209 token_name.len + 17, isfun, token.line,
3210 token.offset+token.length+1, token.lineno, token.linepos);
3211
3212 token.valid = FALSE;
3213 }
3214
3215
3216 /*
3217 * C_entries ()
3218 * This routine finds functions, variables, typedefs,
3219 * #define's, enum constants and struct/union/enum definitions in
3220 * C syntax and adds them to the list.
3221 */
3222 static void
3223 C_entries (c_ext, inf)
3224 int c_ext; /* extension of C */
3225 FILE *inf; /* input file */
3226 {
3227 register char c; /* latest char read; '\0' for end of line */
3228 register char *lp; /* pointer one beyond the character `c' */
3229 int curndx, newndx; /* indices for current and new lb */
3230 register int tokoff; /* offset in line of start of current token */
3231 register int toklen; /* length of current token */
3232 char *qualifier; /* string used to qualify names */
3233 int qlen; /* length of qualifier */
3234 int bracelev; /* current brace level */
3235 int bracketlev; /* current bracket level */
3236 int parlev; /* current parenthesis level */
3237 int attrparlev; /* __attribute__ parenthesis level */
3238 int templatelev; /* current template level */
3239 int typdefbracelev; /* bracelev where a typedef struct body begun */
3240 bool incomm, inquote, inchar, quotednl, midtoken;
3241 bool yacc_rules; /* in the rules part of a yacc file */
3242 struct tok savetoken; /* token saved during preprocessor handling */
3243
3244
3245 linebuffer_init (&lbs[0].lb);
3246 linebuffer_init (&lbs[1].lb);
3247 if (cstack.size == 0)
3248 {
3249 cstack.size = (DEBUG) ? 1 : 4;
3250 cstack.nl = 0;
3251 cstack.cname = xnew (cstack.size, char *);
3252 cstack.bracelev = xnew (cstack.size, int);
3253 }
3254
3255 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3256 curndx = newndx = 0;
3257 lp = curlb.buffer;
3258 *lp = 0;
3259
3260 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3261 structdef = snone; definedef = dnone; objdef = onone;
3262 yacc_rules = FALSE;
3263 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3264 token.valid = savetoken.valid = FALSE;
3265 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3266 if (cjava)
3267 { qualifier = "."; qlen = 1; }
3268 else
3269 { qualifier = "::"; qlen = 2; }
3270
3271
3272 while (!feof (inf))
3273 {
3274 c = *lp++;
3275 if (c == '\\')
3276 {
3277 /* If we are at the end of the line, the next character is a
3278 '\0'; do not skip it, because it is what tells us
3279 to read the next line. */
3280 if (*lp == '\0')
3281 {
3282 quotednl = TRUE;
3283 continue;
3284 }
3285 lp++;
3286 c = ' ';
3287 }
3288 else if (incomm)
3289 {
3290 switch (c)
3291 {
3292 case '*':
3293 if (*lp == '/')
3294 {
3295 c = *lp++;
3296 incomm = FALSE;
3297 }
3298 break;
3299 case '\0':
3300 /* Newlines inside comments do not end macro definitions in
3301 traditional cpp. */
3302 CNL_SAVE_DEFINEDEF ();
3303 break;
3304 }
3305 continue;
3306 }
3307 else if (inquote)
3308 {
3309 switch (c)
3310 {
3311 case '"':
3312 inquote = FALSE;
3313 break;
3314 case '\0':
3315 /* Newlines inside strings do not end macro definitions
3316 in traditional cpp, even though compilers don't
3317 usually accept them. */
3318 CNL_SAVE_DEFINEDEF ();
3319 break;
3320 }
3321 continue;
3322 }
3323 else if (inchar)
3324 {
3325 switch (c)
3326 {
3327 case '\0':
3328 /* Hmmm, something went wrong. */
3329 CNL ();
3330 /* FALLTHRU */
3331 case '\'':
3332 inchar = FALSE;
3333 break;
3334 }
3335 continue;
3336 }
3337 else if (bracketlev > 0)
3338 {
3339 switch (c)
3340 {
3341 case ']':
3342 if (--bracketlev > 0)
3343 continue;
3344 break;
3345 case '\0':
3346 CNL_SAVE_DEFINEDEF ();
3347 break;
3348 }
3349 continue;
3350 }
3351 else switch (c)
3352 {
3353 case '"':
3354 inquote = TRUE;
3355 if (inattribute)
3356 break;
3357 switch (fvdef)
3358 {
3359 case fdefunkey:
3360 case fstartlist:
3361 case finlist:
3362 case fignore:
3363 case vignore:
3364 break;
3365 default:
3366 fvextern = FALSE;
3367 fvdef = fvnone;
3368 }
3369 continue;
3370 case '\'':
3371 inchar = TRUE;
3372 if (inattribute)
3373 break;
3374 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3375 {
3376 fvextern = FALSE;
3377 fvdef = fvnone;
3378 }
3379 continue;
3380 case '/':
3381 if (*lp == '*')
3382 {
3383 lp++;
3384 incomm = TRUE;
3385 continue;
3386 }
3387 else if (/* cplpl && */ *lp == '/')
3388 {
3389 c = '\0';
3390 break;
3391 }
3392 else
3393 break;
3394 case '%':
3395 if ((c_ext & YACC) && *lp == '%')
3396 {
3397 /* Entering or exiting rules section in yacc file. */
3398 lp++;
3399 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3400 typdef = tnone; structdef = snone;
3401 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3402 bracelev = 0;
3403 yacc_rules = !yacc_rules;
3404 continue;
3405 }
3406 else
3407 break;
3408 case '#':
3409 if (definedef == dnone)
3410 {
3411 char *cp;
3412 bool cpptoken = TRUE;
3413
3414 /* Look back on this line. If all blanks, or nonblanks
3415 followed by an end of comment, this is a preprocessor
3416 token. */
3417 for (cp = newlb.buffer; cp < lp-1; cp++)
3418 if (!iswhite (*cp))
3419 {
3420 if (*cp == '*' && *(cp+1) == '/')
3421 {
3422 cp++;
3423 cpptoken = TRUE;
3424 }
3425 else
3426 cpptoken = FALSE;
3427 }
3428 if (cpptoken)
3429 definedef = dsharpseen;
3430 } /* if (definedef == dnone) */
3431 continue;
3432 case '[':
3433 bracketlev++;
3434 continue;
3435 } /* switch (c) */
3436
3437
3438 /* Consider token only if some involved conditions are satisfied. */
3439 if (typdef != tignore
3440 && definedef != dignorerest
3441 && fvdef != finlist
3442 && templatelev == 0
3443 && (definedef != dnone
3444 || structdef != scolonseen)
3445 && !inattribute)
3446 {
3447 if (midtoken)
3448 {
3449 if (endtoken (c))
3450 {
3451 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3452 /* This handles :: in the middle,
3453 but not at the beginning of an identifier.
3454 Also, space-separated :: is not recognised. */
3455 {
3456 if (c_ext & C_AUTO) /* automatic detection of C++ */
3457 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3458 lp += 2;
3459 toklen += 2;
3460 c = lp[-1];
3461 goto still_in_token;
3462 }
3463 else
3464 {
3465 bool funorvar = FALSE;
3466
3467 if (yacc_rules
3468 || consider_token (newlb.buffer + tokoff, toklen, c,
3469 &c_ext, bracelev, parlev,
3470 &funorvar))
3471 {
3472 if (fvdef == foperator)
3473 {
3474 char *oldlp = lp;
3475 lp = skip_spaces (lp-1);
3476 if (*lp != '\0')
3477 lp += 1;
3478 while (*lp != '\0'
3479 && !iswhite (*lp) && *lp != '(')
3480 lp += 1;
3481 c = *lp++;
3482 toklen += lp - oldlp;
3483 }
3484 token.named = FALSE;
3485 if (!plainc
3486 && nestlev > 0 && definedef == dnone)
3487 /* in struct body */
3488 {
3489 write_classname (&token_name, qualifier);
3490 linebuffer_setlen (&token_name,
3491 token_name.len+qlen+toklen);
3492 strcat (token_name.buffer, qualifier);
3493 strncat (token_name.buffer,
3494 newlb.buffer + tokoff, toklen);
3495 token.named = TRUE;
3496 }
3497 else if (objdef == ocatseen)
3498 /* Objective C category */
3499 {
3500 int len = strlen (objtag) + 2 + toklen;
3501 linebuffer_setlen (&token_name, len);
3502 strcpy (token_name.buffer, objtag);
3503 strcat (token_name.buffer, "(");
3504 strncat (token_name.buffer,
3505 newlb.buffer + tokoff, toklen);
3506 strcat (token_name.buffer, ")");
3507 token.named = TRUE;
3508 }
3509 else if (objdef == omethodtag
3510 || objdef == omethodparm)
3511 /* Objective C method */
3512 {
3513 token.named = TRUE;
3514 }
3515 else if (fvdef == fdefunname)
3516 /* GNU DEFUN and similar macros */
3517 {
3518 bool defun = (newlb.buffer[tokoff] == 'F');
3519 int off = tokoff;
3520 int len = toklen;
3521
3522 /* Rewrite the tag so that emacs lisp DEFUNs
3523 can be found by their elisp name */
3524 if (defun)
3525 {
3526 off += 1;
3527 len -= 1;
3528 }
3529 len = toklen;
3530 linebuffer_setlen (&token_name, len);
3531 strncpy (token_name.buffer,
3532 newlb.buffer + off, len);
3533 token_name.buffer[len] = '\0';
3534 if (defun)
3535 while (--len >= 0)
3536 if (token_name.buffer[len] == '_')
3537 token_name.buffer[len] = '-';
3538 token.named = defun;
3539 }
3540 else
3541 {
3542 linebuffer_setlen (&token_name, toklen);
3543 strncpy (token_name.buffer,
3544 newlb.buffer + tokoff, toklen);
3545 token_name.buffer[toklen] = '\0';
3546 /* Name macros and members. */
3547 token.named = (structdef == stagseen
3548 || typdef == ttypeseen
3549 || typdef == tend
3550 || (funorvar
3551 && definedef == dignorerest)
3552 || (funorvar
3553 && definedef == dnone
3554 && structdef == snone
3555 && bracelev > 0));
3556 }
3557 token.lineno = lineno;
3558 token.offset = tokoff;
3559 token.length = toklen;
3560 token.line = newlb.buffer;
3561 token.linepos = newlinepos;
3562 token.valid = TRUE;
3563
3564 if (definedef == dnone
3565 && (fvdef == fvnameseen
3566 || fvdef == foperator
3567 || structdef == stagseen
3568 || typdef == tend
3569 || typdef == ttypeseen
3570 || objdef != onone))
3571 {
3572 if (current_lb_is_new)
3573 switch_line_buffers ();
3574 }
3575 else if (definedef != dnone
3576 || fvdef == fdefunname
3577 || instruct)
3578 make_C_tag (funorvar);
3579 }
3580 else /* not yacc and consider_token failed */
3581 {
3582 if (inattribute && fvdef == fignore)
3583 {
3584 /* We have just met __attribute__ after a
3585 function parameter list: do not tag the
3586 function again. */
3587 fvdef = fvnone;
3588 }
3589 }
3590 midtoken = FALSE;
3591 }
3592 } /* if (endtoken (c)) */
3593 else if (intoken (c))
3594 still_in_token:
3595 {
3596 toklen++;
3597 continue;
3598 }
3599 } /* if (midtoken) */
3600 else if (begtoken (c))
3601 {
3602 switch (definedef)
3603 {
3604 case dnone:
3605 switch (fvdef)
3606 {
3607 case fstartlist:
3608 /* This prevents tagging fb in
3609 void (__attribute__((noreturn)) *fb) (void);
3610 Fixing this is not easy and not very important. */
3611 fvdef = finlist;
3612 continue;
3613 case flistseen:
3614 if (plainc || declarations)
3615 {
3616 make_C_tag (TRUE); /* a function */
3617 fvdef = fignore;
3618 }
3619 break;
3620 }
3621 if (structdef == stagseen && !cjava)
3622 {
3623 popclass_above (bracelev);
3624 structdef = snone;
3625 }
3626 break;
3627 case dsharpseen:
3628 savetoken = token;
3629 break;
3630 }
3631 if (!yacc_rules || lp == newlb.buffer + 1)
3632 {
3633 tokoff = lp - 1 - newlb.buffer;
3634 toklen = 1;
3635 midtoken = TRUE;
3636 }
3637 continue;
3638 } /* if (begtoken) */
3639 } /* if must look at token */
3640
3641
3642 /* Detect end of line, colon, comma, semicolon and various braces
3643 after having handled a token.*/
3644 switch (c)
3645 {
3646 case ':':
3647 if (inattribute)
3648 break;
3649 if (yacc_rules && token.offset == 0 && token.valid)
3650 {
3651 make_C_tag (FALSE); /* a yacc function */
3652 break;
3653 }
3654 if (definedef != dnone)
3655 break;
3656 switch (objdef)
3657 {
3658 case otagseen:
3659 objdef = oignore;
3660 make_C_tag (TRUE); /* an Objective C class */
3661 break;
3662 case omethodtag:
3663 case omethodparm:
3664 objdef = omethodcolon;
3665 linebuffer_setlen (&token_name, token_name.len + 1);
3666 strcat (token_name.buffer, ":");
3667 break;
3668 }
3669 if (structdef == stagseen)
3670 {
3671 structdef = scolonseen;
3672 break;
3673 }
3674 /* Should be useless, but may be work as a safety net. */
3675 if (cplpl && fvdef == flistseen)
3676 {
3677 make_C_tag (TRUE); /* a function */
3678 fvdef = fignore;
3679 break;
3680 }
3681 break;
3682 case ';':
3683 if (definedef != dnone || inattribute)
3684 break;
3685 switch (typdef)
3686 {
3687 case tend:
3688 case ttypeseen:
3689 make_C_tag (FALSE); /* a typedef */
3690 typdef = tnone;
3691 fvdef = fvnone;
3692 break;
3693 case tnone:
3694 case tinbody:
3695 case tignore:
3696 switch (fvdef)
3697 {
3698 case fignore:
3699 if (typdef == tignore || cplpl)
3700 fvdef = fvnone;
3701 break;
3702 case fvnameseen:
3703 if ((globals && bracelev == 0 && (!fvextern || declarations))
3704 || (members && instruct))
3705 make_C_tag (FALSE); /* a variable */
3706 fvextern = FALSE;
3707 fvdef = fvnone;
3708 token.valid = FALSE;
3709 break;
3710 case flistseen:
3711 if ((declarations
3712 && (cplpl || !instruct)
3713 && (typdef == tnone || (typdef != tignore && instruct)))
3714 || (members
3715 && plainc && instruct))
3716 make_C_tag (TRUE); /* a function */
3717 /* FALLTHRU */
3718 default:
3719 fvextern = FALSE;
3720 fvdef = fvnone;
3721 if (declarations
3722 && cplpl && structdef == stagseen)
3723 make_C_tag (FALSE); /* forward declaration */
3724 else
3725 token.valid = FALSE;
3726 } /* switch (fvdef) */
3727 /* FALLTHRU */
3728 default:
3729 if (!instruct)
3730 typdef = tnone;
3731 }
3732 if (structdef == stagseen)
3733 structdef = snone;
3734 break;
3735 case ',':
3736 if (definedef != dnone || inattribute)
3737 break;
3738 switch (objdef)
3739 {
3740 case omethodtag:
3741 case omethodparm:
3742 make_C_tag (TRUE); /* an Objective C method */
3743 objdef = oinbody;
3744 break;
3745 }
3746 switch (fvdef)
3747 {
3748 case fdefunkey:
3749 case foperator:
3750 case fstartlist:
3751 case finlist:
3752 case fignore:
3753 case vignore:
3754 break;
3755 case fdefunname:
3756 fvdef = fignore;
3757 break;
3758 case fvnameseen:
3759 if (parlev == 0
3760 && ((globals
3761 && bracelev == 0
3762 && templatelev == 0
3763 && (!fvextern || declarations))
3764 || (members && instruct)))
3765 make_C_tag (FALSE); /* a variable */
3766 break;
3767 case flistseen:
3768 if ((declarations && typdef == tnone && !instruct)
3769 || (members && typdef != tignore && instruct))
3770 {
3771 make_C_tag (TRUE); /* a function */
3772 fvdef = fvnameseen;
3773 }
3774 else if (!declarations)
3775 fvdef = fvnone;
3776 token.valid = FALSE;
3777 break;
3778 default:
3779 fvdef = fvnone;
3780 }
3781 if (structdef == stagseen)
3782 structdef = snone;
3783 break;
3784 case ']':
3785 if (definedef != dnone || inattribute)
3786 break;
3787 if (structdef == stagseen)
3788 structdef = snone;
3789 switch (typdef)
3790 {
3791 case ttypeseen:
3792 case tend:
3793 typdef = tignore;
3794 make_C_tag (FALSE); /* a typedef */
3795 break;
3796 case tnone:
3797 case tinbody:
3798 switch (fvdef)
3799 {
3800 case foperator:
3801 case finlist:
3802 case fignore:
3803 case vignore:
3804 break;
3805 case fvnameseen:
3806 if ((members && bracelev == 1)
3807 || (globals && bracelev == 0
3808 && (!fvextern || declarations)))
3809 make_C_tag (FALSE); /* a variable */
3810 /* FALLTHRU */
3811 default:
3812 fvdef = fvnone;
3813 }
3814 break;
3815 }
3816 break;
3817 case '(':
3818 if (inattribute)
3819 {
3820 attrparlev++;
3821 break;
3822 }
3823 if (definedef != dnone)
3824 break;
3825 if (objdef == otagseen && parlev == 0)
3826 objdef = oparenseen;
3827 switch (fvdef)
3828 {
3829 case fvnameseen:
3830 if (typdef == ttypeseen
3831 && *lp != '*'
3832 && !instruct)
3833 {
3834 /* This handles constructs like:
3835 typedef void OperatorFun (int fun); */
3836 make_C_tag (FALSE);
3837 typdef = tignore;
3838 fvdef = fignore;
3839 break;
3840 }
3841 /* FALLTHRU */
3842 case foperator:
3843 fvdef = fstartlist;
3844 break;
3845 case flistseen:
3846 fvdef = finlist;
3847 break;
3848 }
3849 parlev++;
3850 break;
3851 case ')':
3852 if (inattribute)
3853 {
3854 if (--attrparlev == 0)
3855 inattribute = FALSE;
3856 break;
3857 }
3858 if (definedef != dnone)
3859 break;
3860 if (objdef == ocatseen && parlev == 1)
3861 {
3862 make_C_tag (TRUE); /* an Objective C category */
3863 objdef = oignore;
3864 }
3865 if (--parlev == 0)
3866 {
3867 switch (fvdef)
3868 {
3869 case fstartlist:
3870 case finlist:
3871 fvdef = flistseen;
3872 break;
3873 }
3874 if (!instruct
3875 && (typdef == tend
3876 || typdef == ttypeseen))
3877 {
3878 typdef = tignore;
3879 make_C_tag (FALSE); /* a typedef */
3880 }
3881 }
3882 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3883 parlev = 0;
3884 break;
3885 case '{':
3886 if (definedef != dnone)
3887 break;
3888 if (typdef == ttypeseen)
3889 {
3890 /* Whenever typdef is set to tinbody (currently only
3891 here), typdefbracelev should be set to bracelev. */
3892 typdef = tinbody;
3893 typdefbracelev = bracelev;
3894 }
3895 switch (fvdef)
3896 {
3897 case flistseen:
3898 make_C_tag (TRUE); /* a function */
3899 /* FALLTHRU */
3900 case fignore:
3901 fvdef = fvnone;
3902 break;
3903 case fvnone:
3904 switch (objdef)
3905 {
3906 case otagseen:
3907 make_C_tag (TRUE); /* an Objective C class */
3908 objdef = oignore;
3909 break;
3910 case omethodtag:
3911 case omethodparm:
3912 make_C_tag (TRUE); /* an Objective C method */
3913 objdef = oinbody;
3914 break;
3915 default:
3916 /* Neutralize `extern "C" {' grot. */
3917 if (bracelev == 0 && structdef == snone && nestlev == 0
3918 && typdef == tnone)
3919 bracelev = -1;
3920 }
3921 break;
3922 }
3923 switch (structdef)
3924 {
3925 case skeyseen: /* unnamed struct */
3926 pushclass_above (bracelev, NULL, 0);
3927 structdef = snone;
3928 break;
3929 case stagseen: /* named struct or enum */
3930 case scolonseen: /* a class */
3931 pushclass_above (bracelev,token.line+token.offset, token.length);
3932 structdef = snone;
3933 make_C_tag (FALSE); /* a struct or enum */
3934 break;
3935 }
3936 bracelev++;
3937 break;
3938 case '*':
3939 if (definedef != dnone)
3940 break;
3941 if (fvdef == fstartlist)
3942 {
3943 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3944 token.valid = FALSE;
3945 }
3946 break;
3947 case '}':
3948 if (definedef != dnone)
3949 break;
3950 if (!ignoreindent && lp == newlb.buffer + 1)
3951 {
3952 if (bracelev != 0)
3953 token.valid = FALSE;
3954 bracelev = 0; /* reset brace level if first column */
3955 parlev = 0; /* also reset paren level, just in case... */
3956 }
3957 else if (bracelev > 0)
3958 bracelev--;
3959 else
3960 token.valid = FALSE; /* something gone amiss, token unreliable */
3961 popclass_above (bracelev);
3962 structdef = snone;
3963 /* Only if typdef == tinbody is typdefbracelev significant. */
3964 if (typdef == tinbody && bracelev <= typdefbracelev)
3965 {
3966 assert (bracelev == typdefbracelev);
3967 typdef = tend;
3968 }
3969 break;
3970 case '=':
3971 if (definedef != dnone)
3972 break;
3973 switch (fvdef)
3974 {
3975 case foperator:
3976 case finlist:
3977 case fignore:
3978 case vignore:
3979 break;
3980 case fvnameseen:
3981 if ((members && bracelev == 1)
3982 || (globals && bracelev == 0 && (!fvextern || declarations)))
3983 make_C_tag (FALSE); /* a variable */
3984 /* FALLTHRU */
3985 default:
3986 fvdef = vignore;
3987 }
3988 break;
3989 case '<':
3990 if (cplpl
3991 && (structdef == stagseen || fvdef == fvnameseen))
3992 {
3993 templatelev++;
3994 break;
3995 }
3996 goto resetfvdef;
3997 case '>':
3998 if (templatelev > 0)
3999 {
4000 templatelev--;
4001 break;
4002 }
4003 goto resetfvdef;
4004 case '+':
4005 case '-':
4006 if (objdef == oinbody && bracelev == 0)
4007 {
4008 objdef = omethodsign;
4009 break;
4010 }
4011 /* FALLTHRU */
4012 resetfvdef:
4013 case '#': case '~': case '&': case '%': case '/':
4014 case '|': case '^': case '!': case '.': case '?':
4015 if (definedef != dnone)
4016 break;
4017 /* These surely cannot follow a function tag in C. */
4018 switch (fvdef)
4019 {
4020 case foperator:
4021 case finlist:
4022 case fignore:
4023 case vignore:
4024 break;
4025 default:
4026 fvdef = fvnone;
4027 }
4028 break;
4029 case '\0':
4030 if (objdef == otagseen)
4031 {
4032 make_C_tag (TRUE); /* an Objective C class */
4033 objdef = oignore;
4034 }
4035 /* If a macro spans multiple lines don't reset its state. */
4036 if (quotednl)
4037 CNL_SAVE_DEFINEDEF ();
4038 else
4039 CNL ();
4040 break;
4041 } /* switch (c) */
4042
4043 } /* while not eof */
4044
4045 free (lbs[0].lb.buffer);
4046 free (lbs[1].lb.buffer);
4047 }
4048
4049 /*
4050 * Process either a C++ file or a C file depending on the setting
4051 * of a global flag.
4052 */
4053 static void
4054 default_C_entries (inf)
4055 FILE *inf;
4056 {
4057 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4058 }
4059
/* Scan INF as plain C: no dialect flag is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
4067
4068 /* Always do C++. */
4069 static void
4070 Cplusplus_entries (inf)
4071 FILE *inf;
4072 {
4073 C_entries (C_PLPL, inf);
4074 }
4075
4076 /* Always do Java. */
4077 static void
4078 Cjava_entries (inf)
4079 FILE *inf;
4080 {
4081 C_entries (C_JAVA, inf);
4082 }
4083
4084 /* Always do C*. */
4085 static void
4086 Cstar_entries (inf)
4087 FILE *inf;
4088 {
4089 C_entries (C_STAR, inf);
4090 }
4091
4092 /* Always do Yacc. */
4093 static void
4094 Yacc_entries (inf)
4095 FILE *inf;
4096 {
4097 C_entries (YACC, inf);
4098 }
4099
4100 \f
/* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read one line
   into LINE_BUFFER and make CHAR_POINTER point at its first character.
   The statement following the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  while (!feof (file_pointer)		/* loop test */			\
	 &&				/* work done on each pass */	\
	 (readline (&line_buffer, file_pointer),			\
	  char_pointer = line_buffer.buffer,				\
	  TRUE))

/* True if CP points at the keyword KW followed by a character for
   which notinname holds; in that case CP is moved past the keyword
   and past the spaces that follow it.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof (kw) - 1)	/* cp points at kw */	\
   && notinname ((cp)[sizeof (kw) - 1])		/* end of kw */		\
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))) /* skip spaces */

/* Similar to LOOKING_AT, but the comparison ignores case, notinname
   is not used, and no spaces are skipped: on success CP is left just
   after the keyword.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert ("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)	/* cp points at kw */	\
   && ((cp) += sizeof (kw) - 1))		/* skip the keyword */
4122
4123 /*
4124 * Read a file, but do no processing. This is used to do regexp
4125 * matching on files that have no language defined.
4126 */
4127 static void
4128 just_read_file (inf)
4129 FILE *inf;
4130 {
4131 register char *dummy;
4132
4133 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4134 continue;
4135 }
4136
4137 \f
4138 /* Fortran parsing */
4139
4140 static void F_takeprec __P((void));
4141 static void F_getit __P((FILE *));
4142
4143 static void
4144 F_takeprec ()
4145 {
4146 dbp = skip_spaces (dbp);
4147 if (*dbp != '*')
4148 return;
4149 dbp++;
4150 dbp = skip_spaces (dbp);
4151 if (strneq (dbp, "(*)", 3))
4152 {
4153 dbp += 3;
4154 return;
4155 }
4156 if (!ISDIGIT (*dbp))
4157 {
4158 --dbp; /* force failure */
4159 return;
4160 }
4161 do
4162 dbp++;
4163 while (ISDIGIT (*dbp));
4164 }
4165
4166 static void
4167 F_getit (inf)
4168 FILE *inf;
4169 {
4170 register char *cp;
4171
4172 dbp = skip_spaces (dbp);
4173 if (*dbp == '\0')
4174 {
4175 readline (&lb, inf);
4176 dbp = lb.buffer;
4177 if (dbp[5] != '&')
4178 return;
4179 dbp += 6;
4180 dbp = skip_spaces (dbp);
4181 }
4182 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4183 return;
4184 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4185 continue;
4186 make_tag (dbp, cp-dbp, TRUE,
4187 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4188 }
4189
4190
4191 static void
4192 Fortran_functions (inf)
4193 FILE *inf;
4194 {
4195 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4196 {
4197 if (*dbp == '%')
4198 dbp++; /* Ratfor escape to fortran */
4199 dbp = skip_spaces (dbp);
4200 if (*dbp == '\0')
4201 continue;
4202 switch (lowcase (*dbp))
4203 {
4204 case 'i':
4205 if (nocase_tail ("integer"))
4206 F_takeprec ();
4207 break;
4208 case 'r':
4209 if (nocase_tail ("real"))
4210 F_takeprec ();
4211 break;
4212 case 'l':
4213 if (nocase_tail ("logical"))
4214 F_takeprec ();
4215 break;
4216 case 'c':
4217 if (nocase_tail ("complex") || nocase_tail ("character"))
4218 F_takeprec ();
4219 break;
4220 case 'd':
4221 if (nocase_tail ("double"))
4222 {
4223 dbp = skip_spaces (dbp);
4224 if (*dbp == '\0')
4225 continue;
4226 if (nocase_tail ("precision"))
4227 break;
4228 continue;
4229 }
4230 break;
4231 }
4232 dbp = skip_spaces (dbp);
4233 if (*dbp == '\0')
4234 continue;
4235 switch (lowcase (*dbp))
4236 {
4237 case 'f':
4238 if (nocase_tail ("function"))
4239 F_getit (inf);
4240 continue;
4241 case 's':
4242 if (nocase_tail ("subroutine"))
4243 F_getit (inf);
4244 continue;
4245 case 'e':
4246 if (nocase_tail ("entry"))
4247 F_getit (inf);
4248 continue;
4249 case 'b':
4250 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4251 {
4252 dbp = skip_spaces (dbp);
4253 if (*dbp == '\0') /* assume un-named */
4254 make_tag ("blockdata", 9, TRUE,
4255 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4256 else
4257 F_getit (inf); /* look for name */
4258 }
4259 continue;
4260 }
4261 }
4262 }
4263
4264 \f
4265 /*
4266 * Ada parsing
4267 * Original code by
4268 * Philippe Waroquiers (1998)
4269 */
4270
4271 static void Ada_getit __P((FILE *, char *));
4272
4273 /* Once we are positioned after an "interesting" keyword, let's get
4274 the real tag value necessary. */
4275 static void
4276 Ada_getit (inf, name_qualifier)
4277 FILE *inf;
4278 char *name_qualifier;
4279 {
4280 register char *cp;
4281 char *name;
4282 char c;
4283
4284 while (!feof (inf))
4285 {
4286 dbp = skip_spaces (dbp);
4287 if (*dbp == '\0'
4288 || (dbp[0] == '-' && dbp[1] == '-'))
4289 {
4290 readline (&lb, inf);
4291 dbp = lb.buffer;
4292 }
4293 switch (lowcase(*dbp))
4294 {
4295 case 'b':
4296 if (nocase_tail ("body"))
4297 {
4298 /* Skipping body of procedure body or package body or ....
4299 resetting qualifier to body instead of spec. */
4300 name_qualifier = "/b";
4301 continue;
4302 }
4303 break;
4304 case 't':
4305 /* Skipping type of task type or protected type ... */
4306 if (nocase_tail ("type"))
4307 continue;
4308 break;
4309 }
4310 if (*dbp == '"')
4311 {
4312 dbp += 1;
4313 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4314 continue;
4315 }
4316 else
4317 {
4318 dbp = skip_spaces (dbp);
4319 for (cp = dbp;
4320 (*cp != '\0'
4321 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4322 cp++)
4323 continue;
4324 if (cp == dbp)
4325 return;
4326 }
4327 c = *cp;
4328 *cp = '\0';
4329 name = concat (dbp, name_qualifier, "");
4330 *cp = c;
4331 make_tag (name, strlen (name), TRUE,
4332 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4333 free (name);
4334 if (c == '"')
4335 dbp = cp + 1;
4336 return;
4337 }
4338 }
4339
4340 static void
4341 Ada_funcs (inf)
4342 FILE *inf;
4343 {
4344 bool inquote = FALSE;
4345 bool skip_till_semicolumn = FALSE;
4346
4347 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4348 {
4349 while (*dbp != '\0')
4350 {
4351 /* Skip a string i.e. "abcd". */
4352 if (inquote || (*dbp == '"'))
4353 {
4354 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4355 if (dbp != NULL)
4356 {
4357 inquote = FALSE;
4358 dbp += 1;
4359 continue; /* advance char */
4360 }
4361 else
4362 {
4363 inquote = TRUE;
4364 break; /* advance line */
4365 }
4366 }
4367
4368 /* Skip comments. */
4369 if (dbp[0] == '-' && dbp[1] == '-')
4370 break; /* advance line */
4371
4372 /* Skip character enclosed in single quote i.e. 'a'
4373 and skip single quote starting an attribute i.e. 'Image. */
4374 if (*dbp == '\'')
4375 {
4376 dbp++ ;
4377 if (*dbp != '\0')
4378 dbp++;
4379 continue;
4380 }
4381
4382 if (skip_till_semicolumn)
4383 {
4384 if (*dbp == ';')
4385 skip_till_semicolumn = FALSE;
4386 dbp++;
4387 continue; /* advance char */
4388 }
4389
4390 /* Search for beginning of a token. */
4391 if (!begtoken (*dbp))
4392 {
4393 dbp++;
4394 continue; /* advance char */
4395 }
4396
4397 /* We are at the beginning of a token. */
4398 switch (lowcase(*dbp))
4399 {
4400 case 'f':
4401 if (!packages_only && nocase_tail ("function"))
4402 Ada_getit (inf, "/f");
4403 else
4404 break; /* from switch */
4405 continue; /* advance char */
4406 case 'p':
4407 if (!packages_only && nocase_tail ("procedure"))
4408 Ada_getit (inf, "/p");
4409 else if (nocase_tail ("package"))
4410 Ada_getit (inf, "/s");
4411 else if (nocase_tail ("protected")) /* protected type */
4412 Ada_getit (inf, "/t");
4413 else
4414 break; /* from switch */
4415 continue; /* advance char */
4416
4417 case 'u':
4418 if (typedefs && !packages_only && nocase_tail ("use"))
4419 {
4420 /* when tagging types, avoid tagging use type Pack.Typename;
4421 for this, we will skip everything till a ; */
4422 skip_till_semicolumn = TRUE;
4423 continue; /* advance char */
4424 }
4425
4426 case 't':
4427 if (!packages_only && nocase_tail ("task"))
4428 Ada_getit (inf, "/k");
4429 else if (typedefs && !packages_only && nocase_tail ("type"))
4430 {
4431 Ada_getit (inf, "/t");
4432 while (*dbp != '\0')
4433 dbp += 1;
4434 }
4435 else
4436 break; /* from switch */
4437 continue; /* advance char */
4438 }
4439
4440 /* Look for the end of the token. */
4441 while (!endtoken (*dbp))
4442 dbp++;
4443
4444 } /* advance char */
4445 } /* advance line */
4446 }
4447
4448 \f
4449 /*
4450 * Unix and microcontroller assembly tag handling
4451 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4452 * Idea by Bob Weiner, Motorola Inc. (1994)
4453 */
4454 static void
4455 Asm_labels (inf)
4456 FILE *inf;
4457 {
4458 register char *cp;
4459
4460 LOOP_ON_INPUT_LINES (inf, lb, cp)
4461 {
4462 /* If first char is alphabetic or one of [_.$], test for colon
4463 following identifier. */
4464 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4465 {
4466 /* Read past label. */
4467 cp++;
4468 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4469 cp++;
4470 if (*cp == ':' || iswhite (*cp))
4471 /* Found end of label, so copy it and add it to the table. */
4472 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4473 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4474 }
4475 }
4476 }
4477
4478 \f
4479 /*
4480 * Perl support
4481 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4482 * Perl variable names: /^(my|local).../
4483 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4484 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4485 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4486 */
4487 static void
4488 Perl_functions (inf)
4489 FILE *inf;
4490 {
4491 char *package = savestr ("main"); /* current package name */
4492 register char *cp;
4493
4494 LOOP_ON_INPUT_LINES (inf, lb, cp)
4495 {
4496 skip_spaces(cp);
4497
4498 if (LOOKING_AT (cp, "package"))
4499 {
4500 free (package);
4501 get_tag (cp, &package);
4502 }
4503 else if (LOOKING_AT (cp, "sub"))
4504 {
4505 char *pos;
4506 char *sp = cp;
4507
4508 while (!notinname (*cp))
4509 cp++;
4510 if (cp == sp)
4511 continue; /* nothing found */
4512 if ((pos = etags_strchr (sp, ':')) != NULL
4513 && pos < cp && pos[1] == ':')
4514 /* The name is already qualified. */
4515 make_tag (sp, cp - sp, TRUE,
4516 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4517 else
4518 /* Qualify it. */
4519 {
4520 char savechar, *name;
4521
4522 savechar = *cp;
4523 *cp = '\0';
4524 name = concat (package, "::", sp);
4525 *cp = savechar;
4526 make_tag (name, strlen(name), TRUE,
4527 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4528 free (name);
4529 }
4530 }
4531 else if (globals) /* only if we are tagging global vars */
4532 {
4533 /* Skip a qualifier, if any. */
4534 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4535 /* After "my" or "local", but before any following paren or space. */
4536 char *varstart = cp;
4537
4538 if (qual /* should this be removed? If yes, how? */
4539 && (*cp == '$' || *cp == '@' || *cp == '%'))
4540 {
4541 varstart += 1;
4542 do
4543 cp++;
4544 while (ISALNUM (*cp) || *cp == '_');
4545 }
4546 else if (qual)
4547 {
4548 /* Should be examining a variable list at this point;
4549 could insist on seeing an open parenthesis. */
4550 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4551 cp++;
4552 }
4553 else
4554 continue;
4555
4556 make_tag (varstart, cp - varstart, FALSE,
4557 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4558 }
4559 }
4560 free (package);
4561 }
4562
4563
4564 /*
4565 * Python support
4566 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4567 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4568 * More ideas by seb bacon <seb@jamkit.com> (2002)
4569 */
4570 static void
4571 Python_functions (inf)
4572 FILE *inf;
4573 {
4574 register char *cp;
4575
4576 LOOP_ON_INPUT_LINES (inf, lb, cp)
4577 {
4578 cp = skip_spaces (cp);
4579 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4580 {
4581 char *name = cp;
4582 while (!notinname (*cp) && *cp != ':')
4583 cp++;
4584 make_tag (name, cp - name, TRUE,
4585 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586 }
4587 }
4588 }
4589
4590 \f
4591 /*
4592 * PHP support
4593 * Look for:
4594 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4595 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4596 * - /^[ \t]*define\(\"[^\"]+/
4597 * Only with --members:
4598 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4599 * Idea by Diez B. Roggisch (2001)
4600 */
4601 static void
4602 PHP_functions (inf)
4603 FILE *inf;
4604 {
4605 register char *cp, *name;
4606 bool search_identifier = FALSE;
4607
4608 LOOP_ON_INPUT_LINES (inf, lb, cp)
4609 {
4610 cp = skip_spaces (cp);
4611 name = cp;
4612 if (search_identifier
4613 && *cp != '\0')
4614 {
4615 while (!notinname (*cp))
4616 cp++;
4617 make_tag (name, cp - name, TRUE,
4618 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4619 search_identifier = FALSE;
4620 }
4621 else if (LOOKING_AT (cp, "function"))
4622 {
4623 if(*cp == '&')
4624 cp = skip_spaces (cp+1);
4625 if(*cp != '\0')
4626 {
4627 name = cp;
4628 while (!notinname (*cp))
4629 cp++;
4630 make_tag (name, cp - name, TRUE,
4631 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4632 }
4633 else
4634 search_identifier = TRUE;
4635 }
4636 else if (LOOKING_AT (cp, "class"))
4637 {
4638 if (*cp != '\0')
4639 {
4640 name = cp;
4641 while (*cp != '\0' && !iswhite (*cp))
4642 cp++;
4643 make_tag (name, cp - name, FALSE,
4644 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4645 }
4646 else
4647 search_identifier = TRUE;
4648 }
4649 else if (strneq (cp, "define", 6)
4650 && (cp = skip_spaces (cp+6))
4651 && *cp++ == '('
4652 && (*cp == '"' || *cp == '\''))
4653 {
4654 char quote = *cp++;
4655 name = cp;
4656 while (*cp != quote && *cp != '\0')
4657 cp++;
4658 make_tag (name, cp - name, FALSE,
4659 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4660 }
4661 else if (members
4662 && LOOKING_AT (cp, "var")
4663 && *cp == '$')
4664 {
4665 name = cp;
4666 while (!notinname(*cp))
4667 cp++;
4668 make_tag (name, cp - name, FALSE,
4669 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4670 }
4671 }
4672 }
4673
4674 \f
4675 /*
4676 * Cobol tag functions
4677 * We could look for anything that could be a paragraph name.
4678 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4679 * Idea by Corny de Souza (1993)
4680 */
4681 static void
4682 Cobol_paragraphs (inf)
4683 FILE *inf;
4684 {
4685 register char *bp, *ep;
4686
4687 LOOP_ON_INPUT_LINES (inf, lb, bp)
4688 {
4689 if (lb.len < 9)
4690 continue;
4691 bp += 8;
4692
4693 /* If eoln, compiler option or comment ignore whole line. */
4694 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4695 continue;
4696
4697 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4698 continue;
4699 if (*ep++ == '.')
4700 make_tag (bp, ep - bp, TRUE,
4701 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4702 }
4703 }
4704
4705 \f
4706 /*
4707 * Makefile support
4708 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4709 */
4710 static void
4711 Makefile_targets (inf)
4712 FILE *inf;
4713 {
4714 register char *bp;
4715
4716 LOOP_ON_INPUT_LINES (inf, lb, bp)
4717 {
4718 if (*bp == '\t' || *bp == '#')
4719 continue;
4720 while (*bp != '\0' && *bp != '=' && *bp != ':')
4721 bp++;
4722 if (*bp == ':' || (globals && *bp == '='))
4723 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4724 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4725 }
4726 }
4727
4728 \f
4729 /*
4730 * Pascal parsing
4731 * Original code by Mosur K. Mohan (1989)
4732 *
4733 * Locates tags for procedures & functions. Doesn't do any type- or
4734 * var-definitions. It does look for the keyword "extern" or
4735 * "forward" immediately following the procedure statement; if found,
4736 * the tag is skipped.
4737 */
4738 static void
4739 Pascal_functions (inf)
4740 FILE *inf;
4741 {
4742 linebuffer tline; /* mostly copied from C_entries */
4743 long save_lcno;
4744 int save_lineno, namelen, taglen;
4745 char c, *name;
4746
4747 bool /* each of these flags is TRUE iff: */
4748 incomment, /* point is inside a comment */
4749 inquote, /* point is inside '..' string */
4750 get_tagname, /* point is after PROCEDURE/FUNCTION
4751 keyword, so next item = potential tag */
4752 found_tag, /* point is after a potential tag */
4753 inparms, /* point is within parameter-list */
4754 verify_tag; /* point has passed the parm-list, so the
4755 next token will determine whether this
4756 is a FORWARD/EXTERN to be ignored, or
4757 whether it is a real tag */
4758
4759 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4760 name = NULL; /* keep compiler quiet */
4761 dbp = lb.buffer;
4762 *dbp = '\0';
4763 linebuffer_init (&tline);
4764
4765 incomment = inquote = FALSE;
4766 found_tag = FALSE; /* have a proc name; check if extern */
4767 get_tagname = FALSE; /* found "procedure" keyword */
4768 inparms = FALSE; /* found '(' after "proc" */
4769 verify_tag = FALSE; /* check if "extern" is ahead */
4770
4771
4772 while (!feof (inf)) /* long main loop to get next char */
4773 {
4774 c = *dbp++;
4775 if (c == '\0') /* if end of line */
4776 {
4777 readline (&lb, inf);
4778 dbp = lb.buffer;
4779 if (*dbp == '\0')
4780 continue;
4781 if (!((found_tag && verify_tag)
4782 || get_tagname))
4783 c = *dbp++; /* only if don't need *dbp pointing
4784 to the beginning of the name of
4785 the procedure or function */
4786 }
4787 if (incomment)
4788 {
4789 if (c == '}') /* within { } comments */
4790 incomment = FALSE;
4791 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4792 {
4793 dbp++;
4794 incomment = FALSE;
4795 }
4796 continue;
4797 }
4798 else if (inquote)
4799 {
4800 if (c == '\'')
4801 inquote = FALSE;
4802 continue;
4803 }
4804 else
4805 switch (c)
4806 {
4807 case '\'':
4808 inquote = TRUE; /* found first quote */
4809 continue;
4810 case '{': /* found open { comment */
4811 incomment = TRUE;
4812 continue;
4813 case '(':
4814 if (*dbp == '*') /* found open (* comment */
4815 {
4816 incomment = TRUE;
4817 dbp++;
4818 }
4819 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4820 inparms = TRUE;
4821 continue;
4822 case ')': /* end of parms list */
4823 if (inparms)
4824 inparms = FALSE;
4825 continue;
4826 case ';':
4827 if (found_tag && !inparms) /* end of proc or fn stmt */
4828 {
4829 verify_tag = TRUE;
4830 break;
4831 }
4832 continue;
4833 }
4834 if (found_tag && verify_tag && (*dbp != ' '))
4835 {
4836 /* Check if this is an "extern" declaration. */
4837 if (*dbp == '\0')
4838 continue;
4839 if (lowcase (*dbp == 'e'))
4840 {
4841 if (nocase_tail ("extern")) /* superfluous, really! */
4842 {
4843 found_tag = FALSE;
4844 verify_tag = FALSE;
4845 }
4846 }
4847 else if (lowcase (*dbp) == 'f')
4848 {
4849 if (nocase_tail ("forward")) /* check for forward reference */
4850 {
4851 found_tag = FALSE;
4852 verify_tag = FALSE;
4853 }
4854 }
4855 if (found_tag && verify_tag) /* not external proc, so make tag */
4856 {
4857 found_tag = FALSE;
4858 verify_tag = FALSE;
4859 make_tag (name, namelen, TRUE,
4860 tline.buffer, taglen, save_lineno, save_lcno);
4861 continue;
4862 }
4863 }
4864 if (get_tagname) /* grab name of proc or fn */
4865 {
4866 char *cp;
4867
4868 if (*dbp == '\0')
4869 continue;
4870
4871 /* Find block name. */
4872 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4873 continue;
4874
4875 /* Save all values for later tagging. */
4876 linebuffer_setlen (&tline, lb.len);
4877 strcpy (tline.buffer, lb.buffer);
4878 save_lineno = lineno;
4879 save_lcno = linecharno;
4880 name = tline.buffer + (dbp - lb.buffer);
4881 namelen = cp - dbp;
4882 taglen = cp - lb.buffer + 1;
4883
4884 dbp = cp; /* set dbp to e-o-token */
4885 get_tagname = FALSE;
4886 found_tag = TRUE;
4887 continue;
4888
4889 /* And proceed to check for "extern". */
4890 }
4891 else if (!incomment && !inquote && !found_tag)
4892 {
4893 /* Check for proc/fn keywords. */
4894 switch (lowcase (c))
4895 {
4896 case 'p':
4897 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4898 get_tagname = TRUE;
4899 continue;
4900 case 'f':
4901 if (nocase_tail ("unction"))
4902 get_tagname = TRUE;
4903 continue;
4904 }
4905 }
4906 } /* while not eof */
4907
4908 free (tline.buffer);
4909 }
4910
4911 \f
4912 /*
4913 * Lisp tag functions
4914 * look for (def or (DEF, quote or QUOTE
4915 */
4916
4917 static void L_getit __P((void));
4918
4919 static void
4920 L_getit ()
4921 {
4922 if (*dbp == '\'') /* Skip prefix quote */
4923 dbp++;
4924 else if (*dbp == '(')
4925 {
4926 dbp++;
4927 /* Try to skip "(quote " */
4928 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4929 /* Ok, then skip "(" before name in (defstruct (foo)) */
4930 dbp = skip_spaces (dbp);
4931 }
4932 get_tag (dbp, NULL);
4933 }
4934
4935 static void
4936 Lisp_functions (inf)
4937 FILE *inf;
4938 {
4939 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4940 {
4941 if (dbp[0] != '(')
4942 continue;
4943
4944 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4945 {
4946 dbp = skip_non_spaces (dbp);
4947 dbp = skip_spaces (dbp);
4948 L_getit ();
4949 }
4950 else
4951 {
4952 /* Check for (foo::defmumble name-defined ... */
4953 do
4954 dbp++;
4955 while (!notinname (*dbp) && *dbp != ':');
4956 if (*dbp == ':')
4957 {
4958 do
4959 dbp++;
4960 while (*dbp == ':');
4961
4962 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4963 {
4964 dbp = skip_non_spaces (dbp);
4965 dbp = skip_spaces (dbp);
4966 L_getit ();
4967 }
4968 }
4969 }
4970 }
4971 }
4972
4973 \f
4974 /*
4975 * Lua script language parsing
4976 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4977 *
4978 * "function" and "local function" are tags if they start at column 1.
4979 */
4980 static void
4981 Lua_functions (inf)
4982 FILE *inf;
4983 {
4984 register char *bp;
4985
4986 LOOP_ON_INPUT_LINES (inf, lb, bp)
4987 {
4988 if (bp[0] != 'f' && bp[0] != 'l')
4989 continue;
4990
4991 LOOKING_AT (bp, "local"); /* skip possible "local" */
4992
4993 if (LOOKING_AT (bp, "function"))
4994 get_tag (bp, NULL);
4995 }
4996 }
4997
4998 \f
4999 /*
5000 * Postscript tags
5001 * Just look for lines where the first character is '/'
5002 * Also look at "defineps" for PSWrap
5003 * Ideas by:
5004 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5005 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5006 */
5007 static void
5008 PS_functions (inf)
5009 FILE *inf;
5010 {
5011 register char *bp, *ep;
5012
5013 LOOP_ON_INPUT_LINES (inf, lb, bp)
5014 {
5015 if (bp[0] == '/')
5016 {
5017 for (ep = bp+1;
5018 *ep != '\0' && *ep != ' ' && *ep != '{';
5019 ep++)
5020 continue;
5021 make_tag (bp, ep - bp, TRUE,
5022 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5023 }
5024 else if (LOOKING_AT (bp, "defineps"))
5025 get_tag (bp, NULL);
5026 }
5027 }
5028
5029 \f
5030 /*
5031 * Forth tags
5032 * Ignore anything after \ followed by space or in ( )
5033 * Look for words defined by :
5034 * Look for constant, code, create, defer, value, and variable
5035 * OBP extensions: Look for buffer:, field,
5036 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5037 */
5038 static void
5039 Forth_words (inf)
5040 FILE *inf;
5041 {
5042 register char *bp;
5043
5044 LOOP_ON_INPUT_LINES (inf, lb, bp)
5045 while ((bp = skip_spaces (bp))[0] != '\0')
5046 if (bp[0] == '\\' && iswhite(bp[1]))
5047 break; /* read next line */
5048 else if (bp[0] == '(' && iswhite(bp[1]))
5049 do /* skip to ) or eol */
5050 bp++;
5051 while (*bp != ')' && *bp != '\0');
5052 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5053 || LOOKING_AT_NOCASE (bp, "constant")
5054 || LOOKING_AT_NOCASE (bp, "code")
5055 || LOOKING_AT_NOCASE (bp, "create")
5056 || LOOKING_AT_NOCASE (bp, "defer")
5057 || LOOKING_AT_NOCASE (bp, "value")
5058 || LOOKING_AT_NOCASE (bp, "variable")
5059 || LOOKING_AT_NOCASE (bp, "buffer:")
5060 || LOOKING_AT_NOCASE (bp, "field"))
5061 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5062 else
5063 bp = skip_non_spaces (bp);
5064 }
5065
5066 \f
5067 /*
5068 * Scheme tag functions
5069 * look for (def... xyzzy
5070 * (def... (xyzzy
5071 * (def ... ((...(xyzzy ....
5072 * (set! xyzzy
5073 * Original code by Ken Haase (1985?)
5074 */
5075 static void
5076 Scheme_functions (inf)
5077 FILE *inf;
5078 {
5079 register char *bp;
5080
5081 LOOP_ON_INPUT_LINES (inf, lb, bp)
5082 {
5083 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5084 {
5085 bp = skip_non_spaces (bp+4);
5086 /* Skip over open parens and white space */
5087 while (notinname (*bp))
5088 bp++;
5089 get_tag (bp, NULL);
5090 }
5091 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5092 get_tag (bp, NULL);
5093 }
5094 }
5095
5096 \f
5097 /* Find tags in TeX and LaTeX input files. */
5098
5099 /* TEX_toktab is a table of TeX control sequences that define tags.
5100 * Each entry records one such control sequence.
5101 *
5102 * Original code from who knows whom.
5103 * Ideas by:
5104 * Stefan Monnier (2002)
5105 */
5106
5107 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5108
5109 /* Default set of control sequences to put into TEX_toktab.
5110 The value of environment var TEXTAGS is prepended to this. */
5111 static char *TEX_defenv = "\
5112 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5113 :part:appendix:entry:index:def\
5114 :newcommand:renewcommand:newenvironment:renewenvironment";
5115
5116 static void TEX_mode __P((FILE *));
5117 static void TEX_decode_env __P((char *, char *));
5118
5119 static char TEX_esc = '\\';
5120 static char TEX_opgrp = '{';
5121 static char TEX_clgrp = '}';
5122
5123 /*
5124 * TeX/LaTeX scanning loop.
5125 */
5126 static void
5127 TeX_commands (inf)
5128 FILE *inf;
5129 {
5130 char *cp;
5131 linebuffer *key;
5132
5133 /* Select either \ or ! as escape character. */
5134 TEX_mode (inf);
5135
5136 /* Initialize token table once from environment. */
5137 if (TEX_toktab == NULL)
5138 TEX_decode_env ("TEXTAGS", TEX_defenv);
5139
5140 LOOP_ON_INPUT_LINES (inf, lb, cp)
5141 {
5142 /* Look at each TEX keyword in line. */
5143 for (;;)
5144 {
5145 /* Look for a TEX escape. */
5146 while (*cp++ != TEX_esc)
5147 if (cp[-1] == '\0' || cp[-1] == '%')
5148 goto tex_next_line;
5149
5150 for (key = TEX_toktab; key->buffer != NULL; key++)
5151 if (strneq (cp, key->buffer, key->len))
5152 {
5153 register char *p;
5154 int namelen, linelen;
5155 bool opgrp = FALSE;
5156
5157 cp = skip_spaces (cp + key->len);
5158 if (*cp == TEX_opgrp)
5159 {
5160 opgrp = TRUE;
5161 cp++;
5162 }
5163 for (p = cp;
5164 (!iswhite (*p) && *p != '#' &&
5165 *p != TEX_opgrp && *p != TEX_clgrp);
5166 p++)
5167 continue;
5168 namelen = p - cp;
5169 linelen = lb.len;
5170 if (!opgrp || *p == TEX_clgrp)
5171 {
5172 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5173 *p++;
5174 linelen = p - lb.buffer + 1;
5175 }
5176 make_tag (cp, namelen, TRUE,
5177 lb.buffer, linelen, lineno, linecharno);
5178 goto tex_next_line; /* We only tag a line once */
5179 }
5180 }
5181 tex_next_line:
5182 ;
5183 }
5184 }
5185
5186 #define TEX_LESC '\\'
5187 #define TEX_SESC '!'
5188
5189 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5190 chars accordingly. */
5191 static void
5192 TEX_mode (inf)
5193 FILE *inf;
5194 {
5195 int c;
5196
5197 while ((c = getc (inf)) != EOF)
5198 {
5199 /* Skip to next line if we hit the TeX comment char. */
5200 if (c == '%')
5201 while (c != '\n')
5202 c = getc (inf);
5203 else if (c == TEX_LESC || c == TEX_SESC )
5204 break;
5205 }
5206
5207 if (c == TEX_LESC)
5208 {
5209 TEX_esc = TEX_LESC;
5210 TEX_opgrp = '{';
5211 TEX_clgrp = '}';
5212 }
5213 else
5214 {
5215 TEX_esc = TEX_SESC;
5216 TEX_opgrp = '<';
5217 TEX_clgrp = '>';
5218 }
5219 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5220 No attempt is made to correct the situation. */
5221 rewind (inf);
5222 }
5223
5224 /* Read environment and prepend it to the default string.
5225 Build token table. */
5226 static void
5227 TEX_decode_env (evarname, defenv)
5228 char *evarname;
5229 char *defenv;
5230 {
5231 register char *env, *p;
5232 int i, len;
5233
5234 /* Append default string to environment. */
5235 env = getenv (evarname);
5236 if (!env)
5237 env = defenv;
5238 else
5239 {
5240 char *oldenv = env;
5241 env = concat (oldenv, defenv, "");
5242 }
5243
5244 /* Allocate a token table */
5245 for (len = 1, p = env; p;)
5246 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5247 len++;
5248 TEX_toktab = xnew (len, linebuffer);
5249
5250 /* Unpack environment string into token table. Be careful about */
5251 /* zero-length strings (leading ':', "::" and trailing ':') */
5252 for (i = 0; *env != '\0';)
5253 {
5254 p = etags_strchr (env, ':');
5255 if (!p) /* End of environment string. */
5256 p = env + strlen (env);
5257 if (p - env > 0)
5258 { /* Only non-zero strings. */
5259 TEX_toktab[i].buffer = savenstr (env, p - env);
5260 TEX_toktab[i].len = p - env;
5261 i++;
5262 }
5263 if (*p)
5264 env = p + 1;
5265 else
5266 {
5267 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5268 TEX_toktab[i].len = 0;
5269 break;
5270 }
5271 }
5272 }
5273
5274 \f
5275 /* Texinfo support. Dave Love, Mar. 2000. */
5276 static void
5277 Texinfo_nodes (inf)
5278 FILE * inf;
5279 {
5280 char *cp, *start;
5281 LOOP_ON_INPUT_LINES (inf, lb, cp)
5282 if (LOOKING_AT (cp, "@node"))
5283 {
5284 start = cp;
5285 while (*cp != '\0' && *cp != ',')
5286 cp++;
5287 make_tag (start, cp - start, TRUE,
5288 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5289 }
5290 }
5291
5292 \f
5293 /*
5294 * HTML support.
5295 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5296 * Contents of <a name=xxx> are tags with name xxx.
5297 *
5298 * Francesco Potortì, 2002.
5299 */
5300 static void
5301 HTML_labels (inf)
5302 FILE * inf;
5303 {
5304 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5305 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5306 bool intag = FALSE; /* inside an html tag, looking for ID= */
5307 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5308 char *end;
5309
5310
5311 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5312
5313 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5314 for (;;) /* loop on the same line */
5315 {
5316 if (skiptag) /* skip HTML tag */
5317 {
5318 while (*dbp != '\0' && *dbp != '>')
5319 dbp++;
5320 if (*dbp == '>')
5321 {
5322 dbp += 1;
5323 skiptag = FALSE;
5324 continue; /* look on the same line */
5325 }
5326 break; /* go to next line */
5327 }
5328
5329 else if (intag) /* look for "name=" or "id=" */
5330 {
5331 while (*dbp != '\0' && *dbp != '>'
5332 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5333 dbp++;
5334 if (*dbp == '\0')
5335 break; /* go to next line */
5336 if (*dbp == '>')
5337 {
5338 dbp += 1;
5339 intag = FALSE;
5340 continue; /* look on the same line */
5341 }
5342 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5343 || LOOKING_AT_NOCASE (dbp, "id="))
5344 {
5345 bool quoted = (dbp[0] == '"');
5346
5347 if (quoted)
5348 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5349 continue;
5350 else
5351 for (end = dbp; *end != '\0' && intoken (*end); end++)
5352 continue;
5353 linebuffer_setlen (&token_name, end - dbp);
5354 strncpy (token_name.buffer, dbp, end - dbp);
5355 token_name.buffer[end - dbp] = '\0';
5356
5357 dbp = end;
5358 intag = FALSE; /* we found what we looked for */
5359 skiptag = TRUE; /* skip to the end of the tag */
5360 getnext = TRUE; /* then grab the text */
5361 continue; /* look on the same line */
5362 }
5363 dbp += 1;
5364 }
5365
5366 else if (getnext) /* grab next tokens and tag them */
5367 {
5368 dbp = skip_spaces (dbp);
5369 if (*dbp == '\0')
5370 break; /* go to next line */
5371 if (*dbp == '<')
5372 {
5373 intag = TRUE;
5374 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5375 continue; /* look on the same line */
5376 }
5377
5378 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5379 continue;
5380 make_tag (token_name.buffer, token_name.len, TRUE,
5381 dbp, end - dbp, lineno, linecharno);
5382 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5383 getnext = FALSE;
5384 break; /* go to next line */
5385 }
5386
5387 else /* look for an interesting HTML tag */
5388 {
5389 while (*dbp != '\0' && *dbp != '<')
5390 dbp++;
5391 if (*dbp == '\0')
5392 break; /* go to next line */
5393 intag = TRUE;
5394 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5395 {
5396 inanchor = TRUE;
5397 continue; /* look on the same line */
5398 }
5399 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5400 || LOOKING_AT_NOCASE (dbp, "<h1>")
5401 || LOOKING_AT_NOCASE (dbp, "<h2>")
5402 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5403 {
5404 intag = FALSE;
5405 getnext = TRUE;
5406 continue; /* look on the same line */
5407 }
5408 dbp += 1;
5409 }
5410 }
5411 }
5412
5413 \f
5414 /*
5415 * Prolog support
5416 *
5417 * Assumes that the predicate or rule starts at column 0.
5418 * Only the first clause of a predicate or rule is added.
5419 * Original code by Sunichirou Sugou (1989)
5420 * Rewritten by Anders Lindgren (1996)
5421 */
5422 static int prolog_pr __P((char *, char *));
5423 static void prolog_skip_comment __P((linebuffer *, FILE *));
5424 static int prolog_atom __P((char *, int));
5425
5426 static void
5427 Prolog_functions (inf)
5428 FILE *inf;
5429 {
5430 char *cp, *last;
5431 int len;
5432 int allocated;
5433
5434 allocated = 0;
5435 len = 0;
5436 last = NULL;
5437
5438 LOOP_ON_INPUT_LINES (inf, lb, cp)
5439 {
5440 if (cp[0] == '\0') /* Empty line */
5441 continue;
5442 else if (iswhite (cp[0])) /* Not a predicate */
5443 continue;
5444 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5445 prolog_skip_comment (&lb, inf);
5446 else if ((len = prolog_pr (cp, last)) > 0)
5447 {
5448 /* Predicate or rule. Store the function name so that we
5449 only generate a tag for the first clause. */
5450 if (last == NULL)
5451 last = xnew(len + 1, char);
5452 else if (len + 1 > allocated)
5453 xrnew (last, len + 1, char);
5454 allocated = len + 1;
5455 strncpy (last, cp, len);
5456 last[len] = '\0';
5457 }
5458 }
5459 if (last != NULL)
5460 free (last);
5461 }
5462
5463
5464 static void
5465 prolog_skip_comment (plb, inf)
5466 linebuffer *plb;
5467 FILE *inf;
5468 {
5469 char *cp;
5470
5471 do
5472 {
5473 for (cp = plb->buffer; *cp != '\0'; cp++)
5474 if (cp[0] == '*' && cp[1] == '/')
5475 return;
5476 readline (plb, inf);
5477 }
5478 while (!feof(inf));
5479 }
5480
5481 /*
5482 * A predicate or rule definition is added if it matches:
5483 * <beginning of line><Prolog Atom><whitespace>(
5484 * or <beginning of line><Prolog Atom><whitespace>:-
5485 *
5486 * It is added to the tags database if it doesn't match the
5487 * name of the previous clause header.
5488 *
5489 * Return the size of the name of the predicate or rule, or 0 if no
5490 * header was found.
5491 */
5492 static int
5493 prolog_pr (s, last)
5494 char *s;
5495 char *last; /* Name of last clause. */
5496 {
5497 int pos;
5498 int len;
5499
5500 pos = prolog_atom (s, 0);
5501 if (pos < 1)
5502 return 0;
5503
5504 len = pos;
5505 pos = skip_spaces (s + pos) - s;
5506
5507 if ((s[pos] == '.'
5508 || (s[pos] == '(' && (pos += 1))
5509 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5510 && (last == NULL /* save only the first clause */
5511 || len != (int)strlen (last)
5512 || !strneq (s, last, len)))
5513 {
5514 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5515 return len;
5516 }
5517 else
5518 return 0;
5519 }
5520
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* a doubled quote stands for one quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;               /* skip the backslash and what it quotes */
        break;

      default:
        pos++;
        break;
      }
}
5579
5580 \f
5581 /*
5582 * Support for Erlang
5583 *
5584 * Generates tags for functions, defines, and records.
5585 * Assumes that Erlang functions start at column 0.
5586 * Original code by Anders Lindgren (1996)
5587 */
5588 static int erlang_func __P((char *, char *));
5589 static void erlang_attribute __P((char *));
5590 static int erlang_atom __P((char *));
5591
5592 static void
5593 Erlang_functions (inf)
5594 FILE *inf;
5595 {
5596 char *cp, *last;
5597 int len;
5598 int allocated;
5599
5600 allocated = 0;
5601 len = 0;
5602 last = NULL;
5603
5604 LOOP_ON_INPUT_LINES (inf, lb, cp)
5605 {
5606 if (cp[0] == '\0') /* Empty line */
5607 continue;
5608 else if (iswhite (cp[0])) /* Not function nor attribute */
5609 continue;
5610 else if (cp[0] == '%') /* comment */
5611 continue;
5612 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5613 continue;
5614 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5615 {
5616 erlang_attribute (cp);
5617 if (last != NULL)
5618 {
5619 free (last);
5620 last = NULL;
5621 }
5622 }
5623 else if ((len = erlang_func (cp, last)) > 0)
5624 {
5625 /*
5626 * Function. Store the function name so that we only
5627 * generates a tag for the first clause.
5628 */
5629 if (last == NULL)
5630 last = xnew (len + 1, char);
5631 else if (len + 1 > allocated)
5632 xrnew (last, len + 1, char);
5633 allocated = len + 1;
5634 strncpy (last, cp, len);
5635 last[len] = '\0';
5636 }
5637 }
5638 if (last != NULL)
5639 free (last);
5640 }
5641
5642
5643 /*
5644 * A function definition is added if it matches:
5645 * <beginning of line><Erlang Atom><whitespace>(
5646 *
5647 * It is added to the tags database if it doesn't match the
5648 * name of the previous clause header.
5649 *
5650 * Return the size of the name of the function, or 0 if no function
5651 * was found.
5652 */
5653 static int
5654 erlang_func (s, last)
5655 char *s;
5656 char *last; /* Name of last clause. */
5657 {
5658 int pos;
5659 int len;
5660
5661 pos = erlang_atom (s);
5662 if (pos < 1)
5663 return 0;
5664
5665 len = pos;
5666 pos = skip_spaces (s + pos) - s;
5667
5668 /* Save only the first clause. */
5669 if (s[pos++] == '('
5670 && (last == NULL
5671 || len != (int)strlen (last)
5672 || !strneq (s, last, len)))
5673 {
5674 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5675 return len;
5676 }
5677
5678 return 0;
5679 }
5680
5681
5682 /*
5683 * Handle attributes. Currently, tags are generated for defines
5684 * and records.
5685 *
5686 * They are on the form:
5687 * -define(foo, bar).
5688 * -define(Foo(M, N), M+N).
5689 * -record(graph, {vtab = notable, cyclic = true}).
5690 */
5691 static void
5692 erlang_attribute (s)
5693 char *s;
5694 {
5695 char *cp = s;
5696
5697 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5698 && *cp++ == '(')
5699 {
5700 int len = erlang_atom (skip_spaces (cp));
5701 if (len > 0)
5702 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5703 }
5704 return;
5705 }
5706
5707
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the atom is a quoted one that is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }
  else if (s[0] == '\'')
    {
      /* The atom is quoted: scan up to the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* A backslash quotes the following character.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      pos++;                    /* include the closing quote */
    }

  return pos;
}
5736
5737 \f
5738 #ifdef ETAGS_REGEXPS
5739
5740 static char *scan_separators __P((char *));
5741 static void add_regex __P((char *, language *));
5742 static char *substitute __P((char *, char *, struct re_registers *));
5743
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of that separator.  A backslash quotes the next
 * character: \a \b \d \e \f \n \r \t \v are turned into the
 * corresponding control characters, a quoted separator becomes the
 * bare separator, and any other quoted character is kept together
 * with its backslash.  Works in place, writing the result at the
 * start of NAME and null terminating it.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* write position, never ahead of SRC */
  char *src;                    /* read position */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character right away.  */
          src++;
          if (*src == '\0')
            break;              /* dangling backslash: unterminated */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;   /* BS (back space) */
            case 'd': *dst++ = 0177; break;   /* DEL (delete) */
            case 'e': *dst++ = 033; break;    /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;   /* NL (new line) */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
          continue;
        }

      if (*src == sep)
        break;                  /* unquoted separator: done */
      *dst++ = *src;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5803
5804 /* Look at the argument of --regex or --no-regex and do the right
5805 thing. Same for each line of a regexp file. */
5806 static void
5807 analyse_regex (regex_arg)
5808 char *regex_arg;
5809 {
5810 if (regex_arg == NULL)
5811 {
5812 free_regexps (); /* --no-regex: remove existing regexps */
5813 return;
5814 }
5815
5816 /* A real --regexp option or a line in a regexp file. */
5817 switch (regex_arg[0])
5818 {
5819 /* Comments in regexp file or null arg to --regex. */
5820 case '\0':
5821 case ' ':
5822 case '\t':
5823 break;
5824
5825 /* Read a regex file. This is recursive and may result in a
5826 loop, which will stop when the file descriptors are exhausted. */
5827 case '@':
5828 {
5829 FILE *regexfp;
5830 linebuffer regexbuf;
5831 char *regexfile = regex_arg + 1;
5832
5833 /* regexfile is a file containing regexps, one per line. */
5834 regexfp = fopen (regexfile, "r");
5835 if (regexfp == NULL)
5836 {
5837 pfatal (regexfile);
5838 return;
5839 }
5840 linebuffer_init (&regexbuf);
5841 while (readline_internal (&regexbuf, regexfp) > 0)
5842 analyse_regex (regexbuf.buffer);
5843 free (regexbuf.buffer);
5844 fclose (regexfp);
5845 }
5846 break;
5847
5848 /* Regexp to be used for a specific language only. */
5849 case '{':
5850 {
5851 language *lang;
5852 char *lang_name = regex_arg + 1;
5853 char *cp;
5854
5855 for (cp = lang_name; *cp != '}'; cp++)
5856 if (*cp == '\0')
5857 {
5858 error ("unterminated language name in regex: %s", regex_arg);
5859 return;
5860 }
5861 *cp++ = '\0';
5862 lang = get_language_from_langname (lang_name);
5863 if (lang == NULL)
5864 return;
5865 add_regex (cp, lang);
5866 }
5867 break;
5868
5869 /* Regexp to be used for any language. */
5870 default:
5871 add_regex (regex_arg, NULL);
5872 break;
5873 }
5874 }
5875
5876 /* Separate the regexp pattern, compile it,
5877 and care for optional name and modifiers. */
5878 static void
5879 add_regex (regexp_pattern, lang)
5880 char *regexp_pattern;
5881 language *lang;
5882 {
5883 static struct re_pattern_buffer zeropattern;
5884 char sep, *pat, *name, *modifiers;
5885 const char *err;
5886 struct re_pattern_buffer *patbuf;
5887 regexp *rp;
5888 bool
5889 force_explicit_name = TRUE, /* do not use implicit tag names */
5890 ignore_case = FALSE, /* case is significant */
5891 multi_line = FALSE, /* matches are done one line at a time */
5892 single_line = FALSE; /* dot does not match newline */
5893
5894
5895 if (strlen(regexp_pattern) < 3)
5896 {
5897 error ("null regexp", (char *)NULL);
5898 return;
5899 }
5900 sep = regexp_pattern[0];
5901 name = scan_separators (regexp_pattern);
5902 if (name == NULL)
5903 {
5904 error ("%s: unterminated regexp", regexp_pattern);
5905 return;
5906 }
5907 if (name[1] == sep)
5908 {
5909 error ("null name for regexp \"%s\"", regexp_pattern);
5910 return;
5911 }
5912 modifiers = scan_separators (name);
5913 if (modifiers == NULL) /* no terminating separator --> no name */
5914 {
5915 modifiers = name;
5916 name = "";
5917 }
5918 else
5919 modifiers += 1; /* skip separator */
5920
5921 /* Parse regex modifiers. */
5922 for (; modifiers[0] != '\0'; modifiers++)
5923 switch (modifiers[0])
5924 {
5925 case 'N':
5926 if (modifiers == name)
5927 error ("forcing explicit tag name but no name, ignoring", NULL);
5928 force_explicit_name = TRUE;
5929 break;
5930 case 'i':
5931 ignore_case = TRUE;
5932 break;
5933 case 's':
5934 single_line = TRUE;
5935 /* FALLTHRU */
5936 case 'm':
5937 multi_line = TRUE;
5938 need_filebuf = TRUE;
5939 break;
5940 default:
5941 {
5942 char wrongmod [2];
5943 wrongmod[0] = modifiers[0];
5944 wrongmod[1] = '\0';
5945 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5946 }
5947 break;
5948 }
5949
5950 patbuf = xnew (1, struct re_pattern_buffer);
5951 *patbuf = zeropattern;
5952 if (ignore_case)
5953 {
5954 static char lc_trans[CHARS];
5955 int i;
5956 for (i = 0; i < CHARS; i++)
5957 lc_trans[i] = lowcase (i);
5958 patbuf->translate = lc_trans; /* translation table to fold case */
5959 }
5960
5961 if (multi_line)
5962 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5963 else
5964 pat = regexp_pattern;
5965
5966 if (single_line)
5967 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5968 else
5969 re_set_syntax (RE_SYNTAX_EMACS);
5970
5971 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5972 if (multi_line)
5973 free (pat);
5974 if (err != NULL)
5975 {
5976 error ("%s while compiling pattern", err);
5977 return;
5978 }
5979
5980 rp = p_head;
5981 p_head = xnew (1, regexp);
5982 p_head->pattern = savestr (regexp_pattern);
5983 p_head->p_next = rp;
5984 p_head->lang = lang;
5985 p_head->pat = patbuf;
5986 p_head->name = savestr (name);
5987 p_head->error_signaled = FALSE;
5988 p_head->force_explicit_name = force_explicit_name;
5989 p_head->ignore_case = ignore_case;
5990 p_head->multi_line = multi_line;
5991 }
5992
5993 /*
5994 * Do the substitutions indicated by the regular expression and
5995 * arguments.
5996 */
5997 static char *
5998 substitute (in, out, regs)
5999 char *in, *out;
6000 struct re_registers *regs;
6001 {
6002 char *result, *t;
6003 int size, dig, diglen;
6004
6005 result = NULL;
6006 size = strlen (out);
6007
6008 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6009 if (out[size - 1] == '\\')
6010 fatal ("pattern error in \"%s\"", out);
6011 for (t = etags_strchr (out, '\\');
6012 t != NULL;
6013 t = etags_strchr (t + 2, '\\'))
6014 if (ISDIGIT (t[1]))
6015 {
6016 dig = t[1] - '0';
6017 diglen = regs->end[dig] - regs->start[dig];
6018 size += diglen - 2;
6019 }
6020 else
6021 size -= 1;
6022
6023 /* Allocate space and do the substitutions. */
6024 assert (size >= 0);
6025 result = xnew (size + 1, char);
6026
6027 for (t = result; *out != '\0'; out++)
6028 if (*out == '\\' && ISDIGIT (*++out))
6029 {
6030 dig = *out - '0';
6031 diglen = regs->end[dig] - regs->start[dig];
6032 strncpy (t, in + regs->start[dig], diglen);
6033 t += diglen;
6034 }
6035 else
6036 *t++ = *out;
6037 *t = '\0';
6038
6039 assert (t <= result + size);
6040 assert (t - result == (int)strlen (result));
6041
6042 return result;
6043 }
6044
6045 /* Deallocate all regexps. */
6046 static void
6047 free_regexps ()
6048 {
6049 regexp *rp;
6050 while (p_head != NULL)
6051 {
6052 rp = p_head->p_next;
6053 free (p_head->pattern);
6054 free (p_head->name);
6055 free (p_head);
6056 p_head = rp;
6057 }
6058 return;
6059 }
6060
6061 /*
6062 * Reads the whole file as a single string from `filebuf' and looks for
6063 * multi-line regular expressions, creating tags on matches.
6064 * readline already dealt with normal regexps.
6065 *
6066 * Idea by Ben Wing <ben@666.com> (2002).
6067 */
6068 static void
6069 regex_tag_multiline ()
6070 {
6071 char *buffer = filebuf.buffer;
6072 regexp *rp;
6073 char *name;
6074
6075 for (rp = p_head; rp != NULL; rp = rp->p_next)
6076 {
6077 int match = 0;
6078
6079 if (!rp->multi_line)
6080 continue; /* skip normal regexps */
6081
6082 /* Generic initialisations before parsing file from memory. */
6083 lineno = 1; /* reset global line number */
6084 charno = 0; /* reset global char number */
6085 linecharno = 0; /* reset global char number of line start */
6086
6087 /* Only use generic regexps or those for the current language. */
6088 if (rp->lang != NULL && rp->lang != curfdp->lang)
6089 continue;
6090
6091 while (match >= 0 && match < filebuf.len)
6092 {
6093 match = re_search (rp->pat, buffer, filebuf.len, charno,
6094 filebuf.len - match, &rp->regs);
6095 switch (match)
6096 {
6097 case -2:
6098 /* Some error. */
6099 if (!rp->error_signaled)
6100 {
6101 error ("regexp stack overflow while matching \"%s\"",
6102 rp->pattern);
6103 rp->error_signaled = TRUE;
6104 }
6105 break;
6106 case -1:
6107 /* No match. */
6108 break;
6109 default:
6110 if (match == rp->regs.end[0])
6111 {
6112 if (!rp->error_signaled)
6113 {
6114 error ("regexp matches the empty string: \"%s\"",
6115 rp->pattern);
6116 rp->error_signaled = TRUE;
6117 }
6118 match = -3; /* exit from while loop */
6119 break;
6120 }
6121
6122 /* Match occurred. Construct a tag. */
6123 while (charno < rp->regs.end[0])
6124 if (buffer[charno++] == '\n')
6125 lineno++, linecharno = charno;
6126 name = rp->name;
6127 if (name[0] == '\0')
6128 name = NULL;
6129 else /* make a named tag */
6130 name = substitute (buffer, rp->name, &rp->regs);
6131 if (rp->force_explicit_name)
6132 /* Force explicit tag name, if a name is there. */
6133 pfnote (name, TRUE, buffer + linecharno,
6134 charno - linecharno + 1, lineno, linecharno);
6135 else
6136 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6137 charno - linecharno + 1, lineno, linecharno);
6138 break;
6139 }
6140 }
6141 }
6142 }
6143
6144 #endif /* ETAGS_REGEXPS */
6145
6146 \f
6147 static bool
6148 nocase_tail (cp)
6149 char *cp;
6150 {
6151 register int len = 0;
6152
6153 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6154 cp++, len++;
6155 if (*cp == '\0' && !intoken (dbp[len]))
6156 {
6157 dbp += len;
6158 return TRUE;
6159 }
6160 return FALSE;
6161 }
6162
6163 static void
6164 get_tag (bp, namepp)
6165 register char *bp;
6166 char **namepp;
6167 {
6168 register char *cp = bp;
6169
6170 if (*bp != '\0')
6171 {
6172 /* Go till you get to white space or a syntactic break */
6173 for (cp = bp + 1; !notinname (*cp); cp++)
6174 continue;
6175 make_tag (bp, cp - bp, TRUE,
6176 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6177 }
6178
6179 if (namepp != NULL)
6180 *namepp = savenstr (bp, cp - bp);
6181 }
6182
6183 /*
6184 * Read a line of text from `stream' into `lbp', excluding the
6185 * newline or CR-NL, if any. Return the number of characters read from
6186 * `stream', which is the length of the line including the newline.
6187 *
6188 * On DOS or Windows we do not count the CR character, if any before the
6189 * NL, in the returned length; this mirrors the behavior of Emacs on those
6190 * platforms (for text files, it translates CR-NL to NL as it reads in the
6191 * file).
6192 *
6193 * If multi-line regular expressions are requested, each line read is
6194 * appended to `filebuf'.
6195 */
6196 static long
6197 readline_internal (lbp, stream)
6198 linebuffer *lbp;
6199 register FILE *stream;
6200 {
6201 char *buffer = lbp->buffer;
6202 register char *p = lbp->buffer;
6203 register char *pend;
6204 int chars_deleted;
6205
6206 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6207
6208 for (;;)
6209 {
6210 register int c = getc (stream);
6211 if (p == pend)
6212 {
6213 /* We're at the end of linebuffer: expand it. */
6214 lbp->size *= 2;
6215 xrnew (buffer, lbp->size, char);
6216 p += buffer - lbp->buffer;
6217 pend = buffer + lbp->size;
6218 lbp->buffer = buffer;
6219 }
6220 if (c == EOF)
6221 {
6222 *p = '\0';
6223 chars_deleted = 0;
6224 break;
6225 }
6226 if (c == '\n')
6227 {
6228 if (p > buffer && p[-1] == '\r')
6229 {
6230 p -= 1;
6231 #ifdef DOS_NT
6232 /* Assume CRLF->LF translation will be performed by Emacs
6233 when loading this file, so CRs won't appear in the buffer.
6234 It would be cleaner to compensate within Emacs;
6235 however, Emacs does not know how many CRs were deleted
6236 before any given point in the file. */
6237 chars_deleted = 1;
6238 #else
6239 chars_deleted = 2;
6240 #endif
6241 }
6242 else
6243 {
6244 chars_deleted = 1;
6245 }
6246 *p = '\0';
6247 break;
6248 }
6249 *p++ = c;
6250 }
6251 lbp->len = p - buffer;
6252
6253 if (need_filebuf /* we need filebuf for multi-line regexps */
6254 && chars_deleted > 0) /* not at EOF */
6255 {
6256 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6257 {
6258 /* Expand filebuf. */
6259 filebuf.size *= 2;
6260 xrnew (filebuf.buffer, filebuf.size, char);
6261 }
6262 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6263 filebuf.len += lbp->len;
6264 filebuf.buffer[filebuf.len++] = '\n';
6265 filebuf.buffer[filebuf.len] = '\0';
6266 }
6267
6268 return lbp->len + chars_deleted;
6269 }
6270
6271 /*
6272 * Like readline_internal, above, but in addition try to match the
6273 * input line against relevant regular expressions and manage #line
6274 * directives.
6275 */
6276 static void
6277 readline (lbp, stream)
6278 linebuffer *lbp;
6279 FILE *stream;
6280 {
/* Value returned by readline_internal for the line just read.  */
6281 long result;
6282
6283 linecharno = charno; /* update global char number of line start */
6284 result = readline_internal (lbp, stream); /* read line */
6285 lineno += 1; /* increment global line number */
6286 charno += result; /* increment global char number */
6287
6288 /* Honour #line directives. */
6289 if (!no_line_directive)
6290 {
/* Static: this state must survive from one call to the next, so that
   lines can be skipped until another #line directive shows up.  */
6291 static bool discard_until_line_directive;
6292
6293 /* Check whether this is a #line directive. */
6294 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6295 {
6296 int start, lno;
6297
6298 if (DEBUG) start = 0; /* shut up the compiler */
/* Only %d counts as a conversion, so a return value of 1 means that
   the line number was read.  %n stores in START the number of
   characters consumed, which is the offset just past the opening
   double quote when the quote is present.  */
6299 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6300 {
6301 char *endp = lbp->buffer + start;
6302
6303 assert (start > 0);
/* Look for the closing double quote, passing over every double quote
   that is preceded by a backslash.  */
6304 while ((endp = etags_strchr (endp, '"')) != NULL
6305 && endp[-1] == '\\')
6306 endp++;
6307 if (endp != NULL)
6308 /* Ok, this is a real #line directive. Let's deal with it. */
6309 {
6310 char *taggedabsname; /* absolute name of original file */
6311 char *taggedfname; /* name of original file as given */
6312 char *name; /* temp var */
6313
6314 discard_until_line_directive = FALSE; /* found it */
6315 name = lbp->buffer + start;
/* Cut the line buffer at the closing quote: NAME is now the file name.  */
6316 *endp = '\0';
6317 canonicalize_filename (name); /* for DOS */
6318 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6319 if (filename_is_absolute (name)
6320 || filename_is_absolute (curfdp->infname))
6321 taggedfname = savestr (taggedabsname);
6322 else
6323 taggedfname = relative_filename (taggedabsname,tagfiledir);
6324
6325 if (streq (curfdp->taggedfname, taggedfname))
6326 /* The #line directive is only a line number change. We
6327 deal with this afterwards. */
6328 free (taggedfname);
6329 else
6330 /* The tags following this #line directive should be
6331 attributed to taggedfname. In order to do this, set
6332 curfdp accordingly. */
6333 {
6334 fdesc *fdp; /* file description pointer */
6335
6336 /* Go look for a file description already set up for the
6337 file indicated in the #line directive. If there is
6338 one, use it from now until the next #line
6339 directive. */
6340 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6341 if (streq (fdp->infname, curfdp->infname)
6342 && streq (fdp->taggedfname, taggedfname))
6343 /* If we remove the second test above (after the &&)
6344 then all entries pertaining to the same file are
6345 coalesced in the tags file. If we use it, then
6346 entries pertaining to the same file but generated
6347 from different files (via #line directives) will
6348 go into separate sections in the tags file. These
6349 alternatives look equivalent. The first one
6350 destroys some apparently useless information. */
6351 {
6352 curfdp = fdp;
6353 free (taggedfname);
6354 break;
6355 }
6356 /* Else, if we already tagged the real file, skip all
6357 input lines until the next #line directive. */
6358 if (fdp == NULL) /* not found */
6359 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6360 if (streq (fdp->infabsname, taggedabsname))
6361 {
6362 discard_until_line_directive = TRUE;
6363 free (taggedfname);
6364 break;
6365 }
6366 /* Else create a new file description and use that from
6367 now on, until the next #line directive. */
6368 if (fdp == NULL) /* not found */
6369 {
/* The new description takes ownership of taggedfname; the other name
   strings are duplicated from the current description.  */
6370 fdp = fdhead;
6371 fdhead = xnew (1, fdesc);
6372 *fdhead = *curfdp; /* copy curr. file description */
6373 fdhead->next = fdp;
6374 fdhead->infname = savestr (curfdp->infname);
6375 fdhead->infabsname = savestr (curfdp->infabsname);
6376 fdhead->infabsdir = savestr (curfdp->infabsdir);
6377 fdhead->taggedfname = taggedfname;
6378 fdhead->usecharno = FALSE;
6379 fdhead->prop = NULL;
6380 fdhead->written = FALSE;
6381 curfdp = fdhead;
6382 }
6383 }
6384 free (taggedabsname);
/* The recursive call below adds one to lineno, so the line that
   follows the directive ends up numbered LNO.  */
6385 lineno = lno - 1;
6386 readline (lbp, stream);
6387 return;
6388 } /* if a real #line directive */
6389 } /* if #line is followed by a number */
6390 } /* if line begins with "#line " */
6391
6392 /* If we are here, no #line directive was found. */
6393 if (discard_until_line_directive)
6394 {
6395 if (result > 0)
6396 {
6397 /* Do a tail recursion on ourselves, thus discarding the contents
6398 of the line buffer. */
6399 readline (lbp, stream);
6400 return;
6401 }
6402 /* End of file. */
6403 discard_until_line_directive = FALSE;
6404 return;
6405 }
6406 } /* if #line directives should be considered */
6407
6408 #ifdef ETAGS_REGEXPS
6409 {
6410 int match;
6411 regexp *rp;
6412 char *name;
6413
6414 /* Match against relevant regexps. */
6415 if (lbp->len > 0)
6416 for (rp = p_head; rp != NULL; rp = rp->p_next)
6417 {
6418 /* Only use generic regexps or those for the current language.
6419 Also do not use multiline regexps, which is the job of
6420 regex_tag_multiline. */
/* NOTE(review): the language is taken from fdhead here, while
   regex_tag_multiline compares against curfdp->lang -- confirm that
   the difference is intended.  */
6421 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6422 || rp->multi_line)
6423 continue;
6424
6425 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6426 switch (match)
6427 {
6428 case -2:
6429 /* Some error. */
6430 if (!rp->error_signaled)
6431 {
6432 error ("regexp stack overflow while matching \"%s\"",
6433 rp->pattern);
6434 rp->error_signaled = TRUE;
6435 }
6436 break;
6437 case -1:
6438 /* No match. */
6439 break;
6440 case 0:
6441 /* Empty string matched. */
6442 if (!rp->error_signaled)
6443 {
6444 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6445 rp->error_signaled = TRUE;
6446 }
6447 break;
6448 default:
6449 /* Match occurred. Construct a tag. */
/* The value returned by re_match is passed on below as the length of
   the tagged text at the start of the line.  */
6450 name = rp->name;
6451 if (name[0] == '\0')
6452 name = NULL;
6453 else /* make a named tag */
6454 name = substitute (lbp->buffer, rp->name, &rp->regs);
6455 if (rp->force_explicit_name)
6456 /* Force explicit tag name, if a name is there. */
6457 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6458 else
6459 make_tag (name, strlen (name), TRUE,
6460 lbp->buffer, match, lineno, linecharno);
6461 break;
6462 }
6463 }
6464 }
6465 #endif /* ETAGS_REGEXPS */
6466 }
6467
6468 \f
/*
 * Return a newly allocated copy of the string CP.  The copy is made by
 * savenstr and holds strlen(cp)+1 bytes.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6479
6480 /*
6481 * Return a pointer to a space of size LEN+1 allocated with xnew where
6482 * the string CP has been copied for at most the first LEN characters.
6483 */
6484 static char *
6485 savenstr (cp, len)
6486 char *cp;
6487 int len;
6488 {
6489 register char *dp;
6490
6491 dp = xnew (len + 1, char);
6492 strncpy (dp, cp, len);
6493 dp[len] = '\0';
6494 return dp;
6495 }
6496
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so searching for '\0' finds the terminator.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r = NULL;
  /* Like strrchr, compare against C converted to char: a byte above
     127 passed as an int would otherwise never be equal to a signed
     char.  */
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6518
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so searching for '\0' finds the terminator.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Like strchr, compare against C converted to char: a byte above
     127 passed as an int would otherwise never be equal to a signed
     char.  */
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6537
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, else the difference between the
 * first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff;

      /* Case is folded only when both characters are letters.  */
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6558
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: the result is zero when
 * the first N characters are equal.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  for (;; s1++, s2++)
    {
      int diff;

      if (*s1 == '\0')
        {
          /* End of S1: the count is not consumed here.  */
          if (n < 0)
            return 0;
        }
      else if (n-- <= 0)
        return 0;               /* N characters compared equal */

      /* Case is folded only when both characters are letters.  */
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6584
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6594
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6604
/* Print error message and exit.  S1 and S2 are handed to `error'
   unchanged; the exit status is EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* report first... */
  exit (EXIT_FAILURE);          /* ...then terminate; never returns */
}
6613
/* Report the current errno error with perror, using S1 as the prefix
   of the message, and exit with status EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* report first... */
  exit (EXIT_FAILURE);          /* ...then terminate; never returns */
}
6621
6622 static void
6623 suggest_asking_for_help ()
6624 {
6625 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6626 progname, LONG_OPTIONS ? "--help" : "-h");
6627 exit (EXIT_FAILURE);
6628 }
6629
6630 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6631 static void
6632 error (s1, s2)
6633 const char *s1, *s2;
6634 {
6635 fprintf (stderr, "%s: ", progname);
6636 fprintf (stderr, s1, s2);
6637 fprintf (stderr, "\n");
6638 }
6639
6640 /* Return a newly-allocated string whose contents
6641 concatenate those of s1, s2, s3. */
6642 static char *
6643 concat (s1, s2, s3)
6644 char *s1, *s2, *s3;
6645 {
6646 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6647 char *result = xnew (len1 + len2 + len3 + 1, char);
6648
6649 strcpy (result, s1);
6650 strcpy (result + len1, s2);
6651 strcpy (result + len1 + len2, s3);
6652 result[len1 + len2 + len3] = '\0';
6653
6654 return result;
6655 }
6656
6657 \f
6658 /* Does the same work as the system V getcwd, but does not need to
6659 guess the buffer size in advance. */
6660 static char *
6661 etags_getcwd ()
6662 {
6663 #ifdef HAVE_GETCWD
6664 int bufsize = 200;
6665 char *path = xnew (bufsize, char);
6666
6667 while (getcwd (path, bufsize) == NULL)
6668 {
6669 if (errno != ERANGE)
6670 pfatal ("getcwd");
6671 bufsize *= 2;
6672 free (path);
6673 path = xnew (bufsize, char);
6674 }
6675
6676 canonicalize_filename (path);
6677 return path;
6678
6679 #else /* not HAVE_GETCWD */
6680 #if MSDOS
6681
6682 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6683
6684 getwd (path);
6685
6686 for (p = path; *p != '\0'; p++)
6687 if (*p == '\\')
6688 *p = '/';
6689 else
6690 *p = lowcase (*p);
6691
6692 return strdup (path);
6693 #else /* not MSDOS */
6694 linebuffer path;
6695 FILE *pipe;
6696
6697 linebuffer_init (&path);
6698 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6699 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6700 pfatal ("pwd");
6701 pclose (pipe);
6702
6703 return path.buffer;
6704 #endif /* not MSDOS */
6705 #endif /* not HAVE_GETCWD */
6706 }
6707
6708 /* Return a newly allocated string containing the file name of FILE
6709 relative to the absolute directory DIR (which should end with a slash). */
6710 static char *
6711 relative_filename (file, dir)
6712 char *file, *dir;
6713 {
6714 char *fp, *dp, *afn, *res;
6715 int i;
6716
6717 /* Find the common root of file and dir (with a trailing slash). */
6718 afn = absolute_filename (file, cwd);
6719 fp = afn;
6720 dp = dir;
6721 while (*fp++ == *dp++)
6722 continue;
6723 fp--, dp--; /* back to the first differing char */
6724 #ifdef DOS_NT
6725 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6726 return afn;
6727 #endif
6728 do /* look at the equal chars until '/' */
6729 fp--, dp--;
6730 while (*fp != '/');
6731
6732 /* Build a sequence of "../" strings for the resulting relative file name. */
6733 i = 0;
6734 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6735 i += 1;
6736 res = xnew (3*i + strlen (fp + 1) + 1, char);
6737 res[0] = '\0';
6738 while (i-- > 0)
6739 strcat (res, "../");
6740
6741 /* Add the file name relative to the common root of file and dir. */
6742 strcat (res, fp + 1);
6743 free (afn);
6744
6745 return res;
6746 }
6747
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An absolute FILE
   is copied as is, otherwise it is appended to DIR; in both cases the
   "/dirname/.." and "/." substrings are then removed.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the previous component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow: copy by hand, front to back.  */
              {
                char *to = cp, *from = slashp + 3;
                while ((*to++ = *from++) != '\0')
                  continue;
              }
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." with the same overlap-safe forward copy.  */
              char *to = slashp, *from = slashp + 2;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6811
6812 /* Return a newly allocated string containing the absolute
6813 file name of dir where FILE resides given DIR (which should
6814 end with a slash). */
6815 static char *
6816 absolute_dirname (file, dir)
6817 char *file, *dir;
6818 {
6819 char *slashp, *res;
6820 char save;
6821
6822 canonicalize_filename (file);
6823 slashp = etags_strrchr (file, '/');
6824 if (slashp == NULL)
6825 return savestr (dir);
6826 save = slashp[1];
6827 slashp[1] = '\0';
6828 res = absolute_filename (file, dir);
6829 slashp[1] = save;
6830
6831 return res;
6832 }
6833
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a letter, a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6846
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lowercase drive letter is also made uppercase.
   Without DOS_NT the string is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes. */
  for (p = fn; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6865
6866 \f
6867 /* Initialize a linebuffer for use */
6868 static void
6869 linebuffer_init (lbp)
6870 linebuffer *lbp;
6871 {
6872 lbp->size = (DEBUG) ? 3 : 200;
6873 lbp->buffer = xnew (lbp->size, char);
6874 lbp->buffer[0] = '\0';
6875 lbp->len = 0;
6876 }
6877
6878 /* Set the minimum size of a string contained in a linebuffer. */
6879 static void
6880 linebuffer_setlen (lbp, toksize)
6881 linebuffer *lbp;
6882 int toksize;
6883 {
6884 while (lbp->size <= toksize)
6885 {
6886 lbp->size *= 2;
6887 xrnew (lbp->buffer, lbp->size, char);
6888 }
6889 lbp->len = toksize;
6890 }
6891
6892 /* Like malloc but get fatal error if memory is exhausted. */
6893 static PTR
6894 xmalloc (size)
6895 unsigned int size;
6896 {
6897 PTR result = (PTR) malloc (size);
6898 if (result == NULL)
6899 fatal ("virtual memory exhausted", (char *)NULL);
6900 return result;
6901 }
6902
6903 static PTR
6904 xrealloc (ptr, size)
6905 char *ptr;
6906 unsigned int size;
6907 {
6908 PTR result = (PTR) realloc (ptr, size);
6909 if (result == NULL)
6910 fatal ("virtual memory exhausted", (char *)NULL);
6911 return result;
6912 }
6913
6914 /*
6915 * Local Variables:
6916 * indent-tabs-mode: t
6917 * tab-width: 8
6918 * fill-column: 79
6919 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6920 * End:
6921 */
6922
6923 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6924 (do not change this comment) */
6925
6926 /* etags.c ends here */