]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
(add_regex): Invalid regexp modifiers are ignored.
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
33 *
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
35 *
36 */
37
38 char pot_etags_version[] = "@(#) pot revision number is 16.27";
39
40 #define TRUE 1
41 #define FALSE 0
42
43 #ifdef DEBUG
44 # undef DEBUG
45 # define DEBUG TRUE
46 #else
47 # define DEBUG FALSE
48 # define NDEBUG /* disable assert */
49 #endif
50
51 #ifdef HAVE_CONFIG_H
52 # include <config.h>
53 /* On some systems, Emacs defines static as nothing for the sake
54 of unexec. We don't want that here since we don't use unexec. */
55 # undef static
56 # define ETAGS_REGEXPS /* use the regexp features */
57 # define LONG_OPTIONS /* accept long options */
58 # ifndef PTR /* for Xemacs */
59 # define PTR void *
60 # endif
61 # ifndef __P /* for Xemacs */
62 # define __P(args) args
63 # endif
64 #else
65 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
66 # define __P(args) args /* use prototypes */
67 # define PTR void * /* for generic pointers */
68 # else
69 # define __P(args) () /* no prototypes */
70 # define const /* remove const for old compilers' sake */
71 # define PTR long * /* don't use void* */
72 # endif
73 #endif /* !HAVE_CONFIG_H */
74
75 #ifndef _GNU_SOURCE
76 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
77 #endif
78
79 /* WIN32_NATIVE is for Xemacs.
80 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
81 #ifdef WIN32_NATIVE
82 # undef MSDOS
83 # undef WINDOWSNT
84 # define WINDOWSNT
85 #endif /* WIN32_NATIVE */
86
87 #ifdef MSDOS
88 # undef MSDOS
89 # define MSDOS TRUE
90 # include <fcntl.h>
91 # include <sys/param.h>
92 # include <io.h>
93 # ifndef HAVE_CONFIG_H
94 # define DOS_NT
95 # include <sys/config.h>
96 # endif
97 #else
98 # define MSDOS FALSE
99 #endif /* MSDOS */
100
101 #ifdef WINDOWSNT
102 # include <stdlib.h>
103 # include <fcntl.h>
104 # include <string.h>
105 # include <direct.h>
106 # include <io.h>
107 # define MAXPATHLEN _MAX_PATH
108 # undef HAVE_NTGUI
109 # undef DOS_NT
110 # define DOS_NT
111 # ifndef HAVE_GETCWD
112 # define HAVE_GETCWD
113 # endif /* undef HAVE_GETCWD */
114 #else /* !WINDOWSNT */
115 # ifdef STDC_HEADERS
116 # include <stdlib.h>
117 # include <string.h>
118 # else
119 extern char *getenv ();
120 # endif
121 #endif /* !WINDOWSNT */
122
123 #ifdef HAVE_UNISTD_H
124 # include <unistd.h>
125 #else
126 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
127 extern char *getcwd (char *buf, size_t size);
128 # endif
129 #endif /* HAVE_UNISTD_H */
130
131 #include <stdio.h>
132 #include <ctype.h>
133 #include <errno.h>
134 #ifndef errno
135 extern int errno;
136 #endif
137 #include <sys/types.h>
138 #include <sys/stat.h>
139
140 #include <assert.h>
141 #ifdef NDEBUG
142 # undef assert /* some systems have a buggy assert.h */
143 # define assert(x) ((void) 0)
144 #endif
145
146 #if !defined (S_ISREG) && defined (S_IFREG)
147 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
148 #endif
149
150 #ifdef LONG_OPTIONS
151 # include <getopt.h>
152 #else
153 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
154 extern char *optarg;
155 extern int optind, opterr;
156 #endif /* LONG_OPTIONS */
157
158 #ifdef ETAGS_REGEXPS
159 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
160 # ifdef __CYGWIN__ /* compiling on Cygwin */
161 !!! NOTICE !!!
162 the regex.h distributed with Cygwin is not compatible with etags, alas!
163 If you want regular expression support, you should delete this notice and
164 arrange to use the GNU regex.h and regex.c.
165 # endif
166 # endif
167 # include <regex.h>
168 #endif /* ETAGS_REGEXPS */
169
170 /* Define CTAGS to make the program "ctags" compatible with the usual one.
171 Leave it undefined to make the program "etags", which makes emacs-style
172 tag tables and tags typedefs, #defines and struct/union/enum by default. */
173 #ifdef CTAGS
174 # undef CTAGS
175 # define CTAGS TRUE
176 #else
177 # define CTAGS FALSE
178 #endif
179
180 /* Exit codes for success and failure. */
181 #ifdef VMS
182 # define GOOD 1
183 # define BAD 0
184 #else
185 # define GOOD 0
186 # define BAD 1
187 #endif
188
/*
 * streq, strneq -- string equality tests built on strcmp and strncmp.
 *
 * Both yield non-zero when S and T (or, for strneq, their first N
 * characters) compare equal.  When assertions are enabled, both
 * arguments are required to be non-NULL, because passing a null pointer
 * to strcmp or strncmp is undefined.  With assertions enabled the
 * arguments are evaluated twice, so they must be free of side effects.
 */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp ((s), (t)))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp ((s), (t), (n)))
191
192 #define CHARS 256 /* number of distinct 8-bit char values (2^8) */
193 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
194 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
195 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
196 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
197 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
198 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
199
200 #define ISALNUM(c) isalnum (CHAR(c))
201 #define ISALPHA(c) isalpha (CHAR(c))
202 #define ISDIGIT(c) isdigit (CHAR(c))
203 #define ISLOWER(c) islower (CHAR(c))
204
205 #define lowcase(c) tolower (CHAR(c))
206 #define upcase(c) toupper (CHAR(c))
207
208
209 /*
210 * xnew, xrnew -- allocate, reallocate storage
211 *
212 * SYNOPSIS: Type *xnew (int n, Type);
213 * void xrnew (OldPointer, int n, Type);
214 */
215 #if DEBUG
216 # include "chkmalloc.h"
217 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
218 (n) * sizeof (Type)))
219 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
220 (char *) (op), (n) * sizeof (Type)))
221 #else
222 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
223 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
224 (char *) (op), (n) * sizeof (Type)))
225 #endif
226
227 #define bool int
228
229 typedef void Lang_function __P((FILE *));
230
/* Associates a file name suffix with the command used to expand files
   carrying that suffix.  */
typedef struct
{
  char *suffix;		/* file name suffix for this compressor */
  char *command;	/* takes one arg and decompresses to stdout */
} compressor;
236
/* Description of one language: its name, the function that parses its
   sources, and the file names, name suffixes and interpreter names
   associated with it.  The three string tables are NULL-terminated
   arrays, or NULL when the language has none.  */
typedef struct
{
  char *name;			/* language name */
  bool metasource;		/* source used to generate other sources */
  Lang_function *function;	/* parse function */
  char **filenames;		/* names of this language's files */
  char **suffixes;		/* name suffixes of this language's files */
  char **interpreters;		/* interpreters for this language */
} language;
246
/* Description of one input file.  Descriptions are chained through
   `next' into a singly linked list.  */
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
} fdesc;
258
/* One tag.  Tags are kept as the nodes of a binary tree, linked through
   `left' and `right'; each one points back to the description of the
   file it was found in.  */
typedef struct node_st
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
  char *pat;			/* search pattern */
  bool valid;			/* write this tag on the tag file */
  bool is_func;			/* function tag: use pattern in CTAGS mode */
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
  long cno;			/* character number line starts on */
} node;
271
272 /*
273 * A `linebuffer' is a structure which holds a line of text.
274 * `readline_internal' reads a line from a stream into a linebuffer
275 * and works regardless of the length of the line.
276 * SIZE is the size of BUFFER, LEN is the length of the string in
277 * BUFFER after readline reads it.
278 */
279 typedef struct
280 {
281 long size;
282 int len;
283 char *buffer;
284 } linebuffer;
285
286 /* Used to support mixing of --lang and file names. */
287 typedef struct
288 {
289 enum {
290 at_language, /* a language specification */
291 at_regexp, /* a regular expression */
292 at_filename, /* a file name */
293 at_stdin /* read from stdin here */
294 } arg_type; /* argument type */
295 language *lang; /* language associated with the argument */
296 char *what; /* the argument itself */
297 } argument;
298
299 #ifdef ETAGS_REGEXPS
300 /* Structure defining a regular expression. */
301 typedef struct pattern
302 {
303 struct pattern *p_next;
304 language *lang;
305 char *regex;
306 struct re_pattern_buffer *pat;
307 struct re_registers regs;
308 char *name_pattern;
309 bool error_signaled;
310 bool ignore_case;
311 bool multi_line;
312 } pattern;
313 #endif /* ETAGS_REGEXPS */
314
315
316 /* Many compilers barf on this:
317 Lang_function Ada_funcs;
318 so let's write it this way */
319 static void Ada_funcs __P((FILE *));
320 static void Asm_labels __P((FILE *));
321 static void C_entries __P((int c_ext, FILE *));
322 static void default_C_entries __P((FILE *));
323 static void plain_C_entries __P((FILE *));
324 static void Cjava_entries __P((FILE *));
325 static void Cobol_paragraphs __P((FILE *));
326 static void Cplusplus_entries __P((FILE *));
327 static void Cstar_entries __P((FILE *));
328 static void Erlang_functions __P((FILE *));
329 static void Fortran_functions __P((FILE *));
330 static void Yacc_entries __P((FILE *));
331 static void Lisp_functions __P((FILE *));
332 static void Makefile_targets __P((FILE *));
333 static void Pascal_functions __P((FILE *));
334 static void Perl_functions __P((FILE *));
335 static void PHP_functions __P((FILE *));
336 static void Postscript_functions __P((FILE *));
337 static void Prolog_functions __P((FILE *));
338 static void Python_functions __P((FILE *));
339 static void Scheme_functions __P((FILE *));
340 static void TeX_commands __P((FILE *));
341 static void Texinfo_nodes __P((FILE *));
342 static void just_read_file __P((FILE *));
343
344 static void print_language_names __P((void));
345 static void print_version __P((void));
346 static void print_help __P((void));
347 int main __P((int, char **));
348
349 static compressor *get_compressor_from_suffix __P((char *, char **));
350 static language *get_language_from_langname __P((const char *));
351 static language *get_language_from_interpreter __P((char *));
352 static language *get_language_from_filename __P((char *, bool));
353 static void readline __P((linebuffer *, FILE *));
354 static long readline_internal __P((linebuffer *, FILE *));
355 static bool nocase_tail __P((char *));
356 static char *get_tag __P((char *));
357
358 #ifdef ETAGS_REGEXPS
359 static void analyse_regex __P((char *));
360 static void free_patterns __P((void));
361 static void regex_tag_multiline __P((void));
362 #endif /* ETAGS_REGEXPS */
363 static void error __P((const char *, const char *));
364 static void suggest_asking_for_help __P((void));
365 void fatal __P((char *, char *));
366 static void pfatal __P((char *));
367 static void add_node __P((node *, node **));
368
369 static void init __P((void));
370 static void initbuffer __P((linebuffer *));
371 static void process_file_name __P((char *, language *));
372 static void process_file __P((FILE *, char *, language *));
373 static void find_entries __P((FILE *));
374 static void free_tree __P((node *));
375 static void free_fdesc __P((fdesc *));
376 static void pfnote __P((char *, bool, char *, int, int, long));
377 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
378 static void invalidate_nodes __P((fdesc *, node **));
379 static void put_entries __P((node *));
380
381 static char *concat __P((char *, char *, char *));
382 static char *skip_spaces __P((char *));
383 static char *skip_non_spaces __P((char *));
384 static char *savenstr __P((char *, int));
385 static char *savestr __P((char *));
386 static char *etags_strchr __P((const char *, int));
387 static char *etags_strrchr __P((const char *, int));
388 static bool strcaseeq __P((const char *, const char *));
389 static char *etags_getcwd __P((void));
390 static char *relative_filename __P((char *, char *));
391 static char *absolute_filename __P((char *, char *));
392 static char *absolute_dirname __P((char *, char *));
393 static bool filename_is_absolute __P((char *f));
394 static void canonicalize_filename __P((char *));
395 static void linebuffer_setlen __P((linebuffer *, int));
396 static PTR xmalloc __P((unsigned int));
397 static PTR xrealloc __P((char *, unsigned int));
398
399 \f
400 static char searchar = '/'; /* use /.../ searches */
401
402 static char *tagfile; /* output file */
403 static char *progname; /* name this program was invoked with */
404 static char *cwd; /* current working directory */
405 static char *tagfiledir; /* directory of tagfile */
406 static FILE *tagf; /* ioptr for tags file */
407
408 static fdesc *fdhead; /* head of file description list */
409 static fdesc *curfdp; /* current file description */
410 static int lineno; /* line number of current line */
411 static long charno; /* current character number */
412 static long linecharno; /* charno of start of current line */
413 static char *dbp; /* pointer to start of current tag */
414
415 static const int invalidcharno = -1;
416
417 static node *nodehead; /* the head of the binary tree of tags */
418 static node *last_node; /* the last node created */
419
420 static linebuffer lb; /* the current line */
421 static linebuffer filebuf; /* a buffer containing the whole file */
422
423 /* boolean "functions" (see init) */
424 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
425 static char
426 /* white chars */
427 *white = " \f\t\n\r\v",
428 /* not in a name */
429 *nonam = " \f\t\n\r()=,;",
430 /* token ending chars */
431 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
432 /* token starting chars */
433 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
434 /* valid in-token chars */
435 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
436
437 static bool append_to_tagfile; /* -a: append to tags */
438 /* The next four default to TRUE for etags, but to FALSE for ctags. */
439 static bool typedefs; /* -t: create tags for C and Ada typedefs */
440 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
441 /* 0 struct/enum/union decls, and C++ */
442 /* member functions. */
443 static bool constantypedefs; /* -d: create tags for C #define, enum */
444 /* constants and variables. */
445 /* -D: opposite of -d. Default under ctags. */
446 static bool globals; /* create tags for global variables */
447 static bool declarations; /* --declarations: tag them and extern in C&Co*/
448 static bool members; /* create tags for C member variables */
449 static bool no_line_directive; /* ignore #line directives (undocumented) */
450 static bool update; /* -u: update tags */
451 static bool vgrind_style; /* -v: create vgrind style index output */
452 static bool no_warnings; /* -w: suppress warnings */
453 static bool cxref_style; /* -x: create cxref style output */
454 static bool cplusplus; /* .[hc] means C++, not C */
455 static bool noindentypedefs; /* -I: ignore indentation in C */
456 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
457
458 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
459 static bool parsing_stdin; /* --parse-stdin used */
460
461 #ifdef ETAGS_REGEXPS
462 static pattern *p_head; /* list of all regexps */
463 static bool need_filebuf; /* some regexes are multi-line */
464 #else
465 # define need_filebuf FALSE
466 #endif /* ETAGS_REGEXPS */
467
#ifdef LONG_OPTIONS
/*
 * Table of long options, in the format expected by getopt_long.
 *
 * An entry whose third field is a variable address makes getopt_long
 * store the fourth field into that variable; an entry whose third field
 * is NULL makes getopt_long return the fourth field instead.
 *
 * Every option name appears exactly once.  A second "help" entry
 * (returning 'H') used to follow the first one: GNU getopt_long never
 * selected it for the full name, and because it differed from the first
 * entry only in its value it made abbreviations of --help ambiguous.
 */
static struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
#endif /* ETAGS_REGEXPS */
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "append",             no_argument,       NULL,               'a'   },
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
#endif /* LONG_OPTIONS */
510
511 static compressor compressors[] =
512 {
513 { "z", "gzip -d -c"},
514 { "Z", "gzip -d -c"},
515 { "gz", "gzip -d -c"},
516 { "GZ", "gzip -d -c"},
517 { "bz2", "bzip2 -d -c" },
518 { NULL }
519 };
520
521 /*
522 * Language stuff.
523 */
524
525 /* Ada code */
526 static char *Ada_suffixes [] =
527 { "ads", "adb", "ada", NULL };
528
529 /* Assembly code */
530 static char *Asm_suffixes [] =
531 { "a", /* Unix assembler */
532 "asm", /* Microcontroller assembly */
533 "def", /* BSO/Tasking definition includes */
534 "inc", /* Microcontroller include files */
535 "ins", /* Microcontroller include files */
536 "s", "sa", /* Unix assembler */
537 "S", /* cpp-processed Unix assembler */
538 "src", /* BSO/Tasking C compiler output */
539 NULL
540 };
541
542 /* Note that .c and .h can be considered C++, if the --c++ flag was
543 given, or if the `class' keyword is met inside the file.
544 That is why default_C_entries is called for these. */
545 static char *default_C_suffixes [] =
546 { "c", "h", NULL };
547
548 static char *Cplusplus_suffixes [] =
549 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
550 "M", /* Objective C++ */
551 "pdb", /* Postscript with C syntax */
552 NULL };
553
554 static char *Cjava_suffixes [] =
555 { "java", NULL };
556
557 static char *Cobol_suffixes [] =
558 { "COB", "cob", NULL };
559
560 static char *Cstar_suffixes [] =
561 { "cs", "hs", NULL };
562
563 static char *Erlang_suffixes [] =
564 { "erl", "hrl", NULL };
565
566 static char *Fortran_suffixes [] =
567 { "F", "f", "f90", "for", NULL };
568
569 static char *Lisp_suffixes [] =
570 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
571
572 static char *Makefile_filenames [] =
573 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
574
575 static char *Pascal_suffixes [] =
576 { "p", "pas", NULL };
577
578 static char *Perl_suffixes [] =
579 { "pl", "pm", NULL };
580
581 static char *Perl_interpreters [] =
582 { "perl", "@PERL@", NULL };
583
584 static char *PHP_suffixes [] =
585 { "php", "php3", "php4", NULL };
586
587 static char *plain_C_suffixes [] =
588 { "lm", /* Objective lex file */
589 "m", /* Objective C file */
590 "pc", /* Pro*C file */
591 NULL };
592
593 static char *Postscript_suffixes [] =
594 { "ps", "psw", NULL }; /* .psw is for PSWrap */
595
596 static char *Prolog_suffixes [] =
597 { "prolog", NULL };
598
599 static char *Python_suffixes [] =
600 { "py", NULL };
601
602 /* Can't do the `SCM' or `scm' prefix with a version number. */
603 static char *Scheme_suffixes [] =
604 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
605
606 static char *TeX_suffixes [] =
607 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
608
609 static char *Texinfo_suffixes [] =
610 { "texi", "texinfo", "txi", NULL };
611
612 static char *Yacc_suffixes [] =
613 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
614
615 /*
616 * Table of languages.
617 *
618 * It is ok for a given function to be listed under more than one
619 * name. I just didn't.
620 */
621
622 static language lang_names [] =
623 {
624 { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
625 { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
626 { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
627 { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
628 { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
629 { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
630 { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
631 { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
632 { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
633 { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
634 { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
635 { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
636 { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
637 { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
638 { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
639 { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
640 { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
641 { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
642 { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
643 { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
644 { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
645 { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
646 { "auto", FALSE, NULL }, /* default guessing scheme */
647 { "none", FALSE, just_read_file }, /* regexp matching only */
648 { NULL, FALSE, NULL } /* end of list */
649 };
650
651 \f
652 static void
653 print_language_names ()
654 {
655 language *lang;
656 char **name, **ext;
657
658 puts ("\nThese are the currently supported languages, along with the\n\
659 default file names and dot suffixes:");
660 for (lang = lang_names; lang->name != NULL; lang++)
661 {
662 printf (" %-*s", 10, lang->name);
663 if (lang->filenames != NULL)
664 for (name = lang->filenames; *name != NULL; name++)
665 printf (" %s", *name);
666 if (lang->suffixes != NULL)
667 for (ext = lang->suffixes; *ext != NULL; ext++)
668 printf (" .%s", *ext);
669 puts ("");
670 }
671 puts ("Where `auto' means use default language for files based on file\n\
672 name suffix, and `none' means only do regexp processing on files.\n\
673 If no language is specified and no matching suffix is found,\n\
674 the first line of the file is read for a sharp-bang (#!) sequence\n\
675 followed by the name of an interpreter. If no such sequence is found,\n\
676 Fortran is tried first; if no tags are found, C is tried next.\n\
677 When parsing any C file, a \"class\" keyword switches to C++.\n\
678 Compressed files are supported using gzip and bzip2.");
679 }
680
681 #ifndef EMACS_NAME
682 # define EMACS_NAME "standalone"
683 #endif
684 #ifndef VERSION
685 # define VERSION "version"
686 #endif
687 static void
688 print_version ()
689 {
690 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
691 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
692 puts ("This program is distributed under the same terms as Emacs");
693
694 exit (GOOD);
695 }
696
/*
 * Print on standard output the usage line, a description of each
 * option, the list of supported languages and the address for bug
 * reports, then exit with status GOOD.
 *
 * Several descriptions are selected by CTAGS, so the text differs
 * between a ctags build and an etags build; the regexp options are
 * described only when ETAGS_REGEXPS is defined.
 */
static void
print_help ()
{
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
#ifdef LONG_OPTIONS
  puts ("You may use unambiguous abbreviations for the long option names.");
#else
  puts ("Long option names do not work with this executable, as it is not\n\
linked with GNU getopt.");
#endif /* LONG_OPTIONS */
  puts (" A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  /* Described in etags builds only. */
  if (!CTAGS)
    puts ("-a, --append\n\
Append tag entries to existing tags file.");

  puts ("--packages-only\n\
For Ada files, only generate tags for packages.");

  /* Described in ctags builds only. */
  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++. Retained for backward compatibility and for debugging and
     experimentation. In principle, we could want to tag as C++ even
     before any "class" keyword.
  puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
In C and derived languages, create tags for function declarations,");
  /* The second half of the sentence names the option that applies to
     this build: --globals for ctags, --no-globals for etags. */
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* ctags describes the option that enables #define tags, etags the one
     that disables them. */
  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for C #define constants and enum constants.\n\
This makes the tags file smaller.");

  /* Described in etags builds only. */
  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");

  puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");

  /* As for defines: each build describes its own form of the option. */
  if (CTAGS)
    puts ("--globals\n\
Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");
  puts ("--members\n\
Create tag entries for member variables in C and derived languages.");

#ifdef ETAGS_REGEXPS
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
Make a tag for each line matching the regular expression pattern\n\
in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
files only. REGEXFILE is a file containing one REGEXP per line.\n\
REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
MODS are optional one-letter modifiers: `i' means to ignore case,\n\
`m' means to allow multi-line matches, `s' implies `m' and\n\
causes dot to match the newline character as well.");
  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
  puts ("-I, --ignore-indentation\n\
Don't rely on indentation quite as much as normal. Currently,\n\
this means not to assume that a closing brace in the first\n\
column is the final brace of a function or structure\n\
definition in C and C++.");
  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
Read from standard input and record tags as belonging to file NAME.");

  /* The remaining conditional descriptions appear in ctags builds only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
Generates an index of items intended for human consumption,\n\
similar to the output of vgrind. The index is sorted, and\n\
gives the page number of each item.");
      puts ("-w, --no-warn\n\
Suppress warning messages about entries defined in multiple\n\
files.");
      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (GOOD);
}
841
842 \f
843 #ifdef VMS /* VMS specific functions */
844
845 #define EOS '\0'
846
847 /* This is a BUG! ANY arbitrary limit is a BUG!
848 Won't someone please fix this? */
849 #define MAX_FILE_SPEC_LEN 255
850 typedef struct {
851 short curlen;
852 char body[MAX_FILE_SPEC_LEN + 1];
853 } vspec;
854
855 /*
856 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
857 returning in each successive call the next file name matching the input
858 spec. The function expects that each in_spec passed
859 to it will be processed to completion; in particular, up to and
860 including the call following that in which the last matching name
861 is returned, the function ignores the value of in_spec, and will
862 only start processing a new spec with the following call.
863 If an error occurs, on return out_spec contains the value
864 of in_spec when the error occurred.
865
866 With each successive file name returned in out_spec, the
867 function's return value is one. When there are no more matching
868 names the function returns zero. If on the first call no file
869 matches in_spec, or there is any other error, -1 is returned.
870 */
871
872 #include <rmsdef.h>
873 #include <descrip.h>
874 #define OUTSIZE MAX_FILE_SPEC_LEN
875 static short
876 fn_exp (out, in)
877 vspec *out;
878 char *in;
879 {
880 static long context = 0;
881 static struct dsc$descriptor_s o;
882 static struct dsc$descriptor_s i;
883 static bool pass1 = TRUE;
884 long status;
885 short retval;
886
887 if (pass1)
888 {
889 pass1 = FALSE;
890 o.dsc$a_pointer = (char *) out;
891 o.dsc$w_length = (short)OUTSIZE;
892 i.dsc$a_pointer = in;
893 i.dsc$w_length = (short)strlen(in);
894 i.dsc$b_dtype = DSC$K_DTYPE_T;
895 i.dsc$b_class = DSC$K_CLASS_S;
896 o.dsc$b_dtype = DSC$K_DTYPE_VT;
897 o.dsc$b_class = DSC$K_CLASS_VS;
898 }
899 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
900 {
901 out->body[out->curlen] = EOS;
902 return 1;
903 }
904 else if (status == RMS$_NMF)
905 retval = 0;
906 else
907 {
908 strcpy(out->body, in);
909 retval = -1;
910 }
911 lib$find_file_end(&context);
912 pass1 = TRUE;
913 return retval;
914 }
915
916 /*
917 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
918 name of each file specified by the provided arg expanding wildcards.
919 */
920 static char *
921 gfnames (arg, p_error)
922 char *arg;
923 bool *p_error;
924 {
925 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
926
927 switch (fn_exp (&filename, arg))
928 {
929 case 1:
930 *p_error = FALSE;
931 return filename.body;
932 case 0:
933 *p_error = FALSE;
934 return NULL;
935 default:
936 *p_error = TRUE;
937 return filename.body;
938 }
939 }
940
#ifndef OLD                     /* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's system(): report that no command can
   be run and fail.  The return type is spelled out and a value is
   returned because callers compare the result against GOOD and pass
   it to exit(); -1 is what system() itself yields when the command
   could not be started.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
948
949 #define VERSION_DELIM ';'
950 char *massage_name (s)
951 char *s;
952 {
953 char *start = s;
954
955 for ( ; *s; s++)
956 if (*s == VERSION_DELIM)
957 {
958 *s = EOS;
959 break;
960 }
961 else
962 *s = lowcase (*s);
963 return start;
964 }
965 #endif /* VMS */
966
967 \f
968 int
969 main (argc, argv)
970 int argc;
971 char *argv[];
972 {
973 int i;
974 unsigned int nincluded_files;
975 char **included_files;
976 argument *argbuffer;
977 int current_arg, file_count;
978 linebuffer filename_lb;
979 #ifdef VMS
980 bool got_err;
981 #endif
982 char *optstring;
983 int opt;
984
985
986 #ifdef DOS_NT
987 _fmode = O_BINARY; /* all of files are treated as binary files */
988 #endif /* DOS_NT */
989
990 progname = argv[0];
991 nincluded_files = 0;
992 included_files = xnew (argc, char *);
993 current_arg = 0;
994 file_count = 0;
995
996 /* Allocate enough no matter what happens. Overkill, but each one
997 is small. */
998 argbuffer = xnew (argc, argument);
999
1000 /*
1001 * If etags, always find typedefs and structure tags. Why not?
1002 * Also default to find macro constants, enum constants and
1003 * global variables.
1004 */
1005 if (!CTAGS)
1006 {
1007 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1008 globals = TRUE;
1009 }
1010
1011 optstring = "-";
1012 #ifdef ETAGS_REGEXPS
1013 optstring = "-r:Rc:";
1014 #endif /* ETAGS_REGEXPS */
1015 #ifndef LONG_OPTIONS
1016 optstring = optstring + 1;
1017 #endif /* LONG_OPTIONS */
1018 optstring = concat (optstring,
1019 "Cf:Il:o:SVhH",
1020 (CTAGS) ? "BxdtTuvw" : "aDi:");
1021
1022 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1023 switch (opt)
1024 {
1025 case 0:
1026 /* If getopt returns 0, then it has already processed a
1027 long-named option. We should do nothing. */
1028 break;
1029
1030 case 1:
1031 /* This means that a file name has been seen. Record it. */
1032 argbuffer[current_arg].arg_type = at_filename;
1033 argbuffer[current_arg].what = optarg;
1034 ++current_arg;
1035 ++file_count;
1036 break;
1037
1038 case STDIN:
1039 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1040 argbuffer[current_arg].arg_type = at_stdin;
1041 argbuffer[current_arg].what = optarg;
1042 ++current_arg;
1043 ++file_count;
1044 if (parsing_stdin)
1045 fatal ("cannot parse standard input more than once", (char *)NULL);
1046 parsing_stdin = TRUE;
1047 break;
1048
1049 /* Common options. */
1050 case 'C': cplusplus = TRUE; break;
1051 case 'f': /* for compatibility with old makefiles */
1052 case 'o':
1053 if (tagfile)
1054 {
1055 error ("-o option may only be given once.", (char *)NULL);
1056 suggest_asking_for_help ();
1057 }
1058 tagfile = optarg;
1059 break;
1060 case 'I':
1061 case 'S': /* for backward compatibility */
1062 noindentypedefs = TRUE;
1063 break;
1064 case 'l':
1065 {
1066 language *lang = get_language_from_langname (optarg);
1067 if (lang != NULL)
1068 {
1069 argbuffer[current_arg].lang = lang;
1070 argbuffer[current_arg].arg_type = at_language;
1071 ++current_arg;
1072 }
1073 }
1074 break;
1075 case 'c':
1076 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1077 optarg = concat (optarg, "i", ""); /* memory leak here */
1078 /* FALLTHRU */
1079 case 'r':
1080 argbuffer[current_arg].arg_type = at_regexp;
1081 argbuffer[current_arg].what = optarg;
1082 ++current_arg;
1083 break;
1084 case 'R':
1085 argbuffer[current_arg].arg_type = at_regexp;
1086 argbuffer[current_arg].what = NULL;
1087 ++current_arg;
1088 break;
1089 case 'V':
1090 print_version ();
1091 break;
1092 case 'h':
1093 case 'H':
1094 print_help ();
1095 break;
1096
1097 /* Etags options */
1098 case 'a': append_to_tagfile = TRUE; break;
1099 case 'D': constantypedefs = FALSE; break;
1100 case 'i': included_files[nincluded_files++] = optarg; break;
1101
1102 /* Ctags options. */
1103 case 'B': searchar = '?'; break;
1104 case 'd': constantypedefs = TRUE; break;
1105 case 't': typedefs = TRUE; break;
1106 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1107 case 'u': update = TRUE; break;
1108 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1109 case 'x': cxref_style = TRUE; break;
1110 case 'w': no_warnings = TRUE; break;
1111 default:
1112 suggest_asking_for_help ();
1113 }
1114
1115 for (; optind < argc; ++optind)
1116 {
1117 argbuffer[current_arg].arg_type = at_filename;
1118 argbuffer[current_arg].what = argv[optind];
1119 ++current_arg;
1120 ++file_count;
1121 }
1122
1123 if (nincluded_files == 0 && file_count == 0)
1124 {
1125 error ("no input files specified.", (char *)NULL);
1126 suggest_asking_for_help ();
1127 }
1128
1129 if (tagfile == NULL)
1130 tagfile = CTAGS ? "tags" : "TAGS";
1131 cwd = etags_getcwd (); /* the current working directory */
1132 if (cwd[strlen (cwd) - 1] != '/')
1133 {
1134 char *oldcwd = cwd;
1135 cwd = concat (oldcwd, "/", "");
1136 free (oldcwd);
1137 }
1138 if (streq (tagfile, "-"))
1139 tagfiledir = cwd;
1140 else
1141 tagfiledir = absolute_dirname (tagfile, cwd);
1142
1143 init (); /* set up boolean "functions" */
1144
1145 initbuffer (&lb);
1146 initbuffer (&filename_lb);
1147 initbuffer (&filebuf);
1148
1149 if (!CTAGS)
1150 {
1151 if (streq (tagfile, "-"))
1152 {
1153 tagf = stdout;
1154 #ifdef DOS_NT
1155 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1156 doesn't take effect until after `stdout' is already open). */
1157 if (!isatty (fileno (stdout)))
1158 setmode (fileno (stdout), O_BINARY);
1159 #endif /* DOS_NT */
1160 }
1161 else
1162 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1163 if (tagf == NULL)
1164 pfatal (tagfile);
1165 }
1166
1167 /*
1168 * Loop through files finding functions.
1169 */
1170 for (i = 0; i < current_arg; ++i)
1171 {
1172 static language *lang; /* non-NULL if language is forced */
1173 char *this_file;
1174
1175 switch (argbuffer[i].arg_type)
1176 {
1177 case at_language:
1178 lang = argbuffer[i].lang;
1179 break;
1180 #ifdef ETAGS_REGEXPS
1181 case at_regexp:
1182 analyse_regex (argbuffer[i].what);
1183 break;
1184 #endif
1185 case at_filename:
1186 #ifdef VMS
1187 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1188 {
1189 if (got_err)
1190 {
1191 error ("can't find file %s\n", this_file);
1192 argc--, argv++;
1193 }
1194 else
1195 {
1196 this_file = massage_name (this_file);
1197 }
1198 #else
1199 this_file = argbuffer[i].what;
1200 #endif
1201 /* Input file named "-" means read file names from stdin
1202 (one per line) and use them. */
1203 if (streq (this_file, "-"))
1204 {
1205 if (parsing_stdin)
1206 fatal ("cannot parse standard input AND read file names from it",
1207 (char *)NULL);
1208 while (readline_internal (&filename_lb, stdin) > 0)
1209 process_file_name (filename_lb.buffer, lang);
1210 }
1211 else
1212 process_file_name (this_file, lang);
1213 #ifdef VMS
1214 }
1215 #endif
1216 break;
1217 case at_stdin:
1218 this_file = argbuffer[i].what;
1219 process_file (stdin, this_file, lang);
1220 break;
1221 }
1222 }
1223
1224 #ifdef ETAGS_REGEXPS
1225 free_patterns ();
1226 #endif /* ETAGS_REGEXPS */
1227 free (filebuf.buffer);
1228
1229 if (!CTAGS || cxref_style)
1230 {
1231 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1232 free_tree (nodehead);
1233 nodehead = NULL;
1234 if (!CTAGS)
1235 while (nincluded_files-- > 0)
1236 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1237
1238 if (fclose (tagf) == EOF)
1239 pfatal (tagfile);
1240 exit (GOOD);
1241 }
1242
1243 if (update)
1244 {
1245 char cmd[BUFSIZ];
1246 for (i = 0; i < current_arg; ++i)
1247 {
1248 switch (argbuffer[i].arg_type)
1249 {
1250 case at_filename:
1251 case at_stdin:
1252 break;
1253 default:
1254 continue; /* the for loop */
1255 }
1256 sprintf (cmd,
1257 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1258 tagfile, argbuffer[i].what, tagfile);
1259 if (system (cmd) != GOOD)
1260 fatal ("failed to execute shell command", (char *)NULL);
1261 }
1262 append_to_tagfile = TRUE;
1263 }
1264
1265 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1266 if (tagf == NULL)
1267 pfatal (tagfile);
1268 put_entries (nodehead); /* write all the tags (CTAGS) */
1269 free_tree (nodehead);
1270 nodehead = NULL;
1271 if (fclose (tagf) == EOF)
1272 pfatal (tagfile);
1273
1274 if (update)
1275 {
1276 char cmd[2*BUFSIZ+10];
1277 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1278 exit (system (cmd));
1279 }
1280 return GOOD;
1281 }
1282
1283
1284 /*
1285 * Return a compressor given the file name. If EXTPTR is non-zero,
1286 * return a pointer into FILE where the compressor-specific
1287 * extension begins. If no compressor is found, NULL is returned
1288 * and EXTPTR is not significant.
1289 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1290 */
1291 static compressor *
1292 get_compressor_from_suffix (file, extptr)
1293 char *file;
1294 char **extptr;
1295 {
1296 compressor *compr;
1297 char *slash, *suffix;
1298
1299 /* This relies on FN to be after canonicalize_filename,
1300 so we don't need to consider backslashes on DOS_NT. */
1301 slash = etags_strrchr (file, '/');
1302 suffix = etags_strrchr (file, '.');
1303 if (suffix == NULL || suffix < slash)
1304 return NULL;
1305 if (extptr != NULL)
1306 *extptr = suffix;
1307 suffix += 1;
1308 /* Let those poor souls who live with DOS 8+3 file name limits get
1309 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1310 Only the first do loop is run if not MSDOS */
1311 do
1312 {
1313 for (compr = compressors; compr->suffix != NULL; compr++)
1314 if (streq (compr->suffix, suffix))
1315 return compr;
1316 if (!MSDOS)
1317 break; /* do it only once: not really a loop */
1318 if (extptr != NULL)
1319 *extptr = ++suffix;
1320 } while (*suffix != '\0');
1321 return NULL;
1322 }
1323
1324
1325
1326 /*
1327 * Return a language given the name.
1328 */
1329 static language *
1330 get_language_from_langname (name)
1331 const char *name;
1332 {
1333 language *lang;
1334
1335 if (name == NULL)
1336 error ("empty language name", (char *)NULL);
1337 else
1338 {
1339 for (lang = lang_names; lang->name != NULL; lang++)
1340 if (streq (name, lang->name))
1341 return lang;
1342 error ("unknown language \"%s\"", name);
1343 }
1344
1345 return NULL;
1346 }
1347
1348
1349 /*
1350 * Return a language given the interpreter name.
1351 */
1352 static language *
1353 get_language_from_interpreter (interpreter)
1354 char *interpreter;
1355 {
1356 language *lang;
1357 char **iname;
1358
1359 if (interpreter == NULL)
1360 return NULL;
1361 for (lang = lang_names; lang->name != NULL; lang++)
1362 if (lang->interpreters != NULL)
1363 for (iname = lang->interpreters; *iname != NULL; iname++)
1364 if (streq (*iname, interpreter))
1365 return lang;
1366
1367 return NULL;
1368 }
1369
1370
1371
1372 /*
1373 * Return a language given the file name.
1374 */
1375 static language *
1376 get_language_from_filename (file, case_sensitive)
1377 char *file;
1378 bool case_sensitive;
1379 {
1380 language *lang;
1381 char **name, **ext, *suffix;
1382
1383 /* Try whole file name first. */
1384 for (lang = lang_names; lang->name != NULL; lang++)
1385 if (lang->filenames != NULL)
1386 for (name = lang->filenames; *name != NULL; name++)
1387 if ((case_sensitive)
1388 ? streq (*name, file)
1389 : strcaseeq (*name, file))
1390 return lang;
1391
1392 /* If not found, try suffix after last dot. */
1393 suffix = etags_strrchr (file, '.');
1394 if (suffix == NULL)
1395 return NULL;
1396 suffix += 1;
1397 for (lang = lang_names; lang->name != NULL; lang++)
1398 if (lang->suffixes != NULL)
1399 for (ext = lang->suffixes; *ext != NULL; ext++)
1400 if ((case_sensitive)
1401 ? streq (*ext, suffix)
1402 : strcaseeq (*ext, suffix))
1403 return lang;
1404 return NULL;
1405 }
1406
1407 \f
1408 /*
1409 * This routine is called on each file argument.
1410 */
1411 static void
1412 process_file_name (file, lang)
1413 char *file;
1414 language *lang;
1415 {
1416 struct stat stat_buf;
1417 FILE *inf;
1418 fdesc *fdp;
1419 compressor *compr;
1420 char *compressed_name, *uncompressed_name;
1421 char *ext, *real_name;
1422 int retval;
1423
1424 canonicalize_filename (file);
1425 if (streq (file, tagfile) && !streq (tagfile, "-"))
1426 {
1427 error ("skipping inclusion of %s in self.", file);
1428 return;
1429 }
1430 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1431 {
1432 compressed_name = NULL;
1433 real_name = uncompressed_name = savestr (file);
1434 }
1435 else
1436 {
1437 real_name = compressed_name = savestr (file);
1438 uncompressed_name = savenstr (file, ext - file);
1439 }
1440
1441 /* If the canonicalized uncompressed name
1442 has already been dealt with, skip it silently. */
1443 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1444 {
1445 assert (fdp->infname != NULL);
1446 if (streq (uncompressed_name, fdp->infname))
1447 goto cleanup;
1448 }
1449
1450 if (stat (real_name, &stat_buf) != 0)
1451 {
1452 /* Reset real_name and try with a different name. */
1453 real_name = NULL;
1454 if (compressed_name != NULL) /* try with the given suffix */
1455 {
1456 if (stat (uncompressed_name, &stat_buf) == 0)
1457 real_name = uncompressed_name;
1458 }
1459 else /* try all possible suffixes */
1460 {
1461 for (compr = compressors; compr->suffix != NULL; compr++)
1462 {
1463 compressed_name = concat (file, ".", compr->suffix);
1464 if (stat (compressed_name, &stat_buf) != 0)
1465 {
1466 if (MSDOS)
1467 {
1468 char *suf = compressed_name + strlen (file);
1469 size_t suflen = strlen (compr->suffix) + 1;
1470 for ( ; suf[1]; suf++, suflen--)
1471 {
1472 memmove (suf, suf + 1, suflen);
1473 if (stat (compressed_name, &stat_buf) == 0)
1474 {
1475 real_name = compressed_name;
1476 break;
1477 }
1478 }
1479 if (real_name != NULL)
1480 break;
1481 } /* MSDOS */
1482 free (compressed_name);
1483 compressed_name = NULL;
1484 }
1485 else
1486 {
1487 real_name = compressed_name;
1488 break;
1489 }
1490 }
1491 }
1492 if (real_name == NULL)
1493 {
1494 perror (file);
1495 goto cleanup;
1496 }
1497 } /* try with a different name */
1498
1499 if (!S_ISREG (stat_buf.st_mode))
1500 {
1501 error ("skipping %s: it is not a regular file.", real_name);
1502 goto cleanup;
1503 }
1504 if (real_name == compressed_name)
1505 {
1506 char *cmd = concat (compr->command, " ", real_name);
1507 inf = (FILE *) popen (cmd, "r");
1508 free (cmd);
1509 }
1510 else
1511 inf = fopen (real_name, "r");
1512 if (inf == NULL)
1513 {
1514 perror (real_name);
1515 goto cleanup;
1516 }
1517
1518 process_file (inf, uncompressed_name, lang);
1519
1520 if (real_name == compressed_name)
1521 retval = pclose (inf);
1522 else
1523 retval = fclose (inf);
1524 if (retval < 0)
1525 pfatal (file);
1526
1527 cleanup:
1528 if (compressed_name) free (compressed_name);
1529 if (uncompressed_name) free (uncompressed_name);
1530 last_node = NULL;
1531 curfdp = NULL;
1532 return;
1533 }
1534
1535 static void
1536 process_file (fh, fn, lang)
1537 FILE *fh;
1538 char *fn;
1539 language *lang;
1540 {
1541 static const fdesc emptyfdesc;
1542 fdesc *fdp;
1543
1544 /* Create a new input file description entry. */
1545 fdp = xnew (1, fdesc);
1546 *fdp = emptyfdesc;
1547 fdp->next = fdhead;
1548 fdp->infname = savestr (fn);
1549 fdp->lang = lang;
1550 fdp->infabsname = absolute_filename (fn, cwd);
1551 fdp->infabsdir = absolute_dirname (fn, cwd);
1552 if (filename_is_absolute (fn))
1553 {
1554 /* An absolute file name. Canonicalize it. */
1555 fdp->taggedfname = absolute_filename (fn, NULL);
1556 }
1557 else
1558 {
1559 /* A file name relative to cwd. Make it relative
1560 to the directory of the tags file. */
1561 fdp->taggedfname = relative_filename (fn, tagfiledir);
1562 }
1563 fdp->usecharno = TRUE; /* use char position when making tags */
1564 fdp->prop = NULL;
1565
1566 fdhead = fdp;
1567 curfdp = fdhead; /* the current file description */
1568
1569 find_entries (fh);
1570
1571 /* If not Ctags, and if this is not metasource and if it contained no #line
1572 directives, we can write the tags and free all nodes pointing to
1573 curfdp. */
1574 if (!CTAGS
1575 && curfdp->usecharno /* no #line directives in this file */
1576 && !curfdp->lang->metasource)
1577 {
1578 node *np, *prev;
1579
1580 /* Look for the head of the sublist relative to this file. See add_node
1581 for the structure of the node tree. */
1582 prev = NULL;
1583 for (np = nodehead; np != NULL; prev = np, np = np->left)
1584 if (np->fdp == curfdp)
1585 break;
1586
1587 /* If we generated tags for this file, write and delete them. */
1588 if (np != NULL)
1589 {
1590 /* This is the head of the last sublist, if any. The following
1591 instructions depend on this being true. */
1592 assert (np->left == NULL);
1593
1594 assert (fdhead == curfdp);
1595 assert (last_node->fdp == curfdp);
1596 put_entries (np); /* write tags for file curfdp->taggedfname */
1597 free_tree (np); /* remove the written nodes */
1598 if (prev == NULL)
1599 nodehead = NULL; /* no nodes left */
1600 else
1601 prev->left = NULL; /* delete the pointer to the sublist */
1602 }
1603 }
1604 }
1605
1606 /*
1607 * This routine sets up the boolean pseudo-functions which work
1608 * by setting boolean flags dependent upon the corresponding character.
1609 * Every char which is NOT in that string is not a white char. Therefore,
1610 * all of the array "_wht" is set to FALSE, and then the elements
1611 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1612 * of a char is TRUE if it is the string "white", else FALSE.
1613 */
1614 static void
1615 init ()
1616 {
1617 register char *sp;
1618 register int i;
1619
1620 for (i = 0; i < CHARS; i++)
1621 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1622 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1623 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1624 notinname('\0') = notinname('\n');
1625 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1626 begtoken('\0') = begtoken('\n');
1627 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1628 intoken('\0') = intoken('\n');
1629 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1630 endtoken('\0') = endtoken('\n');
1631 }
1632
1633 /*
1634 * This routine opens the specified file and calls the function
1635 * which finds the function and type definitions.
1636 */
1637 static void
1638 find_entries (inf)
1639 FILE *inf;
1640 {
1641 char *cp;
1642 language *lang = curfdp->lang;
1643 Lang_function *parser = NULL;
1644
1645 /* If user specified a language, use it. */
1646 if (lang != NULL && lang->function != NULL)
1647 {
1648 parser = lang->function;
1649 }
1650
1651 /* Else try to guess the language given the file name. */
1652 if (parser == NULL)
1653 {
1654 lang = get_language_from_filename (curfdp->infname, TRUE);
1655 if (lang != NULL && lang->function != NULL)
1656 {
1657 curfdp->lang = lang;
1658 parser = lang->function;
1659 }
1660 }
1661
1662 /* Else look for sharp-bang as the first two characters. */
1663 if (parser == NULL
1664 && readline_internal (&lb, inf) > 0
1665 && lb.len >= 2
1666 && lb.buffer[0] == '#'
1667 && lb.buffer[1] == '!')
1668 {
1669 char *lp;
1670
1671 /* Set lp to point at the first char after the last slash in the
1672 line or, if no slashes, at the first nonblank. Then set cp to
1673 the first successive blank and terminate the string. */
1674 lp = etags_strrchr (lb.buffer+2, '/');
1675 if (lp != NULL)
1676 lp += 1;
1677 else
1678 lp = skip_spaces (lb.buffer + 2);
1679 cp = skip_non_spaces (lp);
1680 *cp = '\0';
1681
1682 if (strlen (lp) > 0)
1683 {
1684 lang = get_language_from_interpreter (lp);
1685 if (lang != NULL && lang->function != NULL)
1686 {
1687 curfdp->lang = lang;
1688 parser = lang->function;
1689 }
1690 }
1691 }
1692
1693 /* We rewind here, even if inf may be a pipe. We fail if the
1694 length of the first line is longer than the pipe block size,
1695 which is unlikely. */
1696 rewind (inf);
1697
1698 /* Else try to guess the language given the case insensitive file name. */
1699 if (parser == NULL)
1700 {
1701 lang = get_language_from_filename (curfdp->infname, FALSE);
1702 if (lang != NULL && lang->function != NULL)
1703 {
1704 curfdp->lang = lang;
1705 parser = lang->function;
1706 }
1707 }
1708
1709 /* Else try Fortran or C. */
1710 if (parser == NULL)
1711 {
1712 node *old_last_node = last_node;
1713
1714 curfdp->lang = get_language_from_langname ("fortran");
1715 find_entries (inf);
1716
1717 if (old_last_node == last_node)
1718 /* No Fortran entries found. Try C. */
1719 {
1720 /* We do not tag if rewind fails.
1721 Only the file name will be recorded in the tags file. */
1722 rewind (inf);
1723 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1724 find_entries (inf);
1725 }
1726 return;
1727 }
1728
1729 if (!no_line_directive
1730 && curfdp->lang != NULL && curfdp->lang->metasource)
1731 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1732 file, or anyway we parsed a file that is automatically generated from
1733 this one. If this is the case, the bingo.c file contained #line
1734 directives that generated tags pointing to this file. Let's delete
1735 them all before parsing this file, which is the real source. */
1736 {
1737 fdesc **fdpp = &fdhead;
1738 while (*fdpp != NULL)
1739 if (*fdpp != curfdp
1740 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1741 /* We found one of those! We must delete both the file description
1742 and all tags referring to it. */
1743 {
1744 fdesc *badfdp = *fdpp;
1745
1746 if (DEBUG)
1747 fprintf (stderr,
1748 "Removing references to \"%s\" obtained from \"%s\"\n",
1749 badfdp->taggedfname, badfdp->infname);
1750
1751 /* Delete the tags referring to badfdp. */
1752 invalidate_nodes (badfdp, &nodehead);
1753
1754 *fdpp = badfdp->next; /* remove the bad description from the list */
1755 free_fdesc (badfdp);
1756 }
1757 else
1758 fdpp = &(*fdpp)->next; /* advance the list pointer */
1759 }
1760
1761 assert (parser != NULL);
1762
1763 /* Generic initialisations before reading from file. */
1764 filebuf.len = 0; /* reset the file buffer */
1765
1766 /* Generic initialisations before parsing file with readline. */
1767 lineno = 0; /* reset global line number */
1768 charno = 0; /* reset global char number */
1769 linecharno = 0; /* reset global char number of line start */
1770
1771 parser (inf);
1772
1773 #ifdef ETAGS_REGEXPS
1774 regex_tag_multiline ();
1775 #endif /* ETAGS_REGEXPS */
1776 }
1777
1778 \f
1779 /* Record a tag. */
1780 static void
1781 pfnote (name, is_func, linestart, linelen, lno, cno)
1782 char *name; /* tag name, or NULL if unnamed */
1783 bool is_func; /* tag is a function */
1784 char *linestart; /* start of the line where tag is */
1785 int linelen; /* length of the line where tag is */
1786 int lno; /* line number */
1787 long cno; /* character number */
1788 {
1789 register node *np;
1790
1791 if (CTAGS && name == NULL)
1792 return;
1793
1794 np = xnew (1, node);
1795
1796 /* If ctags mode, change name "main" to M<thisfilename>. */
1797 if (CTAGS && !cxref_style && streq (name, "main"))
1798 {
1799 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1800 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1801 fp = etags_strrchr (np->name, '.');
1802 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1803 fp[0] = '\0';
1804 }
1805 else
1806 np->name = name;
1807 np->valid = TRUE;
1808 np->been_warned = FALSE;
1809 np->fdp = curfdp;
1810 np->is_func = is_func;
1811 np->lno = lno;
1812 if (np->fdp->usecharno)
1813 /* Our char numbers are 0-base, because of C language tradition?
1814 ctags compatibility? old versions compatibility? I don't know.
1815 Anyway, since emacs's are 1-base we expect etags.el to take care
1816 of the difference. If we wanted to have 1-based numbers, we would
1817 uncomment the +1 below. */
1818 np->cno = cno /* + 1 */ ;
1819 else
1820 np->cno = invalidcharno;
1821 np->left = np->right = NULL;
1822 if (CTAGS && !cxref_style)
1823 {
1824 if (strlen (linestart) < 50)
1825 np->pat = concat (linestart, "$", "");
1826 else
1827 np->pat = savenstr (linestart, 50);
1828 }
1829 else
1830 np->pat = savenstr (linestart, linelen);
1831
1832 add_node (np, &nodehead);
1833 }
1834
1835 /*
1836 * TAGS format specification
1837 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1838 *
1839 * pfnote should emit the optimized form [unnamed tag] only if:
1840 * 1. name does not contain any of the characters " \t\r\n(),;";
1841 * 2. linestart contains name as either a rightmost, or rightmost but
1842 * one character, substring;
1843 * 3. the character, if any, immediately before name in linestart must
1844 * be one of the characters " \t(),;";
1845 * 4. the character, if any, immediately after name in linestart must
1846 * also be one of the characters " \t(),;".
1847 *
1848 * The real implementation uses the notinname() macro, which recognises
1849 * characters slightly different from " \t\r\n(),;". See the variable
1850 * `nonam'.
1851 */
1852 #define traditional_tag_style TRUE
1853 static void
1854 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1855 char *name; /* tag name, or NULL if unnamed */
1856 int namelen; /* tag length */
1857 bool is_func; /* tag is a function */
1858 char *linestart; /* start of the line where tag is */
1859 int linelen; /* length of the line where tag is */
1860 int lno; /* line number */
1861 long cno; /* character number */
1862 {
1863 register char *cp;
1864 bool named;
1865
1866 named = TRUE;
1867 if (!CTAGS)
1868 {
1869 for (cp = name; !notinname (*cp); cp++)
1870 continue;
1871 if (*cp == '\0') /* rule #1 */
1872 {
1873 cp = linestart + linelen - namelen;
1874 if (notinname (linestart[linelen-1]))
1875 cp -= 1; /* rule #4 */
1876 if (cp >= linestart /* rule #2 */
1877 && (cp == linestart
1878 || notinname (cp[-1])) /* rule #3 */
1879 && strneq (name, cp, namelen)) /* rule #2 */
1880 named = FALSE; /* use unnamed tag */
1881 }
1882 }
1883
1884 if (named)
1885 name = savenstr (name, namelen);
1886 else
1887 name = NULL;
1888 pfnote (name, is_func, linestart, linelen, lno, cno);
1889 }
1890
1891 /*
1892 * free_tree ()
1893 * recurse on left children, iterate on right children.
1894 */
1895 static void
1896 free_tree (np)
1897 register node *np;
1898 {
1899 while (np)
1900 {
1901 register node *node_right = np->right;
1902 free_tree (np->left);
1903 if (np->name != NULL)
1904 free (np->name);
1905 free (np->pat);
1906 free (np);
1907 np = node_right;
1908 }
1909 }
1910
1911 /*
1912 * free_fdesc ()
1913 * delete a file description
1914 */
1915 static void
1916 free_fdesc (fdp)
1917 register fdesc *fdp;
1918 {
1919 if (fdp->infname != NULL) free (fdp->infname);
1920 if (fdp->infabsname != NULL) free (fdp->infabsname);
1921 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1922 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1923 if (fdp->prop != NULL) free (fdp->prop);
1924 free (fdp);
1925 }
1926
1927 /*
1928 * add_node ()
1929 * Adds a node to the tree of nodes. In etags mode, sort by file
1930 * name. In ctags mode, sort by tag name. Make no attempt at
1931 * balancing.
1932 *
1933 * add_node is the only function allowed to add nodes, so it can
1934 * maintain state.
1935 */
1936 static void
1937 add_node (np, cur_node_p)
1938 node *np, **cur_node_p;
1939 {
1940 register int dif;
1941 register node *cur_node = *cur_node_p;
1942
1943 if (cur_node == NULL)
1944 {
1945 *cur_node_p = np;
1946 last_node = np;
1947 return;
1948 }
1949
1950 if (!CTAGS)
1951 /* Etags Mode */
1952 {
1953 /* For each file name, tags are in a linked sublist on the right
1954 pointer. The first tags of different files are a linked list
1955 on the left pointer. last_node points to the end of the last
1956 used sublist. */
1957 if (last_node != NULL && last_node->fdp == np->fdp)
1958 {
1959 /* Let's use the same sublist as the last added node. */
1960 assert (last_node->right == NULL);
1961 last_node->right = np;
1962 last_node = np;
1963 }
1964 else if (cur_node->fdp == np->fdp)
1965 {
1966 /* Scanning the list we found the head of a sublist which is
1967 good for us. Let's scan this sublist. */
1968 add_node (np, &cur_node->right);
1969 }
1970 else
1971 /* The head of this sublist is not good for us. Let's try the
1972 next one. */
1973 add_node (np, &cur_node->left);
1974 } /* if ETAGS mode */
1975
1976 else
1977 {
1978 /* Ctags Mode */
1979 dif = strcmp (np->name, cur_node->name);
1980
1981 /*
1982 * If this tag name matches an existing one, then
1983 * do not add the node, but maybe print a warning.
1984 */
1985 if (!dif)
1986 {
1987 if (np->fdp == cur_node->fdp)
1988 {
1989 if (!no_warnings)
1990 {
1991 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1992 np->fdp->infname, lineno, np->name);
1993 fprintf (stderr, "Second entry ignored\n");
1994 }
1995 }
1996 else if (!cur_node->been_warned && !no_warnings)
1997 {
1998 fprintf
1999 (stderr,
2000 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2001 np->fdp->infname, cur_node->fdp->infname, np->name);
2002 cur_node->been_warned = TRUE;
2003 }
2004 return;
2005 }
2006
2007 /* Actually add the node */
2008 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2009 } /* if CTAGS mode */
2010 }
2011
2012 /*
2013 * invalidate_nodes ()
2014 * Scan the node tree and invalidate all nodes pointing to the
2015 * given file description (CTAGS case) or free them (ETAGS case).
2016 */
2017 static void
2018 invalidate_nodes (badfdp, npp)
2019 fdesc *badfdp;
2020 node **npp;
2021 {
2022 node *np = *npp;
2023
2024 if (np == NULL)
2025 return;
2026
2027 if (CTAGS)
2028 {
2029 if (np->left != NULL)
2030 invalidate_nodes (badfdp, &np->left);
2031 if (np->fdp == badfdp)
2032 np->valid = FALSE;
2033 if (np->right != NULL)
2034 invalidate_nodes (badfdp, &np->right);
2035 }
2036 else
2037 {
2038 assert (np->fdp != NULL);
2039 if (np->fdp == badfdp)
2040 {
2041 *npp = np->left; /* detach the sublist from the list */
2042 np->left = NULL; /* isolate it */
2043 free_tree (np); /* free it */
2044 invalidate_nodes (badfdp, npp);
2045 }
2046 else
2047 invalidate_nodes (badfdp, &np->left);
2048 }
2049 }
2050
2051 \f
2052 static int total_size_of_entries __P((node *));
2053 static int number_len __P((long));
2054
/* Return how many decimal digits are needed to write NUM, which is
   meant to be non-negative.  The result is never less than 1.  */
static int
number_len (num)
     long num;
{
  int digits = 0;

  /* Count one digit, then keep going for as long as dropping the
     last digit leaves something positive.  */
  do
    digits++;
  while ((num /= 10) > 0);

  return digits;
}
2065
2066 /*
2067 * Return total number of characters that put_entries will output for
2068 * the nodes in the linked list at the right of the specified node.
2069 * This count is irrelevant with etags.el since emacs 19.34 at least,
2070 * but is still supplied for backward compatibility.
2071 */
2072 static int
2073 total_size_of_entries (np)
2074 register node *np;
2075 {
2076 register int total = 0;
2077
2078 for (; np != NULL; np = np->right)
2079 {
2080 total += strlen (np->pat) + 1; /* pat\177 */
2081 if (np->name != NULL)
2082 total += strlen (np->name) + 1; /* name\001 */
2083 total += number_len ((long) np->lno) + 1; /* lno, */
2084 if (np->cno != invalidcharno) /* cno */
2085 total += number_len (np->cno);
2086 total += 1; /* newline */
2087 }
2088
2089 return total;
2090 }
2091
2092 static void
2093 put_entries (np)
2094 register node *np;
2095 {
2096 register char *sp;
2097 static fdesc *fdp = NULL;
2098
2099 if (np == NULL)
2100 return;
2101
2102 /* Output subentries that precede this one */
2103 if (CTAGS)
2104 put_entries (np->left);
2105
2106 /* Output this entry */
2107 if (np->valid)
2108 {
2109 if (!CTAGS)
2110 {
2111 /* Etags mode */
2112 if (fdp != np->fdp)
2113 {
2114 fdp = np->fdp;
2115 fprintf (tagf, "\f\n%s,%d\n",
2116 fdp->taggedfname, total_size_of_entries (np));
2117 }
2118 fputs (np->pat, tagf);
2119 fputc ('\177', tagf);
2120 if (np->name != NULL)
2121 {
2122 fputs (np->name, tagf);
2123 fputc ('\001', tagf);
2124 }
2125 fprintf (tagf, "%d,", np->lno);
2126 if (np->cno != invalidcharno)
2127 fprintf (tagf, "%ld", np->cno);
2128 fputs ("\n", tagf);
2129 }
2130 else
2131 {
2132 /* Ctags mode */
2133 if (np->name == NULL)
2134 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2135
2136 if (cxref_style)
2137 {
2138 if (vgrind_style)
2139 fprintf (stdout, "%s %s %d\n",
2140 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2141 else
2142 fprintf (stdout, "%-16s %3d %-16s %s\n",
2143 np->name, np->lno, np->fdp->taggedfname, np->pat);
2144 }
2145 else
2146 {
2147 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2148
2149 if (np->is_func)
2150 { /* function or #define macro with args */
2151 putc (searchar, tagf);
2152 putc ('^', tagf);
2153
2154 for (sp = np->pat; *sp; sp++)
2155 {
2156 if (*sp == '\\' || *sp == searchar)
2157 putc ('\\', tagf);
2158 putc (*sp, tagf);
2159 }
2160 putc (searchar, tagf);
2161 }
2162 else
2163 { /* anything else; text pattern inadequate */
2164 fprintf (tagf, "%d", np->lno);
2165 }
2166 putc ('\n', tagf);
2167 }
2168 }
2169 } /* if this node contains a valid tag */
2170
2171 /* Output subentries that follow this one */
2172 put_entries (np->right);
2173 if (!CTAGS)
2174 put_entries (np->left);
2175 }
2176
2177 \f
/*
 * Language flavours understood by the C parser.  The low twelve bits
 * (C_EXT) select a C dialect; note that the values for C* and Java
 * also contain the C_PLPL bit.  C_AUTO and YACC are separate flag bits.
 */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_EXT   0x00fff         /* mask covering the C extensions */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2186
/*
 * The C symbol tables.
 *
 * Classification of a token by the keyword table below.  The keywords
 * named in the comments are the ones listed in the gperf input.
 */
enum sym_type
{
  st_none,                      /* not a known keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return,
                                   import, package, friend */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, domain,
                                   interface */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef,                 /* typedef */
  st_C_typespec                 /* int, char, const, static, ... */
};
2201
2202 static unsigned int hash __P((const char *, unsigned int));
2203 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2204 static enum sym_type C_symtype __P((char *, int, int));
2205
2206 /* Feed stuff between (but not including) %[ and %] lines to:
2207 gperf -c -k 1,3 -o -p -r -t
2208 %[
2209 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2210 %%
2211 if, 0, st_C_ignore
2212 for, 0, st_C_ignore
2213 while, 0, st_C_ignore
2214 switch, 0, st_C_ignore
2215 return, 0, st_C_ignore
2216 @interface, 0, st_C_objprot
2217 @protocol, 0, st_C_objprot
2218 @implementation,0, st_C_objimpl
2219 @end, 0, st_C_objend
2220 import, C_JAVA, st_C_ignore
2221 package, C_JAVA, st_C_ignore
2222 friend, C_PLPL, st_C_ignore
2223 extends, C_JAVA, st_C_javastruct
2224 implements, C_JAVA, st_C_javastruct
2225 interface, C_JAVA, st_C_struct
2226 class, 0, st_C_class
2227 namespace, C_PLPL, st_C_struct
2228 domain, C_STAR, st_C_struct
2229 union, 0, st_C_struct
2230 struct, 0, st_C_struct
2231 extern, 0, st_C_extern
2232 enum, 0, st_C_enum
2233 typedef, 0, st_C_typedef
2234 define, 0, st_C_define
2235 operator, C_PLPL, st_C_operator
2236 template, 0, st_C_template
2237 bool, C_PLPL, st_C_typespec
2238 long, 0, st_C_typespec
2239 short, 0, st_C_typespec
2240 int, 0, st_C_typespec
2241 char, 0, st_C_typespec
2242 float, 0, st_C_typespec
2243 double, 0, st_C_typespec
2244 signed, 0, st_C_typespec
2245 unsigned, 0, st_C_typespec
2246 auto, 0, st_C_typespec
2247 void, 0, st_C_typespec
2248 static, 0, st_C_typespec
2249 const, 0, st_C_typespec
2250 volatile, 0, st_C_typespec
2251 explicit, C_PLPL, st_C_typespec
2252 mutable, C_PLPL, st_C_typespec
2253 typename, C_PLPL, st_C_typespec
2254 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2255 DEFUN, 0, st_C_gnumacro
2256 SYSCALL, 0, st_C_gnumacro
2257 ENTRY, 0, st_C_gnumacro
2258 PSEUDO, 0, st_C_gnumacro
2259 # These are defined inside C functions, so currently they are not met.
2260 # EXFUN used in glibc, DEFVAR_* in emacs.
2261 #EXFUN, 0, st_C_gnumacro
2262 #DEFVAR_, 0, st_C_gnumacro
2263 %]
2264 and replace lines between %< and %> with its output,
2265 then make in_word_set and C_stab_entry static. */
2266 /*%<*/
2267 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2268 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2269 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2270
2271 #define TOTAL_KEYWORDS 47
2272 #define MIN_WORD_LENGTH 2
2273 #define MAX_WORD_LENGTH 15
2274 #define MIN_HASH_VALUE 18
2275 #define MAX_HASH_VALUE 138
2276 /* maximum key range = 121, duplicates = 0 */
2277
/* gperf-generated hash: LEN plus the association values of the first
   character of STR and, unless LEN is 1 or 2, of its third character. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* Only words of exactly one or two characters leave the third
     character out of the hash. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2329
2330 #ifdef __GNUC__
2331 __inline
2332 #endif
2333 static struct C_stab_entry *
2334 in_word_set (str, len)
2335 register const char *str;
2336 register unsigned int len;
2337 {
2338 static struct C_stab_entry wordlist[] =
2339 {
2340 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2341 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2342 {"if", 0, st_C_ignore},
2343 {""}, {""}, {""}, {""},
2344 {"int", 0, st_C_typespec},
2345 {""}, {""},
2346 {"void", 0, st_C_typespec},
2347 {""}, {""},
2348 {"interface", C_JAVA, st_C_struct},
2349 {""},
2350 {"SYSCALL", 0, st_C_gnumacro},
2351 {""},
2352 {"return", 0, st_C_ignore},
2353 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2354 {"while", 0, st_C_ignore},
2355 {"auto", 0, st_C_typespec},
2356 {""}, {""}, {""}, {""}, {""}, {""},
2357 {"float", 0, st_C_typespec},
2358 {"typedef", 0, st_C_typedef},
2359 {"typename", C_PLPL, st_C_typespec},
2360 {""}, {""}, {""},
2361 {"friend", C_PLPL, st_C_ignore},
2362 {"volatile", 0, st_C_typespec},
2363 {""}, {""},
2364 {"for", 0, st_C_ignore},
2365 {"const", 0, st_C_typespec},
2366 {"import", C_JAVA, st_C_ignore},
2367 {""},
2368 {"define", 0, st_C_define},
2369 {"long", 0, st_C_typespec},
2370 {"implements", C_JAVA, st_C_javastruct},
2371 {"signed", 0, st_C_typespec},
2372 {""},
2373 {"extern", 0, st_C_extern},
2374 {"extends", C_JAVA, st_C_javastruct},
2375 {""},
2376 {"mutable", C_PLPL, st_C_typespec},
2377 {"template", 0, st_C_template},
2378 {"short", 0, st_C_typespec},
2379 {"bool", C_PLPL, st_C_typespec},
2380 {"char", 0, st_C_typespec},
2381 {"class", 0, st_C_class},
2382 {"operator", C_PLPL, st_C_operator},
2383 {""},
2384 {"switch", 0, st_C_ignore},
2385 {""},
2386 {"ENTRY", 0, st_C_gnumacro},
2387 {""},
2388 {"package", C_JAVA, st_C_ignore},
2389 {"union", 0, st_C_struct},
2390 {"@end", 0, st_C_objend},
2391 {"struct", 0, st_C_struct},
2392 {"namespace", C_PLPL, st_C_struct},
2393 {""}, {""},
2394 {"domain", C_STAR, st_C_struct},
2395 {"@interface", 0, st_C_objprot},
2396 {"PSEUDO", 0, st_C_gnumacro},
2397 {"double", 0, st_C_typespec},
2398 {""},
2399 {"@protocol", 0, st_C_objprot},
2400 {""},
2401 {"static", 0, st_C_typespec},
2402 {""}, {""},
2403 {"DEFUN", 0, st_C_gnumacro},
2404 {""}, {""}, {""}, {""},
2405 {"explicit", C_PLPL, st_C_typespec},
2406 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2407 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2408 {""},
2409 {"enum", 0, st_C_enum},
2410 {""}, {""},
2411 {"unsigned", 0, st_C_typespec},
2412 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2413 {"@implementation",0, st_C_objimpl}
2414 };
2415
2416 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2417 {
2418 register int key = hash (str, len);
2419
2420 if (key <= MAX_HASH_VALUE && key >= 0)
2421 {
2422 register const char *s = wordlist[key].name;
2423
2424 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2425 return &wordlist[key];
2426 }
2427 }
2428 return 0;
2429 }
2430 /*%>*/
2431
2432 static enum sym_type
2433 C_symtype (str, len, c_ext)
2434 char *str;
2435 int len;
2436 int c_ext;
2437 {
2438 register struct C_stab_entry *se = in_word_set (str, len);
2439
2440 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2441 return st_none;
2442 return se->type;
2443 }
2444
2445 \f
/*
 * Functions and variables are recognised by a small finite automaton
 * whose current state is kept in fvdef.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Set when the extern keyword has been seen for the function or
   variable being parsed. */
static bool fvextern;
2465
/*
 * Typedefs are recognised by another small finite automaton; typdef
 * holds its current state.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2479
/*
 * Struct-like constructs (enum, struct and union) have their own
 * small finite automaton; its state variable is `structdef'.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if cblev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  sintemplate,                  /* inside template (ignore) */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2494
/*
 * Name of the current Objective C class.  It is meaningful only while
 * objdef is different from onone; until then it holds a placeholder.
 */
static char *objtag = "<uninited>";
2499
/*
 * State machine that keeps track of preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2510
/*
 * State machine that follows Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2530
2531
2532 /*
2533 * Use this structure to keep info about the token read, and how it
2534 * should be tagged. Used by the make_C_tag function to build a tag.
2535 */
2536 static struct tok
2537 {
2538 bool valid;
2539 bool named;
2540 int offset;
2541 int length;
2542 int lineno;
2543 long linepos;
2544 char *line;
2545 } token; /* latest token read */
2546 static linebuffer token_name; /* its name */
2547
2548 /*
2549 * Variables and functions for dealing with nested structures.
2550 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2551 */
2552 static void pushclass_above __P((int, char *, int));
2553 static void popclass_above __P((int));
2554 static void write_classname __P((linebuffer *, char *qualifier));
2555
/* Stack of the struct-like declarations being parsed.  cname and cblev
   are parallel arrays: entry I describes nesting level I. */
static struct
{
  char **cname;                 /* nested class names */
  int *cblev;                   /* nested class curly brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function:
   true when the current brace level is exactly one more than the level
   recorded for the innermost entry of the stack. */
#define instruct        (structdef == snone && nestlev > 0                 \
                         && cblev == cstack.cblev[nestlev-1] + 1)
2567
2568 static void
2569 pushclass_above (cblev, str, len)
2570 int cblev;
2571 char *str;
2572 int len;
2573 {
2574 int nl;
2575
2576 popclass_above (cblev);
2577 nl = cstack.nl;
2578 if (nl >= cstack.size)
2579 {
2580 int size = cstack.size *= 2;
2581 xrnew (cstack.cname, size, char *);
2582 xrnew (cstack.cblev, size, int);
2583 }
2584 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2585 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2586 cstack.cblev[nl] = cblev;
2587 cstack.nl = nl + 1;
2588 }
2589
2590 static void
2591 popclass_above (cblev)
2592 int cblev;
2593 {
2594 int nl;
2595
2596 for (nl = cstack.nl - 1;
2597 nl >= 0 && cstack.cblev[nl] >= cblev;
2598 nl--)
2599 {
2600 if (cstack.cname[nl] != NULL)
2601 free (cstack.cname[nl]);
2602 cstack.nl = nl;
2603 }
2604 }
2605
2606 static void
2607 write_classname (cn, qualifier)
2608 linebuffer *cn;
2609 char *qualifier;
2610 {
2611 int i, len;
2612 int qlen = strlen (qualifier);
2613
2614 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2615 {
2616 len = 0;
2617 cn->len = 0;
2618 cn->buffer[0] = '\0';
2619 }
2620 else
2621 {
2622 len = strlen (cstack.cname[0]);
2623 linebuffer_setlen (cn, len);
2624 strcpy (cn->buffer, cstack.cname[0]);
2625 }
2626 for (i = 1; i < cstack.nl; i++)
2627 {
2628 char *s;
2629 int slen;
2630
2631 s = cstack.cname[i];
2632 if (s == NULL)
2633 continue;
2634 slen = strlen (s);
2635 len += slen + qlen;
2636 linebuffer_setlen (cn, len);
2637 strncat (cn->buffer, qualifier, qlen);
2638 strncat (cn->buffer, s, slen);
2639 }
2640 }
2641
2642 \f
2643 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2644 static void make_C_tag __P((bool));
2645
2646 /*
2647 * consider_token ()
2648 * checks to see if the current token is at the start of a
2649 * function or variable, or corresponds to a typedef, or
2650 * is a struct/union/enum tag, or #define, or an enum constant.
2651 *
2652 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2653 * with args. C_EXTP points to which language we are looking at.
2654 *
2655 * Globals
2656 * fvdef IN OUT
2657 * structdef IN OUT
2658 * definedef IN OUT
2659 * typdef IN OUT
2660 * objdef IN OUT
2661 */
2662
2663 static bool
2664 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2665 register char *str; /* IN: token pointer */
2666 register int len; /* IN: token length */
2667 register int c; /* IN: first char after the token */
2668 int *c_extp; /* IN, OUT: C extensions mask */
2669 int cblev; /* IN: curly brace level */
2670 int parlev; /* IN: parenthesis level */
2671 bool *is_func_or_var; /* OUT: function or variable found */
2672 {
2673 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2674 structtype is the type of the preceding struct-like keyword, and
2675 structcblev is the curly brace level where it has been seen. */
2676 static enum sym_type structtype;
2677 static int structcblev;
2678 static enum sym_type toktype;
2679
2680
2681 toktype = C_symtype (str, len, *c_extp);
2682
2683 /*
2684 * Advance the definedef state machine.
2685 */
2686 switch (definedef)
2687 {
2688 case dnone:
2689 /* We're not on a preprocessor line. */
2690 if (toktype == st_C_gnumacro)
2691 {
2692 fvdef = fdefunkey;
2693 return FALSE;
2694 }
2695 break;
2696 case dsharpseen:
2697 if (toktype == st_C_define)
2698 {
2699 definedef = ddefineseen;
2700 }
2701 else
2702 {
2703 definedef = dignorerest;
2704 }
2705 return FALSE;
2706 case ddefineseen:
2707 /*
2708 * Make a tag for any macro, unless it is a constant
2709 * and constantypedefs is FALSE.
2710 */
2711 definedef = dignorerest;
2712 *is_func_or_var = (c == '(');
2713 if (!*is_func_or_var && !constantypedefs)
2714 return FALSE;
2715 else
2716 return TRUE;
2717 case dignorerest:
2718 return FALSE;
2719 default:
2720 error ("internal error: definedef value.", (char *)NULL);
2721 }
2722
2723 /*
2724 * Now typedefs
2725 */
2726 switch (typdef)
2727 {
2728 case tnone:
2729 if (toktype == st_C_typedef)
2730 {
2731 if (typedefs)
2732 typdef = tkeyseen;
2733 fvextern = FALSE;
2734 fvdef = fvnone;
2735 return FALSE;
2736 }
2737 break;
2738 case tkeyseen:
2739 switch (toktype)
2740 {
2741 case st_none:
2742 case st_C_typespec:
2743 case st_C_class:
2744 case st_C_struct:
2745 case st_C_enum:
2746 typdef = ttypeseen;
2747 break;
2748 }
2749 break;
2750 case ttypeseen:
2751 if (structdef == snone && fvdef == fvnone)
2752 {
2753 fvdef = fvnameseen;
2754 return TRUE;
2755 }
2756 break;
2757 case tend:
2758 switch (toktype)
2759 {
2760 case st_C_typespec:
2761 case st_C_class:
2762 case st_C_struct:
2763 case st_C_enum:
2764 return FALSE;
2765 }
2766 return TRUE;
2767 }
2768
2769 /*
2770 * This structdef business is NOT invoked when we are ctags and the
2771 * file is plain C. This is because a struct tag may have the same
2772 * name as another tag, and this loses with ctags.
2773 */
2774 switch (toktype)
2775 {
2776 case st_C_javastruct:
2777 if (structdef == stagseen)
2778 structdef = scolonseen;
2779 return FALSE;
2780 case st_C_template:
2781 case st_C_class:
2782 if (cblev == 0
2783 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2784 && definedef == dnone && structdef == snone
2785 && typdef == tnone && fvdef == fvnone)
2786 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2787 if (toktype == st_C_template)
2788 break;
2789 /* FALLTHRU */
2790 case st_C_struct:
2791 case st_C_enum:
2792 if (parlev == 0
2793 && fvdef != vignore
2794 && (typdef == tkeyseen
2795 || (typedefs_or_cplusplus && structdef == snone)))
2796 {
2797 structdef = skeyseen;
2798 structtype = toktype;
2799 structcblev = cblev;
2800 }
2801 return FALSE;
2802 }
2803
2804 if (structdef == skeyseen)
2805 {
2806 structdef = stagseen;
2807 return TRUE;
2808 }
2809
2810 if (typdef != tnone)
2811 definedef = dnone;
2812
2813 /* Detect Objective C constructs. */
2814 switch (objdef)
2815 {
2816 case onone:
2817 switch (toktype)
2818 {
2819 case st_C_objprot:
2820 objdef = oprotocol;
2821 return FALSE;
2822 case st_C_objimpl:
2823 objdef = oimplementation;
2824 return FALSE;
2825 }
2826 break;
2827 case oimplementation:
2828 /* Save the class tag for functions or variables defined inside. */
2829 objtag = savenstr (str, len);
2830 objdef = oinbody;
2831 return FALSE;
2832 case oprotocol:
2833 /* Save the class tag for categories. */
2834 objtag = savenstr (str, len);
2835 objdef = otagseen;
2836 *is_func_or_var = TRUE;
2837 return TRUE;
2838 case oparenseen:
2839 objdef = ocatseen;
2840 *is_func_or_var = TRUE;
2841 return TRUE;
2842 case oinbody:
2843 break;
2844 case omethodsign:
2845 if (parlev == 0)
2846 {
2847 objdef = omethodtag;
2848 linebuffer_setlen (&token_name, len);
2849 strncpy (token_name.buffer, str, len);
2850 token_name.buffer[len] = '\0';
2851 return TRUE;
2852 }
2853 return FALSE;
2854 case omethodcolon:
2855 if (parlev == 0)
2856 objdef = omethodparm;
2857 return FALSE;
2858 case omethodparm:
2859 if (parlev == 0)
2860 {
2861 objdef = omethodtag;
2862 linebuffer_setlen (&token_name, token_name.len + len);
2863 strncat (token_name.buffer, str, len);
2864 return TRUE;
2865 }
2866 return FALSE;
2867 case oignore:
2868 if (toktype == st_C_objend)
2869 {
2870 /* Memory leakage here: the string pointed by objtag is
2871 never released, because many tests would be needed to
2872 avoid breaking on incorrect input code. The amount of
2873 memory leaked here is the sum of the lengths of the
2874 class tags.
2875 free (objtag); */
2876 objdef = onone;
2877 }
2878 return FALSE;
2879 }
2880
2881 /* A function, variable or enum constant? */
2882 switch (toktype)
2883 {
2884 case st_C_extern:
2885 fvextern = TRUE;
2886 /* FALLTHRU */
2887 case st_C_typespec:
2888 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2889 fvdef = fvnone; /* should be useless */
2890 return FALSE;
2891 case st_C_ignore:
2892 fvextern = FALSE;
2893 fvdef = vignore;
2894 return FALSE;
2895 case st_C_operator:
2896 fvdef = foperator;
2897 *is_func_or_var = TRUE;
2898 return TRUE;
2899 case st_none:
2900 if (constantypedefs
2901 && structdef == snone
2902 && structtype == st_C_enum && cblev > structcblev)
2903 return TRUE; /* enum constant */
2904 switch (fvdef)
2905 {
2906 case fdefunkey:
2907 if (cblev > 0)
2908 break;
2909 fvdef = fdefunname; /* GNU macro */
2910 *is_func_or_var = TRUE;
2911 return TRUE;
2912 case fvnone:
2913 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2914 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2915 {
2916 fvdef = vignore;
2917 return FALSE;
2918 }
2919 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2920 {
2921 fvdef = foperator;
2922 *is_func_or_var = TRUE;
2923 return TRUE;
2924 }
2925 if (cblev > 0 && !instruct)
2926 break;
2927 fvdef = fvnameseen; /* function or variable */
2928 *is_func_or_var = TRUE;
2929 return TRUE;
2930 }
2931 break;
2932 }
2933
2934 return FALSE;
2935 }
2936
2937 \f
2938 /*
2939 * C_entries often keeps pointers to tokens or lines which are older than
2940 * the line currently read. By keeping two line buffers, and switching
2941 * them at end of line, it is possible to use those pointers.
2942 */
2943 static struct
2944 {
2945 long linepos;
2946 linebuffer lb;
2947 } lbs[2];
2948
/* Helpers for the pair of line buffers.  They expand to code that uses
   names available where they are used: curndx, newndx, lp, quotednl,
   charno, inf and savetoken. */

/* True when the buffer being scanned is the one read last. */
#define current_lb_is_new       (newndx == curndx)
/* Make the other buffer the current one. */
#define switch_line_buffers()   (curndx = 1 - curndx)

#define curlb           (lbs[curndx].lb)
#define newlb           (lbs[newndx].lb)
#define curlinepos      (lbs[curndx].linepos)
#define newlinepos      (lbs[newndx].linepos)

/* Read the next line into the current buffer and point lp to its
   start.  Unlike CNL, this leaves definedef and the saved token alone. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Read the next line, then bring back the token saved in savetoken,
   if there is one, and reset the definedef state machine. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2976
2977
2978 static void
2979 make_C_tag (isfun)
2980 bool isfun;
2981 {
2982 /* This function should never be called when token.valid is FALSE, but
2983 we must protect against invalid input or internal errors. */
2984 if (DEBUG || token.valid)
2985 {
2986 if (traditional_tag_style)
2987 {
2988 /* This was the original code. Now we call new_pfnote instead,
2989 which uses the new method for naming tags (see new_pfnote). */
2990 char *name = NULL;
2991
2992 if (CTAGS || token.named)
2993 name = savestr (token_name.buffer);
2994 if (DEBUG && !token.valid)
2995 {
2996 if (token.named)
2997 name = concat (name, "##invalid##", "");
2998 else
2999 name = savestr ("##invalid##");
3000 }
3001 pfnote (name, isfun, token.line,
3002 token.offset+token.length+1, token.lineno, token.linepos);
3003 }
3004 else
3005 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
3006 token.offset+token.length+1, token.lineno, token.linepos);
3007 token.valid = FALSE;
3008 }
3009 }
3010
3011
3012 /*
3013 * C_entries ()
3014 * This routine finds functions, variables, typedefs,
3015 * #define's, enum constants and struct/union/enum definitions in
3016 * C syntax and adds them to the list.
3017 */
3018 static void
3019 C_entries (c_ext, inf)
3020 int c_ext; /* extension of C */
3021 FILE *inf; /* input file */
3022 {
3023 register char c; /* latest char read; '\0' for end of line */
3024 register char *lp; /* pointer one beyond the character `c' */
3025 int curndx, newndx; /* indices for current and new lb */
3026 register int tokoff; /* offset in line of start of current token */
3027 register int toklen; /* length of current token */
3028 char *qualifier; /* string used to qualify names */
3029 int qlen; /* length of qualifier */
3030 int cblev; /* current curly brace level */
3031 int parlev; /* current parenthesis level */
3032 int typdefcblev; /* cblev where a typedef struct body begun */
3033 bool incomm, inquote, inchar, quotednl, midtoken;
3034 bool cplpl, cjava;
3035 bool yacc_rules; /* in the rules part of a yacc file */
3036 struct tok savetoken; /* token saved during preprocessor handling */
3037
3038
3039 initbuffer (&token_name);
3040 initbuffer (&lbs[0].lb);
3041 initbuffer (&lbs[1].lb);
3042 if (cstack.size == 0)
3043 {
3044 cstack.size = (DEBUG) ? 1 : 4;
3045 cstack.nl = 0;
3046 cstack.cname = xnew (cstack.size, char *);
3047 cstack.cblev = xnew (cstack.size, int);
3048 }
3049
3050 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3051 curndx = newndx = 0;
3052 lp = curlb.buffer;
3053 *lp = 0;
3054
3055 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3056 structdef = snone; definedef = dnone; objdef = onone;
3057 yacc_rules = FALSE;
3058 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3059 token.valid = savetoken.valid = FALSE;
3060 cblev = 0;
3061 parlev = 0;
3062 cplpl = (c_ext & C_PLPL) == C_PLPL;
3063 cjava = (c_ext & C_JAVA) == C_JAVA;
3064 if (cjava)
3065 { qualifier = "."; qlen = 1; }
3066 else
3067 { qualifier = "::"; qlen = 2; }
3068
3069
3070 while (!feof (inf))
3071 {
3072 c = *lp++;
3073 if (c == '\\')
3074 {
3075 /* If we're at the end of the line, the next character is a
3076 '\0'; don't skip it, because it's the thing that tells us
3077 to read the next line. */
3078 if (*lp == '\0')
3079 {
3080 quotednl = TRUE;
3081 continue;
3082 }
3083 lp++;
3084 c = ' ';
3085 }
3086 else if (incomm)
3087 {
3088 switch (c)
3089 {
3090 case '*':
3091 if (*lp == '/')
3092 {
3093 c = *lp++;
3094 incomm = FALSE;
3095 }
3096 break;
3097 case '\0':
3098 /* Newlines inside comments do not end macro definitions in
3099 traditional cpp. */
3100 CNL_SAVE_DEFINEDEF ();
3101 break;
3102 }
3103 continue;
3104 }
3105 else if (inquote)
3106 {
3107 switch (c)
3108 {
3109 case '"':
3110 inquote = FALSE;
3111 break;
3112 case '\0':
3113 /* Newlines inside strings do not end macro definitions
3114 in traditional cpp, even though compilers don't
3115 usually accept them. */
3116 CNL_SAVE_DEFINEDEF ();
3117 break;
3118 }
3119 continue;
3120 }
3121 else if (inchar)
3122 {
3123 switch (c)
3124 {
3125 case '\0':
3126 /* Hmmm, something went wrong. */
3127 CNL ();
3128 /* FALLTHRU */
3129 case '\'':
3130 inchar = FALSE;
3131 break;
3132 }
3133 continue;
3134 }
3135 else
3136 switch (c)
3137 {
3138 case '"':
3139 inquote = TRUE;
3140 switch (fvdef)
3141 {
3142 case fdefunkey:
3143 case fstartlist:
3144 case finlist:
3145 case fignore:
3146 case vignore:
3147 break;
3148 default:
3149 fvextern = FALSE;
3150 fvdef = fvnone;
3151 }
3152 continue;
3153 case '\'':
3154 inchar = TRUE;
3155 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3156 {
3157 fvextern = FALSE;
3158 fvdef = fvnone;
3159 }
3160 continue;
3161 case '/':
3162 if (*lp == '*')
3163 {
3164 lp++;
3165 incomm = TRUE;
3166 continue;
3167 }
3168 else if (/* cplpl && */ *lp == '/')
3169 {
3170 c = '\0';
3171 break;
3172 }
3173 else
3174 break;
3175 case '%':
3176 if ((c_ext & YACC) && *lp == '%')
3177 {
3178 /* Entering or exiting rules section in yacc file. */
3179 lp++;
3180 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3181 typdef = tnone; structdef = snone;
3182 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3183 cblev = 0;
3184 yacc_rules = !yacc_rules;
3185 continue;
3186 }
3187 else
3188 break;
3189 case '#':
3190 if (definedef == dnone)
3191 {
3192 char *cp;
3193 bool cpptoken = TRUE;
3194
3195 /* Look back on this line. If all blanks, or nonblanks
3196 followed by an end of comment, this is a preprocessor
3197 token. */
3198 for (cp = newlb.buffer; cp < lp-1; cp++)
3199 if (!iswhite (*cp))
3200 {
3201 if (*cp == '*' && *(cp+1) == '/')
3202 {
3203 cp++;
3204 cpptoken = TRUE;
3205 }
3206 else
3207 cpptoken = FALSE;
3208 }
3209 if (cpptoken)
3210 definedef = dsharpseen;
3211 } /* if (definedef == dnone) */
3212
3213 continue;
3214 } /* switch (c) */
3215
3216
3217 /* Consider token only if some involved conditions are satisfied. */
3218 if (typdef != tignore
3219 && definedef != dignorerest
3220 && fvdef != finlist
3221 && structdef != sintemplate
3222 && (definedef != dnone
3223 || structdef != scolonseen))
3224 {
3225 if (midtoken)
3226 {
3227 if (endtoken (c))
3228 {
3229 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3230 {
3231 /*
3232 * This handles :: in the middle, but not at the
3233 * beginning of an identifier. Also, space-separated
3234 * :: is not recognised.
3235 */
3236 lp += 2;
3237 toklen += 2;
3238 c = lp[-1];
3239 goto still_in_token;
3240 }
3241 else
3242 {
3243 bool funorvar = FALSE;
3244
3245 if (yacc_rules
3246 || consider_token (newlb.buffer + tokoff, toklen, c,
3247 &c_ext, cblev, parlev, &funorvar))
3248 {
3249 if (fvdef == foperator)
3250 {
3251 char *oldlp = lp;
3252 lp = skip_spaces (lp-1);
3253 if (*lp != '\0')
3254 lp += 1;
3255 while (*lp != '\0'
3256 && !iswhite (*lp) && *lp != '(')
3257 lp += 1;
3258 c = *lp++;
3259 toklen += lp - oldlp;
3260 }
3261 token.named = FALSE;
3262 if ((c_ext & C_EXT) /* not pure C */
3263 && nestlev > 0 && definedef == dnone)
3264 /* in struct body */
3265 {
3266 write_classname (&token_name, qualifier);
3267 linebuffer_setlen (&token_name,
3268 token_name.len+qlen+toklen);
3269 strcat (token_name.buffer, qualifier);
3270 strncat (token_name.buffer,
3271 newlb.buffer + tokoff, toklen);
3272 token.named = TRUE;
3273 }
3274 else if (objdef == ocatseen)
3275 /* Objective C category */
3276 {
3277 int len = strlen (objtag) + 2 + toklen;
3278 linebuffer_setlen (&token_name, len);
3279 strcpy (token_name.buffer, objtag);
3280 strcat (token_name.buffer, "(");
3281 strncat (token_name.buffer,
3282 newlb.buffer + tokoff, toklen);
3283 strcat (token_name.buffer, ")");
3284 token.named = TRUE;
3285 }
3286 else if (objdef == omethodtag
3287 || objdef == omethodparm)
3288 /* Objective C method */
3289 {
3290 token.named = TRUE;
3291 }
3292 else if (fvdef == fdefunname)
3293 /* GNU DEFUN and similar macros */
3294 {
3295 bool defun = (newlb.buffer[tokoff] == 'F');
3296 int off = tokoff;
3297 int len = toklen;
3298
3299 /* Rewrite the tag so that emacs lisp DEFUNs
3300 can be found by their elisp name */
3301 if (defun)
3302 {
3303 off += 1;
3304 len -= 1;
3305 }
3306 len = toklen;
3307 linebuffer_setlen (&token_name, len);
3308 strncpy (token_name.buffer,
3309 newlb.buffer + off, len);
3310 token_name.buffer[len] = '\0';
3311 if (defun)
3312 while (--len >= 0)
3313 if (token_name.buffer[len] == '_')
3314 token_name.buffer[len] = '-';
3315 token.named = defun;
3316 }
3317 else
3318 {
3319 linebuffer_setlen (&token_name, toklen);
3320 strncpy (token_name.buffer,
3321 newlb.buffer + tokoff, toklen);
3322 token_name.buffer[toklen] = '\0';
3323 /* Name macros and members. */
3324 token.named = (structdef == stagseen
3325 || typdef == ttypeseen
3326 || typdef == tend
3327 || (funorvar
3328 && definedef == dignorerest)
3329 || (funorvar
3330 && definedef == dnone
3331 && structdef == snone
3332 && cblev > 0));
3333 }
3334 token.lineno = lineno;
3335 token.offset = tokoff;
3336 token.length = toklen;
3337 token.line = newlb.buffer;
3338 token.linepos = newlinepos;
3339 token.valid = TRUE;
3340
3341 if (definedef == dnone
3342 && (fvdef == fvnameseen
3343 || fvdef == foperator
3344 || structdef == stagseen
3345 || typdef == tend
3346 || typdef == ttypeseen
3347 || objdef != onone))
3348 {
3349 if (current_lb_is_new)
3350 switch_line_buffers ();
3351 }
3352 else if (definedef != dnone
3353 || fvdef == fdefunname
3354 || instruct)
3355 make_C_tag (funorvar);
3356 }
3357 midtoken = FALSE;
3358 }
3359 } /* if (endtoken (c)) */
3360 else if (intoken (c))
3361 still_in_token:
3362 {
3363 toklen++;
3364 continue;
3365 }
3366 } /* if (midtoken) */
3367 else if (begtoken (c))
3368 {
3369 switch (definedef)
3370 {
3371 case dnone:
3372 switch (fvdef)
3373 {
3374 case fstartlist:
3375 fvdef = finlist;
3376 continue;
3377 case flistseen:
3378 make_C_tag (TRUE); /* a function */
3379 fvdef = fignore;
3380 break;
3381 case fvnameseen:
3382 fvdef = fvnone;
3383 break;
3384 }
3385 if (structdef == stagseen && !cjava)
3386 {
3387 popclass_above (cblev);
3388 structdef = snone;
3389 }
3390 break;
3391 case dsharpseen:
3392 savetoken = token;
3393 break;
3394 }
3395 if (!yacc_rules || lp == newlb.buffer + 1)
3396 {
3397 tokoff = lp - 1 - newlb.buffer;
3398 toklen = 1;
3399 midtoken = TRUE;
3400 }
3401 continue;
3402 } /* if (begtoken) */
3403 } /* if must look at token */
3404
3405
3406 /* Detect end of line, colon, comma, semicolon and various braces
3407 after having handled a token.*/
3408 switch (c)
3409 {
3410 case ':':
3411 if (yacc_rules && token.offset == 0 && token.valid)
3412 {
3413 make_C_tag (FALSE); /* a yacc function */
3414 break;
3415 }
3416 if (definedef != dnone)
3417 break;
3418 switch (objdef)
3419 {
3420 case otagseen:
3421 objdef = oignore;
3422 make_C_tag (TRUE); /* an Objective C class */
3423 break;
3424 case omethodtag:
3425 case omethodparm:
3426 objdef = omethodcolon;
3427 linebuffer_setlen (&token_name, token_name.len + 1);
3428 strcat (token_name.buffer, ":");
3429 break;
3430 }
3431 if (structdef == stagseen)
3432 structdef = scolonseen;
3433 break;
3434 case ';':
3435 if (definedef != dnone)
3436 break;
3437 switch (typdef)
3438 {
3439 case tend:
3440 case ttypeseen:
3441 make_C_tag (FALSE); /* a typedef */
3442 typdef = tnone;
3443 fvdef = fvnone;
3444 break;
3445 case tnone:
3446 case tinbody:
3447 case tignore:
3448 switch (fvdef)
3449 {
3450 case fignore:
3451 if (typdef == tignore)
3452 fvdef = fvnone;
3453 break;
3454 case fvnameseen:
3455 if ((globals && cblev == 0 && (!fvextern || declarations))
3456 || (members && instruct))
3457 make_C_tag (FALSE); /* a variable */
3458 fvextern = FALSE;
3459 fvdef = fvnone;
3460 token.valid = FALSE;
3461 break;
3462 case flistseen:
3463 if ((declarations && typdef == tnone && !instruct)
3464 || (members && typdef != tignore && instruct))
3465 make_C_tag (TRUE); /* a function declaration */
3466 /* FALLTHRU */
3467 default:
3468 fvextern = FALSE;
3469 fvdef = fvnone;
3470 if (declarations
3471 && structdef == stagseen && (c_ext & C_PLPL))
3472 make_C_tag (FALSE); /* forward declaration */
3473 else
3474 /* The following instruction invalidates the token.
3475 Probably the token should be invalidated in all other
3476 cases where some state machine is reset prematurely. */
3477 token.valid = FALSE;
3478 } /* switch (fvdef) */
3479 /* FALLTHRU */
3480 default:
3481 if (!instruct)
3482 typdef = tnone;
3483 }
3484 if (structdef == stagseen)
3485 structdef = snone;
3486 break;
3487 case ',':
3488 if (definedef != dnone)
3489 break;
3490 switch (objdef)
3491 {
3492 case omethodtag:
3493 case omethodparm:
3494 make_C_tag (TRUE); /* an Objective C method */
3495 objdef = oinbody;
3496 break;
3497 }
3498 switch (fvdef)
3499 {
3500 case fdefunkey:
3501 case foperator:
3502 case fstartlist:
3503 case finlist:
3504 case fignore:
3505 case vignore:
3506 break;
3507 case fdefunname:
3508 fvdef = fignore;
3509 break;
3510 case fvnameseen: /* a variable */
3511 if ((globals && cblev == 0 && (!fvextern || declarations))
3512 || (members && instruct))
3513 make_C_tag (FALSE);
3514 break;
3515 case flistseen: /* a function */
3516 if ((declarations && typdef == tnone && !instruct)
3517 || (members && typdef != tignore && instruct))
3518 {
3519 make_C_tag (TRUE); /* a function declaration */
3520 fvdef = fvnameseen;
3521 }
3522 else if (!declarations)
3523 fvdef = fvnone;
3524 token.valid = FALSE;
3525 break;
3526 default:
3527 fvdef = fvnone;
3528 }
3529 if (structdef == stagseen)
3530 structdef = snone;
3531 break;
3532 case '[':
3533 if (definedef != dnone)
3534 break;
3535 if (structdef == stagseen)
3536 structdef = snone;
3537 switch (typdef)
3538 {
3539 case ttypeseen:
3540 case tend:
3541 typdef = tignore;
3542 make_C_tag (FALSE); /* a typedef */
3543 break;
3544 case tnone:
3545 case tinbody:
3546 switch (fvdef)
3547 {
3548 case foperator:
3549 case finlist:
3550 case fignore:
3551 case vignore:
3552 break;
3553 case fvnameseen:
3554 if ((members && cblev == 1)
3555 || (globals && cblev == 0
3556 && (!fvextern || declarations)))
3557 make_C_tag (FALSE); /* a variable */
3558 /* FALLTHRU */
3559 default:
3560 fvdef = fvnone;
3561 }
3562 break;
3563 }
3564 break;
3565 case '(':
3566 if (definedef != dnone)
3567 break;
3568 if (objdef == otagseen && parlev == 0)
3569 objdef = oparenseen;
3570 switch (fvdef)
3571 {
3572 case fvnameseen:
3573 if (typdef == ttypeseen
3574 && *lp != '*'
3575 && !instruct)
3576 {
3577 /* This handles constructs like:
3578 typedef void OperatorFun (int fun); */
3579 make_C_tag (FALSE);
3580 typdef = tignore;
3581 fvdef = fignore;
3582 break;
3583 }
3584 /* FALLTHRU */
3585 case foperator:
3586 fvdef = fstartlist;
3587 break;
3588 case flistseen:
3589 fvdef = finlist;
3590 break;
3591 }
3592 parlev++;
3593 break;
3594 case ')':
3595 if (definedef != dnone)
3596 break;
3597 if (objdef == ocatseen && parlev == 1)
3598 {
3599 make_C_tag (TRUE); /* an Objective C category */
3600 objdef = oignore;
3601 }
3602 if (--parlev == 0)
3603 {
3604 switch (fvdef)
3605 {
3606 case fstartlist:
3607 case finlist:
3608 fvdef = flistseen;
3609 break;
3610 }
3611 if (!instruct
3612 && (typdef == tend
3613 || typdef == ttypeseen))
3614 {
3615 typdef = tignore;
3616 make_C_tag (FALSE); /* a typedef */
3617 }
3618 }
3619 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3620 parlev = 0;
3621 break;
3622 case '{':
3623 if (definedef != dnone)
3624 break;
3625 if (typdef == ttypeseen)
3626 {
3627 /* Whenever typdef is set to tinbody (currently only
3628 here), typdefcblev should be set to cblev. */
3629 typdef = tinbody;
3630 typdefcblev = cblev;
3631 }
3632 switch (fvdef)
3633 {
3634 case flistseen:
3635 make_C_tag (TRUE); /* a function */
3636 /* FALLTHRU */
3637 case fignore:
3638 fvdef = fvnone;
3639 break;
3640 case fvnone:
3641 switch (objdef)
3642 {
3643 case otagseen:
3644 make_C_tag (TRUE); /* an Objective C class */
3645 objdef = oignore;
3646 break;
3647 case omethodtag:
3648 case omethodparm:
3649 make_C_tag (TRUE); /* an Objective C method */
3650 objdef = oinbody;
3651 break;
3652 default:
3653 /* Neutralize `extern "C" {' grot. */
3654 if (cblev == 0 && structdef == snone && nestlev == 0
3655 && typdef == tnone)
3656 cblev = -1;
3657 }
3658 break;
3659 }
3660 switch (structdef)
3661 {
3662 case skeyseen: /* unnamed struct */
3663 pushclass_above (cblev, NULL, 0);
3664 structdef = snone;
3665 break;
3666 case stagseen: /* named struct or enum */
3667 case scolonseen: /* a class */
3668 pushclass_above (cblev, token.line+token.offset, token.length);
3669 structdef = snone;
3670 make_C_tag (FALSE); /* a struct or enum */
3671 break;
3672 }
3673 cblev++;
3674 break;
3675 case '*':
3676 if (definedef != dnone)
3677 break;
3678 if (fvdef == fstartlist)
3679 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3680 break;
3681 case '}':
3682 if (definedef != dnone)
3683 break;
3684 if (!noindentypedefs && lp == newlb.buffer + 1)
3685 {
3686 cblev = 0; /* reset curly brace level if first column */
3687 parlev = 0; /* also reset paren level, just in case... */
3688 }
3689 else if (cblev > 0)
3690 cblev--;
3691 popclass_above (cblev);
3692 structdef = snone;
3693 /* Only if typdef == tinbody is typdefcblev significant. */
3694 if (typdef == tinbody && cblev <= typdefcblev)
3695 {
3696 assert (cblev == typdefcblev);
3697 typdef = tend;
3698 }
3699 break;
3700 case '=':
3701 if (definedef != dnone)
3702 break;
3703 switch (fvdef)
3704 {
3705 case foperator:
3706 case finlist:
3707 case fignore:
3708 case vignore:
3709 break;
3710 case fvnameseen:
3711 if ((members && cblev == 1)
3712 || (globals && cblev == 0 && (!fvextern || declarations)))
3713 make_C_tag (FALSE); /* a variable */
3714 /* FALLTHRU */
3715 default:
3716 fvdef = vignore;
3717 }
3718 break;
3719 case '<':
3720 if (cplpl && structdef == stagseen)
3721 {
3722 structdef = sintemplate;
3723 break;
3724 }
3725 goto resetfvdef;
3726 case '>':
3727 if (structdef == sintemplate)
3728 {
3729 structdef = stagseen;
3730 break;
3731 }
3732 goto resetfvdef;
3733 case '+':
3734 case '-':
3735 if (objdef == oinbody && cblev == 0)
3736 {
3737 objdef = omethodsign;
3738 break;
3739 }
3740 /* FALLTHRU */
3741 resetfvdef:
3742 case '#': case '~': case '&': case '%': case '/': case '|':
3743 case '^': case '!': case '.': case '?': case ']':
3744 if (definedef != dnone)
3745 break;
3746 /* These surely cannot follow a function tag in C. */
3747 switch (fvdef)
3748 {
3749 case foperator:
3750 case finlist:
3751 case fignore:
3752 case vignore:
3753 break;
3754 default:
3755 fvdef = fvnone;
3756 }
3757 break;
3758 case '\0':
3759 if (objdef == otagseen)
3760 {
3761 make_C_tag (TRUE); /* an Objective C class */
3762 objdef = oignore;
3763 }
3764 /* If a macro spans multiple lines don't reset its state. */
3765 if (quotednl)
3766 CNL_SAVE_DEFINEDEF ();
3767 else
3768 CNL ();
3769 break;
3770 } /* switch (c) */
3771
3772 } /* while not eof */
3773
3774 free (token_name.buffer);
3775 free (lbs[0].lb.buffer);
3776 free (lbs[1].lb.buffer);
3777 }
3778
3779 /*
3780 * Process either a C++ file or a C file depending on the setting
3781 * of a global flag.
3782 */
3783 static void
3784 default_C_entries (inf)
3785 FILE *inf;
3786 {
3787 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3788 }
3789
/* Parse INF with C_entries with no language-extension flag set,
   i.e. always as plain C.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* no extension bits */

  C_entries (dialect, inf);
}
3797
3798 /* Always do C++. */
3799 static void
3800 Cplusplus_entries (inf)
3801 FILE *inf;
3802 {
3803 C_entries (C_PLPL, inf);
3804 }
3805
3806 /* Always do Java. */
3807 static void
3808 Cjava_entries (inf)
3809 FILE *inf;
3810 {
3811 C_entries (C_JAVA, inf);
3812 }
3813
3814 /* Always do C*. */
3815 static void
3816 Cstar_entries (inf)
3817 FILE *inf;
3818 {
3819 C_entries (C_STAR, inf);
3820 }
3821
3822 /* Always do Yacc. */
3823 static void
3824 Yacc_entries (inf)
3825 FILE *inf;
3826 {
3827 C_entries (YACC, inf);
3828 }
3829
3830 \f
/* Useful macros. */

/* Loop header.  As long as FILE_POINTER is not at end of file, read the
   next line into LINE_BUFFER with readline, point CHAR_POINTER at the
   start of LINE_BUFFER's text, and run the statement that follows the
   macro invocation.  A `continue' in that statement goes on to the
   next line.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* loop test */                 \
         &&                             /* instructions at start of loop */ \
         (readline (&line_buffer, file_pointer),                        \
          char_pointer = line_buffer.buffer,                            \
          TRUE))

/* Test whether CP points at KEYWORD followed by a character for which
   notinname holds.  KEYWORD must be a constant string, because its
   length is taken with sizeof.  When both tests succeed, CP is set to
   the result of skip_spaces applied just past the keyword, and that
   pointer is the value of the whole expression.  CP is evaluated
   several times, so it must be a plain lvalue.  */
#define LOOKING_AT(cp, keyword)                                         \
  (strneq ((cp), keyword, sizeof (keyword) - 1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof (keyword) - 1])    /* end of keyword */    \
   && ((cp) = skip_spaces ((cp) + sizeof (keyword) - 1))) /* skip spaces */
3844
3845 /*
3846 * Read a file, but do no processing. This is used to do regexp
3847 * matching on files that have no language defined.
3848 */
3849 static void
3850 just_read_file (inf)
3851 FILE *inf;
3852 {
3853 register char *dummy;
3854
3855 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3856 continue;
3857 }
3858
3859 \f
3860 /* Fortran parsing */
3861
3862 static void F_takeprec __P((void));
3863 static void F_getit __P((FILE *));
3864
3865 static void
3866 F_takeprec ()
3867 {
3868 dbp = skip_spaces (dbp);
3869 if (*dbp != '*')
3870 return;
3871 dbp++;
3872 dbp = skip_spaces (dbp);
3873 if (strneq (dbp, "(*)", 3))
3874 {
3875 dbp += 3;
3876 return;
3877 }
3878 if (!ISDIGIT (*dbp))
3879 {
3880 --dbp; /* force failure */
3881 return;
3882 }
3883 do
3884 dbp++;
3885 while (ISDIGIT (*dbp));
3886 }
3887
3888 static void
3889 F_getit (inf)
3890 FILE *inf;
3891 {
3892 register char *cp;
3893
3894 dbp = skip_spaces (dbp);
3895 if (*dbp == '\0')
3896 {
3897 readline (&lb, inf);
3898 dbp = lb.buffer;
3899 if (dbp[5] != '&')
3900 return;
3901 dbp += 6;
3902 dbp = skip_spaces (dbp);
3903 }
3904 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3905 return;
3906 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3907 continue;
3908 pfnote (savenstr (dbp, cp-dbp), TRUE,
3909 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3910 }
3911
3912
3913 static void
3914 Fortran_functions (inf)
3915 FILE *inf;
3916 {
3917 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3918 {
3919 if (*dbp == '%')
3920 dbp++; /* Ratfor escape to fortran */
3921 dbp = skip_spaces (dbp);
3922 if (*dbp == '\0')
3923 continue;
3924 switch (lowcase (*dbp))
3925 {
3926 case 'i':
3927 if (nocase_tail ("integer"))
3928 F_takeprec ();
3929 break;
3930 case 'r':
3931 if (nocase_tail ("real"))
3932 F_takeprec ();
3933 break;
3934 case 'l':
3935 if (nocase_tail ("logical"))
3936 F_takeprec ();
3937 break;
3938 case 'c':
3939 if (nocase_tail ("complex") || nocase_tail ("character"))
3940 F_takeprec ();
3941 break;
3942 case 'd':
3943 if (nocase_tail ("double"))
3944 {
3945 dbp = skip_spaces (dbp);
3946 if (*dbp == '\0')
3947 continue;
3948 if (nocase_tail ("precision"))
3949 break;
3950 continue;
3951 }
3952 break;
3953 }
3954 dbp = skip_spaces (dbp);
3955 if (*dbp == '\0')
3956 continue;
3957 switch (lowcase (*dbp))
3958 {
3959 case 'f':
3960 if (nocase_tail ("function"))
3961 F_getit (inf);
3962 continue;
3963 case 's':
3964 if (nocase_tail ("subroutine"))
3965 F_getit (inf);
3966 continue;
3967 case 'e':
3968 if (nocase_tail ("entry"))
3969 F_getit (inf);
3970 continue;
3971 case 'b':
3972 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3973 {
3974 dbp = skip_spaces (dbp);
3975 if (*dbp == '\0') /* assume un-named */
3976 pfnote (savestr ("blockdata"), TRUE,
3977 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3978 else
3979 F_getit (inf); /* look for name */
3980 }
3981 continue;
3982 }
3983 }
3984 }
3985
3986 \f
3987 /*
3988 * Ada parsing
3989 * Original code by
3990 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3991 */
3992
3993 static void Ada_getit __P((FILE *, char *));
3994
3995 /* Once we are positioned after an "interesting" keyword, let's get
3996 the real tag value necessary. */
3997 static void
3998 Ada_getit (inf, name_qualifier)
3999 FILE *inf;
4000 char *name_qualifier;
4001 {
4002 register char *cp;
4003 char *name;
4004 char c;
4005
4006 while (!feof (inf))
4007 {
4008 dbp = skip_spaces (dbp);
4009 if (*dbp == '\0'
4010 || (dbp[0] == '-' && dbp[1] == '-'))
4011 {
4012 readline (&lb, inf);
4013 dbp = lb.buffer;
4014 }
4015 switch (lowcase(*dbp))
4016 {
4017 case 'b':
4018 if (nocase_tail ("body"))
4019 {
4020 /* Skipping body of procedure body or package body or ....
4021 resetting qualifier to body instead of spec. */
4022 name_qualifier = "/b";
4023 continue;
4024 }
4025 break;
4026 case 't':
4027 /* Skipping type of task type or protected type ... */
4028 if (nocase_tail ("type"))
4029 continue;
4030 break;
4031 }
4032 if (*dbp == '"')
4033 {
4034 dbp += 1;
4035 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4036 continue;
4037 }
4038 else
4039 {
4040 dbp = skip_spaces (dbp);
4041 for (cp = dbp;
4042 (*cp != '\0'
4043 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4044 cp++)
4045 continue;
4046 if (cp == dbp)
4047 return;
4048 }
4049 c = *cp;
4050 *cp = '\0';
4051 name = concat (dbp, name_qualifier, "");
4052 *cp = c;
4053 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4054 if (c == '"')
4055 dbp = cp + 1;
4056 return;
4057 }
4058 }
4059
4060 static void
4061 Ada_funcs (inf)
4062 FILE *inf;
4063 {
4064 bool inquote = FALSE;
4065
4066 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4067 {
4068 while (*dbp != '\0')
4069 {
4070 /* Skip a string i.e. "abcd". */
4071 if (inquote || (*dbp == '"'))
4072 {
4073 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4074 if (dbp != NULL)
4075 {
4076 inquote = FALSE;
4077 dbp += 1;
4078 continue; /* advance char */
4079 }
4080 else
4081 {
4082 inquote = TRUE;
4083 break; /* advance line */
4084 }
4085 }
4086
4087 /* Skip comments. */
4088 if (dbp[0] == '-' && dbp[1] == '-')
4089 break; /* advance line */
4090
4091 /* Skip character enclosed in single quote i.e. 'a'
4092 and skip single quote starting an attribute i.e. 'Image. */
4093 if (*dbp == '\'')
4094 {
4095 dbp++ ;
4096 if (*dbp != '\0')
4097 dbp++;
4098 continue;
4099 }
4100
4101 /* Search for beginning of a token. */
4102 if (!begtoken (*dbp))
4103 {
4104 dbp++;
4105 continue; /* advance char */
4106 }
4107
4108 /* We are at the beginning of a token. */
4109 switch (lowcase(*dbp))
4110 {
4111 case 'f':
4112 if (!packages_only && nocase_tail ("function"))
4113 Ada_getit (inf, "/f");
4114 else
4115 break; /* from switch */
4116 continue; /* advance char */
4117 case 'p':
4118 if (!packages_only && nocase_tail ("procedure"))
4119 Ada_getit (inf, "/p");
4120 else if (nocase_tail ("package"))
4121 Ada_getit (inf, "/s");
4122 else if (nocase_tail ("protected")) /* protected type */
4123 Ada_getit (inf, "/t");
4124 else
4125 break; /* from switch */
4126 continue; /* advance char */
4127 case 't':
4128 if (!packages_only && nocase_tail ("task"))
4129 Ada_getit (inf, "/k");
4130 else if (typedefs && !packages_only && nocase_tail ("type"))
4131 {
4132 Ada_getit (inf, "/t");
4133 while (*dbp != '\0')
4134 dbp += 1;
4135 }
4136 else
4137 break; /* from switch */
4138 continue; /* advance char */
4139 }
4140
4141 /* Look for the end of the token. */
4142 while (!endtoken (*dbp))
4143 dbp++;
4144
4145 } /* advance char */
4146 } /* advance line */
4147 }
4148
4149 \f
4150 /*
4151 * Unix and microcontroller assembly tag handling
4152 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4153 * Idea by Bob Weiner, Motorola Inc. (1994)
4154 */
4155 static void
4156 Asm_labels (inf)
4157 FILE *inf;
4158 {
4159 register char *cp;
4160
4161 LOOP_ON_INPUT_LINES (inf, lb, cp)
4162 {
4163 /* If first char is alphabetic or one of [_.$], test for colon
4164 following identifier. */
4165 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4166 {
4167 /* Read past label. */
4168 cp++;
4169 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4170 cp++;
4171 if (*cp == ':' || iswhite (*cp))
4172 {
4173 /* Found end of label, so copy it and add it to the table. */
4174 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4175 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4176 }
4177 }
4178 }
4179 }
4180
4181 \f
4182 /*
4183 * Perl support
4184 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4185 * Perl variable names: /^(my|local).../
4186 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4187 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4188 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4189 */
4190 static void
4191 Perl_functions (inf)
4192 FILE *inf;
4193 {
4194 char *package = savestr ("main"); /* current package name */
4195 register char *cp;
4196
4197 LOOP_ON_INPUT_LINES (inf, lb, cp)
4198 {
4199 skip_spaces(cp);
4200
4201 if (LOOKING_AT (cp, "package"))
4202 {
4203 free (package);
4204 package = get_tag (cp);
4205 if (package == NULL) /* can't parse package name */
4206 package = savestr ("");
4207 else
4208 package = savestr(package); /* make a copy */
4209 }
4210 else if (LOOKING_AT (cp, "sub"))
4211 {
4212 char *name, *fullname, *pos;
4213 char *sp = cp;
4214
4215 while (!notinname (*cp))
4216 cp++;
4217 if (cp == sp)
4218 continue;
4219 name = savenstr (sp, cp-sp);
4220 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4221 fullname = name;
4222 else
4223 fullname = concat (package, "::", name);
4224 pfnote (fullname, TRUE,
4225 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4226 if (name != fullname)
4227 free (name);
4228 }
4229 else if (globals /* only if tagging global vars is enabled */
4230 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4231 {
4232 /* After "my" or "local", but before any following paren or space. */
4233 char *varname = NULL;
4234
4235 if (*cp == '$' || *cp == '@' || *cp == '%')
4236 {
4237 char* varstart = ++cp;
4238 while (ISALNUM (*cp) || *cp == '_')
4239 cp++;
4240 varname = savenstr (varstart, cp-varstart);
4241 }
4242 else
4243 {
4244 /* Should be examining a variable list at this point;
4245 could insist on seeing an open parenthesis. */
4246 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4247 cp++;
4248 }
4249
4250 /* Perhaps I should back cp up one character, so the TAGS table
4251 doesn't mention (and so depend upon) the following char. */
4252 pfnote (varname, FALSE,
4253 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4254 }
4255 }
4256 }
4257
4258
4259 /*
4260 * Python support
4261 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4262 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4263 * More ideas by seb bacon <seb@jamkit.com> (2002)
4264 */
4265 static void
4266 Python_functions (inf)
4267 FILE *inf;
4268 {
4269 register char *cp;
4270
4271 LOOP_ON_INPUT_LINES (inf, lb, cp)
4272 {
4273 cp = skip_spaces (cp);
4274 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4275 {
4276 char *name = cp;
4277 while (!notinname (*cp) && *cp != ':')
4278 cp++;
4279 pfnote (savenstr (name, cp-name), TRUE,
4280 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4281 }
4282 }
4283 }
4284
4285 \f
4286 /*
4287 * PHP support
4288 * Look for:
4289 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4290 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4291 * - /^[ \t]*define\(\"[^\"]+/
4292 * Only with --members:
4293 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4294 * Idea by Diez B. Roggisch (2001)
4295 */
4296 static void
4297 PHP_functions (inf)
4298 FILE *inf;
4299 {
4300 register char *cp, *name;
4301 bool search_identifier = FALSE;
4302
4303 LOOP_ON_INPUT_LINES (inf, lb, cp)
4304 {
4305 cp = skip_spaces (cp);
4306 name = cp;
4307 if (search_identifier
4308 && *cp != '\0')
4309 {
4310 while (!notinname (*cp))
4311 cp++;
4312 pfnote (savenstr (name, cp-name), TRUE,
4313 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4314 search_identifier = FALSE;
4315 }
4316 else if (LOOKING_AT (cp, "function"))
4317 {
4318 if(*cp == '&')
4319 cp = skip_spaces (cp+1);
4320 if(*cp != '\0')
4321 {
4322 name = cp;
4323 while (!notinname (*cp))
4324 cp++;
4325 pfnote (savenstr (name, cp-name), TRUE,
4326 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4327 }
4328 else
4329 search_identifier = TRUE;
4330 }
4331 else if (LOOKING_AT (cp, "class"))
4332 {
4333 if (*cp != '\0')
4334 {
4335 name = cp;
4336 while (*cp != '\0' && !iswhite (*cp))
4337 cp++;
4338 pfnote (savenstr (name, cp-name), FALSE,
4339 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4340 }
4341 else
4342 search_identifier = TRUE;
4343 }
4344 else if (strneq (cp, "define", 6)
4345 && (cp = skip_spaces (cp+6))
4346 && *cp++ == '('
4347 && (*cp == '"' || *cp == '\''))
4348 {
4349 char quote = *cp++;
4350 name = cp;
4351 while (*cp != quote && *cp != '\0')
4352 cp++;
4353 pfnote (savenstr (name, cp-name), FALSE,
4354 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4355 }
4356 else if (members
4357 && LOOKING_AT (cp, "var")
4358 && *cp == '$')
4359 {
4360 name = cp;
4361 while (!notinname(*cp))
4362 cp++;
4363 pfnote (savenstr (name, cp-name), FALSE,
4364 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4365 }
4366 }
4367 }
4368
4369 \f
4370 /*
4371 * Cobol tag functions
4372 * We could look for anything that could be a paragraph name.
4373 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4374 * Idea by Corny de Souza (1993)
4375 */
4376 static void
4377 Cobol_paragraphs (inf)
4378 FILE *inf;
4379 {
4380 register char *bp, *ep;
4381
4382 LOOP_ON_INPUT_LINES (inf, lb, bp)
4383 {
4384 if (lb.len < 9)
4385 continue;
4386 bp += 8;
4387
4388 /* If eoln, compiler option or comment ignore whole line. */
4389 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4390 continue;
4391
4392 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4393 continue;
4394 if (*ep++ == '.')
4395 pfnote (savenstr (bp, ep-bp), TRUE,
4396 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4397 }
4398 }
4399
4400 \f
4401 /*
4402 * Makefile support
4403 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4404 */
4405 static void
4406 Makefile_targets (inf)
4407 FILE *inf;
4408 {
4409 register char *bp;
4410
4411 LOOP_ON_INPUT_LINES (inf, lb, bp)
4412 {
4413 if (*bp == '\t' || *bp == '#')
4414 continue;
4415 while (*bp != '\0' && *bp != '=' && *bp != ':')
4416 bp++;
4417 if (*bp == ':' || (globals && *bp == '='))
4418 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4419 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4420 }
4421 }
4422
4423 \f
4424 /*
4425 * Pascal parsing
4426 * Original code by Mosur K. Mohan (1989)
4427 *
4428 * Locates tags for procedures & functions. Doesn't do any type- or
4429 * var-definitions. It does look for the keyword "extern" or
4430 * "forward" immediately following the procedure statement; if found,
4431 * the tag is skipped.
4432 */
4433 static void
4434 Pascal_functions (inf)
4435 FILE *inf;
4436 {
4437 linebuffer tline; /* mostly copied from C_entries */
4438 long save_lcno;
4439 int save_lineno, save_len;
4440 char c, *cp, *namebuf;
4441
4442 bool /* each of these flags is TRUE iff: */
4443 incomment, /* point is inside a comment */
4444 inquote, /* point is inside '..' string */
4445 get_tagname, /* point is after PROCEDURE/FUNCTION
4446 keyword, so next item = potential tag */
4447 found_tag, /* point is after a potential tag */
4448 inparms, /* point is within parameter-list */
4449 verify_tag; /* point has passed the parm-list, so the
4450 next token will determine whether this
4451 is a FORWARD/EXTERN to be ignored, or
4452 whether it is a real tag */
4453
4454 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4455 namebuf = NULL; /* keep compiler quiet */
4456 dbp = lb.buffer;
4457 *dbp = '\0';
4458 initbuffer (&tline);
4459
4460 incomment = inquote = FALSE;
4461 found_tag = FALSE; /* have a proc name; check if extern */
4462 get_tagname = FALSE; /* have found "procedure" keyword */
4463 inparms = FALSE; /* found '(' after "proc" */
4464 verify_tag = FALSE; /* check if "extern" is ahead */
4465
4466
4467 while (!feof (inf)) /* long main loop to get next char */
4468 {
4469 c = *dbp++;
4470 if (c == '\0') /* if end of line */
4471 {
4472 readline (&lb, inf);
4473 dbp = lb.buffer;
4474 if (*dbp == '\0')
4475 continue;
4476 if (!((found_tag && verify_tag)
4477 || get_tagname))
4478 c = *dbp++; /* only if don't need *dbp pointing
4479 to the beginning of the name of
4480 the procedure or function */
4481 }
4482 if (incomment)
4483 {
4484 if (c == '}') /* within { } comments */
4485 incomment = FALSE;
4486 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4487 {
4488 dbp++;
4489 incomment = FALSE;
4490 }
4491 continue;
4492 }
4493 else if (inquote)
4494 {
4495 if (c == '\'')
4496 inquote = FALSE;
4497 continue;
4498 }
4499 else
4500 switch (c)
4501 {
4502 case '\'':
4503 inquote = TRUE; /* found first quote */
4504 continue;
4505 case '{': /* found open { comment */
4506 incomment = TRUE;
4507 continue;
4508 case '(':
4509 if (*dbp == '*') /* found open (* comment */
4510 {
4511 incomment = TRUE;
4512 dbp++;
4513 }
4514 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4515 inparms = TRUE;
4516 continue;
4517 case ')': /* end of parms list */
4518 if (inparms)
4519 inparms = FALSE;
4520 continue;
4521 case ';':
4522 if (found_tag && !inparms) /* end of proc or fn stmt */
4523 {
4524 verify_tag = TRUE;
4525 break;
4526 }
4527 continue;
4528 }
4529 if (found_tag && verify_tag && (*dbp != ' '))
4530 {
4531 /* check if this is an "extern" declaration */
4532 if (*dbp == '\0')
4533 continue;
4534 if (lowcase (*dbp == 'e'))
4535 {
4536 if (nocase_tail ("extern")) /* superfluous, really! */
4537 {
4538 found_tag = FALSE;
4539 verify_tag = FALSE;
4540 }
4541 }
4542 else if (lowcase (*dbp) == 'f')
4543 {
4544 if (nocase_tail ("forward")) /* check for forward reference */
4545 {
4546 found_tag = FALSE;
4547 verify_tag = FALSE;
4548 }
4549 }
4550 if (found_tag && verify_tag) /* not external proc, so make tag */
4551 {
4552 found_tag = FALSE;
4553 verify_tag = FALSE;
4554 pfnote (namebuf, TRUE,
4555 tline.buffer, save_len, save_lineno, save_lcno);
4556 continue;
4557 }
4558 }
4559 if (get_tagname) /* grab name of proc or fn */
4560 {
4561 if (*dbp == '\0')
4562 continue;
4563
4564 /* save all values for later tagging */
4565 linebuffer_setlen (&tline, lb.len);
4566 strcpy (tline.buffer, lb.buffer);
4567 save_lineno = lineno;
4568 save_lcno = linecharno;
4569
4570 /* grab block name */
4571 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4572 continue;
4573 namebuf = savenstr (dbp, cp-dbp);
4574 dbp = cp; /* set dbp to e-o-token */
4575 save_len = dbp - lb.buffer + 1;
4576 get_tagname = FALSE;
4577 found_tag = TRUE;
4578 continue;
4579
4580 /* and proceed to check for "extern" */
4581 }
4582 else if (!incomment && !inquote && !found_tag)
4583 {
4584 /* check for proc/fn keywords */
4585 switch (lowcase (c))
4586 {
4587 case 'p':
4588 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4589 get_tagname = TRUE;
4590 continue;
4591 case 'f':
4592 if (nocase_tail ("unction"))
4593 get_tagname = TRUE;
4594 continue;
4595 }
4596 }
4597 } /* while not eof */
4598
4599 free (tline.buffer);
4600 }
4601
4602 \f
4603 /*
4604 * Lisp tag functions
4605 * look for (def or (DEF, quote or QUOTE
4606 */
4607
4608 static void L_getit __P((void));
4609
4610 static void
4611 L_getit ()
4612 {
4613 if (*dbp == '\'') /* Skip prefix quote */
4614 dbp++;
4615 else if (*dbp == '(')
4616 {
4617 dbp++;
4618 /* Try to skip "(quote " */
4619 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4620 /* Ok, then skip "(" before name in (defstruct (foo)) */
4621 dbp = skip_spaces (dbp);
4622 }
4623 get_tag (dbp);
4624 }
4625
4626 static void
4627 Lisp_functions (inf)
4628 FILE *inf;
4629 {
4630 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4631 {
4632 if (dbp[0] != '(')
4633 continue;
4634
4635 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4636 {
4637 dbp = skip_non_spaces (dbp);
4638 dbp = skip_spaces (dbp);
4639 L_getit ();
4640 }
4641 else
4642 {
4643 /* Check for (foo::defmumble name-defined ... */
4644 do
4645 dbp++;
4646 while (!notinname (*dbp) && *dbp != ':');
4647 if (*dbp == ':')
4648 {
4649 do
4650 dbp++;
4651 while (*dbp == ':');
4652
4653 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4654 {
4655 dbp = skip_non_spaces (dbp);
4656 dbp = skip_spaces (dbp);
4657 L_getit ();
4658 }
4659 }
4660 }
4661 }
4662 }
4663
4664 \f
4665 /*
4666 * Postscript tag functions
4667 * Just look for lines where the first character is '/'
4668 * Also look at "defineps" for PSWrap
4669 * Ideas by:
4670 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4671 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4672 */
4673 static void
4674 Postscript_functions (inf)
4675 FILE *inf;
4676 {
4677 register char *bp, *ep;
4678
4679 LOOP_ON_INPUT_LINES (inf, lb, bp)
4680 {
4681 if (bp[0] == '/')
4682 {
4683 for (ep = bp+1;
4684 *ep != '\0' && *ep != ' ' && *ep != '{';
4685 ep++)
4686 continue;
4687 pfnote (savenstr (bp, ep-bp), TRUE,
4688 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4689 }
4690 else if (LOOKING_AT (bp, "defineps"))
4691 get_tag (bp);
4692 }
4693 }
4694
4695 \f
4696 /*
4697 * Scheme tag functions
4698 * look for (def... xyzzy
4699 * (def... (xyzzy
4700 * (def ... ((...(xyzzy ....
4701 * (set! xyzzy
4702 * Original code by Ken Haase (1985?)
4703 */
4704
4705 static void
4706 Scheme_functions (inf)
4707 FILE *inf;
4708 {
4709 register char *bp;
4710
4711 LOOP_ON_INPUT_LINES (inf, lb, bp)
4712 {
4713 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4714 {
4715 bp = skip_non_spaces (bp+4);
4716 /* Skip over open parens and white space */
4717 while (notinname (*bp))
4718 bp++;
4719 get_tag (bp);
4720 }
4721 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4722 get_tag (bp);
4723 }
4724 }
4725
4726 \f
4727 /* Find tags in TeX and LaTeX input files. */
4728
4729 /* TEX_toktab is a table of TeX control sequences that define tags.
4730 * Each entry records one such control sequence.
4731 *
4732 * Original code from who knows whom.
4733 * Ideas by:
4734 * Stefan Monnier (2002)
4735 */
4736
4737 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4738
4739 /* Default set of control sequences to put into TEX_toktab.
4740 The value of environment var TEXTAGS is prepended to this. */
4741 static char *TEX_defenv = "\
4742 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4743 :part:appendix:entry:index:def\
4744 :newcommand:renewcommand:newenvironment:renewenvironment";
4745
4746 static void TEX_mode __P((FILE *));
4747 static void TEX_decode_env __P((char *, char *));
4748
4749 static char TEX_esc = '\\';
4750 static char TEX_opgrp = '{';
4751 static char TEX_clgrp = '}';
4752
4753 /*
4754 * TeX/LaTeX scanning loop.
4755 */
4756 static void
4757 TeX_commands (inf)
4758 FILE *inf;
4759 {
4760 char *cp;
4761 linebuffer *key;
4762
4763 /* Select either \ or ! as escape character. */
4764 TEX_mode (inf);
4765
4766 /* Initialize token table once from environment. */
4767 if (TEX_toktab == NULL)
4768 TEX_decode_env ("TEXTAGS", TEX_defenv);
4769
4770 LOOP_ON_INPUT_LINES (inf, lb, cp)
4771 {
4772 /* Look at each TEX keyword in line. */
4773 for (;;)
4774 {
4775 /* Look for a TEX escape. */
4776 while (*cp++ != TEX_esc)
4777 if (cp[-1] == '\0' || cp[-1] == '%')
4778 goto tex_next_line;
4779
4780 for (key = TEX_toktab; key->buffer != NULL; key++)
4781 if (strneq (cp, key->buffer, key->len))
4782 {
4783 register char *p;
4784 char *name;
4785 int linelen;
4786 bool opgrp = FALSE;
4787
4788 cp = skip_spaces (cp + key->len);
4789 if (*cp == TEX_opgrp)
4790 {
4791 opgrp = TRUE;
4792 cp++;
4793 }
4794 for (p = cp;
4795 (!iswhite (*p) && *p != '#' &&
4796 *p != TEX_opgrp && *p != TEX_clgrp);
4797 p++)
4798 continue;
4799 name = savenstr (cp, p-cp);
4800 linelen = lb.len;
4801 if (!opgrp || *p == TEX_clgrp)
4802 {
4803 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4804 *p++;
4805 linelen = p - lb.buffer + 1;
4806 }
4807 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4808 goto tex_next_line; /* We only tag a line once */
4809 }
4810 }
4811 tex_next_line:
4812 ;
4813 }
4814 }
4815
4816 #define TEX_LESC '\\'
4817 #define TEX_SESC '!'
4818
4819 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4820 chars accordingly. */
4821 static void
4822 TEX_mode (inf)
4823 FILE *inf;
4824 {
4825 int c;
4826
4827 while ((c = getc (inf)) != EOF)
4828 {
4829 /* Skip to next line if we hit the TeX comment char. */
4830 if (c == '%')
4831 while (c != '\n')
4832 c = getc (inf);
4833 else if (c == TEX_LESC || c == TEX_SESC )
4834 break;
4835 }
4836
4837 if (c == TEX_LESC)
4838 {
4839 TEX_esc = TEX_LESC;
4840 TEX_opgrp = '{';
4841 TEX_clgrp = '}';
4842 }
4843 else
4844 {
4845 TEX_esc = TEX_SESC;
4846 TEX_opgrp = '<';
4847 TEX_clgrp = '>';
4848 }
4849 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4850 No attempt is made to correct the situation. */
4851 rewind (inf);
4852 }
4853
4854 /* Read environment and prepend it to the default string.
4855 Build token table. */
4856 static void
4857 TEX_decode_env (evarname, defenv)
4858 char *evarname;
4859 char *defenv;
4860 {
4861 register char *env, *p;
4862 int i, len;
4863
4864 /* Append default string to environment. */
4865 env = getenv (evarname);
4866 if (!env)
4867 env = defenv;
4868 else
4869 {
4870 char *oldenv = env;
4871 env = concat (oldenv, defenv, "");
4872 }
4873
4874 /* Allocate a token table */
4875 for (len = 1, p = env; p;)
4876 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4877 len++;
4878 TEX_toktab = xnew (len, linebuffer);
4879
4880 /* Unpack environment string into token table. Be careful about */
4881 /* zero-length strings (leading ':', "::" and trailing ':') */
4882 for (i = 0; *env != '\0';)
4883 {
4884 p = etags_strchr (env, ':');
4885 if (!p) /* End of environment string. */
4886 p = env + strlen (env);
4887 if (p - env > 0)
4888 { /* Only non-zero strings. */
4889 TEX_toktab[i].buffer = savenstr (env, p - env);
4890 TEX_toktab[i].len = p - env;
4891 i++;
4892 }
4893 if (*p)
4894 env = p + 1;
4895 else
4896 {
4897 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4898 TEX_toktab[i].len = 0;
4899 break;
4900 }
4901 }
4902 }
4903
4904 \f
4905 /* Texinfo support. Dave Love, Mar. 2000. */
4906 static void
4907 Texinfo_nodes (inf)
4908 FILE * inf;
4909 {
4910 char *cp, *start;
4911 LOOP_ON_INPUT_LINES (inf, lb, cp)
4912 if (LOOKING_AT (cp, "@node"))
4913 {
4914 start = cp;
4915 while (*cp != '\0' && *cp != ',')
4916 cp++;
4917 pfnote (savenstr (start, cp - start), TRUE,
4918 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4919 }
4920 }
4921
4922 \f
4923 /*
4924 * Prolog support
4925 *
4926 * Assumes that the predicate or rule starts at column 0.
4927 * Only the first clause of a predicate or rule is added.
4928 * Original code by Sunichirou Sugou (1989)
4929 * Rewritten by Anders Lindgren (1996)
4930 */
4931 static int prolog_pr __P((char *, char *));
4932 static void prolog_skip_comment __P((linebuffer *, FILE *));
4933 static int prolog_atom __P((char *, int));
4934
4935 static void
4936 Prolog_functions (inf)
4937 FILE *inf;
4938 {
4939 char *cp, *last;
4940 int len;
4941 int allocated;
4942
4943 allocated = 0;
4944 len = 0;
4945 last = NULL;
4946
4947 LOOP_ON_INPUT_LINES (inf, lb, cp)
4948 {
4949 if (cp[0] == '\0') /* Empty line */
4950 continue;
4951 else if (iswhite (cp[0])) /* Not a predicate */
4952 continue;
4953 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4954 prolog_skip_comment (&lb, inf);
4955 else if ((len = prolog_pr (cp, last)) > 0)
4956 {
4957 /* Predicate or rule. Store the function name so that we
4958 only generate a tag for the first clause. */
4959 if (last == NULL)
4960 last = xnew(len + 1, char);
4961 else if (len + 1 > allocated)
4962 xrnew (last, len + 1, char);
4963 allocated = len + 1;
4964 strncpy (last, cp, len);
4965 last[len] = '\0';
4966 }
4967 }
4968 }
4969
4970
4971 static void
4972 prolog_skip_comment (plb, inf)
4973 linebuffer *plb;
4974 FILE *inf;
4975 {
4976 char *cp;
4977
4978 do
4979 {
4980 for (cp = plb->buffer; *cp != '\0'; cp++)
4981 if (cp[0] == '*' && cp[1] == '/')
4982 return;
4983 readline (plb, inf);
4984 }
4985 while (!feof(inf));
4986 }
4987
4988 /*
4989 * A predicate or rule definition is added if it matches:
4990 * <beginning of line><Prolog Atom><whitespace>(
4991 * or <beginning of line><Prolog Atom><whitespace>:-
4992 *
4993 * It is added to the tags database if it doesn't match the
4994 * name of the previous clause header.
4995 *
4996 * Return the size of the name of the predicate or rule, or 0 if no
4997 * header was found.
4998 */
4999 static int
5000 prolog_pr (s, last)
5001 char *s;
5002 char *last; /* Name of last clause. */
5003 {
5004 int pos;
5005 int len;
5006
5007 pos = prolog_atom (s, 0);
5008 if (pos < 1)
5009 return 0;
5010
5011 len = pos;
5012 pos = skip_spaces (s + pos) - s;
5013
5014 if ((s[pos] == '.'
5015 || (s[pos] == '(' && (pos += 1))
5016 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5017 && (last == NULL /* save only the first clause */
5018 || len != strlen (last)
5019 || !strneq (s, last, len)))
5020 {
5021 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5022 return len;
5023 }
5024 else
5025 return 0;
5026 }
5027
/*
 * Measure the Prolog atom that starts at S[POS].
 * Return its length in bytes, or -1 if no atom starts there.
 *
 * Two forms are recognised:
 *  - an unquoted atom: a lower case letter or '_' followed by any
 *    number of alphanumerics and '_';
 *  - a quoted atom: text between single quotes, where two single
 *    quotes in a row stand for one and a backslash quotes the next
 *    character.  A quoted atom not closed on this line gives -1.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  register char *p = start;

  if (ISLOWER(*p) || (*p == '_'))
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || (*p == '_'));
      return (int)(p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    {
      switch (*p)
        {
        case '\0':
          /* Multiline quoted atoms are ignored. */
          return -1;

        case '\\':
          if (p[1] == '\0')
            return -1;
          p += 2;
          break;

        case '\'':
          p++;
          if (*p != '\'')
            return (int)(p - start);
          p++;                  /* A doubled quote */
          break;

        default:
          p++;
          break;
        }
    }
}
5086
5087 \f
5088 /*
5089 * Support for Erlang
5090 *
5091 * Generates tags for functions, defines, and records.
5092 * Assumes that Erlang functions start at column 0.
5093 * Original code by Anders Lindgren (1996)
5094 */
5095 static int erlang_func __P((char *, char *));
5096 static void erlang_attribute __P((char *));
5097 static int erlang_atom __P((char *));
5098
5099 static void
5100 Erlang_functions (inf)
5101 FILE *inf;
5102 {
5103 char *cp, *last;
5104 int len;
5105 int allocated;
5106
5107 allocated = 0;
5108 len = 0;
5109 last = NULL;
5110
5111 LOOP_ON_INPUT_LINES (inf, lb, cp)
5112 {
5113 if (cp[0] == '\0') /* Empty line */
5114 continue;
5115 else if (iswhite (cp[0])) /* Not function nor attribute */
5116 continue;
5117 else if (cp[0] == '%') /* comment */
5118 continue;
5119 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5120 continue;
5121 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5122 {
5123 erlang_attribute (cp);
5124 last = NULL;
5125 }
5126 else if ((len = erlang_func (cp, last)) > 0)
5127 {
5128 /*
5129 * Function. Store the function name so that we only
5130 * generates a tag for the first clause.
5131 */
5132 if (last == NULL)
5133 last = xnew (len + 1, char);
5134 else if (len + 1 > allocated)
5135 xrnew (last, len + 1, char);
5136 allocated = len + 1;
5137 strncpy (last, cp, len);
5138 last[len] = '\0';
5139 }
5140 }
5141 }
5142
5143
5144 /*
5145 * A function definition is added if it matches:
5146 * <beginning of line><Erlang Atom><whitespace>(
5147 *
5148 * It is added to the tags database if it doesn't match the
5149 * name of the previous clause header.
5150 *
5151 * Return the size of the name of the function, or 0 if no function
5152 * was found.
5153 */
5154 static int
5155 erlang_func (s, last)
5156 char *s;
5157 char *last; /* Name of last clause. */
5158 {
5159 int pos;
5160 int len;
5161
5162 pos = erlang_atom (s);
5163 if (pos < 1)
5164 return 0;
5165
5166 len = pos;
5167 pos = skip_spaces (s + pos) - s;
5168
5169 /* Save only the first clause. */
5170 if (s[pos++] == '('
5171 && (last == NULL
5172 || len != (int)strlen (last)
5173 || !strneq (s, last, len)))
5174 {
5175 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5176 return len;
5177 }
5178
5179 return 0;
5180 }
5181
5182
5183 /*
5184 * Handle attributes. Currently, tags are generated for defines
5185 * and records.
5186 *
5187 * They are on the form:
5188 * -define(foo, bar).
5189 * -define(Foo(M, N), M+N).
5190 * -record(graph, {vtab = notable, cyclic = true}).
5191 */
5192 static void
5193 erlang_attribute (s)
5194 char *s;
5195 {
5196 char *cp = s;
5197
5198 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5199 && *cp++ == '(')
5200 {
5201 int len = erlang_atom (skip_spaces (cp));
5202 if (len > 0)
5203 pfnote (savenstr (cp, len), TRUE,
5204 s, cp + len - s, lineno, linecharno);
5205 }
5206 return;
5207 }
5208
5209
/*
 * Measure the Erlang atom (or variable) at the start of S.
 * Return its length in bytes.  The result is 0 when S does not start
 * with a letter, '_' or a single quote, and also when a quoted atom
 * is not closed before the end of the string; callers treat any
 * value below 1 as "no atom".
 */
static int
erlang_atom (s)
     char *s;
{
  register char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted: find the closing quote.  A backslash
         makes the character after it ordinary.  */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\' && *++p == '\0')
            return 0;
          p++;
        }
      p++;                      /* count the closing quote */
    }

  return (int)(p - s);
}
5238
5239 \f
5240 #ifdef ETAGS_REGEXPS
5241
5242 static char *scan_separators __P((char *));
5243 static void add_regex __P((char *, language *));
5244 static char *substitute __P((char *, char *, struct re_registers *));
5245
/*
 * Unquote, in place, the text that follows the separator NAME[0].
 *
 * Characters are copied down to the start of NAME until an unquoted
 * occurrence of the separator is met.  A backslash quotes the next
 * character: \a \b \d \e \f \n \r \t \v become the corresponding
 * control characters, a quoted separator becomes the bare separator,
 * and any other quoted character is kept together with its
 * backslash.  The copied text is null terminated.
 *
 * Return a pointer to the terminating separator, which is left
 * untouched, or NULL when the text is unterminated.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next character is stored */
  char *src;                    /* character being examined */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* A backslash at the very end is silently dropped.  */
          if (*++src == '\0')
            break;
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b';   break; /* BS (back space) */
            case 'd': *dst++ = 0177;   break; /* DEL (delete) */
            case 'e': *dst++ = 033;    break; /* ESC (escape) */
            case 'f': *dst++ = '\f';   break; /* FF (form feed) */
            case 'n': *dst++ = '\n';   break; /* NL (new line) */
            case 'r': *dst++ = '\r';   break; /* CR (carriage return) */
            case 't': *dst++ = '\t';   break; /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v';   break; /* VT (vertical tab) */
            default:
              if (*src != sep)
                /* Something else is quoted, so preserve the quote. */
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5305
5306 /* Look at the argument of --regex or --no-regex and do the right
5307 thing. Same for each line of a regexp file. */
5308 static void
5309 analyse_regex (regex_arg)
5310 char *regex_arg;
5311 {
5312 if (regex_arg == NULL)
5313 {
5314 free_patterns (); /* --no-regex: remove existing regexps */
5315 return;
5316 }
5317
5318 /* A real --regexp option or a line in a regexp file. */
5319 switch (regex_arg[0])
5320 {
5321 /* Comments in regexp file or null arg to --regex. */
5322 case '\0':
5323 case ' ':
5324 case '\t':
5325 break;
5326
5327 /* Read a regex file. This is recursive and may result in a
5328 loop, which will stop when the file descriptors are exhausted. */
5329 case '@':
5330 {
5331 FILE *regexfp;
5332 linebuffer regexbuf;
5333 char *regexfile = regex_arg + 1;
5334
5335 /* regexfile is a file containing regexps, one per line. */
5336 regexfp = fopen (regexfile, "r");
5337 if (regexfp == NULL)
5338 {
5339 pfatal (regexfile);
5340 return;
5341 }
5342 initbuffer (&regexbuf);
5343 while (readline_internal (&regexbuf, regexfp) > 0)
5344 analyse_regex (regexbuf.buffer);
5345 free (regexbuf.buffer);
5346 fclose (regexfp);
5347 }
5348 break;
5349
5350 /* Regexp to be used for a specific language only. */
5351 case '{':
5352 {
5353 language *lang;
5354 char *lang_name = regex_arg + 1;
5355 char *cp;
5356
5357 for (cp = lang_name; *cp != '}'; cp++)
5358 if (*cp == '\0')
5359 {
5360 error ("unterminated language name in regex: %s", regex_arg);
5361 return;
5362 }
5363 *cp++ = '\0';
5364 lang = get_language_from_langname (lang_name);
5365 if (lang == NULL)
5366 return;
5367 add_regex (cp, lang);
5368 }
5369 break;
5370
5371 /* Regexp to be used for any language. */
5372 default:
5373 add_regex (regex_arg, NULL);
5374 break;
5375 }
5376 }
5377
5378 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5379 expression, into a real regular expression by compiling it. */
5380 static void
5381 add_regex (regexp_pattern, lang)
5382 char *regexp_pattern;
5383 language *lang;
5384 {
5385 static struct re_pattern_buffer zeropattern;
5386 char sep, *pat, *name, *modifiers;
5387 const char *err;
5388 struct re_pattern_buffer *patbuf;
5389 pattern *pp;
5390 bool ignore_case, multi_line, single_line;
5391
5392
5393 if (strlen(regexp_pattern) < 3)
5394 {
5395 error ("null regexp", (char *)NULL);
5396 return;
5397 }
5398 sep = regexp_pattern[0];
5399 name = scan_separators (regexp_pattern);
5400 if (name == NULL)
5401 {
5402 error ("%s: unterminated regexp", regexp_pattern);
5403 return;
5404 }
5405 if (name[1] == sep)
5406 {
5407 error ("null name for regexp \"%s\"", regexp_pattern);
5408 return;
5409 }
5410 modifiers = scan_separators (name);
5411 if (modifiers == NULL) /* no terminating separator --> no name */
5412 {
5413 modifiers = name;
5414 name = "";
5415 }
5416 else
5417 modifiers += 1; /* skip separator */
5418
5419 /* Parse regex modifiers. */
5420 ignore_case = FALSE; /* case is significant */
5421 multi_line = FALSE; /* matches are done one line at a time */
5422 single_line = FALSE; /* dot does not match newline */
5423 for (; modifiers[0] != '\0'; modifiers++)
5424 switch (modifiers[0])
5425 {
5426 case 'i':
5427 ignore_case = TRUE;
5428 break;
5429 case 's':
5430 single_line = TRUE;
5431 /* FALLTHRU */
5432 case 'm':
5433 multi_line = TRUE;
5434 need_filebuf = TRUE;
5435 break;
5436 default:
5437 {
5438 char wrongmod [2];
5439 wrongmod[0] = modifiers[0];
5440 wrongmod[1] = '\0';
5441 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5442 }
5443 break;
5444 }
5445
5446 patbuf = xnew (1, struct re_pattern_buffer);
5447 *patbuf = zeropattern;
5448 if (ignore_case)
5449 {
5450 static char lc_trans[CHARS];
5451 int i;
5452 for (i = 0; i < CHARS; i++)
5453 lc_trans[i] = lowcase (i);
5454 patbuf->translate = lc_trans; /* translation table to fold case */
5455 }
5456
5457 if (multi_line)
5458 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5459 else
5460 pat = regexp_pattern;
5461
5462 if (single_line)
5463 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5464 else
5465 re_set_syntax (RE_SYNTAX_EMACS);
5466
5467 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5468 if (multi_line)
5469 free (pat);
5470 if (err != NULL)
5471 {
5472 error ("%s while compiling pattern", err);
5473 return;
5474 }
5475
5476 pp = p_head;
5477 p_head = xnew (1, pattern);
5478 p_head->regex = savestr (regexp_pattern);
5479 p_head->p_next = pp;
5480 p_head->lang = lang;
5481 p_head->pat = patbuf;
5482 p_head->name_pattern = savestr (name);
5483 p_head->error_signaled = FALSE;
5484 p_head->ignore_case = ignore_case;
5485 p_head->multi_line = multi_line;
5486 }
5487
5488 /*
5489 * Do the substitutions indicated by the regular expression and
5490 * arguments.
5491 */
5492 static char *
5493 substitute (in, out, regs)
5494 char *in, *out;
5495 struct re_registers *regs;
5496 {
5497 char *result, *t;
5498 int size, dig, diglen;
5499
5500 result = NULL;
5501 size = strlen (out);
5502
5503 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5504 if (out[size - 1] == '\\')
5505 fatal ("pattern error in \"%s\"", out);
5506 for (t = etags_strchr (out, '\\');
5507 t != NULL;
5508 t = etags_strchr (t + 2, '\\'))
5509 if (ISDIGIT (t[1]))
5510 {
5511 dig = t[1] - '0';
5512 diglen = regs->end[dig] - regs->start[dig];
5513 size += diglen - 2;
5514 }
5515 else
5516 size -= 1;
5517
5518 /* Allocate space and do the substitutions. */
5519 result = xnew (size + 1, char);
5520
5521 for (t = result; *out != '\0'; out++)
5522 if (*out == '\\' && ISDIGIT (*++out))
5523 {
5524 dig = *out - '0';
5525 diglen = regs->end[dig] - regs->start[dig];
5526 strncpy (t, in + regs->start[dig], diglen);
5527 t += diglen;
5528 }
5529 else
5530 *t++ = *out;
5531 *t = '\0';
5532
5533 assert (t <= result + size && t - result == (int)strlen (result));
5534
5535 return result;
5536 }
5537
5538 /* Deallocate all patterns. */
5539 static void
5540 free_patterns ()
5541 {
5542 pattern *pp;
5543 while (p_head != NULL)
5544 {
5545 pp = p_head->p_next;
5546 free (p_head->regex);
5547 free (p_head->name_pattern);
5548 free (p_head);
5549 p_head = pp;
5550 }
5551 return;
5552 }
5553
5554 /*
5555 * Reads the whole file as a single string from `filebuf' and looks for
5556 * multi-line regular expressions, creating tags on matches.
5557 * readline already dealt with normal regexps.
5558 *
5559 * Idea by Ben Wing <ben@666.com> (2002).
5560 */
5561 static void
5562 regex_tag_multiline ()
5563 {
5564 char *buffer = filebuf.buffer;
5565 pattern *pp;
5566
5567 for (pp = p_head; pp != NULL; pp = pp->p_next)
5568 {
5569 int match = 0;
5570
5571 if (!pp->multi_line)
5572 continue; /* skip normal regexps */
5573
5574 /* Generic initialisations before parsing file from memory. */
5575 lineno = 1; /* reset global line number */
5576 charno = 0; /* reset global char number */
5577 linecharno = 0; /* reset global char number of line start */
5578
5579 /* Only use generic regexps or those for the current language. */
5580 if (pp->lang != NULL && pp->lang != curfdp->lang)
5581 continue;
5582
5583 while (match >= 0 && match < filebuf.len)
5584 {
5585 match = re_search (pp->pat, buffer, filebuf.len, charno,
5586 filebuf.len - match, &pp->regs);
5587 switch (match)
5588 {
5589 case -2:
5590 /* Some error. */
5591 if (!pp->error_signaled)
5592 {
5593 error ("regexp stack overflow while matching \"%s\"",
5594 pp->regex);
5595 pp->error_signaled = TRUE;
5596 }
5597 break;
5598 case -1:
5599 /* No match. */
5600 break;
5601 default:
5602 if (match == pp->regs.end[0])
5603 {
5604 if (!pp->error_signaled)
5605 {
5606 error ("regexp matches the empty string: \"%s\"",
5607 pp->regex);
5608 pp->error_signaled = TRUE;
5609 }
5610 match = -3; /* exit from while loop */
5611 break;
5612 }
5613
5614 /* Match occurred. Construct a tag. */
5615 while (charno < pp->regs.end[0])
5616 if (buffer[charno++] == '\n')
5617 lineno++, linecharno = charno;
5618 if (pp->name_pattern[0] != '\0')
5619 {
5620 /* Make a named tag. */
5621 char *name = substitute (buffer,
5622 pp->name_pattern, &pp->regs);
5623 if (name != NULL)
5624 pfnote (name, TRUE, buffer + linecharno,
5625 charno - linecharno + 1, lineno, linecharno);
5626 }
5627 else
5628 {
5629 /* Make an unnamed tag. */
5630 pfnote ((char *)NULL, TRUE, buffer + linecharno,
5631 charno - linecharno + 1, lineno, linecharno);
5632 }
5633 break;
5634 }
5635 }
5636 }
5637 }
5638
5639 #endif /* ETAGS_REGEXPS */
5640
5641 \f
5642 static bool
5643 nocase_tail (cp)
5644 char *cp;
5645 {
5646 register int len = 0;
5647
5648 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5649 cp++, len++;
5650 if (*cp == '\0' && !intoken (dbp[len]))
5651 {
5652 dbp += len;
5653 return TRUE;
5654 }
5655 return FALSE;
5656 }
5657
5658 static char *
5659 get_tag (bp)
5660 register char *bp;
5661 {
5662 register char *cp, *name;
5663
5664 if (*bp == '\0')
5665 return NULL;
5666 /* Go till you get to white space or a syntactic break */
5667 for (cp = bp + 1; !notinname (*cp); cp++)
5668 continue;
5669 name = savenstr (bp, cp-bp);
5670 pfnote (name, TRUE,
5671 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5672 return name;
5673 }
5674
5675 /* Initialize a linebuffer for use */
5676 static void
5677 initbuffer (lbp)
5678 linebuffer *lbp;
5679 {
5680 lbp->size = (DEBUG) ? 3 : 200;
5681 lbp->buffer = xnew (lbp->size, char);
5682 lbp->buffer[0] = '\0';
5683 lbp->len = 0;
5684 }
5685
5686 /*
5687 * Read a line of text from `stream' into `lbp', excluding the
5688 * newline or CR-NL, if any. Return the number of characters read from
5689 * `stream', which is the length of the line including the newline.
5690 *
5691 * On DOS or Windows we do not count the CR character, if any before the
5692 * NL, in the returned length; this mirrors the behavior of Emacs on those
5693 * platforms (for text files, it translates CR-NL to NL as it reads in the
5694 * file).
5695 *
5696 * If multi-line regular expressions are requested, each line read is
5697 * appended to `filebuf'.
5698 */
5699 static long
5700 readline_internal (lbp, stream)
5701 linebuffer *lbp;
5702 register FILE *stream;
5703 {
5704 char *buffer = lbp->buffer;
5705 register char *p = lbp->buffer;
5706 register char *pend;
5707 int chars_deleted;
5708
5709 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5710
5711 for (;;)
5712 {
5713 register int c = getc (stream);
5714 if (p == pend)
5715 {
5716 /* We're at the end of linebuffer: expand it. */
5717 lbp->size *= 2;
5718 xrnew (buffer, lbp->size, char);
5719 p += buffer - lbp->buffer;
5720 pend = buffer + lbp->size;
5721 lbp->buffer = buffer;
5722 }
5723 if (c == EOF)
5724 {
5725 *p = '\0';
5726 chars_deleted = 0;
5727 break;
5728 }
5729 if (c == '\n')
5730 {
5731 if (p > buffer && p[-1] == '\r')
5732 {
5733 p -= 1;
5734 #ifdef DOS_NT
5735 /* Assume CRLF->LF translation will be performed by Emacs
5736 when loading this file, so CRs won't appear in the buffer.
5737 It would be cleaner to compensate within Emacs;
5738 however, Emacs does not know how many CRs were deleted
5739 before any given point in the file. */
5740 chars_deleted = 1;
5741 #else
5742 chars_deleted = 2;
5743 #endif
5744 }
5745 else
5746 {
5747 chars_deleted = 1;
5748 }
5749 *p = '\0';
5750 break;
5751 }
5752 *p++ = c;
5753 }
5754 lbp->len = p - buffer;
5755
5756 if (need_filebuf /* we need filebuf for multi-line regexps */
5757 && chars_deleted > 0) /* not at EOF */
5758 {
5759 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
5760 {
5761 /* Expand filebuf. */
5762 filebuf.size *= 2;
5763 xrnew (filebuf.buffer, filebuf.size, char);
5764 }
5765 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
5766 filebuf.len += lbp->len;
5767 filebuf.buffer[filebuf.len++] = '\n';
5768 filebuf.buffer[filebuf.len] = '\0';
5769 }
5770
5771 return lbp->len + chars_deleted;
5772 }
5773
5774 /*
5775 * Like readline_internal, above, but in addition try to match the
5776 * input line against relevant regular expressions and manage #line
5777 * directives.
5778 */
5779 static void
5780 readline (lbp, stream)
5781 linebuffer *lbp;
5782 FILE *stream;
5783 {
5784 long result;
5785
5786 linecharno = charno; /* update global char number of line start */
5787 result = readline_internal (lbp, stream); /* read line */
5788 lineno += 1; /* increment global line number */
5789 charno += result; /* increment global char number */
5790
5791 /* Honour #line directives. */
5792 if (!no_line_directive)
5793 {
5794 static bool discard_until_line_directive;
5795
5796 /* Check whether this is a #line directive. */
5797 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5798 {
5799 int start, lno;
5800
5801 if (DEBUG) start = 0; /* shut up the compiler */
5802 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5803 {
5804 char *endp = lbp->buffer + start;
5805
5806 assert (start > 0);
5807 while ((endp = etags_strchr (endp, '"')) != NULL
5808 && endp[-1] == '\\')
5809 endp++;
5810 if (endp != NULL)
5811 /* Ok, this is a real #line directive. Let's deal with it. */
5812 {
5813 char *taggedabsname; /* absolute name of original file */
5814 char *taggedfname; /* name of original file as given */
5815 char *name; /* temp var */
5816
5817 discard_until_line_directive = FALSE; /* found it */
5818 name = lbp->buffer + start;
5819 *endp = '\0';
5820 canonicalize_filename (name); /* for DOS */
5821 taggedabsname = absolute_filename (name, curfdp->infabsdir);
5822 if (filename_is_absolute (name)
5823 || filename_is_absolute (curfdp->infname))
5824 taggedfname = savestr (taggedabsname);
5825 else
5826 taggedfname = relative_filename (taggedabsname,tagfiledir);
5827
5828 if (streq (curfdp->taggedfname, taggedfname))
5829 /* The #line directive is only a line number change. We
5830 deal with this afterwards. */
5831 free (taggedfname);
5832 else
5833 /* The tags following this #line directive should be
5834 attributed to taggedfname. In order to do this, set
5835 curfdp accordingly. */
5836 {
5837 fdesc *fdp; /* file description pointer */
5838
5839 /* Go look for a file description already set up for the
5840 file indicated in the #line directive. If there is
5841 one, use it from now until the next #line
5842 directive. */
5843 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5844 if (streq (fdp->infname, curfdp->infname)
5845 && streq (fdp->taggedfname, taggedfname))
5846 /* If we remove the second test above (after the &&)
5847 then all entries pertaining to the same file are
5848 coalesced in the tags file. If we use it, then
5849 entries pertaining to the same file but generated
5850 from different files (via #line directives) will
5851 go into separate sections in the tags file. These
5852 alternatives look equivalent. The first one
5853 destroys some apparently useless information. */
5854 {
5855 curfdp = fdp;
5856 free (taggedfname);
5857 break;
5858 }
5859 /* Else, if we already tagged the real file, skip all
5860 input lines until the next #line directive. */
5861 if (fdp == NULL) /* not found */
5862 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5863 if (streq (fdp->infabsname, taggedabsname))
5864 {
5865 discard_until_line_directive = TRUE;
5866 free (taggedfname);
5867 break;
5868 }
5869 /* Else create a new file description and use that from
5870 now on, until the next #line directive. */
5871 if (fdp == NULL) /* not found */
5872 {
5873 fdp = fdhead;
5874 fdhead = xnew (1, fdesc);
5875 *fdhead = *curfdp; /* copy curr. file description */
5876 fdhead->next = fdp;
5877 fdhead->infname = savestr (curfdp->infname);
5878 fdhead->infabsname = savestr (curfdp->infabsname);
5879 fdhead->infabsdir = savestr (curfdp->infabsdir);
5880 fdhead->taggedfname = taggedfname;
5881 fdhead->usecharno = FALSE;
5882 curfdp = fdhead;
5883 }
5884 }
5885 free (taggedabsname);
5886 lineno = lno - 1;
5887 readline (lbp, stream);
5888 return;
5889 } /* if a real #line directive */
5890 } /* if #line is followed by a a number */
5891 } /* if line begins with "#line " */
5892
5893 /* If we are here, no #line directive was found. */
5894 if (discard_until_line_directive)
5895 {
5896 if (result > 0)
5897 {
5898 /* Do a tail recursion on ourselves, thus discarding the contents
5899 of the line buffer. */
5900 readline (lbp, stream);
5901 return;
5902 }
5903 /* End of file. */
5904 discard_until_line_directive = FALSE;
5905 return;
5906 }
5907 } /* if #line directives should be considered */
5908
5909 #ifdef ETAGS_REGEXPS
5910 {
5911 int match;
5912 pattern *pp;
5913
5914 /* Match against relevant patterns. */
5915 if (lbp->len > 0)
5916 for (pp = p_head; pp != NULL; pp = pp->p_next)
5917 {
5918 /* Only use generic regexps or those for the current language.
5919 Also do not use multiline regexps, which is the job of
5920 regex_tag_multiline. */
5921 if ((pp->lang != NULL && pp->lang != fdhead->lang)
5922 || pp->multi_line)
5923 continue;
5924
5925 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5926 switch (match)
5927 {
5928 case -2:
5929 /* Some error. */
5930 if (!pp->error_signaled)
5931 {
5932 error ("regexp stack overflow while matching \"%s\"",
5933 pp->regex);
5934 pp->error_signaled = TRUE;
5935 }
5936 break;
5937 case -1:
5938 /* No match. */
5939 break;
5940 case 0:
5941 /* Empty string matched. */
5942 if (!pp->error_signaled)
5943 {
5944 error ("regexp matches the empty string: \"%s\"",
5945 pp->regex);
5946 pp->error_signaled = TRUE;
5947 }
5948 break;
5949 default:
5950 /* Match occurred. Construct a tag. */
5951 if (pp->name_pattern[0] != '\0')
5952 {
5953 /* Make a named tag. */
5954 char *name = substitute (lbp->buffer,
5955 pp->name_pattern, &pp->regs);
5956 if (name != NULL)
5957 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5958 }
5959 else
5960 {
5961 /* Make an unnamed tag. */
5962 pfnote ((char *)NULL, TRUE,
5963 lbp->buffer, match, lineno, linecharno);
5964 }
5965 break;
5966 }
5967 }
5968 }
5969 #endif /* ETAGS_REGEXPS */
5970 }
5971
5972 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole string is handed to savenstr, which returns a newly
 * allocated copy of strlen (CP) + 1 bytes.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole = strlen (cp);

  return savenstr (cp, whole);
}
5983
5984 /*
5985 * Return a pointer to a space of size LEN+1 allocated with xnew where
5986 * the string CP has been copied for at most the first LEN characters.
5987 */
5988 static char *
5989 savenstr (cp, len)
5990 char *cp;
5991 int len;
5992 {
5993 register char *dp;
5994
5995 dp = xnew (len + 1, char);
5996 strncpy (dp, cp, len);
5997 dp[len] = '\0';
5998 return dp;
5999 }
6000
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.  As in
 * strrchr, C is converted to char before comparing, so that bytes
 * above 0x7F are found even where plain char is signed.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r = NULL;
  const char target = (char) c;

  do
    {
      if (*sp == target)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6022
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.  As in
 * strchr, C is converted to char before comparing, so that bytes
 * above 0x7F are found even where plain char is signed.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char target = (char) c;

  do
    {
      if (*sp == target)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6041
6042 /*
6043 * Return TRUE if the two strings are equal, ignoring case for alphabetic
6044 * characters.
6045 *
6046 * Analogous to BSD's strcasecmp, included for portability.
6047 */
6048 static bool
6049 strcaseeq (s1, s2)
6050 register const char *s1;
6051 register const char *s2;
6052 {
6053 while (*s1 != '\0'
6054 && (ISALPHA (*s1) && ISALPHA (*s2)
6055 ? lowcase (*s1) == lowcase (*s2)
6056 : *s1 == *s2))
6057 s1++, s2++;
6058
6059 return (*s1 == *s2);
6060 }
6061
/* Advance CP over every leading character for which iswhite holds and
   return the address of the first character that is not white.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6071
/* Advance CP up to the first white character (as told by iswhite) or
   to the terminating NUL, whichever comes first, and return that
   address.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6081
6082 /* Print error message and exit. */
6083 void
6084 fatal (s1, s2)
6085 char *s1, *s2;
6086 {
6087 error (s1, s2);
6088 exit (BAD);
6089 }
6090
6091 static void
6092 pfatal (s1)
6093 char *s1;
6094 {
6095 perror (s1);
6096 exit (BAD);
6097 }
6098
6099 static void
6100 suggest_asking_for_help ()
6101 {
6102 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6103 progname,
6104 #ifdef LONG_OPTIONS
6105 "--help"
6106 #else
6107 "-h"
6108 #endif
6109 );
6110 exit (BAD);
6111 }
6112
6113 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6114 static void
6115 error (s1, s2)
6116 const char *s1, *s2;
6117 {
6118 fprintf (stderr, "%s: ", progname);
6119 fprintf (stderr, s1, s2);
6120 fprintf (stderr, "\n");
6121 }
6122
6123 /* Return a newly-allocated string whose contents
6124 concatenate those of s1, s2, s3. */
6125 static char *
6126 concat (s1, s2, s3)
6127 char *s1, *s2, *s3;
6128 {
6129 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6130 char *result = xnew (len1 + len2 + len3 + 1, char);
6131
6132 strcpy (result, s1);
6133 strcpy (result + len1, s2);
6134 strcpy (result + len1 + len2, s3);
6135 result[len1 + len2 + len3] = '\0';
6136
6137 return result;
6138 }
6139
6140 \f
6141 /* Does the same work as the system V getcwd, but does not need to
6142 guess the buffer size in advance. */
6143 static char *
6144 etags_getcwd ()
6145 {
6146 #ifdef HAVE_GETCWD
6147 int bufsize = 200;
6148 char *path = xnew (bufsize, char);
6149
6150 while (getcwd (path, bufsize) == NULL)
6151 {
6152 if (errno != ERANGE)
6153 pfatal ("getcwd");
6154 bufsize *= 2;
6155 free (path);
6156 path = xnew (bufsize, char);
6157 }
6158
6159 canonicalize_filename (path);
6160 return path;
6161
6162 #else /* not HAVE_GETCWD */
6163 #if MSDOS
6164
6165 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6166
6167 getwd (path);
6168
6169 for (p = path; *p != '\0'; p++)
6170 if (*p == '\\')
6171 *p = '/';
6172 else
6173 *p = lowcase (*p);
6174
6175 return strdup (path);
6176 #else /* not MSDOS */
6177 linebuffer path;
6178 FILE *pipe;
6179
6180 initbuffer (&path);
6181 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6182 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6183 pfatal ("pwd");
6184 pclose (pipe);
6185
6186 return path.buffer;
6187 #endif /* not MSDOS */
6188 #endif /* not HAVE_GETCWD */
6189 }
6190
6191 /* Return a newly allocated string containing the file name of FILE
6192 relative to the absolute directory DIR (which should end with a slash). */
6193 static char *
6194 relative_filename (file, dir)
6195 char *file, *dir;
6196 {
6197 char *fp, *dp, *afn, *res;
6198 int i;
6199
6200 /* Find the common root of file and dir (with a trailing slash). */
6201 afn = absolute_filename (file, cwd);
6202 fp = afn;
6203 dp = dir;
6204 while (*fp++ == *dp++)
6205 continue;
6206 fp--, dp--; /* back to the first differing char */
6207 #ifdef DOS_NT
6208 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6209 return afn;
6210 #endif
6211 do /* look at the equal chars until '/' */
6212 fp--, dp--;
6213 while (*fp != '/');
6214
6215 /* Build a sequence of "../" strings for the resulting relative file name. */
6216 i = 0;
6217 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6218 i += 1;
6219 res = xnew (3*i + strlen (fp + 1) + 1, char);
6220 res[0] = '\0';
6221 while (i-- > 0)
6222 strcat (res, "../");
6223
6224 /* Add the file name relative to the common root of file and dir. */
6225 strcat (res, fp + 1);
6226 free (afn);
6227
6228 return res;
6229 }
6230
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  "/dirname/.." and "/." components are then removed in
   place; a ".." at the root is silently dropped.  If nothing is left,
   "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[0] != '\0' && file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component,
                 never stepping before the beginning of RES.  */
              for (cp = slashp; cp > res; cp--)
                if (filename_is_absolute (cp - 1))
                  break;
              if (cp == res)
                cp = slashp;    /* the absolute name begins with "/.." */
              else
                {
                  cp--;
#ifdef DOS_NT
                  /* Under MSDOS and NT we get `d:/NAME' as absolute
                     file name, so the luser could say `d:/../NAME'.
                     We silently treat this as `d:/NAME'.  */
                  if (cp[0] != '/')
                    cp = slashp;
#endif
                }
              /* Source and destination overlap, so strcpy must not be
                 used here.  The count includes the terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name denotes the root.  */
      free (res);
      return savestr ("/");
    }
  return res;
}
6291
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is canonicalized in place.  It is also truncated after its last
   slash while the directory name is computed, and restored before
   returning.  When FILE has no slash at all, a copy of DIR is
   returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Temporarily cut FILE right after its directory part.  */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6313
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.

   A name is absolute when it starts with a slash or, under DOS_NT,
   with a letter, a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6326
/* Put the file name FN in canonical form.  Works in place.

   Under DOS_NT a lowercase drive letter is made uppercase and every
   backslash is translated into a slash.  On other systems the name is
   left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action.  */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6345
6346 /* Set the minimum size of a string contained in a linebuffer. */
6347 static void
6348 linebuffer_setlen (lbp, toksize)
6349 linebuffer *lbp;
6350 int toksize;
6351 {
6352 while (lbp->size <= toksize)
6353 {
6354 lbp->size *= 2;
6355 xrnew (lbp->buffer, lbp->size, char);
6356 }
6357 lbp->len = toksize;
6358 }
6359
6360 /* Like malloc but get fatal error if memory is exhausted. */
6361 static PTR
6362 xmalloc (size)
6363 unsigned int size;
6364 {
6365 PTR result = (PTR) malloc (size);
6366 if (result == NULL)
6367 fatal ("virtual memory exhausted", (char *)NULL);
6368 return result;
6369 }
6370
6371 static PTR
6372 xrealloc (ptr, size)
6373 char *ptr;
6374 unsigned int size;
6375 {
6376 PTR result = (PTR) realloc (ptr, size);
6377 if (result == NULL)
6378 fatal ("virtual memory exhausted", (char *)NULL);
6379 return result;
6380 }
6381
6382 /*
6383 * Local Variables:
6384 * c-indentation-style: gnu
6385 * indent-tabs-mode: t
6386 * tab-width: 8
6387 * fill-column: 79
6388 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern")
6389 * End:
6390 */