]> code.delx.au - gnu-emacs/blob - lib-src/etags.c
* lisp/minibuffer.el (completions-format): Change default from nil to
[gnu-emacs] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
49
50
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
54
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
58
59
60 /*
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
72 *
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
74 */
75
76 /*
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider distributing etags
79 * together with a configuration file containing regexp definitions for etags.
80 */
81
/* Revision identifier of this etags source. */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";

/* Boolean constants. */
#define TRUE 1
#define FALSE 0

/* Normalize DEBUG to TRUE or FALSE so that it can be tested with `#if'.
   When DEBUG was not defined by the builder, NDEBUG is defined too. */
#ifdef DEBUG
# undef DEBUG
# define DEBUG TRUE
#else
# define DEBUG FALSE
# define NDEBUG /* disable assert */
#endif
94
/* Portability shims: __P wraps prototype argument lists and PTR is the
   generic pointer type.  With config.h they are only supplied when
   missing; without it they are chosen from the value of __STDC__. */
#ifdef HAVE_CONFIG_H
# include <config.h>
/* On some systems, Emacs defines static as nothing for the sake
of unexec. We don't want that here since we don't use unexec. */
# undef static
# ifndef PTR /* for XEmacs */
# define PTR void *
# endif
# ifndef __P /* for XEmacs */
# define __P(args) args
# endif
#else /* no config.h */
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
# define __P(args) args /* use prototypes */
# define PTR void * /* for generic pointers */
# else /* not standard C */
# define __P(args) () /* no prototypes */
# define const /* remove const for old compilers' sake */
# define PTR long * /* don't use void* */
# endif
#endif /* !HAVE_CONFIG_H */

/* Define _GNU_SOURCE unless the builder already did. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
#endif
120
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
128
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
142
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
179
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
187
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
191 #include <sys/types.h>
192 #include <sys/stat.h>
193
194 #include <assert.h>
195 #ifdef NDEBUG
196 # undef assert /* some systems have a buggy assert.h */
197 # define assert(x) ((void) 0)
198 #endif
199
200 #if !defined (S_ISREG) && defined (S_IFREG)
201 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
202 #endif
203
/* Normalize NO_LONG_OPTIONS to TRUE or FALSE so that it can be tested
   with `#if'.  The builder's definition is removed first, as is done for
   DEBUG, MSDOS and CTAGS: redefining a macro with a different replacement
   list (e.g. after -DNO_LONG_OPTIONS) is otherwise diagnosed.
   Without GNU getopt, getopt_long degrades to plain getopt and the long
   option table and index arguments are ignored. */
#ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
# undef NO_LONG_OPTIONS
# define NO_LONG_OPTIONS TRUE
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
extern char *optarg;
extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
213
214 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
215 # ifdef __CYGWIN__ /* compiling on Cygwin */
216 !!! NOTICE !!!
217 the regex.h distributed with Cygwin is not compatible with etags, alas!
218 If you want regular expression support, you should delete this notice and
219 arrange to use the GNU regex.h and regex.c.
220 # endif
221 #endif
222 #include <regex.h>
223
/* Define CTAGS to make the program "ctags" compatible with the usual one.
Leave it undefined to make the program "etags", which makes emacs-style
tag tables and tags typedefs, #defines and struct/union/enum by default.
Either way CTAGS ends up defined as TRUE or FALSE, so later code can
test it with `#if CTAGS'. */
#ifdef CTAGS
# undef CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
#endif
233
/* String equality predicates; each yields non-zero when the strings match.
   streq/strneq compare exactly, strcaseeq/strncaseeq ignore case, and the
   `n' variants look at no more than N characters.
   Both arguments must be non-NULL: the comparison functions dereference
   both of them, so every assertion requires both (when assert is enabled). */
#define streq(s,t) (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
238
/* Character classification.  CHAR() reduces its argument to an index in
   the range 0 .. CHARS-1; the lower-case predicates below look that index
   up in the _wht/_nin/_btk/_itk/_etk tables. */
#define CHARS 256 /* number of table entries; CHAR() masks with CHARS - 1 */
#define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
#define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that pass the masked value, never a negative char. */
#define ISALNUM(c) isalnum (CHAR(c))
#define ISALPHA(c) isalpha (CHAR(c))
#define ISDIGIT(c) isdigit (CHAR(c))
#define ISLOWER(c) islower (CHAR(c))

#define lowcase(c) tolower (CHAR(c))
#define upcase(c) toupper (CHAR(c))
254
255
/*
* xnew, xrnew -- allocate, reallocate storage
*
* SYNOPSIS: Type *xnew (int n, Type);
* void xrnew (OldPointer, int n, Type);
*
* Both request room for N objects of type Type.  xrnew stores the
* reallocated pointer back into OldPointer, which must be an lvalue.
* In DEBUG builds the requests go to trace_malloc/trace_realloc together
* with the file and line of the call; otherwise to xmalloc/xrealloc.
*/
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
(n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
(char *) (op), (n) * sizeof (Type)))
#endif
273
/* Booleans are plain ints in this file (see TRUE and FALSE). */
#define bool int

/* Type of a language parse function: it receives the input stream. */
typedef void Lang_function __P((FILE *));
277
/* Associates a compressed-file suffix with the command that expands it. */
typedef struct
{
  char *suffix; /* file name suffix for this compressor */
  char *command; /* takes one arg and decompresses to stdout */
} compressor;
283
/* Description of one supported language.  Tables of this type are
   initialized positionally, so the member order must not change. */
typedef struct
{
  char *name; /* language name */
  char *help; /* detailed help for the language */
  Lang_function *function; /* parse function */
  char **suffixes; /* name suffixes of this language's files */
  char **filenames; /* names of this language's files */
  char **interpreters; /* interpreters for this language */
  bool metasource; /* source used to generate other sources */
} language;
294
/* Description of one input file; descriptions are chained through NEXT. */
typedef struct fdesc
{
  struct fdesc *next; /* for the linked list */
  char *infname; /* uncompressed input file name */
  char *infabsname; /* absolute uncompressed input file name */
  char *infabsdir; /* absolute dir of input file */
  char *taggedfname; /* file name to write in tagfile */
  language *lang; /* language of file */
  char *prop; /* file properties to write in tagfile */
  bool usecharno; /* etags tags shall contain char number */
  bool written; /* entry written in the tags file */
} fdesc;
307
/* One tag, stored as a node of a binary tree. */
typedef struct node_st
{ /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp; /* description of file to whom tag belongs */
  char *name; /* tag name */
  char *regex; /* search regexp */
  bool valid; /* write this tag on the tag file */
  bool is_func; /* function tag: use regexp in CTAGS mode */
  bool been_warned; /* warning already given for duplicated tag */
  int lno; /* line number tag is on */
  long cno; /* character number line starts on */
} node;
320
/*
* A `linebuffer' is a structure which holds a line of text.
* `readline_internal' reads a line from a stream into a linebuffer
* and works regardless of the length of the line.
* SIZE is the size of BUFFER, LEN is the length of the string in
* BUFFER after readline reads it.
*/
typedef struct
{
  long size; /* allocated size of BUFFER */
  int len; /* length of the string currently held in BUFFER */
  char *buffer; /* the text itself */
} linebuffer;
334
/* Used to support mixing of --lang and file names.  Each command-line
   item is recorded with a tag telling what kind of item it is. */
typedef struct
{
  enum {
    at_language, /* a language specification */
    at_regexp, /* a regular expression */
    at_filename, /* a file name */
    at_stdin, /* read from stdin here */
    at_end /* stop parsing the list */
  } arg_type; /* argument type */
  language *lang; /* language associated with the argument */
  char *what; /* the argument itself */
} argument;
348
/* Structure defining a regular expression.  Regexps are kept in a
   singly linked list through P_NEXT. */
typedef struct regexp
{
  struct regexp *p_next; /* pointer to next in list */
  language *lang; /* if set, use only for this language */
  char *pattern; /* the regexp pattern */
  char *name; /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs; /* re registers */
  bool error_signaled; /* already signaled for this regexp */
  bool force_explicit_name; /* do not allow implicit tag name */
  bool ignore_case; /* ignore case when matching */
  bool multi_line; /* do a multi-line match on the whole file */
} regexp;
363
364
/* Prototypes of the language parse functions.
Many compilers barf on this:
Lang_function Ada_funcs;
so let's write it this way */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *)); /* takes an extra c_ext argument */
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Forth_words __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));
395
/* Command-line help and entry point. */
static void print_language_names __P((void));
static void print_version __P((void));
static void print_help __P((argument *));
int main __P((int, char **));

/* Language/compressor lookup and line reading. */
static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
static language *get_language_from_filename __P((char *, bool));
static void readline __P((linebuffer *, FILE *));
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
static void get_tag __P((char *, char **));

/* Regexp handling, diagnostics and tag-tree insertion.
   Note that `fatal' is the only one here without internal linkage. */
static void analyse_regex __P((char *));
static void free_regexps __P((void));
static void regex_tag_multiline __P((void));
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

/* File processing and tag output. */
static void init __P((void));
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
static void find_entries __P((FILE *));
static void free_tree __P((node *));
static void free_fdesc __P((fdesc *));
static void pfnote __P((char *, bool, char *, int, int, long));
static void make_tag __P((char *, int, bool, char *, int, int, long));
static void invalidate_nodes __P((fdesc *, node **));
static void put_entries __P((node *));

/* String, file-name and memory utilities. */
static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
static void linebuffer_init __P((linebuffer *));
static void linebuffer_setlen __P((linebuffer *, int));
/* NOTE(review): the allocators take `unsigned int' sizes while xnew/xrnew
   pass `(n) * sizeof (Type)' -- confirm no request can exceed that range. */
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
449
450 \f
/* File-scope state shared by the whole program. */
static char searchar = '/'; /* use /.../ searches */

static char *tagfile; /* output file */
static char *progname; /* name this program was invoked with */
static char *cwd; /* current working directory */
static char *tagfiledir; /* directory of tagfile */
static FILE *tagf; /* ioptr for tags file */

static fdesc *fdhead; /* head of file description list */
static fdesc *curfdp; /* current file description */
static int lineno; /* line number of current line */
static long charno; /* current character number */
static long linecharno; /* charno of start of current line */
static char *dbp; /* pointer to start of current tag */

/* presumably the "no character number" marker -- confirm at its use sites */
static const int invalidcharno = -1;

static node *nodehead; /* the head of the binary tree of tags */
static node *last_node; /* the last node created */

static linebuffer lb; /* the current line */
static linebuffer filebuf; /* a buffer containing the whole file */
static linebuffer token_name; /* a buffer containing a tag name */
474
/* boolean "functions" (see init): one flag per character value, read
   through the iswhite/notinname/begtoken/intoken/endtoken macros. */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The character sets named by the comments on those macros. */
static char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
488
/* Flags set from the command line.  Several of them are written directly
   by getopt_long through the `flag' member of the longopts entries. */
static bool append_to_tagfile; /* -a: append to tags */
/* The next five default to TRUE in C and derived languages. */
static bool typedefs; /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
/* 0 struct/enum/union decls, and C++ */
/* member functions. */
static bool constantypedefs; /* -d: create tags for C #define, enum */
/* constants and variables. */
/* -D: opposite of -d. Default under ctags. */
static bool globals; /* create tags for global variables */
static bool members; /* create tags for C member variables */
static bool declarations; /* --declarations: tag them and extern in C&Co*/
static bool no_line_directive; /* ignore #line directives (undocumented) */
static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
static bool update; /* -u: update tags */
static bool vgrind_style; /* -v: create vgrind style index output */
static bool no_warnings; /* -w: suppress warnings (undocumented) */
static bool cxref_style; /* -x: create cxref style output */
static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent; /* -I: ignore indentation in C */
static bool packages_only; /* --packages-only: in Ada, only tag packages*/
510
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* Option code for --parse-stdin; it has no short-option letter. */
#define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin; /* --parse-stdin used */

static regexp *p_head; /* list of all regexps */
static bool need_filebuf; /* some regexes are multi-line */
521
522 static struct option longopts[] =
523 {
524 { "append", no_argument, NULL, 'a' },
525 { "packages-only", no_argument, &packages_only, TRUE },
526 { "c++", no_argument, NULL, 'C' },
527 { "declarations", no_argument, &declarations, TRUE },
528 { "no-line-directive", no_argument, &no_line_directive, TRUE },
529 { "no-duplicates", no_argument, &no_duplicates, TRUE },
530 { "help", no_argument, NULL, 'h' },
531 { "help", no_argument, NULL, 'H' },
532 { "ignore-indentation", no_argument, NULL, 'I' },
533 { "language", required_argument, NULL, 'l' },
534 { "members", no_argument, &members, TRUE },
535 { "no-members", no_argument, &members, FALSE },
536 { "output", required_argument, NULL, 'o' },
537 { "regex", required_argument, NULL, 'r' },
538 { "no-regex", no_argument, NULL, 'R' },
539 { "ignore-case-regex", required_argument, NULL, 'c' },
540 { "parse-stdin", required_argument, NULL, STDIN },
541 { "version", no_argument, NULL, 'V' },
542
543 #if CTAGS /* Ctags options */
544 { "backward-search", no_argument, NULL, 'B' },
545 { "cxref", no_argument, NULL, 'x' },
546 { "defines", no_argument, NULL, 'd' },
547 { "globals", no_argument, &globals, TRUE },
548 { "typedefs", no_argument, NULL, 't' },
549 { "typedefs-and-c++", no_argument, NULL, 'T' },
550 { "update", no_argument, NULL, 'u' },
551 { "vgrind", no_argument, NULL, 'v' },
552 { "no-warn", no_argument, NULL, 'w' },
553
554 #else /* Etags options */
555 { "no-defines", no_argument, NULL, 'D' },
556 { "no-globals", no_argument, &globals, FALSE },
557 { "include", required_argument, NULL, 'i' },
558 #endif
559 { NULL }
560 };
561
/* Known compressed-file suffixes and the command used to expand each;
   the list ends with an entry whose suffix is NULL. */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};
571
572 /*
573 * Language stuff.
574 */
575
/* Ada: recognized file suffixes and the text shown by --help. */
static char *Ada_suffixes [] = {
  "ads",
  "adb",
  "ada",
  NULL
};
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
595
/* Assembly: recognized file suffixes and the text shown by --help. */
static char *Asm_suffixes [] = {
  "a",          /* Unix assembler */
  "asm",        /* Microcontroller assembly */
  "def",        /* BSO/Tasking definition includes */
  "inc",        /* Microcontroller include files */
  "ins",        /* Microcontroller include files */
  "s",          /* Unix assembler */
  "sa",         /* Unix assembler */
  "S",          /* cpp-processed Unix assembler */
  "src",        /* BSO/Tasking C compiler output */
  NULL
};
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
611
612
/* C.  Files named .c or .h may turn out to be C++: either --c++ was given
   or the `class' or `template' keywords are met inside the file.  That is
   why default_C_entries is called for these. */
static char *default_C_suffixes [] = {
  "c",
  "h",
  NULL
};
#if CTAGS /* C help for Ctags */
static char default_C_help [] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#else /* C help for Etags */
static char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'.";
#endif /* C help for Ctags and Etags */
638
/* C++: recognized file suffixes and the text shown by --help. */
static char *Cplusplus_suffixes [] = {
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",          /* Objective C++ */
  "pdb",        /* Postscript with C syntax */
  NULL
};
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
652
/* Java: recognized file suffixes and the text shown by --help. */
static char *Cjava_suffixes [] = {
  "java",
  NULL
};
static char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
658
659
/* Cobol: recognized file suffixes and the text shown by --help. */
static char *Cobol_suffixes [] = {
  "COB",
  "cob",
  NULL
};
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
665
/* C*: recognized file suffixes (this language has no help text of its own). */
static char *Cstar_suffixes [] = {
  "cs",
  "hs",
  NULL
};
668
/* Erlang: recognized file suffixes and the text shown by --help. */
static char *Erlang_suffixes [] = {
  "erl",
  "hrl",
  NULL
};
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
674
/* Forth: recognized file suffixes and the text shown by --help.
   The suffix table is given internal linkage like every other suffix
   table in this file, so it no longer adds a global symbol. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
680
/* Fortran: recognized file suffixes and the text shown by --help. */
static char *Fortran_suffixes [] = {
  "F",
  "f",
  "f90",
  "for",
  NULL
};
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";
685
/* HTML: recognized file suffixes and the text shown by --help. */
static char *HTML_suffixes [] = {
  "htm",
  "html",
  "shtml",
  NULL
};
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
692
/* Lisp: recognized file suffixes and the text shown by --help. */
static char *Lisp_suffixes [] = {
  "cl",
  "clisp",
  "el",
  "l",
  "lisp",
  "LSP",
  "lsp",
  "ml",
  NULL
};
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";
700
/* Lua: recognized file suffixes and the text shown by --help. */
static char *Lua_suffixes [] = {
  "lua",
  "LUA",
  NULL
};
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
705
/* Makefiles are recognized by whole file name rather than by suffix. */
static char *Makefile_filenames [] = {
  "Makefile",
  "makefile",
  "GNUMakefile",
  "Makefile.in",
  "Makefile.am",
  NULL
};
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";
711
/* Objective C: recognized file suffixes and the text shown by --help. */
static char *Objc_suffixes [] = {
  "lm",         /* Objective lex file */
  "m",          /* Objective C file */
  NULL
};
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
721
/* Pascal: recognized file suffixes and the text shown by --help. */
static char *Pascal_suffixes [] = {
  "p",
  "pas",
  NULL
};
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
728
/* Perl: file suffixes, interpreter names and the text shown by --help. */
static char *Perl_suffixes [] = {
  "pl",
  "pm",
  NULL
};
static char *Perl_interpreters [] = {
  "perl",
  "@PERL@",
  NULL
};
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
739
/* PHP: recognized file suffixes and the text shown by --help. */
static char *PHP_suffixes [] = {
  "php",
  "php3",
  "php4",
  NULL
};
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";
745
/* Suffixes of files that are always parsed as plain C. */
static char *plain_C_suffixes [] = {
  "pc",         /* Pro*C file */
  NULL
};
749
/* PostScript: recognized file suffixes and the text shown by --help. */
static char *PS_suffixes [] = {
  "ps",
  "psw",        /* .psw is for PSWrap */
  NULL
};
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
754
/* Prolog: recognized file suffixes and the text shown by --help. */
static char *Prolog_suffixes [] = {
  "prolog",
  NULL
};
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
760
/* Python: recognized file suffixes and the text shown by --help. */
static char *Python_suffixes [] = {
  "py",
  NULL
};
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
766
/* Scheme: recognized file suffixes and the text shown by --help.
   Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] = {
  "oak",
  "sch",
  "scheme",
  "SCM",
  "scm",
  "SM",
  "sm",
  "ss",
  "t",
  NULL
};
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
774
/* TeX/LaTeX: recognized file suffixes and the text shown by --help. */
static char *TeX_suffixes [] = {
  "bib",
  "clo",
  "cls",
  "ltx",
  "sty",
  "TeX",
  "tex",
  NULL
};
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
787
788
/* Texinfo: recognized file suffixes and the text shown by --help. */
static char *Texinfo_suffixes [] = {
  "texi",
  "texinfo",
  "txi",
  NULL
};
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";
793
/* Yacc/Bison: recognized file suffixes and the text shown by --help. */
static char *Yacc_suffixes [] = {
  "y",
  "y++",
  "ym",         /* .ym is Objective yacc file */
  "yxx",
  "yy",
  NULL
};
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
801
/* Help texts for the pseudo-languages `auto' and `none', and the
   fallback text for languages that have no detailed help. */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files base on file name suffix and file contents.";

static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

static char no_lang_help [] =
  "No detailed help available for this language.";
812
813
814 /*
815 * Table of languages.
816 *
817 * It is ok for a given function to be listed under more than one
818 * name. I just didn't.
819 */
820
821 static language lang_names [] =
822 {
823 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
824 { "asm", Asm_help, Asm_labels, Asm_suffixes },
825 { "c", default_C_help, default_C_entries, default_C_suffixes },
826 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
827 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
828 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
829 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
830 { "forth", Forth_help, Forth_words, Forth_suffixes },
831 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
832 { "html", HTML_help, HTML_labels, HTML_suffixes },
833 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
834 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
835 { "lua", Lua_help, Lua_functions, Lua_suffixes },
836 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
837 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
838 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
839 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
840 { "php", PHP_help, PHP_functions, PHP_suffixes },
841 { "postscript",PS_help, PS_functions, PS_suffixes },
842 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
843 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
844 { "python", Python_help, Python_functions, Python_suffixes },
845 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
846 { "tex", TeX_help, TeX_commands, TeX_suffixes },
847 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
848 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
849 { "auto", auto_help }, /* default guessing scheme */
850 { "none", none_help, just_read_file }, /* regexp matching only */
851 { NULL } /* end of list */
852 };
853
854 \f
855 static void
856 print_language_names ()
857 {
858 language *lang;
859 char **name, **ext;
860
861 puts ("\nThese are the currently supported languages, along with the\n\
862 default file names and dot suffixes:");
863 for (lang = lang_names; lang->name != NULL; lang++)
864 {
865 printf (" %-*s", 10, lang->name);
866 if (lang->filenames != NULL)
867 for (name = lang->filenames; *name != NULL; name++)
868 printf (" %s", *name);
869 if (lang->suffixes != NULL)
870 for (ext = lang->suffixes; *ext != NULL; ext++)
871 printf (" .%s", *ext);
872 puts ("");
873 }
874 puts ("where `auto' means use default language for files based on file\n\
875 name suffix, and `none' means only do regexp processing on files.\n\
876 If no language is specified and no matching suffix is found,\n\
877 the first line of the file is read for a sharp-bang (#!) sequence\n\
878 followed by the name of an interpreter. If no such sequence is found,\n\
879 Fortran is tried first; if no tags are found, C is tried next.\n\
880 When parsing any C file, a \"class\" or \"template\" keyword\n\
881 switches to C++.");
882 puts ("Compressed files are supported using gzip and bzip2.\n\
883 \n\
884 For detailed help on a given language use, for example,\n\
885 etags --help --lang=ada.");
886 }
887
888 #ifndef EMACS_NAME
889 # define EMACS_NAME "standalone"
890 #endif
891 #ifndef VERSION
892 # define VERSION "17.38.1.4"
893 #endif
894 static void
895 print_version ()
896 {
897 /* Makes it easier to update automatically. */
898 char emacs_copyright[] = "Copyright (C) 2010 Free Software Foundation, Inc.";
899
900 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
901 puts (emacs_copyright);
902 puts ("This program is distributed under the terms in ETAGS.README");
903
904 exit (EXIT_SUCCESS);
905 }
906
907 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
908 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
909 #endif
910
911 static void
912 print_help (argbuffer)
913 argument *argbuffer;
914 {
915 bool help_for_lang = FALSE;
916
917 for (; argbuffer->arg_type != at_end; argbuffer++)
918 if (argbuffer->arg_type == at_language)
919 {
920 if (help_for_lang)
921 puts ("");
922 puts (argbuffer->lang->help);
923 help_for_lang = TRUE;
924 }
925
926 if (help_for_lang)
927 exit (EXIT_SUCCESS);
928
929 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
930 \n\
931 These are the options accepted by %s.\n", progname, progname);
932 if (NO_LONG_OPTIONS)
933 puts ("WARNING: long option names do not work with this executable,\n\
934 as it is not linked with GNU getopt.");
935 else
936 puts ("You may use unambiguous abbreviations for the long option names.");
937 puts (" A - as file name means read names from stdin (one per line).\n\
938 Absolute names are stored in the output file as they are.\n\
939 Relative ones are stored relative to the output file's directory.\n");
940
941 puts ("-a, --append\n\
942 Append tag entries to existing tags file.");
943
944 puts ("--packages-only\n\
945 For Ada files, only generate tags for packages.");
946
947 if (CTAGS)
948 puts ("-B, --backward-search\n\
949 Write the search commands for the tag entries using '?', the\n\
950 backward-search command instead of '/', the forward-search command.");
951
952 /* This option is mostly obsolete, because etags can now automatically
953 detect C++. Retained for backward compatibility and for debugging and
954 experimentation. In principle, we could want to tag as C++ even
955 before any "class" or "template" keyword.
956 puts ("-C, --c++\n\
957 Treat files whose name suffix defaults to C language as C++ files.");
958 */
959
960 puts ("--declarations\n\
961 In C and derived languages, create tags for function declarations,");
962 if (CTAGS)
963 puts ("\tand create tags for extern variables if --globals is used.");
964 else
965 puts
966 ("\tand create tags for extern variables unless --no-globals is used.");
967
968 if (CTAGS)
969 puts ("-d, --defines\n\
970 Create tag entries for C #define constants and enum constants, too.");
971 else
972 puts ("-D, --no-defines\n\
973 Don't create tag entries for C #define constants and enum constants.\n\
974 This makes the tags file smaller.");
975
976 if (!CTAGS)
977 puts ("-i FILE, --include=FILE\n\
978 Include a note in tag file indicating that, when searching for\n\
979 a tag, one should also consult the tags file FILE after\n\
980 checking the current file.");
981
982 puts ("-l LANG, --language=LANG\n\
983 Force the following files to be considered as written in the\n\
984 named language up to the next --language=LANG option.");
985
986 if (CTAGS)
987 puts ("--globals\n\
988 Create tag entries for global variables in some languages.");
989 else
990 puts ("--no-globals\n\
991 Do not create tag entries for global variables in some\n\
992 languages. This makes the tags file smaller.");
993
994 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
995 puts ("--no-line-directive\n\
996 Ignore #line preprocessor directives in C and derived languages.");
997
998 if (CTAGS)
999 puts ("--members\n\
1000 Create tag entries for members of structures in some languages.");
1001 else
1002 puts ("--no-members\n\
1003 Do not create tag entries for members of structures\n\
1004 in some languages.");
1005
1006 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1007 Make a tag for each line matching a regular expression pattern\n\
1008 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1009 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1010 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1011 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1012 puts (" If TAGNAME/ is present, the tags created are named.\n\
1013 For example Tcl named tags can be created with:\n\
1014 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1015 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1016 `m' means to allow multi-line matches, `s' implies `m' and\n\
1017 causes dot to match any character, including newline.");
1018
1019 puts ("-R, --no-regex\n\
1020 Don't create tags from regexps for the following files.");
1021
1022 puts ("-I, --ignore-indentation\n\
1023 In C and C++ do not assume that a closing brace in the first\n\
1024 column is the final brace of a function or structure definition.");
1025
1026 puts ("-o FILE, --output=FILE\n\
1027 Write the tags to FILE.");
1028
1029 puts ("--parse-stdin=NAME\n\
1030 Read from standard input and record tags as belonging to file NAME.");
1031
1032 if (CTAGS)
1033 {
1034 puts ("-t, --typedefs\n\
1035 Generate tag entries for C and Ada typedefs.");
1036 puts ("-T, --typedefs-and-c++\n\
1037 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1038 and C++ member functions.");
1039 }
1040
1041 if (CTAGS)
1042 puts ("-u, --update\n\
1043 Update the tag entries for the given files, leaving tag\n\
1044 entries for other files in place. Currently, this is\n\
1045 implemented by deleting the existing entries for the given\n\
1046 files and then rewriting the new entries at the end of the\n\
1047 tags file. It is often faster to simply rebuild the entire\n\
1048 tag file than to use this.");
1049
1050 if (CTAGS)
1051 {
1052 puts ("-v, --vgrind\n\
1053 Print on the standard output an index of items intended for\n\
1054 human consumption, similar to the output of vgrind. The index\n\
1055 is sorted, and gives the page number of each item.");
1056
1057 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1058 puts ("-w, --no-duplicates\n\
1059 Do not create duplicate tag entries, for compatibility with\n\
1060 traditional ctags.");
1061
1062 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1063 puts ("-w, --no-warn\n\
1064 Suppress warning messages about duplicate tag entries.");
1065
1066 puts ("-x, --cxref\n\
1067 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1068 The output uses line numbers instead of page numbers, but\n\
1069 beyond that the differences are cosmetic; try both to see\n\
1070 which you like.");
1071 }
1072
1073 puts ("-V, --version\n\
1074 Print the version of the program.\n\
1075 -h, --help\n\
1076 Print this help message.\n\
1077 Followed by one or more `--language' options prints detailed\n\
1078 help about tag generation for the specified languages.");
1079
1080 print_language_names ();
1081
1082 puts ("");
1083 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1084
1085 exit (EXIT_SUCCESS);
1086 }
1087
1088 \f
1089 int
1090 main (argc, argv)
1091 int argc;
1092 char *argv[];
1093 {
1094 int i;
1095 unsigned int nincluded_files;
1096 char **included_files;
1097 argument *argbuffer;
1098 int current_arg, file_count;
1099 linebuffer filename_lb;
1100 bool help_asked = FALSE;
1101 char *optstring;
1102 int opt;
1103
1104
1105 #ifdef DOS_NT
1106 _fmode = O_BINARY; /* all of files are treated as binary files */
1107 #endif /* DOS_NT */
1108
1109 progname = argv[0];
1110 nincluded_files = 0;
1111 included_files = xnew (argc, char *);
1112 current_arg = 0;
1113 file_count = 0;
1114
1115 /* Allocate enough no matter what happens. Overkill, but each one
1116 is small. */
1117 argbuffer = xnew (argc, argument);
1118
1119 /*
1120 * Always find typedefs and structure tags.
1121 * Also default to find macro constants, enum constants, struct
1122 * members and global variables. Do it for both etags and ctags.
1123 */
1124 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1125 globals = members = TRUE;
1126
1127 /* When the optstring begins with a '-' getopt_long does not rearrange the
1128 non-options arguments to be at the end, but leaves them alone. */
1129 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1130 "ac:Cf:Il:o:r:RSVhH",
1131 (CTAGS) ? "BxdtTuvw" : "Di:");
1132
1133 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1134 switch (opt)
1135 {
1136 case 0:
1137 /* If getopt returns 0, then it has already processed a
1138 long-named option. We should do nothing. */
1139 break;
1140
1141 case 1:
1142 /* This means that a file name has been seen. Record it. */
1143 argbuffer[current_arg].arg_type = at_filename;
1144 argbuffer[current_arg].what = optarg;
1145 ++current_arg;
1146 ++file_count;
1147 break;
1148
1149 case STDIN:
1150 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1151 argbuffer[current_arg].arg_type = at_stdin;
1152 argbuffer[current_arg].what = optarg;
1153 ++current_arg;
1154 ++file_count;
1155 if (parsing_stdin)
1156 fatal ("cannot parse standard input more than once", (char *)NULL);
1157 parsing_stdin = TRUE;
1158 break;
1159
1160 /* Common options. */
1161 case 'a': append_to_tagfile = TRUE; break;
1162 case 'C': cplusplus = TRUE; break;
1163 case 'f': /* for compatibility with old makefiles */
1164 case 'o':
1165 if (tagfile)
1166 {
1167 error ("-o option may only be given once.", (char *)NULL);
1168 suggest_asking_for_help ();
1169 /* NOTREACHED */
1170 }
1171 tagfile = optarg;
1172 break;
1173 case 'I':
1174 case 'S': /* for backward compatibility */
1175 ignoreindent = TRUE;
1176 break;
1177 case 'l':
1178 {
1179 language *lang = get_language_from_langname (optarg);
1180 if (lang != NULL)
1181 {
1182 argbuffer[current_arg].lang = lang;
1183 argbuffer[current_arg].arg_type = at_language;
1184 ++current_arg;
1185 }
1186 }
1187 break;
1188 case 'c':
1189 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1190 optarg = concat (optarg, "i", ""); /* memory leak here */
1191 /* FALLTHRU */
1192 case 'r':
1193 argbuffer[current_arg].arg_type = at_regexp;
1194 argbuffer[current_arg].what = optarg;
1195 ++current_arg;
1196 break;
1197 case 'R':
1198 argbuffer[current_arg].arg_type = at_regexp;
1199 argbuffer[current_arg].what = NULL;
1200 ++current_arg;
1201 break;
1202 case 'V':
1203 print_version ();
1204 break;
1205 case 'h':
1206 case 'H':
1207 help_asked = TRUE;
1208 break;
1209
1210 /* Etags options */
1211 case 'D': constantypedefs = FALSE; break;
1212 case 'i': included_files[nincluded_files++] = optarg; break;
1213
1214 /* Ctags options. */
1215 case 'B': searchar = '?'; break;
1216 case 'd': constantypedefs = TRUE; break;
1217 case 't': typedefs = TRUE; break;
1218 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1219 case 'u': update = TRUE; break;
1220 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1221 case 'x': cxref_style = TRUE; break;
1222 case 'w': no_warnings = TRUE; break;
1223 default:
1224 suggest_asking_for_help ();
1225 /* NOTREACHED */
1226 }
1227
1228 /* No more options. Store the rest of arguments. */
1229 for (; optind < argc; optind++)
1230 {
1231 argbuffer[current_arg].arg_type = at_filename;
1232 argbuffer[current_arg].what = argv[optind];
1233 ++current_arg;
1234 ++file_count;
1235 }
1236
1237 argbuffer[current_arg].arg_type = at_end;
1238
1239 if (help_asked)
1240 print_help (argbuffer);
1241 /* NOTREACHED */
1242
1243 if (nincluded_files == 0 && file_count == 0)
1244 {
1245 error ("no input files specified.", (char *)NULL);
1246 suggest_asking_for_help ();
1247 /* NOTREACHED */
1248 }
1249
1250 if (tagfile == NULL)
1251 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1252 cwd = etags_getcwd (); /* the current working directory */
1253 if (cwd[strlen (cwd) - 1] != '/')
1254 {
1255 char *oldcwd = cwd;
1256 cwd = concat (oldcwd, "/", "");
1257 free (oldcwd);
1258 }
1259
1260 /* Compute base directory for relative file names. */
1261 if (streq (tagfile, "-")
1262 || strneq (tagfile, "/dev/", 5))
1263 tagfiledir = cwd; /* relative file names are relative to cwd */
1264 else
1265 {
1266 canonicalize_filename (tagfile);
1267 tagfiledir = absolute_dirname (tagfile, cwd);
1268 }
1269
1270 init (); /* set up boolean "functions" */
1271
1272 linebuffer_init (&lb);
1273 linebuffer_init (&filename_lb);
1274 linebuffer_init (&filebuf);
1275 linebuffer_init (&token_name);
1276
1277 if (!CTAGS)
1278 {
1279 if (streq (tagfile, "-"))
1280 {
1281 tagf = stdout;
1282 #ifdef DOS_NT
1283 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1284 doesn't take effect until after `stdout' is already open). */
1285 if (!isatty (fileno (stdout)))
1286 setmode (fileno (stdout), O_BINARY);
1287 #endif /* DOS_NT */
1288 }
1289 else
1290 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1291 if (tagf == NULL)
1292 pfatal (tagfile);
1293 }
1294
1295 /*
1296 * Loop through files finding functions.
1297 */
1298 for (i = 0; i < current_arg; i++)
1299 {
1300 static language *lang; /* non-NULL if language is forced */
1301 char *this_file;
1302
1303 switch (argbuffer[i].arg_type)
1304 {
1305 case at_language:
1306 lang = argbuffer[i].lang;
1307 break;
1308 case at_regexp:
1309 analyse_regex (argbuffer[i].what);
1310 break;
1311 case at_filename:
1312 this_file = argbuffer[i].what;
1313 /* Input file named "-" means read file names from stdin
1314 (one per line) and use them. */
1315 if (streq (this_file, "-"))
1316 {
1317 if (parsing_stdin)
1318 fatal ("cannot parse standard input AND read file names from it",
1319 (char *)NULL);
1320 while (readline_internal (&filename_lb, stdin) > 0)
1321 process_file_name (filename_lb.buffer, lang);
1322 }
1323 else
1324 process_file_name (this_file, lang);
1325 break;
1326 case at_stdin:
1327 this_file = argbuffer[i].what;
1328 process_file (stdin, this_file, lang);
1329 break;
1330 }
1331 }
1332
1333 free_regexps ();
1334 free (lb.buffer);
1335 free (filebuf.buffer);
1336 free (token_name.buffer);
1337
1338 if (!CTAGS || cxref_style)
1339 {
1340 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1341 put_entries (nodehead);
1342 free_tree (nodehead);
1343 nodehead = NULL;
1344 if (!CTAGS)
1345 {
1346 fdesc *fdp;
1347
1348 /* Output file entries that have no tags. */
1349 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1350 if (!fdp->written)
1351 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1352
1353 while (nincluded_files-- > 0)
1354 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1355
1356 if (fclose (tagf) == EOF)
1357 pfatal (tagfile);
1358 }
1359
1360 exit (EXIT_SUCCESS);
1361 }
1362
1363 /* From here on, we are in (CTAGS && !cxref_style) */
1364 if (update)
1365 {
1366 char cmd[BUFSIZ];
1367 for (i = 0; i < current_arg; ++i)
1368 {
1369 switch (argbuffer[i].arg_type)
1370 {
1371 case at_filename:
1372 case at_stdin:
1373 break;
1374 default:
1375 continue; /* the for loop */
1376 }
1377 sprintf (cmd,
1378 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1379 tagfile, argbuffer[i].what, tagfile);
1380 if (system (cmd) != EXIT_SUCCESS)
1381 fatal ("failed to execute shell command", (char *)NULL);
1382 }
1383 append_to_tagfile = TRUE;
1384 }
1385
1386 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1387 if (tagf == NULL)
1388 pfatal (tagfile);
1389 put_entries (nodehead); /* write all the tags (CTAGS) */
1390 free_tree (nodehead);
1391 nodehead = NULL;
1392 if (fclose (tagf) == EOF)
1393 pfatal (tagfile);
1394
1395 if (CTAGS)
1396 if (append_to_tagfile || update)
1397 {
1398 char cmd[2*BUFSIZ+20];
1399 /* Maybe these should be used:
1400 setenv ("LC_COLLATE", "C", 1);
1401 setenv ("LC_ALL", "C", 1); */
1402 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1403 exit (system (cmd));
1404 }
1405 return EXIT_SUCCESS;
1406 }
1407
1408
1409 /*
1410 * Return a compressor given the file name. If EXTPTR is non-zero,
1411 * return a pointer into FILE where the compressor-specific
1412 * extension begins. If no compressor is found, NULL is returned
1413 * and EXTPTR is not significant.
1414 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1415 */
1416 static compressor *
1417 get_compressor_from_suffix (file, extptr)
1418 char *file;
1419 char **extptr;
1420 {
1421 compressor *compr;
1422 char *slash, *suffix;
1423
1424 /* File has been processed by canonicalize_filename,
1425 so we don't need to consider backslashes on DOS_NT. */
1426 slash = etags_strrchr (file, '/');
1427 suffix = etags_strrchr (file, '.');
1428 if (suffix == NULL || suffix < slash)
1429 return NULL;
1430 if (extptr != NULL)
1431 *extptr = suffix;
1432 suffix += 1;
1433 /* Let those poor souls who live with DOS 8+3 file name limits get
1434 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1435 Only the first do loop is run if not MSDOS */
1436 do
1437 {
1438 for (compr = compressors; compr->suffix != NULL; compr++)
1439 if (streq (compr->suffix, suffix))
1440 return compr;
1441 if (!MSDOS)
1442 break; /* do it only once: not really a loop */
1443 if (extptr != NULL)
1444 *extptr = ++suffix;
1445 } while (*suffix != '\0');
1446 return NULL;
1447 }
1448
1449
1450
1451 /*
1452 * Return a language given the name.
1453 */
1454 static language *
1455 get_language_from_langname (name)
1456 const char *name;
1457 {
1458 language *lang;
1459
1460 if (name == NULL)
1461 error ("empty language name", (char *)NULL);
1462 else
1463 {
1464 for (lang = lang_names; lang->name != NULL; lang++)
1465 if (streq (name, lang->name))
1466 return lang;
1467 error ("unknown language \"%s\"", name);
1468 }
1469
1470 return NULL;
1471 }
1472
1473
1474 /*
1475 * Return a language given the interpreter name.
1476 */
1477 static language *
1478 get_language_from_interpreter (interpreter)
1479 char *interpreter;
1480 {
1481 language *lang;
1482 char **iname;
1483
1484 if (interpreter == NULL)
1485 return NULL;
1486 for (lang = lang_names; lang->name != NULL; lang++)
1487 if (lang->interpreters != NULL)
1488 for (iname = lang->interpreters; *iname != NULL; iname++)
1489 if (streq (*iname, interpreter))
1490 return lang;
1491
1492 return NULL;
1493 }
1494
1495
1496
1497 /*
1498 * Return a language given the file name.
1499 */
1500 static language *
1501 get_language_from_filename (file, case_sensitive)
1502 char *file;
1503 bool case_sensitive;
1504 {
1505 language *lang;
1506 char **name, **ext, *suffix;
1507
1508 /* Try whole file name first. */
1509 for (lang = lang_names; lang->name != NULL; lang++)
1510 if (lang->filenames != NULL)
1511 for (name = lang->filenames; *name != NULL; name++)
1512 if ((case_sensitive)
1513 ? streq (*name, file)
1514 : strcaseeq (*name, file))
1515 return lang;
1516
1517 /* If not found, try suffix after last dot. */
1518 suffix = etags_strrchr (file, '.');
1519 if (suffix == NULL)
1520 return NULL;
1521 suffix += 1;
1522 for (lang = lang_names; lang->name != NULL; lang++)
1523 if (lang->suffixes != NULL)
1524 for (ext = lang->suffixes; *ext != NULL; ext++)
1525 if ((case_sensitive)
1526 ? streq (*ext, suffix)
1527 : strcaseeq (*ext, suffix))
1528 return lang;
1529 return NULL;
1530 }
1531
1532 \f
1533 /*
1534 * This routine is called on each file argument.
1535 */
1536 static void
1537 process_file_name (file, lang)
1538 char *file;
1539 language *lang;
1540 {
1541 struct stat stat_buf;
1542 FILE *inf;
1543 fdesc *fdp;
1544 compressor *compr;
1545 char *compressed_name, *uncompressed_name;
1546 char *ext, *real_name;
1547 int retval;
1548
1549 canonicalize_filename (file);
1550 if (streq (file, tagfile) && !streq (tagfile, "-"))
1551 {
1552 error ("skipping inclusion of %s in self.", file);
1553 return;
1554 }
1555 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1556 {
1557 compressed_name = NULL;
1558 real_name = uncompressed_name = savestr (file);
1559 }
1560 else
1561 {
1562 real_name = compressed_name = savestr (file);
1563 uncompressed_name = savenstr (file, ext - file);
1564 }
1565
1566 /* If the canonicalized uncompressed name
1567 has already been dealt with, skip it silently. */
1568 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1569 {
1570 assert (fdp->infname != NULL);
1571 if (streq (uncompressed_name, fdp->infname))
1572 goto cleanup;
1573 }
1574
1575 if (stat (real_name, &stat_buf) != 0)
1576 {
1577 /* Reset real_name and try with a different name. */
1578 real_name = NULL;
1579 if (compressed_name != NULL) /* try with the given suffix */
1580 {
1581 if (stat (uncompressed_name, &stat_buf) == 0)
1582 real_name = uncompressed_name;
1583 }
1584 else /* try all possible suffixes */
1585 {
1586 for (compr = compressors; compr->suffix != NULL; compr++)
1587 {
1588 compressed_name = concat (file, ".", compr->suffix);
1589 if (stat (compressed_name, &stat_buf) != 0)
1590 {
1591 if (MSDOS)
1592 {
1593 char *suf = compressed_name + strlen (file);
1594 size_t suflen = strlen (compr->suffix) + 1;
1595 for ( ; suf[1]; suf++, suflen--)
1596 {
1597 memmove (suf, suf + 1, suflen);
1598 if (stat (compressed_name, &stat_buf) == 0)
1599 {
1600 real_name = compressed_name;
1601 break;
1602 }
1603 }
1604 if (real_name != NULL)
1605 break;
1606 } /* MSDOS */
1607 free (compressed_name);
1608 compressed_name = NULL;
1609 }
1610 else
1611 {
1612 real_name = compressed_name;
1613 break;
1614 }
1615 }
1616 }
1617 if (real_name == NULL)
1618 {
1619 perror (file);
1620 goto cleanup;
1621 }
1622 } /* try with a different name */
1623
1624 if (!S_ISREG (stat_buf.st_mode))
1625 {
1626 error ("skipping %s: it is not a regular file.", real_name);
1627 goto cleanup;
1628 }
1629 if (real_name == compressed_name)
1630 {
1631 char *cmd = concat (compr->command, " ", real_name);
1632 inf = (FILE *) popen (cmd, "r");
1633 free (cmd);
1634 }
1635 else
1636 inf = fopen (real_name, "r");
1637 if (inf == NULL)
1638 {
1639 perror (real_name);
1640 goto cleanup;
1641 }
1642
1643 process_file (inf, uncompressed_name, lang);
1644
1645 if (real_name == compressed_name)
1646 retval = pclose (inf);
1647 else
1648 retval = fclose (inf);
1649 if (retval < 0)
1650 pfatal (file);
1651
1652 cleanup:
1653 free (compressed_name);
1654 free (uncompressed_name);
1655 last_node = NULL;
1656 curfdp = NULL;
1657 return;
1658 }
1659
1660 static void
1661 process_file (fh, fn, lang)
1662 FILE *fh;
1663 char *fn;
1664 language *lang;
1665 {
1666 static const fdesc emptyfdesc;
1667 fdesc *fdp;
1668
1669 /* Create a new input file description entry. */
1670 fdp = xnew (1, fdesc);
1671 *fdp = emptyfdesc;
1672 fdp->next = fdhead;
1673 fdp->infname = savestr (fn);
1674 fdp->lang = lang;
1675 fdp->infabsname = absolute_filename (fn, cwd);
1676 fdp->infabsdir = absolute_dirname (fn, cwd);
1677 if (filename_is_absolute (fn))
1678 {
1679 /* An absolute file name. Canonicalize it. */
1680 fdp->taggedfname = absolute_filename (fn, NULL);
1681 }
1682 else
1683 {
1684 /* A file name relative to cwd. Make it relative
1685 to the directory of the tags file. */
1686 fdp->taggedfname = relative_filename (fn, tagfiledir);
1687 }
1688 fdp->usecharno = TRUE; /* use char position when making tags */
1689 fdp->prop = NULL;
1690 fdp->written = FALSE; /* not written on tags file yet */
1691
1692 fdhead = fdp;
1693 curfdp = fdhead; /* the current file description */
1694
1695 find_entries (fh);
1696
1697 /* If not Ctags, and if this is not metasource and if it contained no #line
1698 directives, we can write the tags and free all nodes pointing to
1699 curfdp. */
1700 if (!CTAGS
1701 && curfdp->usecharno /* no #line directives in this file */
1702 && !curfdp->lang->metasource)
1703 {
1704 node *np, *prev;
1705
1706 /* Look for the head of the sublist relative to this file. See add_node
1707 for the structure of the node tree. */
1708 prev = NULL;
1709 for (np = nodehead; np != NULL; prev = np, np = np->left)
1710 if (np->fdp == curfdp)
1711 break;
1712
1713 /* If we generated tags for this file, write and delete them. */
1714 if (np != NULL)
1715 {
1716 /* This is the head of the last sublist, if any. The following
1717 instructions depend on this being true. */
1718 assert (np->left == NULL);
1719
1720 assert (fdhead == curfdp);
1721 assert (last_node->fdp == curfdp);
1722 put_entries (np); /* write tags for file curfdp->taggedfname */
1723 free_tree (np); /* remove the written nodes */
1724 if (prev == NULL)
1725 nodehead = NULL; /* no nodes left */
1726 else
1727 prev->left = NULL; /* delete the pointer to the sublist */
1728 }
1729 }
1730 }
1731
1732 /*
1733 * This routine sets up the boolean pseudo-functions which work
1734 * by setting boolean flags dependent upon the corresponding character.
1735 * Every char which is NOT in that string is not a white char. Therefore,
1736 * all of the array "_wht" is set to FALSE, and then the elements
1737 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1738 * of a char is TRUE if it is the string "white", else FALSE.
1739 */
1740 static void
1741 init ()
1742 {
1743 register char *sp;
1744 register int i;
1745
1746 for (i = 0; i < CHARS; i++)
1747 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1748 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1749 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1750 notinname('\0') = notinname('\n');
1751 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1752 begtoken('\0') = begtoken('\n');
1753 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1754 intoken('\0') = intoken('\n');
1755 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1756 endtoken('\0') = endtoken('\n');
1757 }
1758
1759 /*
1760 * This routine opens the specified file and calls the function
1761 * which finds the function and type definitions.
1762 */
1763 static void
1764 find_entries (inf)
1765 FILE *inf;
1766 {
1767 char *cp;
1768 language *lang = curfdp->lang;
1769 Lang_function *parser = NULL;
1770
1771 /* If user specified a language, use it. */
1772 if (lang != NULL && lang->function != NULL)
1773 {
1774 parser = lang->function;
1775 }
1776
1777 /* Else try to guess the language given the file name. */
1778 if (parser == NULL)
1779 {
1780 lang = get_language_from_filename (curfdp->infname, TRUE);
1781 if (lang != NULL && lang->function != NULL)
1782 {
1783 curfdp->lang = lang;
1784 parser = lang->function;
1785 }
1786 }
1787
1788 /* Else look for sharp-bang as the first two characters. */
1789 if (parser == NULL
1790 && readline_internal (&lb, inf) > 0
1791 && lb.len >= 2
1792 && lb.buffer[0] == '#'
1793 && lb.buffer[1] == '!')
1794 {
1795 char *lp;
1796
1797 /* Set lp to point at the first char after the last slash in the
1798 line or, if no slashes, at the first nonblank. Then set cp to
1799 the first successive blank and terminate the string. */
1800 lp = etags_strrchr (lb.buffer+2, '/');
1801 if (lp != NULL)
1802 lp += 1;
1803 else
1804 lp = skip_spaces (lb.buffer + 2);
1805 cp = skip_non_spaces (lp);
1806 *cp = '\0';
1807
1808 if (strlen (lp) > 0)
1809 {
1810 lang = get_language_from_interpreter (lp);
1811 if (lang != NULL && lang->function != NULL)
1812 {
1813 curfdp->lang = lang;
1814 parser = lang->function;
1815 }
1816 }
1817 }
1818
1819 /* We rewind here, even if inf may be a pipe. We fail if the
1820 length of the first line is longer than the pipe block size,
1821 which is unlikely. */
1822 rewind (inf);
1823
1824 /* Else try to guess the language given the case insensitive file name. */
1825 if (parser == NULL)
1826 {
1827 lang = get_language_from_filename (curfdp->infname, FALSE);
1828 if (lang != NULL && lang->function != NULL)
1829 {
1830 curfdp->lang = lang;
1831 parser = lang->function;
1832 }
1833 }
1834
1835 /* Else try Fortran or C. */
1836 if (parser == NULL)
1837 {
1838 node *old_last_node = last_node;
1839
1840 curfdp->lang = get_language_from_langname ("fortran");
1841 find_entries (inf);
1842
1843 if (old_last_node == last_node)
1844 /* No Fortran entries found. Try C. */
1845 {
1846 /* We do not tag if rewind fails.
1847 Only the file name will be recorded in the tags file. */
1848 rewind (inf);
1849 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1850 find_entries (inf);
1851 }
1852 return;
1853 }
1854
1855 if (!no_line_directive
1856 && curfdp->lang != NULL && curfdp->lang->metasource)
1857 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1858 file, or anyway we parsed a file that is automatically generated from
1859 this one. If this is the case, the bingo.c file contained #line
1860 directives that generated tags pointing to this file. Let's delete
1861 them all before parsing this file, which is the real source. */
1862 {
1863 fdesc **fdpp = &fdhead;
1864 while (*fdpp != NULL)
1865 if (*fdpp != curfdp
1866 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1867 /* We found one of those! We must delete both the file description
1868 and all tags referring to it. */
1869 {
1870 fdesc *badfdp = *fdpp;
1871
1872 /* Delete the tags referring to badfdp->taggedfname
1873 that were obtained from badfdp->infname. */
1874 invalidate_nodes (badfdp, &nodehead);
1875
1876 *fdpp = badfdp->next; /* remove the bad description from the list */
1877 free_fdesc (badfdp);
1878 }
1879 else
1880 fdpp = &(*fdpp)->next; /* advance the list pointer */
1881 }
1882
1883 assert (parser != NULL);
1884
1885 /* Generic initialisations before reading from file. */
1886 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1887
1888 /* Generic initialisations before parsing file with readline. */
1889 lineno = 0; /* reset global line number */
1890 charno = 0; /* reset global char number */
1891 linecharno = 0; /* reset global char number of line start */
1892
1893 parser (inf);
1894
1895 regex_tag_multiline ();
1896 }
1897
1898 \f
1899 /*
1900 * Check whether an implicitly named tag should be created,
1901 * then call `pfnote'.
1902 * NAME is a string that is internally copied by this function.
1903 *
1904 * TAGS format specification
1905 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1906 * The following is explained in some more detail in etc/ETAGS.EBNF.
1907 *
1908 * make_tag creates tags with "implicit tag names" (unnamed tags)
1909 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1910 * 1. NAME does not contain any of the characters in NONAM;
1911 * 2. LINESTART contains name as either a rightmost, or rightmost but
1912 * one character, substring;
1913 * 3. the character, if any, immediately before NAME in LINESTART must
1914 * be a character in NONAM;
1915 * 4. the character, if any, immediately after NAME in LINESTART must
1916 * also be a character in NONAM.
1917 *
1918 * The implementation uses the notinname() macro, which recognises the
1919 * characters stored in the string `nonam'.
1920 * etags.el needs to use the same characters that are in NONAM.
1921 */
1922 static void
1923 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1924 char *name; /* tag name, or NULL if unnamed */
1925 int namelen; /* tag length */
1926 bool is_func; /* tag is a function */
1927 char *linestart; /* start of the line where tag is */
1928 int linelen; /* length of the line where tag is */
1929 int lno; /* line number */
1930 long cno; /* character number */
1931 {
1932 bool named = (name != NULL && namelen > 0);
1933
1934 if (!CTAGS && named) /* maybe set named to false */
1935 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1936 such that etags.el can guess a name from it. */
1937 {
1938 int i;
1939 register char *cp = name;
1940
1941 for (i = 0; i < namelen; i++)
1942 if (notinname (*cp++))
1943 break;
1944 if (i == namelen) /* rule #1 */
1945 {
1946 cp = linestart + linelen - namelen;
1947 if (notinname (linestart[linelen-1]))
1948 cp -= 1; /* rule #4 */
1949 if (cp >= linestart /* rule #2 */
1950 && (cp == linestart
1951 || notinname (cp[-1])) /* rule #3 */
1952 && strneq (name, cp, namelen)) /* rule #2 */
1953 named = FALSE; /* use implicit tag name */
1954 }
1955 }
1956
1957 if (named)
1958 name = savenstr (name, namelen);
1959 else
1960 name = NULL;
1961 pfnote (name, is_func, linestart, linelen, lno, cno);
1962 }
1963
1964 /* Record a tag. */
1965 static void
1966 pfnote (name, is_func, linestart, linelen, lno, cno)
1967 char *name; /* tag name, or NULL if unnamed */
1968 bool is_func; /* tag is a function */
1969 char *linestart; /* start of the line where tag is */
1970 int linelen; /* length of the line where tag is */
1971 int lno; /* line number */
1972 long cno; /* character number */
1973 {
1974 register node *np;
1975
1976 assert (name == NULL || name[0] != '\0');
1977 if (CTAGS && name == NULL)
1978 return;
1979
1980 np = xnew (1, node);
1981
1982 /* If ctags mode, change name "main" to M<thisfilename>. */
1983 if (CTAGS && !cxref_style && streq (name, "main"))
1984 {
1985 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1986 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1987 fp = etags_strrchr (np->name, '.');
1988 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1989 fp[0] = '\0';
1990 }
1991 else
1992 np->name = name;
1993 np->valid = TRUE;
1994 np->been_warned = FALSE;
1995 np->fdp = curfdp;
1996 np->is_func = is_func;
1997 np->lno = lno;
1998 if (np->fdp->usecharno)
1999 /* Our char numbers are 0-base, because of C language tradition?
2000 ctags compatibility? old versions compatibility? I don't know.
2001 Anyway, since emacs's are 1-base we expect etags.el to take care
2002 of the difference. If we wanted to have 1-based numbers, we would
2003 uncomment the +1 below. */
2004 np->cno = cno /* + 1 */ ;
2005 else
2006 np->cno = invalidcharno;
2007 np->left = np->right = NULL;
2008 if (CTAGS && !cxref_style)
2009 {
2010 if (strlen (linestart) < 50)
2011 np->regex = concat (linestart, "$", "");
2012 else
2013 np->regex = savenstr (linestart, 50);
2014 }
2015 else
2016 np->regex = savenstr (linestart, linelen);
2017
2018 add_node (np, &nodehead);
2019 }
2020
2021 /*
2022 * free_tree ()
2023 * recurse on left children, iterate on right children.
2024 */
2025 static void
2026 free_tree (np)
2027 register node *np;
2028 {
2029 while (np)
2030 {
2031 register node *node_right = np->right;
2032 free_tree (np->left);
2033 free (np->name);
2034 free (np->regex);
2035 free (np);
2036 np = node_right;
2037 }
2038 }
2039
2040 /*
2041 * free_fdesc ()
2042 * delete a file description
2043 */
2044 static void
2045 free_fdesc (fdp)
2046 register fdesc *fdp;
2047 {
2048 free (fdp->infname);
2049 free (fdp->infabsname);
2050 free (fdp->infabsdir);
2051 free (fdp->taggedfname);
2052 free (fdp->prop);
2053 free (fdp);
2054 }
2055
2056 /*
2057 * add_node ()
2058 * Adds a node to the tree of nodes. In etags mode, sort by file
2059 * name. In ctags mode, sort by tag name. Make no attempt at
2060 * balancing.
2061 *
2062 * add_node is the only function allowed to add nodes, so it can
2063 * maintain state.
2064 */
2065 static void
2066 add_node (np, cur_node_p)
2067 node *np, **cur_node_p;
2068 {
2069 register int dif;
2070 register node *cur_node = *cur_node_p;
2071
2072 if (cur_node == NULL)
2073 {
2074 *cur_node_p = np;
2075 last_node = np;
2076 return;
2077 }
2078
2079 if (!CTAGS)
2080 /* Etags Mode */
2081 {
2082 /* For each file name, tags are in a linked sublist on the right
2083 pointer. The first tags of different files are a linked list
2084 on the left pointer. last_node points to the end of the last
2085 used sublist. */
2086 if (last_node != NULL && last_node->fdp == np->fdp)
2087 {
2088 /* Let's use the same sublist as the last added node. */
2089 assert (last_node->right == NULL);
2090 last_node->right = np;
2091 last_node = np;
2092 }
2093 else if (cur_node->fdp == np->fdp)
2094 {
2095 /* Scanning the list we found the head of a sublist which is
2096 good for us. Let's scan this sublist. */
2097 add_node (np, &cur_node->right);
2098 }
2099 else
2100 /* The head of this sublist is not good for us. Let's try the
2101 next one. */
2102 add_node (np, &cur_node->left);
2103 } /* if ETAGS mode */
2104
2105 else
2106 {
2107 /* Ctags Mode */
2108 dif = strcmp (np->name, cur_node->name);
2109
2110 /*
2111 * If this tag name matches an existing one, then
2112 * do not add the node, but maybe print a warning.
2113 */
2114 if (no_duplicates && !dif)
2115 {
2116 if (np->fdp == cur_node->fdp)
2117 {
2118 if (!no_warnings)
2119 {
2120 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2121 np->fdp->infname, lineno, np->name);
2122 fprintf (stderr, "Second entry ignored\n");
2123 }
2124 }
2125 else if (!cur_node->been_warned && !no_warnings)
2126 {
2127 fprintf
2128 (stderr,
2129 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2130 np->fdp->infname, cur_node->fdp->infname, np->name);
2131 cur_node->been_warned = TRUE;
2132 }
2133 return;
2134 }
2135
2136 /* Actually add the node */
2137 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2138 } /* if CTAGS mode */
2139 }
2140
2141 /*
2142 * invalidate_nodes ()
2143 * Scan the node tree and invalidate all nodes pointing to the
2144 * given file description (CTAGS case) or free them (ETAGS case).
2145 */
2146 static void
2147 invalidate_nodes (badfdp, npp)
2148 fdesc *badfdp;
2149 node **npp;
2150 {
2151 node *np = *npp;
2152
2153 if (np == NULL)
2154 return;
2155
2156 if (CTAGS)
2157 {
2158 if (np->left != NULL)
2159 invalidate_nodes (badfdp, &np->left);
2160 if (np->fdp == badfdp)
2161 np->valid = FALSE;
2162 if (np->right != NULL)
2163 invalidate_nodes (badfdp, &np->right);
2164 }
2165 else
2166 {
2167 assert (np->fdp != NULL);
2168 if (np->fdp == badfdp)
2169 {
2170 *npp = np->left; /* detach the sublist from the list */
2171 np->left = NULL; /* isolate it */
2172 free_tree (np); /* free it */
2173 invalidate_nodes (badfdp, npp);
2174 }
2175 else
2176 invalidate_nodes (badfdp, &np->left);
2177 }
2178 }
2179
2180 \f
2181 static int total_size_of_entries __P((node *));
2182 static int number_len __P((long));
2183
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is always at least 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for every division by ten that
     still leaves something to print.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2194
2195 /*
2196 * Return total number of characters that put_entries will output for
2197 * the nodes in the linked list at the right of the specified node.
2198 * This count is irrelevant with etags.el since emacs 19.34 at least,
2199 * but is still supplied for backward compatibility.
2200 */
2201 static int
2202 total_size_of_entries (np)
2203 register node *np;
2204 {
2205 register int total = 0;
2206
2207 for (; np != NULL; np = np->right)
2208 if (np->valid)
2209 {
2210 total += strlen (np->regex) + 1; /* pat\177 */
2211 if (np->name != NULL)
2212 total += strlen (np->name) + 1; /* name\001 */
2213 total += number_len ((long) np->lno) + 1; /* lno, */
2214 if (np->cno != invalidcharno) /* cno */
2215 total += number_len (np->cno);
2216 total += 1; /* newline */
2217 }
2218
2219 return total;
2220 }
2221
2222 static void
2223 put_entries (np)
2224 register node *np;
2225 {
2226 register char *sp;
2227 static fdesc *fdp = NULL;
2228
2229 if (np == NULL)
2230 return;
2231
2232 /* Output subentries that precede this one */
2233 if (CTAGS)
2234 put_entries (np->left);
2235
2236 /* Output this entry */
2237 if (np->valid)
2238 {
2239 if (!CTAGS)
2240 {
2241 /* Etags mode */
2242 if (fdp != np->fdp)
2243 {
2244 fdp = np->fdp;
2245 fprintf (tagf, "\f\n%s,%d\n",
2246 fdp->taggedfname, total_size_of_entries (np));
2247 fdp->written = TRUE;
2248 }
2249 fputs (np->regex, tagf);
2250 fputc ('\177', tagf);
2251 if (np->name != NULL)
2252 {
2253 fputs (np->name, tagf);
2254 fputc ('\001', tagf);
2255 }
2256 fprintf (tagf, "%d,", np->lno);
2257 if (np->cno != invalidcharno)
2258 fprintf (tagf, "%ld", np->cno);
2259 fputs ("\n", tagf);
2260 }
2261 else
2262 {
2263 /* Ctags mode */
2264 if (np->name == NULL)
2265 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2266
2267 if (cxref_style)
2268 {
2269 if (vgrind_style)
2270 fprintf (stdout, "%s %s %d\n",
2271 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2272 else
2273 fprintf (stdout, "%-16s %3d %-16s %s\n",
2274 np->name, np->lno, np->fdp->taggedfname, np->regex);
2275 }
2276 else
2277 {
2278 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2279
2280 if (np->is_func)
2281 { /* function or #define macro with args */
2282 putc (searchar, tagf);
2283 putc ('^', tagf);
2284
2285 for (sp = np->regex; *sp; sp++)
2286 {
2287 if (*sp == '\\' || *sp == searchar)
2288 putc ('\\', tagf);
2289 putc (*sp, tagf);
2290 }
2291 putc (searchar, tagf);
2292 }
2293 else
2294 { /* anything else; text pattern inadequate */
2295 fprintf (tagf, "%d", np->lno);
2296 }
2297 putc ('\n', tagf);
2298 }
2299 }
2300 } /* if this node contains a valid tag */
2301
2302 /* Output subentries that follow this one */
2303 put_entries (np->right);
2304 if (!CTAGS)
2305 put_entries (np->left);
2306 }
2307
2308 \f
/* C extensions: bit values stored in a c_ext mask to tell the C-like
   languages apart.  Note that the values of C_STAR and C_JAVA both
   include the C_PLPL bit. */
#define C_EXT	0x00fff		/* mask covering the language bits below */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2317
/*
 * The C symbol tables.
 *
 * Kind of keyword a token can be; st_none means the token is not one
 * of the keywords known to the C symbol table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return, ... */
  st_C_attribute,		/* __attribute__, GTY */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* struct, union, namespace, ... */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2332
2333 static unsigned int hash __P((const char *, unsigned int));
2334 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2335 static enum sym_type C_symtype __P((char *, int, int));
2336
2337 /* Feed stuff between (but not including) %[ and %] lines to:
2338 gperf -m 5
2339 %[
2340 %compare-strncmp
2341 %enum
2342 %struct-type
2343 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2344 %%
2345 if, 0, st_C_ignore
2346 for, 0, st_C_ignore
2347 while, 0, st_C_ignore
2348 switch, 0, st_C_ignore
2349 return, 0, st_C_ignore
2350 __attribute__, 0, st_C_attribute
2351 GTY, 0, st_C_attribute
2352 @interface, 0, st_C_objprot
2353 @protocol, 0, st_C_objprot
2354 @implementation,0, st_C_objimpl
2355 @end, 0, st_C_objend
2356 import, (C_JAVA & ~C_PLPL), st_C_ignore
2357 package, (C_JAVA & ~C_PLPL), st_C_ignore
2358 friend, C_PLPL, st_C_ignore
2359 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2360 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2361 interface, (C_JAVA & ~C_PLPL), st_C_struct
2362 class, 0, st_C_class
2363 namespace, C_PLPL, st_C_struct
2364 domain, C_STAR, st_C_struct
2365 union, 0, st_C_struct
2366 struct, 0, st_C_struct
2367 extern, 0, st_C_extern
2368 enum, 0, st_C_enum
2369 typedef, 0, st_C_typedef
2370 define, 0, st_C_define
2371 undef, 0, st_C_define
2372 operator, C_PLPL, st_C_operator
2373 template, 0, st_C_template
2374 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2375 DEFUN, 0, st_C_gnumacro
2376 SYSCALL, 0, st_C_gnumacro
2377 ENTRY, 0, st_C_gnumacro
2378 PSEUDO, 0, st_C_gnumacro
2379 # These are defined inside C functions, so currently they are not met.
2380 # EXFUN used in glibc, DEFVAR_* in emacs.
2381 #EXFUN, 0, st_C_gnumacro
2382 #DEFVAR_, 0, st_C_gnumacro
2383 %]
2384 and replace lines between %< and %> with its output, then:
2385 - remove the #if characterset check
2386 - make in_word_set static and not inline. */
2387 /*%<*/
2388 /* C code produced by gperf version 3.0.1 */
2389 /* Command-line: gperf -m 5 */
2390 /* Computed positions: -k'2-3' */
2391
2392 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2393 /* maximum key range = 33, duplicates = 0 */
2394
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Perfect hash function for the C keyword table (originally produced
   by gperf, key positions 2 and 3).  The value is LEN plus the
   association value of the second character and, unless LEN is 2, of
   the third character too.  STR must therefore have at least two
   characters, and three when LEN is not 2.  */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Every length other than 2 also hashes the third character.  */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2449
2450 static struct C_stab_entry *
2451 in_word_set (str, len)
2452 register const char *str;
2453 register unsigned int len;
2454 {
2455 enum
2456 {
2457 TOTAL_KEYWORDS = 33,
2458 MIN_WORD_LENGTH = 2,
2459 MAX_WORD_LENGTH = 15,
2460 MIN_HASH_VALUE = 2,
2461 MAX_HASH_VALUE = 34
2462 };
2463
2464 static struct C_stab_entry wordlist[] =
2465 {
2466 {""}, {""},
2467 {"if", 0, st_C_ignore},
2468 {"GTY", 0, st_C_attribute},
2469 {"@end", 0, st_C_objend},
2470 {"union", 0, st_C_struct},
2471 {"define", 0, st_C_define},
2472 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2473 {"template", 0, st_C_template},
2474 {"operator", C_PLPL, st_C_operator},
2475 {"@interface", 0, st_C_objprot},
2476 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2477 {"friend", C_PLPL, st_C_ignore},
2478 {"typedef", 0, st_C_typedef},
2479 {"return", 0, st_C_ignore},
2480 {"@implementation",0, st_C_objimpl},
2481 {"@protocol", 0, st_C_objprot},
2482 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2483 {"extern", 0, st_C_extern},
2484 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2485 {"struct", 0, st_C_struct},
2486 {"domain", C_STAR, st_C_struct},
2487 {"switch", 0, st_C_ignore},
2488 {"enum", 0, st_C_enum},
2489 {"for", 0, st_C_ignore},
2490 {"namespace", C_PLPL, st_C_struct},
2491 {"class", 0, st_C_class},
2492 {"while", 0, st_C_ignore},
2493 {"undef", 0, st_C_define},
2494 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2495 {"__attribute__", 0, st_C_attribute},
2496 {"SYSCALL", 0, st_C_gnumacro},
2497 {"ENTRY", 0, st_C_gnumacro},
2498 {"PSEUDO", 0, st_C_gnumacro},
2499 {"DEFUN", 0, st_C_gnumacro}
2500 };
2501
2502 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2503 {
2504 register int key = hash (str, len);
2505
2506 if (key <= MAX_HASH_VALUE && key >= 0)
2507 {
2508 register const char *s = wordlist[key].name;
2509
2510 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2511 return &wordlist[key];
2512 }
2513 }
2514 return 0;
2515 }
2516 /*%>*/
2517
2518 static enum sym_type
2519 C_symtype (str, len, c_ext)
2520 char *str;
2521 int len;
2522 int c_ext;
2523 {
2524 register struct C_stab_entry *se = in_word_set (str, len);
2525
2526 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2527 return st_none;
2528 return se->type;
2529 }
2530
2531 \f
2532 /*
2533 * Ignoring __attribute__ ((list))
2534 */
2535 static bool inattribute; /* looking at an __attribute__ construct */
2536
2537 /*
2538 * C functions and variables are recognized using a simple
2539 * finite automaton. fvdef is its state variable.
2540 */
2541 static enum
2542 {
2543 fvnone, /* nothing seen */
2544 fdefunkey, /* Emacs DEFUN keyword seen */
2545 fdefunname, /* Emacs DEFUN name seen */
2546 foperator, /* func: operator keyword seen (cplpl) */
2547 fvnameseen, /* function or variable name seen */
2548 fstartlist, /* func: just after open parenthesis */
2549 finlist, /* func: in parameter list */
2550 flistseen, /* func: after parameter list */
2551 fignore, /* func: before open brace */
2552 vignore /* var-like: ignore until ';' */
2553 } fvdef;
2554
2555 static bool fvextern; /* func or var: extern keyword seen; */
2556
2557 /*
2558 * typedefs are recognized using a simple finite automaton.
2559 * typdef is its state variable.
2560 */
2561 static enum
2562 {
2563 tnone, /* nothing seen */
2564 tkeyseen, /* typedef keyword seen */
2565 ttypeseen, /* defined type seen */
2566 tinbody, /* inside typedef body */
2567 tend, /* just before typedef tag */
2568 tignore /* junk after typedef tag */
2569 } typdef;
2570
2571 /*
2572 * struct-like structures (enum, struct and union) are recognized
2573 * using another simple finite automaton. `structdef' is its state
2574 * variable.
2575 */
2576 static enum
2577 {
2578 snone, /* nothing seen yet,
2579 or in struct body if bracelev > 0 */
2580 skeyseen, /* struct-like keyword seen */
2581 stagseen, /* struct-like tag seen */
2582 scolonseen /* colon seen after struct-like tag */
2583 } structdef;
2584
2585 /*
2586 * When objdef is different from onone, objtag is the name of the class.
2587 */
2588 static char *objtag = "<uninited>";
2589
2590 /*
2591 * Yet another little state machine to deal with preprocessor lines.
2592 */
2593 static enum
2594 {
2595 dnone, /* nothing seen */
2596 dsharpseen, /* '#' seen as first char on line */
2597 ddefineseen, /* '#' and 'define' seen */
2598 dignorerest /* ignore rest of line */
2599 } definedef;
2600
2601 /*
2602 * State machine for Objective C protocols and implementations.
2603 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2604 */
2605 static enum
2606 {
2607 onone, /* nothing seen */
2608 oprotocol, /* @interface or @protocol seen */
2609 oimplementation, /* @implementations seen */
2610 otagseen, /* class name seen */
2611 oparenseen, /* parenthesis before category seen */
2612 ocatseen, /* category name seen */
2613 oinbody, /* in @implementation body */
2614 omethodsign, /* in @implementation body, after +/- */
2615 omethodtag, /* after method name */
2616 omethodcolon, /* after method colon */
2617 omethodparm, /* after method parameter */
2618 oignore /* wait for @end */
2619 } objdef;
2620
2621
2622 /*
2623 * Use this structure to keep info about the token read, and how it
2624 * should be tagged. Used by the make_C_tag function to build a tag.
2625 */
2626 static struct tok
2627 {
2628 char *line; /* string containing the token */
2629 int offset; /* where the token starts in LINE */
2630 int length; /* token length */
2631 /*
2632 The previous members can be used to pass strings around for generic
2633 purposes. The following ones specifically refer to creating tags. In this
2634 case the token contained here is the pattern that will be used to create a
2635 tag.
2636 */
2637 bool valid; /* do not create a tag; the token should be
2638 invalidated whenever a state machine is
2639 reset prematurely */
2640 bool named; /* create a named tag */
2641 int lineno; /* source line number of tag */
2642 long linepos; /* source char number of tag */
2643 } token; /* latest token read */
2644
2645 /*
2646 * Variables and functions for dealing with nested structures.
2647 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2648 */
2649 static void pushclass_above __P((int, char *, int));
2650 static void popclass_above __P((int));
2651 static void write_classname __P((linebuffer *, char *qualifier));
2652
/* Stack for nested declaration tags: entry I holds the name of the
   class-like construct at nesting level I and the brace level it was
   pushed with.  Both arrays have SIZE elements, NL of which are used.  */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level  */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.
   Expands to a reference to a variable called `bracelev', which must
   be in scope wherever the macro is used. */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2664
2665 static void
2666 pushclass_above (bracelev, str, len)
2667 int bracelev;
2668 char *str;
2669 int len;
2670 {
2671 int nl;
2672
2673 popclass_above (bracelev);
2674 nl = cstack.nl;
2675 if (nl >= cstack.size)
2676 {
2677 int size = cstack.size *= 2;
2678 xrnew (cstack.cname, size, char *);
2679 xrnew (cstack.bracelev, size, int);
2680 }
2681 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2682 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2683 cstack.bracelev[nl] = bracelev;
2684 cstack.nl = nl + 1;
2685 }
2686
2687 static void
2688 popclass_above (bracelev)
2689 int bracelev;
2690 {
2691 int nl;
2692
2693 for (nl = cstack.nl - 1;
2694 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2695 nl--)
2696 {
2697 free (cstack.cname[nl]);
2698 cstack.nl = nl;
2699 }
2700 }
2701
2702 static void
2703 write_classname (cn, qualifier)
2704 linebuffer *cn;
2705 char *qualifier;
2706 {
2707 int i, len;
2708 int qlen = strlen (qualifier);
2709
2710 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2711 {
2712 len = 0;
2713 cn->len = 0;
2714 cn->buffer[0] = '\0';
2715 }
2716 else
2717 {
2718 len = strlen (cstack.cname[0]);
2719 linebuffer_setlen (cn, len);
2720 strcpy (cn->buffer, cstack.cname[0]);
2721 }
2722 for (i = 1; i < cstack.nl; i++)
2723 {
2724 char *s;
2725 int slen;
2726
2727 s = cstack.cname[i];
2728 if (s == NULL)
2729 continue;
2730 slen = strlen (s);
2731 len += slen + qlen;
2732 linebuffer_setlen (cn, len);
2733 strncat (cn->buffer, qualifier, qlen);
2734 strncat (cn->buffer, s, slen);
2735 }
2736 }
2737
2738 \f
2739 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2740 static void make_C_tag __P((bool));
2741
2742 /*
2743 * consider_token ()
2744 * checks to see if the current token is at the start of a
2745 * function or variable, or corresponds to a typedef, or
2746 * is a struct/union/enum tag, or #define, or an enum constant.
2747 *
2748 * *IS_FUNC gets TRUE if the token is a function or #define macro
2749 * with args. C_EXTP points to which language we are looking at.
2750 *
2751 * Globals
2752 * fvdef IN OUT
2753 * structdef IN OUT
2754 * definedef IN OUT
2755 * typdef IN OUT
2756 * objdef IN OUT
2757 */
2758
2759 static bool
2760 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2761 register char *str; /* IN: token pointer */
2762 register int len; /* IN: token length */
2763 register int c; /* IN: first char after the token */
2764 int *c_extp; /* IN, OUT: C extensions mask */
2765 int bracelev; /* IN: brace level */
2766 int parlev; /* IN: parenthesis level */
2767 bool *is_func_or_var; /* OUT: function or variable found */
2768 {
2769 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2770 structtype is the type of the preceding struct-like keyword, and
2771 structbracelev is the brace level where it has been seen. */
2772 static enum sym_type structtype;
2773 static int structbracelev;
2774 static enum sym_type toktype;
2775
2776
2777 toktype = C_symtype (str, len, *c_extp);
2778
2779 /*
2780 * Skip __attribute__
2781 */
2782 if (toktype == st_C_attribute)
2783 {
2784 inattribute = TRUE;
2785 return FALSE;
2786 }
2787
2788 /*
2789 * Advance the definedef state machine.
2790 */
2791 switch (definedef)
2792 {
2793 case dnone:
2794 /* We're not on a preprocessor line. */
2795 if (toktype == st_C_gnumacro)
2796 {
2797 fvdef = fdefunkey;
2798 return FALSE;
2799 }
2800 break;
2801 case dsharpseen:
2802 if (toktype == st_C_define)
2803 {
2804 definedef = ddefineseen;
2805 }
2806 else
2807 {
2808 definedef = dignorerest;
2809 }
2810 return FALSE;
2811 case ddefineseen:
2812 /*
2813 * Make a tag for any macro, unless it is a constant
2814 * and constantypedefs is FALSE.
2815 */
2816 definedef = dignorerest;
2817 *is_func_or_var = (c == '(');
2818 if (!*is_func_or_var && !constantypedefs)
2819 return FALSE;
2820 else
2821 return TRUE;
2822 case dignorerest:
2823 return FALSE;
2824 default:
2825 error ("internal error: definedef value.", (char *)NULL);
2826 }
2827
2828 /*
2829 * Now typedefs
2830 */
2831 switch (typdef)
2832 {
2833 case tnone:
2834 if (toktype == st_C_typedef)
2835 {
2836 if (typedefs)
2837 typdef = tkeyseen;
2838 fvextern = FALSE;
2839 fvdef = fvnone;
2840 return FALSE;
2841 }
2842 break;
2843 case tkeyseen:
2844 switch (toktype)
2845 {
2846 case st_none:
2847 case st_C_class:
2848 case st_C_struct:
2849 case st_C_enum:
2850 typdef = ttypeseen;
2851 }
2852 break;
2853 case ttypeseen:
2854 if (structdef == snone && fvdef == fvnone)
2855 {
2856 fvdef = fvnameseen;
2857 return TRUE;
2858 }
2859 break;
2860 case tend:
2861 switch (toktype)
2862 {
2863 case st_C_class:
2864 case st_C_struct:
2865 case st_C_enum:
2866 return FALSE;
2867 }
2868 return TRUE;
2869 }
2870
2871 switch (toktype)
2872 {
2873 case st_C_javastruct:
2874 if (structdef == stagseen)
2875 structdef = scolonseen;
2876 return FALSE;
2877 case st_C_template:
2878 case st_C_class:
2879 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2880 && bracelev == 0
2881 && definedef == dnone && structdef == snone
2882 && typdef == tnone && fvdef == fvnone)
2883 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2884 if (toktype == st_C_template)
2885 break;
2886 /* FALLTHRU */
2887 case st_C_struct:
2888 case st_C_enum:
2889 if (parlev == 0
2890 && fvdef != vignore
2891 && (typdef == tkeyseen
2892 || (typedefs_or_cplusplus && structdef == snone)))
2893 {
2894 structdef = skeyseen;
2895 structtype = toktype;
2896 structbracelev = bracelev;
2897 if (fvdef == fvnameseen)
2898 fvdef = fvnone;
2899 }
2900 return FALSE;
2901 }
2902
2903 if (structdef == skeyseen)
2904 {
2905 structdef = stagseen;
2906 return TRUE;
2907 }
2908
2909 if (typdef != tnone)
2910 definedef = dnone;
2911
2912 /* Detect Objective C constructs. */
2913 switch (objdef)
2914 {
2915 case onone:
2916 switch (toktype)
2917 {
2918 case st_C_objprot:
2919 objdef = oprotocol;
2920 return FALSE;
2921 case st_C_objimpl:
2922 objdef = oimplementation;
2923 return FALSE;
2924 }
2925 break;
2926 case oimplementation:
2927 /* Save the class tag for functions or variables defined inside. */
2928 objtag = savenstr (str, len);
2929 objdef = oinbody;
2930 return FALSE;
2931 case oprotocol:
2932 /* Save the class tag for categories. */
2933 objtag = savenstr (str, len);
2934 objdef = otagseen;
2935 *is_func_or_var = TRUE;
2936 return TRUE;
2937 case oparenseen:
2938 objdef = ocatseen;
2939 *is_func_or_var = TRUE;
2940 return TRUE;
2941 case oinbody:
2942 break;
2943 case omethodsign:
2944 if (parlev == 0)
2945 {
2946 fvdef = fvnone;
2947 objdef = omethodtag;
2948 linebuffer_setlen (&token_name, len);
2949 strncpy (token_name.buffer, str, len);
2950 token_name.buffer[len] = '\0';
2951 return TRUE;
2952 }
2953 return FALSE;
2954 case omethodcolon:
2955 if (parlev == 0)
2956 objdef = omethodparm;
2957 return FALSE;
2958 case omethodparm:
2959 if (parlev == 0)
2960 {
2961 fvdef = fvnone;
2962 objdef = omethodtag;
2963 linebuffer_setlen (&token_name, token_name.len + len);
2964 strncat (token_name.buffer, str, len);
2965 return TRUE;
2966 }
2967 return FALSE;
2968 case oignore:
2969 if (toktype == st_C_objend)
2970 {
2971 /* Memory leakage here: the string pointed by objtag is
2972 never released, because many tests would be needed to
2973 avoid breaking on incorrect input code. The amount of
2974 memory leaked here is the sum of the lengths of the
2975 class tags.
2976 free (objtag); */
2977 objdef = onone;
2978 }
2979 return FALSE;
2980 }
2981
2982 /* A function, variable or enum constant? */
2983 switch (toktype)
2984 {
2985 case st_C_extern:
2986 fvextern = TRUE;
2987 switch (fvdef)
2988 {
2989 case finlist:
2990 case flistseen:
2991 case fignore:
2992 case vignore:
2993 break;
2994 default:
2995 fvdef = fvnone;
2996 }
2997 return FALSE;
2998 case st_C_ignore:
2999 fvextern = FALSE;
3000 fvdef = vignore;
3001 return FALSE;
3002 case st_C_operator:
3003 fvdef = foperator;
3004 *is_func_or_var = TRUE;
3005 return TRUE;
3006 case st_none:
3007 if (constantypedefs
3008 && structdef == snone
3009 && structtype == st_C_enum && bracelev > structbracelev)
3010 return TRUE; /* enum constant */
3011 switch (fvdef)
3012 {
3013 case fdefunkey:
3014 if (bracelev > 0)
3015 break;
3016 fvdef = fdefunname; /* GNU macro */
3017 *is_func_or_var = TRUE;
3018 return TRUE;
3019 case fvnone:
3020 switch (typdef)
3021 {
3022 case ttypeseen:
3023 return FALSE;
3024 case tnone:
3025 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3026 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3027 {
3028 fvdef = vignore;
3029 return FALSE;
3030 }
3031 break;
3032 }
3033 /* FALLTHRU */
3034 case fvnameseen:
3035 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3036 {
3037 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3038 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3039 fvdef = foperator;
3040 *is_func_or_var = TRUE;
3041 return TRUE;
3042 }
3043 if (bracelev > 0 && !instruct)
3044 break;
3045 fvdef = fvnameseen; /* function or variable */
3046 *is_func_or_var = TRUE;
3047 return TRUE;
3048 }
3049 break;
3050 }
3051
3052 return FALSE;
3053 }
3054
3055 \f
3056 /*
3057 * C_entries often keeps pointers to tokens or lines which are older than
3058 * the line currently read. By keeping two line buffers, and switching
3059 * them at end of line, it is possible to use those pointers.
3060 */
/* The two alternating line buffers.  `lb' holds the text of a line;
   `linepos' is assigned from `charno' right before that buffer is
   refilled (see CNL_SAVE_DEFINEDEF below).  */
3061 static struct
3062 {
3063 long linepos;
3064 linebuffer lb;
3065 } lbs[2];
3066
/* All the macros below are meant to be expanded inside C_entries: they
   refer to its locals and parameters (curndx, newndx, c_ext, inf, lp,
   quotednl, savetoken) and to names that are not declared in
   C_entries itself (token, definedef, charno).  */

/* Nonzero when the buffer currently being read is also the one that
   was filled last.  */
3067 #define current_lb_is_new (newndx == curndx)
/* Flip curndx between 0 and 1, so that the next line is read into the
   other buffer and the present one is left untouched.  */
3068 #define switch_line_buffers() (curndx = 1 - curndx)
3069
/* Shorthands for the text and the recorded position of the current
   and of the newest buffer.  */
3070 #define curlb (lbs[curndx].lb)
3071 #define newlb (lbs[newndx].lb)
3072 #define curlinepos (lbs[curndx].linepos)
3073 #define newlinepos (lbs[newndx].linepos)
3074
/* Tests on the language-variant bits in c_ext.  */
3075 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3076 #define cplpl (c_ext & C_PLPL)
3077 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3078
/* Read the next input line into the current buffer: record charno as
   its position, point lp at its first character, clear quotednl and
   mark the current buffer as the newest one.  Unlike CNL, this leaves
   definedef and savetoken alone.  The order matters: curlinepos is
   taken from charno before readline is called.  */
3079 #define CNL_SAVE_DEFINEDEF() \
3080 do { \
3081 curlinepos = charno; \
3082 readline (&curlb, inf); \
3083 lp = curlb.buffer; \
3084 quotednl = FALSE; \
3085 newndx = curndx; \
3086 } while (0)
3087
/* Like CNL_SAVE_DEFINEDEF, and in addition: if a token was put aside
   in savetoken, make it the current token again; then reset definedef
   to dnone.  */
3088 #define CNL() \
3089 do { \
3090 CNL_SAVE_DEFINEDEF(); \
3091 if (savetoken.valid) \
3092 { \
3093 token = savetoken; \
3094 savetoken.valid = FALSE; \
3095 } \
3096 definedef = dnone; \
3097 } while (0)
3098
3099
3100 static void
3101 make_C_tag (isfun)
3102 bool isfun;
3103 {
3104 /* This function is never called when token.valid is FALSE, but
3105 we must protect against invalid input or internal errors. */
3106 if (token.valid)
3107 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3108 token.offset+token.length+1, token.lineno, token.linepos);
3109 else if (DEBUG)
3110 { /* this branch is optimised away if !DEBUG */
3111 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3112 token_name.len + 17, isfun, token.line,
3113 token.offset+token.length+1, token.lineno, token.linepos);
3114 error ("INVALID TOKEN", NULL);
3115 }
3116
3117 token.valid = FALSE;
3118 }
3119
3120
3121 /*
3122 * C_entries ()
3123 * This routine finds functions, variables, typedefs,
3124 * #define's, enum constants and struct/union/enum definitions in
3125 * C syntax and adds them to the list.
3126 */
/* C_EXT holds the language-variant bits; it is examined through the
   plainc, cplpl and cjava macros and tested directly against YACC and
   C_AUTO.  When C_AUTO is set, seeing `::' inside an identifier
   switches the local copy to C_PLPL.
   INF is the input stream; it is read one line at a time with
   readline, through the CNL and CNL_SAVE_DEFINEDEF macros.

   The body is a single loop over input characters, with three stages
   for each character:
     1. lexical filtering (backslash, comments, string and character
	literals, bracketed text, yacc `%%', preprocessor `#');
     2. token detection, where consider_token decides whether the
	identifier that just ended should be remembered in `token';
     3. a switch on the punctuation character that follows, which
	advances the fvdef/typdef/structdef/objdef state and calls
	make_C_tag when a definition is recognised.  */
3127 static void
3128 C_entries (c_ext, inf)
3129 int c_ext; /* extension of C */
3130 FILE *inf; /* input file */
3131 {
3132 register char c; /* latest char read; '\0' for end of line */
3133 register char *lp; /* pointer one beyond the character `c' */
3134 int curndx, newndx; /* indices for current and new lb */
3135 register int tokoff; /* offset in line of start of current token */
3136 register int toklen; /* length of current token */
3137 char *qualifier; /* string used to qualify names */
3138 int qlen; /* length of qualifier */
3139 int bracelev; /* current brace level */
3140 int bracketlev; /* current bracket level */
3141 int parlev; /* current parenthesis level */
3142 int attrparlev; /* __attribute__ parenthesis level */
3143 int templatelev; /* current template level */
3144 int typdefbracelev; /* bracelev where a typedef struct body begun */
3145 bool incomm, inquote, inchar, quotednl, midtoken;
3146 bool yacc_rules; /* in the rules part of a yacc file */
3147 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3148
3149
/* Both line buffers are initialised here and their text is freed at
   the end of this function.  */
3150 linebuffer_init (&lbs[0].lb);
3151 linebuffer_init (&lbs[1].lb);
/* The class stack is allocated only while its size is still 0.  */
3152 if (cstack.size == 0)
3153 {
3154 cstack.size = (DEBUG) ? 1 : 4;
3155 cstack.nl = 0;
3156 cstack.cname = xnew (cstack.size, char *);
3157 cstack.bracelev = xnew (cstack.size, int);
3158 }
3159
3160 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3161 curndx = newndx = 0;
/* Start on an empty line, so that the first character seen is '\0'.  */
3162 lp = curlb.buffer;
3163 *lp = 0;
3164
/* Reset the whole parser state.  */
3165 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3166 structdef = snone; definedef = dnone; objdef = onone;
3167 yacc_rules = FALSE;
3168 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3169 token.valid = savetoken.valid = FALSE;
3170 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Members are qualified with `.' for Java and with `::' otherwise.  */
3171 if (cjava)
3172 { qualifier = "."; qlen = 1; }
3173 else
3174 { qualifier = "::"; qlen = 2; }
3175
3176
/* Main loop: one iteration per input character, until end of file.  */
3177 while (!feof (inf))
3178 {
3179 c = *lp++;
3180 if (c == '\\')
3181 {
3182 /* If we are at the end of the line, the next character is a
3183 '\0'; do not skip it, because it is what tells us
3184 to read the next line. */
3185 if (*lp == '\0')
3186 {
3187 quotednl = TRUE;
3188 continue;
3189 }
/* Otherwise consume the escaped character too, and treat the pair
   as a single blank.  */
3190 lp++;
3191 c = ' ';
3192 }
/* Inside a C comment: only the closing delimiter and the end of the
   line are of interest.  */
3193 else if (incomm)
3194 {
3195 switch (c)
3196 {
3197 case '*':
3198 if (*lp == '/')
3199 {
3200 c = *lp++;
3201 incomm = FALSE;
3202 }
3203 break;
3204 case '\0':
3205 /* Newlines inside comments do not end macro definitions in
3206 traditional cpp. */
3207 CNL_SAVE_DEFINEDEF ();
3208 break;
3209 }
3210 continue;
3211 }
/* Inside a string literal.  */
3212 else if (inquote)
3213 {
3214 switch (c)
3215 {
3216 case '"':
3217 inquote = FALSE;
3218 break;
3219 case '\0':
3220 /* Newlines inside strings do not end macro definitions
3221 in traditional cpp, even though compilers don't
3222 usually accept them. */
3223 CNL_SAVE_DEFINEDEF ();
3224 break;
3225 }
3226 continue;
3227 }
/* Inside a character literal: it ends at the closing quote or, at the
   latest, at the end of the line.  */
3228 else if (inchar)
3229 {
3230 switch (c)
3231 {
3232 case '\0':
3233 /* Hmmm, something went wrong. */
3234 CNL ();
3235 /* FALLTHRU */
3236 case '\'':
3237 inchar = FALSE;
3238 break;
3239 }
3240 continue;
3241 }
/* Inside square brackets: ignore everything up to the matching `]'.
   NOTE(review): every path through this branch ends in `continue',
   including the `]' that brings bracketlev back to 0, so the
   `case ']':' of the punctuation switch further down seems reachable
   only for an unmatched `]' -- confirm that this is intended.  */
3242 else if (bracketlev > 0)
3243 {
3244 switch (c)
3245 {
3246 case ']':
3247 if (--bracketlev > 0)
3248 continue;
3249 break;
3250 case '\0':
3251 CNL_SAVE_DEFINEDEF ();
3252 break;
3253 }
3254 continue;
3255 }
/* Not inside any of the above: look for the characters that open
   them, and for yacc and preprocessor markers.  */
3256 else switch (c)
3257 {
3258 case '"':
3259 inquote = TRUE;
3260 if (inattribute)
3261 break;
3262 switch (fvdef)
3263 {
3264 case fdefunkey:
3265 case fstartlist:
3266 case finlist:
3267 case fignore:
3268 case vignore:
3269 break;
3270 default:
3271 fvextern = FALSE;
3272 fvdef = fvnone;
3273 }
3274 continue;
3275 case '\'':
3276 inchar = TRUE;
3277 if (inattribute)
3278 break;
3279 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3280 {
3281 fvextern = FALSE;
3282 fvdef = fvnone;
3283 }
3284 continue;
3285 case '/':
3286 if (*lp == '*')
3287 {
3288 incomm = TRUE;
3289 lp++;
3290 c = ' ';
3291 }
/* A `//' comment: behave as if the line ended here.  The '\0' case of
   the punctuation switch below then reads the next line.  */
3292 else if (/* cplpl && */ *lp == '/')
3293 {
3294 c = '\0';
3295 }
3296 break;
3297 case '%':
3298 if ((c_ext & YACC) && *lp == '%')
3299 {
3300 /* Entering or exiting rules section in yacc file. */
3301 lp++;
3302 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3303 typdef = tnone; structdef = snone;
3304 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3305 bracelev = 0;
3306 yacc_rules = !yacc_rules;
3307 continue;
3308 }
3309 else
3310 break;
3311 case '#':
3312 if (definedef == dnone)
3313 {
3314 char *cp;
3315 bool cpptoken = TRUE;
3316
3317 /* Look back on this line. If all blanks, or nonblanks
3318 followed by an end of comment, this is a preprocessor
3319 token. */
3320 for (cp = newlb.buffer; cp < lp-1; cp++)
3321 if (!iswhite (*cp))
3322 {
3323 if (*cp == '*' && *(cp+1) == '/')
3324 {
3325 cp++;
3326 cpptoken = TRUE;
3327 }
3328 else
3329 cpptoken = FALSE;
3330 }
3331 if (cpptoken)
3332 definedef = dsharpseen;
3333 } /* if (definedef == dnone) */
3334 continue;
3335 case '[':
3336 bracketlev++;
3337 continue;
3338 } /* switch (c) */
3339
3340
3341 /* Consider token only if some involved conditions are satisfied. */
3342 if (typdef != tignore
3343 && definedef != dignorerest
3344 && fvdef != finlist
3345 && templatelev == 0
3346 && (definedef != dnone
3347 || structdef != scolonseen)
3348 && !inattribute)
3349 {
/* midtoken is set while the characters of an identifier that began at
   offset tokoff are being collected; toklen counts them.  */
3350 if (midtoken)
3351 {
3352 if (endtoken (c))
3353 {
3354 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3355 /* This handles :: in the middle,
3356 but not at the beginning of an identifier.
3357 Also, space-separated :: is not recognised. */
3358 {
3359 if (c_ext & C_AUTO) /* automatic detection of C++ */
3360 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3361 lp += 2;
3362 toklen += 2;
3363 c = lp[-1];
3364 goto still_in_token;
3365 }
3366 else
3367 {
3368 bool funorvar = FALSE;
3369
/* In the yacc rules section every token is accepted; elsewhere
   consider_token decides, and may set funorvar.  */
3370 if (yacc_rules
3371 || consider_token (newlb.buffer + tokoff, toklen, c,
3372 &c_ext, bracelev, parlev,
3373 &funorvar))
3374 {
/* For an operator, stretch the token so that it also covers the
   operator symbol: skip blanks, then everything up to the next blank,
   `(' or end of line.  */
3375 if (fvdef == foperator)
3376 {
3377 char *oldlp = lp;
3378 lp = skip_spaces (lp-1);
3379 if (*lp != '\0')
3380 lp += 1;
3381 while (*lp != '\0'
3382 && !iswhite (*lp) && *lp != '(')
3383 lp += 1;
3384 c = *lp++;
3385 toklen += lp - oldlp;
3386 }
/* Build the tag name in token_name; which form it takes depends on
   the context the token was found in.  */
3387 token.named = FALSE;
3388 if (!plainc
3389 && nestlev > 0 && definedef == dnone)
3390 /* in struct body */
3391 {
3392 write_classname (&token_name, qualifier);
3393 linebuffer_setlen (&token_name,
3394 token_name.len+qlen+toklen);
3395 strcat (token_name.buffer, qualifier);
3396 strncat (token_name.buffer,
3397 newlb.buffer + tokoff, toklen);
3398 token.named = TRUE;
3399 }
3400 else if (objdef == ocatseen)
3401 /* Objective C category */
3402 {
/* The name has the form objtag(token): two extra characters for the
   parentheses.  */
3403 int len = strlen (objtag) + 2 + toklen;
3404 linebuffer_setlen (&token_name, len);
3405 strcpy (token_name.buffer, objtag);
3406 strcat (token_name.buffer, "(");
3407 strncat (token_name.buffer,
3408 newlb.buffer + tokoff, toklen);
3409 strcat (token_name.buffer, ")");
3410 token.named = TRUE;
3411 }
3412 else if (objdef == omethodtag
3413 || objdef == omethodparm)
3414 /* Objective C method */
3415 {
3416 token.named = TRUE;
3417 }
3418 else if (fvdef == fdefunname)
3419 /* GNU DEFUN and similar macros */
3420 {
3421 bool defun = (newlb.buffer[tokoff] == 'F');
3422 int off = tokoff;
3423 int len = toklen;
3424
3425 /* Rewrite the tag so that emacs lisp DEFUNs
3426 can be found by their elisp name */
3427 if (defun)
3428 {
3429 off += 1;
3430 len -= 1;
3431 }
3432 linebuffer_setlen (&token_name, len);
3433 strncpy (token_name.buffer,
3434 newlb.buffer + off, len);
3435 token_name.buffer[len] = '\0';
3436 if (defun)
3437 while (--len >= 0)
3438 if (token_name.buffer[len] == '_')
3439 token_name.buffer[len] = '-';
3440 token.named = defun;
3441 }
3442 else
3443 {
3444 linebuffer_setlen (&token_name, toklen);
3445 strncpy (token_name.buffer,
3446 newlb.buffer + tokoff, toklen);
3447 token_name.buffer[toklen] = '\0';
3448 /* Name macros and members. */
3449 token.named = (structdef == stagseen
3450 || typdef == ttypeseen
3451 || typdef == tend
3452 || (funorvar
3453 && definedef == dignorerest)
3454 || (funorvar
3455 && definedef == dnone
3456 && structdef == snone
3457 && bracelev > 0));
3458 }
/* Remember where the token was found.  token.line points into the
   newest line buffer.  */
3459 token.lineno = lineno;
3460 token.offset = tokoff;
3461 token.length = toklen;
3462 token.line = newlb.buffer;
3463 token.linepos = newlinepos;
3464 token.valid = TRUE;
3465
/* Either keep the token pending (making sure that the next line goes
   into the other buffer, so that token.line stays usable), or tag it
   right away.  */
3466 if (definedef == dnone
3467 && (fvdef == fvnameseen
3468 || fvdef == foperator
3469 || structdef == stagseen
3470 || typdef == tend
3471 || typdef == ttypeseen
3472 || objdef != onone))
3473 {
3474 if (current_lb_is_new)
3475 switch_line_buffers ();
3476 }
3477 else if (definedef != dnone
3478 || fvdef == fdefunname
3479 || instruct)
3480 make_C_tag (funorvar);
3481 }
3482 else /* not yacc and consider_token failed */
3483 {
3484 if (inattribute && fvdef == fignore)
3485 {
3486 /* We have just met __attribute__ after a
3487 function parameter list: do not tag the
3488 function again. */
3489 fvdef = fvnone;
3490 }
3491 }
3492 midtoken = FALSE;
3493 }
3494 } /* if (endtoken (c)) */
3495 else if (intoken (c))
3496 still_in_token:
3497 {
3498 toklen++;
3499 continue;
3500 }
3501 } /* if (midtoken) */
/* Not in a token yet: C may be the first character of one.  */
3502 else if (begtoken (c))
3503 {
3504 switch (definedef)
3505 {
3506 case dnone:
3507 switch (fvdef)
3508 {
3509 case fstartlist:
3510 /* This prevents tagging fb in
3511 void (__attribute__((noreturn)) *fb) (void);
3512 Fixing this is not easy and not very important. */
3513 fvdef = finlist;
3514 continue;
3515 case flistseen:
3516 if (plainc || declarations)
3517 {
3518 make_C_tag (TRUE); /* a function */
3519 fvdef = fignore;
3520 }
3521 break;
3522 }
3523 if (structdef == stagseen && !cjava)
3524 {
3525 popclass_above (bracelev);
3526 structdef = snone;
3527 }
3528 break;
3529 case dsharpseen:
/* Put the pending token aside while the preprocessor line is being
   handled; CNL brings it back.  */
3530 savetoken = token;
3531 break;
3532 }
/* In the yacc rules section a token may only start in the first
   column of the line.  */
3533 if (!yacc_rules || lp == newlb.buffer + 1)
3534 {
3535 tokoff = lp - 1 - newlb.buffer;
3536 toklen = 1;
3537 midtoken = TRUE;
3538 }
3539 continue;
3540 } /* if (begtoken) */
3541 } /* if must look at token */
3542
3543
3544 /* Detect end of line, colon, comma, semicolon and various braces
3545 after having handled a token.*/
3546 switch (c)
3547 {
3548 case ':':
3549 if (inattribute)
3550 break;
3551 if (yacc_rules && token.offset == 0 && token.valid)
3552 {
3553 make_C_tag (FALSE); /* a yacc function */
3554 break;
3555 }
3556 if (definedef != dnone)
3557 break;
3558 switch (objdef)
3559 {
3560 case otagseen:
3561 objdef = oignore;
3562 make_C_tag (TRUE); /* an Objective C class */
3563 break;
3564 case omethodtag:
3565 case omethodparm:
/* The colon is part of the method name being accumulated.  */
3566 objdef = omethodcolon;
3567 linebuffer_setlen (&token_name, token_name.len + 1);
3568 strcat (token_name.buffer, ":");
3569 break;
3570 }
3571 if (structdef == stagseen)
3572 {
3573 structdef = scolonseen;
3574 break;
3575 }
3576 /* Should be useless, but may work as a safety net. */
3577 if (cplpl && fvdef == flistseen)
3578 {
3579 make_C_tag (TRUE); /* a function */
3580 fvdef = fignore;
3581 break;
3582 }
3583 break;
3584 case ';':
3585 if (definedef != dnone || inattribute)
3586 break;
3587 switch (typdef)
3588 {
3589 case tend:
3590 case ttypeseen:
3591 make_C_tag (FALSE); /* a typedef */
3592 typdef = tnone;
3593 fvdef = fvnone;
3594 break;
3595 case tnone:
3596 case tinbody:
3597 case tignore:
3598 switch (fvdef)
3599 {
3600 case fignore:
3601 if (typdef == tignore || cplpl)
3602 fvdef = fvnone;
3603 break;
3604 case fvnameseen:
3605 if ((globals && bracelev == 0 && (!fvextern || declarations))
3606 || (members && instruct))
3607 make_C_tag (FALSE); /* a variable */
3608 fvextern = FALSE;
3609 fvdef = fvnone;
3610 token.valid = FALSE;
3611 break;
3612 case flistseen:
3613 if ((declarations
3614 && (cplpl || !instruct)
3615 && (typdef == tnone || (typdef != tignore && instruct)))
3616 || (members
3617 && plainc && instruct))
3618 make_C_tag (TRUE); /* a function */
3619 /* FALLTHRU */
3620 default:
3621 fvextern = FALSE;
3622 fvdef = fvnone;
3623 if (declarations
3624 && cplpl && structdef == stagseen)
3625 make_C_tag (FALSE); /* forward declaration */
3626 else
3627 token.valid = FALSE;
3628 } /* switch (fvdef) */
3629 /* FALLTHRU */
3630 default:
3631 if (!instruct)
3632 typdef = tnone;
3633 }
3634 if (structdef == stagseen)
3635 structdef = snone;
3636 break;
3637 case ',':
3638 if (definedef != dnone || inattribute)
3639 break;
3640 switch (objdef)
3641 {
3642 case omethodtag:
3643 case omethodparm:
3644 make_C_tag (TRUE); /* an Objective C method */
3645 objdef = oinbody;
3646 break;
3647 }
3648 switch (fvdef)
3649 {
3650 case fdefunkey:
3651 case foperator:
3652 case fstartlist:
3653 case finlist:
3654 case fignore:
3655 case vignore:
3656 break;
3657 case fdefunname:
3658 fvdef = fignore;
3659 break;
3660 case fvnameseen:
3661 if (parlev == 0
3662 && ((globals
3663 && bracelev == 0
3664 && templatelev == 0
3665 && (!fvextern || declarations))
3666 || (members && instruct)))
3667 make_C_tag (FALSE); /* a variable */
3668 break;
3669 case flistseen:
3670 if ((declarations && typdef == tnone && !instruct)
3671 || (members && typdef != tignore && instruct))
3672 {
3673 make_C_tag (TRUE); /* a function */
3674 fvdef = fvnameseen;
3675 }
3676 else if (!declarations)
3677 fvdef = fvnone;
3678 token.valid = FALSE;
3679 break;
3680 default:
3681 fvdef = fvnone;
3682 }
3683 if (structdef == stagseen)
3684 structdef = snone;
3685 break;
3686 case ']':
3687 if (definedef != dnone || inattribute)
3688 break;
3689 if (structdef == stagseen)
3690 structdef = snone;
3691 switch (typdef)
3692 {
3693 case ttypeseen:
3694 case tend:
3695 typdef = tignore;
3696 make_C_tag (FALSE); /* a typedef */
3697 break;
3698 case tnone:
3699 case tinbody:
3700 switch (fvdef)
3701 {
3702 case foperator:
3703 case finlist:
3704 case fignore:
3705 case vignore:
3706 break;
3707 case fvnameseen:
3708 if ((members && bracelev == 1)
3709 || (globals && bracelev == 0
3710 && (!fvextern || declarations)))
3711 make_C_tag (FALSE); /* a variable */
3712 /* FALLTHRU */
3713 default:
3714 fvdef = fvnone;
3715 }
3716 break;
3717 }
3718 break;
3719 case '(':
/* Parentheses inside an __attribute__ are counted separately, in
   attrparlev, and do not touch parlev.  */
3720 if (inattribute)
3721 {
3722 attrparlev++;
3723 break;
3724 }
3725 if (definedef != dnone)
3726 break;
3727 if (objdef == otagseen && parlev == 0)
3728 objdef = oparenseen;
3729 switch (fvdef)
3730 {
3731 case fvnameseen:
3732 if (typdef == ttypeseen
3733 && *lp != '*'
3734 && !instruct)
3735 {
3736 /* This handles constructs like:
3737 typedef void OperatorFun (int fun); */
3738 make_C_tag (FALSE);
3739 typdef = tignore;
3740 fvdef = fignore;
3741 break;
3742 }
3743 /* FALLTHRU */
3744 case foperator:
3745 fvdef = fstartlist;
3746 break;
3747 case flistseen:
3748 fvdef = finlist;
3749 break;
3750 }
3751 parlev++;
3752 break;
3753 case ')':
3754 if (inattribute)
3755 {
3756 if (--attrparlev == 0)
3757 inattribute = FALSE;
3758 break;
3759 }
3760 if (definedef != dnone)
3761 break;
3762 if (objdef == ocatseen && parlev == 1)
3763 {
3764 make_C_tag (TRUE); /* an Objective C category */
3765 objdef = oignore;
3766 }
3767 if (--parlev == 0)
3768 {
3769 switch (fvdef)
3770 {
3771 case fstartlist:
3772 case finlist:
3773 fvdef = flistseen;
3774 break;
3775 }
3776 if (!instruct
3777 && (typdef == tend
3778 || typdef == ttypeseen))
3779 {
3780 typdef = tignore;
3781 make_C_tag (FALSE); /* a typedef */
3782 }
3783 }
3784 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3785 parlev = 0;
3786 break;
3787 case '{':
3788 if (definedef != dnone)
3789 break;
3790 if (typdef == ttypeseen)
3791 {
3792 /* Whenever typdef is set to tinbody (currently only
3793 here), typdefbracelev should be set to bracelev. */
3794 typdef = tinbody;
3795 typdefbracelev = bracelev;
3796 }
3797 switch (fvdef)
3798 {
3799 case flistseen:
3800 make_C_tag (TRUE); /* a function */
3801 /* FALLTHRU */
3802 case fignore:
3803 fvdef = fvnone;
3804 break;
3805 case fvnone:
3806 switch (objdef)
3807 {
3808 case otagseen:
3809 make_C_tag (TRUE); /* an Objective C class */
3810 objdef = oignore;
3811 break;
3812 case omethodtag:
3813 case omethodparm:
3814 make_C_tag (TRUE); /* an Objective C method */
3815 objdef = oinbody;
3816 break;
3817 default:
3818 /* Neutralize `extern "C" {' grot. */
3819 if (bracelev == 0 && structdef == snone && nestlev == 0
3820 && typdef == tnone)
3821 bracelev = -1;
3822 }
3823 break;
3824 }
3825 switch (structdef)
3826 {
3827 case skeyseen: /* unnamed struct */
3828 pushclass_above (bracelev, NULL, 0);
3829 structdef = snone;
3830 break;
3831 case stagseen: /* named struct or enum */
3832 case scolonseen: /* a class */
3833 pushclass_above (bracelev,token.line+token.offset, token.length);
3834 structdef = snone;
3835 make_C_tag (FALSE); /* a struct or enum */
3836 break;
3837 }
/* If bracelev was set to -1 above, this brings it back to 0.  */
3838 bracelev += 1;
3839 break;
3840 case '*':
3841 if (definedef != dnone)
3842 break;
3843 if (fvdef == fstartlist)
3844 {
3845 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3846 token.valid = FALSE;
3847 }
3848 break;
3849 case '}':
3850 if (definedef != dnone)
3851 break;
3852 bracelev -= 1;
3853 if (!ignoreindent && lp == newlb.buffer + 1)
3854 {
3855 if (bracelev != 0)
3856 token.valid = FALSE; /* unexpected value, token unreliable */
3857 bracelev = 0; /* reset brace level if first column */
3858 parlev = 0; /* also reset paren level, just in case... */
3859 }
3860 else if (bracelev < 0)
3861 {
3862 token.valid = FALSE; /* something gone amiss, token unreliable */
3863 bracelev = 0;
3864 }
3865 if (bracelev == 0 && fvdef == vignore)
3866 fvdef = fvnone; /* end of function */
3867 popclass_above (bracelev);
3868 structdef = snone;
3869 /* Only if typdef == tinbody is typdefbracelev significant. */
3870 if (typdef == tinbody && bracelev <= typdefbracelev)
3871 {
3872 assert (bracelev == typdefbracelev);
3873 typdef = tend;
3874 }
3875 break;
3876 case '=':
3877 if (definedef != dnone)
3878 break;
3879 switch (fvdef)
3880 {
3881 case foperator:
3882 case finlist:
3883 case fignore:
3884 case vignore:
3885 break;
3886 case fvnameseen:
3887 if ((members && bracelev == 1)
3888 || (globals && bracelev == 0 && (!fvextern || declarations)))
3889 make_C_tag (FALSE); /* a variable */
3890 /* FALLTHRU */
3891 default:
3892 fvdef = vignore;
3893 }
3894 break;
3895 case '<':
/* In C++, `<' right after a struct tag or a name opens a template
   argument list; anywhere else it is handled like the other
   operators at resetfvdef.  */
3896 if (cplpl
3897 && (structdef == stagseen || fvdef == fvnameseen))
3898 {
3899 templatelev++;
3900 break;
3901 }
3902 goto resetfvdef;
3903 case '>':
3904 if (templatelev > 0)
3905 {
3906 templatelev--;
3907 break;
3908 }
3909 goto resetfvdef;
3910 case '+':
3911 case '-':
/* At brace level 0 inside an Objective C body, `+' or `-' introduces
   a method signature.  */
3912 if (objdef == oinbody && bracelev == 0)
3913 {
3914 objdef = omethodsign;
3915 break;
3916 }
3917 /* FALLTHRU */
3918 resetfvdef:
3919 case '#': case '~': case '&': case '%': case '/':
3920 case '|': case '^': case '!': case '.': case '?':
3921 if (definedef != dnone)
3922 break;
3923 /* These surely cannot follow a function tag in C. */
3924 switch (fvdef)
3925 {
3926 case foperator:
3927 case finlist:
3928 case fignore:
3929 case vignore:
3930 break;
3931 default:
3932 fvdef = fvnone;
3933 }
3934 break;
3935 case '\0':
3936 if (objdef == otagseen)
3937 {
3938 make_C_tag (TRUE); /* an Objective C class */
3939 objdef = oignore;
3940 }
3941 /* If a macro spans multiple lines don't reset its state. */
3942 if (quotednl)
3943 CNL_SAVE_DEFINEDEF ();
3944 else
3945 CNL ();
3946 break;
3947 } /* switch (c) */
3948
3949 } /* while not eof */
3950
/* Release the text of both line buffers.  */
3951 free (lbs[0].lb.buffer);
3952 free (lbs[1].lb.buffer);
3953 }
3954
3955 /*
3956 * Process either a C++ file or a C file depending on the setting
3957 * of a global flag.
3958 */
3959 static void
3960 default_C_entries (inf)
3961 FILE *inf;
3962 {
3963 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3964 }
3965
/* Tag INF as plain C: no language-extension bit is passed on.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
3973
3974 /* Always do C++. */
3975 static void
3976 Cplusplus_entries (inf)
3977 FILE *inf;
3978 {
3979 C_entries (C_PLPL, inf);
3980 }
3981
3982 /* Always do Java. */
3983 static void
3984 Cjava_entries (inf)
3985 FILE *inf;
3986 {
3987 C_entries (C_JAVA, inf);
3988 }
3989
3990 /* Always do C*. */
3991 static void
3992 Cstar_entries (inf)
3993 FILE *inf;
3994 {
3995 C_entries (C_STAR, inf);
3996 }
3997
3998 /* Always do Yacc. */
3999 static void
4000 Yacc_entries (inf)
4001 FILE *inf;
4002 {
4003 C_entries (YACC, inf);
4004 }
4005
4006 \f
4007 /* Useful macros. */
/* Loop header that runs the statement following it once per input
   line: as long as FILE_POINTER is not at end of file, the next line
   is read into LINE_BUFFER with readline and CHAR_POINTER is set to
   the start of its text.  There is no increment clause, so a
   `continue' in the body goes straight on to the next line.  */
4008 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4009 for (; /* loop initialization */ \
4010 !feof (file_pointer) /* loop test */ \
4011 && /* instructions at start of loop */ \
4012 (readline (&line_buffer, file_pointer), \
4013 char_pointer = line_buffer.buffer, \
4014 TRUE); \
4015 )
4016
/* Nonzero if CP points at the keyword KW and the character right
   after it satisfies notinname.  In that case CP is also moved past
   the keyword and past the spaces following it; otherwise CP is left
   alone.  CP is evaluated more than once and assigned to, so it must
   be a plain lvalue.  */
4017 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4018 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4019 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4020 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4021 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4022
4023 /* Similar to LOOKING_AT but ignores case, does not use notinname, does not skip spaces */
4024 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4025 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4026 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4027 && ((cp) += sizeof(kw)-1)) /* advance cp past kw only */
4028
4029 /*
4030 * Read a file, but do no processing. This is used to do regexp
4031 * matching on files that have no language defined.
4032 */
4033 static void
4034 just_read_file (inf)
4035 FILE *inf;
4036 {
4037 register char *dummy;
4038
4039 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4040 continue;
4041 }
4042
4043 \f
4044 /* Fortran parsing */
4045
4046 static void F_takeprec __P((void));
4047 static void F_getit __P((FILE *));
4048
4049 static void
4050 F_takeprec ()
4051 {
4052 dbp = skip_spaces (dbp);
4053 if (*dbp != '*')
4054 return;
4055 dbp++;
4056 dbp = skip_spaces (dbp);
4057 if (strneq (dbp, "(*)", 3))
4058 {
4059 dbp += 3;
4060 return;
4061 }
4062 if (!ISDIGIT (*dbp))
4063 {
4064 --dbp; /* force failure */
4065 return;
4066 }
4067 do
4068 dbp++;
4069 while (ISDIGIT (*dbp));
4070 }
4071
4072 static void
4073 F_getit (inf)
4074 FILE *inf;
4075 {
4076 register char *cp;
4077
4078 dbp = skip_spaces (dbp);
4079 if (*dbp == '\0')
4080 {
4081 readline (&lb, inf);
4082 dbp = lb.buffer;
4083 if (dbp[5] != '&')
4084 return;
4085 dbp += 6;
4086 dbp = skip_spaces (dbp);
4087 }
4088 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4089 return;
4090 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4091 continue;
4092 make_tag (dbp, cp-dbp, TRUE,
4093 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4094 }
4095
4096
4097 static void
4098 Fortran_functions (inf)
4099 FILE *inf;
4100 {
4101 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4102 {
4103 if (*dbp == '%')
4104 dbp++; /* Ratfor escape to fortran */
4105 dbp = skip_spaces (dbp);
4106 if (*dbp == '\0')
4107 continue;
4108
4109 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4110 dbp = skip_spaces (dbp);
4111
4112 switch (lowcase (*dbp))
4113 {
4114 case 'i':
4115 if (nocase_tail ("integer"))
4116 F_takeprec ();
4117 break;
4118 case 'r':
4119 if (nocase_tail ("real"))
4120 F_takeprec ();
4121 break;
4122 case 'l':
4123 if (nocase_tail ("logical"))
4124 F_takeprec ();
4125 break;
4126 case 'c':
4127 if (nocase_tail ("complex") || nocase_tail ("character"))
4128 F_takeprec ();
4129 break;
4130 case 'd':
4131 if (nocase_tail ("double"))
4132 {
4133 dbp = skip_spaces (dbp);
4134 if (*dbp == '\0')
4135 continue;
4136 if (nocase_tail ("precision"))
4137 break;
4138 continue;
4139 }
4140 break;
4141 }
4142 dbp = skip_spaces (dbp);
4143 if (*dbp == '\0')
4144 continue;
4145 switch (lowcase (*dbp))
4146 {
4147 case 'f':
4148 if (nocase_tail ("function"))
4149 F_getit (inf);
4150 continue;
4151 case 's':
4152 if (nocase_tail ("subroutine"))
4153 F_getit (inf);
4154 continue;
4155 case 'e':
4156 if (nocase_tail ("entry"))
4157 F_getit (inf);
4158 continue;
4159 case 'b':
4160 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4161 {
4162 dbp = skip_spaces (dbp);
4163 if (*dbp == '\0') /* assume un-named */
4164 make_tag ("blockdata", 9, TRUE,
4165 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4166 else
4167 F_getit (inf); /* look for name */
4168 }
4169 continue;
4170 }
4171 }
4172 }
4173
4174 \f
4175 /*
4176 * Ada parsing
4177 * Original code by
4178 * Philippe Waroquiers (1998)
4179 */
4180
4181 static void Ada_getit __P((FILE *, char *));
4182
4183 /* Once we are positioned after an "interesting" keyword, let's get
4184 the real tag value necessary. */
4185 static void
4186 Ada_getit (inf, name_qualifier)
4187 FILE *inf;
4188 char *name_qualifier;
4189 {
4190 register char *cp;
4191 char *name;
4192 char c;
4193
4194 while (!feof (inf))
4195 {
4196 dbp = skip_spaces (dbp);
4197 if (*dbp == '\0'
4198 || (dbp[0] == '-' && dbp[1] == '-'))
4199 {
4200 readline (&lb, inf);
4201 dbp = lb.buffer;
4202 }
4203 switch (lowcase(*dbp))
4204 {
4205 case 'b':
4206 if (nocase_tail ("body"))
4207 {
4208 /* Skipping body of procedure body or package body or ....
4209 resetting qualifier to body instead of spec. */
4210 name_qualifier = "/b";
4211 continue;
4212 }
4213 break;
4214 case 't':
4215 /* Skipping type of task type or protected type ... */
4216 if (nocase_tail ("type"))
4217 continue;
4218 break;
4219 }
4220 if (*dbp == '"')
4221 {
4222 dbp += 1;
4223 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4224 continue;
4225 }
4226 else
4227 {
4228 dbp = skip_spaces (dbp);
4229 for (cp = dbp;
4230 (*cp != '\0'
4231 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4232 cp++)
4233 continue;
4234 if (cp == dbp)
4235 return;
4236 }
4237 c = *cp;
4238 *cp = '\0';
4239 name = concat (dbp, name_qualifier, "");
4240 *cp = c;
4241 make_tag (name, strlen (name), TRUE,
4242 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4243 free (name);
4244 if (c == '"')
4245 dbp = cp + 1;
4246 return;
4247 }
4248 }
4249
4250 static void
4251 Ada_funcs (inf)
4252 FILE *inf;
4253 {
4254 bool inquote = FALSE;
4255 bool skip_till_semicolumn = FALSE;
4256
4257 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4258 {
4259 while (*dbp != '\0')
4260 {
4261 /* Skip a string i.e. "abcd". */
4262 if (inquote || (*dbp == '"'))
4263 {
4264 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4265 if (dbp != NULL)
4266 {
4267 inquote = FALSE;
4268 dbp += 1;
4269 continue; /* advance char */
4270 }
4271 else
4272 {
4273 inquote = TRUE;
4274 break; /* advance line */
4275 }
4276 }
4277
4278 /* Skip comments. */
4279 if (dbp[0] == '-' && dbp[1] == '-')
4280 break; /* advance line */
4281
4282 /* Skip character enclosed in single quote i.e. 'a'
4283 and skip single quote starting an attribute i.e. 'Image. */
4284 if (*dbp == '\'')
4285 {
4286 dbp++ ;
4287 if (*dbp != '\0')
4288 dbp++;
4289 continue;
4290 }
4291
4292 if (skip_till_semicolumn)
4293 {
4294 if (*dbp == ';')
4295 skip_till_semicolumn = FALSE;
4296 dbp++;
4297 continue; /* advance char */
4298 }
4299
4300 /* Search for beginning of a token. */
4301 if (!begtoken (*dbp))
4302 {
4303 dbp++;
4304 continue; /* advance char */
4305 }
4306
4307 /* We are at the beginning of a token. */
4308 switch (lowcase(*dbp))
4309 {
4310 case 'f':
4311 if (!packages_only && nocase_tail ("function"))
4312 Ada_getit (inf, "/f");
4313 else
4314 break; /* from switch */
4315 continue; /* advance char */
4316 case 'p':
4317 if (!packages_only && nocase_tail ("procedure"))
4318 Ada_getit (inf, "/p");
4319 else if (nocase_tail ("package"))
4320 Ada_getit (inf, "/s");
4321 else if (nocase_tail ("protected")) /* protected type */
4322 Ada_getit (inf, "/t");
4323 else
4324 break; /* from switch */
4325 continue; /* advance char */
4326
4327 case 'u':
4328 if (typedefs && !packages_only && nocase_tail ("use"))
4329 {
4330 /* when tagging types, avoid tagging use type Pack.Typename;
4331 for this, we will skip everything till a ; */
4332 skip_till_semicolumn = TRUE;
4333 continue; /* advance char */
4334 }
4335
4336 case 't':
4337 if (!packages_only && nocase_tail ("task"))
4338 Ada_getit (inf, "/k");
4339 else if (typedefs && !packages_only && nocase_tail ("type"))
4340 {
4341 Ada_getit (inf, "/t");
4342 while (*dbp != '\0')
4343 dbp += 1;
4344 }
4345 else
4346 break; /* from switch */
4347 continue; /* advance char */
4348 }
4349
4350 /* Look for the end of the token. */
4351 while (!endtoken (*dbp))
4352 dbp++;
4353
4354 } /* advance char */
4355 } /* advance line */
4356 }
4357
4358 \f
4359 /*
4360 * Unix and microcontroller assembly tag handling
4361 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4362 * Idea by Bob Weiner, Motorola Inc. (1994)
4363 */
4364 static void
4365 Asm_labels (inf)
4366 FILE *inf;
4367 {
4368 register char *cp;
4369
4370 LOOP_ON_INPUT_LINES (inf, lb, cp)
4371 {
4372 /* If first char is alphabetic or one of [_.$], test for colon
4373 following identifier. */
4374 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4375 {
4376 /* Read past label. */
4377 cp++;
4378 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4379 cp++;
4380 if (*cp == ':' || iswhite (*cp))
4381 /* Found end of label, so copy it and add it to the table. */
4382 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4383 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4384 }
4385 }
4386 }
4387
4388 \f
4389 /*
4390 * Perl support
4391 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4392 * Perl variable names: /^(my|local).../
4393 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4394 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4395 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4396 */
4397 static void
4398 Perl_functions (inf)
4399 FILE *inf;
4400 {
4401 char *package = savestr ("main"); /* current package name */
4402 register char *cp;
4403
4404 LOOP_ON_INPUT_LINES (inf, lb, cp)
4405 {
4406 cp = skip_spaces (cp);
4407
4408 if (LOOKING_AT (cp, "package"))
4409 {
4410 free (package);
4411 get_tag (cp, &package);
4412 }
4413 else if (LOOKING_AT (cp, "sub"))
4414 {
4415 char *pos;
4416 char *sp = cp;
4417
4418 while (!notinname (*cp))
4419 cp++;
4420 if (cp == sp)
4421 continue; /* nothing found */
4422 if ((pos = etags_strchr (sp, ':')) != NULL
4423 && pos < cp && pos[1] == ':')
4424 /* The name is already qualified. */
4425 make_tag (sp, cp - sp, TRUE,
4426 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4427 else
4428 /* Qualify it. */
4429 {
4430 char savechar, *name;
4431
4432 savechar = *cp;
4433 *cp = '\0';
4434 name = concat (package, "::", sp);
4435 *cp = savechar;
4436 make_tag (name, strlen(name), TRUE,
4437 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4438 free (name);
4439 }
4440 }
4441 else if (globals) /* only if we are tagging global vars */
4442 {
4443 /* Skip a qualifier, if any. */
4444 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4445 /* After "my" or "local", but before any following paren or space. */
4446 char *varstart = cp;
4447
4448 if (qual /* should this be removed? If yes, how? */
4449 && (*cp == '$' || *cp == '@' || *cp == '%'))
4450 {
4451 varstart += 1;
4452 do
4453 cp++;
4454 while (ISALNUM (*cp) || *cp == '_');
4455 }
4456 else if (qual)
4457 {
4458 /* Should be examining a variable list at this point;
4459 could insist on seeing an open parenthesis. */
4460 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4461 cp++;
4462 }
4463 else
4464 continue;
4465
4466 make_tag (varstart, cp - varstart, FALSE,
4467 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4468 }
4469 }
4470 free (package);
4471 }
4472
4473
4474 /*
4475 * Python support
4476 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4477 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4478 * More ideas by seb bacon <seb@jamkit.com> (2002)
4479 */
4480 static void
4481 Python_functions (inf)
4482 FILE *inf;
4483 {
4484 register char *cp;
4485
4486 LOOP_ON_INPUT_LINES (inf, lb, cp)
4487 {
4488 cp = skip_spaces (cp);
4489 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4490 {
4491 char *name = cp;
4492 while (!notinname (*cp) && *cp != ':')
4493 cp++;
4494 make_tag (name, cp - name, TRUE,
4495 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4496 }
4497 }
4498 }
4499
4500 \f
4501 /*
4502 * PHP support
4503 * Look for:
4504 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4505 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4506 * - /^[ \t]*define\(\"[^\"]+/
4507 * Only with --members:
4508 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4509 * Idea by Diez B. Roggisch (2001)
4510 */
4511 static void
4512 PHP_functions (inf)
4513 FILE *inf;
4514 {
4515 register char *cp, *name;
4516 bool search_identifier = FALSE;
4517
4518 LOOP_ON_INPUT_LINES (inf, lb, cp)
4519 {
4520 cp = skip_spaces (cp);
4521 name = cp;
4522 if (search_identifier
4523 && *cp != '\0')
4524 {
4525 while (!notinname (*cp))
4526 cp++;
4527 make_tag (name, cp - name, TRUE,
4528 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4529 search_identifier = FALSE;
4530 }
4531 else if (LOOKING_AT (cp, "function"))
4532 {
4533 if(*cp == '&')
4534 cp = skip_spaces (cp+1);
4535 if(*cp != '\0')
4536 {
4537 name = cp;
4538 while (!notinname (*cp))
4539 cp++;
4540 make_tag (name, cp - name, TRUE,
4541 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542 }
4543 else
4544 search_identifier = TRUE;
4545 }
4546 else if (LOOKING_AT (cp, "class"))
4547 {
4548 if (*cp != '\0')
4549 {
4550 name = cp;
4551 while (*cp != '\0' && !iswhite (*cp))
4552 cp++;
4553 make_tag (name, cp - name, FALSE,
4554 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4555 }
4556 else
4557 search_identifier = TRUE;
4558 }
4559 else if (strneq (cp, "define", 6)
4560 && (cp = skip_spaces (cp+6))
4561 && *cp++ == '('
4562 && (*cp == '"' || *cp == '\''))
4563 {
4564 char quote = *cp++;
4565 name = cp;
4566 while (*cp != quote && *cp != '\0')
4567 cp++;
4568 make_tag (name, cp - name, FALSE,
4569 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4570 }
4571 else if (members
4572 && LOOKING_AT (cp, "var")
4573 && *cp == '$')
4574 {
4575 name = cp;
4576 while (!notinname(*cp))
4577 cp++;
4578 make_tag (name, cp - name, FALSE,
4579 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4580 }
4581 }
4582 }
4583
4584 \f
4585 /*
4586 * Cobol tag functions
4587 * We could look for anything that could be a paragraph name.
4588 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4589 * Idea by Corny de Souza (1993)
4590 */
4591 static void
4592 Cobol_paragraphs (inf)
4593 FILE *inf;
4594 {
4595 register char *bp, *ep;
4596
4597 LOOP_ON_INPUT_LINES (inf, lb, bp)
4598 {
4599 if (lb.len < 9)
4600 continue;
4601 bp += 8;
4602
4603 /* If eoln, compiler option or comment ignore whole line. */
4604 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4605 continue;
4606
4607 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4608 continue;
4609 if (*ep++ == '.')
4610 make_tag (bp, ep - bp, TRUE,
4611 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4612 }
4613 }
4614
4615 \f
4616 /*
4617 * Makefile support
4618 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4619 */
4620 static void
4621 Makefile_targets (inf)
4622 FILE *inf;
4623 {
4624 register char *bp;
4625
4626 LOOP_ON_INPUT_LINES (inf, lb, bp)
4627 {
4628 if (*bp == '\t' || *bp == '#')
4629 continue;
4630 while (*bp != '\0' && *bp != '=' && *bp != ':')
4631 bp++;
4632 if (*bp == ':' || (globals && *bp == '='))
4633 {
4634 /* We should detect if there is more than one tag, but we do not.
4635 We just skip initial and final spaces. */
4636 char * namestart = skip_spaces (lb.buffer);
4637 while (--bp > namestart)
4638 if (!notinname (*bp))
4639 break;
4640 make_tag (namestart, bp - namestart + 1, TRUE,
4641 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4642 }
4643 }
4644 }
4645
4646 \f
4647 /*
4648 * Pascal parsing
4649 * Original code by Mosur K. Mohan (1989)
4650 *
4651 * Locates tags for procedures & functions. Doesn't do any type- or
4652 * var-definitions. It does look for the keyword "extern" or
4653 * "forward" immediately following the procedure statement; if found,
4654 * the tag is skipped.
4655 */
4656 static void
4657 Pascal_functions (inf)
4658 FILE *inf;
4659 {
4660 linebuffer tline; /* mostly copied from C_entries */
4661 long save_lcno;
4662 int save_lineno, namelen, taglen;
4663 char c, *name;
4664
4665 bool /* each of these flags is TRUE if: */
4666 incomment, /* point is inside a comment */
4667 inquote, /* point is inside '..' string */
4668 get_tagname, /* point is after PROCEDURE/FUNCTION
4669 keyword, so next item = potential tag */
4670 found_tag, /* point is after a potential tag */
4671 inparms, /* point is within parameter-list */
4672 verify_tag; /* point has passed the parm-list, so the
4673 next token will determine whether this
4674 is a FORWARD/EXTERN to be ignored, or
4675 whether it is a real tag */
4676
4677 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4678 name = NULL; /* keep compiler quiet */
4679 dbp = lb.buffer;
4680 *dbp = '\0';
4681 linebuffer_init (&tline);
4682
4683 incomment = inquote = FALSE;
4684 found_tag = FALSE; /* have a proc name; check if extern */
4685 get_tagname = FALSE; /* found "procedure" keyword */
4686 inparms = FALSE; /* found '(' after "proc" */
4687 verify_tag = FALSE; /* check if "extern" is ahead */
4688
4689
4690 while (!feof (inf)) /* long main loop to get next char */
4691 {
4692 c = *dbp++;
4693 if (c == '\0') /* if end of line */
4694 {
4695 readline (&lb, inf);
4696 dbp = lb.buffer;
4697 if (*dbp == '\0')
4698 continue;
4699 if (!((found_tag && verify_tag)
4700 || get_tagname))
4701 c = *dbp++; /* only if don't need *dbp pointing
4702 to the beginning of the name of
4703 the procedure or function */
4704 }
4705 if (incomment)
4706 {
4707 if (c == '}') /* within { } comments */
4708 incomment = FALSE;
4709 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4710 {
4711 dbp++;
4712 incomment = FALSE;
4713 }
4714 continue;
4715 }
4716 else if (inquote)
4717 {
4718 if (c == '\'')
4719 inquote = FALSE;
4720 continue;
4721 }
4722 else
4723 switch (c)
4724 {
4725 case '\'':
4726 inquote = TRUE; /* found first quote */
4727 continue;
4728 case '{': /* found open { comment */
4729 incomment = TRUE;
4730 continue;
4731 case '(':
4732 if (*dbp == '*') /* found open (* comment */
4733 {
4734 incomment = TRUE;
4735 dbp++;
4736 }
4737 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4738 inparms = TRUE;
4739 continue;
4740 case ')': /* end of parms list */
4741 if (inparms)
4742 inparms = FALSE;
4743 continue;
4744 case ';':
4745 if (found_tag && !inparms) /* end of proc or fn stmt */
4746 {
4747 verify_tag = TRUE;
4748 break;
4749 }
4750 continue;
4751 }
4752 if (found_tag && verify_tag && (*dbp != ' '))
4753 {
4754 /* Check if this is an "extern" declaration. */
4755 if (*dbp == '\0')
4756 continue;
4757 if (lowcase (*dbp == 'e'))
4758 {
4759 if (nocase_tail ("extern")) /* superfluous, really! */
4760 {
4761 found_tag = FALSE;
4762 verify_tag = FALSE;
4763 }
4764 }
4765 else if (lowcase (*dbp) == 'f')
4766 {
4767 if (nocase_tail ("forward")) /* check for forward reference */
4768 {
4769 found_tag = FALSE;
4770 verify_tag = FALSE;
4771 }
4772 }
4773 if (found_tag && verify_tag) /* not external proc, so make tag */
4774 {
4775 found_tag = FALSE;
4776 verify_tag = FALSE;
4777 make_tag (name, namelen, TRUE,
4778 tline.buffer, taglen, save_lineno, save_lcno);
4779 continue;
4780 }
4781 }
4782 if (get_tagname) /* grab name of proc or fn */
4783 {
4784 char *cp;
4785
4786 if (*dbp == '\0')
4787 continue;
4788
4789 /* Find block name. */
4790 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4791 continue;
4792
4793 /* Save all values for later tagging. */
4794 linebuffer_setlen (&tline, lb.len);
4795 strcpy (tline.buffer, lb.buffer);
4796 save_lineno = lineno;
4797 save_lcno = linecharno;
4798 name = tline.buffer + (dbp - lb.buffer);
4799 namelen = cp - dbp;
4800 taglen = cp - lb.buffer + 1;
4801
4802 dbp = cp; /* set dbp to e-o-token */
4803 get_tagname = FALSE;
4804 found_tag = TRUE;
4805 continue;
4806
4807 /* And proceed to check for "extern". */
4808 }
4809 else if (!incomment && !inquote && !found_tag)
4810 {
4811 /* Check for proc/fn keywords. */
4812 switch (lowcase (c))
4813 {
4814 case 'p':
4815 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4816 get_tagname = TRUE;
4817 continue;
4818 case 'f':
4819 if (nocase_tail ("unction"))
4820 get_tagname = TRUE;
4821 continue;
4822 }
4823 }
4824 } /* while not eof */
4825
4826 free (tline.buffer);
4827 }
4828
4829 \f
4830 /*
4831 * Lisp tag functions
4832 * look for (def or (DEF, quote or QUOTE
4833 */
4834
4835 static void L_getit __P((void));
4836
4837 static void
4838 L_getit ()
4839 {
4840 if (*dbp == '\'') /* Skip prefix quote */
4841 dbp++;
4842 else if (*dbp == '(')
4843 {
4844 dbp++;
4845 /* Try to skip "(quote " */
4846 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4847 /* Ok, then skip "(" before name in (defstruct (foo)) */
4848 dbp = skip_spaces (dbp);
4849 }
4850 get_tag (dbp, NULL);
4851 }
4852
4853 static void
4854 Lisp_functions (inf)
4855 FILE *inf;
4856 {
4857 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4858 {
4859 if (dbp[0] != '(')
4860 continue;
4861
4862 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4863 {
4864 dbp = skip_non_spaces (dbp);
4865 dbp = skip_spaces (dbp);
4866 L_getit ();
4867 }
4868 else
4869 {
4870 /* Check for (foo::defmumble name-defined ... */
4871 do
4872 dbp++;
4873 while (!notinname (*dbp) && *dbp != ':');
4874 if (*dbp == ':')
4875 {
4876 do
4877 dbp++;
4878 while (*dbp == ':');
4879
4880 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4881 {
4882 dbp = skip_non_spaces (dbp);
4883 dbp = skip_spaces (dbp);
4884 L_getit ();
4885 }
4886 }
4887 }
4888 }
4889 }
4890
4891 \f
4892 /*
4893 * Lua script language parsing
4894 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4895 *
4896 * "function" and "local function" are tags if they start at column 1.
4897 */
4898 static void
4899 Lua_functions (inf)
4900 FILE *inf;
4901 {
4902 register char *bp;
4903
4904 LOOP_ON_INPUT_LINES (inf, lb, bp)
4905 {
4906 if (bp[0] != 'f' && bp[0] != 'l')
4907 continue;
4908
4909 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4910
4911 if (LOOKING_AT (bp, "function"))
4912 get_tag (bp, NULL);
4913 }
4914 }
4915
4916 \f
4917 /*
4918 * Postscript tags
4919 * Just look for lines where the first character is '/'
4920 * Also look at "defineps" for PSWrap
4921 * Ideas by:
4922 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4923 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4924 */
4925 static void
4926 PS_functions (inf)
4927 FILE *inf;
4928 {
4929 register char *bp, *ep;
4930
4931 LOOP_ON_INPUT_LINES (inf, lb, bp)
4932 {
4933 if (bp[0] == '/')
4934 {
4935 for (ep = bp+1;
4936 *ep != '\0' && *ep != ' ' && *ep != '{';
4937 ep++)
4938 continue;
4939 make_tag (bp, ep - bp, TRUE,
4940 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4941 }
4942 else if (LOOKING_AT (bp, "defineps"))
4943 get_tag (bp, NULL);
4944 }
4945 }
4946
4947 \f
4948 /*
4949 * Forth tags
4950 * Ignore anything after \ followed by space or in ( )
4951 * Look for words defined by :
4952 * Look for constant, code, create, defer, value, and variable
4953 * OBP extensions: Look for buffer:, field,
4954 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4955 */
4956 static void
4957 Forth_words (inf)
4958 FILE *inf;
4959 {
4960 register char *bp;
4961
4962 LOOP_ON_INPUT_LINES (inf, lb, bp)
4963 while ((bp = skip_spaces (bp))[0] != '\0')
4964 if (bp[0] == '\\' && iswhite(bp[1]))
4965 break; /* read next line */
4966 else if (bp[0] == '(' && iswhite(bp[1]))
4967 do /* skip to ) or eol */
4968 bp++;
4969 while (*bp != ')' && *bp != '\0');
4970 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4971 || LOOKING_AT_NOCASE (bp, "constant")
4972 || LOOKING_AT_NOCASE (bp, "code")
4973 || LOOKING_AT_NOCASE (bp, "create")
4974 || LOOKING_AT_NOCASE (bp, "defer")
4975 || LOOKING_AT_NOCASE (bp, "value")
4976 || LOOKING_AT_NOCASE (bp, "variable")
4977 || LOOKING_AT_NOCASE (bp, "buffer:")
4978 || LOOKING_AT_NOCASE (bp, "field"))
4979 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4980 else
4981 bp = skip_non_spaces (bp);
4982 }
4983
4984 \f
4985 /*
4986 * Scheme tag functions
4987 * look for (def... xyzzy
4988 * (def... (xyzzy
4989 * (def ... ((...(xyzzy ....
4990 * (set! xyzzy
4991 * Original code by Ken Haase (1985?)
4992 */
4993 static void
4994 Scheme_functions (inf)
4995 FILE *inf;
4996 {
4997 register char *bp;
4998
4999 LOOP_ON_INPUT_LINES (inf, lb, bp)
5000 {
5001 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5002 {
5003 bp = skip_non_spaces (bp+4);
5004 /* Skip over open parens and white space. Don't continue past
5005 '\0'. */
5006 while (*bp && notinname (*bp))
5007 bp++;
5008 get_tag (bp, NULL);
5009 }
5010 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5011 get_tag (bp, NULL);
5012 }
5013 }
5014
5015 \f
5016 /* Find tags in TeX and LaTeX input files. */
5017
5018 /* TEX_toktab is a table of TeX control sequences that define tags.
5019 * Each entry records one such control sequence.
5020 *
5021 * Original code from who knows whom.
5022 * Ideas by:
5023 * Stefan Monnier (2002)
5024 */
5025
5026 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5027
5028 /* Default set of control sequences to put into TEX_toktab.
5029 The value of environment var TEXTAGS is prepended to this. */
5030 static char *TEX_defenv = "\
5031 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5032 :part:appendix:entry:index:def\
5033 :newcommand:renewcommand:newenvironment:renewenvironment";
5034
5035 static void TEX_mode __P((FILE *));
5036 static void TEX_decode_env __P((char *, char *));
5037
5038 static char TEX_esc = '\\';
5039 static char TEX_opgrp = '{';
5040 static char TEX_clgrp = '}';
5041
5042 /*
5043 * TeX/LaTeX scanning loop.
5044 */
5045 static void
5046 TeX_commands (inf)
5047 FILE *inf;
5048 {
5049 char *cp;
5050 linebuffer *key;
5051
5052 /* Select either \ or ! as escape character. */
5053 TEX_mode (inf);
5054
5055 /* Initialize token table once from environment. */
5056 if (TEX_toktab == NULL)
5057 TEX_decode_env ("TEXTAGS", TEX_defenv);
5058
5059 LOOP_ON_INPUT_LINES (inf, lb, cp)
5060 {
5061 /* Look at each TEX keyword in line. */
5062 for (;;)
5063 {
5064 /* Look for a TEX escape. */
5065 while (*cp++ != TEX_esc)
5066 if (cp[-1] == '\0' || cp[-1] == '%')
5067 goto tex_next_line;
5068
5069 for (key = TEX_toktab; key->buffer != NULL; key++)
5070 if (strneq (cp, key->buffer, key->len))
5071 {
5072 register char *p;
5073 int namelen, linelen;
5074 bool opgrp = FALSE;
5075
5076 cp = skip_spaces (cp + key->len);
5077 if (*cp == TEX_opgrp)
5078 {
5079 opgrp = TRUE;
5080 cp++;
5081 }
5082 for (p = cp;
5083 (!iswhite (*p) && *p != '#' &&
5084 *p != TEX_opgrp && *p != TEX_clgrp);
5085 p++)
5086 continue;
5087 namelen = p - cp;
5088 linelen = lb.len;
5089 if (!opgrp || *p == TEX_clgrp)
5090 {
5091 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5092 p++;
5093 linelen = p - lb.buffer + 1;
5094 }
5095 make_tag (cp, namelen, TRUE,
5096 lb.buffer, linelen, lineno, linecharno);
5097 goto tex_next_line; /* We only tag a line once */
5098 }
5099 }
5100 tex_next_line:
5101 ;
5102 }
5103 }
5104
5105 #define TEX_LESC '\\'
5106 #define TEX_SESC '!'
5107
5108 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5109 chars accordingly. */
5110 static void
5111 TEX_mode (inf)
5112 FILE *inf;
5113 {
5114 int c;
5115
5116 while ((c = getc (inf)) != EOF)
5117 {
5118 /* Skip to next line if we hit the TeX comment char. */
5119 if (c == '%')
5120 while (c != '\n' && c != EOF)
5121 c = getc (inf);
5122 else if (c == TEX_LESC || c == TEX_SESC )
5123 break;
5124 }
5125
5126 if (c == TEX_LESC)
5127 {
5128 TEX_esc = TEX_LESC;
5129 TEX_opgrp = '{';
5130 TEX_clgrp = '}';
5131 }
5132 else
5133 {
5134 TEX_esc = TEX_SESC;
5135 TEX_opgrp = '<';
5136 TEX_clgrp = '>';
5137 }
5138 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5139 No attempt is made to correct the situation. */
5140 rewind (inf);
5141 }
5142
5143 /* Read environment and prepend it to the default string.
5144 Build token table. */
5145 static void
5146 TEX_decode_env (evarname, defenv)
5147 char *evarname;
5148 char *defenv;
5149 {
5150 register char *env, *p;
5151 int i, len;
5152
5153 /* Append default string to environment. */
5154 env = getenv (evarname);
5155 if (!env)
5156 env = defenv;
5157 else
5158 {
5159 char *oldenv = env;
5160 env = concat (oldenv, defenv, "");
5161 }
5162
5163 /* Allocate a token table */
5164 for (len = 1, p = env; p;)
5165 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5166 len++;
5167 TEX_toktab = xnew (len, linebuffer);
5168
5169 /* Unpack environment string into token table. Be careful about */
5170 /* zero-length strings (leading ':', "::" and trailing ':') */
5171 for (i = 0; *env != '\0';)
5172 {
5173 p = etags_strchr (env, ':');
5174 if (!p) /* End of environment string. */
5175 p = env + strlen (env);
5176 if (p - env > 0)
5177 { /* Only non-zero strings. */
5178 TEX_toktab[i].buffer = savenstr (env, p - env);
5179 TEX_toktab[i].len = p - env;
5180 i++;
5181 }
5182 if (*p)
5183 env = p + 1;
5184 else
5185 {
5186 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5187 TEX_toktab[i].len = 0;
5188 break;
5189 }
5190 }
5191 }
5192
5193 \f
5194 /* Texinfo support. Dave Love, Mar. 2000. */
5195 static void
5196 Texinfo_nodes (inf)
5197 FILE * inf;
5198 {
5199 char *cp, *start;
5200 LOOP_ON_INPUT_LINES (inf, lb, cp)
5201 if (LOOKING_AT (cp, "@node"))
5202 {
5203 start = cp;
5204 while (*cp != '\0' && *cp != ',')
5205 cp++;
5206 make_tag (start, cp - start, TRUE,
5207 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5208 }
5209 }
5210
5211 \f
5212 /*
5213 * HTML support.
5214 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5215 * Contents of <a name=xxx> are tags with name xxx.
5216 *
5217 * Francesco Potortì, 2002.
5218 */
5219 static void
5220 HTML_labels (inf)
5221 FILE * inf;
5222 {
5223 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5224 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5225 bool intag = FALSE; /* inside an html tag, looking for ID= */
5226 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5227 char *end;
5228
5229
5230 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5231
5232 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5233 for (;;) /* loop on the same line */
5234 {
5235 if (skiptag) /* skip HTML tag */
5236 {
5237 while (*dbp != '\0' && *dbp != '>')
5238 dbp++;
5239 if (*dbp == '>')
5240 {
5241 dbp += 1;
5242 skiptag = FALSE;
5243 continue; /* look on the same line */
5244 }
5245 break; /* go to next line */
5246 }
5247
5248 else if (intag) /* look for "name=" or "id=" */
5249 {
5250 while (*dbp != '\0' && *dbp != '>'
5251 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5252 dbp++;
5253 if (*dbp == '\0')
5254 break; /* go to next line */
5255 if (*dbp == '>')
5256 {
5257 dbp += 1;
5258 intag = FALSE;
5259 continue; /* look on the same line */
5260 }
5261 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5262 || LOOKING_AT_NOCASE (dbp, "id="))
5263 {
5264 bool quoted = (dbp[0] == '"');
5265
5266 if (quoted)
5267 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5268 continue;
5269 else
5270 for (end = dbp; *end != '\0' && intoken (*end); end++)
5271 continue;
5272 linebuffer_setlen (&token_name, end - dbp);
5273 strncpy (token_name.buffer, dbp, end - dbp);
5274 token_name.buffer[end - dbp] = '\0';
5275
5276 dbp = end;
5277 intag = FALSE; /* we found what we looked for */
5278 skiptag = TRUE; /* skip to the end of the tag */
5279 getnext = TRUE; /* then grab the text */
5280 continue; /* look on the same line */
5281 }
5282 dbp += 1;
5283 }
5284
5285 else if (getnext) /* grab next tokens and tag them */
5286 {
5287 dbp = skip_spaces (dbp);
5288 if (*dbp == '\0')
5289 break; /* go to next line */
5290 if (*dbp == '<')
5291 {
5292 intag = TRUE;
5293 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5294 continue; /* look on the same line */
5295 }
5296
5297 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5298 continue;
5299 make_tag (token_name.buffer, token_name.len, TRUE,
5300 dbp, end - dbp, lineno, linecharno);
5301 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5302 getnext = FALSE;
5303 break; /* go to next line */
5304 }
5305
5306 else /* look for an interesting HTML tag */
5307 {
5308 while (*dbp != '\0' && *dbp != '<')
5309 dbp++;
5310 if (*dbp == '\0')
5311 break; /* go to next line */
5312 intag = TRUE;
5313 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5314 {
5315 inanchor = TRUE;
5316 continue; /* look on the same line */
5317 }
5318 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5319 || LOOKING_AT_NOCASE (dbp, "<h1>")
5320 || LOOKING_AT_NOCASE (dbp, "<h2>")
5321 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5322 {
5323 intag = FALSE;
5324 getnext = TRUE;
5325 continue; /* look on the same line */
5326 }
5327 dbp += 1;
5328 }
5329 }
5330 }
5331
5332 \f
5333 /*
5334 * Prolog support
5335 *
5336 * Assumes that the predicate or rule starts at column 0.
5337 * Only the first clause of a predicate or rule is added.
5338 * Original code by Sunichirou Sugou (1989)
5339 * Rewritten by Anders Lindgren (1996)
5340 */
5341 static int prolog_pr __P((char *, char *));
5342 static void prolog_skip_comment __P((linebuffer *, FILE *));
5343 static int prolog_atom __P((char *, int));
5344
5345 static void
5346 Prolog_functions (inf)
5347 FILE *inf;
5348 {
5349 char *cp, *last;
5350 int len;
5351 int allocated;
5352
5353 allocated = 0;
5354 len = 0;
5355 last = NULL;
5356
5357 LOOP_ON_INPUT_LINES (inf, lb, cp)
5358 {
5359 if (cp[0] == '\0') /* Empty line */
5360 continue;
5361 else if (iswhite (cp[0])) /* Not a predicate */
5362 continue;
5363 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5364 prolog_skip_comment (&lb, inf);
5365 else if ((len = prolog_pr (cp, last)) > 0)
5366 {
5367 /* Predicate or rule. Store the function name so that we
5368 only generate a tag for the first clause. */
5369 if (last == NULL)
5370 last = xnew(len + 1, char);
5371 else if (len + 1 > allocated)
5372 xrnew (last, len + 1, char);
5373 allocated = len + 1;
5374 strncpy (last, cp, len);
5375 last[len] = '\0';
5376 }
5377 }
5378 free (last);
5379 }
5380
5381
5382 static void
5383 prolog_skip_comment (plb, inf)
5384 linebuffer *plb;
5385 FILE *inf;
5386 {
5387 char *cp;
5388
5389 do
5390 {
5391 for (cp = plb->buffer; *cp != '\0'; cp++)
5392 if (cp[0] == '*' && cp[1] == '/')
5393 return;
5394 readline (plb, inf);
5395 }
5396 while (!feof(inf));
5397 }
5398
5399 /*
5400 * A predicate or rule definition is added if it matches:
5401 * <beginning of line><Prolog Atom><whitespace>(
5402 * or <beginning of line><Prolog Atom><whitespace>:-
5403 *
5404 * It is added to the tags database if it doesn't match the
5405 * name of the previous clause header.
5406 *
5407 * Return the size of the name of the predicate or rule, or 0 if no
5408 * header was found.
5409 */
5410 static int
5411 prolog_pr (s, last)
5412 char *s;
5413 char *last; /* Name of last clause. */
5414 {
5415 int pos;
5416 int len;
5417
5418 pos = prolog_atom (s, 0);
5419 if (pos < 1)
5420 return 0;
5421
5422 len = pos;
5423 pos = skip_spaces (s + pos) - s;
5424
5425 if ((s[pos] == '.'
5426 || (s[pos] == '(' && (pos += 1))
5427 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5428 && (last == NULL /* save only the first clause */
5429 || len != (int)strlen (last)
5430 || !strneq (s, last, len)))
5431 {
5432 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5433 return len;
5434 }
5435 else
5436 return 0;
5437 }
5438
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * The atom is looked for at S + POS.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  for (p++;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* the escaped character is taken as is */
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start);
        p++;                    /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5497
5498 \f
5499 /*
5500 * Support for Erlang
5501 *
5502 * Generates tags for functions, defines, and records.
5503 * Assumes that Erlang functions start at column 0.
5504 * Original code by Anders Lindgren (1996)
5505 */
5506 static int erlang_func __P((char *, char *));
5507 static void erlang_attribute __P((char *));
5508 static int erlang_atom __P((char *));
5509
5510 static void
5511 Erlang_functions (inf)
5512 FILE *inf;
5513 {
5514 char *cp, *last;
5515 int len;
5516 int allocated;
5517
5518 allocated = 0;
5519 len = 0;
5520 last = NULL;
5521
5522 LOOP_ON_INPUT_LINES (inf, lb, cp)
5523 {
5524 if (cp[0] == '\0') /* Empty line */
5525 continue;
5526 else if (iswhite (cp[0])) /* Not function nor attribute */
5527 continue;
5528 else if (cp[0] == '%') /* comment */
5529 continue;
5530 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5531 continue;
5532 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5533 {
5534 erlang_attribute (cp);
5535 if (last != NULL)
5536 {
5537 free (last);
5538 last = NULL;
5539 }
5540 }
5541 else if ((len = erlang_func (cp, last)) > 0)
5542 {
5543 /*
5544 * Function. Store the function name so that we only
5545 * generates a tag for the first clause.
5546 */
5547 if (last == NULL)
5548 last = xnew (len + 1, char);
5549 else if (len + 1 > allocated)
5550 xrnew (last, len + 1, char);
5551 allocated = len + 1;
5552 strncpy (last, cp, len);
5553 last[len] = '\0';
5554 }
5555 }
5556 free (last);
5557 }
5558
5559
5560 /*
5561 * A function definition is added if it matches:
5562 * <beginning of line><Erlang Atom><whitespace>(
5563 *
5564 * It is added to the tags database if it doesn't match the
5565 * name of the previous clause header.
5566 *
5567 * Return the size of the name of the function, or 0 if no function
5568 * was found.
5569 */
5570 static int
5571 erlang_func (s, last)
5572 char *s;
5573 char *last; /* Name of last clause. */
5574 {
5575 int pos;
5576 int len;
5577
5578 pos = erlang_atom (s);
5579 if (pos < 1)
5580 return 0;
5581
5582 len = pos;
5583 pos = skip_spaces (s + pos) - s;
5584
5585 /* Save only the first clause. */
5586 if (s[pos++] == '('
5587 && (last == NULL
5588 || len != (int)strlen (last)
5589 || !strneq (s, last, len)))
5590 {
5591 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5592 return len;
5593 }
5594
5595 return 0;
5596 }
5597
5598
5599 /*
5600 * Handle attributes. Currently, tags are generated for defines
5601 * and records.
5602 *
5603 * They are on the form:
5604 * -define(foo, bar).
5605 * -define(Foo(M, N), M+N).
5606 * -record(graph, {vtab = notable, cyclic = true}).
5607 */
5608 static void
5609 erlang_attribute (s)
5610 char *s;
5611 {
5612 char *cp = s;
5613
5614 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5615 && *cp++ == '(')
5616 {
5617 int len = erlang_atom (skip_spaces (cp));
5618 if (len > 0)
5619 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5620 }
5621 return;
5622 }
5623
5624
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, or when a quoted atom is not closed on
 * this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              /* A backslash takes the next character with it.  */
              p++;
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* the closing quote */
    }

  return (int) (p - s);
}
5653
5654 \f
5655 static char *scan_separators __P((char *));
5656 static void add_regex __P((char *, language *));
5657 static char *substitute __P((char *, char *, struct re_registers *));
5658
5659 /*
5660 * Take a string like "/blah/" and turn it into "blah", verifying
5661 * that the first and last characters are the same, and handling
5662 * quoted separator characters. Actually, stops on the occurrence of
5663 * an unquoted separator. Also process \t, \n, etc. and turn into
5664 * appropriate characters. Works in place. Null terminates name string.
5665 * Returns pointer to terminating separator, or NULL for
5666 * unterminated regexps.
5667 */
5668 static char *
5669 scan_separators (name)
5670 char *name;
5671 {
5672 char sep = name[0];
5673 char *copyto = name;
5674 bool quoted = FALSE;
5675
5676 for (++name; *name != '\0'; ++name)
5677 {
5678 if (quoted)
5679 {
5680 switch (*name)
5681 {
5682 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5683 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5684 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5685 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5686 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5687 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5688 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5689 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5690 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5691 default:
5692 if (*name == sep)
5693 *copyto++ = sep;
5694 else
5695 {
5696 /* Something else is quoted, so preserve the quote. */
5697 *copyto++ = '\\';
5698 *copyto++ = *name;
5699 }
5700 break;
5701 }
5702 quoted = FALSE;
5703 }
5704 else if (*name == '\\')
5705 quoted = TRUE;
5706 else if (*name == sep)
5707 break;
5708 else
5709 *copyto++ = *name;
5710 }
5711 if (*name != sep)
5712 name = NULL; /* signal unterminated regexp */
5713
5714 /* Terminate copied string. */
5715 *copyto = '\0';
5716 return name;
5717 }
5718
5719 /* Look at the argument of --regex or --no-regex and do the right
5720 thing. Same for each line of a regexp file. */
5721 static void
5722 analyse_regex (regex_arg)
5723 char *regex_arg;
5724 {
5725 if (regex_arg == NULL)
5726 {
5727 free_regexps (); /* --no-regex: remove existing regexps */
5728 return;
5729 }
5730
5731 /* A real --regexp option or a line in a regexp file. */
5732 switch (regex_arg[0])
5733 {
5734 /* Comments in regexp file or null arg to --regex. */
5735 case '\0':
5736 case ' ':
5737 case '\t':
5738 break;
5739
5740 /* Read a regex file. This is recursive and may result in a
5741 loop, which will stop when the file descriptors are exhausted. */
5742 case '@':
5743 {
5744 FILE *regexfp;
5745 linebuffer regexbuf;
5746 char *regexfile = regex_arg + 1;
5747
5748 /* regexfile is a file containing regexps, one per line. */
5749 regexfp = fopen (regexfile, "r");
5750 if (regexfp == NULL)
5751 {
5752 pfatal (regexfile);
5753 return;
5754 }
5755 linebuffer_init (&regexbuf);
5756 while (readline_internal (&regexbuf, regexfp) > 0)
5757 analyse_regex (regexbuf.buffer);
5758 free (regexbuf.buffer);
5759 fclose (regexfp);
5760 }
5761 break;
5762
5763 /* Regexp to be used for a specific language only. */
5764 case '{':
5765 {
5766 language *lang;
5767 char *lang_name = regex_arg + 1;
5768 char *cp;
5769
5770 for (cp = lang_name; *cp != '}'; cp++)
5771 if (*cp == '\0')
5772 {
5773 error ("unterminated language name in regex: %s", regex_arg);
5774 return;
5775 }
5776 *cp++ = '\0';
5777 lang = get_language_from_langname (lang_name);
5778 if (lang == NULL)
5779 return;
5780 add_regex (cp, lang);
5781 }
5782 break;
5783
5784 /* Regexp to be used for any language. */
5785 default:
5786 add_regex (regex_arg, NULL);
5787 break;
5788 }
5789 }
5790
5791 /* Separate the regexp pattern, compile it,
5792 and care for optional name and modifiers. */
5793 static void
5794 add_regex (regexp_pattern, lang)
5795 char *regexp_pattern;
5796 language *lang;
5797 {
5798 static struct re_pattern_buffer zeropattern;
5799 char sep, *pat, *name, *modifiers;
5800 const char *err;
5801 struct re_pattern_buffer *patbuf;
5802 regexp *rp;
5803 bool
5804 force_explicit_name = TRUE, /* do not use implicit tag names */
5805 ignore_case = FALSE, /* case is significant */
5806 multi_line = FALSE, /* matches are done one line at a time */
5807 single_line = FALSE; /* dot does not match newline */
5808
5809
5810 if (strlen(regexp_pattern) < 3)
5811 {
5812 error ("null regexp", (char *)NULL);
5813 return;
5814 }
5815 sep = regexp_pattern[0];
5816 name = scan_separators (regexp_pattern);
5817 if (name == NULL)
5818 {
5819 error ("%s: unterminated regexp", regexp_pattern);
5820 return;
5821 }
5822 if (name[1] == sep)
5823 {
5824 error ("null name for regexp \"%s\"", regexp_pattern);
5825 return;
5826 }
5827 modifiers = scan_separators (name);
5828 if (modifiers == NULL) /* no terminating separator --> no name */
5829 {
5830 modifiers = name;
5831 name = "";
5832 }
5833 else
5834 modifiers += 1; /* skip separator */
5835
5836 /* Parse regex modifiers. */
5837 for (; modifiers[0] != '\0'; modifiers++)
5838 switch (modifiers[0])
5839 {
5840 case 'N':
5841 if (modifiers == name)
5842 error ("forcing explicit tag name but no name, ignoring", NULL);
5843 force_explicit_name = TRUE;
5844 break;
5845 case 'i':
5846 ignore_case = TRUE;
5847 break;
5848 case 's':
5849 single_line = TRUE;
5850 /* FALLTHRU */
5851 case 'm':
5852 multi_line = TRUE;
5853 need_filebuf = TRUE;
5854 break;
5855 default:
5856 {
5857 char wrongmod [2];
5858 wrongmod[0] = modifiers[0];
5859 wrongmod[1] = '\0';
5860 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5861 }
5862 break;
5863 }
5864
5865 patbuf = xnew (1, struct re_pattern_buffer);
5866 *patbuf = zeropattern;
5867 if (ignore_case)
5868 {
5869 static char lc_trans[CHARS];
5870 int i;
5871 for (i = 0; i < CHARS; i++)
5872 lc_trans[i] = lowcase (i);
5873 patbuf->translate = lc_trans; /* translation table to fold case */
5874 }
5875
5876 if (multi_line)
5877 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5878 else
5879 pat = regexp_pattern;
5880
5881 if (single_line)
5882 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5883 else
5884 re_set_syntax (RE_SYNTAX_EMACS);
5885
5886 err = re_compile_pattern (pat, strlen (pat), patbuf);
5887 if (multi_line)
5888 free (pat);
5889 if (err != NULL)
5890 {
5891 error ("%s while compiling pattern", err);
5892 return;
5893 }
5894
5895 rp = p_head;
5896 p_head = xnew (1, regexp);
5897 p_head->pattern = savestr (regexp_pattern);
5898 p_head->p_next = rp;
5899 p_head->lang = lang;
5900 p_head->pat = patbuf;
5901 p_head->name = savestr (name);
5902 p_head->error_signaled = FALSE;
5903 p_head->force_explicit_name = force_explicit_name;
5904 p_head->ignore_case = ignore_case;
5905 p_head->multi_line = multi_line;
5906 }
5907
5908 /*
5909 * Do the substitutions indicated by the regular expression and
5910 * arguments.
5911 */
5912 static char *
5913 substitute (in, out, regs)
5914 char *in, *out;
5915 struct re_registers *regs;
5916 {
5917 char *result, *t;
5918 int size, dig, diglen;
5919
5920 result = NULL;
5921 size = strlen (out);
5922
5923 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5924 if (out[size - 1] == '\\')
5925 fatal ("pattern error in \"%s\"", out);
5926 for (t = etags_strchr (out, '\\');
5927 t != NULL;
5928 t = etags_strchr (t + 2, '\\'))
5929 if (ISDIGIT (t[1]))
5930 {
5931 dig = t[1] - '0';
5932 diglen = regs->end[dig] - regs->start[dig];
5933 size += diglen - 2;
5934 }
5935 else
5936 size -= 1;
5937
5938 /* Allocate space and do the substitutions. */
5939 assert (size >= 0);
5940 result = xnew (size + 1, char);
5941
5942 for (t = result; *out != '\0'; out++)
5943 if (*out == '\\' && ISDIGIT (*++out))
5944 {
5945 dig = *out - '0';
5946 diglen = regs->end[dig] - regs->start[dig];
5947 strncpy (t, in + regs->start[dig], diglen);
5948 t += diglen;
5949 }
5950 else
5951 *t++ = *out;
5952 *t = '\0';
5953
5954 assert (t <= result + size);
5955 assert (t - result == (int)strlen (result));
5956
5957 return result;
5958 }
5959
5960 /* Deallocate all regexps. */
5961 static void
5962 free_regexps ()
5963 {
5964 regexp *rp;
5965 while (p_head != NULL)
5966 {
5967 rp = p_head->p_next;
5968 free (p_head->pattern);
5969 free (p_head->name);
5970 free (p_head);
5971 p_head = rp;
5972 }
5973 return;
5974 }
5975
5976 /*
5977 * Reads the whole file as a single string from `filebuf' and looks for
5978 * multi-line regular expressions, creating tags on matches.
5979 * readline already dealt with normal regexps.
5980 *
5981 * Idea by Ben Wing <ben@666.com> (2002).
5982 */
5983 static void
5984 regex_tag_multiline ()
5985 {
5986 char *buffer = filebuf.buffer;
5987 regexp *rp;
5988 char *name;
5989
5990 for (rp = p_head; rp != NULL; rp = rp->p_next)
5991 {
5992 int match = 0;
5993
5994 if (!rp->multi_line)
5995 continue; /* skip normal regexps */
5996
5997 /* Generic initialisations before parsing file from memory. */
5998 lineno = 1; /* reset global line number */
5999 charno = 0; /* reset global char number */
6000 linecharno = 0; /* reset global char number of line start */
6001
6002 /* Only use generic regexps or those for the current language. */
6003 if (rp->lang != NULL && rp->lang != curfdp->lang)
6004 continue;
6005
6006 while (match >= 0 && match < filebuf.len)
6007 {
6008 match = re_search (rp->pat, buffer, filebuf.len, charno,
6009 filebuf.len - match, &rp->regs);
6010 switch (match)
6011 {
6012 case -2:
6013 /* Some error. */
6014 if (!rp->error_signaled)
6015 {
6016 error ("regexp stack overflow while matching \"%s\"",
6017 rp->pattern);
6018 rp->error_signaled = TRUE;
6019 }
6020 break;
6021 case -1:
6022 /* No match. */
6023 break;
6024 default:
6025 if (match == rp->regs.end[0])
6026 {
6027 if (!rp->error_signaled)
6028 {
6029 error ("regexp matches the empty string: \"%s\"",
6030 rp->pattern);
6031 rp->error_signaled = TRUE;
6032 }
6033 match = -3; /* exit from while loop */
6034 break;
6035 }
6036
6037 /* Match occurred. Construct a tag. */
6038 while (charno < rp->regs.end[0])
6039 if (buffer[charno++] == '\n')
6040 lineno++, linecharno = charno;
6041 name = rp->name;
6042 if (name[0] == '\0')
6043 name = NULL;
6044 else /* make a named tag */
6045 name = substitute (buffer, rp->name, &rp->regs);
6046 if (rp->force_explicit_name)
6047 /* Force explicit tag name, if a name is there. */
6048 pfnote (name, TRUE, buffer + linecharno,
6049 charno - linecharno + 1, lineno, linecharno);
6050 else
6051 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6052 charno - linecharno + 1, lineno, linecharno);
6053 break;
6054 }
6055 }
6056 }
6057 }
6058
6059 \f
6060 static bool
6061 nocase_tail (cp)
6062 char *cp;
6063 {
6064 register int len = 0;
6065
6066 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6067 cp++, len++;
6068 if (*cp == '\0' && !intoken (dbp[len]))
6069 {
6070 dbp += len;
6071 return TRUE;
6072 }
6073 return FALSE;
6074 }
6075
6076 static void
6077 get_tag (bp, namepp)
6078 register char *bp;
6079 char **namepp;
6080 {
6081 register char *cp = bp;
6082
6083 if (*bp != '\0')
6084 {
6085 /* Go till you get to white space or a syntactic break */
6086 for (cp = bp + 1; !notinname (*cp); cp++)
6087 continue;
6088 make_tag (bp, cp - bp, TRUE,
6089 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6090 }
6091
6092 if (namepp != NULL)
6093 *namepp = savenstr (bp, cp - bp);
6094 }
6095
6096 /*
6097 * Read a line of text from `stream' into `lbp', excluding the
6098 * newline or CR-NL, if any. Return the number of characters read from
6099 * `stream', which is the length of the line including the newline.
6100 *
6101 * On DOS or Windows we do not count the CR character, if any before the
6102 * NL, in the returned length; this mirrors the behavior of Emacs on those
6103 * platforms (for text files, it translates CR-NL to NL as it reads in the
6104 * file).
6105 *
6106 * If multi-line regular expressions are requested, each line read is
6107 * appended to `filebuf'.
6108 */
6109 static long
6110 readline_internal (lbp, stream)
6111 linebuffer *lbp;
6112 register FILE *stream;
6113 {
6114 char *buffer = lbp->buffer;
6115 register char *p = lbp->buffer;
6116 register char *pend;
6117 int chars_deleted;
6118
6119 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6120
6121 for (;;)
6122 {
6123 register int c = getc (stream);
6124 if (p == pend)
6125 {
6126 /* We're at the end of linebuffer: expand it. */
6127 lbp->size *= 2;
6128 xrnew (buffer, lbp->size, char);
6129 p += buffer - lbp->buffer;
6130 pend = buffer + lbp->size;
6131 lbp->buffer = buffer;
6132 }
6133 if (c == EOF)
6134 {
6135 *p = '\0';
6136 chars_deleted = 0;
6137 break;
6138 }
6139 if (c == '\n')
6140 {
6141 if (p > buffer && p[-1] == '\r')
6142 {
6143 p -= 1;
6144 #ifdef DOS_NT
6145 /* Assume CRLF->LF translation will be performed by Emacs
6146 when loading this file, so CRs won't appear in the buffer.
6147 It would be cleaner to compensate within Emacs;
6148 however, Emacs does not know how many CRs were deleted
6149 before any given point in the file. */
6150 chars_deleted = 1;
6151 #else
6152 chars_deleted = 2;
6153 #endif
6154 }
6155 else
6156 {
6157 chars_deleted = 1;
6158 }
6159 *p = '\0';
6160 break;
6161 }
6162 *p++ = c;
6163 }
6164 lbp->len = p - buffer;
6165
6166 if (need_filebuf /* we need filebuf for multi-line regexps */
6167 && chars_deleted > 0) /* not at EOF */
6168 {
6169 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6170 {
6171 /* Expand filebuf. */
6172 filebuf.size *= 2;
6173 xrnew (filebuf.buffer, filebuf.size, char);
6174 }
6175 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6176 filebuf.len += lbp->len;
6177 filebuf.buffer[filebuf.len++] = '\n';
6178 filebuf.buffer[filebuf.len] = '\0';
6179 }
6180
6181 return lbp->len + chars_deleted;
6182 }
6183
6184 /*
6185 * Like readline_internal, above, but in addition try to match the
6186 * input line against relevant regular expressions and manage #line
6187 * directives.
6188 */
6189 static void
6190 readline (lbp, stream)
6191 linebuffer *lbp;
6192 FILE *stream;
6193 {
6194 long result;
6195
6196 linecharno = charno; /* update global char number of line start */
6197 result = readline_internal (lbp, stream); /* read line */
6198 lineno += 1; /* increment global line number */
6199 charno += result; /* increment global char number */
6200
6201 /* Honour #line directives. */
6202 if (!no_line_directive)
6203 {
6204 static bool discard_until_line_directive;
6205
6206 /* Check whether this is a #line directive. */
6207 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6208 {
6209 unsigned int lno;
6210 int start = 0;
6211
6212 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6213 && start > 0) /* double quote character found */
6214 {
6215 char *endp = lbp->buffer + start;
6216
6217 while ((endp = etags_strchr (endp, '"')) != NULL
6218 && endp[-1] == '\\')
6219 endp++;
6220 if (endp != NULL)
6221 /* Ok, this is a real #line directive. Let's deal with it. */
6222 {
6223 char *taggedabsname; /* absolute name of original file */
6224 char *taggedfname; /* name of original file as given */
6225 char *name; /* temp var */
6226
6227 discard_until_line_directive = FALSE; /* found it */
6228 name = lbp->buffer + start;
6229 *endp = '\0';
6230 canonicalize_filename (name);
6231 taggedabsname = absolute_filename (name, tagfiledir);
6232 if (filename_is_absolute (name)
6233 || filename_is_absolute (curfdp->infname))
6234 taggedfname = savestr (taggedabsname);
6235 else
6236 taggedfname = relative_filename (taggedabsname,tagfiledir);
6237
6238 if (streq (curfdp->taggedfname, taggedfname))
6239 /* The #line directive is only a line number change. We
6240 deal with this afterwards. */
6241 free (taggedfname);
6242 else
6243 /* The tags following this #line directive should be
6244 attributed to taggedfname. In order to do this, set
6245 curfdp accordingly. */
6246 {
6247 fdesc *fdp; /* file description pointer */
6248
6249 /* Go look for a file description already set up for the
6250 file indicated in the #line directive. If there is
6251 one, use it from now until the next #line
6252 directive. */
6253 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6254 if (streq (fdp->infname, curfdp->infname)
6255 && streq (fdp->taggedfname, taggedfname))
6256 /* If we remove the second test above (after the &&)
6257 then all entries pertaining to the same file are
6258 coalesced in the tags file. If we use it, then
6259 entries pertaining to the same file but generated
6260 from different files (via #line directives) will
6261 go into separate sections in the tags file. These
6262 alternatives look equivalent. The first one
6263 destroys some apparently useless information. */
6264 {
6265 curfdp = fdp;
6266 free (taggedfname);
6267 break;
6268 }
6269 /* Else, if we already tagged the real file, skip all
6270 input lines until the next #line directive. */
6271 if (fdp == NULL) /* not found */
6272 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6273 if (streq (fdp->infabsname, taggedabsname))
6274 {
6275 discard_until_line_directive = TRUE;
6276 free (taggedfname);
6277 break;
6278 }
6279 /* Else create a new file description and use that from
6280 now on, until the next #line directive. */
6281 if (fdp == NULL) /* not found */
6282 {
6283 fdp = fdhead;
6284 fdhead = xnew (1, fdesc);
6285 *fdhead = *curfdp; /* copy curr. file description */
6286 fdhead->next = fdp;
6287 fdhead->infname = savestr (curfdp->infname);
6288 fdhead->infabsname = savestr (curfdp->infabsname);
6289 fdhead->infabsdir = savestr (curfdp->infabsdir);
6290 fdhead->taggedfname = taggedfname;
6291 fdhead->usecharno = FALSE;
6292 fdhead->prop = NULL;
6293 fdhead->written = FALSE;
6294 curfdp = fdhead;
6295 }
6296 }
6297 free (taggedabsname);
6298 lineno = lno - 1;
6299 readline (lbp, stream);
6300 return;
6301 } /* if a real #line directive */
6302 } /* if #line is followed by a number */
6303 } /* if line begins with "#line " */
6304
6305 /* If we are here, no #line directive was found. */
6306 if (discard_until_line_directive)
6307 {
6308 if (result > 0)
6309 {
6310 /* Do a tail recursion on ourselves, thus discarding the contents
6311 of the line buffer. */
6312 readline (lbp, stream);
6313 return;
6314 }
6315 /* End of file. */
6316 discard_until_line_directive = FALSE;
6317 return;
6318 }
6319 } /* if #line directives should be considered */
6320
6321 {
6322 int match;
6323 regexp *rp;
6324 char *name;
6325
6326 /* Match against relevant regexps. */
6327 if (lbp->len > 0)
6328 for (rp = p_head; rp != NULL; rp = rp->p_next)
6329 {
6330 /* Only use generic regexps or those for the current language.
6331 Also do not use multiline regexps, which is the job of
6332 regex_tag_multiline. */
6333 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6334 || rp->multi_line)
6335 continue;
6336
6337 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6338 switch (match)
6339 {
6340 case -2:
6341 /* Some error. */
6342 if (!rp->error_signaled)
6343 {
6344 error ("regexp stack overflow while matching \"%s\"",
6345 rp->pattern);
6346 rp->error_signaled = TRUE;
6347 }
6348 break;
6349 case -1:
6350 /* No match. */
6351 break;
6352 case 0:
6353 /* Empty string matched. */
6354 if (!rp->error_signaled)
6355 {
6356 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6357 rp->error_signaled = TRUE;
6358 }
6359 break;
6360 default:
6361 /* Match occurred. Construct a tag. */
6362 name = rp->name;
6363 if (name[0] == '\0')
6364 name = NULL;
6365 else /* make a named tag */
6366 name = substitute (lbp->buffer, rp->name, &rp->regs);
6367 if (rp->force_explicit_name)
6368 /* Force explicit tag name, if a name is there. */
6369 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6370 else
6371 make_tag (name, strlen (name), TRUE,
6372 lbp->buffer, match, lineno, linecharno);
6373 break;
6374 }
6375 }
6376 }
6377 }
6378
6379 \f
/*
 * Duplicate the string CP: return a pointer to newly allocated space
 * of size strlen(cp)+1 that holds a copy of it.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6390
6391 /*
6392 * Return a pointer to a space of size LEN+1 allocated with xnew where
6393 * the string CP has been copied for at most the first LEN characters.
6394 */
6395 static char *
6396 savenstr (cp, len)
6397 char *cp;
6398 int len;
6399 {
6400 register char *dp;
6401
6402 dp = xnew (len + 1, char);
6403 strncpy (dp, cp, len);
6404 dp[len] = '\0';
6405 return dp;
6406 }
6407
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating null counts
 * as part of the string, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6429
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating null counts
 * as part of the string, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6448
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * The result is zero when the strings are equal, otherwise it is the
 * difference between the first two characters that differ (taken in
 * lower case when both are letters).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference, or at the end of S1 (at which
         point a zero difference means S2 ends here as well).  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6469
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of each string are looked at, and the result is zero when
 * they are all equal (in particular when N is zero or negative).
 * Otherwise the result is the difference between the first two
 * characters that differ, taken in lower case when both are letters.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters that may still be compared.  It is only
     decremented for a character that has actually been compared equal,
     so that it reaches zero exactly when the budget is used up, even if
     S1 happens to end at that very position.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6495
/* Return a pointer to the first character at or after CP for which
   `iswhite' does not hold (end of string is not space).  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6505
/* Return a pointer to the first character at or after CP that is
   either the end of the string or one for which `iswhite' holds.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6515
/* Report an error with `error' (S1 is the printf control string, S2
   its argument), then terminate the program with a failure status.
   This function does not return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6524
/* Print S1 followed by the system's description of the current errno
   value (see perror), then terminate the program with a failure
   status.  This function does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6532
6533 static void
6534 suggest_asking_for_help ()
6535 {
6536 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6537 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6538 exit (EXIT_FAILURE);
6539 }
6540
6541 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6542 static void
6543 error (s1, s2)
6544 const char *s1, *s2;
6545 {
6546 fprintf (stderr, "%s: ", progname);
6547 fprintf (stderr, s1, s2);
6548 fprintf (stderr, "\n");
6549 }
6550
6551 /* Return a newly-allocated string whose contents
6552 concatenate those of s1, s2, s3. */
6553 static char *
6554 concat (s1, s2, s3)
6555 char *s1, *s2, *s3;
6556 {
6557 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6558 char *result = xnew (len1 + len2 + len3 + 1, char);
6559
6560 strcpy (result, s1);
6561 strcpy (result + len1, s2);
6562 strcpy (result + len1 + len2, s3);
6563 result[len1 + len2 + len3] = '\0';
6564
6565 return result;
6566 }
6567
6568 \f
6569 /* Does the same work as the system V getcwd, but does not need to
6570 guess the buffer size in advance. */
6571 static char *
6572 etags_getcwd ()
6573 {
6574 #ifdef HAVE_GETCWD
6575 int bufsize = 200;
6576 char *path = xnew (bufsize, char);
6577
6578 while (getcwd (path, bufsize) == NULL)
6579 {
6580 if (errno != ERANGE)
6581 pfatal ("getcwd");
6582 bufsize *= 2;
6583 free (path);
6584 path = xnew (bufsize, char);
6585 }
6586
6587 canonicalize_filename (path);
6588 return path;
6589
6590 #else /* not HAVE_GETCWD */
6591 #if MSDOS
6592
6593 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6594
6595 getwd (path);
6596
6597 for (p = path; *p != '\0'; p++)
6598 if (*p == '\\')
6599 *p = '/';
6600 else
6601 *p = lowcase (*p);
6602
6603 return strdup (path);
6604 #else /* not MSDOS */
6605 linebuffer path;
6606 FILE *pipe;
6607
6608 linebuffer_init (&path);
6609 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6610 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6611 pfatal ("pwd");
6612 pclose (pipe);
6613
6614 return path.buffer;
6615 #endif /* not MSDOS */
6616 #endif /* not HAVE_GETCWD */
6617 }
6618
6619 /* Return a newly allocated string containing the file name of FILE
6620 relative to the absolute directory DIR (which should end with a slash). */
6621 static char *
6622 relative_filename (file, dir)
6623 char *file, *dir;
6624 {
6625 char *fp, *dp, *afn, *res;
6626 int i;
6627
6628 /* Find the common root of file and dir (with a trailing slash). */
6629 afn = absolute_filename (file, cwd);
6630 fp = afn;
6631 dp = dir;
6632 while (*fp++ == *dp++)
6633 continue;
6634 fp--, dp--; /* back to the first differing char */
6635 #ifdef DOS_NT
6636 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6637 return afn;
6638 #endif
6639 do /* look at the equal chars until '/' */
6640 fp--, dp--;
6641 while (*fp != '/');
6642
6643 /* Build a sequence of "../" strings for the resulting relative file name. */
6644 i = 0;
6645 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6646 i += 1;
6647 res = xnew (3*i + strlen (fp + 1) + 1, char);
6648 res[0] = '\0';
6649 while (i-- > 0)
6650 strcat (res, "../");
6651
6652 /* Add the file name relative to the common root of file and dir. */
6653 strcat (res, fp + 1);
6654 free (afn);
6655
6656 return res;
6657 }
6658
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  FILE is used
   alone when it is already absolute, otherwise it is appended to DIR.
   Every "/dirname/.." and "/." component is removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
#ifndef HAVE_MEMMOVE
  char *src, *dst;              /* cursors for the hand-made copy */
#endif

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the start of the component that
                 precedes the "/..".  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap, terminator included.  */
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              /* Source and destination overlap, which strcpy does not
                 allow.  The destination is below the source, so a plain
                 front-to-back copy is safe.  */
              for (dst = cp, src = slashp + 3;
                   (*dst = *src) != '\0';
                   dst++, src++)
                continue;
#endif
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop the "/." component, terminator included.  */
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              /* Same overlapping copy as above, done front to back.  */
              for (dst = slashp, src = slashp + 2;
                   (*dst = *src) != '\0';
                   dst++, src++)
                continue;
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6731
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a copy
   of DIR.  FILE is modified temporarily, but is restored before
   returning.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *slash, *tail, *res;
  char saved;

  slash = etags_strrchr (file, '/');
  if (slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash while it is made absolute.  */
  tail = slash + 1;
  saved = *tail;
  *tail = '\0';
  res = absolute_filename (file, dir);
  *tail = saved;

  return res;
}
6752
6753 /* Whether the argument string is an absolute file name. The argument
6754 string must have been canonicalized with canonicalize_filename. */
6755 static bool
6756 filename_is_absolute (fn)
6757 char *fn;
6758 {
6759 return (fn[0] == '/'
6760 #ifdef DOS_NT
6761 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
6762 #endif
6763 );
6764 }
6765
/* Upcase DOS drive letter and collapse separators into single slashes.
   The separator is the backslash on DOS_NT and the slash elsewhere.
   Works in place.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
  char *src;                    /* next character to read */
  char *dst;                    /* where the next character goes */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        {
          *dst++ = *src++;
          continue;
        }
      /* A run of separators becomes one slash.  */
      *dst++ = '/';
      do
        src++;
      while (*src == sep);
    }
  *dst = '\0';
}
6795
6796 \f
6797 /* Initialize a linebuffer for use. */
6798 static void
6799 linebuffer_init (lbp)
6800 linebuffer *lbp;
6801 {
6802 lbp->size = (DEBUG) ? 3 : 200;
6803 lbp->buffer = xnew (lbp->size, char);
6804 lbp->buffer[0] = '\0';
6805 lbp->len = 0;
6806 }
6807
6808 /* Set the minimum size of a string contained in a linebuffer. */
6809 static void
6810 linebuffer_setlen (lbp, toksize)
6811 linebuffer *lbp;
6812 int toksize;
6813 {
6814 while (lbp->size <= toksize)
6815 {
6816 lbp->size *= 2;
6817 xrnew (lbp->buffer, lbp->size, char);
6818 }
6819 lbp->len = toksize;
6820 }
6821
6822 /* Like malloc but get fatal error if memory is exhausted. */
6823 static PTR
6824 xmalloc (size)
6825 unsigned int size;
6826 {
6827 PTR result = (PTR) malloc (size);
6828 if (result == NULL)
6829 fatal ("virtual memory exhausted", (char *)NULL);
6830 return result;
6831 }
6832
6833 static PTR
6834 xrealloc (ptr, size)
6835 char *ptr;
6836 unsigned int size;
6837 {
6838 PTR result = (PTR) realloc (ptr, size);
6839 if (result == NULL)
6840 fatal ("virtual memory exhausted", (char *)NULL);
6841 return result;
6842 }
6843
6844 /*
6845 * Local Variables:
6846 * indent-tabs-mode: t
6847 * tab-width: 8
6848 * fill-column: 79
6849 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6850 * c-file-style: "gnu"
6851 * End:
6852 */
6853
6854 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6855 (do not change this comment) */
6856
6857 /* etags.c ends here */