code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or (at
  39 your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #ifdef DEBUG
  84 #  undef DEBUG
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 # define O_CLOEXEC O_NOINHERIT
 120 #endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
 135 #ifdef NDEBUG
 136 # undef  assert                 /* some systems have a buggy assert.h */
 137 # define assert(x) ((void) 0)
 138 #endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
 143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 144  Leave it undefined to make the program "etags", which makes emacs-style
 145  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 146 #ifdef CTAGS
 147 # undef  CTAGS
 148 # define CTAGS true
 149 #else
 150 # define CTAGS false
 151 #endif
 152
 153 static bool
 154 streq (char const *s, char const *t)
 155 {
 156   return strcmp (s, t) == 0;
 157 }
 158
 159 static bool
 160 strcaseeq (char const *s, char const *t)
 161 {
 162   return c_strcasecmp (s, t) == 0;
 163 }
 164
 165 static bool
 166 strneq (char const *s, char const *t, size_t n)
 167 {
 168   return strncmp (s, t, n) == 0;
 169 }
 170
 171 static bool
 172 strncaseeq (char const *s, char const *t, size_t n)
 173 {
 174   return c_strncasecmp (s, t, n) == 0;
 175 }
 176
 177 /* C is not in a name.  */
 178 static bool
 179 notinname (unsigned char c)
 180 {
 181   /* Look at make_tag before modifying!  */
 182   static bool const table[UCHAR_MAX + 1] = {
 183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 185   };
 186   return table[c];
 187 }
 188
 189 /* C can start a token.  */
 190 static bool
 191 begtoken (unsigned char c)
 192 {
 193   static bool const table[UCHAR_MAX + 1] = {
 194     ['$']=1, ['@']=1,
 195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 198     ['Y']=1, ['Z']=1,
 199     ['_']=1,
 200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 203     ['y']=1, ['z']=1,
 204     ['~']=1
 205   };
 206   return table[c];
 207 }
 208
 209 /* C can be in the middle of a token.  */
 210 static bool
 211 intoken (unsigned char c)
 212 {
 213   static bool const table[UCHAR_MAX + 1] = {
 214     ['$']=1,
 215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 220     ['Y']=1, ['Z']=1,
 221     ['_']=1,
 222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 225     ['y']=1, ['z']=1
 226   };
 227   return table[c];
 228 }
 229
 230 /* C can end a token.  */
 231 static bool
 232 endtoken (unsigned char c)
 233 {
 234   static bool const table[UCHAR_MAX + 1] = {
 235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 240   };
 241   return table[c];
 242 }
 243
 244 /*
 245  *      xnew, xrnew -- allocate, reallocate storage
 246  *
 247  * SYNOPSIS:    Type *xnew (int n, Type);
 248  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew expands to a call to xmalloc for N objects of type Type and
      * casts the result to Type *.  xrnew passes OP to xrealloc and
      * assigns the cast result back to OP, so OP must be an lvalue; OP
      * appears twice in the expansion.
      * NOTE(review): the N * sizeof (Type) product is not checked for
      * overflow here -- confirm that callers bound N.
 249  */
 250 #define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
 251 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 252
 253 typedef void Lang_function (FILE *);  /* type of a parse function: takes a FILE *, returns nothing */
 254
 255 typedef struct                /* pairs a file name suffix with a decompression command */
 256 {
 257   const char *suffix;           /* file name suffix for this compressor */
 258   const char *command;          /* takes one arg and decompresses to stdout */
 259 } compressor;
 260
 261 typedef struct                /* description of one language: how to recognize and parse it */
 262 {
 263   const char *name;             /* language name */
 264   const char *help;             /* detailed help for the language */
 265   Lang_function *function;      /* parse function */
 266   const char **suffixes;        /* name suffixes of this language's files */
 267   const char **filenames;       /* names of this language's files */
 268   const char **interpreters;    /* interpreters for this language */
 269   bool metasource;              /* source used to generate other sources */
 270 } language;
 271
 272 typedef struct fdesc          /* description of one input file; records are chained through NEXT */
 273 {
 274   struct fdesc *next;           /* for the linked list */
 275   char *infname;                /* uncompressed input file name */
 276   char *infabsname;             /* absolute uncompressed input file name */
 277   char *infabsdir;              /* absolute dir of input file */
 278   char *taggedfname;            /* file name to write in tagfile */
 279   language *lang;               /* language of file */
 280   char *prop;                   /* file properties to write in tagfile */
 281   bool usecharno;               /* etags tags shall contain char number */
 282   bool written;                 /* entry written in the tags file */
 283 } fdesc;
 284
 285 typedef struct node_st        /* one tag, stored as a node of a binary tree */
 286 {                               /* sorting structure */
 287   struct node_st *left, *right; /* left and right sons */
 288   fdesc *fdp;                   /* description of file to whom tag belongs */
 289   char *name;                   /* tag name */
 290   char *regex;                  /* search regexp */
 291   bool valid;                   /* write this tag on the tag file */
 292   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 293   bool been_warned;             /* warning already given for duplicated tag */
 294   int lno;                      /* line number tag is on */
 295   long cno;                     /* character number line starts on */
 296 } node;
 297
 298 /*
 299  * A `linebuffer' is a structure which holds a line of text.
 300  * `readline_internal' reads a line from a stream into a linebuffer
 301  * and works regardless of the length of the line.
 302  * SIZE is the size of BUFFER, LEN is the length of the string in
 303  * BUFFER after readline reads it.
      * NOTE(review): SIZE is a long while LEN is an int, so LEN cannot
      * represent every length that SIZE can.
 304  */
 305 typedef struct
 306 {
 307   long size;
 308   int len;
 309   char *buffer;
 310 } linebuffer;
 311
 312 /* Used to support mixing of --lang and file names.
         Each record is one command-line item, tagged with its kind in
         ARG_TYPE.  */
 313 typedef struct
 314 {
 315   enum {
 316     at_language,                /* a language specification */
 317     at_regexp,                  /* a regular expression */
 318     at_filename,                /* a file name */
 319     at_stdin,                   /* read from stdin here */
 320     at_end                      /* stop parsing the list */
 321   } arg_type;                   /* argument type */
 322   language *lang;               /* language associated with the argument */
 323   char *what;                   /* the argument itself */
 324 } argument;
 325
 326 /* Structure defining a regular expression.
         Records are chained through P_NEXT into a singly linked list.  */
 327 typedef struct regexp
 328 {
 329   struct regexp *p_next;        /* pointer to next in list */
 330   language *lang;               /* if set, use only for this language */
 331   char *pattern;                /* the regexp pattern */
 332   char *name;                   /* tag name */
 333   struct re_pattern_buffer *pat; /* the compiled pattern */
 334   struct re_registers regs;     /* re registers */
 335   bool error_signaled;          /* already signaled for this regexp */
 336   bool force_explicit_name;     /* do not allow implicit tag name */
 337   bool ignore_case;             /* ignore case when matching */
 338   bool multi_line;              /* do a multi-line match on the whole file */
 339 } regexp;
 340
 340
 341
 342 /* Many compilers barf on this:
 343         Lang_function Ada_funcs;
 344    so let's write it this way
         These are forward declarations of the parse functions.  Every
         one has the Lang_function signature (a single FILE * argument)
         except C_entries, which takes an extra int c_ext first.  */
 345 static void Ada_funcs (FILE *);
 346 static void Asm_labels (FILE *);
 347 static void C_entries (int c_ext, FILE *);
 348 static void default_C_entries (FILE *);
 349 static void plain_C_entries (FILE *);
 350 static void Cjava_entries (FILE *);
 351 static void Cobol_paragraphs (FILE *);
 352 static void Cplusplus_entries (FILE *);
 353 static void Cstar_entries (FILE *);
 354 static void Erlang_functions (FILE *);
 355 static void Forth_words (FILE *);
 356 static void Fortran_functions (FILE *);
 357 static void Go_functions (FILE *);
 358 static void HTML_labels (FILE *);
 359 static void Lisp_functions (FILE *);
 360 static void Lua_functions (FILE *);
 361 static void Makefile_targets (FILE *);
 362 static void Pascal_functions (FILE *);
 363 static void Perl_functions (FILE *);
 364 static void PHP_functions (FILE *);
 365 static void PS_functions (FILE *);
 366 static void Prolog_functions (FILE *);
 367 static void Python_functions (FILE *);
 368 static void Ruby_functions (FILE *);
 369 static void Scheme_functions (FILE *);
 370 static void TeX_commands (FILE *);
 371 static void Texinfo_nodes (FILE *);
 372 static void Yacc_entries (FILE *);
 373 static void just_read_file (FILE *);
 374
 375 static language *get_language_from_langname (const char *);
     /* The declarations from here on are forward declarations of
        file-local (static) helpers; their definitions are not part of
        this section of the file.  */
 376 static void readline (linebuffer *, FILE *);
 377 static long readline_internal (linebuffer *, FILE *, char const *);
 378 static bool nocase_tail (const char *);
 379 static void get_tag (char *, char **);
 380
 381 static void analyze_regex (char *);
 382 static void free_regexps (void);
 383 static void regex_tag_multiline (void);
     /* NOTE(review): ATTRIBUTE_FORMAT_PRINTF is defined outside this file
        section; presumably it asks the compiler to check printf-style
        arguments -- confirm.  */
 384 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 385 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
     /* The next three are declared _Noreturn: they do not return to
        their caller.  */
 386 static _Noreturn void suggest_asking_for_help (void);
 387 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 388 static _Noreturn void pfatal (const char *);
 389 static void add_node (node *, node **);
 390
 391 static void process_file_name (char *, language *);
 392 static void process_file (FILE *, char *, language *);
 393 static void find_entries (FILE *);
 394 static void free_tree (node *);
 395 static void free_fdesc (fdesc *);
 396 static void pfnote (char *, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *skip_name (char *);
 404 static char *savenstr (const char *, int);
 405 static char *savestr (const char *);
 406 static char *etags_getcwd (void);
 407 static char *relative_filename (char *, char *);
 408 static char *absolute_filename (char *, char *);
 409 static char *absolute_dirname (char *, char *);
 410 static bool filename_is_absolute (char *f);
 411 static void canonicalize_filename (char *);
 412 static char *etags_mktmp (void);
 413 static void linebuffer_init (linebuffer *);
 414 static void linebuffer_setlen (linebuffer *, int);
 415 static void *xmalloc (size_t);
 416 static void *xrealloc (void *, size_t);
 417
 418 \f
 419 static char searchar = '/';     /* use /.../ searches */
 420
 421 static char *tagfile;           /* output file */
 422 static char *progname;          /* name this program was invoked with */
 423 static char *cwd;               /* current working directory */
 424 static char *tagfiledir;        /* directory of tagfile */
 425 static FILE *tagf;              /* ioptr for tags file */
 426 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 427
 428 static fdesc *fdhead;           /* head of file description list */
 429 static fdesc *curfdp;           /* current file description */
 430 static char *infilename;        /* current input file name */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
     /* NOTE(review): declared int, although charno and linecharno
        above are long -- presumably a "no character number" marker;
        confirm against its users.  */
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
 445 static bool append_to_tagfile;  /* -a: append to tags */
     /* The flags below that are declared int rather than bool (globals,
        members, declarations, no_line_directive, no_duplicates,
        packages_only, class_qualify) are the ones whose address is
        stored in the longopts table.  */
 446 /* The next five default to true in C and derived languages.  */
 447 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 448 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 449                                 /* 0 struct/enum/union decls, and C++ */
 450                                 /* member functions. */
 451 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 452                                 /* constants and variables. */
 453                                 /* -D: opposite of -d.  Default under ctags. */
 454 static int globals;             /* create tags for global variables */
 455 static int members;             /* create tags for C member variables */
 456 static int declarations;        /* --declarations: tag them and extern in C&Co*/
 457 static int no_line_directive;   /* ignore #line directives (undocumented) */
 458 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
 459 static bool update;             /* -u: update tags */
 460 static bool vgrind_style;       /* -v: create vgrind style index output */
 461 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 462 static bool cxref_style;        /* -x: create cxref style output */
 463 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 464 static bool ignoreindent;       /* -I: ignore indentation in C */
 465 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 466 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 467
 468 /* STDIN is defined in LynxOS system headers, so drop any such
         definition before supplying our own.  */
 469 #ifdef STDIN
 470 # undef STDIN
 471 #endif
 472
     /* 0x1001 is larger than any single-byte character value, so it
        cannot be confused with one of the option letters used as codes
        in longopts.  */
 473 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 474 static bool parsing_stdin;      /* --parse-stdin used */
 475
 476 static regexp *p_head;          /* list of all regexps */
 477 static bool need_filebuf;       /* some regexes are multi-line */
 478
 479 static struct option longopts[] =
 480 {
     /* Table of long options, ended by the { NULL } entry.  Columns are
        option name, whether it takes an argument, an optional int
        variable, and a value.  Per getopt_long's struct option contract,
        when the variable column is NULL the value is the code returned
        for the option; otherwise the value is stored into the variable.
        Which entries exist beyond the common ones depends on the CTAGS
        macro.  */
 481   { "append",             no_argument,       NULL,               'a'   },
 482   { "packages-only",      no_argument,       &packages_only,     1     },
 483   { "c++",                no_argument,       NULL,               'C'   },
 484   { "declarations",       no_argument,       &declarations,      1     },
 485   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
 486   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
 487   { "help",               no_argument,       NULL,               'h'   },
     /* NOTE(review): same name as the previous entry; presumably only
        the first "help" entry can ever be matched by name -- confirm.  */
 488   { "help",               no_argument,       NULL,               'H'   },
 489   { "ignore-indentation", no_argument,       NULL,               'I'   },
 490   { "language",           required_argument, NULL,               'l'   },
 491   { "members",            no_argument,       &members,           1     },
 492   { "no-members",         no_argument,       &members,           0     },
 493   { "output",             required_argument, NULL,               'o'   },
     /* Unlike the other entries that name a variable, this one stores
        the character 'Q' (a nonzero value) rather than 1.  */
 494   { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
 495   { "regex",              required_argument, NULL,               'r'   },
 496   { "no-regex",           no_argument,       NULL,               'R'   },
 497   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 498   { "parse-stdin",        required_argument, NULL,               STDIN },
 499   { "version",            no_argument,       NULL,               'V'   },
 500
 501 #if CTAGS /* Ctags options */
 502   { "backward-search",    no_argument,       NULL,               'B'   },
 503   { "cxref",              no_argument,       NULL,               'x'   },
 504   { "defines",            no_argument,       NULL,               'd'   },
 505   { "globals",            no_argument,       &globals,           1     },
 506   { "typedefs",           no_argument,       NULL,               't'   },
 507   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 508   { "update",             no_argument,       NULL,               'u'   },
 509   { "vgrind",             no_argument,       NULL,               'v'   },
 510   { "no-warn",            no_argument,       NULL,               'w'   },
 511
 512 #else /* Etags options */
 513   { "no-defines",         no_argument,       NULL,               'D'   },
 514   { "no-globals",         no_argument,       &globals,           0     },
 515   { "include",            required_argument, NULL,               'i'   },
 516 #endif
 517   { NULL }
 518 };
 519
 520 static compressor compressors[] =
 521 {
     /* Each entry is { suffix, command }; the { NULL } entry ends the
        table.  The four gzip suffixes differ only in spelling and case.  */
 522   { "z", "gzip -d -c"},
 523   { "Z", "gzip -d -c"},
 524   { "gz", "gzip -d -c"},
 525   { "GZ", "gzip -d -c"},
 526   { "bz2", "bzip2 -d -c" },
 527   { "xz", "xz -d -c" },
 528   { NULL }
 529 };
 530
 531 /*
 532  * Language stuff.
 533  */
 534
 535 /* Ada code: NULL-terminated list of file name suffixes, followed by
         the help text for the language.  */
 536 static const char *Ada_suffixes [] =
 537   { "ads", "adb", "ada", NULL };
 538 static const char Ada_help [] =
 539 "In Ada code, functions, procedures, packages, tasks and types are\n\
 540 tags.  Use the '--packages-only' option to create tags for\n\
 541 packages only.\n\
 542 Ada tag names have suffixes indicating the type of entity:\n\
 543         Entity type:    Qualifier:\n\
 544         ------------    ----------\n\
 545         function        /f\n\
 546         procedure       /p\n\
 547         package spec    /s\n\
 548         package body    /b\n\
 549         type            /t\n\
 550         task            /k\n\
 551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 553 will just search for any tag 'bidule'.";
 554
 555 /* Assembly code: NULL-terminated list of file name suffixes (both
         "s" and "S" are listed, so the list is case-sensitive), followed
         by the help text.  */
 556 static const char *Asm_suffixes [] =
 557   { "a",        /* Unix assembler */
 558     "asm", /* Microcontroller assembly */
 559     "def", /* BSO/Tasking definition includes  */
 560     "inc", /* Microcontroller include files */
 561     "ins", /* Microcontroller include files */
 562     "s", "sa", /* Unix assembler */
 563     "S",   /* cpp-processed Unix assembler */
 564     "src", /* BSO/Tasking C compiler output */
 565     NULL
 566   };
 567 static const char Asm_help [] =
 568 "In assembler code, labels appearing at the beginning of a line,\n\
 569 followed by a colon, are tags.";
 570
 571
 572 /* Note that .c and .h can be considered C++, if the --c++ flag was
 573    given, or if the `class' or `template' keywords are met inside the file.
 574    That is why default_C_entries is called for these.
         Two alternative help texts follow; the CTAGS macro, defined
         earlier as true or false, selects one at compile time.  */
 575 static const char *default_C_suffixes [] =
 576   { "c", "h", NULL };
 577 #if CTAGS                               /* C help for Ctags */
 578 static const char default_C_help [] =
 579 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 582 Use --globals to tag global variables.\n\
 583 You can tag function declarations and external variables by\n\
 584 using '--declarations', and struct members by using '--members'.";
 585 #else                                   /* C help for Etags */
 586 static const char default_C_help [] =
 587 "In C code, any C function or typedef is a tag, and so are\n\
 588 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 589 definitions and 'enum' constants are tags unless you specify\n\
 590 '--no-defines'.  Global variables are tags unless you specify\n\
 591 '--no-globals' and so are struct members unless you specify\n\
 592 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 593 '--no-members' can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using '--declarations'.";
 596 #endif  /* C help for Ctags and Etags */
 597
 598 static const char *Cplusplus_suffixes [] =   /* C++: NULL-terminated suffix list, then help text */
 599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 600     "M",                        /* Objective C++ */
 601     "pdb",                      /* PostScript with C syntax */
 602     NULL };
 603 static const char Cplusplus_help [] =
 604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 605 --help --lang=c --lang=c++ for full help.)\n\
 606 In addition to C tags, member functions are also recognized.  Member\n\
 607 variables are recognized unless you use the '--no-members' option.\n\
 608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 609 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 610 'operator+'.";
 611
 612 static const char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static const char *Cobol_suffixes [] =
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
 625 static const char *Cstar_suffixes [] =   /* C*: suffix list only; no help text is defined next to it */
 626   { "cs", "hs", NULL };
 627
 628 static const char *Erlang_suffixes [] =   /* Erlang: NULL-terminated suffix list, then help text */
 629   { "erl", "hrl", NULL };
 630 static const char Erlang_help [] =
 631 "In Erlang code, the tags are the functions, records and macros\n\
 632 defined in the file.";
 633
 634 const char *Forth_suffixes [] =
 635   { "fth", "tok", NULL };
 636 static const char Forth_help [] =
 637 "In Forth code, tags are words defined by ':',\n\
 638 constant, code, create, defer, value, variable, buffer:, field.";
 639
 640 static const char *Fortran_suffixes [] =   /* Fortran: NULL-terminated suffix list, then help text */
 641   { "F", "f", "f90", "for", NULL };
 642 static const char Fortran_help [] =
 643 "In Fortran code, functions, subroutines and block data are tags.";
 644 /* Go.  */
 645 static const char *Go_suffixes [] = {"go", NULL};
 646 static const char Go_help [] =
 647   "In Go code, functions, interfaces and packages are tags.";
 648 /* HTML.  */
 649 static const char *HTML_suffixes [] =
 650   { "htm", "html", "shtml", NULL };
 651 static const char HTML_help [] =
 652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 653 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 654 occurrences of 'id='.";
 655 /* Lisp; the suffix list includes "el" and "ml".  */
 656 static const char *Lisp_suffixes [] =
 657   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 658 static const char Lisp_help [] =
 659 "In Lisp code, any function defined with 'defun', any variable\n\
 660 defined with 'defvar' or 'defconst', and in general the first\n\
 661 argument of any expression that starts with '(def' in column zero\n\
 662 is a tag.\n\
 663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 664 /* Lua.  */
 665 static const char *Lua_suffixes [] =
 666   { "lua", "LUA", NULL };
 667 static const char Lua_help [] =
 668 "In Lua scripts, all functions are tags.";
 669
 670 static const char *Makefile_filenames [] =   /* Makefiles: a list of whole file names, not suffixes */
 671   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 672 static const char Makefile_help [] =
 673 "In makefiles, targets are tags; additionally, variables are tags\n\
 674 unless you specify '--no-globals'.";
 675 /* Objective C.  */
 676 static const char *Objc_suffixes [] =
 677   { "lm",                       /* Objective lex file */
 678     "m",                        /* Objective C file */
 679      NULL };
 680 static const char Objc_help [] =
 681 "In Objective C code, tags include Objective C definitions for classes,\n\
 682 class categories, methods and protocols.  Tags for variables and\n\
 683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
 684 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
 685 /* Pascal.  */
 686 static const char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static const char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691 /* " // this is for working around an Emacs highlighting bug... */
 692
 693 static const char *Perl_suffixes [] =   /* Perl: suffix list, interpreter list, then help text */
 694   { "pl", "pm", NULL };
     /* NOTE(review): "@PERL@" looks like a build-time substitution
        placeholder that is kept here as a literal name -- confirm.  */
 695 static const char *Perl_interpreters [] =
 696   { "perl", "@PERL@", NULL };
 697 static const char Perl_help [] =
 698 "In Perl code, the tags are the packages, subroutines and variables\n\
 699 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
 700 '--globals' if you want to tag global variables.  Tags for\n\
 701 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
 702 defined in the default package is 'main::SUB'.";
 703 /* PHP.  */
 704 static const char *PHP_suffixes [] =
 705   { "php", "php3", "php4", NULL };
 706 static const char PHP_help [] =
 707 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 708 the '--no-members' option, vars are tags too.";
 709 /* Plain C: suffix list only; no help text is defined next to it.  */
 710 static const char *plain_C_suffixes [] =
 711   { "pc",                       /* Pro*C file */
 712      NULL };
 713 /* PostScript.  */
 714 static const char *PS_suffixes [] =
 715   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 716 static const char PS_help [] =
 717 "In PostScript code, the tags are the functions.";
 718
 719 static const char *Prolog_suffixes [] =   /* Prolog: NULL-terminated suffix list, then help text */
 720   { "prolog", NULL };
 721 static const char Prolog_help [] =
 722 "In Prolog code, tags are predicates and rules at the beginning of\n\
 723 line.";
 724 /* Python.  */
 725 static const char *Python_suffixes [] =
 726   { "py", NULL };
 727 static const char Python_help [] =
 728 "In Python code, 'def' or 'class' at the beginning of a line\n\
 729 generate a tag.";
 730 /* Ruby: has a list of whole file names in addition to the suffixes.  */
 731 static const char *Ruby_suffixes [] =
 732   { "rb", "ru", "rbw", NULL };
 733 static const char *Ruby_filenames [] =
 734   { "Rakefile", "Thorfile", NULL };
 735 static const char Ruby_help [] =
 736   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
 737 a line generate a tag.  Constants also generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with 'def' or with a\n\
 744 construct whose name starts with 'def'.  They also include\n\
 745 variables set with 'set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
 751 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
 752 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
 753 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
 754 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 'TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "'auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "'none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "go",        Go_help,        Go_functions,      Go_suffixes        },
 805   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 806   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 807   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 808   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 809   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 810   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 811   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 812   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 813   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 814   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 815   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 816   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 817   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 818   { "ruby",      Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
 819   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 820   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 821   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 822   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 823   { "auto",      auto_help },                      /* default guessing scheme */
 824   { "none",      none_help,      just_read_file }, /* regexp matching only */
 825   { NULL }                /* end of list */
 826 };
 827
 828 \f
 829 static void
 830 print_language_names (void)
 831 {
 832   language *lang;
 833   const char **name, **ext;
 834
 835   puts ("\nThese are the currently supported languages, along with the\n\
 836 default file names and dot suffixes:");
 837   for (lang = lang_names; lang->name != NULL; lang++)
 838     {
 839       printf ("  %-*s", 10, lang->name);
 840       if (lang->filenames != NULL)
 841         for (name = lang->filenames; *name != NULL; name++)
 842           printf (" %s", *name);
 843       if (lang->suffixes != NULL)
 844         for (ext = lang->suffixes; *ext != NULL; ext++)
 845           printf (" .%s", *ext);
 846       puts ("");
 847     }
 848   puts ("where 'auto' means use default language for files based on file\n\
 849 name suffix, and 'none' means only do regexp processing on files.\n\
 850 If no language is specified and no matching suffix is found,\n\
 851 the first line of the file is read for a sharp-bang (#!) sequence\n\
 852 followed by the name of an interpreter.  If no such sequence is found,\n\
 853 Fortran is tried first; if no tags are found, C is tried next.\n\
 854 When parsing any C file, a \"class\" or \"template\" keyword\n\
 855 switches to C++.");
 856   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 857 \n\
 858 For detailed help on a given language use, for example,\n\
 859 etags --help --lang=ada.");
 860 }
 861
 862 #ifndef EMACS_NAME
 863 # define EMACS_NAME "standalone"
 864 #endif
 865 #ifndef VERSION
 866 # define VERSION "17.38.1.4"
 867 #endif
 868 static _Noreturn void
 869 print_version (void)
 870 {
 871   char emacs_copyright[] = COPYRIGHT;
 872
 873   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 874   puts (emacs_copyright);
 875   puts ("This program is distributed under the terms in ETAGS.README");
 876
 877   exit (EXIT_SUCCESS);
 878 }
 879
 880 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 881 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 882 #endif
 883
 884 static _Noreturn void
 885 print_help (argument *argbuffer)
 886 {
 887   bool help_for_lang = false;
 888
 889   for (; argbuffer->arg_type != at_end; argbuffer++)
 890     if (argbuffer->arg_type == at_language)
 891       {
 892         if (help_for_lang)
 893           puts ("");
 894         puts (argbuffer->lang->help);
 895         help_for_lang = true;
 896       }
 897
 898   if (help_for_lang)
 899     exit (EXIT_SUCCESS);
 900
 901   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 902 \n\
 903 These are the options accepted by %s.\n", progname, progname);
 904   puts ("You may use unambiguous abbreviations for the long option names.");
 905   puts ("  A - as file name means read names from stdin (one per line).\n\
 906 Absolute names are stored in the output file as they are.\n\
 907 Relative ones are stored relative to the output file's directory.\n");
 908
 909   puts ("-a, --append\n\
 910         Append tag entries to existing tags file.");
 911
 912   puts ("--packages-only\n\
 913         For Ada files, only generate tags for packages.");
 914
 915   if (CTAGS)
 916     puts ("-B, --backward-search\n\
 917         Write the search commands for the tag entries using '?', the\n\
 918         backward-search command instead of '/', the forward-search command.");
 919
 920   /* This option is mostly obsolete, because etags can now automatically
 921      detect C++.  Retained for backward compatibility and for debugging and
 922      experimentation.  In principle, we could want to tag as C++ even
 923      before any "class" or "template" keyword.
 924   puts ("-C, --c++\n\
 925         Treat files whose name suffix defaults to C language as C++ files.");
 926   */
 927
 928   puts ("--declarations\n\
 929         In C and derived languages, create tags for function declarations,");
 930   if (CTAGS)
 931     puts ("\tand create tags for extern variables if --globals is used.");
 932   else
 933     puts
 934       ("\tand create tags for extern variables unless --no-globals is used.");
 935
 936   if (CTAGS)
 937     puts ("-d, --defines\n\
 938         Create tag entries for C #define constants and enum constants, too.");
 939   else
 940     puts ("-D, --no-defines\n\
 941         Don't create tag entries for C #define constants and enum constants.\n\
 942         This makes the tags file smaller.");
 943
 944   if (!CTAGS)
 945     puts ("-i FILE, --include=FILE\n\
 946         Include a note in tag file indicating that, when searching for\n\
 947         a tag, one should also consult the tags file FILE after\n\
 948         checking the current file.");
 949
 950   puts ("-l LANG, --language=LANG\n\
 951         Force the following files to be considered as written in the\n\
 952         named language up to the next --language=LANG option.");
 953
 954   if (CTAGS)
 955     puts ("--globals\n\
 956         Create tag entries for global variables in some languages.");
 957   else
 958     puts ("--no-globals\n\
 959         Do not create tag entries for global variables in some\n\
 960         languages.  This makes the tags file smaller.");
 961
 962   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 963     puts ("--no-line-directive\n\
 964         Ignore #line preprocessor directives in C and derived languages.");
 965
 966   if (CTAGS)
 967     puts ("--members\n\
 968         Create tag entries for members of structures in some languages.");
 969   else
 970     puts ("--no-members\n\
 971         Do not create tag entries for members of structures\n\
 972         in some languages.");
 973
 974   puts ("-Q, --class-qualify\n\
 975         Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
 976         This produces tag names of the form \"class::member\" for C++,\n\
 977         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 978         For Objective C, this also produces class methods qualified with\n\
 979         their arguments, as in \"foo:bar:baz:more\".\n\
 980         For Perl, this produces \"package::member\".");
 981   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 982         Make a tag for each line matching a regular expression pattern\n\
 983         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 984         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 985         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 986         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 987   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 988         For example Tcl named tags can be created with:\n\
 989           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 990         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
 991         'm' means to allow multi-line matches, 's' implies 'm' and\n\
 992         causes dot to match any character, including newline.");
 993
 994   puts ("-R, --no-regex\n\
 995         Don't create tags from regexps for the following files.");
 996
 997   puts ("-I, --ignore-indentation\n\
 998         In C and C++ do not assume that a closing brace in the first\n\
 999         column is the final brace of a function or structure definition.");
1000
1001   puts ("-o FILE, --output=FILE\n\
1002         Write the tags to FILE.");
1003
1004   puts ("--parse-stdin=NAME\n\
1005         Read from standard input and record tags as belonging to file NAME.");
1006
1007   if (CTAGS)
1008     {
1009       puts ("-t, --typedefs\n\
1010         Generate tag entries for C and Ada typedefs.");
1011       puts ("-T, --typedefs-and-c++\n\
1012         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1013         and C++ member functions.");
1014     }
1015
1016   if (CTAGS)
1017     puts ("-u, --update\n\
1018         Update the tag entries for the given files, leaving tag\n\
1019         entries for other files in place.  Currently, this is\n\
1020         implemented by deleting the existing entries for the given\n\
1021         files and then rewriting the new entries at the end of the\n\
1022         tags file.  It is often faster to simply rebuild the entire\n\
1023         tag file than to use this.");
1024
1025   if (CTAGS)
1026     {
1027       puts ("-v, --vgrind\n\
1028         Print on the standard output an index of items intended for\n\
1029         human consumption, similar to the output of vgrind.  The index\n\
1030         is sorted, and gives the page number of each item.");
1031
1032       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1033         puts ("-w, --no-duplicates\n\
1034         Do not create duplicate tag entries, for compatibility with\n\
1035         traditional ctags.");
1036
1037       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1038         puts ("-w, --no-warn\n\
1039         Suppress warning messages about duplicate tag entries.");
1040
1041       puts ("-x, --cxref\n\
1042         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1043         The output uses line numbers instead of page numbers, but\n\
1044         beyond that the differences are cosmetic; try both to see\n\
1045         which you like.");
1046     }
1047
1048   puts ("-V, --version\n\
1049         Print the version of the program.\n\
1050 -h, --help\n\
1051         Print this help message.\n\
1052         Followed by one or more '--language' options prints detailed\n\
1053         help about tag generation for the specified languages.");
1054
1055   print_language_names ();
1056
1057   puts ("");
1058   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1059
1060   exit (EXIT_SUCCESS);
1061 }
1062
1063 \f
1064 int
1065 main (int argc, char **argv)
1066 {
1067   int i;
1068   unsigned int nincluded_files;
1069   char **included_files;
1070   argument *argbuffer;
1071   int current_arg, file_count;
1072   linebuffer filename_lb;
1073   bool help_asked = false;
1074   ptrdiff_t len;
1075   char *optstring;
1076   int opt;
1077
1078   progname = argv[0];
1079   nincluded_files = 0;
1080   included_files = xnew (argc, char *);
1081   current_arg = 0;
1082   file_count = 0;
1083
1084   /* Allocate enough no matter what happens.  Overkill, but each one
1085      is small. */
1086   argbuffer = xnew (argc, argument);
1087
1088   /*
1089    * Always find typedefs and structure tags.
1090    * Also default to find macro constants, enum constants, struct
1091    * members and global variables.  Do it for both etags and ctags.
1092    */
1093   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1094   globals = members = true;
1095
1096   /* When the optstring begins with a '-' getopt_long does not rearrange the
1097      non-options arguments to be at the end, but leaves them alone. */
1098   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1099                       (CTAGS) ? "BxdtTuvw" : "Di:",
1100                       "");
1101
1102   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1103     switch (opt)
1104       {
1105       case 0:
1106         /* If getopt returns 0, then it has already processed a
1107            long-named option.  We should do nothing.  */
1108         break;
1109
1110       case 1:
1111         /* This means that a file name has been seen.  Record it. */
1112         argbuffer[current_arg].arg_type = at_filename;
1113         argbuffer[current_arg].what     = optarg;
1114         len = strlen (optarg);
1115         if (whatlen_max < len)
1116           whatlen_max = len;
1117         ++current_arg;
1118         ++file_count;
1119         break;
1120
1121       case STDIN:
1122         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1123         argbuffer[current_arg].arg_type = at_stdin;
1124         argbuffer[current_arg].what     = optarg;
1125         len = strlen (optarg);
1126         if (whatlen_max < len)
1127           whatlen_max = len;
1128         ++current_arg;
1129         ++file_count;
1130         if (parsing_stdin)
1131           fatal ("cannot parse standard input more than once");
1132         parsing_stdin = true;
1133         break;
1134
1135         /* Common options. */
1136       case 'a': append_to_tagfile = true;       break;
1137       case 'C': cplusplus = true;               break;
1138       case 'f':         /* for compatibility with old makefiles */
1139       case 'o':
1140         if (tagfile)
1141           {
1142             error ("-o option may only be given once.");
1143             suggest_asking_for_help ();
1144             /* NOTREACHED */
1145           }
1146         tagfile = optarg;
1147         break;
1148       case 'I':
1149       case 'S':         /* for backward compatibility */
1150         ignoreindent = true;
1151         break;
1152       case 'l':
1153         {
1154           language *lang = get_language_from_langname (optarg);
1155           if (lang != NULL)
1156             {
1157               argbuffer[current_arg].lang = lang;
1158               argbuffer[current_arg].arg_type = at_language;
1159               ++current_arg;
1160             }
1161         }
1162         break;
1163       case 'c':
1164         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1165         optarg = concat (optarg, "i", ""); /* memory leak here */
1166         /* FALLTHRU */
1167       case 'r':
1168         argbuffer[current_arg].arg_type = at_regexp;
1169         argbuffer[current_arg].what = optarg;
1170         len = strlen (optarg);
1171         if (whatlen_max < len)
1172           whatlen_max = len;
1173         ++current_arg;
1174         break;
1175       case 'R':
1176         argbuffer[current_arg].arg_type = at_regexp;
1177         argbuffer[current_arg].what = NULL;
1178         ++current_arg;
1179         break;
1180       case 'V':
1181         print_version ();
1182         break;
1183       case 'h':
1184       case 'H':
1185         help_asked = true;
1186         break;
1187       case 'Q':
1188         class_qualify = 1;
1189         break;
1190
1191         /* Etags options */
1192       case 'D': constantypedefs = false;                        break;
1193       case 'i': included_files[nincluded_files++] = optarg;     break;
1194
1195         /* Ctags options. */
1196       case 'B': searchar = '?';                                 break;
1197       case 'd': constantypedefs = true;                         break;
1198       case 't': typedefs = true;                                break;
1199       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1200       case 'u': update = true;                                  break;
1201       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1202       case 'x': cxref_style = true;                             break;
1203       case 'w': no_warnings = true;                             break;
1204       default:
1205         suggest_asking_for_help ();
1206         /* NOTREACHED */
1207       }
1208
1209   /* No more options.  Store the rest of arguments. */
1210   for (; optind < argc; optind++)
1211     {
1212       argbuffer[current_arg].arg_type = at_filename;
1213       argbuffer[current_arg].what = argv[optind];
1214       len = strlen (argv[optind]);
1215       if (whatlen_max < len)
1216         whatlen_max = len;
1217       ++current_arg;
1218       ++file_count;
1219     }
1220
1221   argbuffer[current_arg].arg_type = at_end;
1222
1223   if (help_asked)
1224     print_help (argbuffer);
1225     /* NOTREACHED */
1226
1227   if (nincluded_files == 0 && file_count == 0)
1228     {
1229       error ("no input files specified.");
1230       suggest_asking_for_help ();
1231       /* NOTREACHED */
1232     }
1233
1234   if (tagfile == NULL)
1235     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1236   cwd = etags_getcwd ();        /* the current working directory */
1237   if (cwd[strlen (cwd) - 1] != '/')
1238     {
1239       char *oldcwd = cwd;
1240       cwd = concat (oldcwd, "/", "");
1241       free (oldcwd);
1242     }
1243
1244   /* Compute base directory for relative file names. */
1245   if (streq (tagfile, "-")
1246       || strneq (tagfile, "/dev/", 5))
1247     tagfiledir = cwd;            /* relative file names are relative to cwd */
1248   else
1249     {
1250       canonicalize_filename (tagfile);
1251       tagfiledir = absolute_dirname (tagfile, cwd);
1252     }
1253
1254   linebuffer_init (&lb);
1255   linebuffer_init (&filename_lb);
1256   linebuffer_init (&filebuf);
1257   linebuffer_init (&token_name);
1258
1259   if (!CTAGS)
1260     {
1261       if (streq (tagfile, "-"))
1262         {
1263           tagf = stdout;
1264           SET_BINARY (fileno (stdout));
1265         }
1266       else
1267         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1268       if (tagf == NULL)
1269         pfatal (tagfile);
1270     }
1271
1272   /*
1273    * Loop through files finding functions.
1274    */
1275   for (i = 0; i < current_arg; i++)
1276     {
1277       static language *lang;    /* non-NULL if language is forced */
1278       char *this_file;
1279
1280       switch (argbuffer[i].arg_type)
1281         {
1282         case at_language:
1283           lang = argbuffer[i].lang;
1284           break;
1285         case at_regexp:
1286           analyze_regex (argbuffer[i].what);
1287           break;
1288         case at_filename:
1289               this_file = argbuffer[i].what;
1290               /* Input file named "-" means read file names from stdin
1291                  (one per line) and use them. */
1292               if (streq (this_file, "-"))
1293                 {
1294                   if (parsing_stdin)
1295                     fatal ("cannot parse standard input "
1296                            "AND read file names from it");
1297                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1298                     process_file_name (filename_lb.buffer, lang);
1299                 }
1300               else
1301                 process_file_name (this_file, lang);
1302           break;
1303         case at_stdin:
1304           this_file = argbuffer[i].what;
1305           process_file (stdin, this_file, lang);
1306           break;
1307         default:
1308           error ("internal error: arg_type");
1309         }
1310     }
1311
1312   free_regexps ();
1313   free (lb.buffer);
1314   free (filebuf.buffer);
1315   free (token_name.buffer);
1316
1317   if (!CTAGS || cxref_style)
1318     {
1319       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1320       put_entries (nodehead);
1321       free_tree (nodehead);
1322       nodehead = NULL;
1323       if (!CTAGS)
1324         {
1325           fdesc *fdp;
1326
1327           /* Output file entries that have no tags. */
1328           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1329             if (!fdp->written)
1330               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1331
1332           while (nincluded_files-- > 0)
1333             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1334
1335           if (fclose (tagf) == EOF)
1336             pfatal (tagfile);
1337         }
1338
1339       exit (EXIT_SUCCESS);
1340     }
1341
1342   /* From here on, we are in (CTAGS && !cxref_style) */
1343   if (update)
1344     {
1345       char *cmd =
1346         xmalloc (strlen (tagfile) + whatlen_max +
1347                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1348       for (i = 0; i < current_arg; ++i)
1349         {
1350           switch (argbuffer[i].arg_type)
1351             {
1352             case at_filename:
1353             case at_stdin:
1354               break;
1355             default:
1356               continue;         /* the for loop */
1357             }
1358           char *z = stpcpy (cmd, "mv ");
1359           z = stpcpy (z, tagfile);
1360           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1361           z = stpcpy (z, argbuffer[i].what);
1362           z = stpcpy (z, "\t' OTAGS >");
1363           z = stpcpy (z, tagfile);
1364           strcpy (z, ";rm OTAGS");
1365           if (system (cmd) != EXIT_SUCCESS)
1366             fatal ("failed to execute shell command");
1367         }
1368       free (cmd);
1369       append_to_tagfile = true;
1370     }
1371
1372   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1373   if (tagf == NULL)
1374     pfatal (tagfile);
1375   put_entries (nodehead);       /* write all the tags (CTAGS) */
1376   free_tree (nodehead);
1377   nodehead = NULL;
1378   if (fclose (tagf) == EOF)
1379     pfatal (tagfile);
1380
1381   if (CTAGS)
1382     if (append_to_tagfile || update)
1383       {
1384         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1385         /* Maybe these should be used:
1386            setenv ("LC_COLLATE", "C", 1);
1387            setenv ("LC_ALL", "C", 1); */
1388         char *z = stpcpy (cmd, "sort -u -o ");
1389         z = stpcpy (z, tagfile);
1390         *z++ = ' ';
1391         strcpy (z, tagfile);
1392         exit (system (cmd));
1393       }
1394   return EXIT_SUCCESS;
1395 }
1396
1397
1398 /*
1399  * Return a compressor given the file name.  If EXTPTR is non-zero,
1400  * return a pointer into FILE where the compressor-specific
1401  * extension begins.  If no compressor is found, NULL is returned
1402  * and EXTPTR is not significant.
1403  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1404  */
1405 static compressor *
1406 get_compressor_from_suffix (char *file, char **extptr)
1407 {
1408   compressor *compr;
1409   char *slash, *suffix;
1410
1411   /* File has been processed by canonicalize_filename,
1412      so we don't need to consider backslashes on DOS_NT.  */
1413   slash = strrchr (file, '/');
1414   suffix = strrchr (file, '.');
1415   if (suffix == NULL || suffix < slash)
1416     return NULL;
1417   if (extptr != NULL)
1418     *extptr = suffix;
1419   suffix += 1;
1420   /* Let those poor souls who live with DOS 8+3 file name limits get
1421      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1422      Only the first do loop is run if not MSDOS */
1423   do
1424     {
1425       for (compr = compressors; compr->suffix != NULL; compr++)
1426         if (streq (compr->suffix, suffix))
1427           return compr;
1428       if (!MSDOS)
1429         break;                  /* do it only once: not really a loop */
1430       if (extptr != NULL)
1431         *extptr = ++suffix;
1432     } while (*suffix != '\0');
1433   return NULL;
1434 }
1435
1436
1437
1438 /*
1439  * Return a language given the name.
1440  */
1441 static language *
1442 get_language_from_langname (const char *name)
1443 {
1444   language *lang;
1445
1446   if (name == NULL)
1447     error ("empty language name");
1448   else
1449     {
1450       for (lang = lang_names; lang->name != NULL; lang++)
1451         if (streq (name, lang->name))
1452           return lang;
1453       error ("unknown language \"%s\"", name);
1454     }
1455
1456   return NULL;
1457 }
1458
1459
1460 /*
1461  * Return a language given the interpreter name.
1462  */
1463 static language *
1464 get_language_from_interpreter (char *interpreter)
1465 {
1466   language *lang;
1467   const char **iname;
1468
1469   if (interpreter == NULL)
1470     return NULL;
1471   for (lang = lang_names; lang->name != NULL; lang++)
1472     if (lang->interpreters != NULL)
1473       for (iname = lang->interpreters; *iname != NULL; iname++)
1474         if (streq (*iname, interpreter))
1475             return lang;
1476
1477   return NULL;
1478 }
1479
1480
1481
1482 /*
1483  * Return a language given the file name.
1484  */
1485 static language *
1486 get_language_from_filename (char *file, int case_sensitive)
1487 {
1488   language *lang;
1489   const char **name, **ext, *suffix;
1490   char *slash;
1491
1492   /* Try whole file name first. */
1493   slash = strrchr (file, '/');
1494   if (slash != NULL)
1495     file = slash + 1;
1496 #ifdef DOS_NT
1497   else if (file[0] && file[1] == ':')
1498     file += 2;
1499 #endif
1500   for (lang = lang_names; lang->name != NULL; lang++)
1501     if (lang->filenames != NULL)
1502       for (name = lang->filenames; *name != NULL; name++)
1503         if ((case_sensitive)
1504             ? streq (*name, file)
1505             : strcaseeq (*name, file))
1506           return lang;
1507
1508   /* If not found, try suffix after last dot. */
1509   suffix = strrchr (file, '.');
1510   if (suffix == NULL)
1511     return NULL;
1512   suffix += 1;
1513   for (lang = lang_names; lang->name != NULL; lang++)
1514     if (lang->suffixes != NULL)
1515       for (ext = lang->suffixes; *ext != NULL; ext++)
1516         if ((case_sensitive)
1517             ? streq (*ext, suffix)
1518             : strcaseeq (*ext, suffix))
1519           return lang;
1520   return NULL;
1521 }
1522
1523 \f
1524 /*
1525  * This routine is called on each file argument.
1526  */
1527 static void
1528 process_file_name (char *file, language *lang)
1529 {
1530   FILE *inf;
1531   fdesc *fdp;
1532   compressor *compr;
1533   char *compressed_name, *uncompressed_name;
1534   char *ext, *real_name, *tmp_name;
1535   int retval;
1536
1537   canonicalize_filename (file);
1538   if (streq (file, tagfile) && !streq (tagfile, "-"))
1539     {
1540       error ("skipping inclusion of %s in self.", file);
1541       return;
1542     }
1543   compr = get_compressor_from_suffix (file, &ext);
1544   if (compr)
1545     {
1546       compressed_name = file;
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549   else
1550     {
1551       compressed_name = NULL;
1552       uncompressed_name = file;
1553     }
1554
1555   /* If the canonicalized uncompressed name
1556      has already been dealt with, skip it silently. */
1557   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1558     {
1559       assert (fdp->infname != NULL);
1560       if (streq (uncompressed_name, fdp->infname))
1561         goto cleanup;
1562     }
1563
1564   inf = fopen (file, "r" FOPEN_BINARY);
1565   if (inf)
1566     real_name = file;
1567   else
1568     {
1569       int file_errno = errno;
1570       if (compressed_name)
1571         {
1572           /* Try with the given suffix.  */
1573           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1574           if (inf)
1575             real_name = uncompressed_name;
1576         }
1577       else
1578         {
1579           /* Try all possible suffixes.  */
1580           for (compr = compressors; compr->suffix != NULL; compr++)
1581             {
1582               compressed_name = concat (file, ".", compr->suffix);
1583               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1584               if (inf)
1585                 {
1586                   real_name = compressed_name;
1587                   break;
1588                 }
1589               if (MSDOS)
1590                 {
1591                   char *suf = compressed_name + strlen (file);
1592                   size_t suflen = strlen (compr->suffix) + 1;
1593                   for ( ; suf[1]; suf++, suflen--)
1594                     {
1595                       memmove (suf, suf + 1, suflen);
1596                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1597                       if (inf)
1598                         {
1599                           real_name = compressed_name;
1600                           break;
1601                         }
1602                     }
1603                   if (inf)
1604                     break;
1605                 }
1606               free (compressed_name);
1607               compressed_name = NULL;
1608             }
1609         }
1610       if (! inf)
1611         {
1612           errno = file_errno;
1613           perror (file);
1614           goto cleanup;
1615         }
1616     }
1617
1618   if (real_name == compressed_name)
1619     {
1620       fclose (inf);
1621       tmp_name = etags_mktmp ();
1622       if (!tmp_name)
1623         inf = NULL;
1624       else
1625         {
1626 #if MSDOS || defined (DOS_NT)
1627           char *cmd1 = concat (compr->command, " \"", real_name);
1628           char *cmd = concat (cmd1, "\" > ", tmp_name);
1629 #else
1630           char *cmd1 = concat (compr->command, " '", real_name);
1631           char *cmd = concat (cmd1, "' > ", tmp_name);
1632 #endif
1633           free (cmd1);
1634           int tmp_errno;
1635           if (system (cmd) == -1)
1636             {
1637               inf = NULL;
1638               tmp_errno = EINVAL;
1639             }
1640           else
1641             {
1642               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1643               tmp_errno = errno;
1644             }
1645           free (cmd);
1646           errno = tmp_errno;
1647         }
1648
1649       if (!inf)
1650         {
1651           perror (real_name);
1652           goto cleanup;
1653         }
1654     }
1655
1656   process_file (inf, uncompressed_name, lang);
1657
1658   retval = fclose (inf);
1659   if (real_name == compressed_name)
1660     {
1661       remove (tmp_name);
1662       free (tmp_name);
1663     }
1664   if (retval < 0)
1665     pfatal (file);
1666
1667  cleanup:
1668   if (compressed_name != file)
1669     free (compressed_name);
1670   if (uncompressed_name != file)
1671     free (uncompressed_name);
1672   last_node = NULL;
1673   curfdp = NULL;
1674   return;
1675 }
1676
1677 static void
1678 process_file (FILE *fh, char *fn, language *lang)
1679 {
1680   static const fdesc emptyfdesc;
1681   fdesc *fdp;
1682
1683   infilename = fn;
1684   /* Create a new input file description entry. */
1685   fdp = xnew (1, fdesc);
1686   *fdp = emptyfdesc;
1687   fdp->next = fdhead;
1688   fdp->infname = savestr (fn);
1689   fdp->lang = lang;
1690   fdp->infabsname = absolute_filename (fn, cwd);
1691   fdp->infabsdir = absolute_dirname (fn, cwd);
1692   if (filename_is_absolute (fn))
1693     {
1694       /* An absolute file name.  Canonicalize it. */
1695       fdp->taggedfname = absolute_filename (fn, NULL);
1696     }
1697   else
1698     {
1699       /* A file name relative to cwd.  Make it relative
1700          to the directory of the tags file. */
1701       fdp->taggedfname = relative_filename (fn, tagfiledir);
1702     }
1703   fdp->usecharno = true;        /* use char position when making tags */
1704   fdp->prop = NULL;
1705   fdp->written = false;         /* not written on tags file yet */
1706
1707   fdhead = fdp;
1708   curfdp = fdhead;              /* the current file description */
1709
1710   find_entries (fh);
1711
1712   /* If not Ctags, and if this is not metasource and if it contained no #line
1713      directives, we can write the tags and free all nodes pointing to
1714      curfdp. */
1715   if (!CTAGS
1716       && curfdp->usecharno      /* no #line directives in this file */
1717       && !curfdp->lang->metasource)
1718     {
1719       node *np, *prev;
1720
1721       /* Look for the head of the sublist relative to this file.  See add_node
1722          for the structure of the node tree. */
1723       prev = NULL;
1724       for (np = nodehead; np != NULL; prev = np, np = np->left)
1725         if (np->fdp == curfdp)
1726           break;
1727
1728       /* If we generated tags for this file, write and delete them. */
1729       if (np != NULL)
1730         {
1731           /* This is the head of the last sublist, if any.  The following
1732              instructions depend on this being true. */
1733           assert (np->left == NULL);
1734
1735           assert (fdhead == curfdp);
1736           assert (last_node->fdp == curfdp);
1737           put_entries (np);     /* write tags for file curfdp->taggedfname */
1738           free_tree (np);       /* remove the written nodes */
1739           if (prev == NULL)
1740             nodehead = NULL;    /* no nodes left */
1741           else
1742             prev->left = NULL;  /* delete the pointer to the sublist */
1743         }
1744     }
1745 }
1746
1747 static void
1748 reset_input (FILE *inf)
1749 {
1750   if (fseek (inf, 0, SEEK_SET) != 0)
1751     perror (infilename);
1752 }
1753
1754 /*
1755  * This routine opens the specified file and calls the function
1756  * which finds the function and type definitions.
1757  */
1758 static void
1759 find_entries (FILE *inf)
1760 {
1761   char *cp;
1762   language *lang = curfdp->lang;
1763   Lang_function *parser = NULL;
1764
1765   /* If user specified a language, use it. */
1766   if (lang != NULL && lang->function != NULL)
1767     {
1768       parser = lang->function;
1769     }
1770
1771   /* Else try to guess the language given the file name. */
1772   if (parser == NULL)
1773     {
1774       lang = get_language_from_filename (curfdp->infname, true);
1775       if (lang != NULL && lang->function != NULL)
1776         {
1777           curfdp->lang = lang;
1778           parser = lang->function;
1779         }
1780     }
1781
1782   /* Else look for sharp-bang as the first two characters. */
1783   if (parser == NULL
1784       && readline_internal (&lb, inf, infilename) > 0
1785       && lb.len >= 2
1786       && lb.buffer[0] == '#'
1787       && lb.buffer[1] == '!')
1788     {
1789       char *lp;
1790
1791       /* Set lp to point at the first char after the last slash in the
1792          line or, if no slashes, at the first nonblank.  Then set cp to
1793          the first successive blank and terminate the string. */
1794       lp = strrchr (lb.buffer+2, '/');
1795       if (lp != NULL)
1796         lp += 1;
1797       else
1798         lp = skip_spaces (lb.buffer + 2);
1799       cp = skip_non_spaces (lp);
1800       *cp = '\0';
1801
1802       if (strlen (lp) > 0)
1803         {
1804           lang = get_language_from_interpreter (lp);
1805           if (lang != NULL && lang->function != NULL)
1806             {
1807               curfdp->lang = lang;
1808               parser = lang->function;
1809             }
1810         }
1811     }
1812
1813   reset_input (inf);
1814
1815   /* Else try to guess the language given the case insensitive file name. */
1816   if (parser == NULL)
1817     {
1818       lang = get_language_from_filename (curfdp->infname, false);
1819       if (lang != NULL && lang->function != NULL)
1820         {
1821           curfdp->lang = lang;
1822           parser = lang->function;
1823         }
1824     }
1825
1826   /* Else try Fortran or C. */
1827   if (parser == NULL)
1828     {
1829       node *old_last_node = last_node;
1830
1831       curfdp->lang = get_language_from_langname ("fortran");
1832       find_entries (inf);
1833
1834       if (old_last_node == last_node)
1835         /* No Fortran entries found.  Try C. */
1836         {
1837           reset_input (inf);
1838           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1839           find_entries (inf);
1840         }
1841       return;
1842     }
1843
1844   if (!no_line_directive
1845       && curfdp->lang != NULL && curfdp->lang->metasource)
1846     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1847        file, or anyway we parsed a file that is automatically generated from
1848        this one.  If this is the case, the bingo.c file contained #line
1849        directives that generated tags pointing to this file.  Let's delete
1850        them all before parsing this file, which is the real source. */
1851     {
1852       fdesc **fdpp = &fdhead;
1853       while (*fdpp != NULL)
1854         if (*fdpp != curfdp
1855             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1856           /* We found one of those!  We must delete both the file description
1857              and all tags referring to it. */
1858           {
1859             fdesc *badfdp = *fdpp;
1860
1861             /* Delete the tags referring to badfdp->taggedfname
1862                that were obtained from badfdp->infname. */
1863             invalidate_nodes (badfdp, &nodehead);
1864
1865             *fdpp = badfdp->next; /* remove the bad description from the list */
1866             free_fdesc (badfdp);
1867           }
1868         else
1869           fdpp = &(*fdpp)->next; /* advance the list pointer */
1870     }
1871
1872   assert (parser != NULL);
1873
1874   /* Generic initializations before reading from file. */
1875   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1876
1877   /* Generic initializations before parsing file with readline. */
1878   lineno = 0;                  /* reset global line number */
1879   charno = 0;                  /* reset global char number */
1880   linecharno = 0;              /* reset global char number of line start */
1881
1882   parser (inf);
1883
1884   regex_tag_multiline ();
1885 }
1886
1887 \f
1888 /*
1889  * Check whether an implicitly named tag should be created,
1890  * then call `pfnote'.
1891  * NAME is a string that is internally copied by this function.
1892  *
1893  * TAGS format specification
1894  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1895  * The following is explained in some more detail in etc/ETAGS.EBNF.
1896  *
1897  * make_tag creates tags with "implicit tag names" (unnamed tags)
1898  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1899  *  1. NAME does not contain any of the characters in NONAM;
1900  *  2. LINESTART contains name as either a rightmost, or rightmost but
1901  *     one character, substring;
1902  *  3. the character, if any, immediately before NAME in LINESTART must
1903  *     be a character in NONAM;
1904  *  4. the character, if any, immediately after NAME in LINESTART must
1905  *     also be a character in NONAM.
1906  *
1907  * The implementation uses the notinname() macro, which recognizes the
1908  * characters stored in the string `nonam'.
1909  * etags.el needs to use the same characters that are in NONAM.
1910  */
1911 static void
1912 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1913           int namelen,          /* tag length */
1914           bool is_func,         /* tag is a function */
1915           char *linestart,      /* start of the line where tag is */
1916           int linelen,          /* length of the line where tag is */
1917           int lno,              /* line number */
1918           long int cno)         /* character number */
1919 {
1920   bool named = (name != NULL && namelen > 0);
1921   char *nname = NULL;
1922
1923   if (!CTAGS && named)          /* maybe set named to false */
1924     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1925        such that etags.el can guess a name from it. */
1926     {
1927       int i;
1928       register const char *cp = name;
1929
1930       for (i = 0; i < namelen; i++)
1931         if (notinname (*cp++))
1932           break;
1933       if (i == namelen)                         /* rule #1 */
1934         {
1935           cp = linestart + linelen - namelen;
1936           if (notinname (linestart[linelen-1]))
1937             cp -= 1;                            /* rule #4 */
1938           if (cp >= linestart                   /* rule #2 */
1939               && (cp == linestart
1940                   || notinname (cp[-1]))        /* rule #3 */
1941               && strneq (name, cp, namelen))    /* rule #2 */
1942             named = false;      /* use implicit tag name */
1943         }
1944     }
1945
1946   if (named)
1947     nname = savenstr (name, namelen);
1948
1949   pfnote (nname, is_func, linestart, linelen, lno, cno);
1950 }
1951
1952 /* Record a tag. */
1953 static void
1954 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1955         long int cno)
1956                                 /* tag name, or NULL if unnamed */
1957                                 /* tag is a function */
1958                                 /* start of the line where tag is */
1959                                 /* length of the line where tag is */
1960                                 /* line number */
1961                                 /* character number */
1962 {
1963   register node *np;
1964
1965   assert (name == NULL || name[0] != '\0');
1966   if (CTAGS && name == NULL)
1967     return;
1968
1969   np = xnew (1, node);
1970
1971   /* If ctags mode, change name "main" to M<thisfilename>. */
1972   if (CTAGS && !cxref_style && streq (name, "main"))
1973     {
1974       char *fp = strrchr (curfdp->taggedfname, '/');
1975       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1976       fp = strrchr (np->name, '.');
1977       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1978         fp[0] = '\0';
1979     }
1980   else
1981     np->name = name;
1982   np->valid = true;
1983   np->been_warned = false;
1984   np->fdp = curfdp;
1985   np->is_func = is_func;
1986   np->lno = lno;
1987   if (np->fdp->usecharno)
1988     /* Our char numbers are 0-base, because of C language tradition?
1989        ctags compatibility?  old versions compatibility?   I don't know.
1990        Anyway, since emacs's are 1-base we expect etags.el to take care
1991        of the difference.  If we wanted to have 1-based numbers, we would
1992        uncomment the +1 below. */
1993     np->cno = cno /* + 1 */ ;
1994   else
1995     np->cno = invalidcharno;
1996   np->left = np->right = NULL;
1997   if (CTAGS && !cxref_style)
1998     {
1999       if (strlen (linestart) < 50)
2000         np->regex = concat (linestart, "$", "");
2001       else
2002         np->regex = savenstr (linestart, 50);
2003     }
2004   else
2005     np->regex = savenstr (linestart, linelen);
2006
2007   add_node (np, &nodehead);
2008 }
2009
2010 /*
2011  * free_tree ()
2012  *      recurse on left children, iterate on right children.
2013  */
2014 static void
2015 free_tree (register node *np)
2016 {
2017   while (np)
2018     {
2019       register node *node_right = np->right;
2020       free_tree (np->left);
2021       free (np->name);
2022       free (np->regex);
2023       free (np);
2024       np = node_right;
2025     }
2026 }
2027
2028 /*
2029  * free_fdesc ()
2030  *      delete a file description
2031  */
2032 static void
2033 free_fdesc (register fdesc *fdp)
2034 {
2035   free (fdp->infname);
2036   free (fdp->infabsname);
2037   free (fdp->infabsdir);
2038   free (fdp->taggedfname);
2039   free (fdp->prop);
2040   free (fdp);
2041 }
2042
2043 /*
2044  * add_node ()
2045  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2046  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2047  *      balancing.
2048  *
2049  *      add_node is the only function allowed to add nodes, so it can
2050  *      maintain state.
2051  */
2052 static void
2053 add_node (node *np, node **cur_node_p)
2054 {
2055   register int dif;
2056   register node *cur_node = *cur_node_p;
2057
2058   if (cur_node == NULL)
2059     {
2060       *cur_node_p = np;
2061       last_node = np;
2062       return;
2063     }
2064
2065   if (!CTAGS)
2066     /* Etags Mode */
2067     {
2068       /* For each file name, tags are in a linked sublist on the right
2069          pointer.  The first tags of different files are a linked list
2070          on the left pointer.  last_node points to the end of the last
2071          used sublist. */
2072       if (last_node != NULL && last_node->fdp == np->fdp)
2073         {
2074           /* Let's use the same sublist as the last added node. */
2075           assert (last_node->right == NULL);
2076           last_node->right = np;
2077           last_node = np;
2078         }
2079       else if (cur_node->fdp == np->fdp)
2080         {
2081           /* Scanning the list we found the head of a sublist which is
2082              good for us.  Let's scan this sublist. */
2083           add_node (np, &cur_node->right);
2084         }
2085       else
2086         /* The head of this sublist is not good for us.  Let's try the
2087            next one. */
2088         add_node (np, &cur_node->left);
2089     } /* if ETAGS mode */
2090
2091   else
2092     {
2093       /* Ctags Mode */
2094       dif = strcmp (np->name, cur_node->name);
2095
2096       /*
2097        * If this tag name matches an existing one, then
2098        * do not add the node, but maybe print a warning.
2099        */
2100       if (no_duplicates && !dif)
2101         {
2102           if (np->fdp == cur_node->fdp)
2103             {
2104               if (!no_warnings)
2105                 {
2106                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2107                            np->fdp->infname, lineno, np->name);
2108                   fprintf (stderr, "Second entry ignored\n");
2109                 }
2110             }
2111           else if (!cur_node->been_warned && !no_warnings)
2112             {
2113               fprintf
2114                 (stderr,
2115                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2116                  np->fdp->infname, cur_node->fdp->infname, np->name);
2117               cur_node->been_warned = true;
2118             }
2119           return;
2120         }
2121
2122       /* Actually add the node */
2123       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2124     } /* if CTAGS mode */
2125 }
2126
2127 /*
2128  * invalidate_nodes ()
2129  *      Scan the node tree and invalidate all nodes pointing to the
2130  *      given file description (CTAGS case) or free them (ETAGS case).
2131  */
2132 static void
2133 invalidate_nodes (fdesc *badfdp, node **npp)
2134 {
2135   node *np = *npp;
2136
2137   if (np == NULL)
2138     return;
2139
2140   if (CTAGS)
2141     {
2142       if (np->left != NULL)
2143         invalidate_nodes (badfdp, &np->left);
2144       if (np->fdp == badfdp)
2145         np->valid = false;
2146       if (np->right != NULL)
2147         invalidate_nodes (badfdp, &np->right);
2148     }
2149   else
2150     {
2151       assert (np->fdp != NULL);
2152       if (np->fdp == badfdp)
2153         {
2154           *npp = np->left;      /* detach the sublist from the list */
2155           np->left = NULL;      /* isolate it */
2156           free_tree (np);       /* free it */
2157           invalidate_nodes (badfdp, npp);
2158         }
2159       else
2160         invalidate_nodes (badfdp, &np->left);
2161     }
2162 }
2163
2164 \f
2165 static int total_size_of_entries (node *);
2166 static int number_len (long) ATTRIBUTE_CONST;
2167
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1.  */
static int
number_len (long int num)
{
  int digits;

  /* Every value that still has at least two digits adds one more.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2177
2178 /*
2179  * Return total number of characters that put_entries will output for
2180  * the nodes in the linked list at the right of the specified node.
2181  * This count is irrelevant with etags.el since emacs 19.34 at least,
2182  * but is still supplied for backward compatibility.
2183  */
2184 static int
2185 total_size_of_entries (register node *np)
2186 {
2187   register int total = 0;
2188
2189   for (; np != NULL; np = np->right)
2190     if (np->valid)
2191       {
2192         total += strlen (np->regex) + 1;                /* pat\177 */
2193         if (np->name != NULL)
2194           total += strlen (np->name) + 1;               /* name\001 */
2195         total += number_len ((long) np->lno) + 1;       /* lno, */
2196         if (np->cno != invalidcharno)                   /* cno */
2197           total += number_len (np->cno);
2198         total += 1;                                     /* newline */
2199       }
2200
2201   return total;
2202 }
2203
2204 static void
2205 put_entries (register node *np)
2206 {
2207   register char *sp;
2208   static fdesc *fdp = NULL;
2209
2210   if (np == NULL)
2211     return;
2212
2213   /* Output subentries that precede this one */
2214   if (CTAGS)
2215     put_entries (np->left);
2216
2217   /* Output this entry */
2218   if (np->valid)
2219     {
2220       if (!CTAGS)
2221         {
2222           /* Etags mode */
2223           if (fdp != np->fdp)
2224             {
2225               fdp = np->fdp;
2226               fprintf (tagf, "\f\n%s,%d\n",
2227                        fdp->taggedfname, total_size_of_entries (np));
2228               fdp->written = true;
2229             }
2230           fputs (np->regex, tagf);
2231           fputc ('\177', tagf);
2232           if (np->name != NULL)
2233             {
2234               fputs (np->name, tagf);
2235               fputc ('\001', tagf);
2236             }
2237           fprintf (tagf, "%d,", np->lno);
2238           if (np->cno != invalidcharno)
2239             fprintf (tagf, "%ld", np->cno);
2240           fputs ("\n", tagf);
2241         }
2242       else
2243         {
2244           /* Ctags mode */
2245           if (np->name == NULL)
2246             error ("internal error: NULL name in ctags mode.");
2247
2248           if (cxref_style)
2249             {
2250               if (vgrind_style)
2251                 fprintf (stdout, "%s %s %d\n",
2252                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2253               else
2254                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2255                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2256             }
2257           else
2258             {
2259               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2260
2261               if (np->is_func)
2262                 {               /* function or #define macro with args */
2263                   putc (searchar, tagf);
2264                   putc ('^', tagf);
2265
2266                   for (sp = np->regex; *sp; sp++)
2267                     {
2268                       if (*sp == '\\' || *sp == searchar)
2269                         putc ('\\', tagf);
2270                       putc (*sp, tagf);
2271                     }
2272                   putc (searchar, tagf);
2273                 }
2274               else
2275                 {               /* anything else; text pattern inadequate */
2276                   fprintf (tagf, "%d", np->lno);
2277                 }
2278               putc ('\n', tagf);
2279             }
2280         }
2281     } /* if this node contains a valid tag */
2282
2283   /* Output subentries that follow this one */
2284   put_entries (np->right);
2285   if (!CTAGS)
2286     put_entries (np->left);
2287 }
2288
2289 \f
/* C extensions: integer codes naming the C dialect being handled.  The
   keyword table below stores such codes in its c_ext field.  */
#define C_EXT   0x00fff         /* all the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, switching to C++ once `class' is met */
#define YACC    0x10000         /* a yacc file */
2298
/*
 * The C symbol tables.
 *
 * Classes of keywords known to the C-like parsers.  The keywords named
 * in the comments are the ones that the gperf table below maps to each
 * class.
 */
enum sym_type
{
  st_none,                      /* not a known keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2313
2314 /* Feed stuff between (but not including) %[ and %] lines to:
2315      gperf -m 5
2316 %[
2317 %compare-strncmp
2318 %enum
2319 %struct-type
2320 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2321 %%
2322 if,             0,                      st_C_ignore
2323 for,            0,                      st_C_ignore
2324 while,          0,                      st_C_ignore
2325 switch,         0,                      st_C_ignore
2326 return,         0,                      st_C_ignore
2327 __attribute__,  0,                      st_C_attribute
2328 GTY,            0,                      st_C_attribute
2329 @interface,     0,                      st_C_objprot
2330 @protocol,      0,                      st_C_objprot
2331 @implementation,0,                      st_C_objimpl
2332 @end,           0,                      st_C_objend
2333 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2334 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2335 friend,         C_PLPL,                 st_C_ignore
2336 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2337 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2338 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2339 class,          0,                      st_C_class
2340 namespace,      C_PLPL,                 st_C_struct
2341 domain,         C_STAR,                 st_C_struct
2342 union,          0,                      st_C_struct
2343 struct,         0,                      st_C_struct
2344 extern,         0,                      st_C_extern
2345 enum,           0,                      st_C_enum
2346 typedef,        0,                      st_C_typedef
2347 define,         0,                      st_C_define
2348 undef,          0,                      st_C_define
2349 operator,       C_PLPL,                 st_C_operator
2350 template,       0,                      st_C_template
2351 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2352 DEFUN,          0,                      st_C_gnumacro
2353 SYSCALL,        0,                      st_C_gnumacro
2354 ENTRY,          0,                      st_C_gnumacro
2355 PSEUDO,         0,                      st_C_gnumacro
2356 # These are defined inside C functions, so currently they are not met.
2357 # EXFUN used in glibc, DEFVAR_* in emacs.
2358 #EXFUN,         0,                      st_C_gnumacro
2359 #DEFVAR_,       0,                      st_C_gnumacro
2360 %]
2361 and replace lines between %< and %> with its output, then:
2362  - remove the #if characterset check
2363  - make in_word_set static and not inline. */
2364 /*%<*/
2365 /* C code produced by gperf version 3.0.1 */
2366 /* Command-line: gperf -m 5  */
2367 /* Computed positions: -k'2-3' */
2368
2369 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2370 /* maximum key range = 33, duplicates = 0 */
2371
/* Hash function produced by gperf for the keyword table.  The value is
   LEN plus the association value of the second character of STR, plus
   that of the third character unless LEN is exactly 2.  STR must hold
   at least two characters, three when LEN is not 2.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character takes part for every length other than 2.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];
  return hval;
}
2417
2418 static struct C_stab_entry *
2419 in_word_set (register const char *str, register unsigned int len)
2420 {
2421   enum
2422     {
2423       TOTAL_KEYWORDS = 33,
2424       MIN_WORD_LENGTH = 2,
2425       MAX_WORD_LENGTH = 15,
2426       MIN_HASH_VALUE = 2,
2427       MAX_HASH_VALUE = 34
2428     };
2429
2430   static struct C_stab_entry wordlist[] =
2431     {
2432       {""}, {""},
2433       {"if",            0,                      st_C_ignore},
2434       {"GTY",           0,                      st_C_attribute},
2435       {"@end",          0,                      st_C_objend},
2436       {"union",         0,                      st_C_struct},
2437       {"define",                0,                      st_C_define},
2438       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2439       {"template",      0,                      st_C_template},
2440       {"operator",      C_PLPL,                 st_C_operator},
2441       {"@interface",    0,                      st_C_objprot},
2442       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2443       {"friend",                C_PLPL,                 st_C_ignore},
2444       {"typedef",       0,                      st_C_typedef},
2445       {"return",                0,                      st_C_ignore},
2446       {"@implementation",0,                     st_C_objimpl},
2447       {"@protocol",     0,                      st_C_objprot},
2448       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2449       {"extern",                0,                      st_C_extern},
2450       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2451       {"struct",                0,                      st_C_struct},
2452       {"domain",                C_STAR,                 st_C_struct},
2453       {"switch",                0,                      st_C_ignore},
2454       {"enum",          0,                      st_C_enum},
2455       {"for",           0,                      st_C_ignore},
2456       {"namespace",     C_PLPL,                 st_C_struct},
2457       {"class",         0,                      st_C_class},
2458       {"while",         0,                      st_C_ignore},
2459       {"undef",         0,                      st_C_define},
2460       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2461       {"__attribute__", 0,                      st_C_attribute},
2462       {"SYSCALL",       0,                      st_C_gnumacro},
2463       {"ENTRY",         0,                      st_C_gnumacro},
2464       {"PSEUDO",                0,                      st_C_gnumacro},
2465       {"DEFUN",         0,                      st_C_gnumacro}
2466     };
2467
2468   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2469     {
2470       int key = hash (str, len);
2471
2472       if (key <= MAX_HASH_VALUE && key >= 0)
2473         {
2474           const char *s = wordlist[key].name;
2475
2476           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2477             return &wordlist[key];
2478         }
2479     }
2480   return 0;
2481 }
2482 /*%>*/
2483
2484 static enum sym_type
2485 C_symtype (char *str, int len, int c_ext)
2486 {
2487   register struct C_stab_entry *se = in_word_set (str, len);
2488
2489   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2490     return st_none;
2491   return se->type;
2492 }
2493
2494 \f
2495 /*
2496  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets the __attribute__
      * keyword.  While the flag is set, C_entries does not hand tokens
      * to consider_token.
2497  */
2498 static bool inattribute;        /* looking at an __attribute__ construct */
2499
2500 /*
2501  * C functions and variables are recognized using a simple
2502  * finite automaton.  fvdef is its state variable.
      *
      * consider_token drives part of this automaton: a GNU macro keyword
      * such as DEFUN outside a preprocessor line yields fdefunkey, the
      * operator keyword yields foperator, an ordinary identifier that may
      * name a function or variable yields fvnameseen, and keywords
      * classified st_C_ignore (return, switch, for, while, ...) as well
      * as asm/__asm__ yield vignore.
2503  */
2504 static enum
2505 {
2506   fvnone,                       /* nothing seen */
2507   fdefunkey,                    /* Emacs DEFUN keyword seen */
2508   fdefunname,                   /* Emacs DEFUN name seen */
2509   foperator,                    /* func: operator keyword seen (cplpl) */
2510   fvnameseen,                   /* function or variable name seen */
2511   fstartlist,                   /* func: just after open parenthesis */
2512   finlist,                      /* func: in parameter list */
2513   flistseen,                    /* func: after parameter list */
2514   fignore,                      /* func: before open brace */
2515   vignore                       /* var-like: ignore until ';' */
2516 } fvdef;
2517
     /* Set by consider_token on the extern keyword; cleared there on the
        typedef keyword and on any st_C_ignore keyword.  */
2518 static bool fvextern;           /* func or var: extern keyword seen; */
2519
2520 /*
2521  * typedefs are recognized using a simple finite automaton.
2522  * typdef is its state variable.
      *
      * consider_token leaves tnone for tkeyseen on the typedef keyword,
      * but only when the typedefs option is set, and moves from tkeyseen
      * to ttypeseen on a struct-like keyword or an ordinary identifier.
      * While the state is tignore, C_entries does not consider tokens.
2523  */
2524 static enum
2525 {
2526   tnone,                        /* nothing seen */
2527   tkeyseen,                     /* typedef keyword seen */
2528   ttypeseen,                    /* defined type seen */
2529   tinbody,                      /* inside typedef body */
2530   tend,                         /* just before typedef tag */
2531   tignore                       /* junk after typedef tag */
2532 } typdef;
2533
2534 /*
2535  * struct-like structures (enum, struct and union) are recognized
2536  * using another simple finite automaton.  `structdef' is its state
2537  * variable.
      *
      * consider_token enters skeyseen on a keyword classified st_C_struct,
      * st_C_enum or st_C_class, enters stagseen on the token that follows
      * such a keyword, and enters scolonseen when a Java extends or
      * implements keyword follows the tag.  While the state is scolonseen
      * outside a preprocessor definition, C_entries does not consider
      * tokens.
2538  */
2539 static enum
2540 {
2541   snone,                        /* nothing seen yet,
2542                                    or in struct body if bracelev > 0 */
2543   skeyseen,                     /* struct-like keyword seen */
2544   stagseen,                     /* struct-like tag seen */
2545   scolonseen                    /* colon seen after struct-like tag */
2546 } structdef;
2547
2548 /*
2549  * When objdef is different from onone, objtag is the name of the class.
      *
      * consider_token stores here a copy made with savenstr of the token
      * that follows @interface, @protocol or @implementation.  As noted
      * in consider_token, that copy is deliberately never freed.
2550  */
2551 static const char *objtag = "<uninited>";
2552
2553 /*
2554  * Yet another little state machine to deal with preprocessor lines.
      *
      * C_entries enters dsharpseen when it finds a '#' that starts a
      * preprocessor line.  consider_token then moves to ddefineseen when
      * the directive is classified st_C_define and to dignorerest for any
      * other directive, and to dignorerest again once the macro name has
      * been handled.  The CNL macro resets the state to dnone when a new
      * line is read; CNL_SAVE_DEFINEDEF reads a line without resetting it.
2555  */
2556 static enum
2557 {
2558   dnone,                        /* nothing seen */
2559   dsharpseen,                   /* '#' seen as first char on line */
2560   ddefineseen,                  /* '#' and 'define' seen */
2561   dignorerest                   /* ignore rest of line */
2562 } definedef;
2563
2564 /*
2565  * State machine for Objective C protocols and implementations.
2566  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
      *
      * consider_token handles the transitions that are triggered by
      * identifiers and keywords: onone to oprotocol or oimplementation,
      * those to otagseen or oinbody once the class name is read,
      * oparenseen to ocatseen, and the omethod* states while a method
      * selector is being assembled in token_name.
2567  */
2568 static enum
2569 {
2570   onone,                        /* nothing seen */
2571   oprotocol,                    /* @interface or @protocol seen */
2572   oimplementation,              /* @implementation seen */
2573   otagseen,                     /* class name seen */
2574   oparenseen,                   /* parenthesis before category seen */
2575   ocatseen,                     /* category name seen */
2576   oinbody,                      /* in @implementation body */
2577   omethodsign,                  /* in @implementation body, after +/- */
2578   omethodtag,                   /* after method name */
2579   omethodcolon,                 /* after method colon */
2580   omethodparm,                  /* after method parameter */
2581   oignore                       /* wait for @end */
2582 } objdef;
2583
2584
2585 /*
2586  * Use this structure to keep info about the token read, and how it
2587  * should be tagged.  Used by the make_C_tag function to build a tag.
      *
      * make_C_tag passes LINE, OFFSET + LENGTH + 1, LINENO and LINEPOS to
      * make_tag, together with the name held in token_name, and clears
      * VALID afterwards.
2588  */
2589 static struct tok
2590 {
2591   char *line;                   /* string containing the token */
2592   int offset;                   /* where the token starts in LINE */
2593   int length;                   /* token length */
2594   /*
2595     The previous members can be used to pass strings around for generic
2596     purposes.  The following ones specifically refer to creating tags.  In this
2597     case the token contained here is the pattern that will be used to create a
2598     tag.
2599   */
2600   bool valid;                   /* when false, do not create a tag; the
2601                                    token should be invalidated whenever a
2602                                    state machine is reset prematurely */
2603   bool named;                   /* create a named tag */
2604   int lineno;                   /* source line number of tag */
2605   long linepos;                 /* source char number of tag */
2606 } token;                        /* latest token read */
2607
2608 /*
2609  * Variables and functions for dealing with nested structures.
2610  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cstack.cname and cstack.bracelev are parallel arrays holding
      * cstack.size elements each, of which the first cstack.nl are in
      * use.  A cname entry is NULL for a level pushed without a name.
      * pushclass_above doubles both arrays when they are full.
2611  */
2612 static void pushclass_above (int, char *, int);
2613 static void popclass_above (int);
2614 static void write_classname (linebuffer *, const char *qualifier);
2615
2616 static struct {
2617   char **cname;                 /* nested class names */
2618   int *bracelev;                /* nested class brace level */
2619   int nl;                       /* class nesting level (elements used) */
2620   int size;                     /* length of the array */
2621 } cstack;                       /* stack for nested declaration tags */
2622 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2623 #define nestlev         (cstack.nl)
2624 /* After struct keyword or in struct body, not inside a nested function.
        The expansion reads a variable named bracelev, which must be in
        scope wherever the macro is used.  */
2625 #define instruct        (structdef == snone && nestlev > 0                      \
2626                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2627
2628 static void
2629 pushclass_above (int bracelev, char *str, int len)
2630 {
2631   int nl;
2632
2633   popclass_above (bracelev);
2634   nl = cstack.nl;
2635   if (nl >= cstack.size)
2636     {
2637       int size = cstack.size *= 2;
2638       xrnew (cstack.cname, size, char *);
2639       xrnew (cstack.bracelev, size, int);
2640     }
2641   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2642   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2643   cstack.bracelev[nl] = bracelev;
2644   cstack.nl = nl + 1;
2645 }
2646
2647 static void
2648 popclass_above (int bracelev)
2649 {
2650   int nl;
2651
2652   for (nl = cstack.nl - 1;
2653        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2654        nl--)
2655     {
2656       free (cstack.cname[nl]);
2657       cstack.nl = nl;
2658     }
2659 }
2660
2661 static void
2662 write_classname (linebuffer *cn, const char *qualifier)
2663 {
2664   int i, len;
2665   int qlen = strlen (qualifier);
2666
2667   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2668     {
2669       len = 0;
2670       cn->len = 0;
2671       cn->buffer[0] = '\0';
2672     }
2673   else
2674     {
2675       len = strlen (cstack.cname[0]);
2676       linebuffer_setlen (cn, len);
2677       strcpy (cn->buffer, cstack.cname[0]);
2678     }
2679   for (i = 1; i < cstack.nl; i++)
2680     {
2681       char *s = cstack.cname[i];
2682       if (s == NULL)
2683         continue;
2684       linebuffer_setlen (cn, len + qlen + strlen (s));
2685       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2686     }
2687 }
2688
2689 \f
2690 static bool consider_token (char *, int, int, int *, int, int, bool *);
2691 static void make_C_tag (bool);
2692
2693 /*
2694  * consider_token ()
2695  *      checks to see if the current token is at the start of a
2696  *      function or variable, or corresponds to a typedef, or
2697  *      is a struct/union/enum tag, or #define, or an enum constant.
2698  *
2699  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2700  *      with args.  C_EXTP points to which language we are looking at.
2701  *
      *      Returns true when the caller should go on and treat the token
      *      as a tag candidate, false when the token only advanced one of
      *      the state machines or must be ignored.  The checks run in a
      *      fixed order, each able to return early: __attribute__, the
      *      preprocessor state (definedef), the typedef state (typdef),
      *      struct-like keywords (structdef), the Objective C state
      *      (objdef), and finally functions, variables and enum constants
      *      (fvdef).
      *
2702  * Globals
2703  *      fvdef                   IN OUT
2704  *      structdef               IN OUT
2705  *      definedef               IN OUT
2706  *      typdef                  IN OUT
2707  *      objdef                  IN OUT
      *      inattribute             OUT
      *      fvextern                OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2708  */
2709
2710 static bool
2711 consider_token (char *str, int len, int c, int *c_extp,
2712                 int bracelev, int parlev, bool *is_func_or_var)
2713                                 /* IN: token pointer */
2714                                 /* IN: token length */
2715                                 /* IN: first char after the token */
2716                                 /* IN, OUT: C extensions mask */
2717                                 /* IN: brace level */
2718                                 /* IN: parenthesis level */
2719                                 /* OUT: function or variable found */
2720 {
2721   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2722      structtype is the type of the preceding struct-like keyword, and
2723      structbracelev is the brace level where it has been seen. */
2724   static enum sym_type structtype;
2725   static int structbracelev;
       /* Keyword class of the current token; C_symtype yields st_none for
          anything that is not a keyword usable in the current dialect.
          The variable is static, yet it is reassigned on every call.  */
2726   static enum sym_type toktype;
2727
2728
2729   toktype = C_symtype (str, len, *c_extp);
2730
2731   /*
2732    * Skip __attribute__
2733    */
2734   if (toktype == st_C_attribute)
2735     {
2736       inattribute = true;
2737       return false;
2738      }
2739
2740    /*
2741     * Advance the definedef state machine.
2742     */
2743    switch (definedef)
2744      {
2745      case dnone:
2746        /* We're not on a preprocessor line. */
2747        if (toktype == st_C_gnumacro)
2748          {
2749            fvdef = fdefunkey;
2750            return false;
2751          }
2752        break;
2753      case dsharpseen:
            /* This token is the directive name.  Only a directive
               classified st_C_define starts a macro definition; after
               any other directive the rest of the line is ignored.  */
2754        if (toktype == st_C_define)
2755          {
2756            definedef = ddefineseen;
2757          }
2758        else
2759          {
2760            definedef = dignorerest;
2761          }
2762        return false;
2763      case ddefineseen:
2764        /*
2765         * Make a tag for any macro, unless it is a constant
2766         * and constantypedefs is false.
             * A macro counts as function-like when the character right
             * after its name is an open parenthesis.
2767         */
2768        definedef = dignorerest;
2769        *is_func_or_var = (c == '(');
2770        if (!*is_func_or_var && !constantypedefs)
2771          return false;
2772        else
2773          return true;
2774      case dignorerest:
2775        return false;
2776      default:
2777        error ("internal error: definedef value.");
2778      }
2779
2780    /*
2781     * Now typedefs
2782     */
2783    switch (typdef)
2784      {
2785      case tnone:
2786        if (toktype == st_C_typedef)
2787          {
                /* The automaton advances only when the typedefs option
                   is set, but the keyword always resets the
                   function/variable state.  */
2788            if (typedefs)
2789              typdef = tkeyseen;
2790            fvextern = false;
2791            fvdef = fvnone;
2792            return false;
2793          }
2794        break;
2795      case tkeyseen:
2796        switch (toktype)
2797          {
2798          case st_none:
2799          case st_C_class:
2800          case st_C_struct:
2801          case st_C_enum:
2802            typdef = ttypeseen;
2803            break;
2804          default:
2805            break;
2806          }
2807        break;
2808      case ttypeseen:
2809        if (structdef == snone && fvdef == fvnone)
2810          {
2811            fvdef = fvnameseen;
2812            return true;
2813          }
2814        break;
2815      case tend:
            /* Any token other than a struct-like keyword is accepted
               here.  */
2816        switch (toktype)
2817          {
2818          case st_C_class:
2819          case st_C_struct:
2820          case st_C_enum:
2821            return false;
2822          default:
2823            return true;
2824          }
2825      default:
2826        break;
2827      }
2828
        /* Struct-like keywords and the keywords that reveal the
           dialect.  */
2829    switch (toktype)
2830      {
2831      case st_C_javastruct:
            /* Java extends/implements after a tag: record it in the same
               state that a colon after the tag uses.  */
2832        if (structdef == stagseen)
2833          structdef = scolonseen;
2834        return false;
2835      case st_C_template:
2836      case st_C_class:
2837        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2838            && bracelev == 0
2839            && definedef == dnone && structdef == snone
2840            && typdef == tnone && fvdef == fvnone)
2841          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2842        if (toktype == st_C_template)
2843          break;
2844        /* FALLTHRU */
2845      case st_C_struct:
2846      case st_C_enum:
2847        if (parlev == 0
2848            && fvdef != vignore
2849            && (typdef == tkeyseen
2850                || (typedefs_or_cplusplus && structdef == snone)))
2851          {
                /* Remember which keyword it was and at which brace level,
                   for the enum constant test further down.  */
2852            structdef = skeyseen;
2853            structtype = toktype;
2854            structbracelev = bracelev;
2855            if (fvdef == fvnameseen)
2856              fvdef = fvnone;
2857          }
2858        return false;
2859      default:
2860        break;
2861      }
2862
        /* The token that follows a struct-like keyword is its tag.  */
2863    if (structdef == skeyseen)
2864      {
2865        structdef = stagseen;
2866        return true;
2867      }
2868
2869    if (typdef != tnone)
2870      definedef = dnone;
2871
2872    /* Detect Objective C constructs. */
2873    switch (objdef)
2874      {
2875      case onone:
2876        switch (toktype)
2877          {
2878          case st_C_objprot:
2879            objdef = oprotocol;
2880            return false;
2881          case st_C_objimpl:
2882            objdef = oimplementation;
2883            return false;
2884          default:
2885            break;
2886          }
2887        break;
2888      case oimplementation:
2889        /* Save the class tag for functions or variables defined inside. */
2890        objtag = savenstr (str, len);
2891        objdef = oinbody;
2892        return false;
2893      case oprotocol:
2894        /* Save the class tag for categories. */
2895        objtag = savenstr (str, len);
2896        objdef = otagseen;
2897        *is_func_or_var = true;
2898        return true;
2899      case oparenseen:
2900        objdef = ocatseen;
2901        *is_func_or_var = true;
2902        return true;
2903      case oinbody:
2904        break;
2905      case omethodsign:
2906        if (parlev == 0)
2907          {
                /* First part of the method name: start token_name with a
                   copy of it.  */
2908            fvdef = fvnone;
2909            objdef = omethodtag;
2910            linebuffer_setlen (&token_name, len);
2911            memcpy (token_name.buffer, str, len);
2912            token_name.buffer[len] = '\0';
2913            return true;
2914          }
2915        return false;
2916      case omethodcolon:
2917        if (parlev == 0)
2918          objdef = omethodparm;
2919        return false;
2920      case omethodparm:
2921        if (parlev == 0)
2922          {
2923            objdef = omethodtag;
                /* With class_qualify set, append this token to the name
                   accumulated so far in token_name.  */
2924            if (class_qualify)
2925              {
2926                int oldlen = token_name.len;
2927                fvdef = fvnone;
2928                linebuffer_setlen (&token_name, oldlen + len);
2929                memcpy (token_name.buffer + oldlen, str, len);
2930                token_name.buffer[oldlen + len] = '\0';
2931              }
2932            return true;
2933          }
2934        return false;
2935      case oignore:
2936        if (toktype == st_C_objend)
2937          {
2938            /* Memory leakage here: the string pointed by objtag is
2939               never released, because many tests would be needed to
2940               avoid breaking on incorrect input code.  The amount of
2941               memory leaked here is the sum of the lengths of the
2942               class tags.
2943            free (objtag); */
2944            objdef = onone;
2945          }
2946        return false;
2947      default:
2948        break;
2949      }
2950
2951    /* A function, variable or enum constant? */
2952    switch (toktype)
2953      {
2954      case st_C_extern:
2955        fvextern = true;
            /* Reset fvdef unless a parameter list or an ignored
               construct is in progress.  */
2956        switch  (fvdef)
2957          {
2958          case finlist:
2959          case flistseen:
2960          case fignore:
2961          case vignore:
2962            break;
2963          default:
2964            fvdef = fvnone;
2965          }
2966        return false;
2967      case st_C_ignore:
2968        fvextern = false;
2969        fvdef = vignore;
2970        return false;
2971      case st_C_operator:
2972        fvdef = foperator;
2973        *is_func_or_var = true;
2974        return true;
2975      case st_none:
2976        if (constantypedefs
2977            && structdef == snone
2978            && structtype == st_C_enum && bracelev > structbracelev
2979            /* Don't tag tokens in expressions that assign values to enum
2980               constants.  */
2981            && fvdef != vignore)
2982          return true;           /* enum constant */
2983        switch (fvdef)
2984          {
2985          case fdefunkey:
                /* The name after a GNU macro keyword is accepted only at
                   brace level zero.  */
2986            if (bracelev > 0)
2987              break;
2988            fvdef = fdefunname;  /* GNU macro */
2989            *is_func_or_var = true;
2990            return true;
2991          case fvnone:
2992            switch (typdef)
2993              {
2994              case ttypeseen:
2995                return false;
2996              case tnone:
                    /* asm and __asm__ are not in the keyword table; treat
                       them like an ignored keyword.  */
2997                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2998                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2999                  {
3000                    fvdef = vignore;
3001                    return false;
3002                  }
3003                break;
3004              default:
3005                break;
3006              }
3007           /* FALLTHRU */
3008           case fvnameseen:
               /* A token ending in "::operator" names a C++ operator
                  function and, under automatic detection, settles the
                  dialect.  */
3009           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3010             {
3011               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3012                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3013               fvdef = foperator;
3014               *is_func_or_var = true;
3015               return true;
3016             }
               /* Inside braces, only tokens directly in a struct body are
                  candidates.  */
3017           if (bracelev > 0 && !instruct)
3018             break;
3019           fvdef = fvnameseen;   /* function or variable */
3020           *is_func_or_var = true;
3021           return true;
3022          default:
3023            break;
3024         }
3025       break;
3026      default:
3027        break;
3028     }
3029
3030   return false;
3031 }
3032
3033 \f
3034 /*
3035  * C_entries often keeps pointers to tokens or lines which are older than
3036  * the line currently read.  By keeping two line buffers, and switching
3037  * them at end of line, it is possible to use those pointers.
      *
      * The macros below expand to names that are not file-scope
      * variables here (curndx, newndx, c_ext, lp, inf, quotednl,
      * savetoken), so they can only be used where those names are in
      * scope, as they are inside C_entries.
3038  */
3039 static struct
3040 {
3041   long linepos;
3042   linebuffer lb;
3043 } lbs[2];
3044
3045 #define current_lb_is_new (newndx == curndx)
3046 #define switch_line_buffers() (curndx = 1 - curndx)
3047
3048 #define curlb (lbs[curndx].lb)
3049 #define newlb (lbs[newndx].lb)
3050 #define curlinepos (lbs[curndx].linepos)
3051 #define newlinepos (lbs[newndx].linepos)
3052
     /* Dialect tests on the c_ext mask.  */
3053 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3054 #define cplpl (c_ext & C_PLPL)
3055 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3056
     /* Read the next input line into the current buffer: record charno as
        the position of the line, point lp at its start, clear quotednl
        and make the current buffer the new one.  definedef is left
        untouched, hence the name.  */
3057 #define CNL_SAVE_DEFINEDEF()                                            \
3058 do {                                                                    \
3059   curlinepos = charno;                                                  \
3060   readline (&curlb, inf);                                               \
3061   lp = curlb.buffer;                                                    \
3062   quotednl = false;                                                     \
3063   newndx = curndx;                                                      \
3064 } while (0)
3065
     /* Like CNL_SAVE_DEFINEDEF, but additionally move a valid savetoken
        back into token and reset definedef to dnone.  */
3066 #define CNL()                                                           \
3067 do {                                                                    \
3068   CNL_SAVE_DEFINEDEF ();                                                \
3069   if (savetoken.valid)                                                  \
3070     {                                                                   \
3071       token = savetoken;                                                \
3072       savetoken.valid = false;                                          \
3073     }                                                                   \
3074   definedef = dnone;                                                    \
3075 } while (0)
3076
3077
3078 static void
3079 make_C_tag (bool isfun)
3080 {
3081   /* This function is never called when token.valid is false, but
3082      we must protect against invalid input or internal errors. */
3083   if (token.valid)
3084     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3085               token.offset+token.length+1, token.lineno, token.linepos);
3086   else if (DEBUG)
3087     {                             /* this branch is optimized away if !DEBUG */
3088       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3089                 token_name.len + 17, isfun, token.line,
3090                 token.offset+token.length+1, token.lineno, token.linepos);
3091       error ("INVALID TOKEN");
3092     }
3093
3094   token.valid = false;
3095 }
3096
/*
 * Return true while INF has neither its end-of-file indicator nor its
 * error indicator set, that is, while reading from it may still
 * succeed.
 */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3102
3103
3104 /*
3105  * C_entries ()
3106  *      This routine finds functions, variables, typedefs,
3107  *      #define's, enum constants and struct/union/enum definitions in
3108  *      C syntax and adds them to the list.
3109  */
3110 static void
3111 C_entries (int c_ext, FILE *inf)
3112                                 /* extension of C */
3113                                 /* input file */
3114 {
3115   register char c;              /* latest char read; '\0' for end of line */
3116   register char *lp;            /* pointer one beyond the character `c' */
3117   int curndx, newndx;           /* indices for current and new lb */
3118   register int tokoff;          /* offset in line of start of current token */
3119   register int toklen;          /* length of current token */
3120   const char *qualifier;        /* string used to qualify names */
3121   int qlen;                     /* length of qualifier */
3122   int bracelev;                 /* current brace level */
3123   int bracketlev;               /* current bracket level */
3124   int parlev;                   /* current parenthesis level */
3125   int attrparlev;               /* __attribute__ parenthesis level */
3126   int templatelev;              /* current template level */
3127   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3128   bool incomm, inquote, inchar, quotednl, midtoken;
3129   bool yacc_rules;              /* in the rules part of a yacc file */
3130   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3131
3132
3133   linebuffer_init (&lbs[0].lb);
3134   linebuffer_init (&lbs[1].lb);
3135   if (cstack.size == 0)
3136     {
3137       cstack.size = (DEBUG) ? 1 : 4;
3138       cstack.nl = 0;
3139       cstack.cname = xnew (cstack.size, char *);
3140       cstack.bracelev = xnew (cstack.size, int);
3141     }
3142
3143   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3144   curndx = newndx = 0;
3145   lp = curlb.buffer;
3146   *lp = 0;
3147
3148   fvdef = fvnone; fvextern = false; typdef = tnone;
3149   structdef = snone; definedef = dnone; objdef = onone;
3150   yacc_rules = false;
3151   midtoken = inquote = inchar = incomm = quotednl = false;
3152   token.valid = savetoken.valid = false;
3153   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3154   if (cjava)
3155     { qualifier = "."; qlen = 1; }
3156   else
3157     { qualifier = "::"; qlen = 2; }
3158
3159
3160   while (perhaps_more_input (inf))
3161     {
3162       c = *lp++;
3163       if (c == '\\')
3164         {
3165           /* If we are at the end of the line, the next character is a
3166              '\0'; do not skip it, because it is what tells us
3167              to read the next line.  */
3168           if (*lp == '\0')
3169             {
3170               quotednl = true;
3171               continue;
3172             }
3173           lp++;
3174           c = ' ';
3175         }
3176       else if (incomm)
3177         {
3178           switch (c)
3179             {
3180             case '*':
3181               if (*lp == '/')
3182                 {
3183                   c = *lp++;
3184                   incomm = false;
3185                 }
3186               break;
3187             case '\0':
3188               /* Newlines inside comments do not end macro definitions in
3189                  traditional cpp. */
3190               CNL_SAVE_DEFINEDEF ();
3191               break;
3192             }
3193           continue;
3194         }
3195       else if (inquote)
3196         {
3197           switch (c)
3198             {
3199             case '"':
3200               inquote = false;
3201               break;
3202             case '\0':
3203               /* Newlines inside strings do not end macro definitions
3204                  in traditional cpp, even though compilers don't
3205                  usually accept them. */
3206               CNL_SAVE_DEFINEDEF ();
3207               break;
3208             }
3209           continue;
3210         }
3211       else if (inchar)
3212         {
3213           switch (c)
3214             {
3215             case '\0':
3216               /* Hmmm, something went wrong. */
3217               CNL ();
3218               /* FALLTHRU */
3219             case '\'':
3220               inchar = false;
3221               break;
3222             }
3223           continue;
3224         }
3225       else switch (c)
3226         {
3227         case '"':
3228           inquote = true;
3229           if (bracketlev > 0)
3230             continue;
3231           if (inattribute)
3232             break;
3233           switch (fvdef)
3234             {
3235             case fdefunkey:
3236             case fstartlist:
3237             case finlist:
3238             case fignore:
3239             case vignore:
3240               break;
3241             default:
3242               fvextern = false;
3243               fvdef = fvnone;
3244             }
3245           continue;
3246         case '\'':
3247           inchar = true;
3248           if (bracketlev > 0)
3249             continue;
3250           if (inattribute)
3251             break;
3252           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3253             {
3254               fvextern = false;
3255               fvdef = fvnone;
3256             }
3257           continue;
3258         case '/':
3259           if (*lp == '*')
3260             {
3261               incomm = true;
3262               lp++;
3263               c = ' ';
3264               if (bracketlev > 0)
3265                 continue;
3266             }
3267           else if (/* cplpl && */ *lp == '/')
3268             {
3269               c = '\0';
3270             }
3271           break;
3272         case '%':
3273           if ((c_ext & YACC) && *lp == '%')
3274             {
3275               /* Entering or exiting rules section in yacc file. */
3276               lp++;
3277               definedef = dnone; fvdef = fvnone; fvextern = false;
3278               typdef = tnone; structdef = snone;
3279               midtoken = inquote = inchar = incomm = quotednl = false;
3280               bracelev = 0;
3281               yacc_rules = !yacc_rules;
3282               continue;
3283             }
3284           else
3285             break;
3286         case '#':
3287           if (definedef == dnone)
3288             {
3289               char *cp;
3290               bool cpptoken = true;
3291
3292               /* Look back on this line.  If all blanks, or nonblanks
3293                  followed by an end of comment, this is a preprocessor
3294                  token. */
3295               for (cp = newlb.buffer; cp < lp-1; cp++)
3296                 if (!c_isspace (*cp))
3297                   {
3298                     if (*cp == '*' && cp[1] == '/')
3299                       {
3300                         cp++;
3301                         cpptoken = true;
3302                       }
3303                     else
3304                       cpptoken = false;
3305                   }
3306               if (cpptoken)
3307                 {
3308                   definedef = dsharpseen;
3309                   /* This is needed for tagging enum values: when there are
3310                      preprocessor conditionals inside the enum, we need to
3311                      reset the value of fvdef so that the next enum value is
3312                      tagged even though the one before it did not end in a
3313                      comma.  */
3314                   if (fvdef == vignore && instruct && parlev == 0)
3315                     {
3316                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3317                         fvdef = fvnone;
3318                     }
3319                 }
3320             } /* if (definedef == dnone) */
3321           continue;
3322         case '[':
3323           bracketlev++;
3324           continue;
3325         default:
3326           if (bracketlev > 0)
3327             {
3328               if (c == ']')
3329                 --bracketlev;
3330               else if (c == '\0')
3331                 CNL_SAVE_DEFINEDEF ();
3332               continue;
3333             }
3334           break;
3335         } /* switch (c) */
3336
3337
3338       /* Consider token only if some involved conditions are satisfied. */
3339       if (typdef != tignore
3340           && definedef != dignorerest
3341           && fvdef != finlist
3342           && templatelev == 0
3343           && (definedef != dnone
3344               || structdef != scolonseen)
3345           && !inattribute)
3346         {
3347           if (midtoken)
3348             {
3349               if (endtoken (c))
3350                 {
3351                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3352                     /* This handles :: in the middle,
3353                        but not at the beginning of an identifier.
3354                        Also, space-separated :: is not recognized. */
3355                     {
3356                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3357                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3358                       lp += 2;
3359                       toklen += 2;
3360                       c = lp[-1];
3361                       goto still_in_token;
3362                     }
3363                   else
3364                     {
3365                       bool funorvar = false;
3366
3367                       if (yacc_rules
3368                           || consider_token (newlb.buffer + tokoff, toklen, c,
3369                                              &c_ext, bracelev, parlev,
3370                                              &funorvar))
3371                         {
3372                           if (fvdef == foperator)
3373                             {
3374                               char *oldlp = lp;
3375                               lp = skip_spaces (lp-1);
3376                               if (*lp != '\0')
3377                                 lp += 1;
3378                               while (*lp != '\0'
3379                                      && !c_isspace (*lp) && *lp != '(')
3380                                 lp += 1;
3381                               c = *lp++;
3382                               toklen += lp - oldlp;
3383                             }
3384                           token.named = false;
3385                           if (!plainc
3386                               && nestlev > 0 && definedef == dnone)
3387                             /* in struct body */
3388                             {
3389                               if (class_qualify)
3390                                 {
3391                                   int len;
3392                                   write_classname (&token_name, qualifier);
3393                                   len = token_name.len;
3394                                   linebuffer_setlen (&token_name,
3395                                                      len + qlen + toklen);
3396                                   sprintf (token_name.buffer + len, "%s%.*s",
3397                                            qualifier, toklen,
3398                                            newlb.buffer + tokoff);
3399                                 }
3400                               else
3401                                 {
3402                                   linebuffer_setlen (&token_name, toklen);
3403                                   sprintf (token_name.buffer, "%.*s",
3404                                            toklen, newlb.buffer + tokoff);
3405                                 }
3406                               token.named = true;
3407                             }
3408                           else if (objdef == ocatseen)
3409                             /* Objective C category */
3410                             {
3411                               if (class_qualify)
3412                                 {
3413                                   int len = strlen (objtag) + 2 + toklen;
3414                                   linebuffer_setlen (&token_name, len);
3415                                   sprintf (token_name.buffer, "%s(%.*s)",
3416                                            objtag, toklen,
3417                                            newlb.buffer + tokoff);
3418                                 }
3419                               else
3420                                 {
3421                                   linebuffer_setlen (&token_name, toklen);
3422                                   sprintf (token_name.buffer, "%.*s",
3423                                            toklen, newlb.buffer + tokoff);
3424                                 }
3425                               token.named = true;
3426                             }
3427                           else if (objdef == omethodtag
3428                                    || objdef == omethodparm)
3429                             /* Objective C method */
3430                             {
3431                               token.named = true;
3432                             }
3433                           else if (fvdef == fdefunname)
3434                             /* GNU DEFUN and similar macros */
3435                             {
3436                               bool defun = (newlb.buffer[tokoff] == 'F');
3437                               int off = tokoff;
3438                               int len = toklen;
3439
3440                               /* Rewrite the tag so that emacs lisp DEFUNs
3441                                  can be found by their elisp name */
3442                               if (defun)
3443                                 {
3444                                   off += 1;
3445                                   len -= 1;
3446                                 }
3447                               linebuffer_setlen (&token_name, len);
3448                               memcpy (token_name.buffer,
3449                                       newlb.buffer + off, len);
3450                               token_name.buffer[len] = '\0';
3451                               if (defun)
3452                                 while (--len >= 0)
3453                                   if (token_name.buffer[len] == '_')
3454                                     token_name.buffer[len] = '-';
3455                               token.named = defun;
3456                             }
3457                           else
3458                             {
3459                               linebuffer_setlen (&token_name, toklen);
3460                               memcpy (token_name.buffer,
3461                                       newlb.buffer + tokoff, toklen);
3462                               token_name.buffer[toklen] = '\0';
3463                               /* Name macros and members. */
3464                               token.named = (structdef == stagseen
3465                                              || typdef == ttypeseen
3466                                              || typdef == tend
3467                                              || (funorvar
3468                                                  && definedef == dignorerest)
3469                                              || (funorvar
3470                                                  && definedef == dnone
3471                                                  && structdef == snone
3472                                                  && bracelev > 0));
3473                             }
3474                           token.lineno = lineno;
3475                           token.offset = tokoff;
3476                           token.length = toklen;
3477                           token.line = newlb.buffer;
3478                           token.linepos = newlinepos;
3479                           token.valid = true;
3480
3481                           if (definedef == dnone
3482                               && (fvdef == fvnameseen
3483                                   || fvdef == foperator
3484                                   || structdef == stagseen
3485                                   || typdef == tend
3486                                   || typdef == ttypeseen
3487                                   || objdef != onone))
3488                             {
3489                               if (current_lb_is_new)
3490                                 switch_line_buffers ();
3491                             }
3492                           else if (definedef != dnone
3493                                    || fvdef == fdefunname
3494                                    || instruct)
3495                             make_C_tag (funorvar);
3496                         }
3497                       else /* not yacc and consider_token failed */
3498                         {
3499                           if (inattribute && fvdef == fignore)
3500                             {
3501                               /* We have just met __attribute__ after a
3502                                  function parameter list: do not tag the
3503                                  function again. */
3504                               fvdef = fvnone;
3505                             }
3506                         }
3507                       midtoken = false;
3508                     }
3509                 } /* if (endtoken (c)) */
3510               else if (intoken (c))
3511                 still_in_token:
3512                 {
3513                   toklen++;
3514                   continue;
3515                 }
3516             } /* if (midtoken) */
3517           else if (begtoken (c))
3518             {
3519               switch (definedef)
3520                 {
3521                 case dnone:
3522                   switch (fvdef)
3523                     {
3524                     case fstartlist:
3525                       /* This prevents tagging fb in
3526                          void (__attribute__((noreturn)) *fb) (void);
3527                          Fixing this is not easy and not very important. */
3528                       fvdef = finlist;
3529                       continue;
3530                     case flistseen:
3531                       if (plainc || declarations)
3532                         {
3533                           make_C_tag (true); /* a function */
3534                           fvdef = fignore;
3535                         }
3536                       break;
3537                     default:
3538                       break;
3539                     }
3540                   if (structdef == stagseen && !cjava)
3541                     {
3542                       popclass_above (bracelev);
3543                       structdef = snone;
3544                     }
3545                   break;
3546                 case dsharpseen:
3547                   savetoken = token;
3548                   break;
3549                 default:
3550                   break;
3551                 }
3552               if (!yacc_rules || lp == newlb.buffer + 1)
3553                 {
3554                   tokoff = lp - 1 - newlb.buffer;
3555                   toklen = 1;
3556                   midtoken = true;
3557                 }
3558               continue;
3559             } /* if (begtoken) */
3560         } /* if must look at token */
3561
3562
3563       /* Detect end of line, colon, comma, semicolon and various braces
3564          after having handled a token.*/
3565       switch (c)
3566         {
3567         case ':':
3568           if (inattribute)
3569             break;
3570           if (yacc_rules && token.offset == 0 && token.valid)
3571             {
3572               make_C_tag (false); /* a yacc function */
3573               break;
3574             }
3575           if (definedef != dnone)
3576             break;
3577           switch (objdef)
3578             {
3579             case otagseen:
3580               objdef = oignore;
3581               make_C_tag (true); /* an Objective C class */
3582               break;
3583             case omethodtag:
3584             case omethodparm:
3585               objdef = omethodcolon;
3586               if (class_qualify)
3587                 {
3588                   int toklen = token_name.len;
3589                   linebuffer_setlen (&token_name, toklen + 1);
3590                   strcpy (token_name.buffer + toklen, ":");
3591                 }
3592               break;
3593             default:
3594               break;
3595             }
3596           if (structdef == stagseen)
3597             {
3598               structdef = scolonseen;
3599               break;
3600             }
3601           /* Should be useless, but may be work as a safety net. */
3602           if (cplpl && fvdef == flistseen)
3603             {
3604               make_C_tag (true); /* a function */
3605               fvdef = fignore;
3606               break;
3607             }
3608           break;
3609         case ';':
3610           if (definedef != dnone || inattribute)
3611             break;
3612           switch (typdef)
3613             {
3614             case tend:
3615             case ttypeseen:
3616               make_C_tag (false); /* a typedef */
3617               typdef = tnone;
3618               fvdef = fvnone;
3619               break;
3620             case tnone:
3621             case tinbody:
3622             case tignore:
3623               switch (fvdef)
3624                 {
3625                 case fignore:
3626                   if (typdef == tignore || cplpl)
3627                     fvdef = fvnone;
3628                   break;
3629                 case fvnameseen:
3630                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3631                       || (members && instruct))
3632                     make_C_tag (false); /* a variable */
3633                   fvextern = false;
3634                   fvdef = fvnone;
3635                   token.valid = false;
3636                   break;
3637                 case flistseen:
3638                   if ((declarations
3639                        && (cplpl || !instruct)
3640                        && (typdef == tnone || (typdef != tignore && instruct)))
3641                       || (members
3642                           && plainc && instruct))
3643                     make_C_tag (true);  /* a function */
3644                   /* FALLTHRU */
3645                 default:
3646                   fvextern = false;
3647                   fvdef = fvnone;
3648                   if (declarations
3649                        && cplpl && structdef == stagseen)
3650                     make_C_tag (false); /* forward declaration */
3651                   else
3652                     token.valid = false;
3653                 } /* switch (fvdef) */
3654               /* FALLTHRU */
3655             default:
3656               if (!instruct)
3657                 typdef = tnone;
3658             }
3659           if (structdef == stagseen)
3660             structdef = snone;
3661           break;
3662         case ',':
3663           if (definedef != dnone || inattribute)
3664             break;
3665           switch (objdef)
3666             {
3667             case omethodtag:
3668             case omethodparm:
3669               make_C_tag (true); /* an Objective C method */
3670               objdef = oinbody;
3671               break;
3672             default:
3673               break;
3674             }
3675           switch (fvdef)
3676             {
3677             case fdefunkey:
3678             case foperator:
3679             case fstartlist:
3680             case finlist:
3681             case fignore:
3682               break;
3683             case vignore:
3684               if (instruct && parlev == 0)
3685                 fvdef = fvnone;
3686               break;
3687             case fdefunname:
3688               fvdef = fignore;
3689               break;
3690             case fvnameseen:
3691               if (parlev == 0
3692                   && ((globals
3693                        && bracelev == 0
3694                        && templatelev == 0
3695                        && (!fvextern || declarations))
3696                       || (members && instruct)))
3697                   make_C_tag (false); /* a variable */
3698               break;
3699             case flistseen:
3700               if ((declarations && typdef == tnone && !instruct)
3701                   || (members && typdef != tignore && instruct))
3702                 {
3703                   make_C_tag (true); /* a function */
3704                   fvdef = fvnameseen;
3705                 }
3706               else if (!declarations)
3707                 fvdef = fvnone;
3708               token.valid = false;
3709               break;
3710             default:
3711               fvdef = fvnone;
3712             }
3713           if (structdef == stagseen)
3714             structdef = snone;
3715           break;
3716         case ']':
3717           if (definedef != dnone || inattribute)
3718             break;
3719           if (structdef == stagseen)
3720             structdef = snone;
3721           switch (typdef)
3722             {
3723             case ttypeseen:
3724             case tend:
3725               typdef = tignore;
3726               make_C_tag (false);       /* a typedef */
3727               break;
3728             case tnone:
3729             case tinbody:
3730               switch (fvdef)
3731                 {
3732                 case foperator:
3733                 case finlist:
3734                 case fignore:
3735                 case vignore:
3736                   break;
3737                 case fvnameseen:
3738                   if ((members && bracelev == 1)
3739                       || (globals && bracelev == 0
3740                           && (!fvextern || declarations)))
3741                     make_C_tag (false); /* a variable */
3742                   /* FALLTHRU */
3743                 default:
3744                   fvdef = fvnone;
3745                 }
3746               break;
3747             default:
3748               break;
3749             }
3750           break;
3751         case '(':
3752           if (inattribute)
3753             {
3754               attrparlev++;
3755               break;
3756             }
3757           if (definedef != dnone)
3758             break;
3759           if (objdef == otagseen && parlev == 0)
3760             objdef = oparenseen;
3761           switch (fvdef)
3762             {
3763             case fvnameseen:
3764               if (typdef == ttypeseen
3765                   && *lp != '*'
3766                   && !instruct)
3767                 {
3768                   /* This handles constructs like:
3769                      typedef void OperatorFun (int fun); */
3770                   make_C_tag (false);
3771                   typdef = tignore;
3772                   fvdef = fignore;
3773                   break;
3774                 }
3775               /* FALLTHRU */
3776             case foperator:
3777               fvdef = fstartlist;
3778               break;
3779             case flistseen:
3780               fvdef = finlist;
3781               break;
3782             default:
3783               break;
3784             }
3785           parlev++;
3786           break;
3787         case ')':
3788           if (inattribute)
3789             {
3790               if (--attrparlev == 0)
3791                 inattribute = false;
3792               break;
3793             }
3794           if (definedef != dnone)
3795             break;
3796           if (objdef == ocatseen && parlev == 1)
3797             {
3798               make_C_tag (true); /* an Objective C category */
3799               objdef = oignore;
3800             }
3801           if (--parlev == 0)
3802             {
3803               switch (fvdef)
3804                 {
3805                 case fstartlist:
3806                 case finlist:
3807                   fvdef = flistseen;
3808                   break;
3809                 default:
3810                   break;
3811                 }
3812               if (!instruct
3813                   && (typdef == tend
3814                       || typdef == ttypeseen))
3815                 {
3816                   typdef = tignore;
3817                   make_C_tag (false); /* a typedef */
3818                 }
3819             }
3820           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3821             parlev = 0;
3822           break;
3823         case '{':
3824           if (definedef != dnone)
3825             break;
3826           if (typdef == ttypeseen)
3827             {
3828               /* Whenever typdef is set to tinbody (currently only
3829                  here), typdefbracelev should be set to bracelev. */
3830               typdef = tinbody;
3831               typdefbracelev = bracelev;
3832             }
3833           switch (fvdef)
3834             {
3835             case flistseen:
3836               if (cplpl && !class_qualify)
3837                 {
3838                   /* Remove class and namespace qualifiers from the token,
3839                      leaving only the method/member name.  */
3840                   char *cc, *uqname = token_name.buffer;
3841                   char *tok_end = token_name.buffer + token_name.len;
3842
3843                   for (cc = token_name.buffer; cc < tok_end; cc++)
3844                     {
3845                       if (*cc == ':' && cc[1] == ':')
3846                         {
3847                           uqname = cc + 2;
3848                           cc++;
3849                         }
3850                     }
3851                   if (uqname > token_name.buffer)
3852                     {
3853                       int uqlen = strlen (uqname);
3854                       linebuffer_setlen (&token_name, uqlen);
3855                       memmove (token_name.buffer, uqname, uqlen + 1);
3856                     }
3857                 }
3858               make_C_tag (true);    /* a function */
3859               /* FALLTHRU */
3860             case fignore:
3861               fvdef = fvnone;
3862               break;
3863             case fvnone:
3864               switch (objdef)
3865                 {
3866                 case otagseen:
3867                   make_C_tag (true); /* an Objective C class */
3868                   objdef = oignore;
3869                   break;
3870                 case omethodtag:
3871                 case omethodparm:
3872                   make_C_tag (true); /* an Objective C method */
3873                   objdef = oinbody;
3874                   break;
3875                 default:
3876                   /* Neutralize `extern "C" {' grot. */
3877                   if (bracelev == 0 && structdef == snone && nestlev == 0
3878                       && typdef == tnone)
3879                     bracelev = -1;
3880                 }
3881               break;
3882             default:
3883               break;
3884             }
3885           switch (structdef)
3886             {
3887             case skeyseen:         /* unnamed struct */
3888               pushclass_above (bracelev, NULL, 0);
3889               structdef = snone;
3890               break;
3891             case stagseen:         /* named struct or enum */
3892             case scolonseen:       /* a class */
3893               pushclass_above (bracelev,token.line+token.offset, token.length);
3894               structdef = snone;
3895               make_C_tag (false);  /* a struct or enum */
3896               break;
3897             default:
3898               break;
3899             }
3900           bracelev += 1;
3901           break;
3902         case '*':
3903           if (definedef != dnone)
3904             break;
3905           if (fvdef == fstartlist)
3906             {
3907               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3908               token.valid = false;
3909             }
3910           break;
3911         case '}':
3912           if (definedef != dnone)
3913             break;
3914           bracelev -= 1;
3915           if (!ignoreindent && lp == newlb.buffer + 1)
3916             {
3917               if (bracelev != 0)
3918                 token.valid = false; /* unexpected value, token unreliable */
3919               bracelev = 0;     /* reset brace level if first column */
3920               parlev = 0;       /* also reset paren level, just in case... */
3921             }
3922           else if (bracelev < 0)
3923             {
3924               token.valid = false; /* something gone amiss, token unreliable */
3925               bracelev = 0;
3926             }
3927           if (bracelev == 0 && fvdef == vignore)
3928             fvdef = fvnone;             /* end of function */
3929           popclass_above (bracelev);
3930           structdef = snone;
3931           /* Only if typdef == tinbody is typdefbracelev significant. */
3932           if (typdef == tinbody && bracelev <= typdefbracelev)
3933             {
3934               assert (bracelev == typdefbracelev);
3935               typdef = tend;
3936             }
3937           break;
3938         case '=':
3939           if (definedef != dnone)
3940             break;
3941           switch (fvdef)
3942             {
3943             case foperator:
3944             case finlist:
3945             case fignore:
3946             case vignore:
3947               break;
3948             case fvnameseen:
3949               if ((members && bracelev == 1)
3950                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3951                 make_C_tag (false); /* a variable */
3952               /* FALLTHRU */
3953             default:
3954               fvdef = vignore;
3955             }
3956           break;
3957         case '<':
3958           if (cplpl
3959               && (structdef == stagseen || fvdef == fvnameseen))
3960             {
3961               templatelev++;
3962               break;
3963             }
3964           goto resetfvdef;
3965         case '>':
3966           if (templatelev > 0)
3967             {
3968               templatelev--;
3969               break;
3970             }
3971           goto resetfvdef;
3972         case '+':
3973         case '-':
3974           if (objdef == oinbody && bracelev == 0)
3975             {
3976               objdef = omethodsign;
3977               break;
3978             }
3979           /* FALLTHRU */
3980         resetfvdef:
3981         case '#': case '~': case '&': case '%': case '/':
3982         case '|': case '^': case '!': case '.': case '?':
3983           if (definedef != dnone)
3984             break;
3985           /* These surely cannot follow a function tag in C. */
3986           switch (fvdef)
3987             {
3988             case foperator:
3989             case finlist:
3990             case fignore:
3991             case vignore:
3992               break;
3993             default:
3994               fvdef = fvnone;
3995             }
3996           break;
3997         case '\0':
3998           if (objdef == otagseen)
3999             {
4000               make_C_tag (true); /* an Objective C class */
4001               objdef = oignore;
4002             }
4003           /* If a macro spans multiple lines don't reset its state. */
4004           if (quotednl)
4005             CNL_SAVE_DEFINEDEF ();
4006           else
4007             CNL ();
4008           break;
4009         } /* switch (c) */
4010
4011     } /* while not eof */
4012
4013   free (lbs[0].lb.buffer);
4014   free (lbs[1].lb.buffer);
4015 }
4016
4017 /*
4018  * Process either a C++ file or a C file depending on the setting
4019  * of a global flag.
4020  */
4021 static void
4022 default_C_entries (FILE *inf)
4023 {
4024   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4025 }
4026
/* Parse INF as plain C: no language-extension flag is handed to
   C_entries. */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4033
4034 /* Always do C++. */
4035 static void
4036 Cplusplus_entries (FILE *inf)
4037 {
4038   C_entries (C_PLPL, inf);
4039 }
4040
4041 /* Always do Java. */
4042 static void
4043 Cjava_entries (FILE *inf)
4044 {
4045   C_entries (C_JAVA, inf);
4046 }
4047
4048 /* Always do C*. */
4049 static void
4050 Cstar_entries (FILE *inf)
4051 {
4052   C_entries (C_STAR, inf);
4053 }
4054
4055 /* Always do Yacc. */
4056 static void
4057 Yacc_entries (FILE *inf)
4058 {
4059   C_entries (YACC, inf);
4060 }
4061
4062 \f
/* Useful macros. */

/* Loop header: as long as perhaps_more_input (FILE_POINTER) holds,
   read the next line into LINE_BUFFER and point CHAR_POINTER at the
   start of its text before running the loop body that follows the
   macro invocation. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                                                                \
       perhaps_more_input (file_pointer)                                \
       && (readline (&(line_buffer), file_pointer),                     \
           (char_pointer) = (line_buffer).buffer,                       \
           true);                                                       \
       )
4069
/* True when CP points at the keyword KW and the character right after
   it satisfies notinname.  KW must be a literal string: pasting it
   after "" is a syntax error otherwise.  On success CP is moved past
   the keyword and past the spaces that follow it; on failure CP is
   left alone. */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), true)                                               \
   && strneq ((cp), kw, sizeof (kw) - 1)                                \
   && notinname ((cp)[sizeof (kw) - 1])                                 \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)))
4075
/* Case-insensitive relative of LOOKING_AT.  It does not use notinname
   on the character after the keyword and does not skip any spaces: on
   a match CP is simply advanced to just past KW.  KW must be a
   literal string (syntax error otherwise). */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), true)                                               \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)                            \
   && ((cp) += sizeof (kw) - 1))
4081
4082 /*
4083  * Read a file, but do no processing.  This is used to do regexp
4084  * matching on files that have no language defined.
4085  */
4086 static void
4087 just_read_file (FILE *inf)
4088 {
4089   while (perhaps_more_input (inf))
4090     readline (&lb, inf);
4091 }
4092
4093 \f
4094 /* Fortran parsing */
4095
4096 static void F_takeprec (void);
4097 static void F_getit (FILE *);
4098
4099 static void
4100 F_takeprec (void)
4101 {
4102   dbp = skip_spaces (dbp);
4103   if (*dbp != '*')
4104     return;
4105   dbp++;
4106   dbp = skip_spaces (dbp);
4107   if (strneq (dbp, "(*)", 3))
4108     {
4109       dbp += 3;
4110       return;
4111     }
4112   if (!c_isdigit (*dbp))
4113     {
4114       --dbp;                    /* force failure */
4115       return;
4116     }
4117   do
4118     dbp++;
4119   while (c_isdigit (*dbp));
4120 }
4121
4122 static void
4123 F_getit (FILE *inf)
4124 {
4125   register char *cp;
4126
4127   dbp = skip_spaces (dbp);
4128   if (*dbp == '\0')
4129     {
4130       readline (&lb, inf);
4131       dbp = lb.buffer;
4132       if (dbp[5] != '&')
4133         return;
4134       dbp += 6;
4135       dbp = skip_spaces (dbp);
4136     }
4137   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4138     return;
4139   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4140     continue;
4141   make_tag (dbp, cp-dbp, true,
4142             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4143 }
4144
4145
4146 static void
4147 Fortran_functions (FILE *inf)
4148 {
4149   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4150     {
4151       if (*dbp == '%')
4152         dbp++;                  /* Ratfor escape to fortran */
4153       dbp = skip_spaces (dbp);
4154       if (*dbp == '\0')
4155         continue;
4156
4157       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4158         dbp = skip_spaces (dbp);
4159
4160       if (LOOKING_AT_NOCASE (dbp, "pure"))
4161         dbp = skip_spaces (dbp);
4162
4163       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4164         dbp = skip_spaces (dbp);
4165
4166       switch (c_tolower (*dbp))
4167         {
4168         case 'i':
4169           if (nocase_tail ("integer"))
4170             F_takeprec ();
4171           break;
4172         case 'r':
4173           if (nocase_tail ("real"))
4174             F_takeprec ();
4175           break;
4176         case 'l':
4177           if (nocase_tail ("logical"))
4178             F_takeprec ();
4179           break;
4180         case 'c':
4181           if (nocase_tail ("complex") || nocase_tail ("character"))
4182             F_takeprec ();
4183           break;
4184         case 'd':
4185           if (nocase_tail ("double"))
4186             {
4187               dbp = skip_spaces (dbp);
4188               if (*dbp == '\0')
4189                 continue;
4190               if (nocase_tail ("precision"))
4191                 break;
4192               continue;
4193             }
4194           break;
4195         }
4196       dbp = skip_spaces (dbp);
4197       if (*dbp == '\0')
4198         continue;
4199       switch (c_tolower (*dbp))
4200         {
4201         case 'f':
4202           if (nocase_tail ("function"))
4203             F_getit (inf);
4204           continue;
4205         case 's':
4206           if (nocase_tail ("subroutine"))
4207             F_getit (inf);
4208           continue;
4209         case 'e':
4210           if (nocase_tail ("entry"))
4211             F_getit (inf);
4212           continue;
4213         case 'b':
4214           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4215             {
4216               dbp = skip_spaces (dbp);
4217               if (*dbp == '\0') /* assume un-named */
4218                 make_tag ("blockdata", 9, true,
4219                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4220               else
4221                 F_getit (inf);  /* look for name */
4222             }
4223           continue;
4224         }
4225     }
4226 }
4227
4228 \f
4229 /*
4230  * Go language support
4231  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4232  */
4233 static void
4234 Go_functions(FILE *inf)
4235 {
4236   char *cp, *name;
4237
4238   LOOP_ON_INPUT_LINES(inf, lb, cp)
4239     {
4240       cp = skip_spaces (cp);
4241
4242       if (LOOKING_AT (cp, "package"))
4243         {
4244           name = cp;
4245           while (!notinname (*cp) && *cp != '\0')
4246             cp++;
4247           make_tag (name, cp - name, false, lb.buffer,
4248                     cp - lb.buffer + 1, lineno, linecharno);
4249         }
4250       else if (LOOKING_AT (cp, "func"))
4251         {
4252           /* Go implementation of interface, such as:
4253              func (n *Integer) Add(m Integer) ...
4254              skip `(n *Integer)` part.
4255           */
4256           if (*cp == '(')
4257             {
4258               while (*cp != ')')
4259                 cp++;
4260               cp = skip_spaces (cp+1);
4261             }
4262
4263           if (*cp)
4264             {
4265               name = cp;
4266
4267               while (!notinname (*cp))
4268                 cp++;
4269
4270               make_tag (name, cp - name, true, lb.buffer,
4271                         cp - lb.buffer + 1, lineno, linecharno);
4272             }
4273         }
4274       else if (members && LOOKING_AT (cp, "type"))
4275         {
4276           name = cp;
4277
4278           /* Ignore the likes of the following:
4279              type (
4280                     A
4281              )
4282            */
4283           if (*cp == '(')
4284             return;
4285
4286           while (!notinname (*cp) && *cp != '\0')
4287             cp++;
4288
4289           make_tag (name, cp - name, false, lb.buffer,
4290                     cp - lb.buffer + 1, lineno, linecharno);
4291         }
4292     }
4293 }
4294
4295 \f
4296 /*
4297  * Ada parsing
4298  * Original code by
4299  * Philippe Waroquiers (1998)
4300  */
4301
4302 /* Once we are positioned after an "interesting" keyword, let's get
4303    the real tag value necessary. */
4304 static void
4305 Ada_getit (FILE *inf, const char *name_qualifier)
4306 {
4307   register char *cp;
4308   char *name;
4309   char c;
4310
4311   while (perhaps_more_input (inf))
4312     {
4313       dbp = skip_spaces (dbp);
4314       if (*dbp == '\0'
4315           || (dbp[0] == '-' && dbp[1] == '-'))
4316         {
4317           readline (&lb, inf);
4318           dbp = lb.buffer;
4319         }
4320       switch (c_tolower (*dbp))
4321         {
4322         case 'b':
4323           if (nocase_tail ("body"))
4324             {
4325               /* Skipping body of   procedure body   or   package body or ....
4326                  resetting qualifier to body instead of spec. */
4327               name_qualifier = "/b";
4328               continue;
4329             }
4330           break;
4331         case 't':
4332           /* Skipping type of   task type   or   protected type ... */
4333           if (nocase_tail ("type"))
4334             continue;
4335           break;
4336         }
4337       if (*dbp == '"')
4338         {
4339           dbp += 1;
4340           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4341             continue;
4342         }
4343       else
4344         {
4345           dbp = skip_spaces (dbp);
4346           for (cp = dbp;
4347                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4348                cp++)
4349             continue;
4350           if (cp == dbp)
4351             return;
4352         }
4353       c = *cp;
4354       *cp = '\0';
4355       name = concat (dbp, name_qualifier, "");
4356       *cp = c;
4357       make_tag (name, strlen (name), true,
4358                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4359       free (name);
4360       if (c == '"')
4361         dbp = cp + 1;
4362       return;
4363     }
4364 }
4365
4366 static void
4367 Ada_funcs (FILE *inf)
4368 {
4369   bool inquote = false;
4370   bool skip_till_semicolumn = false;
4371
4372   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4373     {
4374       while (*dbp != '\0')
4375         {
4376           /* Skip a string i.e. "abcd". */
4377           if (inquote || (*dbp == '"'))
4378             {
4379               dbp = strchr (dbp + !inquote, '"');
4380               if (dbp != NULL)
4381                 {
4382                   inquote = false;
4383                   dbp += 1;
4384                   continue;     /* advance char */
4385                 }
4386               else
4387                 {
4388                   inquote = true;
4389                   break;        /* advance line */
4390                 }
4391             }
4392
4393           /* Skip comments. */
4394           if (dbp[0] == '-' && dbp[1] == '-')
4395             break;              /* advance line */
4396
4397           /* Skip character enclosed in single quote i.e. 'a'
4398              and skip single quote starting an attribute i.e. 'Image. */
4399           if (*dbp == '\'')
4400             {
4401               dbp++ ;
4402               if (*dbp != '\0')
4403                 dbp++;
4404               continue;
4405             }
4406
4407           if (skip_till_semicolumn)
4408             {
4409               if (*dbp == ';')
4410                 skip_till_semicolumn = false;
4411               dbp++;
4412               continue;         /* advance char */
4413             }
4414
4415           /* Search for beginning of a token.  */
4416           if (!begtoken (*dbp))
4417             {
4418               dbp++;
4419               continue;         /* advance char */
4420             }
4421
4422           /* We are at the beginning of a token. */
4423           switch (c_tolower (*dbp))
4424             {
4425             case 'f':
4426               if (!packages_only && nocase_tail ("function"))
4427                 Ada_getit (inf, "/f");
4428               else
4429                 break;          /* from switch */
4430               continue;         /* advance char */
4431             case 'p':
4432               if (!packages_only && nocase_tail ("procedure"))
4433                 Ada_getit (inf, "/p");
4434               else if (nocase_tail ("package"))
4435                 Ada_getit (inf, "/s");
4436               else if (nocase_tail ("protected")) /* protected type */
4437                 Ada_getit (inf, "/t");
4438               else
4439                 break;          /* from switch */
4440               continue;         /* advance char */
4441
4442             case 'u':
4443               if (typedefs && !packages_only && nocase_tail ("use"))
4444                 {
4445                   /* when tagging types, avoid tagging  use type Pack.Typename;
4446                      for this, we will skip everything till a ; */
4447                   skip_till_semicolumn = true;
4448                   continue;     /* advance char */
4449                 }
4450
4451             case 't':
4452               if (!packages_only && nocase_tail ("task"))
4453                 Ada_getit (inf, "/k");
4454               else if (typedefs && !packages_only && nocase_tail ("type"))
4455                 {
4456                   Ada_getit (inf, "/t");
4457                   while (*dbp != '\0')
4458                     dbp += 1;
4459                 }
4460               else
4461                 break;          /* from switch */
4462               continue;         /* advance char */
4463             }
4464
4465           /* Look for the end of the token. */
4466           while (!endtoken (*dbp))
4467             dbp++;
4468
4469         } /* advance char */
4470     } /* advance line */
4471 }
4472
4473 \f
4474 /*
4475  * Unix and microcontroller assembly tag handling
4476  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4477  * Idea by Bob Weiner, Motorola Inc. (1994)
4478  */
4479 static void
4480 Asm_labels (FILE *inf)
4481 {
4482   register char *cp;
4483
4484   LOOP_ON_INPUT_LINES (inf, lb, cp)
4485     {
4486       /* If first char is alphabetic or one of [_.$], test for colon
4487          following identifier. */
4488       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4489         {
4490           /* Read past label. */
4491           cp++;
4492           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4493             cp++;
4494           if (*cp == ':' || c_isspace (*cp))
4495             /* Found end of label, so copy it and add it to the table. */
4496             make_tag (lb.buffer, cp - lb.buffer, true,
4497                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4498         }
4499     }
4500 }
4501
4502 \f
4503 /*
4504  * Perl support
4505  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4506  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4507  * Perl variable names: /^(my|local).../
4508  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4509  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4510  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4511  */
4512 static void
4513 Perl_functions (FILE *inf)
4514 {
4515   char *package = savestr ("main"); /* current package name */
4516   register char *cp;
4517
4518   LOOP_ON_INPUT_LINES (inf, lb, cp)
4519     {
4520       cp = skip_spaces (cp);
4521
4522       if (LOOKING_AT (cp, "package"))
4523         {
4524           free (package);
4525           get_tag (cp, &package);
4526         }
4527       else if (LOOKING_AT (cp, "sub"))
4528         {
4529           char *pos, *sp;
4530
4531         subr:
4532           sp = cp;
4533           while (!notinname (*cp))
4534             cp++;
4535           if (cp == sp)
4536             continue;           /* nothing found */
4537           pos = strchr (sp, ':');
4538           if (pos && pos < cp && pos[1] == ':')
4539             {
4540               /* The name is already qualified. */
4541               if (!class_qualify)
4542                 {
4543                   char *q = pos + 2, *qpos;
4544                   while ((qpos = strchr (q, ':')) != NULL
4545                          && qpos < cp
4546                          && qpos[1] == ':')
4547                     q = qpos + 2;
4548                   sp = q;
4549                 }
4550               make_tag (sp, cp - sp, true,
4551                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4552             }
4553           else if (class_qualify)
4554             /* Qualify it. */
4555             {
4556               char savechar, *name;
4557
4558               savechar = *cp;
4559               *cp = '\0';
4560               name = concat (package, "::", sp);
4561               *cp = savechar;
4562               make_tag (name, strlen (name), true,
4563                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4564               free (name);
4565             }
4566           else
4567             make_tag (sp, cp - sp, true,
4568                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4569         }
4570       else if (LOOKING_AT (cp, "use constant")
4571                || LOOKING_AT (cp, "use constant::defer"))
4572         {
4573           /* For hash style multi-constant like
4574                 use constant { FOO => 123,
4575                                BAR => 456 };
4576              only the first FOO is picked up.  Parsing across the value
4577              expressions would be difficult in general, due to possible nested
4578              hashes, here-documents, etc.  */
4579           if (*cp == '{')
4580             cp = skip_spaces (cp+1);
4581           goto subr;
4582         }
4583       else if (globals) /* only if we are tagging global vars */
4584         {
4585           /* Skip a qualifier, if any. */
4586           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4587           /* After "my" or "local", but before any following paren or space. */
4588           char *varstart = cp;
4589
4590           if (qual              /* should this be removed?  If yes, how? */
4591               && (*cp == '$' || *cp == '@' || *cp == '%'))
4592             {
4593               varstart += 1;
4594               do
4595                 cp++;
4596               while (c_isalnum (*cp) || *cp == '_');
4597             }
4598           else if (qual)
4599             {
4600               /* Should be examining a variable list at this point;
4601                  could insist on seeing an open parenthesis. */
4602               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4603                 cp++;
4604             }
4605           else
4606             continue;
4607
4608           make_tag (varstart, cp - varstart, false,
4609                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4610         }
4611     }
4612   free (package);
4613 }
4614
4615
4616 /*
4617  * Python support
4618  * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4619  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4620  * More ideas by seb bacon <seb@jamkit.com> (2002)
4621  */
4622 static void
4623 Python_functions (FILE *inf)
4624 {
4625   register char *cp;
4626
4627   LOOP_ON_INPUT_LINES (inf, lb, cp)
4628     {
4629       cp = skip_spaces (cp);
4630       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4631         {
4632           char *name = cp;
4633           while (!notinname (*cp) && *cp != ':')
4634             cp++;
4635           make_tag (name, cp - name, true,
4636                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4637         }
4638     }
4639 }
4640
4641 /*
4642  * Ruby support
4643  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4644  */
4645 static void
4646 Ruby_functions (FILE *inf)
4647 {
4648   char *cp = NULL;
4649   bool reader = false, writer = false, alias = false, continuation = false;
4650
4651   LOOP_ON_INPUT_LINES (inf, lb, cp)
4652     {
4653       bool is_class = false;
4654       bool is_method = false;
4655       char *name;
4656
4657       cp = skip_spaces (cp);
4658       if (!continuation
4659           /* Constants.  */
4660           && c_isalpha (*cp) && c_isupper (*cp))
4661         {
4662           char *bp, *colon = NULL;
4663
4664           name = cp;
4665
4666           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4667             {
4668               if (*cp == ':')
4669                 colon = cp;
4670             }
4671           if (cp > name + 1)
4672             {
4673               bp = skip_spaces (cp);
4674               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4675                 {
4676                   if (colon && !c_isspace (colon[1]))
4677                     name = colon + 1;
4678                   make_tag (name, cp - name, false,
4679                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4680                 }
4681             }
4682         }
4683       else if (!continuation
4684                /* Modules, classes, methods.  */
4685                && ((is_method = LOOKING_AT (cp, "def"))
4686                    || (is_class = LOOKING_AT (cp, "class"))
4687                    || LOOKING_AT (cp, "module")))
4688         {
4689           const char self_name[] = "self.";
4690           const size_t self_size1 = sizeof (self_name) - 1;
4691
4692           name = cp;
4693
4694          /* Ruby method names can end in a '='.  Also, operator overloading can
4695             define operators whose names include '='.  */
4696           while (!notinname (*cp) || *cp == '=')
4697             cp++;
4698
4699           /* Remove "self." from the method name.  */
4700           if (cp - name > self_size1
4701               && strneq (name, self_name, self_size1))
4702             name += self_size1;
4703
4704           /* Remove the class/module qualifiers from method names.  */
4705           if (is_method)
4706             {
4707               char *q;
4708
4709               for (q = name; q < cp && *q != '.'; q++)
4710                 ;
4711               if (q < cp - 1)   /* punt if we see just "FOO." */
4712                 name = q + 1;
4713             }
4714
4715           /* Don't tag singleton classes.  */
4716           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4717             continue;
4718
4719           make_tag (name, cp - name, true,
4720                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4721         }
4722       else
4723         {
4724           /* Tag accessors and aliases.  */
4725
4726           if (!continuation)
4727             reader = writer = alias = false;
4728
4729           while (*cp && *cp != '#')
4730             {
4731               if (!continuation)
4732                 {
4733                   reader = writer = alias = false;
4734                   if (LOOKING_AT (cp, "attr_reader"))
4735                     reader = true;
4736                   else if (LOOKING_AT (cp, "attr_writer"))
4737                     writer = true;
4738                   else if (LOOKING_AT (cp, "attr_accessor"))
4739                     {
4740                       reader = true;
4741                       writer = true;
4742                     }
4743                   else if (LOOKING_AT (cp, "alias_method"))
4744                     alias = true;
4745                 }
4746               if (reader || writer || alias)
4747                 {
4748                   do {
4749                     char *np;
4750
4751                     cp = skip_spaces (cp);
4752                     if (*cp == '(')
4753                       cp = skip_spaces (cp + 1);
4754                     np = cp;
4755                     cp = skip_name (cp);
4756                     if (*np != ':')
4757                       continue;
4758                     np++;
4759                     if (reader)
4760                       {
4761                         make_tag (np, cp - np, true,
4762                                   lb.buffer, cp - lb.buffer + 1,
4763                                   lineno, linecharno);
4764                         continuation = false;
4765                       }
4766                     if (writer)
4767                       {
4768                         size_t name_len = cp - np + 1;
4769                         char *wr_name = xnew (name_len + 1, char);
4770
4771                         memcpy (wr_name, np, name_len - 1);
4772                         memcpy (wr_name + name_len - 1, "=", 2);
4773                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4774                                 lineno, linecharno);
4775                         continuation = false;
4776                       }
4777                     if (alias)
4778                       {
4779                         if (!continuation)
4780                           make_tag (np, cp - np, true,
4781                                     lb.buffer, cp - lb.buffer + 1,
4782                                     lineno, linecharno);
4783                         continuation = false;
4784                         while (*cp && *cp != '#' && *cp != ';')
4785                           {
4786                             if (*cp == ',')
4787                               continuation = true;
4788                             else if (!c_isspace (*cp))
4789                               continuation = false;
4790                             cp++;
4791                           }
4792                         if (*cp == ';')
4793                           continuation = false;
4794                       }
4795                     cp = skip_spaces (cp);
4796                   } while ((alias
4797                             ? (*cp == ',')
4798                             : (continuation = (*cp == ',')))
4799                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4800                 }
4801               if (*cp != '#')
4802                 cp = skip_name (cp);
4803               while (*cp && *cp != '#' && notinname (*cp))
4804                 cp++;
4805             }
4806         }
4807     }
4808 }
4809
4810 \f
4811 /*
4812  * PHP support
4813  * Look for:
4814  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4815  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4816  *  - /^[ \t]*define\(\"[^\"]+/
4817  * Only with --members:
4818  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4819  * Idea by Diez B. Roggisch (2001)
4820  */
4821 static void
4822 PHP_functions (FILE *inf)
4823 {
4824   char *cp, *name;
4825   bool search_identifier = false;
4826
4827   LOOP_ON_INPUT_LINES (inf, lb, cp)
4828     {
4829       cp = skip_spaces (cp);
4830       name = cp;
4831       if (search_identifier
4832           && *cp != '\0')
4833         {
4834           while (!notinname (*cp))
4835             cp++;
4836           make_tag (name, cp - name, true,
4837                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4838           search_identifier = false;
4839         }
4840       else if (LOOKING_AT (cp, "function"))
4841         {
4842           if (*cp == '&')
4843             cp = skip_spaces (cp+1);
4844           if (*cp != '\0')
4845             {
4846               name = cp;
4847               while (!notinname (*cp))
4848                 cp++;
4849               make_tag (name, cp - name, true,
4850                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4851             }
4852           else
4853             search_identifier = true;
4854         }
4855       else if (LOOKING_AT (cp, "class"))
4856         {
4857           if (*cp != '\0')
4858             {
4859               name = cp;
4860               while (*cp != '\0' && !c_isspace (*cp))
4861                 cp++;
4862               make_tag (name, cp - name, false,
4863                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4864             }
4865           else
4866             search_identifier = true;
4867         }
4868       else if (strneq (cp, "define", 6)
4869                && (cp = skip_spaces (cp+6))
4870                && *cp++ == '('
4871                && (*cp == '"' || *cp == '\''))
4872         {
4873           char quote = *cp++;
4874           name = cp;
4875           while (*cp != quote && *cp != '\0')
4876             cp++;
4877           make_tag (name, cp - name, false,
4878                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4879         }
4880       else if (members
4881                && LOOKING_AT (cp, "var")
4882                && *cp == '$')
4883         {
4884           name = cp;
4885           while (!notinname (*cp))
4886             cp++;
4887           make_tag (name, cp - name, false,
4888                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4889         }
4890     }
4891 }
4892
4893 \f
4894 /*
4895  * Cobol tag functions
4896  * We could look for anything that could be a paragraph name.
4897  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4898  * Idea by Corny de Souza (1993)
4899  */
4900 static void
4901 Cobol_paragraphs (FILE *inf)
4902 {
4903   register char *bp, *ep;
4904
4905   LOOP_ON_INPUT_LINES (inf, lb, bp)
4906     {
4907       if (lb.len < 9)
4908         continue;
4909       bp += 8;
4910
4911       /* If eoln, compiler option or comment ignore whole line. */
4912       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4913         continue;
4914
4915       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4916         continue;
4917       if (*ep++ == '.')
4918         make_tag (bp, ep - bp, true,
4919                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4920     }
4921 }
4922
4923 \f
4924 /*
4925  * Makefile support
4926  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4927  */
4928 static void
4929 Makefile_targets (FILE *inf)
4930 {
4931   register char *bp;
4932
4933   LOOP_ON_INPUT_LINES (inf, lb, bp)
4934     {
4935       if (*bp == '\t' || *bp == '#')
4936         continue;
4937       while (*bp != '\0' && *bp != '=' && *bp != ':')
4938         bp++;
4939       if (*bp == ':' || (globals && *bp == '='))
4940         {
4941           /* We should detect if there is more than one tag, but we do not.
4942              We just skip initial and final spaces. */
4943           char * namestart = skip_spaces (lb.buffer);
4944           while (--bp > namestart)
4945             if (!notinname (*bp))
4946               break;
4947           make_tag (namestart, bp - namestart + 1, true,
4948                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4949         }
4950     }
4951 }
4952
4953 \f
4954 /*
4955  * Pascal parsing
4956  * Original code by Mosur K. Mohan (1989)
4957  *
4958  *  Locates tags for procedures & functions.  Doesn't do any type- or
4959  *  var-definitions.  It does look for the keyword "extern" or
4960  *  "forward" immediately following the procedure statement; if found,
4961  *  the tag is skipped.
4962  */
4963 static void
4964 Pascal_functions (FILE *inf)
4965 {
4966   linebuffer tline;             /* mostly copied from C_entries */
4967   long save_lcno;
4968   int save_lineno, namelen, taglen;
4969   char c, *name;
4970
4971   bool                          /* each of these flags is true if: */
4972     incomment,                  /* point is inside a comment */
4973     inquote,                    /* point is inside '..' string */
4974     get_tagname,                /* point is after PROCEDURE/FUNCTION
4975                                    keyword, so next item = potential tag */
4976     found_tag,                  /* point is after a potential tag */
4977     inparms,                    /* point is within parameter-list */
4978     verify_tag;                 /* point has passed the parm-list, so the
4979                                    next token will determine whether this
4980                                    is a FORWARD/EXTERN to be ignored, or
4981                                    whether it is a real tag */
4982
4983   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4984   name = NULL;                  /* keep compiler quiet */
4985   dbp = lb.buffer;
4986   *dbp = '\0';
4987   linebuffer_init (&tline);
4988
4989   incomment = inquote = false;
4990   found_tag = false;            /* have a proc name; check if extern */
4991   get_tagname = false;          /* found "procedure" keyword         */
4992   inparms = false;              /* found '(' after "proc"            */
4993   verify_tag = false;           /* check if "extern" is ahead        */
4994
4995
4996   while (perhaps_more_input (inf)) /* long main loop to get next char */
4997     {
4998       c = *dbp++;
4999       if (c == '\0')            /* if end of line */
5000         {
5001           readline (&lb, inf);
5002           dbp = lb.buffer;
5003           if (*dbp == '\0')
5004             continue;
5005           if (!((found_tag && verify_tag)
5006                 || get_tagname))
5007             c = *dbp++;         /* only if don't need *dbp pointing
5008                                    to the beginning of the name of
5009                                    the procedure or function */
5010         }
5011       if (incomment)
5012         {
5013           if (c == '}')         /* within { } comments */
5014             incomment = false;
5015           else if (c == '*' && *dbp == ')') /* within (* *) comments */
5016             {
5017               dbp++;
5018               incomment = false;
5019             }
5020           continue;
5021         }
5022       else if (inquote)
5023         {
5024           if (c == '\'')
5025             inquote = false;
5026           continue;
5027         }
5028       else
5029         switch (c)
5030           {
5031           case '\'':
5032             inquote = true;     /* found first quote */
5033             continue;
5034           case '{':             /* found open { comment */
5035             incomment = true;
5036             continue;
5037           case '(':
5038             if (*dbp == '*')    /* found open (* comment */
5039               {
5040                 incomment = true;
5041                 dbp++;
5042               }
5043             else if (found_tag) /* found '(' after tag, i.e., parm-list */
5044               inparms = true;
5045             continue;
5046           case ')':             /* end of parms list */
5047             if (inparms)
5048               inparms = false;
5049             continue;
5050           case ';':
5051             if (found_tag && !inparms) /* end of proc or fn stmt */
5052               {
5053                 verify_tag = true;
5054                 break;
5055               }
5056             continue;
5057           }
5058       if (found_tag && verify_tag && (*dbp != ' '))
5059         {
5060           /* Check if this is an "extern" declaration. */
5061           if (*dbp == '\0')
5062             continue;
5063           if (c_tolower (*dbp) == 'e')
5064             {
5065               if (nocase_tail ("extern")) /* superfluous, really! */
5066                 {
5067                   found_tag = false;
5068                   verify_tag = false;
5069                 }
5070             }
5071           else if (c_tolower (*dbp) == 'f')
5072             {
5073               if (nocase_tail ("forward")) /* check for forward reference */
5074                 {
5075                   found_tag = false;
5076                   verify_tag = false;
5077                 }
5078             }
5079           if (found_tag && verify_tag) /* not external proc, so make tag */
5080             {
5081               found_tag = false;
5082               verify_tag = false;
5083               make_tag (name, namelen, true,
5084                         tline.buffer, taglen, save_lineno, save_lcno);
5085               continue;
5086             }
5087         }
5088       if (get_tagname)          /* grab name of proc or fn */
5089         {
5090           char *cp;
5091
5092           if (*dbp == '\0')
5093             continue;
5094
5095           /* Find block name. */
5096           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5097             continue;
5098
5099           /* Save all values for later tagging. */
5100           linebuffer_setlen (&tline, lb.len);
5101           strcpy (tline.buffer, lb.buffer);
5102           save_lineno = lineno;
5103           save_lcno = linecharno;
5104           name = tline.buffer + (dbp - lb.buffer);
5105           namelen = cp - dbp;
5106           taglen = cp - lb.buffer + 1;
5107
5108           dbp = cp;             /* set dbp to e-o-token */
5109           get_tagname = false;
5110           found_tag = true;
5111           continue;
5112
5113           /* And proceed to check for "extern". */
5114         }
5115       else if (!incomment && !inquote && !found_tag)
5116         {
5117           /* Check for proc/fn keywords. */
5118           switch (c_tolower (c))
5119             {
5120             case 'p':
5121               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5122                 get_tagname = true;
5123               continue;
5124             case 'f':
5125               if (nocase_tail ("unction"))
5126                 get_tagname = true;
5127               continue;
5128             }
5129         }
5130     } /* while not eof */
5131
5132   free (tline.buffer);
5133 }
5134
5135 \f
5136 /*
5137  * Lisp tag functions
5138  *  look for (def or (DEF, quote or QUOTE
5139  */
5140
5141 static void L_getit (void);
5142
5143 static void
5144 L_getit (void)
5145 {
5146   if (*dbp == '\'')             /* Skip prefix quote */
5147     dbp++;
5148   else if (*dbp == '(')
5149   {
5150     dbp++;
5151     /* Try to skip "(quote " */
5152     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5153       /* Ok, then skip "(" before name in (defstruct (foo)) */
5154       dbp = skip_spaces (dbp);
5155   }
5156   get_tag (dbp, NULL);
5157 }
5158
5159 static void
5160 Lisp_functions (FILE *inf)
5161 {
5162   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5163     {
5164       if (dbp[0] != '(')
5165         continue;
5166
5167       /* "(defvar foo)" is a declaration rather than a definition.  */
5168       if (! declarations)
5169         {
5170           char *p = dbp + 1;
5171           if (LOOKING_AT (p, "defvar"))
5172             {
5173               p = skip_name (p); /* past var name */
5174               p = skip_spaces (p);
5175               if (*p == ')')
5176                 continue;
5177             }
5178         }
5179
5180       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5181         dbp += 3;
5182
5183       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5184         {
5185           dbp = skip_non_spaces (dbp);
5186           dbp = skip_spaces (dbp);
5187           L_getit ();
5188         }
5189       else
5190         {
5191           /* Check for (foo::defmumble name-defined ... */
5192           do
5193             dbp++;
5194           while (!notinname (*dbp) && *dbp != ':');
5195           if (*dbp == ':')
5196             {
5197               do
5198                 dbp++;
5199               while (*dbp == ':');
5200
5201               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5202                 {
5203                   dbp = skip_non_spaces (dbp);
5204                   dbp = skip_spaces (dbp);
5205                   L_getit ();
5206                 }
5207             }
5208         }
5209     }
5210 }
5211
5212 \f
5213 /*
5214  * Lua script language parsing
5215  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5216  *
5217  *  "function" and "local function" are tags if they start at column 1.
5218  */
5219 static void
5220 Lua_functions (FILE *inf)
5221 {
5222   register char *bp;
5223
5224   LOOP_ON_INPUT_LINES (inf, lb, bp)
5225     {
5226       bp = skip_spaces (bp);
5227       if (bp[0] != 'f' && bp[0] != 'l')
5228         continue;
5229
5230       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5231
5232       if (LOOKING_AT (bp, "function"))
5233         {
5234           char *tag_name, *tp_dot, *tp_colon;
5235
5236           get_tag (bp, &tag_name);
5237           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5238              "foo".  */
5239           tp_dot = strrchr (tag_name, '.');
5240           tp_colon = strrchr (tag_name, ':');
5241           if (tp_dot || tp_colon)
5242             {
5243               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5244               int len_add = p - tag_name + 1;
5245
5246               get_tag (bp + len_add, NULL);
5247             }
5248         }
5249     }
5250 }
5251
5252 \f
5253 /*
5254  * PostScript tags
5255  * Just look for lines where the first character is '/'
5256  * Also look at "defineps" for PSWrap
5257  * Ideas by:
5258  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5259  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5260  */
5261 static void
5262 PS_functions (FILE *inf)
5263 {
5264   register char *bp, *ep;
5265
5266   LOOP_ON_INPUT_LINES (inf, lb, bp)
5267     {
5268       if (bp[0] == '/')
5269         {
5270           for (ep = bp+1;
5271                *ep != '\0' && *ep != ' ' && *ep != '{';
5272                ep++)
5273             continue;
5274           make_tag (bp, ep - bp, true,
5275                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5276         }
5277       else if (LOOKING_AT (bp, "defineps"))
5278         get_tag (bp, NULL);
5279     }
5280 }
5281
5282 \f
5283 /*
5284  * Forth tags
5285  * Ignore anything after \ followed by space or in ( )
5286  * Look for words defined by :
5287  * Look for constant, code, create, defer, value, and variable
5288  * OBP extensions:  Look for buffer:, field,
5289  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5290  */
5291 static void
5292 Forth_words (FILE *inf)
5293 {
5294   register char *bp;
5295
5296   LOOP_ON_INPUT_LINES (inf, lb, bp)
5297     while ((bp = skip_spaces (bp))[0] != '\0')
5298       if (bp[0] == '\\' && c_isspace (bp[1]))
5299         break;                  /* read next line */
5300       else if (bp[0] == '(' && c_isspace (bp[1]))
5301         do                      /* skip to ) or eol */
5302           bp++;
5303         while (*bp != ')' && *bp != '\0');
5304       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5305                || LOOKING_AT_NOCASE (bp, "constant")
5306                || LOOKING_AT_NOCASE (bp, "code")
5307                || LOOKING_AT_NOCASE (bp, "create")
5308                || LOOKING_AT_NOCASE (bp, "defer")
5309                || LOOKING_AT_NOCASE (bp, "value")
5310                || LOOKING_AT_NOCASE (bp, "variable")
5311                || LOOKING_AT_NOCASE (bp, "buffer:")
5312                || LOOKING_AT_NOCASE (bp, "field"))
5313         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5314       else
5315         bp = skip_non_spaces (bp);
5316 }
5317
5318 \f
5319 /*
5320  * Scheme tag functions
5321  * look for (def... xyzzy
5322  *          (def... (xyzzy
5323  *          (def ... ((...(xyzzy ....
5324  *          (set! xyzzy
5325  * Original code by Ken Haase (1985?)
5326  */
5327 static void
5328 Scheme_functions (FILE *inf)
5329 {
5330   register char *bp;
5331
5332   LOOP_ON_INPUT_LINES (inf, lb, bp)
5333     {
5334       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5335         {
5336           bp = skip_non_spaces (bp+4);
5337           /* Skip over open parens and white space.  Don't continue past
5338              '\0'. */
5339           while (*bp && notinname (*bp))
5340             bp++;
5341           get_tag (bp, NULL);
5342         }
5343       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5344         get_tag (bp, NULL);
5345     }
5346 }
5347
5348 \f
5349 /* Find tags in TeX and LaTeX input files.  */
5350
5351 /* TEX_toktab is a table of TeX control sequences that define tags.
5352  * Each entry records one such control sequence.
5353  *
5354  * Original code from who knows whom.
5355  * Ideas by:
5356  *   Stefan Monnier (2002)
5357  */
5358
5359 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5360
5361 /* Default set of control sequences to put into TEX_toktab.
5362    The value of environment var TEXTAGS is prepended to this.  */
5363 static const char *TEX_defenv = "\
5364 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5365 :part:appendix:entry:index:def\
5366 :newcommand:renewcommand:newenvironment:renewenvironment";
5367
5368 static void TEX_decode_env (const char *, const char *);
5369
5370 /*
5371  * TeX/LaTeX scanning loop.
5372  */
5373 static void
5374 TeX_commands (FILE *inf)
5375 {
5376   char *cp;
5377   linebuffer *key;
5378
5379   char TEX_esc = '\0';
5380   char TEX_opgrp, TEX_clgrp;
5381
5382   /* Initialize token table once from environment. */
5383   if (TEX_toktab == NULL)
5384     TEX_decode_env ("TEXTAGS", TEX_defenv);
5385
5386   LOOP_ON_INPUT_LINES (inf, lb, cp)
5387     {
5388       /* Look at each TEX keyword in line. */
5389       for (;;)
5390         {
5391           /* Look for a TEX escape. */
5392           while (true)
5393             {
5394               char c = *cp++;
5395               if (c == '\0' || c == '%')
5396                 goto tex_next_line;
5397
5398               /* Select either \ or ! as escape character, whichever comes
5399                  first outside a comment.  */
5400               if (!TEX_esc)
5401                 switch (c)
5402                   {
5403                   case '\\':
5404                     TEX_esc = c;
5405                     TEX_opgrp = '{';
5406                     TEX_clgrp = '}';
5407                     break;
5408
5409                   case '!':
5410                     TEX_esc = c;
5411                     TEX_opgrp = '<';
5412                     TEX_clgrp = '>';
5413                     break;
5414                   }
5415
5416               if (c == TEX_esc)
5417                 break;
5418             }
5419
5420           for (key = TEX_toktab; key->buffer != NULL; key++)
5421             if (strneq (cp, key->buffer, key->len))
5422               {
5423                 char *p;
5424                 int namelen, linelen;
5425                 bool opgrp = false;
5426
5427                 cp = skip_spaces (cp + key->len);
5428                 if (*cp == TEX_opgrp)
5429                   {
5430                     opgrp = true;
5431                     cp++;
5432                   }
5433                 for (p = cp;
5434                      (!c_isspace (*p) && *p != '#' &&
5435                       *p != TEX_opgrp && *p != TEX_clgrp);
5436                      p++)
5437                   continue;
5438                 namelen = p - cp;
5439                 linelen = lb.len;
5440                 if (!opgrp || *p == TEX_clgrp)
5441                   {
5442                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5443                       p++;
5444                     linelen = p - lb.buffer + 1;
5445                   }
5446                 make_tag (cp, namelen, true,
5447                           lb.buffer, linelen, lineno, linecharno);
5448                 goto tex_next_line; /* We only tag a line once */
5449               }
5450         }
5451     tex_next_line:
5452       ;
5453     }
5454 }
5455
5456 /* Read environment and prepend it to the default string.
5457    Build token table. */
5458 static void
5459 TEX_decode_env (const char *evarname, const char *defenv)
5460 {
5461   register const char *env, *p;
5462   int i, len;
5463
5464   /* Append default string to environment. */
5465   env = getenv (evarname);
5466   if (!env)
5467     env = defenv;
5468   else
5469     env = concat (env, defenv, "");
5470
5471   /* Allocate a token table */
5472   for (len = 1, p = env; (p = strchr (p, ':')); )
5473     if (*++p)
5474       len++;
5475   TEX_toktab = xnew (len, linebuffer);
5476
5477   /* Unpack environment string into token table. Be careful about */
5478   /* zero-length strings (leading ':', "::" and trailing ':') */
5479   for (i = 0; *env != '\0';)
5480     {
5481       p = strchr (env, ':');
5482       if (!p)                   /* End of environment string. */
5483         p = env + strlen (env);
5484       if (p - env > 0)
5485         {                       /* Only non-zero strings. */
5486           TEX_toktab[i].buffer = savenstr (env, p - env);
5487           TEX_toktab[i].len = p - env;
5488           i++;
5489         }
5490       if (*p)
5491         env = p + 1;
5492       else
5493         {
5494           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5495           TEX_toktab[i].len = 0;
5496           break;
5497         }
5498     }
5499 }
5500
5501 \f
5502 /* Texinfo support.  Dave Love, Mar. 2000.  */
5503 static void
5504 Texinfo_nodes (FILE *inf)
5505 {
5506   char *cp, *start;
5507   LOOP_ON_INPUT_LINES (inf, lb, cp)
5508     if (LOOKING_AT (cp, "@node"))
5509       {
5510         start = cp;
5511         while (*cp != '\0' && *cp != ',')
5512           cp++;
5513         make_tag (start, cp - start, true,
5514                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5515       }
5516 }
5517
5518 \f
5519 /*
5520  * HTML support.
5521  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5522  * Contents of <a name=xxx> are tags with name xxx.
5523  *
5524  * Francesco Potortì, 2002.
5525  */
5526 static void
5527 HTML_labels (FILE *inf)
5528 {
5529   bool getnext = false;         /* next text outside of HTML tags is a tag */
5530   bool skiptag = false;         /* skip to the end of the current HTML tag */
5531   bool intag = false;           /* inside an html tag, looking for ID= */
5532   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5533   char *end;
5534
5535
5536   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5537
5538   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5539     for (;;)                    /* loop on the same line */
5540       {
5541         if (skiptag)            /* skip HTML tag */
5542           {
5543             while (*dbp != '\0' && *dbp != '>')
5544               dbp++;
5545             if (*dbp == '>')
5546               {
5547                 dbp += 1;
5548                 skiptag = false;
5549                 continue;       /* look on the same line */
5550               }
5551             break;              /* go to next line */
5552           }
5553
5554         else if (intag) /* look for "name=" or "id=" */
5555           {
5556             while (*dbp != '\0' && *dbp != '>'
5557                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5558               dbp++;
5559             if (*dbp == '\0')
5560               break;            /* go to next line */
5561             if (*dbp == '>')
5562               {
5563                 dbp += 1;
5564                 intag = false;
5565                 continue;       /* look on the same line */
5566               }
5567             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5568                 || LOOKING_AT_NOCASE (dbp, "id="))
5569               {
5570                 bool quoted = (dbp[0] == '"');
5571
5572                 if (quoted)
5573                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5574                     continue;
5575                 else
5576                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5577                     continue;
5578                 linebuffer_setlen (&token_name, end - dbp);
5579                 memcpy (token_name.buffer, dbp, end - dbp);
5580                 token_name.buffer[end - dbp] = '\0';
5581
5582                 dbp = end;
5583                 intag = false;  /* we found what we looked for */
5584                 skiptag = true; /* skip to the end of the tag */
5585                 getnext = true; /* then grab the text */
5586                 continue;       /* look on the same line */
5587               }
5588             dbp += 1;
5589           }
5590
5591         else if (getnext)       /* grab next tokens and tag them */
5592           {
5593             dbp = skip_spaces (dbp);
5594             if (*dbp == '\0')
5595               break;            /* go to next line */
5596             if (*dbp == '<')
5597               {
5598                 intag = true;
5599                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5600                 continue;       /* look on the same line */
5601               }
5602
5603             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5604               continue;
5605             make_tag (token_name.buffer, token_name.len, true,
5606                       dbp, end - dbp, lineno, linecharno);
5607             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5608             getnext = false;
5609             break;              /* go to next line */
5610           }
5611
5612         else                    /* look for an interesting HTML tag */
5613           {
5614             while (*dbp != '\0' && *dbp != '<')
5615               dbp++;
5616             if (*dbp == '\0')
5617               break;            /* go to next line */
5618             intag = true;
5619             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5620               {
5621                 inanchor = true;
5622                 continue;       /* look on the same line */
5623               }
5624             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5625                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5626                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5627                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5628               {
5629                 intag = false;
5630                 getnext = true;
5631                 continue;       /* look on the same line */
5632               }
5633             dbp += 1;
5634           }
5635       }
5636 }
5637
5638 \f
5639 /*
5640  * Prolog support
5641  *
5642  * Assumes that the predicate or rule starts at column 0.
5643  * Only the first clause of a predicate or rule is added.
5644  * Original code by Sunichirou Sugou (1989)
5645  * Rewritten by Anders Lindgren (1996)
5646  */
5647 static size_t prolog_pr (char *, char *);
5648 static void prolog_skip_comment (linebuffer *, FILE *);
5649 static size_t prolog_atom (char *, size_t);
5650
5651 static void
5652 Prolog_functions (FILE *inf)
5653 {
5654   char *cp, *last;
5655   size_t len;
5656   size_t allocated;
5657
5658   allocated = 0;
5659   len = 0;
5660   last = NULL;
5661
5662   LOOP_ON_INPUT_LINES (inf, lb, cp)
5663     {
5664       if (cp[0] == '\0')        /* Empty line */
5665         continue;
5666       else if (c_isspace (cp[0])) /* Not a predicate */
5667         continue;
5668       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5669         prolog_skip_comment (&lb, inf);
5670       else if ((len = prolog_pr (cp, last)) > 0)
5671         {
5672           /* Predicate or rule.  Store the function name so that we
5673              only generate a tag for the first clause.  */
5674           if (last == NULL)
5675             last = xnew (len + 1, char);
5676           else if (len + 1 > allocated)
5677             xrnew (last, len + 1, char);
5678           allocated = len + 1;
5679           memcpy (last, cp, len);
5680           last[len] = '\0';
5681         }
5682     }
5683   free (last);
5684 }
5685
5686
5687 static void
5688 prolog_skip_comment (linebuffer *plb, FILE *inf)
5689 {
5690   char *cp;
5691
5692   do
5693     {
5694       for (cp = plb->buffer; *cp != '\0'; cp++)
5695         if (cp[0] == '*' && cp[1] == '/')
5696           return;
5697       readline (plb, inf);
5698     }
5699   while (perhaps_more_input (inf));
5700 }
5701
5702 /*
5703  * A predicate or rule definition is added if it matches:
5704  *     <beginning of line><Prolog Atom><whitespace>(
5705  * or  <beginning of line><Prolog Atom><whitespace>:-
5706  *
5707  * It is added to the tags database if it doesn't match the
5708  * name of the previous clause header.
5709  *
5710  * Return the size of the name of the predicate or rule, or 0 if no
5711  * header was found.
5712  */
5713 static size_t
5714 prolog_pr (char *s, char *last)
5715
5716                                 /* Name of last clause. */
5717 {
5718   size_t pos;
5719   size_t len;
5720
5721   pos = prolog_atom (s, 0);
5722   if (! pos)
5723     return 0;
5724
5725   len = pos;
5726   pos = skip_spaces (s + pos) - s;
5727
5728   if ((s[pos] == '.'
5729        || (s[pos] == '(' && (pos += 1))
5730        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5731       && (last == NULL          /* save only the first clause */
5732           || len != strlen (last)
5733           || !strneq (s, last, len)))
5734         {
5735           make_tag (s, len, true, s, pos, lineno, linecharno);
5736           return len;
5737         }
5738   else
5739     return 0;
5740 }
5741
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A run of alphanumerics and underscores whose first character is a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: walk to the closing quote.  */
  p++;
  for (;;)
    {
      switch (*p)
        {
        case '\'':
          p++;
          if (*p != '\'')
            return p - start;   /* that was the closing quote */
          p++;                  /* a doubled quote stands for one */
          break;

        case '\0':
          /* Multiline quoted atoms are ignored. */
          return 0;

        case '\\':
          if (p[1] == '\0')
            return 0;
          p += 2;               /* backslash and the character it quotes */
          break;

        default:
          p++;
          break;
        }
    }
}
5798
5799 \f
5800 /*
5801  * Support for Erlang
5802  *
5803  * Generates tags for functions, defines, and records.
5804  * Assumes that Erlang functions start at column 0.
5805  * Original code by Anders Lindgren (1996)
5806  */
5807 static int erlang_func (char *, char *);
5808 static void erlang_attribute (char *);
5809 static int erlang_atom (char *);
5810
5811 static void
5812 Erlang_functions (FILE *inf)
5813 {
5814   char *cp, *last;
5815   int len;
5816   int allocated;
5817
5818   allocated = 0;
5819   len = 0;
5820   last = NULL;
5821
5822   LOOP_ON_INPUT_LINES (inf, lb, cp)
5823     {
5824       if (cp[0] == '\0')        /* Empty line */
5825         continue;
5826       else if (c_isspace (cp[0])) /* Not function nor attribute */
5827         continue;
5828       else if (cp[0] == '%')    /* comment */
5829         continue;
5830       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5831         continue;
5832       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5833         {
5834           erlang_attribute (cp);
5835           if (last != NULL)
5836             {
5837               free (last);
5838               last = NULL;
5839             }
5840         }
5841       else if ((len = erlang_func (cp, last)) > 0)
5842         {
5843           /*
5844            * Function.  Store the function name so that we only
5845            * generates a tag for the first clause.
5846            */
5847           if (last == NULL)
5848             last = xnew (len + 1, char);
5849           else if (len + 1 > allocated)
5850             xrnew (last, len + 1, char);
5851           allocated = len + 1;
5852           memcpy (last, cp, len);
5853           last[len] = '\0';
5854         }
5855     }
5856   free (last);
5857 }
5858
5859
5860 /*
5861  * A function definition is added if it matches:
5862  *     <beginning of line><Erlang Atom><whitespace>(
5863  *
5864  * It is added to the tags database if it doesn't match the
5865  * name of the previous clause header.
5866  *
5867  * Return the size of the name of the function, or 0 if no function
5868  * was found.
5869  */
5870 static int
5871 erlang_func (char *s, char *last)
5872
5873                                 /* Name of last clause. */
5874 {
5875   int pos;
5876   int len;
5877
5878   pos = erlang_atom (s);
5879   if (pos < 1)
5880     return 0;
5881
5882   len = pos;
5883   pos = skip_spaces (s + pos) - s;
5884
5885   /* Save only the first clause. */
5886   if (s[pos++] == '('
5887       && (last == NULL
5888           || len != (int)strlen (last)
5889           || !strneq (s, last, len)))
5890         {
5891           make_tag (s, len, true, s, pos, lineno, linecharno);
5892           return len;
5893         }
5894
5895   return 0;
5896 }
5897
5898
5899 /*
5900  * Handle attributes.  Currently, tags are generated for defines
5901  * and records.
5902  *
5903  * They are on the form:
5904  * -define(foo, bar).
5905  * -define(Foo(M, N), M+N).
5906  * -record(graph, {vtab = notable, cyclic = true}).
5907  */
5908 static void
5909 erlang_attribute (char *s)
5910 {
5911   char *cp = s;
5912
5913   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5914       && *cp++ == '(')
5915     {
5916       int len = erlang_atom (skip_spaces (cp));
5917       if (len > 0)
5918         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5919     }
5920   return;
5921 }
5922
5923
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed before the end of the string.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (c_isalpha (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; c_isalnum (*p) || *p == '_'; p++)
        continue;
      return (int) (p - s);
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: walk to the closing quote.  */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\')
        {
          p++;                  /* look at the quoted character */
          if (*p == '\0')
            return 0;
        }
      p++;
    }

  return (int) (p + 1 - s);     /* count the closing quote too */
}
5951
5952 \f
5953 static char *scan_separators (char *);
5954 static void add_regex (char *, language *);
5955 static char *substitute (char *, char *, struct re_registers *);
5956
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string has no separator at all and
 * also yields NULL.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan; stepping past name[0] would leave the string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  The copy never gets ahead of the read
     position, so this cannot overwrite unread input.  */
  *copyto = '\0';
  return name;
}
6015
6016 /* Look at the argument of --regex or --no-regex and do the right
6017    thing.  Same for each line of a regexp file. */
6018 static void
6019 analyze_regex (char *regex_arg)
6020 {
6021   if (regex_arg == NULL)
6022     {
6023       free_regexps ();          /* --no-regex: remove existing regexps */
6024       return;
6025     }
6026
6027   /* A real --regexp option or a line in a regexp file. */
6028   switch (regex_arg[0])
6029     {
6030       /* Comments in regexp file or null arg to --regex. */
6031     case '\0':
6032     case ' ':
6033     case '\t':
6034       break;
6035
6036       /* Read a regex file.  This is recursive and may result in a
6037          loop, which will stop when the file descriptors are exhausted. */
6038     case '@':
6039       {
6040         FILE *regexfp;
6041         linebuffer regexbuf;
6042         char *regexfile = regex_arg + 1;
6043
6044         /* regexfile is a file containing regexps, one per line. */
6045         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6046         if (regexfp == NULL)
6047           pfatal (regexfile);
6048         linebuffer_init (&regexbuf);
6049         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6050           analyze_regex (regexbuf.buffer);
6051         free (regexbuf.buffer);
6052         if (fclose (regexfp) != 0)
6053           pfatal (regexfile);
6054       }
6055       break;
6056
6057       /* Regexp to be used for a specific language only. */
6058     case '{':
6059       {
6060         language *lang;
6061         char *lang_name = regex_arg + 1;
6062         char *cp;
6063
6064         for (cp = lang_name; *cp != '}'; cp++)
6065           if (*cp == '\0')
6066             {
6067               error ("unterminated language name in regex: %s", regex_arg);
6068               return;
6069             }
6070         *cp++ = '\0';
6071         lang = get_language_from_langname (lang_name);
6072         if (lang == NULL)
6073           return;
6074         add_regex (cp, lang);
6075       }
6076       break;
6077
6078       /* Regexp to be used for any language. */
6079     default:
6080       add_regex (regex_arg, NULL);
6081       break;
6082     }
6083 }
6084
6085 /* Separate the regexp pattern, compile it,
6086    and care for optional name and modifiers. */
6087 static void
6088 add_regex (char *regexp_pattern, language *lang)
6089 {
6090   static struct re_pattern_buffer zeropattern;
6091   char sep, *pat, *name, *modifiers;
6092   char empty = '\0';
6093   const char *err;
6094   struct re_pattern_buffer *patbuf;
6095   regexp *rp;
6096   bool
6097     force_explicit_name = true, /* do not use implicit tag names */
6098     ignore_case = false,        /* case is significant */
6099     multi_line = false,         /* matches are done one line at a time */
6100     single_line = false;        /* dot does not match newline */
6101
6102
6103   if (strlen (regexp_pattern) < 3)
6104     {
6105       error ("null regexp");
6106       return;
6107     }
6108   sep = regexp_pattern[0];
6109   name = scan_separators (regexp_pattern);
6110   if (name == NULL)
6111     {
6112       error ("%s: unterminated regexp", regexp_pattern);
6113       return;
6114     }
6115   if (name[1] == sep)
6116     {
6117       error ("null name for regexp \"%s\"", regexp_pattern);
6118       return;
6119     }
6120   modifiers = scan_separators (name);
6121   if (modifiers == NULL)        /* no terminating separator --> no name */
6122     {
6123       modifiers = name;
6124       name = &empty;
6125     }
6126   else
6127     modifiers += 1;             /* skip separator */
6128
6129   /* Parse regex modifiers. */
6130   for (; modifiers[0] != '\0'; modifiers++)
6131     switch (modifiers[0])
6132       {
6133       case 'N':
6134         if (modifiers == name)
6135           error ("forcing explicit tag name but no name, ignoring");
6136         force_explicit_name = true;
6137         break;
6138       case 'i':
6139         ignore_case = true;
6140         break;
6141       case 's':
6142         single_line = true;
6143         /* FALLTHRU */
6144       case 'm':
6145         multi_line = true;
6146         need_filebuf = true;
6147         break;
6148       default:
6149         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6150         break;
6151       }
6152
6153   patbuf = xnew (1, struct re_pattern_buffer);
6154   *patbuf = zeropattern;
6155   if (ignore_case)
6156     {
6157       static char lc_trans[UCHAR_MAX + 1];
6158       int i;
6159       for (i = 0; i < UCHAR_MAX + 1; i++)
6160         lc_trans[i] = c_tolower (i);
6161       patbuf->translate = lc_trans;     /* translation table to fold case  */
6162     }
6163
6164   if (multi_line)
6165     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6166   else
6167     pat = regexp_pattern;
6168
6169   if (single_line)
6170     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6171   else
6172     re_set_syntax (RE_SYNTAX_EMACS);
6173
6174   err = re_compile_pattern (pat, strlen (pat), patbuf);
6175   if (multi_line)
6176     free (pat);
6177   if (err != NULL)
6178     {
6179       error ("%s while compiling pattern", err);
6180       return;
6181     }
6182
6183   rp = p_head;
6184   p_head = xnew (1, regexp);
6185   p_head->pattern = savestr (regexp_pattern);
6186   p_head->p_next = rp;
6187   p_head->lang = lang;
6188   p_head->pat = patbuf;
6189   p_head->name = savestr (name);
6190   p_head->error_signaled = false;
6191   p_head->force_explicit_name = force_explicit_name;
6192   p_head->ignore_case = ignore_case;
6193   p_head->multi_line = multi_line;
6194 }
6195
6196 /*
6197  * Do the substitutions indicated by the regular expression and
6198  * arguments.
6199  */
6200 static char *
6201 substitute (char *in, char *out, struct re_registers *regs)
6202 {
6203   char *result, *t;
6204   int size, dig, diglen;
6205
6206   result = NULL;
6207   size = strlen (out);
6208
6209   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6210   if (out[size - 1] == '\\')
6211     fatal ("pattern error in \"%s\"", out);
6212   for (t = strchr (out, '\\');
6213        t != NULL;
6214        t = strchr (t + 2, '\\'))
6215     if (c_isdigit (t[1]))
6216       {
6217         dig = t[1] - '0';
6218         diglen = regs->end[dig] - regs->start[dig];
6219         size += diglen - 2;
6220       }
6221     else
6222       size -= 1;
6223
6224   /* Allocate space and do the substitutions. */
6225   assert (size >= 0);
6226   result = xnew (size + 1, char);
6227
6228   for (t = result; *out != '\0'; out++)
6229     if (*out == '\\' && c_isdigit (*++out))
6230       {
6231         dig = *out - '0';
6232         diglen = regs->end[dig] - regs->start[dig];
6233         memcpy (t, in + regs->start[dig], diglen);
6234         t += diglen;
6235       }
6236     else
6237       *t++ = *out;
6238   *t = '\0';
6239
6240   assert (t <= result + size);
6241   assert (t - result == (int)strlen (result));
6242
6243   return result;
6244 }
6245
6246 /* Deallocate all regexps. */
6247 static void
6248 free_regexps (void)
6249 {
6250   regexp *rp;
6251   while (p_head != NULL)
6252     {
6253       rp = p_head->p_next;
6254       free (p_head->pattern);
6255       free (p_head->name);
6256       free (p_head);
6257       p_head = rp;
6258     }
6259   return;
6260 }
6261
6262 /*
6263  * Reads the whole file as a single string from `filebuf' and looks for
6264  * multi-line regular expressions, creating tags on matches.
6265  * readline already dealt with normal regexps.
6266  *
6267  * Idea by Ben Wing <ben@666.com> (2002).
6268  */
6269 static void
6270 regex_tag_multiline (void)
6271 {
6272   char *buffer = filebuf.buffer;
6273   regexp *rp;
6274   char *name;
6275
6276   for (rp = p_head; rp != NULL; rp = rp->p_next)
6277     {
6278       int match = 0;
6279
6280       if (!rp->multi_line)
6281         continue;               /* skip normal regexps */
6282
6283       /* Generic initializations before parsing file from memory. */
6284       lineno = 1;               /* reset global line number */
6285       charno = 0;               /* reset global char number */
6286       linecharno = 0;           /* reset global char number of line start */
6287
6288       /* Only use generic regexps or those for the current language. */
6289       if (rp->lang != NULL && rp->lang != curfdp->lang)
6290         continue;
6291
6292       while (match >= 0 && match < filebuf.len)
6293         {
6294           match = re_search (rp->pat, buffer, filebuf.len, charno,
6295                              filebuf.len - match, &rp->regs);
6296           switch (match)
6297             {
6298             case -2:
6299               /* Some error. */
6300               if (!rp->error_signaled)
6301                 {
6302                   error ("regexp stack overflow while matching \"%s\"",
6303                          rp->pattern);
6304                   rp->error_signaled = true;
6305                 }
6306               break;
6307             case -1:
6308               /* No match. */
6309               break;
6310             default:
6311               if (match == rp->regs.end[0])
6312                 {
6313                   if (!rp->error_signaled)
6314                     {
6315                       error ("regexp matches the empty string: \"%s\"",
6316                              rp->pattern);
6317                       rp->error_signaled = true;
6318                     }
6319                   match = -3;   /* exit from while loop */
6320                   break;
6321                 }
6322
6323               /* Match occurred.  Construct a tag. */
6324               while (charno < rp->regs.end[0])
6325                 if (buffer[charno++] == '\n')
6326                   lineno++, linecharno = charno;
6327               name = rp->name;
6328               if (name[0] == '\0')
6329                 name = NULL;
6330               else /* make a named tag */
6331                 name = substitute (buffer, rp->name, &rp->regs);
6332               if (rp->force_explicit_name)
6333                 /* Force explicit tag name, if a name is there. */
6334                 pfnote (name, true, buffer + linecharno,
6335                         charno - linecharno + 1, lineno, linecharno);
6336               else
6337                 make_tag (name, strlen (name), true, buffer + linecharno,
6338                           charno - linecharno + 1, lineno, linecharno);
6339               break;
6340             }
6341         }
6342     }
6343 }
6344
6345 \f
6346 static bool
6347 nocase_tail (const char *cp)
6348 {
6349   int len = 0;
6350
6351   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6352     cp++, len++;
6353   if (*cp == '\0' && !intoken (dbp[len]))
6354     {
6355       dbp += len;
6356       return true;
6357     }
6358   return false;
6359 }
6360
6361 static void
6362 get_tag (register char *bp, char **namepp)
6363 {
6364   register char *cp = bp;
6365
6366   if (*bp != '\0')
6367     {
6368       /* Go till you get to white space or a syntactic break */
6369       for (cp = bp + 1; !notinname (*cp); cp++)
6370         continue;
6371       make_tag (bp, cp - bp, true,
6372                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6373     }
6374
6375   if (namepp != NULL)
6376     *namepp = savenstr (bp, cp - bp);
6377 }
6378
6379 /*
6380  * Read a line of text from `stream' into `lbp', excluding the
6381  * newline or CR-NL, if any.  Return the number of characters read from
6382  * `stream', which is the length of the line including the newline.
6383  *
6384  * On DOS or Windows we do not count the CR character, if any before the
6385  * NL, in the returned length; this mirrors the behavior of Emacs on those
6386  * platforms (for text files, it translates CR-NL to NL as it reads in the
6387  * file).
6388  *
6389  * If multi-line regular expressions are requested, each line read is
6390  * appended to `filebuf'.
6391  */
6392 static long
6393 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6394 {
6395   char *buffer = lbp->buffer;
6396   char *p = lbp->buffer;
6397   char *pend;
6398   int chars_deleted;
6399
6400   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6401
6402   for (;;)
6403     {
6404       register int c = getc (stream);
6405       if (p == pend)
6406         {
6407           /* We're at the end of linebuffer: expand it. */
6408           lbp->size *= 2;
6409           xrnew (buffer, lbp->size, char);
6410           p += buffer - lbp->buffer;
6411           pend = buffer + lbp->size;
6412           lbp->buffer = buffer;
6413         }
6414       if (c == EOF)
6415         {
6416           if (ferror (stream))
6417             perror (filename);
6418           *p = '\0';
6419           chars_deleted = 0;
6420           break;
6421         }
6422       if (c == '\n')
6423         {
6424           if (p > buffer && p[-1] == '\r')
6425             {
6426               p -= 1;
6427               chars_deleted = 2;
6428             }
6429           else
6430             {
6431               chars_deleted = 1;
6432             }
6433           *p = '\0';
6434           break;
6435         }
6436       *p++ = c;
6437     }
6438   lbp->len = p - buffer;
6439
6440   if (need_filebuf              /* we need filebuf for multi-line regexps */
6441       && chars_deleted > 0)     /* not at EOF */
6442     {
6443       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6444         {
6445           /* Expand filebuf. */
6446           filebuf.size *= 2;
6447           xrnew (filebuf.buffer, filebuf.size, char);
6448         }
6449       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6450       filebuf.len += lbp->len;
6451       filebuf.buffer[filebuf.len++] = '\n';
6452       filebuf.buffer[filebuf.len] = '\0';
6453     }
6454
6455   return lbp->len + chars_deleted;
6456 }
6457
6458 /*
6459  * Like readline_internal, above, but in addition try to match the
6460  * input line against relevant regular expressions and manage #line
6461  * directives.
6462  */
6463 static void
6464 readline (linebuffer *lbp, FILE *stream)
6465 {
6466   long result;
6467
6468   linecharno = charno;          /* update global char number of line start */
6469   result = readline_internal (lbp, stream, infilename); /* read line */
6470   lineno += 1;                  /* increment global line number */
6471   charno += result;             /* increment global char number */
6472
6473   /* Honor #line directives. */
6474   if (!no_line_directive)
6475     {
6476       static bool discard_until_line_directive;
6477
6478       /* Check whether this is a #line directive. */
6479       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6480         {
6481           unsigned int lno;
6482           int start = 0;
6483
6484           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6485               && start > 0)     /* double quote character found */
6486             {
6487               char *endp = lbp->buffer + start;
6488
6489               while ((endp = strchr (endp, '"')) != NULL
6490                      && endp[-1] == '\\')
6491                 endp++;
6492               if (endp != NULL)
6493                 /* Ok, this is a real #line directive.  Let's deal with it. */
6494                 {
6495                   char *taggedabsname;  /* absolute name of original file */
6496                   char *taggedfname;    /* name of original file as given */
6497                   char *name;           /* temp var */
6498
6499                   discard_until_line_directive = false; /* found it */
6500                   name = lbp->buffer + start;
6501                   *endp = '\0';
6502                   canonicalize_filename (name);
6503                   taggedabsname = absolute_filename (name, tagfiledir);
6504                   if (filename_is_absolute (name)
6505                       || filename_is_absolute (curfdp->infname))
6506                     taggedfname = savestr (taggedabsname);
6507                   else
6508                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6509
6510                   if (streq (curfdp->taggedfname, taggedfname))
6511                     /* The #line directive is only a line number change.  We
6512                        deal with this afterwards. */
6513                     free (taggedfname);
6514                   else
6515                     /* The tags following this #line directive should be
6516                        attributed to taggedfname.  In order to do this, set
6517                        curfdp accordingly. */
6518                     {
6519                       fdesc *fdp; /* file description pointer */
6520
6521                       /* Go look for a file description already set up for the
6522                          file indicated in the #line directive.  If there is
6523                          one, use it from now until the next #line
6524                          directive. */
6525                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6526                         if (streq (fdp->infname, curfdp->infname)
6527                             && streq (fdp->taggedfname, taggedfname))
6528                           /* If we remove the second test above (after the &&)
6529                              then all entries pertaining to the same file are
6530                              coalesced in the tags file.  If we use it, then
6531                              entries pertaining to the same file but generated
6532                              from different files (via #line directives) will
6533                              go into separate sections in the tags file.  These
6534                              alternatives look equivalent.  The first one
6535                              destroys some apparently useless information. */
6536                           {
6537                             curfdp = fdp;
6538                             free (taggedfname);
6539                             break;
6540                           }
6541                       /* Else, if we already tagged the real file, skip all
6542                          input lines until the next #line directive. */
6543                       if (fdp == NULL) /* not found */
6544                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6545                           if (streq (fdp->infabsname, taggedabsname))
6546                             {
6547                               discard_until_line_directive = true;
6548                               free (taggedfname);
6549                               break;
6550                             }
6551                       /* Else create a new file description and use that from
6552                          now on, until the next #line directive. */
6553                       if (fdp == NULL) /* not found */
6554                         {
6555                           fdp = fdhead;
6556                           fdhead = xnew (1, fdesc);
6557                           *fdhead = *curfdp; /* copy curr. file description */
6558                           fdhead->next = fdp;
6559                           fdhead->infname = savestr (curfdp->infname);
6560                           fdhead->infabsname = savestr (curfdp->infabsname);
6561                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6562                           fdhead->taggedfname = taggedfname;
6563                           fdhead->usecharno = false;
6564                           fdhead->prop = NULL;
6565                           fdhead->written = false;
6566                           curfdp = fdhead;
6567                         }
6568                     }
6569                   free (taggedabsname);
6570                   lineno = lno - 1;
6571                   readline (lbp, stream);
6572                   return;
6573                 } /* if a real #line directive */
6574             } /* if #line is followed by a number */
6575         } /* if line begins with "#line " */
6576
6577       /* If we are here, no #line directive was found. */
6578       if (discard_until_line_directive)
6579         {
6580           if (result > 0)
6581             {
6582               /* Do a tail recursion on ourselves, thus discarding the contents
6583                  of the line buffer. */
6584               readline (lbp, stream);
6585               return;
6586             }
6587           /* End of file. */
6588           discard_until_line_directive = false;
6589           return;
6590         }
6591     } /* if #line directives should be considered */
6592
6593   {
6594     int match;
6595     regexp *rp;
6596     char *name;
6597
6598     /* Match against relevant regexps. */
6599     if (lbp->len > 0)
6600       for (rp = p_head; rp != NULL; rp = rp->p_next)
6601         {
6602           /* Only use generic regexps or those for the current language.
6603              Also do not use multiline regexps, which is the job of
6604              regex_tag_multiline. */
6605           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6606               || rp->multi_line)
6607             continue;
6608
6609           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6610           switch (match)
6611             {
6612             case -2:
6613               /* Some error. */
6614               if (!rp->error_signaled)
6615                 {
6616                   error ("regexp stack overflow while matching \"%s\"",
6617                          rp->pattern);
6618                   rp->error_signaled = true;
6619                 }
6620               break;
6621             case -1:
6622               /* No match. */
6623               break;
6624             case 0:
6625               /* Empty string matched. */
6626               if (!rp->error_signaled)
6627                 {
6628                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6629                   rp->error_signaled = true;
6630                 }
6631               break;
6632             default:
6633               /* Match occurred.  Construct a tag. */
6634               name = rp->name;
6635               if (name[0] == '\0')
6636                 name = NULL;
6637               else /* make a named tag */
6638                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6639               if (rp->force_explicit_name)
6640                 /* Force explicit tag name, if a name is there. */
6641                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6642               else
6643                 make_tag (name, strlen (name), true,
6644                           lbp->buffer, match, lineno, linecharno);
6645               break;
6646             }
6647         }
6648   }
6649 }
6650
6651 \f
/*
 * Duplicate the NUL-terminated string CP: return a pointer to a
 * freshly allocated area of strlen(cp)+1 bytes holding a copy of it.
 * The work is delegated to savenstr.
 */
static char *
savestr (const char *cp)
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6661
6662 /*
6663  * Return a pointer to a space of size LEN+1 allocated with xnew where
6664  * the string CP has been copied for at most the first LEN characters.
6665  */
6666 static char *
6667 savenstr (const char *cp, int len)
6668 {
6669   char *dp = xnew (len + 1, char);
6670   dp[len] = '\0';
6671   return memcpy (dp, cp, len);
6672 }
6673
/* Return a pointer to the first character at or after CP for which
   c_isspace is false.  The end of the string is not a space, so the
   scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6682
/* Return a pointer to the first character at or after CP that is
   either a space (per c_isspace) or the end of the string.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
6691
/* Return a pointer to the first character at or after CP that is not
   in the "name" class, i.e. for which notinname is true.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6701
/* Print a diagnostic built from the printf-style FORMAT and its
   arguments through verror, then terminate the program with
   EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
6712
/* Print the system error message for the current errno, prefixed by
   S1 (see perror), then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6719
/* Print on stderr a hint telling the user to run `progname' with
   --help, then terminate the program with EXIT_FAILURE.  */
static void
suggest_asking_for_help (void)
{
  fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
           progname);
  exit (EXIT_FAILURE);
}
6727
/* Emit a diagnostic built from the printf-style FORMAT and its
   arguments through verror.  Unlike fatal, return to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6737
6738 static void
6739 verror (char const *format, va_list ap)
6740 {
6741   fprintf (stderr, "%s: ", progname);
6742   vfprintf (stderr, format, ap);
6743   fprintf (stderr, "\n");
6744 }
6745
6746 /* Return a newly-allocated string whose contents
6747    concatenate those of s1, s2, s3.  */
6748 static char *
6749 concat (const char *s1, const char *s2, const char *s3)
6750 {
6751   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6752   char *result = xnew (len1 + len2 + len3 + 1, char);
6753
6754   strcpy (result, s1);
6755   strcpy (result + len1, s2);
6756   strcpy (result + len1 + len2, s3);
6757
6758   return result;
6759 }
6760
6761 \f
6762 /* Does the same work as the system V getcwd, but does not need to
6763    guess the buffer size in advance. */
6764 static char *
6765 etags_getcwd (void)
6766 {
6767   int bufsize = 200;
6768   char *path = xnew (bufsize, char);
6769
6770   while (getcwd (path, bufsize) == NULL)
6771     {
6772       if (errno != ERANGE)
6773         pfatal ("getcwd");
6774       bufsize *= 2;
6775       free (path);
6776       path = xnew (bufsize, char);
6777     }
6778
6779   canonicalize_filename (path);
6780   return path;
6781 }
6782
6783 /* Return a newly allocated string containing a name of a temporary file.  */
6784 static char *
6785 etags_mktmp (void)
6786 {
6787   const char *tmpdir = getenv ("TMPDIR");
6788   const char *slash = "/";
6789
6790 #if MSDOS || defined (DOS_NT)
6791   if (!tmpdir)
6792     tmpdir = getenv ("TEMP");
6793   if (!tmpdir)
6794     tmpdir = getenv ("TMP");
6795   if (!tmpdir)
6796     tmpdir = ".";
6797   if (tmpdir[strlen (tmpdir) - 1] == '/'
6798       || tmpdir[strlen (tmpdir) - 1] == '\\')
6799     slash = "";
6800 #else
6801   if (!tmpdir)
6802     tmpdir = "/tmp";
6803   if (tmpdir[strlen (tmpdir) - 1] == '/')
6804     slash = "";
6805 #endif
6806
6807   char *templt = concat (tmpdir, slash, "etXXXXXX");
6808   int fd = mkostemp (templt, O_CLOEXEC);
6809   if (fd < 0 || close (fd) != 0)
6810     {
6811       int temp_errno = errno;
6812       free (templt);
6813       errno = temp_errno;
6814       templt = NULL;
6815     }
6816
6817 #if defined (DOS_NT)
6818   /* The file name will be used in shell redirection, so it needs to have
6819      DOS-style backslashes, or else the Windows shell will barf.  */
6820   char *p;
6821   for (p = templt; *p; p++)
6822     if (*p == '/')
6823       *p = '\\';
6824 #endif
6825
6826   return templt;
6827 }
6828
6829 /* Return a newly allocated string containing the file name of FILE
6830    relative to the absolute directory DIR (which should end with a slash). */
6831 static char *
6832 relative_filename (char *file, char *dir)
6833 {
6834   char *fp, *dp, *afn, *res;
6835   int i;
6836
6837   /* Find the common root of file and dir (with a trailing slash). */
6838   afn = absolute_filename (file, cwd);
6839   fp = afn;
6840   dp = dir;
6841   while (*fp++ == *dp++)
6842     continue;
6843   fp--, dp--;                   /* back to the first differing char */
6844 #ifdef DOS_NT
6845   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6846     return afn;
6847 #endif
6848   do                            /* look at the equal chars until '/' */
6849     fp--, dp--;
6850   while (*fp != '/');
6851
6852   /* Build a sequence of "../" strings for the resulting relative file name. */
6853   i = 0;
6854   while ((dp = strchr (dp + 1, '/')) != NULL)
6855     i += 1;
6856   res = xnew (3*i + strlen (fp + 1) + 1, char);
6857   char *z = res;
6858   while (i-- > 0)
6859     z = stpcpy (z, "../");
6860
6861   /* Add the file name relative to the common root of file and dir. */
6862   strcpy (z, fp + 1);
6863   free (afn);
6864
6865   return res;
6866 }
6867
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).
   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." components are then removed
   from the copy, in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* SLASHP points at "/..": look backwards for the slash
                 that starts the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Overwrite from CP with what follows the "/..".  The
                 length strlen (slashp + 2) covers that tail together
                 with its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* SLASHP points at "/.": drop these two characters by
                 moving the tail, terminating NUL included, over them.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6930
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a
   copy of DIR.  FILE is cut short after its last slash while the
   name is computed and restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash != NULL)
    {
      char *after_slash = last_slash + 1;
      char saved = *after_slash;
      char *dirname;

      *after_slash = '\0';      /* keep only the directory part */
      dirname = absolute_filename (file, dir);
      *after_slash = saved;     /* put FILE back as it was */

      return dirname;
    }

  return savestr (dir);
}
6950
/* Return true if the file name FN is absolute, false otherwise.  FN
   must have been canonicalized with canonicalize_filename.  A name is
   absolute when it starts with a slash; under DOS_NT a name starting
   with a drive letter, a colon and a slash is absolute as well.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6962
/* Canonicalize the file name FN in place: every run of directory
   separators is collapsed into a single forward slash.  Under DOS_NT
   the backslash counts as a separator too, and an upper case drive
   letter is converted to lower case first.  The result is never
   longer than the original.  */
static void
canonicalize_filename (register char *fn)
{
#ifdef DOS_NT
  static const char separators[] = "/\\";
#else
  static const char separators[] = "/";
#endif
  register char *from = fn;     /* next character to examine */
  register char *to = fn;       /* where the next character goes */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  while (*from != '\0')
    if (strchr (separators, *from) != NULL)
      {
        /* Emit one slash for the whole run of separators. */
        *to++ = '/';
        from += strspn (from, separators);
      }
    else
      *to++ = *from++;

  *to = '\0';
}
7004
7005 \f
7006 /* Initialize a linebuffer for use. */
7007 static void
7008 linebuffer_init (linebuffer *lbp)
7009 {
7010   lbp->size = (DEBUG) ? 3 : 200;
7011   lbp->buffer = xnew (lbp->size, char);
7012   lbp->buffer[0] = '\0';
7013   lbp->len = 0;
7014 }
7015
7016 /* Set the minimum size of a string contained in a linebuffer. */
7017 static void
7018 linebuffer_setlen (linebuffer *lbp, int toksize)
7019 {
7020   while (lbp->size <= toksize)
7021     {
7022       lbp->size *= 2;
7023       xrnew (lbp->buffer, lbp->size, char);
7024     }
7025   lbp->len = toksize;
7026 }
7027
/* Allocate SIZE bytes like malloc, but never return NULL: when
   malloc fails, report "virtual memory exhausted" through fatal.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
7037
/* Resize the block PTR to SIZE bytes like realloc, but never return
   NULL: when realloc fails, report "virtual memory exhausted"
   through fatal.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
7046
7047 /*
7048  * Local Variables:
7049  * indent-tabs-mode: t
7050  * tab-width: 8
7051  * fill-column: 79
7052  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7053  * c-file-style: "gnu"
7054  * End:
7055  */
7056
7057 /* etags.c ends here */