code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
   81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
      /* Revision identification string; note that it is not `static', so
         it has external linkage.
         NOTE(review): the "@(#)" prefix is presumably the marker that
         what(1)-style tools look for in binaries -- confirm.  */
  82
   83 #ifdef DEBUG                     /* normalize DEBUG to a true/false value */
   84 #  undef DEBUG
   85 #  define DEBUG true
   86 #else
   87 #  define DEBUG  false
   88 #  define NDEBUG                /* disable assert */
      /* NDEBUG is defined here, ahead of the #include <assert.h> further
         down, so it is already in effect when that header is read.  */
   89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
   97 /* WIN32_NATIVE is for XEmacs.
   98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
   99 #ifdef WIN32_NATIVE              /* WIN32_NATIVE means WINDOWSNT, never MSDOS */
  100 # undef MSDOS
  101 # undef  WINDOWSNT
  102 # define WINDOWSNT
  103 #endif /* WIN32_NATIVE */
  104
  105 #ifdef MSDOS                     /* normalize MSDOS to a true/false value */
  106 # undef MSDOS
  107 # define MSDOS true
  108 # include <sys/param.h>
  109 #else
  110 # define MSDOS false
  111 #endif /* MSDOS */
  112
  113 #ifdef WINDOWSNT
  114 # include <direct.h>
  115 # define MAXPATHLEN _MAX_PATH    /* use the _MAX_PATH limit as MAXPATHLEN */
  116 # undef HAVE_NTGUI
  117 # undef  DOS_NT                  /* WINDOWSNT always implies DOS_NT */
  118 # define DOS_NT
  119 # define O_CLOEXEC O_NOINHERIT   /* spell O_CLOEXEC as O_NOINHERIT here */
  120 #endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <sys/types.h>
 131 #include <sys/stat.h>
 132 #include <binary-io.h>
 133 #include <c-ctype.h>
 134 #include <c-strcase.h>
 135
 136 #include <assert.h>
 137 #ifdef NDEBUG
 138 # undef  assert                 /* some systems have a buggy assert.h */
 139 # define assert(x) ((void) 0)
 140 #endif
 141
 142 #include <getopt.h>
 143 #include <regex.h>
 144
  145 /* Define CTAGS to make the program "ctags" compatible with the usual one.
  146  Leave it undefined to make the program "etags", which makes emacs-style
  147  tag tables and tags typedefs, #defines and struct/union/enum by default. */
      /* Either way CTAGS ends up defined, as true or as false, so the rest
         of the file can test it with `#if CTAGS'.  */
  148 #ifdef CTAGS
  149 # undef  CTAGS
  150 # define CTAGS true
  151 #else
  152 # define CTAGS false
  153 #endif
 154
 155 #define streq(s,t)      (assert ((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 156 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 157 #define strneq(s,t,n)   (assert ((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 158 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 159
 160 /* C is not in a name.  */
 161 static bool
 162 notinname (unsigned char c)
 163 {
 164   /* Look at make_tag before modifying!  */
 165   static bool const table[UCHAR_MAX + 1] = {
 166     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 167     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 168   };
 169   return table[c];
 170 }
 171
 172 /* C can start a token.  */
 173 static bool
 174 begtoken (unsigned char c)
 175 {
 176   static bool const table[UCHAR_MAX + 1] = {
 177     ['$']=1, ['@']=1,
 178     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 179     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 180     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 181     ['Y']=1, ['Z']=1,
 182     ['_']=1,
 183     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 184     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 185     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 186     ['y']=1, ['z']=1,
 187     ['~']=1
 188   };
 189   return table[c];
 190 }
 191
 192 /* C can be in the middle of a token.  */
 193 static bool
 194 intoken (unsigned char c)
 195 {
 196   static bool const table[UCHAR_MAX + 1] = {
 197     ['$']=1,
 198     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 199     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 200     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 201     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 202     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 203     ['Y']=1, ['Z']=1,
 204     ['_']=1,
 205     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 206     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 207     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 208     ['y']=1, ['z']=1
 209   };
 210   return table[c];
 211 }
 212
 213 /* C can end a token.  */
 214 static bool
 215 endtoken (unsigned char c)
 216 {
 217   static bool const table[UCHAR_MAX + 1] = {
 218     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 219     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 220     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 221     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 222     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 223   };
 224   return table[c];
 225 }
 226
 227 /*
 228  *      xnew, xrnew -- allocate, reallocate storage
 229  *
 230  * SYNOPSIS:    Type *xnew (int n, Type);
 231  *              void xrnew (OldPointer, int n, Type);
 232  */
 233 #define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
 234 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 235
  236 typedef void Lang_function (FILE *);
      /* Function type stored in the `function' member of `language':
         a parse function that takes the input stream and returns nothing.  */
 237
  238 typedef struct
  239 {                               /* how to decompress files with one suffix */
  240   const char *suffix;           /* file name suffix for this compressor */
  241   const char *command;          /* takes one arg and decompresses to stdout */
  242 } compressor;
 243
  244 typedef struct
  245 {                               /* one entry of the table of languages */
  246   const char *name;             /* language name */
  247   const char *help;             /* detailed help for the language */
  248   Lang_function *function;      /* parse function */
  249   const char **suffixes;        /* name suffixes of this language's files */
  250   const char **filenames;       /* names of this language's files */
  251   const char **interpreters;    /* interpreters for this language */
  252   bool metasource;              /* source used to generate other sources */
      /* The suffix, file name and interpreter arrays defined in this file
         all end with a NULL element.  Members left out of an initializer
         are zero-initialized, so a table entry that stops after
         `suffixes' has null filenames/interpreters and a false
         metasource.  */
  253 } language;
 254
  255 typedef struct fdesc
  256 {                               /* description of one input file */
  257   struct fdesc *next;           /* for the linked list */
  258   char *infname;                /* uncompressed input file name */
  259   char *infabsname;             /* absolute uncompressed input file name */
  260   char *infabsdir;              /* absolute dir of input file */
  261   char *taggedfname;            /* file name to write in tagfile */
  262   language *lang;               /* language of file */
  263   char *prop;                   /* file properties to write in tagfile */
  264   bool usecharno;               /* etags tags shall contain char number */
  265   bool written;                 /* entry written in the tags file */
      /* Descriptions are chained through NEXT; the file-scope variable
         `fdhead' is the head of that list.  */
  266 } fdesc;
 267
  268 typedef struct node_st
  269 {                               /* sorting structure: one tag in the binary tree of tags */
  270   struct node_st *left, *right; /* left and right sons */
  271   fdesc *fdp;                   /* description of file to whom tag belongs */
  272   char *name;                   /* tag name */
  273   char *regex;                  /* search regexp */
  274   bool valid;                   /* write this tag on the tag file */
  275   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  276   bool been_warned;             /* warning already given for duplicated tag */
  277   int lno;                      /* line number tag is on */
  278   long cno;                     /* character number line starts on */
      /* Note the differing widths: LNO is an int, CNO a long.  */
  279 } node;
 280
  281 /*
  282  * A `linebuffer' is a structure which holds a line of text.
  283  * `readline_internal' reads a line from a stream into a linebuffer
  284  * and works regardless of the length of the line.
  285  * SIZE is the size of BUFFER, LEN is the length of the string in
  286  * BUFFER after readline reads it.
  287  */
  288 typedef struct
  289 {
  290   long size;                    /* size of BUFFER */
  291   int len;                      /* length of the string held in BUFFER */
  292   char *buffer;                 /* the text itself */
  293 } linebuffer;
 294
  295 /* Used to support mixing of --lang and file names. */
  296 typedef struct
  297 {                               /* one element of the argument list */
  298   enum {
  299     at_language,                /* a language specification */
  300     at_regexp,                  /* a regular expression */
  301     at_filename,                /* a file name */
  302     at_stdin,                   /* read from stdin here */
  303     at_end                      /* stop parsing the list */
  304   } arg_type;                   /* argument type */
  305   language *lang;               /* language associated with the argument */
  306   char *what;                   /* the argument itself */
  307 } argument;
 308
  309 /* Structure defining a regular expression. */
      /* Entries are chained through P_NEXT; the file-scope variable
         `p_head' is the head of that list.  */
  310 typedef struct regexp
  311 {
  312   struct regexp *p_next;        /* pointer to next in list */
  313   language *lang;               /* if set, use only for this language */
  314   char *pattern;                /* the regexp pattern */
  315   char *name;                   /* tag name */
  316   struct re_pattern_buffer *pat; /* the compiled pattern */
  317   struct re_registers regs;     /* re registers */
  318   bool error_signaled;          /* already signaled for this regexp */
  319   bool force_explicit_name;     /* do not allow implicit tag name */
  320   bool ignore_case;             /* ignore case when matching */
  321   bool multi_line;              /* do a multi-line match on the whole file */
  322 } regexp;
 323
 324
  325 /* Many compilers barf on this:
  326         Lang_function Ada_funcs;
  327    so let's write it this way */
      /* Forward declarations of the per-language parse functions.  All of
         them except C_entries have the Lang_function signature (a single
         FILE * argument); C_entries takes an additional leading int,
         c_ext, and so cannot itself be stored in a `language' entry.  */
  328 static void Ada_funcs (FILE *);
  329 static void Asm_labels (FILE *);
  330 static void C_entries (int c_ext, FILE *);
  331 static void default_C_entries (FILE *);
  332 static void plain_C_entries (FILE *);
  333 static void Cjava_entries (FILE *);
  334 static void Cobol_paragraphs (FILE *);
  335 static void Cplusplus_entries (FILE *);
  336 static void Cstar_entries (FILE *);
  337 static void Erlang_functions (FILE *);
  338 static void Forth_words (FILE *);
  339 static void Fortran_functions (FILE *);
  340 static void HTML_labels (FILE *);
  341 static void Lisp_functions (FILE *);
  342 static void Lua_functions (FILE *);
  343 static void Makefile_targets (FILE *);
  344 static void Pascal_functions (FILE *);
  345 static void Perl_functions (FILE *);
  346 static void PHP_functions (FILE *);
  347 static void PS_functions (FILE *);
  348 static void Prolog_functions (FILE *);
  349 static void Python_functions (FILE *);
  350 static void Scheme_functions (FILE *);
  351 static void TeX_commands (FILE *);
  352 static void Texinfo_nodes (FILE *);
  353 static void Yacc_entries (FILE *);
  354 static void just_read_file (FILE *);
 355
  356 static language *get_language_from_langname (const char *);
  357 static void readline (linebuffer *, FILE *);
  358 static long readline_internal (linebuffer *, FILE *);
  359 static bool nocase_tail (const char *);
  360 static void get_tag (char *, char **);
  361
      /* `error' takes a printf-style format string followed by its
         arguments (see ATTRIBUTE_FORMAT_PRINTF).  suggest_asking_for_help,
         fatal and pfatal are declared _Noreturn.  fatal is the only
         function declared here without `static'.  */
  362 static void analyze_regex (char *);
  363 static void free_regexps (void);
  364 static void regex_tag_multiline (void);
  365 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
  366 static _Noreturn void suggest_asking_for_help (void);
  367 _Noreturn void fatal (const char *, const char *);
  368 static _Noreturn void pfatal (const char *);
  369 static void add_node (node *, node **);
  370
  371 static void process_file_name (char *, language *);
  372 static void process_file (FILE *, char *, language *);
  373 static void find_entries (FILE *);
  374 static void free_tree (node *);
  375 static void free_fdesc (fdesc *);
  376 static void pfnote (char *, bool, char *, int, int, long);
  377 static void invalidate_nodes (fdesc *, node **);
  378 static void put_entries (node *);
  379
      /* NOTE(review): judging by their names only, the functions below
         are string, file name and memory helpers; their definitions are
         not part of this excerpt -- confirm before relying on that.  */
  380 static char *concat (const char *, const char *, const char *);
  381 static char *skip_spaces (char *);
  382 static char *skip_non_spaces (char *);
  383 static char *skip_name (char *);
  384 static char *savenstr (const char *, int);
  385 static char *savestr (const char *);
  386 static char *etags_getcwd (void);
  387 static char *relative_filename (char *, char *);
  388 static char *absolute_filename (char *, char *);
  389 static char *absolute_dirname (char *, char *);
  390 static bool filename_is_absolute (char *f);
  391 static void canonicalize_filename (char *);
  392 static char *etags_mktmp (void);
  393 static void linebuffer_init (linebuffer *);
  394 static void linebuffer_setlen (linebuffer *, int);
  395 static void *xmalloc (size_t);
  396 static void *xrealloc (void *, size_t);
 397
 398 \f
  399 static char searchar = '/';     /* use /.../ searches */
  400
  401 static char *tagfile;           /* output file */
  402 static char *progname;          /* name this program was invoked with */
  403 static char *cwd;               /* current working directory */
  404 static char *tagfiledir;        /* directory of tagfile */
  405 static FILE *tagf;              /* ioptr for tags file */
  406 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
  407
  408 static fdesc *fdhead;           /* head of file description list */
  409 static fdesc *curfdp;           /* current file description */
  410 static int lineno;              /* line number of current line */
  411 static long charno;             /* current character number */
  412 static long linecharno;         /* charno of start of current line */
  413 static char *dbp;               /* pointer to start of current tag */
  414
  415 static const int invalidcharno = -1;
      /* NOTE(review): presumably a sentinel meaning "no valid character
         number".  It is an int, while charno and node.cno are long --
         confirm how it is compared at its uses.  */
  416
  417 static node *nodehead;          /* the head of the binary tree of tags */
  418 static node *last_node;         /* the last node created */
  419
  420 static linebuffer lb;           /* the current line */
  421 static linebuffer filebuf;      /* a buffer containing the whole file */
  422 static linebuffer token_name;   /* a buffer containing a tag name */
 423
  424 static bool append_to_tagfile;  /* -a: append to tags */
      /* The flags declared `int' instead of `bool' (globals, members,
         declarations, no_line_directive, no_duplicates, packages_only)
         are exactly those whose address is stored in the `flag' slot of a
         longopts[] entry, which has type `int *'.  */
  425 /* The next five default to true in C and derived languages.  */
  426 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
  427 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
  428                                 /* 0 struct/enum/union decls, and C++ */
  429                                 /* member functions. */
  430 static bool constantypedefs;    /* -d: create tags for C #define, enum */
  431                                 /* constants and variables. */
  432                                 /* -D: opposite of -d.  Default under ctags. */
  433 static int globals;             /* create tags for global variables */
  434 static int members;             /* create tags for C member variables */
  435 static int declarations;        /* --declarations: tag them and extern in C&Co*/
  436 static int no_line_directive;   /* ignore #line directives (undocumented) */
  437 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
  438 static bool update;             /* -u: update tags */
  439 static bool vgrind_style;       /* -v: create vgrind style index output */
  440 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
  441 static bool cxref_style;        /* -x: create cxref style output */
  442 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
  443 static bool ignoreindent;       /* -I: ignore indentation in C */
  444 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 445
  446 /* STDIN is defined in LynxOS system headers */
  447 #ifdef STDIN
  448 # undef STDIN
  449 #endif
  450
      /* The value is larger than any single-character option code, so it
         cannot collide with the short options used in longopts[].  */
  451 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
  452 static bool parsing_stdin;      /* --parse-stdin used */
  453
  454 static regexp *p_head;          /* list of all regexps */
  455 static bool need_filebuf;       /* some regexes are multi-line */
 456
  457 static struct option longopts[] =
  458 {
      /* Table of long options, in the format of getopt_long's `struct
         option': name, argument requirement, flag pointer, value.
         Entries with a NULL flag pointer carry an option code (a
         character, or STDIN) as their value; entries with a non-null
         pointer name one of the int flags of this file together with the
         value 0 or 1 for it.  The options after the common part depend
         on CTAGS; the table ends with a `{ NULL }' entry.
         NOTE(review): "help" is listed twice, with codes 'h' and 'H';
         presumably only the first can be matched by name -- confirm
         whether the second entry is intentional.  */
  459   { "append",             no_argument,       NULL,               'a'   },
  460   { "packages-only",      no_argument,       &packages_only,     1     },
  461   { "c++",                no_argument,       NULL,               'C'   },
  462   { "declarations",       no_argument,       &declarations,      1     },
  463   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  464   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  465   { "help",               no_argument,       NULL,               'h'   },
  466   { "help",               no_argument,       NULL,               'H'   },
  467   { "ignore-indentation", no_argument,       NULL,               'I'   },
  468   { "language",           required_argument, NULL,               'l'   },
  469   { "members",            no_argument,       &members,           1     },
  470   { "no-members",         no_argument,       &members,           0     },
  471   { "output",             required_argument, NULL,               'o'   },
  472   { "regex",              required_argument, NULL,               'r'   },
  473   { "no-regex",           no_argument,       NULL,               'R'   },
  474   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  475   { "parse-stdin",        required_argument, NULL,               STDIN },
  476   { "version",            no_argument,       NULL,               'V'   },
  477
  478 #if CTAGS /* Ctags options */
  479   { "backward-search",    no_argument,       NULL,               'B'   },
  480   { "cxref",              no_argument,       NULL,               'x'   },
  481   { "defines",            no_argument,       NULL,               'd'   },
  482   { "globals",            no_argument,       &globals,           1     },
  483   { "typedefs",           no_argument,       NULL,               't'   },
  484   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  485   { "update",             no_argument,       NULL,               'u'   },
  486   { "vgrind",             no_argument,       NULL,               'v'   },
  487   { "no-warn",            no_argument,       NULL,               'w'   },
  488
  489 #else /* Etags options */
  490   { "no-defines",         no_argument,       NULL,               'D'   },
  491   { "no-globals",         no_argument,       &globals,           0     },
  492   { "include",            required_argument, NULL,               'i'   },
  493 #endif
  494   { NULL }
  495 };
 496
  497 static compressor compressors[] =
  498 {
      /* Each entry pairs a file name suffix (written without a dot) with
         the command that decompresses such a file to standard output.
         The four gzip suffixes differ only in spelling and case.  The
         table ends with a `{ NULL }' entry.  */
  499   { "z", "gzip -d -c"},
  500   { "Z", "gzip -d -c"},
  501   { "gz", "gzip -d -c"},
  502   { "GZ", "gzip -d -c"},
  503   { "bz2", "bzip2 -d -c" },
  504   { "xz", "xz -d -c" },
  505   { NULL }
  506 };
 507
  508 /*
  509  * Language stuff.
       *
       * For each language there is a NULL-terminated array of file name
       * suffixes (written without the dot) and, for most of them, a help
       * text.  The entries of the lang_names[] table refer to these
       * objects.
  510  */
  511
  512 /* Ada code */
  513 static const char *Ada_suffixes [] =
  514   { "ads", "adb", "ada", NULL };
  515 static const char Ada_help [] =
  516 "In Ada code, functions, procedures, packages, tasks and types are\n\
  517 tags.  Use the `--packages-only' option to create tags for\n\
  518 packages only.\n\
  519 Ada tag names have suffixes indicating the type of entity:\n\
  520         Entity type:    Qualifier:\n\
  521         ------------    ----------\n\
  522         function        /f\n\
  523         procedure       /p\n\
  524         package spec    /s\n\
  525         package body    /b\n\
  526         type            /t\n\
  527         task            /k\n\
  528 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
  529 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
  530 will just search for any tag `bidule'.";
  531
  532 /* Assembly code */
  533 static const char *Asm_suffixes [] =
  534   { "a",        /* Unix assembler */
  535     "asm", /* Microcontroller assembly */
  536     "def", /* BSO/Tasking definition includes  */
  537     "inc", /* Microcontroller include files */
  538     "ins", /* Microcontroller include files */
  539     "s", "sa", /* Unix assembler */
  540     "S",   /* cpp-processed Unix assembler */
  541     "src", /* BSO/Tasking C compiler output */
  542     NULL
  543   };
  544 static const char Asm_help [] =
  545 "In assembler code, labels appearing at the beginning of a line,\n\
  546 followed by a colon, are tags.";
  547
  548
  549 /* Note that .c and .h can be considered C++, if the --c++ flag was
  550    given, or if the `class' or `template' keywords are met inside the file.
  551    That is why default_C_entries is called for these. */
  552 static const char *default_C_suffixes [] =
  553   { "c", "h", NULL };
      /* Exactly one of the two default_C_help texts below is compiled,
         depending on CTAGS.  */
  554 #if CTAGS                               /* C help for Ctags */
  555 static const char default_C_help [] =
  556 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
  557 Use -T to tag definitions of `struct', `union' and `enum'.\n\
  558 Use -d to tag `#define' macro definitions and `enum' constants.\n\
  559 Use --globals to tag global variables.\n\
  560 You can tag function declarations and external variables by\n\
  561 using `--declarations', and struct members by using `--members'.";
  562 #else                                   /* C help for Etags */
  563 static const char default_C_help [] =
  564 "In C code, any C function or typedef is a tag, and so are\n\
  565 definitions of `struct', `union' and `enum'.  `#define' macro\n\
  566 definitions and `enum' constants are tags unless you specify\n\
  567 `--no-defines'.  Global variables are tags unless you specify\n\
  568 `--no-globals' and so are struct members unless you specify\n\
  569 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
  570 `--no-members' can make the tags table file much smaller.\n\
  571 You can tag function declarations and external variables by\n\
  572 using `--declarations'.";
  573 #endif  /* C help for Ctags and Etags */
  574
  575 static const char *Cplusplus_suffixes [] =
  576   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
  577     "M",                        /* Objective C++ */
  578     "pdb",                      /* PostScript with C syntax */
  579     NULL };
  580 static const char Cplusplus_help [] =
  581 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
  582 --help --lang=c --lang=c++ for full help.)\n\
  583 In addition to C tags, member functions are also recognized.  Member\n\
  584 variables are recognized unless you use the `--no-members' option.\n\
  585 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
  586 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
  587 `operator+'.";
 588
 589 static const char *Cjava_suffixes [] =
 590   { "java", NULL };
 591 static char Cjava_help [] =
 592 "In Java code, all the tags constructs of C and C++ code are\n\
 593 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 594
 595
 596 static const char *Cobol_suffixes [] =
 597   { "COB", "cob", NULL };
 598 static char Cobol_help [] =
 599 "In Cobol code, tags are paragraph names; that is, any word\n\
 600 starting in column 8 and followed by a period.";
 601
  602 static const char *Cstar_suffixes [] =
  603   { "cs", "hs", NULL };
      /* No help text is defined for C* here; its entry in lang_names[]
         uses no_lang_help.  */
  604
  605 static const char *Erlang_suffixes [] =
  606   { "erl", "hrl", NULL };
  607 static const char Erlang_help [] =
  608 "In Erlang code, the tags are the functions, records and macros\n\
  609 defined in the file.";
 610
 611 const char *Forth_suffixes [] =
 612   { "fth", "tok", NULL };
 613 static const char Forth_help [] =
 614 "In Forth code, tags are words defined by `:',\n\
 615 constant, code, create, defer, value, variable, buffer:, field.";
 616
  617 static const char *Fortran_suffixes [] =
  618   { "F", "f", "f90", "for", NULL };
  619 static const char Fortran_help [] =
  620 "In Fortran code, functions, subroutines and block data are tags.";
  621
  622 static const char *HTML_suffixes [] =
  623   { "htm", "html", "shtml", NULL };
  624 static const char HTML_help [] =
  625 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
  626 `h3' headers.  Also, tags are `name=' in anchors and all\n\
  627 occurrences of `id='.";
  628
  629 static const char *Lisp_suffixes [] =
  630   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
  631 static const char Lisp_help [] =
  632 "In Lisp code, any function defined with `defun', any variable\n\
  633 defined with `defvar' or `defconst', and in general the first\n\
  634 argument of any expression that starts with `(def' in column zero\n\
  635 is a tag.\n\
  636 The `--declarations' option tags \"(defvar foo)\" constructs too.";
  637
  638 static const char *Lua_suffixes [] =
  639   { "lua", "LUA", NULL };
  640 static const char Lua_help [] =
  641 "In Lua scripts, all functions are tags.";
  642
      /* Unlike the other tables, this one lists whole file names rather
         than suffixes.  */
  643 static const char *Makefile_filenames [] =
  644   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
  645 static const char Makefile_help [] =
  646 "In makefiles, targets are tags; additionally, variables are tags\n\
  647 unless you specify `--no-globals'.";
  648
  649 static const char *Objc_suffixes [] =
  650   { "lm",                       /* Objective lex file */
  651     "m",                        /* Objective C file */
  652      NULL };
  653 static const char Objc_help [] =
  654 "In Objective C code, tags include Objective C definitions for classes,\n\
  655 class categories, methods and protocols.  Tags for variables and\n\
  656 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
  657 (Use --help --lang=c --lang=objc --lang=java for full help.)";
  658
  659 static const char *Pascal_suffixes [] =
  660   { "p", "pas", NULL };
  661 static const char Pascal_help [] =
  662 "In Pascal code, the tags are the functions and procedures defined\n\
  663 in the file.";
  664 /* " // this is for working around an Emacs highlighting bug... */
  665
  666 static const char *Perl_suffixes [] =
  667   { "pl", "pm", NULL };
      /* Interpreter names for Perl, NULL-terminated.
         NOTE(review): "@PERL@" looks like a configure-style placeholder
         kept as a literal string -- confirm that this is intended.  */
  668 static const char *Perl_interpreters [] =
  669   { "perl", "@PERL@", NULL };
  670 static const char Perl_help [] =
  671 "In Perl code, the tags are the packages, subroutines and variables\n\
  672 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
  673 `--globals' if you want to tag global variables.  Tags for\n\
  674 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
  675 defined in the default package is `main::SUB'.";
  676
  677 static const char *PHP_suffixes [] =
  678   { "php", "php3", "php4", NULL };
  679 static const char PHP_help [] =
  680 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
  681 the `--no-members' option, vars are tags too.";
  682
  683 static const char *plain_C_suffixes [] =
  684   { "pc",                       /* Pro*C file */
  685      NULL };
  686
  687 static const char *PS_suffixes [] =
  688   { "ps", "psw", NULL };        /* .psw is for PSWrap */
  689 static const char PS_help [] =
  690 "In PostScript code, the tags are the functions.";
  691
  692 static const char *Prolog_suffixes [] =
  693   { "prolog", NULL };
  694 static const char Prolog_help [] =
  695 "In Prolog code, tags are predicates and rules at the beginning of\n\
  696 line.";
  697
  698 static const char *Python_suffixes [] =
  699   { "py", NULL };
  700 static const char Python_help [] =
  701 "In Python code, `def' or `class' at the beginning of a line\n\
  702 generate a tag.";
  703
  704 /* Can't do the `SCM' or `scm' prefix with a version number. */
  705 static const char *Scheme_suffixes [] =
  706   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
  707 static const char Scheme_help [] =
  708 "In Scheme code, tags include anything defined with `def' or with a\n\
  709 construct whose name starts with `def'.  They also include\n\
  710 variables set with `set!' at top level in the file.";
  711
  712 static const char *TeX_suffixes [] =
  713   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
  714 static const char TeX_help [] =
  715 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
  716 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
  717 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
  718 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
  719 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
  720 \n\
  721 Other commands can be specified by setting the environment variable\n\
  722 `TEXTAGS' to a colon-separated list like, for example,\n\
  723      TEXTAGS=\"mycommand:myothercommand\".";
  724
  725
  726 static const char *Texinfo_suffixes [] =
  727   { "texi", "texinfo", "txi", NULL };
  728 static const char Texinfo_help [] =
  729 "for texinfo files, lines starting with @node are tagged.";
  730
  731 static const char *Yacc_suffixes [] =
  732   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
  733 static const char Yacc_help [] =
  734 "In Bison or Yacc input files, each rule defines as a tag the\n\
  735 nonterminal it constructs.  The portions of the file that contain\n\
  736 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
  737 for full help).";
  738
      /* Help texts for the pseudo-languages `auto' and `none', followed by
         the text used for languages without a help text of their own.  */
  739 static const char auto_help [] =
  740 "`auto' is not a real language, it indicates to use\n\
  741 a default language for files base on file name suffix and file contents.";
  742
  743 static const char none_help [] =
  744 "`none' is not a real language, it indicates to only do\n\
  745 regexp processing on files.";
  746
  747 static const char no_lang_help [] =
  748 "No detailed help available for this language.";
 748 "No detailed help available for this language.";
 749
 750
 751 /*
 752  * Table of languages.
 753  *
 754  * It is ok for a given function to be listed under more than one
 755  * name.  I just didn't.
 756  */
 757
 758 static language lang_names [] =
 759 {
 760   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 761   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 762   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 763   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 764   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 765   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 766   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 767   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 768   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 769   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 770   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 771   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 772   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 773   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 774   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 775   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 776   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 777   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 778   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 779   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 780   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 781   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 782   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 783   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 784   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 785   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 786   { "auto",      auto_help },                      /* default guessing scheme */
 787   { "none",      none_help,      just_read_file }, /* regexp matching only */
 788   { NULL }                /* end of list */
 789 };
 790
 791 \f
 792 static void
 793 print_language_names (void)
 794 {
 795   language *lang;
 796   const char **name, **ext;
 797
 798   puts ("\nThese are the currently supported languages, along with the\n\
 799 default file names and dot suffixes:");
 800   for (lang = lang_names; lang->name != NULL; lang++)
 801     {
 802       printf ("  %-*s", 10, lang->name);
 803       if (lang->filenames != NULL)
 804         for (name = lang->filenames; *name != NULL; name++)
 805           printf (" %s", *name);
 806       if (lang->suffixes != NULL)
 807         for (ext = lang->suffixes; *ext != NULL; ext++)
 808           printf (" .%s", *ext);
 809       puts ("");
 810     }
 811   puts ("where `auto' means use default language for files based on file\n\
 812 name suffix, and `none' means only do regexp processing on files.\n\
 813 If no language is specified and no matching suffix is found,\n\
 814 the first line of the file is read for a sharp-bang (#!) sequence\n\
 815 followed by the name of an interpreter.  If no such sequence is found,\n\
 816 Fortran is tried first; if no tags are found, C is tried next.\n\
 817 When parsing any C file, a \"class\" or \"template\" keyword\n\
 818 switches to C++.");
 819   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 820 \n\
 821 For detailed help on a given language use, for example,\n\
 822 etags --help --lang=ada.");
 823 }
 824
 825 #ifndef EMACS_NAME
 826 # define EMACS_NAME "standalone"
 827 #endif
 828 #ifndef VERSION
 829 # define VERSION "17.38.1.4"
 830 #endif
 831 static _Noreturn void
 832 print_version (void)
 833 {
 834   char emacs_copyright[] = COPYRIGHT;
 835
 836   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 837   puts (emacs_copyright);
 838   puts ("This program is distributed under the terms in ETAGS.README");
 839
 840   exit (EXIT_SUCCESS);
 841 }
 842
 843 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 844 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 845 #endif
 846
 847 static _Noreturn void
 848 print_help (argument *argbuffer)
 849 {
 850   bool help_for_lang = false;
 851
 852   for (; argbuffer->arg_type != at_end; argbuffer++)
 853     if (argbuffer->arg_type == at_language)
 854       {
 855         if (help_for_lang)
 856           puts ("");
 857         puts (argbuffer->lang->help);
 858         help_for_lang = true;
 859       }
 860
 861   if (help_for_lang)
 862     exit (EXIT_SUCCESS);
 863
 864   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 865 \n\
 866 These are the options accepted by %s.\n", progname, progname);
 867   puts ("You may use unambiguous abbreviations for the long option names.");
 868   puts ("  A - as file name means read names from stdin (one per line).\n\
 869 Absolute names are stored in the output file as they are.\n\
 870 Relative ones are stored relative to the output file's directory.\n");
 871
 872   puts ("-a, --append\n\
 873         Append tag entries to existing tags file.");
 874
 875   puts ("--packages-only\n\
 876         For Ada files, only generate tags for packages.");
 877
 878   if (CTAGS)
 879     puts ("-B, --backward-search\n\
 880         Write the search commands for the tag entries using '?', the\n\
 881         backward-search command instead of '/', the forward-search command.");
 882
 883   /* This option is mostly obsolete, because etags can now automatically
 884      detect C++.  Retained for backward compatibility and for debugging and
 885      experimentation.  In principle, we could want to tag as C++ even
 886      before any "class" or "template" keyword.
 887   puts ("-C, --c++\n\
 888         Treat files whose name suffix defaults to C language as C++ files.");
 889   */
 890
 891   puts ("--declarations\n\
 892         In C and derived languages, create tags for function declarations,");
 893   if (CTAGS)
 894     puts ("\tand create tags for extern variables if --globals is used.");
 895   else
 896     puts
 897       ("\tand create tags for extern variables unless --no-globals is used.");
 898
 899   if (CTAGS)
 900     puts ("-d, --defines\n\
 901         Create tag entries for C #define constants and enum constants, too.");
 902   else
 903     puts ("-D, --no-defines\n\
 904         Don't create tag entries for C #define constants and enum constants.\n\
 905         This makes the tags file smaller.");
 906
 907   if (!CTAGS)
 908     puts ("-i FILE, --include=FILE\n\
 909         Include a note in tag file indicating that, when searching for\n\
 910         a tag, one should also consult the tags file FILE after\n\
 911         checking the current file.");
 912
 913   puts ("-l LANG, --language=LANG\n\
 914         Force the following files to be considered as written in the\n\
 915         named language up to the next --language=LANG option.");
 916
 917   if (CTAGS)
 918     puts ("--globals\n\
 919         Create tag entries for global variables in some languages.");
 920   else
 921     puts ("--no-globals\n\
 922         Do not create tag entries for global variables in some\n\
 923         languages.  This makes the tags file smaller.");
 924
 925   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 926     puts ("--no-line-directive\n\
 927         Ignore #line preprocessor directives in C and derived languages.");
 928
 929   if (CTAGS)
 930     puts ("--members\n\
 931         Create tag entries for members of structures in some languages.");
 932   else
 933     puts ("--no-members\n\
 934         Do not create tag entries for members of structures\n\
 935         in some languages.");
 936
 937   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 938         Make a tag for each line matching a regular expression pattern\n\
 939         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 940         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 941         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 942         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 943   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 944         For example Tcl named tags can be created with:\n\
 945           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 946         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 947         `m' means to allow multi-line matches, `s' implies `m' and\n\
 948         causes dot to match any character, including newline.");
 949
 950   puts ("-R, --no-regex\n\
 951         Don't create tags from regexps for the following files.");
 952
 953   puts ("-I, --ignore-indentation\n\
 954         In C and C++ do not assume that a closing brace in the first\n\
 955         column is the final brace of a function or structure definition.");
 956
 957   puts ("-o FILE, --output=FILE\n\
 958         Write the tags to FILE.");
 959
 960   puts ("--parse-stdin=NAME\n\
 961         Read from standard input and record tags as belonging to file NAME.");
 962
 963   if (CTAGS)
 964     {
 965       puts ("-t, --typedefs\n\
 966         Generate tag entries for C and Ada typedefs.");
 967       puts ("-T, --typedefs-and-c++\n\
 968         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 969         and C++ member functions.");
 970     }
 971
 972   if (CTAGS)
 973     puts ("-u, --update\n\
 974         Update the tag entries for the given files, leaving tag\n\
 975         entries for other files in place.  Currently, this is\n\
 976         implemented by deleting the existing entries for the given\n\
 977         files and then rewriting the new entries at the end of the\n\
 978         tags file.  It is often faster to simply rebuild the entire\n\
 979         tag file than to use this.");
 980
 981   if (CTAGS)
 982     {
 983       puts ("-v, --vgrind\n\
 984         Print on the standard output an index of items intended for\n\
 985         human consumption, similar to the output of vgrind.  The index\n\
 986         is sorted, and gives the page number of each item.");
 987
 988       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 989         puts ("-w, --no-duplicates\n\
 990         Do not create duplicate tag entries, for compatibility with\n\
 991         traditional ctags.");
 992
 993       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 994         puts ("-w, --no-warn\n\
 995         Suppress warning messages about duplicate tag entries.");
 996
 997       puts ("-x, --cxref\n\
 998         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 999         The output uses line numbers instead of page numbers, but\n\
1000         beyond that the differences are cosmetic; try both to see\n\
1001         which you like.");
1002     }
1003
1004   puts ("-V, --version\n\
1005         Print the version of the program.\n\
1006 -h, --help\n\
1007         Print this help message.\n\
1008         Followed by one or more `--language' options prints detailed\n\
1009         help about tag generation for the specified languages.");
1010
1011   print_language_names ();
1012
1013   puts ("");
1014   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1015
1016   exit (EXIT_SUCCESS);
1017 }
1018
1019 \f
1020 int
1021 main (int argc, char **argv)
1022 {
1023   int i;
1024   unsigned int nincluded_files;
1025   char **included_files;
1026   argument *argbuffer;
1027   int current_arg, file_count;
1028   linebuffer filename_lb;
1029   bool help_asked = false;
1030   ptrdiff_t len;
1031   char *optstring;
1032   int opt;
1033
1034   progname = argv[0];
1035   nincluded_files = 0;
1036   included_files = xnew (argc, char *);
1037   current_arg = 0;
1038   file_count = 0;
1039
1040   /* Allocate enough no matter what happens.  Overkill, but each one
1041      is small. */
1042   argbuffer = xnew (argc, argument);
1043
1044   /*
1045    * Always find typedefs and structure tags.
1046    * Also default to find macro constants, enum constants, struct
1047    * members and global variables.  Do it for both etags and ctags.
1048    */
1049   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1050   globals = members = true;
1051
1052   /* When the optstring begins with a '-' getopt_long does not rearrange the
1053      non-options arguments to be at the end, but leaves them alone. */
1054   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1055                       (CTAGS) ? "BxdtTuvw" : "Di:",
1056                       "");
1057
1058   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1059     switch (opt)
1060       {
1061       case 0:
1062         /* If getopt returns 0, then it has already processed a
1063            long-named option.  We should do nothing.  */
1064         break;
1065
1066       case 1:
1067         /* This means that a file name has been seen.  Record it. */
1068         argbuffer[current_arg].arg_type = at_filename;
1069         argbuffer[current_arg].what     = optarg;
1070         len = strlen (optarg);
1071         if (whatlen_max < len)
1072           whatlen_max = len;
1073         ++current_arg;
1074         ++file_count;
1075         break;
1076
1077       case STDIN:
1078         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1079         argbuffer[current_arg].arg_type = at_stdin;
1080         argbuffer[current_arg].what     = optarg;
1081         len = strlen (optarg);
1082         if (whatlen_max < len)
1083           whatlen_max = len;
1084         ++current_arg;
1085         ++file_count;
1086         if (parsing_stdin)
1087           fatal ("cannot parse standard input more than once", (char *)NULL);
1088         parsing_stdin = true;
1089         break;
1090
1091         /* Common options. */
1092       case 'a': append_to_tagfile = true;       break;
1093       case 'C': cplusplus = true;               break;
1094       case 'f':         /* for compatibility with old makefiles */
1095       case 'o':
1096         if (tagfile)
1097           {
1098             error ("-o option may only be given once.");
1099             suggest_asking_for_help ();
1100             /* NOTREACHED */
1101           }
1102         tagfile = optarg;
1103         break;
1104       case 'I':
1105       case 'S':         /* for backward compatibility */
1106         ignoreindent = true;
1107         break;
1108       case 'l':
1109         {
1110           language *lang = get_language_from_langname (optarg);
1111           if (lang != NULL)
1112             {
1113               argbuffer[current_arg].lang = lang;
1114               argbuffer[current_arg].arg_type = at_language;
1115               ++current_arg;
1116             }
1117         }
1118         break;
1119       case 'c':
1120         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1121         optarg = concat (optarg, "i", ""); /* memory leak here */
1122         /* FALLTHRU */
1123       case 'r':
1124         argbuffer[current_arg].arg_type = at_regexp;
1125         argbuffer[current_arg].what = optarg;
1126         len = strlen (optarg);
1127         if (whatlen_max < len)
1128           whatlen_max = len;
1129         ++current_arg;
1130         break;
1131       case 'R':
1132         argbuffer[current_arg].arg_type = at_regexp;
1133         argbuffer[current_arg].what = NULL;
1134         ++current_arg;
1135         break;
1136       case 'V':
1137         print_version ();
1138         break;
1139       case 'h':
1140       case 'H':
1141         help_asked = true;
1142         break;
1143
1144         /* Etags options */
1145       case 'D': constantypedefs = false;                        break;
1146       case 'i': included_files[nincluded_files++] = optarg;     break;
1147
1148         /* Ctags options. */
1149       case 'B': searchar = '?';                                 break;
1150       case 'd': constantypedefs = true;                         break;
1151       case 't': typedefs = true;                                break;
1152       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1153       case 'u': update = true;                                  break;
1154       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1155       case 'x': cxref_style = true;                             break;
1156       case 'w': no_warnings = true;                             break;
1157       default:
1158         suggest_asking_for_help ();
1159         /* NOTREACHED */
1160       }
1161
1162   /* No more options.  Store the rest of arguments. */
1163   for (; optind < argc; optind++)
1164     {
1165       argbuffer[current_arg].arg_type = at_filename;
1166       argbuffer[current_arg].what = argv[optind];
1167       len = strlen (argv[optind]);
1168       if (whatlen_max < len)
1169         whatlen_max = len;
1170       ++current_arg;
1171       ++file_count;
1172     }
1173
1174   argbuffer[current_arg].arg_type = at_end;
1175
1176   if (help_asked)
1177     print_help (argbuffer);
1178     /* NOTREACHED */
1179
1180   if (nincluded_files == 0 && file_count == 0)
1181     {
1182       error ("no input files specified.");
1183       suggest_asking_for_help ();
1184       /* NOTREACHED */
1185     }
1186
1187   if (tagfile == NULL)
1188     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1189   cwd = etags_getcwd ();        /* the current working directory */
1190   if (cwd[strlen (cwd) - 1] != '/')
1191     {
1192       char *oldcwd = cwd;
1193       cwd = concat (oldcwd, "/", "");
1194       free (oldcwd);
1195     }
1196
1197   /* Compute base directory for relative file names. */
1198   if (streq (tagfile, "-")
1199       || strneq (tagfile, "/dev/", 5))
1200     tagfiledir = cwd;            /* relative file names are relative to cwd */
1201   else
1202     {
1203       canonicalize_filename (tagfile);
1204       tagfiledir = absolute_dirname (tagfile, cwd);
1205     }
1206
1207   linebuffer_init (&lb);
1208   linebuffer_init (&filename_lb);
1209   linebuffer_init (&filebuf);
1210   linebuffer_init (&token_name);
1211
1212   if (!CTAGS)
1213     {
1214       if (streq (tagfile, "-"))
1215         {
1216           tagf = stdout;
1217           SET_BINARY (fileno (stdout));
1218         }
1219       else
1220         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1221       if (tagf == NULL)
1222         pfatal (tagfile);
1223     }
1224
1225   /*
1226    * Loop through files finding functions.
1227    */
1228   for (i = 0; i < current_arg; i++)
1229     {
1230       static language *lang;    /* non-NULL if language is forced */
1231       char *this_file;
1232
1233       switch (argbuffer[i].arg_type)
1234         {
1235         case at_language:
1236           lang = argbuffer[i].lang;
1237           break;
1238         case at_regexp:
1239           analyze_regex (argbuffer[i].what);
1240           break;
1241         case at_filename:
1242               this_file = argbuffer[i].what;
1243               /* Input file named "-" means read file names from stdin
1244                  (one per line) and use them. */
1245               if (streq (this_file, "-"))
1246                 {
1247                   if (parsing_stdin)
1248                     fatal ("cannot parse standard input AND read file names from it",
1249                            (char *)NULL);
1250                   while (readline_internal (&filename_lb, stdin) > 0)
1251                     process_file_name (filename_lb.buffer, lang);
1252                 }
1253               else
1254                 process_file_name (this_file, lang);
1255           break;
1256         case at_stdin:
1257           this_file = argbuffer[i].what;
1258           process_file (stdin, this_file, lang);
1259           break;
1260         }
1261     }
1262
1263   free_regexps ();
1264   free (lb.buffer);
1265   free (filebuf.buffer);
1266   free (token_name.buffer);
1267
1268   if (!CTAGS || cxref_style)
1269     {
1270       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1271       put_entries (nodehead);
1272       free_tree (nodehead);
1273       nodehead = NULL;
1274       if (!CTAGS)
1275         {
1276           fdesc *fdp;
1277
1278           /* Output file entries that have no tags. */
1279           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1280             if (!fdp->written)
1281               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1282
1283           while (nincluded_files-- > 0)
1284             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1285
1286           if (fclose (tagf) == EOF)
1287             pfatal (tagfile);
1288         }
1289
1290       exit (EXIT_SUCCESS);
1291     }
1292
1293   /* From here on, we are in (CTAGS && !cxref_style) */
1294   if (update)
1295     {
1296       char *cmd =
1297         xmalloc (strlen (tagfile) + whatlen_max +
1298                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1299       for (i = 0; i < current_arg; ++i)
1300         {
1301           switch (argbuffer[i].arg_type)
1302             {
1303             case at_filename:
1304             case at_stdin:
1305               break;
1306             default:
1307               continue;         /* the for loop */
1308             }
1309           char *z = stpcpy (cmd, "mv ");
1310           z = stpcpy (z, tagfile);
1311           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1312           z = stpcpy (z, argbuffer[i].what);
1313           z = stpcpy (z, "\t' OTAGS >");
1314           z = stpcpy (z, tagfile);
1315           strcpy (z, ";rm OTAGS");
1316           if (system (cmd) != EXIT_SUCCESS)
1317             fatal ("failed to execute shell command", (char *)NULL);
1318         }
1319       free (cmd);
1320       append_to_tagfile = true;
1321     }
1322
1323   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1324   if (tagf == NULL)
1325     pfatal (tagfile);
1326   put_entries (nodehead);       /* write all the tags (CTAGS) */
1327   free_tree (nodehead);
1328   nodehead = NULL;
1329   if (fclose (tagf) == EOF)
1330     pfatal (tagfile);
1331
1332   if (CTAGS)
1333     if (append_to_tagfile || update)
1334       {
1335         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1336         /* Maybe these should be used:
1337            setenv ("LC_COLLATE", "C", 1);
1338            setenv ("LC_ALL", "C", 1); */
1339         char *z = stpcpy (cmd, "sort -u -o ");
1340         z = stpcpy (z, tagfile);
1341         *z++ = ' ';
1342         strcpy (z, tagfile);
1343         exit (system (cmd));
1344       }
1345   return EXIT_SUCCESS;
1346 }
1347
1348
1349 /*
1350  * Return a compressor given the file name.  If EXTPTR is non-zero,
1351  * return a pointer into FILE where the compressor-specific
1352  * extension begins.  If no compressor is found, NULL is returned
1353  * and EXTPTR is not significant.
1354  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1355  */
1356 static compressor *
1357 get_compressor_from_suffix (char *file, char **extptr)
1358 {
1359   compressor *compr;
1360   char *slash, *suffix;
1361
1362   /* File has been processed by canonicalize_filename,
1363      so we don't need to consider backslashes on DOS_NT.  */
1364   slash = strrchr (file, '/');
1365   suffix = strrchr (file, '.');
1366   if (suffix == NULL || suffix < slash)
1367     return NULL;
1368   if (extptr != NULL)
1369     *extptr = suffix;
1370   suffix += 1;
1371   /* Let those poor souls who live with DOS 8+3 file name limits get
1372      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1373      Only the first do loop is run if not MSDOS */
1374   do
1375     {
1376       for (compr = compressors; compr->suffix != NULL; compr++)
1377         if (streq (compr->suffix, suffix))
1378           return compr;
1379       if (!MSDOS)
1380         break;                  /* do it only once: not really a loop */
1381       if (extptr != NULL)
1382         *extptr = ++suffix;
1383     } while (*suffix != '\0');
1384   return NULL;
1385 }
1386
1387
1388
1389 /*
1390  * Return a language given the name.
1391  */
1392 static language *
1393 get_language_from_langname (const char *name)
1394 {
1395   language *lang;
1396
1397   if (name == NULL)
1398     error ("empty language name");
1399   else
1400     {
1401       for (lang = lang_names; lang->name != NULL; lang++)
1402         if (streq (name, lang->name))
1403           return lang;
1404       error ("unknown language \"%s\"", name);
1405     }
1406
1407   return NULL;
1408 }
1409
1410
1411 /*
1412  * Return a language given the interpreter name.
1413  */
1414 static language *
1415 get_language_from_interpreter (char *interpreter)
1416 {
1417   language *lang;
1418   const char **iname;
1419
1420   if (interpreter == NULL)
1421     return NULL;
1422   for (lang = lang_names; lang->name != NULL; lang++)
1423     if (lang->interpreters != NULL)
1424       for (iname = lang->interpreters; *iname != NULL; iname++)
1425         if (streq (*iname, interpreter))
1426             return lang;
1427
1428   return NULL;
1429 }
1430
1431
1432
1433 /*
1434  * Return a language given the file name.
1435  */
1436 static language *
1437 get_language_from_filename (char *file, int case_sensitive)
1438 {
1439   language *lang;
1440   const char **name, **ext, *suffix;
1441
1442   /* Try whole file name first. */
1443   for (lang = lang_names; lang->name != NULL; lang++)
1444     if (lang->filenames != NULL)
1445       for (name = lang->filenames; *name != NULL; name++)
1446         if ((case_sensitive)
1447             ? streq (*name, file)
1448             : strcaseeq (*name, file))
1449           return lang;
1450
1451   /* If not found, try suffix after last dot. */
1452   suffix = strrchr (file, '.');
1453   if (suffix == NULL)
1454     return NULL;
1455   suffix += 1;
1456   for (lang = lang_names; lang->name != NULL; lang++)
1457     if (lang->suffixes != NULL)
1458       for (ext = lang->suffixes; *ext != NULL; ext++)
1459         if ((case_sensitive)
1460             ? streq (*ext, suffix)
1461             : strcaseeq (*ext, suffix))
1462           return lang;
1463   return NULL;
1464 }
1465
1466 \f
1467 /*
1468  * This routine is called on each file argument.
1469  */
1470 static void
1471 process_file_name (char *file, language *lang)
1472 {
1473   struct stat stat_buf;
1474   FILE *inf;
1475   fdesc *fdp;
1476   compressor *compr;
1477   char *compressed_name, *uncompressed_name;
1478   char *ext, *real_name, *tmp_name;
1479   int retval;
1480
1481   canonicalize_filename (file);
1482   if (streq (file, tagfile) && !streq (tagfile, "-"))
1483     {
1484       error ("skipping inclusion of %s in self.", file);
1485       return;
1486     }
1487   compr = get_compressor_from_suffix (file, &ext);
1488   if (compr)
1489     {
1490       real_name = compressed_name = savestr (file);
1491       uncompressed_name = savenstr (file, ext - file);
1492     }
1493   else
1494     {
1495       compressed_name = NULL;
1496       real_name = uncompressed_name = savestr (file);
1497     }
1498
1499   /* If the canonicalized uncompressed name
1500      has already been dealt with, skip it silently. */
1501   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1502     {
1503       assert (fdp->infname != NULL);
1504       if (streq (uncompressed_name, fdp->infname))
1505         goto cleanup;
1506     }
1507
1508   if (stat (real_name, &stat_buf) != 0)
1509     {
1510       /* Reset real_name and try with a different name. */
1511       real_name = NULL;
1512       if (compressed_name != NULL) /* try with the given suffix */
1513         {
1514           if (stat (uncompressed_name, &stat_buf) == 0)
1515             real_name = uncompressed_name;
1516         }
1517       else                      /* try all possible suffixes */
1518         {
1519           for (compr = compressors; compr->suffix != NULL; compr++)
1520             {
1521               compressed_name = concat (file, ".", compr->suffix);
1522               if (stat (compressed_name, &stat_buf) != 0)
1523                 {
1524                   if (MSDOS)
1525                     {
1526                       char *suf = compressed_name + strlen (file);
1527                       size_t suflen = strlen (compr->suffix) + 1;
1528                       for ( ; suf[1]; suf++, suflen--)
1529                         {
1530                           memmove (suf, suf + 1, suflen);
1531                           if (stat (compressed_name, &stat_buf) == 0)
1532                             {
1533                               real_name = compressed_name;
1534                               break;
1535                             }
1536                         }
1537                       if (real_name != NULL)
1538                         break;
1539                     } /* MSDOS */
1540                   free (compressed_name);
1541                   compressed_name = NULL;
1542                 }
1543               else
1544                 {
1545                   real_name = compressed_name;
1546                   break;
1547                 }
1548             }
1549         }
1550       if (real_name == NULL)
1551         {
1552           perror (file);
1553           goto cleanup;
1554         }
1555     } /* try with a different name */
1556
1557   if (!S_ISREG (stat_buf.st_mode))
1558     {
1559       error ("skipping %s: it is not a regular file.", real_name);
1560       goto cleanup;
1561     }
1562   if (real_name == compressed_name)
1563     {
1564       tmp_name = etags_mktmp ();
1565       if (!tmp_name)
1566         inf = NULL;
1567       else
1568         {
1569           char *cmd1 = concat (compr->command, " ", real_name);
1570           char *cmd = concat (cmd1, " > ", tmp_name);
1571           free (cmd1);
1572           if (system (cmd) == -1)
1573             inf = NULL;
1574           else
1575             inf = fopen (tmp_name, "r" FOPEN_BINARY);
1576           free (cmd);
1577         }
1578     }
1579   else
1580     inf = fopen (real_name, "r" FOPEN_BINARY);
1581   if (inf == NULL)
1582     {
1583       perror (real_name);
1584       goto cleanup;
1585     }
1586
1587   process_file (inf, uncompressed_name, lang);
1588
1589   retval = fclose (inf);
1590   if (real_name == compressed_name)
1591     {
1592       remove (tmp_name);
1593       free (tmp_name);
1594     }
1595   if (retval < 0)
1596     pfatal (file);
1597
1598  cleanup:
1599   free (compressed_name);
1600   free (uncompressed_name);
1601   last_node = NULL;
1602   curfdp = NULL;
1603   return;
1604 }
1605
1606 static void
1607 process_file (FILE *fh, char *fn, language *lang)
1608 {
1609   static const fdesc emptyfdesc;
1610   fdesc *fdp;
1611
1612   /* Create a new input file description entry. */
1613   fdp = xnew (1, fdesc);
1614   *fdp = emptyfdesc;
1615   fdp->next = fdhead;
1616   fdp->infname = savestr (fn);
1617   fdp->lang = lang;
1618   fdp->infabsname = absolute_filename (fn, cwd);
1619   fdp->infabsdir = absolute_dirname (fn, cwd);
1620   if (filename_is_absolute (fn))
1621     {
1622       /* An absolute file name.  Canonicalize it. */
1623       fdp->taggedfname = absolute_filename (fn, NULL);
1624     }
1625   else
1626     {
1627       /* A file name relative to cwd.  Make it relative
1628          to the directory of the tags file. */
1629       fdp->taggedfname = relative_filename (fn, tagfiledir);
1630     }
1631   fdp->usecharno = true;        /* use char position when making tags */
1632   fdp->prop = NULL;
1633   fdp->written = false;         /* not written on tags file yet */
1634
1635   fdhead = fdp;
1636   curfdp = fdhead;              /* the current file description */
1637
1638   find_entries (fh);
1639
1640   /* If not Ctags, and if this is not metasource and if it contained no #line
1641      directives, we can write the tags and free all nodes pointing to
1642      curfdp. */
1643   if (!CTAGS
1644       && curfdp->usecharno      /* no #line directives in this file */
1645       && !curfdp->lang->metasource)
1646     {
1647       node *np, *prev;
1648
1649       /* Look for the head of the sublist relative to this file.  See add_node
1650          for the structure of the node tree. */
1651       prev = NULL;
1652       for (np = nodehead; np != NULL; prev = np, np = np->left)
1653         if (np->fdp == curfdp)
1654           break;
1655
1656       /* If we generated tags for this file, write and delete them. */
1657       if (np != NULL)
1658         {
1659           /* This is the head of the last sublist, if any.  The following
1660              instructions depend on this being true. */
1661           assert (np->left == NULL);
1662
1663           assert (fdhead == curfdp);
1664           assert (last_node->fdp == curfdp);
1665           put_entries (np);     /* write tags for file curfdp->taggedfname */
1666           free_tree (np);       /* remove the written nodes */
1667           if (prev == NULL)
1668             nodehead = NULL;    /* no nodes left */
1669           else
1670             prev->left = NULL;  /* delete the pointer to the sublist */
1671         }
1672     }
1673 }
1674
1675 /*
1676  * This routine opens the specified file and calls the function
1677  * which finds the function and type definitions.
1678  */
1679 static void
1680 find_entries (FILE *inf)
1681 {
1682   char *cp;
1683   language *lang = curfdp->lang;
1684   Lang_function *parser = NULL;
1685
1686   /* If user specified a language, use it. */
1687   if (lang != NULL && lang->function != NULL)
1688     {
1689       parser = lang->function;
1690     }
1691
1692   /* Else try to guess the language given the file name. */
1693   if (parser == NULL)
1694     {
1695       lang = get_language_from_filename (curfdp->infname, true);
1696       if (lang != NULL && lang->function != NULL)
1697         {
1698           curfdp->lang = lang;
1699           parser = lang->function;
1700         }
1701     }
1702
1703   /* Else look for sharp-bang as the first two characters. */
1704   if (parser == NULL
1705       && readline_internal (&lb, inf) > 0
1706       && lb.len >= 2
1707       && lb.buffer[0] == '#'
1708       && lb.buffer[1] == '!')
1709     {
1710       char *lp;
1711
1712       /* Set lp to point at the first char after the last slash in the
1713          line or, if no slashes, at the first nonblank.  Then set cp to
1714          the first successive blank and terminate the string. */
1715       lp = strrchr (lb.buffer+2, '/');
1716       if (lp != NULL)
1717         lp += 1;
1718       else
1719         lp = skip_spaces (lb.buffer + 2);
1720       cp = skip_non_spaces (lp);
1721       *cp = '\0';
1722
1723       if (strlen (lp) > 0)
1724         {
1725           lang = get_language_from_interpreter (lp);
1726           if (lang != NULL && lang->function != NULL)
1727             {
1728               curfdp->lang = lang;
1729               parser = lang->function;
1730             }
1731         }
1732     }
1733
1734   rewind (inf);
1735
1736   /* Else try to guess the language given the case insensitive file name. */
1737   if (parser == NULL)
1738     {
1739       lang = get_language_from_filename (curfdp->infname, false);
1740       if (lang != NULL && lang->function != NULL)
1741         {
1742           curfdp->lang = lang;
1743           parser = lang->function;
1744         }
1745     }
1746
1747   /* Else try Fortran or C. */
1748   if (parser == NULL)
1749     {
1750       node *old_last_node = last_node;
1751
1752       curfdp->lang = get_language_from_langname ("fortran");
1753       find_entries (inf);
1754
1755       if (old_last_node == last_node)
1756         /* No Fortran entries found.  Try C. */
1757         {
1758           rewind (inf);
1759           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1760           find_entries (inf);
1761         }
1762       return;
1763     }
1764
1765   if (!no_line_directive
1766       && curfdp->lang != NULL && curfdp->lang->metasource)
1767     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1768        file, or anyway we parsed a file that is automatically generated from
1769        this one.  If this is the case, the bingo.c file contained #line
1770        directives that generated tags pointing to this file.  Let's delete
1771        them all before parsing this file, which is the real source. */
1772     {
1773       fdesc **fdpp = &fdhead;
1774       while (*fdpp != NULL)
1775         if (*fdpp != curfdp
1776             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1777           /* We found one of those!  We must delete both the file description
1778              and all tags referring to it. */
1779           {
1780             fdesc *badfdp = *fdpp;
1781
1782             /* Delete the tags referring to badfdp->taggedfname
1783                that were obtained from badfdp->infname. */
1784             invalidate_nodes (badfdp, &nodehead);
1785
1786             *fdpp = badfdp->next; /* remove the bad description from the list */
1787             free_fdesc (badfdp);
1788           }
1789         else
1790           fdpp = &(*fdpp)->next; /* advance the list pointer */
1791     }
1792
1793   assert (parser != NULL);
1794
1795   /* Generic initializations before reading from file. */
1796   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1797
1798   /* Generic initializations before parsing file with readline. */
1799   lineno = 0;                  /* reset global line number */
1800   charno = 0;                  /* reset global char number */
1801   linecharno = 0;              /* reset global char number of line start */
1802
1803   parser (inf);
1804
1805   regex_tag_multiline ();
1806 }
1807
1808 \f
1809 /*
1810  * Check whether an implicitly named tag should be created,
1811  * then call `pfnote'.
1812  * NAME is a string that is internally copied by this function.
1813  *
1814  * TAGS format specification
1815  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1816  * The following is explained in some more detail in etc/ETAGS.EBNF.
1817  *
1818  * make_tag creates tags with "implicit tag names" (unnamed tags)
1819  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1820  *  1. NAME does not contain any of the characters in NONAM;
1821  *  2. LINESTART contains name as either a rightmost, or rightmost but
1822  *     one character, substring;
1823  *  3. the character, if any, immediately before NAME in LINESTART must
1824  *     be a character in NONAM;
1825  *  4. the character, if any, immediately after NAME in LINESTART must
1826  *     also be a character in NONAM.
1827  *
1828  * The implementation uses the notinname() macro, which recognizes the
1829  * characters stored in the string `nonam'.
1830  * etags.el needs to use the same characters that are in NONAM.
1831  */
1832 static void
1833 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1834           int namelen,          /* tag length */
1835           bool is_func,         /* tag is a function */
1836           char *linestart,      /* start of the line where tag is */
1837           int linelen,          /* length of the line where tag is */
1838           int lno,              /* line number */
1839           long int cno)         /* character number */
1840 {
1841   bool named = (name != NULL && namelen > 0);
1842   char *nname = NULL;
1843
1844   if (!CTAGS && named)          /* maybe set named to false */
1845     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1846        such that etags.el can guess a name from it. */
1847     {
1848       int i;
1849       register const char *cp = name;
1850
1851       for (i = 0; i < namelen; i++)
1852         if (notinname (*cp++))
1853           break;
1854       if (i == namelen)                         /* rule #1 */
1855         {
1856           cp = linestart + linelen - namelen;
1857           if (notinname (linestart[linelen-1]))
1858             cp -= 1;                            /* rule #4 */
1859           if (cp >= linestart                   /* rule #2 */
1860               && (cp == linestart
1861                   || notinname (cp[-1]))        /* rule #3 */
1862               && strneq (name, cp, namelen))    /* rule #2 */
1863             named = false;      /* use implicit tag name */
1864         }
1865     }
1866
1867   if (named)
1868     nname = savenstr (name, namelen);
1869
1870   pfnote (nname, is_func, linestart, linelen, lno, cno);
1871 }
1872
1873 /* Record a tag. */
1874 static void
1875 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1876         long int cno)
1877                                 /* tag name, or NULL if unnamed */
1878                                 /* tag is a function */
1879                                 /* start of the line where tag is */
1880                                 /* length of the line where tag is */
1881                                 /* line number */
1882                                 /* character number */
1883 {
1884   register node *np;
1885
1886   assert (name == NULL || name[0] != '\0');
1887   if (CTAGS && name == NULL)
1888     return;
1889
1890   np = xnew (1, node);
1891
1892   /* If ctags mode, change name "main" to M<thisfilename>. */
1893   if (CTAGS && !cxref_style && streq (name, "main"))
1894     {
1895       char *fp = strrchr (curfdp->taggedfname, '/');
1896       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1897       fp = strrchr (np->name, '.');
1898       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1899         fp[0] = '\0';
1900     }
1901   else
1902     np->name = name;
1903   np->valid = true;
1904   np->been_warned = false;
1905   np->fdp = curfdp;
1906   np->is_func = is_func;
1907   np->lno = lno;
1908   if (np->fdp->usecharno)
1909     /* Our char numbers are 0-base, because of C language tradition?
1910        ctags compatibility?  old versions compatibility?   I don't know.
1911        Anyway, since emacs's are 1-base we expect etags.el to take care
1912        of the difference.  If we wanted to have 1-based numbers, we would
1913        uncomment the +1 below. */
1914     np->cno = cno /* + 1 */ ;
1915   else
1916     np->cno = invalidcharno;
1917   np->left = np->right = NULL;
1918   if (CTAGS && !cxref_style)
1919     {
1920       if (strlen (linestart) < 50)
1921         np->regex = concat (linestart, "$", "");
1922       else
1923         np->regex = savenstr (linestart, 50);
1924     }
1925   else
1926     np->regex = savenstr (linestart, linelen);
1927
1928   add_node (np, &nodehead);
1929 }
1930
1931 /*
1932  * free_tree ()
1933  *      recurse on left children, iterate on right children.
1934  */
1935 static void
1936 free_tree (register node *np)
1937 {
1938   while (np)
1939     {
1940       register node *node_right = np->right;
1941       free_tree (np->left);
1942       free (np->name);
1943       free (np->regex);
1944       free (np);
1945       np = node_right;
1946     }
1947 }
1948
1949 /*
1950  * free_fdesc ()
1951  *      delete a file description
1952  */
1953 static void
1954 free_fdesc (register fdesc *fdp)
1955 {
1956   free (fdp->infname);
1957   free (fdp->infabsname);
1958   free (fdp->infabsdir);
1959   free (fdp->taggedfname);
1960   free (fdp->prop);
1961   free (fdp);
1962 }
1963
1964 /*
1965  * add_node ()
1966  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1967  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1968  *      balancing.
1969  *
1970  *      add_node is the only function allowed to add nodes, so it can
1971  *      maintain state.
1972  */
1973 static void
1974 add_node (node *np, node **cur_node_p)
1975 {
1976   register int dif;
1977   register node *cur_node = *cur_node_p;
1978
1979   if (cur_node == NULL)
1980     {
1981       *cur_node_p = np;
1982       last_node = np;
1983       return;
1984     }
1985
1986   if (!CTAGS)
1987     /* Etags Mode */
1988     {
1989       /* For each file name, tags are in a linked sublist on the right
1990          pointer.  The first tags of different files are a linked list
1991          on the left pointer.  last_node points to the end of the last
1992          used sublist. */
1993       if (last_node != NULL && last_node->fdp == np->fdp)
1994         {
1995           /* Let's use the same sublist as the last added node. */
1996           assert (last_node->right == NULL);
1997           last_node->right = np;
1998           last_node = np;
1999         }
2000       else if (cur_node->fdp == np->fdp)
2001         {
2002           /* Scanning the list we found the head of a sublist which is
2003              good for us.  Let's scan this sublist. */
2004           add_node (np, &cur_node->right);
2005         }
2006       else
2007         /* The head of this sublist is not good for us.  Let's try the
2008            next one. */
2009         add_node (np, &cur_node->left);
2010     } /* if ETAGS mode */
2011
2012   else
2013     {
2014       /* Ctags Mode */
2015       dif = strcmp (np->name, cur_node->name);
2016
2017       /*
2018        * If this tag name matches an existing one, then
2019        * do not add the node, but maybe print a warning.
2020        */
2021       if (no_duplicates && !dif)
2022         {
2023           if (np->fdp == cur_node->fdp)
2024             {
2025               if (!no_warnings)
2026                 {
2027                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2028                            np->fdp->infname, lineno, np->name);
2029                   fprintf (stderr, "Second entry ignored\n");
2030                 }
2031             }
2032           else if (!cur_node->been_warned && !no_warnings)
2033             {
2034               fprintf
2035                 (stderr,
2036                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2037                  np->fdp->infname, cur_node->fdp->infname, np->name);
2038               cur_node->been_warned = true;
2039             }
2040           return;
2041         }
2042
2043       /* Actually add the node */
2044       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2045     } /* if CTAGS mode */
2046 }
2047
2048 /*
2049  * invalidate_nodes ()
2050  *      Scan the node tree and invalidate all nodes pointing to the
2051  *      given file description (CTAGS case) or free them (ETAGS case).
2052  */
2053 static void
2054 invalidate_nodes (fdesc *badfdp, node **npp)
2055 {
2056   node *np = *npp;
2057
2058   if (np == NULL)
2059     return;
2060
2061   if (CTAGS)
2062     {
2063       if (np->left != NULL)
2064         invalidate_nodes (badfdp, &np->left);
2065       if (np->fdp == badfdp)
2066         np->valid = false;
2067       if (np->right != NULL)
2068         invalidate_nodes (badfdp, &np->right);
2069     }
2070   else
2071     {
2072       assert (np->fdp != NULL);
2073       if (np->fdp == badfdp)
2074         {
2075           *npp = np->left;      /* detach the sublist from the list */
2076           np->left = NULL;      /* isolate it */
2077           free_tree (np);       /* free it */
2078           invalidate_nodes (badfdp, npp);
2079         }
2080       else
2081         invalidate_nodes (badfdp, &np->left);
2082     }
2083 }
2084
2085 \f
/* Forward declarations of the two size helpers defined just below;
   total_size_of_entries is called by put_entries. */
static int total_size_of_entries (node *);
static int number_len (long) ATTRIBUTE_CONST;
2088
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero counts as one digit. */
static int
number_len (long int num)
{
  int digits;

  /* Every value of at least 10 contributes one more digit. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2098
2099 /*
2100  * Return total number of characters that put_entries will output for
2101  * the nodes in the linked list at the right of the specified node.
2102  * This count is irrelevant with etags.el since emacs 19.34 at least,
2103  * but is still supplied for backward compatibility.
2104  */
2105 static int
2106 total_size_of_entries (register node *np)
2107 {
2108   register int total = 0;
2109
2110   for (; np != NULL; np = np->right)
2111     if (np->valid)
2112       {
2113         total += strlen (np->regex) + 1;                /* pat\177 */
2114         if (np->name != NULL)
2115           total += strlen (np->name) + 1;               /* name\001 */
2116         total += number_len ((long) np->lno) + 1;       /* lno, */
2117         if (np->cno != invalidcharno)                   /* cno */
2118           total += number_len (np->cno);
2119         total += 1;                                     /* newline */
2120       }
2121
2122   return total;
2123 }
2124
2125 static void
2126 put_entries (register node *np)
2127 {
2128   register char *sp;
2129   static fdesc *fdp = NULL;
2130
2131   if (np == NULL)
2132     return;
2133
2134   /* Output subentries that precede this one */
2135   if (CTAGS)
2136     put_entries (np->left);
2137
2138   /* Output this entry */
2139   if (np->valid)
2140     {
2141       if (!CTAGS)
2142         {
2143           /* Etags mode */
2144           if (fdp != np->fdp)
2145             {
2146               fdp = np->fdp;
2147               fprintf (tagf, "\f\n%s,%d\n",
2148                        fdp->taggedfname, total_size_of_entries (np));
2149               fdp->written = true;
2150             }
2151           fputs (np->regex, tagf);
2152           fputc ('\177', tagf);
2153           if (np->name != NULL)
2154             {
2155               fputs (np->name, tagf);
2156               fputc ('\001', tagf);
2157             }
2158           fprintf (tagf, "%d,", np->lno);
2159           if (np->cno != invalidcharno)
2160             fprintf (tagf, "%ld", np->cno);
2161           fputs ("\n", tagf);
2162         }
2163       else
2164         {
2165           /* Ctags mode */
2166           if (np->name == NULL)
2167             error ("internal error: NULL name in ctags mode.");
2168
2169           if (cxref_style)
2170             {
2171               if (vgrind_style)
2172                 fprintf (stdout, "%s %s %d\n",
2173                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2174               else
2175                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2176                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2177             }
2178           else
2179             {
2180               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2181
2182               if (np->is_func)
2183                 {               /* function or #define macro with args */
2184                   putc (searchar, tagf);
2185                   putc ('^', tagf);
2186
2187                   for (sp = np->regex; *sp; sp++)
2188                     {
2189                       if (*sp == '\\' || *sp == searchar)
2190                         putc ('\\', tagf);
2191                       putc (*sp, tagf);
2192                     }
2193                   putc (searchar, tagf);
2194                 }
2195               else
2196                 {               /* anything else; text pattern inadequate */
2197                   fprintf (tagf, "%d", np->lno);
2198                 }
2199               putc ('\n', tagf);
2200             }
2201         }
2202     } /* if this node contains a valid tag */
2203
2204   /* Output subentries that follow this one */
2205   put_entries (np->right);
2206   if (!CTAGS)
2207     put_entries (np->left);
2208 }
2209
2210 \f
/* C extensions.
   Bit values naming the C dialect being parsed.  In the keyword table
   below they fill the c_ext column: C_symtype accepts a keyword whose
   c_ext is zero, or shares at least one bit with the dialect passed to
   it.  Note that C_STAR and C_JAVA both contain the C_PLPL bit, which
   is why the table uses (C_JAVA & ~C_PLPL) for Java-only keywords. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2219
/*
 * The C symbol tables.
 *
 * sym_type is the class of a keyword: it is the `type' field of the
 * keyword table entries and the value returned by C_symtype.
 */
enum sym_type
{
  st_none,                      /* not a keyword for the current dialect */
  st_C_objprot, st_C_objimpl, st_C_objend, /* @interface/@protocol,
                                              @implementation, @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore, st_C_attribute,  /* if, for, while...; __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class, st_C_template,    /* class; template */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2234
2235 /* Feed stuff between (but not including) %[ and %] lines to:
2236      gperf -m 5
2237 %[
2238 %compare-strncmp
2239 %enum
2240 %struct-type
2241 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2242 %%
2243 if,             0,                      st_C_ignore
2244 for,            0,                      st_C_ignore
2245 while,          0,                      st_C_ignore
2246 switch,         0,                      st_C_ignore
2247 return,         0,                      st_C_ignore
2248 __attribute__,  0,                      st_C_attribute
2249 GTY,            0,                      st_C_attribute
2250 @interface,     0,                      st_C_objprot
2251 @protocol,      0,                      st_C_objprot
2252 @implementation,0,                      st_C_objimpl
2253 @end,           0,                      st_C_objend
2254 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2255 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2256 friend,         C_PLPL,                 st_C_ignore
2257 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2258 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2259 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2260 class,          0,                      st_C_class
2261 namespace,      C_PLPL,                 st_C_struct
2262 domain,         C_STAR,                 st_C_struct
2263 union,          0,                      st_C_struct
2264 struct,         0,                      st_C_struct
2265 extern,         0,                      st_C_extern
2266 enum,           0,                      st_C_enum
2267 typedef,        0,                      st_C_typedef
2268 define,         0,                      st_C_define
2269 undef,          0,                      st_C_define
2270 operator,       C_PLPL,                 st_C_operator
2271 template,       0,                      st_C_template
2272 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2273 DEFUN,          0,                      st_C_gnumacro
2274 SYSCALL,        0,                      st_C_gnumacro
2275 ENTRY,          0,                      st_C_gnumacro
2276 PSEUDO,         0,                      st_C_gnumacro
2277 # These are defined inside C functions, so currently they are not met.
2278 # EXFUN used in glibc, DEFVAR_* in emacs.
2279 #EXFUN,         0,                      st_C_gnumacro
2280 #DEFVAR_,       0,                      st_C_gnumacro
2281 %]
2282 and replace lines between %< and %> with its output, then:
2283  - remove the #if characterset check
2284  - make in_word_set static and not inline. */
2285 /*%<*/
2286 /* C code produced by gperf version 3.0.1 */
2287 /* Command-line: gperf -m 5  */
2288 /* Computed positions: -k'2-3' */
2289
/* One keyword table entry: the keyword text, the C_* dialect bits it is
   restricted to (zero means no restriction, see C_symtype) and the
   class of the keyword. */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2291 /* maximum key range = 33, duplicates = 0 */
2292
/* Hash function produced by gperf for the keyword table.
   The value is LEN plus the asso_values entry of the second character
   of STR and, when LEN is not 2, of the third character as well.
   STR is therefore read at indexes 1 and possibly 2: the caller
   (in_word_set) only calls this with LEN of at least 2.
   This is generated code: regenerate it as explained in the comment
   above rather than editing it by hand. */
static int
hash (const char *str, int len)
{
  /* Per-character weights, indexed by unsigned char value.  35 is
     greater than MAX_HASH_VALUE in in_word_set, so a character with
     that weight makes the lookup fail. */
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  switch (hval)
    {
      default:
        /* Three or more characters: the third one counts too. */
        hval += asso_values[(unsigned char) str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char) str[1]];
        break;
    }
  return hval;
}
2338
/* Keyword lookup produced by gperf.
   Return the table entry whose name is exactly the first LEN characters
   of STR, or 0 if there is none.  STR does not need to be terminated
   after LEN characters.
   This is generated code: regenerate it as explained in the comment
   above rather than editing it by hand. */
static struct C_stab_entry *
in_word_set (register const char *str, register unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 33,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 2,
      MAX_HASH_VALUE = 34
    };

  /* Indexed by hash value.  The two leading empty entries fill the
     slots below MIN_HASH_VALUE. */
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""},
      {"if",            0,                      st_C_ignore},
      {"GTY",           0,                      st_C_attribute},
      {"@end",          0,                      st_C_objend},
      {"union",         0,                      st_C_struct},
      {"define",                0,                      st_C_define},
      {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"template",      0,                      st_C_template},
      {"operator",      C_PLPL,                 st_C_operator},
      {"@interface",    0,                      st_C_objprot},
      {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"friend",                C_PLPL,                 st_C_ignore},
      {"typedef",       0,                      st_C_typedef},
      {"return",                0,                      st_C_ignore},
      {"@implementation",0,                     st_C_objimpl},
      {"@protocol",     0,                      st_C_objprot},
      {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
      {"extern",                0,                      st_C_extern},
      {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"struct",                0,                      st_C_struct},
      {"domain",                C_STAR,                 st_C_struct},
      {"switch",                0,                      st_C_ignore},
      {"enum",          0,                      st_C_enum},
      {"for",           0,                      st_C_ignore},
      {"namespace",     C_PLPL,                 st_C_struct},
      {"class",         0,                      st_C_class},
      {"while",         0,                      st_C_ignore},
      {"undef",         0,                      st_C_define},
      {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"__attribute__", 0,                      st_C_attribute},
      {"SYSCALL",       0,                      st_C_gnumacro},
      {"ENTRY",         0,                      st_C_gnumacro},
      {"PSEUDO",                0,                      st_C_gnumacro},
      {"DEFUN",         0,                      st_C_gnumacro}
    };

  /* Only lengths that some keyword has can match; this also guarantees
     that hash never reads STR below index 1 with LEN less than 2. */
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key].name;

          /* The hash only selects a candidate: compare all LEN
             characters and check that the keyword ends right there. */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
2403 /*%>*/
2404
2405 static enum sym_type
2406 C_symtype (char *str, int len, int c_ext)
2407 {
2408   register struct C_stab_entry *se = in_word_set (str, len);
2409
2410   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2411     return st_none;
2412   return se->type;
2413 }
2414
2415 \f
/*
 * Ignoring __attribute__ ((list))
 * This flag is the state kept for that purpose.
 */
static bool inattribute;        /* looking at an __attribute__ construct */
2420
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 * States marked "func:" are reached only while a function is being
 * recognized.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;
2438
static bool fvextern;           /* func or var: extern keyword seen */
2440
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2454
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2468
2469 /*
2470  * When objdef is different from onone, objtag is the name of the class.
2471  * consider_token stores a savenstr copy here and never frees it.  */
2472 static const char *objtag = "<uninited>";
2473
2474 /*
2475  * Yet another little state machine to deal with preprocessor lines.
2476  * The CNL macro resets it to dnone; CNL_SAVE_DEFINEDEF leaves it alone.  */
2477 static enum
2478 {
2479   dnone,                        /* nothing seen */
2480   dsharpseen,                   /* '#' seen as first char on line */
2481   ddefineseen,                  /* '#' and 'define' seen */
2482   dignorerest                   /* ignore rest of line */
2483 } definedef;
2484
2485 /*
2486  * State machine for Objective C protocols and implementations.
2487  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2488  */
2489 static enum
2490 {
2491   onone,                        /* nothing seen */
2492   oprotocol,                    /* @interface or @protocol seen */
2493   oimplementation,              /* @implementation seen */
2494   otagseen,                     /* class name seen */
2495   oparenseen,                   /* parenthesis before category seen */
2496   ocatseen,                     /* category name seen */
2497   oinbody,                      /* in @implementation body */
2498   omethodsign,                  /* in @implementation body, after +/- */
2499   omethodtag,                   /* after method name */
2500   omethodcolon,                 /* after method colon */
2501   omethodparm,                  /* after method parameter */
2502   oignore                       /* wait for @end */
2503 } objdef;
2504
2505
2506 /*
2507  * Use this structure to keep info about the token read, and how it
2508  * should be tagged.  Used by the make_C_tag function to build a tag.
2509  */
2510 static struct tok
2511 {
2512   char *line;                   /* string containing the token */
2513   int offset;                   /* where the token starts in LINE */
2514   int length;                   /* token length */
2515   /*
2516     The previous members can be used to pass strings around for generic
2517     purposes.  The following ones specifically refer to creating tags.  In this
2518     case the token contained here is the pattern that will be used to create a
2519     tag.
2520   */
2521   bool valid;                   /* a tag is created only when this is true;
2522                                    it must be cleared whenever a state machine
2523                                    is reset prematurely */
2524   bool named;                   /* create a named tag */
2525   int lineno;                   /* source line number of tag */
2526   long linepos;                 /* source char number of tag */
2527 } token;                        /* latest token read */
2528
2529 /*
2530  * Variables and functions for dealing with nested structures.
2531  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2532  */
2533 static void pushclass_above (int, char *, int);
2534 static void popclass_above (int);
2535 static void write_classname (linebuffer *, const char *qualifier);
2536
2537 static struct {
2538   char **cname;                 /* nested class names; NULL if unnamed */
2539   int *bracelev;                /* brace level recorded for each entry */
2540   int nl;                       /* class nesting level (elements used) */
2541   int size;                     /* allocated length of both arrays */
2542 } cstack;                       /* stack for nested declaration tags */
2543 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2544 #define nestlev         (cstack.nl)
2545 /* After struct keyword or in struct body, not inside a nested function.
     Refers to a variable named `bracelev' in scope where it is expanded.  */
2546 #define instruct        (structdef == snone && nestlev > 0                      \
2547                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2548
2549 static void
2550 pushclass_above (int bracelev, char *str, int len)
2551 {
2552   int nl;
2553
2554   popclass_above (bracelev);
2555   nl = cstack.nl;
2556   if (nl >= cstack.size)
2557     {
2558       int size = cstack.size *= 2;
2559       xrnew (cstack.cname, size, char *);
2560       xrnew (cstack.bracelev, size, int);
2561     }
2562   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2563   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2564   cstack.bracelev[nl] = bracelev;
2565   cstack.nl = nl + 1;
2566 }
2567
2568 static void
2569 popclass_above (int bracelev)
2570 {
2571   int nl;
2572
2573   for (nl = cstack.nl - 1;
2574        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2575        nl--)
2576     {
2577       free (cstack.cname[nl]);
2578       cstack.nl = nl;
2579     }
2580 }
2581
2582 static void
2583 write_classname (linebuffer *cn, const char *qualifier)
2584 {
2585   int i, len;
2586   int qlen = strlen (qualifier);
2587
2588   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2589     {
2590       len = 0;
2591       cn->len = 0;
2592       cn->buffer[0] = '\0';
2593     }
2594   else
2595     {
2596       len = strlen (cstack.cname[0]);
2597       linebuffer_setlen (cn, len);
2598       strcpy (cn->buffer, cstack.cname[0]);
2599     }
2600   for (i = 1; i < cstack.nl; i++)
2601     {
2602       char *s = cstack.cname[i];
2603       if (s == NULL)
2604         continue;
2605       linebuffer_setlen (cn, len + qlen + strlen (s));
2606       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2607     }
2608 }
2609
2610 \f
2611 static bool consider_token (char *, int, int, int *, int, int, bool *);
2612 static void make_C_tag (bool);
2613
2614 /*
2615  * consider_token ()
2616  *      checks to see if the current token is at the start of a
2617  *      function or variable, or corresponds to a typedef, or
2618  *      is a struct/union/enum tag, or #define, or an enum constant.
2619  *      Returns true when the caller should treat the token as a tag.
2620  *      *IS_FUNC_OR_VAR gets true if the token is a function, a variable,
2621  *      or a #define macro with args.  C_EXTP points to which language we
2622  *      are looking at; it is updated when C++ is auto-detected.
2623  * Globals
2624  *      fvdef                   IN OUT
2625  *      structdef               IN OUT
2626  *      definedef               IN OUT
2627  *      typdef                  IN OUT
2628  *      objdef                  IN OUT
      *      inattribute, fvextern, objtag, token_name   OUT
2629  */
2630
2631 static bool
2632 consider_token (char *str, int len, int c, int *c_extp,
2633                 int bracelev, int parlev, bool *is_func_or_var)
2634                                 /* STR: IN: token pointer */
2635                                 /* LEN: IN: token length */
2636                                 /* C: IN: first char after the token */
2637                                 /* C_EXTP: IN, OUT: C extensions mask */
2638                                 /* BRACELEV: IN: brace level */
2639                                 /* PARLEV: IN: parenthesis level */
2640                                 /* IS_FUNC_OR_VAR: OUT: function or variable found */
2641 {
2642   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2643      structtype is the type of the preceding struct-like keyword, and
2644      structbracelev is the brace level where it has been seen.
        Both persist across calls; toktype is recomputed on every call. */
2645   static enum sym_type structtype;
2646   static int structbracelev;
2647   static enum sym_type toktype;
2648
2649
2650   toktype = C_symtype (str, len, *c_extp);
2651
2652   /*
2653    * Skip __attribute__
2654    */
2655   if (toktype == st_C_attribute)
2656     {
2657       inattribute = true;
2658       return false;
2659      }
2660
2661    /*
2662     * Advance the definedef state machine.
2663     */
2664    switch (definedef)
2665      {
2666      case dnone:
2667        /* We're not on a preprocessor line. */
2668        if (toktype == st_C_gnumacro)
2669          {
2670            fvdef = fdefunkey;
2671            return false;
2672          }
2673        break;
2674      case dsharpseen:
          /* First token after '#': only `define' is of interest. */
2675        if (toktype == st_C_define)
2676          {
2677            definedef = ddefineseen;
2678          }
2679        else
2680          {
2681            definedef = dignorerest;
2682          }
2683        return false;
2684      case ddefineseen:
2685        /*
2686         * Make a tag for any macro, unless it is a constant
2687         * and constantypedefs is false.
2688         */
2689        definedef = dignorerest;
2690        *is_func_or_var = (c == '(');
2691        if (!*is_func_or_var && !constantypedefs)
2692          return false;
2693        else
2694          return true;
2695      case dignorerest:
2696        return false;
2697      default:
2698        error ("internal error: definedef value.");
2699      }
2700
2701    /*
2702     * Now typedefs
2703     */
2704    switch (typdef)
2705      {
2706      case tnone:
2707        if (toktype == st_C_typedef)
2708          {
2709            if (typedefs)
2710              typdef = tkeyseen;
2711            fvextern = false;
2712            fvdef = fvnone;
2713            return false;
2714          }
2715        break;
2716      case tkeyseen:
          /* Other token types leave typdef unchanged. */
2717        switch (toktype)
2718          {
2719          case st_none:
2720          case st_C_class:
2721          case st_C_struct:
2722          case st_C_enum:
2723            typdef = ttypeseen;
2724          }
2725        break;
2726      case ttypeseen:
2727        if (structdef == snone && fvdef == fvnone)
2728          {
2729            fvdef = fvnameseen;
2730            return true;
2731          }
2732        break;
2733      case tend:
          /* Struct-like keywords are not tagged here; any other token is. */
2734        switch (toktype)
2735          {
2736          case st_C_class:
2737          case st_C_struct:
2738          case st_C_enum:
2739            return false;
2740          }
2741        return true;
2742      }
2743
        /* Struct-like keywords: start the structdef automaton. */
2744    switch (toktype)
2745      {
2746      case st_C_javastruct:
2747        if (structdef == stagseen)
2748          structdef = scolonseen;
2749        return false;
2750      case st_C_template:
2751      case st_C_class:
2752        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2753            && bracelev == 0
2754            && definedef == dnone && structdef == snone
2755            && typdef == tnone && fvdef == fvnone)
2756          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2757        if (toktype == st_C_template)
2758          break;
2759        /* FALLTHRU */
2760      case st_C_struct:
2761      case st_C_enum:
2762        if (parlev == 0
2763            && fvdef != vignore
2764            && (typdef == tkeyseen
2765                || (typedefs_or_cplusplus && structdef == snone)))
2766          {
2767            structdef = skeyseen;
2768            structtype = toktype;
2769            structbracelev = bracelev;
2770            if (fvdef == fvnameseen)
2771              fvdef = fvnone;
2772          }
2773        return false;
2774      }
2775
        /* The token right after a struct-like keyword is its tag. */
2776    if (structdef == skeyseen)
2777      {
2778        structdef = stagseen;
2779        return true;
2780      }
2781
2782    if (typdef != tnone)
2783      definedef = dnone;
2784
2785    /* Detect Objective C constructs. */
2786    switch (objdef)
2787      {
2788      case onone:
2789        switch (toktype)
2790          {
2791          case st_C_objprot:
2792            objdef = oprotocol;
2793            return false;
2794          case st_C_objimpl:
2795            objdef = oimplementation;
2796            return false;
2797          }
2798        break;
2799      case oimplementation:
2800        /* Save the class tag for functions or variables defined inside. */
2801        objtag = savenstr (str, len);
2802        objdef = oinbody;
2803        return false;
2804      case oprotocol:
2805        /* Save the class tag for categories. */
2806        objtag = savenstr (str, len);
2807        objdef = otagseen;
2808        *is_func_or_var = true;
2809        return true;
2810      case oparenseen:
2811        objdef = ocatseen;
2812        *is_func_or_var = true;
2813        return true;
2814      case oinbody:
2815        break;
2816      case omethodsign:
2817        if (parlev == 0)
2818          {
              /* First part of the method name: start token_name afresh. */
2819            fvdef = fvnone;
2820            objdef = omethodtag;
2821            linebuffer_setlen (&token_name, len);
2822            memcpy (token_name.buffer, str, len);
2823            token_name.buffer[len] = '\0';
2824            return true;
2825          }
2826        return false;
2827      case omethodcolon:
2828        if (parlev == 0)
2829          objdef = omethodparm;
2830        return false;
2831      case omethodparm:
2832        if (parlev == 0)
2833          {
              /* Further part of the method name: append to token_name. */
2834            int oldlen = token_name.len;
2835            fvdef = fvnone;
2836            objdef = omethodtag;
2837            linebuffer_setlen (&token_name, oldlen + len);
2838            memcpy (token_name.buffer + oldlen, str, len);
2839            token_name.buffer[oldlen + len] = '\0';
2840            return true;
2841          }
2842        return false;
2843      case oignore:
2844        if (toktype == st_C_objend)
2845          {
2846            /* Memory leakage here: the string pointed by objtag is
2847               never released, because many tests would be needed to
2848               avoid breaking on incorrect input code.  The amount of
2849               memory leaked here is the sum of the lengths of the
2850               class tags.
2851            free (objtag); */
2852            objdef = onone;
2853          }
2854        return false;
2855      }
2856
2857    /* A function, variable or enum constant? */
2858    switch (toktype)
2859      {
2860      case st_C_extern:
2861        fvextern = true;
2862        switch  (fvdef)
2863          {
2864          case finlist:
2865          case flistseen:
2866          case fignore:
2867          case vignore:
2868            break;
2869          default:
2870            fvdef = fvnone;
2871          }
2872        return false;
2873      case st_C_ignore:
2874        fvextern = false;
2875        fvdef = vignore;
2876        return false;
2877      case st_C_operator:
2878        fvdef = foperator;
2879        *is_func_or_var = true;
2880        return true;
2881      case st_none:
2882        if (constantypedefs
2883            && structdef == snone
2884            && structtype == st_C_enum && bracelev > structbracelev
2885            /* Don't tag tokens in expressions that assign values to enum
2886               constants.  */
2887            && fvdef != vignore)
2888          return true;           /* enum constant */
2889        switch (fvdef)
2890          {
2891          case fdefunkey:
2892            if (bracelev > 0)
2893              break;
2894            fvdef = fdefunname;  /* GNU macro */
2895            *is_func_or_var = true;
2896            return true;
2897          case fvnone:
2898            switch (typdef)
2899              {
2900              case ttypeseen:
2901                return false;
2902              case tnone:
                  /* An asm statement is skipped like a variable. */
2903                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2904                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2905                  {
2906                    fvdef = vignore;
2907                    return false;
2908                  }
2909                break;
2910              }
2911           /* FALLTHRU */
2912           case fvnameseen:
2913           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2914             {
2915               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2916                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2917               fvdef = foperator;
2918               *is_func_or_var = true;
2919               return true;
2920             }
2921           if (bracelev > 0 && !instruct)
2922             break;
2923           fvdef = fvnameseen;   /* function or variable */
2924           *is_func_or_var = true;
2925           return true;
2926         }
2927       break;
2928     }
2929
2930   return false;
2931 }
2932
2933 \f
2934 /*
2935  * C_entries often keeps pointers to tokens or lines which are older than
2936  * the line currently read.  By keeping two line buffers, and switching
2937  * them at end of line, it is possible to use those pointers.
2938  */
2939 static struct
2940 {
2941   long linepos;
2942   linebuffer lb;
2943 } lbs[2];
2944
     /* The macros below refer by name to variables that must be in scope
        where they are expanded: curndx, newndx, c_ext, lp, quotednl, inf,
        savetoken and charno.  */
2945 #define current_lb_is_new (newndx == curndx)
2946 #define switch_line_buffers() (curndx = 1 - curndx)
2947
2948 #define curlb (lbs[curndx].lb)
2949 #define newlb (lbs[newndx].lb)
2950 #define curlinepos (lbs[curndx].linepos)
2951 #define newlinepos (lbs[newndx].linepos)
2952
2953 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2954 #define cplpl (c_ext & C_PLPL)
2955 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2956
     /* Read the next line into the current buffer and restart scanning at
        its beginning, leaving definedef untouched.  */
2957 #define CNL_SAVE_DEFINEDEF()                                            \
2958 do {                                                                    \
2959   curlinepos = charno;                                                  \
2960   readline (&curlb, inf);                                               \
2961   lp = curlb.buffer;                                                    \
2962   quotednl = false;                                                     \
2963   newndx = curndx;                                                      \
2964 } while (0)
2965
     /* Like CNL_SAVE_DEFINEDEF, but also restore a token saved in savetoken
        (if valid) and reset definedef to dnone.  */
2966 #define CNL()                                                           \
2967 do {                                                                    \
2968   CNL_SAVE_DEFINEDEF();                                                 \
2969   if (savetoken.valid)                                                  \
2970     {                                                                   \
2971       token = savetoken;                                                \
2972       savetoken.valid = false;                                          \
2973     }                                                                   \
2974   definedef = dnone;                                                    \
2975 } while (0)
2976
2977
2978 static void
2979 make_C_tag (bool isfun)
2980 {
2981   /* This function is never called when token.valid is false, but
2982      we must protect against invalid input or internal errors. */
2983   if (token.valid)
2984     make_tag (token_name.buffer, token_name.len, isfun, token.line,
2985               token.offset+token.length+1, token.lineno, token.linepos);
2986   else if (DEBUG)
2987     {                             /* this branch is optimized away if !DEBUG */
2988       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2989                 token_name.len + 17, isfun, token.line,
2990                 token.offset+token.length+1, token.lineno, token.linepos);
2991       error ("INVALID TOKEN");
2992     }
2993
2994   token.valid = false;
2995 }
2996
2997
2998 /*
2999  * C_entries ()
3000  *      This routine finds functions, variables, typedefs,
3001  *      #define's, enum constants and struct/union/enum definitions in
3002  *      C syntax and adds them to the list.
3003  */
3004 static void
3005 C_entries (int c_ext, FILE *inf)
3006                                 /* extension of C */
3007                                 /* input file */
3008 {
3009   register char c;              /* latest char read; '\0' for end of line */
3010   register char *lp;            /* pointer one beyond the character `c' */
3011   int curndx, newndx;           /* indices for current and new lb */
3012   register int tokoff;          /* offset in line of start of current token */
3013   register int toklen;          /* length of current token */
3014   const char *qualifier;        /* string used to qualify names */
3015   int qlen;                     /* length of qualifier */
3016   int bracelev;                 /* current brace level */
3017   int bracketlev;               /* current bracket level */
3018   int parlev;                   /* current parenthesis level */
3019   int attrparlev;               /* __attribute__ parenthesis level */
3020   int templatelev;              /* current template level */
3021   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3022   bool incomm, inquote, inchar, quotednl, midtoken;
3023   bool yacc_rules;              /* in the rules part of a yacc file */
3024   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3025
3026
3027   linebuffer_init (&lbs[0].lb);
3028   linebuffer_init (&lbs[1].lb);
3029   if (cstack.size == 0)
3030     {
3031       cstack.size = (DEBUG) ? 1 : 4;
3032       cstack.nl = 0;
3033       cstack.cname = xnew (cstack.size, char *);
3034       cstack.bracelev = xnew (cstack.size, int);
3035     }
3036
3037   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3038   curndx = newndx = 0;
3039   lp = curlb.buffer;
3040   *lp = 0;
3041
3042   fvdef = fvnone; fvextern = false; typdef = tnone;
3043   structdef = snone; definedef = dnone; objdef = onone;
3044   yacc_rules = false;
3045   midtoken = inquote = inchar = incomm = quotednl = false;
3046   token.valid = savetoken.valid = false;
3047   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3048   if (cjava)
3049     { qualifier = "."; qlen = 1; }
3050   else
3051     { qualifier = "::"; qlen = 2; }
3052
3053
3054   while (!feof (inf))
3055     {
3056       c = *lp++;
3057       if (c == '\\')
3058         {
3059           /* If we are at the end of the line, the next character is a
3060              '\0'; do not skip it, because it is what tells us
3061              to read the next line.  */
3062           if (*lp == '\0')
3063             {
3064               quotednl = true;
3065               continue;
3066             }
3067           lp++;
3068           c = ' ';
3069         }
3070       else if (incomm)
3071         {
3072           switch (c)
3073             {
3074             case '*':
3075               if (*lp == '/')
3076                 {
3077                   c = *lp++;
3078                   incomm = false;
3079                 }
3080               break;
3081             case '\0':
3082               /* Newlines inside comments do not end macro definitions in
3083                  traditional cpp. */
3084               CNL_SAVE_DEFINEDEF ();
3085               break;
3086             }
3087           continue;
3088         }
3089       else if (inquote)
3090         {
3091           switch (c)
3092             {
3093             case '"':
3094               inquote = false;
3095               break;
3096             case '\0':
3097               /* Newlines inside strings do not end macro definitions
3098                  in traditional cpp, even though compilers don't
3099                  usually accept them. */
3100               CNL_SAVE_DEFINEDEF ();
3101               break;
3102             }
3103           continue;
3104         }
3105       else if (inchar)
3106         {
3107           switch (c)
3108             {
3109             case '\0':
3110               /* Hmmm, something went wrong. */
3111               CNL ();
3112               /* FALLTHRU */
3113             case '\'':
3114               inchar = false;
3115               break;
3116             }
3117           continue;
3118         }
3119       else switch (c)
3120         {
3121         case '"':
3122           inquote = true;
3123           if (bracketlev > 0)
3124             continue;
3125           if (inattribute)
3126             break;
3127           switch (fvdef)
3128             {
3129             case fdefunkey:
3130             case fstartlist:
3131             case finlist:
3132             case fignore:
3133             case vignore:
3134               break;
3135             default:
3136               fvextern = false;
3137               fvdef = fvnone;
3138             }
3139           continue;
3140         case '\'':
3141           inchar = true;
3142           if (bracketlev > 0)
3143             continue;
3144           if (inattribute)
3145             break;
3146           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3147             {
3148               fvextern = false;
3149               fvdef = fvnone;
3150             }
3151           continue;
3152         case '/':
3153           if (*lp == '*')
3154             {
3155               incomm = true;
3156               lp++;
3157               c = ' ';
3158               if (bracketlev > 0)
3159                 continue;
3160             }
3161           else if (/* cplpl && */ *lp == '/')
3162             {
3163               c = '\0';
3164             }
3165           break;
3166         case '%':
3167           if ((c_ext & YACC) && *lp == '%')
3168             {
3169               /* Entering or exiting rules section in yacc file. */
3170               lp++;
3171               definedef = dnone; fvdef = fvnone; fvextern = false;
3172               typdef = tnone; structdef = snone;
3173               midtoken = inquote = inchar = incomm = quotednl = false;
3174               bracelev = 0;
3175               yacc_rules = !yacc_rules;
3176               continue;
3177             }
3178           else
3179             break;
3180         case '#':
3181           if (definedef == dnone)
3182             {
3183               char *cp;
3184               bool cpptoken = true;
3185
3186               /* Look back on this line.  If all blanks, or nonblanks
3187                  followed by an end of comment, this is a preprocessor
3188                  token. */
3189               for (cp = newlb.buffer; cp < lp-1; cp++)
3190                 if (!c_isspace (*cp))
3191                   {
3192                     if (*cp == '*' && cp[1] == '/')
3193                       {
3194                         cp++;
3195                         cpptoken = true;
3196                       }
3197                     else
3198                       cpptoken = false;
3199                   }
3200               if (cpptoken)
3201                 {
3202                   definedef = dsharpseen;
3203                   /* This is needed for tagging enum values: when there are
3204                      preprocessor conditionals inside the enum, we need to
3205                      reset the value of fvdef so that the next enum value is
3206                      tagged even though the one before it did not end in a
3207                      comma.  */
3208                   if (fvdef == vignore && instruct && parlev == 0)
3209                     {
3210                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3211                         fvdef = fvnone;
3212                     }
3213                 }
3214             } /* if (definedef == dnone) */
3215           continue;
3216         case '[':
3217           bracketlev++;
3218           continue;
3219         default:
3220           if (bracketlev > 0)
3221             {
3222               if (c == ']')
3223                 --bracketlev;
3224               else if (c == '\0')
3225                 CNL_SAVE_DEFINEDEF ();
3226               continue;
3227             }
3228           break;
3229         } /* switch (c) */
3230
3231
3232       /* Consider token only if some involved conditions are satisfied. */
3233       if (typdef != tignore
3234           && definedef != dignorerest
3235           && fvdef != finlist
3236           && templatelev == 0
3237           && (definedef != dnone
3238               || structdef != scolonseen)
3239           && !inattribute)
3240         {
3241           if (midtoken)
3242             {
3243               if (endtoken (c))
3244                 {
3245                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3246                     /* This handles :: in the middle,
3247                        but not at the beginning of an identifier.
3248                        Also, space-separated :: is not recognized. */
3249                     {
3250                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3251                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3252                       lp += 2;
3253                       toklen += 2;
3254                       c = lp[-1];
3255                       goto still_in_token;
3256                     }
3257                   else
3258                     {
3259                       bool funorvar = false;
3260
3261                       if (yacc_rules
3262                           || consider_token (newlb.buffer + tokoff, toklen, c,
3263                                              &c_ext, bracelev, parlev,
3264                                              &funorvar))
3265                         {
3266                           if (fvdef == foperator)
3267                             {
3268                               char *oldlp = lp;
3269                               lp = skip_spaces (lp-1);
3270                               if (*lp != '\0')
3271                                 lp += 1;
3272                               while (*lp != '\0'
3273                                      && !c_isspace (*lp) && *lp != '(')
3274                                 lp += 1;
3275                               c = *lp++;
3276                               toklen += lp - oldlp;
3277                             }
3278                           token.named = false;
3279                           if (!plainc
3280                               && nestlev > 0 && definedef == dnone)
3281                             /* in struct body */
3282                             {
3283                               int len;
3284                               write_classname (&token_name, qualifier);
3285                               len = token_name.len;
3286                               linebuffer_setlen (&token_name, len+qlen+toklen);
3287                               sprintf (token_name.buffer + len, "%s%.*s",
3288                                        qualifier, toklen, newlb.buffer + tokoff);
3289                               token.named = true;
3290                             }
3291                           else if (objdef == ocatseen)
3292                             /* Objective C category */
3293                             {
3294                               int len = strlen (objtag) + 2 + toklen;
3295                               linebuffer_setlen (&token_name, len);
3296                               sprintf (token_name.buffer, "%s(%.*s)",
3297                                        objtag, toklen, newlb.buffer + tokoff);
3298                               token.named = true;
3299                             }
3300                           else if (objdef == omethodtag
3301                                    || objdef == omethodparm)
3302                             /* Objective C method */
3303                             {
3304                               token.named = true;
3305                             }
3306                           else if (fvdef == fdefunname)
3307                             /* GNU DEFUN and similar macros */
3308                             {
3309                               bool defun = (newlb.buffer[tokoff] == 'F');
3310                               int off = tokoff;
3311                               int len = toklen;
3312
3313                               /* Rewrite the tag so that emacs lisp DEFUNs
3314                                  can be found by their elisp name */
3315                               if (defun)
3316                                 {
3317                                   off += 1;
3318                                   len -= 1;
3319                                 }
3320                               linebuffer_setlen (&token_name, len);
3321                               memcpy (token_name.buffer,
3322                                       newlb.buffer + off, len);
3323                               token_name.buffer[len] = '\0';
3324                               if (defun)
3325                                 while (--len >= 0)
3326                                   if (token_name.buffer[len] == '_')
3327                                     token_name.buffer[len] = '-';
3328                               token.named = defun;
3329                             }
3330                           else
3331                             {
3332                               linebuffer_setlen (&token_name, toklen);
3333                               memcpy (token_name.buffer,
3334                                       newlb.buffer + tokoff, toklen);
3335                               token_name.buffer[toklen] = '\0';
3336                               /* Name macros and members. */
3337                               token.named = (structdef == stagseen
3338                                              || typdef == ttypeseen
3339                                              || typdef == tend
3340                                              || (funorvar
3341                                                  && definedef == dignorerest)
3342                                              || (funorvar
3343                                                  && definedef == dnone
3344                                                  && structdef == snone
3345                                                  && bracelev > 0));
3346                             }
3347                           token.lineno = lineno;
3348                           token.offset = tokoff;
3349                           token.length = toklen;
3350                           token.line = newlb.buffer;
3351                           token.linepos = newlinepos;
3352                           token.valid = true;
3353
3354                           if (definedef == dnone
3355                               && (fvdef == fvnameseen
3356                                   || fvdef == foperator
3357                                   || structdef == stagseen
3358                                   || typdef == tend
3359                                   || typdef == ttypeseen
3360                                   || objdef != onone))
3361                             {
3362                               if (current_lb_is_new)
3363                                 switch_line_buffers ();
3364                             }
3365                           else if (definedef != dnone
3366                                    || fvdef == fdefunname
3367                                    || instruct)
3368                             make_C_tag (funorvar);
3369                         }
3370                       else /* not yacc and consider_token failed */
3371                         {
3372                           if (inattribute && fvdef == fignore)
3373                             {
3374                               /* We have just met __attribute__ after a
3375                                  function parameter list: do not tag the
3376                                  function again. */
3377                               fvdef = fvnone;
3378                             }
3379                         }
3380                       midtoken = false;
3381                     }
3382                 } /* if (endtoken (c)) */
3383               else if (intoken (c))
3384                 still_in_token:
3385                 {
3386                   toklen++;
3387                   continue;
3388                 }
3389             } /* if (midtoken) */
3390           else if (begtoken (c))
3391             {
3392               switch (definedef)
3393                 {
3394                 case dnone:
3395                   switch (fvdef)
3396                     {
3397                     case fstartlist:
3398                       /* This prevents tagging fb in
3399                          void (__attribute__((noreturn)) *fb) (void);
3400                          Fixing this is not easy and not very important. */
3401                       fvdef = finlist;
3402                       continue;
3403                     case flistseen:
3404                       if (plainc || declarations)
3405                         {
3406                           make_C_tag (true); /* a function */
3407                           fvdef = fignore;
3408                         }
3409                       break;
3410                     }
3411                   if (structdef == stagseen && !cjava)
3412                     {
3413                       popclass_above (bracelev);
3414                       structdef = snone;
3415                     }
3416                   break;
3417                 case dsharpseen:
3418                   savetoken = token;
3419                   break;
3420                 }
3421               if (!yacc_rules || lp == newlb.buffer + 1)
3422                 {
3423                   tokoff = lp - 1 - newlb.buffer;
3424                   toklen = 1;
3425                   midtoken = true;
3426                 }
3427               continue;
3428             } /* if (begtoken) */
3429         } /* if must look at token */
3430
3431
3432       /* Detect end of line, colon, comma, semicolon and various braces
3433          after having handled a token.*/
3434       switch (c)
3435         {
3436         case ':':
3437           if (inattribute)
3438             break;
3439           if (yacc_rules && token.offset == 0 && token.valid)
3440             {
3441               make_C_tag (false); /* a yacc function */
3442               break;
3443             }
3444           if (definedef != dnone)
3445             break;
3446           switch (objdef)
3447             {
3448             case  otagseen:
3449               objdef = oignore;
3450               make_C_tag (true); /* an Objective C class */
3451               break;
3452             case omethodtag:
3453             case omethodparm:
3454               objdef = omethodcolon;
3455               int toklen = token_name.len;
3456               linebuffer_setlen (&token_name, toklen + 1);
3457               strcpy (token_name.buffer + toklen, ":");
3458               break;
3459             }
3460           if (structdef == stagseen)
3461             {
3462               structdef = scolonseen;
3463               break;
3464             }
3465           /* Should be useless, but may be work as a safety net. */
3466           if (cplpl && fvdef == flistseen)
3467             {
3468               make_C_tag (true); /* a function */
3469               fvdef = fignore;
3470               break;
3471             }
3472           break;
3473         case ';':
3474           if (definedef != dnone || inattribute)
3475             break;
3476           switch (typdef)
3477             {
3478             case tend:
3479             case ttypeseen:
3480               make_C_tag (false); /* a typedef */
3481               typdef = tnone;
3482               fvdef = fvnone;
3483               break;
3484             case tnone:
3485             case tinbody:
3486             case tignore:
3487               switch (fvdef)
3488                 {
3489                 case fignore:
3490                   if (typdef == tignore || cplpl)
3491                     fvdef = fvnone;
3492                   break;
3493                 case fvnameseen:
3494                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3495                       || (members && instruct))
3496                     make_C_tag (false); /* a variable */
3497                   fvextern = false;
3498                   fvdef = fvnone;
3499                   token.valid = false;
3500                   break;
3501                 case flistseen:
3502                   if ((declarations
3503                        && (cplpl || !instruct)
3504                        && (typdef == tnone || (typdef != tignore && instruct)))
3505                       || (members
3506                           && plainc && instruct))
3507                     make_C_tag (true);  /* a function */
3508                   /* FALLTHRU */
3509                 default:
3510                   fvextern = false;
3511                   fvdef = fvnone;
3512                   if (declarations
3513                        && cplpl && structdef == stagseen)
3514                     make_C_tag (false); /* forward declaration */
3515                   else
3516                     token.valid = false;
3517                 } /* switch (fvdef) */
3518               /* FALLTHRU */
3519             default:
3520               if (!instruct)
3521                 typdef = tnone;
3522             }
3523           if (structdef == stagseen)
3524             structdef = snone;
3525           break;
3526         case ',':
3527           if (definedef != dnone || inattribute)
3528             break;
3529           switch (objdef)
3530             {
3531             case omethodtag:
3532             case omethodparm:
3533               make_C_tag (true); /* an Objective C method */
3534               objdef = oinbody;
3535               break;
3536             }
3537           switch (fvdef)
3538             {
3539             case fdefunkey:
3540             case foperator:
3541             case fstartlist:
3542             case finlist:
3543             case fignore:
3544               break;
3545             case vignore:
3546               if (instruct && parlev == 0)
3547                 fvdef = fvnone;
3548               break;
3549             case fdefunname:
3550               fvdef = fignore;
3551               break;
3552             case fvnameseen:
3553               if (parlev == 0
3554                   && ((globals
3555                        && bracelev == 0
3556                        && templatelev == 0
3557                        && (!fvextern || declarations))
3558                       || (members && instruct)))
3559                   make_C_tag (false); /* a variable */
3560               break;
3561             case flistseen:
3562               if ((declarations && typdef == tnone && !instruct)
3563                   || (members && typdef != tignore && instruct))
3564                 {
3565                   make_C_tag (true); /* a function */
3566                   fvdef = fvnameseen;
3567                 }
3568               else if (!declarations)
3569                 fvdef = fvnone;
3570               token.valid = false;
3571               break;
3572             default:
3573               fvdef = fvnone;
3574             }
3575           if (structdef == stagseen)
3576             structdef = snone;
3577           break;
3578         case ']':
3579           if (definedef != dnone || inattribute)
3580             break;
3581           if (structdef == stagseen)
3582             structdef = snone;
3583           switch (typdef)
3584             {
3585             case ttypeseen:
3586             case tend:
3587               typdef = tignore;
3588               make_C_tag (false);       /* a typedef */
3589               break;
3590             case tnone:
3591             case tinbody:
3592               switch (fvdef)
3593                 {
3594                 case foperator:
3595                 case finlist:
3596                 case fignore:
3597                 case vignore:
3598                   break;
3599                 case fvnameseen:
3600                   if ((members && bracelev == 1)
3601                       || (globals && bracelev == 0
3602                           && (!fvextern || declarations)))
3603                     make_C_tag (false); /* a variable */
3604                   /* FALLTHRU */
3605                 default:
3606                   fvdef = fvnone;
3607                 }
3608               break;
3609             }
3610           break;
3611         case '(':
3612           if (inattribute)
3613             {
3614               attrparlev++;
3615               break;
3616             }
3617           if (definedef != dnone)
3618             break;
3619           if (objdef == otagseen && parlev == 0)
3620             objdef = oparenseen;
3621           switch (fvdef)
3622             {
3623             case fvnameseen:
3624               if (typdef == ttypeseen
3625                   && *lp != '*'
3626                   && !instruct)
3627                 {
3628                   /* This handles constructs like:
3629                      typedef void OperatorFun (int fun); */
3630                   make_C_tag (false);
3631                   typdef = tignore;
3632                   fvdef = fignore;
3633                   break;
3634                 }
3635               /* FALLTHRU */
3636             case foperator:
3637               fvdef = fstartlist;
3638               break;
3639             case flistseen:
3640               fvdef = finlist;
3641               break;
3642             }
3643           parlev++;
3644           break;
3645         case ')':
3646           if (inattribute)
3647             {
3648               if (--attrparlev == 0)
3649                 inattribute = false;
3650               break;
3651             }
3652           if (definedef != dnone)
3653             break;
3654           if (objdef == ocatseen && parlev == 1)
3655             {
3656               make_C_tag (true); /* an Objective C category */
3657               objdef = oignore;
3658             }
3659           if (--parlev == 0)
3660             {
3661               switch (fvdef)
3662                 {
3663                 case fstartlist:
3664                 case finlist:
3665                   fvdef = flistseen;
3666                   break;
3667                 }
3668               if (!instruct
3669                   && (typdef == tend
3670                       || typdef == ttypeseen))
3671                 {
3672                   typdef = tignore;
3673                   make_C_tag (false); /* a typedef */
3674                 }
3675             }
3676           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3677             parlev = 0;
3678           break;
3679         case '{':
3680           if (definedef != dnone)
3681             break;
3682           if (typdef == ttypeseen)
3683             {
3684               /* Whenever typdef is set to tinbody (currently only
3685                  here), typdefbracelev should be set to bracelev. */
3686               typdef = tinbody;
3687               typdefbracelev = bracelev;
3688             }
3689           switch (fvdef)
3690             {
3691             case flistseen:
3692               make_C_tag (true);    /* a function */
3693               /* FALLTHRU */
3694             case fignore:
3695               fvdef = fvnone;
3696               break;
3697             case fvnone:
3698               switch (objdef)
3699                 {
3700                 case otagseen:
3701                   make_C_tag (true); /* an Objective C class */
3702                   objdef = oignore;
3703                   break;
3704                 case omethodtag:
3705                 case omethodparm:
3706                   make_C_tag (true); /* an Objective C method */
3707                   objdef = oinbody;
3708                   break;
3709                 default:
3710                   /* Neutralize `extern "C" {' grot. */
3711                   if (bracelev == 0 && structdef == snone && nestlev == 0
3712                       && typdef == tnone)
3713                     bracelev = -1;
3714                 }
3715               break;
3716             }
3717           switch (structdef)
3718             {
3719             case skeyseen:         /* unnamed struct */
3720               pushclass_above (bracelev, NULL, 0);
3721               structdef = snone;
3722               break;
3723             case stagseen:         /* named struct or enum */
3724             case scolonseen:       /* a class */
3725               pushclass_above (bracelev,token.line+token.offset, token.length);
3726               structdef = snone;
3727               make_C_tag (false);  /* a struct or enum */
3728               break;
3729             }
3730           bracelev += 1;
3731           break;
3732         case '*':
3733           if (definedef != dnone)
3734             break;
3735           if (fvdef == fstartlist)
3736             {
3737               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3738               token.valid = false;
3739             }
3740           break;
3741         case '}':
3742           if (definedef != dnone)
3743             break;
3744           bracelev -= 1;
3745           if (!ignoreindent && lp == newlb.buffer + 1)
3746             {
3747               if (bracelev != 0)
3748                 token.valid = false; /* unexpected value, token unreliable */
3749               bracelev = 0;     /* reset brace level if first column */
3750               parlev = 0;       /* also reset paren level, just in case... */
3751             }
3752           else if (bracelev < 0)
3753             {
3754               token.valid = false; /* something gone amiss, token unreliable */
3755               bracelev = 0;
3756             }
3757           if (bracelev == 0 && fvdef == vignore)
3758             fvdef = fvnone;             /* end of function */
3759           popclass_above (bracelev);
3760           structdef = snone;
3761           /* Only if typdef == tinbody is typdefbracelev significant. */
3762           if (typdef == tinbody && bracelev <= typdefbracelev)
3763             {
3764               assert (bracelev == typdefbracelev);
3765               typdef = tend;
3766             }
3767           break;
3768         case '=':
3769           if (definedef != dnone)
3770             break;
3771           switch (fvdef)
3772             {
3773             case foperator:
3774             case finlist:
3775             case fignore:
3776             case vignore:
3777               break;
3778             case fvnameseen:
3779               if ((members && bracelev == 1)
3780                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3781                 make_C_tag (false); /* a variable */
3782               /* FALLTHRU */
3783             default:
3784               fvdef = vignore;
3785             }
3786           break;
3787         case '<':
3788           if (cplpl
3789               && (structdef == stagseen || fvdef == fvnameseen))
3790             {
3791               templatelev++;
3792               break;
3793             }
3794           goto resetfvdef;
3795         case '>':
3796           if (templatelev > 0)
3797             {
3798               templatelev--;
3799               break;
3800             }
3801           goto resetfvdef;
3802         case '+':
3803         case '-':
3804           if (objdef == oinbody && bracelev == 0)
3805             {
3806               objdef = omethodsign;
3807               break;
3808             }
3809           /* FALLTHRU */
3810         resetfvdef:
3811         case '#': case '~': case '&': case '%': case '/':
3812         case '|': case '^': case '!': case '.': case '?':
3813           if (definedef != dnone)
3814             break;
3815           /* These surely cannot follow a function tag in C. */
3816           switch (fvdef)
3817             {
3818             case foperator:
3819             case finlist:
3820             case fignore:
3821             case vignore:
3822               break;
3823             default:
3824               fvdef = fvnone;
3825             }
3826           break;
3827         case '\0':
3828           if (objdef == otagseen)
3829             {
3830               make_C_tag (true); /* an Objective C class */
3831               objdef = oignore;
3832             }
3833           /* If a macro spans multiple lines don't reset its state. */
3834           if (quotednl)
3835             CNL_SAVE_DEFINEDEF ();
3836           else
3837             CNL ();
3838           break;
3839         } /* switch (c) */
3840
3841     } /* while not eof */
3842
3843   free (lbs[0].lb.buffer);
3844   free (lbs[1].lb.buffer);
3845 }
3846
3847 /*
3848  * Process either a C++ file or a C file depending on the setting
3849  * of a global flag.
3850  */
3851 static void
3852 default_C_entries (FILE *inf)
3853 {
3854   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3855 }
3856
/* Always treat INF as plain C: C_entries is called with a flag word
   of 0.  */
static void
plain_C_entries (FILE *inf)
{
  const int c_ext = 0;

  C_entries (c_ext, inf);
}
3863
3864 /* Always do C++. */
3865 static void
3866 Cplusplus_entries (FILE *inf)
3867 {
3868   C_entries (C_PLPL, inf);
3869 }
3870
3871 /* Always do Java. */
3872 static void
3873 Cjava_entries (FILE *inf)
3874 {
3875   C_entries (C_JAVA, inf);
3876 }
3877
3878 /* Always do C*. */
3879 static void
3880 Cstar_entries (FILE *inf)
3881 {
3882   C_entries (C_STAR, inf);
3883 }
3884
3885 /* Always do Yacc. */
3886 static void
3887 Yacc_entries (FILE *inf)
3888 {
3889   C_entries (YACC, inf);
3890 }
3891
3892 \f
3893 /* Useful macros. */
/* Loop header that walks FILE_POINTER one line at a time.  At the top
   of every iteration, provided feof (FILE_POINTER) is still false, the
   next line is read into LINE_BUFFER with readline and CHAR_POINTER is
   set to the start of LINE_BUFFER's text; the statement following the
   macro is the loop body.  Each argument may be evaluated on every
   iteration, and LINE_BUFFER must be an lvalue.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (; /* nothing to initialize */                                    \
       !feof (file_pointer) /* stop once EOF has been seen */           \
       && (readline (&line_buffer, file_pointer), /* fetch a line */    \
           char_pointer = line_buffer.buffer, /* point at its text */   \
           true); /* the comma expression always succeeds */           \
       /* nothing to step */                                            \
      )
3902
/* Test whether CP points at keyword KW, which must be a string literal
   (anything else fails to compile because of the "" KW concatenation).
   The match is exact (strneq) and the character right after the keyword
   must satisfy notinname.  On success, CP is moved past the keyword and
   past whatever skip_spaces skips; on failure CP is left untouched.
   CP is evaluated several times, so it must be a side-effect-free
   lvalue.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), true) /* KW must be a literal string */             \
   && strneq ((cp), kw, sizeof (kw) - 1) /* CP starts with KW */        \
   && notinname ((cp)[sizeof (kw) - 1]) /* and KW ends there */         \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))) /* step past it */
3908
/* Case-insensitive relative of LOOKING_AT.  KW must again be a string
   literal.  Unlike LOOKING_AT, there is no notinname check on the
   character after the keyword and no spaces are skipped: on success CP
   is advanced just past the keyword, on failure it is left untouched.
   CP is evaluated several times, so it must be a side-effect-free
   lvalue.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), true) /* KW must be a literal string */             \
   && strncaseeq ((cp), kw, sizeof (kw) - 1) /* CP starts with KW */    \
   && ((cp) += sizeof (kw) - 1)) /* step past the keyword only */
3914
3915 /*
3916  * Read a file, but do no processing.  This is used to do regexp
3917  * matching on files that have no language defined.
3918  */
3919 static void
3920 just_read_file (FILE *inf)
3921 {
3922   while (!feof (inf))
3923     readline (&lb, inf);
3924 }
3925
3926 \f
3927 /* Fortran parsing */
3928
3929 static void F_takeprec (void);
3930 static void F_getit (FILE *);
3931
3932 static void
3933 F_takeprec (void)
3934 {
3935   dbp = skip_spaces (dbp);
3936   if (*dbp != '*')
3937     return;
3938   dbp++;
3939   dbp = skip_spaces (dbp);
3940   if (strneq (dbp, "(*)", 3))
3941     {
3942       dbp += 3;
3943       return;
3944     }
3945   if (!c_isdigit (*dbp))
3946     {
3947       --dbp;                    /* force failure */
3948       return;
3949     }
3950   do
3951     dbp++;
3952   while (c_isdigit (*dbp));
3953 }
3954
3955 static void
3956 F_getit (FILE *inf)
3957 {
3958   register char *cp;
3959
3960   dbp = skip_spaces (dbp);
3961   if (*dbp == '\0')
3962     {
3963       readline (&lb, inf);
3964       dbp = lb.buffer;
3965       if (dbp[5] != '&')
3966         return;
3967       dbp += 6;
3968       dbp = skip_spaces (dbp);
3969     }
3970   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3971     return;
3972   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3973     continue;
3974   make_tag (dbp, cp-dbp, true,
3975             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3976 }
3977
3978
3979 static void
3980 Fortran_functions (FILE *inf)
3981 {
3982   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3983     {
3984       if (*dbp == '%')
3985         dbp++;                  /* Ratfor escape to fortran */
3986       dbp = skip_spaces (dbp);
3987       if (*dbp == '\0')
3988         continue;
3989
3990       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3991         dbp = skip_spaces (dbp);
3992
3993       if (LOOKING_AT_NOCASE (dbp, "pure"))
3994         dbp = skip_spaces (dbp);
3995
3996       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3997         dbp = skip_spaces (dbp);
3998
3999       switch (c_tolower (*dbp))
4000         {
4001         case 'i':
4002           if (nocase_tail ("integer"))
4003             F_takeprec ();
4004           break;
4005         case 'r':
4006           if (nocase_tail ("real"))
4007             F_takeprec ();
4008           break;
4009         case 'l':
4010           if (nocase_tail ("logical"))
4011             F_takeprec ();
4012           break;
4013         case 'c':
4014           if (nocase_tail ("complex") || nocase_tail ("character"))
4015             F_takeprec ();
4016           break;
4017         case 'd':
4018           if (nocase_tail ("double"))
4019             {
4020               dbp = skip_spaces (dbp);
4021               if (*dbp == '\0')
4022                 continue;
4023               if (nocase_tail ("precision"))
4024                 break;
4025               continue;
4026             }
4027           break;
4028         }
4029       dbp = skip_spaces (dbp);
4030       if (*dbp == '\0')
4031         continue;
4032       switch (c_tolower (*dbp))
4033         {
4034         case 'f':
4035           if (nocase_tail ("function"))
4036             F_getit (inf);
4037           continue;
4038         case 's':
4039           if (nocase_tail ("subroutine"))
4040             F_getit (inf);
4041           continue;
4042         case 'e':
4043           if (nocase_tail ("entry"))
4044             F_getit (inf);
4045           continue;
4046         case 'b':
4047           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4048             {
4049               dbp = skip_spaces (dbp);
4050               if (*dbp == '\0') /* assume un-named */
4051                 make_tag ("blockdata", 9, true,
4052                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4053               else
4054                 F_getit (inf);  /* look for name */
4055             }
4056           continue;
4057         }
4058     }
4059 }
4060
4061 \f
4062 /*
4063  * Ada parsing
4064  * Original code by
4065  * Philippe Waroquiers (1998)
4066  */
4067
4068 /* Once we are positioned after an "interesting" keyword, let's get
4069    the real tag value necessary. */
4070 static void
4071 Ada_getit (FILE *inf, const char *name_qualifier)
4072 {
4073   register char *cp;
4074   char *name;
4075   char c;
4076
4077   while (!feof (inf))
4078     {
4079       dbp = skip_spaces (dbp);
4080       if (*dbp == '\0'
4081           || (dbp[0] == '-' && dbp[1] == '-'))
4082         {
4083           readline (&lb, inf);
4084           dbp = lb.buffer;
4085         }
4086       switch (c_tolower (*dbp))
4087         {
4088         case 'b':
4089           if (nocase_tail ("body"))
4090             {
4091               /* Skipping body of   procedure body   or   package body or ....
4092                  resetting qualifier to body instead of spec. */
4093               name_qualifier = "/b";
4094               continue;
4095             }
4096           break;
4097         case 't':
4098           /* Skipping type of   task type   or   protected type ... */
4099           if (nocase_tail ("type"))
4100             continue;
4101           break;
4102         }
4103       if (*dbp == '"')
4104         {
4105           dbp += 1;
4106           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4107             continue;
4108         }
4109       else
4110         {
4111           dbp = skip_spaces (dbp);
4112           for (cp = dbp;
4113                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4114                cp++)
4115             continue;
4116           if (cp == dbp)
4117             return;
4118         }
4119       c = *cp;
4120       *cp = '\0';
4121       name = concat (dbp, name_qualifier, "");
4122       *cp = c;
4123       make_tag (name, strlen (name), true,
4124                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4125       free (name);
4126       if (c == '"')
4127         dbp = cp + 1;
4128       return;
4129     }
4130 }
4131
4132 static void
4133 Ada_funcs (FILE *inf)
4134 {
4135   bool inquote = false;
4136   bool skip_till_semicolumn = false;
4137
4138   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4139     {
4140       while (*dbp != '\0')
4141         {
4142           /* Skip a string i.e. "abcd". */
4143           if (inquote || (*dbp == '"'))
4144             {
4145               dbp = strchr (dbp + !inquote, '"');
4146               if (dbp != NULL)
4147                 {
4148                   inquote = false;
4149                   dbp += 1;
4150                   continue;     /* advance char */
4151                 }
4152               else
4153                 {
4154                   inquote = true;
4155                   break;        /* advance line */
4156                 }
4157             }
4158
4159           /* Skip comments. */
4160           if (dbp[0] == '-' && dbp[1] == '-')
4161             break;              /* advance line */
4162
4163           /* Skip character enclosed in single quote i.e. 'a'
4164              and skip single quote starting an attribute i.e. 'Image. */
4165           if (*dbp == '\'')
4166             {
4167               dbp++ ;
4168               if (*dbp != '\0')
4169                 dbp++;
4170               continue;
4171             }
4172
4173           if (skip_till_semicolumn)
4174             {
4175               if (*dbp == ';')
4176                 skip_till_semicolumn = false;
4177               dbp++;
4178               continue;         /* advance char */
4179             }
4180
4181           /* Search for beginning of a token.  */
4182           if (!begtoken (*dbp))
4183             {
4184               dbp++;
4185               continue;         /* advance char */
4186             }
4187
4188           /* We are at the beginning of a token. */
4189           switch (c_tolower (*dbp))
4190             {
4191             case 'f':
4192               if (!packages_only && nocase_tail ("function"))
4193                 Ada_getit (inf, "/f");
4194               else
4195                 break;          /* from switch */
4196               continue;         /* advance char */
4197             case 'p':
4198               if (!packages_only && nocase_tail ("procedure"))
4199                 Ada_getit (inf, "/p");
4200               else if (nocase_tail ("package"))
4201                 Ada_getit (inf, "/s");
4202               else if (nocase_tail ("protected")) /* protected type */
4203                 Ada_getit (inf, "/t");
4204               else
4205                 break;          /* from switch */
4206               continue;         /* advance char */
4207
4208             case 'u':
4209               if (typedefs && !packages_only && nocase_tail ("use"))
4210                 {
4211                   /* when tagging types, avoid tagging  use type Pack.Typename;
4212                      for this, we will skip everything till a ; */
4213                   skip_till_semicolumn = true;
4214                   continue;     /* advance char */
4215                 }
4216
4217             case 't':
4218               if (!packages_only && nocase_tail ("task"))
4219                 Ada_getit (inf, "/k");
4220               else if (typedefs && !packages_only && nocase_tail ("type"))
4221                 {
4222                   Ada_getit (inf, "/t");
4223                   while (*dbp != '\0')
4224                     dbp += 1;
4225                 }
4226               else
4227                 break;          /* from switch */
4228               continue;         /* advance char */
4229             }
4230
4231           /* Look for the end of the token. */
4232           while (!endtoken (*dbp))
4233             dbp++;
4234
4235         } /* advance char */
4236     } /* advance line */
4237 }
4238
4239 \f
4240 /*
4241  * Unix and microcontroller assembly tag handling
4242  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4243  * Idea by Bob Weiner, Motorola Inc. (1994)
4244  */
4245 static void
4246 Asm_labels (FILE *inf)
4247 {
4248   register char *cp;
4249
4250   LOOP_ON_INPUT_LINES (inf, lb, cp)
4251     {
4252       /* If first char is alphabetic or one of [_.$], test for colon
4253          following identifier. */
4254       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4255         {
4256           /* Read past label. */
4257           cp++;
4258           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4259             cp++;
4260           if (*cp == ':' || c_isspace (*cp))
4261             /* Found end of label, so copy it and add it to the table. */
4262             make_tag (lb.buffer, cp - lb.buffer, true,
4263                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4264         }
4265     }
4266 }
4267
4268 \f
4269 /*
4270  * Perl support
4271  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4272  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4273  * Perl variable names: /^(my|local).../
4274  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4275  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4276  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4277  */
4278 static void
4279 Perl_functions (FILE *inf)
4280 {
4281   char *package = savestr ("main"); /* current package name */
4282   register char *cp;
4283
4284   LOOP_ON_INPUT_LINES (inf, lb, cp)
4285     {
4286       cp = skip_spaces (cp);
4287
4288       if (LOOKING_AT (cp, "package"))
4289         {
4290           free (package);
4291           get_tag (cp, &package);
4292         }
4293       else if (LOOKING_AT (cp, "sub"))
4294         {
4295           char *pos, *sp;
4296
4297         subr:
4298           sp = cp;
4299           while (!notinname (*cp))
4300             cp++;
4301           if (cp == sp)
4302             continue;           /* nothing found */
4303           pos = strchr (sp, ':');
4304           if (pos && pos < cp && pos[1] == ':')
4305             /* The name is already qualified. */
4306             make_tag (sp, cp - sp, true,
4307                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308           else
4309             /* Qualify it. */
4310             {
4311               char savechar, *name;
4312
4313               savechar = *cp;
4314               *cp = '\0';
4315               name = concat (package, "::", sp);
4316               *cp = savechar;
4317               make_tag (name, strlen (name), true,
4318                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4319               free (name);
4320             }
4321         }
4322       else if (LOOKING_AT (cp, "use constant")
4323                || LOOKING_AT (cp, "use constant::defer"))
4324         {
4325           /* For hash style multi-constant like
4326                 use constant { FOO => 123,
4327                                BAR => 456 };
4328              only the first FOO is picked up.  Parsing across the value
4329              expressions would be difficult in general, due to possible nested
4330              hashes, here-documents, etc.  */
4331           if (*cp == '{')
4332             cp = skip_spaces (cp+1);
4333           goto subr;
4334         }
4335       else if (globals) /* only if we are tagging global vars */
4336         {
4337           /* Skip a qualifier, if any. */
4338           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4339           /* After "my" or "local", but before any following paren or space. */
4340           char *varstart = cp;
4341
4342           if (qual              /* should this be removed?  If yes, how? */
4343               && (*cp == '$' || *cp == '@' || *cp == '%'))
4344             {
4345               varstart += 1;
4346               do
4347                 cp++;
4348               while (c_isalnum (*cp) || *cp == '_');
4349             }
4350           else if (qual)
4351             {
4352               /* Should be examining a variable list at this point;
4353                  could insist on seeing an open parenthesis. */
4354               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4355                 cp++;
4356             }
4357           else
4358             continue;
4359
4360           make_tag (varstart, cp - varstart, false,
4361                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4362         }
4363     }
4364   free (package);
4365 }
4366
4367
4368 /*
4369  * Python support
4370  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4371  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4372  * More ideas by seb bacon <seb@jamkit.com> (2002)
4373  */
4374 static void
4375 Python_functions (FILE *inf)
4376 {
4377   register char *cp;
4378
4379   LOOP_ON_INPUT_LINES (inf, lb, cp)
4380     {
4381       cp = skip_spaces (cp);
4382       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4383         {
4384           char *name = cp;
4385           while (!notinname (*cp) && *cp != ':')
4386             cp++;
4387           make_tag (name, cp - name, true,
4388                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4389         }
4390     }
4391 }
4392
4393 \f
4394 /*
4395  * PHP support
4396  * Look for:
4397  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4398  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4399  *  - /^[ \t]*define\(\"[^\"]+/
4400  * Only with --members:
4401  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4402  * Idea by Diez B. Roggisch (2001)
4403  */
4404 static void
4405 PHP_functions (FILE *inf)
4406 {
4407   char *cp, *name;
4408   bool search_identifier = false;
4409
4410   LOOP_ON_INPUT_LINES (inf, lb, cp)
4411     {
4412       cp = skip_spaces (cp);
4413       name = cp;
4414       if (search_identifier
4415           && *cp != '\0')
4416         {
4417           while (!notinname (*cp))
4418             cp++;
4419           make_tag (name, cp - name, true,
4420                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4421           search_identifier = false;
4422         }
4423       else if (LOOKING_AT (cp, "function"))
4424         {
4425           if (*cp == '&')
4426             cp = skip_spaces (cp+1);
4427           if (*cp != '\0')
4428             {
4429               name = cp;
4430               while (!notinname (*cp))
4431                 cp++;
4432               make_tag (name, cp - name, true,
4433                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4434             }
4435           else
4436             search_identifier = true;
4437         }
4438       else if (LOOKING_AT (cp, "class"))
4439         {
4440           if (*cp != '\0')
4441             {
4442               name = cp;
4443               while (*cp != '\0' && !c_isspace (*cp))
4444                 cp++;
4445               make_tag (name, cp - name, false,
4446                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4447             }
4448           else
4449             search_identifier = true;
4450         }
4451       else if (strneq (cp, "define", 6)
4452                && (cp = skip_spaces (cp+6))
4453                && *cp++ == '('
4454                && (*cp == '"' || *cp == '\''))
4455         {
4456           char quote = *cp++;
4457           name = cp;
4458           while (*cp != quote && *cp != '\0')
4459             cp++;
4460           make_tag (name, cp - name, false,
4461                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4462         }
4463       else if (members
4464                && LOOKING_AT (cp, "var")
4465                && *cp == '$')
4466         {
4467           name = cp;
4468           while (!notinname (*cp))
4469             cp++;
4470           make_tag (name, cp - name, false,
4471                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4472         }
4473     }
4474 }
4475
4476 \f
4477 /*
4478  * Cobol tag functions
4479  * We could look for anything that could be a paragraph name.
4480  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4481  * Idea by Corny de Souza (1993)
4482  */
4483 static void
4484 Cobol_paragraphs (FILE *inf)
4485 {
4486   register char *bp, *ep;
4487
4488   LOOP_ON_INPUT_LINES (inf, lb, bp)
4489     {
4490       if (lb.len < 9)
4491         continue;
4492       bp += 8;
4493
4494       /* If eoln, compiler option or comment ignore whole line. */
4495       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4496         continue;
4497
4498       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4499         continue;
4500       if (*ep++ == '.')
4501         make_tag (bp, ep - bp, true,
4502                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4503     }
4504 }
4505
4506 \f
4507 /*
4508  * Makefile support
4509  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4510  */
4511 static void
4512 Makefile_targets (FILE *inf)
4513 {
4514   register char *bp;
4515
4516   LOOP_ON_INPUT_LINES (inf, lb, bp)
4517     {
4518       if (*bp == '\t' || *bp == '#')
4519         continue;
4520       while (*bp != '\0' && *bp != '=' && *bp != ':')
4521         bp++;
4522       if (*bp == ':' || (globals && *bp == '='))
4523         {
4524           /* We should detect if there is more than one tag, but we do not.
4525              We just skip initial and final spaces. */
4526           char * namestart = skip_spaces (lb.buffer);
4527           while (--bp > namestart)
4528             if (!notinname (*bp))
4529               break;
4530           make_tag (namestart, bp - namestart + 1, true,
4531                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4532         }
4533     }
4534 }
4535
4536 \f
4537 /*
4538  * Pascal parsing
4539  * Original code by Mosur K. Mohan (1989)
4540  *
4541  *  Locates tags for procedures & functions.  Doesn't do any type- or
4542  *  var-definitions.  It does look for the keyword "extern" or
4543  *  "forward" immediately following the procedure statement; if found,
4544  *  the tag is skipped.
4545  */
4546 static void
4547 Pascal_functions (FILE *inf)
4548 {
4549   linebuffer tline;             /* mostly copied from C_entries */
4550   long save_lcno;
4551   int save_lineno, namelen, taglen;
4552   char c, *name;
4553
4554   bool                          /* each of these flags is true if: */
4555     incomment,                  /* point is inside a comment */
4556     inquote,                    /* point is inside '..' string */
4557     get_tagname,                /* point is after PROCEDURE/FUNCTION
4558                                    keyword, so next item = potential tag */
4559     found_tag,                  /* point is after a potential tag */
4560     inparms,                    /* point is within parameter-list */
4561     verify_tag;                 /* point has passed the parm-list, so the
4562                                    next token will determine whether this
4563                                    is a FORWARD/EXTERN to be ignored, or
4564                                    whether it is a real tag */
4565
4566   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4567   name = NULL;                  /* keep compiler quiet */
4568   dbp = lb.buffer;
4569   *dbp = '\0';
4570   linebuffer_init (&tline);
4571
4572   incomment = inquote = false;
4573   found_tag = false;            /* have a proc name; check if extern */
4574   get_tagname = false;          /* found "procedure" keyword         */
4575   inparms = false;              /* found '(' after "proc"            */
4576   verify_tag = false;           /* check if "extern" is ahead        */
4577
4578
4579   while (!feof (inf))           /* long main loop to get next char */
4580     {
4581       c = *dbp++;
4582       if (c == '\0')            /* if end of line */
4583         {
4584           readline (&lb, inf);
4585           dbp = lb.buffer;
4586           if (*dbp == '\0')
4587             continue;
4588           if (!((found_tag && verify_tag)
4589                 || get_tagname))
4590             c = *dbp++;         /* only if don't need *dbp pointing
4591                                    to the beginning of the name of
4592                                    the procedure or function */
4593         }
4594       if (incomment)
4595         {
4596           if (c == '}')         /* within { } comments */
4597             incomment = false;
4598           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4599             {
4600               dbp++;
4601               incomment = false;
4602             }
4603           continue;
4604         }
4605       else if (inquote)
4606         {
4607           if (c == '\'')
4608             inquote = false;
4609           continue;
4610         }
4611       else
4612         switch (c)
4613           {
4614           case '\'':
4615             inquote = true;     /* found first quote */
4616             continue;
4617           case '{':             /* found open { comment */
4618             incomment = true;
4619             continue;
4620           case '(':
4621             if (*dbp == '*')    /* found open (* comment */
4622               {
4623                 incomment = true;
4624                 dbp++;
4625               }
4626             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4627               inparms = true;
4628             continue;
4629           case ')':             /* end of parms list */
4630             if (inparms)
4631               inparms = false;
4632             continue;
4633           case ';':
4634             if (found_tag && !inparms) /* end of proc or fn stmt */
4635               {
4636                 verify_tag = true;
4637                 break;
4638               }
4639             continue;
4640           }
4641       if (found_tag && verify_tag && (*dbp != ' '))
4642         {
4643           /* Check if this is an "extern" declaration. */
4644           if (*dbp == '\0')
4645             continue;
4646           if (c_tolower (*dbp) == 'e')
4647             {
4648               if (nocase_tail ("extern")) /* superfluous, really! */
4649                 {
4650                   found_tag = false;
4651                   verify_tag = false;
4652                 }
4653             }
4654           else if (c_tolower (*dbp) == 'f')
4655             {
4656               if (nocase_tail ("forward")) /* check for forward reference */
4657                 {
4658                   found_tag = false;
4659                   verify_tag = false;
4660                 }
4661             }
4662           if (found_tag && verify_tag) /* not external proc, so make tag */
4663             {
4664               found_tag = false;
4665               verify_tag = false;
4666               make_tag (name, namelen, true,
4667                         tline.buffer, taglen, save_lineno, save_lcno);
4668               continue;
4669             }
4670         }
4671       if (get_tagname)          /* grab name of proc or fn */
4672         {
4673           char *cp;
4674
4675           if (*dbp == '\0')
4676             continue;
4677
4678           /* Find block name. */
4679           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4680             continue;
4681
4682           /* Save all values for later tagging. */
4683           linebuffer_setlen (&tline, lb.len);
4684           strcpy (tline.buffer, lb.buffer);
4685           save_lineno = lineno;
4686           save_lcno = linecharno;
4687           name = tline.buffer + (dbp - lb.buffer);
4688           namelen = cp - dbp;
4689           taglen = cp - lb.buffer + 1;
4690
4691           dbp = cp;             /* set dbp to e-o-token */
4692           get_tagname = false;
4693           found_tag = true;
4694           continue;
4695
4696           /* And proceed to check for "extern". */
4697         }
4698       else if (!incomment && !inquote && !found_tag)
4699         {
4700           /* Check for proc/fn keywords. */
4701           switch (c_tolower (c))
4702             {
4703             case 'p':
4704               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4705                 get_tagname = true;
4706               continue;
4707             case 'f':
4708               if (nocase_tail ("unction"))
4709                 get_tagname = true;
4710               continue;
4711             }
4712         }
4713     } /* while not eof */
4714
4715   free (tline.buffer);
4716 }
4717
4718 \f
4719 /*
4720  * Lisp tag functions
4721  *  look for (def or (DEF, quote or QUOTE
4722  */
4723
4724 static void L_getit (void);
4725
4726 static void
4727 L_getit (void)
4728 {
4729   if (*dbp == '\'')             /* Skip prefix quote */
4730     dbp++;
4731   else if (*dbp == '(')
4732   {
4733     dbp++;
4734     /* Try to skip "(quote " */
4735     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4736       /* Ok, then skip "(" before name in (defstruct (foo)) */
4737       dbp = skip_spaces (dbp);
4738   }
4739   get_tag (dbp, NULL);
4740 }
4741
4742 static void
4743 Lisp_functions (FILE *inf)
4744 {
4745   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4746     {
4747       if (dbp[0] != '(')
4748         continue;
4749
4750       /* "(defvar foo)" is a declaration rather than a definition.  */
4751       if (! declarations)
4752         {
4753           char *p = dbp + 1;
4754           if (LOOKING_AT (p, "defvar"))
4755             {
4756               p = skip_name (p); /* past var name */
4757               p = skip_spaces (p);
4758               if (*p == ')')
4759                 continue;
4760             }
4761         }
4762
4763       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4764         dbp += 3;
4765
4766       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4767         {
4768           dbp = skip_non_spaces (dbp);
4769           dbp = skip_spaces (dbp);
4770           L_getit ();
4771         }
4772       else
4773         {
4774           /* Check for (foo::defmumble name-defined ... */
4775           do
4776             dbp++;
4777           while (!notinname (*dbp) && *dbp != ':');
4778           if (*dbp == ':')
4779             {
4780               do
4781                 dbp++;
4782               while (*dbp == ':');
4783
4784               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4785                 {
4786                   dbp = skip_non_spaces (dbp);
4787                   dbp = skip_spaces (dbp);
4788                   L_getit ();
4789                 }
4790             }
4791         }
4792     }
4793 }
4794
4795 \f
4796 /*
4797  * Lua script language parsing
4798  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4799  *
4800  *  "function" and "local function" are tags if they start at column 1.
4801  */
4802 static void
4803 Lua_functions (FILE *inf)
4804 {
4805   register char *bp;
4806
4807   LOOP_ON_INPUT_LINES (inf, lb, bp)
4808     {
4809       if (bp[0] != 'f' && bp[0] != 'l')
4810         continue;
4811
4812       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4813
4814       if (LOOKING_AT (bp, "function"))
4815         get_tag (bp, NULL);
4816     }
4817 }
4818
4819 \f
4820 /*
4821  * PostScript tags
4822  * Just look for lines where the first character is '/'
4823  * Also look at "defineps" for PSWrap
4824  * Ideas by:
4825  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4826  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4827  */
4828 static void
4829 PS_functions (FILE *inf)
4830 {
4831   register char *bp, *ep;
4832
4833   LOOP_ON_INPUT_LINES (inf, lb, bp)
4834     {
4835       if (bp[0] == '/')
4836         {
4837           for (ep = bp+1;
4838                *ep != '\0' && *ep != ' ' && *ep != '{';
4839                ep++)
4840             continue;
4841           make_tag (bp, ep - bp, true,
4842                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4843         }
4844       else if (LOOKING_AT (bp, "defineps"))
4845         get_tag (bp, NULL);
4846     }
4847 }
4848
4849 \f
4850 /*
4851  * Forth tags
4852  * Ignore anything after \ followed by space or in ( )
4853  * Look for words defined by :
4854  * Look for constant, code, create, defer, value, and variable
4855  * OBP extensions:  Look for buffer:, field,
4856  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4857  */
4858 static void
4859 Forth_words (FILE *inf)
4860 {
4861   register char *bp;
4862
4863   LOOP_ON_INPUT_LINES (inf, lb, bp)
4864     while ((bp = skip_spaces (bp))[0] != '\0')
4865       if (bp[0] == '\\' && c_isspace (bp[1]))
4866         break;                  /* read next line */
4867       else if (bp[0] == '(' && c_isspace (bp[1]))
4868         do                      /* skip to ) or eol */
4869           bp++;
4870         while (*bp != ')' && *bp != '\0');
4871       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
4872                || LOOKING_AT_NOCASE (bp, "constant")
4873                || LOOKING_AT_NOCASE (bp, "code")
4874                || LOOKING_AT_NOCASE (bp, "create")
4875                || LOOKING_AT_NOCASE (bp, "defer")
4876                || LOOKING_AT_NOCASE (bp, "value")
4877                || LOOKING_AT_NOCASE (bp, "variable")
4878                || LOOKING_AT_NOCASE (bp, "buffer:")
4879                || LOOKING_AT_NOCASE (bp, "field"))
4880         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4881       else
4882         bp = skip_non_spaces (bp);
4883 }
4884
4885 \f
4886 /*
4887  * Scheme tag functions
4888  * look for (def... xyzzy
4889  *          (def... (xyzzy
4890  *          (def ... ((...(xyzzy ....
4891  *          (set! xyzzy
4892  * Original code by Ken Haase (1985?)
4893  */
4894 static void
4895 Scheme_functions (FILE *inf)
4896 {
4897   register char *bp;
4898
4899   LOOP_ON_INPUT_LINES (inf, lb, bp)
4900     {
4901       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4902         {
4903           bp = skip_non_spaces (bp+4);
4904           /* Skip over open parens and white space.  Don't continue past
4905              '\0'. */
4906           while (*bp && notinname (*bp))
4907             bp++;
4908           get_tag (bp, NULL);
4909         }
4910       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4911         get_tag (bp, NULL);
4912     }
4913 }
4914
4915 \f
4916 /* Find tags in TeX and LaTeX input files.  */
4917
4918 /* TEX_toktab is a table of TeX control sequences that define tags.
4919  * Each entry records one such control sequence.
4920  *
4921  * Original code from who knows whom.
4922  * Ideas by:
4923  *   Stefan Monnier (2002)
4924  */
4925
4926 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4927
4928 /* Default set of control sequences to put into TEX_toktab.
4929    The value of environment var TEXTAGS is prepended to this.  */
4930 static const char *TEX_defenv = "\
4931 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4932 :part:appendix:entry:index:def\
4933 :newcommand:renewcommand:newenvironment:renewenvironment";
4934
4935 static void TEX_mode (FILE *);
4936 static void TEX_decode_env (const char *, const char *);
4937
4938 static char TEX_esc = '\\';
4939 static char TEX_opgrp = '{';
4940 static char TEX_clgrp = '}';
4941
4942 /*
4943  * TeX/LaTeX scanning loop.
4944  */
4945 static void
4946 TeX_commands (FILE *inf)
4947 {
4948   char *cp;
4949   linebuffer *key;
4950
4951   /* Select either \ or ! as escape character.  */
4952   TEX_mode (inf);
4953
4954   /* Initialize token table once from environment. */
4955   if (TEX_toktab == NULL)
4956     TEX_decode_env ("TEXTAGS", TEX_defenv);
4957
4958   LOOP_ON_INPUT_LINES (inf, lb, cp)
4959     {
4960       /* Look at each TEX keyword in line. */
4961       for (;;)
4962         {
4963           /* Look for a TEX escape. */
4964           while (*cp++ != TEX_esc)
4965             if (cp[-1] == '\0' || cp[-1] == '%')
4966               goto tex_next_line;
4967
4968           for (key = TEX_toktab; key->buffer != NULL; key++)
4969             if (strneq (cp, key->buffer, key->len))
4970               {
4971                 char *p;
4972                 int namelen, linelen;
4973                 bool opgrp = false;
4974
4975                 cp = skip_spaces (cp + key->len);
4976                 if (*cp == TEX_opgrp)
4977                   {
4978                     opgrp = true;
4979                     cp++;
4980                   }
4981                 for (p = cp;
4982                      (!c_isspace (*p) && *p != '#' &&
4983                       *p != TEX_opgrp && *p != TEX_clgrp);
4984                      p++)
4985                   continue;
4986                 namelen = p - cp;
4987                 linelen = lb.len;
4988                 if (!opgrp || *p == TEX_clgrp)
4989                   {
4990                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4991                       p++;
4992                     linelen = p - lb.buffer + 1;
4993                   }
4994                 make_tag (cp, namelen, true,
4995                           lb.buffer, linelen, lineno, linecharno);
4996                 goto tex_next_line; /* We only tag a line once */
4997               }
4998         }
4999     tex_next_line:
5000       ;
5001     }
5002 }
5003
5004 #define TEX_LESC '\\'
5005 #define TEX_SESC '!'
5006
5007 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5008    chars accordingly. */
5009 static void
5010 TEX_mode (FILE *inf)
5011 {
5012   int c;
5013
5014   while ((c = getc (inf)) != EOF)
5015     {
5016       /* Skip to next line if we hit the TeX comment char. */
5017       if (c == '%')
5018         while (c != '\n' && c != EOF)
5019           c = getc (inf);
5020       else if (c == TEX_LESC || c == TEX_SESC )
5021         break;
5022     }
5023
5024   if (c == TEX_LESC)
5025     {
5026       TEX_esc = TEX_LESC;
5027       TEX_opgrp = '{';
5028       TEX_clgrp = '}';
5029     }
5030   else
5031     {
5032       TEX_esc = TEX_SESC;
5033       TEX_opgrp = '<';
5034       TEX_clgrp = '>';
5035     }
5036   rewind (inf);
5037 }
5038
5039 /* Read environment and prepend it to the default string.
5040    Build token table. */
5041 static void
5042 TEX_decode_env (const char *evarname, const char *defenv)
5043 {
5044   register const char *env, *p;
5045   int i, len;
5046
5047   /* Append default string to environment. */
5048   env = getenv (evarname);
5049   if (!env)
5050     env = defenv;
5051   else
5052     env = concat (env, defenv, "");
5053
5054   /* Allocate a token table */
5055   for (len = 1, p = env; (p = strchr (p, ':')); )
5056     if (*++p)
5057       len++;
5058   TEX_toktab = xnew (len, linebuffer);
5059
5060   /* Unpack environment string into token table. Be careful about */
5061   /* zero-length strings (leading ':', "::" and trailing ':') */
5062   for (i = 0; *env != '\0';)
5063     {
5064       p = strchr (env, ':');
5065       if (!p)                   /* End of environment string. */
5066         p = env + strlen (env);
5067       if (p - env > 0)
5068         {                       /* Only non-zero strings. */
5069           TEX_toktab[i].buffer = savenstr (env, p - env);
5070           TEX_toktab[i].len = p - env;
5071           i++;
5072         }
5073       if (*p)
5074         env = p + 1;
5075       else
5076         {
5077           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5078           TEX_toktab[i].len = 0;
5079           break;
5080         }
5081     }
5082 }
5083
5084 \f
5085 /* Texinfo support.  Dave Love, Mar. 2000.  */
5086 static void
5087 Texinfo_nodes (FILE *inf)
5088 {
5089   char *cp, *start;
5090   LOOP_ON_INPUT_LINES (inf, lb, cp)
5091     if (LOOKING_AT (cp, "@node"))
5092       {
5093         start = cp;
5094         while (*cp != '\0' && *cp != ',')
5095           cp++;
5096         make_tag (start, cp - start, true,
5097                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5098       }
5099 }
5100
5101 \f
5102 /*
5103  * HTML support.
5104  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5105  * Contents of <a name=xxx> are tags with name xxx.
5106  *
5107  * Francesco Potortì, 2002.
5108  */
5109 static void
5110 HTML_labels (FILE *inf)
5111 {
5112   bool getnext = false;         /* next text outside of HTML tags is a tag */
5113   bool skiptag = false;         /* skip to the end of the current HTML tag */
5114   bool intag = false;           /* inside an html tag, looking for ID= */
5115   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5116   char *end;
5117
5118
5119   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5120
5121   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5122     for (;;)                    /* loop on the same line */
5123       {
5124         if (skiptag)            /* skip HTML tag */
5125           {
5126             while (*dbp != '\0' && *dbp != '>')
5127               dbp++;
5128             if (*dbp == '>')
5129               {
5130                 dbp += 1;
5131                 skiptag = false;
5132                 continue;       /* look on the same line */
5133               }
5134             break;              /* go to next line */
5135           }
5136
5137         else if (intag) /* look for "name=" or "id=" */
5138           {
5139             while (*dbp != '\0' && *dbp != '>'
5140                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5141               dbp++;
5142             if (*dbp == '\0')
5143               break;            /* go to next line */
5144             if (*dbp == '>')
5145               {
5146                 dbp += 1;
5147                 intag = false;
5148                 continue;       /* look on the same line */
5149               }
5150             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5151                 || LOOKING_AT_NOCASE (dbp, "id="))
5152               {
5153                 bool quoted = (dbp[0] == '"');
5154
5155                 if (quoted)
5156                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5157                     continue;
5158                 else
5159                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5160                     continue;
5161                 linebuffer_setlen (&token_name, end - dbp);
5162                 memcpy (token_name.buffer, dbp, end - dbp);
5163                 token_name.buffer[end - dbp] = '\0';
5164
5165                 dbp = end;
5166                 intag = false;  /* we found what we looked for */
5167                 skiptag = true; /* skip to the end of the tag */
5168                 getnext = true; /* then grab the text */
5169                 continue;       /* look on the same line */
5170               }
5171             dbp += 1;
5172           }
5173
5174         else if (getnext)       /* grab next tokens and tag them */
5175           {
5176             dbp = skip_spaces (dbp);
5177             if (*dbp == '\0')
5178               break;            /* go to next line */
5179             if (*dbp == '<')
5180               {
5181                 intag = true;
5182                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5183                 continue;       /* look on the same line */
5184               }
5185
5186             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5187               continue;
5188             make_tag (token_name.buffer, token_name.len, true,
5189                       dbp, end - dbp, lineno, linecharno);
5190             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5191             getnext = false;
5192             break;              /* go to next line */
5193           }
5194
5195         else                    /* look for an interesting HTML tag */
5196           {
5197             while (*dbp != '\0' && *dbp != '<')
5198               dbp++;
5199             if (*dbp == '\0')
5200               break;            /* go to next line */
5201             intag = true;
5202             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5203               {
5204                 inanchor = true;
5205                 continue;       /* look on the same line */
5206               }
5207             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5208                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5209                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5210                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5211               {
5212                 intag = false;
5213                 getnext = true;
5214                 continue;       /* look on the same line */
5215               }
5216             dbp += 1;
5217           }
5218       }
5219 }
5220
5221 \f
5222 /*
5223  * Prolog support
5224  *
5225  * Assumes that the predicate or rule starts at column 0.
5226  * Only the first clause of a predicate or rule is added.
5227  * Original code by Sunichirou Sugou (1989)
5228  * Rewritten by Anders Lindgren (1996)
5229  */
5230 static size_t prolog_pr (char *, char *);
5231 static void prolog_skip_comment (linebuffer *, FILE *);
5232 static size_t prolog_atom (char *, size_t);
5233
5234 static void
5235 Prolog_functions (FILE *inf)
5236 {
5237   char *cp, *last;
5238   size_t len;
5239   size_t allocated;
5240
5241   allocated = 0;
5242   len = 0;
5243   last = NULL;
5244
5245   LOOP_ON_INPUT_LINES (inf, lb, cp)
5246     {
5247       if (cp[0] == '\0')        /* Empty line */
5248         continue;
5249       else if (c_isspace (cp[0])) /* Not a predicate */
5250         continue;
5251       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5252         prolog_skip_comment (&lb, inf);
5253       else if ((len = prolog_pr (cp, last)) > 0)
5254         {
5255           /* Predicate or rule.  Store the function name so that we
5256              only generate a tag for the first clause.  */
5257           if (last == NULL)
5258             last = xnew (len + 1, char);
5259           else if (len + 1 > allocated)
5260             xrnew (last, len + 1, char);
5261           allocated = len + 1;
5262           memcpy (last, cp, len);
5263           last[len] = '\0';
5264         }
5265     }
5266   free (last);
5267 }
5268
5269
5270 static void
5271 prolog_skip_comment (linebuffer *plb, FILE *inf)
5272 {
5273   char *cp;
5274
5275   do
5276     {
5277       for (cp = plb->buffer; *cp != '\0'; cp++)
5278         if (cp[0] == '*' && cp[1] == '/')
5279           return;
5280       readline (plb, inf);
5281     }
5282   while (!feof (inf));
5283 }
5284
5285 /*
5286  * A predicate or rule definition is added if it matches:
5287  *     <beginning of line><Prolog Atom><whitespace>(
5288  * or  <beginning of line><Prolog Atom><whitespace>:-
5289  *
5290  * It is added to the tags database if it doesn't match the
5291  * name of the previous clause header.
5292  *
5293  * Return the size of the name of the predicate or rule, or 0 if no
5294  * header was found.
5295  */
5296 static size_t
5297 prolog_pr (char *s, char *last)
5298
5299                                 /* Name of last clause. */
5300 {
5301   size_t pos;
5302   size_t len;
5303
5304   pos = prolog_atom (s, 0);
5305   if (! pos)
5306     return 0;
5307
5308   len = pos;
5309   pos = skip_spaces (s + pos) - s;
5310
5311   if ((s[pos] == '.'
5312        || (s[pos] == '(' && (pos += 1))
5313        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5314       && (last == NULL          /* save only the first clause */
5315           || len != strlen (last)
5316           || !strneq (s, last, len)))
5317         {
5318           make_tag (s, len, true, s, pos, lineno, linecharno);
5319           return len;
5320         }
5321   else
5322     return 0;
5323 }
5324
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * A quoted atom that is not closed before the end of S is an error.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (*p == '\'')
    {
      /* Quoted atom: walk to the closing quote.  */
      p++;
      while (*p != '\0')
        {
          if (*p == '\\')
            {
              /* A backslash takes the next character with it.  */
              if (p[1] == '\0')
                return 0;
              p += 2;
            }
          else if (*p == '\'')
            {
              if (p[1] != '\'')
                return (p + 1) - start; /* include the closing quote */
              p += 2;           /* a doubled quote stands for itself */
            }
          else
            p++;
        }
      /* Multiline quoted atoms are ignored. */
      return 0;
    }

  if (!c_islower (*p) && *p != '_')
    return 0;

  /* The atom is unquoted. */
  do
    p++;
  while (c_isalnum (*p) || *p == '_');

  return p - start;
}
5381
5382 \f
5383 /*
5384  * Support for Erlang
5385  *
5386  * Generates tags for functions, defines, and records.
5387  * Assumes that Erlang functions start at column 0.
5388  * Original code by Anders Lindgren (1996)
5389  */
5390 static int erlang_func (char *, char *);
5391 static void erlang_attribute (char *);
5392 static int erlang_atom (char *);
5393
5394 static void
5395 Erlang_functions (FILE *inf)
5396 {
5397   char *cp, *last;
5398   int len;
5399   int allocated;
5400
5401   allocated = 0;
5402   len = 0;
5403   last = NULL;
5404
5405   LOOP_ON_INPUT_LINES (inf, lb, cp)
5406     {
5407       if (cp[0] == '\0')        /* Empty line */
5408         continue;
5409       else if (c_isspace (cp[0])) /* Not function nor attribute */
5410         continue;
5411       else if (cp[0] == '%')    /* comment */
5412         continue;
5413       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5414         continue;
5415       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5416         {
5417           erlang_attribute (cp);
5418           if (last != NULL)
5419             {
5420               free (last);
5421               last = NULL;
5422             }
5423         }
5424       else if ((len = erlang_func (cp, last)) > 0)
5425         {
5426           /*
5427            * Function.  Store the function name so that we only
5428            * generates a tag for the first clause.
5429            */
5430           if (last == NULL)
5431             last = xnew (len + 1, char);
5432           else if (len + 1 > allocated)
5433             xrnew (last, len + 1, char);
5434           allocated = len + 1;
5435           memcpy (last, cp, len);
5436           last[len] = '\0';
5437         }
5438     }
5439   free (last);
5440 }
5441
5442
5443 /*
5444  * A function definition is added if it matches:
5445  *     <beginning of line><Erlang Atom><whitespace>(
5446  *
5447  * It is added to the tags database if it doesn't match the
5448  * name of the previous clause header.
5449  *
5450  * Return the size of the name of the function, or 0 if no function
5451  * was found.
5452  */
5453 static int
5454 erlang_func (char *s, char *last)
5455
5456                                 /* Name of last clause. */
5457 {
5458   int pos;
5459   int len;
5460
5461   pos = erlang_atom (s);
5462   if (pos < 1)
5463     return 0;
5464
5465   len = pos;
5466   pos = skip_spaces (s + pos) - s;
5467
5468   /* Save only the first clause. */
5469   if (s[pos++] == '('
5470       && (last == NULL
5471           || len != (int)strlen (last)
5472           || !strneq (s, last, len)))
5473         {
5474           make_tag (s, len, true, s, pos, lineno, linecharno);
5475           return len;
5476         }
5477
5478   return 0;
5479 }
5480
5481
5482 /*
5483  * Handle attributes.  Currently, tags are generated for defines
5484  * and records.
5485  *
5486  * They are on the form:
5487  * -define(foo, bar).
5488  * -define(Foo(M, N), M+N).
5489  * -record(graph, {vtab = notable, cyclic = true}).
5490  */
5491 static void
5492 erlang_attribute (char *s)
5493 {
5494   char *cp = s;
5495
5496   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5497       && *cp++ == '(')
5498     {
5499       int len = erlang_atom (skip_spaces (cp));
5500       if (len > 0)
5501         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5502     }
5503   return;
5504 }
5505
5506
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted name starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted name is
 * delimited by single quotes; inside it a backslash takes the next
 * character with it.
 *
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a name, and also when a quoted name is not closed
 * before the end of S.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\\')
            n++;                /* look at the escaped character */
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          n++;
        }
      return n + 1;             /* include the closing quote */
    }

  /* The atom is unquoted. */
  if (c_isalpha (s[0]) || s[0] == '_')
    for (n = 1; c_isalnum (s[n]) || s[n] == '_'; n++)
      continue;

  return n;
}
5534
5535 \f
5536 static char *scan_separators (char *);
5537 static void add_regex (char *, language *);
5538 static char *substitute (char *, char *, struct re_registers *);
5539
/*
 * Take a string like "/blah/" and turn it into "blah".
 *
 * The first character of NAME is the separator.  The text that
 * follows it is copied down to the start of NAME, in place, up to the
 * first unquoted occurrence of the separator.  While copying:
 * - backslash followed by one of a b d e f n r t v becomes the
 *   corresponding control character (BEL, BS, DEL, ESC, FF, NL, CR,
 *   TAB, VT);
 * - backslash followed by the separator becomes the separator;
 * - backslash followed by anything else is kept as it is.
 * The copied text is null terminated.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  /* Escape letters and, at the same index, the characters they
     stand for.  */
  static const char letters[] = "abdefnrtv";
  static const char values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  const char sep = name[0];
  char *dst = name;             /* where the next character is stored */
  char *src;                    /* the character being examined */
  char *end;
  bool escaped = false;         /* previous character was a backslash */

  for (src = name + 1; *src != '\0'; src++)
    {
      const char c = *src;

      if (escaped)
        {
          const char *hit = strchr (letters, c);

          escaped = false;
          if (hit != NULL)
            *dst++ = values[hit - letters];
          else if (c == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = c;
            }
        }
      else if (c == '\\')
        escaped = true;
      else if (c == sep)
        break;
      else
        *dst++ = c;
    }

  /* Running off the end of the string signals an unterminated regexp. */
  end = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return end;
}
5598
5599 /* Look at the argument of --regex or --no-regex and do the right
5600    thing.  Same for each line of a regexp file. */
5601 static void
5602 analyze_regex (char *regex_arg)
5603 {
5604   if (regex_arg == NULL)
5605     {
5606       free_regexps ();          /* --no-regex: remove existing regexps */
5607       return;
5608     }
5609
5610   /* A real --regexp option or a line in a regexp file. */
5611   switch (regex_arg[0])
5612     {
5613       /* Comments in regexp file or null arg to --regex. */
5614     case '\0':
5615     case ' ':
5616     case '\t':
5617       break;
5618
5619       /* Read a regex file.  This is recursive and may result in a
5620          loop, which will stop when the file descriptors are exhausted. */
5621     case '@':
5622       {
5623         FILE *regexfp;
5624         linebuffer regexbuf;
5625         char *regexfile = regex_arg + 1;
5626
5627         /* regexfile is a file containing regexps, one per line. */
5628         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5629         if (regexfp == NULL)
5630           pfatal (regexfile);
5631         linebuffer_init (&regexbuf);
5632         while (readline_internal (&regexbuf, regexfp) > 0)
5633           analyze_regex (regexbuf.buffer);
5634         free (regexbuf.buffer);
5635         fclose (regexfp);
5636       }
5637       break;
5638
5639       /* Regexp to be used for a specific language only. */
5640     case '{':
5641       {
5642         language *lang;
5643         char *lang_name = regex_arg + 1;
5644         char *cp;
5645
5646         for (cp = lang_name; *cp != '}'; cp++)
5647           if (*cp == '\0')
5648             {
5649               error ("unterminated language name in regex: %s", regex_arg);
5650               return;
5651             }
5652         *cp++ = '\0';
5653         lang = get_language_from_langname (lang_name);
5654         if (lang == NULL)
5655           return;
5656         add_regex (cp, lang);
5657       }
5658       break;
5659
5660       /* Regexp to be used for any language. */
5661     default:
5662       add_regex (regex_arg, NULL);
5663       break;
5664     }
5665 }
5666
5667 /* Separate the regexp pattern, compile it,
5668    and care for optional name and modifiers. */
5669 static void
5670 add_regex (char *regexp_pattern, language *lang)
5671 {
5672   static struct re_pattern_buffer zeropattern;
5673   char sep, *pat, *name, *modifiers;
5674   char empty = '\0';
5675   const char *err;
5676   struct re_pattern_buffer *patbuf;
5677   regexp *rp;
5678   bool
5679     force_explicit_name = true, /* do not use implicit tag names */
5680     ignore_case = false,        /* case is significant */
5681     multi_line = false,         /* matches are done one line at a time */
5682     single_line = false;        /* dot does not match newline */
5683
5684
5685   if (strlen (regexp_pattern) < 3)
5686     {
5687       error ("null regexp");
5688       return;
5689     }
5690   sep = regexp_pattern[0];
5691   name = scan_separators (regexp_pattern);
5692   if (name == NULL)
5693     {
5694       error ("%s: unterminated regexp", regexp_pattern);
5695       return;
5696     }
5697   if (name[1] == sep)
5698     {
5699       error ("null name for regexp \"%s\"", regexp_pattern);
5700       return;
5701     }
5702   modifiers = scan_separators (name);
5703   if (modifiers == NULL)        /* no terminating separator --> no name */
5704     {
5705       modifiers = name;
5706       name = &empty;
5707     }
5708   else
5709     modifiers += 1;             /* skip separator */
5710
5711   /* Parse regex modifiers. */
5712   for (; modifiers[0] != '\0'; modifiers++)
5713     switch (modifiers[0])
5714       {
5715       case 'N':
5716         if (modifiers == name)
5717           error ("forcing explicit tag name but no name, ignoring");
5718         force_explicit_name = true;
5719         break;
5720       case 'i':
5721         ignore_case = true;
5722         break;
5723       case 's':
5724         single_line = true;
5725         /* FALLTHRU */
5726       case 'm':
5727         multi_line = true;
5728         need_filebuf = true;
5729         break;
5730       default:
5731         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5732         break;
5733       }
5734
5735   patbuf = xnew (1, struct re_pattern_buffer);
5736   *patbuf = zeropattern;
5737   if (ignore_case)
5738     {
5739       static char lc_trans[UCHAR_MAX + 1];
5740       int i;
5741       for (i = 0; i < UCHAR_MAX + 1; i++)
5742         lc_trans[i] = c_tolower (i);
5743       patbuf->translate = lc_trans;     /* translation table to fold case  */
5744     }
5745
5746   if (multi_line)
5747     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5748   else
5749     pat = regexp_pattern;
5750
5751   if (single_line)
5752     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5753   else
5754     re_set_syntax (RE_SYNTAX_EMACS);
5755
5756   err = re_compile_pattern (pat, strlen (pat), patbuf);
5757   if (multi_line)
5758     free (pat);
5759   if (err != NULL)
5760     {
5761       error ("%s while compiling pattern", err);
5762       return;
5763     }
5764
5765   rp = p_head;
5766   p_head = xnew (1, regexp);
5767   p_head->pattern = savestr (regexp_pattern);
5768   p_head->p_next = rp;
5769   p_head->lang = lang;
5770   p_head->pat = patbuf;
5771   p_head->name = savestr (name);
5772   p_head->error_signaled = false;
5773   p_head->force_explicit_name = force_explicit_name;
5774   p_head->ignore_case = ignore_case;
5775   p_head->multi_line = multi_line;
5776 }
5777
5778 /*
5779  * Do the substitutions indicated by the regular expression and
5780  * arguments.
5781  */
5782 static char *
5783 substitute (char *in, char *out, struct re_registers *regs)
5784 {
5785   char *result, *t;
5786   int size, dig, diglen;
5787
5788   result = NULL;
5789   size = strlen (out);
5790
5791   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5792   if (out[size - 1] == '\\')
5793     fatal ("pattern error in \"%s\"", out);
5794   for (t = strchr (out, '\\');
5795        t != NULL;
5796        t = strchr (t + 2, '\\'))
5797     if (c_isdigit (t[1]))
5798       {
5799         dig = t[1] - '0';
5800         diglen = regs->end[dig] - regs->start[dig];
5801         size += diglen - 2;
5802       }
5803     else
5804       size -= 1;
5805
5806   /* Allocate space and do the substitutions. */
5807   assert (size >= 0);
5808   result = xnew (size + 1, char);
5809
5810   for (t = result; *out != '\0'; out++)
5811     if (*out == '\\' && c_isdigit (*++out))
5812       {
5813         dig = *out - '0';
5814         diglen = regs->end[dig] - regs->start[dig];
5815         memcpy (t, in + regs->start[dig], diglen);
5816         t += diglen;
5817       }
5818     else
5819       *t++ = *out;
5820   *t = '\0';
5821
5822   assert (t <= result + size);
5823   assert (t - result == (int)strlen (result));
5824
5825   return result;
5826 }
5827
5828 /* Deallocate all regexps. */
5829 static void
5830 free_regexps (void)
5831 {
5832   regexp *rp;
5833   while (p_head != NULL)
5834     {
5835       rp = p_head->p_next;
5836       free (p_head->pattern);
5837       free (p_head->name);
5838       free (p_head);
5839       p_head = rp;
5840     }
5841   return;
5842 }
5843
5844 /*
5845  * Reads the whole file as a single string from `filebuf' and looks for
5846  * multi-line regular expressions, creating tags on matches.
5847  * readline already dealt with normal regexps.
5848  *
5849  * Idea by Ben Wing <ben@666.com> (2002).
5850  */
5851 static void
5852 regex_tag_multiline (void)
5853 {
5854   char *buffer = filebuf.buffer;
5855   regexp *rp;
5856   char *name;
5857
5858   for (rp = p_head; rp != NULL; rp = rp->p_next)
5859     {
5860       int match = 0;
5861
5862       if (!rp->multi_line)
5863         continue;               /* skip normal regexps */
5864
5865       /* Generic initializations before parsing file from memory. */
5866       lineno = 1;               /* reset global line number */
5867       charno = 0;               /* reset global char number */
5868       linecharno = 0;           /* reset global char number of line start */
5869
5870       /* Only use generic regexps or those for the current language. */
5871       if (rp->lang != NULL && rp->lang != curfdp->lang)
5872         continue;
5873
5874       while (match >= 0 && match < filebuf.len)
5875         {
5876           match = re_search (rp->pat, buffer, filebuf.len, charno,
5877                              filebuf.len - match, &rp->regs);
5878           switch (match)
5879             {
5880             case -2:
5881               /* Some error. */
5882               if (!rp->error_signaled)
5883                 {
5884                   error ("regexp stack overflow while matching \"%s\"",
5885                          rp->pattern);
5886                   rp->error_signaled = true;
5887                 }
5888               break;
5889             case -1:
5890               /* No match. */
5891               break;
5892             default:
5893               if (match == rp->regs.end[0])
5894                 {
5895                   if (!rp->error_signaled)
5896                     {
5897                       error ("regexp matches the empty string: \"%s\"",
5898                              rp->pattern);
5899                       rp->error_signaled = true;
5900                     }
5901                   match = -3;   /* exit from while loop */
5902                   break;
5903                 }
5904
5905               /* Match occurred.  Construct a tag. */
5906               while (charno < rp->regs.end[0])
5907                 if (buffer[charno++] == '\n')
5908                   lineno++, linecharno = charno;
5909               name = rp->name;
5910               if (name[0] == '\0')
5911                 name = NULL;
5912               else /* make a named tag */
5913                 name = substitute (buffer, rp->name, &rp->regs);
5914               if (rp->force_explicit_name)
5915                 /* Force explicit tag name, if a name is there. */
5916                 pfnote (name, true, buffer + linecharno,
5917                         charno - linecharno + 1, lineno, linecharno);
5918               else
5919                 make_tag (name, strlen (name), true, buffer + linecharno,
5920                           charno - linecharno + 1, lineno, linecharno);
5921               break;
5922             }
5923         }
5924     }
5925 }
5926
5927 \f
5928 static bool
5929 nocase_tail (const char *cp)
5930 {
5931   int len = 0;
5932
5933   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
5934     cp++, len++;
5935   if (*cp == '\0' && !intoken (dbp[len]))
5936     {
5937       dbp += len;
5938       return true;
5939     }
5940   return false;
5941 }
5942
5943 static void
5944 get_tag (register char *bp, char **namepp)
5945 {
5946   register char *cp = bp;
5947
5948   if (*bp != '\0')
5949     {
5950       /* Go till you get to white space or a syntactic break */
5951       for (cp = bp + 1; !notinname (*cp); cp++)
5952         continue;
5953       make_tag (bp, cp - bp, true,
5954                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5955     }
5956
5957   if (namepp != NULL)
5958     *namepp = savenstr (bp, cp - bp);
5959 }
5960
5961 /*
5962  * Read a line of text from `stream' into `lbp', excluding the
5963  * newline or CR-NL, if any.  Return the number of characters read from
5964  * `stream', which is the length of the line including the newline.
5965  *
5966  * On DOS or Windows we do not count the CR character, if any before the
5967  * NL, in the returned length; this mirrors the behavior of Emacs on those
5968  * platforms (for text files, it translates CR-NL to NL as it reads in the
5969  * file).
5970  *
5971  * If multi-line regular expressions are requested, each line read is
5972  * appended to `filebuf'.
5973  */
5974 static long
5975 readline_internal (linebuffer *lbp, register FILE *stream)
5976 {
5977   char *buffer = lbp->buffer;
5978   register char *p = lbp->buffer;
5979   register char *pend;
5980   int chars_deleted;
5981
5982   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5983
5984   for (;;)
5985     {
5986       register int c = getc (stream);
5987       if (p == pend)
5988         {
5989           /* We're at the end of linebuffer: expand it. */
5990           lbp->size *= 2;
5991           xrnew (buffer, lbp->size, char);
5992           p += buffer - lbp->buffer;
5993           pend = buffer + lbp->size;
5994           lbp->buffer = buffer;
5995         }
5996       if (c == EOF)
5997         {
5998           *p = '\0';
5999           chars_deleted = 0;
6000           break;
6001         }
6002       if (c == '\n')
6003         {
6004           if (p > buffer && p[-1] == '\r')
6005             {
6006               p -= 1;
6007 #ifdef DOS_NT
6008              /* Assume CRLF->LF translation will be performed by Emacs
6009                 when loading this file, so CRs won't appear in the buffer.
6010                 It would be cleaner to compensate within Emacs;
6011                 however, Emacs does not know how many CRs were deleted
6012                 before any given point in the file.  */
6013               chars_deleted = 1;
6014 #else
6015               chars_deleted = 2;
6016 #endif
6017             }
6018           else
6019             {
6020               chars_deleted = 1;
6021             }
6022           *p = '\0';
6023           break;
6024         }
6025       *p++ = c;
6026     }
6027   lbp->len = p - buffer;
6028
6029   if (need_filebuf              /* we need filebuf for multi-line regexps */
6030       && chars_deleted > 0)     /* not at EOF */
6031     {
6032       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6033         {
6034           /* Expand filebuf. */
6035           filebuf.size *= 2;
6036           xrnew (filebuf.buffer, filebuf.size, char);
6037         }
6038       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6039       filebuf.len += lbp->len;
6040       filebuf.buffer[filebuf.len++] = '\n';
6041       filebuf.buffer[filebuf.len] = '\0';
6042     }
6043
6044   return lbp->len + chars_deleted;
6045 }
6046
6047 /*
6048  * Like readline_internal, above, but in addition try to match the
6049  * input line against relevant regular expressions and manage #line
6050  * directives.
6051  */
6052 static void
6053 readline (linebuffer *lbp, FILE *stream)
6054 {
6055   long result;
6056
6057   linecharno = charno;          /* update global char number of line start */
6058   result = readline_internal (lbp, stream); /* read line */
6059   lineno += 1;                  /* increment global line number */
6060   charno += result;             /* increment global char number */
6061
6062   /* Honor #line directives. */
6063   if (!no_line_directive)
6064     {
6065       static bool discard_until_line_directive;
6066
6067       /* Check whether this is a #line directive. */
6068       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6069         {
6070           unsigned int lno;
6071           int start = 0;
6072
6073           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6074               && start > 0)     /* double quote character found */
6075             {
6076               char *endp = lbp->buffer + start;
6077
6078               while ((endp = strchr (endp, '"')) != NULL
6079                      && endp[-1] == '\\')
6080                 endp++;
6081               if (endp != NULL)
6082                 /* Ok, this is a real #line directive.  Let's deal with it. */
6083                 {
6084                   char *taggedabsname;  /* absolute name of original file */
6085                   char *taggedfname;    /* name of original file as given */
6086                   char *name;           /* temp var */
6087
6088                   discard_until_line_directive = false; /* found it */
6089                   name = lbp->buffer + start;
6090                   *endp = '\0';
6091                   canonicalize_filename (name);
6092                   taggedabsname = absolute_filename (name, tagfiledir);
6093                   if (filename_is_absolute (name)
6094                       || filename_is_absolute (curfdp->infname))
6095                     taggedfname = savestr (taggedabsname);
6096                   else
6097                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6098
6099                   if (streq (curfdp->taggedfname, taggedfname))
6100                     /* The #line directive is only a line number change.  We
6101                        deal with this afterwards. */
6102                     free (taggedfname);
6103                   else
6104                     /* The tags following this #line directive should be
6105                        attributed to taggedfname.  In order to do this, set
6106                        curfdp accordingly. */
6107                     {
6108                       fdesc *fdp; /* file description pointer */
6109
6110                       /* Go look for a file description already set up for the
6111                          file indicated in the #line directive.  If there is
6112                          one, use it from now until the next #line
6113                          directive. */
6114                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6115                         if (streq (fdp->infname, curfdp->infname)
6116                             && streq (fdp->taggedfname, taggedfname))
6117                           /* If we remove the second test above (after the &&)
6118                              then all entries pertaining to the same file are
6119                              coalesced in the tags file.  If we use it, then
6120                              entries pertaining to the same file but generated
6121                              from different files (via #line directives) will
6122                              go into separate sections in the tags file.  These
6123                              alternatives look equivalent.  The first one
6124                              destroys some apparently useless information. */
6125                           {
6126                             curfdp = fdp;
6127                             free (taggedfname);
6128                             break;
6129                           }
6130                       /* Else, if we already tagged the real file, skip all
6131                          input lines until the next #line directive. */
6132                       if (fdp == NULL) /* not found */
6133                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6134                           if (streq (fdp->infabsname, taggedabsname))
6135                             {
6136                               discard_until_line_directive = true;
6137                               free (taggedfname);
6138                               break;
6139                             }
6140                       /* Else create a new file description and use that from
6141                          now on, until the next #line directive. */
6142                       if (fdp == NULL) /* not found */
6143                         {
6144                           fdp = fdhead;
6145                           fdhead = xnew (1, fdesc);
6146                           *fdhead = *curfdp; /* copy curr. file description */
6147                           fdhead->next = fdp;
6148                           fdhead->infname = savestr (curfdp->infname);
6149                           fdhead->infabsname = savestr (curfdp->infabsname);
6150                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6151                           fdhead->taggedfname = taggedfname;
6152                           fdhead->usecharno = false;
6153                           fdhead->prop = NULL;
6154                           fdhead->written = false;
6155                           curfdp = fdhead;
6156                         }
6157                     }
6158                   free (taggedabsname);
6159                   lineno = lno - 1;
6160                   readline (lbp, stream);
6161                   return;
6162                 } /* if a real #line directive */
6163             } /* if #line is followed by a number */
6164         } /* if line begins with "#line " */
6165
6166       /* If we are here, no #line directive was found. */
6167       if (discard_until_line_directive)
6168         {
6169           if (result > 0)
6170             {
6171               /* Do a tail recursion on ourselves, thus discarding the contents
6172                  of the line buffer. */
6173               readline (lbp, stream);
6174               return;
6175             }
6176           /* End of file. */
6177           discard_until_line_directive = false;
6178           return;
6179         }
6180     } /* if #line directives should be considered */
6181
6182   {
6183     int match;
6184     regexp *rp;
6185     char *name;
6186
6187     /* Match against relevant regexps. */
6188     if (lbp->len > 0)
6189       for (rp = p_head; rp != NULL; rp = rp->p_next)
6190         {
6191           /* Only use generic regexps or those for the current language.
6192              Also do not use multiline regexps, which is the job of
6193              regex_tag_multiline. */
6194           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6195               || rp->multi_line)
6196             continue;
6197
6198           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6199           switch (match)
6200             {
6201             case -2:
6202               /* Some error. */
6203               if (!rp->error_signaled)
6204                 {
6205                   error ("regexp stack overflow while matching \"%s\"",
6206                          rp->pattern);
6207                   rp->error_signaled = true;
6208                 }
6209               break;
6210             case -1:
6211               /* No match. */
6212               break;
6213             case 0:
6214               /* Empty string matched. */
6215               if (!rp->error_signaled)
6216                 {
6217                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6218                   rp->error_signaled = true;
6219                 }
6220               break;
6221             default:
6222               /* Match occurred.  Construct a tag. */
6223               name = rp->name;
6224               if (name[0] == '\0')
6225                 name = NULL;
6226               else /* make a named tag */
6227                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6228               if (rp->force_explicit_name)
6229                 /* Force explicit tag name, if a name is there. */
6230                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6231               else
6232                 make_tag (name, strlen (name), true,
6233                           lbp->buffer, match, lineno, linecharno);
6234               break;
6235             }
6236         }
6237   }
6238 }
6239
6240 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, so it occupies strlen (CP) + 1 bytes and the caller owns
 * (and eventually frees) the returned storage.
 */
static char *
savestr (const char *cp)
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6250
6251 /*
6252  * Return a pointer to a space of size LEN+1 allocated with xnew where
6253  * the string CP has been copied for at most the first LEN characters.
6254  */
6255 static char *
6256 savenstr (const char *cp, int len)
6257 {
6258   char *dp = xnew (len + 1, char);
6259   dp[len] = '\0';
6260   return memcpy (dp, cp, len);
6261 }
6262
/* Advance CP past every character for which c_isspace holds and return
   the resulting pointer.  The end of the string is not a space, so the
   scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6271
/* Advance CP up to the first space character or the terminating NUL,
   whichever comes first, and return the resulting pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
6280
/* Advance CP past every character that belongs to the "name" class,
   i.e. for which notinname is false, and return the resulting pointer. */
static char *
skip_name (char *cp)
{
  /* No explicit end-of-string test: '\0' is a notinname() character,
     so the scan stops there too.  */
  for (; ! notinname (*cp); cp++)
    continue;
  return cp;
}
6290
/* Report a diagnostic through error, using S1 as the format string and
   S2 as its single argument, then terminate the program with
   EXIT_FAILURE.  This function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6298
/* Print S1 followed by the message for the current errno (via perror),
   then terminate the program with EXIT_FAILURE.  Does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6305
6306 static void
6307 suggest_asking_for_help (void)
6308 {
6309   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6310            progname);
6311   exit (EXIT_FAILURE);
6312 }
6313
6314 /* Output a diagnostic with printf-style FORMAT and args.  */
6315 static void
6316 error (const char *format, ...)
6317 {
6318   va_list ap;
6319   va_start (ap, format);
6320   fprintf (stderr, "%s: ", progname);
6321   vfprintf (stderr, format, ap);
6322   fprintf (stderr, "\n");
6323   va_end (ap);
6324 }
6325
6326 /* Return a newly-allocated string whose contents
6327    concatenate those of s1, s2, s3.  */
6328 static char *
6329 concat (const char *s1, const char *s2, const char *s3)
6330 {
6331   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6332   char *result = xnew (len1 + len2 + len3 + 1, char);
6333
6334   strcpy (result, s1);
6335   strcpy (result + len1, s2);
6336   strcpy (result + len1 + len2, s3);
6337
6338   return result;
6339 }
6340
6341 \f
6342 /* Does the same work as the system V getcwd, but does not need to
6343    guess the buffer size in advance. */
6344 static char *
6345 etags_getcwd (void)
6346 {
6347   int bufsize = 200;
6348   char *path = xnew (bufsize, char);
6349
6350   while (getcwd (path, bufsize) == NULL)
6351     {
6352       if (errno != ERANGE)
6353         pfatal ("getcwd");
6354       bufsize *= 2;
6355       free (path);
6356       path = xnew (bufsize, char);
6357     }
6358
6359   canonicalize_filename (path);
6360   return path;
6361 }
6362
6363 /* Return a newly allocated string containing a name of a temporary file.  */
6364 static char *
6365 etags_mktmp (void)
6366 {
6367   const char *tmpdir = getenv ("TMPDIR");
6368   const char *slash = "/";
6369
6370 #if MSDOS || defined (DOS_NT)
6371   if (!tmpdir)
6372     tmpdir = getenv ("TEMP");
6373   if (!tmpdir)
6374     tmpdir = getenv ("TMP");
6375   if (!tmpdir)
6376     tmpdir = ".";
6377   if (tmpdir[strlen (tmpdir) - 1] == '/'
6378       || tmpdir[strlen (tmpdir) - 1] == '\\')
6379     slash = "";
6380 #else
6381   if (!tmpdir)
6382     tmpdir = "/tmp";
6383   if (tmpdir[strlen (tmpdir) - 1] == '/')
6384     slash = "";
6385 #endif
6386
6387   char *templt = concat (tmpdir, slash, "etXXXXXX");
6388   int fd = mkostemp (templt, O_CLOEXEC);
6389   if (fd < 0)
6390     {
6391       free (templt);
6392       templt = NULL;
6393     }
6394   else
6395     close (fd);
6396
6397 #if defined (DOS_NT)
6398   /* The file name will be used in shell redirection, so it needs to have
6399      DOS-style backslashes, or else the Windows shell will barf.  */
6400   char *p;
6401   for (p = templt; *p; p++)
6402     if (*p == '/')
6403       *p = '\\';
6404 #endif
6405   return templt;
6406 }
6407
6408 /* Return a newly allocated string containing the file name of FILE
6409    relative to the absolute directory DIR (which should end with a slash). */
6410 static char *
6411 relative_filename (char *file, char *dir)
6412 {
6413   char *fp, *dp, *afn, *res;
6414   int i;
6415
6416   /* Find the common root of file and dir (with a trailing slash). */
6417   afn = absolute_filename (file, cwd);
6418   fp = afn;
6419   dp = dir;
6420   while (*fp++ == *dp++)
6421     continue;
6422   fp--, dp--;                   /* back to the first differing char */
6423 #ifdef DOS_NT
6424   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6425     return afn;
6426 #endif
6427   do                            /* look at the equal chars until '/' */
6428     fp--, dp--;
6429   while (*fp != '/');
6430
6431   /* Build a sequence of "../" strings for the resulting relative file name. */
6432   i = 0;
6433   while ((dp = strchr (dp + 1, '/')) != NULL)
6434     i += 1;
6435   res = xnew (3*i + strlen (fp + 1) + 1, char);
6436   char *z = res;
6437   while (i-- > 0)
6438     z = stpcpy (z, "../");
6439
6440   /* Add the file name relative to the common root of file and dir. */
6441   strcpy (z, fp + 1);
6442   free (afn);
6443
6444   return res;
6445 }
6446
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is simply copied; otherwise DIR and
   FILE are concatenated.  The result is then simplified in place:
   every "/dirname/.." and every "/." component is deleted.  A leading
   "/.." (nothing to go up from) is just dropped.  If nothing is left,
   "/" is returned.

   On DOS_NT a relative FILE with a drive letter (`d:NAME') is a fatal
   error.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  Check FILE[0]
     first so that an empty FILE is not read past its terminator.  */
  else if (file[0] != '\0' && file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component,
                 i.e. to the closest earlier position where an absolute
                 name begins.  The scan never moves CP before RES.  */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    break;
                }
              if (cp == slashp || !filename_is_absolute (cp))
                cp = slashp;    /* no preceding component: the name
                                   begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) equals the length of the tail after
                 "/.." plus one, so the terminating NUL moves as well.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." ; the byte count again includes the NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6509
/* Return a newly allocated string containing the absolute name of the
   directory in which FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash, the result is a copy of DIR.

   FILE is cut temporarily right after its last slash while the
   directory part is made absolute, and is restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Truncate FILE in place after the last slash, remembering the
     character that is overwritten.  */
  char *cut = last_slash + 1;
  char saved = *cut;
  *cut = '\0';

  char *dirname = absolute_filename (file, dir);

  *cut = saved;
  return dirname;
}
6529
/* Return true if FN is an absolute file name: it starts with a slash
   or, on DOS_NT, with a drive letter followed by ":/".  FN must have
   been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6541
/* Canonicalize the file name FN in place.  Every run of directory
   separators is collapsed into a single forward slash.  On DOS_NT an
   upper-case drive letter is converted to lower case first, and a
   backslash counts as a separator as well.  The result is never longer
   than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  while (*src != '\0')
    {
#ifdef DOS_NT
      /* Forward and backward slashes both separate components.  */
      if (*src == '/' || *src == '\\')
        {
          *dst = '/';
          while (src[1] == '/' || src[1] == '\\')
            src++;
        }
#else  /* !DOS_NT */
      if (*src == '/')
        {
          *dst = '/';
          while (src[1] == '/')
            src++;
        }
#endif  /* !DOS_NT */
      else
        *dst = *src;

      src++;
      dst++;
    }

  *dst = '\0';
}
6583
6584 \f
6585 /* Initialize a linebuffer for use. */
6586 static void
6587 linebuffer_init (linebuffer *lbp)
6588 {
6589   lbp->size = (DEBUG) ? 3 : 200;
6590   lbp->buffer = xnew (lbp->size, char);
6591   lbp->buffer[0] = '\0';
6592   lbp->len = 0;
6593 }
6594
6595 /* Set the minimum size of a string contained in a linebuffer. */
6596 static void
6597 linebuffer_setlen (linebuffer *lbp, int toksize)
6598 {
6599   while (lbp->size <= toksize)
6600     {
6601       lbp->size *= 2;
6602       xrnew (lbp->buffer, lbp->size, char);
6603     }
6604   lbp->len = toksize;
6605 }
6606
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal, which
   terminates the program. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6616
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal, which terminates the program. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6625
6626 /*
6627  * Local Variables:
6628  * indent-tabs-mode: t
6629  * tab-width: 8
6630  * fill-column: 79
6631  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6632  * c-file-style: "gnu"
6633  * End:
6634  */
6635
6636 /* etags.c ends here */