code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
/* Version identification string compiled into the binary.
   NOTE(review): the "@(#)" prefix looks like the marker that what(1)
   searches for -- confirm.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";

/* Normalize DEBUG so that it is always defined: true when the build
   defined it, false otherwise.  In the latter case NDEBUG is defined as
   well, which disables assert.  */
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG true
#else
#  define DEBUG  false
#  define NDEBUG                /* disable assert */
#endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
/* WIN32_NATIVE is for XEmacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* A WIN32_NATIVE build is mapped onto the Emacs macros: MSDOS is
   dropped and WINDOWSNT is (re)defined.  */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef  WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* Normalize MSDOS so that it is always defined, as true or false.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */

/* On WINDOWSNT, supply MAXPATHLEN and O_CLOEXEC in terms of the native
   _MAX_PATH and O_NOINHERIT, drop HAVE_NTGUI and make sure DOS_NT is
   defined.  */
#ifdef WINDOWSNT
# include <direct.h>
# define MAXPATHLEN _MAX_PATH
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
# define O_CLOEXEC O_NOINHERIT
#endif /* WINDOWSNT */
 121
 122 #include <limits.h>
 123 #include <unistd.h>
 124 #include <stdarg.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sysstdio.h>
 128 #include <errno.h>
 129 #include <fcntl.h>
 130 #include <binary-io.h>
 131 #include <c-ctype.h>
 132 #include <c-strcase.h>
 133
 134 #include <assert.h>
 135 #ifdef NDEBUG
 136 # undef  assert                 /* some systems have a buggy assert.h */
 137 # define assert(x) ((void) 0)
 138 #endif
 139
 140 #include <getopt.h>
 141 #include <regex.h>
 142
/* Define CTAGS to make the program "ctags" compatible with the usual one.
 Leave it undefined to make the program "etags", which makes emacs-style
 tag tables and tags typedefs, #defines and struct/union/enum by default.

 Whatever the build passed in, CTAGS is redefined here as true or false,
 so it is always defined from this point on.  */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
 152
 153 #define streq(s,t)      (assert ((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 154 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 155 #define strneq(s,t,n)   (assert ((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 156 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 157
 158 /* C is not in a name.  */
 159 static bool
 160 notinname (unsigned char c)
 161 {
 162   /* Look at make_tag before modifying!  */
 163   static bool const table[UCHAR_MAX + 1] = {
 164     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 165     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 166   };
 167   return table[c];
 168 }
 169
 170 /* C can start a token.  */
 171 static bool
 172 begtoken (unsigned char c)
 173 {
 174   static bool const table[UCHAR_MAX + 1] = {
 175     ['$']=1, ['@']=1,
 176     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 177     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 178     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 179     ['Y']=1, ['Z']=1,
 180     ['_']=1,
 181     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 182     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 183     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 184     ['y']=1, ['z']=1,
 185     ['~']=1
 186   };
 187   return table[c];
 188 }
 189
 190 /* C can be in the middle of a token.  */
 191 static bool
 192 intoken (unsigned char c)
 193 {
 194   static bool const table[UCHAR_MAX + 1] = {
 195     ['$']=1,
 196     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 197     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 198     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 199     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 200     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 201     ['Y']=1, ['Z']=1,
 202     ['_']=1,
 203     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 204     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 205     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 206     ['y']=1, ['z']=1
 207   };
 208   return table[c];
 209 }
 210
 211 /* C can end a token.  */
 212 static bool
 213 endtoken (unsigned char c)
 214 {
 215   static bool const table[UCHAR_MAX + 1] = {
 216     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 217     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 218     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 219     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 220     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 221   };
 222   return table[c];
 223 }
 224
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a `Type *' to storage for N objects of type Type,
 * obtained from xmalloc.  xrnew passes OP to xrealloc to make room for
 * N objects of type Type and assigns the result back to OP, so OP must
 * be an lvalue and is evaluated twice.  N * sizeof (Type) is not
 * checked for overflow.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
 233
/* Type of a language parse function (see the `function' member of
   `language'): it takes the input stream and returns nothing.  */
typedef void Lang_function (FILE *);
 235
/* Pairs a file name suffix with the command that decompresses files
   carrying that suffix.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 241
/* Description of one language: its name and help text, the function
   that parses it, and the suffixes, file names and interpreters by
   which its files are identified.  The entries of lang_names are
   initialized positionally, so the order of the members matters.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 252
/* Description of one input file.  Descriptions are chained into a
   singly linked list through NEXT (fdhead is the head of that list).  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 265
/* One tag.  Tags are kept in a binary tree linked through LEFT and
   RIGHT (nodehead is the head of that tree); FDP points to the
   description of the file the tag comes from.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to whom tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 278
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 292
/* Used to support mixing of --lang and file names. */
/* Each instance records one item of that mix: ARG_TYPE says what kind
   of item it is, WHAT holds its text and LANG the language that goes
   with it.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 306
/* Structure defining a regular expression. */
/* Instances are chained into a singly linked list through P_NEXT
   (p_head is the head of that list).  Each one keeps the source
   pattern, the compiled pattern with its registers, and the flags that
   control how it is applied.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 321
 322
/* Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way */
/* Forward declarations of the per-language functions.  All of them
   have the Lang_function signature except C_entries, which takes an
   additional C_EXT argument in front of the stream.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
 353
/* Forward declarations of the remaining helpers.  Every one of them is
   file-local except `fatal', which is declared without `static'.
   `error' takes a printf-style format, and `suggest_asking_for_help',
   `fatal' and `pfatal' are declared as not returning.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *, char const *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);

static void analyze_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void suggest_asking_for_help (void);
_Noreturn void fatal (const char *, const char *);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, int);
static char *savestr (const char *);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static char *etags_mktmp (void);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static void *xmalloc (size_t);
static void *xrealloc (void *, size_t);
 395
 396 \f
/* File-scope state.  Everything here except `searchar' and
   `invalidcharno' has no initializer and therefore starts out as zero
   or NULL.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */
static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static char *infilename;        /* current input file name */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably the value used where no valid character
   number is available -- confirm against its users.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 422
/* Option flags.  The ones declared `int' rather than `bool' (globals,
   members, declarations, no_line_directive, no_duplicates,
   packages_only and class_qualify) are exactly those whose address is
   stored in the flag member of a longopts entry.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static int globals;             /* create tags for global variables */
static int members;             /* create tags for C member variables */
static int declarations;        /* --declarations: tag them and extern in C&Co*/
static int no_line_directive;   /* ignore #line directives (undocumented) */
static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static int packages_only;       /* --packages-only: in Ada, only tag packages*/
static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
 445
/* STDIN is defined in LynxOS system headers */
/* Any such definition is discarded so that the name can be reused
   below for the --parse-stdin option code.  */
#ifdef STDIN
# undef STDIN
#endif

#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 456
/* Long options, in the format expected by getopt_long.  An entry whose
   third member is NULL makes getopt_long return the fourth member (a
   short option character, or STDIN for --parse-stdin); an entry whose
   third member points to a variable stores the fourth member into that
   variable instead.  Which extra options are available depends on
   whether this is built as ctags or as etags.  The all-zero entry ends
   the table.

   NOTE(review): "help" is listed twice, with 'h' and with 'H' --
   confirm which entry getopt_long selects for --help.
   "class-qualify" stores 'Q', a nonzero value, into class_qualify.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
 497
 498 static compressor compressors[] =
 499 {
 500   { "z", "gzip -d -c"},
 501   { "Z", "gzip -d -c"},
 502   { "gz", "gzip -d -c"},
 503   { "GZ", "gzip -d -c"},
 504   { "bz2", "bzip2 -d -c" },
 505   { "xz", "xz -d -c" },
 506   { NULL }
 507 };
 508
 509 /*
 510  * Language stuff.
 511  */
 512
 513 /* Ada code */
 514 static const char *Ada_suffixes [] =
 515   { "ads", "adb", "ada", NULL };
 516 static const char Ada_help [] =
 517 "In Ada code, functions, procedures, packages, tasks and types are\n\
 518 tags.  Use the `--packages-only' option to create tags for\n\
 519 packages only.\n\
 520 Ada tag names have suffixes indicating the type of entity:\n\
 521         Entity type:    Qualifier:\n\
 522         ------------    ----------\n\
 523         function        /f\n\
 524         procedure       /p\n\
 525         package spec    /s\n\
 526         package body    /b\n\
 527         type            /t\n\
 528         task            /k\n\
 529 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 530 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 531 will just search for any tag `bidule'.";
 532
 533 /* Assembly code */
 534 static const char *Asm_suffixes [] =
 535   { "a",        /* Unix assembler */
 536     "asm", /* Microcontroller assembly */
 537     "def", /* BSO/Tasking definition includes  */
 538     "inc", /* Microcontroller include files */
 539     "ins", /* Microcontroller include files */
 540     "s", "sa", /* Unix assembler */
 541     "S",   /* cpp-processed Unix assembler */
 542     "src", /* BSO/Tasking C compiler output */
 543     NULL
 544   };
 545 static const char Asm_help [] =
 546 "In assembler code, labels appearing at the beginning of a line,\n\
 547 followed by a colon, are tags.";
 548
 549
 550 /* Note that .c and .h can be considered C++, if the --c++ flag was
 551    given, or if the `class' or `template' keywords are met inside the file.
 552    That is why default_C_entries is called for these. */
 553 static const char *default_C_suffixes [] =
 554   { "c", "h", NULL };
 555 #if CTAGS                               /* C help for Ctags */
 556 static const char default_C_help [] =
 557 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 558 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 559 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 560 Use --globals to tag global variables.\n\
 561 You can tag function declarations and external variables by\n\
 562 using `--declarations', and struct members by using `--members'.";
 563 #else                                   /* C help for Etags */
 564 static const char default_C_help [] =
 565 "In C code, any C function or typedef is a tag, and so are\n\
 566 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 567 definitions and `enum' constants are tags unless you specify\n\
 568 `--no-defines'.  Global variables are tags unless you specify\n\
 569 `--no-globals' and so are struct members unless you specify\n\
 570 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 571 `--no-members' can make the tags table file much smaller.\n\
 572 You can tag function declarations and external variables by\n\
 573 using `--declarations'.";
 574 #endif  /* C help for Ctags and Etags */
 575
 576 static const char *Cplusplus_suffixes [] =
 577   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 578     "M",                        /* Objective C++ */
 579     "pdb",                      /* PostScript with C syntax */
 580     NULL };
 581 static const char Cplusplus_help [] =
 582 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 583 --help --lang=c --lang=c++ for full help.)\n\
 584 In addition to C tags, member functions are also recognized.  Member\n\
 585 variables are recognized unless you use the `--no-members' option.\n\
 586 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 587 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 588 `operator+'.";
 589
 590 static const char *Cjava_suffixes [] =
 591   { "java", NULL };
 592 static char Cjava_help [] =
 593 "In Java code, all the tags constructs of C and C++ code are\n\
 594 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 595
 596
 597 static const char *Cobol_suffixes [] =
 598   { "COB", "cob", NULL };
 599 static char Cobol_help [] =
 600 "In Cobol code, tags are paragraph names; that is, any word\n\
 601 starting in column 8 and followed by a period.";
 602
 603 static const char *Cstar_suffixes [] =
 604   { "cs", "hs", NULL };
 605
 606 static const char *Erlang_suffixes [] =
 607   { "erl", "hrl", NULL };
 608 static const char Erlang_help [] =
 609 "In Erlang code, the tags are the functions, records and macros\n\
 610 defined in the file.";
 611
 612 const char *Forth_suffixes [] =
 613   { "fth", "tok", NULL };
 614 static const char Forth_help [] =
 615 "In Forth code, tags are words defined by `:',\n\
 616 constant, code, create, defer, value, variable, buffer:, field.";
 617
 618 static const char *Fortran_suffixes [] =
 619   { "F", "f", "f90", "for", NULL };
 620 static const char Fortran_help [] =
 621 "In Fortran code, functions, subroutines and block data are tags.";
 622
 623 static const char *HTML_suffixes [] =
 624   { "htm", "html", "shtml", NULL };
 625 static const char HTML_help [] =
 626 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 627 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 628 occurrences of `id='.";
 629
 630 static const char *Lisp_suffixes [] =
 631   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 632 static const char Lisp_help [] =
 633 "In Lisp code, any function defined with `defun', any variable\n\
 634 defined with `defvar' or `defconst', and in general the first\n\
 635 argument of any expression that starts with `(def' in column zero\n\
 636 is a tag.\n\
 637 The `--declarations' option tags \"(defvar foo)\" constructs too.";
 638
 639 static const char *Lua_suffixes [] =
 640   { "lua", "LUA", NULL };
 641 static const char Lua_help [] =
 642 "In Lua scripts, all functions are tags.";
 643
 644 static const char *Makefile_filenames [] =
 645   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 646 static const char Makefile_help [] =
 647 "In makefiles, targets are tags; additionally, variables are tags\n\
 648 unless you specify `--no-globals'.";
 649
 650 static const char *Objc_suffixes [] =
 651   { "lm",                       /* Objective lex file */
 652     "m",                        /* Objective C file */
 653      NULL };
 654 static const char Objc_help [] =
 655 "In Objective C code, tags include Objective C definitions for classes,\n\
 656 class categories, methods and protocols.  Tags for variables and\n\
 657 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 658 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 659
 660 static const char *Pascal_suffixes [] =
 661   { "p", "pas", NULL };
 662 static const char Pascal_help [] =
 663 "In Pascal code, the tags are the functions and procedures defined\n\
 664 in the file.";
 665 /* " // this is for working around an Emacs highlighting bug... */
 666
 667 static const char *Perl_suffixes [] =
 668   { "pl", "pm", NULL };
 669 static const char *Perl_interpreters [] =
 670   { "perl", "@PERL@", NULL };
 671 static const char Perl_help [] =
 672 "In Perl code, the tags are the packages, subroutines and variables\n\
 673 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 674 `--globals' if you want to tag global variables.  Tags for\n\
 675 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 676 defined in the default package is `main::SUB'.";
 677
 678 static const char *PHP_suffixes [] =
 679   { "php", "php3", "php4", NULL };
 680 static const char PHP_help [] =
 681 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 682 the `--no-members' option, vars are tags too.";
 683
 684 static const char *plain_C_suffixes [] =
 685   { "pc",                       /* Pro*C file */
 686      NULL };
 687
 688 static const char *PS_suffixes [] =
 689   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 690 static const char PS_help [] =
 691 "In PostScript code, the tags are the functions.";
 692
 693 static const char *Prolog_suffixes [] =
 694   { "prolog", NULL };
 695 static const char Prolog_help [] =
 696 "In Prolog code, tags are predicates and rules at the beginning of\n\
 697 line.";
 698
 699 static const char *Python_suffixes [] =
 700   { "py", NULL };
 701 static const char Python_help [] =
 702 "In Python code, `def' or `class' at the beginning of a line\n\
 703 generate a tag.";
 704
 705 /* Can't do the `SCM' or `scm' prefix with a version number. */
 706 static const char *Scheme_suffixes [] =
 707   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 708 static const char Scheme_help [] =
 709 "In Scheme code, tags include anything defined with `def' or with a\n\
 710 construct whose name starts with `def'.  They also include\n\
 711 variables set with `set!' at top level in the file.";
 712
 713 static const char *TeX_suffixes [] =
 714   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 715 static const char TeX_help [] =
 716 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 717 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 718 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 719 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 720 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 721 \n\
 722 Other commands can be specified by setting the environment variable\n\
 723 `TEXTAGS' to a colon-separated list like, for example,\n\
 724      TEXTAGS=\"mycommand:myothercommand\".";
 725
 726
 727 static const char *Texinfo_suffixes [] =
 728   { "texi", "texinfo", "txi", NULL };
 729 static const char Texinfo_help [] =
 730 "for texinfo files, lines starting with @node are tagged.";
 731
 732 static const char *Yacc_suffixes [] =
 733   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 734 static const char Yacc_help [] =
 735 "In Bison or Yacc input files, each rule defines as a tag the\n\
 736 nonterminal it constructs.  The portions of the file that contain\n\
 737 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 738 for full help).";
 739
 740 static const char auto_help [] =
 741 "`auto' is not a real language, it indicates to use\n\
 742 a default language for files base on file name suffix and file contents.";
 743
 744 static const char none_help [] =
 745 "`none' is not a real language, it indicates to only do\n\
 746 regexp processing on files.";
 747
 748 static const char no_lang_help [] =
 749 "No detailed help available for this language.";
 750
 751
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Each row is a positional initializer for a `language' object; the
 * table is terminated by a row whose name is NULL, which is what the
 * lookup and listing loops in this file test for.  Judging from the
 * initializer names, the columns are: language name, help text, tagging
 * function, file-name suffixes, then optionally whole file names and
 * interpreter names (confirm the field order against the declaration of
 * `language').  Trailing fields that are omitted are zero-initialized.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Makefiles have no suffix list; they are matched by file name.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* NOTE(review): the meaning of the trailing `true' is not visible
     here; check the last field of `language'.  */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 791
 792 \f
 793 static void
 794 print_language_names (void)
 795 {
 796   language *lang;
 797   const char **name, **ext;
 798
 799   puts ("\nThese are the currently supported languages, along with the\n\
 800 default file names and dot suffixes:");
 801   for (lang = lang_names; lang->name != NULL; lang++)
 802     {
 803       printf ("  %-*s", 10, lang->name);
 804       if (lang->filenames != NULL)
 805         for (name = lang->filenames; *name != NULL; name++)
 806           printf (" %s", *name);
 807       if (lang->suffixes != NULL)
 808         for (ext = lang->suffixes; *ext != NULL; ext++)
 809           printf (" .%s", *ext);
 810       puts ("");
 811     }
 812   puts ("where `auto' means use default language for files based on file\n\
 813 name suffix, and `none' means only do regexp processing on files.\n\
 814 If no language is specified and no matching suffix is found,\n\
 815 the first line of the file is read for a sharp-bang (#!) sequence\n\
 816 followed by the name of an interpreter.  If no such sequence is found,\n\
 817 Fortran is tried first; if no tags are found, C is tried next.\n\
 818 When parsing any C file, a \"class\" or \"template\" keyword\n\
 819 switches to C++.");
 820   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 821 \n\
 822 For detailed help on a given language use, for example,\n\
 823 etags --help --lang=ada.");
 824 }
 825
 826 #ifndef EMACS_NAME
 827 # define EMACS_NAME "standalone"
 828 #endif
 829 #ifndef VERSION
 830 # define VERSION "17.38.1.4"
 831 #endif
 832 static _Noreturn void
 833 print_version (void)
 834 {
 835   char emacs_copyright[] = COPYRIGHT;
 836
 837   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 838   puts (emacs_copyright);
 839   puts ("This program is distributed under the terms in ETAGS.README");
 840
 841   exit (EXIT_SUCCESS);
 842 }
 843
 844 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 845 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 846 #endif
 847
 848 static _Noreturn void
 849 print_help (argument *argbuffer)
 850 {
 851   bool help_for_lang = false;
 852
 853   for (; argbuffer->arg_type != at_end; argbuffer++)
 854     if (argbuffer->arg_type == at_language)
 855       {
 856         if (help_for_lang)
 857           puts ("");
 858         puts (argbuffer->lang->help);
 859         help_for_lang = true;
 860       }
 861
 862   if (help_for_lang)
 863     exit (EXIT_SUCCESS);
 864
 865   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 866 \n\
 867 These are the options accepted by %s.\n", progname, progname);
 868   puts ("You may use unambiguous abbreviations for the long option names.");
 869   puts ("  A - as file name means read names from stdin (one per line).\n\
 870 Absolute names are stored in the output file as they are.\n\
 871 Relative ones are stored relative to the output file's directory.\n");
 872
 873   puts ("-a, --append\n\
 874         Append tag entries to existing tags file.");
 875
 876   puts ("--packages-only\n\
 877         For Ada files, only generate tags for packages.");
 878
 879   if (CTAGS)
 880     puts ("-B, --backward-search\n\
 881         Write the search commands for the tag entries using '?', the\n\
 882         backward-search command instead of '/', the forward-search command.");
 883
 884   /* This option is mostly obsolete, because etags can now automatically
 885      detect C++.  Retained for backward compatibility and for debugging and
 886      experimentation.  In principle, we could want to tag as C++ even
 887      before any "class" or "template" keyword.
 888   puts ("-C, --c++\n\
 889         Treat files whose name suffix defaults to C language as C++ files.");
 890   */
 891
 892   puts ("--declarations\n\
 893         In C and derived languages, create tags for function declarations,");
 894   if (CTAGS)
 895     puts ("\tand create tags for extern variables if --globals is used.");
 896   else
 897     puts
 898       ("\tand create tags for extern variables unless --no-globals is used.");
 899
 900   if (CTAGS)
 901     puts ("-d, --defines\n\
 902         Create tag entries for C #define constants and enum constants, too.");
 903   else
 904     puts ("-D, --no-defines\n\
 905         Don't create tag entries for C #define constants and enum constants.\n\
 906         This makes the tags file smaller.");
 907
 908   if (!CTAGS)
 909     puts ("-i FILE, --include=FILE\n\
 910         Include a note in tag file indicating that, when searching for\n\
 911         a tag, one should also consult the tags file FILE after\n\
 912         checking the current file.");
 913
 914   puts ("-l LANG, --language=LANG\n\
 915         Force the following files to be considered as written in the\n\
 916         named language up to the next --language=LANG option.");
 917
 918   if (CTAGS)
 919     puts ("--globals\n\
 920         Create tag entries for global variables in some languages.");
 921   else
 922     puts ("--no-globals\n\
 923         Do not create tag entries for global variables in some\n\
 924         languages.  This makes the tags file smaller.");
 925
 926   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 927     puts ("--no-line-directive\n\
 928         Ignore #line preprocessor directives in C and derived languages.");
 929
 930   if (CTAGS)
 931     puts ("--members\n\
 932         Create tag entries for members of structures in some languages.");
 933   else
 934     puts ("--no-members\n\
 935         Do not create tag entries for members of structures\n\
 936         in some languages.");
 937
 938   puts ("-Q, --class-qualify\n\
 939         Qualify tag names with their class name in C++, ObjC, and Java.\n\
 940         This produces tag names of the form \"class::member\" for C++,\n\
 941         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
 942         For Objective C, this also produces class methods qualified with\n\
 943         their arguments, as in \"foo:bar:baz:more\".");
 944   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 945         Make a tag for each line matching a regular expression pattern\n\
 946         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 947         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 948         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 949         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 950   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 951         For example Tcl named tags can be created with:\n\
 952           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 953         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 954         `m' means to allow multi-line matches, `s' implies `m' and\n\
 955         causes dot to match any character, including newline.");
 956
 957   puts ("-R, --no-regex\n\
 958         Don't create tags from regexps for the following files.");
 959
 960   puts ("-I, --ignore-indentation\n\
 961         In C and C++ do not assume that a closing brace in the first\n\
 962         column is the final brace of a function or structure definition.");
 963
 964   puts ("-o FILE, --output=FILE\n\
 965         Write the tags to FILE.");
 966
 967   puts ("--parse-stdin=NAME\n\
 968         Read from standard input and record tags as belonging to file NAME.");
 969
 970   if (CTAGS)
 971     {
 972       puts ("-t, --typedefs\n\
 973         Generate tag entries for C and Ada typedefs.");
 974       puts ("-T, --typedefs-and-c++\n\
 975         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 976         and C++ member functions.");
 977     }
 978
 979   if (CTAGS)
 980     puts ("-u, --update\n\
 981         Update the tag entries for the given files, leaving tag\n\
 982         entries for other files in place.  Currently, this is\n\
 983         implemented by deleting the existing entries for the given\n\
 984         files and then rewriting the new entries at the end of the\n\
 985         tags file.  It is often faster to simply rebuild the entire\n\
 986         tag file than to use this.");
 987
 988   if (CTAGS)
 989     {
 990       puts ("-v, --vgrind\n\
 991         Print on the standard output an index of items intended for\n\
 992         human consumption, similar to the output of vgrind.  The index\n\
 993         is sorted, and gives the page number of each item.");
 994
 995       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 996         puts ("-w, --no-duplicates\n\
 997         Do not create duplicate tag entries, for compatibility with\n\
 998         traditional ctags.");
 999
1000       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1001         puts ("-w, --no-warn\n\
1002         Suppress warning messages about duplicate tag entries.");
1003
1004       puts ("-x, --cxref\n\
1005         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1006         The output uses line numbers instead of page numbers, but\n\
1007         beyond that the differences are cosmetic; try both to see\n\
1008         which you like.");
1009     }
1010
1011   puts ("-V, --version\n\
1012         Print the version of the program.\n\
1013 -h, --help\n\
1014         Print this help message.\n\
1015         Followed by one or more `--language' options prints detailed\n\
1016         help about tag generation for the specified languages.");
1017
1018   print_language_names ();
1019
1020   puts ("");
1021   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1022
1023   exit (EXIT_SUCCESS);
1024 }
1025
1026 \f
1027 int
1028 main (int argc, char **argv)
1029 {
1030   int i;
1031   unsigned int nincluded_files;
1032   char **included_files;
1033   argument *argbuffer;
1034   int current_arg, file_count;
1035   linebuffer filename_lb;
1036   bool help_asked = false;
1037   ptrdiff_t len;
1038   char *optstring;
1039   int opt;
1040
1041   progname = argv[0];
1042   nincluded_files = 0;
1043   included_files = xnew (argc, char *);
1044   current_arg = 0;
1045   file_count = 0;
1046
1047   /* Allocate enough no matter what happens.  Overkill, but each one
1048      is small. */
1049   argbuffer = xnew (argc, argument);
1050
1051   /*
1052    * Always find typedefs and structure tags.
1053    * Also default to find macro constants, enum constants, struct
1054    * members and global variables.  Do it for both etags and ctags.
1055    */
1056   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1057   globals = members = true;
1058
1059   /* When the optstring begins with a '-' getopt_long does not rearrange the
1060      non-options arguments to be at the end, but leaves them alone. */
1061   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1062                       (CTAGS) ? "BxdtTuvw" : "Di:",
1063                       "");
1064
1065   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1066     switch (opt)
1067       {
1068       case 0:
1069         /* If getopt returns 0, then it has already processed a
1070            long-named option.  We should do nothing.  */
1071         break;
1072
1073       case 1:
1074         /* This means that a file name has been seen.  Record it. */
1075         argbuffer[current_arg].arg_type = at_filename;
1076         argbuffer[current_arg].what     = optarg;
1077         len = strlen (optarg);
1078         if (whatlen_max < len)
1079           whatlen_max = len;
1080         ++current_arg;
1081         ++file_count;
1082         break;
1083
1084       case STDIN:
1085         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1086         argbuffer[current_arg].arg_type = at_stdin;
1087         argbuffer[current_arg].what     = optarg;
1088         len = strlen (optarg);
1089         if (whatlen_max < len)
1090           whatlen_max = len;
1091         ++current_arg;
1092         ++file_count;
1093         if (parsing_stdin)
1094           fatal ("cannot parse standard input more than once", (char *)NULL);
1095         parsing_stdin = true;
1096         break;
1097
1098         /* Common options. */
1099       case 'a': append_to_tagfile = true;       break;
1100       case 'C': cplusplus = true;               break;
1101       case 'f':         /* for compatibility with old makefiles */
1102       case 'o':
1103         if (tagfile)
1104           {
1105             error ("-o option may only be given once.");
1106             suggest_asking_for_help ();
1107             /* NOTREACHED */
1108           }
1109         tagfile = optarg;
1110         break;
1111       case 'I':
1112       case 'S':         /* for backward compatibility */
1113         ignoreindent = true;
1114         break;
1115       case 'l':
1116         {
1117           language *lang = get_language_from_langname (optarg);
1118           if (lang != NULL)
1119             {
1120               argbuffer[current_arg].lang = lang;
1121               argbuffer[current_arg].arg_type = at_language;
1122               ++current_arg;
1123             }
1124         }
1125         break;
1126       case 'c':
1127         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1128         optarg = concat (optarg, "i", ""); /* memory leak here */
1129         /* FALLTHRU */
1130       case 'r':
1131         argbuffer[current_arg].arg_type = at_regexp;
1132         argbuffer[current_arg].what = optarg;
1133         len = strlen (optarg);
1134         if (whatlen_max < len)
1135           whatlen_max = len;
1136         ++current_arg;
1137         break;
1138       case 'R':
1139         argbuffer[current_arg].arg_type = at_regexp;
1140         argbuffer[current_arg].what = NULL;
1141         ++current_arg;
1142         break;
1143       case 'V':
1144         print_version ();
1145         break;
1146       case 'h':
1147       case 'H':
1148         help_asked = true;
1149         break;
1150       case 'Q':
1151         class_qualify = 1;
1152         break;
1153
1154         /* Etags options */
1155       case 'D': constantypedefs = false;                        break;
1156       case 'i': included_files[nincluded_files++] = optarg;     break;
1157
1158         /* Ctags options. */
1159       case 'B': searchar = '?';                                 break;
1160       case 'd': constantypedefs = true;                         break;
1161       case 't': typedefs = true;                                break;
1162       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1163       case 'u': update = true;                                  break;
1164       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1165       case 'x': cxref_style = true;                             break;
1166       case 'w': no_warnings = true;                             break;
1167       default:
1168         suggest_asking_for_help ();
1169         /* NOTREACHED */
1170       }
1171
1172   /* No more options.  Store the rest of arguments. */
1173   for (; optind < argc; optind++)
1174     {
1175       argbuffer[current_arg].arg_type = at_filename;
1176       argbuffer[current_arg].what = argv[optind];
1177       len = strlen (argv[optind]);
1178       if (whatlen_max < len)
1179         whatlen_max = len;
1180       ++current_arg;
1181       ++file_count;
1182     }
1183
1184   argbuffer[current_arg].arg_type = at_end;
1185
1186   if (help_asked)
1187     print_help (argbuffer);
1188     /* NOTREACHED */
1189
1190   if (nincluded_files == 0 && file_count == 0)
1191     {
1192       error ("no input files specified.");
1193       suggest_asking_for_help ();
1194       /* NOTREACHED */
1195     }
1196
1197   if (tagfile == NULL)
1198     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1199   cwd = etags_getcwd ();        /* the current working directory */
1200   if (cwd[strlen (cwd) - 1] != '/')
1201     {
1202       char *oldcwd = cwd;
1203       cwd = concat (oldcwd, "/", "");
1204       free (oldcwd);
1205     }
1206
1207   /* Compute base directory for relative file names. */
1208   if (streq (tagfile, "-")
1209       || strneq (tagfile, "/dev/", 5))
1210     tagfiledir = cwd;            /* relative file names are relative to cwd */
1211   else
1212     {
1213       canonicalize_filename (tagfile);
1214       tagfiledir = absolute_dirname (tagfile, cwd);
1215     }
1216
1217   linebuffer_init (&lb);
1218   linebuffer_init (&filename_lb);
1219   linebuffer_init (&filebuf);
1220   linebuffer_init (&token_name);
1221
1222   if (!CTAGS)
1223     {
1224       if (streq (tagfile, "-"))
1225         {
1226           tagf = stdout;
1227           SET_BINARY (fileno (stdout));
1228         }
1229       else
1230         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1231       if (tagf == NULL)
1232         pfatal (tagfile);
1233     }
1234
1235   /*
1236    * Loop through files finding functions.
1237    */
1238   for (i = 0; i < current_arg; i++)
1239     {
1240       static language *lang;    /* non-NULL if language is forced */
1241       char *this_file;
1242
1243       switch (argbuffer[i].arg_type)
1244         {
1245         case at_language:
1246           lang = argbuffer[i].lang;
1247           break;
1248         case at_regexp:
1249           analyze_regex (argbuffer[i].what);
1250           break;
1251         case at_filename:
1252               this_file = argbuffer[i].what;
1253               /* Input file named "-" means read file names from stdin
1254                  (one per line) and use them. */
1255               if (streq (this_file, "-"))
1256                 {
1257                   if (parsing_stdin)
1258                     fatal ("cannot parse standard input AND read file names from it",
1259                            (char *)NULL);
1260                   while (readline_internal (&filename_lb, stdin, "-") > 0)
1261                     process_file_name (filename_lb.buffer, lang);
1262                 }
1263               else
1264                 process_file_name (this_file, lang);
1265           break;
1266         case at_stdin:
1267           this_file = argbuffer[i].what;
1268           process_file (stdin, this_file, lang);
1269           break;
1270         }
1271     }
1272
1273   free_regexps ();
1274   free (lb.buffer);
1275   free (filebuf.buffer);
1276   free (token_name.buffer);
1277
1278   if (!CTAGS || cxref_style)
1279     {
1280       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1281       put_entries (nodehead);
1282       free_tree (nodehead);
1283       nodehead = NULL;
1284       if (!CTAGS)
1285         {
1286           fdesc *fdp;
1287
1288           /* Output file entries that have no tags. */
1289           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1290             if (!fdp->written)
1291               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1292
1293           while (nincluded_files-- > 0)
1294             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1295
1296           if (fclose (tagf) == EOF)
1297             pfatal (tagfile);
1298         }
1299
1300       exit (EXIT_SUCCESS);
1301     }
1302
1303   /* From here on, we are in (CTAGS && !cxref_style) */
1304   if (update)
1305     {
1306       char *cmd =
1307         xmalloc (strlen (tagfile) + whatlen_max +
1308                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1309       for (i = 0; i < current_arg; ++i)
1310         {
1311           switch (argbuffer[i].arg_type)
1312             {
1313             case at_filename:
1314             case at_stdin:
1315               break;
1316             default:
1317               continue;         /* the for loop */
1318             }
1319           char *z = stpcpy (cmd, "mv ");
1320           z = stpcpy (z, tagfile);
1321           z = stpcpy (z, " OTAGS;fgrep -v '\t");
1322           z = stpcpy (z, argbuffer[i].what);
1323           z = stpcpy (z, "\t' OTAGS >");
1324           z = stpcpy (z, tagfile);
1325           strcpy (z, ";rm OTAGS");
1326           if (system (cmd) != EXIT_SUCCESS)
1327             fatal ("failed to execute shell command", (char *)NULL);
1328         }
1329       free (cmd);
1330       append_to_tagfile = true;
1331     }
1332
1333   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1334   if (tagf == NULL)
1335     pfatal (tagfile);
1336   put_entries (nodehead);       /* write all the tags (CTAGS) */
1337   free_tree (nodehead);
1338   nodehead = NULL;
1339   if (fclose (tagf) == EOF)
1340     pfatal (tagfile);
1341
1342   if (CTAGS)
1343     if (append_to_tagfile || update)
1344       {
1345         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1346         /* Maybe these should be used:
1347            setenv ("LC_COLLATE", "C", 1);
1348            setenv ("LC_ALL", "C", 1); */
1349         char *z = stpcpy (cmd, "sort -u -o ");
1350         z = stpcpy (z, tagfile);
1351         *z++ = ' ';
1352         strcpy (z, tagfile);
1353         exit (system (cmd));
1354       }
1355   return EXIT_SUCCESS;
1356 }
1357
1358
1359 /*
1360  * Return a compressor given the file name.  If EXTPTR is non-zero,
1361  * return a pointer into FILE where the compressor-specific
1362  * extension begins.  If no compressor is found, NULL is returned
1363  * and EXTPTR is not significant.
1364  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1365  */
1366 static compressor *
1367 get_compressor_from_suffix (char *file, char **extptr)
1368 {
1369   compressor *compr;
1370   char *slash, *suffix;
1371
1372   /* File has been processed by canonicalize_filename,
1373      so we don't need to consider backslashes on DOS_NT.  */
1374   slash = strrchr (file, '/');
1375   suffix = strrchr (file, '.');
1376   if (suffix == NULL || suffix < slash)
1377     return NULL;
1378   if (extptr != NULL)
1379     *extptr = suffix;
1380   suffix += 1;
1381   /* Let those poor souls who live with DOS 8+3 file name limits get
1382      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1383      Only the first do loop is run if not MSDOS */
1384   do
1385     {
1386       for (compr = compressors; compr->suffix != NULL; compr++)
1387         if (streq (compr->suffix, suffix))
1388           return compr;
1389       if (!MSDOS)
1390         break;                  /* do it only once: not really a loop */
1391       if (extptr != NULL)
1392         *extptr = ++suffix;
1393     } while (*suffix != '\0');
1394   return NULL;
1395 }
1396
1397
1398
1399 /*
1400  * Return a language given the name.
1401  */
1402 static language *
1403 get_language_from_langname (const char *name)
1404 {
1405   language *lang;
1406
1407   if (name == NULL)
1408     error ("empty language name");
1409   else
1410     {
1411       for (lang = lang_names; lang->name != NULL; lang++)
1412         if (streq (name, lang->name))
1413           return lang;
1414       error ("unknown language \"%s\"", name);
1415     }
1416
1417   return NULL;
1418 }
1419
1420
1421 /*
1422  * Return a language given the interpreter name.
1423  */
1424 static language *
1425 get_language_from_interpreter (char *interpreter)
1426 {
1427   language *lang;
1428   const char **iname;
1429
1430   if (interpreter == NULL)
1431     return NULL;
1432   for (lang = lang_names; lang->name != NULL; lang++)
1433     if (lang->interpreters != NULL)
1434       for (iname = lang->interpreters; *iname != NULL; iname++)
1435         if (streq (*iname, interpreter))
1436             return lang;
1437
1438   return NULL;
1439 }
1440
1441
1442
1443 /*
1444  * Return a language given the file name.
1445  */
1446 static language *
1447 get_language_from_filename (char *file, int case_sensitive)
1448 {
1449   language *lang;
1450   const char **name, **ext, *suffix;
1451
1452   /* Try whole file name first. */
1453   for (lang = lang_names; lang->name != NULL; lang++)
1454     if (lang->filenames != NULL)
1455       for (name = lang->filenames; *name != NULL; name++)
1456         if ((case_sensitive)
1457             ? streq (*name, file)
1458             : strcaseeq (*name, file))
1459           return lang;
1460
1461   /* If not found, try suffix after last dot. */
1462   suffix = strrchr (file, '.');
1463   if (suffix == NULL)
1464     return NULL;
1465   suffix += 1;
1466   for (lang = lang_names; lang->name != NULL; lang++)
1467     if (lang->suffixes != NULL)
1468       for (ext = lang->suffixes; *ext != NULL; ext++)
1469         if ((case_sensitive)
1470             ? streq (*ext, suffix)
1471             : strcaseeq (*ext, suffix))
1472           return lang;
1473   return NULL;
1474 }
1475
1476 \f
1477 /*
1478  * This routine is called on each file argument.
1479  */
1480 static void
1481 process_file_name (char *file, language *lang)
1482 {
1483   FILE *inf;
1484   fdesc *fdp;
1485   compressor *compr;
1486   char *compressed_name, *uncompressed_name;
1487   char *ext, *real_name, *tmp_name;
1488   int retval;
1489
1490   canonicalize_filename (file);
1491   if (streq (file, tagfile) && !streq (tagfile, "-"))
1492     {
1493       error ("skipping inclusion of %s in self.", file);
1494       return;
1495     }
1496   compr = get_compressor_from_suffix (file, &ext);
1497   if (compr)
1498     {
1499       compressed_name = file;
1500       uncompressed_name = savenstr (file, ext - file);
1501     }
1502   else
1503     {
1504       compressed_name = NULL;
1505       uncompressed_name = file;
1506     }
1507
1508   /* If the canonicalized uncompressed name
1509      has already been dealt with, skip it silently. */
1510   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1511     {
1512       assert (fdp->infname != NULL);
1513       if (streq (uncompressed_name, fdp->infname))
1514         goto cleanup;
1515     }
1516
1517   inf = fopen (file, "r" FOPEN_BINARY);
1518   if (inf)
1519     real_name = file;
1520   else
1521     {
1522       int file_errno = errno;
1523       if (compressed_name)
1524         {
1525           /* Try with the given suffix.  */
1526           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1527           if (inf)
1528             real_name = uncompressed_name;
1529         }
1530       else
1531         {
1532           /* Try all possible suffixes.  */
1533           for (compr = compressors; compr->suffix != NULL; compr++)
1534             {
1535               compressed_name = concat (file, ".", compr->suffix);
1536               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1537               if (inf)
1538                 {
1539                   real_name = compressed_name;
1540                   break;
1541                 }
1542               if (MSDOS)
1543                 {
1544                   char *suf = compressed_name + strlen (file);
1545                   size_t suflen = strlen (compr->suffix) + 1;
1546                   for ( ; suf[1]; suf++, suflen--)
1547                     {
1548                       memmove (suf, suf + 1, suflen);
1549                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1550                       if (inf)
1551                         {
1552                           real_name = compressed_name;
1553                           break;
1554                         }
1555                     }
1556                   if (inf)
1557                     break;
1558                 }
1559               free (compressed_name);
1560               compressed_name = NULL;
1561             }
1562         }
1563       if (! inf)
1564         {
1565           errno = file_errno;
1566           perror (file);
1567           goto cleanup;
1568         }
1569     }
1570
1571   if (real_name == compressed_name)
1572     {
1573       fclose (inf);
1574       tmp_name = etags_mktmp ();
1575       if (!tmp_name)
1576         inf = NULL;
1577       else
1578         {
1579 #if MSDOS || defined (DOS_NT)
1580           char *cmd1 = concat (compr->command, " \"", real_name);
1581           char *cmd = concat (cmd1, "\" > ", tmp_name);
1582 #else
1583           char *cmd1 = concat (compr->command, " '", real_name);
1584           char *cmd = concat (cmd1, "' > ", tmp_name);
1585 #endif
1586           free (cmd1);
1587           int tmp_errno;
1588           if (system (cmd) == -1)
1589             {
1590               inf = NULL;
1591               tmp_errno = EINVAL;
1592             }
1593           else
1594             {
1595               inf = fopen (tmp_name, "r" FOPEN_BINARY);
1596               tmp_errno = errno;
1597             }
1598           free (cmd);
1599           errno = tmp_errno;
1600         }
1601
1602       if (!inf)
1603         {
1604           perror (real_name);
1605           goto cleanup;
1606         }
1607     }
1608
1609   process_file (inf, uncompressed_name, lang);
1610
1611   retval = fclose (inf);
1612   if (real_name == compressed_name)
1613     {
1614       remove (tmp_name);
1615       free (tmp_name);
1616     }
1617   if (retval < 0)
1618     pfatal (file);
1619
1620  cleanup:
1621   if (compressed_name != file)
1622     free (compressed_name);
1623   if (uncompressed_name != file)
1624     free (uncompressed_name);
1625   last_node = NULL;
1626   curfdp = NULL;
1627   return;
1628 }
1629
1630 static void
1631 process_file (FILE *fh, char *fn, language *lang)
1632 {
1633   static const fdesc emptyfdesc;
1634   fdesc *fdp;
1635
1636   infilename = fn;
1637   /* Create a new input file description entry. */
1638   fdp = xnew (1, fdesc);
1639   *fdp = emptyfdesc;
1640   fdp->next = fdhead;
1641   fdp->infname = savestr (fn);
1642   fdp->lang = lang;
1643   fdp->infabsname = absolute_filename (fn, cwd);
1644   fdp->infabsdir = absolute_dirname (fn, cwd);
1645   if (filename_is_absolute (fn))
1646     {
1647       /* An absolute file name.  Canonicalize it. */
1648       fdp->taggedfname = absolute_filename (fn, NULL);
1649     }
1650   else
1651     {
1652       /* A file name relative to cwd.  Make it relative
1653          to the directory of the tags file. */
1654       fdp->taggedfname = relative_filename (fn, tagfiledir);
1655     }
1656   fdp->usecharno = true;        /* use char position when making tags */
1657   fdp->prop = NULL;
1658   fdp->written = false;         /* not written on tags file yet */
1659
1660   fdhead = fdp;
1661   curfdp = fdhead;              /* the current file description */
1662
1663   find_entries (fh);
1664
1665   /* If not Ctags, and if this is not metasource and if it contained no #line
1666      directives, we can write the tags and free all nodes pointing to
1667      curfdp. */
1668   if (!CTAGS
1669       && curfdp->usecharno      /* no #line directives in this file */
1670       && !curfdp->lang->metasource)
1671     {
1672       node *np, *prev;
1673
1674       /* Look for the head of the sublist relative to this file.  See add_node
1675          for the structure of the node tree. */
1676       prev = NULL;
1677       for (np = nodehead; np != NULL; prev = np, np = np->left)
1678         if (np->fdp == curfdp)
1679           break;
1680
1681       /* If we generated tags for this file, write and delete them. */
1682       if (np != NULL)
1683         {
1684           /* This is the head of the last sublist, if any.  The following
1685              instructions depend on this being true. */
1686           assert (np->left == NULL);
1687
1688           assert (fdhead == curfdp);
1689           assert (last_node->fdp == curfdp);
1690           put_entries (np);     /* write tags for file curfdp->taggedfname */
1691           free_tree (np);       /* remove the written nodes */
1692           if (prev == NULL)
1693             nodehead = NULL;    /* no nodes left */
1694           else
1695             prev->left = NULL;  /* delete the pointer to the sublist */
1696         }
1697     }
1698 }
1699
1700 static void
1701 reset_input (FILE *inf)
1702 {
1703   if (fseek (inf, 0, SEEK_SET) != 0)
1704     perror (infilename);
1705 }
1706
1707 /*
1708  * This routine opens the specified file and calls the function
1709  * which finds the function and type definitions.
1710  */
1711 static void
1712 find_entries (FILE *inf)
1713 {
1714   char *cp;
1715   language *lang = curfdp->lang;
1716   Lang_function *parser = NULL;
1717
1718   /* If user specified a language, use it. */
1719   if (lang != NULL && lang->function != NULL)
1720     {
1721       parser = lang->function;
1722     }
1723
1724   /* Else try to guess the language given the file name. */
1725   if (parser == NULL)
1726     {
1727       lang = get_language_from_filename (curfdp->infname, true);
1728       if (lang != NULL && lang->function != NULL)
1729         {
1730           curfdp->lang = lang;
1731           parser = lang->function;
1732         }
1733     }
1734
1735   /* Else look for sharp-bang as the first two characters. */
1736   if (parser == NULL
1737       && readline_internal (&lb, inf, infilename) > 0
1738       && lb.len >= 2
1739       && lb.buffer[0] == '#'
1740       && lb.buffer[1] == '!')
1741     {
1742       char *lp;
1743
1744       /* Set lp to point at the first char after the last slash in the
1745          line or, if no slashes, at the first nonblank.  Then set cp to
1746          the first successive blank and terminate the string. */
1747       lp = strrchr (lb.buffer+2, '/');
1748       if (lp != NULL)
1749         lp += 1;
1750       else
1751         lp = skip_spaces (lb.buffer + 2);
1752       cp = skip_non_spaces (lp);
1753       *cp = '\0';
1754
1755       if (strlen (lp) > 0)
1756         {
1757           lang = get_language_from_interpreter (lp);
1758           if (lang != NULL && lang->function != NULL)
1759             {
1760               curfdp->lang = lang;
1761               parser = lang->function;
1762             }
1763         }
1764     }
1765
1766   reset_input (inf);
1767
1768   /* Else try to guess the language given the case insensitive file name. */
1769   if (parser == NULL)
1770     {
1771       lang = get_language_from_filename (curfdp->infname, false);
1772       if (lang != NULL && lang->function != NULL)
1773         {
1774           curfdp->lang = lang;
1775           parser = lang->function;
1776         }
1777     }
1778
1779   /* Else try Fortran or C. */
1780   if (parser == NULL)
1781     {
1782       node *old_last_node = last_node;
1783
1784       curfdp->lang = get_language_from_langname ("fortran");
1785       find_entries (inf);
1786
1787       if (old_last_node == last_node)
1788         /* No Fortran entries found.  Try C. */
1789         {
1790           reset_input (inf);
1791           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1792           find_entries (inf);
1793         }
1794       return;
1795     }
1796
1797   if (!no_line_directive
1798       && curfdp->lang != NULL && curfdp->lang->metasource)
1799     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1800        file, or anyway we parsed a file that is automatically generated from
1801        this one.  If this is the case, the bingo.c file contained #line
1802        directives that generated tags pointing to this file.  Let's delete
1803        them all before parsing this file, which is the real source. */
1804     {
1805       fdesc **fdpp = &fdhead;
1806       while (*fdpp != NULL)
1807         if (*fdpp != curfdp
1808             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1809           /* We found one of those!  We must delete both the file description
1810              and all tags referring to it. */
1811           {
1812             fdesc *badfdp = *fdpp;
1813
1814             /* Delete the tags referring to badfdp->taggedfname
1815                that were obtained from badfdp->infname. */
1816             invalidate_nodes (badfdp, &nodehead);
1817
1818             *fdpp = badfdp->next; /* remove the bad description from the list */
1819             free_fdesc (badfdp);
1820           }
1821         else
1822           fdpp = &(*fdpp)->next; /* advance the list pointer */
1823     }
1824
1825   assert (parser != NULL);
1826
1827   /* Generic initializations before reading from file. */
1828   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1829
1830   /* Generic initializations before parsing file with readline. */
1831   lineno = 0;                  /* reset global line number */
1832   charno = 0;                  /* reset global char number */
1833   linecharno = 0;              /* reset global char number of line start */
1834
1835   parser (inf);
1836
1837   regex_tag_multiline ();
1838 }
1839
1840 \f
1841 /*
1842  * Check whether an implicitly named tag should be created,
1843  * then call `pfnote'.
1844  * NAME is a string that is internally copied by this function.
1845  *
1846  * TAGS format specification
1847  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1848  * The following is explained in some more detail in etc/ETAGS.EBNF.
1849  *
1850  * make_tag creates tags with "implicit tag names" (unnamed tags)
1851  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1852  *  1. NAME does not contain any of the characters in NONAM;
1853  *  2. LINESTART contains name as either a rightmost, or rightmost but
1854  *     one character, substring;
1855  *  3. the character, if any, immediately before NAME in LINESTART must
1856  *     be a character in NONAM;
1857  *  4. the character, if any, immediately after NAME in LINESTART must
1858  *     also be a character in NONAM.
1859  *
1860  * The implementation uses the notinname() macro, which recognizes the
1861  * characters stored in the string `nonam'.
1862  * etags.el needs to use the same characters that are in NONAM.
1863  */
1864 static void
1865 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1866           int namelen,          /* tag length */
1867           bool is_func,         /* tag is a function */
1868           char *linestart,      /* start of the line where tag is */
1869           int linelen,          /* length of the line where tag is */
1870           int lno,              /* line number */
1871           long int cno)         /* character number */
1872 {
1873   bool named = (name != NULL && namelen > 0);
1874   char *nname = NULL;
1875
1876   if (!CTAGS && named)          /* maybe set named to false */
1877     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1878        such that etags.el can guess a name from it. */
1879     {
1880       int i;
1881       register const char *cp = name;
1882
1883       for (i = 0; i < namelen; i++)
1884         if (notinname (*cp++))
1885           break;
1886       if (i == namelen)                         /* rule #1 */
1887         {
1888           cp = linestart + linelen - namelen;
1889           if (notinname (linestart[linelen-1]))
1890             cp -= 1;                            /* rule #4 */
1891           if (cp >= linestart                   /* rule #2 */
1892               && (cp == linestart
1893                   || notinname (cp[-1]))        /* rule #3 */
1894               && strneq (name, cp, namelen))    /* rule #2 */
1895             named = false;      /* use implicit tag name */
1896         }
1897     }
1898
1899   if (named)
1900     nname = savenstr (name, namelen);
1901
1902   pfnote (nname, is_func, linestart, linelen, lno, cno);
1903 }
1904
1905 /* Record a tag. */
1906 static void
1907 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1908         long int cno)
1909                                 /* tag name, or NULL if unnamed */
1910                                 /* tag is a function */
1911                                 /* start of the line where tag is */
1912                                 /* length of the line where tag is */
1913                                 /* line number */
1914                                 /* character number */
1915 {
1916   register node *np;
1917
1918   assert (name == NULL || name[0] != '\0');
1919   if (CTAGS && name == NULL)
1920     return;
1921
1922   np = xnew (1, node);
1923
1924   /* If ctags mode, change name "main" to M<thisfilename>. */
1925   if (CTAGS && !cxref_style && streq (name, "main"))
1926     {
1927       char *fp = strrchr (curfdp->taggedfname, '/');
1928       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1929       fp = strrchr (np->name, '.');
1930       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1931         fp[0] = '\0';
1932     }
1933   else
1934     np->name = name;
1935   np->valid = true;
1936   np->been_warned = false;
1937   np->fdp = curfdp;
1938   np->is_func = is_func;
1939   np->lno = lno;
1940   if (np->fdp->usecharno)
1941     /* Our char numbers are 0-base, because of C language tradition?
1942        ctags compatibility?  old versions compatibility?   I don't know.
1943        Anyway, since emacs's are 1-base we expect etags.el to take care
1944        of the difference.  If we wanted to have 1-based numbers, we would
1945        uncomment the +1 below. */
1946     np->cno = cno /* + 1 */ ;
1947   else
1948     np->cno = invalidcharno;
1949   np->left = np->right = NULL;
1950   if (CTAGS && !cxref_style)
1951     {
1952       if (strlen (linestart) < 50)
1953         np->regex = concat (linestart, "$", "");
1954       else
1955         np->regex = savenstr (linestart, 50);
1956     }
1957   else
1958     np->regex = savenstr (linestart, linelen);
1959
1960   add_node (np, &nodehead);
1961 }
1962
1963 /*
1964  * free_tree ()
1965  *      recurse on left children, iterate on right children.
1966  */
1967 static void
1968 free_tree (register node *np)
1969 {
1970   while (np)
1971     {
1972       register node *node_right = np->right;
1973       free_tree (np->left);
1974       free (np->name);
1975       free (np->regex);
1976       free (np);
1977       np = node_right;
1978     }
1979 }
1980
1981 /*
1982  * free_fdesc ()
1983  *      delete a file description
1984  */
1985 static void
1986 free_fdesc (register fdesc *fdp)
1987 {
1988   free (fdp->infname);
1989   free (fdp->infabsname);
1990   free (fdp->infabsdir);
1991   free (fdp->taggedfname);
1992   free (fdp->prop);
1993   free (fdp);
1994 }
1995
1996 /*
1997  * add_node ()
1998  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1999  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2000  *      balancing.
2001  *
2002  *      add_node is the only function allowed to add nodes, so it can
2003  *      maintain state.
2004  */
2005 static void
2006 add_node (node *np, node **cur_node_p)
2007 {
2008   register int dif;
2009   register node *cur_node = *cur_node_p;
2010
2011   if (cur_node == NULL)
2012     {
2013       *cur_node_p = np;
2014       last_node = np;
2015       return;
2016     }
2017
2018   if (!CTAGS)
2019     /* Etags Mode */
2020     {
2021       /* For each file name, tags are in a linked sublist on the right
2022          pointer.  The first tags of different files are a linked list
2023          on the left pointer.  last_node points to the end of the last
2024          used sublist. */
2025       if (last_node != NULL && last_node->fdp == np->fdp)
2026         {
2027           /* Let's use the same sublist as the last added node. */
2028           assert (last_node->right == NULL);
2029           last_node->right = np;
2030           last_node = np;
2031         }
2032       else if (cur_node->fdp == np->fdp)
2033         {
2034           /* Scanning the list we found the head of a sublist which is
2035              good for us.  Let's scan this sublist. */
2036           add_node (np, &cur_node->right);
2037         }
2038       else
2039         /* The head of this sublist is not good for us.  Let's try the
2040            next one. */
2041         add_node (np, &cur_node->left);
2042     } /* if ETAGS mode */
2043
2044   else
2045     {
2046       /* Ctags Mode */
2047       dif = strcmp (np->name, cur_node->name);
2048
2049       /*
2050        * If this tag name matches an existing one, then
2051        * do not add the node, but maybe print a warning.
2052        */
2053       if (no_duplicates && !dif)
2054         {
2055           if (np->fdp == cur_node->fdp)
2056             {
2057               if (!no_warnings)
2058                 {
2059                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2060                            np->fdp->infname, lineno, np->name);
2061                   fprintf (stderr, "Second entry ignored\n");
2062                 }
2063             }
2064           else if (!cur_node->been_warned && !no_warnings)
2065             {
2066               fprintf
2067                 (stderr,
2068                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2069                  np->fdp->infname, cur_node->fdp->infname, np->name);
2070               cur_node->been_warned = true;
2071             }
2072           return;
2073         }
2074
2075       /* Actually add the node */
2076       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2077     } /* if CTAGS mode */
2078 }
2079
2080 /*
2081  * invalidate_nodes ()
2082  *      Scan the node tree and invalidate all nodes pointing to the
2083  *      given file description (CTAGS case) or free them (ETAGS case).
2084  */
2085 static void
2086 invalidate_nodes (fdesc *badfdp, node **npp)
2087 {
2088   node *np = *npp;
2089
2090   if (np == NULL)
2091     return;
2092
2093   if (CTAGS)
2094     {
2095       if (np->left != NULL)
2096         invalidate_nodes (badfdp, &np->left);
2097       if (np->fdp == badfdp)
2098         np->valid = false;
2099       if (np->right != NULL)
2100         invalidate_nodes (badfdp, &np->right);
2101     }
2102   else
2103     {
2104       assert (np->fdp != NULL);
2105       if (np->fdp == badfdp)
2106         {
2107           *npp = np->left;      /* detach the sublist from the list */
2108           np->left = NULL;      /* isolate it */
2109           free_tree (np);       /* free it */
2110           invalidate_nodes (badfdp, npp);
2111         }
2112       else
2113         invalidate_nodes (badfdp, &np->left);
2114     }
2115 }
2116
2117 \f
2118 static int total_size_of_entries (node *);
2119 static int number_len (long) ATTRIBUTE_CONST;
2120
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (long int num)
{
  int digits = 1;

  /* Each remaining factor of ten accounts for one more digit. */
  while (num >= 10)
    {
      num /= 10;
      digits++;
    }
  return digits;
}
2130
2131 /*
2132  * Return total number of characters that put_entries will output for
2133  * the nodes in the linked list at the right of the specified node.
2134  * This count is irrelevant with etags.el since emacs 19.34 at least,
2135  * but is still supplied for backward compatibility.
2136  */
2137 static int
2138 total_size_of_entries (register node *np)
2139 {
2140   register int total = 0;
2141
2142   for (; np != NULL; np = np->right)
2143     if (np->valid)
2144       {
2145         total += strlen (np->regex) + 1;                /* pat\177 */
2146         if (np->name != NULL)
2147           total += strlen (np->name) + 1;               /* name\001 */
2148         total += number_len ((long) np->lno) + 1;       /* lno, */
2149         if (np->cno != invalidcharno)                   /* cno */
2150           total += number_len (np->cno);
2151         total += 1;                                     /* newline */
2152       }
2153
2154   return total;
2155 }
2156
2157 static void
2158 put_entries (register node *np)
2159 {
2160   register char *sp;
2161   static fdesc *fdp = NULL;
2162
2163   if (np == NULL)
2164     return;
2165
2166   /* Output subentries that precede this one */
2167   if (CTAGS)
2168     put_entries (np->left);
2169
2170   /* Output this entry */
2171   if (np->valid)
2172     {
2173       if (!CTAGS)
2174         {
2175           /* Etags mode */
2176           if (fdp != np->fdp)
2177             {
2178               fdp = np->fdp;
2179               fprintf (tagf, "\f\n%s,%d\n",
2180                        fdp->taggedfname, total_size_of_entries (np));
2181               fdp->written = true;
2182             }
2183           fputs (np->regex, tagf);
2184           fputc ('\177', tagf);
2185           if (np->name != NULL)
2186             {
2187               fputs (np->name, tagf);
2188               fputc ('\001', tagf);
2189             }
2190           fprintf (tagf, "%d,", np->lno);
2191           if (np->cno != invalidcharno)
2192             fprintf (tagf, "%ld", np->cno);
2193           fputs ("\n", tagf);
2194         }
2195       else
2196         {
2197           /* Ctags mode */
2198           if (np->name == NULL)
2199             error ("internal error: NULL name in ctags mode.");
2200
2201           if (cxref_style)
2202             {
2203               if (vgrind_style)
2204                 fprintf (stdout, "%s %s %d\n",
2205                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2206               else
2207                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2208                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2209             }
2210           else
2211             {
2212               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2213
2214               if (np->is_func)
2215                 {               /* function or #define macro with args */
2216                   putc (searchar, tagf);
2217                   putc ('^', tagf);
2218
2219                   for (sp = np->regex; *sp; sp++)
2220                     {
2221                       if (*sp == '\\' || *sp == searchar)
2222                         putc ('\\', tagf);
2223                       putc (*sp, tagf);
2224                     }
2225                   putc (searchar, tagf);
2226                 }
2227               else
2228                 {               /* anything else; text pattern inadequate */
2229                   fprintf (tagf, "%d", np->lno);
2230                 }
2231               putc ('\n', tagf);
2232             }
2233         }
2234     } /* if this node contains a valid tag */
2235
2236   /* Output subentries that follow this one */
2237   put_entries (np->right);
2238   if (!CTAGS)
2239     put_entries (np->left);
2240 }
2241
2242 \f
/* Flags describing the C dialect being parsed.  The bits covered by
   C_EXT select a language extension; note that the C_STAR and C_JAVA
   values both include the C_PLPL bit.  */
#define C_EXT   0x00fff         /* mask of the C extension bits */
#define C_PLAIN 0x00000         /* plain C, no extension */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2251
/*
 * The C symbol tables.
 *
 * Kinds of symbol that the keyword table below associates with the
 * words it recognizes; st_none is for words that are not in the table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2266
2267 /* Feed stuff between (but not including) %[ and %] lines to:
2268      gperf -m 5
2269 %[
2270 %compare-strncmp
2271 %enum
2272 %struct-type
2273 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2274 %%
2275 if,             0,                      st_C_ignore
2276 for,            0,                      st_C_ignore
2277 while,          0,                      st_C_ignore
2278 switch,         0,                      st_C_ignore
2279 return,         0,                      st_C_ignore
2280 __attribute__,  0,                      st_C_attribute
2281 GTY,            0,                      st_C_attribute
2282 @interface,     0,                      st_C_objprot
2283 @protocol,      0,                      st_C_objprot
2284 @implementation,0,                      st_C_objimpl
2285 @end,           0,                      st_C_objend
2286 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2287 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2288 friend,         C_PLPL,                 st_C_ignore
2289 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2290 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2291 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2292 class,          0,                      st_C_class
2293 namespace,      C_PLPL,                 st_C_struct
2294 domain,         C_STAR,                 st_C_struct
2295 union,          0,                      st_C_struct
2296 struct,         0,                      st_C_struct
2297 extern,         0,                      st_C_extern
2298 enum,           0,                      st_C_enum
2299 typedef,        0,                      st_C_typedef
2300 define,         0,                      st_C_define
2301 undef,          0,                      st_C_define
2302 operator,       C_PLPL,                 st_C_operator
2303 template,       0,                      st_C_template
2304 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2305 DEFUN,          0,                      st_C_gnumacro
2306 SYSCALL,        0,                      st_C_gnumacro
2307 ENTRY,          0,                      st_C_gnumacro
2308 PSEUDO,         0,                      st_C_gnumacro
2309 # These are defined inside C functions, so currently they are not met.
2310 # EXFUN used in glibc, DEFVAR_* in emacs.
2311 #EXFUN,         0,                      st_C_gnumacro
2312 #DEFVAR_,       0,                      st_C_gnumacro
2313 %]
2314 and replace lines between %< and %> with its output, then:
2315  - remove the #if characterset check
2316  - make in_word_set static and not inline. */
2317 /*%<*/
2318 /* C code produced by gperf version 3.0.1 */
2319 /* Command-line: gperf -m 5  */
2320 /* Computed positions: -k'2-3' */
2321
2322 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2323 /* maximum key range = 33, duplicates = 0 */
2324
/* Hash function generated by gperf for the keyword table: the sum of
   LEN and of the values associated with the second and, unless LEN is
   2, the third character of STR.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character takes part for every length except 2.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2370
2371 static struct C_stab_entry *
2372 in_word_set (register const char *str, register unsigned int len)
2373 {
2374   enum
2375     {
2376       TOTAL_KEYWORDS = 33,
2377       MIN_WORD_LENGTH = 2,
2378       MAX_WORD_LENGTH = 15,
2379       MIN_HASH_VALUE = 2,
2380       MAX_HASH_VALUE = 34
2381     };
2382
2383   static struct C_stab_entry wordlist[] =
2384     {
2385       {""}, {""},
2386       {"if",            0,                      st_C_ignore},
2387       {"GTY",           0,                      st_C_attribute},
2388       {"@end",          0,                      st_C_objend},
2389       {"union",         0,                      st_C_struct},
2390       {"define",                0,                      st_C_define},
2391       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2392       {"template",      0,                      st_C_template},
2393       {"operator",      C_PLPL,                 st_C_operator},
2394       {"@interface",    0,                      st_C_objprot},
2395       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2396       {"friend",                C_PLPL,                 st_C_ignore},
2397       {"typedef",       0,                      st_C_typedef},
2398       {"return",                0,                      st_C_ignore},
2399       {"@implementation",0,                     st_C_objimpl},
2400       {"@protocol",     0,                      st_C_objprot},
2401       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2402       {"extern",                0,                      st_C_extern},
2403       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2404       {"struct",                0,                      st_C_struct},
2405       {"domain",                C_STAR,                 st_C_struct},
2406       {"switch",                0,                      st_C_ignore},
2407       {"enum",          0,                      st_C_enum},
2408       {"for",           0,                      st_C_ignore},
2409       {"namespace",     C_PLPL,                 st_C_struct},
2410       {"class",         0,                      st_C_class},
2411       {"while",         0,                      st_C_ignore},
2412       {"undef",         0,                      st_C_define},
2413       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2414       {"__attribute__", 0,                      st_C_attribute},
2415       {"SYSCALL",       0,                      st_C_gnumacro},
2416       {"ENTRY",         0,                      st_C_gnumacro},
2417       {"PSEUDO",                0,                      st_C_gnumacro},
2418       {"DEFUN",         0,                      st_C_gnumacro}
2419     };
2420
2421   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2422     {
2423       int key = hash (str, len);
2424
2425       if (key <= MAX_HASH_VALUE && key >= 0)
2426         {
2427           const char *s = wordlist[key].name;
2428
2429           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2430             return &wordlist[key];
2431         }
2432     }
2433   return 0;
2434 }
2435 /*%>*/
2436
2437 static enum sym_type
2438 C_symtype (char *str, int len, int c_ext)
2439 {
2440   register struct C_stab_entry *se = in_word_set (str, len);
2441
2442   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2443     return st_none;
2444   return se->type;
2445 }
2446
2447 \f
2448 /*
2449  * Ignoring __attribute__ ((list))
2450  */
2451 static bool inattribute;        /* looking at an __attribute__ construct */
2452
/*
 * A simple finite automaton recognizes C functions and variables;
 * fvdef holds its current state.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

/* func or var: the extern keyword has been seen */
static bool fvextern;
2472
/*
 * A simple finite automaton recognizes typedefs;
 * typdef holds its current state.
 */
static enum
{
  /* nothing seen */
  tnone,
  /* typedef keyword seen */
  tkeyseen,
  /* defined type seen */
  ttypeseen,
  /* inside typedef body */
  tinbody,
  /* just before typedef tag */
  tend,
  /* junk after typedef tag */
  tignore
} typdef;
2486
/*
 * Another simple finite automaton recognizes struct-like structures
 * (enum, struct and union); structdef holds its current state.
 */
static enum
{
  /* nothing seen yet, or in struct body if bracelev > 0 */
  snone,
  /* struct-like keyword seen */
  skeyseen,
  /* struct-like tag seen */
  stagseen,
  /* colon seen after struct-like tag */
  scolonseen
} structdef;
2500
/* Name of the current class; meaningful only while objdef is not onone. */
static const char *objtag = "<uninited>";
2505
/*
 * A further small state machine handles preprocessor lines;
 * definedef holds its current state.
 */
static enum
{
  /* nothing seen */
  dnone,
  /* '#' seen as first char on line */
  dsharpseen,
  /* '#' and 'define' seen */
  ddefineseen,
  /* ignore rest of line */
  dignorerest
} definedef;
2516
/*
 * State machine for Objective C protocols and implementations;
 * objdef holds its current state.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  /* nothing seen */
  onone,
  /* @interface or @protocol seen */
  oprotocol,
  /* @implementation seen */
  oimplementation,
  /* class name seen */
  otagseen,
  /* parenthesis before category seen */
  oparenseen,
  /* category name seen */
  ocatseen,
  /* in @implementation body */
  oinbody,
  /* in @implementation body, after +/- */
  omethodsign,
  /* after method name */
  omethodtag,
  /* after method colon */
  omethodcolon,
  /* after method parameter */
  omethodparm,
  /* wait for @end */
  oignore
} objdef;
2536
2537
/*
 * Information about the token read and about how it should be tagged.
 * The make_C_tag function uses it to build a tag.
 *
 * The first three members can be used to pass strings around for
 * generic purposes.  The remaining ones specifically refer to creating
 * tags; in that case the token held here is the pattern that will be
 * used to create a tag.
 */
static struct tok
{
  /* string containing the token */
  char *line;
  /* where the token starts in LINE */
  int offset;
  /* token length */
  int length;

  /* when false, do not create a tag; the token should be invalidated
     whenever a state machine is reset prematurely */
  bool valid;
  /* create a named tag */
  bool named;
  /* source line number of tag */
  int lineno;
  /* source char number of tag */
  long linepos;
} token;                        /* latest token read */
2560
2561 /*
2562  * Variables and functions for dealing with nested structures.
2563  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2564  */
2565 static void pushclass_above (int, char *, int);
2566 static void popclass_above (int);
2567 static void write_classname (linebuffer *, const char *qualifier);
2568
2569 static struct {
2570   char **cname;                 /* nested class names */
2571   int *bracelev;                /* nested class brace level */
2572   int nl;                       /* class nesting level (elements used) */
2573   int size;                     /* length of the array */
2574 } cstack;                       /* stack for nested declaration tags */
2575 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2576 #define nestlev         (cstack.nl)
2577 /* After struct keyword or in struct body, not inside a nested function. */
2578 #define instruct        (structdef == snone && nestlev > 0                      \
2579                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2580
2581 static void
2582 pushclass_above (int bracelev, char *str, int len)
2583 {
2584   int nl;
2585
2586   popclass_above (bracelev);
2587   nl = cstack.nl;
2588   if (nl >= cstack.size)
2589     {
2590       int size = cstack.size *= 2;
2591       xrnew (cstack.cname, size, char *);
2592       xrnew (cstack.bracelev, size, int);
2593     }
2594   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2595   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2596   cstack.bracelev[nl] = bracelev;
2597   cstack.nl = nl + 1;
2598 }
2599
2600 static void
2601 popclass_above (int bracelev)
2602 {
2603   int nl;
2604
2605   for (nl = cstack.nl - 1;
2606        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2607        nl--)
2608     {
2609       free (cstack.cname[nl]);
2610       cstack.nl = nl;
2611     }
2612 }
2613
2614 static void
2615 write_classname (linebuffer *cn, const char *qualifier)
2616 {
2617   int i, len;
2618   int qlen = strlen (qualifier);
2619
2620   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2621     {
2622       len = 0;
2623       cn->len = 0;
2624       cn->buffer[0] = '\0';
2625     }
2626   else
2627     {
2628       len = strlen (cstack.cname[0]);
2629       linebuffer_setlen (cn, len);
2630       strcpy (cn->buffer, cstack.cname[0]);
2631     }
2632   for (i = 1; i < cstack.nl; i++)
2633     {
2634       char *s = cstack.cname[i];
2635       if (s == NULL)
2636         continue;
2637       linebuffer_setlen (cn, len + qlen + strlen (s));
2638       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2639     }
2640 }
2641
2642 \f
2643 static bool consider_token (char *, int, int, int *, int, int, bool *);
2644 static void make_C_tag (bool);
2645
2646 /*
2647  * consider_token ()
2648  *      checks to see if the current token is at the start of a
2649  *      function or variable, or corresponds to a typedef, or
2650  *      is a struct/union/enum tag, or #define, or an enum constant.
2651  *
2652  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2653  *      with args.  C_EXTP points to which language we are looking at.
2654  *
2655  * Globals
2656  *      fvdef                   IN OUT
2657  *      structdef               IN OUT
2658  *      definedef               IN OUT
2659  *      typdef                  IN OUT
2660  *      objdef                  IN OUT
2661  */
2662
2663 static bool
2664 consider_token (char *str, int len, int c, int *c_extp,
2665                 int bracelev, int parlev, bool *is_func_or_var)
2666                                 /* IN: token pointer */
2667                                 /* IN: token length */
2668                                 /* IN: first char after the token */
2669                                 /* IN, OUT: C extensions mask */
2670                                 /* IN: brace level */
2671                                 /* IN: parenthesis level */
2672                                 /* OUT: function or variable found */
2673 {
2674   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2675      structtype is the type of the preceding struct-like keyword, and
2676      structbracelev is the brace level where it has been seen. */
2677   static enum sym_type structtype;
2678   static int structbracelev;
2679   static enum sym_type toktype;
2680
2681
2682   toktype = C_symtype (str, len, *c_extp);
2683
2684   /*
2685    * Skip __attribute__
2686    */
2687   if (toktype == st_C_attribute)
2688     {
2689       inattribute = true;
2690       return false;
2691      }
2692
2693    /*
2694     * Advance the definedef state machine.
2695     */
2696    switch (definedef)
2697      {
2698      case dnone:
2699        /* We're not on a preprocessor line. */
2700        if (toktype == st_C_gnumacro)
2701          {
2702            fvdef = fdefunkey;
2703            return false;
2704          }
2705        break;
2706      case dsharpseen:
2707        if (toktype == st_C_define)
2708          {
2709            definedef = ddefineseen;
2710          }
2711        else
2712          {
2713            definedef = dignorerest;
2714          }
2715        return false;
2716      case ddefineseen:
2717        /*
2718         * Make a tag for any macro, unless it is a constant
2719         * and constantypedefs is false.
2720         */
2721        definedef = dignorerest;
2722        *is_func_or_var = (c == '(');
2723        if (!*is_func_or_var && !constantypedefs)
2724          return false;
2725        else
2726          return true;
2727      case dignorerest:
2728        return false;
2729      default:
2730        error ("internal error: definedef value.");
2731      }
2732
2733    /*
2734     * Now typedefs
2735     */
2736    switch (typdef)
2737      {
2738      case tnone:
2739        if (toktype == st_C_typedef)
2740          {
2741            if (typedefs)
2742              typdef = tkeyseen;
2743            fvextern = false;
2744            fvdef = fvnone;
2745            return false;
2746          }
2747        break;
2748      case tkeyseen:
2749        switch (toktype)
2750          {
2751          case st_none:
2752          case st_C_class:
2753          case st_C_struct:
2754          case st_C_enum:
2755            typdef = ttypeseen;
2756          }
2757        break;
2758      case ttypeseen:
2759        if (structdef == snone && fvdef == fvnone)
2760          {
2761            fvdef = fvnameseen;
2762            return true;
2763          }
2764        break;
2765      case tend:
2766        switch (toktype)
2767          {
2768          case st_C_class:
2769          case st_C_struct:
2770          case st_C_enum:
2771            return false;
2772          }
2773        return true;
2774      }
2775
2776    switch (toktype)
2777      {
2778      case st_C_javastruct:
2779        if (structdef == stagseen)
2780          structdef = scolonseen;
2781        return false;
2782      case st_C_template:
2783      case st_C_class:
2784        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2785            && bracelev == 0
2786            && definedef == dnone && structdef == snone
2787            && typdef == tnone && fvdef == fvnone)
2788          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2789        if (toktype == st_C_template)
2790          break;
2791        /* FALLTHRU */
2792      case st_C_struct:
2793      case st_C_enum:
2794        if (parlev == 0
2795            && fvdef != vignore
2796            && (typdef == tkeyseen
2797                || (typedefs_or_cplusplus && structdef == snone)))
2798          {
2799            structdef = skeyseen;
2800            structtype = toktype;
2801            structbracelev = bracelev;
2802            if (fvdef == fvnameseen)
2803              fvdef = fvnone;
2804          }
2805        return false;
2806      }
2807
2808    if (structdef == skeyseen)
2809      {
2810        structdef = stagseen;
2811        return true;
2812      }
2813
2814    if (typdef != tnone)
2815      definedef = dnone;
2816
2817    /* Detect Objective C constructs. */
2818    switch (objdef)
2819      {
2820      case onone:
2821        switch (toktype)
2822          {
2823          case st_C_objprot:
2824            objdef = oprotocol;
2825            return false;
2826          case st_C_objimpl:
2827            objdef = oimplementation;
2828            return false;
2829          }
2830        break;
2831      case oimplementation:
2832        /* Save the class tag for functions or variables defined inside. */
2833        objtag = savenstr (str, len);
2834        objdef = oinbody;
2835        return false;
2836      case oprotocol:
2837        /* Save the class tag for categories. */
2838        objtag = savenstr (str, len);
2839        objdef = otagseen;
2840        *is_func_or_var = true;
2841        return true;
2842      case oparenseen:
2843        objdef = ocatseen;
2844        *is_func_or_var = true;
2845        return true;
2846      case oinbody:
2847        break;
2848      case omethodsign:
2849        if (parlev == 0)
2850          {
2851            fvdef = fvnone;
2852            objdef = omethodtag;
2853            linebuffer_setlen (&token_name, len);
2854            memcpy (token_name.buffer, str, len);
2855            token_name.buffer[len] = '\0';
2856            return true;
2857          }
2858        return false;
2859      case omethodcolon:
2860        if (parlev == 0)
2861          objdef = omethodparm;
2862        return false;
2863      case omethodparm:
2864        if (parlev == 0)
2865          {
2866            objdef = omethodtag;
2867            if (class_qualify)
2868              {
2869                int oldlen = token_name.len;
2870                fvdef = fvnone;
2871                linebuffer_setlen (&token_name, oldlen + len);
2872                memcpy (token_name.buffer + oldlen, str, len);
2873                token_name.buffer[oldlen + len] = '\0';
2874              }
2875            return true;
2876          }
2877        return false;
2878      case oignore:
2879        if (toktype == st_C_objend)
2880          {
2881            /* Memory leakage here: the string pointed by objtag is
2882               never released, because many tests would be needed to
2883               avoid breaking on incorrect input code.  The amount of
2884               memory leaked here is the sum of the lengths of the
2885               class tags.
2886            free (objtag); */
2887            objdef = onone;
2888          }
2889        return false;
2890      }
2891
2892    /* A function, variable or enum constant? */
2893    switch (toktype)
2894      {
2895      case st_C_extern:
2896        fvextern = true;
2897        switch  (fvdef)
2898          {
2899          case finlist:
2900          case flistseen:
2901          case fignore:
2902          case vignore:
2903            break;
2904          default:
2905            fvdef = fvnone;
2906          }
2907        return false;
2908      case st_C_ignore:
2909        fvextern = false;
2910        fvdef = vignore;
2911        return false;
2912      case st_C_operator:
2913        fvdef = foperator;
2914        *is_func_or_var = true;
2915        return true;
2916      case st_none:
2917        if (constantypedefs
2918            && structdef == snone
2919            && structtype == st_C_enum && bracelev > structbracelev
2920            /* Don't tag tokens in expressions that assign values to enum
2921               constants.  */
2922            && fvdef != vignore)
2923          return true;           /* enum constant */
2924        switch (fvdef)
2925          {
2926          case fdefunkey:
2927            if (bracelev > 0)
2928              break;
2929            fvdef = fdefunname;  /* GNU macro */
2930            *is_func_or_var = true;
2931            return true;
2932          case fvnone:
2933            switch (typdef)
2934              {
2935              case ttypeseen:
2936                return false;
2937              case tnone:
2938                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2939                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2940                  {
2941                    fvdef = vignore;
2942                    return false;
2943                  }
2944                break;
2945              }
2946           /* FALLTHRU */
2947           case fvnameseen:
2948           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2949             {
2950               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2951                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2952               fvdef = foperator;
2953               *is_func_or_var = true;
2954               return true;
2955             }
2956           if (bracelev > 0 && !instruct)
2957             break;
2958           fvdef = fvnameseen;   /* function or variable */
2959           *is_func_or_var = true;
2960           return true;
2961         }
2962       break;
2963     }
2964
2965   return false;
2966 }
2967
2968 \f
2969 /*
2970  * C_entries often keeps pointers to tokens or lines which are older than
2971  * the line currently read.  By keeping two line buffers, and switching
2972  * them at end of line, it is possible to use those pointers.
2973  */
2974 static struct
2975 {
2976   long linepos;
2977   linebuffer lb;
2978 } lbs[2];
2979
2980 #define current_lb_is_new (newndx == curndx)
2981 #define switch_line_buffers() (curndx = 1 - curndx)
2982
2983 #define curlb (lbs[curndx].lb)
2984 #define newlb (lbs[newndx].lb)
2985 #define curlinepos (lbs[curndx].linepos)
2986 #define newlinepos (lbs[newndx].linepos)
2987
2988 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2989 #define cplpl (c_ext & C_PLPL)
2990 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2991
2992 #define CNL_SAVE_DEFINEDEF()                                            \
2993 do {                                                                    \
2994   curlinepos = charno;                                                  \
2995   readline (&curlb, inf);                                               \
2996   lp = curlb.buffer;                                                    \
2997   quotednl = false;                                                     \
2998   newndx = curndx;                                                      \
2999 } while (0)
3000
3001 #define CNL()                                                           \
3002 do {                                                                    \
3003   CNL_SAVE_DEFINEDEF ();                                                \
3004   if (savetoken.valid)                                                  \
3005     {                                                                   \
3006       token = savetoken;                                                \
3007       savetoken.valid = false;                                          \
3008     }                                                                   \
3009   definedef = dnone;                                                    \
3010 } while (0)
3011
3012
3013 static void
3014 make_C_tag (bool isfun)
3015 {
3016   /* This function is never called when token.valid is false, but
3017      we must protect against invalid input or internal errors. */
3018   if (token.valid)
3019     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3020               token.offset+token.length+1, token.lineno, token.linepos);
3021   else if (DEBUG)
3022     {                             /* this branch is optimized away if !DEBUG */
3023       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3024                 token_name.len + 17, isfun, token.line,
3025                 token.offset+token.length+1, token.lineno, token.linepos);
3026       error ("INVALID TOKEN");
3027     }
3028
3029   token.valid = false;
3030 }
3031
/* Return true unless the end-of-file or the error indicator is set
   on INF. */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3037
3038
3039 /*
3040  * C_entries ()
3041  *      This routine finds functions, variables, typedefs,
3042  *      #define's, enum constants and struct/union/enum definitions in
3043  *      C syntax and adds them to the list.
3044  */
3045 static void
3046 C_entries (int c_ext, FILE *inf)
3047                                 /* extension of C */
3048                                 /* input file */
3049 {
3050   register char c;              /* latest char read; '\0' for end of line */
3051   register char *lp;            /* pointer one beyond the character `c' */
3052   int curndx, newndx;           /* indices for current and new lb */
3053   register int tokoff;          /* offset in line of start of current token */
3054   register int toklen;          /* length of current token */
3055   const char *qualifier;        /* string used to qualify names */
3056   int qlen;                     /* length of qualifier */
3057   int bracelev;                 /* current brace level */
3058   int bracketlev;               /* current bracket level */
3059   int parlev;                   /* current parenthesis level */
3060   int attrparlev;               /* __attribute__ parenthesis level */
3061   int templatelev;              /* current template level */
3062   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3063   bool incomm, inquote, inchar, quotednl, midtoken;
3064   bool yacc_rules;              /* in the rules part of a yacc file */
3065   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3066
3067
3068   linebuffer_init (&lbs[0].lb);
3069   linebuffer_init (&lbs[1].lb);
3070   if (cstack.size == 0)
3071     {
3072       cstack.size = (DEBUG) ? 1 : 4;
3073       cstack.nl = 0;
3074       cstack.cname = xnew (cstack.size, char *);
3075       cstack.bracelev = xnew (cstack.size, int);
3076     }
3077
3078   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3079   curndx = newndx = 0;
3080   lp = curlb.buffer;
3081   *lp = 0;
3082
3083   fvdef = fvnone; fvextern = false; typdef = tnone;
3084   structdef = snone; definedef = dnone; objdef = onone;
3085   yacc_rules = false;
3086   midtoken = inquote = inchar = incomm = quotednl = false;
3087   token.valid = savetoken.valid = false;
3088   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3089   if (cjava)
3090     { qualifier = "."; qlen = 1; }
3091   else
3092     { qualifier = "::"; qlen = 2; }
3093
3094
3095   while (perhaps_more_input (inf))
3096     {
3097       c = *lp++;
3098       if (c == '\\')
3099         {
3100           /* If we are at the end of the line, the next character is a
3101              '\0'; do not skip it, because it is what tells us
3102              to read the next line.  */
3103           if (*lp == '\0')
3104             {
3105               quotednl = true;
3106               continue;
3107             }
3108           lp++;
3109           c = ' ';
3110         }
3111       else if (incomm)
3112         {
3113           switch (c)
3114             {
3115             case '*':
3116               if (*lp == '/')
3117                 {
3118                   c = *lp++;
3119                   incomm = false;
3120                 }
3121               break;
3122             case '\0':
3123               /* Newlines inside comments do not end macro definitions in
3124                  traditional cpp. */
3125               CNL_SAVE_DEFINEDEF ();
3126               break;
3127             }
3128           continue;
3129         }
3130       else if (inquote)
3131         {
3132           switch (c)
3133             {
3134             case '"':
3135               inquote = false;
3136               break;
3137             case '\0':
3138               /* Newlines inside strings do not end macro definitions
3139                  in traditional cpp, even though compilers don't
3140                  usually accept them. */
3141               CNL_SAVE_DEFINEDEF ();
3142               break;
3143             }
3144           continue;
3145         }
3146       else if (inchar)
3147         {
3148           switch (c)
3149             {
3150             case '\0':
3151               /* Hmmm, something went wrong. */
3152               CNL ();
3153               /* FALLTHRU */
3154             case '\'':
3155               inchar = false;
3156               break;
3157             }
3158           continue;
3159         }
3160       else switch (c)
3161         {
3162         case '"':
3163           inquote = true;
3164           if (bracketlev > 0)
3165             continue;
3166           if (inattribute)
3167             break;
3168           switch (fvdef)
3169             {
3170             case fdefunkey:
3171             case fstartlist:
3172             case finlist:
3173             case fignore:
3174             case vignore:
3175               break;
3176             default:
3177               fvextern = false;
3178               fvdef = fvnone;
3179             }
3180           continue;
3181         case '\'':
3182           inchar = true;
3183           if (bracketlev > 0)
3184             continue;
3185           if (inattribute)
3186             break;
3187           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3188             {
3189               fvextern = false;
3190               fvdef = fvnone;
3191             }
3192           continue;
3193         case '/':
3194           if (*lp == '*')
3195             {
3196               incomm = true;
3197               lp++;
3198               c = ' ';
3199               if (bracketlev > 0)
3200                 continue;
3201             }
3202           else if (/* cplpl && */ *lp == '/')
3203             {
3204               c = '\0';
3205             }
3206           break;
3207         case '%':
3208           if ((c_ext & YACC) && *lp == '%')
3209             {
3210               /* Entering or exiting rules section in yacc file. */
3211               lp++;
3212               definedef = dnone; fvdef = fvnone; fvextern = false;
3213               typdef = tnone; structdef = snone;
3214               midtoken = inquote = inchar = incomm = quotednl = false;
3215               bracelev = 0;
3216               yacc_rules = !yacc_rules;
3217               continue;
3218             }
3219           else
3220             break;
3221         case '#':
3222           if (definedef == dnone)
3223             {
3224               char *cp;
3225               bool cpptoken = true;
3226
3227               /* Look back on this line.  If all blanks, or nonblanks
3228                  followed by an end of comment, this is a preprocessor
3229                  token. */
3230               for (cp = newlb.buffer; cp < lp-1; cp++)
3231                 if (!c_isspace (*cp))
3232                   {
3233                     if (*cp == '*' && cp[1] == '/')
3234                       {
3235                         cp++;
3236                         cpptoken = true;
3237                       }
3238                     else
3239                       cpptoken = false;
3240                   }
3241               if (cpptoken)
3242                 {
3243                   definedef = dsharpseen;
3244                   /* This is needed for tagging enum values: when there are
3245                      preprocessor conditionals inside the enum, we need to
3246                      reset the value of fvdef so that the next enum value is
3247                      tagged even though the one before it did not end in a
3248                      comma.  */
3249                   if (fvdef == vignore && instruct && parlev == 0)
3250                     {
3251                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3252                         fvdef = fvnone;
3253                     }
3254                 }
3255             } /* if (definedef == dnone) */
3256           continue;
3257         case '[':
3258           bracketlev++;
3259           continue;
3260         default:
3261           if (bracketlev > 0)
3262             {
3263               if (c == ']')
3264                 --bracketlev;
3265               else if (c == '\0')
3266                 CNL_SAVE_DEFINEDEF ();
3267               continue;
3268             }
3269           break;
3270         } /* switch (c) */
3271
3272
3273       /* Consider token only if some involved conditions are satisfied. */
3274       if (typdef != tignore
3275           && definedef != dignorerest
3276           && fvdef != finlist
3277           && templatelev == 0
3278           && (definedef != dnone
3279               || structdef != scolonseen)
3280           && !inattribute)
3281         {
3282           if (midtoken)
3283             {
3284               if (endtoken (c))
3285                 {
3286                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3287                     /* This handles :: in the middle,
3288                        but not at the beginning of an identifier.
3289                        Also, space-separated :: is not recognized. */
3290                     {
3291                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3292                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3293                       lp += 2;
3294                       toklen += 2;
3295                       c = lp[-1];
3296                       goto still_in_token;
3297                     }
3298                   else
3299                     {
3300                       bool funorvar = false;
3301
3302                       if (yacc_rules
3303                           || consider_token (newlb.buffer + tokoff, toklen, c,
3304                                              &c_ext, bracelev, parlev,
3305                                              &funorvar))
3306                         {
3307                           if (fvdef == foperator)
3308                             {
3309                               char *oldlp = lp;
3310                               lp = skip_spaces (lp-1);
3311                               if (*lp != '\0')
3312                                 lp += 1;
3313                               while (*lp != '\0'
3314                                      && !c_isspace (*lp) && *lp != '(')
3315                                 lp += 1;
3316                               c = *lp++;
3317                               toklen += lp - oldlp;
3318                             }
3319                           token.named = false;
3320                           if (!plainc
3321                               && nestlev > 0 && definedef == dnone)
3322                             /* in struct body */
3323                             {
3324                               if (class_qualify)
3325                                 {
3326                                   int len;
3327                                   write_classname (&token_name, qualifier);
3328                                   len = token_name.len;
3329                                   linebuffer_setlen (&token_name,
3330                                                      len + qlen + toklen);
3331                                   sprintf (token_name.buffer + len, "%s%.*s",
3332                                            qualifier, toklen,
3333                                            newlb.buffer + tokoff);
3334                                 }
3335                               else
3336                                 {
3337                                   linebuffer_setlen (&token_name, toklen);
3338                                   sprintf (token_name.buffer, "%.*s",
3339                                            toklen, newlb.buffer + tokoff);
3340                                 }
3341                               token.named = true;
3342                             }
3343                           else if (objdef == ocatseen)
3344                             /* Objective C category */
3345                             {
3346                               if (class_qualify)
3347                                 {
3348                                   int len = strlen (objtag) + 2 + toklen;
3349                                   linebuffer_setlen (&token_name, len);
3350                                   sprintf (token_name.buffer, "%s(%.*s)",
3351                                            objtag, toklen,
3352                                            newlb.buffer + tokoff);
3353                                 }
3354                               else
3355                                 {
3356                                   linebuffer_setlen (&token_name, toklen);
3357                                   sprintf (token_name.buffer, "%.*s",
3358                                            toklen, newlb.buffer + tokoff);
3359                                 }
3360                               token.named = true;
3361                             }
3362                           else if (objdef == omethodtag
3363                                    || objdef == omethodparm)
3364                             /* Objective C method */
3365                             {
3366                               token.named = true;
3367                             }
3368                           else if (fvdef == fdefunname)
3369                             /* GNU DEFUN and similar macros */
3370                             {
3371                               bool defun = (newlb.buffer[tokoff] == 'F');
3372                               int off = tokoff;
3373                               int len = toklen;
3374
3375                               /* Rewrite the tag so that emacs lisp DEFUNs
3376                                  can be found by their elisp name */
3377                               if (defun)
3378                                 {
3379                                   off += 1;
3380                                   len -= 1;
3381                                 }
3382                               linebuffer_setlen (&token_name, len);
3383                               memcpy (token_name.buffer,
3384                                       newlb.buffer + off, len);
3385                               token_name.buffer[len] = '\0';
3386                               if (defun)
3387                                 while (--len >= 0)
3388                                   if (token_name.buffer[len] == '_')
3389                                     token_name.buffer[len] = '-';
3390                               token.named = defun;
3391                             }
3392                           else
3393                             {
3394                               linebuffer_setlen (&token_name, toklen);
3395                               memcpy (token_name.buffer,
3396                                       newlb.buffer + tokoff, toklen);
3397                               token_name.buffer[toklen] = '\0';
3398                               /* Name macros and members. */
3399                               token.named = (structdef == stagseen
3400                                              || typdef == ttypeseen
3401                                              || typdef == tend
3402                                              || (funorvar
3403                                                  && definedef == dignorerest)
3404                                              || (funorvar
3405                                                  && definedef == dnone
3406                                                  && structdef == snone
3407                                                  && bracelev > 0));
3408                             }
3409                           token.lineno = lineno;
3410                           token.offset = tokoff;
3411                           token.length = toklen;
3412                           token.line = newlb.buffer;
3413                           token.linepos = newlinepos;
3414                           token.valid = true;
3415
3416                           if (definedef == dnone
3417                               && (fvdef == fvnameseen
3418                                   || fvdef == foperator
3419                                   || structdef == stagseen
3420                                   || typdef == tend
3421                                   || typdef == ttypeseen
3422                                   || objdef != onone))
3423                             {
3424                               if (current_lb_is_new)
3425                                 switch_line_buffers ();
3426                             }
3427                           else if (definedef != dnone
3428                                    || fvdef == fdefunname
3429                                    || instruct)
3430                             make_C_tag (funorvar);
3431                         }
3432                       else /* not yacc and consider_token failed */
3433                         {
3434                           if (inattribute && fvdef == fignore)
3435                             {
3436                               /* We have just met __attribute__ after a
3437                                  function parameter list: do not tag the
3438                                  function again. */
3439                               fvdef = fvnone;
3440                             }
3441                         }
3442                       midtoken = false;
3443                     }
3444                 } /* if (endtoken (c)) */
3445               else if (intoken (c))
3446                 still_in_token:
3447                 {
3448                   toklen++;
3449                   continue;
3450                 }
3451             } /* if (midtoken) */
3452           else if (begtoken (c))
3453             {
3454               switch (definedef)
3455                 {
3456                 case dnone:
3457                   switch (fvdef)
3458                     {
3459                     case fstartlist:
3460                       /* This prevents tagging fb in
3461                          void (__attribute__((noreturn)) *fb) (void);
3462                          Fixing this is not easy and not very important. */
3463                       fvdef = finlist;
3464                       continue;
3465                     case flistseen:
3466                       if (plainc || declarations)
3467                         {
3468                           make_C_tag (true); /* a function */
3469                           fvdef = fignore;
3470                         }
3471                       break;
3472                     }
3473                   if (structdef == stagseen && !cjava)
3474                     {
3475                       popclass_above (bracelev);
3476                       structdef = snone;
3477                     }
3478                   break;
3479                 case dsharpseen:
3480                   savetoken = token;
3481                   break;
3482                 }
3483               if (!yacc_rules || lp == newlb.buffer + 1)
3484                 {
3485                   tokoff = lp - 1 - newlb.buffer;
3486                   toklen = 1;
3487                   midtoken = true;
3488                 }
3489               continue;
3490             } /* if (begtoken) */
3491         } /* if must look at token */
3492
3493
3494       /* Detect end of line, colon, comma, semicolon and various braces
3495          after having handled a token.*/
3496       switch (c)
3497         {
3498         case ':':
3499           if (inattribute)
3500             break;
3501           if (yacc_rules && token.offset == 0 && token.valid)
3502             {
3503               make_C_tag (false); /* a yacc function */
3504               break;
3505             }
3506           if (definedef != dnone)
3507             break;
3508           switch (objdef)
3509             {
3510             case  otagseen:
3511               objdef = oignore;
3512               make_C_tag (true); /* an Objective C class */
3513               break;
3514             case omethodtag:
3515             case omethodparm:
3516               objdef = omethodcolon;
3517               if (class_qualify)
3518                 {
3519                   int toklen = token_name.len;
3520                   linebuffer_setlen (&token_name, toklen + 1);
3521                   strcpy (token_name.buffer + toklen, ":");
3522                 }
3523               break;
3524             }
3525           if (structdef == stagseen)
3526             {
3527               structdef = scolonseen;
3528               break;
3529             }
3530           /* Should be useless, but may work as a safety net. */
3531           if (cplpl && fvdef == flistseen)
3532             {
3533               make_C_tag (true); /* a function */
3534               fvdef = fignore;
3535               break;
3536             }
3537           break;
3538         case ';':
3539           if (definedef != dnone || inattribute)
3540             break;
3541           switch (typdef)
3542             {
3543             case tend:
3544             case ttypeseen:
3545               make_C_tag (false); /* a typedef */
3546               typdef = tnone;
3547               fvdef = fvnone;
3548               break;
3549             case tnone:
3550             case tinbody:
3551             case tignore:
3552               switch (fvdef)
3553                 {
3554                 case fignore:
3555                   if (typdef == tignore || cplpl)
3556                     fvdef = fvnone;
3557                   break;
3558                 case fvnameseen:
3559                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3560                       || (members && instruct))
3561                     make_C_tag (false); /* a variable */
3562                   fvextern = false;
3563                   fvdef = fvnone;
3564                   token.valid = false;
3565                   break;
3566                 case flistseen:
3567                   if ((declarations
3568                        && (cplpl || !instruct)
3569                        && (typdef == tnone || (typdef != tignore && instruct)))
3570                       || (members
3571                           && plainc && instruct))
3572                     make_C_tag (true);  /* a function */
3573                   /* FALLTHRU */
3574                 default:
3575                   fvextern = false;
3576                   fvdef = fvnone;
3577                   if (declarations
3578                        && cplpl && structdef == stagseen)
3579                     make_C_tag (false); /* forward declaration */
3580                   else
3581                     token.valid = false;
3582                 } /* switch (fvdef) */
3583               /* FALLTHRU */
3584             default:
3585               if (!instruct)
3586                 typdef = tnone;
3587             }
3588           if (structdef == stagseen)
3589             structdef = snone;
3590           break;
3591         case ',':
3592           if (definedef != dnone || inattribute)
3593             break;
3594           switch (objdef)
3595             {
3596             case omethodtag:
3597             case omethodparm:
3598               make_C_tag (true); /* an Objective C method */
3599               objdef = oinbody;
3600               break;
3601             }
3602           switch (fvdef)
3603             {
3604             case fdefunkey:
3605             case foperator:
3606             case fstartlist:
3607             case finlist:
3608             case fignore:
3609               break;
3610             case vignore:
3611               if (instruct && parlev == 0)
3612                 fvdef = fvnone;
3613               break;
3614             case fdefunname:
3615               fvdef = fignore;
3616               break;
3617             case fvnameseen:
3618               if (parlev == 0
3619                   && ((globals
3620                        && bracelev == 0
3621                        && templatelev == 0
3622                        && (!fvextern || declarations))
3623                       || (members && instruct)))
3624                   make_C_tag (false); /* a variable */
3625               break;
3626             case flistseen:
3627               if ((declarations && typdef == tnone && !instruct)
3628                   || (members && typdef != tignore && instruct))
3629                 {
3630                   make_C_tag (true); /* a function */
3631                   fvdef = fvnameseen;
3632                 }
3633               else if (!declarations)
3634                 fvdef = fvnone;
3635               token.valid = false;
3636               break;
3637             default:
3638               fvdef = fvnone;
3639             }
3640           if (structdef == stagseen)
3641             structdef = snone;
3642           break;
3643         case ']':
3644           if (definedef != dnone || inattribute)
3645             break;
3646           if (structdef == stagseen)
3647             structdef = snone;
3648           switch (typdef)
3649             {
3650             case ttypeseen:
3651             case tend:
3652               typdef = tignore;
3653               make_C_tag (false);       /* a typedef */
3654               break;
3655             case tnone:
3656             case tinbody:
3657               switch (fvdef)
3658                 {
3659                 case foperator:
3660                 case finlist:
3661                 case fignore:
3662                 case vignore:
3663                   break;
3664                 case fvnameseen:
3665                   if ((members && bracelev == 1)
3666                       || (globals && bracelev == 0
3667                           && (!fvextern || declarations)))
3668                     make_C_tag (false); /* a variable */
3669                   /* FALLTHRU */
3670                 default:
3671                   fvdef = fvnone;
3672                 }
3673               break;
3674             }
3675           break;
3676         case '(':
3677           if (inattribute)
3678             {
3679               attrparlev++;
3680               break;
3681             }
3682           if (definedef != dnone)
3683             break;
3684           if (objdef == otagseen && parlev == 0)
3685             objdef = oparenseen;
3686           switch (fvdef)
3687             {
3688             case fvnameseen:
3689               if (typdef == ttypeseen
3690                   && *lp != '*'
3691                   && !instruct)
3692                 {
3693                   /* This handles constructs like:
3694                      typedef void OperatorFun (int fun); */
3695                   make_C_tag (false);
3696                   typdef = tignore;
3697                   fvdef = fignore;
3698                   break;
3699                 }
3700               /* FALLTHRU */
3701             case foperator:
3702               fvdef = fstartlist;
3703               break;
3704             case flistseen:
3705               fvdef = finlist;
3706               break;
3707             }
3708           parlev++;
3709           break;
3710         case ')':
3711           if (inattribute)
3712             {
3713               if (--attrparlev == 0)
3714                 inattribute = false;
3715               break;
3716             }
3717           if (definedef != dnone)
3718             break;
3719           if (objdef == ocatseen && parlev == 1)
3720             {
3721               make_C_tag (true); /* an Objective C category */
3722               objdef = oignore;
3723             }
3724           if (--parlev == 0)
3725             {
3726               switch (fvdef)
3727                 {
3728                 case fstartlist:
3729                 case finlist:
3730                   fvdef = flistseen;
3731                   break;
3732                 }
3733               if (!instruct
3734                   && (typdef == tend
3735                       || typdef == ttypeseen))
3736                 {
3737                   typdef = tignore;
3738                   make_C_tag (false); /* a typedef */
3739                 }
3740             }
3741           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3742             parlev = 0;
3743           break;
3744         case '{':
3745           if (definedef != dnone)
3746             break;
3747           if (typdef == ttypeseen)
3748             {
3749               /* Whenever typdef is set to tinbody (currently only
3750                  here), typdefbracelev should be set to bracelev. */
3751               typdef = tinbody;
3752               typdefbracelev = bracelev;
3753             }
3754           switch (fvdef)
3755             {
3756             case flistseen:
3757               if (cplpl && !class_qualify)
3758                 {
3759                   /* Remove class and namespace qualifiers from the token,
3760                      leaving only the method/member name.  */
3761                   char *cc, *uqname = token_name.buffer;
3762                   char *tok_end = token_name.buffer + token_name.len;
3763
3764                   for (cc = token_name.buffer; cc < tok_end; cc++)
3765                     {
3766                       if (*cc == ':' && cc[1] == ':')
3767                         {
3768                           uqname = cc + 2;
3769                           cc++;
3770                         }
3771                     }
3772                   if (uqname > token_name.buffer)
3773                     {
3774                       int uqlen = strlen (uqname);
3775                       linebuffer_setlen (&token_name, uqlen);
3776                       memmove (token_name.buffer, uqname, uqlen + 1);
3777                     }
3778                 }
3779               make_C_tag (true);    /* a function */
3780               /* FALLTHRU */
3781             case fignore:
3782               fvdef = fvnone;
3783               break;
3784             case fvnone:
3785               switch (objdef)
3786                 {
3787                 case otagseen:
3788                   make_C_tag (true); /* an Objective C class */
3789                   objdef = oignore;
3790                   break;
3791                 case omethodtag:
3792                 case omethodparm:
3793                   make_C_tag (true); /* an Objective C method */
3794                   objdef = oinbody;
3795                   break;
3796                 default:
3797                   /* Neutralize `extern "C" {' grot. */
3798                   if (bracelev == 0 && structdef == snone && nestlev == 0
3799                       && typdef == tnone)
3800                     bracelev = -1;
3801                 }
3802               break;
3803             }
3804           switch (structdef)
3805             {
3806             case skeyseen:         /* unnamed struct */
3807               pushclass_above (bracelev, NULL, 0);
3808               structdef = snone;
3809               break;
3810             case stagseen:         /* named struct or enum */
3811             case scolonseen:       /* a class */
3812               pushclass_above (bracelev,token.line+token.offset, token.length);
3813               structdef = snone;
3814               make_C_tag (false);  /* a struct or enum */
3815               break;
3816             }
3817           bracelev += 1;
3818           break;
3819         case '*':
3820           if (definedef != dnone)
3821             break;
3822           if (fvdef == fstartlist)
3823             {
3824               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3825               token.valid = false;
3826             }
3827           break;
3828         case '}':
3829           if (definedef != dnone)
3830             break;
3831           bracelev -= 1;
3832           if (!ignoreindent && lp == newlb.buffer + 1)
3833             {
3834               if (bracelev != 0)
3835                 token.valid = false; /* unexpected value, token unreliable */
3836               bracelev = 0;     /* reset brace level if first column */
3837               parlev = 0;       /* also reset paren level, just in case... */
3838             }
3839           else if (bracelev < 0)
3840             {
3841               token.valid = false; /* something gone amiss, token unreliable */
3842               bracelev = 0;
3843             }
3844           if (bracelev == 0 && fvdef == vignore)
3845             fvdef = fvnone;             /* end of function */
3846           popclass_above (bracelev);
3847           structdef = snone;
3848           /* Only if typdef == tinbody is typdefbracelev significant. */
3849           if (typdef == tinbody && bracelev <= typdefbracelev)
3850             {
3851               assert (bracelev == typdefbracelev);
3852               typdef = tend;
3853             }
3854           break;
3855         case '=':
3856           if (definedef != dnone)
3857             break;
3858           switch (fvdef)
3859             {
3860             case foperator:
3861             case finlist:
3862             case fignore:
3863             case vignore:
3864               break;
3865             case fvnameseen:
3866               if ((members && bracelev == 1)
3867                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3868                 make_C_tag (false); /* a variable */
3869               /* FALLTHRU */
3870             default:
3871               fvdef = vignore;
3872             }
3873           break;
3874         case '<':
3875           if (cplpl
3876               && (structdef == stagseen || fvdef == fvnameseen))
3877             {
3878               templatelev++;
3879               break;
3880             }
3881           goto resetfvdef;
3882         case '>':
3883           if (templatelev > 0)
3884             {
3885               templatelev--;
3886               break;
3887             }
3888           goto resetfvdef;
3889         case '+':
3890         case '-':
3891           if (objdef == oinbody && bracelev == 0)
3892             {
3893               objdef = omethodsign;
3894               break;
3895             }
3896           /* FALLTHRU */
3897         resetfvdef:
3898         case '#': case '~': case '&': case '%': case '/':
3899         case '|': case '^': case '!': case '.': case '?':
3900           if (definedef != dnone)
3901             break;
3902           /* These surely cannot follow a function tag in C. */
3903           switch (fvdef)
3904             {
3905             case foperator:
3906             case finlist:
3907             case fignore:
3908             case vignore:
3909               break;
3910             default:
3911               fvdef = fvnone;
3912             }
3913           break;
3914         case '\0':
3915           if (objdef == otagseen)
3916             {
3917               make_C_tag (true); /* an Objective C class */
3918               objdef = oignore;
3919             }
3920           /* If a macro spans multiple lines don't reset its state. */
3921           if (quotednl)
3922             CNL_SAVE_DEFINEDEF ();
3923           else
3924             CNL ();
3925           break;
3926         } /* switch (c) */
3927
3928     } /* while not eof */
3929
3930   free (lbs[0].lb.buffer);
3931   free (lbs[1].lb.buffer);
3932 }
3933
3934 /*
3935  * Process either a C++ file or a C file depending on the setting
3936  * of a global flag.
3937  */
3938 static void
3939 default_C_entries (FILE *inf)
3940 {
3941   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3942 }
3943
/* Plain C only: run C_entries on INF with 0 as the language flags.  */
static void
plain_C_entries (FILE *inf)
{
  const int plain_c = 0;

  C_entries (plain_c, inf);
}
3950
3951 /* Always do C++. */
3952 static void
3953 Cplusplus_entries (FILE *inf)
3954 {
3955   C_entries (C_PLPL, inf);
3956 }
3957
3958 /* Always do Java. */
3959 static void
3960 Cjava_entries (FILE *inf)
3961 {
3962   C_entries (C_JAVA, inf);
3963 }
3964
3965 /* Always do C*. */
3966 static void
3967 Cstar_entries (FILE *inf)
3968 {
3969   C_entries (C_STAR, inf);
3970 }
3971
3972 /* Always do Yacc. */
3973 static void
3974 Yacc_entries (FILE *inf)
3975 {
3976   C_entries (YACC, inf);
3977 }
3978
3979 \f
3980 /* Useful macros. */
/* Loop header to be followed by a statement or block.  While
   perhaps_more_input (FILE_POINTER) holds, each iteration first reads
   a line into LINE_BUFFER with readline and then points CHAR_POINTER
   at the start of LINE_BUFFER's buffer.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                                                                \
       perhaps_more_input (file_pointer)                                \
         && (readline (&(line_buffer), file_pointer),                   \
             (char_pointer) = (line_buffer).buffer,                     \
             true);                                                     \
       )
3986
/* True if CP points at the keyword KW and the character right after
   it satisfies notinname.  On success CP is moved past KW and then
   through skip_spaces; on failure CP is left alone.  KW must be a
   literal string: the `"" kw' concatenation is a syntax error
   otherwise.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), true)                                               \
   && strneq ((cp), kw, sizeof (kw) - 1)                                \
   && notinname ((cp)[sizeof (kw) - 1])                                 \
   && (((cp) = skip_spaces ((cp) + sizeof (kw) - 1)) != NULL))
3992
/* Like LOOKING_AT, but the comparison is done with strncaseeq, the
   character after KW is not checked with notinname, and no spaces
   are skipped: on success CP is advanced just past KW.  KW must be a
   literal string.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), true)       /* syntax error unless KW is literal */ \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* CP points at KW */   \
   && (((cp) += sizeof (kw) - 1) != NULL))      /* step over KW */
3998
3999 /*
4000  * Read a file, but do no processing.  This is used to do regexp
4001  * matching on files that have no language defined.
4002  */
4003 static void
4004 just_read_file (FILE *inf)
4005 {
4006   while (perhaps_more_input (inf))
4007     readline (&lb, inf);
4008 }
4009
4010 \f
4011 /* Fortran parsing */
4012
4013 static void F_takeprec (void);
4014 static void F_getit (FILE *);
4015
4016 static void
4017 F_takeprec (void)
4018 {
4019   dbp = skip_spaces (dbp);
4020   if (*dbp != '*')
4021     return;
4022   dbp++;
4023   dbp = skip_spaces (dbp);
4024   if (strneq (dbp, "(*)", 3))
4025     {
4026       dbp += 3;
4027       return;
4028     }
4029   if (!c_isdigit (*dbp))
4030     {
4031       --dbp;                    /* force failure */
4032       return;
4033     }
4034   do
4035     dbp++;
4036   while (c_isdigit (*dbp));
4037 }
4038
4039 static void
4040 F_getit (FILE *inf)
4041 {
4042   register char *cp;
4043
4044   dbp = skip_spaces (dbp);
4045   if (*dbp == '\0')
4046     {
4047       readline (&lb, inf);
4048       dbp = lb.buffer;
4049       if (dbp[5] != '&')
4050         return;
4051       dbp += 6;
4052       dbp = skip_spaces (dbp);
4053     }
4054   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4055     return;
4056   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4057     continue;
4058   make_tag (dbp, cp-dbp, true,
4059             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4060 }
4061
4062
4063 static void
4064 Fortran_functions (FILE *inf)
4065 {
4066   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4067     {
4068       if (*dbp == '%')
4069         dbp++;                  /* Ratfor escape to fortran */
4070       dbp = skip_spaces (dbp);
4071       if (*dbp == '\0')
4072         continue;
4073
4074       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4075         dbp = skip_spaces (dbp);
4076
4077       if (LOOKING_AT_NOCASE (dbp, "pure"))
4078         dbp = skip_spaces (dbp);
4079
4080       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4081         dbp = skip_spaces (dbp);
4082
4083       switch (c_tolower (*dbp))
4084         {
4085         case 'i':
4086           if (nocase_tail ("integer"))
4087             F_takeprec ();
4088           break;
4089         case 'r':
4090           if (nocase_tail ("real"))
4091             F_takeprec ();
4092           break;
4093         case 'l':
4094           if (nocase_tail ("logical"))
4095             F_takeprec ();
4096           break;
4097         case 'c':
4098           if (nocase_tail ("complex") || nocase_tail ("character"))
4099             F_takeprec ();
4100           break;
4101         case 'd':
4102           if (nocase_tail ("double"))
4103             {
4104               dbp = skip_spaces (dbp);
4105               if (*dbp == '\0')
4106                 continue;
4107               if (nocase_tail ("precision"))
4108                 break;
4109               continue;
4110             }
4111           break;
4112         }
4113       dbp = skip_spaces (dbp);
4114       if (*dbp == '\0')
4115         continue;
4116       switch (c_tolower (*dbp))
4117         {
4118         case 'f':
4119           if (nocase_tail ("function"))
4120             F_getit (inf);
4121           continue;
4122         case 's':
4123           if (nocase_tail ("subroutine"))
4124             F_getit (inf);
4125           continue;
4126         case 'e':
4127           if (nocase_tail ("entry"))
4128             F_getit (inf);
4129           continue;
4130         case 'b':
4131           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4132             {
4133               dbp = skip_spaces (dbp);
4134               if (*dbp == '\0') /* assume un-named */
4135                 make_tag ("blockdata", 9, true,
4136                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4137               else
4138                 F_getit (inf);  /* look for name */
4139             }
4140           continue;
4141         }
4142     }
4143 }
4144
4145 \f
4146 /*
4147  * Ada parsing
4148  * Original code by
4149  * Philippe Waroquiers (1998)
4150  */
4151
4152 /* Once we are positioned after an "interesting" keyword, let's get
4153    the real tag value necessary. */
4154 static void
4155 Ada_getit (FILE *inf, const char *name_qualifier)
4156 {
4157   register char *cp;
4158   char *name;
4159   char c;
4160
4161   while (perhaps_more_input (inf))
4162     {
4163       dbp = skip_spaces (dbp);
4164       if (*dbp == '\0'
4165           || (dbp[0] == '-' && dbp[1] == '-'))
4166         {
4167           readline (&lb, inf);
4168           dbp = lb.buffer;
4169         }
4170       switch (c_tolower (*dbp))
4171         {
4172         case 'b':
4173           if (nocase_tail ("body"))
4174             {
4175               /* Skipping body of   procedure body   or   package body or ....
4176                  resetting qualifier to body instead of spec. */
4177               name_qualifier = "/b";
4178               continue;
4179             }
4180           break;
4181         case 't':
4182           /* Skipping type of   task type   or   protected type ... */
4183           if (nocase_tail ("type"))
4184             continue;
4185           break;
4186         }
4187       if (*dbp == '"')
4188         {
4189           dbp += 1;
4190           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4191             continue;
4192         }
4193       else
4194         {
4195           dbp = skip_spaces (dbp);
4196           for (cp = dbp;
4197                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4198                cp++)
4199             continue;
4200           if (cp == dbp)
4201             return;
4202         }
4203       c = *cp;
4204       *cp = '\0';
4205       name = concat (dbp, name_qualifier, "");
4206       *cp = c;
4207       make_tag (name, strlen (name), true,
4208                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4209       free (name);
4210       if (c == '"')
4211         dbp = cp + 1;
4212       return;
4213     }
4214 }
4215
4216 static void
4217 Ada_funcs (FILE *inf)
4218 {
4219   bool inquote = false;
4220   bool skip_till_semicolumn = false;
4221
4222   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4223     {
4224       while (*dbp != '\0')
4225         {
4226           /* Skip a string i.e. "abcd". */
4227           if (inquote || (*dbp == '"'))
4228             {
4229               dbp = strchr (dbp + !inquote, '"');
4230               if (dbp != NULL)
4231                 {
4232                   inquote = false;
4233                   dbp += 1;
4234                   continue;     /* advance char */
4235                 }
4236               else
4237                 {
4238                   inquote = true;
4239                   break;        /* advance line */
4240                 }
4241             }
4242
4243           /* Skip comments. */
4244           if (dbp[0] == '-' && dbp[1] == '-')
4245             break;              /* advance line */
4246
4247           /* Skip character enclosed in single quote i.e. 'a'
4248              and skip single quote starting an attribute i.e. 'Image. */
4249           if (*dbp == '\'')
4250             {
4251               dbp++ ;
4252               if (*dbp != '\0')
4253                 dbp++;
4254               continue;
4255             }
4256
4257           if (skip_till_semicolumn)
4258             {
4259               if (*dbp == ';')
4260                 skip_till_semicolumn = false;
4261               dbp++;
4262               continue;         /* advance char */
4263             }
4264
4265           /* Search for beginning of a token.  */
4266           if (!begtoken (*dbp))
4267             {
4268               dbp++;
4269               continue;         /* advance char */
4270             }
4271
4272           /* We are at the beginning of a token. */
4273           switch (c_tolower (*dbp))
4274             {
4275             case 'f':
4276               if (!packages_only && nocase_tail ("function"))
4277                 Ada_getit (inf, "/f");
4278               else
4279                 break;          /* from switch */
4280               continue;         /* advance char */
4281             case 'p':
4282               if (!packages_only && nocase_tail ("procedure"))
4283                 Ada_getit (inf, "/p");
4284               else if (nocase_tail ("package"))
4285                 Ada_getit (inf, "/s");
4286               else if (nocase_tail ("protected")) /* protected type */
4287                 Ada_getit (inf, "/t");
4288               else
4289                 break;          /* from switch */
4290               continue;         /* advance char */
4291
4292             case 'u':
4293               if (typedefs && !packages_only && nocase_tail ("use"))
4294                 {
4295                   /* when tagging types, avoid tagging  use type Pack.Typename;
4296                      for this, we will skip everything till a ; */
4297                   skip_till_semicolumn = true;
4298                   continue;     /* advance char */
4299                 }
4300
4301             case 't':
4302               if (!packages_only && nocase_tail ("task"))
4303                 Ada_getit (inf, "/k");
4304               else if (typedefs && !packages_only && nocase_tail ("type"))
4305                 {
4306                   Ada_getit (inf, "/t");
4307                   while (*dbp != '\0')
4308                     dbp += 1;
4309                 }
4310               else
4311                 break;          /* from switch */
4312               continue;         /* advance char */
4313             }
4314
4315           /* Look for the end of the token. */
4316           while (!endtoken (*dbp))
4317             dbp++;
4318
4319         } /* advance char */
4320     } /* advance line */
4321 }
4322
4323 \f
4324 /*
4325  * Unix and microcontroller assembly tag handling
4326  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4327  * Idea by Bob Weiner, Motorola Inc. (1994)
4328  */
4329 static void
4330 Asm_labels (FILE *inf)
4331 {
4332   register char *cp;
4333
4334   LOOP_ON_INPUT_LINES (inf, lb, cp)
4335     {
4336       /* If first char is alphabetic or one of [_.$], test for colon
4337          following identifier. */
4338       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4339         {
4340           /* Read past label. */
4341           cp++;
4342           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4343             cp++;
4344           if (*cp == ':' || c_isspace (*cp))
4345             /* Found end of label, so copy it and add it to the table. */
4346             make_tag (lb.buffer, cp - lb.buffer, true,
4347                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348         }
4349     }
4350 }
4351
4352 \f
4353 /*
4354  * Perl support
4355  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4356  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4357  * Perl variable names: /^(my|local).../
4358  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4359  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4360  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4361  */
4362 static void
4363 Perl_functions (FILE *inf)
4364 {
4365   char *package = savestr ("main"); /* current package name */
4366   register char *cp;
4367
4368   LOOP_ON_INPUT_LINES (inf, lb, cp)
4369     {
4370       cp = skip_spaces (cp);
4371
4372       if (LOOKING_AT (cp, "package"))
4373         {
4374           free (package);
4375           get_tag (cp, &package);
4376         }
4377       else if (LOOKING_AT (cp, "sub"))
4378         {
4379           char *pos, *sp;
4380
4381         subr:
4382           sp = cp;
4383           while (!notinname (*cp))
4384             cp++;
4385           if (cp == sp)
4386             continue;           /* nothing found */
4387           pos = strchr (sp, ':');
4388           if (pos && pos < cp && pos[1] == ':')
4389             /* The name is already qualified. */
4390             make_tag (sp, cp - sp, true,
4391                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4392           else
4393             /* Qualify it. */
4394             {
4395               char savechar, *name;
4396
4397               savechar = *cp;
4398               *cp = '\0';
4399               name = concat (package, "::", sp);
4400               *cp = savechar;
4401               make_tag (name, strlen (name), true,
4402                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4403               free (name);
4404             }
4405         }
4406       else if (LOOKING_AT (cp, "use constant")
4407                || LOOKING_AT (cp, "use constant::defer"))
4408         {
4409           /* For hash style multi-constant like
4410                 use constant { FOO => 123,
4411                                BAR => 456 };
4412              only the first FOO is picked up.  Parsing across the value
4413              expressions would be difficult in general, due to possible nested
4414              hashes, here-documents, etc.  */
4415           if (*cp == '{')
4416             cp = skip_spaces (cp+1);
4417           goto subr;
4418         }
4419       else if (globals) /* only if we are tagging global vars */
4420         {
4421           /* Skip a qualifier, if any. */
4422           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4423           /* After "my" or "local", but before any following paren or space. */
4424           char *varstart = cp;
4425
4426           if (qual              /* should this be removed?  If yes, how? */
4427               && (*cp == '$' || *cp == '@' || *cp == '%'))
4428             {
4429               varstart += 1;
4430               do
4431                 cp++;
4432               while (c_isalnum (*cp) || *cp == '_');
4433             }
4434           else if (qual)
4435             {
4436               /* Should be examining a variable list at this point;
4437                  could insist on seeing an open parenthesis. */
4438               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4439                 cp++;
4440             }
4441           else
4442             continue;
4443
4444           make_tag (varstart, cp - varstart, false,
4445                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446         }
4447     }
4448   free (package);
4449 }
4450
4451
4452 /*
4453  * Python support
4454  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4455  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4456  * More ideas by seb bacon <seb@jamkit.com> (2002)
4457  */
4458 static void
4459 Python_functions (FILE *inf)
4460 {
4461   register char *cp;
4462
4463   LOOP_ON_INPUT_LINES (inf, lb, cp)
4464     {
4465       cp = skip_spaces (cp);
4466       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4467         {
4468           char *name = cp;
4469           while (!notinname (*cp) && *cp != ':')
4470             cp++;
4471           make_tag (name, cp - name, true,
4472                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4473         }
4474     }
4475 }
4476
4477 \f
4478 /*
4479  * PHP support
4480  * Look for:
4481  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4482  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4483  *  - /^[ \t]*define\(\"[^\"]+/
4484  * Only with --members:
4485  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4486  * Idea by Diez B. Roggisch (2001)
4487  */
4488 static void
4489 PHP_functions (FILE *inf)
4490 {
4491   char *cp, *name;
4492   bool search_identifier = false;
4493
4494   LOOP_ON_INPUT_LINES (inf, lb, cp)
4495     {
4496       cp = skip_spaces (cp);
4497       name = cp;
4498       if (search_identifier
4499           && *cp != '\0')
4500         {
4501           while (!notinname (*cp))
4502             cp++;
4503           make_tag (name, cp - name, true,
4504                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4505           search_identifier = false;
4506         }
4507       else if (LOOKING_AT (cp, "function"))
4508         {
4509           if (*cp == '&')
4510             cp = skip_spaces (cp+1);
4511           if (*cp != '\0')
4512             {
4513               name = cp;
4514               while (!notinname (*cp))
4515                 cp++;
4516               make_tag (name, cp - name, true,
4517                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4518             }
4519           else
4520             search_identifier = true;
4521         }
4522       else if (LOOKING_AT (cp, "class"))
4523         {
4524           if (*cp != '\0')
4525             {
4526               name = cp;
4527               while (*cp != '\0' && !c_isspace (*cp))
4528                 cp++;
4529               make_tag (name, cp - name, false,
4530                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4531             }
4532           else
4533             search_identifier = true;
4534         }
4535       else if (strneq (cp, "define", 6)
4536                && (cp = skip_spaces (cp+6))
4537                && *cp++ == '('
4538                && (*cp == '"' || *cp == '\''))
4539         {
4540           char quote = *cp++;
4541           name = cp;
4542           while (*cp != quote && *cp != '\0')
4543             cp++;
4544           make_tag (name, cp - name, false,
4545                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4546         }
4547       else if (members
4548                && LOOKING_AT (cp, "var")
4549                && *cp == '$')
4550         {
4551           name = cp;
4552           while (!notinname (*cp))
4553             cp++;
4554           make_tag (name, cp - name, false,
4555                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4556         }
4557     }
4558 }
4559
4560 \f
4561 /*
4562  * Cobol tag functions
4563  * We could look for anything that could be a paragraph name.
4564  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4565  * Idea by Corny de Souza (1993)
4566  */
4567 static void
4568 Cobol_paragraphs (FILE *inf)
4569 {
4570   register char *bp, *ep;
4571
4572   LOOP_ON_INPUT_LINES (inf, lb, bp)
4573     {
4574       if (lb.len < 9)
4575         continue;
4576       bp += 8;
4577
4578       /* If eoln, compiler option or comment ignore whole line. */
4579       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4580         continue;
4581
4582       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4583         continue;
4584       if (*ep++ == '.')
4585         make_tag (bp, ep - bp, true,
4586                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4587     }
4588 }
4589
4590 \f
4591 /*
4592  * Makefile support
4593  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4594  */
4595 static void
4596 Makefile_targets (FILE *inf)
4597 {
4598   register char *bp;
4599
4600   LOOP_ON_INPUT_LINES (inf, lb, bp)
4601     {
4602       if (*bp == '\t' || *bp == '#')
4603         continue;
4604       while (*bp != '\0' && *bp != '=' && *bp != ':')
4605         bp++;
4606       if (*bp == ':' || (globals && *bp == '='))
4607         {
4608           /* We should detect if there is more than one tag, but we do not.
4609              We just skip initial and final spaces. */
4610           char * namestart = skip_spaces (lb.buffer);
4611           while (--bp > namestart)
4612             if (!notinname (*bp))
4613               break;
4614           make_tag (namestart, bp - namestart + 1, true,
4615                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4616         }
4617     }
4618 }
4619
4620 \f
4621 /*
4622  * Pascal parsing
4623  * Original code by Mosur K. Mohan (1989)
4624  *
4625  *  Locates tags for procedures & functions.  Doesn't do any type- or
4626  *  var-definitions.  It does look for the keyword "extern" or
4627  *  "forward" immediately following the procedure statement; if found,
4628  *  the tag is skipped.
4629  */
4630 static void
4631 Pascal_functions (FILE *inf)
4632 {
4633   linebuffer tline;             /* mostly copied from C_entries */
4634   long save_lcno;
4635   int save_lineno, namelen, taglen;
4636   char c, *name;
4637
4638   bool                          /* each of these flags is true if: */
4639     incomment,                  /* point is inside a comment */
4640     inquote,                    /* point is inside '..' string */
4641     get_tagname,                /* point is after PROCEDURE/FUNCTION
4642                                    keyword, so next item = potential tag */
4643     found_tag,                  /* point is after a potential tag */
4644     inparms,                    /* point is within parameter-list */
4645     verify_tag;                 /* point has passed the parm-list, so the
4646                                    next token will determine whether this
4647                                    is a FORWARD/EXTERN to be ignored, or
4648                                    whether it is a real tag */
4649
4650   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4651   name = NULL;                  /* keep compiler quiet */
4652   dbp = lb.buffer;
4653   *dbp = '\0';
4654   linebuffer_init (&tline);
4655
4656   incomment = inquote = false;
4657   found_tag = false;            /* have a proc name; check if extern */
4658   get_tagname = false;          /* found "procedure" keyword         */
4659   inparms = false;              /* found '(' after "proc"            */
4660   verify_tag = false;           /* check if "extern" is ahead        */
4661
4662
4663   while (perhaps_more_input (inf)) /* long main loop to get next char */
4664     {
4665       c = *dbp++;
4666       if (c == '\0')            /* if end of line */
4667         {
4668           readline (&lb, inf);
4669           dbp = lb.buffer;
4670           if (*dbp == '\0')
4671             continue;
4672           if (!((found_tag && verify_tag)
4673                 || get_tagname))
4674             c = *dbp++;         /* only if don't need *dbp pointing
4675                                    to the beginning of the name of
4676                                    the procedure or function */
4677         }
4678       if (incomment)
4679         {
4680           if (c == '}')         /* within { } comments */
4681             incomment = false;
4682           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4683             {
4684               dbp++;
4685               incomment = false;
4686             }
4687           continue;
4688         }
4689       else if (inquote)
4690         {
4691           if (c == '\'')
4692             inquote = false;
4693           continue;
4694         }
4695       else
4696         switch (c)
4697           {
4698           case '\'':
4699             inquote = true;     /* found first quote */
4700             continue;
4701           case '{':             /* found open { comment */
4702             incomment = true;
4703             continue;
4704           case '(':
4705             if (*dbp == '*')    /* found open (* comment */
4706               {
4707                 incomment = true;
4708                 dbp++;
4709               }
4710             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4711               inparms = true;
4712             continue;
4713           case ')':             /* end of parms list */
4714             if (inparms)
4715               inparms = false;
4716             continue;
4717           case ';':
4718             if (found_tag && !inparms) /* end of proc or fn stmt */
4719               {
4720                 verify_tag = true;
4721                 break;
4722               }
4723             continue;
4724           }
4725       if (found_tag && verify_tag && (*dbp != ' '))
4726         {
4727           /* Check if this is an "extern" declaration. */
4728           if (*dbp == '\0')
4729             continue;
4730           if (c_tolower (*dbp) == 'e')
4731             {
4732               if (nocase_tail ("extern")) /* superfluous, really! */
4733                 {
4734                   found_tag = false;
4735                   verify_tag = false;
4736                 }
4737             }
4738           else if (c_tolower (*dbp) == 'f')
4739             {
4740               if (nocase_tail ("forward")) /* check for forward reference */
4741                 {
4742                   found_tag = false;
4743                   verify_tag = false;
4744                 }
4745             }
4746           if (found_tag && verify_tag) /* not external proc, so make tag */
4747             {
4748               found_tag = false;
4749               verify_tag = false;
4750               make_tag (name, namelen, true,
4751                         tline.buffer, taglen, save_lineno, save_lcno);
4752               continue;
4753             }
4754         }
4755       if (get_tagname)          /* grab name of proc or fn */
4756         {
4757           char *cp;
4758
4759           if (*dbp == '\0')
4760             continue;
4761
4762           /* Find block name. */
4763           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4764             continue;
4765
4766           /* Save all values for later tagging. */
4767           linebuffer_setlen (&tline, lb.len);
4768           strcpy (tline.buffer, lb.buffer);
4769           save_lineno = lineno;
4770           save_lcno = linecharno;
4771           name = tline.buffer + (dbp - lb.buffer);
4772           namelen = cp - dbp;
4773           taglen = cp - lb.buffer + 1;
4774
4775           dbp = cp;             /* set dbp to e-o-token */
4776           get_tagname = false;
4777           found_tag = true;
4778           continue;
4779
4780           /* And proceed to check for "extern". */
4781         }
4782       else if (!incomment && !inquote && !found_tag)
4783         {
4784           /* Check for proc/fn keywords. */
4785           switch (c_tolower (c))
4786             {
4787             case 'p':
4788               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4789                 get_tagname = true;
4790               continue;
4791             case 'f':
4792               if (nocase_tail ("unction"))
4793                 get_tagname = true;
4794               continue;
4795             }
4796         }
4797     } /* while not eof */
4798
4799   free (tline.buffer);
4800 }
4801
4802 \f
4803 /*
4804  * Lisp tag functions
4805  *  look for (def or (DEF, quote or QUOTE
4806  */
4807
4808 static void L_getit (void);
4809
4810 static void
4811 L_getit (void)
4812 {
4813   if (*dbp == '\'')             /* Skip prefix quote */
4814     dbp++;
4815   else if (*dbp == '(')
4816   {
4817     dbp++;
4818     /* Try to skip "(quote " */
4819     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4820       /* Ok, then skip "(" before name in (defstruct (foo)) */
4821       dbp = skip_spaces (dbp);
4822   }
4823   get_tag (dbp, NULL);
4824 }
4825
4826 static void
4827 Lisp_functions (FILE *inf)
4828 {
4829   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4830     {
4831       if (dbp[0] != '(')
4832         continue;
4833
4834       /* "(defvar foo)" is a declaration rather than a definition.  */
4835       if (! declarations)
4836         {
4837           char *p = dbp + 1;
4838           if (LOOKING_AT (p, "defvar"))
4839             {
4840               p = skip_name (p); /* past var name */
4841               p = skip_spaces (p);
4842               if (*p == ')')
4843                 continue;
4844             }
4845         }
4846
4847       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4848         dbp += 3;
4849
4850       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4851         {
4852           dbp = skip_non_spaces (dbp);
4853           dbp = skip_spaces (dbp);
4854           L_getit ();
4855         }
4856       else
4857         {
4858           /* Check for (foo::defmumble name-defined ... */
4859           do
4860             dbp++;
4861           while (!notinname (*dbp) && *dbp != ':');
4862           if (*dbp == ':')
4863             {
4864               do
4865                 dbp++;
4866               while (*dbp == ':');
4867
4868               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4869                 {
4870                   dbp = skip_non_spaces (dbp);
4871                   dbp = skip_spaces (dbp);
4872                   L_getit ();
4873                 }
4874             }
4875         }
4876     }
4877 }
4878
4879 \f
4880 /*
4881  * Lua script language parsing
4882  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4883  *
4884  *  "function" and "local function" are tags if they start at column 1.
4885  */
4886 static void
4887 Lua_functions (FILE *inf)
4888 {
4889   register char *bp;
4890
4891   LOOP_ON_INPUT_LINES (inf, lb, bp)
4892     {
4893       if (bp[0] != 'f' && bp[0] != 'l')
4894         continue;
4895
4896       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4897
4898       if (LOOKING_AT (bp, "function"))
4899         get_tag (bp, NULL);
4900     }
4901 }
4902
4903 \f
4904 /*
4905  * PostScript tags
4906  * Just look for lines where the first character is '/'
4907  * Also look at "defineps" for PSWrap
4908  * Ideas by:
4909  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4910  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4911  */
4912 static void
4913 PS_functions (FILE *inf)
4914 {
4915   register char *bp, *ep;
4916
4917   LOOP_ON_INPUT_LINES (inf, lb, bp)
4918     {
4919       if (bp[0] == '/')
4920         {
4921           for (ep = bp+1;
4922                *ep != '\0' && *ep != ' ' && *ep != '{';
4923                ep++)
4924             continue;
4925           make_tag (bp, ep - bp, true,
4926                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4927         }
4928       else if (LOOKING_AT (bp, "defineps"))
4929         get_tag (bp, NULL);
4930     }
4931 }
4932
4933 \f
4934 /*
4935  * Forth tags
4936  * Ignore anything after \ followed by space or in ( )
4937  * Look for words defined by :
4938  * Look for constant, code, create, defer, value, and variable
4939  * OBP extensions:  Look for buffer:, field,
4940  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4941  */
4942 static void
4943 Forth_words (FILE *inf)
4944 {
4945   register char *bp;
4946
4947   LOOP_ON_INPUT_LINES (inf, lb, bp)
4948     while ((bp = skip_spaces (bp))[0] != '\0')
4949       if (bp[0] == '\\' && c_isspace (bp[1]))
4950         break;                  /* read next line */
4951       else if (bp[0] == '(' && c_isspace (bp[1]))
4952         do                      /* skip to ) or eol */
4953           bp++;
4954         while (*bp != ')' && *bp != '\0');
4955       else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
4956                || LOOKING_AT_NOCASE (bp, "constant")
4957                || LOOKING_AT_NOCASE (bp, "code")
4958                || LOOKING_AT_NOCASE (bp, "create")
4959                || LOOKING_AT_NOCASE (bp, "defer")
4960                || LOOKING_AT_NOCASE (bp, "value")
4961                || LOOKING_AT_NOCASE (bp, "variable")
4962                || LOOKING_AT_NOCASE (bp, "buffer:")
4963                || LOOKING_AT_NOCASE (bp, "field"))
4964         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4965       else
4966         bp = skip_non_spaces (bp);
4967 }
4968
4969 \f
4970 /*
4971  * Scheme tag functions
4972  * look for (def... xyzzy
4973  *          (def... (xyzzy
4974  *          (def ... ((...(xyzzy ....
4975  *          (set! xyzzy
4976  * Original code by Ken Haase (1985?)
4977  */
4978 static void
4979 Scheme_functions (FILE *inf)
4980 {
4981   register char *bp;
4982
4983   LOOP_ON_INPUT_LINES (inf, lb, bp)
4984     {
4985       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4986         {
4987           bp = skip_non_spaces (bp+4);
4988           /* Skip over open parens and white space.  Don't continue past
4989              '\0'. */
4990           while (*bp && notinname (*bp))
4991             bp++;
4992           get_tag (bp, NULL);
4993         }
4994       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4995         get_tag (bp, NULL);
4996     }
4997 }
4998
4999 \f
5000 /* Find tags in TeX and LaTeX input files.  */
5001
5002 /* TEX_toktab is a table of TeX control sequences that define tags.
5003  * Each entry records one such control sequence.
5004  *
5005  * Original code from who knows whom.
5006  * Ideas by:
5007  *   Stefan Monnier (2002)
5008  */
5009
5010 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5011
5012 /* Default set of control sequences to put into TEX_toktab.
5013    The value of environment var TEXTAGS is prepended to this.  */
5014 static const char *TEX_defenv = "\
5015 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5016 :part:appendix:entry:index:def\
5017 :newcommand:renewcommand:newenvironment:renewenvironment";
5018
5019 static void TEX_decode_env (const char *, const char *);
5020
5021 /*
5022  * TeX/LaTeX scanning loop.
5023  */
5024 static void
5025 TeX_commands (FILE *inf)
5026 {
5027   char *cp;
5028   linebuffer *key;
5029
5030   char TEX_esc = '\0';
5031   char TEX_opgrp, TEX_clgrp;
5032
5033   /* Initialize token table once from environment. */
5034   if (TEX_toktab == NULL)
5035     TEX_decode_env ("TEXTAGS", TEX_defenv);
5036
5037   LOOP_ON_INPUT_LINES (inf, lb, cp)
5038     {
5039       /* Look at each TEX keyword in line. */
5040       for (;;)
5041         {
5042           /* Look for a TEX escape. */
5043           while (true)
5044             {
5045               char c = *cp++;
5046               if (c == '\0' || c == '%')
5047                 goto tex_next_line;
5048
5049               /* Select either \ or ! as escape character, whichever comes
5050                  first outside a comment.  */
5051               if (!TEX_esc)
5052                 switch (c)
5053                   {
5054                   case '\\':
5055                     TEX_esc = c;
5056                     TEX_opgrp = '{';
5057                     TEX_clgrp = '}';
5058                     break;
5059
5060                   case '!':
5061                     TEX_esc = c;
5062                     TEX_opgrp = '<';
5063                     TEX_clgrp = '>';
5064                     break;
5065                   }
5066
5067               if (c == TEX_esc)
5068                 break;
5069             }
5070
5071           for (key = TEX_toktab; key->buffer != NULL; key++)
5072             if (strneq (cp, key->buffer, key->len))
5073               {
5074                 char *p;
5075                 int namelen, linelen;
5076                 bool opgrp = false;
5077
5078                 cp = skip_spaces (cp + key->len);
5079                 if (*cp == TEX_opgrp)
5080                   {
5081                     opgrp = true;
5082                     cp++;
5083                   }
5084                 for (p = cp;
5085                      (!c_isspace (*p) && *p != '#' &&
5086                       *p != TEX_opgrp && *p != TEX_clgrp);
5087                      p++)
5088                   continue;
5089                 namelen = p - cp;
5090                 linelen = lb.len;
5091                 if (!opgrp || *p == TEX_clgrp)
5092                   {
5093                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5094                       p++;
5095                     linelen = p - lb.buffer + 1;
5096                   }
5097                 make_tag (cp, namelen, true,
5098                           lb.buffer, linelen, lineno, linecharno);
5099                 goto tex_next_line; /* We only tag a line once */
5100               }
5101         }
5102     tex_next_line:
5103       ;
5104     }
5105 }
5106
5107 /* Read environment and prepend it to the default string.
5108    Build token table. */
5109 static void
5110 TEX_decode_env (const char *evarname, const char *defenv)
5111 {
5112   register const char *env, *p;
5113   int i, len;
5114
5115   /* Append default string to environment. */
5116   env = getenv (evarname);
5117   if (!env)
5118     env = defenv;
5119   else
5120     env = concat (env, defenv, "");
5121
5122   /* Allocate a token table */
5123   for (len = 1, p = env; (p = strchr (p, ':')); )
5124     if (*++p)
5125       len++;
5126   TEX_toktab = xnew (len, linebuffer);
5127
5128   /* Unpack environment string into token table. Be careful about */
5129   /* zero-length strings (leading ':', "::" and trailing ':') */
5130   for (i = 0; *env != '\0';)
5131     {
5132       p = strchr (env, ':');
5133       if (!p)                   /* End of environment string. */
5134         p = env + strlen (env);
5135       if (p - env > 0)
5136         {                       /* Only non-zero strings. */
5137           TEX_toktab[i].buffer = savenstr (env, p - env);
5138           TEX_toktab[i].len = p - env;
5139           i++;
5140         }
5141       if (*p)
5142         env = p + 1;
5143       else
5144         {
5145           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5146           TEX_toktab[i].len = 0;
5147           break;
5148         }
5149     }
5150 }
5151
5152 \f
5153 /* Texinfo support.  Dave Love, Mar. 2000.  */
5154 static void
5155 Texinfo_nodes (FILE *inf)
5156 {
5157   char *cp, *start;
5158   LOOP_ON_INPUT_LINES (inf, lb, cp)
5159     if (LOOKING_AT (cp, "@node"))
5160       {
5161         start = cp;
5162         while (*cp != '\0' && *cp != ',')
5163           cp++;
5164         make_tag (start, cp - start, true,
5165                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5166       }
5167 }
5168
5169 \f
5170 /*
5171  * HTML support.
5172  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5173  * Contents of <a name=xxx> are tags with name xxx.
5174  *
5175  * Francesco Potortì, 2002.
5176  */
5177 static void
5178 HTML_labels (FILE *inf)
5179 {
5180   bool getnext = false;         /* next text outside of HTML tags is a tag */
5181   bool skiptag = false;         /* skip to the end of the current HTML tag */
5182   bool intag = false;           /* inside an html tag, looking for ID= */
5183   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5184   char *end;
5185
5186
5187   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5188
5189   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5190     for (;;)                    /* loop on the same line */
5191       {
5192         if (skiptag)            /* skip HTML tag */
5193           {
5194             while (*dbp != '\0' && *dbp != '>')
5195               dbp++;
5196             if (*dbp == '>')
5197               {
5198                 dbp += 1;
5199                 skiptag = false;
5200                 continue;       /* look on the same line */
5201               }
5202             break;              /* go to next line */
5203           }
5204
5205         else if (intag) /* look for "name=" or "id=" */
5206           {
5207             while (*dbp != '\0' && *dbp != '>'
5208                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5209               dbp++;
5210             if (*dbp == '\0')
5211               break;            /* go to next line */
5212             if (*dbp == '>')
5213               {
5214                 dbp += 1;
5215                 intag = false;
5216                 continue;       /* look on the same line */
5217               }
5218             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5219                 || LOOKING_AT_NOCASE (dbp, "id="))
5220               {
5221                 bool quoted = (dbp[0] == '"');
5222
5223                 if (quoted)
5224                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5225                     continue;
5226                 else
5227                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5228                     continue;
5229                 linebuffer_setlen (&token_name, end - dbp);
5230                 memcpy (token_name.buffer, dbp, end - dbp);
5231                 token_name.buffer[end - dbp] = '\0';
5232
5233                 dbp = end;
5234                 intag = false;  /* we found what we looked for */
5235                 skiptag = true; /* skip to the end of the tag */
5236                 getnext = true; /* then grab the text */
5237                 continue;       /* look on the same line */
5238               }
5239             dbp += 1;
5240           }
5241
5242         else if (getnext)       /* grab next tokens and tag them */
5243           {
5244             dbp = skip_spaces (dbp);
5245             if (*dbp == '\0')
5246               break;            /* go to next line */
5247             if (*dbp == '<')
5248               {
5249                 intag = true;
5250                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5251                 continue;       /* look on the same line */
5252               }
5253
5254             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5255               continue;
5256             make_tag (token_name.buffer, token_name.len, true,
5257                       dbp, end - dbp, lineno, linecharno);
5258             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5259             getnext = false;
5260             break;              /* go to next line */
5261           }
5262
5263         else                    /* look for an interesting HTML tag */
5264           {
5265             while (*dbp != '\0' && *dbp != '<')
5266               dbp++;
5267             if (*dbp == '\0')
5268               break;            /* go to next line */
5269             intag = true;
5270             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5271               {
5272                 inanchor = true;
5273                 continue;       /* look on the same line */
5274               }
5275             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5276                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5277                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5278                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5279               {
5280                 intag = false;
5281                 getnext = true;
5282                 continue;       /* look on the same line */
5283               }
5284             dbp += 1;
5285           }
5286       }
5287 }
5288
5289 \f
5290 /*
5291  * Prolog support
5292  *
5293  * Assumes that the predicate or rule starts at column 0.
5294  * Only the first clause of a predicate or rule is added.
5295  * Original code by Sunichirou Sugou (1989)
5296  * Rewritten by Anders Lindgren (1996)
5297  */
/* Forward declarations of the Prolog helpers defined further down.  */
static size_t prolog_pr (char *, char *);	/* recognize and tag a clause head */
static void prolog_skip_comment (linebuffer *, FILE *); /* skip a comment */
static size_t prolog_atom (char *, size_t);	/* measure a Prolog atom */
5301
5302 static void
5303 Prolog_functions (FILE *inf)
5304 {
5305   char *cp, *last;
5306   size_t len;
5307   size_t allocated;
5308
5309   allocated = 0;
5310   len = 0;
5311   last = NULL;
5312
5313   LOOP_ON_INPUT_LINES (inf, lb, cp)
5314     {
5315       if (cp[0] == '\0')        /* Empty line */
5316         continue;
5317       else if (c_isspace (cp[0])) /* Not a predicate */
5318         continue;
5319       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5320         prolog_skip_comment (&lb, inf);
5321       else if ((len = prolog_pr (cp, last)) > 0)
5322         {
5323           /* Predicate or rule.  Store the function name so that we
5324              only generate a tag for the first clause.  */
5325           if (last == NULL)
5326             last = xnew (len + 1, char);
5327           else if (len + 1 > allocated)
5328             xrnew (last, len + 1, char);
5329           allocated = len + 1;
5330           memcpy (last, cp, len);
5331           last[len] = '\0';
5332         }
5333     }
5334   free (last);
5335 }
5336
5337
5338 static void
5339 prolog_skip_comment (linebuffer *plb, FILE *inf)
5340 {
5341   char *cp;
5342
5343   do
5344     {
5345       for (cp = plb->buffer; *cp != '\0'; cp++)
5346         if (cp[0] == '*' && cp[1] == '/')
5347           return;
5348       readline (plb, inf);
5349     }
5350   while (perhaps_more_input (inf));
5351 }
5352
5353 /*
5354  * A predicate or rule definition is added if it matches:
5355  *     <beginning of line><Prolog Atom><whitespace>(
5356  * or  <beginning of line><Prolog Atom><whitespace>:-
5357  *
5358  * It is added to the tags database if it doesn't match the
5359  * name of the previous clause header.
5360  *
5361  * Return the size of the name of the predicate or rule, or 0 if no
5362  * header was found.
5363  */
5364 static size_t
5365 prolog_pr (char *s, char *last)
5366
5367                                 /* Name of last clause. */
5368 {
5369   size_t pos;
5370   size_t len;
5371
5372   pos = prolog_atom (s, 0);
5373   if (! pos)
5374     return 0;
5375
5376   len = pos;
5377   pos = skip_spaces (s + pos) - s;
5378
5379   if ((s[pos] == '.'
5380        || (s[pos] == '(' && (pos += 1))
5381        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5382       && (last == NULL          /* save only the first clause */
5383           || len != strlen (last)
5384           || !strneq (s, last, len)))
5385         {
5386           make_tag (s, len, true, s, pos, lineno, linecharno);
5387           return len;
5388         }
5389   else
5390     return 0;
5391 }
5392
/*
 * Measure the Prolog atom that starts at S[POS].
 * Return the number of bytes it occupies, or 0 if no atom starts
 * there or the atom is not terminated on this line.
 *
 * A Prolog atom, in this context, is one of:
 * - A lower case letter or underscore followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes.  Inside it, two single quotes in a
 *   row stand for one, and a backslash quotes the character after it.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos++;
      for (;;)
        {
          char ch = s[pos];

          if (ch == '\0')
            return 0;           /* Multiline quoted atoms are ignored. */

          if (ch == '\\')
            {
              if (s[pos + 1] == '\0')
                return 0;
              pos += 2;         /* the backslash and what it quotes */
              continue;
            }

          pos++;
          if (ch == '\'')
            {
              if (s[pos] != '\'')
                return pos - start; /* that was the closing quote */
              pos++;            /* a doubled quote: skip its second half */
            }
        }
    }

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  return 0;
}
5449
5450 \f
5451 /*
5452  * Support for Erlang
5453  *
5454  * Generates tags for functions, defines, and records.
5455  * Assumes that Erlang functions start at column 0.
5456  * Original code by Anders Lindgren (1996)
5457  */
/* Forward declarations of the Erlang helpers defined further down.  */
static int erlang_func (char *, char *);	/* recognize and tag a function head */
static void erlang_attribute (char *);		/* tag -define and -record */
static int erlang_atom (char *);		/* measure an Erlang atom */
5461
5462 static void
5463 Erlang_functions (FILE *inf)
5464 {
5465   char *cp, *last;
5466   int len;
5467   int allocated;
5468
5469   allocated = 0;
5470   len = 0;
5471   last = NULL;
5472
5473   LOOP_ON_INPUT_LINES (inf, lb, cp)
5474     {
5475       if (cp[0] == '\0')        /* Empty line */
5476         continue;
5477       else if (c_isspace (cp[0])) /* Not function nor attribute */
5478         continue;
5479       else if (cp[0] == '%')    /* comment */
5480         continue;
5481       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5482         continue;
5483       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5484         {
5485           erlang_attribute (cp);
5486           if (last != NULL)
5487             {
5488               free (last);
5489               last = NULL;
5490             }
5491         }
5492       else if ((len = erlang_func (cp, last)) > 0)
5493         {
5494           /*
5495            * Function.  Store the function name so that we only
5496            * generates a tag for the first clause.
5497            */
5498           if (last == NULL)
5499             last = xnew (len + 1, char);
5500           else if (len + 1 > allocated)
5501             xrnew (last, len + 1, char);
5502           allocated = len + 1;
5503           memcpy (last, cp, len);
5504           last[len] = '\0';
5505         }
5506     }
5507   free (last);
5508 }
5509
5510
5511 /*
5512  * A function definition is added if it matches:
5513  *     <beginning of line><Erlang Atom><whitespace>(
5514  *
5515  * It is added to the tags database if it doesn't match the
5516  * name of the previous clause header.
5517  *
5518  * Return the size of the name of the function, or 0 if no function
5519  * was found.
5520  */
5521 static int
5522 erlang_func (char *s, char *last)
5523
5524                                 /* Name of last clause. */
5525 {
5526   int pos;
5527   int len;
5528
5529   pos = erlang_atom (s);
5530   if (pos < 1)
5531     return 0;
5532
5533   len = pos;
5534   pos = skip_spaces (s + pos) - s;
5535
5536   /* Save only the first clause. */
5537   if (s[pos++] == '('
5538       && (last == NULL
5539           || len != (int)strlen (last)
5540           || !strneq (s, last, len)))
5541         {
5542           make_tag (s, len, true, s, pos, lineno, linecharno);
5543           return len;
5544         }
5545
5546   return 0;
5547 }
5548
5549
5550 /*
5551  * Handle attributes.  Currently, tags are generated for defines
5552  * and records.
5553  *
5554  * They are on the form:
5555  * -define(foo, bar).
5556  * -define(Foo(M, N), M+N).
5557  * -record(graph, {vtab = notable, cyclic = true}).
5558  */
5559 static void
5560 erlang_attribute (char *s)
5561 {
5562   char *cp = s;
5563
5564   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5565       && *cp++ == '(')
5566     {
5567       int len = erlang_atom (skip_spaces (cp));
5568       if (len > 0)
5569         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5570     }
5571   return;
5572 }
5573
5574
/*
 * Measure the Erlang atom (or variable) that starts at S.
 * Return the number of bytes it occupies.  Return 0 if S does not
 * start with a letter, an underscore or a single quote, or if a
 * quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')
            return 0;           /* multiline quoted atoms are ignored */
          if (s[pos] == '\\')
            {
              /* A backslash quotes the next character, whatever it is.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      pos++;                    /* count the closing quote */
    }
  else if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5602
5603 \f
/* Forward declarations of the regexp helpers defined further down.  */
static char *scan_separators (char *);		/* unquote one /.../ field in place */
static void add_regex (char *, language *);	/* compile and register a regexp */
static char *substitute (char *, char *, struct re_registers *); /* expand \N */
5607
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  A backslash before any other character
 * that is not the separator is kept together with that character.
 * Works in place.  Null terminates name string.
 *
 * NAME[0] is the separator.  Returns pointer to terminating
 * separator (in the original, not the shortened, string), or NULL
 * for unterminated regexps.  An empty NAME has no separator at all
 * and is reported as unterminated.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan.  Looking at name[1] here would read past the
     terminating null.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5666
5667 /* Look at the argument of --regex or --no-regex and do the right
5668    thing.  Same for each line of a regexp file. */
5669 static void
5670 analyze_regex (char *regex_arg)
5671 {
5672   if (regex_arg == NULL)
5673     {
5674       free_regexps ();          /* --no-regex: remove existing regexps */
5675       return;
5676     }
5677
5678   /* A real --regexp option or a line in a regexp file. */
5679   switch (regex_arg[0])
5680     {
5681       /* Comments in regexp file or null arg to --regex. */
5682     case '\0':
5683     case ' ':
5684     case '\t':
5685       break;
5686
5687       /* Read a regex file.  This is recursive and may result in a
5688          loop, which will stop when the file descriptors are exhausted. */
5689     case '@':
5690       {
5691         FILE *regexfp;
5692         linebuffer regexbuf;
5693         char *regexfile = regex_arg + 1;
5694
5695         /* regexfile is a file containing regexps, one per line. */
5696         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5697         if (regexfp == NULL)
5698           pfatal (regexfile);
5699         linebuffer_init (&regexbuf);
5700         while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5701           analyze_regex (regexbuf.buffer);
5702         free (regexbuf.buffer);
5703         if (fclose (regexfp) != 0)
5704           pfatal (regexfile);
5705       }
5706       break;
5707
5708       /* Regexp to be used for a specific language only. */
5709     case '{':
5710       {
5711         language *lang;
5712         char *lang_name = regex_arg + 1;
5713         char *cp;
5714
5715         for (cp = lang_name; *cp != '}'; cp++)
5716           if (*cp == '\0')
5717             {
5718               error ("unterminated language name in regex: %s", regex_arg);
5719               return;
5720             }
5721         *cp++ = '\0';
5722         lang = get_language_from_langname (lang_name);
5723         if (lang == NULL)
5724           return;
5725         add_regex (cp, lang);
5726       }
5727       break;
5728
5729       /* Regexp to be used for any language. */
5730     default:
5731       add_regex (regex_arg, NULL);
5732       break;
5733     }
5734 }
5735
5736 /* Separate the regexp pattern, compile it,
5737    and care for optional name and modifiers. */
5738 static void
5739 add_regex (char *regexp_pattern, language *lang)
5740 {
5741   static struct re_pattern_buffer zeropattern;
5742   char sep, *pat, *name, *modifiers;
5743   char empty = '\0';
5744   const char *err;
5745   struct re_pattern_buffer *patbuf;
5746   regexp *rp;
5747   bool
5748     force_explicit_name = true, /* do not use implicit tag names */
5749     ignore_case = false,        /* case is significant */
5750     multi_line = false,         /* matches are done one line at a time */
5751     single_line = false;        /* dot does not match newline */
5752
5753
5754   if (strlen (regexp_pattern) < 3)
5755     {
5756       error ("null regexp");
5757       return;
5758     }
5759   sep = regexp_pattern[0];
5760   name = scan_separators (regexp_pattern);
5761   if (name == NULL)
5762     {
5763       error ("%s: unterminated regexp", regexp_pattern);
5764       return;
5765     }
5766   if (name[1] == sep)
5767     {
5768       error ("null name for regexp \"%s\"", regexp_pattern);
5769       return;
5770     }
5771   modifiers = scan_separators (name);
5772   if (modifiers == NULL)        /* no terminating separator --> no name */
5773     {
5774       modifiers = name;
5775       name = &empty;
5776     }
5777   else
5778     modifiers += 1;             /* skip separator */
5779
5780   /* Parse regex modifiers. */
5781   for (; modifiers[0] != '\0'; modifiers++)
5782     switch (modifiers[0])
5783       {
5784       case 'N':
5785         if (modifiers == name)
5786           error ("forcing explicit tag name but no name, ignoring");
5787         force_explicit_name = true;
5788         break;
5789       case 'i':
5790         ignore_case = true;
5791         break;
5792       case 's':
5793         single_line = true;
5794         /* FALLTHRU */
5795       case 'm':
5796         multi_line = true;
5797         need_filebuf = true;
5798         break;
5799       default:
5800         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5801         break;
5802       }
5803
5804   patbuf = xnew (1, struct re_pattern_buffer);
5805   *patbuf = zeropattern;
5806   if (ignore_case)
5807     {
5808       static char lc_trans[UCHAR_MAX + 1];
5809       int i;
5810       for (i = 0; i < UCHAR_MAX + 1; i++)
5811         lc_trans[i] = c_tolower (i);
5812       patbuf->translate = lc_trans;     /* translation table to fold case  */
5813     }
5814
5815   if (multi_line)
5816     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5817   else
5818     pat = regexp_pattern;
5819
5820   if (single_line)
5821     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5822   else
5823     re_set_syntax (RE_SYNTAX_EMACS);
5824
5825   err = re_compile_pattern (pat, strlen (pat), patbuf);
5826   if (multi_line)
5827     free (pat);
5828   if (err != NULL)
5829     {
5830       error ("%s while compiling pattern", err);
5831       return;
5832     }
5833
5834   rp = p_head;
5835   p_head = xnew (1, regexp);
5836   p_head->pattern = savestr (regexp_pattern);
5837   p_head->p_next = rp;
5838   p_head->lang = lang;
5839   p_head->pat = patbuf;
5840   p_head->name = savestr (name);
5841   p_head->error_signaled = false;
5842   p_head->force_explicit_name = force_explicit_name;
5843   p_head->ignore_case = ignore_case;
5844   p_head->multi_line = multi_line;
5845 }
5846
5847 /*
5848  * Do the substitutions indicated by the regular expression and
5849  * arguments.
5850  */
5851 static char *
5852 substitute (char *in, char *out, struct re_registers *regs)
5853 {
5854   char *result, *t;
5855   int size, dig, diglen;
5856
5857   result = NULL;
5858   size = strlen (out);
5859
5860   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5861   if (out[size - 1] == '\\')
5862     fatal ("pattern error in \"%s\"", out);
5863   for (t = strchr (out, '\\');
5864        t != NULL;
5865        t = strchr (t + 2, '\\'))
5866     if (c_isdigit (t[1]))
5867       {
5868         dig = t[1] - '0';
5869         diglen = regs->end[dig] - regs->start[dig];
5870         size += diglen - 2;
5871       }
5872     else
5873       size -= 1;
5874
5875   /* Allocate space and do the substitutions. */
5876   assert (size >= 0);
5877   result = xnew (size + 1, char);
5878
5879   for (t = result; *out != '\0'; out++)
5880     if (*out == '\\' && c_isdigit (*++out))
5881       {
5882         dig = *out - '0';
5883         diglen = regs->end[dig] - regs->start[dig];
5884         memcpy (t, in + regs->start[dig], diglen);
5885         t += diglen;
5886       }
5887     else
5888       *t++ = *out;
5889   *t = '\0';
5890
5891   assert (t <= result + size);
5892   assert (t - result == (int)strlen (result));
5893
5894   return result;
5895 }
5896
5897 /* Deallocate all regexps. */
5898 static void
5899 free_regexps (void)
5900 {
5901   regexp *rp;
5902   while (p_head != NULL)
5903     {
5904       rp = p_head->p_next;
5905       free (p_head->pattern);
5906       free (p_head->name);
5907       free (p_head);
5908       p_head = rp;
5909     }
5910   return;
5911 }
5912
5913 /*
5914  * Reads the whole file as a single string from `filebuf' and looks for
5915  * multi-line regular expressions, creating tags on matches.
5916  * readline already dealt with normal regexps.
5917  *
5918  * Idea by Ben Wing <ben@666.com> (2002).
5919  */
5920 static void
5921 regex_tag_multiline (void)
5922 {
5923   char *buffer = filebuf.buffer;
5924   regexp *rp;
5925   char *name;
5926
5927   for (rp = p_head; rp != NULL; rp = rp->p_next)
5928     {
5929       int match = 0;
5930
5931       if (!rp->multi_line)
5932         continue;               /* skip normal regexps */
5933
5934       /* Generic initializations before parsing file from memory. */
5935       lineno = 1;               /* reset global line number */
5936       charno = 0;               /* reset global char number */
5937       linecharno = 0;           /* reset global char number of line start */
5938
5939       /* Only use generic regexps or those for the current language. */
5940       if (rp->lang != NULL && rp->lang != curfdp->lang)
5941         continue;
5942
5943       while (match >= 0 && match < filebuf.len)
5944         {
5945           match = re_search (rp->pat, buffer, filebuf.len, charno,
5946                              filebuf.len - match, &rp->regs);
5947           switch (match)
5948             {
5949             case -2:
5950               /* Some error. */
5951               if (!rp->error_signaled)
5952                 {
5953                   error ("regexp stack overflow while matching \"%s\"",
5954                          rp->pattern);
5955                   rp->error_signaled = true;
5956                 }
5957               break;
5958             case -1:
5959               /* No match. */
5960               break;
5961             default:
5962               if (match == rp->regs.end[0])
5963                 {
5964                   if (!rp->error_signaled)
5965                     {
5966                       error ("regexp matches the empty string: \"%s\"",
5967                              rp->pattern);
5968                       rp->error_signaled = true;
5969                     }
5970                   match = -3;   /* exit from while loop */
5971                   break;
5972                 }
5973
5974               /* Match occurred.  Construct a tag. */
5975               while (charno < rp->regs.end[0])
5976                 if (buffer[charno++] == '\n')
5977                   lineno++, linecharno = charno;
5978               name = rp->name;
5979               if (name[0] == '\0')
5980                 name = NULL;
5981               else /* make a named tag */
5982                 name = substitute (buffer, rp->name, &rp->regs);
5983               if (rp->force_explicit_name)
5984                 /* Force explicit tag name, if a name is there. */
5985                 pfnote (name, true, buffer + linecharno,
5986                         charno - linecharno + 1, lineno, linecharno);
5987               else
5988                 make_tag (name, strlen (name), true, buffer + linecharno,
5989                           charno - linecharno + 1, lineno, linecharno);
5990               break;
5991             }
5992         }
5993     }
5994 }
5995
5996 \f
5997 static bool
5998 nocase_tail (const char *cp)
5999 {
6000   int len = 0;
6001
6002   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6003     cp++, len++;
6004   if (*cp == '\0' && !intoken (dbp[len]))
6005     {
6006       dbp += len;
6007       return true;
6008     }
6009   return false;
6010 }
6011
6012 static void
6013 get_tag (register char *bp, char **namepp)
6014 {
6015   register char *cp = bp;
6016
6017   if (*bp != '\0')
6018     {
6019       /* Go till you get to white space or a syntactic break */
6020       for (cp = bp + 1; !notinname (*cp); cp++)
6021         continue;
6022       make_tag (bp, cp - bp, true,
6023                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6024     }
6025
6026   if (namepp != NULL)
6027     *namepp = savenstr (bp, cp - bp);
6028 }
6029
6030 /*
6031  * Read a line of text from `stream' into `lbp', excluding the
6032  * newline or CR-NL, if any.  Return the number of characters read from
6033  * `stream', which is the length of the line including the newline.
6034  *
6035  * On DOS or Windows we do not count the CR character, if any before the
6036  * NL, in the returned length; this mirrors the behavior of Emacs on those
6037  * platforms (for text files, it translates CR-NL to NL as it reads in the
6038  * file).
6039  *
6040  * If multi-line regular expressions are requested, each line read is
6041  * appended to `filebuf'.
6042  */
6043 static long
6044 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6045 {
6046   char *buffer = lbp->buffer;
6047   char *p = lbp->buffer;
6048   char *pend;
6049   int chars_deleted;
6050
6051   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6052
6053   for (;;)
6054     {
6055       register int c = getc (stream);
6056       if (p == pend)
6057         {
6058           /* We're at the end of linebuffer: expand it. */
6059           lbp->size *= 2;
6060           xrnew (buffer, lbp->size, char);
6061           p += buffer - lbp->buffer;
6062           pend = buffer + lbp->size;
6063           lbp->buffer = buffer;
6064         }
6065       if (c == EOF)
6066         {
6067           if (ferror (stream))
6068             perror (filename);
6069           *p = '\0';
6070           chars_deleted = 0;
6071           break;
6072         }
6073       if (c == '\n')
6074         {
6075           if (p > buffer && p[-1] == '\r')
6076             {
6077               p -= 1;
6078               chars_deleted = 2;
6079             }
6080           else
6081             {
6082               chars_deleted = 1;
6083             }
6084           *p = '\0';
6085           break;
6086         }
6087       *p++ = c;
6088     }
6089   lbp->len = p - buffer;
6090
6091   if (need_filebuf              /* we need filebuf for multi-line regexps */
6092       && chars_deleted > 0)     /* not at EOF */
6093     {
6094       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6095         {
6096           /* Expand filebuf. */
6097           filebuf.size *= 2;
6098           xrnew (filebuf.buffer, filebuf.size, char);
6099         }
6100       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6101       filebuf.len += lbp->len;
6102       filebuf.buffer[filebuf.len++] = '\n';
6103       filebuf.buffer[filebuf.len] = '\0';
6104     }
6105
6106   return lbp->len + chars_deleted;
6107 }
6108
6109 /*
6110  * Like readline_internal, above, but in addition try to match the
6111  * input line against relevant regular expressions and manage #line
6112  * directives.
6113  */
6114 static void
6115 readline (linebuffer *lbp, FILE *stream)
6116 {
6117   long result;
6118
6119   linecharno = charno;          /* update global char number of line start */
6120   result = readline_internal (lbp, stream, infilename); /* read line */
6121   lineno += 1;                  /* increment global line number */
6122   charno += result;             /* increment global char number */
6123
6124   /* Honor #line directives. */
6125   if (!no_line_directive)
6126     {
6127       static bool discard_until_line_directive;
6128
6129       /* Check whether this is a #line directive. */
6130       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6131         {
6132           unsigned int lno;
6133           int start = 0;
6134
6135           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6136               && start > 0)     /* double quote character found */
6137             {
6138               char *endp = lbp->buffer + start;
6139
6140               while ((endp = strchr (endp, '"')) != NULL
6141                      && endp[-1] == '\\')
6142                 endp++;
6143               if (endp != NULL)
6144                 /* Ok, this is a real #line directive.  Let's deal with it. */
6145                 {
6146                   char *taggedabsname;  /* absolute name of original file */
6147                   char *taggedfname;    /* name of original file as given */
6148                   char *name;           /* temp var */
6149
6150                   discard_until_line_directive = false; /* found it */
6151                   name = lbp->buffer + start;
6152                   *endp = '\0';
6153                   canonicalize_filename (name);
6154                   taggedabsname = absolute_filename (name, tagfiledir);
6155                   if (filename_is_absolute (name)
6156                       || filename_is_absolute (curfdp->infname))
6157                     taggedfname = savestr (taggedabsname);
6158                   else
6159                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6160
6161                   if (streq (curfdp->taggedfname, taggedfname))
6162                     /* The #line directive is only a line number change.  We
6163                        deal with this afterwards. */
6164                     free (taggedfname);
6165                   else
6166                     /* The tags following this #line directive should be
6167                        attributed to taggedfname.  In order to do this, set
6168                        curfdp accordingly. */
6169                     {
6170                       fdesc *fdp; /* file description pointer */
6171
6172                       /* Go look for a file description already set up for the
6173                          file indicated in the #line directive.  If there is
6174                          one, use it from now until the next #line
6175                          directive. */
6176                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6177                         if (streq (fdp->infname, curfdp->infname)
6178                             && streq (fdp->taggedfname, taggedfname))
6179                           /* If we remove the second test above (after the &&)
6180                              then all entries pertaining to the same file are
6181                              coalesced in the tags file.  If we use it, then
6182                              entries pertaining to the same file but generated
6183                              from different files (via #line directives) will
6184                              go into separate sections in the tags file.  These
6185                              alternatives look equivalent.  The first one
6186                              destroys some apparently useless information. */
6187                           {
6188                             curfdp = fdp;
6189                             free (taggedfname);
6190                             break;
6191                           }
6192                       /* Else, if we already tagged the real file, skip all
6193                          input lines until the next #line directive. */
6194                       if (fdp == NULL) /* not found */
6195                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6196                           if (streq (fdp->infabsname, taggedabsname))
6197                             {
6198                               discard_until_line_directive = true;
6199                               free (taggedfname);
6200                               break;
6201                             }
6202                       /* Else create a new file description and use that from
6203                          now on, until the next #line directive. */
6204                       if (fdp == NULL) /* not found */
6205                         {
6206                           fdp = fdhead;
6207                           fdhead = xnew (1, fdesc);
6208                           *fdhead = *curfdp; /* copy curr. file description */
6209                           fdhead->next = fdp;
6210                           fdhead->infname = savestr (curfdp->infname);
6211                           fdhead->infabsname = savestr (curfdp->infabsname);
6212                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6213                           fdhead->taggedfname = taggedfname;
6214                           fdhead->usecharno = false;
6215                           fdhead->prop = NULL;
6216                           fdhead->written = false;
6217                           curfdp = fdhead;
6218                         }
6219                     }
6220                   free (taggedabsname);
6221                   lineno = lno - 1;
6222                   readline (lbp, stream);
6223                   return;
6224                 } /* if a real #line directive */
6225             } /* if #line is followed by a number */
6226         } /* if line begins with "#line " */
6227
6228       /* If we are here, no #line directive was found. */
6229       if (discard_until_line_directive)
6230         {
6231           if (result > 0)
6232             {
6233               /* Do a tail recursion on ourselves, thus discarding the contents
6234                  of the line buffer. */
6235               readline (lbp, stream);
6236               return;
6237             }
6238           /* End of file. */
6239           discard_until_line_directive = false;
6240           return;
6241         }
6242     } /* if #line directives should be considered */
6243
6244   {
6245     int match;
6246     regexp *rp;
6247     char *name;
6248
6249     /* Match against relevant regexps. */
6250     if (lbp->len > 0)
6251       for (rp = p_head; rp != NULL; rp = rp->p_next)
6252         {
6253           /* Only use generic regexps or those for the current language.
6254              Also do not use multiline regexps, which is the job of
6255              regex_tag_multiline. */
6256           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6257               || rp->multi_line)
6258             continue;
6259
6260           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6261           switch (match)
6262             {
6263             case -2:
6264               /* Some error. */
6265               if (!rp->error_signaled)
6266                 {
6267                   error ("regexp stack overflow while matching \"%s\"",
6268                          rp->pattern);
6269                   rp->error_signaled = true;
6270                 }
6271               break;
6272             case -1:
6273               /* No match. */
6274               break;
6275             case 0:
6276               /* Empty string matched. */
6277               if (!rp->error_signaled)
6278                 {
6279                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6280                   rp->error_signaled = true;
6281                 }
6282               break;
6283             default:
6284               /* Match occurred.  Construct a tag. */
6285               name = rp->name;
6286               if (name[0] == '\0')
6287                 name = NULL;
6288               else /* make a named tag */
6289                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6290               if (rp->force_explicit_name)
6291                 /* Force explicit tag name, if a name is there. */
6292                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6293               else
6294                 make_tag (name, strlen (name), true,
6295                           lbp->buffer, match, lineno, linecharno);
6296               break;
6297             }
6298         }
6299   }
6300 }
6301
6302 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes and is produced by savenstr, so it is a freshly
 * allocated string.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6312
6313 /*
6314  * Return a pointer to a space of size LEN+1 allocated with xnew where
6315  * the string CP has been copied for at most the first LEN characters.
6316  */
6317 static char *
6318 savenstr (const char *cp, int len)
6319 {
6320   char *dp = xnew (len + 1, char);
6321   dp[len] = '\0';
6322   return memcpy (dp, cp, len);
6323 }
6324
/* Advance CP past every leading character for which c_isspace holds
   and return the resulting pointer.  The terminating NUL is not a
   space, so the scan never runs off the end of the string. */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
6333
/* Advance CP until it points at a character for which c_isspace holds
   or at the terminating NUL, whichever comes first, and return the
   resulting pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
6342
/* Advance CP past every character belonging to the "name" class and
   return the resulting pointer. */
static char *
skip_name (char *cp)
{
  /* The scan relies on notinname ('\0') being true, so it also stops
     at the end of the string. */
  for (; ! notinname (*cp); cp++)
    continue;
  return cp;
}
6352
/* Print error message and exit.
   S1 is handed to error as its printf-style format and S2 as that
   format's single argument; the process then terminates with
   EXIT_FAILURE, so this function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6360
/* Report the current errno through perror, using S1 as the message
   prefix, then terminate the process with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6367
/* Print to stderr a hint that points the user at the --help option of
   this program (named by progname), then terminate the process with
   EXIT_FAILURE.  */
static void
suggest_asking_for_help (void)
{
  fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
           progname);
  exit (EXIT_FAILURE);
}
6375
6376 /* Output a diagnostic with printf-style FORMAT and args.  */
6377 static void
6378 error (const char *format, ...)
6379 {
6380   va_list ap;
6381   va_start (ap, format);
6382   fprintf (stderr, "%s: ", progname);
6383   vfprintf (stderr, format, ap);
6384   fprintf (stderr, "\n");
6385   va_end (ap);
6386 }
6387
6388 /* Return a newly-allocated string whose contents
6389    concatenate those of s1, s2, s3.  */
6390 static char *
6391 concat (const char *s1, const char *s2, const char *s3)
6392 {
6393   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6394   char *result = xnew (len1 + len2 + len3 + 1, char);
6395
6396   strcpy (result, s1);
6397   strcpy (result + len1, s2);
6398   strcpy (result + len1 + len2, s3);
6399
6400   return result;
6401 }
6402
6403 \f
6404 /* Does the same work as the system V getcwd, but does not need to
6405    guess the buffer size in advance. */
6406 static char *
6407 etags_getcwd (void)
6408 {
6409   int bufsize = 200;
6410   char *path = xnew (bufsize, char);
6411
6412   while (getcwd (path, bufsize) == NULL)
6413     {
6414       if (errno != ERANGE)
6415         pfatal ("getcwd");
6416       bufsize *= 2;
6417       free (path);
6418       path = xnew (bufsize, char);
6419     }
6420
6421   canonicalize_filename (path);
6422   return path;
6423 }
6424
6425 /* Return a newly allocated string containing a name of a temporary file.  */
6426 static char *
6427 etags_mktmp (void)
6428 {
6429   const char *tmpdir = getenv ("TMPDIR");
6430   const char *slash = "/";
6431
6432 #if MSDOS || defined (DOS_NT)
6433   if (!tmpdir)
6434     tmpdir = getenv ("TEMP");
6435   if (!tmpdir)
6436     tmpdir = getenv ("TMP");
6437   if (!tmpdir)
6438     tmpdir = ".";
6439   if (tmpdir[strlen (tmpdir) - 1] == '/'
6440       || tmpdir[strlen (tmpdir) - 1] == '\\')
6441     slash = "";
6442 #else
6443   if (!tmpdir)
6444     tmpdir = "/tmp";
6445   if (tmpdir[strlen (tmpdir) - 1] == '/')
6446     slash = "";
6447 #endif
6448
6449   char *templt = concat (tmpdir, slash, "etXXXXXX");
6450   int fd = mkostemp (templt, O_CLOEXEC);
6451   if (fd < 0 || close (fd) != 0)
6452     {
6453       int temp_errno = errno;
6454       free (templt);
6455       errno = temp_errno;
6456       templt = NULL;
6457     }
6458
6459 #if defined (DOS_NT)
6460   /* The file name will be used in shell redirection, so it needs to have
6461      DOS-style backslashes, or else the Windows shell will barf.  */
6462   char *p;
6463   for (p = templt; *p; p++)
6464     if (*p == '/')
6465       *p = '\\';
6466 #endif
6467
6468   return templt;
6469 }
6470
6471 /* Return a newly allocated string containing the file name of FILE
6472    relative to the absolute directory DIR (which should end with a slash). */
6473 static char *
6474 relative_filename (char *file, char *dir)
6475 {
6476   char *fp, *dp, *afn, *res;
6477   int i;
6478
6479   /* Find the common root of file and dir (with a trailing slash). */
6480   afn = absolute_filename (file, cwd);
6481   fp = afn;
6482   dp = dir;
6483   while (*fp++ == *dp++)
6484     continue;
6485   fp--, dp--;                   /* back to the first differing char */
6486 #ifdef DOS_NT
6487   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6488     return afn;
6489 #endif
6490   do                            /* look at the equal chars until '/' */
6491     fp--, dp--;
6492   while (*fp != '/');
6493
6494   /* Build a sequence of "../" strings for the resulting relative file name. */
6495   i = 0;
6496   while ((dp = strchr (dp + 1, '/')) != NULL)
6497     i += 1;
6498   res = xnew (3*i + strlen (fp + 1) + 1, char);
6499   char *z = res;
6500   while (i-- > 0)
6501     z = stpcpy (z, "../");
6502
6503   /* Add the file name relative to the common root of file and dir. */
6504   strcpy (z, fp + 1);
6505   free (afn);
6506
6507   return res;
6508 }
6509
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise DIR and FILE are
   concatenated.  The result is then simplified in place: every
   "/dirname/.." and "/." component is removed.  Should that leave an
   empty string, "/" is returned instead. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.
     SLASHP always points at a '/' in RES (or is NULL once none is left). */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          /* "/.." as a whole component: drop it together with the
             component that precedes it. */
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk CP backwards from SLASHP to the nearest position
                 where an absolute name starts, i.e. the slash that opens
                 the preceding component.  The loop may leave CP one
                 position before RES, which the test below detects. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) equals the length of the text after
                 "/.." plus one, so the terminating NUL is moved too. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Re-examine from CP: the text now there may itself
                 start with "/." or "/..". */
              slashp = cp;
              continue;
            }
          /* "/." as a whole component: drop just those two characters. */
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* strlen (slashp + 1) equals the length of the text after
                 "/." plus one, so the terminating NUL is moved too.
                 SLASHP is left unchanged so the new text at this
                 position is examined again. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      /* Nothing to delete here; move on to the next slash. */
      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6572
/* Return a newly allocated string containing the absolute name of
   the directory in which FILE resides, given DIR (which should end
   with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily while the name is computed
   and restored before returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, resolve that directory
     part, then put the overwritten character back. */
  char *cut = last_slash + 1;
  char saved = *cut;
  *cut = '\0';
  char *dirname = absolute_filename (file, dir);
  *cut = saved;

  return dirname;
}
6592
/* Tell whether FN is an absolute file name: one that begins with a
   slash or, on DOS_NT, with a drive letter followed by ":/".  FN must
   already have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6604
/* Canonicalize the file name FN in place: every run of separators is
   replaced by a single forward slash.  On DOS_NT a leading upper-case
   drive letter is also downcased, and backslashes count as separators
   alongside forward slashes. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */

#ifdef DOS_NT
  const bool backslash_is_sep = true;

  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#else
  const bool backslash_is_sep = false;
#endif

  while (*src != '\0')
    {
      if (*src == '/' || (backslash_is_sep && *src == '\\'))
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == '/' || (backslash_is_sep && *src == '\\'));
        }
      else
        *dst++ = *src++;
    }

  *dst = '\0';
}
6646
6647 \f
6648 /* Initialize a linebuffer for use. */
6649 static void
6650 linebuffer_init (linebuffer *lbp)
6651 {
6652   lbp->size = (DEBUG) ? 3 : 200;
6653   lbp->buffer = xnew (lbp->size, char);
6654   lbp->buffer[0] = '\0';
6655   lbp->len = 0;
6656 }
6657
6658 /* Set the minimum size of a string contained in a linebuffer. */
6659 static void
6660 linebuffer_setlen (linebuffer *lbp, int toksize)
6661 {
6662   while (lbp->size <= toksize)
6663     {
6664       lbp->size *= 2;
6665       xrnew (lbp->buffer, lbp->size, char);
6666     }
6667   lbp->len = toksize;
6668 }
6669
/* Allocate SIZE bytes with malloc.  A NULL result is reported through
   fatal, which terminates the program. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6679
/* Resize the block PTR to SIZE bytes with realloc.  A NULL result is
   reported through fatal, which terminates the program. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6688
6689 /*
6690  * Local Variables:
6691  * indent-tabs-mode: t
6692  * tab-width: 8
6693  * fill-column: 79
6694  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6695  * c-file-style: "gnu"
6696  * End:
6697  */
6698
6699 /* etags.c ends here */