code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2014 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
     /* Revision of this etags, kept as a string object in the program.
        NOTE(review): the leading "@(#)" looks like a what(1)-style marker;
        confirm before changing the format of this string.  */
  82
  83 #ifdef DEBUG                    /* normalize DEBUG to true/false for `#if DEBUG' */
  84 #  undef DEBUG
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert; defined before <assert.h> is included */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 #endif /* WINDOWSNT */
 120
 121 #include <unistd.h>
 122 #include <stdarg.h>
 123 #include <stdlib.h>
 124 #include <string.h>
 125 #include <stdio.h>
 126 #include <ctype.h>
 127 #include <errno.h>
 128 #include <sys/types.h>
 129 #include <sys/stat.h>
 130 #include <binary-io.h>
 131 #include <c-strcase.h>
 132
 133 #include <assert.h>
 134 #ifdef NDEBUG
 135 # undef  assert                 /* some systems have a buggy assert.h */
 136 # define assert(x) ((void) 0)
 137 #endif
 138
 139 #include <getopt.h>
 140 #include <regex.h>
 141
 142 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 143  Leave it undefined to make the program "etags", which makes emacs-style
 144  tag tables and tags typedefs, #defines and struct/union/enum by default.
      Either way CTAGS ends up defined, as true or false, so that it can be
      tested with `#if CTAGS' further down.  */
 145 #ifdef CTAGS
 146 # undef  CTAGS
 147 # define CTAGS true
 148 #else
 149 # define CTAGS false
 150 #endif
 151
 152 #define streq(s,t)      (assert ((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 153 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 154 #define strneq(s,t,n)   (assert ((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 155 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 156
 157 #define CHARS 256               /* number of char values: 2^CHAR_BIT for 8-bit chars */
 158 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* x as an index in 0..CHARS-1 */
 159 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 160 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 161 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 162 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 163 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 164
     /* <ctype.h> wrappers.  Going through CHAR keeps the argument in
        0..CHARS-1, so a negative plain char is never passed on.  */
 165 #define ISALNUM(c)      isalnum (CHAR (c))
 166 #define ISALPHA(c)      isalpha (CHAR (c))
 167 #define ISDIGIT(c)      isdigit (CHAR (c))
 168 #define ISLOWER(c)      islower (CHAR (c))
 169
 170 #define lowcase(c)      tolower (CHAR (c))
 171
 172
 173 /*
 174  *      xnew, xrnew -- allocate, reallocate storage
 175  *
 176  * SYNOPSIS:    Type *xnew (int n, Type);
 177  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew yields storage for N objects of type Type, already cast to
      * `Type *'.  xrnew resizes the block OP points to so that it holds N
      * objects and assigns the resulting pointer back to OP.
      * When DEBUG is true the requests go through trace_malloc and
      * trace_realloc (from chkmalloc.h), which also receive the file and
      * line of the call; otherwise they go through xmalloc and xrealloc.
      * NOTE(review): `(n) * sizeof (Type)' is not checked for overflow.
 178  */
 179 #if DEBUG
 180 # include "chkmalloc.h"
 181 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 182                                                   (n) * sizeof (Type)))
 183 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 184                                         (char *) (op), (n) * sizeof (Type)))
 185 #else
 186 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 187 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 188                                         (char *) (op), (n) * sizeof (Type)))
 189 #endif
 190
 191 typedef void Lang_function (FILE *); /* type of a language's parse function */
 192
 193 typedef struct                  /* one entry of the compressors table */
 194 {
 195   const char *suffix;           /* file name suffix for this compressor */
 196   const char *command;          /* takes one arg and decompresses to stdout */
 197 } compressor;
 198
 199 typedef struct                  /* one entry of the lang_names table */
     /* The suffixes, filenames and interpreters members point to
        NULL-terminated lists of strings.  A list that does not apply is
        left NULL; the makefile entry, for instance, has no suffixes.  */
 200 {
 201   const char *name;             /* language name */
 202   const char *help;             /* detailed help for the language */
 203   Lang_function *function;      /* parse function */
 204   const char **suffixes;        /* name suffixes of this language's files */
 205   const char **filenames;       /* names of this language's files */
 206   const char **interpreters;    /* interpreters for this language */
 207   bool metasource;              /* source used to generate other sources */
 208 } language;
 209
 210 typedef struct fdesc            /* description of one input file */
 211 {
 212   struct fdesc *next;           /* for the linked list */
 213   char *infname;                /* uncompressed input file name */
 214   char *infabsname;             /* absolute uncompressed input file name */
 215   char *infabsdir;              /* absolute dir of input file */
 216   char *taggedfname;            /* file name to write in tagfile */
 217   language *lang;               /* language of file */
 218   char *prop;                   /* file properties to write in tagfile */
 219   bool usecharno;               /* etags tags shall contain char number */
 220   bool written;                 /* entry written in the tags file */
 221 } fdesc;
 222
 223 typedef struct node_st          /* one tag, kept in a binary tree (see nodehead) */
 224 {                               /* sorting structure */
 225   struct node_st *left, *right; /* left and right sons */
 226   fdesc *fdp;                   /* description of file to whom tag belongs */
 227   char *name;                   /* tag name */
 228   char *regex;                  /* search regexp */
 229   bool valid;                   /* write this tag on the tag file */
 230   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 231   bool been_warned;             /* warning already given for duplicated tag */
 232   int lno;                      /* line number tag is on */
 233   long cno;                     /* character number line starts on */
 234 } node;
 235
 236 /*
 237  * A `linebuffer' is a structure which holds a line of text.
 238  * `readline_internal' reads a line from a stream into a linebuffer
 239  * and works regardless of the length of the line.
 240  * SIZE is the size of BUFFER, LEN is the length of the string in
 241  * BUFFER after readline reads it.
 242  */
 243 typedef struct
 244 {
 245   long size;                    /* size of BUFFER */
 246   int len;                      /* length of the string held in BUFFER */
 247   char *buffer;                 /* the text itself */
 248 } linebuffer;
 249
 250 /* Used to support mixing of --lang and file names.  Each `argument'
        records what kind of item it is (arg_type), the language that goes
        with it and the text of the item itself.  */
 251 typedef struct
 252 {
 253   enum {
 254     at_language,                /* a language specification */
 255     at_regexp,                  /* a regular expression */
 256     at_filename,                /* a file name */
 257     at_stdin,                   /* read from stdin here */
 258     at_end                      /* stop parsing the list */
 259   } arg_type;                   /* argument type */
 260   language *lang;               /* language associated with the argument */
 261   char *what;                   /* the argument itself */
 262 } argument;
 263
 264 /* Structure defining a regular expression.  Instances are chained
        through p_next; the head of that list is p_head.  */
 265 typedef struct regexp
 266 {
 267   struct regexp *p_next;        /* pointer to next in list */
 268   language *lang;               /* if set, use only for this language */
 269   char *pattern;                /* the regexp pattern */
 270   char *name;                   /* tag name */
 271   struct re_pattern_buffer *pat; /* the compiled pattern */
 272   struct re_registers regs;     /* re registers */
 273   bool error_signaled;          /* already signaled for this regexp */
 274   bool force_explicit_name;     /* do not allow implicit tag name */
 275   bool ignore_case;             /* ignore case when matching */
 276   bool multi_line;              /* do a multi-line match on the whole file */
 277 } regexp;
 278
 279
 280 /* Many compilers barf on this:
 281         Lang_function Ada_funcs;
 282    so let's write it this way */
     /* Every parser declared below has the Lang_function signature, except
        C_entries, which takes an additional c_ext argument.  */
 283 static void Ada_funcs (FILE *);
 284 static void Asm_labels (FILE *);
 285 static void C_entries (int c_ext, FILE *);
 286 static void default_C_entries (FILE *);
 287 static void plain_C_entries (FILE *);
 288 static void Cjava_entries (FILE *);
 289 static void Cobol_paragraphs (FILE *);
 290 static void Cplusplus_entries (FILE *);
 291 static void Cstar_entries (FILE *);
 292 static void Erlang_functions (FILE *);
 293 static void Forth_words (FILE *);
 294 static void Fortran_functions (FILE *);
 295 static void HTML_labels (FILE *);
 296 static void Lisp_functions (FILE *);
 297 static void Lua_functions (FILE *);
 298 static void Makefile_targets (FILE *);
 299 static void Pascal_functions (FILE *);
 300 static void Perl_functions (FILE *);
 301 static void PHP_functions (FILE *);
 302 static void PS_functions (FILE *);
 303 static void Prolog_functions (FILE *);
 304 static void Python_functions (FILE *);
 305 static void Scheme_functions (FILE *);
 306 static void TeX_commands (FILE *);
 307 static void Texinfo_nodes (FILE *);
 308 static void Yacc_entries (FILE *);
 309 static void just_read_file (FILE *);
 310
 311 static language *get_language_from_langname (const char *);
 312 static void readline (linebuffer *, FILE *);
 313 static long readline_internal (linebuffer *, FILE *);
 314 static bool nocase_tail (const char *);
 315 static void get_tag (char *, char **);
 316
 317 static void analyse_regex (char *);
 318 static void free_regexps (void);
 319 static void regex_tag_multiline (void);
 320 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 321 static _Noreturn void suggest_asking_for_help (void);
 322 _Noreturn void fatal (const char *, const char *); /* the only one here with external linkage */
 323 static _Noreturn void pfatal (const char *);
 324 static void add_node (node *, node **);
 325
 326 static void init (void);
 327 static void process_file_name (char *, language *);
 328 static void process_file (FILE *, char *, language *);
 329 static void find_entries (FILE *);
 330 static void free_tree (node *);
 331 static void free_fdesc (fdesc *);
 332 static void pfnote (char *, bool, char *, int, int, long);
 333 static void invalidate_nodes (fdesc *, node **);
 334 static void put_entries (node *);
 335
 336 static char *concat (const char *, const char *, const char *);
 337 static char *skip_spaces (char *);
 338 static char *skip_non_spaces (char *);
 339 static char *skip_name (char *);
 340 static char *savenstr (const char *, int);
 341 static char *savestr (const char *);
 342 static char *etags_getcwd (void);
 343 static char *relative_filename (char *, char *);
 344 static char *absolute_filename (char *, char *);
 345 static char *absolute_dirname (char *, char *);
 346 static bool filename_is_absolute (char *f);
 347 static void canonicalize_filename (char *);
 348 static void linebuffer_init (linebuffer *);
 349 static void linebuffer_setlen (linebuffer *, int);
     /* The two allocators below are what the non-DEBUG xnew and xrnew
        macros expand to.  */
 350 static void *xmalloc (size_t);
 351 static void *xrealloc (char *, size_t);
 352
 353 \f
 354 static char searchar = '/';     /* use /.../ searches */
 355
 356 static char *tagfile;           /* output file */
 357 static char *progname;          /* name this program was invoked with */
 358 static char *cwd;               /* current working directory */
 359 static char *tagfiledir;        /* directory of tagfile */
 360 static FILE *tagf;              /* ioptr for tags file */
 361 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member (of `argument') */
 362
 363 static fdesc *fdhead;           /* head of file description list */
 364 static fdesc *curfdp;           /* current file description */
 365 static int lineno;              /* line number of current line */
 366 static long charno;             /* current character number */
 367 static long linecharno;         /* charno of start of current line */
 368 static char *dbp;               /* pointer to start of current tag */
 369
 370 static const int invalidcharno = -1; /* NOTE(review): presumably marks "no character number"; confirm at the uses */
 371
 372 static node *nodehead;          /* the head of the binary tree of tags */
 373 static node *last_node;         /* the last node created */
 374
 375 static linebuffer lb;           /* the current line */
 376 static linebuffer filebuf;      /* a buffer containing the whole file */
 377 static linebuffer token_name;   /* a buffer containing a tag name */
 378
 379 /* boolean "functions" (see init).  The tables are indexed by CHAR (c)
        and are read through the iswhite, notinname, intoken, begtoken and
        endtoken macros; each macro's comment names the string below that
        lists the characters belonging to its class.  */
 380 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 381 static const char
 382   /* white chars */
 383   *white = " \f\t\n\r\v",
 384   /* not in a name */
 385   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 386   /* token ending chars */
 387   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 388   /* token starting chars */
 389   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 390   /* valid in-token chars */
 391   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 392
 393 static bool append_to_tagfile;  /* -a: append to tags */
     /* The flags below that are declared int instead of bool are the ones
        whose address is given as the flag pointer of an entry in longopts.  */
 394 /* The next five default to true in C and derived languages.  */
 395 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 396 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 397                                 /* 0 struct/enum/union decls, and C++ */
 398                                 /* member functions. */
 399 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 400                                 /* constants and variables. */
 401                                 /* -D: opposite of -d.  Default under ctags. */
 402 static int globals;             /* create tags for global variables */
 403 static int members;             /* create tags for C member variables */
 404 static int declarations;        /* --declarations: tag them and extern in C&Co*/
 405 static int no_line_directive;   /* ignore #line directives (undocumented) */
 406 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
 407 static bool update;             /* -u: update tags */
 408 static bool vgrind_style;       /* -v: create vgrind style index output */
 409 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 410 static bool cxref_style;        /* -x: create cxref style output */
 411 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 412 static bool ignoreindent;       /* -I: ignore indentation in C */
 413 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 414
 415 /* STDIN is defined in LynxOS system headers; drop any such definition
        so that the name can be given our own value just below.  */
 416 #ifdef STDIN
 417 # undef STDIN
 418 #endif
 419
 420 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 421 static bool parsing_stdin;      /* --parse-stdin used */
 422
 423 static regexp *p_head;          /* list of all regexps (linked through p_next) */
 424 static bool need_filebuf;       /* some regexes are multi-line */
 425
 426 static struct option longopts[] = /* long options, for getopt_long */
     /* Columns: option name, whether it takes an argument, flag pointer,
        value.  With a NULL flag pointer getopt_long returns the value;
        otherwise it stores the value in the pointed-to int.
        Which half of the second group is compiled depends on CTAGS.
        NOTE(review): "help" is listed twice ('h' and 'H'); presumably only
        one of the two entries can ever be matched -- confirm.  */
 427 {
 428   { "append",             no_argument,       NULL,               'a'   },
 429   { "packages-only",      no_argument,       &packages_only,     1     },
 430   { "c++",                no_argument,       NULL,               'C'   },
 431   { "declarations",       no_argument,       &declarations,      1     },
 432   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
 433   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
 434   { "help",               no_argument,       NULL,               'h'   },
 435   { "help",               no_argument,       NULL,               'H'   },
 436   { "ignore-indentation", no_argument,       NULL,               'I'   },
 437   { "language",           required_argument, NULL,               'l'   },
 438   { "members",            no_argument,       &members,           1     },
 439   { "no-members",         no_argument,       &members,           0     },
 440   { "output",             required_argument, NULL,               'o'   },
 441   { "regex",              required_argument, NULL,               'r'   },
 442   { "no-regex",           no_argument,       NULL,               'R'   },
 443   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 444   { "parse-stdin",        required_argument, NULL,               STDIN },
 445   { "version",            no_argument,       NULL,               'V'   },
 446
 447 #if CTAGS /* Ctags options */
 448   { "backward-search",    no_argument,       NULL,               'B'   },
 449   { "cxref",              no_argument,       NULL,               'x'   },
 450   { "defines",            no_argument,       NULL,               'd'   },
 451   { "globals",            no_argument,       &globals,           1     },
 452   { "typedefs",           no_argument,       NULL,               't'   },
 453   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 454   { "update",             no_argument,       NULL,               'u'   },
 455   { "vgrind",             no_argument,       NULL,               'v'   },
 456   { "no-warn",            no_argument,       NULL,               'w'   },
 457
 458 #else /* Etags options */
 459   { "no-defines",         no_argument,       NULL,               'D'   },
 460   { "no-globals",         no_argument,       &globals,           0     },
 461   { "include",            required_argument, NULL,               'i'   },
 462 #endif
 463   { NULL }                      /* end of table */
 464 };
 465
 466 static compressor compressors[] = /* known suffixes and the command for each */
 467 {
 468   { "z", "gzip -d -c"},
 469   { "Z", "gzip -d -c"},
 470   { "gz", "gzip -d -c"},
 471   { "GZ", "gzip -d -c"},
 472   { "bz2", "bzip2 -d -c" },
 473   { "xz", "xz -d -c" },
 474   { NULL }                      /* end of table: NULL suffix */
 475 };
 476
 477 /*
 478  * Language stuff.
 479  */
 480
 481 /* Ada code: file name suffixes and help text of the "ada" language entry */
 482 static const char *Ada_suffixes [] =
 483   { "ads", "adb", "ada", NULL };
 484 static const char Ada_help [] =
 485 "In Ada code, functions, procedures, packages, tasks and types are\n\
 486 tags.  Use the `--packages-only' option to create tags for\n\
 487 packages only.\n\
 488 Ada tag names have suffixes indicating the type of entity:\n\
 489         Entity type:    Qualifier:\n\
 490         ------------    ----------\n\
 491         function        /f\n\
 492         procedure       /p\n\
 493         package spec    /s\n\
 494         package body    /b\n\
 495         type            /t\n\
 496         task            /k\n\
 497 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 498 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 499 will just search for any tag `bidule'.";
 500
 501 /* Assembly code: file name suffixes and help text of the "asm" language entry */
 502 static const char *Asm_suffixes [] =
 503   { "a",        /* Unix assembler */
 504     "asm", /* Microcontroller assembly */
 505     "def", /* BSO/Tasking definition includes  */
 506     "inc", /* Microcontroller include files */
 507     "ins", /* Microcontroller include files */
 508     "s", "sa", /* Unix assembler */
 509     "S",   /* cpp-processed Unix assembler */
 510     "src", /* BSO/Tasking C compiler output */
 511     NULL
 512   };
 513 static const char Asm_help [] =
 514 "In assembler code, labels appearing at the beginning of a line,\n\
 515 followed by a colon, are tags.";
 516
 517
 518 /* Note that .c and .h can be considered C++, if the --c++ flag was
 519    given, or if the `class' or `template' keywords are met inside the file.
 520    That is why default_C_entries is called for these.
        The help text is chosen at compile time: one version for ctags,
        another for etags.  */
 521 static const char *default_C_suffixes [] =
 522   { "c", "h", NULL };
 523 #if CTAGS                               /* C help for Ctags */
 524 static const char default_C_help [] =
 525 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 526 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 527 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 528 Use --globals to tag global variables.\n\
 529 You can tag function declarations and external variables by\n\
 530 using `--declarations', and struct members by using `--members'.";
 531 #else                                   /* C help for Etags */
 532 static const char default_C_help [] =
 533 "In C code, any C function or typedef is a tag, and so are\n\
 534 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 535 definitions and `enum' constants are tags unless you specify\n\
 536 `--no-defines'.  Global variables are tags unless you specify\n\
 537 `--no-globals' and so are struct members unless you specify\n\
 538 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 539 `--no-members' can make the tags table file much smaller.\n\
 540 You can tag function declarations and external variables by\n\
 541 using `--declarations'.";
 542 #endif  /* C help for Ctags and Etags */
 543
 544 static const char *Cplusplus_suffixes [] = /* C++ code: suffixes, then help text */
 545   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 546     "M",                        /* Objective C++ */
 547     "pdb",                      /* PostScript with C syntax */
 548     NULL };
 549 static const char Cplusplus_help [] =
 550 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 551 --help --lang=c --lang=c++ for full help.)\n\
 552 In addition to C tags, member functions are also recognized.  Member\n\
 553 variables are recognized unless you use the `--no-members' option.\n\
 554 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 555 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 556 `operator+'.";
 557
 558 static const char *Cjava_suffixes [] =
 559   { "java", NULL };
 560 static char Cjava_help [] =
 561 "In Java code, all the tags constructs of C and C++ code are\n\
 562 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 563
 564
 565 static const char *Cobol_suffixes [] =
 566   { "COB", "cob", NULL };
 567 static char Cobol_help [] =
 568 "In Cobol code, tags are paragraph names; that is, any word\n\
 569 starting in column 8 and followed by a period.";
 570
 571 static const char *Cstar_suffixes [] = /* C* code; its lang_names entry uses no_lang_help */
 572   { "cs", "hs", NULL };
 573
 574 static const char *Erlang_suffixes [] = /* Erlang code: suffixes, then help text */
 575   { "erl", "hrl", NULL };
 576 static const char Erlang_help [] =
 577 "In Erlang code, the tags are the functions, records and macros\n\
 578 defined in the file.";
 579
 580 const char *Forth_suffixes [] =
 581   { "fth", "tok", NULL };
 582 static const char Forth_help [] =
 583 "In Forth code, tags are words defined by `:',\n\
 584 constant, code, create, defer, value, variable, buffer:, field.";
 585
 586 static const char *Fortran_suffixes [] = /* Fortran code: suffixes, then help text */
 587   { "F", "f", "f90", "for", NULL };
 588 static const char Fortran_help [] =
 589 "In Fortran code, functions, subroutines and block data are tags.";
 590
 591 static const char *HTML_suffixes [] = /* HTML files: suffixes, then help text */
 592   { "htm", "html", "shtml", NULL };
 593 static const char HTML_help [] =
 594 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 595 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 596 occurrences of `id='.";
 597
 598 static const char *Lisp_suffixes [] = /* Lisp code: suffixes, then help text */
 599   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 600 static const char Lisp_help [] =
 601 "In Lisp code, any function defined with `defun', any variable\n\
 602 defined with `defvar' or `defconst', and in general the first\n\
 603 argument of any expression that starts with `(def' in column zero\n\
 604 is a tag.\n\
 605 The `--declarations' option tags \"(defvar foo)\" constructs too.";
 606
 607 static const char *Lua_suffixes [] = /* Lua scripts: suffixes, then help text */
 608   { "lua", "LUA", NULL };
 609 static const char Lua_help [] =
 610 "In Lua scripts, all functions are tags.";
 611
 612 static const char *Makefile_filenames [] = /* makefiles are listed by whole file name; the entry has no suffixes */
 613   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 614 static const char Makefile_help [] =
 615 "In makefiles, targets are tags; additionally, variables are tags\n\
 616 unless you specify `--no-globals'.";
 617
 618 static const char *Objc_suffixes [] = /* Objective C; its lang_names entry parses with plain_C_entries */
 619   { "lm",                       /* Objective lex file */
 620     "m",                        /* Objective C file */
 621      NULL };
 622 static const char Objc_help [] =
 623 "In Objective C code, tags include Objective C definitions for classes,\n\
 624 class categories, methods and protocols.  Tags for variables and\n\
 625 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 626 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 627
 628 static const char *Pascal_suffixes [] = /* Pascal code: suffixes, then help text */
 629   { "p", "pas", NULL };
 630 static const char Pascal_help [] =
 631 "In Pascal code, the tags are the functions and procedures defined\n\
 632 in the file.";
 633 /* " // this is for working around an Emacs highlighting bug... */
 634
 635 static const char *Perl_suffixes [] = /* Perl code: suffixes, interpreter names, help text */
 636   { "pl", "pm", NULL };
 637 static const char *Perl_interpreters [] = /* NOTE(review): "@PERL@" looks like a configure-style placeholder; confirm it is meant literally */
 638   { "perl", "@PERL@", NULL };
 639 static const char Perl_help [] =
 640 "In Perl code, the tags are the packages, subroutines and variables\n\
 641 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 642 `--globals' if you want to tag global variables.  Tags for\n\
 643 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 644 defined in the default package is `main::SUB'.";
 645
 646 static const char *PHP_suffixes [] = /* PHP code: suffixes, then help text */
 647   { "php", "php3", "php4", NULL };
 648 static const char PHP_help [] =
 649 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 650 the `--no-members' option, vars are tags too.";
 651
 652 static const char *plain_C_suffixes [] = /* suffixes only; no help text is defined next to this table */
 653   { "pc",                       /* Pro*C file */
 654      NULL };
 655
 656 static const char *PS_suffixes [] = /* PostScript code: suffixes, then help text */
 657   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 658 static const char PS_help [] =
 659 "In PostScript code, the tags are the functions.";
 660
 661 static const char *Prolog_suffixes [] = /* Prolog code: suffixes, then help text */
 662   { "prolog", NULL };
 663 static const char Prolog_help [] =
 664 "In Prolog code, tags are predicates and rules at the beginning of\n\
 665 line.";
 666
 667 static const char *Python_suffixes [] = /* Python code: suffixes, then help text */
 668   { "py", NULL };
 669 static const char Python_help [] =
 670 "In Python code, `def' or `class' at the beginning of a line\n\
 671 generate a tag.";
 672
 673 /* Scheme code.  Can't do the `SCM' or `scm' prefix with a version number. */
 674 static const char *Scheme_suffixes [] =
 675   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 676 static const char Scheme_help [] =
 677 "In Scheme code, tags include anything defined with `def' or with a\n\
 678 construct whose name starts with `def'.  They also include\n\
 679 variables set with `set!' at top level in the file.";
 680
 681 static const char *TeX_suffixes [] = /* TeX and LaTeX text: suffixes, then help text */
 682   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 683 static const char TeX_help [] =
 684 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 685 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 686 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 687 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 688 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 689 \n\
 690 Other commands can be specified by setting the environment variable\n\
 691 `TEXTAGS' to a colon-separated list like, for example,\n\
 692      TEXTAGS=\"mycommand:myothercommand\".";
 693
 694
 695 static const char *Texinfo_suffixes [] = /* Texinfo files: suffixes, then help text */
 696   { "texi", "texinfo", "txi", NULL };
 697 static const char Texinfo_help [] =
 698 "for texinfo files, lines starting with @node are tagged.";
 699
 700 static const char *Yacc_suffixes [] = /* Bison and Yacc input: suffixes, then help text */
 701   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 702 static const char Yacc_help [] =
 703 "In Bison or Yacc input files, each rule defines as a tag the\n\
 704 nonterminal it constructs.  The portions of the file that contain\n\
 705 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 706 for full help).";
 707
 708 static const char auto_help [] =
 709 "`auto' is not a real language, it indicates to use\n\
 710 a default language for files base on file name suffix and file contents.";
 711
 712 static const char none_help [] =
 713 "`none' is not a real language, it indicates to only do\n\
 714 regexp processing on files.";
 715
 716 static const char no_lang_help [] =
 717 "No detailed help available for this language.";
 718
 719
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Each row is a positional initializer for one `language'.  The first
 * column is the language name matched against --language=LANG, and the
 * list is terminated by an entry whose name is NULL.  The remaining
 * columns appear to be, in order: help text, tagging function, file
 * name suffixes, whole file names, interpreter names and a
 * "metasource" flag -- NOTE(review): inferred from the identifiers
 * used in each row; confirm against the definition of `language'.
 * Trailing columns that are omitted are zero-initialized, as usual
 * for C aggregate initializers.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* No suffixes here; a file-name list is given instead. */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* The only row that supplies an interpreter list. */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* The only row that sets the final flag (presumably `metasource'). */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 759
 760 \f
 761 static void
 762 print_language_names (void)
 763 {
 764   language *lang;
 765   const char **name, **ext;
 766
 767   puts ("\nThese are the currently supported languages, along with the\n\
 768 default file names and dot suffixes:");
 769   for (lang = lang_names; lang->name != NULL; lang++)
 770     {
 771       printf ("  %-*s", 10, lang->name);
 772       if (lang->filenames != NULL)
 773         for (name = lang->filenames; *name != NULL; name++)
 774           printf (" %s", *name);
 775       if (lang->suffixes != NULL)
 776         for (ext = lang->suffixes; *ext != NULL; ext++)
 777           printf (" .%s", *ext);
 778       puts ("");
 779     }
 780   puts ("where `auto' means use default language for files based on file\n\
 781 name suffix, and `none' means only do regexp processing on files.\n\
 782 If no language is specified and no matching suffix is found,\n\
 783 the first line of the file is read for a sharp-bang (#!) sequence\n\
 784 followed by the name of an interpreter.  If no such sequence is found,\n\
 785 Fortran is tried first; if no tags are found, C is tried next.\n\
 786 When parsing any C file, a \"class\" or \"template\" keyword\n\
 787 switches to C++.");
 788   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 789 \n\
 790 For detailed help on a given language use, for example,\n\
 791 etags --help --lang=ada.");
 792 }
 793
 794 #ifndef EMACS_NAME
 795 # define EMACS_NAME "standalone"
 796 #endif
 797 #ifndef VERSION
 798 # define VERSION "17.38.1.4"
 799 #endif
 800 static _Noreturn void
 801 print_version (void)
 802 {
 803   char emacs_copyright[] = COPYRIGHT;
 804
 805   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 806   puts (emacs_copyright);
 807   puts ("This program is distributed under the terms in ETAGS.README");
 808
 809   exit (EXIT_SUCCESS);
 810 }
 811
 812 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 813 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 814 #endif
 815
 816 static _Noreturn void
 817 print_help (argument *argbuffer)
 818 {
 819   bool help_for_lang = false;
 820
 821   for (; argbuffer->arg_type != at_end; argbuffer++)
 822     if (argbuffer->arg_type == at_language)
 823       {
 824         if (help_for_lang)
 825           puts ("");
 826         puts (argbuffer->lang->help);
 827         help_for_lang = true;
 828       }
 829
 830   if (help_for_lang)
 831     exit (EXIT_SUCCESS);
 832
 833   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 834 \n\
 835 These are the options accepted by %s.\n", progname, progname);
 836   puts ("You may use unambiguous abbreviations for the long option names.");
 837   puts ("  A - as file name means read names from stdin (one per line).\n\
 838 Absolute names are stored in the output file as they are.\n\
 839 Relative ones are stored relative to the output file's directory.\n");
 840
 841   puts ("-a, --append\n\
 842         Append tag entries to existing tags file.");
 843
 844   puts ("--packages-only\n\
 845         For Ada files, only generate tags for packages.");
 846
 847   if (CTAGS)
 848     puts ("-B, --backward-search\n\
 849         Write the search commands for the tag entries using '?', the\n\
 850         backward-search command instead of '/', the forward-search command.");
 851
 852   /* This option is mostly obsolete, because etags can now automatically
 853      detect C++.  Retained for backward compatibility and for debugging and
 854      experimentation.  In principle, we could want to tag as C++ even
 855      before any "class" or "template" keyword.
 856   puts ("-C, --c++\n\
 857         Treat files whose name suffix defaults to C language as C++ files.");
 858   */
 859
 860   puts ("--declarations\n\
 861         In C and derived languages, create tags for function declarations,");
 862   if (CTAGS)
 863     puts ("\tand create tags for extern variables if --globals is used.");
 864   else
 865     puts
 866       ("\tand create tags for extern variables unless --no-globals is used.");
 867
 868   if (CTAGS)
 869     puts ("-d, --defines\n\
 870         Create tag entries for C #define constants and enum constants, too.");
 871   else
 872     puts ("-D, --no-defines\n\
 873         Don't create tag entries for C #define constants and enum constants.\n\
 874         This makes the tags file smaller.");
 875
 876   if (!CTAGS)
 877     puts ("-i FILE, --include=FILE\n\
 878         Include a note in tag file indicating that, when searching for\n\
 879         a tag, one should also consult the tags file FILE after\n\
 880         checking the current file.");
 881
 882   puts ("-l LANG, --language=LANG\n\
 883         Force the following files to be considered as written in the\n\
 884         named language up to the next --language=LANG option.");
 885
 886   if (CTAGS)
 887     puts ("--globals\n\
 888         Create tag entries for global variables in some languages.");
 889   else
 890     puts ("--no-globals\n\
 891         Do not create tag entries for global variables in some\n\
 892         languages.  This makes the tags file smaller.");
 893
 894   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 895     puts ("--no-line-directive\n\
 896         Ignore #line preprocessor directives in C and derived languages.");
 897
 898   if (CTAGS)
 899     puts ("--members\n\
 900         Create tag entries for members of structures in some languages.");
 901   else
 902     puts ("--no-members\n\
 903         Do not create tag entries for members of structures\n\
 904         in some languages.");
 905
 906   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 907         Make a tag for each line matching a regular expression pattern\n\
 908         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 909         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 910         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 911         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 912   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 913         For example Tcl named tags can be created with:\n\
 914           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 915         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 916         `m' means to allow multi-line matches, `s' implies `m' and\n\
 917         causes dot to match any character, including newline.");
 918
 919   puts ("-R, --no-regex\n\
 920         Don't create tags from regexps for the following files.");
 921
 922   puts ("-I, --ignore-indentation\n\
 923         In C and C++ do not assume that a closing brace in the first\n\
 924         column is the final brace of a function or structure definition.");
 925
 926   puts ("-o FILE, --output=FILE\n\
 927         Write the tags to FILE.");
 928
 929   puts ("--parse-stdin=NAME\n\
 930         Read from standard input and record tags as belonging to file NAME.");
 931
 932   if (CTAGS)
 933     {
 934       puts ("-t, --typedefs\n\
 935         Generate tag entries for C and Ada typedefs.");
 936       puts ("-T, --typedefs-and-c++\n\
 937         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 938         and C++ member functions.");
 939     }
 940
 941   if (CTAGS)
 942     puts ("-u, --update\n\
 943         Update the tag entries for the given files, leaving tag\n\
 944         entries for other files in place.  Currently, this is\n\
 945         implemented by deleting the existing entries for the given\n\
 946         files and then rewriting the new entries at the end of the\n\
 947         tags file.  It is often faster to simply rebuild the entire\n\
 948         tag file than to use this.");
 949
 950   if (CTAGS)
 951     {
 952       puts ("-v, --vgrind\n\
 953         Print on the standard output an index of items intended for\n\
 954         human consumption, similar to the output of vgrind.  The index\n\
 955         is sorted, and gives the page number of each item.");
 956
 957       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 958         puts ("-w, --no-duplicates\n\
 959         Do not create duplicate tag entries, for compatibility with\n\
 960         traditional ctags.");
 961
 962       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 963         puts ("-w, --no-warn\n\
 964         Suppress warning messages about duplicate tag entries.");
 965
 966       puts ("-x, --cxref\n\
 967         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 968         The output uses line numbers instead of page numbers, but\n\
 969         beyond that the differences are cosmetic; try both to see\n\
 970         which you like.");
 971     }
 972
 973   puts ("-V, --version\n\
 974         Print the version of the program.\n\
 975 -h, --help\n\
 976         Print this help message.\n\
 977         Followed by one or more `--language' options prints detailed\n\
 978         help about tag generation for the specified languages.");
 979
 980   print_language_names ();
 981
 982   puts ("");
 983   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 984
 985   exit (EXIT_SUCCESS);
 986 }
 987
 988 \f
 989 int
 990 main (int argc, char **argv)
 991 {
 992   int i;
 993   unsigned int nincluded_files;
 994   char **included_files;
 995   argument *argbuffer;
 996   int current_arg, file_count;
 997   linebuffer filename_lb;
 998   bool help_asked = false;
 999   ptrdiff_t len;
1000   char *optstring;
1001   int opt;
1002
1003   progname = argv[0];
1004   nincluded_files = 0;
1005   included_files = xnew (argc, char *);
1006   current_arg = 0;
1007   file_count = 0;
1008
1009   /* Allocate enough no matter what happens.  Overkill, but each one
1010      is small. */
1011   argbuffer = xnew (argc, argument);
1012
1013   /*
1014    * Always find typedefs and structure tags.
1015    * Also default to find macro constants, enum constants, struct
1016    * members and global variables.  Do it for both etags and ctags.
1017    */
1018   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1019   globals = members = true;
1020
1021   /* When the optstring begins with a '-' getopt_long does not rearrange the
1022      non-options arguments to be at the end, but leaves them alone. */
1023   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1024                       (CTAGS) ? "BxdtTuvw" : "Di:",
1025                       "");
1026
1027   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1028     switch (opt)
1029       {
1030       case 0:
1031         /* If getopt returns 0, then it has already processed a
1032            long-named option.  We should do nothing.  */
1033         break;
1034
1035       case 1:
1036         /* This means that a file name has been seen.  Record it. */
1037         argbuffer[current_arg].arg_type = at_filename;
1038         argbuffer[current_arg].what     = optarg;
1039         len = strlen (optarg);
1040         if (whatlen_max < len)
1041           whatlen_max = len;
1042         ++current_arg;
1043         ++file_count;
1044         break;
1045
1046       case STDIN:
1047         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1048         argbuffer[current_arg].arg_type = at_stdin;
1049         argbuffer[current_arg].what     = optarg;
1050         len = strlen (optarg);
1051         if (whatlen_max < len)
1052           whatlen_max = len;
1053         ++current_arg;
1054         ++file_count;
1055         if (parsing_stdin)
1056           fatal ("cannot parse standard input more than once", (char *)NULL);
1057         parsing_stdin = true;
1058         break;
1059
1060         /* Common options. */
1061       case 'a': append_to_tagfile = true;       break;
1062       case 'C': cplusplus = true;               break;
1063       case 'f':         /* for compatibility with old makefiles */
1064       case 'o':
1065         if (tagfile)
1066           {
1067             error ("-o option may only be given once.");
1068             suggest_asking_for_help ();
1069             /* NOTREACHED */
1070           }
1071         tagfile = optarg;
1072         break;
1073       case 'I':
1074       case 'S':         /* for backward compatibility */
1075         ignoreindent = true;
1076         break;
1077       case 'l':
1078         {
1079           language *lang = get_language_from_langname (optarg);
1080           if (lang != NULL)
1081             {
1082               argbuffer[current_arg].lang = lang;
1083               argbuffer[current_arg].arg_type = at_language;
1084               ++current_arg;
1085             }
1086         }
1087         break;
1088       case 'c':
1089         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1090         optarg = concat (optarg, "i", ""); /* memory leak here */
1091         /* FALLTHRU */
1092       case 'r':
1093         argbuffer[current_arg].arg_type = at_regexp;
1094         argbuffer[current_arg].what = optarg;
1095         len = strlen (optarg);
1096         if (whatlen_max < len)
1097           whatlen_max = len;
1098         ++current_arg;
1099         break;
1100       case 'R':
1101         argbuffer[current_arg].arg_type = at_regexp;
1102         argbuffer[current_arg].what = NULL;
1103         ++current_arg;
1104         break;
1105       case 'V':
1106         print_version ();
1107         break;
1108       case 'h':
1109       case 'H':
1110         help_asked = true;
1111         break;
1112
1113         /* Etags options */
1114       case 'D': constantypedefs = false;                        break;
1115       case 'i': included_files[nincluded_files++] = optarg;     break;
1116
1117         /* Ctags options. */
1118       case 'B': searchar = '?';                                 break;
1119       case 'd': constantypedefs = true;                         break;
1120       case 't': typedefs = true;                                break;
1121       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1122       case 'u': update = true;                                  break;
1123       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1124       case 'x': cxref_style = true;                             break;
1125       case 'w': no_warnings = true;                             break;
1126       default:
1127         suggest_asking_for_help ();
1128         /* NOTREACHED */
1129       }
1130
1131   /* No more options.  Store the rest of arguments. */
1132   for (; optind < argc; optind++)
1133     {
1134       argbuffer[current_arg].arg_type = at_filename;
1135       argbuffer[current_arg].what = argv[optind];
1136       len = strlen (argv[optind]);
1137       if (whatlen_max < len)
1138         whatlen_max = len;
1139       ++current_arg;
1140       ++file_count;
1141     }
1142
1143   argbuffer[current_arg].arg_type = at_end;
1144
1145   if (help_asked)
1146     print_help (argbuffer);
1147     /* NOTREACHED */
1148
1149   if (nincluded_files == 0 && file_count == 0)
1150     {
1151       error ("no input files specified.");
1152       suggest_asking_for_help ();
1153       /* NOTREACHED */
1154     }
1155
1156   if (tagfile == NULL)
1157     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1158   cwd = etags_getcwd ();        /* the current working directory */
1159   if (cwd[strlen (cwd) - 1] != '/')
1160     {
1161       char *oldcwd = cwd;
1162       cwd = concat (oldcwd, "/", "");
1163       free (oldcwd);
1164     }
1165
1166   /* Compute base directory for relative file names. */
1167   if (streq (tagfile, "-")
1168       || strneq (tagfile, "/dev/", 5))
1169     tagfiledir = cwd;            /* relative file names are relative to cwd */
1170   else
1171     {
1172       canonicalize_filename (tagfile);
1173       tagfiledir = absolute_dirname (tagfile, cwd);
1174     }
1175
1176   init ();                      /* set up boolean "functions" */
1177
1178   linebuffer_init (&lb);
1179   linebuffer_init (&filename_lb);
1180   linebuffer_init (&filebuf);
1181   linebuffer_init (&token_name);
1182
1183   if (!CTAGS)
1184     {
1185       if (streq (tagfile, "-"))
1186         {
1187           tagf = stdout;
1188           SET_BINARY (fileno (stdout));
1189         }
1190       else
1191         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1192       if (tagf == NULL)
1193         pfatal (tagfile);
1194     }
1195
1196   /*
1197    * Loop through files finding functions.
1198    */
1199   for (i = 0; i < current_arg; i++)
1200     {
1201       static language *lang;    /* non-NULL if language is forced */
1202       char *this_file;
1203
1204       switch (argbuffer[i].arg_type)
1205         {
1206         case at_language:
1207           lang = argbuffer[i].lang;
1208           break;
1209         case at_regexp:
1210           analyse_regex (argbuffer[i].what);
1211           break;
1212         case at_filename:
1213               this_file = argbuffer[i].what;
1214               /* Input file named "-" means read file names from stdin
1215                  (one per line) and use them. */
1216               if (streq (this_file, "-"))
1217                 {
1218                   if (parsing_stdin)
1219                     fatal ("cannot parse standard input AND read file names from it",
1220                            (char *)NULL);
1221                   while (readline_internal (&filename_lb, stdin) > 0)
1222                     process_file_name (filename_lb.buffer, lang);
1223                 }
1224               else
1225                 process_file_name (this_file, lang);
1226           break;
1227         case at_stdin:
1228           this_file = argbuffer[i].what;
1229           process_file (stdin, this_file, lang);
1230           break;
1231         }
1232     }
1233
1234   free_regexps ();
1235   free (lb.buffer);
1236   free (filebuf.buffer);
1237   free (token_name.buffer);
1238
1239   if (!CTAGS || cxref_style)
1240     {
1241       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1242       put_entries (nodehead);
1243       free_tree (nodehead);
1244       nodehead = NULL;
1245       if (!CTAGS)
1246         {
1247           fdesc *fdp;
1248
1249           /* Output file entries that have no tags. */
1250           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1251             if (!fdp->written)
1252               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1253
1254           while (nincluded_files-- > 0)
1255             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1256
1257           if (fclose (tagf) == EOF)
1258             pfatal (tagfile);
1259         }
1260
1261       exit (EXIT_SUCCESS);
1262     }
1263
1264   /* From here on, we are in (CTAGS && !cxref_style) */
1265   if (update)
1266     {
1267       char *cmd =
1268         xmalloc (strlen (tagfile) + whatlen_max +
1269                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1270       for (i = 0; i < current_arg; ++i)
1271         {
1272           switch (argbuffer[i].arg_type)
1273             {
1274             case at_filename:
1275             case at_stdin:
1276               break;
1277             default:
1278               continue;         /* the for loop */
1279             }
1280           strcpy (cmd, "mv ");
1281           strcat (cmd, tagfile);
1282           strcat (cmd, " OTAGS;fgrep -v '\t");
1283           strcat (cmd, argbuffer[i].what);
1284           strcat (cmd, "\t' OTAGS >");
1285           strcat (cmd, tagfile);
1286           strcat (cmd, ";rm OTAGS");
1287           if (system (cmd) != EXIT_SUCCESS)
1288             fatal ("failed to execute shell command", (char *)NULL);
1289         }
1290       free (cmd);
1291       append_to_tagfile = true;
1292     }
1293
1294   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1295   if (tagf == NULL)
1296     pfatal (tagfile);
1297   put_entries (nodehead);       /* write all the tags (CTAGS) */
1298   free_tree (nodehead);
1299   nodehead = NULL;
1300   if (fclose (tagf) == EOF)
1301     pfatal (tagfile);
1302
1303   if (CTAGS)
1304     if (append_to_tagfile || update)
1305       {
1306         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1307         /* Maybe these should be used:
1308            setenv ("LC_COLLATE", "C", 1);
1309            setenv ("LC_ALL", "C", 1); */
1310         strcpy (cmd, "sort -u -o ");
1311         strcat (cmd, tagfile);
1312         strcat (cmd, " ");
1313         strcat (cmd, tagfile);
1314         exit (system (cmd));
1315       }
1316   return EXIT_SUCCESS;
1317 }
1318
1319
1320 /*
1321  * Return a compressor given the file name.  If EXTPTR is non-zero,
1322  * return a pointer into FILE where the compressor-specific
1323  * extension begins.  If no compressor is found, NULL is returned
1324  * and EXTPTR is not significant.
1325  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1326  */
1327 static compressor *
1328 get_compressor_from_suffix (char *file, char **extptr)
1329 {
1330   compressor *compr;
1331   char *slash, *suffix;
1332
1333   /* File has been processed by canonicalize_filename,
1334      so we don't need to consider backslashes on DOS_NT.  */
1335   slash = strrchr (file, '/');
1336   suffix = strrchr (file, '.');
1337   if (suffix == NULL || suffix < slash)
1338     return NULL;
1339   if (extptr != NULL)
1340     *extptr = suffix;
1341   suffix += 1;
1342   /* Let those poor souls who live with DOS 8+3 file name limits get
1343      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1344      Only the first do loop is run if not MSDOS */
1345   do
1346     {
1347       for (compr = compressors; compr->suffix != NULL; compr++)
1348         if (streq (compr->suffix, suffix))
1349           return compr;
1350       if (!MSDOS)
1351         break;                  /* do it only once: not really a loop */
1352       if (extptr != NULL)
1353         *extptr = ++suffix;
1354     } while (*suffix != '\0');
1355   return NULL;
1356 }
1357
1358
1359
1360 /*
1361  * Return a language given the name.
1362  */
1363 static language *
1364 get_language_from_langname (const char *name)
1365 {
1366   language *lang;
1367
1368   if (name == NULL)
1369     error ("empty language name");
1370   else
1371     {
1372       for (lang = lang_names; lang->name != NULL; lang++)
1373         if (streq (name, lang->name))
1374           return lang;
1375       error ("unknown language \"%s\"", name);
1376     }
1377
1378   return NULL;
1379 }
1380
1381
1382 /*
1383  * Return a language given the interpreter name.
1384  */
1385 static language *
1386 get_language_from_interpreter (char *interpreter)
1387 {
1388   language *lang;
1389   const char **iname;
1390
1391   if (interpreter == NULL)
1392     return NULL;
1393   for (lang = lang_names; lang->name != NULL; lang++)
1394     if (lang->interpreters != NULL)
1395       for (iname = lang->interpreters; *iname != NULL; iname++)
1396         if (streq (*iname, interpreter))
1397             return lang;
1398
1399   return NULL;
1400 }
1401
1402
1403
1404 /*
1405  * Return a language given the file name.
1406  */
1407 static language *
1408 get_language_from_filename (char *file, int case_sensitive)
1409 {
1410   language *lang;
1411   const char **name, **ext, *suffix;
1412
1413   /* Try whole file name first. */
1414   for (lang = lang_names; lang->name != NULL; lang++)
1415     if (lang->filenames != NULL)
1416       for (name = lang->filenames; *name != NULL; name++)
1417         if ((case_sensitive)
1418             ? streq (*name, file)
1419             : strcaseeq (*name, file))
1420           return lang;
1421
1422   /* If not found, try suffix after last dot. */
1423   suffix = strrchr (file, '.');
1424   if (suffix == NULL)
1425     return NULL;
1426   suffix += 1;
1427   for (lang = lang_names; lang->name != NULL; lang++)
1428     if (lang->suffixes != NULL)
1429       for (ext = lang->suffixes; *ext != NULL; ext++)
1430         if ((case_sensitive)
1431             ? streq (*ext, suffix)
1432             : strcaseeq (*ext, suffix))
1433           return lang;
1434   return NULL;
1435 }
1436
1437 \f
1438 /*
1439  * This routine is called on each file argument.
1440  */
1441 static void
1442 process_file_name (char *file, language *lang)
1443 {
1444   struct stat stat_buf;
1445   FILE *inf;
1446   fdesc *fdp;
1447   compressor *compr;
1448   char *compressed_name, *uncompressed_name;
1449   char *ext, *real_name;
1450   int retval;
1451
1452   canonicalize_filename (file);
1453   if (streq (file, tagfile) && !streq (tagfile, "-"))
1454     {
1455       error ("skipping inclusion of %s in self.", file);
1456       return;
1457     }
1458   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1459     {
1460       compressed_name = NULL;
1461       real_name = uncompressed_name = savestr (file);
1462     }
1463   else
1464     {
1465       real_name = compressed_name = savestr (file);
1466       uncompressed_name = savenstr (file, ext - file);
1467     }
1468
1469   /* If the canonicalized uncompressed name
1470      has already been dealt with, skip it silently. */
1471   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1472     {
1473       assert (fdp->infname != NULL);
1474       if (streq (uncompressed_name, fdp->infname))
1475         goto cleanup;
1476     }
1477
1478   if (stat (real_name, &stat_buf) != 0)
1479     {
1480       /* Reset real_name and try with a different name. */
1481       real_name = NULL;
1482       if (compressed_name != NULL) /* try with the given suffix */
1483         {
1484           if (stat (uncompressed_name, &stat_buf) == 0)
1485             real_name = uncompressed_name;
1486         }
1487       else                      /* try all possible suffixes */
1488         {
1489           for (compr = compressors; compr->suffix != NULL; compr++)
1490             {
1491               compressed_name = concat (file, ".", compr->suffix);
1492               if (stat (compressed_name, &stat_buf) != 0)
1493                 {
1494                   if (MSDOS)
1495                     {
1496                       char *suf = compressed_name + strlen (file);
1497                       size_t suflen = strlen (compr->suffix) + 1;
1498                       for ( ; suf[1]; suf++, suflen--)
1499                         {
1500                           memmove (suf, suf + 1, suflen);
1501                           if (stat (compressed_name, &stat_buf) == 0)
1502                             {
1503                               real_name = compressed_name;
1504                               break;
1505                             }
1506                         }
1507                       if (real_name != NULL)
1508                         break;
1509                     } /* MSDOS */
1510                   free (compressed_name);
1511                   compressed_name = NULL;
1512                 }
1513               else
1514                 {
1515                   real_name = compressed_name;
1516                   break;
1517                 }
1518             }
1519         }
1520       if (real_name == NULL)
1521         {
1522           perror (file);
1523           goto cleanup;
1524         }
1525     } /* try with a different name */
1526
1527   if (!S_ISREG (stat_buf.st_mode))
1528     {
1529       error ("skipping %s: it is not a regular file.", real_name);
1530       goto cleanup;
1531     }
1532   if (real_name == compressed_name)
1533     {
1534       char *cmd = concat (compr->command, " ", real_name);
1535       inf = popen (cmd, "rb");
1536       free (cmd);
1537     }
1538   else
1539     inf = fopen (real_name, "rb");
1540   if (inf == NULL)
1541     {
1542       perror (real_name);
1543       goto cleanup;
1544     }
1545
1546   process_file (inf, uncompressed_name, lang);
1547
1548   if (real_name == compressed_name)
1549     retval = pclose (inf);
1550   else
1551     retval = fclose (inf);
1552   if (retval < 0)
1553     pfatal (file);
1554
1555  cleanup:
1556   free (compressed_name);
1557   free (uncompressed_name);
1558   last_node = NULL;
1559   curfdp = NULL;
1560   return;
1561 }
1562
1563 static void
1564 process_file (FILE *fh, char *fn, language *lang)
1565 {
1566   static const fdesc emptyfdesc;
1567   fdesc *fdp;
1568
1569   /* Create a new input file description entry. */
1570   fdp = xnew (1, fdesc);
1571   *fdp = emptyfdesc;
1572   fdp->next = fdhead;
1573   fdp->infname = savestr (fn);
1574   fdp->lang = lang;
1575   fdp->infabsname = absolute_filename (fn, cwd);
1576   fdp->infabsdir = absolute_dirname (fn, cwd);
1577   if (filename_is_absolute (fn))
1578     {
1579       /* An absolute file name.  Canonicalize it. */
1580       fdp->taggedfname = absolute_filename (fn, NULL);
1581     }
1582   else
1583     {
1584       /* A file name relative to cwd.  Make it relative
1585          to the directory of the tags file. */
1586       fdp->taggedfname = relative_filename (fn, tagfiledir);
1587     }
1588   fdp->usecharno = true;        /* use char position when making tags */
1589   fdp->prop = NULL;
1590   fdp->written = false;         /* not written on tags file yet */
1591
1592   fdhead = fdp;
1593   curfdp = fdhead;              /* the current file description */
1594
1595   find_entries (fh);
1596
1597   /* If not Ctags, and if this is not metasource and if it contained no #line
1598      directives, we can write the tags and free all nodes pointing to
1599      curfdp. */
1600   if (!CTAGS
1601       && curfdp->usecharno      /* no #line directives in this file */
1602       && !curfdp->lang->metasource)
1603     {
1604       node *np, *prev;
1605
1606       /* Look for the head of the sublist relative to this file.  See add_node
1607          for the structure of the node tree. */
1608       prev = NULL;
1609       for (np = nodehead; np != NULL; prev = np, np = np->left)
1610         if (np->fdp == curfdp)
1611           break;
1612
1613       /* If we generated tags for this file, write and delete them. */
1614       if (np != NULL)
1615         {
1616           /* This is the head of the last sublist, if any.  The following
1617              instructions depend on this being true. */
1618           assert (np->left == NULL);
1619
1620           assert (fdhead == curfdp);
1621           assert (last_node->fdp == curfdp);
1622           put_entries (np);     /* write tags for file curfdp->taggedfname */
1623           free_tree (np);       /* remove the written nodes */
1624           if (prev == NULL)
1625             nodehead = NULL;    /* no nodes left */
1626           else
1627             prev->left = NULL;  /* delete the pointer to the sublist */
1628         }
1629     }
1630 }
1631
1632 /*
1633  * This routine sets up the boolean pseudo-functions which work
1634  * by setting boolean flags dependent upon the corresponding character.
1635  * Every char which is NOT in that string is not a white char.  Therefore,
1636  * all of the array "_wht" is set to false, and then the elements
1637  * subscripted by the chars in "white" are set to true.  Thus "_wht"
1638  * of a char is true if it is the string "white", else false.
1639  */
1640 static void
1641 init (void)
1642 {
1643   const char *sp;
1644   int i;
1645
1646   for (i = 0; i < CHARS; i++)
1647     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i)
1648       = false;
1649   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = true;
1650   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = true;
1651   notinname ('\0') = notinname ('\n');
1652   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = true;
1653   begtoken ('\0') = begtoken ('\n');
1654   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = true;
1655   intoken ('\0') = intoken ('\n');
1656   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = true;
1657   endtoken ('\0') = endtoken ('\n');
1658 }
1659
1660 /*
1661  * This routine opens the specified file and calls the function
1662  * which finds the function and type definitions.
1663  */
1664 static void
1665 find_entries (FILE *inf)
1666 {
1667   char *cp;
1668   language *lang = curfdp->lang;
1669   Lang_function *parser = NULL;
1670
1671   /* If user specified a language, use it. */
1672   if (lang != NULL && lang->function != NULL)
1673     {
1674       parser = lang->function;
1675     }
1676
1677   /* Else try to guess the language given the file name. */
1678   if (parser == NULL)
1679     {
1680       lang = get_language_from_filename (curfdp->infname, true);
1681       if (lang != NULL && lang->function != NULL)
1682         {
1683           curfdp->lang = lang;
1684           parser = lang->function;
1685         }
1686     }
1687
1688   /* Else look for sharp-bang as the first two characters. */
1689   if (parser == NULL
1690       && readline_internal (&lb, inf) > 0
1691       && lb.len >= 2
1692       && lb.buffer[0] == '#'
1693       && lb.buffer[1] == '!')
1694     {
1695       char *lp;
1696
1697       /* Set lp to point at the first char after the last slash in the
1698          line or, if no slashes, at the first nonblank.  Then set cp to
1699          the first successive blank and terminate the string. */
1700       lp = strrchr (lb.buffer+2, '/');
1701       if (lp != NULL)
1702         lp += 1;
1703       else
1704         lp = skip_spaces (lb.buffer + 2);
1705       cp = skip_non_spaces (lp);
1706       *cp = '\0';
1707
1708       if (strlen (lp) > 0)
1709         {
1710           lang = get_language_from_interpreter (lp);
1711           if (lang != NULL && lang->function != NULL)
1712             {
1713               curfdp->lang = lang;
1714               parser = lang->function;
1715             }
1716         }
1717     }
1718
1719   /* We rewind here, even if inf may be a pipe.  We fail if the
1720      length of the first line is longer than the pipe block size,
1721      which is unlikely. */
1722   rewind (inf);
1723
1724   /* Else try to guess the language given the case insensitive file name. */
1725   if (parser == NULL)
1726     {
1727       lang = get_language_from_filename (curfdp->infname, false);
1728       if (lang != NULL && lang->function != NULL)
1729         {
1730           curfdp->lang = lang;
1731           parser = lang->function;
1732         }
1733     }
1734
1735   /* Else try Fortran or C. */
1736   if (parser == NULL)
1737     {
1738       node *old_last_node = last_node;
1739
1740       curfdp->lang = get_language_from_langname ("fortran");
1741       find_entries (inf);
1742
1743       if (old_last_node == last_node)
1744         /* No Fortran entries found.  Try C. */
1745         {
1746           /* We do not tag if rewind fails.
1747              Only the file name will be recorded in the tags file. */
1748           rewind (inf);
1749           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1750           find_entries (inf);
1751         }
1752       return;
1753     }
1754
1755   if (!no_line_directive
1756       && curfdp->lang != NULL && curfdp->lang->metasource)
1757     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1758        file, or anyway we parsed a file that is automatically generated from
1759        this one.  If this is the case, the bingo.c file contained #line
1760        directives that generated tags pointing to this file.  Let's delete
1761        them all before parsing this file, which is the real source. */
1762     {
1763       fdesc **fdpp = &fdhead;
1764       while (*fdpp != NULL)
1765         if (*fdpp != curfdp
1766             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1767           /* We found one of those!  We must delete both the file description
1768              and all tags referring to it. */
1769           {
1770             fdesc *badfdp = *fdpp;
1771
1772             /* Delete the tags referring to badfdp->taggedfname
1773                that were obtained from badfdp->infname. */
1774             invalidate_nodes (badfdp, &nodehead);
1775
1776             *fdpp = badfdp->next; /* remove the bad description from the list */
1777             free_fdesc (badfdp);
1778           }
1779         else
1780           fdpp = &(*fdpp)->next; /* advance the list pointer */
1781     }
1782
1783   assert (parser != NULL);
1784
1785   /* Generic initializations before reading from file. */
1786   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1787
1788   /* Generic initializations before parsing file with readline. */
1789   lineno = 0;                  /* reset global line number */
1790   charno = 0;                  /* reset global char number */
1791   linecharno = 0;              /* reset global char number of line start */
1792
1793   parser (inf);
1794
1795   regex_tag_multiline ();
1796 }
1797
1798 \f
1799 /*
1800  * Check whether an implicitly named tag should be created,
1801  * then call `pfnote'.
1802  * NAME is a string that is internally copied by this function.
1803  *
1804  * TAGS format specification
1805  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1806  * The following is explained in some more detail in etc/ETAGS.EBNF.
1807  *
1808  * make_tag creates tags with "implicit tag names" (unnamed tags)
1809  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1810  *  1. NAME does not contain any of the characters in NONAM;
1811  *  2. LINESTART contains name as either a rightmost, or rightmost but
1812  *     one character, substring;
1813  *  3. the character, if any, immediately before NAME in LINESTART must
1814  *     be a character in NONAM;
1815  *  4. the character, if any, immediately after NAME in LINESTART must
1816  *     also be a character in NONAM.
1817  *
1818  * The implementation uses the notinname() macro, which recognizes the
1819  * characters stored in the string `nonam'.
1820  * etags.el needs to use the same characters that are in NONAM.
1821  */
1822 static void
1823 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1824           int namelen,          /* tag length */
1825           bool is_func,         /* tag is a function */
1826           char *linestart,      /* start of the line where tag is */
1827           int linelen,          /* length of the line where tag is */
1828           int lno,              /* line number */
1829           long int cno)         /* character number */
1830 {
1831   bool named = (name != NULL && namelen > 0);
1832   char *nname = NULL;
1833
1834   if (!CTAGS && named)          /* maybe set named to false */
1835     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1836        such that etags.el can guess a name from it. */
1837     {
1838       int i;
1839       register const char *cp = name;
1840
1841       for (i = 0; i < namelen; i++)
1842         if (notinname (*cp++))
1843           break;
1844       if (i == namelen)                         /* rule #1 */
1845         {
1846           cp = linestart + linelen - namelen;
1847           if (notinname (linestart[linelen-1]))
1848             cp -= 1;                            /* rule #4 */
1849           if (cp >= linestart                   /* rule #2 */
1850               && (cp == linestart
1851                   || notinname (cp[-1]))        /* rule #3 */
1852               && strneq (name, cp, namelen))    /* rule #2 */
1853             named = false;      /* use implicit tag name */
1854         }
1855     }
1856
1857   if (named)
1858     nname = savenstr (name, namelen);
1859
1860   pfnote (nname, is_func, linestart, linelen, lno, cno);
1861 }
1862
1863 /* Record a tag. */
1864 static void
1865 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1866         long int cno)
1867                                 /* tag name, or NULL if unnamed */
1868                                 /* tag is a function */
1869                                 /* start of the line where tag is */
1870                                 /* length of the line where tag is */
1871                                 /* line number */
1872                                 /* character number */
1873 {
1874   register node *np;
1875
1876   assert (name == NULL || name[0] != '\0');
1877   if (CTAGS && name == NULL)
1878     return;
1879
1880   np = xnew (1, node);
1881
1882   /* If ctags mode, change name "main" to M<thisfilename>. */
1883   if (CTAGS && !cxref_style && streq (name, "main"))
1884     {
1885       char *fp = strrchr (curfdp->taggedfname, '/');
1886       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1887       fp = strrchr (np->name, '.');
1888       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1889         fp[0] = '\0';
1890     }
1891   else
1892     np->name = name;
1893   np->valid = true;
1894   np->been_warned = false;
1895   np->fdp = curfdp;
1896   np->is_func = is_func;
1897   np->lno = lno;
1898   if (np->fdp->usecharno)
1899     /* Our char numbers are 0-base, because of C language tradition?
1900        ctags compatibility?  old versions compatibility?   I don't know.
1901        Anyway, since emacs's are 1-base we expect etags.el to take care
1902        of the difference.  If we wanted to have 1-based numbers, we would
1903        uncomment the +1 below. */
1904     np->cno = cno /* + 1 */ ;
1905   else
1906     np->cno = invalidcharno;
1907   np->left = np->right = NULL;
1908   if (CTAGS && !cxref_style)
1909     {
1910       if (strlen (linestart) < 50)
1911         np->regex = concat (linestart, "$", "");
1912       else
1913         np->regex = savenstr (linestart, 50);
1914     }
1915   else
1916     np->regex = savenstr (linestart, linelen);
1917
1918   add_node (np, &nodehead);
1919 }
1920
1921 /*
1922  * free_tree ()
1923  *      recurse on left children, iterate on right children.
1924  */
1925 static void
1926 free_tree (register node *np)
1927 {
1928   while (np)
1929     {
1930       register node *node_right = np->right;
1931       free_tree (np->left);
1932       free (np->name);
1933       free (np->regex);
1934       free (np);
1935       np = node_right;
1936     }
1937 }
1938
1939 /*
1940  * free_fdesc ()
1941  *      delete a file description
1942  */
1943 static void
1944 free_fdesc (register fdesc *fdp)
1945 {
1946   free (fdp->infname);
1947   free (fdp->infabsname);
1948   free (fdp->infabsdir);
1949   free (fdp->taggedfname);
1950   free (fdp->prop);
1951   free (fdp);
1952 }
1953
1954 /*
1955  * add_node ()
1956  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1957  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1958  *      balancing.
1959  *
1960  *      add_node is the only function allowed to add nodes, so it can
1961  *      maintain state.
1962  */
1963 static void
1964 add_node (node *np, node **cur_node_p)
1965 {
1966   register int dif;
1967   register node *cur_node = *cur_node_p;
1968
1969   if (cur_node == NULL)
1970     {
1971       *cur_node_p = np;
1972       last_node = np;
1973       return;
1974     }
1975
1976   if (!CTAGS)
1977     /* Etags Mode */
1978     {
1979       /* For each file name, tags are in a linked sublist on the right
1980          pointer.  The first tags of different files are a linked list
1981          on the left pointer.  last_node points to the end of the last
1982          used sublist. */
1983       if (last_node != NULL && last_node->fdp == np->fdp)
1984         {
1985           /* Let's use the same sublist as the last added node. */
1986           assert (last_node->right == NULL);
1987           last_node->right = np;
1988           last_node = np;
1989         }
1990       else if (cur_node->fdp == np->fdp)
1991         {
1992           /* Scanning the list we found the head of a sublist which is
1993              good for us.  Let's scan this sublist. */
1994           add_node (np, &cur_node->right);
1995         }
1996       else
1997         /* The head of this sublist is not good for us.  Let's try the
1998            next one. */
1999         add_node (np, &cur_node->left);
2000     } /* if ETAGS mode */
2001
2002   else
2003     {
2004       /* Ctags Mode */
2005       dif = strcmp (np->name, cur_node->name);
2006
2007       /*
2008        * If this tag name matches an existing one, then
2009        * do not add the node, but maybe print a warning.
2010        */
2011       if (no_duplicates && !dif)
2012         {
2013           if (np->fdp == cur_node->fdp)
2014             {
2015               if (!no_warnings)
2016                 {
2017                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2018                            np->fdp->infname, lineno, np->name);
2019                   fprintf (stderr, "Second entry ignored\n");
2020                 }
2021             }
2022           else if (!cur_node->been_warned && !no_warnings)
2023             {
2024               fprintf
2025                 (stderr,
2026                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2027                  np->fdp->infname, cur_node->fdp->infname, np->name);
2028               cur_node->been_warned = true;
2029             }
2030           return;
2031         }
2032
2033       /* Actually add the node */
2034       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2035     } /* if CTAGS mode */
2036 }
2037
2038 /*
2039  * invalidate_nodes ()
2040  *      Scan the node tree and invalidate all nodes pointing to the
2041  *      given file description (CTAGS case) or free them (ETAGS case).
2042  */
2043 static void
2044 invalidate_nodes (fdesc *badfdp, node **npp)
2045 {
2046   node *np = *npp;
2047
2048   if (np == NULL)
2049     return;
2050
2051   if (CTAGS)
2052     {
2053       if (np->left != NULL)
2054         invalidate_nodes (badfdp, &np->left);
2055       if (np->fdp == badfdp)
2056         np->valid = false;
2057       if (np->right != NULL)
2058         invalidate_nodes (badfdp, &np->right);
2059     }
2060   else
2061     {
2062       assert (np->fdp != NULL);
2063       if (np->fdp == badfdp)
2064         {
2065           *npp = np->left;      /* detach the sublist from the list */
2066           np->left = NULL;      /* isolate it */
2067           free_tree (np);       /* free it */
2068           invalidate_nodes (badfdp, npp);
2069         }
2070       else
2071         invalidate_nodes (badfdp, &np->left);
2072     }
2073 }
2074
2075 \f
/* Forward declarations of the two size-computing helpers defined below. */
static int total_size_of_entries (node *);
static int number_len (long) ATTRIBUTE_CONST;
2078
/* Return how many decimal digits are needed to print the non-negative
   number NUM.  The result is never less than 1. */
static int
number_len (long int num)
{
  int digits = 1;

  /* Every further non-zero quotient by ten is one more digit. */
  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2088
2089 /*
2090  * Return total number of characters that put_entries will output for
2091  * the nodes in the linked list at the right of the specified node.
2092  * This count is irrelevant with etags.el since emacs 19.34 at least,
2093  * but is still supplied for backward compatibility.
2094  */
2095 static int
2096 total_size_of_entries (register node *np)
2097 {
2098   register int total = 0;
2099
2100   for (; np != NULL; np = np->right)
2101     if (np->valid)
2102       {
2103         total += strlen (np->regex) + 1;                /* pat\177 */
2104         if (np->name != NULL)
2105           total += strlen (np->name) + 1;               /* name\001 */
2106         total += number_len ((long) np->lno) + 1;       /* lno, */
2107         if (np->cno != invalidcharno)                   /* cno */
2108           total += number_len (np->cno);
2109         total += 1;                                     /* newline */
2110       }
2111
2112   return total;
2113 }
2114
2115 static void
2116 put_entries (register node *np)
2117 {
2118   register char *sp;
2119   static fdesc *fdp = NULL;
2120
2121   if (np == NULL)
2122     return;
2123
2124   /* Output subentries that precede this one */
2125   if (CTAGS)
2126     put_entries (np->left);
2127
2128   /* Output this entry */
2129   if (np->valid)
2130     {
2131       if (!CTAGS)
2132         {
2133           /* Etags mode */
2134           if (fdp != np->fdp)
2135             {
2136               fdp = np->fdp;
2137               fprintf (tagf, "\f\n%s,%d\n",
2138                        fdp->taggedfname, total_size_of_entries (np));
2139               fdp->written = true;
2140             }
2141           fputs (np->regex, tagf);
2142           fputc ('\177', tagf);
2143           if (np->name != NULL)
2144             {
2145               fputs (np->name, tagf);
2146               fputc ('\001', tagf);
2147             }
2148           fprintf (tagf, "%d,", np->lno);
2149           if (np->cno != invalidcharno)
2150             fprintf (tagf, "%ld", np->cno);
2151           fputs ("\n", tagf);
2152         }
2153       else
2154         {
2155           /* Ctags mode */
2156           if (np->name == NULL)
2157             error ("internal error: NULL name in ctags mode.");
2158
2159           if (cxref_style)
2160             {
2161               if (vgrind_style)
2162                 fprintf (stdout, "%s %s %d\n",
2163                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2164               else
2165                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2166                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2167             }
2168           else
2169             {
2170               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2171
2172               if (np->is_func)
2173                 {               /* function or #define macro with args */
2174                   putc (searchar, tagf);
2175                   putc ('^', tagf);
2176
2177                   for (sp = np->regex; *sp; sp++)
2178                     {
2179                       if (*sp == '\\' || *sp == searchar)
2180                         putc ('\\', tagf);
2181                       putc (*sp, tagf);
2182                     }
2183                   putc (searchar, tagf);
2184                 }
2185               else
2186                 {               /* anything else; text pattern inadequate */
2187                   fprintf (tagf, "%d", np->lno);
2188                 }
2189               putc ('\n', tagf);
2190             }
2191         }
2192     } /* if this node contains a valid tag */
2193
2194   /* Output subentries that follow this one */
2195   put_entries (np->right);
2196   if (!CTAGS)
2197     put_entries (np->left);
2198 }
2199
2200 \f
/* C extensions.
   These flags describe the C dialect being parsed.  Note that the
   values of C_STAR and C_JAVA both include the C_PLPL bit, which is
   why the keyword table below masks it out with (C_JAVA & ~C_PLPL)
   for keywords that must not be recognized in C++. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 * Each keyword of the table below is classified with one of these
 * values; st_none is what C_symtype returns for a word that is not a
 * keyword of the dialect being parsed.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2224
2225 /* Feed stuff between (but not including) %[ and %] lines to:
2226      gperf -m 5
2227 %[
2228 %compare-strncmp
2229 %enum
2230 %struct-type
2231 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2232 %%
2233 if,             0,                      st_C_ignore
2234 for,            0,                      st_C_ignore
2235 while,          0,                      st_C_ignore
2236 switch,         0,                      st_C_ignore
2237 return,         0,                      st_C_ignore
2238 __attribute__,  0,                      st_C_attribute
2239 GTY,            0,                      st_C_attribute
2240 @interface,     0,                      st_C_objprot
2241 @protocol,      0,                      st_C_objprot
2242 @implementation,0,                      st_C_objimpl
2243 @end,           0,                      st_C_objend
2244 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2245 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2246 friend,         C_PLPL,                 st_C_ignore
2247 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2248 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2249 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2250 class,          0,                      st_C_class
2251 namespace,      C_PLPL,                 st_C_struct
2252 domain,         C_STAR,                 st_C_struct
2253 union,          0,                      st_C_struct
2254 struct,         0,                      st_C_struct
2255 extern,         0,                      st_C_extern
2256 enum,           0,                      st_C_enum
2257 typedef,        0,                      st_C_typedef
2258 define,         0,                      st_C_define
2259 undef,          0,                      st_C_define
2260 operator,       C_PLPL,                 st_C_operator
2261 template,       0,                      st_C_template
2262 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2263 DEFUN,          0,                      st_C_gnumacro
2264 SYSCALL,        0,                      st_C_gnumacro
2265 ENTRY,          0,                      st_C_gnumacro
2266 PSEUDO,         0,                      st_C_gnumacro
2267 # These are defined inside C functions, so currently they are not met.
2268 # EXFUN used in glibc, DEFVAR_* in emacs.
2269 #EXFUN,         0,                      st_C_gnumacro
2270 #DEFVAR_,       0,                      st_C_gnumacro
2271 %]
2272 and replace lines between %< and %> with its output, then:
2273  - remove the #if characterset check
2274  - make in_word_set static and not inline. */
2275 /*%<*/
2276 /* C code produced by gperf version 3.0.1 */
2277 /* Command-line: gperf -m 5  */
2278 /* Computed positions: -k'2-3' */
2279
/* One keyword of the table: NAME is its spelling, C_EXT the mask of
   dialect flags for which it is recognized (0 means every dialect, see
   C_symtype), TYPE its classification. */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2281 /* maximum key range = 33, duplicates = 0 */
2282
/* Hash function of the keyword table (originally produced by gperf).
   The value is LEN plus the association value of the second character
   of STR plus, unless LEN is exactly 2, that of the third character.
   STR must therefore be at least two characters long. */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int value = len;

  /* Two-character words contribute their second character only. */
  if (len != 2)
    value += asso_values[(unsigned char) str[2]];
  value += asso_values[(unsigned char) str[1]];
  return value;
}
2328
2329 static struct C_stab_entry *
2330 in_word_set (register const char *str, register unsigned int len)
2331 {
2332   enum
2333     {
2334       TOTAL_KEYWORDS = 33,
2335       MIN_WORD_LENGTH = 2,
2336       MAX_WORD_LENGTH = 15,
2337       MIN_HASH_VALUE = 2,
2338       MAX_HASH_VALUE = 34
2339     };
2340
2341   static struct C_stab_entry wordlist[] =
2342     {
2343       {""}, {""},
2344       {"if",            0,                      st_C_ignore},
2345       {"GTY",           0,                      st_C_attribute},
2346       {"@end",          0,                      st_C_objend},
2347       {"union",         0,                      st_C_struct},
2348       {"define",                0,                      st_C_define},
2349       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2350       {"template",      0,                      st_C_template},
2351       {"operator",      C_PLPL,                 st_C_operator},
2352       {"@interface",    0,                      st_C_objprot},
2353       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2354       {"friend",                C_PLPL,                 st_C_ignore},
2355       {"typedef",       0,                      st_C_typedef},
2356       {"return",                0,                      st_C_ignore},
2357       {"@implementation",0,                     st_C_objimpl},
2358       {"@protocol",     0,                      st_C_objprot},
2359       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2360       {"extern",                0,                      st_C_extern},
2361       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2362       {"struct",                0,                      st_C_struct},
2363       {"domain",                C_STAR,                 st_C_struct},
2364       {"switch",                0,                      st_C_ignore},
2365       {"enum",          0,                      st_C_enum},
2366       {"for",           0,                      st_C_ignore},
2367       {"namespace",     C_PLPL,                 st_C_struct},
2368       {"class",         0,                      st_C_class},
2369       {"while",         0,                      st_C_ignore},
2370       {"undef",         0,                      st_C_define},
2371       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2372       {"__attribute__", 0,                      st_C_attribute},
2373       {"SYSCALL",       0,                      st_C_gnumacro},
2374       {"ENTRY",         0,                      st_C_gnumacro},
2375       {"PSEUDO",                0,                      st_C_gnumacro},
2376       {"DEFUN",         0,                      st_C_gnumacro}
2377     };
2378
2379   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2380     {
2381       int key = hash (str, len);
2382
2383       if (key <= MAX_HASH_VALUE && key >= 0)
2384         {
2385           const char *s = wordlist[key].name;
2386
2387           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2388             return &wordlist[key];
2389         }
2390     }
2391   return 0;
2392 }
2393 /*%>*/
2394
2395 static enum sym_type
2396 C_symtype (char *str, int len, int c_ext)
2397 {
2398   register struct C_stab_entry *se = in_word_set (str, len);
2399
2400   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2401     return st_none;
2402   return se->type;
2403 }
2404
2405 \f
2406 /*
2407  * Ignoring __attribute__ ((list))
2408  */
2409 static bool inattribute;        /* looking at an __attribute__ construct */
2410
2411 /*
2412  * C functions and variables are recognized using a simple
2413  * finite automaton.  fvdef is its state variable.
2414  */
2415 static enum
2416 {
2417   fvnone,                       /* nothing seen */
2418   fdefunkey,                    /* Emacs DEFUN keyword seen */
2419   fdefunname,                   /* Emacs DEFUN name seen */
2420   foperator,                    /* func: operator keyword seen (cplpl) */
2421   fvnameseen,                   /* function or variable name seen */
2422   fstartlist,                   /* func: just after open parenthesis */
2423   finlist,                      /* func: in parameter list */
2424   flistseen,                    /* func: after parameter list */
2425   fignore,                      /* func: before open brace */
2426   vignore                       /* var-like: ignore until ';' */
2427 } fvdef;
2428
2429 static bool fvextern;           /* func or var: extern keyword seen; */
2430
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 * consider_token leaves tnone only when the `typedefs' flag is set.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag; C_entries does
                                   not consider tokens in this state */
} typdef;
2444
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag;
                                   consider_token also enters this state
                                   when a token of type st_C_javastruct
                                   follows the tag */
} structdef;
2458
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a fresh savenstr copy here for the token that
 * follows an Objective C class keyword; the copy is never freed (see
 * the note in the oignore case of consider_token).  The initializer is
 * only a placeholder.
 */
static const char *objtag = "<uninited>";
2463
/*
 * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro puts it back to dnone at each end of line;
 * CNL_SAVE_DEFINEDEF leaves it alone.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2474
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2494
2495
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* true if a tag may be created from this
                                   token; make_C_tag creates no tag when it
                                   is false.  The token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2518
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above (int, char *, int);
static void popclass_above (int);
static void write_classname (linebuffer *, const char *qualifier);

/*
 * cname[i] and bracelev[i], for 0 <= i < nl, describe one stack entry
 * each.  Entries are kept in order of increasing brace level, so entry
 * 0 is the outermost one.  A NULL cname marks an entry that was pushed
 * without a name.
 */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* allocated length of both arrays */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named `bracelev' from
   the scope where the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2538
2539 static void
2540 pushclass_above (int bracelev, char *str, int len)
2541 {
2542   int nl;
2543
2544   popclass_above (bracelev);
2545   nl = cstack.nl;
2546   if (nl >= cstack.size)
2547     {
2548       int size = cstack.size *= 2;
2549       xrnew (cstack.cname, size, char *);
2550       xrnew (cstack.bracelev, size, int);
2551     }
2552   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2553   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2554   cstack.bracelev[nl] = bracelev;
2555   cstack.nl = nl + 1;
2556 }
2557
2558 static void
2559 popclass_above (int bracelev)
2560 {
2561   int nl;
2562
2563   for (nl = cstack.nl - 1;
2564        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2565        nl--)
2566     {
2567       free (cstack.cname[nl]);
2568       cstack.nl = nl;
2569     }
2570 }
2571
2572 static void
2573 write_classname (linebuffer *cn, const char *qualifier)
2574 {
2575   int i, len;
2576   int qlen = strlen (qualifier);
2577
2578   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2579     {
2580       len = 0;
2581       cn->len = 0;
2582       cn->buffer[0] = '\0';
2583     }
2584   else
2585     {
2586       len = strlen (cstack.cname[0]);
2587       linebuffer_setlen (cn, len);
2588       strcpy (cn->buffer, cstack.cname[0]);
2589     }
2590   for (i = 1; i < cstack.nl; i++)
2591     {
2592       char *s = cstack.cname[i];
2593       if (s == NULL)
2594         continue;
2595       linebuffer_setlen (cn, len + qlen + strlen (s));
2596       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2597     }
2598 }
2599
2600 \f
static bool consider_token (char *, int, int, int *, int, int, bool *);
static void make_C_tag (bool);

/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns true when the token is a candidate for a tag and false
 *      otherwise.  Along the way it advances the state machines listed
 *      under Globals.
 *
 *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
 *      with args.  C_EXTP points to which language we are looking at;
 *      when C_AUTO is set in *C_EXTP and a C++ construct is recognized,
 *      C_AUTO is replaced by C_PLPL.
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names only)
 */

static bool
consider_token (char *str, int len, int c, int *c_extp,
                int bracelev, int parlev, bool *is_func_or_var)
                                /* STR -- IN: token pointer */
                                /* LEN -- IN: token length */
                                /* C -- IN: first char after the token */
                                /* C_EXTP -- IN, OUT: C extensions mask */
                                /* BRACELEV -- IN: brace level */
                                /* PARLEV -- IN: parenthesis level */
                                /* IS_FUNC_OR_VAR -- OUT: function or
                                   variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen.
     Both are static so that they persist from one token to the next. */
  static enum sym_type structtype;
  static int structbracelev;
  /* NOTE(review): toktype is static too, but it is reassigned on every
     call before being read. */
  static enum sym_type toktype;


  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = true;
      return false;
     }

   /*
    * Advance the definedef state machine.
    */
   switch (definedef)
     {
     case dnone:
       /* We're not on a preprocessor line. */
       if (toktype == st_C_gnumacro)
         {
           fvdef = fdefunkey;
           return false;
         }
       break;
     case dsharpseen:
       /* First token after '#': only `define' is of interest. */
       if (toktype == st_C_define)
         {
           definedef = ddefineseen;
         }
       else
         {
           definedef = dignorerest;
         }
       return false;
     case ddefineseen:
       /*
        * Make a tag for any macro, unless it is a constant
        * and constantypedefs is false.
        */
       definedef = dignorerest;
       /* A macro followed directly by '(' takes arguments. */
       *is_func_or_var = (c == '(');
       if (!*is_func_or_var && !constantypedefs)
         return false;
       else
         return true;
     case dignorerest:
       return false;
     default:
       error ("internal error: definedef value.");
     }

   /*
    * Now typedefs
    */
   switch (typdef)
     {
     case tnone:
       if (toktype == st_C_typedef)
         {
           /* The typedef machine is only started when typedefs are
              to be tagged, but the keyword always resets the
              function/variable machine. */
           if (typedefs)
             typdef = tkeyseen;
           fvextern = false;
           fvdef = fvnone;
           return false;
         }
       break;
     case tkeyseen:
       switch (toktype)
         {
         case st_none:
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           typdef = ttypeseen;
         }
       break;
     case ttypeseen:
       if (structdef == snone && fvdef == fvnone)
         {
           fvdef = fvnameseen;
           return true;
         }
       break;
     case tend:
       /* Just before the typedef tag: anything but a struct-like
          keyword is taken as the tag. */
       switch (toktype)
         {
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           return false;
         }
       return true;
     }

   /* Struct-like keywords and their relatives. */
   switch (toktype)
     {
     case st_C_javastruct:
       if (structdef == stagseen)
         structdef = scolonseen;
       return false;
     case st_C_template:
     case st_C_class:
       if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
           && bracelev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       if (toktype == st_C_template)
         break;
       /* FALLTHRU */
     case st_C_struct:
     case st_C_enum:
       if (parlev == 0
           && fvdef != vignore
           && (typdef == tkeyseen
               || (typedefs_or_cplusplus && structdef == snone)))
         {
           /* Remember which keyword this was and where it was seen;
              the enum constant test below relies on both. */
           structdef = skeyseen;
           structtype = toktype;
           structbracelev = bracelev;
           if (fvdef == fvnameseen)
             fvdef = fvnone;
         }
       return false;
     }

   /* The token right after a struct-like keyword is its tag. */
   if (structdef == skeyseen)
     {
       structdef = stagseen;
       return true;
     }

   /* NOTE(review): every definedef state other than dnone has already
      returned above (barring the internal-error default case), so this
      assignment looks redundant -- confirm before removing. */
   if (typdef != tnone)
     definedef = dnone;

   /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
       switch (toktype)
         {
         case st_C_objprot:
           objdef = oprotocol;
           return false;
         case st_C_objimpl:
           objdef = oimplementation;
           return false;
         }
       break;
     case oimplementation:
       /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return false;
     case oprotocol:
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
       *is_func_or_var = true;
       return true;
     case oparenseen:
       objdef = ocatseen;
       *is_func_or_var = true;
       return true;
     case oinbody:
       break;
     case omethodsign:
       if (parlev == 0)
         {
           /* First part of a method name: start token_name afresh. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, len);
           memcpy (token_name.buffer, str, len);
           token_name.buffer[len] = '\0';
           return true;
         }
       return false;
     case omethodcolon:
       if (parlev == 0)
         objdef = omethodparm;
       return false;
     case omethodparm:
       if (parlev == 0)
         {
           /* A further part of the method name: append it to what
              token_name already holds. */
           int oldlen = token_name.len;
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, oldlen + len);
           memcpy (token_name.buffer + oldlen, str, len);
           token_name.buffer[oldlen + len] = '\0';
           return true;
         }
       return false;
     case oignore:
       if (toktype == st_C_objend)
         {
           /* Memory leakage here: the string pointed by objtag is
              never released, because many tests would be needed to
              avoid breaking on incorrect input code.  The amount of
              memory leaked here is the sum of the lengths of the
              class tags.
           free (objtag); */
           objdef = onone;
         }
       return false;
     }

   /* A function, variable or enum constant? */
   switch (toktype)
     {
     case st_C_extern:
       fvextern = true;
       switch  (fvdef)
         {
         case finlist:
         case flistseen:
         case fignore:
         case vignore:
           break;
         default:
           fvdef = fvnone;
         }
       return false;
     case st_C_ignore:
       fvextern = false;
       fvdef = vignore;
       return false;
     case st_C_operator:
       fvdef = foperator;
       *is_func_or_var = true;
       return true;
     case st_none:
       /* A plain identifier deeper in braces than the level where the
          latest struct-like keyword, an enum, was seen. */
       if (constantypedefs
           && structdef == snone
           && structtype == st_C_enum && bracelev > structbracelev)
         return true;           /* enum constant */
       switch (fvdef)
         {
         case fdefunkey:
           if (bracelev > 0)
             break;
           fvdef = fdefunname;  /* GNU macro */
           *is_func_or_var = true;
           return true;
         case fvnone:
           switch (typdef)
             {
             case ttypeseen:
               return false;
             case tnone:
               /* An asm construct is skipped like a variable
                  declaration, up to the next ';'. */
               if ((strneq (str, "asm", 3) && endtoken (str[3]))
                   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                 {
                   fvdef = vignore;
                   return false;
                 }
               break;
             }
          /* FALLTHRU */
          case fvnameseen:
          /* A qualified name whose last component is `operator'. */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = true;
              return true;
            }
          /* Inside braces, names are of interest only directly in a
             struct body. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = true;
          return true;
        }
      break;
    }

  return false;
}
2919
2920 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are meant to expand inside C_entries: they use its
 * parameters c_ext and inf and its locals curndx, newndx, lp, quotednl
 * and savetoken by name.
 */
static struct
{
  long linepos;                 /* value of charno when the line was read */
  linebuffer lb;                /* text of the line */
} lbs[2];

/* True when the buffer holding the latest line read is also the
   current one. */
#define current_lb_is_new (newndx == curndx)
/* Make the other of the two buffers the current one. */
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the C extensions mask c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next line into the current buffer, recording charno as its
   position, and restart scanning at its beginning.  Unlike CNL, this
   leaves definedef and the saved token untouched. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = false;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Same as CNL_SAVE_DEFINEDEF, and in addition restore the token saved
   in savetoken, if it is valid, and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = false;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2963
2964
2965 static void
2966 make_C_tag (bool isfun)
2967 {
2968   /* This function is never called when token.valid is false, but
2969      we must protect against invalid input or internal errors. */
2970   if (token.valid)
2971     make_tag (token_name.buffer, token_name.len, isfun, token.line,
2972               token.offset+token.length+1, token.lineno, token.linepos);
2973   else if (DEBUG)
2974     {                             /* this branch is optimized away if !DEBUG */
2975       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2976                 token_name.len + 17, isfun, token.line,
2977                 token.offset+token.length+1, token.lineno, token.linepos);
2978       error ("INVALID TOKEN");
2979     }
2980
2981   token.valid = false;
2982 }
2983
2984
2985 /*
2986  * C_entries ()
2987  *      This routine finds functions, variables, typedefs,
2988  *      #define's, enum constants and struct/union/enum definitions in
2989  *      C syntax and adds them to the list.
2990  */
2991 static void
2992 C_entries (int c_ext, FILE *inf)
2993                                 /* extension of C */
2994                                 /* input file */
2995 {
2996   register char c;              /* latest char read; '\0' for end of line */
2997   register char *lp;            /* pointer one beyond the character `c' */
2998   int curndx, newndx;           /* indices for current and new lb */
2999   register int tokoff;          /* offset in line of start of current token */
3000   register int toklen;          /* length of current token */
3001   const char *qualifier;        /* string used to qualify names */
3002   int qlen;                     /* length of qualifier */
3003   int bracelev;                 /* current brace level */
3004   int bracketlev;               /* current bracket level */
3005   int parlev;                   /* current parenthesis level */
3006   int attrparlev;               /* __attribute__ parenthesis level */
3007   int templatelev;              /* current template level */
3008   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3009   bool incomm, inquote, inchar, quotednl, midtoken;
3010   bool yacc_rules;              /* in the rules part of a yacc file */
3011   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3012
3013
3014   linebuffer_init (&lbs[0].lb);
3015   linebuffer_init (&lbs[1].lb);
3016   if (cstack.size == 0)
3017     {
3018       cstack.size = (DEBUG) ? 1 : 4;
3019       cstack.nl = 0;
3020       cstack.cname = xnew (cstack.size, char *);
3021       cstack.bracelev = xnew (cstack.size, int);
3022     }
3023
3024   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3025   curndx = newndx = 0;
3026   lp = curlb.buffer;
3027   *lp = 0;
3028
3029   fvdef = fvnone; fvextern = false; typdef = tnone;
3030   structdef = snone; definedef = dnone; objdef = onone;
3031   yacc_rules = false;
3032   midtoken = inquote = inchar = incomm = quotednl = false;
3033   token.valid = savetoken.valid = false;
3034   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3035   if (cjava)
3036     { qualifier = "."; qlen = 1; }
3037   else
3038     { qualifier = "::"; qlen = 2; }
3039
3040
3041   while (!feof (inf))
3042     {
3043       c = *lp++;
3044       if (c == '\\')
3045         {
3046           /* If we are at the end of the line, the next character is a
3047              '\0'; do not skip it, because it is what tells us
3048              to read the next line.  */
3049           if (*lp == '\0')
3050             {
3051               quotednl = true;
3052               continue;
3053             }
3054           lp++;
3055           c = ' ';
3056         }
3057       else if (incomm)
3058         {
3059           switch (c)
3060             {
3061             case '*':
3062               if (*lp == '/')
3063                 {
3064                   c = *lp++;
3065                   incomm = false;
3066                 }
3067               break;
3068             case '\0':
3069               /* Newlines inside comments do not end macro definitions in
3070                  traditional cpp. */
3071               CNL_SAVE_DEFINEDEF ();
3072               break;
3073             }
3074           continue;
3075         }
3076       else if (inquote)
3077         {
3078           switch (c)
3079             {
3080             case '"':
3081               inquote = false;
3082               break;
3083             case '\0':
3084               /* Newlines inside strings do not end macro definitions
3085                  in traditional cpp, even though compilers don't
3086                  usually accept them. */
3087               CNL_SAVE_DEFINEDEF ();
3088               break;
3089             }
3090           continue;
3091         }
3092       else if (inchar)
3093         {
3094           switch (c)
3095             {
3096             case '\0':
3097               /* Hmmm, something went wrong. */
3098               CNL ();
3099               /* FALLTHRU */
3100             case '\'':
3101               inchar = false;
3102               break;
3103             }
3104           continue;
3105         }
3106       else switch (c)
3107         {
3108         case '"':
3109           inquote = true;
3110           if (bracketlev > 0)
3111             continue;
3112           if (inattribute)
3113             break;
3114           switch (fvdef)
3115             {
3116             case fdefunkey:
3117             case fstartlist:
3118             case finlist:
3119             case fignore:
3120             case vignore:
3121               break;
3122             default:
3123               fvextern = false;
3124               fvdef = fvnone;
3125             }
3126           continue;
3127         case '\'':
3128           inchar = true;
3129           if (bracketlev > 0)
3130             continue;
3131           if (inattribute)
3132             break;
3133           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3134             {
3135               fvextern = false;
3136               fvdef = fvnone;
3137             }
3138           continue;
3139         case '/':
3140           if (*lp == '*')
3141             {
3142               incomm = true;
3143               lp++;
3144               c = ' ';
3145               if (bracketlev > 0)
3146                 continue;
3147             }
3148           else if (/* cplpl && */ *lp == '/')
3149             {
3150               c = '\0';
3151             }
3152           break;
3153         case '%':
3154           if ((c_ext & YACC) && *lp == '%')
3155             {
3156               /* Entering or exiting rules section in yacc file. */
3157               lp++;
3158               definedef = dnone; fvdef = fvnone; fvextern = false;
3159               typdef = tnone; structdef = snone;
3160               midtoken = inquote = inchar = incomm = quotednl = false;
3161               bracelev = 0;
3162               yacc_rules = !yacc_rules;
3163               continue;
3164             }
3165           else
3166             break;
3167         case '#':
3168           if (definedef == dnone)
3169             {
3170               char *cp;
3171               bool cpptoken = true;
3172
3173               /* Look back on this line.  If all blanks, or nonblanks
3174                  followed by an end of comment, this is a preprocessor
3175                  token. */
3176               for (cp = newlb.buffer; cp < lp-1; cp++)
3177                 if (!iswhite (*cp))
3178                   {
3179                     if (*cp == '*' && cp[1] == '/')
3180                       {
3181                         cp++;
3182                         cpptoken = true;
3183                       }
3184                     else
3185                       cpptoken = false;
3186                   }
3187               if (cpptoken)
3188                 definedef = dsharpseen;
3189             } /* if (definedef == dnone) */
3190           continue;
3191         case '[':
3192           bracketlev++;
3193           continue;
3194         default:
3195           if (bracketlev > 0)
3196             {
3197               if (c == ']')
3198                 --bracketlev;
3199               else if (c == '\0')
3200                 CNL_SAVE_DEFINEDEF ();
3201               continue;
3202             }
3203           break;
3204         } /* switch (c) */
3205
3206
3207       /* Consider token only if some involved conditions are satisfied. */
3208       if (typdef != tignore
3209           && definedef != dignorerest
3210           && fvdef != finlist
3211           && templatelev == 0
3212           && (definedef != dnone
3213               || structdef != scolonseen)
3214           && !inattribute)
3215         {
3216           if (midtoken)
3217             {
3218               if (endtoken (c))
3219                 {
3220                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3221                     /* This handles :: in the middle,
3222                        but not at the beginning of an identifier.
3223                        Also, space-separated :: is not recognized. */
3224                     {
3225                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3226                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3227                       lp += 2;
3228                       toklen += 2;
3229                       c = lp[-1];
3230                       goto still_in_token;
3231                     }
3232                   else
3233                     {
3234                       bool funorvar = false;
3235
3236                       if (yacc_rules
3237                           || consider_token (newlb.buffer + tokoff, toklen, c,
3238                                              &c_ext, bracelev, parlev,
3239                                              &funorvar))
3240                         {
3241                           if (fvdef == foperator)
3242                             {
3243                               char *oldlp = lp;
3244                               lp = skip_spaces (lp-1);
3245                               if (*lp != '\0')
3246                                 lp += 1;
3247                               while (*lp != '\0'
3248                                      && !iswhite (*lp) && *lp != '(')
3249                                 lp += 1;
3250                               c = *lp++;
3251                               toklen += lp - oldlp;
3252                             }
3253                           token.named = false;
3254                           if (!plainc
3255                               && nestlev > 0 && definedef == dnone)
3256                             /* in struct body */
3257                             {
3258                               int len;
3259                               write_classname (&token_name, qualifier);
3260                               len = token_name.len;
3261                               linebuffer_setlen (&token_name, len+qlen+toklen);
3262                               sprintf (token_name.buffer + len, "%s%.*s",
3263                                        qualifier, toklen, newlb.buffer + tokoff);
3264                               token.named = true;
3265                             }
3266                           else if (objdef == ocatseen)
3267                             /* Objective C category */
3268                             {
3269                               int len = strlen (objtag) + 2 + toklen;
3270                               linebuffer_setlen (&token_name, len);
3271                               sprintf (token_name.buffer, "%s(%.*s)",
3272                                        objtag, toklen, newlb.buffer + tokoff);
3273                               token.named = true;
3274                             }
3275                           else if (objdef == omethodtag
3276                                    || objdef == omethodparm)
3277                             /* Objective C method */
3278                             {
3279                               token.named = true;
3280                             }
3281                           else if (fvdef == fdefunname)
3282                             /* GNU DEFUN and similar macros */
3283                             {
3284                               bool defun = (newlb.buffer[tokoff] == 'F');
3285                               int off = tokoff;
3286                               int len = toklen;
3287
3288                               /* Rewrite the tag so that emacs lisp DEFUNs
3289                                  can be found by their elisp name */
3290                               if (defun)
3291                                 {
3292                                   off += 1;
3293                                   len -= 1;
3294                                 }
3295                               linebuffer_setlen (&token_name, len);
3296                               memcpy (token_name.buffer,
3297                                       newlb.buffer + off, len);
3298                               token_name.buffer[len] = '\0';
3299                               if (defun)
3300                                 while (--len >= 0)
3301                                   if (token_name.buffer[len] == '_')
3302                                     token_name.buffer[len] = '-';
3303                               token.named = defun;
3304                             }
3305                           else
3306                             {
3307                               linebuffer_setlen (&token_name, toklen);
3308                               memcpy (token_name.buffer,
3309                                       newlb.buffer + tokoff, toklen);
3310                               token_name.buffer[toklen] = '\0';
3311                               /* Name macros and members. */
3312                               token.named = (structdef == stagseen
3313                                              || typdef == ttypeseen
3314                                              || typdef == tend
3315                                              || (funorvar
3316                                                  && definedef == dignorerest)
3317                                              || (funorvar
3318                                                  && definedef == dnone
3319                                                  && structdef == snone
3320                                                  && bracelev > 0));
3321                             }
3322                           token.lineno = lineno;
3323                           token.offset = tokoff;
3324                           token.length = toklen;
3325                           token.line = newlb.buffer;
3326                           token.linepos = newlinepos;
3327                           token.valid = true;
3328
3329                           if (definedef == dnone
3330                               && (fvdef == fvnameseen
3331                                   || fvdef == foperator
3332                                   || structdef == stagseen
3333                                   || typdef == tend
3334                                   || typdef == ttypeseen
3335                                   || objdef != onone))
3336                             {
3337                               if (current_lb_is_new)
3338                                 switch_line_buffers ();
3339                             }
3340                           else if (definedef != dnone
3341                                    || fvdef == fdefunname
3342                                    || instruct)
3343                             make_C_tag (funorvar);
3344                         }
3345                       else /* not yacc and consider_token failed */
3346                         {
3347                           if (inattribute && fvdef == fignore)
3348                             {
3349                               /* We have just met __attribute__ after a
3350                                  function parameter list: do not tag the
3351                                  function again. */
3352                               fvdef = fvnone;
3353                             }
3354                         }
3355                       midtoken = false;
3356                     }
3357                 } /* if (endtoken (c)) */
3358               else if (intoken (c))
3359                 still_in_token:
3360                 {
3361                   toklen++;
3362                   continue;
3363                 }
3364             } /* if (midtoken) */
3365           else if (begtoken (c))
3366             {
3367               switch (definedef)
3368                 {
3369                 case dnone:
3370                   switch (fvdef)
3371                     {
3372                     case fstartlist:
3373                       /* This prevents tagging fb in
3374                          void (__attribute__((noreturn)) *fb) (void);
3375                          Fixing this is not easy and not very important. */
3376                       fvdef = finlist;
3377                       continue;
3378                     case flistseen:
3379                       if (plainc || declarations)
3380                         {
3381                           make_C_tag (true); /* a function */
3382                           fvdef = fignore;
3383                         }
3384                       break;
3385                     }
3386                   if (structdef == stagseen && !cjava)
3387                     {
3388                       popclass_above (bracelev);
3389                       structdef = snone;
3390                     }
3391                   break;
3392                 case dsharpseen:
3393                   savetoken = token;
3394                   break;
3395                 }
3396               if (!yacc_rules || lp == newlb.buffer + 1)
3397                 {
3398                   tokoff = lp - 1 - newlb.buffer;
3399                   toklen = 1;
3400                   midtoken = true;
3401                 }
3402               continue;
3403             } /* if (begtoken) */
3404         } /* if must look at token */
3405
3406
3407       /* Detect end of line, colon, comma, semicolon and various braces
3408          after having handled a token.*/
3409       switch (c)
3410         {
3411         case ':':
3412           if (inattribute)
3413             break;
3414           if (yacc_rules && token.offset == 0 && token.valid)
3415             {
3416               make_C_tag (false); /* a yacc function */
3417               break;
3418             }
3419           if (definedef != dnone)
3420             break;
3421           switch (objdef)
3422             {
3423             case  otagseen:
3424               objdef = oignore;
3425               make_C_tag (true); /* an Objective C class */
3426               break;
3427             case omethodtag:
3428             case omethodparm:
3429               objdef = omethodcolon;
3430               linebuffer_setlen (&token_name, token_name.len + 1);
3431               strcat (token_name.buffer, ":");
3432               break;
3433             }
3434           if (structdef == stagseen)
3435             {
3436               structdef = scolonseen;
3437               break;
3438             }
3439           /* Should be useless, but may work as a safety net. */
3440           if (cplpl && fvdef == flistseen)
3441             {
3442               make_C_tag (true); /* a function */
3443               fvdef = fignore;
3444               break;
3445             }
3446           break;
3447         case ';':
3448           if (definedef != dnone || inattribute)
3449             break;
3450           switch (typdef)
3451             {
3452             case tend:
3453             case ttypeseen:
3454               make_C_tag (false); /* a typedef */
3455               typdef = tnone;
3456               fvdef = fvnone;
3457               break;
3458             case tnone:
3459             case tinbody:
3460             case tignore:
3461               switch (fvdef)
3462                 {
3463                 case fignore:
3464                   if (typdef == tignore || cplpl)
3465                     fvdef = fvnone;
3466                   break;
3467                 case fvnameseen:
3468                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3469                       || (members && instruct))
3470                     make_C_tag (false); /* a variable */
3471                   fvextern = false;
3472                   fvdef = fvnone;
3473                   token.valid = false;
3474                   break;
3475                 case flistseen:
3476                   if ((declarations
3477                        && (cplpl || !instruct)
3478                        && (typdef == tnone || (typdef != tignore && instruct)))
3479                       || (members
3480                           && plainc && instruct))
3481                     make_C_tag (true);  /* a function */
3482                   /* FALLTHRU */
3483                 default:
3484                   fvextern = false;
3485                   fvdef = fvnone;
3486                   if (declarations
3487                        && cplpl && structdef == stagseen)
3488                     make_C_tag (false); /* forward declaration */
3489                   else
3490                     token.valid = false;
3491                 } /* switch (fvdef) */
3492               /* FALLTHRU */
3493             default:
3494               if (!instruct)
3495                 typdef = tnone;
3496             }
3497           if (structdef == stagseen)
3498             structdef = snone;
3499           break;
3500         case ',':
3501           if (definedef != dnone || inattribute)
3502             break;
3503           switch (objdef)
3504             {
3505             case omethodtag:
3506             case omethodparm:
3507               make_C_tag (true); /* an Objective C method */
3508               objdef = oinbody;
3509               break;
3510             }
3511           switch (fvdef)
3512             {
3513             case fdefunkey:
3514             case foperator:
3515             case fstartlist:
3516             case finlist:
3517             case fignore:
3518             case vignore:
3519               break;
3520             case fdefunname:
3521               fvdef = fignore;
3522               break;
3523             case fvnameseen:
3524               if (parlev == 0
3525                   && ((globals
3526                        && bracelev == 0
3527                        && templatelev == 0
3528                        && (!fvextern || declarations))
3529                       || (members && instruct)))
3530                   make_C_tag (false); /* a variable */
3531               break;
3532             case flistseen:
3533               if ((declarations && typdef == tnone && !instruct)
3534                   || (members && typdef != tignore && instruct))
3535                 {
3536                   make_C_tag (true); /* a function */
3537                   fvdef = fvnameseen;
3538                 }
3539               else if (!declarations)
3540                 fvdef = fvnone;
3541               token.valid = false;
3542               break;
3543             default:
3544               fvdef = fvnone;
3545             }
3546           if (structdef == stagseen)
3547             structdef = snone;
3548           break;
3549         case ']':
3550           if (definedef != dnone || inattribute)
3551             break;
3552           if (structdef == stagseen)
3553             structdef = snone;
3554           switch (typdef)
3555             {
3556             case ttypeseen:
3557             case tend:
3558               typdef = tignore;
3559               make_C_tag (false);       /* a typedef */
3560               break;
3561             case tnone:
3562             case tinbody:
3563               switch (fvdef)
3564                 {
3565                 case foperator:
3566                 case finlist:
3567                 case fignore:
3568                 case vignore:
3569                   break;
3570                 case fvnameseen:
3571                   if ((members && bracelev == 1)
3572                       || (globals && bracelev == 0
3573                           && (!fvextern || declarations)))
3574                     make_C_tag (false); /* a variable */
3575                   /* FALLTHRU */
3576                 default:
3577                   fvdef = fvnone;
3578                 }
3579               break;
3580             }
3581           break;
3582         case '(':
3583           if (inattribute)
3584             {
3585               attrparlev++;
3586               break;
3587             }
3588           if (definedef != dnone)
3589             break;
3590           if (objdef == otagseen && parlev == 0)
3591             objdef = oparenseen;
3592           switch (fvdef)
3593             {
3594             case fvnameseen:
3595               if (typdef == ttypeseen
3596                   && *lp != '*'
3597                   && !instruct)
3598                 {
3599                   /* This handles constructs like:
3600                      typedef void OperatorFun (int fun); */
3601                   make_C_tag (false);
3602                   typdef = tignore;
3603                   fvdef = fignore;
3604                   break;
3605                 }
3606               /* FALLTHRU */
3607             case foperator:
3608               fvdef = fstartlist;
3609               break;
3610             case flistseen:
3611               fvdef = finlist;
3612               break;
3613             }
3614           parlev++;
3615           break;
3616         case ')':
3617           if (inattribute)
3618             {
3619               if (--attrparlev == 0)
3620                 inattribute = false;
3621               break;
3622             }
3623           if (definedef != dnone)
3624             break;
3625           if (objdef == ocatseen && parlev == 1)
3626             {
3627               make_C_tag (true); /* an Objective C category */
3628               objdef = oignore;
3629             }
3630           if (--parlev == 0)
3631             {
3632               switch (fvdef)
3633                 {
3634                 case fstartlist:
3635                 case finlist:
3636                   fvdef = flistseen;
3637                   break;
3638                 }
3639               if (!instruct
3640                   && (typdef == tend
3641                       || typdef == ttypeseen))
3642                 {
3643                   typdef = tignore;
3644                   make_C_tag (false); /* a typedef */
3645                 }
3646             }
3647           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3648             parlev = 0;
3649           break;
3650         case '{':
3651           if (definedef != dnone)
3652             break;
3653           if (typdef == ttypeseen)
3654             {
3655               /* Whenever typdef is set to tinbody (currently only
3656                  here), typdefbracelev should be set to bracelev. */
3657               typdef = tinbody;
3658               typdefbracelev = bracelev;
3659             }
3660           switch (fvdef)
3661             {
3662             case flistseen:
3663               make_C_tag (true);    /* a function */
3664               /* FALLTHRU */
3665             case fignore:
3666               fvdef = fvnone;
3667               break;
3668             case fvnone:
3669               switch (objdef)
3670                 {
3671                 case otagseen:
3672                   make_C_tag (true); /* an Objective C class */
3673                   objdef = oignore;
3674                   break;
3675                 case omethodtag:
3676                 case omethodparm:
3677                   make_C_tag (true); /* an Objective C method */
3678                   objdef = oinbody;
3679                   break;
3680                 default:
3681                   /* Neutralize `extern "C" {' grot. */
3682                   if (bracelev == 0 && structdef == snone && nestlev == 0
3683                       && typdef == tnone)
3684                     bracelev = -1;
3685                 }
3686               break;
3687             }
3688           switch (structdef)
3689             {
3690             case skeyseen:         /* unnamed struct */
3691               pushclass_above (bracelev, NULL, 0);
3692               structdef = snone;
3693               break;
3694             case stagseen:         /* named struct or enum */
3695             case scolonseen:       /* a class */
3696               pushclass_above (bracelev,token.line+token.offset, token.length);
3697               structdef = snone;
3698               make_C_tag (false);  /* a struct or enum */
3699               break;
3700             }
3701           bracelev += 1;
3702           break;
3703         case '*':
3704           if (definedef != dnone)
3705             break;
3706           if (fvdef == fstartlist)
3707             {
3708               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3709               token.valid = false;
3710             }
3711           break;
3712         case '}':
3713           if (definedef != dnone)
3714             break;
3715           bracelev -= 1;
3716           if (!ignoreindent && lp == newlb.buffer + 1)
3717             {
3718               if (bracelev != 0)
3719                 token.valid = false; /* unexpected value, token unreliable */
3720               bracelev = 0;     /* reset brace level if first column */
3721               parlev = 0;       /* also reset paren level, just in case... */
3722             }
3723           else if (bracelev < 0)
3724             {
3725               token.valid = false; /* something gone amiss, token unreliable */
3726               bracelev = 0;
3727             }
3728           if (bracelev == 0 && fvdef == vignore)
3729             fvdef = fvnone;             /* end of function */
3730           popclass_above (bracelev);
3731           structdef = snone;
3732           /* Only if typdef == tinbody is typdefbracelev significant. */
3733           if (typdef == tinbody && bracelev <= typdefbracelev)
3734             {
3735               assert (bracelev == typdefbracelev);
3736               typdef = tend;
3737             }
3738           break;
3739         case '=':
3740           if (definedef != dnone)
3741             break;
3742           switch (fvdef)
3743             {
3744             case foperator:
3745             case finlist:
3746             case fignore:
3747             case vignore:
3748               break;
3749             case fvnameseen:
3750               if ((members && bracelev == 1)
3751                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3752                 make_C_tag (false); /* a variable */
3753               /* FALLTHRU */
3754             default:
3755               fvdef = vignore;
3756             }
3757           break;
3758         case '<':
3759           if (cplpl
3760               && (structdef == stagseen || fvdef == fvnameseen))
3761             {
3762               templatelev++;
3763               break;
3764             }
3765           goto resetfvdef;
3766         case '>':
3767           if (templatelev > 0)
3768             {
3769               templatelev--;
3770               break;
3771             }
3772           goto resetfvdef;
3773         case '+':
3774         case '-':
3775           if (objdef == oinbody && bracelev == 0)
3776             {
3777               objdef = omethodsign;
3778               break;
3779             }
3780           /* FALLTHRU */
3781         resetfvdef:
3782         case '#': case '~': case '&': case '%': case '/':
3783         case '|': case '^': case '!': case '.': case '?':
3784           if (definedef != dnone)
3785             break;
3786           /* These surely cannot follow a function tag in C. */
3787           switch (fvdef)
3788             {
3789             case foperator:
3790             case finlist:
3791             case fignore:
3792             case vignore:
3793               break;
3794             default:
3795               fvdef = fvnone;
3796             }
3797           break;
3798         case '\0':
3799           if (objdef == otagseen)
3800             {
3801               make_C_tag (true); /* an Objective C class */
3802               objdef = oignore;
3803             }
3804           /* If a macro spans multiple lines don't reset its state. */
3805           if (quotednl)
3806             CNL_SAVE_DEFINEDEF ();
3807           else
3808             CNL ();
3809           break;
3810         } /* switch (c) */
3811
3812     } /* while not eof */
3813
3814   free (lbs[0].lb.buffer);
3815   free (lbs[1].lb.buffer);
3816 }
3817
3818 /*
3819  * Process either a C++ file or a C file depending on the setting
3820  * of a global flag.
3821  */
3822 static void
3823 default_C_entries (FILE *inf)
3824 {
3825   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3826 }
3827
/* Tag INF as plain C: no language flag at all is handed to C_entries. */
static void
plain_C_entries (FILE *inf)
{
  int const no_flags = 0;

  C_entries (no_flags, inf);
}
3834
3835 /* Always do C++. */
3836 static void
3837 Cplusplus_entries (FILE *inf)
3838 {
3839   C_entries (C_PLPL, inf);
3840 }
3841
3842 /* Always do Java. */
3843 static void
3844 Cjava_entries (FILE *inf)
3845 {
3846   C_entries (C_JAVA, inf);
3847 }
3848
3849 /* Always do C*. */
3850 static void
3851 Cstar_entries (FILE *inf)
3852 {
3853   C_entries (C_STAR, inf);
3854 }
3855
3856 /* Always do Yacc. */
3857 static void
3858 Yacc_entries (FILE *inf)
3859 {
3860   C_entries (YACC, inf);
3861 }
3862
3863 \f
3864 /* Useful macros. */
/* Loop header: while FILE_POINTER is not at end of file, read one line
   into LINE_BUFFER and point CHAR_POINTER at the start of its text.
   The statement that follows the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* fetch a line */  \
             char_pointer = line_buffer.buffer, /* point at its text */ \
             true))                     /* the chain is always true */
3873
/* True when CP points at the keyword KW (which must be a literal
   string) and the character right after it satisfies notinname.  On
   success CP is advanced past the keyword and the spaces following
   it; on failure CP is left alone.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), true)       /* only compiles for a literal */       \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* text matches kw */   \
   && notinname ((cp)[sizeof (kw) - 1])         /* and kw ends here */  \
   && ((cp) = skip_spaces (&(cp)[sizeof (kw) - 1]))) /* step over it */
3879
/* Case-insensitive relative of LOOKING_AT: true when CP points at the
   literal string KW, whatever follows it.  On success CP is advanced
   past the keyword only; no notinname test is made and no spaces are
   skipped.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), true)       /* only compiles for a literal */       \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* text matches kw */   \
   && ((cp) += sizeof (kw) - 1))                /* step over kw only */
3885
3886 /*
3887  * Read a file, but do no processing.  This is used to do regexp
3888  * matching on files that have no language defined.
3889  */
3890 static void
3891 just_read_file (FILE *inf)
3892 {
3893   while (!feof (inf))
3894     readline (&lb, inf);
3895 }
3896
3897 \f
3898 /* Fortran parsing */
3899
3900 static void F_takeprec (void);
3901 static void F_getit (FILE *);
3902
3903 static void
3904 F_takeprec (void)
3905 {
3906   dbp = skip_spaces (dbp);
3907   if (*dbp != '*')
3908     return;
3909   dbp++;
3910   dbp = skip_spaces (dbp);
3911   if (strneq (dbp, "(*)", 3))
3912     {
3913       dbp += 3;
3914       return;
3915     }
3916   if (!ISDIGIT (*dbp))
3917     {
3918       --dbp;                    /* force failure */
3919       return;
3920     }
3921   do
3922     dbp++;
3923   while (ISDIGIT (*dbp));
3924 }
3925
3926 static void
3927 F_getit (FILE *inf)
3928 {
3929   register char *cp;
3930
3931   dbp = skip_spaces (dbp);
3932   if (*dbp == '\0')
3933     {
3934       readline (&lb, inf);
3935       dbp = lb.buffer;
3936       if (dbp[5] != '&')
3937         return;
3938       dbp += 6;
3939       dbp = skip_spaces (dbp);
3940     }
3941   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3942     return;
3943   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3944     continue;
3945   make_tag (dbp, cp-dbp, true,
3946             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3947 }
3948
3949
3950 static void
3951 Fortran_functions (FILE *inf)
3952 {
3953   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3954     {
3955       if (*dbp == '%')
3956         dbp++;                  /* Ratfor escape to fortran */
3957       dbp = skip_spaces (dbp);
3958       if (*dbp == '\0')
3959         continue;
3960
3961       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3962         dbp = skip_spaces (dbp);
3963
3964       if (LOOKING_AT_NOCASE (dbp, "pure"))
3965         dbp = skip_spaces (dbp);
3966
3967       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3968         dbp = skip_spaces (dbp);
3969
3970       switch (lowcase (*dbp))
3971         {
3972         case 'i':
3973           if (nocase_tail ("integer"))
3974             F_takeprec ();
3975           break;
3976         case 'r':
3977           if (nocase_tail ("real"))
3978             F_takeprec ();
3979           break;
3980         case 'l':
3981           if (nocase_tail ("logical"))
3982             F_takeprec ();
3983           break;
3984         case 'c':
3985           if (nocase_tail ("complex") || nocase_tail ("character"))
3986             F_takeprec ();
3987           break;
3988         case 'd':
3989           if (nocase_tail ("double"))
3990             {
3991               dbp = skip_spaces (dbp);
3992               if (*dbp == '\0')
3993                 continue;
3994               if (nocase_tail ("precision"))
3995                 break;
3996               continue;
3997             }
3998           break;
3999         }
4000       dbp = skip_spaces (dbp);
4001       if (*dbp == '\0')
4002         continue;
4003       switch (lowcase (*dbp))
4004         {
4005         case 'f':
4006           if (nocase_tail ("function"))
4007             F_getit (inf);
4008           continue;
4009         case 's':
4010           if (nocase_tail ("subroutine"))
4011             F_getit (inf);
4012           continue;
4013         case 'e':
4014           if (nocase_tail ("entry"))
4015             F_getit (inf);
4016           continue;
4017         case 'b':
4018           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4019             {
4020               dbp = skip_spaces (dbp);
4021               if (*dbp == '\0') /* assume un-named */
4022                 make_tag ("blockdata", 9, true,
4023                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4024               else
4025                 F_getit (inf);  /* look for name */
4026             }
4027           continue;
4028         }
4029     }
4030 }
4031
4032 \f
4033 /*
4034  * Ada parsing
4035  * Original code by
4036  * Philippe Waroquiers (1998)
4037  */
4038
4039 /* Once we are positioned after an "interesting" keyword, let's get
4040    the real tag value necessary. */
4041 static void
4042 Ada_getit (FILE *inf, const char *name_qualifier)
4043 {
4044   register char *cp;
4045   char *name;
4046   char c;
4047
4048   while (!feof (inf))
4049     {
4050       dbp = skip_spaces (dbp);
4051       if (*dbp == '\0'
4052           || (dbp[0] == '-' && dbp[1] == '-'))
4053         {
4054           readline (&lb, inf);
4055           dbp = lb.buffer;
4056         }
4057       switch (lowcase (*dbp))
4058         {
4059         case 'b':
4060           if (nocase_tail ("body"))
4061             {
4062               /* Skipping body of   procedure body   or   package body or ....
4063                  resetting qualifier to body instead of spec. */
4064               name_qualifier = "/b";
4065               continue;
4066             }
4067           break;
4068         case 't':
4069           /* Skipping type of   task type   or   protected type ... */
4070           if (nocase_tail ("type"))
4071             continue;
4072           break;
4073         }
4074       if (*dbp == '"')
4075         {
4076           dbp += 1;
4077           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4078             continue;
4079         }
4080       else
4081         {
4082           dbp = skip_spaces (dbp);
4083           for (cp = dbp;
4084                (*cp != '\0'
4085                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4086                cp++)
4087             continue;
4088           if (cp == dbp)
4089             return;
4090         }
4091       c = *cp;
4092       *cp = '\0';
4093       name = concat (dbp, name_qualifier, "");
4094       *cp = c;
4095       make_tag (name, strlen (name), true,
4096                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4097       free (name);
4098       if (c == '"')
4099         dbp = cp + 1;
4100       return;
4101     }
4102 }
4103
4104 static void
4105 Ada_funcs (FILE *inf)
4106 {
4107   bool inquote = false;
4108   bool skip_till_semicolumn = false;
4109
4110   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4111     {
4112       while (*dbp != '\0')
4113         {
4114           /* Skip a string i.e. "abcd". */
4115           if (inquote || (*dbp == '"'))
4116             {
4117               dbp = strchr (dbp + !inquote, '"');
4118               if (dbp != NULL)
4119                 {
4120                   inquote = false;
4121                   dbp += 1;
4122                   continue;     /* advance char */
4123                 }
4124               else
4125                 {
4126                   inquote = true;
4127                   break;        /* advance line */
4128                 }
4129             }
4130
4131           /* Skip comments. */
4132           if (dbp[0] == '-' && dbp[1] == '-')
4133             break;              /* advance line */
4134
4135           /* Skip character enclosed in single quote i.e. 'a'
4136              and skip single quote starting an attribute i.e. 'Image. */
4137           if (*dbp == '\'')
4138             {
4139               dbp++ ;
4140               if (*dbp != '\0')
4141                 dbp++;
4142               continue;
4143             }
4144
4145           if (skip_till_semicolumn)
4146             {
4147               if (*dbp == ';')
4148                 skip_till_semicolumn = false;
4149               dbp++;
4150               continue;         /* advance char */
4151             }
4152
4153           /* Search for beginning of a token.  */
4154           if (!begtoken (*dbp))
4155             {
4156               dbp++;
4157               continue;         /* advance char */
4158             }
4159
4160           /* We are at the beginning of a token. */
4161           switch (lowcase (*dbp))
4162             {
4163             case 'f':
4164               if (!packages_only && nocase_tail ("function"))
4165                 Ada_getit (inf, "/f");
4166               else
4167                 break;          /* from switch */
4168               continue;         /* advance char */
4169             case 'p':
4170               if (!packages_only && nocase_tail ("procedure"))
4171                 Ada_getit (inf, "/p");
4172               else if (nocase_tail ("package"))
4173                 Ada_getit (inf, "/s");
4174               else if (nocase_tail ("protected")) /* protected type */
4175                 Ada_getit (inf, "/t");
4176               else
4177                 break;          /* from switch */
4178               continue;         /* advance char */
4179
4180             case 'u':
4181               if (typedefs && !packages_only && nocase_tail ("use"))
4182                 {
4183                   /* when tagging types, avoid tagging  use type Pack.Typename;
4184                      for this, we will skip everything till a ; */
4185                   skip_till_semicolumn = true;
4186                   continue;     /* advance char */
4187                 }
4188
4189             case 't':
4190               if (!packages_only && nocase_tail ("task"))
4191                 Ada_getit (inf, "/k");
4192               else if (typedefs && !packages_only && nocase_tail ("type"))
4193                 {
4194                   Ada_getit (inf, "/t");
4195                   while (*dbp != '\0')
4196                     dbp += 1;
4197                 }
4198               else
4199                 break;          /* from switch */
4200               continue;         /* advance char */
4201             }
4202
4203           /* Look for the end of the token. */
4204           while (!endtoken (*dbp))
4205             dbp++;
4206
4207         } /* advance char */
4208     } /* advance line */
4209 }
4210
4211 \f
4212 /*
4213  * Unix and microcontroller assembly tag handling
4214  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4215  * Idea by Bob Weiner, Motorola Inc. (1994)
4216  */
4217 static void
4218 Asm_labels (FILE *inf)
4219 {
4220   register char *cp;
4221
4222   LOOP_ON_INPUT_LINES (inf, lb, cp)
4223     {
4224       /* If first char is alphabetic or one of [_.$], test for colon
4225          following identifier. */
4226       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4227         {
4228           /* Read past label. */
4229           cp++;
4230           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4231             cp++;
4232           if (*cp == ':' || iswhite (*cp))
4233             /* Found end of label, so copy it and add it to the table. */
4234             make_tag (lb.buffer, cp - lb.buffer, true,
4235                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4236         }
4237     }
4238 }
4239
4240 \f
4241 /*
4242  * Perl support
4243  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4244  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4245  * Perl variable names: /^(my|local).../
4246  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4247  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4248  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4249  */
4250 static void
4251 Perl_functions (FILE *inf)
4252 {
4253   char *package = savestr ("main"); /* current package name */
4254   register char *cp;
4255
4256   LOOP_ON_INPUT_LINES (inf, lb, cp)
4257     {
4258       cp = skip_spaces (cp);
4259
4260       if (LOOKING_AT (cp, "package"))
4261         {
4262           free (package);
4263           get_tag (cp, &package);
4264         }
4265       else if (LOOKING_AT (cp, "sub"))
4266         {
4267           char *pos, *sp;
4268
4269         subr:
4270           sp = cp;
4271           while (!notinname (*cp))
4272             cp++;
4273           if (cp == sp)
4274             continue;           /* nothing found */
4275           if ((pos = strchr (sp, ':')) != NULL
4276               && pos < cp && pos[1] == ':')
4277             /* The name is already qualified. */
4278             make_tag (sp, cp - sp, true,
4279                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4280           else
4281             /* Qualify it. */
4282             {
4283               char savechar, *name;
4284
4285               savechar = *cp;
4286               *cp = '\0';
4287               name = concat (package, "::", sp);
4288               *cp = savechar;
4289               make_tag (name, strlen (name), true,
4290                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4291               free (name);
4292             }
4293         }
4294       else if (LOOKING_AT (cp, "use constant")
4295                || LOOKING_AT (cp, "use constant::defer"))
4296         {
4297           /* For hash style multi-constant like
4298                 use constant { FOO => 123,
4299                                BAR => 456 };
4300              only the first FOO is picked up.  Parsing across the value
4301              expressions would be difficult in general, due to possible nested
4302              hashes, here-documents, etc.  */
4303           if (*cp == '{')
4304             cp = skip_spaces (cp+1);
4305           goto subr;
4306         }
4307       else if (globals) /* only if we are tagging global vars */
4308         {
4309           /* Skip a qualifier, if any. */
4310           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4311           /* After "my" or "local", but before any following paren or space. */
4312           char *varstart = cp;
4313
4314           if (qual              /* should this be removed?  If yes, how? */
4315               && (*cp == '$' || *cp == '@' || *cp == '%'))
4316             {
4317               varstart += 1;
4318               do
4319                 cp++;
4320               while (ISALNUM (*cp) || *cp == '_');
4321             }
4322           else if (qual)
4323             {
4324               /* Should be examining a variable list at this point;
4325                  could insist on seeing an open parenthesis. */
4326               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4327                 cp++;
4328             }
4329           else
4330             continue;
4331
4332           make_tag (varstart, cp - varstart, false,
4333                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4334         }
4335     }
4336   free (package);
4337 }
4338
4339
4340 /*
4341  * Python support
4342  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4343  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4344  * More ideas by seb bacon <seb@jamkit.com> (2002)
4345  */
4346 static void
4347 Python_functions (FILE *inf)
4348 {
4349   register char *cp;
4350
4351   LOOP_ON_INPUT_LINES (inf, lb, cp)
4352     {
4353       cp = skip_spaces (cp);
4354       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4355         {
4356           char *name = cp;
4357           while (!notinname (*cp) && *cp != ':')
4358             cp++;
4359           make_tag (name, cp - name, true,
4360                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4361         }
4362     }
4363 }
4364
4365 \f
4366 /*
4367  * PHP support
4368  * Look for:
4369  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4370  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4371  *  - /^[ \t]*define\(\"[^\"]+/
4372  * Only with --members:
4373  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4374  * Idea by Diez B. Roggisch (2001)
4375  */
4376 static void
4377 PHP_functions (FILE *inf)
4378 {
4379   char *cp, *name;
4380   bool search_identifier = false;
4381
4382   LOOP_ON_INPUT_LINES (inf, lb, cp)
4383     {
4384       cp = skip_spaces (cp);
4385       name = cp;
4386       if (search_identifier
4387           && *cp != '\0')
4388         {
4389           while (!notinname (*cp))
4390             cp++;
4391           make_tag (name, cp - name, true,
4392                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4393           search_identifier = false;
4394         }
4395       else if (LOOKING_AT (cp, "function"))
4396         {
4397           if (*cp == '&')
4398             cp = skip_spaces (cp+1);
4399           if (*cp != '\0')
4400             {
4401               name = cp;
4402               while (!notinname (*cp))
4403                 cp++;
4404               make_tag (name, cp - name, true,
4405                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4406             }
4407           else
4408             search_identifier = true;
4409         }
4410       else if (LOOKING_AT (cp, "class"))
4411         {
4412           if (*cp != '\0')
4413             {
4414               name = cp;
4415               while (*cp != '\0' && !iswhite (*cp))
4416                 cp++;
4417               make_tag (name, cp - name, false,
4418                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4419             }
4420           else
4421             search_identifier = true;
4422         }
4423       else if (strneq (cp, "define", 6)
4424                && (cp = skip_spaces (cp+6))
4425                && *cp++ == '('
4426                && (*cp == '"' || *cp == '\''))
4427         {
4428           char quote = *cp++;
4429           name = cp;
4430           while (*cp != quote && *cp != '\0')
4431             cp++;
4432           make_tag (name, cp - name, false,
4433                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4434         }
4435       else if (members
4436                && LOOKING_AT (cp, "var")
4437                && *cp == '$')
4438         {
4439           name = cp;
4440           while (!notinname (*cp))
4441             cp++;
4442           make_tag (name, cp - name, false,
4443                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4444         }
4445     }
4446 }
4447
4448 \f
4449 /*
4450  * Cobol tag functions
4451  * We could look for anything that could be a paragraph name.
4452  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4453  * Idea by Corny de Souza (1993)
4454  */
4455 static void
4456 Cobol_paragraphs (FILE *inf)
4457 {
4458   register char *bp, *ep;
4459
4460   LOOP_ON_INPUT_LINES (inf, lb, bp)
4461     {
4462       if (lb.len < 9)
4463         continue;
4464       bp += 8;
4465
4466       /* If eoln, compiler option or comment ignore whole line. */
4467       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4468         continue;
4469
4470       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4471         continue;
4472       if (*ep++ == '.')
4473         make_tag (bp, ep - bp, true,
4474                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4475     }
4476 }
4477
4478 \f
4479 /*
4480  * Makefile support
4481  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4482  */
4483 static void
4484 Makefile_targets (FILE *inf)
4485 {
4486   register char *bp;
4487
4488   LOOP_ON_INPUT_LINES (inf, lb, bp)
4489     {
4490       if (*bp == '\t' || *bp == '#')
4491         continue;
4492       while (*bp != '\0' && *bp != '=' && *bp != ':')
4493         bp++;
4494       if (*bp == ':' || (globals && *bp == '='))
4495         {
4496           /* We should detect if there is more than one tag, but we do not.
4497              We just skip initial and final spaces. */
4498           char * namestart = skip_spaces (lb.buffer);
4499           while (--bp > namestart)
4500             if (!notinname (*bp))
4501               break;
4502           make_tag (namestart, bp - namestart + 1, true,
4503                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4504         }
4505     }
4506 }
4507
4508 \f
4509 /*
4510  * Pascal parsing
4511  * Original code by Mosur K. Mohan (1989)
4512  *
4513  *  Locates tags for procedures & functions.  Doesn't do any type- or
4514  *  var-definitions.  It does look for the keyword "extern" or
4515  *  "forward" immediately following the procedure statement; if found,
4516  *  the tag is skipped.
4517  */
/* Scan INF one character at a time through the global pointer dbp,
   refilling lb with readline each time a line is used up.

   A name found after the PROCEDURE/FUNCTION keyword is not tagged at
   once: the line holding it is copied into tline and the tag is made
   only after the ';' that ends the statement, once the text that
   follows has been checked against "extern" and "forward".  */
4518 static void
4519 Pascal_functions (FILE *inf)
4520 {
4521   linebuffer tline;             /* mostly copied from C_entries */
4522   long save_lcno;
4523   int save_lineno, namelen, taglen;
4524   char c, *name;
4525
4526   bool                          /* each of these flags is true if: */
4527     incomment,                  /* point is inside a comment */
4528     inquote,                    /* point is inside '..' string */
4529     get_tagname,                /* point is after PROCEDURE/FUNCTION
4530                                    keyword, so next item = potential tag */
4531     found_tag,                  /* point is after a potential tag */
4532     inparms,                    /* point is within parameter-list */
4533     verify_tag;                 /* point has passed the parm-list, so the
4534                                    next token will determine whether this
4535                                    is a FORWARD/EXTERN to be ignored, or
4536                                    whether it is a real tag */
4537
4538   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4539   name = NULL;                  /* keep compiler quiet */
       /* Begin with an empty line, so that the first pass through the
          loop below reads the first real line.  */
4540   dbp = lb.buffer;
4541   *dbp = '\0';
4542   linebuffer_init (&tline);
4543
4544   incomment = inquote = false;
4545   found_tag = false;            /* have a proc name; check if extern */
4546   get_tagname = false;          /* found "procedure" keyword         */
4547   inparms = false;              /* found '(' after "proc"            */
4548   verify_tag = false;           /* check if "extern" is ahead        */
4549
4550
4551   while (!feof (inf))           /* long main loop to get next char */
4552     {
4553       c = *dbp++;
4554       if (c == '\0')            /* if end of line */
4555         {
4556           readline (&lb, inf);
4557           dbp = lb.buffer;
4558           if (*dbp == '\0')
4559             continue;
4560           if (!((found_tag && verify_tag)
4561                 || get_tagname))
4562             c = *dbp++;         /* only if don't need *dbp pointing
4563                                    to the beginning of the name of
4564                                    the procedure or function */
4565         }
           /* Comments and quoted strings are skipped without looking at
              their contents, apart from their terminators.  */
4566       if (incomment)
4567         {
4568           if (c == '}')         /* within { } comments */
4569             incomment = false;
4570           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4571             {
4572               dbp++;
4573               incomment = false;
4574             }
4575           continue;
4576         }
4577       else if (inquote)
4578         {
4579           if (c == '\'')
4580             inquote = false;
4581           continue;
4582         }
4583       else
4584         switch (c)
4585           {
4586           case '\'':
4587             inquote = true;     /* found first quote */
4588             continue;
4589           case '{':             /* found open { comment */
4590             incomment = true;
4591             continue;
4592           case '(':
4593             if (*dbp == '*')    /* found open (* comment */
4594               {
4595                 incomment = true;
4596                 dbp++;
4597               }
4598             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4599               inparms = true;
4600             continue;
4601           case ')':             /* end of parms list */
4602             if (inparms)
4603               inparms = false;
4604             continue;
4605           case ';':
4606             if (found_tag && !inparms) /* end of proc or fn stmt */
4607               {
4608                 verify_tag = true;
                     /* This break only leaves the switch, so that the
                        verification code below runs on what follows
                        the ';'.  */
4609                 break;
4610               }
4611             continue;
4612           }
           /* While *dbp is a blank this test fails and, since found_tag
              is set, nothing below applies either: blanks after the ';'
              are thus passed over one per iteration.  */
4613       if (found_tag && verify_tag && (*dbp != ' '))
4614         {
4615           /* Check if this is an "extern" declaration. */
4616           if (*dbp == '\0')
4617             continue;
4618           if (lowcase (*dbp) == 'e')
4619             {
4620               if (nocase_tail ("extern")) /* superfluous, really! */
4621                 {
4622                   found_tag = false;
4623                   verify_tag = false;
4624                 }
4625             }
4626           else if (lowcase (*dbp) == 'f')
4627             {
4628               if (nocase_tail ("forward")) /* check for forward reference */
4629                 {
4630                   found_tag = false;
4631                   verify_tag = false;
4632                 }
4633             }
4634           if (found_tag && verify_tag) /* not external proc, so make tag */
4635             {
4636               found_tag = false;
4637               verify_tag = false;
                   /* The name and line text come from the copy saved in
                      tline when the name was seen, not from lb.  */
4638               make_tag (name, namelen, true,
4639                         tline.buffer, taglen, save_lineno, save_lcno);
4640               continue;
4641             }
4642         }
4643       if (get_tagname)          /* grab name of proc or fn */
4644         {
4645           char *cp;
4646
4647           if (*dbp == '\0')
4648             continue;
4649
4650           /* Find block name. */
4651           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4652             continue;
4653
4654           /* Save all values for later tagging. */
4655           linebuffer_setlen (&tline, lb.len);
4656           strcpy (tline.buffer, lb.buffer);
4657           save_lineno = lineno;
4658           save_lcno = linecharno;
               /* NAME is placed at the same offset inside the copy as
                  dbp has inside lb.  */
4659           name = tline.buffer + (dbp - lb.buffer);
4660           namelen = cp - dbp;
4661           taglen = cp - lb.buffer + 1;
4662
4663           dbp = cp;             /* set dbp to e-o-token */
4664           get_tagname = false;
4665           found_tag = true;
4666           continue;
4667
4668           /* And proceed to check for "extern". */
4669         }
4670       else if (!incomment && !inquote && !found_tag)
4671         {
4672           /* Check for proc/fn keywords. */
4673           switch (lowcase (c))
4674             {
4675             case 'p':
4676               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4677                 get_tagname = true;
4678               continue;
4679             case 'f':
4680               if (nocase_tail ("unction"))
4681                 get_tagname = true;
4682               continue;
4683             }
4684         }
4685     } /* while not eof */
4686
4687   free (tline.buffer);
4688 }
4689
4690 \f
4691 /*
4692  * Lisp tag functions
4693  *  look for (def or (DEF, quote or QUOTE
4694  */
4695
4696 static void L_getit (void);
4697
4698 static void
4699 L_getit (void)
4700 {
4701   if (*dbp == '\'')             /* Skip prefix quote */
4702     dbp++;
4703   else if (*dbp == '(')
4704   {
4705     dbp++;
4706     /* Try to skip "(quote " */
4707     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4708       /* Ok, then skip "(" before name in (defstruct (foo)) */
4709       dbp = skip_spaces (dbp);
4710   }
4711   get_tag (dbp, NULL);
4712 }
4713
4714 static void
4715 Lisp_functions (FILE *inf)
4716 {
4717   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4718     {
4719       if (dbp[0] != '(')
4720         continue;
4721
4722       /* "(defvar foo)" is a declaration rather than a definition.  */
4723       if (! declarations)
4724         {
4725           char *p = dbp + 1;
4726           if (LOOKING_AT (p, "defvar"))
4727             {
4728               p = skip_name (p); /* past var name */
4729               p = skip_spaces (p);
4730               if (*p == ')')
4731                 continue;
4732             }
4733         }
4734
4735       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4736         dbp += 3;
4737
4738       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4739         {
4740           dbp = skip_non_spaces (dbp);
4741           dbp = skip_spaces (dbp);
4742           L_getit ();
4743         }
4744       else
4745         {
4746           /* Check for (foo::defmumble name-defined ... */
4747           do
4748             dbp++;
4749           while (!notinname (*dbp) && *dbp != ':');
4750           if (*dbp == ':')
4751             {
4752               do
4753                 dbp++;
4754               while (*dbp == ':');
4755
4756               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4757                 {
4758                   dbp = skip_non_spaces (dbp);
4759                   dbp = skip_spaces (dbp);
4760                   L_getit ();
4761                 }
4762             }
4763         }
4764     }
4765 }
4766
4767 \f
4768 /*
4769  * Lua script language parsing
4770  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4771  *
4772  *  "function" and "local function" are tags if they start at column 1.
4773  */
4774 static void
4775 Lua_functions (FILE *inf)
4776 {
4777   register char *bp;
4778
4779   LOOP_ON_INPUT_LINES (inf, lb, bp)
4780     {
4781       if (bp[0] != 'f' && bp[0] != 'l')
4782         continue;
4783
4784       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4785
4786       if (LOOKING_AT (bp, "function"))
4787         get_tag (bp, NULL);
4788     }
4789 }
4790
4791 \f
4792 /*
4793  * PostScript tags
4794  * Just look for lines where the first character is '/'
4795  * Also look at "defineps" for PSWrap
4796  * Ideas by:
4797  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4798  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4799  */
4800 static void
4801 PS_functions (FILE *inf)
4802 {
4803   register char *bp, *ep;
4804
4805   LOOP_ON_INPUT_LINES (inf, lb, bp)
4806     {
4807       if (bp[0] == '/')
4808         {
4809           for (ep = bp+1;
4810                *ep != '\0' && *ep != ' ' && *ep != '{';
4811                ep++)
4812             continue;
4813           make_tag (bp, ep - bp, true,
4814                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4815         }
4816       else if (LOOKING_AT (bp, "defineps"))
4817         get_tag (bp, NULL);
4818     }
4819 }
4820
4821 \f
4822 /*
4823  * Forth tags
4824  * Ignore anything after \ followed by space or in ( )
4825  * Look for words defined by :
4826  * Look for constant, code, create, defer, value, and variable
4827  * OBP extensions:  Look for buffer:, field,
4828  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4829  */
4830 static void
4831 Forth_words (FILE *inf)
4832 {
4833   register char *bp;
4834
4835   LOOP_ON_INPUT_LINES (inf, lb, bp)
4836     while ((bp = skip_spaces (bp))[0] != '\0')
4837       if (bp[0] == '\\' && iswhite (bp[1]))
4838         break;                  /* read next line */
4839       else if (bp[0] == '(' && iswhite (bp[1]))
4840         do                      /* skip to ) or eol */
4841           bp++;
4842         while (*bp != ')' && *bp != '\0');
4843       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4844                || LOOKING_AT_NOCASE (bp, "constant")
4845                || LOOKING_AT_NOCASE (bp, "code")
4846                || LOOKING_AT_NOCASE (bp, "create")
4847                || LOOKING_AT_NOCASE (bp, "defer")
4848                || LOOKING_AT_NOCASE (bp, "value")
4849                || LOOKING_AT_NOCASE (bp, "variable")
4850                || LOOKING_AT_NOCASE (bp, "buffer:")
4851                || LOOKING_AT_NOCASE (bp, "field"))
4852         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4853       else
4854         bp = skip_non_spaces (bp);
4855 }
4856
4857 \f
4858 /*
4859  * Scheme tag functions
4860  * look for (def... xyzzy
4861  *          (def... (xyzzy
4862  *          (def ... ((...(xyzzy ....
4863  *          (set! xyzzy
4864  * Original code by Ken Haase (1985?)
4865  */
4866 static void
4867 Scheme_functions (FILE *inf)
4868 {
4869   register char *bp;
4870
4871   LOOP_ON_INPUT_LINES (inf, lb, bp)
4872     {
4873       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4874         {
4875           bp = skip_non_spaces (bp+4);
4876           /* Skip over open parens and white space.  Don't continue past
4877              '\0'. */
4878           while (*bp && notinname (*bp))
4879             bp++;
4880           get_tag (bp, NULL);
4881         }
4882       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4883         get_tag (bp, NULL);
4884     }
4885 }
4886
4887 \f
4888 /* Find tags in TeX and LaTeX input files.  */
4889
4890 /* TEX_toktab is a table of TeX control sequences that define tags.
4891  * Each entry records one such control sequence.
4892  *
4893  * Original code from who knows whom.
4894  * Ideas by:
4895  *   Stefan Monnier (2002)
4896  */
4897
/* TeX_commands fills this table through TEX_decode_env the first time
   it finds it NULL; an entry with a NULL buffer marks its end.  */
4898 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4899
4900 /* Default set of control sequences to put into TEX_toktab.
4901    The value of environment var TEXTAGS is prepended to this.  */
4902 static const char *TEX_defenv = "\
4903 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4904 :part:appendix:entry:index:def\
4905 :newcommand:renewcommand:newenvironment:renewenvironment";
4906
4907 static void TEX_mode (FILE *);
4908 static void TEX_decode_env (const char *, const char *);
4909
/* Escape character and group delimiters used while scanning.  These
   are only the initial values: TEX_mode assigns all three, and
   TeX_commands calls TEX_mode before it scans its input.  */
4910 static char TEX_esc = '\\';
4911 static char TEX_opgrp = '{';
4912 static char TEX_clgrp = '}';
4913
4914 /*
4915  * TeX/LaTeX scanning loop.
4916  */
4917 static void
4918 TeX_commands (FILE *inf)
4919 {
4920   char *cp;
4921   linebuffer *key;
4922
4923   /* Select either \ or ! as escape character.  */
4924   TEX_mode (inf);
4925
4926   /* Initialize token table once from environment. */
4927   if (TEX_toktab == NULL)
4928     TEX_decode_env ("TEXTAGS", TEX_defenv);
4929
4930   LOOP_ON_INPUT_LINES (inf, lb, cp)
4931     {
4932       /* Look at each TEX keyword in line. */
4933       for (;;)
4934         {
4935           /* Look for a TEX escape. */
4936           while (*cp++ != TEX_esc)
4937             if (cp[-1] == '\0' || cp[-1] == '%')
4938               goto tex_next_line;
4939
4940           for (key = TEX_toktab; key->buffer != NULL; key++)
4941             if (strneq (cp, key->buffer, key->len))
4942               {
4943                 char *p;
4944                 int namelen, linelen;
4945                 bool opgrp = false;
4946
4947                 cp = skip_spaces (cp + key->len);
4948                 if (*cp == TEX_opgrp)
4949                   {
4950                     opgrp = true;
4951                     cp++;
4952                   }
4953                 for (p = cp;
4954                      (!iswhite (*p) && *p != '#' &&
4955                       *p != TEX_opgrp && *p != TEX_clgrp);
4956                      p++)
4957                   continue;
4958                 namelen = p - cp;
4959                 linelen = lb.len;
4960                 if (!opgrp || *p == TEX_clgrp)
4961                   {
4962                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4963                       p++;
4964                     linelen = p - lb.buffer + 1;
4965                   }
4966                 make_tag (cp, namelen, true,
4967                           lb.buffer, linelen, lineno, linecharno);
4968                 goto tex_next_line; /* We only tag a line once */
4969               }
4970         }
4971     tex_next_line:
4972       ;
4973     }
4974 }
4975
4976 #define TEX_LESC '\\'
4977 #define TEX_SESC '!'
4978
4979 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4980    chars accordingly. */
4981 static void
4982 TEX_mode (FILE *inf)
4983 {
4984   int c;
4985
4986   while ((c = getc (inf)) != EOF)
4987     {
4988       /* Skip to next line if we hit the TeX comment char. */
4989       if (c == '%')
4990         while (c != '\n' && c != EOF)
4991           c = getc (inf);
4992       else if (c == TEX_LESC || c == TEX_SESC )
4993         break;
4994     }
4995
4996   if (c == TEX_LESC)
4997     {
4998       TEX_esc = TEX_LESC;
4999       TEX_opgrp = '{';
5000       TEX_clgrp = '}';
5001     }
5002   else
5003     {
5004       TEX_esc = TEX_SESC;
5005       TEX_opgrp = '<';
5006       TEX_clgrp = '>';
5007     }
5008   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5009      No attempt is made to correct the situation. */
5010   rewind (inf);
5011 }
5012
5013 /* Read environment and prepend it to the default string.
5014    Build token table. */
5015 static void
5016 TEX_decode_env (const char *evarname, const char *defenv)
5017 {
5018   register const char *env, *p;
5019   int i, len;
5020
5021   /* Append default string to environment. */
5022   env = getenv (evarname);
5023   if (!env)
5024     env = defenv;
5025   else
5026     env = concat (env, defenv, "");
5027
5028   /* Allocate a token table */
5029   for (len = 1, p = env; p;)
5030     if ((p = strchr (p, ':')) && *++p != '\0')
5031       len++;
5032   TEX_toktab = xnew (len, linebuffer);
5033
5034   /* Unpack environment string into token table. Be careful about */
5035   /* zero-length strings (leading ':', "::" and trailing ':') */
5036   for (i = 0; *env != '\0';)
5037     {
5038       p = strchr (env, ':');
5039       if (!p)                   /* End of environment string. */
5040         p = env + strlen (env);
5041       if (p - env > 0)
5042         {                       /* Only non-zero strings. */
5043           TEX_toktab[i].buffer = savenstr (env, p - env);
5044           TEX_toktab[i].len = p - env;
5045           i++;
5046         }
5047       if (*p)
5048         env = p + 1;
5049       else
5050         {
5051           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5052           TEX_toktab[i].len = 0;
5053           break;
5054         }
5055     }
5056 }
5057
5058 \f
5059 /* Texinfo support.  Dave Love, Mar. 2000.  */
5060 static void
5061 Texinfo_nodes (FILE *inf)
5062 {
5063   char *cp, *start;
5064   LOOP_ON_INPUT_LINES (inf, lb, cp)
5065     if (LOOKING_AT (cp, "@node"))
5066       {
5067         start = cp;
5068         while (*cp != '\0' && *cp != ',')
5069           cp++;
5070         make_tag (start, cp - start, true,
5071                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5072       }
5073 }
5074
5075 \f
5076 /*
5077  * HTML support.
5078  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5079  * Contents of <a name=xxx> are tags with name xxx.
5080  *
5081  * Francesco Potortì, 2002.
5082  */
/* Scan INF through the global pointer dbp.  The four flags below are
   declared outside the line loop, so the state they describe is kept
   from one input line to the next.  On each pass of the inner loop
   exactly one of four cases is handled, tested in this order: skiptag,
   intag, getnext, and otherwise the search for an interesting tag.
   Within that loop, `continue' goes on with the same line and `break'
   moves to the next one.  */
5083 static void
5084 HTML_labels (FILE *inf)
5085 {
5086   bool getnext = false;         /* next text outside of HTML tags is a tag */
5087   bool skiptag = false;         /* skip to the end of the current HTML tag */
5088   bool intag = false;           /* inside an html tag, looking for ID= */
5089   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5090   char *end;
5091
5092
5093   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5094
5095   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5096     for (;;)                    /* loop on the same line */
5097       {
5098         if (skiptag)            /* skip HTML tag */
5099           {
5100             while (*dbp != '\0' && *dbp != '>')
5101               dbp++;
5102             if (*dbp == '>')
5103               {
5104                 dbp += 1;
5105                 skiptag = false;
5106                 continue;       /* look on the same line */
5107               }
5108             break;              /* go to next line */
5109           }
5110
5111         else if (intag) /* look for "name=" or "id=" */
5112           {
                 /* Stop at the end of the line, at the end of the HTML
                    tag, or at a letter that could start "name=" or
                    "id=".  */
5113             while (*dbp != '\0' && *dbp != '>'
5114                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5115               dbp++;
5116             if (*dbp == '\0')
5117               break;            /* go to next line */
5118             if (*dbp == '>')
5119               {
5120                 dbp += 1;
5121                 intag = false;
5122                 continue;       /* look on the same line */
5123               }
                 /* "name=" is accepted only when INANCHOR is set, while
                    "id=" is accepted in any HTML tag.  */
5124             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5125                 || LOOKING_AT_NOCASE (dbp, "id="))
5126               {
5127                 bool quoted = (dbp[0] == '"');
5128
                     /* A quoted value runs up to the closing double
                        quote; an unquoted one lasts as long as intoken
                        holds.  In both cases it stops at end of line.  */
5129                 if (quoted)
5130                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5131                     continue;
5132                 else
5133                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5134                     continue;
                     /* Keep the value in token_name; it becomes the tag
                        name when the following text is tagged.  */
5135                 linebuffer_setlen (&token_name, end - dbp);
5136                 memcpy (token_name.buffer, dbp, end - dbp);
5137                 token_name.buffer[end - dbp] = '\0';
5138
5139                 dbp = end;
5140                 intag = false;  /* we found what we looked for */
5141                 skiptag = true; /* skip to the end of the tag */
5142                 getnext = true; /* then grab the text */
5143                 continue;       /* look on the same line */
5144               }
5145             dbp += 1;
5146           }
5147
5148         else if (getnext)       /* grab next tokens and tag them */
5149           {
5150             dbp = skip_spaces (dbp);
5151             if (*dbp == '\0')
5152               break;            /* go to next line */
5153             if (*dbp == '<')
5154               {
                     /* Another HTML tag comes before any text: examine
                        it first.  GETNEXT stays set.  */
5155                 intag = true;
5156                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5157                 continue;       /* look on the same line */
5158               }
5159
                 /* The text recorded for the tag goes from dbp to the
                    next '<' or to the end of the line; the tag name is
                    whatever token_name holds, possibly nothing.  */
5160             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5161               continue;
5162             make_tag (token_name.buffer, token_name.len, true,
5163                       dbp, end - dbp, lineno, linecharno);
5164             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5165             getnext = false;
5166             break;              /* go to next line */
5167           }
5168
5169         else                    /* look for an interesting HTML tag */
5170           {
5171             while (*dbp != '\0' && *dbp != '<')
5172               dbp++;
5173             if (*dbp == '\0')
5174               break;            /* go to next line */
5175             intag = true;
5176             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5177               {
5178                 inanchor = true;
5179                 continue;       /* look on the same line */
5180               }
5181             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5182                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5183                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5184                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5185               {
5186                 intag = false;
5187                 getnext = true;
5188                 continue;       /* look on the same line */
5189               }
                 /* Any other HTML tag: step over the '<' and let the
                    INTAG case look for "id=" inside it.  */
5190             dbp += 1;
5191           }
5192       }
5193 }
5194
5195 \f
5196 /*
5197  * Prolog support
5198  *
5199  * Assumes that the predicate or rule starts at column 0.
5200  * Only the first clause of a predicate or rule is added.
5201  * Original code by Sunichirou Sugou (1989)
5202  * Rewritten by Anders Lindgren (1996)
5203  */
5204 static size_t prolog_pr (char *, char *);
5205 static void prolog_skip_comment (linebuffer *, FILE *);
5206 static size_t prolog_atom (char *, size_t);
5207
5208 static void
5209 Prolog_functions (FILE *inf)
5210 {
5211   char *cp, *last;
5212   size_t len;
5213   size_t allocated;
5214
5215   allocated = 0;
5216   len = 0;
5217   last = NULL;
5218
5219   LOOP_ON_INPUT_LINES (inf, lb, cp)
5220     {
5221       if (cp[0] == '\0')        /* Empty line */
5222         continue;
5223       else if (iswhite (cp[0])) /* Not a predicate */
5224         continue;
5225       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5226         prolog_skip_comment (&lb, inf);
5227       else if ((len = prolog_pr (cp, last)) > 0)
5228         {
5229           /* Predicate or rule.  Store the function name so that we
5230              only generate a tag for the first clause.  */
5231           if (last == NULL)
5232             last = xnew (len + 1, char);
5233           else if (len + 1 > allocated)
5234             xrnew (last, len + 1, char);
5235           allocated = len + 1;
5236           memcpy (last, cp, len);
5237           last[len] = '\0';
5238         }
5239     }
5240   free (last);
5241 }
5242
5243
5244 static void
5245 prolog_skip_comment (linebuffer *plb, FILE *inf)
5246 {
5247   char *cp;
5248
5249   do
5250     {
5251       for (cp = plb->buffer; *cp != '\0'; cp++)
5252         if (cp[0] == '*' && cp[1] == '/')
5253           return;
5254       readline (plb, inf);
5255     }
5256   while (!feof (inf));
5257 }
5258
5259 /*
5260  * A predicate or rule definition is added if it matches:
5261  *     <beginning of line><Prolog Atom><whitespace>(
5262  * or  <beginning of line><Prolog Atom><whitespace>:-
5263  *
5264  * It is added to the tags database if it doesn't match the
5265  * name of the previous clause header.
5266  *
5267  * Return the size of the name of the predicate or rule, or 0 if no
5268  * header was found.
5269  */
5270 static size_t
5271 prolog_pr (char *s, char *last)
5272
5273                                 /* Name of last clause. */
5274 {
5275   size_t pos;
5276   size_t len;
5277
5278   pos = prolog_atom (s, 0);
5279   if (! pos)
5280     return 0;
5281
5282   len = pos;
5283   pos = skip_spaces (s + pos) - s;
5284
5285   if ((s[pos] == '.'
5286        || (s[pos] == '(' && (pos += 1))
5287        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5288       && (last == NULL          /* save only the first clause */
5289           || len != strlen (last)
5290           || !strneq (s, last, len)))
5291         {
5292           make_tag (s, len, true, s, pos, lineno, linecharno);
5293           return len;
5294         }
5295   else
5296     return 0;
5297 }
5298
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes it occupies, or 0 if no atom is there.
 *
 * Two spellings are accepted:
 * - Unquoted: a lower case letter or underscore followed by any run
 *   of alphanumerics and underscores.
 * - Quoted: text between single quotes.  Inside it a doubled single
 *   quote stands for one quote and a backslash protects the character
 *   after it.  A quoted atom that is not closed on this line is
 *   rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* Unquoted atom: swallow the identifier characters. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: scan up to the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return (p + 1) - start;       /* closing quote */
        p += 2;                         /* doubled quote, keep going */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                         /* skip the escaped character */
        break;

      default:
        p++;
        break;
      }
}
5355
5356 \f
5357 /*
5358  * Support for Erlang
5359  *
5360  * Generates tags for functions, defines, and records.
5361  * Assumes that Erlang functions start at column 0.
5362  * Original code by Anders Lindgren (1996)
5363  */
5364 static int erlang_func (char *, char *);
5365 static void erlang_attribute (char *);
5366 static int erlang_atom (char *);
5367
5368 static void
5369 Erlang_functions (FILE *inf)
5370 {
5371   char *cp, *last;
5372   int len;
5373   int allocated;
5374
5375   allocated = 0;
5376   len = 0;
5377   last = NULL;
5378
5379   LOOP_ON_INPUT_LINES (inf, lb, cp)
5380     {
5381       if (cp[0] == '\0')        /* Empty line */
5382         continue;
5383       else if (iswhite (cp[0])) /* Not function nor attribute */
5384         continue;
5385       else if (cp[0] == '%')    /* comment */
5386         continue;
5387       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5388         continue;
5389       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5390         {
5391           erlang_attribute (cp);
5392           if (last != NULL)
5393             {
5394               free (last);
5395               last = NULL;
5396             }
5397         }
5398       else if ((len = erlang_func (cp, last)) > 0)
5399         {
5400           /*
5401            * Function.  Store the function name so that we only
5402            * generates a tag for the first clause.
5403            */
5404           if (last == NULL)
5405             last = xnew (len + 1, char);
5406           else if (len + 1 > allocated)
5407             xrnew (last, len + 1, char);
5408           allocated = len + 1;
5409           memcpy (last, cp, len);
5410           last[len] = '\0';
5411         }
5412     }
5413   free (last);
5414 }
5415
5416
5417 /*
5418  * A function definition is added if it matches:
5419  *     <beginning of line><Erlang Atom><whitespace>(
5420  *
5421  * It is added to the tags database if it doesn't match the
5422  * name of the previous clause header.
5423  *
5424  * Return the size of the name of the function, or 0 if no function
5425  * was found.
5426  */
5427 static int
5428 erlang_func (char *s, char *last)
5429
5430                                 /* Name of last clause. */
5431 {
5432   int pos;
5433   int len;
5434
5435   pos = erlang_atom (s);
5436   if (pos < 1)
5437     return 0;
5438
5439   len = pos;
5440   pos = skip_spaces (s + pos) - s;
5441
5442   /* Save only the first clause. */
5443   if (s[pos++] == '('
5444       && (last == NULL
5445           || len != (int)strlen (last)
5446           || !strneq (s, last, len)))
5447         {
5448           make_tag (s, len, true, s, pos, lineno, linecharno);
5449           return len;
5450         }
5451
5452   return 0;
5453 }
5454
5455
5456 /*
5457  * Handle attributes.  Currently, tags are generated for defines
5458  * and records.
5459  *
5460  * They are on the form:
5461  * -define(foo, bar).
5462  * -define(Foo(M, N), M+N).
5463  * -record(graph, {vtab = notable, cyclic = true}).
5464  */
5465 static void
5466 erlang_attribute (char *s)
5467 {
5468   char *cp = s;
5469
5470   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5471       && *cp++ == '(')
5472     {
5473       int len = erlang_atom (skip_spaces (cp));
5474       if (len > 0)
5475         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5476     }
5477   return;
5478 }
5479
5480
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 *
 * An unquoted atom is a letter or underscore followed by any run of
 * alphanumerics and underscores.  A quoted atom is enclosed in single
 * quotes; inside it a backslash protects the character after it.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }
  else if (*p == '\'')
    {
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              p++;              /* look at the escaped character */
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* step over the closing quote */
    }

  return (int) (p - s);
}
5508
5509 \f
5510 static char *scan_separators (char *);
5511 static void add_regex (char *, language *);
5512 static char *substitute (char *, char *, struct re_registers *);
5513
/*
 * Decode, in place, one separator-delimited field such as "/blah/".
 *
 * NAME[0] is taken as the separator.  The characters after it are
 * copied down to the start of NAME until an unquoted separator is
 * met.  While copying, backslash sequences are translated:
 *   \a BEL, \b BS, \d DEL, \e ESC, \f FF, \n NL, \r CR, \t TAB, \v VT,
 *   \<separator> gives a literal separator,
 *   any other quoted character keeps its backslash.
 * The decoded text is always null terminated.
 *
 * Return a pointer to the terminating separator, or NULL when the
 * field is unterminated.
 */
static char *
scan_separators (char *name)
{
  /* Letters with a special meaning after a backslash, and the
     characters they stand for, in matching order.  */
  static const char escape_letters[] = "abdefnrtv";
  static const char escape_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  const char sep = name[0];
  char *dst = name;             /* where the next decoded char goes */
  char *src;                    /* char being examined */
  char *terminator;
  bool after_backslash = false;

  for (src = name + 1; *src != '\0'; src++)
    {
      const char *hit;

      if (!after_backslash)
        {
          if (*src == '\\')
            {
              after_backslash = true;
              continue;
            }
          if (*src == sep)
            break;              /* unquoted separator: end of field */
          *dst++ = *src;
          continue;
        }

      /* The previous character was a backslash. */
      hit = strchr (escape_letters, *src);
      if (hit != NULL)
        *dst++ = escape_values[hit - escape_letters];
      else if (*src == sep)
        *dst++ = sep;
      else
        {
          /* Something else is quoted, so preserve the quote. */
          *dst++ = '\\';
          *dst++ = *src;
        }
      after_backslash = false;
    }

  /* Anything but the separator here means an unterminated regexp. */
  terminator = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return terminator;
}
5572
5573 /* Look at the argument of --regex or --no-regex and do the right
5574    thing.  Same for each line of a regexp file. */
5575 static void
5576 analyse_regex (char *regex_arg)
5577 {
5578   if (regex_arg == NULL)
5579     {
5580       free_regexps ();          /* --no-regex: remove existing regexps */
5581       return;
5582     }
5583
5584   /* A real --regexp option or a line in a regexp file. */
5585   switch (regex_arg[0])
5586     {
5587       /* Comments in regexp file or null arg to --regex. */
5588     case '\0':
5589     case ' ':
5590     case '\t':
5591       break;
5592
5593       /* Read a regex file.  This is recursive and may result in a
5594          loop, which will stop when the file descriptors are exhausted. */
5595     case '@':
5596       {
5597         FILE *regexfp;
5598         linebuffer regexbuf;
5599         char *regexfile = regex_arg + 1;
5600
5601         /* regexfile is a file containing regexps, one per line. */
5602         regexfp = fopen (regexfile, "rb");
5603         if (regexfp == NULL)
5604           pfatal (regexfile);
5605         linebuffer_init (&regexbuf);
5606         while (readline_internal (&regexbuf, regexfp) > 0)
5607           analyse_regex (regexbuf.buffer);
5608         free (regexbuf.buffer);
5609         fclose (regexfp);
5610       }
5611       break;
5612
5613       /* Regexp to be used for a specific language only. */
5614     case '{':
5615       {
5616         language *lang;
5617         char *lang_name = regex_arg + 1;
5618         char *cp;
5619
5620         for (cp = lang_name; *cp != '}'; cp++)
5621           if (*cp == '\0')
5622             {
5623               error ("unterminated language name in regex: %s", regex_arg);
5624               return;
5625             }
5626         *cp++ = '\0';
5627         lang = get_language_from_langname (lang_name);
5628         if (lang == NULL)
5629           return;
5630         add_regex (cp, lang);
5631       }
5632       break;
5633
5634       /* Regexp to be used for any language. */
5635     default:
5636       add_regex (regex_arg, NULL);
5637       break;
5638     }
5639 }
5640
5641 /* Separate the regexp pattern, compile it,
5642    and care for optional name and modifiers. */
5643 static void
5644 add_regex (char *regexp_pattern, language *lang)
5645 {
5646   static struct re_pattern_buffer zeropattern;
5647   char sep, *pat, *name, *modifiers;
5648   char empty = '\0';
5649   const char *err;
5650   struct re_pattern_buffer *patbuf;
5651   regexp *rp;
5652   bool
5653     force_explicit_name = true, /* do not use implicit tag names */
5654     ignore_case = false,        /* case is significant */
5655     multi_line = false,         /* matches are done one line at a time */
5656     single_line = false;        /* dot does not match newline */
5657
5658
5659   if (strlen (regexp_pattern) < 3)
5660     {
5661       error ("null regexp");
5662       return;
5663     }
5664   sep = regexp_pattern[0];
5665   name = scan_separators (regexp_pattern);
5666   if (name == NULL)
5667     {
5668       error ("%s: unterminated regexp", regexp_pattern);
5669       return;
5670     }
5671   if (name[1] == sep)
5672     {
5673       error ("null name for regexp \"%s\"", regexp_pattern);
5674       return;
5675     }
5676   modifiers = scan_separators (name);
5677   if (modifiers == NULL)        /* no terminating separator --> no name */
5678     {
5679       modifiers = name;
5680       name = &empty;
5681     }
5682   else
5683     modifiers += 1;             /* skip separator */
5684
5685   /* Parse regex modifiers. */
5686   for (; modifiers[0] != '\0'; modifiers++)
5687     switch (modifiers[0])
5688       {
5689       case 'N':
5690         if (modifiers == name)
5691           error ("forcing explicit tag name but no name, ignoring");
5692         force_explicit_name = true;
5693         break;
5694       case 'i':
5695         ignore_case = true;
5696         break;
5697       case 's':
5698         single_line = true;
5699         /* FALLTHRU */
5700       case 'm':
5701         multi_line = true;
5702         need_filebuf = true;
5703         break;
5704       default:
5705         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5706         break;
5707       }
5708
5709   patbuf = xnew (1, struct re_pattern_buffer);
5710   *patbuf = zeropattern;
5711   if (ignore_case)
5712     {
5713       static char lc_trans[CHARS];
5714       int i;
5715       for (i = 0; i < CHARS; i++)
5716         lc_trans[i] = lowcase (i);
5717       patbuf->translate = lc_trans;     /* translation table to fold case  */
5718     }
5719
5720   if (multi_line)
5721     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5722   else
5723     pat = regexp_pattern;
5724
5725   if (single_line)
5726     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5727   else
5728     re_set_syntax (RE_SYNTAX_EMACS);
5729
5730   err = re_compile_pattern (pat, strlen (pat), patbuf);
5731   if (multi_line)
5732     free (pat);
5733   if (err != NULL)
5734     {
5735       error ("%s while compiling pattern", err);
5736       return;
5737     }
5738
5739   rp = p_head;
5740   p_head = xnew (1, regexp);
5741   p_head->pattern = savestr (regexp_pattern);
5742   p_head->p_next = rp;
5743   p_head->lang = lang;
5744   p_head->pat = patbuf;
5745   p_head->name = savestr (name);
5746   p_head->error_signaled = false;
5747   p_head->force_explicit_name = force_explicit_name;
5748   p_head->ignore_case = ignore_case;
5749   p_head->multi_line = multi_line;
5750 }
5751
5752 /*
5753  * Do the substitutions indicated by the regular expression and
5754  * arguments.
5755  */
5756 static char *
5757 substitute (char *in, char *out, struct re_registers *regs)
5758 {
5759   char *result, *t;
5760   int size, dig, diglen;
5761
5762   result = NULL;
5763   size = strlen (out);
5764
5765   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5766   if (out[size - 1] == '\\')
5767     fatal ("pattern error in \"%s\"", out);
5768   for (t = strchr (out, '\\');
5769        t != NULL;
5770        t = strchr (t + 2, '\\'))
5771     if (ISDIGIT (t[1]))
5772       {
5773         dig = t[1] - '0';
5774         diglen = regs->end[dig] - regs->start[dig];
5775         size += diglen - 2;
5776       }
5777     else
5778       size -= 1;
5779
5780   /* Allocate space and do the substitutions. */
5781   assert (size >= 0);
5782   result = xnew (size + 1, char);
5783
5784   for (t = result; *out != '\0'; out++)
5785     if (*out == '\\' && ISDIGIT (*++out))
5786       {
5787         dig = *out - '0';
5788         diglen = regs->end[dig] - regs->start[dig];
5789         memcpy (t, in + regs->start[dig], diglen);
5790         t += diglen;
5791       }
5792     else
5793       *t++ = *out;
5794   *t = '\0';
5795
5796   assert (t <= result + size);
5797   assert (t - result == (int)strlen (result));
5798
5799   return result;
5800 }
5801
5802 /* Deallocate all regexps. */
5803 static void
5804 free_regexps (void)
5805 {
5806   regexp *rp;
5807   while (p_head != NULL)
5808     {
5809       rp = p_head->p_next;
5810       free (p_head->pattern);
5811       free (p_head->name);
5812       free (p_head);
5813       p_head = rp;
5814     }
5815   return;
5816 }
5817
5818 /*
5819  * Reads the whole file as a single string from `filebuf' and looks for
5820  * multi-line regular expressions, creating tags on matches.
5821  * readline already dealt with normal regexps.
5822  *
5823  * Idea by Ben Wing <ben@666.com> (2002).
5824  */
5825 static void
5826 regex_tag_multiline (void)
5827 {
5828   char *buffer = filebuf.buffer;
5829   regexp *rp;
5830   char *name;
5831
5832   for (rp = p_head; rp != NULL; rp = rp->p_next)
5833     {
5834       int match = 0;
5835
5836       if (!rp->multi_line)
5837         continue;               /* skip normal regexps */
5838
5839       /* Generic initializations before parsing file from memory. */
5840       lineno = 1;               /* reset global line number */
5841       charno = 0;               /* reset global char number */
5842       linecharno = 0;           /* reset global char number of line start */
5843
5844       /* Only use generic regexps or those for the current language. */
5845       if (rp->lang != NULL && rp->lang != curfdp->lang)
5846         continue;
5847
5848       while (match >= 0 && match < filebuf.len)
5849         {
5850           match = re_search (rp->pat, buffer, filebuf.len, charno,
5851                              filebuf.len - match, &rp->regs);
5852           switch (match)
5853             {
5854             case -2:
5855               /* Some error. */
5856               if (!rp->error_signaled)
5857                 {
5858                   error ("regexp stack overflow while matching \"%s\"",
5859                          rp->pattern);
5860                   rp->error_signaled = true;
5861                 }
5862               break;
5863             case -1:
5864               /* No match. */
5865               break;
5866             default:
5867               if (match == rp->regs.end[0])
5868                 {
5869                   if (!rp->error_signaled)
5870                     {
5871                       error ("regexp matches the empty string: \"%s\"",
5872                              rp->pattern);
5873                       rp->error_signaled = true;
5874                     }
5875                   match = -3;   /* exit from while loop */
5876                   break;
5877                 }
5878
5879               /* Match occurred.  Construct a tag. */
5880               while (charno < rp->regs.end[0])
5881                 if (buffer[charno++] == '\n')
5882                   lineno++, linecharno = charno;
5883               name = rp->name;
5884               if (name[0] == '\0')
5885                 name = NULL;
5886               else /* make a named tag */
5887                 name = substitute (buffer, rp->name, &rp->regs);
5888               if (rp->force_explicit_name)
5889                 /* Force explicit tag name, if a name is there. */
5890                 pfnote (name, true, buffer + linecharno,
5891                         charno - linecharno + 1, lineno, linecharno);
5892               else
5893                 make_tag (name, strlen (name), true, buffer + linecharno,
5894                           charno - linecharno + 1, lineno, linecharno);
5895               break;
5896             }
5897         }
5898     }
5899 }
5900
5901 \f
5902 static bool
5903 nocase_tail (const char *cp)
5904 {
5905   register int len = 0;
5906
5907   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5908     cp++, len++;
5909   if (*cp == '\0' && !intoken (dbp[len]))
5910     {
5911       dbp += len;
5912       return true;
5913     }
5914   return false;
5915 }
5916
5917 static void
5918 get_tag (register char *bp, char **namepp)
5919 {
5920   register char *cp = bp;
5921
5922   if (*bp != '\0')
5923     {
5924       /* Go till you get to white space or a syntactic break */
5925       for (cp = bp + 1; !notinname (*cp); cp++)
5926         continue;
5927       make_tag (bp, cp - bp, true,
5928                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5929     }
5930
5931   if (namepp != NULL)
5932     *namepp = savenstr (bp, cp - bp);
5933 }
5934
5935 /*
5936  * Read a line of text from `stream' into `lbp', excluding the
5937  * newline or CR-NL, if any.  Return the number of characters read from
5938  * `stream', which is the length of the line including the newline.
5939  *
5940  * On DOS or Windows we do not count the CR character, if any before the
5941  * NL, in the returned length; this mirrors the behavior of Emacs on those
5942  * platforms (for text files, it translates CR-NL to NL as it reads in the
5943  * file).
5944  *
5945  * If multi-line regular expressions are requested, each line read is
5946  * appended to `filebuf'.
5947  */
5948 static long
5949 readline_internal (linebuffer *lbp, register FILE *stream)
5950 {
5951   char *buffer = lbp->buffer;
5952   register char *p = lbp->buffer;
5953   register char *pend;
5954   int chars_deleted;
5955
5956   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5957
5958   for (;;)
5959     {
5960       register int c = getc (stream);
5961       if (p == pend)
5962         {
5963           /* We're at the end of linebuffer: expand it. */
5964           lbp->size *= 2;
5965           xrnew (buffer, lbp->size, char);
5966           p += buffer - lbp->buffer;
5967           pend = buffer + lbp->size;
5968           lbp->buffer = buffer;
5969         }
5970       if (c == EOF)
5971         {
5972           *p = '\0';
5973           chars_deleted = 0;
5974           break;
5975         }
5976       if (c == '\n')
5977         {
5978           if (p > buffer && p[-1] == '\r')
5979             {
5980               p -= 1;
5981 #ifdef DOS_NT
5982              /* Assume CRLF->LF translation will be performed by Emacs
5983                 when loading this file, so CRs won't appear in the buffer.
5984                 It would be cleaner to compensate within Emacs;
5985                 however, Emacs does not know how many CRs were deleted
5986                 before any given point in the file.  */
5987               chars_deleted = 1;
5988 #else
5989               chars_deleted = 2;
5990 #endif
5991             }
5992           else
5993             {
5994               chars_deleted = 1;
5995             }
5996           *p = '\0';
5997           break;
5998         }
5999       *p++ = c;
6000     }
6001   lbp->len = p - buffer;
6002
6003   if (need_filebuf              /* we need filebuf for multi-line regexps */
6004       && chars_deleted > 0)     /* not at EOF */
6005     {
6006       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6007         {
6008           /* Expand filebuf. */
6009           filebuf.size *= 2;
6010           xrnew (filebuf.buffer, filebuf.size, char);
6011         }
6012       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6013       filebuf.len += lbp->len;
6014       filebuf.buffer[filebuf.len++] = '\n';
6015       filebuf.buffer[filebuf.len] = '\0';
6016     }
6017
6018   return lbp->len + chars_deleted;
6019 }
6020
6021 /*
6022  * Like readline_internal, above, but in addition try to match the
6023  * input line against relevant regular expressions and manage #line
6024  * directives.
6025  */
6026 static void
6027 readline (linebuffer *lbp, FILE *stream)
6028 {
6029   long result;
6030
6031   linecharno = charno;          /* update global char number of line start */
6032   result = readline_internal (lbp, stream); /* read line */
6033   lineno += 1;                  /* increment global line number */
6034   charno += result;             /* increment global char number */
6035
6036   /* Honor #line directives. */
6037   if (!no_line_directive)
6038     {
6039       static bool discard_until_line_directive;
6040
6041       /* Check whether this is a #line directive. */
6042       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6043         {
6044           unsigned int lno;
6045           int start = 0;
6046
6047           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6048               && start > 0)     /* double quote character found */
6049             {
6050               char *endp = lbp->buffer + start;
6051
6052               while ((endp = strchr (endp, '"')) != NULL
6053                      && endp[-1] == '\\')
6054                 endp++;
6055               if (endp != NULL)
6056                 /* Ok, this is a real #line directive.  Let's deal with it. */
6057                 {
6058                   char *taggedabsname;  /* absolute name of original file */
6059                   char *taggedfname;    /* name of original file as given */
6060                   char *name;           /* temp var */
6061
6062                   discard_until_line_directive = false; /* found it */
6063                   name = lbp->buffer + start;
6064                   *endp = '\0';
6065                   canonicalize_filename (name);
6066                   taggedabsname = absolute_filename (name, tagfiledir);
6067                   if (filename_is_absolute (name)
6068                       || filename_is_absolute (curfdp->infname))
6069                     taggedfname = savestr (taggedabsname);
6070                   else
6071                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6072
6073                   if (streq (curfdp->taggedfname, taggedfname))
6074                     /* The #line directive is only a line number change.  We
6075                        deal with this afterwards. */
6076                     free (taggedfname);
6077                   else
6078                     /* The tags following this #line directive should be
6079                        attributed to taggedfname.  In order to do this, set
6080                        curfdp accordingly. */
6081                     {
6082                       fdesc *fdp; /* file description pointer */
6083
6084                       /* Go look for a file description already set up for the
6085                          file indicated in the #line directive.  If there is
6086                          one, use it from now until the next #line
6087                          directive. */
6088                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6089                         if (streq (fdp->infname, curfdp->infname)
6090                             && streq (fdp->taggedfname, taggedfname))
6091                           /* If we remove the second test above (after the &&)
6092                              then all entries pertaining to the same file are
6093                              coalesced in the tags file.  If we use it, then
6094                              entries pertaining to the same file but generated
6095                              from different files (via #line directives) will
6096                              go into separate sections in the tags file.  These
6097                              alternatives look equivalent.  The first one
6098                              destroys some apparently useless information. */
6099                           {
6100                             curfdp = fdp;
6101                             free (taggedfname);
6102                             break;
6103                           }
6104                       /* Else, if we already tagged the real file, skip all
6105                          input lines until the next #line directive. */
6106                       if (fdp == NULL) /* not found */
6107                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6108                           if (streq (fdp->infabsname, taggedabsname))
6109                             {
6110                               discard_until_line_directive = true;
6111                               free (taggedfname);
6112                               break;
6113                             }
6114                       /* Else create a new file description and use that from
6115                          now on, until the next #line directive. */
6116                       if (fdp == NULL) /* not found */
6117                         {
6118                           fdp = fdhead;
6119                           fdhead = xnew (1, fdesc);
6120                           *fdhead = *curfdp; /* copy curr. file description */
6121                           fdhead->next = fdp;
6122                           fdhead->infname = savestr (curfdp->infname);
6123                           fdhead->infabsname = savestr (curfdp->infabsname);
6124                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6125                           fdhead->taggedfname = taggedfname;
6126                           fdhead->usecharno = false;
6127                           fdhead->prop = NULL;
6128                           fdhead->written = false;
6129                           curfdp = fdhead;
6130                         }
6131                     }
6132                   free (taggedabsname);
6133                   lineno = lno - 1;
6134                   readline (lbp, stream);
6135                   return;
6136                 } /* if a real #line directive */
6137             } /* if #line is followed by a number */
6138         } /* if line begins with "#line " */
6139
6140       /* If we are here, no #line directive was found. */
6141       if (discard_until_line_directive)
6142         {
6143           if (result > 0)
6144             {
6145               /* Do a tail recursion on ourselves, thus discarding the contents
6146                  of the line buffer. */
6147               readline (lbp, stream);
6148               return;
6149             }
6150           /* End of file. */
6151           discard_until_line_directive = false;
6152           return;
6153         }
6154     } /* if #line directives should be considered */
6155
6156   {
6157     int match;
6158     regexp *rp;
6159     char *name;
6160
6161     /* Match against relevant regexps. */
6162     if (lbp->len > 0)
6163       for (rp = p_head; rp != NULL; rp = rp->p_next)
6164         {
6165           /* Only use generic regexps or those for the current language.
6166              Also do not use multiline regexps, which is the job of
6167              regex_tag_multiline. */
6168           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6169               || rp->multi_line)
6170             continue;
6171
6172           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6173           switch (match)
6174             {
6175             case -2:
6176               /* Some error. */
6177               if (!rp->error_signaled)
6178                 {
6179                   error ("regexp stack overflow while matching \"%s\"",
6180                          rp->pattern);
6181                   rp->error_signaled = true;
6182                 }
6183               break;
6184             case -1:
6185               /* No match. */
6186               break;
6187             case 0:
6188               /* Empty string matched. */
6189               if (!rp->error_signaled)
6190                 {
6191                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6192                   rp->error_signaled = true;
6193                 }
6194               break;
6195             default:
6196               /* Match occurred.  Construct a tag. */
6197               name = rp->name;
6198               if (name[0] == '\0')
6199                 name = NULL;
6200               else /* make a named tag */
6201                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6202               if (rp->force_explicit_name)
6203                 /* Force explicit tag name, if a name is there. */
6204                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6205               else
6206                 make_tag (name, strlen (name), true,
6207                           lbp->buffer, match, lineno, linecharno);
6208               break;
6209             }
6210         }
6211   }
6212 }
6213
6214 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes obtained through savenstr (and therefore xnew);
 * a pointer to it is returned.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6224
6225 /*
6226  * Return a pointer to a space of size LEN+1 allocated with xnew where
6227  * the string CP has been copied for at most the first LEN characters.
6228  */
6229 static char *
6230 savenstr (const char *cp, int len)
6231 {
6232   char *dp = xnew (len + 1, char);
6233   dp[len] = '\0';
6234   return memcpy (dp, cp, len);
6235 }
6236
/* Advance CP past every character for which iswhite holds and return
   the new position.  The end of string does not count as a space, so
   the scan stops there.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6245
/* Advance CP until it reaches either the end of the string or a
   character for which iswhite holds; return the new position.  */
static char *
skip_non_spaces (char *cp)
{
  for (; !(*cp == '\0' || iswhite (*cp)); cp++)
    continue;
  return cp;
}
6254
/* Advance CP past every character of the "name" class and return the
   new position.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops at the end of string too */
  for (; ! notinname (*cp); cp++)
    continue;
  return cp;
}
6264
/* Report a fatal problem and terminate.  S1 is handed to error as the
   format and S2 as its single argument; the process then exits with
   EXIT_FAILURE, so this function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6272
/* Report the failure described by errno through perror, using S1 as
   the message prefix, then exit with EXIT_FAILURE.  Does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6279
6280 static void
6281 suggest_asking_for_help (void)
6282 {
6283   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6284            progname);
6285   exit (EXIT_FAILURE);
6286 }
6287
6288 /* Output a diagnostic with printf-style FORMAT and args.  */
6289 static void
6290 error (const char *format, ...)
6291 {
6292   va_list ap;
6293   va_start (ap, format);
6294   fprintf (stderr, "%s: ", progname);
6295   vfprintf (stderr, format, ap);
6296   fprintf (stderr, "\n");
6297   va_end (ap);
6298 }
6299
6300 /* Return a newly-allocated string whose contents
6301    concatenate those of s1, s2, s3.  */
6302 static char *
6303 concat (const char *s1, const char *s2, const char *s3)
6304 {
6305   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6306   char *result = xnew (len1 + len2 + len3 + 1, char);
6307
6308   strcpy (result, s1);
6309   strcpy (result + len1, s2);
6310   strcpy (result + len1 + len2, s3);
6311
6312   return result;
6313 }
6314
6315 \f
6316 /* Does the same work as the system V getcwd, but does not need to
6317    guess the buffer size in advance. */
6318 static char *
6319 etags_getcwd (void)
6320 {
6321   int bufsize = 200;
6322   char *path = xnew (bufsize, char);
6323
6324   while (getcwd (path, bufsize) == NULL)
6325     {
6326       if (errno != ERANGE)
6327         pfatal ("getcwd");
6328       bufsize *= 2;
6329       free (path);
6330       path = xnew (bufsize, char);
6331     }
6332
6333   canonicalize_filename (path);
6334   return path;
6335 }
6336
6337 /* Return a newly allocated string containing the file name of FILE
6338    relative to the absolute directory DIR (which should end with a slash). */
6339 static char *
6340 relative_filename (char *file, char *dir)
6341 {
6342   char *fp, *dp, *afn, *res;
6343   int i;
6344
6345   /* Find the common root of file and dir (with a trailing slash). */
6346   afn = absolute_filename (file, cwd);
6347   fp = afn;
6348   dp = dir;
6349   while (*fp++ == *dp++)
6350     continue;
6351   fp--, dp--;                   /* back to the first differing char */
6352 #ifdef DOS_NT
6353   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6354     return afn;
6355 #endif
6356   do                            /* look at the equal chars until '/' */
6357     fp--, dp--;
6358   while (*fp != '/');
6359
6360   /* Build a sequence of "../" strings for the resulting relative file name. */
6361   i = 0;
6362   while ((dp = strchr (dp + 1, '/')) != NULL)
6363     i += 1;
6364   res = xnew (3*i + strlen (fp + 1) + 1, char);
6365   res[0] = '\0';
6366   while (i-- > 0)
6367     strcat (res, "../");
6368
6369   /* Add the file name relative to the common root of file and dir. */
6370   strcat (res, fp + 1);
6371   free (afn);
6372
6373   return res;
6374 }
6375
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An already
   absolute FILE is copied as is; otherwise FILE is appended to DIR.
   The "/dirname/.." and "/." components of the result are then removed
   in place.  The caller owns the returned string.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SLASHP always points
     at a slash of RES (or is NULL once no slash is left). */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* SLASHP starts a "/.." component.  Walk CP backwards
                 until filename_is_absolute accepts the text at CP,
                 i.e. until CP reaches the slash that opens the
                 preceding component, or runs off the front of RES.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Slide everything after "/.." down onto CP.  The count
                 strlen (slashp + 2) equals the length of the tail at
                 slashp + 3 plus one, so the terminating NUL moves too.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Re-examine from CP: the text now there may itself
                 start a "/.." or "/." component.  */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* SLASHP starts a "/." component: drop those two chars,
                 again moving the terminating NUL along with the tail.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6438
/* Return a newly allocated string containing the absolute
   file name of the directory where FILE resides, given DIR (which
   should end with a slash).  When FILE has no slash the result is a
   copy of DIR.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash so that only the directory
     part is made absolute, then put the overwritten char back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6458
/* Tell whether FN is an absolute file name: it starts with a slash or,
   under DOS_NT, with a drive letter followed by ":/".  FN must have
   been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6470
/* Rewrite FN in place into canonical form: under DOS_NT an upper-case
   drive letter is downcased, and every run of separator characters
   ('/' normally, '\\' under DOS_NT) becomes a single slash.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  register char *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads ahead while DST writes the compacted name behind it. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6500
6501 \f
6502 /* Initialize a linebuffer for use. */
6503 static void
6504 linebuffer_init (linebuffer *lbp)
6505 {
6506   lbp->size = (DEBUG) ? 3 : 200;
6507   lbp->buffer = xnew (lbp->size, char);
6508   lbp->buffer[0] = '\0';
6509   lbp->len = 0;
6510 }
6511
6512 /* Set the minimum size of a string contained in a linebuffer. */
6513 static void
6514 linebuffer_setlen (linebuffer *lbp, int toksize)
6515 {
6516   while (lbp->size <= toksize)
6517     {
6518       lbp->size *= 2;
6519       xrnew (lbp->buffer, lbp->size, char);
6520     }
6521   lbp->len = toksize;
6522 }
6523
/* Allocate SIZE bytes with malloc and return them; if malloc returns
   NULL, report "virtual memory exhausted" through fatal instead. */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6533
/* Resize the block at PTR to SIZE bytes with realloc and return the
   result; if realloc returns NULL, report "virtual memory exhausted"
   through fatal instead. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6542
6543 /*
6544  * Local Variables:
6545  * indent-tabs-mode: t
6546  * tab-width: 8
6547  * fill-column: 79
6548  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6549  * c-file-style: "gnu"
6550  * End:
6551  */
6552
6553 /* etags.c ends here */