code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2014 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #ifdef DEBUG
  84 #  undef DEBUG
  85 #  define DEBUG true
  86 #else
  87 #  define DEBUG  false
  88 #  define NDEBUG                /* disable assert */
  89 #endif
  90
  91 #include <config.h>
  92
  93 #ifndef _GNU_SOURCE
  94 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  95 #endif
  96
  97 /* WIN32_NATIVE is for XEmacs.
  98    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  99 #ifdef WIN32_NATIVE
 100 # undef MSDOS
 101 # undef  WINDOWSNT
 102 # define WINDOWSNT
 103 #endif /* WIN32_NATIVE */
 104
 105 #ifdef MSDOS
 106 # undef MSDOS
 107 # define MSDOS true
 108 # include <sys/param.h>
 109 #else
 110 # define MSDOS false
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <direct.h>
 115 # define MAXPATHLEN _MAX_PATH
 116 # undef HAVE_NTGUI
 117 # undef  DOS_NT
 118 # define DOS_NT
 119 #endif /* WINDOWSNT */
 120
 121 #include <unistd.h>
 122 #include <stdarg.h>
 123 #include <stdlib.h>
 124 #include <string.h>
 125 #include <stdio.h>
 126 #include <ctype.h>
 127 #include <errno.h>
 128 #include <sys/types.h>
 129 #include <sys/stat.h>
 130 #include <binary-io.h>
 131 #include <c-strcase.h>
 132
 133 #include <assert.h>
 134 #ifdef NDEBUG
 135 # undef  assert                 /* some systems have a buggy assert.h */
 136 # define assert(x) ((void) 0)
 137 #endif
 138
 139 #include <getopt.h>
 140 #include <regex.h>
 141
 142 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 143  Leave it undefined to make the program "etags", which makes emacs-style
 144  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 145 #ifdef CTAGS
 146 # undef  CTAGS
 147 # define CTAGS true
 148 #else
 149 # define CTAGS false
 150 #endif
 151
 152 #define streq(s,t)      (assert ((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 153 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 154 #define strneq(s,t,n)   (assert ((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 155 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 156
#define CHARS 256               /* number of distinct char values, i.e.
                                   2^CHAR_BIT with 8-bit chars */
/* Reduce any character value, including a negative plain char, to an
   index in the range 0..CHARS-1.  */
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
/* Character-class predicates; each is a lookup in a table of CHARS
   entries indexed through CHAR.  */
#define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that pass their argument through CHAR first, so
   the library functions never receive a negative value.  */
#define ISALNUM(c)      isalnum (CHAR (c))
#define ISALPHA(c)      isalpha (CHAR (c))
#define ISDIGIT(c)      isdigit (CHAR (c))
#define ISLOWER(c)      islower (CHAR (c))

/* Lower-case version of C, computed by tolower on CHAR (c).  */
#define lowcase(c)      tolower (CHAR (c))
 171
 172
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a `Type *' to storage for N objects of type Type.
 * xrnew resizes the block OldPointer refers to so that it holds N
 * objects of type Type and stores the new address back into
 * OldPointer, which therefore must be an lvalue.
 * DEBUG builds route the requests through trace_malloc and
 * trace_realloc (presumably declared in "chkmalloc.h"), passing the
 * source file and line of the call; other builds use xmalloc and
 * xrealloc.
 * The byte count (n) * sizeof (Type) is not checked for overflow.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
 190
 191 typedef void Lang_function (FILE *);
 192
 193 typedef struct
 194 {
 195   const char *suffix;           /* file name suffix for this compressor */
 196   const char *command;          /* takes one arg and decompresses to stdout */
 197 } compressor;
 198
 199 typedef struct
 200 {
 201   const char *name;             /* language name */
 202   const char *help;             /* detailed help for the language */
 203   Lang_function *function;      /* parse function */
 204   const char **suffixes;        /* name suffixes of this language's files */
 205   const char **filenames;       /* names of this language's files */
 206   const char **interpreters;    /* interpreters for this language */
 207   bool metasource;              /* source used to generate other sources */
 208 } language;
 209
 210 typedef struct fdesc
 211 {
 212   struct fdesc *next;           /* for the linked list */
 213   char *infname;                /* uncompressed input file name */
 214   char *infabsname;             /* absolute uncompressed input file name */
 215   char *infabsdir;              /* absolute dir of input file */
 216   char *taggedfname;            /* file name to write in tagfile */
 217   language *lang;               /* language of file */
 218   char *prop;                   /* file properties to write in tagfile */
 219   bool usecharno;               /* etags tags shall contain char number */
 220   bool written;                 /* entry written in the tags file */
 221 } fdesc;
 222
 223 typedef struct node_st
 224 {                               /* sorting structure */
 225   struct node_st *left, *right; /* left and right sons */
 226   fdesc *fdp;                   /* description of file to whom tag belongs */
 227   char *name;                   /* tag name */
 228   char *regex;                  /* search regexp */
 229   bool valid;                   /* write this tag on the tag file */
 230   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 231   bool been_warned;             /* warning already given for duplicated tag */
 232   int lno;                      /* line number tag is on */
 233   long cno;                     /* character number line starts on */
 234 } node;
 235
 236 /*
 237  * A `linebuffer' is a structure which holds a line of text.
 238  * `readline_internal' reads a line from a stream into a linebuffer
 239  * and works regardless of the length of the line.
 240  * SIZE is the size of BUFFER, LEN is the length of the string in
 241  * BUFFER after readline reads it.
 242  */
 243 typedef struct
 244 {
 245   long size;
 246   int len;
 247   char *buffer;
 248 } linebuffer;
 249
 250 /* Used to support mixing of --lang and file names. */
 251 typedef struct
 252 {
 253   enum {
 254     at_language,                /* a language specification */
 255     at_regexp,                  /* a regular expression */
 256     at_filename,                /* a file name */
 257     at_stdin,                   /* read from stdin here */
 258     at_end                      /* stop parsing the list */
 259   } arg_type;                   /* argument type */
 260   language *lang;               /* language associated with the argument */
 261   char *what;                   /* the argument itself */
 262 } argument;
 263
 264 /* Structure defining a regular expression. */
 265 typedef struct regexp
 266 {
 267   struct regexp *p_next;        /* pointer to next in list */
 268   language *lang;               /* if set, use only for this language */
 269   char *pattern;                /* the regexp pattern */
 270   char *name;                   /* tag name */
 271   struct re_pattern_buffer *pat; /* the compiled pattern */
 272   struct re_registers regs;     /* re registers */
 273   bool error_signaled;          /* already signaled for this regexp */
 274   bool force_explicit_name;     /* do not allow implicit tag name */
 275   bool ignore_case;             /* ignore case when matching */
 276   bool multi_line;              /* do a multi-line match on the whole file */
 277 } regexp;
 278
 279
/* Forward declarations of the per-language parse functions.
   Except for C_entries, which takes an extra int argument, each has
   the Lang_function signature: void (FILE *).
   Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
 310
/* Forward declarations of the remaining helpers defined later in
   this file.  */

/* Language lookup, line reading and tag extraction.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static long readline_internal (linebuffer *, FILE *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);

/* Regexp handling, error reporting and the tag tree.  `fatal' is the
   only function declared here without `static'.  */
static void analyse_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void suggest_asking_for_help (void);
_Noreturn void fatal (const char *, const char *);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

/* File processing and tag output.  */
static void init (void);
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, int, int, long);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);

/* String, file name, line buffer and memory utilities.  */
static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, int);
static char *savestr (const char *);
static char *etags_strchr (const char *, int);
static char *etags_strrchr (const char *, int);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, int);
static void *xmalloc (size_t);
static void *xrealloc (char *, size_t);
 354
 355 \f
/* File-scope state shared by the functions in this file.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */
static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably the sentinel used where no valid character
   number is available -- confirm against its uses.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 380
/* boolean "functions" (see init)       */
/* These CHARS-sized tables are what the iswhite, notinname, intoken,
   begtoken and endtoken macros index.  The strings that follow list
   the characters belonging to each class.  */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static const char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 394
/* Command-line option flags.  The ones declared `int' rather than
   `bool' are those whose address appears in the `flag' member of a
   longopts entry, which is an `int *'.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static int globals;             /* create tags for global variables */
static int members;             /* create tags for C member variables */
static int declarations;        /* --declarations: tag them and extern in C&Co*/
static int no_line_directive;   /* ignore #line directives (undocumented) */
static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static int packages_only;       /* --packages-only: in Ada, only tag packages*/
 416
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* The value lies outside the range of a char, so it cannot coincide
   with any single-letter option code.  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 427
 428 static struct option longopts[] =
 429 {
 430   { "append",             no_argument,       NULL,               'a'   },
 431   { "packages-only",      no_argument,       &packages_only,     1     },
 432   { "c++",                no_argument,       NULL,               'C'   },
 433   { "declarations",       no_argument,       &declarations,      1     },
 434   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
 435   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
 436   { "help",               no_argument,       NULL,               'h'   },
 437   { "help",               no_argument,       NULL,               'H'   },
 438   { "ignore-indentation", no_argument,       NULL,               'I'   },
 439   { "language",           required_argument, NULL,               'l'   },
 440   { "members",            no_argument,       &members,           1     },
 441   { "no-members",         no_argument,       &members,           0     },
 442   { "output",             required_argument, NULL,               'o'   },
 443   { "regex",              required_argument, NULL,               'r'   },
 444   { "no-regex",           no_argument,       NULL,               'R'   },
 445   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 446   { "parse-stdin",        required_argument, NULL,               STDIN },
 447   { "version",            no_argument,       NULL,               'V'   },
 448
 449 #if CTAGS /* Ctags options */
 450   { "backward-search",    no_argument,       NULL,               'B'   },
 451   { "cxref",              no_argument,       NULL,               'x'   },
 452   { "defines",            no_argument,       NULL,               'd'   },
 453   { "globals",            no_argument,       &globals,           1     },
 454   { "typedefs",           no_argument,       NULL,               't'   },
 455   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 456   { "update",             no_argument,       NULL,               'u'   },
 457   { "vgrind",             no_argument,       NULL,               'v'   },
 458   { "no-warn",            no_argument,       NULL,               'w'   },
 459
 460 #else /* Etags options */
 461   { "no-defines",         no_argument,       NULL,               'D'   },
 462   { "no-globals",         no_argument,       &globals,           0     },
 463   { "include",            required_argument, NULL,               'i'   },
 464 #endif
 465   { NULL }
 466 };
 467
 468 static compressor compressors[] =
 469 {
 470   { "z", "gzip -d -c"},
 471   { "Z", "gzip -d -c"},
 472   { "gz", "gzip -d -c"},
 473   { "GZ", "gzip -d -c"},
 474   { "bz2", "bzip2 -d -c" },
 475   { "xz", "xz -d -c" },
 476   { NULL }
 477 };
 478
 479 /*
 480  * Language stuff.
 481  */
 482
 483 /* Ada code */
 484 static const char *Ada_suffixes [] =
 485   { "ads", "adb", "ada", NULL };
 486 static const char Ada_help [] =
 487 "In Ada code, functions, procedures, packages, tasks and types are\n\
 488 tags.  Use the `--packages-only' option to create tags for\n\
 489 packages only.\n\
 490 Ada tag names have suffixes indicating the type of entity:\n\
 491         Entity type:    Qualifier:\n\
 492         ------------    ----------\n\
 493         function        /f\n\
 494         procedure       /p\n\
 495         package spec    /s\n\
 496         package body    /b\n\
 497         type            /t\n\
 498         task            /k\n\
 499 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 500 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 501 will just search for any tag `bidule'.";
 502
 503 /* Assembly code */
 504 static const char *Asm_suffixes [] =
 505   { "a",        /* Unix assembler */
 506     "asm", /* Microcontroller assembly */
 507     "def", /* BSO/Tasking definition includes  */
 508     "inc", /* Microcontroller include files */
 509     "ins", /* Microcontroller include files */
 510     "s", "sa", /* Unix assembler */
 511     "S",   /* cpp-processed Unix assembler */
 512     "src", /* BSO/Tasking C compiler output */
 513     NULL
 514   };
 515 static const char Asm_help [] =
 516 "In assembler code, labels appearing at the beginning of a line,\n\
 517 followed by a colon, are tags.";
 518
 519
 520 /* Note that .c and .h can be considered C++, if the --c++ flag was
 521    given, or if the `class' or `template' keywords are met inside the file.
 522    That is why default_C_entries is called for these. */
 523 static const char *default_C_suffixes [] =
 524   { "c", "h", NULL };
 525 #if CTAGS                               /* C help for Ctags */
 526 static const char default_C_help [] =
 527 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 528 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 529 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 530 Use --globals to tag global variables.\n\
 531 You can tag function declarations and external variables by\n\
 532 using `--declarations', and struct members by using `--members'.";
 533 #else                                   /* C help for Etags */
 534 static const char default_C_help [] =
 535 "In C code, any C function or typedef is a tag, and so are\n\
 536 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 537 definitions and `enum' constants are tags unless you specify\n\
 538 `--no-defines'.  Global variables are tags unless you specify\n\
 539 `--no-globals' and so are struct members unless you specify\n\
 540 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 541 `--no-members' can make the tags table file much smaller.\n\
 542 You can tag function declarations and external variables by\n\
 543 using `--declarations'.";
 544 #endif  /* C help for Ctags and Etags */
 545
 546 static const char *Cplusplus_suffixes [] =
 547   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 548     "M",                        /* Objective C++ */
 549     "pdb",                      /* PostScript with C syntax */
 550     NULL };
 551 static const char Cplusplus_help [] =
 552 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 553 --help --lang=c --lang=c++ for full help.)\n\
 554 In addition to C tags, member functions are also recognized.  Member\n\
 555 variables are recognized unless you use the `--no-members' option.\n\
 556 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 557 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 558 `operator+'.";
 559
 560 static const char *Cjava_suffixes [] =
 561   { "java", NULL };
 562 static char Cjava_help [] =
 563 "In Java code, all the tags constructs of C and C++ code are\n\
 564 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 565
 566
 567 static const char *Cobol_suffixes [] =
 568   { "COB", "cob", NULL };
 569 static char Cobol_help [] =
 570 "In Cobol code, tags are paragraph names; that is, any word\n\
 571 starting in column 8 and followed by a period.";
 572
 573 static const char *Cstar_suffixes [] =
 574   { "cs", "hs", NULL };
 575
 576 static const char *Erlang_suffixes [] =
 577   { "erl", "hrl", NULL };
 578 static const char Erlang_help [] =
 579 "In Erlang code, the tags are the functions, records and macros\n\
 580 defined in the file.";
 581
 582 const char *Forth_suffixes [] =
 583   { "fth", "tok", NULL };
 584 static const char Forth_help [] =
 585 "In Forth code, tags are words defined by `:',\n\
 586 constant, code, create, defer, value, variable, buffer:, field.";
 587
 588 static const char *Fortran_suffixes [] =
 589   { "F", "f", "f90", "for", NULL };
 590 static const char Fortran_help [] =
 591 "In Fortran code, functions, subroutines and block data are tags.";
 592
 593 static const char *HTML_suffixes [] =
 594   { "htm", "html", "shtml", NULL };
 595 static const char HTML_help [] =
 596 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 597 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 598 occurrences of `id='.";
 599
 600 static const char *Lisp_suffixes [] =
 601   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 602 static const char Lisp_help [] =
 603 "In Lisp code, any function defined with `defun', any variable\n\
 604 defined with `defvar' or `defconst', and in general the first\n\
 605 argument of any expression that starts with `(def' in column zero\n\
 606 is a tag.\n\
 607 The `--declarations' option tags \"(defvar foo)\" constructs too.";
 608
 609 static const char *Lua_suffixes [] =
 610   { "lua", "LUA", NULL };
 611 static const char Lua_help [] =
 612 "In Lua scripts, all functions are tags.";
 613
 614 static const char *Makefile_filenames [] =
 615   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 616 static const char Makefile_help [] =
 617 "In makefiles, targets are tags; additionally, variables are tags\n\
 618 unless you specify `--no-globals'.";
 619
 620 static const char *Objc_suffixes [] =
 621   { "lm",                       /* Objective lex file */
 622     "m",                        /* Objective C file */
 623      NULL };
 624 static const char Objc_help [] =
 625 "In Objective C code, tags include Objective C definitions for classes,\n\
 626 class categories, methods and protocols.  Tags for variables and\n\
 627 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 628 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 629
 630 static const char *Pascal_suffixes [] =
 631   { "p", "pas", NULL };
 632 static const char Pascal_help [] =
 633 "In Pascal code, the tags are the functions and procedures defined\n\
 634 in the file.";
 635 /* " // this is for working around an Emacs highlighting bug... */
 636
 637 static const char *Perl_suffixes [] =
 638   { "pl", "pm", NULL };
 639 static const char *Perl_interpreters [] =
 640   { "perl", "@PERL@", NULL };
 641 static const char Perl_help [] =
 642 "In Perl code, the tags are the packages, subroutines and variables\n\
 643 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 644 `--globals' if you want to tag global variables.  Tags for\n\
 645 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 646 defined in the default package is `main::SUB'.";
 647
 648 static const char *PHP_suffixes [] =
 649   { "php", "php3", "php4", NULL };
 650 static const char PHP_help [] =
 651 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 652 the `--no-members' option, vars are tags too.";
 653
 654 static const char *plain_C_suffixes [] =
 655   { "pc",                       /* Pro*C file */
 656      NULL };
 657
 658 static const char *PS_suffixes [] =
 659   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 660 static const char PS_help [] =
 661 "In PostScript code, the tags are the functions.";
 662
 663 static const char *Prolog_suffixes [] =
 664   { "prolog", NULL };
 665 static const char Prolog_help [] =
 666 "In Prolog code, tags are predicates and rules at the beginning of\n\
 667 line.";
 668
 669 static const char *Python_suffixes [] =
 670   { "py", NULL };
 671 static const char Python_help [] =
 672 "In Python code, `def' or `class' at the beginning of a line\n\
 673 generate a tag.";
 674
 675 /* Can't do the `SCM' or `scm' prefix with a version number. */
 676 static const char *Scheme_suffixes [] =
 677   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 678 static const char Scheme_help [] =
 679 "In Scheme code, tags include anything defined with `def' or with a\n\
 680 construct whose name starts with `def'.  They also include\n\
 681 variables set with `set!' at top level in the file.";
 682
 683 static const char *TeX_suffixes [] =
 684   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 685 static const char TeX_help [] =
 686 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 687 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 688 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 689 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 690 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 691 \n\
 692 Other commands can be specified by setting the environment variable\n\
 693 `TEXTAGS' to a colon-separated list like, for example,\n\
 694      TEXTAGS=\"mycommand:myothercommand\".";
 695
 696
 697 static const char *Texinfo_suffixes [] =
 698   { "texi", "texinfo", "txi", NULL };
 699 static const char Texinfo_help [] =
 700 "for texinfo files, lines starting with @node are tagged.";
 701
 702 static const char *Yacc_suffixes [] =
 703   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 704 static const char Yacc_help [] =
 705 "In Bison or Yacc input files, each rule defines as a tag the\n\
 706 nonterminal it constructs.  The portions of the file that contain\n\
 707 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 708 for full help).";
 709
 710 static const char auto_help [] =
 711 "`auto' is not a real language, it indicates to use\n\
 712 a default language for files base on file name suffix and file contents.";
 713
 714 static const char none_help [] =
 715 "`none' is not a real language, it indicates to only do\n\
 716 regexp processing on files.";
 717
 718 static const char no_lang_help [] =
 719 "No detailed help available for this language.";
 720
 721
 722 /*
 723  * Table of languages.
 724  *
 725  * It is ok for a given function to be listed under more than one
 726  * name.  I just didn't.
 727  */
 728
 729 static language lang_names [] =
 730 {
 731   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 732   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 733   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 734   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 735   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 736   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 737   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 738   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 739   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 740   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 741   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 742   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 743   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 744   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 745   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 746   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 747   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 748   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 749   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 750   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 751   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 752   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 753   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 754   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 755   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 756   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
 757   { "auto",      auto_help },                      /* default guessing scheme */
 758   { "none",      none_help,      just_read_file }, /* regexp matching only */
 759   { NULL }                /* end of list */
 760 };
 761
 762 \f
 763 static void
 764 print_language_names (void)
 765 {
 766   language *lang;
 767   const char **name, **ext;
 768
 769   puts ("\nThese are the currently supported languages, along with the\n\
 770 default file names and dot suffixes:");
 771   for (lang = lang_names; lang->name != NULL; lang++)
 772     {
 773       printf ("  %-*s", 10, lang->name);
 774       if (lang->filenames != NULL)
 775         for (name = lang->filenames; *name != NULL; name++)
 776           printf (" %s", *name);
 777       if (lang->suffixes != NULL)
 778         for (ext = lang->suffixes; *ext != NULL; ext++)
 779           printf (" .%s", *ext);
 780       puts ("");
 781     }
 782   puts ("where `auto' means use default language for files based on file\n\
 783 name suffix, and `none' means only do regexp processing on files.\n\
 784 If no language is specified and no matching suffix is found,\n\
 785 the first line of the file is read for a sharp-bang (#!) sequence\n\
 786 followed by the name of an interpreter.  If no such sequence is found,\n\
 787 Fortran is tried first; if no tags are found, C is tried next.\n\
 788 When parsing any C file, a \"class\" or \"template\" keyword\n\
 789 switches to C++.");
 790   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 791 \n\
 792 For detailed help on a given language use, for example,\n\
 793 etags --help --lang=ada.");
 794 }
 795
 796 #ifndef EMACS_NAME
 797 # define EMACS_NAME "standalone"
 798 #endif
 799 #ifndef VERSION
 800 # define VERSION "17.38.1.4"
 801 #endif
 802 static _Noreturn void
 803 print_version (void)
 804 {
 805   char emacs_copyright[] = COPYRIGHT;
 806
 807   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 808   puts (emacs_copyright);
 809   puts ("This program is distributed under the terms in ETAGS.README");
 810
 811   exit (EXIT_SUCCESS);
 812 }
 813
 814 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 815 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 816 #endif
 817
 818 static _Noreturn void
 819 print_help (argument *argbuffer)
 820 {
 821   bool help_for_lang = false;
 822
 823   for (; argbuffer->arg_type != at_end; argbuffer++)
 824     if (argbuffer->arg_type == at_language)
 825       {
 826         if (help_for_lang)
 827           puts ("");
 828         puts (argbuffer->lang->help);
 829         help_for_lang = true;
 830       }
 831
 832   if (help_for_lang)
 833     exit (EXIT_SUCCESS);
 834
 835   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 836 \n\
 837 These are the options accepted by %s.\n", progname, progname);
 838   puts ("You may use unambiguous abbreviations for the long option names.");
 839   puts ("  A - as file name means read names from stdin (one per line).\n\
 840 Absolute names are stored in the output file as they are.\n\
 841 Relative ones are stored relative to the output file's directory.\n");
 842
 843   puts ("-a, --append\n\
 844         Append tag entries to existing tags file.");
 845
 846   puts ("--packages-only\n\
 847         For Ada files, only generate tags for packages.");
 848
 849   if (CTAGS)
 850     puts ("-B, --backward-search\n\
 851         Write the search commands for the tag entries using '?', the\n\
 852         backward-search command instead of '/', the forward-search command.");
 853
 854   /* This option is mostly obsolete, because etags can now automatically
 855      detect C++.  Retained for backward compatibility and for debugging and
 856      experimentation.  In principle, we could want to tag as C++ even
 857      before any "class" or "template" keyword.
 858   puts ("-C, --c++\n\
 859         Treat files whose name suffix defaults to C language as C++ files.");
 860   */
 861
 862   puts ("--declarations\n\
 863         In C and derived languages, create tags for function declarations,");
 864   if (CTAGS)
 865     puts ("\tand create tags for extern variables if --globals is used.");
 866   else
 867     puts
 868       ("\tand create tags for extern variables unless --no-globals is used.");
 869
 870   if (CTAGS)
 871     puts ("-d, --defines\n\
 872         Create tag entries for C #define constants and enum constants, too.");
 873   else
 874     puts ("-D, --no-defines\n\
 875         Don't create tag entries for C #define constants and enum constants.\n\
 876         This makes the tags file smaller.");
 877
 878   if (!CTAGS)
 879     puts ("-i FILE, --include=FILE\n\
 880         Include a note in tag file indicating that, when searching for\n\
 881         a tag, one should also consult the tags file FILE after\n\
 882         checking the current file.");
 883
 884   puts ("-l LANG, --language=LANG\n\
 885         Force the following files to be considered as written in the\n\
 886         named language up to the next --language=LANG option.");
 887
 888   if (CTAGS)
 889     puts ("--globals\n\
 890         Create tag entries for global variables in some languages.");
 891   else
 892     puts ("--no-globals\n\
 893         Do not create tag entries for global variables in some\n\
 894         languages.  This makes the tags file smaller.");
 895
 896   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 897     puts ("--no-line-directive\n\
 898         Ignore #line preprocessor directives in C and derived languages.");
 899
 900   if (CTAGS)
 901     puts ("--members\n\
 902         Create tag entries for members of structures in some languages.");
 903   else
 904     puts ("--no-members\n\
 905         Do not create tag entries for members of structures\n\
 906         in some languages.");
 907
 908   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 909         Make a tag for each line matching a regular expression pattern\n\
 910         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 911         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 912         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 913         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 914   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 915         For example Tcl named tags can be created with:\n\
 916           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 917         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 918         `m' means to allow multi-line matches, `s' implies `m' and\n\
 919         causes dot to match any character, including newline.");
 920
 921   puts ("-R, --no-regex\n\
 922         Don't create tags from regexps for the following files.");
 923
 924   puts ("-I, --ignore-indentation\n\
 925         In C and C++ do not assume that a closing brace in the first\n\
 926         column is the final brace of a function or structure definition.");
 927
 928   puts ("-o FILE, --output=FILE\n\
 929         Write the tags to FILE.");
 930
 931   puts ("--parse-stdin=NAME\n\
 932         Read from standard input and record tags as belonging to file NAME.");
 933
 934   if (CTAGS)
 935     {
 936       puts ("-t, --typedefs\n\
 937         Generate tag entries for C and Ada typedefs.");
 938       puts ("-T, --typedefs-and-c++\n\
 939         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 940         and C++ member functions.");
 941     }
 942
 943   if (CTAGS)
 944     puts ("-u, --update\n\
 945         Update the tag entries for the given files, leaving tag\n\
 946         entries for other files in place.  Currently, this is\n\
 947         implemented by deleting the existing entries for the given\n\
 948         files and then rewriting the new entries at the end of the\n\
 949         tags file.  It is often faster to simply rebuild the entire\n\
 950         tag file than to use this.");
 951
 952   if (CTAGS)
 953     {
 954       puts ("-v, --vgrind\n\
 955         Print on the standard output an index of items intended for\n\
 956         human consumption, similar to the output of vgrind.  The index\n\
 957         is sorted, and gives the page number of each item.");
 958
 959       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 960         puts ("-w, --no-duplicates\n\
 961         Do not create duplicate tag entries, for compatibility with\n\
 962         traditional ctags.");
 963
 964       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 965         puts ("-w, --no-warn\n\
 966         Suppress warning messages about duplicate tag entries.");
 967
 968       puts ("-x, --cxref\n\
 969         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 970         The output uses line numbers instead of page numbers, but\n\
 971         beyond that the differences are cosmetic; try both to see\n\
 972         which you like.");
 973     }
 974
 975   puts ("-V, --version\n\
 976         Print the version of the program.\n\
 977 -h, --help\n\
 978         Print this help message.\n\
 979         Followed by one or more `--language' options prints detailed\n\
 980         help about tag generation for the specified languages.");
 981
 982   print_language_names ();
 983
 984   puts ("");
 985   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 986
 987   exit (EXIT_SUCCESS);
 988 }
 989
 990 \f
 991 int
 992 main (int argc, char **argv)
 993 {
 994   int i;
 995   unsigned int nincluded_files;
 996   char **included_files;
 997   argument *argbuffer;
 998   int current_arg, file_count;
 999   linebuffer filename_lb;
1000   bool help_asked = false;
1001   ptrdiff_t len;
1002   char *optstring;
1003   int opt;
1004
1005   progname = argv[0];
1006   nincluded_files = 0;
1007   included_files = xnew (argc, char *);
1008   current_arg = 0;
1009   file_count = 0;
1010
1011   /* Allocate enough no matter what happens.  Overkill, but each one
1012      is small. */
1013   argbuffer = xnew (argc, argument);
1014
1015   /*
1016    * Always find typedefs and structure tags.
1017    * Also default to find macro constants, enum constants, struct
1018    * members and global variables.  Do it for both etags and ctags.
1019    */
1020   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1021   globals = members = true;
1022
1023   /* When the optstring begins with a '-' getopt_long does not rearrange the
1024      non-options arguments to be at the end, but leaves them alone. */
1025   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1026                       (CTAGS) ? "BxdtTuvw" : "Di:",
1027                       "");
1028
1029   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1030     switch (opt)
1031       {
1032       case 0:
1033         /* If getopt returns 0, then it has already processed a
1034            long-named option.  We should do nothing.  */
1035         break;
1036
1037       case 1:
1038         /* This means that a file name has been seen.  Record it. */
1039         argbuffer[current_arg].arg_type = at_filename;
1040         argbuffer[current_arg].what     = optarg;
1041         len = strlen (optarg);
1042         if (whatlen_max < len)
1043           whatlen_max = len;
1044         ++current_arg;
1045         ++file_count;
1046         break;
1047
1048       case STDIN:
1049         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1050         argbuffer[current_arg].arg_type = at_stdin;
1051         argbuffer[current_arg].what     = optarg;
1052         len = strlen (optarg);
1053         if (whatlen_max < len)
1054           whatlen_max = len;
1055         ++current_arg;
1056         ++file_count;
1057         if (parsing_stdin)
1058           fatal ("cannot parse standard input more than once", (char *)NULL);
1059         parsing_stdin = true;
1060         break;
1061
1062         /* Common options. */
1063       case 'a': append_to_tagfile = true;       break;
1064       case 'C': cplusplus = true;               break;
1065       case 'f':         /* for compatibility with old makefiles */
1066       case 'o':
1067         if (tagfile)
1068           {
1069             error ("-o option may only be given once.");
1070             suggest_asking_for_help ();
1071             /* NOTREACHED */
1072           }
1073         tagfile = optarg;
1074         break;
1075       case 'I':
1076       case 'S':         /* for backward compatibility */
1077         ignoreindent = true;
1078         break;
1079       case 'l':
1080         {
1081           language *lang = get_language_from_langname (optarg);
1082           if (lang != NULL)
1083             {
1084               argbuffer[current_arg].lang = lang;
1085               argbuffer[current_arg].arg_type = at_language;
1086               ++current_arg;
1087             }
1088         }
1089         break;
1090       case 'c':
1091         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1092         optarg = concat (optarg, "i", ""); /* memory leak here */
1093         /* FALLTHRU */
1094       case 'r':
1095         argbuffer[current_arg].arg_type = at_regexp;
1096         argbuffer[current_arg].what = optarg;
1097         len = strlen (optarg);
1098         if (whatlen_max < len)
1099           whatlen_max = len;
1100         ++current_arg;
1101         break;
1102       case 'R':
1103         argbuffer[current_arg].arg_type = at_regexp;
1104         argbuffer[current_arg].what = NULL;
1105         ++current_arg;
1106         break;
1107       case 'V':
1108         print_version ();
1109         break;
1110       case 'h':
1111       case 'H':
1112         help_asked = true;
1113         break;
1114
1115         /* Etags options */
1116       case 'D': constantypedefs = false;                        break;
1117       case 'i': included_files[nincluded_files++] = optarg;     break;
1118
1119         /* Ctags options. */
1120       case 'B': searchar = '?';                                 break;
1121       case 'd': constantypedefs = true;                         break;
1122       case 't': typedefs = true;                                break;
1123       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1124       case 'u': update = true;                                  break;
1125       case 'v': vgrind_style = true;                      /*FALLTHRU*/
1126       case 'x': cxref_style = true;                             break;
1127       case 'w': no_warnings = true;                             break;
1128       default:
1129         suggest_asking_for_help ();
1130         /* NOTREACHED */
1131       }
1132
1133   /* No more options.  Store the rest of arguments. */
1134   for (; optind < argc; optind++)
1135     {
1136       argbuffer[current_arg].arg_type = at_filename;
1137       argbuffer[current_arg].what = argv[optind];
1138       len = strlen (argv[optind]);
1139       if (whatlen_max < len)
1140         whatlen_max = len;
1141       ++current_arg;
1142       ++file_count;
1143     }
1144
1145   argbuffer[current_arg].arg_type = at_end;
1146
1147   if (help_asked)
1148     print_help (argbuffer);
1149     /* NOTREACHED */
1150
1151   if (nincluded_files == 0 && file_count == 0)
1152     {
1153       error ("no input files specified.");
1154       suggest_asking_for_help ();
1155       /* NOTREACHED */
1156     }
1157
1158   if (tagfile == NULL)
1159     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1160   cwd = etags_getcwd ();        /* the current working directory */
1161   if (cwd[strlen (cwd) - 1] != '/')
1162     {
1163       char *oldcwd = cwd;
1164       cwd = concat (oldcwd, "/", "");
1165       free (oldcwd);
1166     }
1167
1168   /* Compute base directory for relative file names. */
1169   if (streq (tagfile, "-")
1170       || strneq (tagfile, "/dev/", 5))
1171     tagfiledir = cwd;            /* relative file names are relative to cwd */
1172   else
1173     {
1174       canonicalize_filename (tagfile);
1175       tagfiledir = absolute_dirname (tagfile, cwd);
1176     }
1177
1178   init ();                      /* set up boolean "functions" */
1179
1180   linebuffer_init (&lb);
1181   linebuffer_init (&filename_lb);
1182   linebuffer_init (&filebuf);
1183   linebuffer_init (&token_name);
1184
1185   if (!CTAGS)
1186     {
1187       if (streq (tagfile, "-"))
1188         {
1189           tagf = stdout;
1190           SET_BINARY (fileno (stdout));
1191         }
1192       else
1193         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1194       if (tagf == NULL)
1195         pfatal (tagfile);
1196     }
1197
1198   /*
1199    * Loop through files finding functions.
1200    */
1201   for (i = 0; i < current_arg; i++)
1202     {
1203       static language *lang;    /* non-NULL if language is forced */
1204       char *this_file;
1205
1206       switch (argbuffer[i].arg_type)
1207         {
1208         case at_language:
1209           lang = argbuffer[i].lang;
1210           break;
1211         case at_regexp:
1212           analyse_regex (argbuffer[i].what);
1213           break;
1214         case at_filename:
1215               this_file = argbuffer[i].what;
1216               /* Input file named "-" means read file names from stdin
1217                  (one per line) and use them. */
1218               if (streq (this_file, "-"))
1219                 {
1220                   if (parsing_stdin)
1221                     fatal ("cannot parse standard input AND read file names from it",
1222                            (char *)NULL);
1223                   while (readline_internal (&filename_lb, stdin) > 0)
1224                     process_file_name (filename_lb.buffer, lang);
1225                 }
1226               else
1227                 process_file_name (this_file, lang);
1228           break;
1229         case at_stdin:
1230           this_file = argbuffer[i].what;
1231           process_file (stdin, this_file, lang);
1232           break;
1233         }
1234     }
1235
1236   free_regexps ();
1237   free (lb.buffer);
1238   free (filebuf.buffer);
1239   free (token_name.buffer);
1240
1241   if (!CTAGS || cxref_style)
1242     {
1243       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1244       put_entries (nodehead);
1245       free_tree (nodehead);
1246       nodehead = NULL;
1247       if (!CTAGS)
1248         {
1249           fdesc *fdp;
1250
1251           /* Output file entries that have no tags. */
1252           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1253             if (!fdp->written)
1254               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1255
1256           while (nincluded_files-- > 0)
1257             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1258
1259           if (fclose (tagf) == EOF)
1260             pfatal (tagfile);
1261         }
1262
1263       exit (EXIT_SUCCESS);
1264     }
1265
1266   /* From here on, we are in (CTAGS && !cxref_style) */
1267   if (update)
1268     {
1269       char *cmd =
1270         xmalloc (strlen (tagfile) + whatlen_max +
1271                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1272       for (i = 0; i < current_arg; ++i)
1273         {
1274           switch (argbuffer[i].arg_type)
1275             {
1276             case at_filename:
1277             case at_stdin:
1278               break;
1279             default:
1280               continue;         /* the for loop */
1281             }
1282           strcpy (cmd, "mv ");
1283           strcat (cmd, tagfile);
1284           strcat (cmd, " OTAGS;fgrep -v '\t");
1285           strcat (cmd, argbuffer[i].what);
1286           strcat (cmd, "\t' OTAGS >");
1287           strcat (cmd, tagfile);
1288           strcat (cmd, ";rm OTAGS");
1289           if (system (cmd) != EXIT_SUCCESS)
1290             fatal ("failed to execute shell command", (char *)NULL);
1291         }
1292       free (cmd);
1293       append_to_tagfile = true;
1294     }
1295
1296   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1297   if (tagf == NULL)
1298     pfatal (tagfile);
1299   put_entries (nodehead);       /* write all the tags (CTAGS) */
1300   free_tree (nodehead);
1301   nodehead = NULL;
1302   if (fclose (tagf) == EOF)
1303     pfatal (tagfile);
1304
1305   if (CTAGS)
1306     if (append_to_tagfile || update)
1307       {
1308         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1309         /* Maybe these should be used:
1310            setenv ("LC_COLLATE", "C", 1);
1311            setenv ("LC_ALL", "C", 1); */
1312         strcpy (cmd, "sort -u -o ");
1313         strcat (cmd, tagfile);
1314         strcat (cmd, " ");
1315         strcat (cmd, tagfile);
1316         exit (system (cmd));
1317       }
1318   return EXIT_SUCCESS;
1319 }
1320
1321
1322 /*
1323  * Return a compressor given the file name.  If EXTPTR is non-zero,
1324  * return a pointer into FILE where the compressor-specific
1325  * extension begins.  If no compressor is found, NULL is returned
1326  * and EXTPTR is not significant.
1327  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1328  */
1329 static compressor *
1330 get_compressor_from_suffix (char *file, char **extptr)
1331 {
1332   compressor *compr;
1333   char *slash, *suffix;
1334
1335   /* File has been processed by canonicalize_filename,
1336      so we don't need to consider backslashes on DOS_NT.  */
1337   slash = etags_strrchr (file, '/');
1338   suffix = etags_strrchr (file, '.');
1339   if (suffix == NULL || suffix < slash)
1340     return NULL;
1341   if (extptr != NULL)
1342     *extptr = suffix;
1343   suffix += 1;
1344   /* Let those poor souls who live with DOS 8+3 file name limits get
1345      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1346      Only the first do loop is run if not MSDOS */
1347   do
1348     {
1349       for (compr = compressors; compr->suffix != NULL; compr++)
1350         if (streq (compr->suffix, suffix))
1351           return compr;
1352       if (!MSDOS)
1353         break;                  /* do it only once: not really a loop */
1354       if (extptr != NULL)
1355         *extptr = ++suffix;
1356     } while (*suffix != '\0');
1357   return NULL;
1358 }
1359
1360
1361
1362 /*
1363  * Return a language given the name.
1364  */
1365 static language *
1366 get_language_from_langname (const char *name)
1367 {
1368   language *lang;
1369
1370   if (name == NULL)
1371     error ("empty language name");
1372   else
1373     {
1374       for (lang = lang_names; lang->name != NULL; lang++)
1375         if (streq (name, lang->name))
1376           return lang;
1377       error ("unknown language \"%s\"", name);
1378     }
1379
1380   return NULL;
1381 }
1382
1383
1384 /*
1385  * Return a language given the interpreter name.
1386  */
1387 static language *
1388 get_language_from_interpreter (char *interpreter)
1389 {
1390   language *lang;
1391   const char **iname;
1392
1393   if (interpreter == NULL)
1394     return NULL;
1395   for (lang = lang_names; lang->name != NULL; lang++)
1396     if (lang->interpreters != NULL)
1397       for (iname = lang->interpreters; *iname != NULL; iname++)
1398         if (streq (*iname, interpreter))
1399             return lang;
1400
1401   return NULL;
1402 }
1403
1404
1405
1406 /*
1407  * Return a language given the file name.
1408  */
1409 static language *
1410 get_language_from_filename (char *file, int case_sensitive)
1411 {
1412   language *lang;
1413   const char **name, **ext, *suffix;
1414
1415   /* Try whole file name first. */
1416   for (lang = lang_names; lang->name != NULL; lang++)
1417     if (lang->filenames != NULL)
1418       for (name = lang->filenames; *name != NULL; name++)
1419         if ((case_sensitive)
1420             ? streq (*name, file)
1421             : strcaseeq (*name, file))
1422           return lang;
1423
1424   /* If not found, try suffix after last dot. */
1425   suffix = etags_strrchr (file, '.');
1426   if (suffix == NULL)
1427     return NULL;
1428   suffix += 1;
1429   for (lang = lang_names; lang->name != NULL; lang++)
1430     if (lang->suffixes != NULL)
1431       for (ext = lang->suffixes; *ext != NULL; ext++)
1432         if ((case_sensitive)
1433             ? streq (*ext, suffix)
1434             : strcaseeq (*ext, suffix))
1435           return lang;
1436   return NULL;
1437 }
1438
1439 \f
1440 /*
1441  * This routine is called on each file argument.
1442  */
1443 static void
1444 process_file_name (char *file, language *lang)
1445 {
1446   struct stat stat_buf;
1447   FILE *inf;
1448   fdesc *fdp;
1449   compressor *compr;
1450   char *compressed_name, *uncompressed_name;
1451   char *ext, *real_name;
1452   int retval;
1453
1454   canonicalize_filename (file);
1455   if (streq (file, tagfile) && !streq (tagfile, "-"))
1456     {
1457       error ("skipping inclusion of %s in self.", file);
1458       return;
1459     }
1460   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1461     {
1462       compressed_name = NULL;
1463       real_name = uncompressed_name = savestr (file);
1464     }
1465   else
1466     {
1467       real_name = compressed_name = savestr (file);
1468       uncompressed_name = savenstr (file, ext - file);
1469     }
1470
1471   /* If the canonicalized uncompressed name
1472      has already been dealt with, skip it silently. */
1473   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1474     {
1475       assert (fdp->infname != NULL);
1476       if (streq (uncompressed_name, fdp->infname))
1477         goto cleanup;
1478     }
1479
1480   if (stat (real_name, &stat_buf) != 0)
1481     {
1482       /* Reset real_name and try with a different name. */
1483       real_name = NULL;
1484       if (compressed_name != NULL) /* try with the given suffix */
1485         {
1486           if (stat (uncompressed_name, &stat_buf) == 0)
1487             real_name = uncompressed_name;
1488         }
1489       else                      /* try all possible suffixes */
1490         {
1491           for (compr = compressors; compr->suffix != NULL; compr++)
1492             {
1493               compressed_name = concat (file, ".", compr->suffix);
1494               if (stat (compressed_name, &stat_buf) != 0)
1495                 {
1496                   if (MSDOS)
1497                     {
1498                       char *suf = compressed_name + strlen (file);
1499                       size_t suflen = strlen (compr->suffix) + 1;
1500                       for ( ; suf[1]; suf++, suflen--)
1501                         {
1502                           memmove (suf, suf + 1, suflen);
1503                           if (stat (compressed_name, &stat_buf) == 0)
1504                             {
1505                               real_name = compressed_name;
1506                               break;
1507                             }
1508                         }
1509                       if (real_name != NULL)
1510                         break;
1511                     } /* MSDOS */
1512                   free (compressed_name);
1513                   compressed_name = NULL;
1514                 }
1515               else
1516                 {
1517                   real_name = compressed_name;
1518                   break;
1519                 }
1520             }
1521         }
1522       if (real_name == NULL)
1523         {
1524           perror (file);
1525           goto cleanup;
1526         }
1527     } /* try with a different name */
1528
1529   if (!S_ISREG (stat_buf.st_mode))
1530     {
1531       error ("skipping %s: it is not a regular file.", real_name);
1532       goto cleanup;
1533     }
1534   if (real_name == compressed_name)
1535     {
1536       char *cmd = concat (compr->command, " ", real_name);
1537       inf = popen (cmd, "rb");
1538       free (cmd);
1539     }
1540   else
1541     inf = fopen (real_name, "rb");
1542   if (inf == NULL)
1543     {
1544       perror (real_name);
1545       goto cleanup;
1546     }
1547
1548   process_file (inf, uncompressed_name, lang);
1549
1550   if (real_name == compressed_name)
1551     retval = pclose (inf);
1552   else
1553     retval = fclose (inf);
1554   if (retval < 0)
1555     pfatal (file);
1556
1557  cleanup:
1558   free (compressed_name);
1559   free (uncompressed_name);
1560   last_node = NULL;
1561   curfdp = NULL;
1562   return;
1563 }
1564
1565 static void
1566 process_file (FILE *fh, char *fn, language *lang)
1567 {
1568   static const fdesc emptyfdesc;
1569   fdesc *fdp;
1570
1571   /* Create a new input file description entry. */
1572   fdp = xnew (1, fdesc);
1573   *fdp = emptyfdesc;
1574   fdp->next = fdhead;
1575   fdp->infname = savestr (fn);
1576   fdp->lang = lang;
1577   fdp->infabsname = absolute_filename (fn, cwd);
1578   fdp->infabsdir = absolute_dirname (fn, cwd);
1579   if (filename_is_absolute (fn))
1580     {
1581       /* An absolute file name.  Canonicalize it. */
1582       fdp->taggedfname = absolute_filename (fn, NULL);
1583     }
1584   else
1585     {
1586       /* A file name relative to cwd.  Make it relative
1587          to the directory of the tags file. */
1588       fdp->taggedfname = relative_filename (fn, tagfiledir);
1589     }
1590   fdp->usecharno = true;        /* use char position when making tags */
1591   fdp->prop = NULL;
1592   fdp->written = false;         /* not written on tags file yet */
1593
1594   fdhead = fdp;
1595   curfdp = fdhead;              /* the current file description */
1596
1597   find_entries (fh);
1598
1599   /* If not Ctags, and if this is not metasource and if it contained no #line
1600      directives, we can write the tags and free all nodes pointing to
1601      curfdp. */
1602   if (!CTAGS
1603       && curfdp->usecharno      /* no #line directives in this file */
1604       && !curfdp->lang->metasource)
1605     {
1606       node *np, *prev;
1607
1608       /* Look for the head of the sublist relative to this file.  See add_node
1609          for the structure of the node tree. */
1610       prev = NULL;
1611       for (np = nodehead; np != NULL; prev = np, np = np->left)
1612         if (np->fdp == curfdp)
1613           break;
1614
1615       /* If we generated tags for this file, write and delete them. */
1616       if (np != NULL)
1617         {
1618           /* This is the head of the last sublist, if any.  The following
1619              instructions depend on this being true. */
1620           assert (np->left == NULL);
1621
1622           assert (fdhead == curfdp);
1623           assert (last_node->fdp == curfdp);
1624           put_entries (np);     /* write tags for file curfdp->taggedfname */
1625           free_tree (np);       /* remove the written nodes */
1626           if (prev == NULL)
1627             nodehead = NULL;    /* no nodes left */
1628           else
1629             prev->left = NULL;  /* delete the pointer to the sublist */
1630         }
1631     }
1632 }
1633
1634 /*
1635  * This routine sets up the boolean pseudo-functions which work
1636  * by setting boolean flags dependent upon the corresponding character.
1637  * Every char which is NOT in that string is not a white char.  Therefore,
1638  * all of the array "_wht" is set to false, and then the elements
1639  * subscripted by the chars in "white" are set to true.  Thus "_wht"
1640  * of a char is true if it is the string "white", else false.
1641  */
1642 static void
1643 init (void)
1644 {
1645   const char *sp;
1646   int i;
1647
1648   for (i = 0; i < CHARS; i++)
1649     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i)
1650       = false;
1651   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = true;
1652   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = true;
1653   notinname ('\0') = notinname ('\n');
1654   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = true;
1655   begtoken ('\0') = begtoken ('\n');
1656   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = true;
1657   intoken ('\0') = intoken ('\n');
1658   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = true;
1659   endtoken ('\0') = endtoken ('\n');
1660 }
1661
1662 /*
1663  * This routine opens the specified file and calls the function
1664  * which finds the function and type definitions.
1665  */
1666 static void
1667 find_entries (FILE *inf)
1668 {
1669   char *cp;
1670   language *lang = curfdp->lang;
1671   Lang_function *parser = NULL;
1672
1673   /* If user specified a language, use it. */
1674   if (lang != NULL && lang->function != NULL)
1675     {
1676       parser = lang->function;
1677     }
1678
1679   /* Else try to guess the language given the file name. */
1680   if (parser == NULL)
1681     {
1682       lang = get_language_from_filename (curfdp->infname, true);
1683       if (lang != NULL && lang->function != NULL)
1684         {
1685           curfdp->lang = lang;
1686           parser = lang->function;
1687         }
1688     }
1689
1690   /* Else look for sharp-bang as the first two characters. */
1691   if (parser == NULL
1692       && readline_internal (&lb, inf) > 0
1693       && lb.len >= 2
1694       && lb.buffer[0] == '#'
1695       && lb.buffer[1] == '!')
1696     {
1697       char *lp;
1698
1699       /* Set lp to point at the first char after the last slash in the
1700          line or, if no slashes, at the first nonblank.  Then set cp to
1701          the first successive blank and terminate the string. */
1702       lp = etags_strrchr (lb.buffer+2, '/');
1703       if (lp != NULL)
1704         lp += 1;
1705       else
1706         lp = skip_spaces (lb.buffer + 2);
1707       cp = skip_non_spaces (lp);
1708       *cp = '\0';
1709
1710       if (strlen (lp) > 0)
1711         {
1712           lang = get_language_from_interpreter (lp);
1713           if (lang != NULL && lang->function != NULL)
1714             {
1715               curfdp->lang = lang;
1716               parser = lang->function;
1717             }
1718         }
1719     }
1720
1721   /* We rewind here, even if inf may be a pipe.  We fail if the
1722      length of the first line is longer than the pipe block size,
1723      which is unlikely. */
1724   rewind (inf);
1725
1726   /* Else try to guess the language given the case insensitive file name. */
1727   if (parser == NULL)
1728     {
1729       lang = get_language_from_filename (curfdp->infname, false);
1730       if (lang != NULL && lang->function != NULL)
1731         {
1732           curfdp->lang = lang;
1733           parser = lang->function;
1734         }
1735     }
1736
1737   /* Else try Fortran or C. */
1738   if (parser == NULL)
1739     {
1740       node *old_last_node = last_node;
1741
1742       curfdp->lang = get_language_from_langname ("fortran");
1743       find_entries (inf);
1744
1745       if (old_last_node == last_node)
1746         /* No Fortran entries found.  Try C. */
1747         {
1748           /* We do not tag if rewind fails.
1749              Only the file name will be recorded in the tags file. */
1750           rewind (inf);
1751           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1752           find_entries (inf);
1753         }
1754       return;
1755     }
1756
1757   if (!no_line_directive
1758       && curfdp->lang != NULL && curfdp->lang->metasource)
1759     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1760        file, or anyway we parsed a file that is automatically generated from
1761        this one.  If this is the case, the bingo.c file contained #line
1762        directives that generated tags pointing to this file.  Let's delete
1763        them all before parsing this file, which is the real source. */
1764     {
1765       fdesc **fdpp = &fdhead;
1766       while (*fdpp != NULL)
1767         if (*fdpp != curfdp
1768             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1769           /* We found one of those!  We must delete both the file description
1770              and all tags referring to it. */
1771           {
1772             fdesc *badfdp = *fdpp;
1773
1774             /* Delete the tags referring to badfdp->taggedfname
1775                that were obtained from badfdp->infname. */
1776             invalidate_nodes (badfdp, &nodehead);
1777
1778             *fdpp = badfdp->next; /* remove the bad description from the list */
1779             free_fdesc (badfdp);
1780           }
1781         else
1782           fdpp = &(*fdpp)->next; /* advance the list pointer */
1783     }
1784
1785   assert (parser != NULL);
1786
1787   /* Generic initializations before reading from file. */
1788   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1789
1790   /* Generic initializations before parsing file with readline. */
1791   lineno = 0;                  /* reset global line number */
1792   charno = 0;                  /* reset global char number */
1793   linecharno = 0;              /* reset global char number of line start */
1794
1795   parser (inf);
1796
1797   regex_tag_multiline ();
1798 }
1799
1800 \f
1801 /*
1802  * Check whether an implicitly named tag should be created,
1803  * then call `pfnote'.
1804  * NAME is a string that is internally copied by this function.
1805  *
1806  * TAGS format specification
1807  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1808  * The following is explained in some more detail in etc/ETAGS.EBNF.
1809  *
1810  * make_tag creates tags with "implicit tag names" (unnamed tags)
1811  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1812  *  1. NAME does not contain any of the characters in NONAM;
1813  *  2. LINESTART contains name as either a rightmost, or rightmost but
1814  *     one character, substring;
1815  *  3. the character, if any, immediately before NAME in LINESTART must
1816  *     be a character in NONAM;
1817  *  4. the character, if any, immediately after NAME in LINESTART must
1818  *     also be a character in NONAM.
1819  *
1820  * The implementation uses the notinname() macro, which recognizes the
1821  * characters stored in the string `nonam'.
1822  * etags.el needs to use the same characters that are in NONAM.
1823  */
1824 static void
1825 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1826           int namelen,          /* tag length */
1827           bool is_func,         /* tag is a function */
1828           char *linestart,      /* start of the line where tag is */
1829           int linelen,          /* length of the line where tag is */
1830           int lno,              /* line number */
1831           long int cno)         /* character number */
1832 {
1833   bool named = (name != NULL && namelen > 0);
1834   char *nname = NULL;
1835
1836   if (!CTAGS && named)          /* maybe set named to false */
1837     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1838        such that etags.el can guess a name from it. */
1839     {
1840       int i;
1841       register const char *cp = name;
1842
1843       for (i = 0; i < namelen; i++)
1844         if (notinname (*cp++))
1845           break;
1846       if (i == namelen)                         /* rule #1 */
1847         {
1848           cp = linestart + linelen - namelen;
1849           if (notinname (linestart[linelen-1]))
1850             cp -= 1;                            /* rule #4 */
1851           if (cp >= linestart                   /* rule #2 */
1852               && (cp == linestart
1853                   || notinname (cp[-1]))        /* rule #3 */
1854               && strneq (name, cp, namelen))    /* rule #2 */
1855             named = false;      /* use implicit tag name */
1856         }
1857     }
1858
1859   if (named)
1860     nname = savenstr (name, namelen);
1861
1862   pfnote (nname, is_func, linestart, linelen, lno, cno);
1863 }
1864
1865 /* Record a tag. */
1866 static void
1867 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1868         long int cno)
1869                                 /* tag name, or NULL if unnamed */
1870                                 /* tag is a function */
1871                                 /* start of the line where tag is */
1872                                 /* length of the line where tag is */
1873                                 /* line number */
1874                                 /* character number */
1875 {
1876   register node *np;
1877
1878   assert (name == NULL || name[0] != '\0');
1879   if (CTAGS && name == NULL)
1880     return;
1881
1882   np = xnew (1, node);
1883
1884   /* If ctags mode, change name "main" to M<thisfilename>. */
1885   if (CTAGS && !cxref_style && streq (name, "main"))
1886     {
1887       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1888       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1889       fp = etags_strrchr (np->name, '.');
1890       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1891         fp[0] = '\0';
1892     }
1893   else
1894     np->name = name;
1895   np->valid = true;
1896   np->been_warned = false;
1897   np->fdp = curfdp;
1898   np->is_func = is_func;
1899   np->lno = lno;
1900   if (np->fdp->usecharno)
1901     /* Our char numbers are 0-base, because of C language tradition?
1902        ctags compatibility?  old versions compatibility?   I don't know.
1903        Anyway, since emacs's are 1-base we expect etags.el to take care
1904        of the difference.  If we wanted to have 1-based numbers, we would
1905        uncomment the +1 below. */
1906     np->cno = cno /* + 1 */ ;
1907   else
1908     np->cno = invalidcharno;
1909   np->left = np->right = NULL;
1910   if (CTAGS && !cxref_style)
1911     {
1912       if (strlen (linestart) < 50)
1913         np->regex = concat (linestart, "$", "");
1914       else
1915         np->regex = savenstr (linestart, 50);
1916     }
1917   else
1918     np->regex = savenstr (linestart, linelen);
1919
1920   add_node (np, &nodehead);
1921 }
1922
1923 /*
1924  * free_tree ()
1925  *      recurse on left children, iterate on right children.
1926  */
1927 static void
1928 free_tree (register node *np)
1929 {
1930   while (np)
1931     {
1932       register node *node_right = np->right;
1933       free_tree (np->left);
1934       free (np->name);
1935       free (np->regex);
1936       free (np);
1937       np = node_right;
1938     }
1939 }
1940
1941 /*
1942  * free_fdesc ()
1943  *      delete a file description
1944  */
1945 static void
1946 free_fdesc (register fdesc *fdp)
1947 {
1948   free (fdp->infname);
1949   free (fdp->infabsname);
1950   free (fdp->infabsdir);
1951   free (fdp->taggedfname);
1952   free (fdp->prop);
1953   free (fdp);
1954 }
1955
1956 /*
1957  * add_node ()
1958  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1959  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1960  *      balancing.
1961  *
1962  *      add_node is the only function allowed to add nodes, so it can
1963  *      maintain state.
1964  */
1965 static void
1966 add_node (node *np, node **cur_node_p)
1967 {
1968   register int dif;
1969   register node *cur_node = *cur_node_p;
1970
1971   if (cur_node == NULL)
1972     {
1973       *cur_node_p = np;
1974       last_node = np;
1975       return;
1976     }
1977
1978   if (!CTAGS)
1979     /* Etags Mode */
1980     {
1981       /* For each file name, tags are in a linked sublist on the right
1982          pointer.  The first tags of different files are a linked list
1983          on the left pointer.  last_node points to the end of the last
1984          used sublist. */
1985       if (last_node != NULL && last_node->fdp == np->fdp)
1986         {
1987           /* Let's use the same sublist as the last added node. */
1988           assert (last_node->right == NULL);
1989           last_node->right = np;
1990           last_node = np;
1991         }
1992       else if (cur_node->fdp == np->fdp)
1993         {
1994           /* Scanning the list we found the head of a sublist which is
1995              good for us.  Let's scan this sublist. */
1996           add_node (np, &cur_node->right);
1997         }
1998       else
1999         /* The head of this sublist is not good for us.  Let's try the
2000            next one. */
2001         add_node (np, &cur_node->left);
2002     } /* if ETAGS mode */
2003
2004   else
2005     {
2006       /* Ctags Mode */
2007       dif = strcmp (np->name, cur_node->name);
2008
2009       /*
2010        * If this tag name matches an existing one, then
2011        * do not add the node, but maybe print a warning.
2012        */
2013       if (no_duplicates && !dif)
2014         {
2015           if (np->fdp == cur_node->fdp)
2016             {
2017               if (!no_warnings)
2018                 {
2019                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2020                            np->fdp->infname, lineno, np->name);
2021                   fprintf (stderr, "Second entry ignored\n");
2022                 }
2023             }
2024           else if (!cur_node->been_warned && !no_warnings)
2025             {
2026               fprintf
2027                 (stderr,
2028                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2029                  np->fdp->infname, cur_node->fdp->infname, np->name);
2030               cur_node->been_warned = true;
2031             }
2032           return;
2033         }
2034
2035       /* Actually add the node */
2036       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2037     } /* if CTAGS mode */
2038 }
2039
2040 /*
2041  * invalidate_nodes ()
2042  *      Scan the node tree and invalidate all nodes pointing to the
2043  *      given file description (CTAGS case) or free them (ETAGS case).
2044  */
2045 static void
2046 invalidate_nodes (fdesc *badfdp, node **npp)
2047 {
2048   node *np = *npp;
2049
2050   if (np == NULL)
2051     return;
2052
2053   if (CTAGS)
2054     {
2055       if (np->left != NULL)
2056         invalidate_nodes (badfdp, &np->left);
2057       if (np->fdp == badfdp)
2058         np->valid = false;
2059       if (np->right != NULL)
2060         invalidate_nodes (badfdp, &np->right);
2061     }
2062   else
2063     {
2064       assert (np->fdp != NULL);
2065       if (np->fdp == badfdp)
2066         {
2067           *npp = np->left;      /* detach the sublist from the list */
2068           np->left = NULL;      /* isolate it */
2069           free_tree (np);       /* free it */
2070           invalidate_nodes (badfdp, npp);
2071         }
2072       else
2073         invalidate_nodes (badfdp, &np->left);
2074     }
2075 }
2076
2077 \f
2078 static int total_size_of_entries (node *);
2079 static int number_len (long) ATTRIBUTE_CONST;
2080
/* Number of decimal digits needed to print the non-negative number NUM. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to begin with, and one more for every further power of
     ten that NUM reaches. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2090
2091 /*
2092  * Return total number of characters that put_entries will output for
2093  * the nodes in the linked list at the right of the specified node.
2094  * This count is irrelevant with etags.el since emacs 19.34 at least,
2095  * but is still supplied for backward compatibility.
2096  */
2097 static int
2098 total_size_of_entries (register node *np)
2099 {
2100   register int total = 0;
2101
2102   for (; np != NULL; np = np->right)
2103     if (np->valid)
2104       {
2105         total += strlen (np->regex) + 1;                /* pat\177 */
2106         if (np->name != NULL)
2107           total += strlen (np->name) + 1;               /* name\001 */
2108         total += number_len ((long) np->lno) + 1;       /* lno, */
2109         if (np->cno != invalidcharno)                   /* cno */
2110           total += number_len (np->cno);
2111         total += 1;                                     /* newline */
2112       }
2113
2114   return total;
2115 }
2116
2117 static void
2118 put_entries (register node *np)
2119 {
2120   register char *sp;
2121   static fdesc *fdp = NULL;
2122
2123   if (np == NULL)
2124     return;
2125
2126   /* Output subentries that precede this one */
2127   if (CTAGS)
2128     put_entries (np->left);
2129
2130   /* Output this entry */
2131   if (np->valid)
2132     {
2133       if (!CTAGS)
2134         {
2135           /* Etags mode */
2136           if (fdp != np->fdp)
2137             {
2138               fdp = np->fdp;
2139               fprintf (tagf, "\f\n%s,%d\n",
2140                        fdp->taggedfname, total_size_of_entries (np));
2141               fdp->written = true;
2142             }
2143           fputs (np->regex, tagf);
2144           fputc ('\177', tagf);
2145           if (np->name != NULL)
2146             {
2147               fputs (np->name, tagf);
2148               fputc ('\001', tagf);
2149             }
2150           fprintf (tagf, "%d,", np->lno);
2151           if (np->cno != invalidcharno)
2152             fprintf (tagf, "%ld", np->cno);
2153           fputs ("\n", tagf);
2154         }
2155       else
2156         {
2157           /* Ctags mode */
2158           if (np->name == NULL)
2159             error ("internal error: NULL name in ctags mode.");
2160
2161           if (cxref_style)
2162             {
2163               if (vgrind_style)
2164                 fprintf (stdout, "%s %s %d\n",
2165                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2166               else
2167                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2168                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2169             }
2170           else
2171             {
2172               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2173
2174               if (np->is_func)
2175                 {               /* function or #define macro with args */
2176                   putc (searchar, tagf);
2177                   putc ('^', tagf);
2178
2179                   for (sp = np->regex; *sp; sp++)
2180                     {
2181                       if (*sp == '\\' || *sp == searchar)
2182                         putc ('\\', tagf);
2183                       putc (*sp, tagf);
2184                     }
2185                   putc (searchar, tagf);
2186                 }
2187               else
2188                 {               /* anything else; text pattern inadequate */
2189                   fprintf (tagf, "%d", np->lno);
2190                 }
2191               putc ('\n', tagf);
2192             }
2193         }
2194     } /* if this node contains a valid tag */
2195
2196   /* Output subentries that follow this one */
2197   put_entries (np->right);
2198   if (!CTAGS)
2199     put_entries (np->left);
2200 }
2201
2202 \f
/* C extensions.  The table of C symbols below uses these values, and
   the combination (C_JAVA & ~C_PLPL), to say which keywords belong to
   which language. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 * Kinds of symbol that the table of C symbols can return for a word;
 * st_none is the value for a word that is not in the table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2226
2227 /* Feed stuff between (but not including) %[ and %] lines to:
2228      gperf -m 5
2229 %[
2230 %compare-strncmp
2231 %enum
2232 %struct-type
2233 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2234 %%
2235 if,             0,                      st_C_ignore
2236 for,            0,                      st_C_ignore
2237 while,          0,                      st_C_ignore
2238 switch,         0,                      st_C_ignore
2239 return,         0,                      st_C_ignore
2240 __attribute__,  0,                      st_C_attribute
2241 GTY,            0,                      st_C_attribute
2242 @interface,     0,                      st_C_objprot
2243 @protocol,      0,                      st_C_objprot
2244 @implementation,0,                      st_C_objimpl
2245 @end,           0,                      st_C_objend
2246 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2247 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2248 friend,         C_PLPL,                 st_C_ignore
2249 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2250 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2251 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2252 class,          0,                      st_C_class
2253 namespace,      C_PLPL,                 st_C_struct
2254 domain,         C_STAR,                 st_C_struct
2255 union,          0,                      st_C_struct
2256 struct,         0,                      st_C_struct
2257 extern,         0,                      st_C_extern
2258 enum,           0,                      st_C_enum
2259 typedef,        0,                      st_C_typedef
2260 define,         0,                      st_C_define
2261 undef,          0,                      st_C_define
2262 operator,       C_PLPL,                 st_C_operator
2263 template,       0,                      st_C_template
2264 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2265 DEFUN,          0,                      st_C_gnumacro
2266 SYSCALL,        0,                      st_C_gnumacro
2267 ENTRY,          0,                      st_C_gnumacro
2268 PSEUDO,         0,                      st_C_gnumacro
2269 # These are defined inside C functions, so currently they are not met.
2270 # EXFUN used in glibc, DEFVAR_* in emacs.
2271 #EXFUN,         0,                      st_C_gnumacro
2272 #DEFVAR_,       0,                      st_C_gnumacro
2273 %]
2274 and replace lines between %< and %> with its output, then:
2275  - remove the #if characterset check
2276  - make in_word_set static and not inline. */
2277 /*%<*/
2278 /* C code produced by gperf version 3.0.1 */
2279 /* Command-line: gperf -m 5  */
2280 /* Computed positions: -k'2-3' */
2281
2282 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2283 /* maximum key range = 33, duplicates = 0 */
2284
/* Hash the word STR of length LEN for the lookup in in_word_set.
   The value is LEN plus the table value of the second character and,
   unless LEN is exactly 2, plus that of the third character.  The
   caller must make sure that the characters looked at exist. */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character counts for every length but 2. */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  /* The second character always counts. */
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2330
2331 static struct C_stab_entry *
2332 in_word_set (register const char *str, register unsigned int len)
2333 {
2334   enum
2335     {
2336       TOTAL_KEYWORDS = 33,
2337       MIN_WORD_LENGTH = 2,
2338       MAX_WORD_LENGTH = 15,
2339       MIN_HASH_VALUE = 2,
2340       MAX_HASH_VALUE = 34
2341     };
2342
2343   static struct C_stab_entry wordlist[] =
2344     {
2345       {""}, {""},
2346       {"if",            0,                      st_C_ignore},
2347       {"GTY",           0,                      st_C_attribute},
2348       {"@end",          0,                      st_C_objend},
2349       {"union",         0,                      st_C_struct},
2350       {"define",                0,                      st_C_define},
2351       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2352       {"template",      0,                      st_C_template},
2353       {"operator",      C_PLPL,                 st_C_operator},
2354       {"@interface",    0,                      st_C_objprot},
2355       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2356       {"friend",                C_PLPL,                 st_C_ignore},
2357       {"typedef",       0,                      st_C_typedef},
2358       {"return",                0,                      st_C_ignore},
2359       {"@implementation",0,                     st_C_objimpl},
2360       {"@protocol",     0,                      st_C_objprot},
2361       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2362       {"extern",                0,                      st_C_extern},
2363       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2364       {"struct",                0,                      st_C_struct},
2365       {"domain",                C_STAR,                 st_C_struct},
2366       {"switch",                0,                      st_C_ignore},
2367       {"enum",          0,                      st_C_enum},
2368       {"for",           0,                      st_C_ignore},
2369       {"namespace",     C_PLPL,                 st_C_struct},
2370       {"class",         0,                      st_C_class},
2371       {"while",         0,                      st_C_ignore},
2372       {"undef",         0,                      st_C_define},
2373       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2374       {"__attribute__", 0,                      st_C_attribute},
2375       {"SYSCALL",       0,                      st_C_gnumacro},
2376       {"ENTRY",         0,                      st_C_gnumacro},
2377       {"PSEUDO",                0,                      st_C_gnumacro},
2378       {"DEFUN",         0,                      st_C_gnumacro}
2379     };
2380
2381   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2382     {
2383       int key = hash (str, len);
2384
2385       if (key <= MAX_HASH_VALUE && key >= 0)
2386         {
2387           const char *s = wordlist[key].name;
2388
2389           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2390             return &wordlist[key];
2391         }
2392     }
2393   return 0;
2394 }
2395 /*%>*/
2396
2397 static enum sym_type
2398 C_symtype (char *str, int len, int c_ext)
2399 {
2400   register struct C_stab_entry *se = in_word_set (str, len);
2401
2402   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2403     return st_none;
2404   return se->type;
2405 }
2406
2407 \f
2408 /*
2409  * Ignoring __attribute__ ((list))
2410  */
2411 static bool inattribute;        /* looking at an __attribute__ construct */
2412
2413 /*
2414  * C functions and variables are recognized using a simple
2415  * finite automaton.  fvdef is its state variable.
2416  */
2417 static enum
2418 {
2419   fvnone,                       /* nothing seen */
2420   fdefunkey,                    /* Emacs DEFUN keyword seen */
2421   fdefunname,                   /* Emacs DEFUN name seen */
2422   foperator,                    /* func: operator keyword seen (cplpl) */
2423   fvnameseen,                   /* function or variable name seen */
2424   fstartlist,                   /* func: just after open parenthesis */
2425   finlist,                      /* func: in parameter list */
2426   flistseen,                    /* func: after parameter list */
2427   fignore,                      /* func: before open brace */
2428   vignore                       /* var-like: ignore until ';' */
2429 } fvdef;
2430
2431 static bool fvextern;           /* func or var: extern keyword seen; */
2432
2433 /*
2434  * typedefs are recognized using a simple finite automaton.
2435  * typdef is its state variable.
      *
      * consider_token moves it from tnone to tkeyseen (typedef keyword,
      * only when `typedefs' is set) and from tkeyseen to ttypeseen.  The
      * remaining states are only tested there, not assigned; presumably
      * C_entries sets them -- confirm.
2436  */
2437 static enum
2438 {
2439   tnone,                        /* nothing seen */
2440   tkeyseen,                     /* typedef keyword seen */
2441   ttypeseen,                    /* defined type seen */
2442   tinbody,                      /* inside typedef body */
2443   tend,                         /* just before typedef tag */
2444   tignore                       /* junk after typedef tag */
2445 } typdef;
2446
2447 /*
2448  * struct-like structures (enum, struct and union) are recognized
2449  * using another simple finite automaton.  `structdef' is its state
2450  * variable.
      *
      * In consider_token: a struct-like keyword sets skeyseen, the token
      * that follows it sets stagseen, and a st_C_javastruct keyword seen
      * in state stagseen sets scolonseen.
2451  */
2452 static enum
2453 {
2454   snone,                        /* nothing seen yet,
2455                                    or in struct body if bracelev > 0 */
2456   skeyseen,                     /* struct-like keyword seen */
2457   stagseen,                     /* struct-like tag seen */
2458   scolonseen                    /* colon seen after struct-like tag */
2459 } structdef;
2460
2461 /*
2462  * When objdef is different from onone, objtag is the name of the class.
      *
      * consider_token replaces it with a fresh savenstr copy of the token
      * seen in state oimplementation or oprotocol.  The previous value is
      * not freed there (see the "Memory leakage" comment in that function).
2463  */
2464 static const char *objtag = "<uninited>";
2465
2466 /*
2467  * Yet another little state machine to deal with preprocessor lines.
      *
      * In consider_token: in state dsharpseen a st_C_define token leads to
      * ddefineseen and any other token to dignorerest; in state ddefineseen
      * the token is the macro name and the state becomes dignorerest.
      * The CNL macro resets the state to dnone when a new line is read.
2468  */
2469 static enum
2470 {
2471   dnone,                        /* nothing seen */
2472   dsharpseen,                   /* '#' seen as first char on line */
2473   ddefineseen,                  /* '#' and 'define' seen */
2474   dignorerest                   /* ignore rest of line */
2475 } definedef;
2476
2477 /*
2478  * State machine for Objective C protocols and implementations.
2479  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
      *
      * consider_token assigns oprotocol, oimplementation, otagseen,
      * ocatseen, oinbody, omethodtag, omethodparm and onone.  The other
      * states are only tested there; presumably C_entries enters them
      * on punctuation -- confirm.
2480  */
2481 static enum
2482 {
2483   onone,                        /* nothing seen */
2484   oprotocol,                    /* @interface or @protocol seen */
2485   oimplementation,              /* @implementation seen */
2486   otagseen,                     /* class name seen */
2487   oparenseen,                   /* parenthesis before category seen */
2488   ocatseen,                     /* category name seen */
2489   oinbody,                      /* in @implementation body */
2490   omethodsign,                  /* in @implementation body, after +/- */
2491   omethodtag,                   /* after method name */
2492   omethodcolon,                 /* after method colon */
2493   omethodparm,                  /* after method parameter */
2494   oignore                       /* wait for @end */
2495 } objdef;
2496
2497
2498 /*
2499  * Use this structure to keep info about the token read, and how it
2500  * should be tagged.  Used by the make_C_tag function to build a tag.
      * The tag name itself is not stored here: make_C_tag takes it from
      * token_name.
2501  */
2502 static struct tok
2503 {
2504   char *line;                   /* string containing the token */
2505   int offset;                   /* where the token starts in LINE */
2506   int length;                   /* token length */
2507   /*
2508     The previous members can be used to pass strings around for generic
2509     purposes.  The following ones specifically refer to creating tags.  In this
2510     case the token contained here is the pattern that will be used to create a
2511     tag.
2512   */
2513   bool valid;                   /* true if a tag may be created from this
2514                                    token; the token should be invalidated
2515                                    whenever a state machine is reset prematurely */
2516   bool named;                   /* create a named tag */
2517   int lineno;                   /* source line number of tag */
2518   long linepos;                 /* source char number of tag */
2519 } token;                        /* latest token read */
2520
2521 /*
2522  * Variables and functions for dealing with nested structures.
2523  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cname and bracelev are parallel arrays indexed by nesting level.
      * A cname entry is NULL for a level pushed without a name; non-NULL
      * entries are savenstr copies that popclass_above frees.
2524  */
2525 static void pushclass_above (int, char *, int);
2526 static void popclass_above (int);
2527 static void write_classname (linebuffer *, const char *qualifier);
2528
2529 static struct {
2530   char **cname;                 /* nested class names */
2531   int *bracelev;                /* nested class brace level */
2532   int nl;                       /* class nesting level (elements used) */
2533   int size;                     /* length of the array */
2534 } cstack;                       /* stack for nested declaration tags */
2535 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2536 #define nestlev         (cstack.nl)
2537 /* After struct keyword or in struct body, not inside a nested function.
        Expands to an expression that reads a variable named `bracelev' from
        the scope where the macro is used. */
2538 #define instruct        (structdef == snone && nestlev > 0                      \
2539                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2540
2541 static void
2542 pushclass_above (int bracelev, char *str, int len)
2543 {
2544   int nl;
2545
2546   popclass_above (bracelev);
2547   nl = cstack.nl;
2548   if (nl >= cstack.size)
2549     {
2550       int size = cstack.size *= 2;
2551       xrnew (cstack.cname, size, char *);
2552       xrnew (cstack.bracelev, size, int);
2553     }
2554   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2555   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2556   cstack.bracelev[nl] = bracelev;
2557   cstack.nl = nl + 1;
2558 }
2559
2560 static void
2561 popclass_above (int bracelev)
2562 {
2563   int nl;
2564
2565   for (nl = cstack.nl - 1;
2566        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2567        nl--)
2568     {
2569       free (cstack.cname[nl]);
2570       cstack.nl = nl;
2571     }
2572 }
2573
2574 static void
2575 write_classname (linebuffer *cn, const char *qualifier)
2576 {
2577   int i, len;
2578   int qlen = strlen (qualifier);
2579
2580   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2581     {
2582       len = 0;
2583       cn->len = 0;
2584       cn->buffer[0] = '\0';
2585     }
2586   else
2587     {
2588       len = strlen (cstack.cname[0]);
2589       linebuffer_setlen (cn, len);
2590       strcpy (cn->buffer, cstack.cname[0]);
2591     }
2592   for (i = 1; i < cstack.nl; i++)
2593     {
2594       char *s = cstack.cname[i];
2595       if (s == NULL)
2596         continue;
2597       linebuffer_setlen (cn, len + qlen + strlen (s));
2598       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2599     }
2600 }
2601
2602 \f
2603 static bool consider_token (char *, int, int, int *, int, int, bool *);
2604 static void make_C_tag (bool);
2605
2606 /*
2607  * consider_token ()
2608  *      checks to see if the current token is at the start of a
2609  *      function or variable, or corresponds to a typedef, or
2610  *      is a struct/union/enum tag, or #define, or an enum constant.
2611  *
      *      Returns true when the token is a candidate for a tag, and
      *      false when it only advances the state machines or is ignored.
      *
2612  *      *IS_FUNC_OR_VAR is set to true on the paths that recognize a
2613  *      function or variable name, an operator, a DEFUN-like macro name or
      *      an Objective C class or category name.  For a #define it is true
      *      only when the macro name is directly followed by '('.  The other
      *      paths leave it untouched.
      *
      *      C_EXTP points to which language we are looking at.  When C_AUTO
      *      is set in *C_EXTP and a C++ construct is recognized, C_AUTO is
      *      replaced by C_PLPL.
2614  *
2615  * Globals
2616  *      fvdef                   IN OUT
2617  *      structdef               IN OUT
2618  *      definedef               IN OUT
2619  *      typdef                  IN OUT
2620  *      objdef                  IN OUT
      *      inattribute             OUT
      *      fvextern                OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names only)
2621  */
2622
2623 static bool
2624 consider_token (char *str, int len, int c, int *c_extp,
2625                 int bracelev, int parlev, bool *is_func_or_var)
2626                                 /* STR: IN: token pointer */
2627                                 /* LEN: IN: token length */
2628                                 /* C: IN: first char after the token */
2629                                 /* C_EXTP: IN, OUT: C extensions mask */
2630                                 /* BRACELEV: IN: brace level */
2631                                 /* PARLEV: IN: parenthesis level */
2632                                 /* IS_FUNC_OR_VAR: OUT: function or variable found */
2633 {
2634   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2635      structtype is the type of the preceding struct-like keyword, and
2636      structbracelev is the brace level where it has been seen. */
2637   static enum sym_type structtype;
2638   static int structbracelev;
       /* toktype is static too, but it is reassigned on every call.
          NOTE(review): nothing in this function resets structtype or
          structbracelev, so they keep their values between calls; confirm
          that stale values cannot carry over from one input file to the
          next. */
2639   static enum sym_type toktype;
2640
2641
2642   toktype = C_symtype (str, len, *c_extp);
2643
2644   /*
2645    * Skip __attribute__
2646    */
2647   if (toktype == st_C_attribute)
2648     {
2649       inattribute = true;
2650       return false;
2651      }
2652
2653    /*
2654     * Advance the definedef state machine.
2655     */
2656    switch (definedef)
2657      {
2658      case dnone:
2659        /* We're not on a preprocessor line. */
2660        if (toktype == st_C_gnumacro)
2661          {
2662            fvdef = fdefunkey;
2663            return false;
2664          }
2665        break;
2666      case dsharpseen:
            /* First token after '#': only a define is followed further,
               any other token makes the rest of the line ignored. */
2667        if (toktype == st_C_define)
2668          {
2669            definedef = ddefineseen;
2670          }
2671        else
2672          {
2673            definedef = dignorerest;
2674          }
2675        return false;
2676      case ddefineseen:
2677        /*
2678         * Make a tag for any macro, unless it is a constant
2679         * and constantypedefs is false.
2680         */
2681        definedef = dignorerest;
2682        *is_func_or_var = (c == '(');
2683        if (!*is_func_or_var && !constantypedefs)
2684          return false;
2685        else
2686          return true;
2687      case dignorerest:
2688        return false;
2689      default:
2690        error ("internal error: definedef value.");
2691      }
2692
2693    /*
2694     * Now typedefs
2695     */
2696    switch (typdef)
2697      {
2698      case tnone:
            /* The typedef keyword always resets the function/variable
               machine, but starts the typedef machine only when `typedefs'
               is set. */
2699        if (toktype == st_C_typedef)
2700          {
2701            if (typedefs)
2702              typdef = tkeyseen;
2703            fvextern = false;
2704            fvdef = fvnone;
2705            return false;
2706          }
2707        break;
2708      case tkeyseen:
            /* A plain identifier or a class/struct/enum keyword after
               typedef counts as the defined type; other token types leave
               the state unchanged. */
2709        switch (toktype)
2710          {
2711          case st_none:
2712          case st_C_class:
2713          case st_C_struct:
2714          case st_C_enum:
2715            typdef = ttypeseen;
2716          }
2717        break;
2718      case ttypeseen:
            /* After the defined type, the next token is a tag candidate
               if no struct or function/variable recognition is under
               way. */
2719        if (structdef == snone && fvdef == fvnone)
2720          {
2721            fvdef = fvnameseen;
2722            return true;
2723          }
2724        break;
2725      case tend:
            /* Just before the typedef tag: anything except a struct-like
               keyword is a tag candidate. */
2726        switch (toktype)
2727          {
2728          case st_C_class:
2729          case st_C_struct:
2730          case st_C_enum:
2731            return false;
2732          }
2733        return true;
2734      }
2735
         /* Struct-like keywords.  Every case here except st_C_template
            returns false; st_C_template falls out of the switch. */
2736    switch (toktype)
2737      {
2738      case st_C_javastruct:
2739        if (structdef == stagseen)
2740          structdef = scolonseen;
2741        return false;
2742      case st_C_template:
2743      case st_C_class:
2744        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2745            && bracelev == 0
2746            && definedef == dnone && structdef == snone
2747            && typdef == tnone && fvdef == fvnone)
2748          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2749        if (toktype == st_C_template)
2750          break;
2751        /* FALLTHRU */
2752      case st_C_struct:
2753      case st_C_enum:
            /* Start the struct machine and remember which keyword was
               seen and at which brace level. */
2754        if (parlev == 0
2755            && fvdef != vignore
2756            && (typdef == tkeyseen
2757                || (typedefs_or_cplusplus && structdef == snone)))
2758          {
2759            structdef = skeyseen;
2760            structtype = toktype;
2761            structbracelev = bracelev;
2762            if (fvdef == fvnameseen)
2763              fvdef = fvnone;
2764          }
2765        return false;
2766      }
2767
         /* The token right after a struct-like keyword is the tag. */
2768    if (structdef == skeyseen)
2769      {
2770        structdef = stagseen;
2771        return true;
2772      }
2773
2774    if (typdef != tnone)
2775      definedef = dnone;
2776
2777    /* Detect Objective C constructs. */
2778    switch (objdef)
2779      {
2780      case onone:
2781        switch (toktype)
2782          {
2783          case st_C_objprot:
2784            objdef = oprotocol;
2785            return false;
2786          case st_C_objimpl:
2787            objdef = oimplementation;
2788            return false;
2789          }
2790        break;
2791      case oimplementation:
2792        /* Save the class tag for functions or variables defined inside. */
2793        objtag = savenstr (str, len);
2794        objdef = oinbody;
2795        return false;
2796      case oprotocol:
2797        /* Save the class tag for categories. */
2798        objtag = savenstr (str, len);
2799        objdef = otagseen;
2800        *is_func_or_var = true;
2801        return true;
2802      case oparenseen:
2803        objdef = ocatseen;
2804        *is_func_or_var = true;
2805        return true;
2806      case oinbody:
2807        break;
2808      case omethodsign:
            /* First part of a method name: it starts token_name afresh. */
2809        if (parlev == 0)
2810          {
2811            fvdef = fvnone;
2812            objdef = omethodtag;
2813            linebuffer_setlen (&token_name, len);
2814            memcpy (token_name.buffer, str, len);
2815            token_name.buffer[len] = '\0';
2816            return true;
2817          }
2818        return false;
2819      case omethodcolon:
2820        if (parlev == 0)
2821          objdef = omethodparm;
2822        return false;
2823      case omethodparm:
            /* A further part of a method name: it is appended to what
               token_name already holds. */
2824        if (parlev == 0)
2825          {
2826            int oldlen = token_name.len;
2827            fvdef = fvnone;
2828            objdef = omethodtag;
2829            linebuffer_setlen (&token_name, oldlen + len);
2830            memcpy (token_name.buffer + oldlen, str, len);
2831            token_name.buffer[oldlen + len] = '\0';
2832            return true;
2833          }
2834        return false;
2835      case oignore:
2836        if (toktype == st_C_objend)
2837          {
2838            /* Memory leakage here: the string pointed by objtag is
2839               never released, because many tests would be needed to
2840               avoid breaking on incorrect input code.  The amount of
2841               memory leaked here is the sum of the lengths of the
2842               class tags.
2843            free (objtag); */
2844            objdef = onone;
2845          }
2846        return false;
2847      }
2848
2849    /* A function, variable or enum constant? */
2850    switch (toktype)
2851      {
2852      case st_C_extern:
2853        fvextern = true;
            /* Reset the function/variable machine unless it is inside or
               past a parameter list, or in one of the ignore states. */
2854        switch  (fvdef)
2855          {
2856          case finlist:
2857          case flistseen:
2858          case fignore:
2859          case vignore:
2860            break;
2861          default:
2862            fvdef = fvnone;
2863          }
2864        return false;
2865      case st_C_ignore:
2866        fvextern = false;
2867        fvdef = vignore;
2868        return false;
2869      case st_C_operator:
2870        fvdef = foperator;
2871        *is_func_or_var = true;
2872        return true;
2873      case st_none:
2874        if (constantypedefs
2875            && structdef == snone
2876            && structtype == st_C_enum && bracelev > structbracelev)
2877          return true;           /* enum constant */
2878        switch (fvdef)
2879          {
2880          case fdefunkey:
2881            if (bracelev > 0)
2882              break;
2883            fvdef = fdefunname;  /* GNU macro */
2884            *is_func_or_var = true;
2885            return true;
2886          case fvnone:
2887            switch (typdef)
2888              {
2889              case ttypeseen:
2890                return false;
2891              case tnone:
                    /* asm and __asm__ are not names: ignore what
                       follows, as for a variable-like construct. */
2892                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2893                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2894                  {
2895                    fvdef = vignore;
2896                    return false;
2897                  }
2898                break;
2899              }
2900           /* FALLTHRU */
2901           case fvnameseen:
               /* A name ending in "::operator" is treated like the
                  operator keyword. */
2902           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2903             {
2904               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2905                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2906               fvdef = foperator;
2907               *is_func_or_var = true;
2908               return true;
2909             }
               /* Inside braces, only names directly in a struct body
                  are considered. */
2910           if (bracelev > 0 && !instruct)
2911             break;
2912           fvdef = fvnameseen;   /* function or variable */
2913           *is_func_or_var = true;
2914           return true;
2915         }
2916       break;
2917     }
2918
2919   return false;
2920 }
2921
2922 \f
2923 /*
2924  * C_entries often keeps pointers to tokens or lines which are older than
2925  * the line currently read.  By keeping two line buffers, and switching
2926  * them at end of line, it is possible to use those pointers.
      *
      * The macros below expand to expressions on variables named curndx
      * and newndx, which must exist in the scope where they are used.
2927  */
2928 static struct
2929 {
2930   long linepos;
2931   linebuffer lb;
2932 } lbs[2];
2933
     /* True when both indices select the same buffer. */
2934 #define current_lb_is_new (newndx == curndx)
     /* Make the other of the two buffers the current one. */
2935 #define switch_line_buffers() (curndx = 1 - curndx)
2936
2937 #define curlb (lbs[curndx].lb)
2938 #define newlb (lbs[newndx].lb)
2939 #define curlinepos (lbs[curndx].linepos)
2940 #define newlinepos (lbs[newndx].linepos)
2941
     /* Language tests on a variable named c_ext, which must exist in the
        scope where these macros are used. */
2942 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2943 #define cplpl (c_ext & C_PLPL)
2944 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2945
     /* Read the next input line into the current line buffer, leaving
        definedef untouched.  Records charno as the position of the line,
        points lp at the start of the buffer, clears quotednl and makes the
        current buffer the new one.  Uses charno, inf, lp, quotednl, curndx
        and newndx from the scope where the macro is used. */
2946 #define CNL_SAVE_DEFINEDEF()                                            \
2947 do {                                                                    \
2948   curlinepos = charno;                                                  \
2949   readline (&curlb, inf);                                               \
2950   lp = curlb.buffer;                                                    \
2951   quotednl = false;                                                     \
2952   newndx = curndx;                                                      \
2953 } while (0)
2954
     /* Like CNL_SAVE_DEFINEDEF, but also ends any preprocessor line: a
        valid savetoken is copied back into token and invalidated, and
        definedef is reset to dnone.  Uses savetoken from the scope where
        the macro is used. */
2955 #define CNL()                                                           \
2956 do {                                                                    \
2957   CNL_SAVE_DEFINEDEF();                                                 \
2958   if (savetoken.valid)                                                  \
2959     {                                                                   \
2960       token = savetoken;                                                \
2961       savetoken.valid = false;                                          \
2962     }                                                                   \
2963   definedef = dnone;                                                    \
2964 } while (0)
2965
2966
2967 static void
2968 make_C_tag (bool isfun)
2969 {
2970   /* This function is never called when token.valid is false, but
2971      we must protect against invalid input or internal errors. */
2972   if (token.valid)
2973     make_tag (token_name.buffer, token_name.len, isfun, token.line,
2974               token.offset+token.length+1, token.lineno, token.linepos);
2975   else if (DEBUG)
2976     {                             /* this branch is optimized away if !DEBUG */
2977       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2978                 token_name.len + 17, isfun, token.line,
2979                 token.offset+token.length+1, token.lineno, token.linepos);
2980       error ("INVALID TOKEN");
2981     }
2982
2983   token.valid = false;
2984 }
2985
2986
2987 /*
2988  * C_entries ()
2989  *      This routine finds functions, variables, typedefs,
2990  *      #define's, enum constants and struct/union/enum definitions in
2991  *      C syntax and adds them to the list.
2992  */
2993 static void
2994 C_entries (int c_ext, FILE *inf)
2995                                 /* extension of C */
2996                                 /* input file */
2997 {
2998   register char c;              /* latest char read; '\0' for end of line */
2999   register char *lp;            /* pointer one beyond the character `c' */
3000   int curndx, newndx;           /* indices for current and new lb */
3001   register int tokoff;          /* offset in line of start of current token */
3002   register int toklen;          /* length of current token */
3003   const char *qualifier;        /* string used to qualify names */
3004   int qlen;                     /* length of qualifier */
3005   int bracelev;                 /* current brace level */
3006   int bracketlev;               /* current bracket level */
3007   int parlev;                   /* current parenthesis level */
3008   int attrparlev;               /* __attribute__ parenthesis level */
3009   int templatelev;              /* current template level */
3010   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3011   bool incomm, inquote, inchar, quotednl, midtoken;
3012   bool yacc_rules;              /* in the rules part of a yacc file */
3013   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3014
3015
3016   linebuffer_init (&lbs[0].lb);
3017   linebuffer_init (&lbs[1].lb);
3018   if (cstack.size == 0)
3019     {
3020       cstack.size = (DEBUG) ? 1 : 4;
3021       cstack.nl = 0;
3022       cstack.cname = xnew (cstack.size, char *);
3023       cstack.bracelev = xnew (cstack.size, int);
3024     }
3025
3026   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3027   curndx = newndx = 0;
3028   lp = curlb.buffer;
3029   *lp = 0;
3030
3031   fvdef = fvnone; fvextern = false; typdef = tnone;
3032   structdef = snone; definedef = dnone; objdef = onone;
3033   yacc_rules = false;
3034   midtoken = inquote = inchar = incomm = quotednl = false;
3035   token.valid = savetoken.valid = false;
3036   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3037   if (cjava)
3038     { qualifier = "."; qlen = 1; }
3039   else
3040     { qualifier = "::"; qlen = 2; }
3041
3042
3043   while (!feof (inf))
3044     {
3045       c = *lp++;
3046       if (c == '\\')
3047         {
3048           /* If we are at the end of the line, the next character is a
3049              '\0'; do not skip it, because it is what tells us
3050              to read the next line.  */
3051           if (*lp == '\0')
3052             {
3053               quotednl = true;
3054               continue;
3055             }
3056           lp++;
3057           c = ' ';
3058         }
3059       else if (incomm)
3060         {
3061           switch (c)
3062             {
3063             case '*':
3064               if (*lp == '/')
3065                 {
3066                   c = *lp++;
3067                   incomm = false;
3068                 }
3069               break;
3070             case '\0':
3071               /* Newlines inside comments do not end macro definitions in
3072                  traditional cpp. */
3073               CNL_SAVE_DEFINEDEF ();
3074               break;
3075             }
3076           continue;
3077         }
3078       else if (inquote)
3079         {
3080           switch (c)
3081             {
3082             case '"':
3083               inquote = false;
3084               break;
3085             case '\0':
3086               /* Newlines inside strings do not end macro definitions
3087                  in traditional cpp, even though compilers don't
3088                  usually accept them. */
3089               CNL_SAVE_DEFINEDEF ();
3090               break;
3091             }
3092           continue;
3093         }
3094       else if (inchar)
3095         {
3096           switch (c)
3097             {
3098             case '\0':
3099               /* Hmmm, something went wrong. */
3100               CNL ();
3101               /* FALLTHRU */
3102             case '\'':
3103               inchar = false;
3104               break;
3105             }
3106           continue;
3107         }
3108       else switch (c)
3109         {
3110         case '"':
3111           inquote = true;
3112           if (bracketlev > 0)
3113             continue;
3114           if (inattribute)
3115             break;
3116           switch (fvdef)
3117             {
3118             case fdefunkey:
3119             case fstartlist:
3120             case finlist:
3121             case fignore:
3122             case vignore:
3123               break;
3124             default:
3125               fvextern = false;
3126               fvdef = fvnone;
3127             }
3128           continue;
3129         case '\'':
3130           inchar = true;
3131           if (bracketlev > 0)
3132             continue;
3133           if (inattribute)
3134             break;
3135           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3136             {
3137               fvextern = false;
3138               fvdef = fvnone;
3139             }
3140           continue;
3141         case '/':
3142           if (*lp == '*')
3143             {
3144               incomm = true;
3145               lp++;
3146               c = ' ';
3147               if (bracketlev > 0)
3148                 continue;
3149             }
3150           else if (/* cplpl && */ *lp == '/')
3151             {
3152               c = '\0';
3153             }
3154           break;
3155         case '%':
3156           if ((c_ext & YACC) && *lp == '%')
3157             {
3158               /* Entering or exiting rules section in yacc file. */
3159               lp++;
3160               definedef = dnone; fvdef = fvnone; fvextern = false;
3161               typdef = tnone; structdef = snone;
3162               midtoken = inquote = inchar = incomm = quotednl = false;
3163               bracelev = 0;
3164               yacc_rules = !yacc_rules;
3165               continue;
3166             }
3167           else
3168             break;
3169         case '#':
3170           if (definedef == dnone)
3171             {
3172               char *cp;
3173               bool cpptoken = true;
3174
3175               /* Look back on this line.  If all blanks, or nonblanks
3176                  followed by an end of comment, this is a preprocessor
3177                  token. */
3178               for (cp = newlb.buffer; cp < lp-1; cp++)
3179                 if (!iswhite (*cp))
3180                   {
3181                     if (*cp == '*' && cp[1] == '/')
3182                       {
3183                         cp++;
3184                         cpptoken = true;
3185                       }
3186                     else
3187                       cpptoken = false;
3188                   }
3189               if (cpptoken)
3190                 definedef = dsharpseen;
3191             } /* if (definedef == dnone) */
3192           continue;
3193         case '[':
3194           bracketlev++;
3195           continue;
3196         default:
3197           if (bracketlev > 0)
3198             {
3199               if (c == ']')
3200                 --bracketlev;
3201               else if (c == '\0')
3202                 CNL_SAVE_DEFINEDEF ();
3203               continue;
3204             }
3205           break;
3206         } /* switch (c) */
3207
3208
3209       /* Consider token only if some involved conditions are satisfied. */
3210       if (typdef != tignore
3211           && definedef != dignorerest
3212           && fvdef != finlist
3213           && templatelev == 0
3214           && (definedef != dnone
3215               || structdef != scolonseen)
3216           && !inattribute)
3217         {
3218           if (midtoken)
3219             {
3220               if (endtoken (c))
3221                 {
3222                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3223                     /* This handles :: in the middle,
3224                        but not at the beginning of an identifier.
3225                        Also, space-separated :: is not recognized. */
3226                     {
3227                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3228                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3229                       lp += 2;
3230                       toklen += 2;
3231                       c = lp[-1];
3232                       goto still_in_token;
3233                     }
3234                   else
3235                     {
3236                       bool funorvar = false;
3237
3238                       if (yacc_rules
3239                           || consider_token (newlb.buffer + tokoff, toklen, c,
3240                                              &c_ext, bracelev, parlev,
3241                                              &funorvar))
3242                         {
3243                           if (fvdef == foperator)
3244                             {
3245                               char *oldlp = lp;
3246                               lp = skip_spaces (lp-1);
3247                               if (*lp != '\0')
3248                                 lp += 1;
3249                               while (*lp != '\0'
3250                                      && !iswhite (*lp) && *lp != '(')
3251                                 lp += 1;
3252                               c = *lp++;
3253                               toklen += lp - oldlp;
3254                             }
3255                           token.named = false;
3256                           if (!plainc
3257                               && nestlev > 0 && definedef == dnone)
3258                             /* in struct body */
3259                             {
3260                               int len;
3261                               write_classname (&token_name, qualifier);
3262                               len = token_name.len;
3263                               linebuffer_setlen (&token_name, len+qlen+toklen);
3264                               sprintf (token_name.buffer + len, "%s%.*s",
3265                                        qualifier, toklen, newlb.buffer + tokoff);
3266                               token.named = true;
3267                             }
3268                           else if (objdef == ocatseen)
3269                             /* Objective C category */
3270                             {
3271                               int len = strlen (objtag) + 2 + toklen;
3272                               linebuffer_setlen (&token_name, len);
3273                               sprintf (token_name.buffer, "%s(%.*s)",
3274                                        objtag, toklen, newlb.buffer + tokoff);
3275                               token.named = true;
3276                             }
3277                           else if (objdef == omethodtag
3278                                    || objdef == omethodparm)
3279                             /* Objective C method */
3280                             {
3281                               token.named = true;
3282                             }
3283                           else if (fvdef == fdefunname)
3284                             /* GNU DEFUN and similar macros */
3285                             {
3286                               bool defun = (newlb.buffer[tokoff] == 'F');
3287                               int off = tokoff;
3288                               int len = toklen;
3289
3290                               /* Rewrite the tag so that emacs lisp DEFUNs
3291                                  can be found by their elisp name */
3292                               if (defun)
3293                                 {
3294                                   off += 1;
3295                                   len -= 1;
3296                                 }
3297                               linebuffer_setlen (&token_name, len);
3298                               memcpy (token_name.buffer,
3299                                       newlb.buffer + off, len);
3300                               token_name.buffer[len] = '\0';
3301                               if (defun)
3302                                 while (--len >= 0)
3303                                   if (token_name.buffer[len] == '_')
3304                                     token_name.buffer[len] = '-';
3305                               token.named = defun;
3306                             }
3307                           else
3308                             {
3309                               linebuffer_setlen (&token_name, toklen);
3310                               memcpy (token_name.buffer,
3311                                       newlb.buffer + tokoff, toklen);
3312                               token_name.buffer[toklen] = '\0';
3313                               /* Name macros and members. */
3314                               token.named = (structdef == stagseen
3315                                              || typdef == ttypeseen
3316                                              || typdef == tend
3317                                              || (funorvar
3318                                                  && definedef == dignorerest)
3319                                              || (funorvar
3320                                                  && definedef == dnone
3321                                                  && structdef == snone
3322                                                  && bracelev > 0));
3323                             }
3324                           token.lineno = lineno;
3325                           token.offset = tokoff;
3326                           token.length = toklen;
3327                           token.line = newlb.buffer;
3328                           token.linepos = newlinepos;
3329                           token.valid = true;
3330
3331                           if (definedef == dnone
3332                               && (fvdef == fvnameseen
3333                                   || fvdef == foperator
3334                                   || structdef == stagseen
3335                                   || typdef == tend
3336                                   || typdef == ttypeseen
3337                                   || objdef != onone))
3338                             {
3339                               if (current_lb_is_new)
3340                                 switch_line_buffers ();
3341                             }
3342                           else if (definedef != dnone
3343                                    || fvdef == fdefunname
3344                                    || instruct)
3345                             make_C_tag (funorvar);
3346                         }
3347                       else /* not yacc and consider_token failed */
3348                         {
3349                           if (inattribute && fvdef == fignore)
3350                             {
3351                               /* We have just met __attribute__ after a
3352                                  function parameter list: do not tag the
3353                                  function again. */
3354                               fvdef = fvnone;
3355                             }
3356                         }
3357                       midtoken = false;
3358                     }
3359                 } /* if (endtoken (c)) */
3360               else if (intoken (c))
3361                 still_in_token:
3362                 {
3363                   toklen++;
3364                   continue;
3365                 }
3366             } /* if (midtoken) */
3367           else if (begtoken (c))
3368             {
3369               switch (definedef)
3370                 {
3371                 case dnone:
3372                   switch (fvdef)
3373                     {
3374                     case fstartlist:
3375                       /* This prevents tagging fb in
3376                          void (__attribute__((noreturn)) *fb) (void);
3377                          Fixing this is not easy and not very important. */
3378                       fvdef = finlist;
3379                       continue;
3380                     case flistseen:
3381                       if (plainc || declarations)
3382                         {
3383                           make_C_tag (true); /* a function */
3384                           fvdef = fignore;
3385                         }
3386                       break;
3387                     }
3388                   if (structdef == stagseen && !cjava)
3389                     {
3390                       popclass_above (bracelev);
3391                       structdef = snone;
3392                     }
3393                   break;
3394                 case dsharpseen:
3395                   savetoken = token;
3396                   break;
3397                 }
3398               if (!yacc_rules || lp == newlb.buffer + 1)
3399                 {
3400                   tokoff = lp - 1 - newlb.buffer;
3401                   toklen = 1;
3402                   midtoken = true;
3403                 }
3404               continue;
3405             } /* if (begtoken) */
3406         } /* if must look at token */
3407
3408
3409       /* Detect end of line, colon, comma, semicolon and various braces
3410          after having handled a token.*/
3411       switch (c)
3412         {
3413         case ':':
3414           if (inattribute)
3415             break;
3416           if (yacc_rules && token.offset == 0 && token.valid)
3417             {
3418               make_C_tag (false); /* a yacc function */
3419               break;
3420             }
3421           if (definedef != dnone)
3422             break;
3423           switch (objdef)
3424             {
3425             case  otagseen:
3426               objdef = oignore;
3427               make_C_tag (true); /* an Objective C class */
3428               break;
3429             case omethodtag:
3430             case omethodparm:
3431               objdef = omethodcolon;
3432               linebuffer_setlen (&token_name, token_name.len + 1);
3433               strcat (token_name.buffer, ":");
3434               break;
3435             }
3436           if (structdef == stagseen)
3437             {
3438               structdef = scolonseen;
3439               break;
3440             }
3441           /* Should be useless, but may work as a safety net. */
3442           if (cplpl && fvdef == flistseen)
3443             {
3444               make_C_tag (true); /* a function */
3445               fvdef = fignore;
3446               break;
3447             }
3448           break;
3449         case ';':
3450           if (definedef != dnone || inattribute)
3451             break;
3452           switch (typdef)
3453             {
3454             case tend:
3455             case ttypeseen:
3456               make_C_tag (false); /* a typedef */
3457               typdef = tnone;
3458               fvdef = fvnone;
3459               break;
3460             case tnone:
3461             case tinbody:
3462             case tignore:
3463               switch (fvdef)
3464                 {
3465                 case fignore:
3466                   if (typdef == tignore || cplpl)
3467                     fvdef = fvnone;
3468                   break;
3469                 case fvnameseen:
3470                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3471                       || (members && instruct))
3472                     make_C_tag (false); /* a variable */
3473                   fvextern = false;
3474                   fvdef = fvnone;
3475                   token.valid = false;
3476                   break;
3477                 case flistseen:
3478                   if ((declarations
3479                        && (cplpl || !instruct)
3480                        && (typdef == tnone || (typdef != tignore && instruct)))
3481                       || (members
3482                           && plainc && instruct))
3483                     make_C_tag (true);  /* a function */
3484                   /* FALLTHRU */
3485                 default:
3486                   fvextern = false;
3487                   fvdef = fvnone;
3488                   if (declarations
3489                        && cplpl && structdef == stagseen)
3490                     make_C_tag (false); /* forward declaration */
3491                   else
3492                     token.valid = false;
3493                 } /* switch (fvdef) */
3494               /* FALLTHRU */
3495             default:
3496               if (!instruct)
3497                 typdef = tnone;
3498             }
3499           if (structdef == stagseen)
3500             structdef = snone;
3501           break;
3502         case ',':
3503           if (definedef != dnone || inattribute)
3504             break;
3505           switch (objdef)
3506             {
3507             case omethodtag:
3508             case omethodparm:
3509               make_C_tag (true); /* an Objective C method */
3510               objdef = oinbody;
3511               break;
3512             }
3513           switch (fvdef)
3514             {
3515             case fdefunkey:
3516             case foperator:
3517             case fstartlist:
3518             case finlist:
3519             case fignore:
3520             case vignore:
3521               break;
3522             case fdefunname:
3523               fvdef = fignore;
3524               break;
3525             case fvnameseen:
3526               if (parlev == 0
3527                   && ((globals
3528                        && bracelev == 0
3529                        && templatelev == 0
3530                        && (!fvextern || declarations))
3531                       || (members && instruct)))
3532                   make_C_tag (false); /* a variable */
3533               break;
3534             case flistseen:
3535               if ((declarations && typdef == tnone && !instruct)
3536                   || (members && typdef != tignore && instruct))
3537                 {
3538                   make_C_tag (true); /* a function */
3539                   fvdef = fvnameseen;
3540                 }
3541               else if (!declarations)
3542                 fvdef = fvnone;
3543               token.valid = false;
3544               break;
3545             default:
3546               fvdef = fvnone;
3547             }
3548           if (structdef == stagseen)
3549             structdef = snone;
3550           break;
3551         case ']':
3552           if (definedef != dnone || inattribute)
3553             break;
3554           if (structdef == stagseen)
3555             structdef = snone;
3556           switch (typdef)
3557             {
3558             case ttypeseen:
3559             case tend:
3560               typdef = tignore;
3561               make_C_tag (false);       /* a typedef */
3562               break;
3563             case tnone:
3564             case tinbody:
3565               switch (fvdef)
3566                 {
3567                 case foperator:
3568                 case finlist:
3569                 case fignore:
3570                 case vignore:
3571                   break;
3572                 case fvnameseen:
3573                   if ((members && bracelev == 1)
3574                       || (globals && bracelev == 0
3575                           && (!fvextern || declarations)))
3576                     make_C_tag (false); /* a variable */
3577                   /* FALLTHRU */
3578                 default:
3579                   fvdef = fvnone;
3580                 }
3581               break;
3582             }
3583           break;
3584         case '(':
3585           if (inattribute)
3586             {
3587               attrparlev++;
3588               break;
3589             }
3590           if (definedef != dnone)
3591             break;
3592           if (objdef == otagseen && parlev == 0)
3593             objdef = oparenseen;
3594           switch (fvdef)
3595             {
3596             case fvnameseen:
3597               if (typdef == ttypeseen
3598                   && *lp != '*'
3599                   && !instruct)
3600                 {
3601                   /* This handles constructs like:
3602                      typedef void OperatorFun (int fun); */
3603                   make_C_tag (false);
3604                   typdef = tignore;
3605                   fvdef = fignore;
3606                   break;
3607                 }
3608               /* FALLTHRU */
3609             case foperator:
3610               fvdef = fstartlist;
3611               break;
3612             case flistseen:
3613               fvdef = finlist;
3614               break;
3615             }
3616           parlev++;
3617           break;
3618         case ')':
3619           if (inattribute)
3620             {
3621               if (--attrparlev == 0)
3622                 inattribute = false;
3623               break;
3624             }
3625           if (definedef != dnone)
3626             break;
3627           if (objdef == ocatseen && parlev == 1)
3628             {
3629               make_C_tag (true); /* an Objective C category */
3630               objdef = oignore;
3631             }
3632           if (--parlev == 0)
3633             {
3634               switch (fvdef)
3635                 {
3636                 case fstartlist:
3637                 case finlist:
3638                   fvdef = flistseen;
3639                   break;
3640                 }
3641               if (!instruct
3642                   && (typdef == tend
3643                       || typdef == ttypeseen))
3644                 {
3645                   typdef = tignore;
3646                   make_C_tag (false); /* a typedef */
3647                 }
3648             }
3649           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3650             parlev = 0;
3651           break;
3652         case '{':
3653           if (definedef != dnone)
3654             break;
3655           if (typdef == ttypeseen)
3656             {
3657               /* Whenever typdef is set to tinbody (currently only
3658                  here), typdefbracelev should be set to bracelev. */
3659               typdef = tinbody;
3660               typdefbracelev = bracelev;
3661             }
3662           switch (fvdef)
3663             {
3664             case flistseen:
3665               make_C_tag (true);    /* a function */
3666               /* FALLTHRU */
3667             case fignore:
3668               fvdef = fvnone;
3669               break;
3670             case fvnone:
3671               switch (objdef)
3672                 {
3673                 case otagseen:
3674                   make_C_tag (true); /* an Objective C class */
3675                   objdef = oignore;
3676                   break;
3677                 case omethodtag:
3678                 case omethodparm:
3679                   make_C_tag (true); /* an Objective C method */
3680                   objdef = oinbody;
3681                   break;
3682                 default:
3683                   /* Neutralize `extern "C" {' grot. */
3684                   if (bracelev == 0 && structdef == snone && nestlev == 0
3685                       && typdef == tnone)
3686                     bracelev = -1;
3687                 }
3688               break;
3689             }
3690           switch (structdef)
3691             {
3692             case skeyseen:         /* unnamed struct */
3693               pushclass_above (bracelev, NULL, 0);
3694               structdef = snone;
3695               break;
3696             case stagseen:         /* named struct or enum */
3697             case scolonseen:       /* a class */
3698               pushclass_above (bracelev,token.line+token.offset, token.length);
3699               structdef = snone;
3700               make_C_tag (false);  /* a struct or enum */
3701               break;
3702             }
3703           bracelev += 1;
3704           break;
3705         case '*':
3706           if (definedef != dnone)
3707             break;
3708           if (fvdef == fstartlist)
3709             {
3710               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3711               token.valid = false;
3712             }
3713           break;
3714         case '}':
3715           if (definedef != dnone)
3716             break;
3717           bracelev -= 1;
3718           if (!ignoreindent && lp == newlb.buffer + 1)
3719             {
3720               if (bracelev != 0)
3721                 token.valid = false; /* unexpected value, token unreliable */
3722               bracelev = 0;     /* reset brace level if first column */
3723               parlev = 0;       /* also reset paren level, just in case... */
3724             }
3725           else if (bracelev < 0)
3726             {
3727               token.valid = false; /* something gone amiss, token unreliable */
3728               bracelev = 0;
3729             }
3730           if (bracelev == 0 && fvdef == vignore)
3731             fvdef = fvnone;             /* end of function */
3732           popclass_above (bracelev);
3733           structdef = snone;
3734           /* Only if typdef == tinbody is typdefbracelev significant. */
3735           if (typdef == tinbody && bracelev <= typdefbracelev)
3736             {
3737               assert (bracelev == typdefbracelev);
3738               typdef = tend;
3739             }
3740           break;
3741         case '=':
3742           if (definedef != dnone)
3743             break;
3744           switch (fvdef)
3745             {
3746             case foperator:
3747             case finlist:
3748             case fignore:
3749             case vignore:
3750               break;
3751             case fvnameseen:
3752               if ((members && bracelev == 1)
3753                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3754                 make_C_tag (false); /* a variable */
3755               /* FALLTHRU */
3756             default:
3757               fvdef = vignore;
3758             }
3759           break;
3760         case '<':
3761           if (cplpl
3762               && (structdef == stagseen || fvdef == fvnameseen))
3763             {
3764               templatelev++;
3765               break;
3766             }
3767           goto resetfvdef;
3768         case '>':
3769           if (templatelev > 0)
3770             {
3771               templatelev--;
3772               break;
3773             }
3774           goto resetfvdef;
3775         case '+':
3776         case '-':
3777           if (objdef == oinbody && bracelev == 0)
3778             {
3779               objdef = omethodsign;
3780               break;
3781             }
3782           /* FALLTHRU */
3783         resetfvdef:
3784         case '#': case '~': case '&': case '%': case '/':
3785         case '|': case '^': case '!': case '.': case '?':
3786           if (definedef != dnone)
3787             break;
3788           /* These surely cannot follow a function tag in C. */
3789           switch (fvdef)
3790             {
3791             case foperator:
3792             case finlist:
3793             case fignore:
3794             case vignore:
3795               break;
3796             default:
3797               fvdef = fvnone;
3798             }
3799           break;
3800         case '\0':
3801           if (objdef == otagseen)
3802             {
3803               make_C_tag (true); /* an Objective C class */
3804               objdef = oignore;
3805             }
3806           /* If a macro spans multiple lines don't reset its state. */
3807           if (quotednl)
3808             CNL_SAVE_DEFINEDEF ();
3809           else
3810             CNL ();
3811           break;
3812         } /* switch (c) */
3813
3814     } /* while not eof */
3815
3816   free (lbs[0].lb.buffer);
3817   free (lbs[1].lb.buffer);
3818 }
3819
3820 /*
3821  * Process either a C++ file or a C file depending on the setting
3822  * of a global flag.
3823  */
3824 static void
3825 default_C_entries (FILE *inf)
3826 {
3827   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3828 }
3829
3830 /* Always do plain C: call C_entries on INF with 0 as its first argument. */
3831 static void
3832 plain_C_entries (FILE *inf)
3833 {
3834   C_entries (0, inf);
3835 }
3836
3837 /* Always do C++: call C_entries on INF with C_PLPL as its first argument. */
3838 static void
3839 Cplusplus_entries (FILE *inf)
3840 {
3841   C_entries (C_PLPL, inf);
3842 }
3843
3844 /* Always do Java: call C_entries on INF with C_JAVA as its first argument. */
3845 static void
3846 Cjava_entries (FILE *inf)
3847 {
3848   C_entries (C_JAVA, inf);
3849 }
3850
3851 /* Always do C*: call C_entries on INF with C_STAR as its first argument. */
3852 static void
3853 Cstar_entries (FILE *inf)
3854 {
3855   C_entries (C_STAR, inf);
3856 }
3857
3858 /* Always do Yacc: call C_entries on INF with YACC as its first argument. */
3859 static void
3860 Yacc_entries (FILE *inf)
3861 {
3862   C_entries (YACC, inf);
3863 }
3864
3865 \f
3866 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the head of a `for' statement, so the
        statement written right after the macro call is the loop body.
        Before every pass the loop test checks feof (FILE_POINTER); if that
        is false it calls readline to fill LINE_BUFFER from FILE_POINTER and
        points CHAR_POINTER at LINE_BUFFER.buffer.  The loop has no
        initialization and no increment part. */
3867 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3868   for (;                        /* loop initialization */               \
3869        !feof (file_pointer)     /* loop test */                         \
3870        &&                       /* instructions at start of loop */     \
3871           (readline (&line_buffer, file_pointer),                       \
3872            char_pointer = line_buffer.buffer,                           \
3873            true);                                                       \
3874       )
3875
     /* True when CP points at the keyword KW and the character right after
        the keyword satisfies notinname.  On a match CP is moved past the
        keyword and replaced by what skip_spaces returns for that position;
        when there is no match CP is left untouched.  KW must be a string
        literal: its length is taken as sizeof (kw)-1. */
3876 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
3877   ((assert ("" kw), true)   /* syntax error if not a literal string */  \
3878    && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
3879    && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
3880    && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3881
3882 /* Similar to LOOKING_AT, but matches with strncaseeq, does not use
        notinname, and does not skip spaces: on a match CP is left pointing
        just past the keyword. */
3883 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3884   ((assert ("" kw), true) /* syntax error if not a literal string */    \
3885    && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
3886    && ((cp) += sizeof (kw)-1))                  /* skip the keyword */
3887
3888 /*
3889  * Read a file, but do no processing.  This is used to do regexp
3890  * matching on files that have no language defined.
3891  */
3892 static void
3893 just_read_file (FILE *inf)
3894 {
3895   while (!feof (inf))
3896     readline (&lb, inf);
3897 }
3898
3899 \f
3900 /* Fortran parsing.  Forward declarations of the two helpers that are
        defined right below and used by Fortran_functions. */
3901
3902 static void F_takeprec (void);
3903 static void F_getit (FILE *);
3904
3905 static void
3906 F_takeprec (void)
3907 {
3908   dbp = skip_spaces (dbp);
3909   if (*dbp != '*')
3910     return;
3911   dbp++;
3912   dbp = skip_spaces (dbp);
3913   if (strneq (dbp, "(*)", 3))
3914     {
3915       dbp += 3;
3916       return;
3917     }
3918   if (!ISDIGIT (*dbp))
3919     {
3920       --dbp;                    /* force failure */
3921       return;
3922     }
3923   do
3924     dbp++;
3925   while (ISDIGIT (*dbp));
3926 }
3927
3928 static void
3929 F_getit (FILE *inf)
3930 {
3931   register char *cp;
3932
3933   dbp = skip_spaces (dbp);
3934   if (*dbp == '\0')
3935     {
3936       readline (&lb, inf);
3937       dbp = lb.buffer;
3938       if (dbp[5] != '&')
3939         return;
3940       dbp += 6;
3941       dbp = skip_spaces (dbp);
3942     }
3943   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3944     return;
3945   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3946     continue;
3947   make_tag (dbp, cp-dbp, true,
3948             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3949 }
3950
3951
3952 static void
3953 Fortran_functions (FILE *inf)
3954 {
3955   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3956     {
3957       if (*dbp == '%')
3958         dbp++;                  /* Ratfor escape to fortran */
3959       dbp = skip_spaces (dbp);
3960       if (*dbp == '\0')
3961         continue;
3962
3963       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3964         dbp = skip_spaces (dbp);
3965
3966       if (LOOKING_AT_NOCASE (dbp, "pure"))
3967         dbp = skip_spaces (dbp);
3968
3969       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3970         dbp = skip_spaces (dbp);
3971
3972       switch (lowcase (*dbp))
3973         {
3974         case 'i':
3975           if (nocase_tail ("integer"))
3976             F_takeprec ();
3977           break;
3978         case 'r':
3979           if (nocase_tail ("real"))
3980             F_takeprec ();
3981           break;
3982         case 'l':
3983           if (nocase_tail ("logical"))
3984             F_takeprec ();
3985           break;
3986         case 'c':
3987           if (nocase_tail ("complex") || nocase_tail ("character"))
3988             F_takeprec ();
3989           break;
3990         case 'd':
3991           if (nocase_tail ("double"))
3992             {
3993               dbp = skip_spaces (dbp);
3994               if (*dbp == '\0')
3995                 continue;
3996               if (nocase_tail ("precision"))
3997                 break;
3998               continue;
3999             }
4000           break;
4001         }
4002       dbp = skip_spaces (dbp);
4003       if (*dbp == '\0')
4004         continue;
4005       switch (lowcase (*dbp))
4006         {
4007         case 'f':
4008           if (nocase_tail ("function"))
4009             F_getit (inf);
4010           continue;
4011         case 's':
4012           if (nocase_tail ("subroutine"))
4013             F_getit (inf);
4014           continue;
4015         case 'e':
4016           if (nocase_tail ("entry"))
4017             F_getit (inf);
4018           continue;
4019         case 'b':
4020           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4021             {
4022               dbp = skip_spaces (dbp);
4023               if (*dbp == '\0') /* assume un-named */
4024                 make_tag ("blockdata", 9, true,
4025                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4026               else
4027                 F_getit (inf);  /* look for name */
4028             }
4029           continue;
4030         }
4031     }
4032 }
4033
4034 \f
4035 /*
4036  * Ada parsing
4037  * Original code by
4038  * Philippe Waroquiers (1998)
4039  */
4040
4041 /* Once we are positioned after an "interesting" keyword, let's get
4042    the real tag value necessary. */
4043 static void
4044 Ada_getit (FILE *inf, const char *name_qualifier)
4045 {
4046   register char *cp;
4047   char *name;
4048   char c;
4049
4050   while (!feof (inf))
4051     {
4052       dbp = skip_spaces (dbp);
4053       if (*dbp == '\0'
4054           || (dbp[0] == '-' && dbp[1] == '-'))
4055         {
4056           readline (&lb, inf);
4057           dbp = lb.buffer;
4058         }
4059       switch (lowcase (*dbp))
4060         {
4061         case 'b':
4062           if (nocase_tail ("body"))
4063             {
4064               /* Skipping body of   procedure body   or   package body or ....
4065                  resetting qualifier to body instead of spec. */
4066               name_qualifier = "/b";
4067               continue;
4068             }
4069           break;
4070         case 't':
4071           /* Skipping type of   task type   or   protected type ... */
4072           if (nocase_tail ("type"))
4073             continue;
4074           break;
4075         }
4076       if (*dbp == '"')
4077         {
4078           dbp += 1;
4079           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4080             continue;
4081         }
4082       else
4083         {
4084           dbp = skip_spaces (dbp);
4085           for (cp = dbp;
4086                (*cp != '\0'
4087                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4088                cp++)
4089             continue;
4090           if (cp == dbp)
4091             return;
4092         }
4093       c = *cp;
4094       *cp = '\0';
4095       name = concat (dbp, name_qualifier, "");
4096       *cp = c;
4097       make_tag (name, strlen (name), true,
4098                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4099       free (name);
4100       if (c == '"')
4101         dbp = cp + 1;
4102       return;
4103     }
4104 }
4105
4106 static void
4107 Ada_funcs (FILE *inf)
4108 {
4109   bool inquote = false;
4110   bool skip_till_semicolumn = false;
4111
4112   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4113     {
4114       while (*dbp != '\0')
4115         {
4116           /* Skip a string i.e. "abcd". */
4117           if (inquote || (*dbp == '"'))
4118             {
4119               dbp = etags_strchr (dbp + !inquote, '"');
4120               if (dbp != NULL)
4121                 {
4122                   inquote = false;
4123                   dbp += 1;
4124                   continue;     /* advance char */
4125                 }
4126               else
4127                 {
4128                   inquote = true;
4129                   break;        /* advance line */
4130                 }
4131             }
4132
4133           /* Skip comments. */
4134           if (dbp[0] == '-' && dbp[1] == '-')
4135             break;              /* advance line */
4136
4137           /* Skip character enclosed in single quote i.e. 'a'
4138              and skip single quote starting an attribute i.e. 'Image. */
4139           if (*dbp == '\'')
4140             {
4141               dbp++ ;
4142               if (*dbp != '\0')
4143                 dbp++;
4144               continue;
4145             }
4146
4147           if (skip_till_semicolumn)
4148             {
4149               if (*dbp == ';')
4150                 skip_till_semicolumn = false;
4151               dbp++;
4152               continue;         /* advance char */
4153             }
4154
4155           /* Search for beginning of a token.  */
4156           if (!begtoken (*dbp))
4157             {
4158               dbp++;
4159               continue;         /* advance char */
4160             }
4161
4162           /* We are at the beginning of a token. */
4163           switch (lowcase (*dbp))
4164             {
4165             case 'f':
4166               if (!packages_only && nocase_tail ("function"))
4167                 Ada_getit (inf, "/f");
4168               else
4169                 break;          /* from switch */
4170               continue;         /* advance char */
4171             case 'p':
4172               if (!packages_only && nocase_tail ("procedure"))
4173                 Ada_getit (inf, "/p");
4174               else if (nocase_tail ("package"))
4175                 Ada_getit (inf, "/s");
4176               else if (nocase_tail ("protected")) /* protected type */
4177                 Ada_getit (inf, "/t");
4178               else
4179                 break;          /* from switch */
4180               continue;         /* advance char */
4181
4182             case 'u':
4183               if (typedefs && !packages_only && nocase_tail ("use"))
4184                 {
4185                   /* when tagging types, avoid tagging  use type Pack.Typename;
4186                      for this, we will skip everything till a ; */
4187                   skip_till_semicolumn = true;
4188                   continue;     /* advance char */
4189                 }
4190
4191             case 't':
4192               if (!packages_only && nocase_tail ("task"))
4193                 Ada_getit (inf, "/k");
4194               else if (typedefs && !packages_only && nocase_tail ("type"))
4195                 {
4196                   Ada_getit (inf, "/t");
4197                   while (*dbp != '\0')
4198                     dbp += 1;
4199                 }
4200               else
4201                 break;          /* from switch */
4202               continue;         /* advance char */
4203             }
4204
4205           /* Look for the end of the token. */
4206           while (!endtoken (*dbp))
4207             dbp++;
4208
4209         } /* advance char */
4210     } /* advance line */
4211 }
4212
4213 \f
4214 /*
4215  * Unix and microcontroller assembly tag handling
4216  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4217  * Idea by Bob Weiner, Motorola Inc. (1994)
4218  */
4219 static void
4220 Asm_labels (FILE *inf)
4221 {
4222   register char *cp;
4223
4224   LOOP_ON_INPUT_LINES (inf, lb, cp)
4225     {
4226       /* If first char is alphabetic or one of [_.$], test for colon
4227          following identifier. */
4228       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4229         {
4230           /* Read past label. */
4231           cp++;
4232           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4233             cp++;
4234           if (*cp == ':' || iswhite (*cp))
4235             /* Found end of label, so copy it and add it to the table. */
4236             make_tag (lb.buffer, cp - lb.buffer, true,
4237                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4238         }
4239     }
4240 }
4241
4242 \f
4243 /*
4244  * Perl support
4245  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4246  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4247  * Perl variable names: /^(my|local).../
4248  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4249  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4250  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4251  */
4252 static void
4253 Perl_functions (FILE *inf)
4254 {
4255   char *package = savestr ("main"); /* current package name */
4256   register char *cp;
4257
4258   LOOP_ON_INPUT_LINES (inf, lb, cp)
4259     {
4260       cp = skip_spaces (cp);
4261
4262       if (LOOKING_AT (cp, "package"))
4263         {
4264           free (package);
4265           get_tag (cp, &package);
4266         }
4267       else if (LOOKING_AT (cp, "sub"))
4268         {
4269           char *pos, *sp;
4270
4271         subr:
4272           sp = cp;
4273           while (!notinname (*cp))
4274             cp++;
4275           if (cp == sp)
4276             continue;           /* nothing found */
4277           if ((pos = etags_strchr (sp, ':')) != NULL
4278               && pos < cp && pos[1] == ':')
4279             /* The name is already qualified. */
4280             make_tag (sp, cp - sp, true,
4281                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4282           else
4283             /* Qualify it. */
4284             {
4285               char savechar, *name;
4286
4287               savechar = *cp;
4288               *cp = '\0';
4289               name = concat (package, "::", sp);
4290               *cp = savechar;
4291               make_tag (name, strlen (name), true,
4292                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4293               free (name);
4294             }
4295         }
4296       else if (LOOKING_AT (cp, "use constant")
4297                || LOOKING_AT (cp, "use constant::defer"))
4298         {
4299           /* For hash style multi-constant like
4300                 use constant { FOO => 123,
4301                                BAR => 456 };
4302              only the first FOO is picked up.  Parsing across the value
4303              expressions would be difficult in general, due to possible nested
4304              hashes, here-documents, etc.  */
4305           if (*cp == '{')
4306             cp = skip_spaces (cp+1);
4307           goto subr;
4308         }
4309       else if (globals) /* only if we are tagging global vars */
4310         {
4311           /* Skip a qualifier, if any. */
4312           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4313           /* After "my" or "local", but before any following paren or space. */
4314           char *varstart = cp;
4315
4316           if (qual              /* should this be removed?  If yes, how? */
4317               && (*cp == '$' || *cp == '@' || *cp == '%'))
4318             {
4319               varstart += 1;
4320               do
4321                 cp++;
4322               while (ISALNUM (*cp) || *cp == '_');
4323             }
4324           else if (qual)
4325             {
4326               /* Should be examining a variable list at this point;
4327                  could insist on seeing an open parenthesis. */
4328               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4329                 cp++;
4330             }
4331           else
4332             continue;
4333
4334           make_tag (varstart, cp - varstart, false,
4335                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4336         }
4337     }
4338   free (package);
4339 }
4340
4341
4342 /*
4343  * Python support
4344  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4345  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4346  * More ideas by seb bacon <seb@jamkit.com> (2002)
4347  */
4348 static void
4349 Python_functions (FILE *inf)
4350 {
4351   register char *cp;
4352
4353   LOOP_ON_INPUT_LINES (inf, lb, cp)
4354     {
4355       cp = skip_spaces (cp);
4356       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4357         {
4358           char *name = cp;
4359           while (!notinname (*cp) && *cp != ':')
4360             cp++;
4361           make_tag (name, cp - name, true,
4362                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4363         }
4364     }
4365 }
4366
4367 \f
4368 /*
4369  * PHP support
4370  * Look for:
4371  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4372  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4373  *  - /^[ \t]*define\(\"[^\"]+/
4374  * Only with --members:
4375  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]+/
4376  * Idea by Diez B. Roggisch (2001)
4377  */
4378 static void
4379 PHP_functions (FILE *inf)
4380 {
4381   char *cp, *name;
4382   bool search_identifier = false;
4383
4384   LOOP_ON_INPUT_LINES (inf, lb, cp)
4385     {
4386       cp = skip_spaces (cp);
4387       name = cp;
4388       if (search_identifier
4389           && *cp != '\0')
4390         {
4391           while (!notinname (*cp))
4392             cp++;
4393           make_tag (name, cp - name, true,
4394                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4395           search_identifier = false;
4396         }
4397       else if (LOOKING_AT (cp, "function"))
4398         {
4399           if (*cp == '&')
4400             cp = skip_spaces (cp+1);
4401           if (*cp != '\0')
4402             {
4403               name = cp;
4404               while (!notinname (*cp))
4405                 cp++;
4406               make_tag (name, cp - name, true,
4407                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4408             }
4409           else
4410             search_identifier = true;
4411         }
4412       else if (LOOKING_AT (cp, "class"))
4413         {
4414           if (*cp != '\0')
4415             {
4416               name = cp;
4417               while (*cp != '\0' && !iswhite (*cp))
4418                 cp++;
4419               make_tag (name, cp - name, false,
4420                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4421             }
4422           else
4423             search_identifier = true;
4424         }
4425       else if (strneq (cp, "define", 6)
4426                && (cp = skip_spaces (cp+6))
4427                && *cp++ == '('
4428                && (*cp == '"' || *cp == '\''))
4429         {
4430           char quote = *cp++;
4431           name = cp;
4432           while (*cp != quote && *cp != '\0')
4433             cp++;
4434           make_tag (name, cp - name, false,
4435                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436         }
4437       else if (members
4438                && LOOKING_AT (cp, "var")
4439                && *cp == '$')
4440         {
4441           name = cp;
4442           while (!notinname (*cp))
4443             cp++;
4444           make_tag (name, cp - name, false,
4445                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446         }
4447     }
4448 }
4449
4450 \f
4451 /*
4452  * Cobol tag functions
4453  * We could look for anything that could be a paragraph name.
4454  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4455  * Idea by Corny de Souza (1993)
4456  */
4457 static void
4458 Cobol_paragraphs (FILE *inf)
4459 {
4460   register char *bp, *ep;
4461
4462   LOOP_ON_INPUT_LINES (inf, lb, bp)
4463     {
4464       if (lb.len < 9)
4465         continue;
4466       bp += 8;
4467
4468       /* If eoln, compiler option or comment ignore whole line. */
4469       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4470         continue;
4471
4472       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4473         continue;
4474       if (*ep++ == '.')
4475         make_tag (bp, ep - bp, true,
4476                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4477     }
4478 }
4479
4480 \f
4481 /*
4482  * Makefile support
4483  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4484  */
4485 static void
4486 Makefile_targets (FILE *inf)
4487 {
4488   register char *bp;
4489
4490   LOOP_ON_INPUT_LINES (inf, lb, bp)
4491     {
4492       if (*bp == '\t' || *bp == '#')
4493         continue;
4494       while (*bp != '\0' && *bp != '=' && *bp != ':')
4495         bp++;
4496       if (*bp == ':' || (globals && *bp == '='))
4497         {
4498           /* We should detect if there is more than one tag, but we do not.
4499              We just skip initial and final spaces. */
4500           char * namestart = skip_spaces (lb.buffer);
4501           while (--bp > namestart)
4502             if (!notinname (*bp))
4503               break;
4504           make_tag (namestart, bp - namestart + 1, true,
4505                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4506         }
4507     }
4508 }
4509
4510 \f
4511 /*
4512  * Pascal parsing
4513  * Original code by Mosur K. Mohan (1989)
4514  *
4515  *  Locates tags for procedures & functions.  Doesn't do any type- or
4516  *  var-definitions.  It does look for the keyword "extern" or
4517  *  "forward" immediately following the procedure statement; if found,
4518  *  the tag is skipped.
4519  */
4520 static void
4521 Pascal_functions (FILE *inf)
4522 {
       /* Walk INF one character at a time through the global pointer dbp,
          refilling lb with readline each time the end of a line is hit.
          Text inside { } or (* *) comments and inside '..' strings is
          skipped.  After the keyword "procedure" or "function" (matched
          through lowcase and nocase_tail) the next token is remembered
          as a candidate tag.  Once the ';' that ends its heading has been
          seen outside a parameter list, the tag is made unless the next
          word is "extern" or "forward".  */
4523   linebuffer tline;             /* mostly copied from C_entries */
4524   long save_lcno;
4525   int save_lineno, namelen, taglen;
4526   char c, *name;
4527
4528   bool                          /* each of these flags is true if: */
4529     incomment,                  /* point is inside a comment */
4530     inquote,                    /* point is inside '..' string */
4531     get_tagname,                /* point is after PROCEDURE/FUNCTION
4532                                    keyword, so next item = potential tag */
4533     found_tag,                  /* point is after a potential tag */
4534     inparms,                    /* point is within parameter-list */
4535     verify_tag;                 /* point has passed the parm-list, so the
4536                                    next token will determine whether this
4537                                    is a FORWARD/EXTERN to be ignored, or
4538                                    whether it is a real tag */
4539
4540   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4541   name = NULL;                  /* keep compiler quiet */
       /* Start with an empty line so that the first pass of the loop
          reads a line from INF.  */
4542   dbp = lb.buffer;
4543   *dbp = '\0';
4544   linebuffer_init (&tline);
4545
4546   incomment = inquote = false;
4547   found_tag = false;            /* have a proc name; check if extern */
4548   get_tagname = false;          /* found "procedure" keyword         */
4549   inparms = false;              /* found '(' after "proc"            */
4550   verify_tag = false;           /* check if "extern" is ahead        */
4551
4552
4553   while (!feof (inf))           /* long main loop to get next char */
4554     {
4555       c = *dbp++;
4556       if (c == '\0')            /* if end of line */
4557         {
4558           readline (&lb, inf);
4559           dbp = lb.buffer;
4560           if (*dbp == '\0')
4561             continue;
4562           if (!((found_tag && verify_tag)
4563                 || get_tagname))
4564             c = *dbp++;         /* only if don't need *dbp pointing
4565                                    to the beginning of the name of
4566                                    the procedure or function */
4567         }
4568       if (incomment)
4569         {
4570           if (c == '}')         /* within { } comments */
4571             incomment = false;
4572           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4573             {
4574               dbp++;
4575               incomment = false;
4576             }
4577           continue;
4578         }
4579       else if (inquote)
4580         {
4581           if (c == '\'')
4582             inquote = false;
4583           continue;
4584         }
4585       else
4586         switch (c)
4587           {
4588           case '\'':
4589             inquote = true;     /* found first quote */
4590             continue;
4591           case '{':             /* found open { comment */
4592             incomment = true;
4593             continue;
4594           case '(':
4595             if (*dbp == '*')    /* found open (* comment */
4596               {
4597                 incomment = true;
4598                 dbp++;
4599               }
4600             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4601               inparms = true;
4602             continue;
4603           case ')':             /* end of parms list */
4604             if (inparms)
4605               inparms = false;
4606             continue;
4607           case ';':
4608             if (found_tag && !inparms) /* end of proc or fn stmt */
4609               {
4610                 verify_tag = true;
4611                 break;
4612               }
4613             continue;
4614           }
           /* Past the ';' of a heading: a space at *dbp, or an empty rest
              of line, just goes round the loop again; any other
              character decides whether the candidate tag is kept.  */
4615       if (found_tag && verify_tag && (*dbp != ' '))
4616         {
4617           /* Check if this is an "extern" declaration. */
4618           if (*dbp == '\0')
4619             continue;
4620           if (lowcase (*dbp) == 'e')
4621             {
4622               if (nocase_tail ("extern")) /* superfluous, really! */
4623                 {
4624                   found_tag = false;
4625                   verify_tag = false;
4626                 }
4627             }
4628           else if (lowcase (*dbp) == 'f')
4629             {
4630               if (nocase_tail ("forward")) /* check for forward reference */
4631                 {
4632                   found_tag = false;
4633                   verify_tag = false;
4634                 }
4635             }
4636           if (found_tag && verify_tag) /* not external proc, so make tag */
4637             {
4638               found_tag = false;
4639               verify_tag = false;
4640               make_tag (name, namelen, true,
4641                         tline.buffer, taglen, save_lineno, save_lcno);
4642               continue;
4643             }
4644         }
4645       if (get_tagname)          /* grab name of proc or fn */
4646         {
4647           char *cp;
4648
4649           if (*dbp == '\0')
4650             continue;
4651
4652           /* Find block name. */
4653           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4654             continue;
4655
4656           /* Save all values for later tagging. */
               /* The line is copied into tline and NAME points into that
                  copy, because lb may be refilled by readline before
                  make_tag is finally called.  */
4657           linebuffer_setlen (&tline, lb.len);
4658           strcpy (tline.buffer, lb.buffer);
4659           save_lineno = lineno;
4660           save_lcno = linecharno;
4661           name = tline.buffer + (dbp - lb.buffer);
4662           namelen = cp - dbp;
4663           taglen = cp - lb.buffer + 1;
4664
4665           dbp = cp;             /* set dbp to e-o-token */
4666           get_tagname = false;
4667           found_tag = true;
4668           continue;
4669
4670           /* And proceed to check for "extern". */
4671         }
           /* incomment and inquote are always false at this point, since
              both of those states `continue' above; only found_tag
              really matters in the test below.  */
4672       else if (!incomment && !inquote && !found_tag)
4673         {
4674           /* Check for proc/fn keywords. */
4675           switch (lowcase (c))
4676             {
4677             case 'p':
4678               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4679                 get_tagname = true;
4680               continue;
4681             case 'f':
4682               if (nocase_tail ("unction"))
4683                 get_tagname = true;
4684               continue;
4685             }
4686         }
4687     } /* while not eof */
4688
4689   free (tline.buffer);
4690 }
4691
4692 \f
4693 /*
4694  * Lisp tag functions
4695  *  look for (def or (DEF, quote or QUOTE
4696  */
4697
4698 static void L_getit (void);
4699
4700 static void
4701 L_getit (void)
4702 {
4703   if (*dbp == '\'')             /* Skip prefix quote */
4704     dbp++;
4705   else if (*dbp == '(')
4706   {
4707     dbp++;
4708     /* Try to skip "(quote " */
4709     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4710       /* Ok, then skip "(" before name in (defstruct (foo)) */
4711       dbp = skip_spaces (dbp);
4712   }
4713   get_tag (dbp, NULL);
4714 }
4715
4716 static void
4717 Lisp_functions (FILE *inf)
4718 {
4719   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4720     {
4721       if (dbp[0] != '(')
4722         continue;
4723
4724       /* "(defvar foo)" is a declaration rather than a definition.  */
4725       if (! declarations)
4726         {
4727           char *p = dbp + 1;
4728           if (LOOKING_AT (p, "defvar"))
4729             {
4730               p = skip_name (p); /* past var name */
4731               p = skip_spaces (p);
4732               if (*p == ')')
4733                 continue;
4734             }
4735         }
4736
4737       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4738         dbp += 3;
4739
4740       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4741         {
4742           dbp = skip_non_spaces (dbp);
4743           dbp = skip_spaces (dbp);
4744           L_getit ();
4745         }
4746       else
4747         {
4748           /* Check for (foo::defmumble name-defined ... */
4749           do
4750             dbp++;
4751           while (!notinname (*dbp) && *dbp != ':');
4752           if (*dbp == ':')
4753             {
4754               do
4755                 dbp++;
4756               while (*dbp == ':');
4757
4758               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4759                 {
4760                   dbp = skip_non_spaces (dbp);
4761                   dbp = skip_spaces (dbp);
4762                   L_getit ();
4763                 }
4764             }
4765         }
4766     }
4767 }
4768
4769 \f
4770 /*
4771  * Lua script language parsing
4772  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4773  *
4774  *  "function" and "local function" are tags if they start at column 1.
4775  */
4776 static void
4777 Lua_functions (FILE *inf)
4778 {
4779   register char *bp;
4780
4781   LOOP_ON_INPUT_LINES (inf, lb, bp)
4782     {
4783       if (bp[0] != 'f' && bp[0] != 'l')
4784         continue;
4785
4786       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4787
4788       if (LOOKING_AT (bp, "function"))
4789         get_tag (bp, NULL);
4790     }
4791 }
4792
4793 \f
4794 /*
4795  * PostScript tags
4796  * Just look for lines where the first character is '/'
4797  * Also look at "defineps" for PSWrap
4798  * Ideas by:
4799  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4800  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4801  */
4802 static void
4803 PS_functions (FILE *inf)
4804 {
4805   register char *bp, *ep;
4806
4807   LOOP_ON_INPUT_LINES (inf, lb, bp)
4808     {
4809       if (bp[0] == '/')
4810         {
4811           for (ep = bp+1;
4812                *ep != '\0' && *ep != ' ' && *ep != '{';
4813                ep++)
4814             continue;
4815           make_tag (bp, ep - bp, true,
4816                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4817         }
4818       else if (LOOKING_AT (bp, "defineps"))
4819         get_tag (bp, NULL);
4820     }
4821 }
4822
4823 \f
4824 /*
4825  * Forth tags
4826  * Ignore anything after \ followed by space or in ( )
4827  * Look for words defined by :
4828  * Look for constant, code, create, defer, value, and variable
4829  * OBP extensions:  Look for buffer:, field,
4830  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4831  */
4832 static void
4833 Forth_words (FILE *inf)
4834 {
4835   register char *bp;
4836
4837   LOOP_ON_INPUT_LINES (inf, lb, bp)
4838     while ((bp = skip_spaces (bp))[0] != '\0')
4839       if (bp[0] == '\\' && iswhite (bp[1]))
4840         break;                  /* read next line */
4841       else if (bp[0] == '(' && iswhite (bp[1]))
4842         do                      /* skip to ) or eol */
4843           bp++;
4844         while (*bp != ')' && *bp != '\0');
4845       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4846                || LOOKING_AT_NOCASE (bp, "constant")
4847                || LOOKING_AT_NOCASE (bp, "code")
4848                || LOOKING_AT_NOCASE (bp, "create")
4849                || LOOKING_AT_NOCASE (bp, "defer")
4850                || LOOKING_AT_NOCASE (bp, "value")
4851                || LOOKING_AT_NOCASE (bp, "variable")
4852                || LOOKING_AT_NOCASE (bp, "buffer:")
4853                || LOOKING_AT_NOCASE (bp, "field"))
4854         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4855       else
4856         bp = skip_non_spaces (bp);
4857 }
4858
4859 \f
4860 /*
4861  * Scheme tag functions
4862  * look for (def... xyzzy
4863  *          (def... (xyzzy
4864  *          (def ... ((...(xyzzy ....
4865  *          (set! xyzzy
4866  * Original code by Ken Haase (1985?)
4867  */
4868 static void
4869 Scheme_functions (FILE *inf)
4870 {
4871   register char *bp;
4872
4873   LOOP_ON_INPUT_LINES (inf, lb, bp)
4874     {
4875       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4876         {
4877           bp = skip_non_spaces (bp+4);
4878           /* Skip over open parens and white space.  Don't continue past
4879              '\0'. */
4880           while (*bp && notinname (*bp))
4881             bp++;
4882           get_tag (bp, NULL);
4883         }
4884       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4885         get_tag (bp, NULL);
4886     }
4887 }
4888
4889 \f
4890 /* Find tags in TeX and LaTeX input files.  */
4891
4892 /* TEX_toktab is a table of TeX control sequences that define tags.
4893  * Each entry records one such control sequence.
4894  *
4895  * Original code from who knows whom.
4896  * Ideas by:
4897  *   Stefan Monnier (2002)
4898  */
4899
4900 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens; an entry with a NULL buffer ends it */
4901
4902 /* Default set of control sequences to put into TEX_toktab.
4903    The value of environment var TEXTAGS is prepended to this.  */
4904 static const char *TEX_defenv = "\
4905 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4906 :part:appendix:entry:index:def\
4907 :newcommand:renewcommand:newenvironment:renewenvironment";
4908
4909 static void TEX_mode (FILE *);
4910 static void TEX_decode_env (const char *, const char *);
4911
     /* Escape and grouping characters used by TeX_commands.  TEX_mode
        sets all three again for every input file.  */
4912 static char TEX_esc = '\\';
4913 static char TEX_opgrp = '{';
4914 static char TEX_clgrp = '}';
4915
4916 /*
4917  * TeX/LaTeX scanning loop.
4918  */
4919 static void
4920 TeX_commands (FILE *inf)
4921 {
4922   char *cp;
4923   linebuffer *key;
4924
4925   /* Select either \ or ! as escape character.  */
4926   TEX_mode (inf);
4927
4928   /* Initialize token table once from environment. */
4929   if (TEX_toktab == NULL)
4930     TEX_decode_env ("TEXTAGS", TEX_defenv);
4931
4932   LOOP_ON_INPUT_LINES (inf, lb, cp)
4933     {
4934       /* Look at each TEX keyword in line. */
4935       for (;;)
4936         {
4937           /* Look for a TEX escape. */
4938           while (*cp++ != TEX_esc)
4939             if (cp[-1] == '\0' || cp[-1] == '%')
4940               goto tex_next_line;
4941
4942           for (key = TEX_toktab; key->buffer != NULL; key++)
4943             if (strneq (cp, key->buffer, key->len))
4944               {
4945                 char *p;
4946                 int namelen, linelen;
4947                 bool opgrp = false;
4948
4949                 cp = skip_spaces (cp + key->len);
4950                 if (*cp == TEX_opgrp)
4951                   {
4952                     opgrp = true;
4953                     cp++;
4954                   }
4955                 for (p = cp;
4956                      (!iswhite (*p) && *p != '#' &&
4957                       *p != TEX_opgrp && *p != TEX_clgrp);
4958                      p++)
4959                   continue;
4960                 namelen = p - cp;
4961                 linelen = lb.len;
4962                 if (!opgrp || *p == TEX_clgrp)
4963                   {
4964                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4965                       p++;
4966                     linelen = p - lb.buffer + 1;
4967                   }
4968                 make_tag (cp, namelen, true,
4969                           lb.buffer, linelen, lineno, linecharno);
4970                 goto tex_next_line; /* We only tag a line once */
4971               }
4972         }
4973     tex_next_line:
4974       ;
4975     }
4976 }
4977
4978 #define TEX_LESC '\\'
4979 #define TEX_SESC '!'
4980
4981 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4982    chars accordingly. */
4983 static void
4984 TEX_mode (FILE *inf)
4985 {
4986   int c;
4987
4988   while ((c = getc (inf)) != EOF)
4989     {
4990       /* Skip to next line if we hit the TeX comment char. */
4991       if (c == '%')
4992         while (c != '\n' && c != EOF)
4993           c = getc (inf);
4994       else if (c == TEX_LESC || c == TEX_SESC )
4995         break;
4996     }
4997
4998   if (c == TEX_LESC)
4999     {
5000       TEX_esc = TEX_LESC;
5001       TEX_opgrp = '{';
5002       TEX_clgrp = '}';
5003     }
5004   else
5005     {
5006       TEX_esc = TEX_SESC;
5007       TEX_opgrp = '<';
5008       TEX_clgrp = '>';
5009     }
5010   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5011      No attempt is made to correct the situation. */
5012   rewind (inf);
5013 }
5014
5015 /* Read environment and prepend it to the default string.
5016    Build token table. */
5017 static void
5018 TEX_decode_env (const char *evarname, const char *defenv)
5019 {
5020   register const char *env, *p;
5021   int i, len;
5022
5023   /* Append default string to environment. */
5024   env = getenv (evarname);
5025   if (!env)
5026     env = defenv;
5027   else
5028     env = concat (env, defenv, "");
5029
5030   /* Allocate a token table */
5031   for (len = 1, p = env; p;)
5032     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5033       len++;
5034   TEX_toktab = xnew (len, linebuffer);
5035
5036   /* Unpack environment string into token table. Be careful about */
5037   /* zero-length strings (leading ':', "::" and trailing ':') */
5038   for (i = 0; *env != '\0';)
5039     {
5040       p = etags_strchr (env, ':');
5041       if (!p)                   /* End of environment string. */
5042         p = env + strlen (env);
5043       if (p - env > 0)
5044         {                       /* Only non-zero strings. */
5045           TEX_toktab[i].buffer = savenstr (env, p - env);
5046           TEX_toktab[i].len = p - env;
5047           i++;
5048         }
5049       if (*p)
5050         env = p + 1;
5051       else
5052         {
5053           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5054           TEX_toktab[i].len = 0;
5055           break;
5056         }
5057     }
5058 }
5059
5060 \f
5061 /* Texinfo support.  Dave Love, Mar. 2000.  */
5062 static void
5063 Texinfo_nodes (FILE *inf)
5064 {
5065   char *cp, *start;
5066   LOOP_ON_INPUT_LINES (inf, lb, cp)
5067     if (LOOKING_AT (cp, "@node"))
5068       {
5069         start = cp;
5070         while (*cp != '\0' && *cp != ',')
5071           cp++;
5072         make_tag (start, cp - start, true,
5073                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5074       }
5075 }
5076
5077 \f
5078 /*
5079  * HTML support.
5080  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5081  * Contents of <a name=xxx> are tags with name xxx.
5082  *
5083  * Francesco Potortì, 2002.
5084  */
5085 static void
5086 HTML_labels (FILE *inf)
5087 {
       /* Scan INF line by line through the global pointer dbp.  The four
          flags below persist from one line to the next, so an HTML tag,
          or the text that follows it, may continue on a later line.  At
          most one of the states skiptag, intag and getnext is acted upon
          in each pass of the inner loop, in that order of priority.  */
5088   bool getnext = false;         /* next text outside of HTML tags is a tag */
5089   bool skiptag = false;         /* skip to the end of the current HTML tag */
5090   bool intag = false;           /* inside an html tag, looking for ID= */
5091   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
5092   char *end;
5093
5094
5095   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5096
5097   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5098     for (;;)                    /* loop on the same line */
5099       {
5100         if (skiptag)            /* skip HTML tag */
5101           {
5102             while (*dbp != '\0' && *dbp != '>')
5103               dbp++;
5104             if (*dbp == '>')
5105               {
5106                 dbp += 1;
5107                 skiptag = false;
5108                 continue;       /* look on the same line */
5109               }
5110             break;              /* go to next line */
5111           }
5112
5113         else if (intag) /* look for "name=" or "id=" */
5114           {
                 /* Stop at the end of the line, at the end of the HTML tag,
                    or at a letter that could start "name=" or "id=".  */
5115             while (*dbp != '\0' && *dbp != '>'
5116                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5117               dbp++;
5118             if (*dbp == '\0')
5119               break;            /* go to next line */
5120             if (*dbp == '>')
5121               {
5122                 dbp += 1;
5123                 intag = false;
5124                 continue;       /* look on the same line */
5125               }
5126             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5127                 || LOOKING_AT_NOCASE (dbp, "id="))
5128               {
                     /* Only a double quote is recognized as a quoting
                        character for the attribute value.  */
5129                 bool quoted = (dbp[0] == '"');
5130
5131                 if (quoted)
5132                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5133                     continue;
5134                 else
5135                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5136                     continue;
                     /* Keep the attribute value in token_name; it is used
                        as the tag name when the text after this HTML tag
                        is found (see the getnext state).  */
5137                 linebuffer_setlen (&token_name, end - dbp);
5138                 memcpy (token_name.buffer, dbp, end - dbp);
5139                 token_name.buffer[end - dbp] = '\0';
5140
5141                 dbp = end;
5142                 intag = false;  /* we found what we looked for */
5143                 skiptag = true; /* skip to the end of the tag */
5144                 getnext = true; /* then grab the text */
5145                 continue;       /* look on the same line */
5146               }
5147             dbp += 1;
5148           }
5149
5150         else if (getnext)       /* grab next tokens and tag them */
5151           {
5152             dbp = skip_spaces (dbp);
5153             if (*dbp == '\0')
5154               break;            /* go to next line */
5155             if (*dbp == '<')
5156               {
                     /* Another HTML tag comes before any text: examine it
                        first; getnext stays set meanwhile.  */
5157                 intag = true;
5158                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5159                 continue;       /* look on the same line */
5160               }
5161
                 /* The text from dbp up to the next '<', or to the end of
                    the line, is handed to make_tag together with the name
                    saved in token_name (possibly empty).  */
5162             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5163               continue;
5164             make_tag (token_name.buffer, token_name.len, true,
5165                       dbp, end - dbp, lineno, linecharno);
5166             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5167             getnext = false;
5168             break;              /* go to next line */
5169           }
5170
5171         else                    /* look for an interesting HTML tag */
5172           {
5173             while (*dbp != '\0' && *dbp != '<')
5174               dbp++;
5175             if (*dbp == '\0')
5176               break;            /* go to next line */
5177             intag = true;
5178             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5179               {
5180                 inanchor = true;
5181                 continue;       /* look on the same line */
5182               }
5183             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5184                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5185                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5186                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5187               {
5188                 intag = false;
5189                 getnext = true;
5190                 continue;       /* look on the same line */
5191               }
5192             dbp += 1;
5193           }
5194       }
5195 }
5196
5197 \f
5198 /*
5199  * Prolog support
5200  *
5201  * Assumes that the predicate or rule starts at column 0.
5202  * Only the first clause of a predicate or rule is added.
5203  * Original code by Sunichirou Sugou (1989)
5204  * Rewritten by Anders Lindgren (1996)
5205  */
5206 static size_t prolog_pr (char *, char *);
5207 static void prolog_skip_comment (linebuffer *, FILE *);
5208 static size_t prolog_atom (char *, size_t);
5209
5210 static void
5211 Prolog_functions (FILE *inf)
5212 {
5213   char *cp, *last;
5214   size_t len;
5215   size_t allocated;
5216
5217   allocated = 0;
5218   len = 0;
5219   last = NULL;
5220
5221   LOOP_ON_INPUT_LINES (inf, lb, cp)
5222     {
5223       if (cp[0] == '\0')        /* Empty line */
5224         continue;
5225       else if (iswhite (cp[0])) /* Not a predicate */
5226         continue;
5227       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5228         prolog_skip_comment (&lb, inf);
5229       else if ((len = prolog_pr (cp, last)) > 0)
5230         {
5231           /* Predicate or rule.  Store the function name so that we
5232              only generate a tag for the first clause.  */
5233           if (last == NULL)
5234             last = xnew (len + 1, char);
5235           else if (len + 1 > allocated)
5236             xrnew (last, len + 1, char);
5237           allocated = len + 1;
5238           memcpy (last, cp, len);
5239           last[len] = '\0';
5240         }
5241     }
5242   free (last);
5243 }
5244
5245
5246 static void
5247 prolog_skip_comment (linebuffer *plb, FILE *inf)
5248 {
5249   char *cp;
5250
5251   do
5252     {
5253       for (cp = plb->buffer; *cp != '\0'; cp++)
5254         if (cp[0] == '*' && cp[1] == '/')
5255           return;
5256       readline (plb, inf);
5257     }
5258   while (!feof (inf));
5259 }
5260
5261 /*
5262  * A predicate or rule definition is added if it matches:
5263  *     <beginning of line><Prolog Atom><whitespace>(
5264  * or  <beginning of line><Prolog Atom><whitespace>:-
5265  *
5266  * It is added to the tags database if it doesn't match the
5267  * name of the previous clause header.
5268  *
5269  * Return the size of the name of the predicate or rule, or 0 if no
5270  * header was found.
5271  */
5272 static size_t
5273 prolog_pr (char *s, char *last)
5274
5275                                 /* Name of last clause. */
5276 {
5277   size_t pos;
5278   size_t len;
5279
5280   pos = prolog_atom (s, 0);
5281   if (! pos)
5282     return 0;
5283
5284   len = pos;
5285   pos = skip_spaces (s + pos) - s;
5286
5287   if ((s[pos] == '.'
5288        || (s[pos] == '(' && (pos += 1))
5289        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5290       && (last == NULL          /* save only the first clause */
5291           || len != strlen (last)
5292           || !strneq (s, last, len)))
5293         {
5294           make_tag (s, len, true, s, pos, lineno, linecharno);
5295           return len;
5296         }
5297   else
5298     return 0;
5299 }
5300
/*
 * Measure the Prolog atom that starts at S + POS.
 * Return its length in bytes, or 0 if no atom starts there.
 *
 * Two kinds of atom are recognized:
 * - An unquoted atom: a lower case letter or an underscore, followed
 *   by any number of alphanumeric characters and underscores.
 * - A quoted atom: text between single quotes.  Inside it, two
 *   adjacent single quotes stand for one quote and do not end the
 *   atom, and a backslash makes the following character ordinary.
 *   The returned length includes both enclosing quotes.  A quoted
 *   atom that is not closed before the end of the string yields 0.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  for (p++;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* the backslash and what it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote */
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5357
5358 \f
5359 /*
5360  * Support for Erlang
5361  *
5362  * Generates tags for functions, defines, and records.
5363  * Assumes that Erlang functions start at column 0.
5364  * Original code by Anders Lindgren (1996)
5365  */
5366 static int erlang_func (char *, char *);
5367 static void erlang_attribute (char *);
5368 static int erlang_atom (char *);
5369
5370 static void
5371 Erlang_functions (FILE *inf)
5372 {
5373   char *cp, *last;
5374   int len;
5375   int allocated;
5376
5377   allocated = 0;
5378   len = 0;
5379   last = NULL;
5380
5381   LOOP_ON_INPUT_LINES (inf, lb, cp)
5382     {
5383       if (cp[0] == '\0')        /* Empty line */
5384         continue;
5385       else if (iswhite (cp[0])) /* Not function nor attribute */
5386         continue;
5387       else if (cp[0] == '%')    /* comment */
5388         continue;
5389       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5390         continue;
5391       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5392         {
5393           erlang_attribute (cp);
5394           if (last != NULL)
5395             {
5396               free (last);
5397               last = NULL;
5398             }
5399         }
5400       else if ((len = erlang_func (cp, last)) > 0)
5401         {
5402           /*
5403            * Function.  Store the function name so that we only
5404            * generates a tag for the first clause.
5405            */
5406           if (last == NULL)
5407             last = xnew (len + 1, char);
5408           else if (len + 1 > allocated)
5409             xrnew (last, len + 1, char);
5410           allocated = len + 1;
5411           memcpy (last, cp, len);
5412           last[len] = '\0';
5413         }
5414     }
5415   free (last);
5416 }
5417
5418
5419 /*
5420  * A function definition is added if it matches:
5421  *     <beginning of line><Erlang Atom><whitespace>(
5422  *
5423  * It is added to the tags database if it doesn't match the
5424  * name of the previous clause header.
5425  *
5426  * Return the size of the name of the function, or 0 if no function
5427  * was found.
5428  */
5429 static int
5430 erlang_func (char *s, char *last)
5431
5432                                 /* Name of last clause. */
5433 {
5434   int pos;
5435   int len;
5436
5437   pos = erlang_atom (s);
5438   if (pos < 1)
5439     return 0;
5440
5441   len = pos;
5442   pos = skip_spaces (s + pos) - s;
5443
5444   /* Save only the first clause. */
5445   if (s[pos++] == '('
5446       && (last == NULL
5447           || len != (int)strlen (last)
5448           || !strneq (s, last, len)))
5449         {
5450           make_tag (s, len, true, s, pos, lineno, linecharno);
5451           return len;
5452         }
5453
5454   return 0;
5455 }
5456
5457
5458 /*
5459  * Handle attributes.  Currently, tags are generated for defines
5460  * and records.
5461  *
5462  * They are on the form:
5463  * -define(foo, bar).
5464  * -define(Foo(M, N), M+N).
5465  * -record(graph, {vtab = notable, cyclic = true}).
5466  */
5467 static void
5468 erlang_attribute (char *s)
5469 {
5470   char *cp = s;
5471
5472   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5473       && *cp++ == '(')
5474     {
5475       int len = erlang_atom (skip_spaces (cp));
5476       if (len > 0)
5477         make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5478     }
5479   return;
5480 }
5481
5482
/*
 * Measure the Erlang atom (or variable) at the start of S.
 * Return its length in bytes, or 0 if S does not start with one.
 *
 * An unquoted name is a letter or an underscore followed by any
 * number of alphanumeric characters and underscores.  A quoted atom
 * is text between single quotes, in which a backslash makes the
 * following character ordinary; its length includes both quotes.
 * A quoted atom that is not closed before the end of the string
 * yields 0.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (s[0] == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\\')
            pos++;              /* look at the quoted character instead */
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          pos++;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (!ISALPHA (s[0]) && s[0] != '_')
    return 0;

  /* The atom is unquoted. */
  for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos;
}
5510
5511 \f
5512 static char *scan_separators (char *);
5513 static void add_regex (char *, language *);
5514 static char *substitute (char *, char *, struct re_registers *);
5515
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  A backslash in front of any other
 * character is kept together with that character.  Works in place:
 * the result is written at the start of NAME and null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string has no separator and also
 * yields NULL.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan: do not step past the terminating null. */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  The copy is always at least one
     character behind the scan, so this never touches the separator
     that NAME points to.  */
  *copyto = '\0';
  return name;
}
5574
5575 /* Look at the argument of --regex or --no-regex and do the right
5576    thing.  Same for each line of a regexp file. */
5577 static void
5578 analyse_regex (char *regex_arg)
5579 {
5580   if (regex_arg == NULL)
5581     {
5582       free_regexps ();          /* --no-regex: remove existing regexps */
5583       return;
5584     }
5585
5586   /* A real --regexp option or a line in a regexp file. */
5587   switch (regex_arg[0])
5588     {
5589       /* Comments in regexp file or null arg to --regex. */
5590     case '\0':
5591     case ' ':
5592     case '\t':
5593       break;
5594
5595       /* Read a regex file.  This is recursive and may result in a
5596          loop, which will stop when the file descriptors are exhausted. */
5597     case '@':
5598       {
5599         FILE *regexfp;
5600         linebuffer regexbuf;
5601         char *regexfile = regex_arg + 1;
5602
5603         /* regexfile is a file containing regexps, one per line. */
5604         regexfp = fopen (regexfile, "rb");
5605         if (regexfp == NULL)
5606           pfatal (regexfile);
5607         linebuffer_init (&regexbuf);
5608         while (readline_internal (&regexbuf, regexfp) > 0)
5609           analyse_regex (regexbuf.buffer);
5610         free (regexbuf.buffer);
5611         fclose (regexfp);
5612       }
5613       break;
5614
5615       /* Regexp to be used for a specific language only. */
5616     case '{':
5617       {
5618         language *lang;
5619         char *lang_name = regex_arg + 1;
5620         char *cp;
5621
5622         for (cp = lang_name; *cp != '}'; cp++)
5623           if (*cp == '\0')
5624             {
5625               error ("unterminated language name in regex: %s", regex_arg);
5626               return;
5627             }
5628         *cp++ = '\0';
5629         lang = get_language_from_langname (lang_name);
5630         if (lang == NULL)
5631           return;
5632         add_regex (cp, lang);
5633       }
5634       break;
5635
5636       /* Regexp to be used for any language. */
5637     default:
5638       add_regex (regex_arg, NULL);
5639       break;
5640     }
5641 }
5642
5643 /* Separate the regexp pattern, compile it,
5644    and care for optional name and modifiers. */
5645 static void
5646 add_regex (char *regexp_pattern, language *lang)
5647 {
5648   static struct re_pattern_buffer zeropattern;
5649   char sep, *pat, *name, *modifiers;
5650   char empty = '\0';
5651   const char *err;
5652   struct re_pattern_buffer *patbuf;
5653   regexp *rp;
5654   bool
5655     force_explicit_name = true, /* do not use implicit tag names */
5656     ignore_case = false,        /* case is significant */
5657     multi_line = false,         /* matches are done one line at a time */
5658     single_line = false;        /* dot does not match newline */
5659
5660
5661   if (strlen (regexp_pattern) < 3)
5662     {
5663       error ("null regexp");
5664       return;
5665     }
5666   sep = regexp_pattern[0];
5667   name = scan_separators (regexp_pattern);
5668   if (name == NULL)
5669     {
5670       error ("%s: unterminated regexp", regexp_pattern);
5671       return;
5672     }
5673   if (name[1] == sep)
5674     {
5675       error ("null name for regexp \"%s\"", regexp_pattern);
5676       return;
5677     }
5678   modifiers = scan_separators (name);
5679   if (modifiers == NULL)        /* no terminating separator --> no name */
5680     {
5681       modifiers = name;
5682       name = &empty;
5683     }
5684   else
5685     modifiers += 1;             /* skip separator */
5686
5687   /* Parse regex modifiers. */
5688   for (; modifiers[0] != '\0'; modifiers++)
5689     switch (modifiers[0])
5690       {
5691       case 'N':
5692         if (modifiers == name)
5693           error ("forcing explicit tag name but no name, ignoring");
5694         force_explicit_name = true;
5695         break;
5696       case 'i':
5697         ignore_case = true;
5698         break;
5699       case 's':
5700         single_line = true;
5701         /* FALLTHRU */
5702       case 'm':
5703         multi_line = true;
5704         need_filebuf = true;
5705         break;
5706       default:
5707         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5708         break;
5709       }
5710
5711   patbuf = xnew (1, struct re_pattern_buffer);
5712   *patbuf = zeropattern;
5713   if (ignore_case)
5714     {
5715       static char lc_trans[CHARS];
5716       int i;
5717       for (i = 0; i < CHARS; i++)
5718         lc_trans[i] = lowcase (i);
5719       patbuf->translate = lc_trans;     /* translation table to fold case  */
5720     }
5721
5722   if (multi_line)
5723     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5724   else
5725     pat = regexp_pattern;
5726
5727   if (single_line)
5728     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5729   else
5730     re_set_syntax (RE_SYNTAX_EMACS);
5731
5732   err = re_compile_pattern (pat, strlen (pat), patbuf);
5733   if (multi_line)
5734     free (pat);
5735   if (err != NULL)
5736     {
5737       error ("%s while compiling pattern", err);
5738       return;
5739     }
5740
5741   rp = p_head;
5742   p_head = xnew (1, regexp);
5743   p_head->pattern = savestr (regexp_pattern);
5744   p_head->p_next = rp;
5745   p_head->lang = lang;
5746   p_head->pat = patbuf;
5747   p_head->name = savestr (name);
5748   p_head->error_signaled = false;
5749   p_head->force_explicit_name = force_explicit_name;
5750   p_head->ignore_case = ignore_case;
5751   p_head->multi_line = multi_line;
5752 }
5753
5754 /*
5755  * Do the substitutions indicated by the regular expression and
5756  * arguments.
5757  */
5758 static char *
5759 substitute (char *in, char *out, struct re_registers *regs)
5760 {
5761   char *result, *t;
5762   int size, dig, diglen;
5763
5764   result = NULL;
5765   size = strlen (out);
5766
5767   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5768   if (out[size - 1] == '\\')
5769     fatal ("pattern error in \"%s\"", out);
5770   for (t = etags_strchr (out, '\\');
5771        t != NULL;
5772        t = etags_strchr (t + 2, '\\'))
5773     if (ISDIGIT (t[1]))
5774       {
5775         dig = t[1] - '0';
5776         diglen = regs->end[dig] - regs->start[dig];
5777         size += diglen - 2;
5778       }
5779     else
5780       size -= 1;
5781
5782   /* Allocate space and do the substitutions. */
5783   assert (size >= 0);
5784   result = xnew (size + 1, char);
5785
5786   for (t = result; *out != '\0'; out++)
5787     if (*out == '\\' && ISDIGIT (*++out))
5788       {
5789         dig = *out - '0';
5790         diglen = regs->end[dig] - regs->start[dig];
5791         memcpy (t, in + regs->start[dig], diglen);
5792         t += diglen;
5793       }
5794     else
5795       *t++ = *out;
5796   *t = '\0';
5797
5798   assert (t <= result + size);
5799   assert (t - result == (int)strlen (result));
5800
5801   return result;
5802 }
5803
5804 /* Deallocate all regexps. */
5805 static void
5806 free_regexps (void)
5807 {
5808   regexp *rp;
5809   while (p_head != NULL)
5810     {
5811       rp = p_head->p_next;
5812       free (p_head->pattern);
5813       free (p_head->name);
5814       free (p_head);
5815       p_head = rp;
5816     }
5817   return;
5818 }
5819
5820 /*
5821  * Reads the whole file as a single string from `filebuf' and looks for
5822  * multi-line regular expressions, creating tags on matches.
5823  * readline already dealt with normal regexps.
5824  *
5825  * Idea by Ben Wing <ben@666.com> (2002).
5826  */
5827 static void
5828 regex_tag_multiline (void)
5829 {
5830   char *buffer = filebuf.buffer;
5831   regexp *rp;
5832   char *name;
5833
5834   for (rp = p_head; rp != NULL; rp = rp->p_next)
5835     {
5836       int match = 0;
5837
5838       if (!rp->multi_line)
5839         continue;               /* skip normal regexps */
5840
5841       /* Generic initializations before parsing file from memory. */
5842       lineno = 1;               /* reset global line number */
5843       charno = 0;               /* reset global char number */
5844       linecharno = 0;           /* reset global char number of line start */
5845
5846       /* Only use generic regexps or those for the current language. */
5847       if (rp->lang != NULL && rp->lang != curfdp->lang)
5848         continue;
5849
5850       while (match >= 0 && match < filebuf.len)
5851         {
5852           match = re_search (rp->pat, buffer, filebuf.len, charno,
5853                              filebuf.len - match, &rp->regs);
5854           switch (match)
5855             {
5856             case -2:
5857               /* Some error. */
5858               if (!rp->error_signaled)
5859                 {
5860                   error ("regexp stack overflow while matching \"%s\"",
5861                          rp->pattern);
5862                   rp->error_signaled = true;
5863                 }
5864               break;
5865             case -1:
5866               /* No match. */
5867               break;
5868             default:
5869               if (match == rp->regs.end[0])
5870                 {
5871                   if (!rp->error_signaled)
5872                     {
5873                       error ("regexp matches the empty string: \"%s\"",
5874                              rp->pattern);
5875                       rp->error_signaled = true;
5876                     }
5877                   match = -3;   /* exit from while loop */
5878                   break;
5879                 }
5880
5881               /* Match occurred.  Construct a tag. */
5882               while (charno < rp->regs.end[0])
5883                 if (buffer[charno++] == '\n')
5884                   lineno++, linecharno = charno;
5885               name = rp->name;
5886               if (name[0] == '\0')
5887                 name = NULL;
5888               else /* make a named tag */
5889                 name = substitute (buffer, rp->name, &rp->regs);
5890               if (rp->force_explicit_name)
5891                 /* Force explicit tag name, if a name is there. */
5892                 pfnote (name, true, buffer + linecharno,
5893                         charno - linecharno + 1, lineno, linecharno);
5894               else
5895                 make_tag (name, strlen (name), true, buffer + linecharno,
5896                           charno - linecharno + 1, lineno, linecharno);
5897               break;
5898             }
5899         }
5900     }
5901 }
5902
5903 \f
5904 static bool
5905 nocase_tail (const char *cp)
5906 {
5907   register int len = 0;
5908
5909   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5910     cp++, len++;
5911   if (*cp == '\0' && !intoken (dbp[len]))
5912     {
5913       dbp += len;
5914       return true;
5915     }
5916   return false;
5917 }
5918
5919 static void
5920 get_tag (register char *bp, char **namepp)
5921 {
5922   register char *cp = bp;
5923
5924   if (*bp != '\0')
5925     {
5926       /* Go till you get to white space or a syntactic break */
5927       for (cp = bp + 1; !notinname (*cp); cp++)
5928         continue;
5929       make_tag (bp, cp - bp, true,
5930                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5931     }
5932
5933   if (namepp != NULL)
5934     *namepp = savenstr (bp, cp - bp);
5935 }
5936
5937 /*
5938  * Read a line of text from `stream' into `lbp', excluding the
5939  * newline or CR-NL, if any.  Return the number of characters read from
5940  * `stream', which is the length of the line including the newline.
5941  *
5942  * On DOS or Windows we do not count the CR character, if any before the
5943  * NL, in the returned length; this mirrors the behavior of Emacs on those
5944  * platforms (for text files, it translates CR-NL to NL as it reads in the
5945  * file).
5946  *
5947  * If multi-line regular expressions are requested, each line read is
5948  * appended to `filebuf'.
5949  */
5950 static long
5951 readline_internal (linebuffer *lbp, register FILE *stream)
5952 {
5953   char *buffer = lbp->buffer;
5954   register char *p = lbp->buffer;
5955   register char *pend;
5956   int chars_deleted;
5957
5958   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5959
5960   for (;;)
5961     {
5962       register int c = getc (stream);
5963       if (p == pend)
5964         {
5965           /* We're at the end of linebuffer: expand it. */
5966           lbp->size *= 2;
5967           xrnew (buffer, lbp->size, char);
5968           p += buffer - lbp->buffer;
5969           pend = buffer + lbp->size;
5970           lbp->buffer = buffer;
5971         }
5972       if (c == EOF)
5973         {
5974           *p = '\0';
5975           chars_deleted = 0;
5976           break;
5977         }
5978       if (c == '\n')
5979         {
5980           if (p > buffer && p[-1] == '\r')
5981             {
5982               p -= 1;
5983 #ifdef DOS_NT
5984              /* Assume CRLF->LF translation will be performed by Emacs
5985                 when loading this file, so CRs won't appear in the buffer.
5986                 It would be cleaner to compensate within Emacs;
5987                 however, Emacs does not know how many CRs were deleted
5988                 before any given point in the file.  */
5989               chars_deleted = 1;
5990 #else
5991               chars_deleted = 2;
5992 #endif
5993             }
5994           else
5995             {
5996               chars_deleted = 1;
5997             }
5998           *p = '\0';
5999           break;
6000         }
6001       *p++ = c;
6002     }
6003   lbp->len = p - buffer;
6004
6005   if (need_filebuf              /* we need filebuf for multi-line regexps */
6006       && chars_deleted > 0)     /* not at EOF */
6007     {
6008       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6009         {
6010           /* Expand filebuf. */
6011           filebuf.size *= 2;
6012           xrnew (filebuf.buffer, filebuf.size, char);
6013         }
6014       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6015       filebuf.len += lbp->len;
6016       filebuf.buffer[filebuf.len++] = '\n';
6017       filebuf.buffer[filebuf.len] = '\0';
6018     }
6019
6020   return lbp->len + chars_deleted;
6021 }
6022
6023 /*
6024  * Like readline_internal, above, but in addition try to match the
6025  * input line against relevant regular expressions and manage #line
6026  * directives.
6027  */
6028 static void
6029 readline (linebuffer *lbp, FILE *stream)
6030 {
6031   long result;
6032
6033   linecharno = charno;          /* update global char number of line start */
6034   result = readline_internal (lbp, stream); /* read line */
6035   lineno += 1;                  /* increment global line number */
6036   charno += result;             /* increment global char number */
6037
6038   /* Honor #line directives. */
6039   if (!no_line_directive)
6040     {
6041       static bool discard_until_line_directive;
6042
6043       /* Check whether this is a #line directive. */
6044       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6045         {
6046           unsigned int lno;
6047           int start = 0;
6048
6049           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6050               && start > 0)     /* double quote character found */
6051             {
6052               char *endp = lbp->buffer + start;
6053
6054               while ((endp = etags_strchr (endp, '"')) != NULL
6055                      && endp[-1] == '\\')
6056                 endp++;
6057               if (endp != NULL)
6058                 /* Ok, this is a real #line directive.  Let's deal with it. */
6059                 {
6060                   char *taggedabsname;  /* absolute name of original file */
6061                   char *taggedfname;    /* name of original file as given */
6062                   char *name;           /* temp var */
6063
6064                   discard_until_line_directive = false; /* found it */
6065                   name = lbp->buffer + start;
6066                   *endp = '\0';
6067                   canonicalize_filename (name);
6068                   taggedabsname = absolute_filename (name, tagfiledir);
6069                   if (filename_is_absolute (name)
6070                       || filename_is_absolute (curfdp->infname))
6071                     taggedfname = savestr (taggedabsname);
6072                   else
6073                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6074
6075                   if (streq (curfdp->taggedfname, taggedfname))
6076                     /* The #line directive is only a line number change.  We
6077                        deal with this afterwards. */
6078                     free (taggedfname);
6079                   else
6080                     /* The tags following this #line directive should be
6081                        attributed to taggedfname.  In order to do this, set
6082                        curfdp accordingly. */
6083                     {
6084                       fdesc *fdp; /* file description pointer */
6085
6086                       /* Go look for a file description already set up for the
6087                          file indicated in the #line directive.  If there is
6088                          one, use it from now until the next #line
6089                          directive. */
6090                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6091                         if (streq (fdp->infname, curfdp->infname)
6092                             && streq (fdp->taggedfname, taggedfname))
6093                           /* If we remove the second test above (after the &&)
6094                              then all entries pertaining to the same file are
6095                              coalesced in the tags file.  If we use it, then
6096                              entries pertaining to the same file but generated
6097                              from different files (via #line directives) will
6098                              go into separate sections in the tags file.  These
6099                              alternatives look equivalent.  The first one
6100                              destroys some apparently useless information. */
6101                           {
6102                             curfdp = fdp;
6103                             free (taggedfname);
6104                             break;
6105                           }
6106                       /* Else, if we already tagged the real file, skip all
6107                          input lines until the next #line directive. */
6108                       if (fdp == NULL) /* not found */
6109                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6110                           if (streq (fdp->infabsname, taggedabsname))
6111                             {
6112                               discard_until_line_directive = true;
6113                               free (taggedfname);
6114                               break;
6115                             }
6116                       /* Else create a new file description and use that from
6117                          now on, until the next #line directive. */
6118                       if (fdp == NULL) /* not found */
6119                         {
6120                           fdp = fdhead;
6121                           fdhead = xnew (1, fdesc);
6122                           *fdhead = *curfdp; /* copy curr. file description */
6123                           fdhead->next = fdp;
6124                           fdhead->infname = savestr (curfdp->infname);
6125                           fdhead->infabsname = savestr (curfdp->infabsname);
6126                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6127                           fdhead->taggedfname = taggedfname;
6128                           fdhead->usecharno = false;
6129                           fdhead->prop = NULL;
6130                           fdhead->written = false;
6131                           curfdp = fdhead;
6132                         }
6133                     }
6134                   free (taggedabsname);
6135                   lineno = lno - 1;
6136                   readline (lbp, stream);
6137                   return;
6138                 } /* if a real #line directive */
6139             } /* if #line is followed by a number */
6140         } /* if line begins with "#line " */
6141
6142       /* If we are here, no #line directive was found. */
6143       if (discard_until_line_directive)
6144         {
6145           if (result > 0)
6146             {
6147               /* Do a tail recursion on ourselves, thus discarding the contents
6148                  of the line buffer. */
6149               readline (lbp, stream);
6150               return;
6151             }
6152           /* End of file. */
6153           discard_until_line_directive = false;
6154           return;
6155         }
6156     } /* if #line directives should be considered */
6157
6158   {
6159     int match;
6160     regexp *rp;
6161     char *name;
6162
6163     /* Match against relevant regexps. */
6164     if (lbp->len > 0)
6165       for (rp = p_head; rp != NULL; rp = rp->p_next)
6166         {
6167           /* Only use generic regexps or those for the current language.
6168              Also do not use multiline regexps, which is the job of
6169              regex_tag_multiline. */
6170           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6171               || rp->multi_line)
6172             continue;
6173
6174           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6175           switch (match)
6176             {
6177             case -2:
6178               /* Some error. */
6179               if (!rp->error_signaled)
6180                 {
6181                   error ("regexp stack overflow while matching \"%s\"",
6182                          rp->pattern);
6183                   rp->error_signaled = true;
6184                 }
6185               break;
6186             case -1:
6187               /* No match. */
6188               break;
6189             case 0:
6190               /* Empty string matched. */
6191               if (!rp->error_signaled)
6192                 {
6193                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6194                   rp->error_signaled = true;
6195                 }
6196               break;
6197             default:
6198               /* Match occurred.  Construct a tag. */
6199               name = rp->name;
6200               if (name[0] == '\0')
6201                 name = NULL;
6202               else /* make a named tag */
6203                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6204               if (rp->force_explicit_name)
6205                 /* Force explicit tag name, if a name is there. */
6206                 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6207               else
6208                 make_tag (name, strlen (name), true,
6209                           lbp->buffer, match, lineno, linecharno);
6210               break;
6211             }
6212         }
6213   }
6214 }
6215
6216 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, so it lives in storage of size strlen(cp)+1 and is
 * returned to the caller.
 */
static char *
savestr (const char *cp)
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6226
6227 /*
6228  * Return a pointer to a space of size LEN+1 allocated with xnew where
6229  * the string CP has been copied for at most the first LEN characters.
6230  */
6231 static char *
6232 savenstr (const char *cp, int len)
6233 {
6234   char *dp = xnew (len + 1, char);
6235   dp[len] = '\0';
6236   return memcpy (dp, cp, len);
6237 }
6238
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is part of the
 * search, so looking for '\0' returns a pointer to the terminator.
 *
 * Meant to behave like POSIX strrchr, included for portability.  As
 * strrchr does, C is converted to char before it is compared, so a
 * byte passed as an unsigned value (e.g. 0xE9) still matches on
 * platforms where plain char is signed.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char target = (char) c;
  const char *last = NULL;

  do
    {
      if (*sp == target)
        last = sp;
    }
  while (*sp++);                /* the NUL itself is examined, too */
  return (char *) last;
}
6258
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is part of the
 * search, so looking for '\0' returns a pointer to the terminator.
 *
 * Meant to behave like POSIX strchr, included for portability.  As
 * strchr does, C is converted to char before it is compared, so a
 * byte passed as an unsigned value (e.g. 0xE9) still matches on
 * platforms where plain char is signed.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char target = (char) c;

  do
    {
      if (*sp == target)
        return (char *) sp;
    }
  while (*sp++);                /* the NUL itself is examined, too */
  return NULL;
}
6275
/* Advance CP over every character for which iswhite holds and return
   the resulting pointer.  The end of the string does not count as a
   space, so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6284
/* Advance CP up to the first character for which iswhite holds, or to
   the terminating NUL if there is none, and return that pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6293
/* Advance CP over the characters of the "name" class, i.e. those that
   notinname rejects, and return the resulting pointer. */
static char *
skip_name (char *cp)
{
  /* No explicit end-of-string test: '\0' is a notinname() character,
     so the scan stops at the terminator as well. */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6303
/* Report a diagnostic through error, with S1 as the format string and
   S2 as its single argument, then terminate with EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  /* Does not return to the caller.  */
  exit (EXIT_FAILURE);
}
6311
/* Print S1 followed by the system message for the current errno (via
   perror), then terminate with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  /* Does not return to the caller.  */
  exit (EXIT_FAILURE);
}
6318
6319 static void
6320 suggest_asking_for_help (void)
6321 {
6322   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6323            progname);
6324   exit (EXIT_FAILURE);
6325 }
6326
6327 /* Output a diagnostic with printf-style FORMAT and args.  */
6328 static void
6329 error (const char *format, ...)
6330 {
6331   va_list ap;
6332   va_start (ap, format);
6333   fprintf (stderr, "%s: ", progname);
6334   vfprintf (stderr, format, ap);
6335   fprintf (stderr, "\n");
6336   va_end (ap);
6337 }
6338
6339 /* Return a newly-allocated string whose contents
6340    concatenate those of s1, s2, s3.  */
6341 static char *
6342 concat (const char *s1, const char *s2, const char *s3)
6343 {
6344   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6345   char *result = xnew (len1 + len2 + len3 + 1, char);
6346
6347   strcpy (result, s1);
6348   strcpy (result + len1, s2);
6349   strcpy (result + len1 + len2, s3);
6350
6351   return result;
6352 }
6353
6354 \f
6355 /* Does the same work as the system V getcwd, but does not need to
6356    guess the buffer size in advance. */
6357 static char *
6358 etags_getcwd (void)
6359 {
6360   int bufsize = 200;
6361   char *path = xnew (bufsize, char);
6362
6363   while (getcwd (path, bufsize) == NULL)
6364     {
6365       if (errno != ERANGE)
6366         pfatal ("getcwd");
6367       bufsize *= 2;
6368       free (path);
6369       path = xnew (bufsize, char);
6370     }
6371
6372   canonicalize_filename (path);
6373   return path;
6374 }
6375
6376 /* Return a newly allocated string containing the file name of FILE
6377    relative to the absolute directory DIR (which should end with a slash). */
6378 static char *
6379 relative_filename (char *file, char *dir)
6380 {
6381   char *fp, *dp, *afn, *res;
6382   int i;
6383
6384   /* Find the common root of file and dir (with a trailing slash). */
6385   afn = absolute_filename (file, cwd);
6386   fp = afn;
6387   dp = dir;
6388   while (*fp++ == *dp++)
6389     continue;
6390   fp--, dp--;                   /* back to the first differing char */
6391 #ifdef DOS_NT
6392   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6393     return afn;
6394 #endif
6395   do                            /* look at the equal chars until '/' */
6396     fp--, dp--;
6397   while (*fp != '/');
6398
6399   /* Build a sequence of "../" strings for the resulting relative file name. */
6400   i = 0;
6401   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6402     i += 1;
6403   res = xnew (3*i + strlen (fp + 1) + 1, char);
6404   res[0] = '\0';
6405   while (i-- > 0)
6406     strcat (res, "../");
6407
6408   /* Add the file name relative to the common root of file and dir. */
6409   strcat (res, fp + 1);
6410   free (afn);
6411
6412   return res;
6413 }
6414
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  FILE is copied
   as is when filename_is_absolute accepts it; otherwise it is appended
   to DIR.  The "/dirname/.." and "/." components of the result are
   then removed in place. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SLASHP always
     points at a slash in RES; after a deletion the same position is
     examined again, because new text has been moved under it. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* A "/.." component: walk CP backwards to the place where
                 the preceding component starts, i.e. the nearest
                 earlier position accepted by filename_is_absolute. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap.  slashp[2] is '.', so strlen (slashp + 2)
                 is strlen (slashp + 3) + 1: the terminating NUL is
                 moved along with the rest of the name. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* A "/." component: drop these two characters.  The
                 length again covers the terminating NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      /* Nothing to delete here: go on with the next slash. */
      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6477
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash, the result is a copy of DIR.
   FILE is cut just after its last slash while absolute_filename runs
   and is restored before returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE right after its directory part. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6497
/* Tell whether FN is an absolute file name, that is whether it starts
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
6509
/* Canonicalize the file name FN in place: collapse every run of
   directory separators into a single slash.  Under DOS_NT the drive
   letter is downcased as well, and both '/' and '\\' count as
   separators, so that runs of forward slashes and mixed runs collapse
   too.  The result is never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *cp;
#ifdef DOS_NT
  const char altsep = '\\';     /* backslash separates components too */
#else
  const char altsep = '/';      /* no alternative separator */
#endif

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);
#endif

  /* Collapse multiple separators into a single slash.  CP reads the
     original text while FN writes the (possibly shorter) result. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == altsep)
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == altsep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6539
6540 \f
6541 /* Initialize a linebuffer for use. */
6542 static void
6543 linebuffer_init (linebuffer *lbp)
6544 {
6545   lbp->size = (DEBUG) ? 3 : 200;
6546   lbp->buffer = xnew (lbp->size, char);
6547   lbp->buffer[0] = '\0';
6548   lbp->len = 0;
6549 }
6550
6551 /* Set the minimum size of a string contained in a linebuffer. */
6552 static void
6553 linebuffer_setlen (linebuffer *lbp, int toksize)
6554 {
6555   while (lbp->size <= toksize)
6556     {
6557       lbp->size *= 2;
6558       xrnew (lbp->buffer, lbp->size, char);
6559     }
6560   lbp->len = toksize;
6561 }
6562
/* Allocate SIZE bytes with malloc and return them; if malloc returns
   NULL, report "virtual memory exhausted" through fatal instead. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6572
/* Resize the block PTR to SIZE bytes with realloc and return the new
   block; if realloc returns NULL, report "virtual memory exhausted"
   through fatal instead. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6581
6582 /*
6583  * Local Variables:
6584  * indent-tabs-mode: t
6585  * tab-width: 8
6586  * fill-column: 79
6587  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6588  * c-file-style: "gnu"
6589  * End:
6590  */
6591
6592 /* etags.c ends here */