code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
      /* Version identification string.  It is not declared `static', so the
         symbol has external linkage.  NOTE(review): the leading "@(#)" looks
         like the what(1)/SCCS identification marker -- confirm.  */
   81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
      /* Boolean constants used throughout this file.  */
   83 #define TRUE    1
   84 #define FALSE   0
   85
      /* Normalize DEBUG to TRUE or FALSE so that it can be tested with `#if'.
         When DEBUG was not requested, NDEBUG is defined as well, which
         disables assert.  */
   86 #ifdef DEBUG
   87 #  undef DEBUG
   88 #  define DEBUG TRUE
   89 #else
   90 #  define DEBUG  FALSE
   91 #  define NDEBUG                /* disable assert */
   92 #endif
  93
      /* Build configuration.  With config.h: make sure `static' is the real
         keyword and that PTR is defined.  Without config.h: define PTR (the
         generic pointer type returned by xmalloc and xrealloc) and, for
         compilers that do not claim standard C, define `const' away.  */
   94 #ifdef HAVE_CONFIG_H
   95 # include <config.h>
   96   /* This is probably not necessary any more.  On some systems, config.h
   97      used to define static as nothing for the sake of unexec.  We don't
   98      want that here since we don't use unexec.  None of these systems
   99      are supported any more, but the idea is still mentioned in
  100      etc/PROBLEMS.  */
  101 # undef static
  102 # ifndef PTR                    /* for XEmacs */
  103 #   define PTR void *
  104 # endif
  105 #else  /* no config.h */
  106 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  107 #   define PTR void *           /* for generic pointers */
  108 # else /* not standard C */
  109 #   define const                /* remove const for old compilers' sake */
  110 #   define PTR long *           /* don't use void* */
  111 # endif
  112 #endif /* !HAVE_CONFIG_H */
  113
      /* Must be defined before the system headers are included to have any
         effect on them.  */
  114 #ifndef _GNU_SOURCE
  115 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  116 #endif
 117
  118 /* WIN32_NATIVE is for XEmacs.
  119    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
      /* A WIN32_NATIVE build is treated as WINDOWSNT and never as MSDOS.  */
  120 #ifdef WIN32_NATIVE
  121 # undef MSDOS
  122 # undef  WINDOWSNT
  123 # define WINDOWSNT
  124 #endif /* WIN32_NATIVE */
  125
      /* Normalize MSDOS to TRUE or FALSE.  On MSDOS without config.h, DOS_NT
         is defined here and <sys/config.h> is used instead.  */
  126 #ifdef MSDOS
  127 # undef MSDOS
  128 # define MSDOS TRUE
  129 # include <fcntl.h>
  130 # include <sys/param.h>
  131 # include <io.h>
  132 # ifndef HAVE_CONFIG_H
  133 #   define DOS_NT
  134 #   include <sys/config.h>
  135 # endif
  136 #else
  137 # define MSDOS FALSE
  138 #endif /* MSDOS */
  139
      /* On WINDOWSNT: MAXPATHLEN is taken from _MAX_PATH, DOS_NT is always
         defined, HAVE_NTGUI is always undefined and HAVE_GETCWD is forced
         on.  The `#else' branch is intentionally empty.  */
  140 #ifdef WINDOWSNT
  141 # include <fcntl.h>
  142 # include <direct.h>
  143 # include <io.h>
  144 # define MAXPATHLEN _MAX_PATH
  145 # undef HAVE_NTGUI
  146 # undef  DOS_NT
  147 # define DOS_NT
  148 # ifndef HAVE_GETCWD
  149 #   define HAVE_GETCWD
  150 # endif /* undef HAVE_GETCWD */
  151 #else /* not WINDOWSNT */
  152 #endif /* !WINDOWSNT */
 153
 154 #include <unistd.h>
 155 #ifndef HAVE_UNISTD_H
 156 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 157     extern char *getcwd (char *buf, size_t size);
 158 # endif
 159 #endif /* HAVE_UNISTD_H */
 160
 161 #include <stdlib.h>
 162 #include <string.h>
 163 #include <stdio.h>
 164 #include <ctype.h>
 165 #include <errno.h>
 166 #include <sys/types.h>
 167 #include <sys/stat.h>
 168
 169 #include <assert.h>
      /* When assertions are disabled, replace whatever <assert.h> provided
         with an expression that does nothing.  The replacement is an
         expression (not an empty expansion) so that assert can still be used
         as the left operand of a comma operator, as the streq family of
         macros does.  */
  170 #ifdef NDEBUG
  171 # undef  assert                 /* some systems have a buggy assert.h */
  172 # define assert(x) ((void) 0)
  173 #endif
 174
      /* Normalize NO_LONG_OPTIONS to TRUE or FALSE.  Without GNU getopt,
         getopt_long is mapped onto plain getopt (the long-option table and
         index arguments are dropped) and the getopt globals are declared by
         hand.  */
  175 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
  176 # define NO_LONG_OPTIONS TRUE
  177 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  178   extern char *optarg;
  179   extern int optind, opterr;
  180 #else
  181 # define NO_LONG_OPTIONS FALSE
  182 # include <getopt.h>
  183 #endif /* NO_LONG_OPTIONS */
 184
      /* The text inside the __CYGWIN__ branch below is not C.
         NOTE(review): it appears to be there on purpose, to stop a
         standalone Cygwin compilation with an error until somebody reads
         the notice and deletes it -- confirm before "fixing" it.  */
  185 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
  186 # ifdef __CYGWIN__              /* compiling on Cygwin */
  187                              !!! NOTICE !!!
  188  the regex.h distributed with Cygwin is not compatible with etags, alas!
  189 If you want regular expression support, you should delete this notice and
  190               arrange to use the GNU regex.h and regex.c.
  191 # endif
  192 #endif
 193 #include <regex.h>
 194
  195 /* Define CTAGS to make the program "ctags" compatible with the usual one.
  196  Leave it undefined to make the program "etags", which makes emacs-style
  197  tag tables and tags typedefs, #defines and struct/union/enum by default.
      
      Whatever the user defined, CTAGS is normalized here to TRUE or FALSE so
      that it can be tested with `#if CTAGS'.  */
  198 #ifdef CTAGS
  199 # undef  CTAGS
  200 # define CTAGS TRUE
  201 #else
  202 # define CTAGS FALSE
  203 #endif
 204
 205 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 206 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 207 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 208 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 209
  210 #define CHARS 256               /* number of distinct char values: 2^8, assuming 8-bit chars */
      /* Map any char value (possibly negative when char is signed) to an
         index in the range 0 .. CHARS-1.  */
  211 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
      /* Character-class predicates implemented as lookups in the _wht, _nin,
         _btk, _itk and _etk tables.  */
  212 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
  213 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
  214 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
  215 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
  216 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
  217
      /* Wrappers around the <ctype.h> functions that first pass the argument
         through CHAR, so the library never receives a negative value.  */
  218 #define ISALNUM(c)      isalnum (CHAR(c))
  219 #define ISALPHA(c)      isalpha (CHAR(c))
  220 #define ISDIGIT(c)      isdigit (CHAR(c))
  221 #define ISLOWER(c)      islower (CHAR(c))
  222
  223 #define lowcase(c)      tolower (CHAR(c))
 224
 225
  226 /*
  227  *      xnew, xrnew -- allocate, reallocate storage
  228  *
  229  * SYNOPSIS:    Type *xnew (int n, Type);
  230  *              void xrnew (OldPointer, int n, Type);
  231  *
       * xnew yields a pointer to room for N objects of type Type.  xrnew
       * resizes the block OP points to and stores the new address back
       * into OP, so OP must be an lvalue.  The byte count is computed as
       * (n) * sizeof (Type) with no overflow check.
       *
       * In a DEBUG build both go through trace_malloc / trace_realloc from
       * chkmalloc.h, passing the file and line of the call; otherwise they
       * go through xmalloc / xrealloc.
  231  */
  232 #if DEBUG
  233 # include "chkmalloc.h"
  234 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
  235                                                   (n) * sizeof (Type)))
  236 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
  237                                         (char *) (op), (n) * sizeof (Type)))
  238 #else
  239 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
  240 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
  241                                         (char *) (op), (n) * sizeof (Type)))
  242 #endif
 243
      /* `bool' is a plain int here.  That matters: the longopts table stores
         the addresses of several bool flags in the `flag' member of struct
         option, which getopt_long writes through as an int.  */
  244 #define bool int
  245
      /* Type of a per-language parse function; it receives the input stream.  */
  246 typedef void Lang_function (FILE *);
 247
      /* Associates a compressed-file name suffix with the command used to
         decompress such a file.  The `compressors' table is initialized
         positionally, so the member order must not change.  */
  248 typedef struct
  249 {
  250   const char *suffix;           /* file name suffix for this compressor */
  251   const char *command;          /* takes one arg and decompresses to stdout */
  252 } compressor;
 253
      /* Description of one supported language: its name and help text, the
         function that parses it, and the file name suffixes, whole file
         names and interpreter names that select it.  The three string lists
         are arrays of strings; the ones defined in this file all end with a
         NULL entry.  */
  254 typedef struct
  255 {
  256   const char *name;             /* language name */
  257   const char *help;             /* detailed help for the language */
  258   Lang_function *function;      /* parse function */
  259   const char **suffixes;        /* name suffixes of this language's files */
  260   const char **filenames;       /* names of this language's files */
  261   const char **interpreters;    /* interpreters for this language */
  262   bool metasource;              /* source used to generate other sources */
  263 } language;
 264
      /* Description of one input file.  Descriptions are chained through
         `next' into a singly linked list (see fdhead).  */
  265 typedef struct fdesc
  266 {
  267   struct fdesc *next;           /* for the linked list */
  268   char *infname;                /* uncompressed input file name */
  269   char *infabsname;             /* absolute uncompressed input file name */
  270   char *infabsdir;              /* absolute dir of input file */
  271   char *taggedfname;            /* file name to write in tagfile */
  272   language *lang;               /* language of file */
  273   char *prop;                   /* file properties to write in tagfile */
  274   bool usecharno;               /* etags tags shall contain char number */
  275   bool written;                 /* entry written in the tags file */
  276 } fdesc;
 277
      /* One tag.  Tags are kept as nodes of a binary tree (see nodehead);
         each node records the file the tag belongs to and where in that file
         it was found.  */
  278 typedef struct node_st
  279 {                               /* sorting structure */
  280   struct node_st *left, *right; /* left and right sons */
  281   fdesc *fdp;                   /* description of file to whom tag belongs */
  282   char *name;                   /* tag name */
  283   char *regex;                  /* search regexp */
  284   bool valid;                   /* write this tag on the tag file */
  285   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  286   bool been_warned;             /* warning already given for duplicated tag */
  287   int lno;                      /* line number tag is on */
  288   long cno;                     /* character number line starts on */
  289 } node;
 290
  291 /*
  292  * A `linebuffer' is a structure which holds a line of text.
  293  * `readline_internal' reads a line from a stream into a linebuffer
  294  * and works regardless of the length of the line.
  295  * SIZE is the size of BUFFER, LEN is the length of the string in
  296  * BUFFER after readline reads it.
  297  */
  298 typedef struct
  299 {
  300   long size;                    /* size of `buffer' */
  301   int len;                      /* length of the string held in `buffer' */
  302   char *buffer;                 /* the text itself */
  303 } linebuffer;
 304
  305 /* Used to support mixing of --lang and file names.
      Each command-line item is recorded as one `argument' tagged with its
      kind; at_end marks the end of the list.  */
  306 typedef struct
  307 {
  308   enum {
  309     at_language,                /* a language specification */
  310     at_regexp,                  /* a regular expression */
  311     at_filename,                /* a file name */
  312     at_stdin,                   /* read from stdin here */
  313     at_end                      /* stop parsing the list */
  314   } arg_type;                   /* argument type */
  315   language *lang;               /* language associated with the argument */
  316   char *what;                   /* the argument itself */
  317 } argument;
 318
  319 /* Structure defining a regular expression.
      Regexps are chained through `p_next' into a singly linked list (see
      p_head).  */
  320 typedef struct regexp
  321 {
  322   struct regexp *p_next;        /* pointer to next in list */
  323   language *lang;               /* if set, use only for this language */
  324   char *pattern;                /* the regexp pattern */
  325   char *name;                   /* tag name */
  326   struct re_pattern_buffer *pat; /* the compiled pattern */
  327   struct re_registers regs;     /* re registers */
  328   bool error_signaled;          /* already signaled for this regexp */
  329   bool force_explicit_name;     /* do not allow implicit tag name */
  330   bool ignore_case;             /* ignore case when matching */
  331   bool multi_line;              /* do a multi-line match on the whole file */
  332 } regexp;
 333
 334
  335 /* Many compilers barf on this:
  336         Lang_function Ada_funcs;
  337    so let's write it this way */
      /* Per-language parse functions.  All but C_entries have the
         Lang_function signature; C_entries takes an extra leading int.  */
  338 static void Ada_funcs (FILE *);
  339 static void Asm_labels (FILE *);
  340 static void C_entries (int c_ext, FILE *);
  341 static void default_C_entries (FILE *);
  342 static void plain_C_entries (FILE *);
  343 static void Cjava_entries (FILE *);
  344 static void Cobol_paragraphs (FILE *);
  345 static void Cplusplus_entries (FILE *);
  346 static void Cstar_entries (FILE *);
  347 static void Erlang_functions (FILE *);
  348 static void Forth_words (FILE *);
  349 static void Fortran_functions (FILE *);
  350 static void HTML_labels (FILE *);
  351 static void Lisp_functions (FILE *);
  352 static void Lua_functions (FILE *);
  353 static void Makefile_targets (FILE *);
  354 static void Pascal_functions (FILE *);
  355 static void Perl_functions (FILE *);
  356 static void PHP_functions (FILE *);
  357 static void PS_functions (FILE *);
  358 static void Prolog_functions (FILE *);
  359 static void Python_functions (FILE *);
  360 static void Scheme_functions (FILE *);
  361 static void TeX_commands (FILE *);
  362 static void Texinfo_nodes (FILE *);
  363 static void Yacc_entries (FILE *);
  364 static void just_read_file (FILE *);
  365
      /* Forward declarations for the rest of the file.  Everything is static
         except main and fatal.  */
  366 static void print_language_names (void);
  367 static void print_version (void);
  368 static void print_help (argument *);
  369 int main (int, char **);
  370
  371 static compressor *get_compressor_from_suffix (char *, char **);
  372 static language *get_language_from_langname (const char *);
  373 static language *get_language_from_interpreter (char *);
  374 static language *get_language_from_filename (char *, bool);
  375 static void readline (linebuffer *, FILE *);
  376 static long readline_internal (linebuffer *, FILE *);
  377 static bool nocase_tail (const char *);
  378 static void get_tag (char *, char **);
  379
      /* NOTE(review): NO_RETURN is not defined in the part of the file shown
         here; presumably it comes from config.h -- confirm that a standalone
         (no config.h) build also gets a definition.  */
  380 static void analyse_regex (char *);
  381 static void free_regexps (void);
  382 static void regex_tag_multiline (void);
  383 static void error (const char *, const char *);
  384 static void suggest_asking_for_help (void) NO_RETURN;
  385 void fatal (const char *, const char *) NO_RETURN;
  386 static void pfatal (const char *) NO_RETURN;
  387 static void add_node (node *, node **);
  388
  389 static void init (void);
  390 static void process_file_name (char *, language *);
  391 static void process_file (FILE *, char *, language *);
  392 static void find_entries (FILE *);
  393 static void free_tree (node *);
  394 static void free_fdesc (fdesc *);
  395 static void pfnote (char *, bool, char *, int, int, long);
  396 static void make_tag (const char *, int, bool, char *, int, int, long);
  397 static void invalidate_nodes (fdesc *, node **);
  398 static void put_entries (node *);
  399
      /* xmalloc and xrealloc return PTR, the generic pointer type chosen at
         the top of the file; the xnew and xrnew macros cast the result.  */
  400 static char *concat (const char *, const char *, const char *);
  401 static char *skip_spaces (char *);
  402 static char *skip_non_spaces (char *);
  403 static char *savenstr (const char *, int);
  404 static char *savestr (const char *);
  405 static char *etags_strchr (const char *, int);
  406 static char *etags_strrchr (const char *, int);
  407 static int etags_strcasecmp (const char *, const char *);
  408 static int etags_strncasecmp (const char *, const char *, int);
  409 static char *etags_getcwd (void);
  410 static char *relative_filename (char *, char *);
  411 static char *absolute_filename (char *, char *);
  412 static char *absolute_dirname (char *, char *);
  413 static bool filename_is_absolute (char *f);
  414 static void canonicalize_filename (char *);
  415 static void linebuffer_init (linebuffer *);
  416 static void linebuffer_setlen (linebuffer *, int);
  417 static PTR xmalloc (size_t);
  418 static PTR xrealloc (char *, size_t);
 419
 420 \f
      /* File-scope program state.  Having static storage duration, all of
         these start out zero/NULL unless an initializer says otherwise.  */
  421 static char searchar = '/';     /* use /.../ searches */
  422
  423 static char *tagfile;           /* output file */
  424 static char *progname;          /* name this program was invoked with */
  425 static char *cwd;               /* current working directory */
  426 static char *tagfiledir;        /* directory of tagfile */
  427 static FILE *tagf;              /* ioptr for tags file */
      /* NOTE(review): ptrdiff_t is declared by <stddef.h>, which is not
         included directly in the part of the file shown here -- confirm it
         is always reached through another header.  */
  428 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
  429
  430 static fdesc *fdhead;           /* head of file description list */
  431 static fdesc *curfdp;           /* current file description */
  432 static int lineno;              /* line number of current line */
  433 static long charno;             /* current character number */
  434 static long linecharno;         /* charno of start of current line */
  435 static char *dbp;               /* pointer to start of current tag */
  436
      /* NOTE(review): presumably the value stored as a character number when
         none is available -- confirm at the use sites.  */
  437 static const int invalidcharno = -1;
  438
  439 static node *nodehead;          /* the head of the binary tree of tags */
  440 static node *last_node;         /* the last node created */
  441
  442 static linebuffer lb;           /* the current line */
  443 static linebuffer filebuf;      /* a buffer containing the whole file */
  444 static linebuffer token_name;   /* a buffer containing a tag name */
 445
  446 /* boolean "functions" (see init)       */
      /* One table per character class, indexed by CHAR (c) and read through
         the iswhite, notinname, intoken, begtoken and endtoken macros.  The
         strings that follow list the members of each class.  */
  447 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
  448 static const char
  449   /* white chars */
  450   *white = " \f\t\n\r\v",
  451   /* not in a name */
  452   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
  453   /* token ending chars */
  454   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  455   /* token starting chars */
  456   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  457   /* valid in-token chars */
  458   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 459
      /* Command-line option flags.  Several of them (packages_only,
         declarations, no_line_directive, no_duplicates, members, globals)
         have their address stored in the longopts table, so that getopt_long
         sets them directly; this relies on `bool' being int.  */
  460 static bool append_to_tagfile;  /* -a: append to tags */
  461 /* The next five default to TRUE in C and derived languages.  */
  462 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
  463 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
  464                                 /* 0 struct/enum/union decls, and C++ */
  465                                 /* member functions. */
  466 static bool constantypedefs;    /* -d: create tags for C #define, enum */
  467                                 /* constants and variables. */
  468                                 /* -D: opposite of -d.  Default under ctags. */
  469 static bool globals;            /* create tags for global variables */
  470 static bool members;            /* create tags for C member variables */
  471 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
  472 static bool no_line_directive;  /* ignore #line directives (undocumented) */
  473 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
  474 static bool update;             /* -u: update tags */
  475 static bool vgrind_style;       /* -v: create vgrind style index output */
  476 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
  477 static bool cxref_style;        /* -x: create cxref style output */
  478 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
  479 static bool ignoreindent;       /* -I: ignore indentation in C */
  480 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 481
  482 /* STDIN is defined in LynxOS system headers */
  483 #ifdef STDIN
  484 # undef STDIN
  485 #endif
  486
      /* The value is larger than any single-character option code, so it
         cannot be confused with one of the short option letters.  */
  487 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
  488 static bool parsing_stdin;      /* --parse-stdin used */
  489
  490 static regexp *p_head;          /* list of all regexps */
  491 static bool need_filebuf;       /* some regexes are multi-line */
 492
      /* Long options accepted by getopt_long.  An entry with a NULL flag
         makes getopt_long return the value in the last column; an entry with
         a non-NULL flag makes it store that value into the pointed-to
         variable instead.  The options common to both programs come first,
         then those specific to ctags or to etags, selected by CTAGS.  The
         table ends with an all-zero entry.

         NOTE(review): "help" is listed twice, once for 'h' and once for 'H'.
         The names are identical, so the second entry looks unreachable by
         name lookup -- confirm whether it is still needed.  */
  493 static struct option longopts[] =
  494 {
  495   { "append",             no_argument,       NULL,               'a'   },
  496   { "packages-only",      no_argument,       &packages_only,     TRUE  },
  497   { "c++",                no_argument,       NULL,               'C'   },
  498   { "declarations",       no_argument,       &declarations,      TRUE  },
  499   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  500   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
  501   { "help",               no_argument,       NULL,               'h'   },
  502   { "help",               no_argument,       NULL,               'H'   },
  503   { "ignore-indentation", no_argument,       NULL,               'I'   },
  504   { "language",           required_argument, NULL,               'l'   },
  505   { "members",            no_argument,       &members,           TRUE  },
  506   { "no-members",         no_argument,       &members,           FALSE },
  507   { "output",             required_argument, NULL,               'o'   },
  508   { "regex",              required_argument, NULL,               'r'   },
  509   { "no-regex",           no_argument,       NULL,               'R'   },
  510   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  511   { "parse-stdin",        required_argument, NULL,               STDIN },
  512   { "version",            no_argument,       NULL,               'V'   },
  513
  514 #if CTAGS /* Ctags options */
  515   { "backward-search",    no_argument,       NULL,               'B'   },
  516   { "cxref",              no_argument,       NULL,               'x'   },
  517   { "defines",            no_argument,       NULL,               'd'   },
  518   { "globals",            no_argument,       &globals,           TRUE  },
  519   { "typedefs",           no_argument,       NULL,               't'   },
  520   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  521   { "update",             no_argument,       NULL,               'u'   },
  522   { "vgrind",             no_argument,       NULL,               'v'   },
  523   { "no-warn",            no_argument,       NULL,               'w'   },
  524
  525 #else /* Etags options */
  526   { "no-defines",         no_argument,       NULL,               'D'   },
  527   { "no-globals",         no_argument,       &globals,           FALSE },
  528   { "include",            required_argument, NULL,               'i'   },
  529 #endif
  530   { NULL }
  531 };
 532
      /* Known compressed-file suffixes and the command that decompresses
         each to standard output.  The suffixes are written without the
         leading dot, and the upper- and lower-case spellings are listed
         separately.  The table ends with an entry whose suffix is NULL.  */
  533 static compressor compressors[] =
  534 {
  535   { "z", "gzip -d -c"},
  536   { "Z", "gzip -d -c"},
  537   { "gz", "gzip -d -c"},
  538   { "GZ", "gzip -d -c"},
  539   { "bz2", "bzip2 -d -c" },
  540   { "xz", "xz -d -c" },
  541   { NULL }
  542 };
 543
  544 /*
  545  * Language stuff.
       *
       * For each language the definitions below provide NULL-terminated
       * lists of file name suffixes (and, for some languages, whole file
       * names or interpreter names) plus a help string.  Suffixes are
       * written without the leading dot.
  546  */
  547
  548 /* Ada code */
  549 static const char *Ada_suffixes [] =
  550   { "ads", "adb", "ada", NULL };
  551 static const char Ada_help [] =
  552 "In Ada code, functions, procedures, packages, tasks and types are\n\
  553 tags.  Use the `--packages-only' option to create tags for\n\
  554 packages only.\n\
  555 Ada tag names have suffixes indicating the type of entity:\n\
  556         Entity type:    Qualifier:\n\
  557         ------------    ----------\n\
  558         function        /f\n\
  559         procedure       /p\n\
  560         package spec    /s\n\
  561         package body    /b\n\
  562         type            /t\n\
  563         task            /k\n\
  564 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
  565 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
  566 will just search for any tag `bidule'.";
 567
  568 /* Assembly code */
      /* Suffix matching is case-sensitive here: "s" and "S" are separate
         entries with different meanings.  */
  569 static const char *Asm_suffixes [] =
  570   { "a",        /* Unix assembler */
  571     "asm", /* Microcontroller assembly */
  572     "def", /* BSO/Tasking definition includes  */
  573     "inc", /* Microcontroller include files */
  574     "ins", /* Microcontroller include files */
  575     "s", "sa", /* Unix assembler */
  576     "S",   /* cpp-processed Unix assembler */
  577     "src", /* BSO/Tasking C compiler output */
  578     NULL
  579   };
  580 static const char Asm_help [] =
  581 "In assembler code, labels appearing at the beginning of a line,\n\
  582 followed by a colon, are tags.";
 583
 584
  585 /* Note that .c and .h can be considered C++, if the --c++ flag was
  586    given, or if the `class' or `template' keywords are met inside the file.
  587    That is why default_C_entries is called for these. */
  588 static const char *default_C_suffixes [] =
  589   { "c", "h", NULL };
      /* The C help text comes in two versions because the two programs have
         opposite defaults: ctags describes the options that switch extra
         tags on, etags the options that switch them off.  */
  590 #if CTAGS                               /* C help for Ctags */
  591 static const char default_C_help [] =
  592 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
  593 Use -T to tag definitions of `struct', `union' and `enum'.\n\
  594 Use -d to tag `#define' macro definitions and `enum' constants.\n\
  595 Use --globals to tag global variables.\n\
  596 You can tag function declarations and external variables by\n\
  597 using `--declarations', and struct members by using `--members'.";
  598 #else                                   /* C help for Etags */
  599 static const char default_C_help [] =
  600 "In C code, any C function or typedef is a tag, and so are\n\
  601 definitions of `struct', `union' and `enum'.  `#define' macro\n\
  602 definitions and `enum' constants are tags unless you specify\n\
  603 `--no-defines'.  Global variables are tags unless you specify\n\
  604 `--no-globals' and so are struct members unless you specify\n\
  605 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
  606 `--no-members' can make the tags table file much smaller.\n\
  607 You can tag function declarations and external variables by\n\
  608 using `--declarations'.";
  609 #endif  /* C help for Ctags and Etags */
 610
      /* C++ code.  Upper-case "C" and "H" are C++ suffixes, unlike the
         lower-case "c" and "h" listed in default_C_suffixes.  */
  611 static const char *Cplusplus_suffixes [] =
  612   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
  613     "M",                        /* Objective C++ */
  614     "pdb",                      /* Postscript with C syntax */
  615     NULL };
  616 static const char Cplusplus_help [] =
  617 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
  618 --help --lang=c --lang=c++ for full help.)\n\
  619 In addition to C tags, member functions are also recognized.  Member\n\
  620 variables are recognized unless you use the `--no-members' option.\n\
  621 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
  622 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
  623 `operator+'.";
 624
 625 static const char *Cjava_suffixes [] =
 626   { "java", NULL };
 627 static char Cjava_help [] =
 628 "In Java code, all the tags constructs of C and C++ code are\n\
 629 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 630
 631
 632 static const char *Cobol_suffixes [] =
 633   { "COB", "cob", NULL };
 634 static char Cobol_help [] =
 635 "In Cobol code, tags are paragraph names; that is, any word\n\
 636 starting in column 8 and followed by a period.";
 637
      /* C* code.  No help string is defined for it in this part of the
         file.  */
  638 static const char *Cstar_suffixes [] =
  639   { "cs", "hs", NULL };
  640
      /* Erlang code */
  641 static const char *Erlang_suffixes [] =
  642   { "erl", "hrl", NULL };
  643 static const char Erlang_help [] =
  644 "In Erlang code, the tags are the functions, records and macros\n\
  645 defined in the file.";
 646
 647 const char *Forth_suffixes [] =
 648   { "fth", "tok", NULL };
 649 static const char Forth_help [] =
 650 "In Forth code, tags are words defined by `:',\n\
 651 constant, code, create, defer, value, variable, buffer:, field.";
 652
      /* Fortran code */
  653 static const char *Fortran_suffixes [] =
  654   { "F", "f", "f90", "for", NULL };
  655 static const char Fortran_help [] =
  656 "In Fortran code, functions, subroutines and block data are tags.";
  657
      /* HTML files */
  658 static const char *HTML_suffixes [] =
  659   { "htm", "html", "shtml", NULL };
  660 static const char HTML_help [] =
  661 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
  662 `h3' headers.  Also, tags are `name=' in anchors and all\n\
  663 occurrences of `id='.";
  664
      /* Lisp code */
  665 static const char *Lisp_suffixes [] =
  666   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
  667 static const char Lisp_help [] =
  668 "In Lisp code, any function defined with `defun', any variable\n\
  669 defined with `defvar' or `defconst', and in general the first\n\
  670 argument of any expression that starts with `(def' in column zero\n\
  671 is a tag.";
  672
      /* Lua scripts */
  673 static const char *Lua_suffixes [] =
  674   { "lua", "LUA", NULL };
  675 static const char Lua_help [] =
  676 "In Lua scripts, all functions are tags.";
  677
      /* Makefiles.  These are recognized by whole file name, not by suffix.  */
  678 static const char *Makefile_filenames [] =
  679   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
  680 static const char Makefile_help [] =
  681 "In makefiles, targets are tags; additionally, variables are tags\n\
  682 unless you specify `--no-globals'.";
 683
      /* Objective C code */
  684 static const char *Objc_suffixes [] =
  685   { "lm",                       /* Objective lex file */
  686     "m",                        /* Objective C file */
  687      NULL };
  688 static const char Objc_help [] =
  689 "In Objective C code, tags include Objective C definitions for classes,\n\
  690 class categories, methods and protocols.  Tags for variables and\n\
  691 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
  692 (Use --help --lang=c --lang=objc --lang=java for full help.)";
  693
      /* Pascal code */
  694 static const char *Pascal_suffixes [] =
  695   { "p", "pas", NULL };
  696 static const char Pascal_help [] =
  697 "In Pascal code, the tags are the functions and procedures defined\n\
  698 in the file.";
  699 /* " // this is for working around an Emacs highlighting bug... */
 700
      /* Perl code.  Besides suffixes, Perl has a list of interpreter names.
         NOTE(review): "@PERL@" looks like a configure-style placeholder
         that is matched literally -- confirm.  */
  701 static const char *Perl_suffixes [] =
  702   { "pl", "pm", NULL };
  703 static const char *Perl_interpreters [] =
  704   { "perl", "@PERL@", NULL };
  705 static const char Perl_help [] =
  706 "In Perl code, the tags are the packages, subroutines and variables\n\
  707 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
  708 `--globals' if you want to tag global variables.  Tags for\n\
  709 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
  710 defined in the default package is `main::SUB'.";
  711
      /* PHP code */
  712 static const char *PHP_suffixes [] =
  713   { "php", "php3", "php4", NULL };
  714 static const char PHP_help [] =
  715 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
  716 the `--no-members' option, vars are tags too.";
  717
      /* Plain C: no help string is defined for it in this part of the file.  */
  718 static const char *plain_C_suffixes [] =
  719   { "pc",                       /* Pro*C file */
  720      NULL };
  721
      /* PostScript code */
  722 static const char *PS_suffixes [] =
  723   { "ps", "psw", NULL };        /* .psw is for PSWrap */
  724 static const char PS_help [] =
  725 "In PostScript code, the tags are the functions.";
 726
 727 static const char *Prolog_suffixes [] =
 728   { "prolog", NULL };
 729 static const char Prolog_help [] =
 730 "In Prolog code, tags are predicates and rules at the beginning of\n\
 731 line.";
 732
 733 static const char *Python_suffixes [] =
 734   { "py", NULL };
 735 static const char Python_help [] =
 736 "In Python code, `def' or `class' at the beginning of a line\n\
 737 generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with `def' or with a\n\
 744 construct whose name starts with `def'.  They also include\n\
 745 variables set with `set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 751 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 752 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 753 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 754 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 `TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "`auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "`none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 805   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 806   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 807   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 808   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 809   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 810   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 811   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 812   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 813   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 814   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 815   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 816   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 817   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 818   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 819   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 820   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 821   { "auto",      auto_help },                      /* default guessing scheme */
 822   { "none",      none_help,      just_read_file }, /* regexp matching only */
 823   { NULL }                /* end of list */
 824 };
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where `auto' means use default language for files based on file\n\
 847 name suffix, and `none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static void
 867 print_version (void)
 868 {
 869   /* Makes it easier to update automatically. */
 870   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 871
 872   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 873   puts (emacs_copyright);
 874   puts ("This program is distributed under the terms in ETAGS.README");
 875
 876   exit (EXIT_SUCCESS);
 877 }
 878
 879 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 880 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 881 #endif
 882
 883 static void
 884 print_help (argument *argbuffer)
 885 {
 886   bool help_for_lang = FALSE;
 887
 888   for (; argbuffer->arg_type != at_end; argbuffer++)
 889     if (argbuffer->arg_type == at_language)
 890       {
 891         if (help_for_lang)
 892           puts ("");
 893         puts (argbuffer->lang->help);
 894         help_for_lang = TRUE;
 895       }
 896
 897   if (help_for_lang)
 898     exit (EXIT_SUCCESS);
 899
 900   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 901 \n\
 902 These are the options accepted by %s.\n", progname, progname);
 903   if (NO_LONG_OPTIONS)
 904     puts ("WARNING: long option names do not work with this executable,\n\
 905 as it is not linked with GNU getopt.");
 906   else
 907     puts ("You may use unambiguous abbreviations for the long option names.");
 908   puts ("  A - as file name means read names from stdin (one per line).\n\
 909 Absolute names are stored in the output file as they are.\n\
 910 Relative ones are stored relative to the output file's directory.\n");
 911
 912   puts ("-a, --append\n\
 913         Append tag entries to existing tags file.");
 914
 915   puts ("--packages-only\n\
 916         For Ada files, only generate tags for packages.");
 917
 918   if (CTAGS)
 919     puts ("-B, --backward-search\n\
 920         Write the search commands for the tag entries using '?', the\n\
 921         backward-search command instead of '/', the forward-search command.");
 922
 923   /* This option is mostly obsolete, because etags can now automatically
 924      detect C++.  Retained for backward compatibility and for debugging and
 925      experimentation.  In principle, we could want to tag as C++ even
 926      before any "class" or "template" keyword.
 927   puts ("-C, --c++\n\
 928         Treat files whose name suffix defaults to C language as C++ files.");
 929   */
 930
 931   puts ("--declarations\n\
 932         In C and derived languages, create tags for function declarations,");
 933   if (CTAGS)
 934     puts ("\tand create tags for extern variables if --globals is used.");
 935   else
 936     puts
 937       ("\tand create tags for extern variables unless --no-globals is used.");
 938
 939   if (CTAGS)
 940     puts ("-d, --defines\n\
 941         Create tag entries for C #define constants and enum constants, too.");
 942   else
 943     puts ("-D, --no-defines\n\
 944         Don't create tag entries for C #define constants and enum constants.\n\
 945         This makes the tags file smaller.");
 946
 947   if (!CTAGS)
 948     puts ("-i FILE, --include=FILE\n\
 949         Include a note in tag file indicating that, when searching for\n\
 950         a tag, one should also consult the tags file FILE after\n\
 951         checking the current file.");
 952
 953   puts ("-l LANG, --language=LANG\n\
 954         Force the following files to be considered as written in the\n\
 955         named language up to the next --language=LANG option.");
 956
 957   if (CTAGS)
 958     puts ("--globals\n\
 959         Create tag entries for global variables in some languages.");
 960   else
 961     puts ("--no-globals\n\
 962         Do not create tag entries for global variables in some\n\
 963         languages.  This makes the tags file smaller.");
 964
 965   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 966     puts ("--no-line-directive\n\
 967         Ignore #line preprocessor directives in C and derived languages.");
 968
 969   if (CTAGS)
 970     puts ("--members\n\
 971         Create tag entries for members of structures in some languages.");
 972   else
 973     puts ("--no-members\n\
 974         Do not create tag entries for members of structures\n\
 975         in some languages.");
 976
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 987         `m' means to allow multi-line matches, `s' implies `m' and\n\
 988         causes dot to match any character, including newline.");
 989
 990   puts ("-R, --no-regex\n\
 991         Don't create tags from regexps for the following files.");
 992
 993   puts ("-I, --ignore-indentation\n\
 994         In C and C++ do not assume that a closing brace in the first\n\
 995         column is the final brace of a function or structure definition.");
 996
 997   puts ("-o FILE, --output=FILE\n\
 998         Write the tags to FILE.");
 999
1000   puts ("--parse-stdin=NAME\n\
1001         Read from standard input and record tags as belonging to file NAME.");
1002
1003   if (CTAGS)
1004     {
1005       puts ("-t, --typedefs\n\
1006         Generate tag entries for C and Ada typedefs.");
1007       puts ("-T, --typedefs-and-c++\n\
1008         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1009         and C++ member functions.");
1010     }
1011
1012   if (CTAGS)
1013     puts ("-u, --update\n\
1014         Update the tag entries for the given files, leaving tag\n\
1015         entries for other files in place.  Currently, this is\n\
1016         implemented by deleting the existing entries for the given\n\
1017         files and then rewriting the new entries at the end of the\n\
1018         tags file.  It is often faster to simply rebuild the entire\n\
1019         tag file than to use this.");
1020
1021   if (CTAGS)
1022     {
1023       puts ("-v, --vgrind\n\
1024         Print on the standard output an index of items intended for\n\
1025         human consumption, similar to the output of vgrind.  The index\n\
1026         is sorted, and gives the page number of each item.");
1027
1028       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1029         puts ("-w, --no-duplicates\n\
1030         Do not create duplicate tag entries, for compatibility with\n\
1031         traditional ctags.");
1032
1033       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1034         puts ("-w, --no-warn\n\
1035         Suppress warning messages about duplicate tag entries.");
1036
1037       puts ("-x, --cxref\n\
1038         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1039         The output uses line numbers instead of page numbers, but\n\
1040         beyond that the differences are cosmetic; try both to see\n\
1041         which you like.");
1042     }
1043
1044   puts ("-V, --version\n\
1045         Print the version of the program.\n\
1046 -h, --help\n\
1047         Print this help message.\n\
1048         Followed by one or more `--language' options prints detailed\n\
1049         help about tag generation for the specified languages.");
1050
1051   print_language_names ();
1052
1053   puts ("");
1054   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1055
1056   exit (EXIT_SUCCESS);
1057 }
1058
1059 \f
1060 int
1061 main (int argc, char **argv)
1062 {
1063   int i;
1064   unsigned int nincluded_files;
1065   char **included_files;
1066   argument *argbuffer;
1067   int current_arg, file_count;
1068   linebuffer filename_lb;
1069   bool help_asked = FALSE;
1070   ptrdiff_t len;
1071  char *optstring;
1072  int opt;
1073
1074
1075 #ifdef DOS_NT
1076   _fmode = O_BINARY;   /* all of files are treated as binary files */
1077 #endif /* DOS_NT */
1078
1079   progname = argv[0];
1080   nincluded_files = 0;
1081   included_files = xnew (argc, char *);
1082   current_arg = 0;
1083   file_count = 0;
1084
1085   /* Allocate enough no matter what happens.  Overkill, but each one
1086      is small. */
1087   argbuffer = xnew (argc, argument);
1088
1089   /*
1090    * Always find typedefs and structure tags.
1091    * Also default to find macro constants, enum constants, struct
1092    * members and global variables.  Do it for both etags and ctags.
1093    */
1094   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1095   globals = members = TRUE;
1096
1097   /* When the optstring begins with a '-' getopt_long does not rearrange the
1098      non-options arguments to be at the end, but leaves them alone. */
1099   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1100                       "ac:Cf:Il:o:r:RSVhH",
1101                       (CTAGS) ? "BxdtTuvw" : "Di:");
1102
1103   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1104     switch (opt)
1105       {
1106       case 0:
1107         /* If getopt returns 0, then it has already processed a
1108            long-named option.  We should do nothing.  */
1109         break;
1110
1111       case 1:
1112         /* This means that a file name has been seen.  Record it. */
1113         argbuffer[current_arg].arg_type = at_filename;
1114         argbuffer[current_arg].what     = optarg;
1115         len = strlen (optarg);
1116         if (whatlen_max < len)
1117           whatlen_max = len;
1118         ++current_arg;
1119         ++file_count;
1120         break;
1121
1122       case STDIN:
1123         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1124         argbuffer[current_arg].arg_type = at_stdin;
1125         argbuffer[current_arg].what     = optarg;
1126         len = strlen (optarg);
1127         if (whatlen_max < len)
1128           whatlen_max = len;
1129         ++current_arg;
1130         ++file_count;
1131         if (parsing_stdin)
1132           fatal ("cannot parse standard input more than once", (char *)NULL);
1133         parsing_stdin = TRUE;
1134         break;
1135
1136         /* Common options. */
1137       case 'a': append_to_tagfile = TRUE;       break;
1138       case 'C': cplusplus = TRUE;               break;
1139       case 'f':         /* for compatibility with old makefiles */
1140       case 'o':
1141         if (tagfile)
1142           {
1143             error ("-o option may only be given once.", (char *)NULL);
1144             suggest_asking_for_help ();
1145             /* NOTREACHED */
1146           }
1147         tagfile = optarg;
1148         break;
1149       case 'I':
1150       case 'S':         /* for backward compatibility */
1151         ignoreindent = TRUE;
1152         break;
1153       case 'l':
1154         {
1155           language *lang = get_language_from_langname (optarg);
1156           if (lang != NULL)
1157             {
1158               argbuffer[current_arg].lang = lang;
1159               argbuffer[current_arg].arg_type = at_language;
1160               ++current_arg;
1161             }
1162         }
1163         break;
1164       case 'c':
1165         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1166         optarg = concat (optarg, "i", ""); /* memory leak here */
1167         /* FALLTHRU */
1168       case 'r':
1169         argbuffer[current_arg].arg_type = at_regexp;
1170         argbuffer[current_arg].what = optarg;
1171         len = strlen (optarg);
1172         if (whatlen_max < len)
1173           whatlen_max = len;
1174         ++current_arg;
1175         break;
1176       case 'R':
1177         argbuffer[current_arg].arg_type = at_regexp;
1178         argbuffer[current_arg].what = NULL;
1179         ++current_arg;
1180         break;
1181       case 'V':
1182         print_version ();
1183         break;
1184       case 'h':
1185       case 'H':
1186         help_asked = TRUE;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = FALSE;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = TRUE;                         break;
1196       case 't': typedefs = TRUE;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1198       case 'u': update = TRUE;                                  break;
1199       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1200       case 'x': cxref_style = TRUE;                             break;
1201       case 'w': no_warnings = TRUE;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.", (char *)NULL);
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   init ();                      /* set up boolean "functions" */
1253
1254   linebuffer_init (&lb);
1255   linebuffer_init (&filename_lb);
1256   linebuffer_init (&filebuf);
1257   linebuffer_init (&token_name);
1258
1259   if (!CTAGS)
1260     {
1261       if (streq (tagfile, "-"))
1262         {
1263           tagf = stdout;
1264 #ifdef DOS_NT
1265           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1266              doesn't take effect until after `stdout' is already open). */
1267           if (!isatty (fileno (stdout)))
1268             setmode (fileno (stdout), O_BINARY);
1269 #endif /* DOS_NT */
1270         }
1271       else
1272         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1273       if (tagf == NULL)
1274         pfatal (tagfile);
1275     }
1276
1277   /*
1278    * Loop through files finding functions.
1279    */
1280   for (i = 0; i < current_arg; i++)
1281     {
1282       static language *lang;    /* non-NULL if language is forced */
1283       char *this_file;
1284
1285       switch (argbuffer[i].arg_type)
1286         {
1287         case at_language:
1288           lang = argbuffer[i].lang;
1289           break;
1290         case at_regexp:
1291           analyse_regex (argbuffer[i].what);
1292           break;
1293         case at_filename:
1294               this_file = argbuffer[i].what;
1295               /* Input file named "-" means read file names from stdin
1296                  (one per line) and use them. */
1297               if (streq (this_file, "-"))
1298                 {
1299                   if (parsing_stdin)
1300                     fatal ("cannot parse standard input AND read file names from it",
1301                            (char *)NULL);
1302                   while (readline_internal (&filename_lb, stdin) > 0)
1303                     process_file_name (filename_lb.buffer, lang);
1304                 }
1305               else
1306                 process_file_name (this_file, lang);
1307           break;
1308         case at_stdin:
1309           this_file = argbuffer[i].what;
1310           process_file (stdin, this_file, lang);
1311           break;
1312         }
1313     }
1314
1315   free_regexps ();
1316   free (lb.buffer);
1317   free (filebuf.buffer);
1318   free (token_name.buffer);
1319
1320   if (!CTAGS || cxref_style)
1321     {
1322       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1323       put_entries (nodehead);
1324       free_tree (nodehead);
1325       nodehead = NULL;
1326       if (!CTAGS)
1327         {
1328           fdesc *fdp;
1329
1330           /* Output file entries that have no tags. */
1331           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1332             if (!fdp->written)
1333               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1334
1335           while (nincluded_files-- > 0)
1336             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1337
1338           if (fclose (tagf) == EOF)
1339             pfatal (tagfile);
1340         }
1341
1342       exit (EXIT_SUCCESS);
1343     }
1344
1345   /* From here on, we are in (CTAGS && !cxref_style) */
1346   if (update)
1347     {
1348       char *cmd =
1349         xmalloc (strlen (tagfile) + whatlen_max +
1350                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1351       for (i = 0; i < current_arg; ++i)
1352         {
1353           switch (argbuffer[i].arg_type)
1354             {
1355             case at_filename:
1356             case at_stdin:
1357               break;
1358             default:
1359               continue;         /* the for loop */
1360             }
1361           strcpy (cmd, "mv ");
1362           strcat (cmd, tagfile);
1363           strcat (cmd, " OTAGS;fgrep -v '\t");
1364           strcat (cmd, argbuffer[i].what);
1365           strcat (cmd, "\t' OTAGS >");
1366           strcat (cmd, tagfile);
1367           strcat (cmd, ";rm OTAGS");
1368           if (system (cmd) != EXIT_SUCCESS)
1369             fatal ("failed to execute shell command", (char *)NULL);
1370         }
1371       free (cmd);
1372       append_to_tagfile = TRUE;
1373     }
1374
1375   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1376   if (tagf == NULL)
1377     pfatal (tagfile);
1378   put_entries (nodehead);       /* write all the tags (CTAGS) */
1379   free_tree (nodehead);
1380   nodehead = NULL;
1381   if (fclose (tagf) == EOF)
1382     pfatal (tagfile);
1383
1384   if (CTAGS)
1385     if (append_to_tagfile || update)
1386       {
1387         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1388         /* Maybe these should be used:
1389            setenv ("LC_COLLATE", "C", 1);
1390            setenv ("LC_ALL", "C", 1); */
1391         strcpy (cmd, "sort -u -o ");
1392         strcat (cmd, tagfile);
1393         strcat (cmd, " ");
1394         strcat (cmd, tagfile);
1395         exit (system (cmd));
1396       }
1397   return EXIT_SUCCESS;
1398 }
1399
1400
1401 /*
1402  * Return a compressor given the file name.  If EXTPTR is non-zero,
1403  * return a pointer into FILE where the compressor-specific
1404  * extension begins.  If no compressor is found, NULL is returned
1405  * and EXTPTR is not significant.
1406  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1407  */
1408 static compressor *
1409 get_compressor_from_suffix (char *file, char **extptr)
1410 {
1411   compressor *compr;
1412   char *slash, *suffix;
1413
1414   /* File has been processed by canonicalize_filename,
1415      so we don't need to consider backslashes on DOS_NT.  */
1416   slash = etags_strrchr (file, '/');
1417   suffix = etags_strrchr (file, '.');
1418   if (suffix == NULL || suffix < slash)
1419     return NULL;
1420   if (extptr != NULL)
1421     *extptr = suffix;
1422   suffix += 1;
1423   /* Let those poor souls who live with DOS 8+3 file name limits get
1424      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1425      Only the first do loop is run if not MSDOS */
1426   do
1427     {
1428       for (compr = compressors; compr->suffix != NULL; compr++)
1429         if (streq (compr->suffix, suffix))
1430           return compr;
1431       if (!MSDOS)
1432         break;                  /* do it only once: not really a loop */
1433       if (extptr != NULL)
1434         *extptr = ++suffix;
1435     } while (*suffix != '\0');
1436   return NULL;
1437 }
1438
1439
1440
1441 /*
1442  * Return a language given the name.
1443  */
1444 static language *
1445 get_language_from_langname (const char *name)
1446 {
1447   language *lang;
1448
1449   if (name == NULL)
1450     error ("empty language name", (char *)NULL);
1451   else
1452     {
1453       for (lang = lang_names; lang->name != NULL; lang++)
1454         if (streq (name, lang->name))
1455           return lang;
1456       error ("unknown language \"%s\"", name);
1457     }
1458
1459   return NULL;
1460 }
1461
1462
1463 /*
1464  * Return a language given the interpreter name.
1465  */
1466 static language *
1467 get_language_from_interpreter (char *interpreter)
1468 {
1469   language *lang;
1470   const char **iname;
1471
1472   if (interpreter == NULL)
1473     return NULL;
1474   for (lang = lang_names; lang->name != NULL; lang++)
1475     if (lang->interpreters != NULL)
1476       for (iname = lang->interpreters; *iname != NULL; iname++)
1477         if (streq (*iname, interpreter))
1478             return lang;
1479
1480   return NULL;
1481 }
1482
1483
1484
1485 /*
1486  * Return a language given the file name.
1487  */
1488 static language *
1489 get_language_from_filename (char *file, int case_sensitive)
1490 {
1491   language *lang;
1492   const char **name, **ext, *suffix;
1493
1494   /* Try whole file name first. */
1495   for (lang = lang_names; lang->name != NULL; lang++)
1496     if (lang->filenames != NULL)
1497       for (name = lang->filenames; *name != NULL; name++)
1498         if ((case_sensitive)
1499             ? streq (*name, file)
1500             : strcaseeq (*name, file))
1501           return lang;
1502
1503   /* If not found, try suffix after last dot. */
1504   suffix = etags_strrchr (file, '.');
1505   if (suffix == NULL)
1506     return NULL;
1507   suffix += 1;
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->suffixes != NULL)
1510       for (ext = lang->suffixes; *ext != NULL; ext++)
1511         if ((case_sensitive)
1512             ? streq (*ext, suffix)
1513             : strcaseeq (*ext, suffix))
1514           return lang;
1515   return NULL;
1516 }
1517
1518 \f
1519 /*
1520  * This routine is called on each file argument.
1521  */
1522 static void
1523 process_file_name (char *file, language *lang)
1524 {
1525   struct stat stat_buf;
1526   FILE *inf;
1527   fdesc *fdp;
1528   compressor *compr;
1529   char *compressed_name, *uncompressed_name;
1530   char *ext, *real_name;
1531   int retval;
1532
1533   canonicalize_filename (file);
1534   if (streq (file, tagfile) && !streq (tagfile, "-"))
1535     {
1536       error ("skipping inclusion of %s in self.", file);
1537       return;
1538     }
1539   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1540     {
1541       compressed_name = NULL;
1542       real_name = uncompressed_name = savestr (file);
1543     }
1544   else
1545     {
1546       real_name = compressed_name = savestr (file);
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549
1550   /* If the canonicalized uncompressed name
1551      has already been dealt with, skip it silently. */
1552   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553     {
1554       assert (fdp->infname != NULL);
1555       if (streq (uncompressed_name, fdp->infname))
1556         goto cleanup;
1557     }
1558
1559   if (stat (real_name, &stat_buf) != 0)
1560     {
1561       /* Reset real_name and try with a different name. */
1562       real_name = NULL;
1563       if (compressed_name != NULL) /* try with the given suffix */
1564         {
1565           if (stat (uncompressed_name, &stat_buf) == 0)
1566             real_name = uncompressed_name;
1567         }
1568       else                      /* try all possible suffixes */
1569         {
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               if (stat (compressed_name, &stat_buf) != 0)
1574                 {
1575                   if (MSDOS)
1576                     {
1577                       char *suf = compressed_name + strlen (file);
1578                       size_t suflen = strlen (compr->suffix) + 1;
1579                       for ( ; suf[1]; suf++, suflen--)
1580                         {
1581                           memmove (suf, suf + 1, suflen);
1582                           if (stat (compressed_name, &stat_buf) == 0)
1583                             {
1584                               real_name = compressed_name;
1585                               break;
1586                             }
1587                         }
1588                       if (real_name != NULL)
1589                         break;
1590                     } /* MSDOS */
1591                   free (compressed_name);
1592                   compressed_name = NULL;
1593                 }
1594               else
1595                 {
1596                   real_name = compressed_name;
1597                   break;
1598                 }
1599             }
1600         }
1601       if (real_name == NULL)
1602         {
1603           perror (file);
1604           goto cleanup;
1605         }
1606     } /* try with a different name */
1607
1608   if (!S_ISREG (stat_buf.st_mode))
1609     {
1610       error ("skipping %s: it is not a regular file.", real_name);
1611       goto cleanup;
1612     }
1613   if (real_name == compressed_name)
1614     {
1615       char *cmd = concat (compr->command, " ", real_name);
1616       inf = (FILE *) popen (cmd, "r");
1617       free (cmd);
1618     }
1619   else
1620     inf = fopen (real_name, "r");
1621   if (inf == NULL)
1622     {
1623       perror (real_name);
1624       goto cleanup;
1625     }
1626
1627   process_file (inf, uncompressed_name, lang);
1628
1629   if (real_name == compressed_name)
1630     retval = pclose (inf);
1631   else
1632     retval = fclose (inf);
1633   if (retval < 0)
1634     pfatal (file);
1635
1636  cleanup:
1637   free (compressed_name);
1638   free (uncompressed_name);
1639   last_node = NULL;
1640   curfdp = NULL;
1641   return;
1642 }
1643
1644 static void
1645 process_file (FILE *fh, char *fn, language *lang)
1646 {
1647   static const fdesc emptyfdesc;
1648   fdesc *fdp;
1649
1650   /* Create a new input file description entry. */
1651   fdp = xnew (1, fdesc);
1652   *fdp = emptyfdesc;
1653   fdp->next = fdhead;
1654   fdp->infname = savestr (fn);
1655   fdp->lang = lang;
1656   fdp->infabsname = absolute_filename (fn, cwd);
1657   fdp->infabsdir = absolute_dirname (fn, cwd);
1658   if (filename_is_absolute (fn))
1659     {
1660       /* An absolute file name.  Canonicalize it. */
1661       fdp->taggedfname = absolute_filename (fn, NULL);
1662     }
1663   else
1664     {
1665       /* A file name relative to cwd.  Make it relative
1666          to the directory of the tags file. */
1667       fdp->taggedfname = relative_filename (fn, tagfiledir);
1668     }
1669   fdp->usecharno = TRUE;        /* use char position when making tags */
1670   fdp->prop = NULL;
1671   fdp->written = FALSE;         /* not written on tags file yet */
1672
1673   fdhead = fdp;
1674   curfdp = fdhead;              /* the current file description */
1675
1676   find_entries (fh);
1677
1678   /* If not Ctags, and if this is not metasource and if it contained no #line
1679      directives, we can write the tags and free all nodes pointing to
1680      curfdp. */
1681   if (!CTAGS
1682       && curfdp->usecharno      /* no #line directives in this file */
1683       && !curfdp->lang->metasource)
1684     {
1685       node *np, *prev;
1686
1687       /* Look for the head of the sublist relative to this file.  See add_node
1688          for the structure of the node tree. */
1689       prev = NULL;
1690       for (np = nodehead; np != NULL; prev = np, np = np->left)
1691         if (np->fdp == curfdp)
1692           break;
1693
1694       /* If we generated tags for this file, write and delete them. */
1695       if (np != NULL)
1696         {
1697           /* This is the head of the last sublist, if any.  The following
1698              instructions depend on this being true. */
1699           assert (np->left == NULL);
1700
1701           assert (fdhead == curfdp);
1702           assert (last_node->fdp == curfdp);
1703           put_entries (np);     /* write tags for file curfdp->taggedfname */
1704           free_tree (np);       /* remove the written nodes */
1705           if (prev == NULL)
1706             nodehead = NULL;    /* no nodes left */
1707           else
1708             prev->left = NULL;  /* delete the pointer to the sublist */
1709         }
1710     }
1711 }
1712
1713 /*
1714  * This routine sets up the boolean pseudo-functions which work
1715  * by setting boolean flags dependent upon the corresponding character.
1716  * Every char which is NOT in that string is not a white char.  Therefore,
1717  * all of the array "_wht" is set to FALSE, and then the elements
1718  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1719  * of a char is TRUE if it is the string "white", else FALSE.
1720  */
1721 static void
1722 init (void)
1723 {
1724   register const char *sp;
1725   register int i;
1726
1727   for (i = 0; i < CHARS; i++)
1728     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1729   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1730   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1731   notinname('\0') = notinname('\n');
1732   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1733   begtoken('\0') = begtoken('\n');
1734   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1735   intoken('\0') = intoken('\n');
1736   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1737   endtoken('\0') = endtoken('\n');
1738 }
1739
1740 /*
1741  * This routine opens the specified file and calls the function
1742  * which finds the function and type definitions.
1743  */
1744 static void
1745 find_entries (FILE *inf)
1746 {
1747   char *cp;
1748   language *lang = curfdp->lang;
1749   Lang_function *parser = NULL;
1750
1751   /* If user specified a language, use it. */
1752   if (lang != NULL && lang->function != NULL)
1753     {
1754       parser = lang->function;
1755     }
1756
1757   /* Else try to guess the language given the file name. */
1758   if (parser == NULL)
1759     {
1760       lang = get_language_from_filename (curfdp->infname, TRUE);
1761       if (lang != NULL && lang->function != NULL)
1762         {
1763           curfdp->lang = lang;
1764           parser = lang->function;
1765         }
1766     }
1767
1768   /* Else look for sharp-bang as the first two characters. */
1769   if (parser == NULL
1770       && readline_internal (&lb, inf) > 0
1771       && lb.len >= 2
1772       && lb.buffer[0] == '#'
1773       && lb.buffer[1] == '!')
1774     {
1775       char *lp;
1776
1777       /* Set lp to point at the first char after the last slash in the
1778          line or, if no slashes, at the first nonblank.  Then set cp to
1779          the first successive blank and terminate the string. */
1780       lp = etags_strrchr (lb.buffer+2, '/');
1781       if (lp != NULL)
1782         lp += 1;
1783       else
1784         lp = skip_spaces (lb.buffer + 2);
1785       cp = skip_non_spaces (lp);
1786       *cp = '\0';
1787
1788       if (strlen (lp) > 0)
1789         {
1790           lang = get_language_from_interpreter (lp);
1791           if (lang != NULL && lang->function != NULL)
1792             {
1793               curfdp->lang = lang;
1794               parser = lang->function;
1795             }
1796         }
1797     }
1798
1799   /* We rewind here, even if inf may be a pipe.  We fail if the
1800      length of the first line is longer than the pipe block size,
1801      which is unlikely. */
1802   rewind (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, FALSE);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           /* We do not tag if rewind fails.
1827              Only the file name will be recorded in the tags file. */
1828           rewind (inf);
1829           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1830           find_entries (inf);
1831         }
1832       return;
1833     }
1834
1835   if (!no_line_directive
1836       && curfdp->lang != NULL && curfdp->lang->metasource)
1837     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1838        file, or anyway we parsed a file that is automatically generated from
1839        this one.  If this is the case, the bingo.c file contained #line
1840        directives that generated tags pointing to this file.  Let's delete
1841        them all before parsing this file, which is the real source. */
1842     {
1843       fdesc **fdpp = &fdhead;
1844       while (*fdpp != NULL)
1845         if (*fdpp != curfdp
1846             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1847           /* We found one of those!  We must delete both the file description
1848              and all tags referring to it. */
1849           {
1850             fdesc *badfdp = *fdpp;
1851
1852             /* Delete the tags referring to badfdp->taggedfname
1853                that were obtained from badfdp->infname. */
1854             invalidate_nodes (badfdp, &nodehead);
1855
1856             *fdpp = badfdp->next; /* remove the bad description from the list */
1857             free_fdesc (badfdp);
1858           }
1859         else
1860           fdpp = &(*fdpp)->next; /* advance the list pointer */
1861     }
1862
1863   assert (parser != NULL);
1864
1865   /* Generic initialisations before reading from file. */
1866   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1867
1868   /* Generic initialisations before parsing file with readline. */
1869   lineno = 0;                  /* reset global line number */
1870   charno = 0;                  /* reset global char number */
1871   linecharno = 0;              /* reset global char number of line start */
1872
1873   parser (inf);
1874
1875   regex_tag_multiline ();
1876 }
1877
1878 \f
1879 /*
1880  * Check whether an implicitly named tag should be created,
1881  * then call `pfnote'.
1882  * NAME is a string that is internally copied by this function.
1883  *
1884  * TAGS format specification
1885  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1886  * The following is explained in some more detail in etc/ETAGS.EBNF.
1887  *
1888  * make_tag creates tags with "implicit tag names" (unnamed tags)
1889  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1890  *  1. NAME does not contain any of the characters in NONAM;
1891  *  2. LINESTART contains name as either a rightmost, or rightmost but
1892  *     one character, substring;
1893  *  3. the character, if any, immediately before NAME in LINESTART must
1894  *     be a character in NONAM;
1895  *  4. the character, if any, immediately after NAME in LINESTART must
1896  *     also be a character in NONAM.
1897  *
1898  * The implementation uses the notinname() macro, which recognises the
1899  * characters stored in the string `nonam'.
1900  * etags.el needs to use the same characters that are in NONAM.
1901  */
1902 static void
1903 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1904           int namelen,          /* tag length */
1905           int is_func,          /* tag is a function */
1906           char *linestart,      /* start of the line where tag is */
1907           int linelen,          /* length of the line where tag is */
1908           int lno,              /* line number */
1909           long int cno)         /* character number */
1910 {
1911   bool named = (name != NULL && namelen > 0);
1912   char *nname = NULL;
1913
1914   if (!CTAGS && named)          /* maybe set named to false */
1915     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1916        such that etags.el can guess a name from it. */
1917     {
1918       int i;
1919       register const char *cp = name;
1920
1921       for (i = 0; i < namelen; i++)
1922         if (notinname (*cp++))
1923           break;
1924       if (i == namelen)                         /* rule #1 */
1925         {
1926           cp = linestart + linelen - namelen;
1927           if (notinname (linestart[linelen-1]))
1928             cp -= 1;                            /* rule #4 */
1929           if (cp >= linestart                   /* rule #2 */
1930               && (cp == linestart
1931                   || notinname (cp[-1]))        /* rule #3 */
1932               && strneq (name, cp, namelen))    /* rule #2 */
1933             named = FALSE;      /* use implicit tag name */
1934         }
1935     }
1936
1937   if (named)
1938     nname = savenstr (name, namelen);
1939
1940   pfnote (nname, is_func, linestart, linelen, lno, cno);
1941 }
1942
1943 /* Record a tag. */
1944 static void
1945 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = etags_strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = TRUE;
1973   np->been_warned = FALSE;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = TRUE;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = FALSE;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long);
2157
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative; the result is at least 1. */
static int
number_len (long int num)
{
  int digits = 1;

  /* Every further division by ten that leaves something positive
     stands for one more digit. */
  for (num /= 10; num > 0; num /= 10)
    digits++;

  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = TRUE;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions.  The bits inside C_EXT name a dialect of the C family;
   C_AUTO and YACC are separate bits above them. */

/* mask covering the C extensions */
#define C_EXT   0x00fff
/* C */
#define C_PLAIN 0x00000
/* C++ */
#define C_PLPL  0x00001
/* C* */
#define C_STAR  0x00003
/* JAVA */
#define C_JAVA  0x00005
/* C, but switch to C++ if `class' is met */
#define C_AUTO  0x01000
/* yacc file */
#define YACC    0x10000
2288
/*
 * The C symbol tables.
 *
 * sym_type says what kind of symbol a keyword of the C family stands
 * for; the keywords quoted below are the ones the keyword table in this
 * file maps to each value.
 */
enum sym_type
{
  st_none,              /* no keyword type */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2303
2304 static unsigned int hash (const char *, unsigned int);
2305 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2306 static enum sym_type C_symtype (char *, int, int);
2307
2308 /* Feed stuff between (but not including) %[ and %] lines to:
2309      gperf -m 5
2310 %[
2311 %compare-strncmp
2312 %enum
2313 %struct-type
2314 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2315 %%
2316 if,             0,                      st_C_ignore
2317 for,            0,                      st_C_ignore
2318 while,          0,                      st_C_ignore
2319 switch,         0,                      st_C_ignore
2320 return,         0,                      st_C_ignore
2321 __attribute__,  0,                      st_C_attribute
2322 GTY,            0,                      st_C_attribute
2323 @interface,     0,                      st_C_objprot
2324 @protocol,      0,                      st_C_objprot
2325 @implementation,0,                      st_C_objimpl
2326 @end,           0,                      st_C_objend
2327 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2328 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2329 friend,         C_PLPL,                 st_C_ignore
2330 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2331 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2333 class,          0,                      st_C_class
2334 namespace,      C_PLPL,                 st_C_struct
2335 domain,         C_STAR,                 st_C_struct
2336 union,          0,                      st_C_struct
2337 struct,         0,                      st_C_struct
2338 extern,         0,                      st_C_extern
2339 enum,           0,                      st_C_enum
2340 typedef,        0,                      st_C_typedef
2341 define,         0,                      st_C_define
2342 undef,          0,                      st_C_define
2343 operator,       C_PLPL,                 st_C_operator
2344 template,       0,                      st_C_template
2345 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2346 DEFUN,          0,                      st_C_gnumacro
2347 SYSCALL,        0,                      st_C_gnumacro
2348 ENTRY,          0,                      st_C_gnumacro
2349 PSEUDO,         0,                      st_C_gnumacro
2350 # These are defined inside C functions, so currently they are not met.
2351 # EXFUN used in glibc, DEFVAR_* in emacs.
2352 #EXFUN,         0,                      st_C_gnumacro
2353 #DEFVAR_,       0,                      st_C_gnumacro
2354 %]
2355 and replace lines between %< and %> with its output, then:
2356  - remove the #if characterset check
2357  - make in_word_set static and not inline. */
2358 /*%<*/
2359 /* C code produced by gperf version 3.0.1 */
2360 /* Command-line: gperf -m 5  */
2361 /* Computed positions: -k'2-3' */
2362
2363 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2364 /* maximum key range = 33, duplicates = 0 */
2365
/*
 * Perfect hash for the keyword table: LEN plus the table value of the
 * second character of STR, plus the table value of the third character
 * unless LEN is exactly 2.  STR must hold at least two characters, and
 * three when LEN is not 2.
 * The table was produced by gperf; when the keyword list changes,
 * regenerate this function rather than editing the numbers.
 */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* The third character takes part for every length but 2. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2411
2412 static struct C_stab_entry *
2413 in_word_set (register const char *str, register unsigned int len)
2414 {
2415   enum
2416     {
2417       TOTAL_KEYWORDS = 33,
2418       MIN_WORD_LENGTH = 2,
2419       MAX_WORD_LENGTH = 15,
2420       MIN_HASH_VALUE = 2,
2421       MAX_HASH_VALUE = 34
2422     };
2423
2424   static struct C_stab_entry wordlist[] =
2425     {
2426       {""}, {""},
2427       {"if",            0,                      st_C_ignore},
2428       {"GTY",           0,                      st_C_attribute},
2429       {"@end",          0,                      st_C_objend},
2430       {"union",         0,                      st_C_struct},
2431       {"define",                0,                      st_C_define},
2432       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2433       {"template",      0,                      st_C_template},
2434       {"operator",      C_PLPL,                 st_C_operator},
2435       {"@interface",    0,                      st_C_objprot},
2436       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2437       {"friend",                C_PLPL,                 st_C_ignore},
2438       {"typedef",       0,                      st_C_typedef},
2439       {"return",                0,                      st_C_ignore},
2440       {"@implementation",0,                     st_C_objimpl},
2441       {"@protocol",     0,                      st_C_objprot},
2442       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2443       {"extern",                0,                      st_C_extern},
2444       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2445       {"struct",                0,                      st_C_struct},
2446       {"domain",                C_STAR,                 st_C_struct},
2447       {"switch",                0,                      st_C_ignore},
2448       {"enum",          0,                      st_C_enum},
2449       {"for",           0,                      st_C_ignore},
2450       {"namespace",     C_PLPL,                 st_C_struct},
2451       {"class",         0,                      st_C_class},
2452       {"while",         0,                      st_C_ignore},
2453       {"undef",         0,                      st_C_define},
2454       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2455       {"__attribute__", 0,                      st_C_attribute},
2456       {"SYSCALL",       0,                      st_C_gnumacro},
2457       {"ENTRY",         0,                      st_C_gnumacro},
2458       {"PSEUDO",                0,                      st_C_gnumacro},
2459       {"DEFUN",         0,                      st_C_gnumacro}
2460     };
2461
2462   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2463     {
2464       register int key = hash (str, len);
2465
2466       if (key <= MAX_HASH_VALUE && key >= 0)
2467         {
2468           register const char *s = wordlist[key].name;
2469
2470           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2471             return &wordlist[key];
2472         }
2473     }
2474   return 0;
2475 }
2476 /*%>*/
2477
2478 static enum sym_type
2479 C_symtype (char *str, int len, int c_ext)
2480 {
2481   register struct C_stab_entry *se = in_word_set (str, len);
2482
2483   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2484     return st_none;
2485   return se->type;
2486 }
2487
2488 \f
2489 /*
2490  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets a keyword of type
      * st_C_attribute; while it is set, C_entries does not hand tokens
      * to consider_token.
2491  */
2492 static bool inattribute;        /* looking at an __attribute__ construct */
2493
2494 /*
2495  * C functions and variables are recognized using a simple
2496  * finite automaton.  fvdef is its state variable.
2497  */
2498 static enum
2499 {
2500   fvnone,                       /* nothing seen */
2501   fdefunkey,                    /* Emacs DEFUN keyword seen */
2502   fdefunname,                   /* Emacs DEFUN name seen */
2503   foperator,                    /* func: operator keyword seen (cplpl) */
2504   fvnameseen,                   /* function or variable name seen */
2505   fstartlist,                   /* func: just after open parenthesis */
2506   finlist,                      /* func: in parameter list */
2507   flistseen,                    /* func: after parameter list */
2508   fignore,                      /* func: before open brace */
2509   vignore                       /* var-like: ignore until ';' */
2510 } fvdef;
2511
2512 static bool fvextern;           /* func or var: extern keyword seen */
2513
2514 /*
2515  * typedefs are recognized using a simple finite automaton.
2516  * typdef is its state variable.
2517  */
2518 static enum
2519 {
2520   tnone,                        /* nothing seen */
2521   tkeyseen,                     /* typedef keyword seen */
2522   ttypeseen,                    /* defined type seen */
2523   tinbody,                      /* inside typedef body */
2524   tend,                         /* just before typedef tag */
2525   tignore                       /* junk after typedef tag */
2526 } typdef;
2527
2528 /*
2529  * struct-like structures (enum, struct and union) are recognized
2530  * using another simple finite automaton.  `structdef' is its state
2531  * variable.
2532  */
2533 static enum
2534 {
2535   snone,                        /* nothing seen yet,
2536                                    or in struct body if bracelev > 0 */
2537   skeyseen,                     /* struct-like keyword seen */
2538   stagseen,                     /* struct-like tag seen */
2539   scolonseen                    /* colon seen after struct-like tag */
2540 } structdef;
2541
2542 /*
2543  * When objdef is different from onone, objtag is the name of the class.
      * It starts out pointing at a placeholder literal; consider_token
      * replaces it with a savenstr copy of the class name.
2544  */
2545 static const char *objtag = "<uninited>";
2546
2547 /*
2548  * Yet another little state machine to deal with preprocessor lines.
2549  */
2550 static enum
2551 {
2552   dnone,                        /* nothing seen */
2553   dsharpseen,                   /* '#' seen as first char on line */
2554   ddefineseen,                  /* '#' and 'define' seen */
2555   dignorerest                   /* ignore rest of line */
2556 } definedef;
2557
2558 /*
2559  * State machine for Objective C protocols and implementations.
2560  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2561  */
2562 static enum
2563 {
2564   onone,                        /* nothing seen */
2565   oprotocol,                    /* @interface or @protocol seen */
2566   oimplementation,              /* @implementation seen */
2567   otagseen,                     /* class name seen */
2568   oparenseen,                   /* parenthesis before category seen */
2569   ocatseen,                     /* category name seen */
2570   oinbody,                      /* in @implementation body */
2571   omethodsign,                  /* in @implementation body, after +/- */
2572   omethodtag,                   /* after method name */
2573   omethodcolon,                 /* after method colon */
2574   omethodparm,                  /* after method parameter */
2575   oignore                       /* wait for @end */
2576 } objdef;
2577
2578
2579 /*
2580  * Use this structure to keep info about the token read, and how it
2581  * should be tagged.  Used by the make_C_tag function to build a tag.
2582  */
2583 static struct tok
2584 {
2585   char *line;                   /* string containing the token */
2586   int offset;                   /* where the token starts in LINE */
2587   int length;                   /* token length */
2588   /*
2589     The previous members can be used to pass strings around for generic
2590     purposes.  The following ones specifically refer to creating tags.  In this
2591     case the token contained here is the pattern that will be used to create a
2592     tag.
2593   */
2594   bool valid;                   /* a tag may be created from this token;
2595                                    make_C_tag creates a regular tag only when
2596                                    this is TRUE and then resets it.  The token
                                        should be invalidated whenever a state
                                        machine is reset prematurely */
2597   bool named;                   /* create a named tag */
2598   int lineno;                   /* source line number of tag */
2599   long linepos;                 /* source char number of tag */
2600 } token;                        /* latest token read */
2601
2602 /*
2603  * Variables and functions for dealing with nested structures.
2604  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2605  */
2606 static void pushclass_above (int, char *, int);
2607 static void popclass_above (int);
2608 static void write_classname (linebuffer *, const char *qualifier);
2609
     /* cname and bracelev are parallel arrays of SIZE elements, of which
        the first NL are in use.  A cname entry is NULL when
        pushclass_above was given no name for that level. */
2610 static struct {
2611   char **cname;                 /* nested class names */
2612   int *bracelev;                /* nested class brace level */
2613   int nl;                       /* class nesting level (elements used) */
2614   int size;                     /* length of the array */
2615 } cstack;                       /* stack for nested declaration tags */
2616 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2617 #define nestlev         (cstack.nl)
2618 /* After struct keyword or in struct body, not inside a nested function. */
     /* Note: this macro reads a `bracelev' variable that must be visible
        at the point of use; it is not a member of cstack. */
2619 #define instruct        (structdef == snone && nestlev > 0                      \
2620                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2621
2622 static void
2623 pushclass_above (int bracelev, char *str, int len)
2624 {
2625   int nl;
2626
2627   popclass_above (bracelev);
2628   nl = cstack.nl;
2629   if (nl >= cstack.size)
2630     {
2631       int size = cstack.size *= 2;
2632       xrnew (cstack.cname, size, char *);
2633       xrnew (cstack.bracelev, size, int);
2634     }
2635   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2636   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2637   cstack.bracelev[nl] = bracelev;
2638   cstack.nl = nl + 1;
2639 }
2640
2641 static void
2642 popclass_above (int bracelev)
2643 {
2644   int nl;
2645
2646   for (nl = cstack.nl - 1;
2647        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2648        nl--)
2649     {
2650       free (cstack.cname[nl]);
2651       cstack.nl = nl;
2652     }
2653 }
2654
2655 static void
2656 write_classname (linebuffer *cn, const char *qualifier)
2657 {
2658   int i, len;
2659   int qlen = strlen (qualifier);
2660
2661   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2662     {
2663       len = 0;
2664       cn->len = 0;
2665       cn->buffer[0] = '\0';
2666     }
2667   else
2668     {
2669       len = strlen (cstack.cname[0]);
2670       linebuffer_setlen (cn, len);
2671       strcpy (cn->buffer, cstack.cname[0]);
2672     }
2673   for (i = 1; i < cstack.nl; i++)
2674     {
2675       char *s;
2676       int slen;
2677
2678       s = cstack.cname[i];
2679       if (s == NULL)
2680         continue;
2681       slen = strlen (s);
2682       len += slen + qlen;
2683       linebuffer_setlen (cn, len);
2684       strncat (cn->buffer, qualifier, qlen);
2685       strncat (cn->buffer, s, slen);
2686     }
2687 }
2688
2689 \f
2690 static bool consider_token (char *, int, int, int *, int, int, bool *);
2691 static void make_C_tag (bool);
2692
2693 /*
2694  * consider_token ()
2695  *      checks to see if the current token is at the start of a
2696  *      function or variable, or corresponds to a typedef, or
2697  *      is a struct/union/enum tag, or #define, or an enum constant.
2698  *
2699  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function or variable
2700  *      name (also for operators, GNU macro names and Objective C class
      *      or category tags).  For a #define it is set to whether the macro
      *      takes args.  C_EXTP points to the C extensions mask of the
      *      language we are looking at; C_AUTO in it is replaced by C_PLPL
      *      when a C++ construct is recognized.
      *
      *      Returns TRUE when the caller should treat the token as a tag
      *      candidate, FALSE otherwise.
2701  *
2702  * Globals
2703  *      fvdef                   IN OUT
2704  *      structdef               IN OUT
2705  *      definedef               IN OUT
2706  *      typdef                  IN OUT
2707  *      objdef                  IN OUT
      *      inattribute             OUT
      *      fvextern                OUT
      *      objtag, token_name      OUT (Objective C only)
2708  */
2709
2710 static bool
2711 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2712                                 /* STR -- IN: token pointer */
2713                                 /* LEN -- IN: token length */
2714                                 /* C -- IN: first char after the token */
2715                                 /* C_EXTP -- IN, OUT: C extensions mask */
2716                                 /* BRACELEV -- IN: brace level */
2717                                 /* PARLEV -- IN: parenthesis level */
2718                                 /* IS_FUNC_OR_VAR -- OUT: function or variable found */
2719 {
2720   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2721      structtype is the type of the preceding struct-like keyword, and
2722      structbracelev is the brace level where it has been seen. */
       /* These are static, so their values persist from one call to the next. */
2723   static enum sym_type structtype;
2724   static int structbracelev;
2725   static enum sym_type toktype;
2726
2727
       /* Classify the token as a keyword type, or st_none, for the
          extensions currently enabled. */
2728   toktype = C_symtype (str, len, *c_extp);
2729
2730   /*
2731    * Skip __attribute__
2732    */
2733   if (toktype == st_C_attribute)
2734     {
2735       inattribute = TRUE;
2736       return FALSE;
2737      }
2738
2739    /*
2740     * Advance the definedef state machine.
2741     */
2742    switch (definedef)
2743      {
2744      case dnone:
2745        /* We're not on a preprocessor line. */
2746        if (toktype == st_C_gnumacro)
2747          {
2748            fvdef = fdefunkey;
2749            return FALSE;
2750          }
2751        break;
2752      case dsharpseen:
            /* First token after '#': a keyword of type st_C_define arms
               ddefineseen, anything else makes the rest of the line ignored. */
2753        if (toktype == st_C_define)
2754          {
2755            definedef = ddefineseen;
2756          }
2757        else
2758          {
2759            definedef = dignorerest;
2760          }
2761        return FALSE;
2762      case ddefineseen:
2763        /*
2764         * Make a tag for any macro, unless it is a constant
2765         * and constantypedefs is FALSE.
2766         */
2767        definedef = dignorerest;
2768        *is_func_or_var = (c == '(');
2769        if (!*is_func_or_var && !constantypedefs)
2770          return FALSE;
2771        else
2772          return TRUE;
2773      case dignorerest:
2774        return FALSE;
2775      default:
2776        error ("internal error: definedef value.", (char *)NULL);
2777      }
2778
2779    /*
2780     * Now typedefs
2781     */
2782    switch (typdef)
2783      {
2784      case tnone:
2785        if (toktype == st_C_typedef)
2786          {
2787            if (typedefs)
2788              typdef = tkeyseen;
2789            fvextern = FALSE;
2790            fvdef = fvnone;
2791            return FALSE;
2792          }
2793        break;
2794      case tkeyseen:
2795        switch (toktype)
2796          {
2797          case st_none:
2798          case st_C_class:
2799          case st_C_struct:
2800          case st_C_enum:
2801            typdef = ttypeseen;
2802          }
2803        break;
2804      case ttypeseen:
2805        if (structdef == snone && fvdef == fvnone)
2806          {
2807            fvdef = fvnameseen;
2808            return TRUE;
2809          }
2810        break;
2811      case tend:
2812        switch (toktype)
2813          {
2814          case st_C_class:
2815          case st_C_struct:
2816          case st_C_enum:
2817            return FALSE;
2818          }
2819        return TRUE;
2820      }
2821
        /* Struct-like keywords and the Java extends/implements keywords. */
2822    switch (toktype)
2823      {
2824      case st_C_javastruct:
2825        if (structdef == stagseen)
2826          structdef = scolonseen;
2827        return FALSE;
2828      case st_C_template:
2829      case st_C_class:
2830        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2831            && bracelev == 0
2832            && definedef == dnone && structdef == snone
2833            && typdef == tnone && fvdef == fvnone)
2834          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2835        if (toktype == st_C_template)
2836          break;
2837        /* FALLTHRU */
2838      case st_C_struct:
2839      case st_C_enum:
2840        if (parlev == 0
2841            && fvdef != vignore
2842            && (typdef == tkeyseen
2843                || (typedefs_or_cplusplus && structdef == snone)))
2844          {
2845            structdef = skeyseen;
2846            structtype = toktype;
2847            structbracelev = bracelev;
2848            if (fvdef == fvnameseen)
2849              fvdef = fvnone;
2850          }
2851        return FALSE;
2852      }
2853
        /* The token right after a struct-like keyword is its tag. */
2854    if (structdef == skeyseen)
2855      {
2856        structdef = stagseen;
2857        return TRUE;
2858      }
2859
2860    if (typdef != tnone)
2861      definedef = dnone;
2862
2863    /* Detect Objective C constructs. */
2864    switch (objdef)
2865      {
2866      case onone:
2867        switch (toktype)
2868          {
2869          case st_C_objprot:
2870            objdef = oprotocol;
2871            return FALSE;
2872          case st_C_objimpl:
2873            objdef = oimplementation;
2874            return FALSE;
2875          }
2876        break;
2877      case oimplementation:
2878        /* Save the class tag for functions or variables defined inside. */
2879        objtag = savenstr (str, len);
2880        objdef = oinbody;
2881        return FALSE;
2882      case oprotocol:
2883        /* Save the class tag for categories. */
2884        objtag = savenstr (str, len);
2885        objdef = otagseen;
2886        *is_func_or_var = TRUE;
2887        return TRUE;
2888      case oparenseen:
2889        objdef = ocatseen;
2890        *is_func_or_var = TRUE;
2891        return TRUE;
2892      case oinbody:
2893        break;
2894      case omethodsign:
2895        if (parlev == 0)
2896          {
                /* First part of a method name: start token_name afresh. */
2897            fvdef = fvnone;
2898            objdef = omethodtag;
2899            linebuffer_setlen (&token_name, len);
2900            strncpy (token_name.buffer, str, len);
2901            token_name.buffer[len] = '\0';
2902            return TRUE;
2903          }
2904        return FALSE;
2905      case omethodcolon:
2906        if (parlev == 0)
2907          objdef = omethodparm;
2908        return FALSE;
2909      case omethodparm:
2910        if (parlev == 0)
2911          {
                /* Further part of a method name: append it to token_name. */
2912            fvdef = fvnone;
2913            objdef = omethodtag;
2914            linebuffer_setlen (&token_name, token_name.len + len);
2915            strncat (token_name.buffer, str, len);
2916            return TRUE;
2917          }
2918        return FALSE;
2919      case oignore:
2920        if (toktype == st_C_objend)
2921          {
2922            /* Memory leakage here: the string pointed by objtag is
2923               never released, because many tests would be needed to
2924               avoid breaking on incorrect input code.  The amount of
2925               memory leaked here is the sum of the lengths of the
2926               class tags.
2927            free (objtag); */
2928            objdef = onone;
2929          }
2930        return FALSE;
2931      }
2932
2933    /* A function, variable or enum constant? */
2934    switch (toktype)
2935      {
2936      case st_C_extern:
2937        fvextern = TRUE;
2938        switch  (fvdef)
2939          {
2940          case finlist:
2941          case flistseen:
2942          case fignore:
2943          case vignore:
2944            break;
2945          default:
2946            fvdef = fvnone;
2947          }
2948        return FALSE;
2949      case st_C_ignore:
2950        fvextern = FALSE;
2951        fvdef = vignore;
2952        return FALSE;
2953      case st_C_operator:
2954        fvdef = foperator;
2955        *is_func_or_var = TRUE;
2956        return TRUE;
2957      case st_none:
2958        if (constantypedefs
2959            && structdef == snone
2960            && structtype == st_C_enum && bracelev > structbracelev)
2961          return TRUE;           /* enum constant */
2962        switch (fvdef)
2963          {
2964          case fdefunkey:
2965            if (bracelev > 0)
2966              break;
2967            fvdef = fdefunname;  /* GNU macro */
2968            *is_func_or_var = TRUE;
2969            return TRUE;
2970          case fvnone:
2971            switch (typdef)
2972              {
2973              case ttypeseen:
2974                return FALSE;
2975              case tnone:
2976                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2977                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2978                  {
2979                    fvdef = vignore;
2980                    return FALSE;
2981                  }
2982                break;
2983              }
2984           /* FALLTHRU */
2985           case fvnameseen:
2986           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2987             {
2988               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2989                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2990               fvdef = foperator;
2991               *is_func_or_var = TRUE;
2992               return TRUE;
2993             }
2994           if (bracelev > 0 && !instruct)
2995             break;
2996           fvdef = fvnameseen;   /* function or variable */
2997           *is_func_or_var = TRUE;
2998           return TRUE;
2999         }
3000       break;
3001     }
3002
       /* Nothing taggable was recognized. */
3003   return FALSE;
3004 }
3005
3006 \f
3007 /*
3008  * C_entries often keeps pointers to tokens or lines which are older than
3009  * the line currently read.  By keeping two line buffers, and switching
3010  * them at end of line, it is possible to use those pointers.
3011  */
3012 static struct
3013 {
3014   long linepos;
3015   linebuffer lb;
3016 } lbs[2];
3017
     /* The macros below are not self-contained: they expand to code that
        uses variables which must be visible at the point of use (curndx,
        newndx, c_ext, charno, inf, lp, quotednl, savetoken), as well as
        the file-level `token' and `definedef'. */
3018 #define current_lb_is_new (newndx == curndx)
3019 #define switch_line_buffers() (curndx = 1 - curndx)
3020
3021 #define curlb (lbs[curndx].lb)
3022 #define newlb (lbs[newndx].lb)
3023 #define curlinepos (lbs[curndx].linepos)
3024 #define newlinepos (lbs[newndx].linepos)
3025
     /* Tests on the language extensions mask c_ext. */
3026 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3027 #define cplpl (c_ext & C_PLPL)
3028 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3029
     /* Read the next input line into the current line buffer and point lp
        at its start, leaving definedef and the saved token untouched. */
3030 #define CNL_SAVE_DEFINEDEF()                                            \
3031 do {                                                                    \
3032   curlinepos = charno;                                                  \
3033   readline (&curlb, inf);                                               \
3034   lp = curlb.buffer;                                                    \
3035   quotednl = FALSE;                                                     \
3036   newndx = curndx;                                                      \
3037 } while (0)
3038
     /* Like CNL_SAVE_DEFINEDEF, but also restore `token' from savetoken if
        the latter is valid, and reset definedef to dnone. */
3039 #define CNL()                                                           \
3040 do {                                                                    \
3041   CNL_SAVE_DEFINEDEF();                                                 \
3042   if (savetoken.valid)                                                  \
3043     {                                                                   \
3044       token = savetoken;                                                \
3045       savetoken.valid = FALSE;                                          \
3046     }                                                                   \
3047   definedef = dnone;                                                    \
3048 } while (0)
3049
3050
3051 static void
3052 make_C_tag (int isfun)
3053 {
3054   /* This function is never called when token.valid is FALSE, but
3055      we must protect against invalid input or internal errors. */
3056   if (token.valid)
3057     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3058               token.offset+token.length+1, token.lineno, token.linepos);
3059   else if (DEBUG)
3060     {                             /* this branch is optimised away if !DEBUG */
3061       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3062                 token_name.len + 17, isfun, token.line,
3063                 token.offset+token.length+1, token.lineno, token.linepos);
3064       error ("INVALID TOKEN", NULL);
3065     }
3066
3067   token.valid = FALSE;
3068 }
3069
3070
3071 /*
3072  * C_entries ()
3073  *      This routine finds functions, variables, typedefs,
3074  *      #define's, enum constants and struct/union/enum definitions in
3075  *      C syntax and adds them to the list.
3076  */
3077 static void
3078 C_entries (int c_ext, FILE *inf)
3079                                 /* extension of C */
3080                                 /* input file */
3081 {
3082   register char c;              /* latest char read; '\0' for end of line */
3083   register char *lp;            /* pointer one beyond the character `c' */
3084   int curndx, newndx;           /* indices for current and new lb */
3085   register int tokoff;          /* offset in line of start of current token */
3086   register int toklen;          /* length of current token */
3087   const char *qualifier;        /* string used to qualify names */
3088   int qlen;                     /* length of qualifier */
3089   int bracelev;                 /* current brace level */
3090   int bracketlev;               /* current bracket level */
3091   int parlev;                   /* current parenthesis level */
3092   int attrparlev;               /* __attribute__ parenthesis level */
3093   int templatelev;              /* current template level */
3094   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3095   bool incomm, inquote, inchar, quotednl, midtoken;
3096   bool yacc_rules;              /* in the rules part of a yacc file */
3097   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3098
3099
3100   linebuffer_init (&lbs[0].lb);
3101   linebuffer_init (&lbs[1].lb);
3102   if (cstack.size == 0)
3103     {
3104       cstack.size = (DEBUG) ? 1 : 4;
3105       cstack.nl = 0;
3106       cstack.cname = xnew (cstack.size, char *);
3107       cstack.bracelev = xnew (cstack.size, int);
3108     }
3109
3110   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3111   curndx = newndx = 0;
3112   lp = curlb.buffer;
3113   *lp = 0;
3114
3115   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3116   structdef = snone; definedef = dnone; objdef = onone;
3117   yacc_rules = FALSE;
3118   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3119   token.valid = savetoken.valid = FALSE;
3120   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3121   if (cjava)
3122     { qualifier = "."; qlen = 1; }
3123   else
3124     { qualifier = "::"; qlen = 2; }
3125
3126
3127   while (!feof (inf))
3128     {
3129       c = *lp++;
3130       if (c == '\\')
3131         {
3132           /* If we are at the end of the line, the next character is a
3133              '\0'; do not skip it, because it is what tells us
3134              to read the next line.  */
3135           if (*lp == '\0')
3136             {
3137               quotednl = TRUE;
3138               continue;
3139             }
3140           lp++;
3141           c = ' ';
3142         }
3143       else if (incomm)
3144         {
3145           switch (c)
3146             {
3147             case '*':
3148               if (*lp == '/')
3149                 {
3150                   c = *lp++;
3151                   incomm = FALSE;
3152                 }
3153               break;
3154             case '\0':
3155               /* Newlines inside comments do not end macro definitions in
3156                  traditional cpp. */
3157               CNL_SAVE_DEFINEDEF ();
3158               break;
3159             }
3160           continue;
3161         }
3162       else if (inquote)
3163         {
3164           switch (c)
3165             {
3166             case '"':
3167               inquote = FALSE;
3168               break;
3169             case '\0':
3170               /* Newlines inside strings do not end macro definitions
3171                  in traditional cpp, even though compilers don't
3172                  usually accept them. */
3173               CNL_SAVE_DEFINEDEF ();
3174               break;
3175             }
3176           continue;
3177         }
3178       else if (inchar)
3179         {
3180           switch (c)
3181             {
3182             case '\0':
3183               /* Hmmm, something went wrong. */
3184               CNL ();
3185               /* FALLTHRU */
3186             case '\'':
3187               inchar = FALSE;
3188               break;
3189             }
3190           continue;
3191         }
3192       else if (bracketlev > 0)
3193         {
3194           switch (c)
3195             {
3196             case ']':
3197               if (--bracketlev > 0)
3198                 continue;
3199               break;
3200             case '\0':
3201               CNL_SAVE_DEFINEDEF ();
3202               break;
3203             }
3204           continue;
3205         }
3206       else switch (c)
3207         {
3208         case '"':
3209           inquote = TRUE;
3210           if (inattribute)
3211             break;
3212           switch (fvdef)
3213             {
3214             case fdefunkey:
3215             case fstartlist:
3216             case finlist:
3217             case fignore:
3218             case vignore:
3219               break;
3220             default:
3221               fvextern = FALSE;
3222               fvdef = fvnone;
3223             }
3224           continue;
3225         case '\'':
3226           inchar = TRUE;
3227           if (inattribute)
3228             break;
3229           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3230             {
3231               fvextern = FALSE;
3232               fvdef = fvnone;
3233             }
3234           continue;
3235         case '/':
3236           if (*lp == '*')
3237             {
3238               incomm = TRUE;
3239               lp++;
3240               c = ' ';
3241             }
3242           else if (/* cplpl && */ *lp == '/')
3243             {
3244               c = '\0';
3245             }
3246           break;
3247         case '%':
3248           if ((c_ext & YACC) && *lp == '%')
3249             {
3250               /* Entering or exiting rules section in yacc file. */
3251               lp++;
3252               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3253               typdef = tnone; structdef = snone;
3254               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3255               bracelev = 0;
3256               yacc_rules = !yacc_rules;
3257               continue;
3258             }
3259           else
3260             break;
3261         case '#':
3262           if (definedef == dnone)
3263             {
3264               char *cp;
3265               bool cpptoken = TRUE;
3266
3267               /* Look back on this line.  If all blanks, or nonblanks
3268                  followed by an end of comment, this is a preprocessor
3269                  token. */
3270               for (cp = newlb.buffer; cp < lp-1; cp++)
3271                 if (!iswhite (*cp))
3272                   {
3273                     if (*cp == '*' && *(cp+1) == '/')
3274                       {
3275                         cp++;
3276                         cpptoken = TRUE;
3277                       }
3278                     else
3279                       cpptoken = FALSE;
3280                   }
3281               if (cpptoken)
3282                 definedef = dsharpseen;
3283             } /* if (definedef == dnone) */
3284           continue;
3285         case '[':
3286           bracketlev++;
3287             continue;
3288         } /* switch (c) */
3289
3290
3291       /* Consider token only if some involved conditions are satisfied. */
3292       if (typdef != tignore
3293           && definedef != dignorerest
3294           && fvdef != finlist
3295           && templatelev == 0
3296           && (definedef != dnone
3297               || structdef != scolonseen)
3298           && !inattribute)
3299         {
3300           if (midtoken)
3301             {
3302               if (endtoken (c))
3303                 {
3304                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3305                     /* This handles :: in the middle,
3306                        but not at the beginning of an identifier.
3307                        Also, space-separated :: is not recognised. */
3308                     {
3309                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3310                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3311                       lp += 2;
3312                       toklen += 2;
3313                       c = lp[-1];
3314                       goto still_in_token;
3315                     }
3316                   else
3317                     {
3318                       bool funorvar = FALSE;
3319
3320                       if (yacc_rules
3321                           || consider_token (newlb.buffer + tokoff, toklen, c,
3322                                              &c_ext, bracelev, parlev,
3323                                              &funorvar))
3324                         {
3325                           if (fvdef == foperator)
3326                             {
3327                               char *oldlp = lp;
3328                               lp = skip_spaces (lp-1);
3329                               if (*lp != '\0')
3330                                 lp += 1;
3331                               while (*lp != '\0'
3332                                      && !iswhite (*lp) && *lp != '(')
3333                                 lp += 1;
3334                               c = *lp++;
3335                               toklen += lp - oldlp;
3336                             }
3337                           token.named = FALSE;
3338                           if (!plainc
3339                               && nestlev > 0 && definedef == dnone)
3340                             /* in struct body */
3341                             {
3342                               write_classname (&token_name, qualifier);
3343                               linebuffer_setlen (&token_name,
3344                                                  token_name.len+qlen+toklen);
3345                               strcat (token_name.buffer, qualifier);
3346                               strncat (token_name.buffer,
3347                                        newlb.buffer + tokoff, toklen);
3348                               token.named = TRUE;
3349                             }
3350                           else if (objdef == ocatseen)
3351                             /* Objective C category */
3352                             {
3353                               int len = strlen (objtag) + 2 + toklen;
3354                               linebuffer_setlen (&token_name, len);
3355                               strcpy (token_name.buffer, objtag);
3356                               strcat (token_name.buffer, "(");
3357                               strncat (token_name.buffer,
3358                                        newlb.buffer + tokoff, toklen);
3359                               strcat (token_name.buffer, ")");
3360                               token.named = TRUE;
3361                             }
3362                           else if (objdef == omethodtag
3363                                    || objdef == omethodparm)
3364                             /* Objective C method */
3365                             {
3366                               token.named = TRUE;
3367                             }
3368                           else if (fvdef == fdefunname)
3369                             /* GNU DEFUN and similar macros */
3370                             {
3371                               bool defun = (newlb.buffer[tokoff] == 'F');
3372                               int off = tokoff;
3373                               int len = toklen;
3374
3375                               /* Rewrite the tag so that emacs lisp DEFUNs
3376                                  can be found by their elisp name */
3377                               if (defun)
3378                                 {
3379                                   off += 1;
3380                                   len -= 1;
3381                                 }
3382                               linebuffer_setlen (&token_name, len);
3383                               strncpy (token_name.buffer,
3384                                        newlb.buffer + off, len);
3385                               token_name.buffer[len] = '\0';
3386                               if (defun)
3387                                 while (--len >= 0)
3388                                   if (token_name.buffer[len] == '_')
3389                                     token_name.buffer[len] = '-';
3390                               token.named = defun;
3391                             }
3392                           else
3393                             {
3394                               linebuffer_setlen (&token_name, toklen);
3395                               strncpy (token_name.buffer,
3396                                        newlb.buffer + tokoff, toklen);
3397                               token_name.buffer[toklen] = '\0';
3398                               /* Name macros and members. */
3399                               token.named = (structdef == stagseen
3400                                              || typdef == ttypeseen
3401                                              || typdef == tend
3402                                              || (funorvar
3403                                                  && definedef == dignorerest)
3404                                              || (funorvar
3405                                                  && definedef == dnone
3406                                                  && structdef == snone
3407                                                  && bracelev > 0));
3408                             }
3409                           token.lineno = lineno;
3410                           token.offset = tokoff;
3411                           token.length = toklen;
3412                           token.line = newlb.buffer;
3413                           token.linepos = newlinepos;
3414                           token.valid = TRUE;
3415
3416                           if (definedef == dnone
3417                               && (fvdef == fvnameseen
3418                                   || fvdef == foperator
3419                                   || structdef == stagseen
3420                                   || typdef == tend
3421                                   || typdef == ttypeseen
3422                                   || objdef != onone))
3423                             {
3424                               if (current_lb_is_new)
3425                                 switch_line_buffers ();
3426                             }
3427                           else if (definedef != dnone
3428                                    || fvdef == fdefunname
3429                                    || instruct)
3430                             make_C_tag (funorvar);
3431                         }
3432                       else /* not yacc and consider_token failed */
3433                         {
3434                           if (inattribute && fvdef == fignore)
3435                             {
3436                               /* We have just met __attribute__ after a
3437                                  function parameter list: do not tag the
3438                                  function again. */
3439                               fvdef = fvnone;
3440                             }
3441                         }
3442                       midtoken = FALSE;
3443                     }
3444                 } /* if (endtoken (c)) */
3445               else if (intoken (c))
3446                 still_in_token:
3447                 {
3448                   toklen++;
3449                   continue;
3450                 }
3451             } /* if (midtoken) */
3452           else if (begtoken (c))
3453             {
3454               switch (definedef)
3455                 {
3456                 case dnone:
3457                   switch (fvdef)
3458                     {
3459                     case fstartlist:
3460                       /* This prevents tagging fb in
3461                          void (__attribute__((noreturn)) *fb) (void);
3462                          Fixing this is not easy and not very important. */
3463                       fvdef = finlist;
3464                       continue;
3465                     case flistseen:
3466                       if (plainc || declarations)
3467                         {
3468                           make_C_tag (TRUE); /* a function */
3469                           fvdef = fignore;
3470                         }
3471                       break;
3472                     }
3473                   if (structdef == stagseen && !cjava)
3474                     {
3475                       popclass_above (bracelev);
3476                       structdef = snone;
3477                     }
3478                   break;
3479                 case dsharpseen:
3480                   savetoken = token;
3481                   break;
3482                 }
3483               if (!yacc_rules || lp == newlb.buffer + 1)
3484                 {
3485                   tokoff = lp - 1 - newlb.buffer;
3486                   toklen = 1;
3487                   midtoken = TRUE;
3488                 }
3489               continue;
3490             } /* if (begtoken) */
3491         } /* if must look at token */
3492
3493
3494       /* Detect end of line, colon, comma, semicolon and various braces
3495          after having handled a token.*/
3496       switch (c)
3497         {
3498         case ':':
3499           if (inattribute)
3500             break;
3501           if (yacc_rules && token.offset == 0 && token.valid)
3502             {
3503               make_C_tag (FALSE); /* a yacc function */
3504               break;
3505             }
3506           if (definedef != dnone)
3507             break;
3508           switch (objdef)
3509             {
3510             case  otagseen:
3511               objdef = oignore;
3512               make_C_tag (TRUE); /* an Objective C class */
3513               break;
3514             case omethodtag:
3515             case omethodparm:
3516               objdef = omethodcolon;
3517               linebuffer_setlen (&token_name, token_name.len + 1);
3518               strcat (token_name.buffer, ":");
3519               break;
3520             }
3521           if (structdef == stagseen)
3522             {
3523               structdef = scolonseen;
3524               break;
3525             }
3526           /* Should be useless, but may work as a safety net. */
3527           if (cplpl && fvdef == flistseen)
3528             {
3529               make_C_tag (TRUE); /* a function */
3530               fvdef = fignore;
3531               break;
3532             }
3533           break;
3534         case ';':
3535           if (definedef != dnone || inattribute)
3536             break;
3537           switch (typdef)
3538             {
3539             case tend:
3540             case ttypeseen:
3541               make_C_tag (FALSE); /* a typedef */
3542               typdef = tnone;
3543               fvdef = fvnone;
3544               break;
3545             case tnone:
3546             case tinbody:
3547             case tignore:
3548               switch (fvdef)
3549                 {
3550                 case fignore:
3551                   if (typdef == tignore || cplpl)
3552                     fvdef = fvnone;
3553                   break;
3554                 case fvnameseen:
3555                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3556                       || (members && instruct))
3557                     make_C_tag (FALSE); /* a variable */
3558                   fvextern = FALSE;
3559                   fvdef = fvnone;
3560                   token.valid = FALSE;
3561                   break;
3562                 case flistseen:
3563                   if ((declarations
3564                        && (cplpl || !instruct)
3565                        && (typdef == tnone || (typdef != tignore && instruct)))
3566                       || (members
3567                           && plainc && instruct))
3568                     make_C_tag (TRUE);  /* a function */
3569                   /* FALLTHRU */
3570                 default:
3571                   fvextern = FALSE;
3572                   fvdef = fvnone;
3573                   if (declarations
3574                        && cplpl && structdef == stagseen)
3575                     make_C_tag (FALSE); /* forward declaration */
3576                   else
3577                     token.valid = FALSE;
3578                 } /* switch (fvdef) */
3579               /* FALLTHRU */
3580             default:
3581               if (!instruct)
3582                 typdef = tnone;
3583             }
3584           if (structdef == stagseen)
3585             structdef = snone;
3586           break;
3587         case ',':
3588           if (definedef != dnone || inattribute)
3589             break;
3590           switch (objdef)
3591             {
3592             case omethodtag:
3593             case omethodparm:
3594               make_C_tag (TRUE); /* an Objective C method */
3595               objdef = oinbody;
3596               break;
3597             }
3598           switch (fvdef)
3599             {
3600             case fdefunkey:
3601             case foperator:
3602             case fstartlist:
3603             case finlist:
3604             case fignore:
3605             case vignore:
3606               break;
3607             case fdefunname:
3608               fvdef = fignore;
3609               break;
3610             case fvnameseen:
3611               if (parlev == 0
3612                   && ((globals
3613                        && bracelev == 0
3614                        && templatelev == 0
3615                        && (!fvextern || declarations))
3616                       || (members && instruct)))
3617                   make_C_tag (FALSE); /* a variable */
3618               break;
3619             case flistseen:
3620               if ((declarations && typdef == tnone && !instruct)
3621                   || (members && typdef != tignore && instruct))
3622                 {
3623                   make_C_tag (TRUE); /* a function */
3624                   fvdef = fvnameseen;
3625                 }
3626               else if (!declarations)
3627                 fvdef = fvnone;
3628               token.valid = FALSE;
3629               break;
3630             default:
3631               fvdef = fvnone;
3632             }
3633           if (structdef == stagseen)
3634             structdef = snone;
3635           break;
3636         case ']':
3637           if (definedef != dnone || inattribute)
3638             break;
3639           if (structdef == stagseen)
3640             structdef = snone;
3641           switch (typdef)
3642             {
3643             case ttypeseen:
3644             case tend:
3645               typdef = tignore;
3646               make_C_tag (FALSE);       /* a typedef */
3647               break;
3648             case tnone:
3649             case tinbody:
3650               switch (fvdef)
3651                 {
3652                 case foperator:
3653                 case finlist:
3654                 case fignore:
3655                 case vignore:
3656                   break;
3657                 case fvnameseen:
3658                   if ((members && bracelev == 1)
3659                       || (globals && bracelev == 0
3660                           && (!fvextern || declarations)))
3661                     make_C_tag (FALSE); /* a variable */
3662                   /* FALLTHRU */
3663                 default:
3664                   fvdef = fvnone;
3665                 }
3666               break;
3667             }
3668           break;
3669         case '(':
3670           if (inattribute)
3671             {
3672               attrparlev++;
3673               break;
3674             }
3675           if (definedef != dnone)
3676             break;
3677           if (objdef == otagseen && parlev == 0)
3678             objdef = oparenseen;
3679           switch (fvdef)
3680             {
3681             case fvnameseen:
3682               if (typdef == ttypeseen
3683                   && *lp != '*'
3684                   && !instruct)
3685                 {
3686                   /* This handles constructs like:
3687                      typedef void OperatorFun (int fun); */
3688                   make_C_tag (FALSE);
3689                   typdef = tignore;
3690                   fvdef = fignore;
3691                   break;
3692                 }
3693               /* FALLTHRU */
3694             case foperator:
3695               fvdef = fstartlist;
3696               break;
3697             case flistseen:
3698               fvdef = finlist;
3699               break;
3700             }
3701           parlev++;
3702           break;
3703         case ')':
3704           if (inattribute)
3705             {
3706               if (--attrparlev == 0)
3707                 inattribute = FALSE;
3708               break;
3709             }
3710           if (definedef != dnone)
3711             break;
3712           if (objdef == ocatseen && parlev == 1)
3713             {
3714               make_C_tag (TRUE); /* an Objective C category */
3715               objdef = oignore;
3716             }
3717           if (--parlev == 0)
3718             {
3719               switch (fvdef)
3720                 {
3721                 case fstartlist:
3722                 case finlist:
3723                   fvdef = flistseen;
3724                   break;
3725                 }
3726               if (!instruct
3727                   && (typdef == tend
3728                       || typdef == ttypeseen))
3729                 {
3730                   typdef = tignore;
3731                   make_C_tag (FALSE); /* a typedef */
3732                 }
3733             }
3734           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3735             parlev = 0;
3736           break;
3737         case '{':
3738           if (definedef != dnone)
3739             break;
3740           if (typdef == ttypeseen)
3741             {
3742               /* Whenever typdef is set to tinbody (currently only
3743                  here), typdefbracelev should be set to bracelev. */
3744               typdef = tinbody;
3745               typdefbracelev = bracelev;
3746             }
3747           switch (fvdef)
3748             {
3749             case flistseen:
3750               make_C_tag (TRUE);    /* a function */
3751               /* FALLTHRU */
3752             case fignore:
3753               fvdef = fvnone;
3754               break;
3755             case fvnone:
3756               switch (objdef)
3757                 {
3758                 case otagseen:
3759                   make_C_tag (TRUE); /* an Objective C class */
3760                   objdef = oignore;
3761                   break;
3762                 case omethodtag:
3763                 case omethodparm:
3764                   make_C_tag (TRUE); /* an Objective C method */
3765                   objdef = oinbody;
3766                   break;
3767                 default:
3768                   /* Neutralize `extern "C" {' grot. */
3769                   if (bracelev == 0 && structdef == snone && nestlev == 0
3770                       && typdef == tnone)
3771                     bracelev = -1;
3772                 }
3773               break;
3774             }
3775           switch (structdef)
3776             {
3777             case skeyseen:         /* unnamed struct */
3778               pushclass_above (bracelev, NULL, 0);
3779               structdef = snone;
3780               break;
3781             case stagseen:         /* named struct or enum */
3782             case scolonseen:       /* a class */
3783               pushclass_above (bracelev,token.line+token.offset, token.length);
3784               structdef = snone;
3785               make_C_tag (FALSE);  /* a struct or enum */
3786               break;
3787             }
3788           bracelev += 1;
3789           break;
3790         case '*':
3791           if (definedef != dnone)
3792             break;
3793           if (fvdef == fstartlist)
3794             {
3795               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3796               token.valid = FALSE;
3797             }
3798           break;
3799         case '}':
3800           if (definedef != dnone)
3801             break;
3802           bracelev -= 1;
3803           if (!ignoreindent && lp == newlb.buffer + 1)
3804             {
3805               if (bracelev != 0)
3806                 token.valid = FALSE; /* unexpected value, token unreliable */
3807               bracelev = 0;     /* reset brace level if first column */
3808               parlev = 0;       /* also reset paren level, just in case... */
3809             }
3810           else if (bracelev < 0)
3811             {
3812               token.valid = FALSE; /* something gone amiss, token unreliable */
3813               bracelev = 0;
3814             }
3815           if (bracelev == 0 && fvdef == vignore)
3816             fvdef = fvnone;             /* end of function */
3817           popclass_above (bracelev);
3818           structdef = snone;
3819           /* Only if typdef == tinbody is typdefbracelev significant. */
3820           if (typdef == tinbody && bracelev <= typdefbracelev)
3821             {
3822               assert (bracelev == typdefbracelev);
3823               typdef = tend;
3824             }
3825           break;
3826         case '=':
3827           if (definedef != dnone)
3828             break;
3829           switch (fvdef)
3830             {
3831             case foperator:
3832             case finlist:
3833             case fignore:
3834             case vignore:
3835               break;
3836             case fvnameseen:
3837               if ((members && bracelev == 1)
3838                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3839                 make_C_tag (FALSE); /* a variable */
3840               /* FALLTHRU */
3841             default:
3842               fvdef = vignore;
3843             }
3844           break;
3845         case '<':
3846           if (cplpl
3847               && (structdef == stagseen || fvdef == fvnameseen))
3848             {
3849               templatelev++;
3850               break;
3851             }
3852           goto resetfvdef;
3853         case '>':
3854           if (templatelev > 0)
3855             {
3856               templatelev--;
3857               break;
3858             }
3859           goto resetfvdef;
3860         case '+':
3861         case '-':
3862           if (objdef == oinbody && bracelev == 0)
3863             {
3864               objdef = omethodsign;
3865               break;
3866             }
3867           /* FALLTHRU */
3868         resetfvdef:
3869         case '#': case '~': case '&': case '%': case '/':
3870         case '|': case '^': case '!': case '.': case '?':
3871           if (definedef != dnone)
3872             break;
3873           /* These surely cannot follow a function tag in C. */
3874           switch (fvdef)
3875             {
3876             case foperator:
3877             case finlist:
3878             case fignore:
3879             case vignore:
3880               break;
3881             default:
3882               fvdef = fvnone;
3883             }
3884           break;
3885         case '\0':
3886           if (objdef == otagseen)
3887             {
3888               make_C_tag (TRUE); /* an Objective C class */
3889               objdef = oignore;
3890             }
3891           /* If a macro spans multiple lines don't reset its state. */
3892           if (quotednl)
3893             CNL_SAVE_DEFINEDEF ();
3894           else
3895             CNL ();
3896           break;
3897         } /* switch (c) */
3898
3899     } /* while not eof */
3900
3901   free (lbs[0].lb.buffer);
3902   free (lbs[1].lb.buffer);
3903 }
3904
3905 /*
3906  * Process either a C++ file or a C file depending on the setting
3907  * of a global flag.
3908  */
3909 static void
3910 default_C_entries (FILE *inf)
3911 {
3912   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3913 }
3914
/* Parse INF as plain C: C_entries is given no language flag at all. */
static void
plain_C_entries (FILE *inf)
{
  const int no_language_flags = 0;

  C_entries (no_language_flags, inf);
}
3921
3922 /* Always do C++. */
3923 static void
3924 Cplusplus_entries (FILE *inf)
3925 {
3926   C_entries (C_PLPL, inf);
3927 }
3928
3929 /* Always do Java. */
3930 static void
3931 Cjava_entries (FILE *inf)
3932 {
3933   C_entries (C_JAVA, inf);
3934 }
3935
3936 /* Always do C*. */
3937 static void
3938 Cstar_entries (FILE *inf)
3939 {
3940   C_entries (C_STAR, inf);
3941 }
3942
3943 /* Always do Yacc. */
3944 static void
3945 Yacc_entries (FILE *inf)
3946 {
3947   C_entries (YACC, inf);
3948 }
3949
3950 \f
3951 /* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to the header of a `for' loop; the
   statement written after it is the loop body.  Before every iteration
   it tests feof (FILE_POINTER); when that is false it calls readline
   to fill LINE_BUFFER and then sets CHAR_POINTER to LINE_BUFFER.buffer.
   The loop has no initialization and no increment clause.
   FILE_POINTER and LINE_BUFFER are each expanded twice, so pass plain
   variables without side effects. */
3952 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3953   for (;                        /* loop initialization */               \
3954        !feof (file_pointer)     /* loop test */                         \
3955        &&                       /* instructions at start of loop */     \
3956           (readline (&line_buffer, file_pointer),                       \
3957            char_pointer = line_buffer.buffer,                           \
3958            TRUE);                                                       \
3959       )
3960
/* LOOKING_AT is true when strneq accepts the first sizeof(kw)-1
   characters at CP against KW and the character right after them
   satisfies notinname.  Only in that case CP is reassigned, to the
   result of skip_spaces applied just past the keyword; on a mismatch
   the `&&' chain stops early and CP keeps its value.  CP is expanded
   several times and assigned to, so it must be an lvalue without side
   effects. */
3961 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
3962   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
3963    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
3964    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
3965    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
3966
3967 /* Similar to LOOKING_AT but compares with strncaseeq, does not use
   notinname on the character after the keyword, and does not skip the
   spaces that follow: on a match CP is only advanced by the length of
   KW.  On a mismatch CP is left alone. */
3968 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3969   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
3970    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
3971    && ((cp) += sizeof(kw)-1))                   /* step over kw */
3972
3973 /*
3974  * Read a file, but do no processing.  This is used to do regexp
3975  * matching on files that have no language defined.
3976  */
3977 static void
3978 just_read_file (FILE *inf)
3979 {
3980   while (!feof (inf))
3981     readline (&lb, inf);
3982 }
3983
3984 \f
3985 /* Fortran parsing */
3986
3987 static void F_takeprec (void);
3988 static void F_getit (FILE *);
3989
3990 static void
3991 F_takeprec (void)
3992 {
3993   dbp = skip_spaces (dbp);
3994   if (*dbp != '*')
3995     return;
3996   dbp++;
3997   dbp = skip_spaces (dbp);
3998   if (strneq (dbp, "(*)", 3))
3999     {
4000       dbp += 3;
4001       return;
4002     }
4003   if (!ISDIGIT (*dbp))
4004     {
4005       --dbp;                    /* force failure */
4006       return;
4007     }
4008   do
4009     dbp++;
4010   while (ISDIGIT (*dbp));
4011 }
4012
4013 static void
4014 F_getit (FILE *inf)
4015 {
4016   register char *cp;
4017
4018   dbp = skip_spaces (dbp);
4019   if (*dbp == '\0')
4020     {
4021       readline (&lb, inf);
4022       dbp = lb.buffer;
4023       if (dbp[5] != '&')
4024         return;
4025       dbp += 6;
4026       dbp = skip_spaces (dbp);
4027     }
4028   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4029     return;
4030   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4031     continue;
4032   make_tag (dbp, cp-dbp, TRUE,
4033             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4034 }
4035
4036
/*
 * Make tags for the Fortran program units found in INF, handling one
 * input line per loop iteration.
 *
 * On each line an initial `%' and leading blanks are stepped over,
 * then the keywords "recursive" and "pure" (LOOKING_AT_NOCASE) and one
 * type keyword -- integer, real, logical, complex, character or
 * double precision -- tested with nocase_tail (defined elsewhere;
 * presumably it advances dbp past a match).  If what follows matches
 * "function", "subroutine" or "entry", F_getit is called to tag the
 * name.  For "blockdata" / "block data", F_getit is called when
 * something follows on the line; otherwise a tag named "blockdata" is
 * made.  Every other line is ignored.
 */
4037 static void
4038 Fortran_functions (FILE *inf)
4039 {
4040   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4041     {
4042       if (*dbp == '%')
4043         dbp++;                  /* Ratfor escape to fortran */
4044       dbp = skip_spaces (dbp);
4045       if (*dbp == '\0')
4046         continue;               /* nothing else on this line */
4047       /* Optional prefix keywords; only tested in this order. */
4048       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4049         dbp = skip_spaces (dbp);
4050
4051       if (LOOKING_AT_NOCASE (dbp, "pure"))
4052         dbp = skip_spaces (dbp);
4053       /* Optional type keyword; F_takeprec handles a `*' suffix. */
4054       switch (lowcase (*dbp))
4055         {
4056         case 'i':
4057           if (nocase_tail ("integer"))
4058             F_takeprec ();
4059           break;
4060         case 'r':
4061           if (nocase_tail ("real"))
4062             F_takeprec ();
4063           break;
4064         case 'l':
4065           if (nocase_tail ("logical"))
4066             F_takeprec ();
4067           break;
4068         case 'c':
4069           if (nocase_tail ("complex") || nocase_tail ("character"))
4070             F_takeprec ();
4071           break;
4072         case 'd':
4073           if (nocase_tail ("double"))
4074             {
4075               dbp = skip_spaces (dbp);
4076               if (*dbp == '\0')
4077                 continue;       /* line ends right after `double' */
4078               if (nocase_tail ("precision"))
4079                 break;
4080               continue;         /* `double' without `precision': skip line */
4081             }
4082           break;
4083         }
4084       dbp = skip_spaces (dbp);
4085       if (*dbp == '\0')
4086         continue;
4087       switch (lowcase (*dbp))   /* keyword that introduces a name */
4088         {
4089         case 'f':
4090           if (nocase_tail ("function"))
4091             F_getit (inf);
4092           continue;
4093         case 's':
4094           if (nocase_tail ("subroutine"))
4095             F_getit (inf);
4096           continue;
4097         case 'e':
4098           if (nocase_tail ("entry"))
4099             F_getit (inf);
4100           continue;
4101         case 'b':
4102           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4103             {
4104               dbp = skip_spaces (dbp);
4105               if (*dbp == '\0') /* assume un-named */
4106                 make_tag ("blockdata", 9, TRUE,
4107                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4108               else
4109                 F_getit (inf);  /* look for name */
4110             }
4111           continue;
4112         }
4113     }
4114 }
4115
4116 \f
4117 /*
4118  * Ada parsing
4119  * Original code by
4120  * Philippe Waroquiers (1998)
4121  */
4122
4123 /* Once we are positioned after an "interesting" keyword, let's get
4124    the real tag value necessary. */
4125 static void
4126 Ada_getit (FILE *inf, const char *name_qualifier)
4127 {
4128   register char *cp;
4129   char *name;
4130   char c;
4131
4132   while (!feof (inf))
4133     {
4134       dbp = skip_spaces (dbp);
4135       if (*dbp == '\0'
4136           || (dbp[0] == '-' && dbp[1] == '-'))
4137         {
4138           readline (&lb, inf);
4139           dbp = lb.buffer;
4140         }
4141       switch (lowcase(*dbp))
4142         {
4143         case 'b':
4144           if (nocase_tail ("body"))
4145             {
4146               /* Skipping body of   procedure body   or   package body or ....
4147                  resetting qualifier to body instead of spec. */
4148               name_qualifier = "/b";
4149               continue;
4150             }
4151           break;
4152         case 't':
4153           /* Skipping type of   task type   or   protected type ... */
4154           if (nocase_tail ("type"))
4155             continue;
4156           break;
4157         }
4158       if (*dbp == '"')
4159         {
4160           dbp += 1;
4161           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4162             continue;
4163         }
4164       else
4165         {
4166           dbp = skip_spaces (dbp);
4167           for (cp = dbp;
4168                (*cp != '\0'
4169                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4170                cp++)
4171             continue;
4172           if (cp == dbp)
4173             return;
4174         }
4175       c = *cp;
4176       *cp = '\0';
4177       name = concat (dbp, name_qualifier, "");
4178       *cp = c;
4179       make_tag (name, strlen (name), TRUE,
4180                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4181       free (name);
4182       if (c == '"')
4183         dbp = cp + 1;
4184       return;
4185     }
4186 }
4187
/*
 * Scan the Ada source read from INF, one line at a time, and call
 * Ada_getit whenever one of the keywords function, procedure, package,
 * protected, task or type is found at the beginning of a token.
 * String literals, `--' comments, quoted characters and (while
 * skip_till_semicolumn is set) everything up to the next `;' are
 * skipped and never examined for keywords.
 */
static void
Ada_funcs (FILE *inf)
{
  bool inquote = FALSE;         /* a "..." string is still open; kept
                                   across lines */
  bool skip_till_semicolumn = FALSE; /* ignore text up to the next `;' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When the string starts here (!inquote), step over the
                 opening quote before searching for the closing one. */
              dbp = etags_strchr (dbp + !inquote, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line: remember that we are
                     still inside the string and move on. */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          /* Set by the `use' case below; cleared at the first `;'. */
          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token.  In every case below,
             `break' leaves the switch so that the token is skipped by
             the loop at the bottom, while `continue' restarts the
             per-character loop.  The second argument of Ada_getit is
             the qualifier it appends to the tag name. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* There is no `break' here: when the test above fails,
                 control falls through into case 't'.
                 NOTE(review): presumably harmless, since a token that
                 starts with `u' should match neither "task" nor "type"
                 -- confirm against nocase_tail, or add an explicit
                 break. */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore whatever follows the type name on this line. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4294
4295 \f
4296 /*
4297  * Unix and microcontroller assembly tag handling
4298  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4299  * Idea by Bob Weiner, Motorola Inc. (1994)
4300  */
4301 static void
4302 Asm_labels (FILE *inf)
4303 {
4304   register char *cp;
4305
4306   LOOP_ON_INPUT_LINES (inf, lb, cp)
4307     {
4308       /* If first char is alphabetic or one of [_.$], test for colon
4309          following identifier. */
4310       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4311         {
4312           /* Read past label. */
4313           cp++;
4314           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4315             cp++;
4316           if (*cp == ':' || iswhite (*cp))
4317             /* Found end of label, so copy it and add it to the table. */
4318             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4319                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4320         }
4321     }
4322 }
4323
4324 \f
4325 /*
4326  * Perl support
4327  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4328  * Perl variable names: /^(my|local).../
4329  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4330  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4331  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4332  */
4333 static void
4334 Perl_functions (FILE *inf)
4335 {
4336   char *package = savestr ("main"); /* current package name */
4337   register char *cp;
4338
4339   LOOP_ON_INPUT_LINES (inf, lb, cp)
4340     {
4341       cp = skip_spaces (cp);
4342
4343       if (LOOKING_AT (cp, "package"))
4344         {
4345           free (package);
4346           get_tag (cp, &package);
4347         }
4348       else if (LOOKING_AT (cp, "sub"))
4349         {
4350           char *pos;
4351           char *sp = cp;
4352
4353           while (!notinname (*cp))
4354             cp++;
4355           if (cp == sp)
4356             continue;           /* nothing found */
4357           if ((pos = etags_strchr (sp, ':')) != NULL
4358               && pos < cp && pos[1] == ':')
4359             /* The name is already qualified. */
4360             make_tag (sp, cp - sp, TRUE,
4361                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4362           else
4363             /* Qualify it. */
4364             {
4365               char savechar, *name;
4366
4367               savechar = *cp;
4368               *cp = '\0';
4369               name = concat (package, "::", sp);
4370               *cp = savechar;
4371               make_tag (name, strlen(name), TRUE,
4372                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4373               free (name);
4374             }
4375         }
4376        else if (globals)        /* only if we are tagging global vars */
4377         {
4378           /* Skip a qualifier, if any. */
4379           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4380           /* After "my" or "local", but before any following paren or space. */
4381           char *varstart = cp;
4382
4383           if (qual              /* should this be removed?  If yes, how? */
4384               && (*cp == '$' || *cp == '@' || *cp == '%'))
4385             {
4386               varstart += 1;
4387               do
4388                 cp++;
4389               while (ISALNUM (*cp) || *cp == '_');
4390             }
4391           else if (qual)
4392             {
4393               /* Should be examining a variable list at this point;
4394                  could insist on seeing an open parenthesis. */
4395               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4396                 cp++;
4397             }
4398           else
4399             continue;
4400
4401           make_tag (varstart, cp - varstart, FALSE,
4402                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4403         }
4404     }
4405   free (package);
4406 }
4407
4408
4409 /*
4410  * Python support
4411  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4412  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4413  * More ideas by seb bacon <seb@jamkit.com> (2002)
4414  */
4415 static void
4416 Python_functions (FILE *inf)
4417 {
4418   register char *cp;
4419
4420   LOOP_ON_INPUT_LINES (inf, lb, cp)
4421     {
4422       cp = skip_spaces (cp);
4423       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4424         {
4425           char *name = cp;
4426           while (!notinname (*cp) && *cp != ':')
4427             cp++;
4428           make_tag (name, cp - name, TRUE,
4429                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4430         }
4431     }
4432 }
4433
4434 \f
4435 /*
4436  * PHP support
4437  * Look for:
4438  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4439  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4440  *  - /^[ \t]*define\(\"[^\"]+/
4441  * Only with --members:
4442  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4443  * Idea by Diez B. Roggisch (2001)
4444  */
4445 static void
4446 PHP_functions (FILE *inf)
4447 {
4448   register char *cp, *name;
4449   bool search_identifier = FALSE;
4450
4451   LOOP_ON_INPUT_LINES (inf, lb, cp)
4452     {
4453       cp = skip_spaces (cp);
4454       name = cp;
4455       if (search_identifier
4456           && *cp != '\0')
4457         {
4458           while (!notinname (*cp))
4459             cp++;
4460           make_tag (name, cp - name, TRUE,
4461                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4462           search_identifier = FALSE;
4463         }
4464       else if (LOOKING_AT (cp, "function"))
4465         {
4466           if(*cp == '&')
4467             cp = skip_spaces (cp+1);
4468           if(*cp != '\0')
4469             {
4470               name = cp;
4471               while (!notinname (*cp))
4472                 cp++;
4473               make_tag (name, cp - name, TRUE,
4474                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4475             }
4476           else
4477             search_identifier = TRUE;
4478         }
4479       else if (LOOKING_AT (cp, "class"))
4480         {
4481           if (*cp != '\0')
4482             {
4483               name = cp;
4484               while (*cp != '\0' && !iswhite (*cp))
4485                 cp++;
4486               make_tag (name, cp - name, FALSE,
4487                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4488             }
4489           else
4490             search_identifier = TRUE;
4491         }
4492       else if (strneq (cp, "define", 6)
4493                && (cp = skip_spaces (cp+6))
4494                && *cp++ == '('
4495                && (*cp == '"' || *cp == '\''))
4496         {
4497           char quote = *cp++;
4498           name = cp;
4499           while (*cp != quote && *cp != '\0')
4500             cp++;
4501           make_tag (name, cp - name, FALSE,
4502                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4503         }
4504       else if (members
4505                && LOOKING_AT (cp, "var")
4506                && *cp == '$')
4507         {
4508           name = cp;
4509           while (!notinname(*cp))
4510             cp++;
4511           make_tag (name, cp - name, FALSE,
4512                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4513         }
4514     }
4515 }
4516
4517 \f
4518 /*
4519  * Cobol tag functions
4520  * We could look for anything that could be a paragraph name.
4521  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4522  * Idea by Corny de Souza (1993)
4523  */
4524 static void
4525 Cobol_paragraphs (FILE *inf)
4526 {
4527   register char *bp, *ep;
4528
4529   LOOP_ON_INPUT_LINES (inf, lb, bp)
4530     {
4531       if (lb.len < 9)
4532         continue;
4533       bp += 8;
4534
4535       /* If eoln, compiler option or comment ignore whole line. */
4536       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4537         continue;
4538
4539       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4540         continue;
4541       if (*ep++ == '.')
4542         make_tag (bp, ep - bp, TRUE,
4543                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4544     }
4545 }
4546
4547 \f
4548 /*
4549  * Makefile support
4550  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4551  */
4552 static void
4553 Makefile_targets (FILE *inf)
4554 {
4555   register char *bp;
4556
4557   LOOP_ON_INPUT_LINES (inf, lb, bp)
4558     {
4559       if (*bp == '\t' || *bp == '#')
4560         continue;
4561       while (*bp != '\0' && *bp != '=' && *bp != ':')
4562         bp++;
4563       if (*bp == ':' || (globals && *bp == '='))
4564         {
4565           /* We should detect if there is more than one tag, but we do not.
4566              We just skip initial and final spaces. */
4567           char * namestart = skip_spaces (lb.buffer);
4568           while (--bp > namestart)
4569             if (!notinname (*bp))
4570               break;
4571           make_tag (namestart, bp - namestart + 1, TRUE,
4572                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4573         }
4574     }
4575 }
4576
4577 \f
4578 /*
4579  * Pascal parsing
4580  * Original code by Mosur K. Mohan (1989)
4581  *
4582  *  Locates tags for procedures & functions.  Doesn't do any type- or
4583  *  var-definitions.  It does look for the keyword "extern" or
4584  *  "forward" immediately following the procedure statement; if found,
4585  *  the tag is skipped.
4586  */
4587 static void
4588 Pascal_functions (FILE *inf)
4589 {
4590   linebuffer tline;             /* mostly copied from C_entries */
4591   long save_lcno;
4592   int save_lineno, namelen, taglen;
4593   char c, *name;
4594
4595   bool                          /* each of these flags is TRUE if: */
4596     incomment,                  /* point is inside a comment */
4597     inquote,                    /* point is inside '..' string */
4598     get_tagname,                /* point is after PROCEDURE/FUNCTION
4599                                    keyword, so next item = potential tag */
4600     found_tag,                  /* point is after a potential tag */
4601     inparms,                    /* point is within parameter-list */
4602     verify_tag;                 /* point has passed the parm-list, so the
4603                                    next token will determine whether this
4604                                    is a FORWARD/EXTERN to be ignored, or
4605                                    whether it is a real tag */
4606
4607   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4608   name = NULL;                  /* keep compiler quiet */
4609   dbp = lb.buffer;
4610   *dbp = '\0';
4611   linebuffer_init (&tline);
4612
4613   incomment = inquote = FALSE;
4614   found_tag = FALSE;            /* have a proc name; check if extern */
4615   get_tagname = FALSE;          /* found "procedure" keyword         */
4616   inparms = FALSE;              /* found '(' after "proc"            */
4617   verify_tag = FALSE;           /* check if "extern" is ahead        */
4618
4619
4620   while (!feof (inf))           /* long main loop to get next char */
4621     {
4622       c = *dbp++;
4623       if (c == '\0')            /* if end of line */
4624         {
4625           readline (&lb, inf);
4626           dbp = lb.buffer;
4627           if (*dbp == '\0')
4628             continue;
4629           if (!((found_tag && verify_tag)
4630                 || get_tagname))
4631             c = *dbp++;         /* only if don't need *dbp pointing
4632                                    to the beginning of the name of
4633                                    the procedure or function */
4634         }
4635       if (incomment)
4636         {
4637           if (c == '}')         /* within { } comments */
4638             incomment = FALSE;
4639           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4640             {
4641               dbp++;
4642               incomment = FALSE;
4643             }
4644           continue;
4645         }
4646       else if (inquote)
4647         {
4648           if (c == '\'')
4649             inquote = FALSE;
4650           continue;
4651         }
4652       else
4653         switch (c)
4654           {
4655           case '\'':
4656             inquote = TRUE;     /* found first quote */
4657             continue;
4658           case '{':             /* found open { comment */
4659             incomment = TRUE;
4660             continue;
4661           case '(':
4662             if (*dbp == '*')    /* found open (* comment */
4663               {
4664                 incomment = TRUE;
4665                 dbp++;
4666               }
4667             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4668               inparms = TRUE;
4669             continue;
4670           case ')':             /* end of parms list */
4671             if (inparms)
4672               inparms = FALSE;
4673             continue;
4674           case ';':
4675             if (found_tag && !inparms) /* end of proc or fn stmt */
4676               {
4677                 verify_tag = TRUE;
4678                 break;
4679               }
4680             continue;
4681           }
4682       if (found_tag && verify_tag && (*dbp != ' '))
4683         {
4684           /* Check if this is an "extern" declaration. */
4685           if (*dbp == '\0')
4686             continue;
4687           if (lowcase (*dbp == 'e'))
4688             {
4689               if (nocase_tail ("extern")) /* superfluous, really! */
4690                 {
4691                   found_tag = FALSE;
4692                   verify_tag = FALSE;
4693                 }
4694             }
4695           else if (lowcase (*dbp) == 'f')
4696             {
4697               if (nocase_tail ("forward")) /* check for forward reference */
4698                 {
4699                   found_tag = FALSE;
4700                   verify_tag = FALSE;
4701                 }
4702             }
4703           if (found_tag && verify_tag) /* not external proc, so make tag */
4704             {
4705               found_tag = FALSE;
4706               verify_tag = FALSE;
4707               make_tag (name, namelen, TRUE,
4708                         tline.buffer, taglen, save_lineno, save_lcno);
4709               continue;
4710             }
4711         }
4712       if (get_tagname)          /* grab name of proc or fn */
4713         {
4714           char *cp;
4715
4716           if (*dbp == '\0')
4717             continue;
4718
4719           /* Find block name. */
4720           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4721             continue;
4722
4723           /* Save all values for later tagging. */
4724           linebuffer_setlen (&tline, lb.len);
4725           strcpy (tline.buffer, lb.buffer);
4726           save_lineno = lineno;
4727           save_lcno = linecharno;
4728           name = tline.buffer + (dbp - lb.buffer);
4729           namelen = cp - dbp;
4730           taglen = cp - lb.buffer + 1;
4731
4732           dbp = cp;             /* set dbp to e-o-token */
4733           get_tagname = FALSE;
4734           found_tag = TRUE;
4735           continue;
4736
4737           /* And proceed to check for "extern". */
4738         }
4739       else if (!incomment && !inquote && !found_tag)
4740         {
4741           /* Check for proc/fn keywords. */
4742           switch (lowcase (c))
4743             {
4744             case 'p':
4745               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4746                 get_tagname = TRUE;
4747               continue;
4748             case 'f':
4749               if (nocase_tail ("unction"))
4750                 get_tagname = TRUE;
4751               continue;
4752             }
4753         }
4754     } /* while not eof */
4755
4756   free (tline.buffer);
4757 }
4758
4759 \f
4760 /*
4761  * Lisp tag functions
4762  *  look for (def or (DEF, quote or QUOTE
4763  */
4764
4765 static void L_getit (void);
4766
4767 static void
4768 L_getit (void)
4769 {
4770   if (*dbp == '\'')             /* Skip prefix quote */
4771     dbp++;
4772   else if (*dbp == '(')
4773   {
4774     dbp++;
4775     /* Try to skip "(quote " */
4776     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4777       /* Ok, then skip "(" before name in (defstruct (foo)) */
4778       dbp = skip_spaces (dbp);
4779   }
4780   get_tag (dbp, NULL);
4781 }
4782
4783 static void
4784 Lisp_functions (FILE *inf)
4785 {
4786   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4787     {
4788       if (dbp[0] != '(')
4789         continue;
4790
4791       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4792         {
4793           dbp = skip_non_spaces (dbp);
4794           dbp = skip_spaces (dbp);
4795           L_getit ();
4796         }
4797       else
4798         {
4799           /* Check for (foo::defmumble name-defined ... */
4800           do
4801             dbp++;
4802           while (!notinname (*dbp) && *dbp != ':');
4803           if (*dbp == ':')
4804             {
4805               do
4806                 dbp++;
4807               while (*dbp == ':');
4808
4809               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4810                 {
4811                   dbp = skip_non_spaces (dbp);
4812                   dbp = skip_spaces (dbp);
4813                   L_getit ();
4814                 }
4815             }
4816         }
4817     }
4818 }
4819
4820 \f
4821 /*
4822  * Lua script language parsing
4823  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4824  *
4825  *  "function" and "local function" are tags if they start at column 1.
4826  */
4827 static void
4828 Lua_functions (FILE *inf)
4829 {
4830   register char *bp;
4831
4832   LOOP_ON_INPUT_LINES (inf, lb, bp)
4833     {
4834       if (bp[0] != 'f' && bp[0] != 'l')
4835         continue;
4836
4837       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4838
4839       if (LOOKING_AT (bp, "function"))
4840         get_tag (bp, NULL);
4841     }
4842 }
4843
4844 \f
4845 /*
4846  * Postscript tags
4847  * Just look for lines where the first character is '/'
4848  * Also look at "defineps" for PSWrap
4849  * Ideas by:
4850  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4851  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4852  */
4853 static void
4854 PS_functions (FILE *inf)
4855 {
4856   register char *bp, *ep;
4857
4858   LOOP_ON_INPUT_LINES (inf, lb, bp)
4859     {
4860       if (bp[0] == '/')
4861         {
4862           for (ep = bp+1;
4863                *ep != '\0' && *ep != ' ' && *ep != '{';
4864                ep++)
4865             continue;
4866           make_tag (bp, ep - bp, TRUE,
4867                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4868         }
4869       else if (LOOKING_AT (bp, "defineps"))
4870         get_tag (bp, NULL);
4871     }
4872 }
4873
4874 \f
4875 /*
4876  * Forth tags
4877  * Ignore anything after \ followed by space or in ( )
4878  * Look for words defined by :
4879  * Look for constant, code, create, defer, value, and variable
4880  * OBP extensions:  Look for buffer:, field,
4881  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4882  */
4883 static void
4884 Forth_words (FILE *inf)
4885 {
4886   register char *bp;
4887
4888   LOOP_ON_INPUT_LINES (inf, lb, bp)
4889     while ((bp = skip_spaces (bp))[0] != '\0')
4890       if (bp[0] == '\\' && iswhite(bp[1]))
4891         break;                  /* read next line */
4892       else if (bp[0] == '(' && iswhite(bp[1]))
4893         do                      /* skip to ) or eol */
4894           bp++;
4895         while (*bp != ')' && *bp != '\0');
4896       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4897                || LOOKING_AT_NOCASE (bp, "constant")
4898                || LOOKING_AT_NOCASE (bp, "code")
4899                || LOOKING_AT_NOCASE (bp, "create")
4900                || LOOKING_AT_NOCASE (bp, "defer")
4901                || LOOKING_AT_NOCASE (bp, "value")
4902                || LOOKING_AT_NOCASE (bp, "variable")
4903                || LOOKING_AT_NOCASE (bp, "buffer:")
4904                || LOOKING_AT_NOCASE (bp, "field"))
4905         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4906       else
4907         bp = skip_non_spaces (bp);
4908 }
4909
4910 \f
4911 /*
4912  * Scheme tag functions
4913  * look for (def... xyzzy
4914  *          (def... (xyzzy
4915  *          (def ... ((...(xyzzy ....
4916  *          (set! xyzzy
4917  * Original code by Ken Haase (1985?)
4918  */
4919 static void
4920 Scheme_functions (FILE *inf)
4921 {
4922   register char *bp;
4923
4924   LOOP_ON_INPUT_LINES (inf, lb, bp)
4925     {
4926       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4927         {
4928           bp = skip_non_spaces (bp+4);
4929           /* Skip over open parens and white space.  Don't continue past
4930              '\0'. */
4931           while (*bp && notinname (*bp))
4932             bp++;
4933           get_tag (bp, NULL);
4934         }
4935       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4936         get_tag (bp, NULL);
4937     }
4938 }
4939
4940 \f
4941 /* Find tags in TeX and LaTeX input files.  */
4942
4943 /* TEX_toktab is a table of TeX control sequences that define tags.
4944  * Each entry records one such control sequence.
4945  *
4946  * Original code from who knows whom.
4947  * Ideas by:
4948  *   Stefan Monnier (2002)
4949  */
4950
/* Table with tag tokens.  It is built by TEX_decode_env the first time
   TeX_commands runs; the entry whose buffer is NULL marks its end. */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons. */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters used while scanning.  TEX_mode
   resets all three for each input file. */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4967 /*
4968  * TeX/LaTeX scanning loop.
4969  */
static void
TeX_commands (FILE *inf)
{
  char *cp;
  linebuffer *key;              /* current entry of TEX_toktab */

  /* Select either \ or ! as escape character.  */
  TEX_mode (inf);

  /* Initialize token table once from environment. */
  if (TEX_toktab == NULL)
    TEX_decode_env ("TEXTAGS", TEX_defenv);

  LOOP_ON_INPUT_LINES (inf, lb, cp)
    {
      /* Look at each TEX keyword in line. */
      for (;;)
        {
          /* Look for a TEX escape.  Reaching the end of the line or a
             `%' first means there is nothing more to tag here. */
          while (*cp++ != TEX_esc)
            if (cp[-1] == '\0' || cp[-1] == '%')
              goto tex_next_line;

          /* CP now points just past the escape character; compare the
             text there with every control sequence in the table. */
          for (key = TEX_toktab; key->buffer != NULL; key++)
            if (strneq (cp, key->buffer, key->len))
              {
                register char *p;
                int namelen, linelen;
                bool opgrp = FALSE; /* name is preceded by an open-group
                                       character */

                cp = skip_spaces (cp + key->len);
                if (*cp == TEX_opgrp)
                  {
                    opgrp = TRUE;
                    cp++;
                  }
                /* The name ends at white space, `#' or a grouping
                   character. */
                for (p = cp;
                     (!iswhite (*p) && *p != '#' &&
                      *p != TEX_opgrp && *p != TEX_clgrp);
                     p++)
                  continue;
                namelen = p - cp;
                /* By default the whole line is recorded with the tag.
                   If no group was opened, or the name is directly
                   followed by the close-group character, record only
                   the text up to the next grouping character. */
                linelen = lb.len;
                if (!opgrp || *p == TEX_clgrp)
                  {
                    while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
                      p++;
                    linelen = p - lb.buffer + 1;
                  }
                make_tag (cp, namelen, TRUE,
                          lb.buffer, linelen, lineno, linecharno);
                goto tex_next_line; /* We only tag a line once */
              }
        }
    tex_next_line:
      ;                         /* a label must be followed by a statement */
    }
}
5028
5029 #define TEX_LESC '\\'
5030 #define TEX_SESC '!'
5031
5032 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5033    chars accordingly. */
5034 static void
5035 TEX_mode (FILE *inf)
5036 {
5037   int c;
5038
5039   while ((c = getc (inf)) != EOF)
5040     {
5041       /* Skip to next line if we hit the TeX comment char. */
5042       if (c == '%')
5043         while (c != '\n' && c != EOF)
5044           c = getc (inf);
5045       else if (c == TEX_LESC || c == TEX_SESC )
5046         break;
5047     }
5048
5049   if (c == TEX_LESC)
5050     {
5051       TEX_esc = TEX_LESC;
5052       TEX_opgrp = '{';
5053       TEX_clgrp = '}';
5054     }
5055   else
5056     {
5057       TEX_esc = TEX_SESC;
5058       TEX_opgrp = '<';
5059       TEX_clgrp = '>';
5060     }
5061   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5062      No attempt is made to correct the situation. */
5063   rewind (inf);
5064 }
5065
5066 /* Read environment and prepend it to the default string.
5067    Build token table. */
5068 static void
5069 TEX_decode_env (const char *evarname, const char *defenv)
5070 {
5071   register const char *env, *p;
5072   int i, len;
5073
5074   /* Append default string to environment. */
5075   env = getenv (evarname);
5076   if (!env)
5077     env = defenv;
5078   else
5079     env = concat (env, defenv, "");
5080
5081   /* Allocate a token table */
5082   for (len = 1, p = env; p;)
5083     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5084       len++;
5085   TEX_toktab = xnew (len, linebuffer);
5086
5087   /* Unpack environment string into token table. Be careful about */
5088   /* zero-length strings (leading ':', "::" and trailing ':') */
5089   for (i = 0; *env != '\0';)
5090     {
5091       p = etags_strchr (env, ':');
5092       if (!p)                   /* End of environment string. */
5093         p = env + strlen (env);
5094       if (p - env > 0)
5095         {                       /* Only non-zero strings. */
5096           TEX_toktab[i].buffer = savenstr (env, p - env);
5097           TEX_toktab[i].len = p - env;
5098           i++;
5099         }
5100       if (*p)
5101         env = p + 1;
5102       else
5103         {
5104           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5105           TEX_toktab[i].len = 0;
5106           break;
5107         }
5108     }
5109 }
5110
5111 \f
5112 /* Texinfo support.  Dave Love, Mar. 2000.  */
5113 static void
5114 Texinfo_nodes (FILE *inf)
5115 {
5116   char *cp, *start;
5117   LOOP_ON_INPUT_LINES (inf, lb, cp)
5118     if (LOOKING_AT (cp, "@node"))
5119       {
5120         start = cp;
5121         while (*cp != '\0' && *cp != ',')
5122           cp++;
5123         make_tag (start, cp - start, TRUE,
5124                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5125       }
5126 }
5127
5128 \f
5129 /*
5130  * HTML support.
5131  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5132  * Contents of <a name=xxx> are tags with name xxx.
5133  *
5134  * Francesco Potortì, 2002.
5135  */
5136 static void
5137 HTML_labels (FILE *inf)
5138 {
5139   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5140   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5141   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5142   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5143   char *end;
5144
5145
5146   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5147
5148   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5149     for (;;)                    /* loop on the same line */
5150       {
5151         if (skiptag)            /* skip HTML tag */
5152           {
5153             while (*dbp != '\0' && *dbp != '>')
5154               dbp++;
5155             if (*dbp == '>')
5156               {
5157                 dbp += 1;
5158                 skiptag = FALSE;
5159                 continue;       /* look on the same line */
5160               }
5161             break;              /* go to next line */
5162           }
5163
5164         else if (intag) /* look for "name=" or "id=" */
5165           {
5166             while (*dbp != '\0' && *dbp != '>'
5167                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5168               dbp++;
5169             if (*dbp == '\0')
5170               break;            /* go to next line */
5171             if (*dbp == '>')
5172               {
5173                 dbp += 1;
5174                 intag = FALSE;
5175                 continue;       /* look on the same line */
5176               }
5177             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5178                 || LOOKING_AT_NOCASE (dbp, "id="))
5179               {
5180                 bool quoted = (dbp[0] == '"');
5181
5182                 if (quoted)
5183                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5184                     continue;
5185                 else
5186                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5187                     continue;
5188                 linebuffer_setlen (&token_name, end - dbp);
5189                 strncpy (token_name.buffer, dbp, end - dbp);
5190                 token_name.buffer[end - dbp] = '\0';
5191
5192                 dbp = end;
5193                 intag = FALSE;  /* we found what we looked for */
5194                 skiptag = TRUE; /* skip to the end of the tag */
5195                 getnext = TRUE; /* then grab the text */
5196                 continue;       /* look on the same line */
5197               }
5198             dbp += 1;
5199           }
5200
5201         else if (getnext)       /* grab next tokens and tag them */
5202           {
5203             dbp = skip_spaces (dbp);
5204             if (*dbp == '\0')
5205               break;            /* go to next line */
5206             if (*dbp == '<')
5207               {
5208                 intag = TRUE;
5209                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5210                 continue;       /* look on the same line */
5211               }
5212
5213             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5214               continue;
5215             make_tag (token_name.buffer, token_name.len, TRUE,
5216                       dbp, end - dbp, lineno, linecharno);
5217             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5218             getnext = FALSE;
5219             break;              /* go to next line */
5220           }
5221
5222         else                    /* look for an interesting HTML tag */
5223           {
5224             while (*dbp != '\0' && *dbp != '<')
5225               dbp++;
5226             if (*dbp == '\0')
5227               break;            /* go to next line */
5228             intag = TRUE;
5229             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5230               {
5231                 inanchor = TRUE;
5232                 continue;       /* look on the same line */
5233               }
5234             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5235                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5236                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5237                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5238               {
5239                 intag = FALSE;
5240                 getnext = TRUE;
5241                 continue;       /* look on the same line */
5242               }
5243             dbp += 1;
5244           }
5245       }
5246 }
5247
5248 \f
5249 /*
5250  * Prolog support
5251  *
5252  * Assumes that the predicate or rule starts at column 0.
5253  * Only the first clause of a predicate or rule is added.
5254  * Original code by Sunichirou Sugou (1989)
5255  * Rewritten by Anders Lindgren (1996)
5256  */
5257 static size_t prolog_pr (char *, char *);
5258 static void prolog_skip_comment (linebuffer *, FILE *);
5259 static size_t prolog_atom (char *, size_t);
5260
5261 static void
5262 Prolog_functions (FILE *inf)
5263 {
5264   char *cp, *last;
5265   size_t len;
5266   size_t allocated;
5267
5268   allocated = 0;
5269   len = 0;
5270   last = NULL;
5271
5272   LOOP_ON_INPUT_LINES (inf, lb, cp)
5273     {
5274       if (cp[0] == '\0')        /* Empty line */
5275         continue;
5276       else if (iswhite (cp[0])) /* Not a predicate */
5277         continue;
5278       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5279         prolog_skip_comment (&lb, inf);
5280       else if ((len = prolog_pr (cp, last)) > 0)
5281         {
5282           /* Predicate or rule.  Store the function name so that we
5283              only generate a tag for the first clause.  */
5284           if (last == NULL)
5285             last = xnew(len + 1, char);
5286           else if (len + 1 > allocated)
5287             xrnew (last, len + 1, char);
5288           allocated = len + 1;
5289           strncpy (last, cp, len);
5290           last[len] = '\0';
5291         }
5292     }
5293   free (last);
5294 }
5295
5296
5297 static void
5298 prolog_skip_comment (linebuffer *plb, FILE *inf)
5299 {
5300   char *cp;
5301
5302   do
5303     {
5304       for (cp = plb->buffer; *cp != '\0'; cp++)
5305         if (cp[0] == '*' && cp[1] == '/')
5306           return;
5307       readline (plb, inf);
5308     }
5309   while (!feof(inf));
5310 }
5311
5312 /*
5313  * A predicate or rule definition is added if it matches:
5314  *     <beginning of line><Prolog Atom><whitespace>(
5315  * or  <beginning of line><Prolog Atom><whitespace>:-
5316  *
5317  * It is added to the tags database if it doesn't match the
5318  * name of the previous clause header.
5319  *
5320  * Return the size of the name of the predicate or rule, or 0 if no
5321  * header was found.
5322  */
5323 static size_t
5324 prolog_pr (char *s, char *last)
5325
5326                                 /* Name of last clause. */
5327 {
5328   size_t pos;
5329   size_t len;
5330
5331   pos = prolog_atom (s, 0);
5332   if (! pos)
5333     return 0;
5334
5335   len = pos;
5336   pos = skip_spaces (s + pos) - s;
5337
5338   if ((s[pos] == '.'
5339        || (s[pos] == '(' && (pos += 1))
5340        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5341       && (last == NULL          /* save only the first clause */
5342           || len != strlen (last)
5343           || !strneq (s, last, len)))
5344         {
5345           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5346           return len;
5347         }
5348   else
5349     return 0;
5350 }
5351
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  size_t i = pos;

  if (ISLOWER (s[i]) || s[i] == '_')
    {
      /* The atom is unquoted. */
      for (i++; ISALNUM (s[i]) || s[i] == '_'; i++)
        continue;
      return i - pos;
    }

  if (s[i] != '\'')
    return 0;                   /* neither kind of atom */

  /* The atom is quoted: scan up to the closing quote. */
  i++;
  for (;;)
    switch (s[i])
      {
      case '\'':
        i++;
        if (s[i] != '\'')
          return i - pos;       /* that was the closing quote */
        i++;                    /* a doubled quote stands for one quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (s[i + 1] == '\0')
          return 0;
        i += 2;                 /* skip the backslash and what it quotes */
        break;

      default:
        i++;
        break;
      }
}
5408
5409 \f
5410 /*
5411  * Support for Erlang
5412  *
5413  * Generates tags for functions, defines, and records.
5414  * Assumes that Erlang functions start at column 0.
5415  * Original code by Anders Lindgren (1996)
5416  */
5417 static int erlang_func (char *, char *);
5418 static void erlang_attribute (char *);
5419 static int erlang_atom (char *);
5420
5421 static void
5422 Erlang_functions (FILE *inf)
5423 {
5424   char *cp, *last;
5425   int len;
5426   int allocated;
5427
5428   allocated = 0;
5429   len = 0;
5430   last = NULL;
5431
5432   LOOP_ON_INPUT_LINES (inf, lb, cp)
5433     {
5434       if (cp[0] == '\0')        /* Empty line */
5435         continue;
5436       else if (iswhite (cp[0])) /* Not function nor attribute */
5437         continue;
5438       else if (cp[0] == '%')    /* comment */
5439         continue;
5440       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5441         continue;
5442       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5443         {
5444           erlang_attribute (cp);
5445           if (last != NULL)
5446             {
5447               free (last);
5448               last = NULL;
5449             }
5450         }
5451       else if ((len = erlang_func (cp, last)) > 0)
5452         {
5453           /*
5454            * Function.  Store the function name so that we only
5455            * generates a tag for the first clause.
5456            */
5457           if (last == NULL)
5458             last = xnew (len + 1, char);
5459           else if (len + 1 > allocated)
5460             xrnew (last, len + 1, char);
5461           allocated = len + 1;
5462           strncpy (last, cp, len);
5463           last[len] = '\0';
5464         }
5465     }
5466   free (last);
5467 }
5468
5469
5470 /*
5471  * A function definition is added if it matches:
5472  *     <beginning of line><Erlang Atom><whitespace>(
5473  *
5474  * It is added to the tags database if it doesn't match the
5475  * name of the previous clause header.
5476  *
5477  * Return the size of the name of the function, or 0 if no function
5478  * was found.
5479  */
5480 static int
5481 erlang_func (char *s, char *last)
5482
5483                                 /* Name of last clause. */
5484 {
5485   int pos;
5486   int len;
5487
5488   pos = erlang_atom (s);
5489   if (pos < 1)
5490     return 0;
5491
5492   len = pos;
5493   pos = skip_spaces (s + pos) - s;
5494
5495   /* Save only the first clause. */
5496   if (s[pos++] == '('
5497       && (last == NULL
5498           || len != (int)strlen (last)
5499           || !strneq (s, last, len)))
5500         {
5501           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5502           return len;
5503         }
5504
5505   return 0;
5506 }
5507
5508
5509 /*
5510  * Handle attributes.  Currently, tags are generated for defines
5511  * and records.
5512  *
5513  * They are on the form:
5514  * -define(foo, bar).
5515  * -define(Foo(M, N), M+N).
5516  * -record(graph, {vtab = notable, cyclic = true}).
5517  */
5518 static void
5519 erlang_attribute (char *s)
5520 {
5521   char *cp = s;
5522
5523   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5524       && *cp++ == '(')
5525     {
5526       int len = erlang_atom (skip_spaces (cp));
5527       if (len > 0)
5528         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5529     }
5530   return;
5531 }
5532
5533
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos;
    }

  if (s[0] != '\'')
    return 0;                   /* not an atom at all */

  /* The atom is quoted: look for the closing quote. */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;                /* the backslash quotes the next char */
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote too */
}
5561
5562 \f
5563 static char *scan_separators (char *);
5564 static void add_regex (char *, language *);
5565 static char *substitute (char *, char *, struct re_registers *);
5566
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of it.  A backslash quotes the next character:
 * \a, \b, \d, \e, \f, \n, \r, \t and \v become the corresponding
 * control characters, a quoted separator becomes a plain separator,
 * and any other quoted character is kept together with its backslash.
 * A backslash at the very end of the string is dropped.
 * Works in place: the result is stored at NAME and null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src = name + 1;         /* next input char to examine */

  while (*src != '\0')
    {
      char c = *src;

      if (c == '\\')
        {
          /* Quoted character: look at what follows the backslash. */
          c = *++src;
          if (c == '\0')
            break;              /* nothing follows */
          switch (c)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)            */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)       */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)          */
            case 'e': *dst++ = 033; break;    /* ESC (escape)          */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)         */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              /* Something else is quoted, so preserve the quote,
                 unless it is the separator itself. */
              if (c != sep)
                *dst++ = '\\';
              *dst++ = c;
              break;
            }
        }
      else if (c == sep)
        break;                  /* unquoted separator: done */
      else
        *dst++ = c;

      src++;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5625
5626 /* Look at the argument of --regex or --no-regex and do the right
5627    thing.  Same for each line of a regexp file. */
5628 static void
5629 analyse_regex (char *regex_arg)
5630 {
5631   if (regex_arg == NULL)
5632     {
5633       free_regexps ();          /* --no-regex: remove existing regexps */
5634       return;
5635     }
5636
5637   /* A real --regexp option or a line in a regexp file. */
5638   switch (regex_arg[0])
5639     {
5640       /* Comments in regexp file or null arg to --regex. */
5641     case '\0':
5642     case ' ':
5643     case '\t':
5644       break;
5645
5646       /* Read a regex file.  This is recursive and may result in a
5647          loop, which will stop when the file descriptors are exhausted. */
5648     case '@':
5649       {
5650         FILE *regexfp;
5651         linebuffer regexbuf;
5652         char *regexfile = regex_arg + 1;
5653
5654         /* regexfile is a file containing regexps, one per line. */
5655         regexfp = fopen (regexfile, "r");
5656         if (regexfp == NULL)
5657           {
5658             pfatal (regexfile);
5659             return;
5660           }
5661         linebuffer_init (&regexbuf);
5662         while (readline_internal (&regexbuf, regexfp) > 0)
5663           analyse_regex (regexbuf.buffer);
5664         free (regexbuf.buffer);
5665         fclose (regexfp);
5666       }
5667       break;
5668
5669       /* Regexp to be used for a specific language only. */
5670     case '{':
5671       {
5672         language *lang;
5673         char *lang_name = regex_arg + 1;
5674         char *cp;
5675
5676         for (cp = lang_name; *cp != '}'; cp++)
5677           if (*cp == '\0')
5678             {
5679               error ("unterminated language name in regex: %s", regex_arg);
5680               return;
5681             }
5682         *cp++ = '\0';
5683         lang = get_language_from_langname (lang_name);
5684         if (lang == NULL)
5685           return;
5686         add_regex (cp, lang);
5687       }
5688       break;
5689
5690       /* Regexp to be used for any language. */
5691     default:
5692       add_regex (regex_arg, NULL);
5693       break;
5694     }
5695 }
5696
5697 /* Separate the regexp pattern, compile it,
5698    and care for optional name and modifiers. */
5699 static void
5700 add_regex (char *regexp_pattern, language *lang)
5701 {
5702   static struct re_pattern_buffer zeropattern;
5703   char sep, *pat, *name, *modifiers;
5704   char empty[] = "";
5705   const char *err;
5706   struct re_pattern_buffer *patbuf;
5707   regexp *rp;
5708   bool
5709     force_explicit_name = TRUE, /* do not use implicit tag names */
5710     ignore_case = FALSE,        /* case is significant */
5711     multi_line = FALSE,         /* matches are done one line at a time */
5712     single_line = FALSE;        /* dot does not match newline */
5713
5714
5715   if (strlen(regexp_pattern) < 3)
5716     {
5717       error ("null regexp", (char *)NULL);
5718       return;
5719     }
5720   sep = regexp_pattern[0];
5721   name = scan_separators (regexp_pattern);
5722   if (name == NULL)
5723     {
5724       error ("%s: unterminated regexp", regexp_pattern);
5725       return;
5726     }
5727   if (name[1] == sep)
5728     {
5729       error ("null name for regexp \"%s\"", regexp_pattern);
5730       return;
5731     }
5732   modifiers = scan_separators (name);
5733   if (modifiers == NULL)        /* no terminating separator --> no name */
5734     {
5735       modifiers = name;
5736       name = empty;
5737     }
5738   else
5739     modifiers += 1;             /* skip separator */
5740
5741   /* Parse regex modifiers. */
5742   for (; modifiers[0] != '\0'; modifiers++)
5743     switch (modifiers[0])
5744       {
5745       case 'N':
5746         if (modifiers == name)
5747           error ("forcing explicit tag name but no name, ignoring", NULL);
5748         force_explicit_name = TRUE;
5749         break;
5750       case 'i':
5751         ignore_case = TRUE;
5752         break;
5753       case 's':
5754         single_line = TRUE;
5755         /* FALLTHRU */
5756       case 'm':
5757         multi_line = TRUE;
5758         need_filebuf = TRUE;
5759         break;
5760       default:
5761         {
5762           char wrongmod [2];
5763           wrongmod[0] = modifiers[0];
5764           wrongmod[1] = '\0';
5765           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5766         }
5767         break;
5768       }
5769
5770   patbuf = xnew (1, struct re_pattern_buffer);
5771   *patbuf = zeropattern;
5772   if (ignore_case)
5773     {
5774       static char lc_trans[CHARS];
5775       int i;
5776       for (i = 0; i < CHARS; i++)
5777         lc_trans[i] = lowcase (i);
5778       patbuf->translate = lc_trans;     /* translation table to fold case  */
5779     }
5780
5781   if (multi_line)
5782     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5783   else
5784     pat = regexp_pattern;
5785
5786   if (single_line)
5787     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5788   else
5789     re_set_syntax (RE_SYNTAX_EMACS);
5790
5791   err = re_compile_pattern (pat, strlen (pat), patbuf);
5792   if (multi_line)
5793     free (pat);
5794   if (err != NULL)
5795     {
5796       error ("%s while compiling pattern", err);
5797       return;
5798     }
5799
5800   rp = p_head;
5801   p_head = xnew (1, regexp);
5802   p_head->pattern = savestr (regexp_pattern);
5803   p_head->p_next = rp;
5804   p_head->lang = lang;
5805   p_head->pat = patbuf;
5806   p_head->name = savestr (name);
5807   p_head->error_signaled = FALSE;
5808   p_head->force_explicit_name = force_explicit_name;
5809   p_head->ignore_case = ignore_case;
5810   p_head->multi_line = multi_line;
5811 }
5812
5813 /*
5814  * Do the substitutions indicated by the regular expression and
5815  * arguments.
5816  */
5817 static char *
5818 substitute (char *in, char *out, struct re_registers *regs)
5819 {
5820   char *result, *t;
5821   int size, dig, diglen;
5822
5823   result = NULL;
5824   size = strlen (out);
5825
5826   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5827   if (out[size - 1] == '\\')
5828     fatal ("pattern error in \"%s\"", out);
5829   for (t = etags_strchr (out, '\\');
5830        t != NULL;
5831        t = etags_strchr (t + 2, '\\'))
5832     if (ISDIGIT (t[1]))
5833       {
5834         dig = t[1] - '0';
5835         diglen = regs->end[dig] - regs->start[dig];
5836         size += diglen - 2;
5837       }
5838     else
5839       size -= 1;
5840
5841   /* Allocate space and do the substitutions. */
5842   assert (size >= 0);
5843   result = xnew (size + 1, char);
5844
5845   for (t = result; *out != '\0'; out++)
5846     if (*out == '\\' && ISDIGIT (*++out))
5847       {
5848         dig = *out - '0';
5849         diglen = regs->end[dig] - regs->start[dig];
5850         strncpy (t, in + regs->start[dig], diglen);
5851         t += diglen;
5852       }
5853     else
5854       *t++ = *out;
5855   *t = '\0';
5856
5857   assert (t <= result + size);
5858   assert (t - result == (int)strlen (result));
5859
5860   return result;
5861 }
5862
5863 /* Deallocate all regexps. */
5864 static void
5865 free_regexps (void)
5866 {
5867   regexp *rp;
5868   while (p_head != NULL)
5869     {
5870       rp = p_head->p_next;
5871       free (p_head->pattern);
5872       free (p_head->name);
5873       free (p_head);
5874       p_head = rp;
5875     }
5876   return;
5877 }
5878
5879 /*
5880  * Reads the whole file as a single string from `filebuf' and looks for
5881  * multi-line regular expressions, creating tags on matches.
5882  * readline already dealt with normal regexps.
5883  *
5884  * Idea by Ben Wing <ben@666.com> (2002).
5885  */
5886 static void
5887 regex_tag_multiline (void)
5888 {
5889   char *buffer = filebuf.buffer;
5890   regexp *rp;
5891   char *name;
5892
5893   for (rp = p_head; rp != NULL; rp = rp->p_next)
5894     {
5895       int match = 0;
5896
5897       if (!rp->multi_line)
5898         continue;               /* skip normal regexps */
5899
5900       /* Generic initialisations before parsing file from memory. */
5901       lineno = 1;               /* reset global line number */
5902       charno = 0;               /* reset global char number */
5903       linecharno = 0;           /* reset global char number of line start */
5904
5905       /* Only use generic regexps or those for the current language. */
5906       if (rp->lang != NULL && rp->lang != curfdp->lang)
5907         continue;
5908
5909       while (match >= 0 && match < filebuf.len)
5910         {
5911           match = re_search (rp->pat, buffer, filebuf.len, charno,
5912                              filebuf.len - match, &rp->regs);
5913           switch (match)
5914             {
5915             case -2:
5916               /* Some error. */
5917               if (!rp->error_signaled)
5918                 {
5919                   error ("regexp stack overflow while matching \"%s\"",
5920                          rp->pattern);
5921                   rp->error_signaled = TRUE;
5922                 }
5923               break;
5924             case -1:
5925               /* No match. */
5926               break;
5927             default:
5928               if (match == rp->regs.end[0])
5929                 {
5930                   if (!rp->error_signaled)
5931                     {
5932                       error ("regexp matches the empty string: \"%s\"",
5933                              rp->pattern);
5934                       rp->error_signaled = TRUE;
5935                     }
5936                   match = -3;   /* exit from while loop */
5937                   break;
5938                 }
5939
5940               /* Match occurred.  Construct a tag. */
5941               while (charno < rp->regs.end[0])
5942                 if (buffer[charno++] == '\n')
5943                   lineno++, linecharno = charno;
5944               name = rp->name;
5945               if (name[0] == '\0')
5946                 name = NULL;
5947               else /* make a named tag */
5948                 name = substitute (buffer, rp->name, &rp->regs);
5949               if (rp->force_explicit_name)
5950                 /* Force explicit tag name, if a name is there. */
5951                 pfnote (name, TRUE, buffer + linecharno,
5952                         charno - linecharno + 1, lineno, linecharno);
5953               else
5954                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5955                           charno - linecharno + 1, lineno, linecharno);
5956               break;
5957             }
5958         }
5959     }
5960 }
5961
5962 \f
5963 static bool
5964 nocase_tail (const char *cp)
5965 {
5966   register int len = 0;
5967
5968   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5969     cp++, len++;
5970   if (*cp == '\0' && !intoken (dbp[len]))
5971     {
5972       dbp += len;
5973       return TRUE;
5974     }
5975   return FALSE;
5976 }
5977
5978 static void
5979 get_tag (register char *bp, char **namepp)
5980 {
5981   register char *cp = bp;
5982
5983   if (*bp != '\0')
5984     {
5985       /* Go till you get to white space or a syntactic break */
5986       for (cp = bp + 1; !notinname (*cp); cp++)
5987         continue;
5988       make_tag (bp, cp - bp, TRUE,
5989                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5990     }
5991
5992   if (namepp != NULL)
5993     *namepp = savenstr (bp, cp - bp);
5994 }
5995
5996 /*
5997  * Read a line of text from `stream' into `lbp', excluding the
5998  * newline or CR-NL, if any.  Return the number of characters read from
5999  * `stream', which is the length of the line including the newline.
6000  *
6001  * On DOS or Windows we do not count the CR character, if any before the
6002  * NL, in the returned length; this mirrors the behavior of Emacs on those
6003  * platforms (for text files, it translates CR-NL to NL as it reads in the
6004  * file).
6005  *
6006  * If multi-line regular expressions are requested, each line read is
6007  * appended to `filebuf'.
6008  */
6009 static long
6010 readline_internal (linebuffer *lbp, register FILE *stream)
6011 {
6012   char *buffer = lbp->buffer;
6013   register char *p = lbp->buffer;
6014   register char *pend;
6015   int chars_deleted;
6016
6017   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6018
6019   for (;;)
6020     {
6021       register int c = getc (stream);
6022       if (p == pend)
6023         {
6024           /* We're at the end of linebuffer: expand it. */
6025           lbp->size *= 2;
6026           xrnew (buffer, lbp->size, char);
6027           p += buffer - lbp->buffer;
6028           pend = buffer + lbp->size;
6029           lbp->buffer = buffer;
6030         }
6031       if (c == EOF)
6032         {
6033           *p = '\0';
6034           chars_deleted = 0;
6035           break;
6036         }
6037       if (c == '\n')
6038         {
6039           if (p > buffer && p[-1] == '\r')
6040             {
6041               p -= 1;
6042 #ifdef DOS_NT
6043              /* Assume CRLF->LF translation will be performed by Emacs
6044                 when loading this file, so CRs won't appear in the buffer.
6045                 It would be cleaner to compensate within Emacs;
6046                 however, Emacs does not know how many CRs were deleted
6047                 before any given point in the file.  */
6048               chars_deleted = 1;
6049 #else
6050               chars_deleted = 2;
6051 #endif
6052             }
6053           else
6054             {
6055               chars_deleted = 1;
6056             }
6057           *p = '\0';
6058           break;
6059         }
6060       *p++ = c;
6061     }
6062   lbp->len = p - buffer;
6063
6064   if (need_filebuf              /* we need filebuf for multi-line regexps */
6065       && chars_deleted > 0)     /* not at EOF */
6066     {
6067       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6068         {
6069           /* Expand filebuf. */
6070           filebuf.size *= 2;
6071           xrnew (filebuf.buffer, filebuf.size, char);
6072         }
6073       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6074       filebuf.len += lbp->len;
6075       filebuf.buffer[filebuf.len++] = '\n';
6076       filebuf.buffer[filebuf.len] = '\0';
6077     }
6078
6079   return lbp->len + chars_deleted;
6080 }
6081
6082 /*
6083  * Like readline_internal, above, but in addition try to match the
6084  * input line against relevant regular expressions and manage #line
6085  * directives.
6086  */
6087 static void
6088 readline (linebuffer *lbp, FILE *stream)
6089 {
6090   long result;
6091
6092   linecharno = charno;          /* update global char number of line start */
6093   result = readline_internal (lbp, stream); /* read line */
6094   lineno += 1;                  /* increment global line number */
6095   charno += result;             /* increment global char number */
6096
6097   /* Honour #line directives. */
6098   if (!no_line_directive)
6099     {
6100       static bool discard_until_line_directive;
6101
6102       /* Check whether this is a #line directive. */
6103       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6104         {
6105           unsigned int lno;
6106           int start = 0;
6107
6108           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6109               && start > 0)     /* double quote character found */
6110             {
6111               char *endp = lbp->buffer + start;
6112
6113               while ((endp = etags_strchr (endp, '"')) != NULL
6114                      && endp[-1] == '\\')
6115                 endp++;
6116               if (endp != NULL)
6117                 /* Ok, this is a real #line directive.  Let's deal with it. */
6118                 {
6119                   char *taggedabsname;  /* absolute name of original file */
6120                   char *taggedfname;    /* name of original file as given */
6121                   char *name;           /* temp var */
6122
6123                   discard_until_line_directive = FALSE; /* found it */
6124                   name = lbp->buffer + start;
6125                   *endp = '\0';
6126                   canonicalize_filename (name);
6127                   taggedabsname = absolute_filename (name, tagfiledir);
6128                   if (filename_is_absolute (name)
6129                       || filename_is_absolute (curfdp->infname))
6130                     taggedfname = savestr (taggedabsname);
6131                   else
6132                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6133
6134                   if (streq (curfdp->taggedfname, taggedfname))
6135                     /* The #line directive is only a line number change.  We
6136                        deal with this afterwards. */
6137                     free (taggedfname);
6138                   else
6139                     /* The tags following this #line directive should be
6140                        attributed to taggedfname.  In order to do this, set
6141                        curfdp accordingly. */
6142                     {
6143                       fdesc *fdp; /* file description pointer */
6144
6145                       /* Go look for a file description already set up for the
6146                          file indicated in the #line directive.  If there is
6147                          one, use it from now until the next #line
6148                          directive. */
6149                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6150                         if (streq (fdp->infname, curfdp->infname)
6151                             && streq (fdp->taggedfname, taggedfname))
6152                           /* If we remove the second test above (after the &&)
6153                              then all entries pertaining to the same file are
6154                              coalesced in the tags file.  If we use it, then
6155                              entries pertaining to the same file but generated
6156                              from different files (via #line directives) will
6157                              go into separate sections in the tags file.  These
6158                              alternatives look equivalent.  The first one
6159                              destroys some apparently useless information. */
6160                           {
6161                             curfdp = fdp;
6162                             free (taggedfname);
6163                             break;
6164                           }
6165                       /* Else, if we already tagged the real file, skip all
6166                          input lines until the next #line directive. */
6167                       if (fdp == NULL) /* not found */
6168                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6169                           if (streq (fdp->infabsname, taggedabsname))
6170                             {
6171                               discard_until_line_directive = TRUE;
6172                               free (taggedfname);
6173                               break;
6174                             }
6175                       /* Else create a new file description and use that from
6176                          now on, until the next #line directive. */
6177                       if (fdp == NULL) /* not found */
6178                         {
6179                           fdp = fdhead;
6180                           fdhead = xnew (1, fdesc);
6181                           *fdhead = *curfdp; /* copy curr. file description */
6182                           fdhead->next = fdp;
6183                           fdhead->infname = savestr (curfdp->infname);
6184                           fdhead->infabsname = savestr (curfdp->infabsname);
6185                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6186                           fdhead->taggedfname = taggedfname;
6187                           fdhead->usecharno = FALSE;
6188                           fdhead->prop = NULL;
6189                           fdhead->written = FALSE;
6190                           curfdp = fdhead;
6191                         }
6192                     }
6193                   free (taggedabsname);
6194                   lineno = lno - 1;
6195                   readline (lbp, stream);
6196                   return;
6197                 } /* if a real #line directive */
6198             } /* if #line is followed by a number */
6199         } /* if line begins with "#line " */
6200
6201       /* If we are here, no #line directive was found. */
6202       if (discard_until_line_directive)
6203         {
6204           if (result > 0)
6205             {
6206               /* Do a tail recursion on ourselves, thus discarding the contents
6207                  of the line buffer. */
6208               readline (lbp, stream);
6209               return;
6210             }
6211           /* End of file. */
6212           discard_until_line_directive = FALSE;
6213           return;
6214         }
6215     } /* if #line directives should be considered */
6216
6217   {
6218     int match;
6219     regexp *rp;
6220     char *name;
6221
6222     /* Match against relevant regexps. */
6223     if (lbp->len > 0)
6224       for (rp = p_head; rp != NULL; rp = rp->p_next)
6225         {
6226           /* Only use generic regexps or those for the current language.
6227              Also do not use multiline regexps, which is the job of
6228              regex_tag_multiline. */
6229           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6230               || rp->multi_line)
6231             continue;
6232
6233           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6234           switch (match)
6235             {
6236             case -2:
6237               /* Some error. */
6238               if (!rp->error_signaled)
6239                 {
6240                   error ("regexp stack overflow while matching \"%s\"",
6241                          rp->pattern);
6242                   rp->error_signaled = TRUE;
6243                 }
6244               break;
6245             case -1:
6246               /* No match. */
6247               break;
6248             case 0:
6249               /* Empty string matched. */
6250               if (!rp->error_signaled)
6251                 {
6252                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6253                   rp->error_signaled = TRUE;
6254                 }
6255               break;
6256             default:
6257               /* Match occurred.  Construct a tag. */
6258               name = rp->name;
6259               if (name[0] == '\0')
6260                 name = NULL;
6261               else /* make a named tag */
6262                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6263               if (rp->force_explicit_name)
6264                 /* Force explicit tag name, if a name is there. */
6265                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6266               else
6267                 make_tag (name, strlen (name), TRUE,
6268                           lbp->buffer, match, lineno, linecharno);
6269               break;
6270             }
6271         }
6272   }
6273 }
6274
6275 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr with the full length of CP, so it occupies strlen (CP) + 1
 * bytes of newly allocated storage.
 */
static char *
savestr (const char *cp)
{
  const int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6285
6286 /*
6287  * Return a pointer to a space of size LEN+1 allocated with xnew where
6288  * the string CP has been copied for at most the first LEN characters.
6289  */
6290 static char *
6291 savenstr (const char *cp, int len)
6292 {
6293   register char *dp;
6294
6295   dp = xnew (len + 1, char);
6296   strncpy (dp, cp, len);
6297   dp[len] = '\0';
6298   return dp;
6299 }
6300
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL counts
 * as part of the string, so searching for '\0' yields a pointer to the
 * terminator.
 *
 * Like POSIX strrchr, C is converted to char before comparing, so a
 * value such as 0xe9 matches the corresponding byte even where plain
 * char is signed.  Included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char wanted = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == wanted)
        last = sp;
      if (*sp == '\0')          /* terminator examined above, then stop */
        break;
    }
  return (char *) last;
}
6320
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL counts
 * as part of the string, so searching for '\0' yields a pointer to the
 * terminator.
 *
 * Like POSIX strchr, C is converted to char before comparing, so a
 * value such as 0xe9 matches the corresponding byte even where plain
 * char is signed.  Included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char wanted = (char) c;

  for (;; sp++)
    {
      if (*sp == wanted)
        return (char *) sp;
      if (*sp == '\0')          /* reached the end without a match */
        return NULL;
    }
}
6337
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters, in the manner of BSD's strcasecmp (included for
 * portability).
 *
 * Two characters are compared through lowcase only when both are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * The result is the difference between the characters at the first
 * position where the strings differ, or 0 when they are equal.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  /* Advance over the common (case-insensitive) prefix.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6356
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters, in the manner of BSD's
 * strncasecmp (included for portability).
 *
 * Two characters are compared through lowcase only when both are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * Return 0 when the first N characters are equal (or N is not
 * positive), otherwise the difference between the characters at the
 * first differing position.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* N is decremented only after a position has compared equal, so
     N == 0 on exit means exactly that all N positions matched, even
     when S1 happens to end right there.  */
  for (; n > 0 && *s1 != '\0'; s1++, s2++, n--)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (n <= 0)
    return 0;

  /* Either a mismatch or the end of S1 was met before N ran out.  */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6379
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The end of the string is not considered white
   space, so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6388
/* Return a pointer to the first character at or after CP that is
   either white space (per iswhite) or the terminating NUL. */
static char *
skip_non_spaces (char *cp)
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6397
/* Report an unrecoverable problem and terminate the program.
   S1 and S2 are handed unchanged to error (S1 is the printf control
   string, S2 its argument); the process then exits with status
   EXIT_FAILURE, so this function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6405
/* Report the failure described by the current value of errno, using
   perror with S1 as the message prefix, then terminate the program
   with status EXIT_FAILURE.  Does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6412
6413 static void
6414 suggest_asking_for_help (void)
6415 {
6416   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6417            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6418   exit (EXIT_FAILURE);
6419 }
6420
6421 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6422 static void
6423 error (const char *s1, const char *s2)
6424 {
6425   fprintf (stderr, "%s: ", progname);
6426   fprintf (stderr, s1, s2);
6427   fprintf (stderr, "\n");
6428 }
6429
6430 /* Return a newly-allocated string whose contents
6431    concatenate those of s1, s2, s3.  */
6432 static char *
6433 concat (const char *s1, const char *s2, const char *s3)
6434 {
6435   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6436   char *result = xnew (len1 + len2 + len3 + 1, char);
6437
6438   strcpy (result, s1);
6439   strcpy (result + len1, s2);
6440   strcpy (result + len1 + len2, s3);
6441   result[len1 + len2 + len3] = '\0';
6442
6443   return result;
6444 }
6445
6446 \f
6447 /* Does the same work as the system V getcwd, but does not need to
6448    guess the buffer size in advance. */
6449 static char *
6450 etags_getcwd (void)
6451 {
6452 #ifdef HAVE_GETCWD
6453   int bufsize = 200;
6454   char *path = xnew (bufsize, char);
6455
6456   while (getcwd (path, bufsize) == NULL)
6457     {
6458       if (errno != ERANGE)
6459         pfatal ("getcwd");
6460       bufsize *= 2;
6461       free (path);
6462       path = xnew (bufsize, char);
6463     }
6464
6465   canonicalize_filename (path);
6466   return path;
6467
6468 #else /* not HAVE_GETCWD */
6469 #if MSDOS
6470
6471   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6472
6473   getwd (path);
6474
6475   for (p = path; *p != '\0'; p++)
6476     if (*p == '\\')
6477       *p = '/';
6478     else
6479       *p = lowcase (*p);
6480
6481   return strdup (path);
6482 #else /* not MSDOS */
6483   linebuffer path;
6484   FILE *pipe;
6485
6486   linebuffer_init (&path);
6487   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6488   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6489     pfatal ("pwd");
6490   pclose (pipe);
6491
6492   return path.buffer;
6493 #endif /* not MSDOS */
6494 #endif /* not HAVE_GETCWD */
6495 }
6496
6497 /* Return a newly allocated string containing the file name of FILE
6498    relative to the absolute directory DIR (which should end with a slash). */
6499 static char *
6500 relative_filename (char *file, char *dir)
6501 {
6502   char *fp, *dp, *afn, *res;
6503   int i;
6504
6505   /* Find the common root of file and dir (with a trailing slash). */
6506   afn = absolute_filename (file, cwd);
6507   fp = afn;
6508   dp = dir;
6509   while (*fp++ == *dp++)
6510     continue;
6511   fp--, dp--;                   /* back to the first differing char */
6512 #ifdef DOS_NT
6513   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6514     return afn;
6515 #endif
6516   do                            /* look at the equal chars until '/' */
6517     fp--, dp--;
6518   while (*fp != '/');
6519
6520   /* Build a sequence of "../" strings for the resulting relative file name. */
6521   i = 0;
6522   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6523     i += 1;
6524   res = xnew (3*i + strlen (fp + 1) + 1, char);
6525   res[0] = '\0';
6526   while (i-- > 0)
6527     strcat (res, "../");
6528
6529   /* Add the file name relative to the common root of file and dir. */
6530   strcat (res, fp + 1);
6531   free (afn);
6532
6533   return res;
6534 }
6535
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  FILE is used as
   is when it is already absolute, otherwise it is appended to DIR; in
   both cases "/dirname/.." and "/." sequences are then removed. */
static char *
absolute_filename (char *file, char *dir)
{
  char *abs_name, *slash, *parent;

  if (filename_is_absolute (file))
    abs_name = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    abs_name = concat (dir, file, "");

  /* Visit every slash; SLASH is only advanced when nothing was removed
     at its position, so the text moved into place is examined again. */
  slash = etags_strchr (abs_name, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      if (slash[1] != '.')
        {
          slash = etags_strchr (slash + 1, '/');
          continue;
        }

      if (slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": walk back to the start of the preceding component
             and drop that component together with the "/..". */
          parent = slash;
          do
            parent--;
          while (parent >= abs_name && !filename_is_absolute (parent));
          if (parent < abs_name)
            parent = slash;     /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (parent[0] != '/')
            parent = slash;
#endif
          /* The length counts the terminating NUL of the tail. */
          memmove (parent, slash + 3, strlen (slash + 2));
          slash = parent;
        }
      else if (slash[2] == '/' || slash[2] == '\0')
        {
          /* "/.": simply drop it (the terminating NUL is moved too). */
          memmove (slash, slash + 2, strlen (slash + 1));
        }
      else
        {
          /* A component that merely starts with a dot: keep it. */
          slash = etags_strchr (slash + 1, '/');
        }
    }

  if (abs_name[0] != '\0')
    return abs_name;

  /* Just a safety net: should never happen. */
  free (abs_name);
  return savestr ("/");
}
6598
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  When FILE contains no slash the result is a copy of
   DIR.  FILE is modified temporarily but restored before returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash while the directory part is
     made absolute, then put the overwritten character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6618
/* Return non-zero if FN is an absolute file name, i.e. it starts with
   a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
#ifdef DOS_NT
  /* A drive specification is absolute only when a slash follows it. */
  if (ISALPHA (fn[0]) && fn[1] == ':')
    return fn[2] == '/';
#endif
  return fn[0] == '/';
}
6630
/* Canonicalize the file name FN in place: under DOS_NT downcase the
   drive letter, then replace every run of separators (backslash under
   DOS_NT, slash otherwise) with a single slash.  The result is never
   longer than the original. */
static void
canonicalize_filename (register char *fn)
{
  char *src, *dst;              /* read and write positions within FN */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        {
          *dst++ = *src++;
          continue;
        }

      /* Emit one slash for the whole run of separators. */
      *dst++ = '/';
      do
        src++;
      while (*src == sep);
    }
  *dst = '\0';
}
6660
6661 \f
6662 /* Initialize a linebuffer for use. */
6663 static void
6664 linebuffer_init (linebuffer *lbp)
6665 {
6666   lbp->size = (DEBUG) ? 3 : 200;
6667   lbp->buffer = xnew (lbp->size, char);
6668   lbp->buffer[0] = '\0';
6669   lbp->len = 0;
6670 }
6671
6672 /* Set the minimum size of a string contained in a linebuffer. */
6673 static void
6674 linebuffer_setlen (linebuffer *lbp, int toksize)
6675 {
6676   while (lbp->size <= toksize)
6677     {
6678       lbp->size *= 2;
6679       xrnew (lbp->buffer, lbp->size, char);
6680     }
6681   lbp->len = toksize;
6682 }
6683
6684 /* Like malloc but get fatal error if memory is exhausted. */
6685 static PTR
6686 xmalloc (size_t size)
6687 {
6688   PTR result = (PTR) malloc (size);
6689   if (result == NULL)
6690     fatal ("virtual memory exhausted", (char *)NULL);
6691   return result;
6692 }
6693
6694 static PTR
6695 xrealloc (char *ptr, size_t size)
6696 {
6697   PTR result = (PTR) realloc (ptr, size);
6698   if (result == NULL)
6699     fatal ("virtual memory exhausted", (char *)NULL);
6700   return result;
6701 }
6702
6703 /*
6704  * Local Variables:
6705  * indent-tabs-mode: t
6706  * tab-width: 8
6707  * fill-column: 79
6708  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6709  * c-file-style: "gnu"
6710  * End:
6711  */
6712
6713 /* etags.c ends here */