code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5  This file is not considered part of GNU Emacs.
   6
   7  This program is free software; you can redistribute it and/or modify
   8  it under the terms of the GNU General Public License as published by
   9  the Free Software Foundation; either version 2 of the License, or
  10  (at your option) any later version.
  11
  12  This program is distributed in the hope that it will be useful,
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  GNU General Public License for more details.
  16
  17  You should have received a copy of the GNU General Public License
  18  along with this program; if not, write to the Free Software Foundation,
  19  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 1989 Sam Kendall added C++.
  28  * 1992 Joseph B. Wells improved C and C++ parsing.
  29  * 1993 Francesco Potortì reorganised C and C++.
  30  * 1994 Regexp tags by Tom Tromey.
  31  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  32  * 2002 #line directives by Francesco Potortì.
  33  *
  34  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  35  *
  36  */
  37
  38 char pot_etags_version[] = "@(#) pot revision number is 16.27";
  39
  40 #define TRUE    1
  41 #define FALSE   0
  42
  43 #ifdef DEBUG
  44 #  undef DEBUG
  45 #  define DEBUG TRUE
  46 #else
  47 #  define DEBUG  FALSE
  48 #  define NDEBUG                /* disable assert */
  49 #endif
  50
  51 #ifdef HAVE_CONFIG_H
  52 # include <config.h>
  53   /* On some systems, Emacs defines static as nothing for the sake
  54      of unexec.  We don't want that here since we don't use unexec. */
  55 # undef static
  56 # define ETAGS_REGEXPS          /* use the regexp features */
  57 # define LONG_OPTIONS           /* accept long options */
  58 # ifndef PTR                    /* for Xemacs */
  59 #   define PTR void *
  60 # endif
  61 # ifndef __P                    /* for Xemacs */
  62 #   define __P(args) args
  63 # endif
  64 #else
  65 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  66 #   define __P(args) args       /* use prototypes */
  67 #   define PTR void *           /* for generic pointers */
  68 # else
  69 #   define __P(args) ()         /* no prototypes */
  70 #   define const                /* remove const for old compilers' sake */
  71 #   define PTR long *           /* don't use void* */
  72 # endif
  73 #endif /* !HAVE_CONFIG_H */
  74
  75 #ifndef _GNU_SOURCE
  76 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  77 #endif
  78
  79 /* WIN32_NATIVE is for Xemacs.
  80    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  81 #ifdef WIN32_NATIVE
  82 # undef MSDOS
  83 # undef  WINDOWSNT
  84 # define WINDOWSNT
  85 #endif /* WIN32_NATIVE */
  86
  87 #ifdef MSDOS
  88 # undef MSDOS
  89 # define MSDOS TRUE
  90 # include <fcntl.h>
  91 # include <sys/param.h>
  92 # include <io.h>
  93 # ifndef HAVE_CONFIG_H
  94 #   define DOS_NT
  95 #   include <sys/config.h>
  96 # endif
  97 #else
  98 # define MSDOS FALSE
  99 #endif /* MSDOS */
 100
 101 #ifdef WINDOWSNT
 102 # include <stdlib.h>
 103 # include <fcntl.h>
 104 # include <string.h>
 105 # include <direct.h>
 106 # include <io.h>
 107 # define MAXPATHLEN _MAX_PATH
 108 # undef HAVE_NTGUI
 109 # undef  DOS_NT
 110 # define DOS_NT
 111 # ifndef HAVE_GETCWD
 112 #   define HAVE_GETCWD
 113 # endif /* undef HAVE_GETCWD */
 114 #else /* !WINDOWSNT */
 115 # ifdef STDC_HEADERS
 116 #  include <stdlib.h>
 117 #  include <string.h>
 118 # else
 119     extern char *getenv ();
 120 # endif
 121 #endif /* !WINDOWSNT */
 122
 123 #ifdef HAVE_UNISTD_H
 124 # include <unistd.h>
 125 #else
 126 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 127     extern char *getcwd (char *buf, size_t size);
 128 # endif
 129 #endif /* HAVE_UNISTD_H */
 130
 131 #include <stdio.h>
 132 #include <ctype.h>
 133 #include <errno.h>
 134 #ifndef errno
 135   extern int errno;
 136 #endif
 137 #include <sys/types.h>
 138 #include <sys/stat.h>
 139
 140 #include <assert.h>
 141 #ifdef NDEBUG
 142 # undef  assert                 /* some systems have a buggy assert.h */
 143 # define assert(x) ((void) 0)
 144 #endif
 145
 146 #if !defined (S_ISREG) && defined (S_IFREG)
 147 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 148 #endif
 149
 150 #ifdef LONG_OPTIONS
 151 # include <getopt.h>
 152 #else
 153 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 154   extern char *optarg;
 155   extern int optind, opterr;
 156 #endif /* LONG_OPTIONS */
 157
 158 #ifdef ETAGS_REGEXPS
 159 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 160 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 161                              !!! NOTICE !!!
 162  the regex.h distributed with Cygwin is not compatible with etags, alas!
 163 If you want regular expression support, you should delete this notice and
 164               arrange to use the GNU regex.h and regex.c.
 165 #   endif
 166 # endif
 167 # include <regex.h>
 168 #endif /* ETAGS_REGEXPS */
 169
 170 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 171  Leave it undefined to make the program "etags", which makes emacs-style
 172  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 173 #ifdef CTAGS
 174 # undef  CTAGS
 175 # define CTAGS TRUE
 176 #else
 177 # define CTAGS FALSE
 178 #endif
 179
 180 /* Exit codes for success and failure.  */
 181 #ifdef VMS
 182 # define        GOOD    1
 183 # define        BAD     0
 184 #else
 185 # define        GOOD    0
 186 # define        BAD     1
 187 #endif
 188 /* String (prefix) equality tests; BOTH arguments must be non-null. */
 189 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 190 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 191
 192 #define CHARS 256               /* 2^8: number of distinct 8-bit char values */
 193 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* in [0, CHARS) */
 194 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 195 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 196 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 197 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 198 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 199
 200 #define ISALNUM(c)      isalnum (CHAR(c)) /* ctype tests on CHAR-masked arg */
 201 #define ISALPHA(c)      isalpha (CHAR(c))
 202 #define ISDIGIT(c)      isdigit (CHAR(c))
 203 #define ISLOWER(c)      islower (CHAR(c))
 204 /* Case conversion; the argument is likewise masked into 0..CHARS-1 first. */
 205 #define lowcase(c)      tolower (CHAR(c))
 206 #define upcase(c)       toupper (CHAR(c))
 207
 208
 209 /*
 210  *      xnew, xrnew -- allocate, reallocate storage
 211  *
 212  * SYNOPSIS:    Type *xnew (int n, Type);
 213  *              void xrnew (OldPointer, int n, Type);
 214  */
 215 #if DEBUG
 216 # include "chkmalloc.h"
 217 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 218                                                   (n) * sizeof (Type)))
 219 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 220                                         (char *) (op), (n) * sizeof (Type)))
 221 #else
 222 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 223 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 224                                         (char *) (op), (n) * sizeof (Type)))
 225 #endif
 226
 227 #define bool int
 228
 229 typedef void Lang_function __P((FILE *));
 230
 231 typedef struct
 232 {
 233   char *suffix;                 /* file name suffix for this compressor */
 234   char *command;                /* takes one arg and decompresses to stdout */
 235 } compressor;
 236
 237 typedef struct
 238 {
 239   char *name;                   /* language name */
 240   bool metasource;              /* source used to generate other sources */
 241   Lang_function *function;      /* parse function */
 242   char **filenames;             /* names of this language's files */
 243   char **suffixes;              /* name suffixes of this language's files */
 244   char **interpreters;          /* interpreters for this language */
 245 } language;
 246
 247 typedef struct fdesc
 248 {
 249   struct fdesc *next;           /* for the linked list */
 250   char *infname;                /* uncompressed input file name */
 251   char *infabsname;             /* absolute uncompressed input file name */
 252   char *infabsdir;              /* absolute dir of input file */
 253   char *taggedfname;            /* file name to write in tagfile */
 254   language *lang;               /* language of file */
 255   char *prop;                   /* file properties to write in tagfile */
 256   bool usecharno;               /* etags tags shall contain char number */
 257 } fdesc;
 258
 259 typedef struct node_st
 260 {                               /* sorting structure */
 261   struct node_st *left, *right; /* left and right sons */
 262   fdesc *fdp;                   /* description of file to whom tag belongs */
 263   char *name;                   /* tag name */
 264   char *pat;                    /* search pattern */
 265   bool valid;                   /* write this tag on the tag file */
 266   bool is_func;                 /* function tag: use pattern in CTAGS mode */
 267   bool been_warned;             /* warning already given for duplicated tag */
 268   int lno;                      /* line number tag is on */
 269   long cno;                     /* character number line starts on */
 270 } node;
 271
 272 /*
 273  * A `linebuffer' is a structure which holds a line of text.
 274  * `readline_internal' reads a line from a stream into a linebuffer
 275  * and works regardless of the length of the line.
 276  * SIZE is the size of BUFFER, LEN is the length of the string in
 277  * BUFFER after readline reads it.
 278  */
 279 typedef struct
 280 {
 281   long size;                    /* size of BUFFER */
 282   int len;                      /* length of the string held in BUFFER */
 283   char *buffer;                 /* the text of the line */
 284 } linebuffer;
 285
 286 /* Used to support mixing of --lang and file names. */
 287 typedef struct
 288 {
 289   enum {
 290     at_language,                /* a language specification */
 291     at_regexp,                  /* a regular expression */
 292     at_filename,                /* a file name */
 293     at_stdin                    /* read from stdin here */
 294   } arg_type;                   /* argument type */
 295   language *lang;               /* language associated with the argument */
 296   char *what;                   /* the argument itself */
 297 } argument;
 298
 299 #ifdef ETAGS_REGEXPS
 300 /* Structure defining a regular expression. */
 301 typedef struct pattern
 302 {
 303   struct pattern *p_next;       /* next entry in the list of regexps */
 304   language *lang;               /* presumably the language it is limited to; confirm */
 305   char *regex;                  /* regexp text -- TODO confirm if raw or processed */
 306   struct re_pattern_buffer *pat; /* regex-library pattern buffer */
 307   struct re_registers regs;     /* regex-library match registers */
 308   char *name_pattern;           /* presumably the TAGNAME part; verify in users */
 309   bool error_signaled;          /* NOTE(review): looks like a report-once flag */
 310   bool ignore_case;             /* presumably set by the `i' modifier; confirm */
 311   bool multi_line;              /* presumably set by the `m' modifier; confirm */
 312 } pattern;
 313 #endif /* ETAGS_REGEXPS */
 314
 315
 316 /* Many compilers barf on this:
 317         Lang_function Ada_funcs;
 318    so let's write it this way */
 319 static void Ada_funcs __P((FILE *));
 320 static void Asm_labels __P((FILE *));
 321 static void C_entries __P((int c_ext, FILE *));
 322 static void default_C_entries __P((FILE *));
 323 static void plain_C_entries __P((FILE *));
 324 static void Cjava_entries __P((FILE *));
 325 static void Cobol_paragraphs __P((FILE *));
 326 static void Cplusplus_entries __P((FILE *));
 327 static void Cstar_entries __P((FILE *));
 328 static void Erlang_functions __P((FILE *));
 329 static void Fortran_functions __P((FILE *));
 330 static void Yacc_entries __P((FILE *));
 331 static void Lisp_functions __P((FILE *));
 332 static void Makefile_targets __P((FILE *));
 333 static void Pascal_functions __P((FILE *));
 334 static void Perl_functions __P((FILE *));
 335 static void PHP_functions __P((FILE *));
 336 static void Postscript_functions __P((FILE *));
 337 static void Prolog_functions __P((FILE *));
 338 static void Python_functions __P((FILE *));
 339 static void Scheme_functions __P((FILE *));
 340 static void TeX_commands __P((FILE *));
 341 static void Texinfo_nodes __P((FILE *));
 342 static void just_read_file __P((FILE *));
 343
 344 static void print_language_names __P((void));
 345 static void print_version __P((void));
 346 static void print_help __P((void));
 347 int main __P((int, char **));
 348
 349 static compressor *get_compressor_from_suffix __P((char *, char **));
 350 static language *get_language_from_langname __P((const char *));
 351 static language *get_language_from_interpreter __P((char *));
 352 static language *get_language_from_filename __P((char *, bool));
 353 static void readline __P((linebuffer *, FILE *));
 354 static long readline_internal __P((linebuffer *, FILE *));
 355 static bool nocase_tail __P((char *));
 356 static char *get_tag __P((char *));
 357
 358 #ifdef ETAGS_REGEXPS
 359 static void analyse_regex __P((char *));
 360 static void free_patterns __P((void));
 361 static void regex_tag_multiline __P((void));
 362 #endif /* ETAGS_REGEXPS */
 363 static void error __P((const char *, const char *));
 364 static void suggest_asking_for_help __P((void));
 365 void fatal __P((char *, char *));
 366 static void pfatal __P((char *));
 367 static void add_node __P((node *, node **));
 368
 369 static void init __P((void));
 370 static void initbuffer __P((linebuffer *));
 371 static void process_file_name __P((char *, language *));
 372 static void process_file __P((FILE *, char *, language *));
 373 static void find_entries __P((FILE *));
 374 static void free_tree __P((node *));
 375 static void free_fdesc __P((fdesc *));
 376 static void pfnote __P((char *, bool, char *, int, int, long));
 377 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
 378 static void invalidate_nodes __P((fdesc *, node **));
 379 static void put_entries __P((node *));
 380
 381 static char *concat __P((char *, char *, char *));
 382 static char *skip_spaces __P((char *));
 383 static char *skip_non_spaces __P((char *));
 384 static char *savenstr __P((char *, int));
 385 static char *savestr __P((char *));
 386 static char *etags_strchr __P((const char *, int));
 387 static char *etags_strrchr __P((const char *, int));
 388 static bool strcaseeq __P((const char *, const char *));
 389 static char *etags_getcwd __P((void));
 390 static char *relative_filename __P((char *, char *));
 391 static char *absolute_filename __P((char *, char *));
 392 static char *absolute_dirname __P((char *, char *));
 393 static bool filename_is_absolute __P((char *f));
 394 static void canonicalize_filename __P((char *));
 395 static void linebuffer_setlen __P((linebuffer *, int));
 396 static PTR xmalloc __P((unsigned int));
 397 static PTR xrealloc __P((char *, unsigned int));
 398
 399 \f
 400 static char searchar = '/';     /* use /.../ searches */
 401
 402 static char *tagfile;           /* output file */
 403 static char *progname;          /* name this program was invoked with */
 404 static char *cwd;               /* current working directory */
 405 static char *tagfiledir;        /* directory of tagfile */
 406 static FILE *tagf;              /* ioptr for tags file */
 407
 408 static fdesc *fdhead;           /* head of file description list */
 409 static fdesc *curfdp;           /* current file description */
 410 static int lineno;              /* line number of current line */
 411 static long charno;             /* current character number */
 412 static long linecharno;         /* charno of start of current line */
 413 static char *dbp;               /* pointer to start of current tag */
 414
 415 static const int invalidcharno = -1;
 416
 417 static node *nodehead;          /* the head of the binary tree of tags */
 418 static node *last_node;         /* the last node created */
 419
 420 static linebuffer lb;           /* the current line */
 421 static linebuffer filebuf;      /* a buffer containing the whole file */
 422
 423 /* boolean "functions" (see init)       */
 424 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 425 static char
 426   /* white chars */
 427   *white = " \f\t\n\r\v",
 428   /* not in a name */
 429   *nonam = " \f\t\n\r()=,;",
 430   /* token ending chars */
 431   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 432   /* token starting chars */
 433   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 434   /* valid in-token chars */
 435   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 436
 437 static bool append_to_tagfile;  /* -a: append to tags */
 438 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 439 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 440 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 441                                 /* 0 struct/enum/union decls, and C++ */
 442                                 /* member functions. */
 443 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 444                                 /* constants and variables. */
 445                                 /* -D: opposite of -d.  Default under ctags. */
 446 static bool globals;            /* create tags for global variables */
 447 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 448 static bool members;            /* create tags for C member variables */
 449 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 450 static bool update;             /* -u: update tags */
 451 static bool vgrind_style;       /* -v: create vgrind style index output */
 452 static bool no_warnings;        /* -w: suppress warnings */
 453 static bool cxref_style;        /* -x: create cxref style output */
 454 static bool cplusplus;          /* .[hc] means C++, not C */
 455 static bool noindentypedefs;    /* -I: ignore indentation in C */
 456 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 457
 458 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 459 static bool parsing_stdin;      /* --parse-stdin used */
 460
 461 #ifdef ETAGS_REGEXPS
 462 static pattern *p_head;         /* list of all regexps */
 463 static bool need_filebuf;       /* some regexes are multi-line */
 464 #else
 465 # define need_filebuf FALSE
 466 #endif /* ETAGS_REGEXPS */
 467
 468 #ifdef LONG_OPTIONS
 469 static struct option longopts[] =
 470 {                               /* long option table, ended by { NULL } */
 471   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 472   { "c++",                no_argument,       NULL,               'C'   },
 473   { "declarations",       no_argument,       &declarations,      TRUE  },
 474   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 475   { "help",               no_argument,       NULL,               'h'   },
 476   { "help",               no_argument,       NULL,               'H'   },
 477   { "ignore-indentation", no_argument,       NULL,               'I'   },
 478   { "language",           required_argument, NULL,               'l'   },
 479   { "members",            no_argument,       &members,           TRUE  },
 480   { "no-members",         no_argument,       &members,           FALSE },
 481   { "output",             required_argument, NULL,               'o'   },
 482 #ifdef ETAGS_REGEXPS
 483   { "regex",              required_argument, NULL,               'r'   },
 484   { "no-regex",           no_argument,       NULL,               'R'   },
 485   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 486 #endif /* ETAGS_REGEXPS */
 487   { "parse-stdin",        required_argument, NULL,               STDIN },
 488   { "version",            no_argument,       NULL,               'V'   },
 489
 490 #if CTAGS /* Ctags options */
 491   { "backward-search",    no_argument,       NULL,               'B'   },
 492   { "cxref",              no_argument,       NULL,               'x'   },
 493   { "defines",            no_argument,       NULL,               'd'   },
 494   { "globals",            no_argument,       &globals,           TRUE  },
 495   { "typedefs",           no_argument,       NULL,               't'   },
 496   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 497   { "update",             no_argument,       NULL,               'u'   },
 498   { "vgrind",             no_argument,       NULL,               'v'   },
 499   { "no-warn",            no_argument,       NULL,               'w'   },
 500
 501 #else /* Etags options */
 502   { "append",             no_argument,       NULL,               'a'   },
 503   { "no-defines",         no_argument,       NULL,               'D'   },
 504   { "no-globals",         no_argument,       &globals,           FALSE },
 505   { "include",            required_argument, NULL,               'i'   },
 506 #endif
 507   { NULL }
 508 };
 509 #endif /* LONG_OPTIONS */
 510
 511 static compressor compressors[] =
 512 {
 513   { "z", "gzip -d -c"},
 514   { "Z", "gzip -d -c"},
 515   { "gz", "gzip -d -c"},
 516   { "GZ", "gzip -d -c"},
 517   { "bz2", "bzip2 -d -c" },
 518   { NULL }
 519 };
 520
 521 /*
 522  * Language stuff.
 523  */
 524
 525 /* Ada code */
 526 static char *Ada_suffixes [] =
 527   { "ads", "adb", "ada", NULL };
 528
 529 /* Assembly code */
 530 static char *Asm_suffixes [] =
 531   { "a",        /* Unix assembler */
 532     "asm", /* Microcontroller assembly */
 533     "def", /* BSO/Tasking definition includes  */
 534     "inc", /* Microcontroller include files */
 535     "ins", /* Microcontroller include files */
 536     "s", "sa", /* Unix assembler */
 537     "S",   /* cpp-processed Unix assembler */
 538     "src", /* BSO/Tasking C compiler output */
 539     NULL
 540   };
 541
 542 /* Note that .c and .h can be considered C++, if the --c++ flag was
 543    given, or if the `class' keyword is met inside the file.
 544    That is why default_C_entries is called for these. */
 545 static char *default_C_suffixes [] =
 546   { "c", "h", NULL };
 547
 548 static char *Cplusplus_suffixes [] =
 549   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 550     "M",                        /* Objective C++ */
 551     "pdb",                      /* Postscript with C syntax */
 552     NULL };
 553
 554 static char *Cjava_suffixes [] =
 555   { "java", NULL };
 556
 557 static char *Cobol_suffixes [] =
 558   { "COB", "cob", NULL };
 559
 560 static char *Cstar_suffixes [] =
 561   { "cs", "hs", NULL };
 562
 563 static char *Erlang_suffixes [] =
 564   { "erl", "hrl", NULL };
 565
 566 static char *Fortran_suffixes [] =
 567   { "F", "f", "f90", "for", NULL };
 568
 569 static char *Lisp_suffixes [] =
 570   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 571
 572 static char *Makefile_filenames [] =
 573   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 574
 575 static char *Pascal_suffixes [] =
 576   { "p", "pas", NULL };
 577
 578 static char *Perl_suffixes [] =
 579   { "pl", "pm", NULL };
 580
 581 static char *Perl_interpreters [] =
 582   { "perl", "@PERL@", NULL };
 583
 584 static char *PHP_suffixes [] =
 585   { "php", "php3", "php4", NULL };
 586
 587 static char *plain_C_suffixes [] =
 588   { "lm",                       /* Objective lex file */
 589     "m",                        /* Objective C file */
 590     "pc",                       /* Pro*C file */
 591      NULL };
 592
 593 static char *Postscript_suffixes [] =
 594   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 595
 596 static char *Prolog_suffixes [] =
 597   { "prolog", NULL };
 598
 599 static char *Python_suffixes [] =
 600   { "py", NULL };
 601
 602 /* Can't do the `SCM' or `scm' prefix with a version number. */
 603 static char *Scheme_suffixes [] =
 604   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 605
 606 static char *TeX_suffixes [] =
 607   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 608
 609 static char *Texinfo_suffixes [] =
 610   { "texi", "texinfo", "txi", NULL };
 611
 612 static char *Yacc_suffixes [] =
 613   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 614
 615 /*
 616  * Table of languages.
 617  *
 618  * It is ok for a given function to be listed under more than one
 619  * name.  I just didn't.
 620  */
 621
 622 static language lang_names [] =
 623 {
 624   { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
 625   { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
 626   { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
 627   { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
 628   { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
 629   { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
 630   { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
 631   { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
 632   { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
 633   { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
 634   { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
 635   { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
 636   { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
 637   { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
 638   { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
 639   { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
 640   { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
 641   { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
 642   { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
 643   { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
 644   { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
 645   { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
 646   { "auto", FALSE, NULL },             /* default guessing scheme */
 647   { "none", FALSE, just_read_file },   /* regexp matching only */
 648   { NULL, FALSE, NULL }                /* end of list */
 649 };
 650
 651 \f
 652 static void
 653 print_language_names ()
 654 {                               /* list languages with their names/suffixes */
 655   language *lang;
 656   char **name, **ext;
 657   /* Header, then one row per lang_names entry up to the NULL-name entry.  */
 658   puts ("\nThese are the currently supported languages, along with the\n\
 659 default file names and dot suffixes:");
 660   for (lang = lang_names; lang->name != NULL; lang++)
 661     {
 662       printf ("  %-*s", 10, lang->name); /* name, left-justified, width 10 */
 663       if (lang->filenames != NULL)
 664         for (name = lang->filenames; *name != NULL; name++)
 665           printf (" %s", *name);
 666       if (lang->suffixes != NULL)
 667         for (ext = lang->suffixes; *ext != NULL; ext++)
 668           printf (" .%s", *ext);
 669       puts ("");                /* end the row */
 670     }
 671   puts ("Where `auto' means use default language for files based on file\n\
 672 name suffix, and `none' means only do regexp processing on files.\n\
 673 If no language is specified and no matching suffix is found,\n\
 674 the first line of the file is read for a sharp-bang (#!) sequence\n\
 675 followed by the name of an interpreter.  If no such sequence is found,\n\
 676 Fortran is tried first; if no tags are found, C is tried next.\n\
 677 When parsing any C file, a \"class\" keyword switches to C++.\n\
 678 Compressed files are supported using gzip and bzip2.");
 679 }
 680
 681 #ifndef EMACS_NAME
 682 # define EMACS_NAME "standalone"
 683 #endif
 684 #ifndef VERSION
 685 # define VERSION "version"
 686 #endif
 687 static void
 688 print_version ()
 689 {                               /* print the version banner, then exit */
 690   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 691   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 692   puts ("This program is distributed under the same terms as Emacs");
 693   /* Never returns: the program ends here with the success exit code.  */
 694   exit (GOOD);
 695 }
 696
 697 static void
 698 print_help ()
 699 {
 700   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 701 \n\
 702 These are the options accepted by %s.\n", progname, progname);
 703 #ifdef LONG_OPTIONS
 704   puts ("You may use unambiguous abbreviations for the long option names.");
 705 #else
 706   puts ("Long option names do not work with this executable, as it is not\n\
 707 linked with GNU getopt.");
 708 #endif /* LONG_OPTIONS */
 709   puts ("  A - as file name means read names from stdin (one per line).\n\
 710 Absolute names are stored in the output file as they are.\n\
 711 Relative ones are stored relative to the output file's directory.\n");
 712
 713   if (!CTAGS)
 714     puts ("-a, --append\n\
 715         Append tag entries to existing tags file.");
 716
 717   puts ("--packages-only\n\
 718         For Ada files, only generate tags for packages.");
 719
 720   if (CTAGS)
 721     puts ("-B, --backward-search\n\
 722         Write the search commands for the tag entries using '?', the\n\
 723         backward-search command instead of '/', the forward-search command.");
 724
 725   /* This option is mostly obsolete, because etags can now automatically
 726      detect C++.  Retained for backward compatibility and for debugging and
 727      experimentation.  In principle, we could want to tag as C++ even
 728      before any "class" keyword.
 729   puts ("-C, --c++\n\
 730         Treat files whose name suffix defaults to C language as C++ files.");
 731   */
 732
 733   puts ("--declarations\n\
 734         In C and derived languages, create tags for function declarations,");
 735   if (CTAGS)
 736     puts ("\tand create tags for extern variables if --globals is used.");
 737   else
 738     puts
 739       ("\tand create tags for extern variables unless --no-globals is used.");
 740
 741   if (CTAGS)
 742     puts ("-d, --defines\n\
 743         Create tag entries for C #define constants and enum constants, too.");
 744   else
 745     puts ("-D, --no-defines\n\
 746         Don't create tag entries for C #define constants and enum constants.\n\
 747         This makes the tags file smaller.");
 748
 749   if (!CTAGS)
 750     puts ("-i FILE, --include=FILE\n\
 751         Include a note in tag file indicating that, when searching for\n\
 752         a tag, one should also consult the tags file FILE after\n\
 753         checking the current file.");
 754
 755   puts ("-l LANG, --language=LANG\n\
 756         Force the following files to be considered as written in the\n\
 757         named language up to the next --language=LANG option.");
 758
 759   if (CTAGS)
 760     puts ("--globals\n\
 761         Create tag entries for global variables in some languages.");
 762   else
 763     puts ("--no-globals\n\
 764         Do not create tag entries for global variables in some\n\
 765         languages.  This makes the tags file smaller.");
 766   puts ("--members\n\
 767         Create tag entries for member variables in C and derived languages.");
 768
 769 #ifdef ETAGS_REGEXPS
 770   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 771         Make a tag for each line matching the regular expression pattern\n\
 772         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 773         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 774         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 775         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 776   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 777         For example Tcl named tags can be created with:\n\
 778           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 779         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 780         `m' means to allow multi-line matches, `s' implies `m' and\n\
 781         causes dot to match the newline character as well.");
 782   puts ("-R, --no-regex\n\
 783         Don't create tags from regexps for the following files.");
 784 #endif /* ETAGS_REGEXPS */
 785   puts ("-I, --ignore-indentation\n\
 786         Don't rely on indentation quite as much as normal.  Currently,\n\
 787         this means not to assume that a closing brace in the first\n\
 788         column is the final brace of a function or structure\n\
 789         definition in C and C++.");
 790   puts ("-o FILE, --output=FILE\n\
 791         Write the tags to FILE.");
 792   puts ("--parse-stdin=NAME\n\
 793         Read from standard input and record tags as belonging to file NAME.");
 794
 795   if (CTAGS)
 796     {
 797       puts ("-t, --typedefs\n\
 798         Generate tag entries for C and Ada typedefs.");
 799       puts ("-T, --typedefs-and-c++\n\
 800         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 801         and C++ member functions.");
 802     }
 803
 804   if (CTAGS)
 805     puts ("-u, --update\n\
 806         Update the tag entries for the given files, leaving tag\n\
 807         entries for other files in place.  Currently, this is\n\
 808         implemented by deleting the existing entries for the given\n\
 809         files and then rewriting the new entries at the end of the\n\
 810         tags file.  It is often faster to simply rebuild the entire\n\
 811         tag file than to use this.");
 812
 813   if (CTAGS)
 814     {
 815       puts ("-v, --vgrind\n\
 816         Generates an index of items intended for human consumption,\n\
 817         similar to the output of vgrind.  The index is sorted, and\n\
 818         gives the page number of each item.");
 819       puts ("-w, --no-warn\n\
 820         Suppress warning messages about entries defined in multiple\n\
 821         files.");
 822       puts ("-x, --cxref\n\
 823         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 824         The output uses line numbers instead of page numbers, but\n\
 825         beyond that the differences are cosmetic; try both to see\n\
 826         which you like.");
 827     }
 828
 829   puts ("-V, --version\n\
 830         Print the version of the program.\n\
 831 -h, --help\n\
 832         Print this help message.");
 833
 834   print_language_names ();
 835
 836   puts ("");
 837   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 838
 839   exit (GOOD);
 840 }
 841
 842 \f
 843 #ifdef VMS                      /* VMS specific functions */
 844
 845 #define EOS     '\0'
 846
 847 /* This is a BUG!  ANY arbitrary limit is a BUG!
 848    Won't someone please fix this?  */
 849 #define MAX_FILE_SPEC_LEN       255
 850 typedef struct  {
 851   short   curlen;
 852   char    body[MAX_FILE_SPEC_LEN + 1];
 853 } vspec;
 854
 855 /*
 856  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 857  returning in each successive call the next file name matching the input
 858  spec. The function expects that each in_spec passed
 859  to it will be processed to completion; in particular, up to and
 860  including the call following that in which the last matching name
 861  is returned, the function ignores the value of in_spec, and will
 862  only start processing a new spec with the following call.
 863  If an error occurs, on return out_spec contains the value
 864  of in_spec when the error occurred.
 865
 866  With each successive file name returned in out_spec, the
 867  function's return value is one. When there are no more matching
 868  names the function returns zero. If on the first call no file
 869  matches in_spec, or there is any other error, -1 is returned.
 870 */
 871
 872 #include        <rmsdef.h>
 873 #include        <descrip.h>
 874 #define         OUTSIZE MAX_FILE_SPEC_LEN
 875 static short
 876 fn_exp (out, in)
 877      vspec *out;
 878      char *in;
 879 {
 880   static long context = 0;
 881   static struct dsc$descriptor_s o;
 882   static struct dsc$descriptor_s i;
 883   static bool pass1 = TRUE;
 884   long status;
 885   short retval;
 886
 887   if (pass1)
 888     {
 889       pass1 = FALSE;
 890       o.dsc$a_pointer = (char *) out;
 891       o.dsc$w_length = (short)OUTSIZE;
 892       i.dsc$a_pointer = in;
 893       i.dsc$w_length = (short)strlen(in);
 894       i.dsc$b_dtype = DSC$K_DTYPE_T;
 895       i.dsc$b_class = DSC$K_CLASS_S;
 896       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 897       o.dsc$b_class = DSC$K_CLASS_VS;
 898     }
 899   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 900     {
 901       out->body[out->curlen] = EOS;
 902       return 1;
 903     }
 904   else if (status == RMS$_NMF)
 905     retval = 0;
 906   else
 907     {
 908       strcpy(out->body, in);
 909       retval = -1;
 910     }
 911   lib$find_file_end(&context);
 912   pass1 = TRUE;
 913   return retval;
 914 }
 915
 916 /*
 917   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 918   name of each file specified by the provided arg expanding wildcards.
 919 */
 920 static char *
 921 gfnames (arg, p_error)
 922      char *arg;
 923      bool *p_error;
 924 {
 925   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 926
 927   switch (fn_exp (&filename, arg))
 928     {
 929     case 1:
 930       *p_error = FALSE;
 931       return filename.body;
 932     case 0:
 933       *p_error = FALSE;
 934       return NULL;
 935     default:
 936       *p_error = TRUE;
 937       return filename.body;
 938     }
 939 }
 940
 941 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 942 system (cmd)
 943      char *cmd;
 944 {
 945   error ("%s", "system() function not implemented under VMS");
 946 }
 947 #endif
 948
 949 #define VERSION_DELIM   ';'
 950 char *massage_name (s)
 951      char *s;
 952 {
 953   char *start = s;
 954
 955   for ( ; *s; s++)
 956     if (*s == VERSION_DELIM)
 957       {
 958         *s = EOS;
 959         break;
 960       }
 961     else
 962       *s = lowcase (*s);
 963   return start;
 964 }
 965 #endif /* VMS */
 966
 967 \f
 968 int
 969 main (argc, argv)
 970      int argc;
 971      char *argv[];
 972 {
 973   int i;
 974   unsigned int nincluded_files;
 975   char **included_files;
 976   argument *argbuffer;
 977   int current_arg, file_count;
 978   linebuffer filename_lb;
 979 #ifdef VMS
 980   bool got_err;
 981 #endif
 982  char *optstring;
 983  int opt;
 984
 985
 986 #ifdef DOS_NT
 987   _fmode = O_BINARY;   /* all of files are treated as binary files */
 988 #endif /* DOS_NT */
 989
 990   progname = argv[0];
 991   nincluded_files = 0;
 992   included_files = xnew (argc, char *);
 993   current_arg = 0;
 994   file_count = 0;
 995
 996   /* Allocate enough no matter what happens.  Overkill, but each one
 997      is small. */
 998   argbuffer = xnew (argc, argument);
 999
1000   /*
1001    * If etags, always find typedefs and structure tags.  Why not?
1002    * Also default to find macro constants, enum constants and
1003    * global variables.
1004    */
1005   if (!CTAGS)
1006     {
1007       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1008       globals = TRUE;
1009     }
1010
1011   optstring = "-";
1012 #ifdef ETAGS_REGEXPS
1013   optstring = "-r:Rc:";
1014 #endif /* ETAGS_REGEXPS */
1015 #ifndef LONG_OPTIONS
1016   optstring = optstring + 1;
1017 #endif /* LONG_OPTIONS */
1018   optstring = concat (optstring,
1019                       "Cf:Il:o:SVhH",
1020                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1021
1022   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1023     switch (opt)
1024       {
1025       case 0:
1026         /* If getopt returns 0, then it has already processed a
1027            long-named option.  We should do nothing.  */
1028         break;
1029
1030       case 1:
1031         /* This means that a file name has been seen.  Record it. */
1032         argbuffer[current_arg].arg_type = at_filename;
1033         argbuffer[current_arg].what     = optarg;
1034         ++current_arg;
1035         ++file_count;
1036         break;
1037
1038       case STDIN:
1039         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1040         argbuffer[current_arg].arg_type = at_stdin;
1041         argbuffer[current_arg].what     = optarg;
1042         ++current_arg;
1043         ++file_count;
1044         if (parsing_stdin)
1045           fatal ("cannot parse standard input more than once", (char *)NULL);
1046         parsing_stdin = TRUE;
1047         break;
1048
1049         /* Common options. */
1050       case 'C': cplusplus = TRUE;               break;
1051       case 'f':         /* for compatibility with old makefiles */
1052       case 'o':
1053         if (tagfile)
1054           {
1055             error ("-o option may only be given once.", (char *)NULL);
1056             suggest_asking_for_help ();
1057           }
1058         tagfile = optarg;
1059         break;
1060       case 'I':
1061       case 'S':         /* for backward compatibility */
1062         noindentypedefs = TRUE;
1063         break;
1064       case 'l':
1065         {
1066           language *lang = get_language_from_langname (optarg);
1067           if (lang != NULL)
1068             {
1069               argbuffer[current_arg].lang = lang;
1070               argbuffer[current_arg].arg_type = at_language;
1071               ++current_arg;
1072             }
1073         }
1074         break;
1075       case 'c':
1076         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1077         optarg = concat (optarg, "i", ""); /* memory leak here */
1078         /* FALLTHRU */
1079       case 'r':
1080         argbuffer[current_arg].arg_type = at_regexp;
1081         argbuffer[current_arg].what = optarg;
1082         ++current_arg;
1083         break;
1084       case 'R':
1085         argbuffer[current_arg].arg_type = at_regexp;
1086         argbuffer[current_arg].what = NULL;
1087         ++current_arg;
1088         break;
1089       case 'V':
1090         print_version ();
1091         break;
1092       case 'h':
1093       case 'H':
1094         print_help ();
1095         break;
1096
1097         /* Etags options */
1098       case 'a': append_to_tagfile = TRUE;                       break;
1099       case 'D': constantypedefs = FALSE;                        break;
1100       case 'i': included_files[nincluded_files++] = optarg;     break;
1101
1102         /* Ctags options. */
1103       case 'B': searchar = '?';                                 break;
1104       case 'd': constantypedefs = TRUE;                         break;
1105       case 't': typedefs = TRUE;                                break;
1106       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1107       case 'u': update = TRUE;                                  break;
1108       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1109       case 'x': cxref_style = TRUE;                             break;
1110       case 'w': no_warnings = TRUE;                             break;
1111       default:
1112         suggest_asking_for_help ();
1113       }
1114
1115   for (; optind < argc; ++optind)
1116     {
1117       argbuffer[current_arg].arg_type = at_filename;
1118       argbuffer[current_arg].what = argv[optind];
1119       ++current_arg;
1120       ++file_count;
1121     }
1122
1123   if (nincluded_files == 0 && file_count == 0)
1124     {
1125       error ("no input files specified.", (char *)NULL);
1126       suggest_asking_for_help ();
1127     }
1128
1129   if (tagfile == NULL)
1130     tagfile = CTAGS ? "tags" : "TAGS";
1131   cwd = etags_getcwd ();        /* the current working directory */
1132   if (cwd[strlen (cwd) - 1] != '/')
1133     {
1134       char *oldcwd = cwd;
1135       cwd = concat (oldcwd, "/", "");
1136       free (oldcwd);
1137     }
1138   if (streq (tagfile, "-"))
1139     tagfiledir = cwd;
1140   else
1141     tagfiledir = absolute_dirname (tagfile, cwd);
1142
1143   init ();                      /* set up boolean "functions" */
1144
1145   initbuffer (&lb);
1146   initbuffer (&filename_lb);
1147   initbuffer (&filebuf);
1148
1149   if (!CTAGS)
1150     {
1151       if (streq (tagfile, "-"))
1152         {
1153           tagf = stdout;
1154 #ifdef DOS_NT
1155           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1156              doesn't take effect until after `stdout' is already open). */
1157           if (!isatty (fileno (stdout)))
1158             setmode (fileno (stdout), O_BINARY);
1159 #endif /* DOS_NT */
1160         }
1161       else
1162         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1163       if (tagf == NULL)
1164         pfatal (tagfile);
1165     }
1166
1167   /*
1168    * Loop through files finding functions.
1169    */
1170   for (i = 0; i < current_arg; ++i)
1171     {
1172       static language *lang;    /* non-NULL if language is forced */
1173       char *this_file;
1174
1175       switch (argbuffer[i].arg_type)
1176         {
1177         case at_language:
1178           lang = argbuffer[i].lang;
1179           break;
1180 #ifdef ETAGS_REGEXPS
1181         case at_regexp:
1182           analyse_regex (argbuffer[i].what);
1183           break;
1184 #endif
1185         case at_filename:
1186 #ifdef VMS
1187           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1188             {
1189               if (got_err)
1190                 {
1191                   error ("can't find file %s\n", this_file);
1192                   argc--, argv++;
1193                 }
1194               else
1195                 {
1196                   this_file = massage_name (this_file);
1197                 }
1198 #else
1199               this_file = argbuffer[i].what;
1200 #endif
1201               /* Input file named "-" means read file names from stdin
1202                  (one per line) and use them. */
1203               if (streq (this_file, "-"))
1204                 {
1205                   if (parsing_stdin)
1206                     fatal ("cannot parse standard input AND read file names from it",
1207                            (char *)NULL);
1208                   while (readline_internal (&filename_lb, stdin) > 0)
1209                     process_file_name (filename_lb.buffer, lang);
1210                 }
1211               else
1212                 process_file_name (this_file, lang);
1213 #ifdef VMS
1214             }
1215 #endif
1216           break;
1217         case at_stdin:
1218           this_file = argbuffer[i].what;
1219           process_file (stdin, this_file, lang);
1220           break;
1221         }
1222     }
1223
1224 #ifdef ETAGS_REGEXPS
1225   free_patterns ();
1226 #endif /* ETAGS_REGEXPS */
1227   free (filebuf.buffer);
1228
1229   if (!CTAGS || cxref_style)
1230     {
1231       put_entries (nodehead);   /* write the remainig tags (ETAGS) */
1232       free_tree (nodehead);
1233       nodehead = NULL;
1234       if (!CTAGS)
1235         while (nincluded_files-- > 0)
1236           fprintf (tagf, "\f\n%s,include\n", *included_files++);
1237
1238       if (fclose (tagf) == EOF)
1239         pfatal (tagfile);
1240       exit (GOOD);
1241     }
1242
1243   if (update)
1244     {
1245       char cmd[BUFSIZ];
1246       for (i = 0; i < current_arg; ++i)
1247         {
1248           switch (argbuffer[i].arg_type)
1249             {
1250             case at_filename:
1251             case at_stdin:
1252               break;
1253             default:
1254               continue;         /* the for loop */
1255             }
1256           sprintf (cmd,
1257                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1258                    tagfile, argbuffer[i].what, tagfile);
1259           if (system (cmd) != GOOD)
1260             fatal ("failed to execute shell command", (char *)NULL);
1261         }
1262       append_to_tagfile = TRUE;
1263     }
1264
1265   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1266   if (tagf == NULL)
1267     pfatal (tagfile);
1268   put_entries (nodehead);       /* write all the tags (CTAGS) */
1269   free_tree (nodehead);
1270   nodehead = NULL;
1271   if (fclose (tagf) == EOF)
1272     pfatal (tagfile);
1273
1274   if (update)
1275     {
1276       char cmd[2*BUFSIZ+10];
1277       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1278       exit (system (cmd));
1279     }
1280   return GOOD;
1281 }
1282
1283
1284 /*
1285  * Return a compressor given the file name.  If EXTPTR is non-zero,
1286  * return a pointer into FILE where the compressor-specific
1287  * extension begins.  If no compressor is found, NULL is returned
1288  * and EXTPTR is not significant.
1289  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1290  */
1291 static compressor *
1292 get_compressor_from_suffix (file, extptr)
1293      char *file;
1294      char **extptr;
1295 {
1296   compressor *compr;
1297   char *slash, *suffix;
1298
1299   /* This relies on FN to be after canonicalize_filename,
1300      so we don't need to consider backslashes on DOS_NT.  */
1301   slash = etags_strrchr (file, '/');
1302   suffix = etags_strrchr (file, '.');
1303   if (suffix == NULL || suffix < slash)
1304     return NULL;
1305   if (extptr != NULL)
1306     *extptr = suffix;
1307   suffix += 1;
1308   /* Let those poor souls who live with DOS 8+3 file name limits get
1309      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1310      Only the first do loop is run if not MSDOS */
1311   do
1312     {
1313       for (compr = compressors; compr->suffix != NULL; compr++)
1314         if (streq (compr->suffix, suffix))
1315           return compr;
1316       if (!MSDOS)
1317         break;                  /* do it only once: not really a loop */
1318       if (extptr != NULL)
1319         *extptr = ++suffix;
1320     } while (*suffix != '\0');
1321   return NULL;
1322 }
1323
1324
1325
1326 /*
1327  * Return a language given the name.
1328  */
1329 static language *
1330 get_language_from_langname (name)
1331      const char *name;
1332 {
1333   language *lang;
1334
1335   if (name == NULL)
1336     error ("empty language name", (char *)NULL);
1337   else
1338     {
1339       for (lang = lang_names; lang->name != NULL; lang++)
1340         if (streq (name, lang->name))
1341           return lang;
1342       error ("unknown language \"%s\"", name);
1343     }
1344
1345   return NULL;
1346 }
1347
1348
1349 /*
1350  * Return a language given the interpreter name.
1351  */
1352 static language *
1353 get_language_from_interpreter (interpreter)
1354      char *interpreter;
1355 {
1356   language *lang;
1357   char **iname;
1358
1359   if (interpreter == NULL)
1360     return NULL;
1361   for (lang = lang_names; lang->name != NULL; lang++)
1362     if (lang->interpreters != NULL)
1363       for (iname = lang->interpreters; *iname != NULL; iname++)
1364         if (streq (*iname, interpreter))
1365             return lang;
1366
1367   return NULL;
1368 }
1369
1370
1371
1372 /*
1373  * Return a language given the file name.
1374  */
1375 static language *
1376 get_language_from_filename (file, case_sensitive)
1377      char *file;
1378      bool case_sensitive;
1379 {
1380   language *lang;
1381   char **name, **ext, *suffix;
1382
1383   /* Try whole file name first. */
1384   for (lang = lang_names; lang->name != NULL; lang++)
1385     if (lang->filenames != NULL)
1386       for (name = lang->filenames; *name != NULL; name++)
1387         if ((case_sensitive)
1388             ? streq (*name, file)
1389             : strcaseeq (*name, file))
1390           return lang;
1391
1392   /* If not found, try suffix after last dot. */
1393   suffix = etags_strrchr (file, '.');
1394   if (suffix == NULL)
1395     return NULL;
1396   suffix += 1;
1397   for (lang = lang_names; lang->name != NULL; lang++)
1398     if (lang->suffixes != NULL)
1399       for (ext = lang->suffixes; *ext != NULL; ext++)
1400         if ((case_sensitive)
1401             ? streq (*ext, suffix)
1402             : strcaseeq (*ext, suffix))
1403           return lang;
1404   return NULL;
1405 }
1406
1407 \f
1408 /*
1409  * This routine is called on each file argument.
1410  */
1411 static void
1412 process_file_name (file, lang)
1413      char *file;
1414      language *lang;
1415 {
1416   struct stat stat_buf;
1417   FILE *inf;
1418   fdesc *fdp;
1419   compressor *compr;
1420   char *compressed_name, *uncompressed_name;
1421   char *ext, *real_name;
1422   int retval;
1423
1424   canonicalize_filename (file);
1425   if (streq (file, tagfile) && !streq (tagfile, "-"))
1426     {
1427       error ("skipping inclusion of %s in self.", file);
1428       return;
1429     }
1430   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1431     {
1432       compressed_name = NULL;
1433       real_name = uncompressed_name = savestr (file);
1434     }
1435   else
1436     {
1437       real_name = compressed_name = savestr (file);
1438       uncompressed_name = savenstr (file, ext - file);
1439     }
1440
1441   /* If the canonicalized uncompressed name
1442      has already been dealt with, skip it silently. */
1443   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1444     {
1445       assert (fdp->infname != NULL);
1446       if (streq (uncompressed_name, fdp->infname))
1447         goto cleanup;
1448     }
1449
1450   if (stat (real_name, &stat_buf) != 0)
1451     {
1452       /* Reset real_name and try with a different name. */
1453       real_name = NULL;
1454       if (compressed_name != NULL) /* try with the given suffix */
1455         {
1456           if (stat (uncompressed_name, &stat_buf) == 0)
1457             real_name = uncompressed_name;
1458         }
1459       else                      /* try all possible suffixes */
1460         {
1461           for (compr = compressors; compr->suffix != NULL; compr++)
1462             {
1463               compressed_name = concat (file, ".", compr->suffix);
1464               if (stat (compressed_name, &stat_buf) != 0)
1465                 {
1466                   if (MSDOS)
1467                     {
1468                       char *suf = compressed_name + strlen (file);
1469                       size_t suflen = strlen (compr->suffix) + 1;
1470                       for ( ; suf[1]; suf++, suflen--)
1471                         {
1472                           memmove (suf, suf + 1, suflen);
1473                           if (stat (compressed_name, &stat_buf) == 0)
1474                             {
1475                               real_name = compressed_name;
1476                               break;
1477                             }
1478                         }
1479                       if (real_name != NULL)
1480                         break;
1481                     } /* MSDOS */
1482                   free (compressed_name);
1483                   compressed_name = NULL;
1484                 }
1485               else
1486                 {
1487                   real_name = compressed_name;
1488                   break;
1489                 }
1490             }
1491         }
1492       if (real_name == NULL)
1493         {
1494           perror (file);
1495           goto cleanup;
1496         }
1497     } /* try with a different name */
1498
1499   if (!S_ISREG (stat_buf.st_mode))
1500     {
1501       error ("skipping %s: it is not a regular file.", real_name);
1502       goto cleanup;
1503     }
1504   if (real_name == compressed_name)
1505     {
1506       char *cmd = concat (compr->command, " ", real_name);
1507       inf = (FILE *) popen (cmd, "r");
1508       free (cmd);
1509     }
1510   else
1511     inf = fopen (real_name, "r");
1512   if (inf == NULL)
1513     {
1514       perror (real_name);
1515       goto cleanup;
1516     }
1517
1518   process_file (inf, uncompressed_name, lang);
1519
1520   if (real_name == compressed_name)
1521     retval = pclose (inf);
1522   else
1523     retval = fclose (inf);
1524   if (retval < 0)
1525     pfatal (file);
1526
1527  cleanup:
1528   if (compressed_name) free (compressed_name);
1529   if (uncompressed_name) free (uncompressed_name);
1530   last_node = NULL;
1531   curfdp = NULL;
1532   return;
1533 }
1534
1535 static void
1536 process_file (fh, fn, lang)
1537      FILE *fh;
1538      char *fn;
1539      language *lang;
1540 {
1541   static const fdesc emptyfdesc;
1542   fdesc *fdp;
1543
1544   /* Create a new input file description entry. */
1545   fdp = xnew (1, fdesc);
1546   *fdp = emptyfdesc;
1547   fdp->next = fdhead;
1548   fdp->infname = savestr (fn);
1549   fdp->lang = lang;
1550   fdp->infabsname = absolute_filename (fn, cwd);
1551   fdp->infabsdir = absolute_dirname (fn, cwd);
1552   if (filename_is_absolute (fn))
1553     {
1554       /* An absolute file name.  Canonicalize it. */
1555       fdp->taggedfname = absolute_filename (fn, NULL);
1556     }
1557   else
1558     {
1559       /* A file name relative to cwd.  Make it relative
1560          to the directory of the tags file. */
1561       fdp->taggedfname = relative_filename (fn, tagfiledir);
1562     }
1563   fdp->usecharno = TRUE;        /* use char position when making tags */
1564   fdp->prop = NULL;
1565
1566   fdhead = fdp;
1567   curfdp = fdhead;              /* the current file description */
1568
1569   find_entries (fh);
1570
1571   /* If not Ctags, and if this is not metasource and if it contained no #line
1572      directives, we can write the tags and free all nodes pointing to
1573      curfdp. */
1574   if (!CTAGS
1575       && curfdp->usecharno      /* no #line directives in this file */
1576       && !curfdp->lang->metasource)
1577     {
1578       node *np, *prev;
1579
1580       /* Look for the head of the sublist relative to this file.  See add_node
1581          for the structure of the node tree. */
1582       prev = NULL;
1583       for (np = nodehead; np != NULL; prev = np, np = np->left)
1584         if (np->fdp == curfdp)
1585           break;
1586
1587       /* If we generated tags for this file, write and delete them. */
1588       if (np != NULL)
1589         {
1590           /* This is the head of the last sublist, if any.  The following
1591              instructions depend on this being true. */
1592           assert (np->left == NULL);
1593
1594           assert (fdhead == curfdp);
1595           assert (last_node->fdp == curfdp);
1596           put_entries (np);     /* write tags for file curfdp->taggedfname */
1597           free_tree (np);       /* remove the written nodes */
1598           if (prev == NULL)
1599             nodehead = NULL;    /* no nodes left */
1600           else
1601             prev->left = NULL;  /* delete the pointer to the sublist */
1602         }
1603     }
1604 }
1605
1606 /*
1607  * This routine sets up the boolean pseudo-functions which work
1608  * by setting boolean flags dependent upon the corresponding character.
1609  * Every char which is NOT in that string is not a white char.  Therefore,
1610  * all of the array "_wht" is set to FALSE, and then the elements
1611  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1612  * of a char is TRUE if it is the string "white", else FALSE.
1613  */
1614 static void
1615 init ()
1616 {
1617   register char *sp;
1618   register int i;
1619
1620   for (i = 0; i < CHARS; i++)
1621     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1622   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1623   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1624   notinname('\0') = notinname('\n');
1625   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1626   begtoken('\0') = begtoken('\n');
1627   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1628   intoken('\0') = intoken('\n');
1629   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1630   endtoken('\0') = endtoken('\n');
1631 }
1632
1633 /*
 1634  * This routine selects a parser for the already open input file
 1635  * INF and calls it to find the function and type definitions.
1636  */
1637 static void
1638 find_entries (inf)
1639      FILE *inf;
1640 {
1641   char *cp;
1642   language *lang = curfdp->lang;
1643   Lang_function *parser = NULL;
1644
1645   /* If user specified a language, use it. */
1646   if (lang != NULL && lang->function != NULL)
1647     {
1648       parser = lang->function;
1649     }
1650
1651   /* Else try to guess the language given the file name. */
1652   if (parser == NULL)
1653     {
1654       lang = get_language_from_filename (curfdp->infname, TRUE);
1655       if (lang != NULL && lang->function != NULL)
1656         {
1657           curfdp->lang = lang;
1658           parser = lang->function;
1659         }
1660     }
1661
1662   /* Else look for sharp-bang as the first two characters. */
1663   if (parser == NULL
1664       && readline_internal (&lb, inf) > 0
1665       && lb.len >= 2
1666       && lb.buffer[0] == '#'
1667       && lb.buffer[1] == '!')
1668     {
1669       char *lp;
1670
1671       /* Set lp to point at the first char after the last slash in the
1672          line or, if no slashes, at the first nonblank.  Then set cp to
1673          the first successive blank and terminate the string. */
1674       lp = etags_strrchr (lb.buffer+2, '/');
1675       if (lp != NULL)
1676         lp += 1;
1677       else
1678         lp = skip_spaces (lb.buffer + 2);
1679       cp = skip_non_spaces (lp);
1680       *cp = '\0';
1681
1682       if (strlen (lp) > 0)
1683         {
1684           lang = get_language_from_interpreter (lp);
1685           if (lang != NULL && lang->function != NULL)
1686             {
1687               curfdp->lang = lang;
1688               parser = lang->function;
1689             }
1690         }
1691     }
1692
1693   /* We rewind here, even if inf may be a pipe.  We fail if the
1694      length of the first line is longer than the pipe block size,
1695      which is unlikely. */
1696   rewind (inf);
1697
1698   /* Else try to guess the language given the case insensitive file name. */
1699   if (parser == NULL)
1700     {
1701       lang = get_language_from_filename (curfdp->infname, FALSE);
1702       if (lang != NULL && lang->function != NULL)
1703         {
1704           curfdp->lang = lang;
1705           parser = lang->function;
1706         }
1707     }
1708
1709   /* Else try Fortran or C. */
1710   if (parser == NULL)
1711     {
1712       node *old_last_node = last_node;
1713
1714       curfdp->lang = get_language_from_langname ("fortran");
1715       find_entries (inf);
1716
1717       if (old_last_node == last_node)
1718         /* No Fortran entries found.  Try C. */
1719         {
1720           /* We do not tag if rewind fails.
1721              Only the file name will be recorded in the tags file. */
1722           rewind (inf);
1723           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1724           find_entries (inf);
1725         }
1726       return;
1727     }
1728
1729   if (!no_line_directive
1730       && curfdp->lang != NULL && curfdp->lang->metasource)
1731     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1732        file, or anyway we parsed a file that is automatically generated from
1733        this one.  If this is the case, the bingo.c file contained #line
1734        directives that generated tags pointing to this file.  Let's delete
1735        them all before parsing this file, which is the real source. */
1736     {
1737       fdesc **fdpp = &fdhead;
1738       while (*fdpp != NULL)
1739         if (*fdpp != curfdp
1740             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1741           /* We found one of those!  We must delete both the file description
1742              and all tags referring to it. */
1743           {
1744             fdesc *badfdp = *fdpp;
1745
1746             if (DEBUG)
1747               fprintf (stderr,
1748                        "Removing references to \"%s\" obtained from \"%s\"\n",
1749                        badfdp->taggedfname, badfdp->infname);
1750
1751             /* Delete the tags referring to badfdp. */
1752             invalidate_nodes (badfdp, &nodehead);
1753
1754             *fdpp = badfdp->next; /* remove the bad description from the list */
1755             free_fdesc (badfdp);
1756           }
1757         else
1758           fdpp = &(*fdpp)->next; /* advance the list pointer */
1759     }
1760
1761   assert (parser != NULL);
1762
1763   /* Generic initialisations before reading from file. */
1764   filebuf.len = 0;              /* reset the file buffer */
1765
1766   /* Generic initialisations before parsing file with readline. */
1767   lineno = 0;                  /* reset global line number */
1768   charno = 0;                  /* reset global char number */
1769   linecharno = 0;              /* reset global char number of line start */
1770
1771   parser (inf);
1772
1773 #ifdef ETAGS_REGEXPS
1774   regex_tag_multiline ();
1775 #endif /* ETAGS_REGEXPS */
1776 }
1777
1778 \f
1779 /* Record a tag. */
1780 static void
1781 pfnote (name, is_func, linestart, linelen, lno, cno)
1782      char *name;                /* tag name, or NULL if unnamed */
1783      bool is_func;              /* tag is a function */
1784      char *linestart;           /* start of the line where tag is */
1785      int linelen;               /* length of the line where tag is */
1786      int lno;                   /* line number */
1787      long cno;                  /* character number */
1788 {
1789   register node *np;
1790
1791   if (CTAGS && name == NULL)
1792     return;
1793
1794   np = xnew (1, node);
1795
1796   /* If ctags mode, change name "main" to M<thisfilename>. */
1797   if (CTAGS && !cxref_style && streq (name, "main"))
1798     {
1799       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1800       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1801       fp = etags_strrchr (np->name, '.');
1802       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1803         fp[0] = '\0';
1804     }
1805   else
1806     np->name = name;
1807   np->valid = TRUE;
1808   np->been_warned = FALSE;
1809   np->fdp = curfdp;
1810   np->is_func = is_func;
1811   np->lno = lno;
1812   if (np->fdp->usecharno)
1813     /* Our char numbers are 0-base, because of C language tradition?
1814        ctags compatibility?  old versions compatibility?   I don't know.
1815        Anyway, since emacs's are 1-base we expect etags.el to take care
1816        of the difference.  If we wanted to have 1-based numbers, we would
1817        uncomment the +1 below. */
1818     np->cno = cno /* + 1 */ ;
1819   else
1820     np->cno = invalidcharno;
1821   np->left = np->right = NULL;
1822   if (CTAGS && !cxref_style)
1823     {
1824       if (strlen (linestart) < 50)
1825         np->pat = concat (linestart, "$", "");
1826       else
1827         np->pat = savenstr (linestart, 50);
1828     }
1829   else
1830     np->pat = savenstr (linestart, linelen);
1831
1832   add_node (np, &nodehead);
1833 }
1834
1835 /*
1836  * TAGS format specification
1837  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1838  *
1839  * pfnote should emit the optimized form [unnamed tag] only if:
1840  *  1. name does not contain any of the characters " \t\r\n(),;";
1841  *  2. linestart contains name as either a rightmost, or rightmost but
1842  *     one character, substring;
1843  *  3. the character, if any, immediately before name in linestart must
1844  *     be one of the characters " \t(),;";
1845  *  4. the character, if any, immediately after name in linestart must
1846  *     also be one of the characters " \t(),;".
1847  *
1848  * The real implementation uses the notinname() macro, which recognises
1849  * characters slightly different from " \t\r\n(),;".  See the variable
1850  * `nonam'.
1851  */
1852 #define traditional_tag_style TRUE
1853 static void
1854 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1855      char *name;                /* tag name, or NULL if unnamed */
1856      int namelen;               /* tag length */
1857      bool is_func;              /* tag is a function */
1858      char *linestart;           /* start of the line where tag is */
1859      int linelen;               /* length of the line where tag is */
1860      int lno;                   /* line number */
1861      long cno;                  /* character number */
1862 {
1863   register char *cp;
1864   bool named;
1865
1866   named = TRUE;
1867   if (!CTAGS)
1868     {
1869       for (cp = name; !notinname (*cp); cp++)
1870         continue;
1871       if (*cp == '\0')                          /* rule #1 */
1872         {
1873           cp = linestart + linelen - namelen;
1874           if (notinname (linestart[linelen-1]))
1875             cp -= 1;                            /* rule #4 */
1876           if (cp >= linestart                   /* rule #2 */
1877               && (cp == linestart
1878                   || notinname (cp[-1]))        /* rule #3 */
1879               && strneq (name, cp, namelen))    /* rule #2 */
1880             named = FALSE;      /* use unnamed tag */
1881         }
1882     }
1883
1884   if (named)
1885     name = savenstr (name, namelen);
1886   else
1887     name = NULL;
1888   pfnote (name, is_func, linestart, linelen, lno, cno);
1889 }
1890
1891 /*
1892  * free_tree ()
1893  *      recurse on left children, iterate on right children.
1894  */
1895 static void
1896 free_tree (np)
1897      register node *np;
1898 {
1899   while (np)
1900     {
1901       register node *node_right = np->right;
1902       free_tree (np->left);
1903       if (np->name != NULL)
1904         free (np->name);
1905       free (np->pat);
1906       free (np);
1907       np = node_right;
1908     }
1909 }
1910
1911 /*
1912  * free_fdesc ()
1913  *      delete a file description
1914  */
1915 static void
1916 free_fdesc (fdp)
1917      register fdesc *fdp;
1918 {
1919   if (fdp->infname != NULL) free (fdp->infname);
1920   if (fdp->infabsname != NULL) free (fdp->infabsname);
1921   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1922   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1923   if (fdp->prop != NULL) free (fdp->prop);
1924   free (fdp);
1925 }
1926
1927 /*
1928  * add_node ()
1929  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1930  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1931  *      balancing.
1932  *
1933  *      add_node is the only function allowed to add nodes, so it can
1934  *      maintain state.
1935  */
1936 static void
1937 add_node (np, cur_node_p)
1938      node *np, **cur_node_p;
1939 {
1940   register int dif;
1941   register node *cur_node = *cur_node_p;
1942
1943   if (cur_node == NULL)
1944     {
1945       *cur_node_p = np;
1946       last_node = np;
1947       return;
1948     }
1949
1950   if (!CTAGS)
1951     /* Etags Mode */
1952     {
1953       /* For each file name, tags are in a linked sublist on the right
1954          pointer.  The first tags of different files are a linked list
1955          on the left pointer.  last_node points to the end of the last
1956          used sublist. */
1957       if (last_node != NULL && last_node->fdp == np->fdp)
1958         {
1959           /* Let's use the same sublist as the last added node. */
1960           assert (last_node->right == NULL);
1961           last_node->right = np;
1962           last_node = np;
1963         }
1964       else if (cur_node->fdp == np->fdp)
1965         {
1966           /* Scanning the list we found the head of a sublist which is
1967              good for us.  Let's scan this sublist. */
1968           add_node (np, &cur_node->right);
1969         }
1970       else
1971         /* The head of this sublist is not good for us.  Let's try the
1972            next one. */
1973         add_node (np, &cur_node->left);
1974     } /* if ETAGS mode */
1975
1976   else
1977     {
1978       /* Ctags Mode */
1979       dif = strcmp (np->name, cur_node->name);
1980
1981       /*
1982        * If this tag name matches an existing one, then
1983        * do not add the node, but maybe print a warning.
1984        */
1985       if (!dif)
1986         {
1987           if (np->fdp == cur_node->fdp)
1988             {
1989               if (!no_warnings)
1990                 {
1991                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1992                            np->fdp->infname, lineno, np->name);
1993                   fprintf (stderr, "Second entry ignored\n");
1994                 }
1995             }
1996           else if (!cur_node->been_warned && !no_warnings)
1997             {
1998               fprintf
1999                 (stderr,
2000                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2001                  np->fdp->infname, cur_node->fdp->infname, np->name);
2002               cur_node->been_warned = TRUE;
2003             }
2004           return;
2005         }
2006
2007       /* Actually add the node */
2008       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2009     } /* if CTAGS mode */
2010 }
2011
2012 /*
2013  * invalidate_nodes ()
2014  *      Scan the node tree and invalidate all nodes pointing to the
2015  *      given file description (CTAGS case) or free them (ETAGS case).
2016  */
2017 static void
2018 invalidate_nodes (badfdp, npp)
2019      fdesc *badfdp;
2020      node **npp;
2021 {
2022   node *np = *npp;
2023
2024   if (np == NULL)
2025     return;
2026
2027   if (CTAGS)
2028     {
2029       if (np->left != NULL)
2030         invalidate_nodes (badfdp, &np->left);
2031       if (np->fdp == badfdp)
2032         np->valid = FALSE;
2033       if (np->right != NULL)
2034         invalidate_nodes (badfdp, &np->right);
2035     }
2036   else
2037     {
2038       assert (np->fdp != NULL);
2039       if (np->fdp == badfdp)
2040         {
2041           *npp = np->left;      /* detach the sublist from the list */
2042           np->left = NULL;      /* isolate it */
2043           free_tree (np);       /* free it */
2044           invalidate_nodes (badfdp, npp);
2045         }
2046       else
2047         invalidate_nodes (badfdp, &np->left);
2048     }
2049 }
2050
2051 \f
2052 static int total_size_of_entries __P((node *));
2053 static int number_len __P((long));
2054
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, one more for each division by ten that
     leaves something behind. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2065
2066 /*
2067  * Return total number of characters that put_entries will output for
2068  * the nodes in the linked list at the right of the specified node.
2069  * This count is irrelevant with etags.el since emacs 19.34 at least,
2070  * but is still supplied for backward compatibility.
2071  */
2072 static int
2073 total_size_of_entries (np)
2074      register node *np;
2075 {
2076   register int total = 0;
2077
2078   for (; np != NULL; np = np->right)
2079     {
2080       total += strlen (np->pat) + 1;            /* pat\177 */
2081       if (np->name != NULL)
2082         total += strlen (np->name) + 1;         /* name\001 */
2083       total += number_len ((long) np->lno) + 1; /* lno, */
2084       if (np->cno != invalidcharno)             /* cno */
2085         total += number_len (np->cno);
2086       total += 1;                               /* newline */
2087     }
2088
2089   return total;
2090 }
2091
/*
 * put_entries ()
 *      Write the tags stored in the tree rooted at NP.
 *
 *      In ctags mode the tree is visited in order (left subtree, node,
 *      right subtree).  In etags mode the node is written first, then
 *      its right subtree, then its left subtree.  Nodes that are not
 *      valid are skipped, but their subtrees are still visited.
 *
 *      Output goes to tagf, except for the cxref style listings, which
 *      go to stdout.
 */
static void
put_entries (np)
     register node *np;
{
  register char *sp;
  /* File description whose header was written last (etags mode).
     Being static, it is remembered across calls. */
  static fdesc *fdp = NULL;

  if (np == NULL)
    return;

  /* Output subentries that precede this one */
  if (CTAGS)
    put_entries (np->left);

  /* Output this entry */
  if (np->valid)
    {
      if (!CTAGS)
        {
          /* Etags mode */
          if (fdp != np->fdp)
            {
              /* First tag of another file: write the file header, that
                 is a form feed line followed by "name,size". */
              fdp = np->fdp;
              fprintf (tagf, "\f\n%s,%d\n",
                       fdp->taggedfname, total_size_of_entries (np));
            }
          /* The entry itself: pat \177 [name \001] lno , [cno] \n */
          fputs (np->pat, tagf);
          fputc ('\177', tagf);
          if (np->name != NULL)
            {
              fputs (np->name, tagf);
              fputc ('\001', tagf);
            }
          fprintf (tagf, "%d,", np->lno);
          if (np->cno != invalidcharno)
            fprintf (tagf, "%ld", np->cno);
          fputs ("\n", tagf);
        }
      else
        {
          /* Ctags mode */
          /* NOTE(review): execution goes on to the code below after
             this call unless error does not return -- confirm. */
          if (np->name == NULL)
            error ("internal error: NULL name in ctags mode.", (char *)NULL);

          if (cxref_style)
            {
              if (vgrind_style)
                /* The last field is the line number divided by 64,
                   rounded up. */
                fprintf (stdout, "%s %s %d\n",
                         np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
              else
                fprintf (stdout, "%-16s %3d %-16s %s\n",
                         np->name, np->lno, np->fdp->taggedfname, np->pat);
            }
          else
            {
              fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);

              if (np->is_func)
                {               /* function or #define macro with args */
                  /* Write the pattern between two searchar delimiters,
                     anchored with '^'. */
                  putc (searchar, tagf);
                  putc ('^', tagf);

                  for (sp = np->pat; *sp; sp++)
                    {
                      /* Backslashes and delimiters inside the pattern
                         get a backslash in front. */
                      if (*sp == '\\' || *sp == searchar)
                        putc ('\\', tagf);
                      putc (*sp, tagf);
                    }
                  putc (searchar, tagf);
                }
              else
                {               /* anything else; text pattern inadequate */
                  fprintf (tagf, "%d", np->lno);
                }
              putc ('\n', tagf);
            }
        }
    } /* if this node contains a valid tag */

  /* Output subentries that follow this one */
  put_entries (np->right);
  if (!CTAGS)
    put_entries (np->left);
}
2176
2177 \f
/* C extensions.
   These values are the language masks stored in the c_ext member of
   the keyword table entries and tested by C_symtype with a bitwise AND.
   NOTE(review): C_STAR and C_JAVA both contain the C_PLPL bit, so a
   keyword marked C_PLPL also passes that test when the language mask is
   C_STAR or C_JAVA -- confirm that this is intended. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2186
/*
 * The C symbol tables.
 *
 * sym_type is the classification of a keyword: it is the type of the
 * `type' member of struct C_stab_entry and the return type of
 * C_symtype.  The keywords named in the comments below are the ones
 * that the gperf input further down associates with each value.
 */
enum sym_type
{
  st_none,                      /* returned by C_symtype when no keyword applies */
  st_C_objprot, st_C_objimpl, st_C_objend, /* @interface and @protocol,
                                              @implementation, @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return,
                                   import, package, friend */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class, st_C_template,    /* class, template */
  /* struct, union, namespace, domain and interface are st_C_struct;
     the type specifiers (int, char, static, const, ...) are
     st_C_typespec; the others stand for the keyword in their name. */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};
2201
2202 static unsigned int hash __P((const char *, unsigned int));
2203 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2204 static enum sym_type C_symtype __P((char *, int, int));
2205
2206 /* Feed stuff between (but not including) %[ and %] lines to:
2207       gperf -c -k 1,3 -o -p -r -t
2208 %[
2209 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2210 %%
2211 if,             0,      st_C_ignore
2212 for,            0,      st_C_ignore
2213 while,          0,      st_C_ignore
2214 switch,         0,      st_C_ignore
2215 return,         0,      st_C_ignore
2216 @interface,     0,      st_C_objprot
2217 @protocol,      0,      st_C_objprot
2218 @implementation,0,      st_C_objimpl
2219 @end,           0,      st_C_objend
2220 import,         C_JAVA, st_C_ignore
2221 package,        C_JAVA, st_C_ignore
2222 friend,         C_PLPL, st_C_ignore
2223 extends,        C_JAVA, st_C_javastruct
2224 implements,     C_JAVA, st_C_javastruct
2225 interface,      C_JAVA, st_C_struct
2226 class,          0,      st_C_class
2227 namespace,      C_PLPL, st_C_struct
2228 domain,         C_STAR, st_C_struct
2229 union,          0,      st_C_struct
2230 struct,         0,      st_C_struct
2231 extern,         0,      st_C_extern
2232 enum,           0,      st_C_enum
2233 typedef,        0,      st_C_typedef
2234 define,         0,      st_C_define
2235 operator,       C_PLPL, st_C_operator
2236 template,       0,      st_C_template
2237 bool,           C_PLPL, st_C_typespec
2238 long,           0,      st_C_typespec
2239 short,          0,      st_C_typespec
2240 int,            0,      st_C_typespec
2241 char,           0,      st_C_typespec
2242 float,          0,      st_C_typespec
2243 double,         0,      st_C_typespec
2244 signed,         0,      st_C_typespec
2245 unsigned,       0,      st_C_typespec
2246 auto,           0,      st_C_typespec
2247 void,           0,      st_C_typespec
2248 static,         0,      st_C_typespec
2249 const,          0,      st_C_typespec
2250 volatile,       0,      st_C_typespec
2251 explicit,       C_PLPL, st_C_typespec
2252 mutable,        C_PLPL, st_C_typespec
2253 typename,       C_PLPL, st_C_typespec
2254 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2255 DEFUN,          0,      st_C_gnumacro
2256 SYSCALL,        0,      st_C_gnumacro
2257 ENTRY,          0,      st_C_gnumacro
2258 PSEUDO,         0,      st_C_gnumacro
2259 # These are defined inside C functions, so currently they are not met.
2260 # EXFUN used in glibc, DEFVAR_* in emacs.
2261 #EXFUN,         0,      st_C_gnumacro
2262 #DEFVAR_,       0,      st_C_gnumacro
2263 %]
2264 and replace lines between %< and %> with its output,
2265 then make in_word_set and C_stab_entry static. */
2266 /*%<*/
2267 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2268 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
2269 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2270
2271 #define TOTAL_KEYWORDS 47
2272 #define MIN_WORD_LENGTH 2
2273 #define MAX_WORD_LENGTH 15
2274 #define MIN_HASH_VALUE 18
2275 #define MAX_HASH_VALUE 138
2276 /* maximum key range = 121, duplicates = 0 */
2277
/* Hash function for the keyword table, as produced by gperf: do not
   edit it by hand, regenerate it from the input kept in the comment
   above instead.

   The value is LEN, plus the asso_values entry of the first character
   of STR, plus, when LEN is not 1 or 2, the asso_values entry of the
   third character.  STR must then have at least three characters,
   which in_word_set ensures by rejecting lengths below
   MIN_WORD_LENGTH before calling.  Characters that have no role in any
   keyword map to 139, which is more than MAX_HASH_VALUE. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* Every case falls through to the next one on purpose. */
  switch (hval)
    {
      default:
      case 3:
        hval += asso_values[(unsigned char)str[2]];
      case 2:
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
2329
/* Keyword lookup, as produced by gperf: do not edit it by hand,
   regenerate it from the input kept in the comment above instead.

   Return the address of the wordlist entry found at index
   hash (STR, LEN) if the first LEN characters of STR are equal to the
   beginning of its name, and 0 otherwise.  Lengths outside the range
   from MIN_WORD_LENGTH to MAX_WORD_LENGTH are rejected without
   hashing.  The {""} entries fill the hash values used by no keyword.

   NOTE(review): the comparison stops after LEN characters and does not
   check that the keyword ends there -- confirm that no proper prefix
   of a keyword can hash to the slot of that keyword. */
#ifdef __GNUC__
__inline
#endif
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"if",            0,      st_C_ignore},
      {""}, {""}, {""}, {""},
      {"int",           0,      st_C_typespec},
      {""}, {""},
      {"void",          0,      st_C_typespec},
      {""}, {""},
      {"interface",     C_JAVA, st_C_struct},
      {""},
      {"SYSCALL",       0,      st_C_gnumacro},
      {""},
      {"return",                0,      st_C_ignore},
      {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"while",         0,      st_C_ignore},
      {"auto",          0,      st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""},
      {"float",         0,      st_C_typespec},
      {"typedef",       0,      st_C_typedef},
      {"typename",      C_PLPL, st_C_typespec},
      {""}, {""}, {""},
      {"friend",                C_PLPL, st_C_ignore},
      {"volatile",      0,      st_C_typespec},
      {""}, {""},
      {"for",           0,      st_C_ignore},
      {"const",         0,      st_C_typespec},
      {"import",                C_JAVA, st_C_ignore},
      {""},
      {"define",        0,      st_C_define},
      {"long",          0,      st_C_typespec},
      {"implements",    C_JAVA, st_C_javastruct},
      {"signed",        0,      st_C_typespec},
      {""},
      {"extern",        0,      st_C_extern},
      {"extends",       C_JAVA, st_C_javastruct},
      {""},
      {"mutable",       C_PLPL, st_C_typespec},
      {"template",      0,      st_C_template},
      {"short",         0,      st_C_typespec},
      {"bool",          C_PLPL, st_C_typespec},
      {"char",          0,      st_C_typespec},
      {"class",         0,      st_C_class},
      {"operator",      C_PLPL, st_C_operator},
      {""},
      {"switch",                0,      st_C_ignore},
      {""},
      {"ENTRY",         0,      st_C_gnumacro},
      {""},
      {"package",       C_JAVA, st_C_ignore},
      {"union",         0,      st_C_struct},
      {"@end",          0,      st_C_objend},
      {"struct",        0,      st_C_struct},
      {"namespace",     C_PLPL, st_C_struct},
      {""}, {""},
      {"domain",        C_STAR, st_C_struct},
      {"@interface",    0,      st_C_objprot},
      {"PSEUDO",                0,      st_C_gnumacro},
      {"double",        0,      st_C_typespec},
      {""},
      {"@protocol",     0,      st_C_objprot},
      {""},
      {"static",        0,      st_C_typespec},
      {""}, {""},
      {"DEFUN",         0,      st_C_gnumacro},
      {""}, {""}, {""}, {""},
      {"explicit",      C_PLPL, st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""},
      {"enum",          0,      st_C_enum},
      {""}, {""},
      {"unsigned",      0,      st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"@implementation",0,     st_C_objimpl}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      /* Values above MAX_HASH_VALUE would index past the table. */
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          /* Compare the first character directly, then the other
             LEN - 1 characters with strncmp. */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
            return &wordlist[key];
        }
    }
  return 0;
}
2430 /*%>*/
2431
2432 static enum sym_type
2433 C_symtype (str, len, c_ext)
2434      char *str;
2435      int len;
2436      int c_ext;
2437 {
2438   register struct C_stab_entry *se = in_word_set (str, len);
2439
2440   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2441     return st_none;
2442   return se->type;
2443 }
2444
2445 \f
2446 /*
2447  * C functions and variables are recognized using a simple
2448  * finite automaton.  fvdef is its state variable.
2449  */
2450 static enum
2451 {
2452   fvnone,                       /* nothing seen */
2453   fdefunkey,                    /* Emacs DEFUN keyword seen */
2454   fdefunname,                   /* Emacs DEFUN name seen */
2455   foperator,                    /* func: operator keyword seen (cplpl) */
2456   fvnameseen,                   /* function or variable name seen */
2457   fstartlist,                   /* func: just after open parenthesis */
2458   finlist,                      /* func: in parameter list */
2459   flistseen,                    /* func: after parameter list */
2460   fignore,                      /* func: before open brace */
2461   vignore                       /* var-like: ignore until ';' */
2462 } fvdef;
2463
2464 static bool fvextern;           /* func or var: extern keyword seen; */
2465
2466 /*
2467  * typedefs are recognized using a simple finite automaton.
2468  * typdef is its state variable.
2469  */
2470 static enum
2471 {
2472   tnone,                        /* nothing seen */
2473   tkeyseen,                     /* typedef keyword seen */
2474   ttypeseen,                    /* defined type seen */
2475   tinbody,                      /* inside typedef body */
2476   tend,                         /* just before typedef tag */
2477   tignore                       /* junk after typedef tag */
2478 } typdef;
2479
2480 /*
2481  * struct-like structures (enum, struct and union) are recognized
2482  * using another simple finite automaton.  `structdef' is its state
2483  * variable.
2484  */
2485 static enum
2486 {
2487   snone,                        /* nothing seen yet,
2488                                    or in struct body if cblev > 0 */
2489   skeyseen,                     /* struct-like keyword seen */
2490   stagseen,                     /* struct-like tag seen */
2491   sintemplate,                  /* inside template (ignore) */
2492   scolonseen                    /* colon seen after struct-like tag */
2493 } structdef;
2494
2495 /*
2496  * When objdef is different from onone, objtag is the name of the class.
2497  */
2498 static char *objtag = "<uninited>";
2499
2500 /*
2501  * Yet another little state machine to deal with preprocessor lines.
2502  */
2503 static enum
2504 {
2505   dnone,                        /* nothing seen */
2506   dsharpseen,                   /* '#' seen as first char on line */
2507   ddefineseen,                  /* '#' and 'define' seen */
2508   dignorerest                   /* ignore rest of line */
2509 } definedef;
2510
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2530
2531
2532 /*
2533  * Use this structure to keep info about the token read, and how it
2534  * should be tagged.  Used by the make_C_tag function to build a tag.
2535  */
2536 static struct tok
2537 {
2538   bool valid;
2539   bool named;
2540   int offset;
2541   int length;
2542   int lineno;
2543   long linepos;
2544   char *line;
2545 } token;                        /* latest token read */
2546 static linebuffer token_name;   /* its name */
2547
2548 /*
2549  * Variables and functions for dealing with nested structures.
2550  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2551  */
2552 static void pushclass_above __P((int, char *, int));
2553 static void popclass_above __P((int));
2554 static void write_classname __P((linebuffer *, char *qualifier));
2555
2556 static struct {
2557   char **cname;                 /* nested class names */
2558   int *cblev;                   /* nested class curly brace level */
2559   int nl;                       /* class nesting level (elements used) */
2560   int size;                     /* length of the array */
2561 } cstack;                       /* stack for nested declaration tags */
2562 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2563 #define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function. */
2565 #define instruct        (structdef == snone && nestlev > 0                      \
2566                          && cblev == cstack.cblev[nestlev-1] + 1)
2567
2568 static void
2569 pushclass_above (cblev, str, len)
2570      int cblev;
2571      char *str;
2572      int len;
2573 {
2574   int nl;
2575
2576   popclass_above (cblev);
2577   nl = cstack.nl;
2578   if (nl >= cstack.size)
2579     {
2580       int size = cstack.size *= 2;
2581       xrnew (cstack.cname, size, char *);
2582       xrnew (cstack.cblev, size, int);
2583     }
2584   assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2585   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2586   cstack.cblev[nl] = cblev;
2587   cstack.nl = nl + 1;
2588 }
2589
2590 static void
2591 popclass_above (cblev)
2592      int cblev;
2593 {
2594   int nl;
2595
2596   for (nl = cstack.nl - 1;
2597        nl >= 0 && cstack.cblev[nl] >= cblev;
2598        nl--)
2599     {
2600       if (cstack.cname[nl] != NULL)
2601         free (cstack.cname[nl]);
2602       cstack.nl = nl;
2603     }
2604 }
2605
     /*
      * Store in CN the names recorded on cstack, outermost first, with
      * QUALIFIER written before every name after the outermost one.
      * Entries without a name are skipped.  When the stack is empty or its
      * outermost entry has no name, CN starts out as the empty string.
      */
2606 static void
2607 write_classname (cn, qualifier)
2608      linebuffer *cn;
2609      char *qualifier;
2610 {
2611   int depth, total;
2612   int seplen = strlen (qualifier);
2613   char *outer = (cstack.nl == 0) ? NULL : cstack.cname[0];
2614
2615   if (outer != NULL)
2616     {
2617       total = strlen (outer);
2618       linebuffer_setlen (cn, total);
2619       strcpy (cn->buffer, outer);
2620     }
2621   else
2622     {
2623       total = 0;
2624       cn->len = 0;
2625       cn->buffer[0] = '\0';
2626     }
2627   for (depth = 1; depth < cstack.nl; depth++)
2628     {
2629       char *inner = cstack.cname[depth];
2630
2631       if (inner != NULL)
2632         {
2633           int innerlen = strlen (inner);
2634           total += innerlen + seplen;
2635           linebuffer_setlen (cn, total);
2636           strncat (cn->buffer, qualifier, seplen);
2637           strncat (cn->buffer, inner, innerlen);
2638         }
2639     }
2640 }
2641
2642 \f
2643 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2644 static void make_C_tag __P((bool));
2645
2646 /*
2647  * consider_token ()
2648  *      checks to see if the current token is at the start of a
2649  *      function or variable, or corresponds to a typedef, or
2650  *      is a struct/union/enum tag, or #define, or an enum constant.
2651  *
2652  *      *IS_FUNC_OR_VAR gets TRUE when the token is taken as a function
2653  *      or variable, or is a #define macro with args.  C_EXTP points to
      *      the C extensions mask, i.e. which language we are looking at;
      *      the mask is updated when C++ is detected automatically.
      *
      *      Returns TRUE if the token is a candidate for a tag, FALSE if
      *      it only advanced the state machines listed below.
2654  *
2655  * Globals
2656  *      fvdef                   IN OUT
2657  *      structdef               IN OUT
2658  *      definedef               IN OUT
2659  *      typdef                  IN OUT
2660  *      objdef                  IN OUT
2661  */
2662
2663 static bool
2664 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2665      register char *str;        /* IN: token pointer */
2666      register int len;          /* IN: token length */
2667      register int c;            /* IN: first char after the token */
2668      int *c_extp;               /* IN, OUT: C extensions mask */
2669      int cblev;                 /* IN: curly brace level */
2670      int parlev;                /* IN: parenthesis level */
2671      bool *is_func_or_var;      /* OUT: function or variable found */
2672 {
2673   /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2674      structtype is the type of the preceding struct-like keyword, and
2675      structcblev is the curly brace level where it has been seen. */
2676   static enum sym_type structtype;
2677   static int structcblev;
2678   static enum sym_type toktype;
2679
2680
2681   toktype = C_symtype (str, len, *c_extp);
2682
2683   /*
2684    * Advance the definedef state machine.
2685    */
2686   switch (definedef)
2687     {
2688     case dnone:
2689       /* We're not on a preprocessor line. */
2690       if (toktype == st_C_gnumacro)
2691         {
2692           fvdef = fdefunkey;
2693           return FALSE;
2694         }
2695       break;
2696     case dsharpseen:
2697       if (toktype == st_C_define)
2698         {
2699           definedef = ddefineseen;
2700         }
2701       else
2702         {
2703           definedef = dignorerest;
2704         }
2705       return FALSE;
2706     case ddefineseen:
2707       /*
2708        * Make a tag for any macro, unless it is a constant
2709        * and constantypedefs is FALSE.
2710        */
2711       definedef = dignorerest;
2712       *is_func_or_var = (c == '(');
2713       if (!*is_func_or_var && !constantypedefs)
2714         return FALSE;
2715       else
2716         return TRUE;
2717     case dignorerest:
2718       return FALSE;
2719     default:
2720       error ("internal error: definedef value.", (char *)NULL);
2721     }
2722
2723   /*
2724    * Now typedefs
2725    */
2726   switch (typdef)
2727     {
2728     case tnone:
2729       if (toktype == st_C_typedef)
2730         {
2731           if (typedefs)
2732             typdef = tkeyseen;
2733           fvextern = FALSE;
2734           fvdef = fvnone;
2735           return FALSE;
2736         }
2737       break;
2738     case tkeyseen:
2739       switch (toktype)
2740         {
2741         case st_none:
2742         case st_C_typespec:
2743         case st_C_class:
2744         case st_C_struct:
2745         case st_C_enum:
2746           typdef = ttypeseen;
2747           break;
2748         }
2749       break;
2750     case ttypeseen:
2751       if (structdef == snone && fvdef == fvnone)
2752         {
2753           fvdef = fvnameseen;
2754           return TRUE;
2755         }
2756       break;
2757     case tend:
2758       switch (toktype)
2759         {
2760         case st_C_typespec:
2761         case st_C_class:
2762         case st_C_struct:
2763         case st_C_enum:
2764           return FALSE;
2765         }
2766       return TRUE;
2767     }
2768
2769   /*
2770    * This structdef business is NOT invoked when we are ctags and the
2771    * file is plain C.  This is because a struct tag may have the same
2772    * name as another tag, and this loses with ctags.
2773    */
2774   switch (toktype)
2775     {
2776     case st_C_javastruct:
2777       if (structdef == stagseen)
2778         structdef = scolonseen;
2779       return FALSE;
2780     case st_C_template:
2781     case st_C_class:
2782       if (cblev == 0
2783           && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2784           && definedef == dnone && structdef == snone
2785           && typdef == tnone && fvdef == fvnone)
2786         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2787       if (toktype == st_C_template)
2788         break;
2789       /* FALLTHRU */
2790     case st_C_struct:
2791     case st_C_enum:
2792       if (parlev == 0
2793           && fvdef != vignore
2794           && (typdef == tkeyseen
2795               || (typedefs_or_cplusplus && structdef == snone)))
2796         {
2797           structdef = skeyseen;
2798           structtype = toktype;
2799           structcblev = cblev;
2800         }
2801       return FALSE;
2802     }
2803
2804   if (structdef == skeyseen)
2805     {
2806       structdef = stagseen;
2807       return TRUE;
2808     }
2809
2810   if (typdef != tnone)
2811     definedef = dnone;
2812
2813   /* Detect Objective C constructs. */
2814   switch (objdef)
2815     {
2816     case onone:
2817       switch (toktype)
2818         {
2819         case st_C_objprot:
2820           objdef = oprotocol;
2821           return FALSE;
2822         case st_C_objimpl:
2823           objdef = oimplementation;
2824           return FALSE;
2825         }
2826       break;
2827     case oimplementation:
2828       /* Save the class tag for functions or variables defined inside. */
2829       objtag = savenstr (str, len);
2830       objdef = oinbody;
2831       return FALSE;
2832     case oprotocol:
2833       /* Save the class tag for categories. */
2834       objtag = savenstr (str, len);
2835       objdef = otagseen;
2836       *is_func_or_var = TRUE;
2837       return TRUE;
2838     case oparenseen:
2839       objdef = ocatseen;
2840       *is_func_or_var = TRUE;
2841       return TRUE;
2842     case oinbody:
2843       break;
2844     case omethodsign:
2845       if (parlev == 0)
2846         {
2847           objdef = omethodtag;
2848           linebuffer_setlen (&token_name, len);
2849           strncpy (token_name.buffer, str, len);
2850           token_name.buffer[len] = '\0';
2851           return TRUE;
2852         }
2853       return FALSE;
2854     case omethodcolon:
2855       if (parlev == 0)
2856         objdef = omethodparm;
2857       return FALSE;
2858     case omethodparm:
2859       if (parlev == 0)
2860         {
2861           objdef = omethodtag;
2862           linebuffer_setlen (&token_name, token_name.len + len);
2863           strncat (token_name.buffer, str, len);
2864           return TRUE;
2865         }
2866       return FALSE;
2867     case oignore:
2868       if (toktype == st_C_objend)
2869         {
2870           /* Memory leakage here: the string pointed by objtag is
2871              never released, because many tests would be needed to
2872              avoid breaking on incorrect input code.  The amount of
2873              memory leaked here is the sum of the lengths of the
2874              class tags.
2875           free (objtag); */
2876           objdef = onone;
2877         }
2878       return FALSE;
2879     }
2880
2881   /* A function, variable or enum constant? */
2882   switch (toktype)
2883     {
2884     case st_C_extern:
2885       fvextern = TRUE;
2886       /* FALLTHRU */
2887     case st_C_typespec:
2888       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2889         fvdef = fvnone;         /* should be useless */
2890       return FALSE;
2891     case st_C_ignore:
2892       fvextern = FALSE;
2893       fvdef = vignore;
2894       return FALSE;
2895     case st_C_operator:
2896       fvdef = foperator;
2897       *is_func_or_var = TRUE;
2898       return TRUE;
2899     case st_none:
2900       if (constantypedefs
2901           && structdef == snone
2902           && structtype == st_C_enum && cblev > structcblev)
2903         return TRUE;            /* enum constant */
2904       switch (fvdef)
2905         {
2906         case fdefunkey:
2907           if (cblev > 0)
2908             break;
2909           fvdef = fdefunname;   /* GNU macro */
2910           *is_func_or_var = TRUE;
2911           return TRUE;
2912         case fvnone:
2913           if ((strneq (str, "asm", 3) && endtoken (str[3]))
2914               || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2915             {
2916               fvdef = vignore;
2917               return FALSE;
2918             }
               /* Only look at the last 10 characters when the token is at
                  least that long; otherwise str+len-10 would point before
                  the start of the token and the comparison would read
                  memory that does not belong to it. */
2919           if ((*c_extp & C_PLPL) && len >= 10
                   && strneq (str+len-10, "::operator", 10))
2920             {
2921               fvdef = foperator;
2922               *is_func_or_var = TRUE;
2923               return TRUE;
2924             }
2925           if (cblev > 0 && !instruct)
2926             break;
2927           fvdef = fvnameseen;   /* function or variable */
2928           *is_func_or_var = TRUE;
2929           return TRUE;
2930         }
2931       break;
2932     }
2933
2934   return FALSE;
2935 }
2936
2937 \f
2938 /*
2939  * C_entries often keeps pointers to tokens or lines which are older than
2940  * the line currently read.  By keeping two line buffers, and switching
2941  * them at end of line, it is possible to use those pointers.
      *
      * The macros below expand to expressions over variables named curndx,
      * newndx, lp, quotednl, inf, charno, token, savetoken and definedef,
      * so they can only be used where those names are in scope.
2942  */
2943 static struct
2944 {
2945   long linepos;                 /* value of charno saved before reading the line */
2946   linebuffer lb;                /* the text of the line */
2947 } lbs[2];
2948
     /* True when the current buffer is also the one most recently read into. */
2949 #define current_lb_is_new (newndx == curndx)
     /* Make the other element of lbs the current buffer. */
2950 #define switch_line_buffers() (curndx = 1 - curndx)
2951
     /* Accessors for the current buffer and for the most recently read one. */
2952 #define curlb (lbs[curndx].lb)
2953 #define newlb (lbs[newndx].lb)
2954 #define curlinepos (lbs[curndx].linepos)
2955 #define newlinepos (lbs[newndx].linepos)
2956
     /* Read the next line of inf into the current buffer, point lp at its
        start, clear quotednl and mark the current buffer as the new one.
        definedef is left untouched. */
2957 #define CNL_SAVE_DEFINEDEF()                                            \
2958 do {                                                                    \
2959   curlinepos = charno;                                                  \
2960   readline (&curlb, inf);                                               \
2961   lp = curlb.buffer;                                                    \
2962   quotednl = FALSE;                                                     \
2963   newndx = curndx;                                                      \
2964 } while (0)
2965
     /* Like CNL_SAVE_DEFINEDEF, but also restore a token saved in savetoken,
        if any, and reset definedef to dnone. */
2966 #define CNL()                                                           \
2967 do {                                                                    \
2968   CNL_SAVE_DEFINEDEF();                                                 \
2969   if (savetoken.valid)                                                  \
2970     {                                                                   \
2971       token = savetoken;                                                \
2972       savetoken.valid = FALSE;                                          \
2973     }                                                                   \
2974   definedef = dnone;                                                    \
2975 } while (0)
2976
2977
     /*
      * Emit a tag for the token held in the global `token', through pfnote
      * when traditional_tag_style is set and through new_pfnote otherwise,
      * then mark the token as no longer valid.  ISFUN is handed on
      * unchanged to whichever of the two is called.
      */
2978 static void
2979 make_C_tag (isfun)
2980      bool isfun;
2981 {
2982   /* Callers should only get here with a valid token; outside DEBUG
2983      builds an invalid one (bad input, internal error) is ignored. */
2984   if (!DEBUG && !token.valid)
2985     return;
2986
2987   if (!traditional_tag_style)
2988     new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2989                 token.offset+token.length+1, token.lineno, token.linepos);
2990   else
2991     {
2992       /* Traditional naming: build the name here and pass it to pfnote. */
2993       char *name = NULL;
2994
2995       if (CTAGS || token.named)
2996         name = savestr (token_name.buffer);
2997       if (DEBUG && !token.valid)
2998         {
2999           /* DEBUG only: make tags from invalid tokens easy to spot. */
3000           if (!token.named)
3001             name = savestr ("##invalid##");
3002           else
3003             name = concat (name, "##invalid##", "");
3004         }
3005       pfnote (name, isfun, token.line,
3006               token.offset+token.length+1, token.lineno, token.linepos);
3007     }
3008   token.valid = FALSE;
3009 }
3010
3011
3012 /*
3013  * C_entries ()
3014  *      This routine finds functions, variables, typedefs,
3015  *      #define's, enum constants and struct/union/enum definitions in
3016  *      C syntax and adds them to the list.
3017  */
3018 static void
3019 C_entries (c_ext, inf)
3020      int c_ext;                 /* extension of C */
3021      FILE *inf;                 /* input file */
3022 {
3023   register char c;              /* latest char read; '\0' for end of line */
3024   register char *lp;            /* pointer one beyond the character `c' */
3025   int curndx, newndx;           /* indices for current and new lb */
3026   register int tokoff;          /* offset in line of start of current token */
3027   register int toklen;          /* length of current token */
3028   char *qualifier;              /* string used to qualify names */
3029   int qlen;                     /* length of qualifier */
3030   int cblev;                    /* current curly brace level */
3031   int parlev;                   /* current parenthesis level */
3032   int typdefcblev;              /* cblev where a typedef struct body begun */
3033   bool incomm, inquote, inchar, quotednl, midtoken;
3034   bool cplpl, cjava;
3035   bool yacc_rules;              /* in the rules part of a yacc file */
3036   struct tok savetoken;         /* token saved during preprocessor handling */
3037
3038
3039   initbuffer (&token_name);
3040   initbuffer (&lbs[0].lb);
3041   initbuffer (&lbs[1].lb);
3042   if (cstack.size == 0)
3043     {
3044       cstack.size = (DEBUG) ? 1 : 4;
3045       cstack.nl = 0;
3046       cstack.cname = xnew (cstack.size, char *);
3047       cstack.cblev = xnew (cstack.size, int);
3048     }
3049
3050   tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3051   curndx = newndx = 0;
3052   lp = curlb.buffer;
3053   *lp = 0;
3054
3055   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3056   structdef = snone; definedef = dnone; objdef = onone;
3057   yacc_rules = FALSE;
3058   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3059   token.valid = savetoken.valid = FALSE;
3060   cblev = 0;
3061   parlev = 0;
3062   cplpl = (c_ext & C_PLPL) == C_PLPL;
3063   cjava = (c_ext & C_JAVA) == C_JAVA;
3064   if (cjava)
3065     { qualifier = "."; qlen = 1; }
3066   else
3067     { qualifier = "::"; qlen = 2; }
3068
3069
3070   while (!feof (inf))
3071     {
3072       c = *lp++;
3073       if (c == '\\')
3074         {
3075           /* If we're at the end of the line, the next character is a
3076              '\0'; don't skip it, because it's the thing that tells us
3077              to read the next line.  */
3078           if (*lp == '\0')
3079             {
3080               quotednl = TRUE;
3081               continue;
3082             }
3083           lp++;
3084           c = ' ';
3085         }
3086       else if (incomm)
3087         {
3088           switch (c)
3089             {
3090             case '*':
3091               if (*lp == '/')
3092                 {
3093                   c = *lp++;
3094                   incomm = FALSE;
3095                 }
3096               break;
3097             case '\0':
3098               /* Newlines inside comments do not end macro definitions in
3099                  traditional cpp. */
3100               CNL_SAVE_DEFINEDEF ();
3101               break;
3102             }
3103           continue;
3104         }
3105       else if (inquote)
3106         {
3107           switch (c)
3108             {
3109             case '"':
3110               inquote = FALSE;
3111               break;
3112             case '\0':
3113               /* Newlines inside strings do not end macro definitions
3114                  in traditional cpp, even though compilers don't
3115                  usually accept them. */
3116               CNL_SAVE_DEFINEDEF ();
3117               break;
3118             }
3119           continue;
3120         }
3121       else if (inchar)
3122         {
3123           switch (c)
3124             {
3125             case '\0':
3126               /* Hmmm, something went wrong. */
3127               CNL ();
3128               /* FALLTHRU */
3129             case '\'':
3130               inchar = FALSE;
3131               break;
3132             }
3133           continue;
3134         }
3135       else
3136         switch (c)
3137           {
3138           case '"':
3139             inquote = TRUE;
3140             switch (fvdef)
3141               {
3142               case fdefunkey:
3143               case fstartlist:
3144               case finlist:
3145               case fignore:
3146               case vignore:
3147                 break;
3148               default:
3149                 fvextern = FALSE;
3150                 fvdef = fvnone;
3151               }
3152             continue;
3153           case '\'':
3154             inchar = TRUE;
3155             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3156               {
3157                 fvextern = FALSE;
3158                 fvdef = fvnone;
3159               }
3160             continue;
3161           case '/':
3162             if (*lp == '*')
3163               {
3164                 lp++;
3165                 incomm = TRUE;
3166                 continue;
3167               }
3168             else if (/* cplpl && */ *lp == '/')
3169               {
3170                 c = '\0';
3171                 break;
3172               }
3173             else
3174               break;
3175           case '%':
3176             if ((c_ext & YACC) && *lp == '%')
3177               {
3178                 /* Entering or exiting rules section in yacc file. */
3179                 lp++;
3180                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3181                 typdef = tnone; structdef = snone;
3182                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3183                 cblev = 0;
3184                 yacc_rules = !yacc_rules;
3185                 continue;
3186               }
3187             else
3188               break;
3189           case '#':
3190             if (definedef == dnone)
3191               {
3192                 char *cp;
3193                 bool cpptoken = TRUE;
3194
3195                 /* Look back on this line.  If all blanks, or nonblanks
3196                    followed by an end of comment, this is a preprocessor
3197                    token. */
3198                 for (cp = newlb.buffer; cp < lp-1; cp++)
3199                   if (!iswhite (*cp))
3200                     {
3201                       if (*cp == '*' && *(cp+1) == '/')
3202                         {
3203                           cp++;
3204                           cpptoken = TRUE;
3205                         }
3206                       else
3207                         cpptoken = FALSE;
3208                     }
3209                 if (cpptoken)
3210                   definedef = dsharpseen;
3211               } /* if (definedef == dnone) */
3212
3213             continue;
3214           } /* switch (c) */
3215
3216
3217       /* Consider token only if some involved conditions are satisfied. */
3218       if (typdef != tignore
3219           && definedef != dignorerest
3220           && fvdef != finlist
3221           && structdef != sintemplate
3222           && (definedef != dnone
3223               || structdef != scolonseen))
3224         {
3225           if (midtoken)
3226             {
3227               if (endtoken (c))
3228                 {
3229                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3230                     {
3231                       /*
3232                        * This handles :: in the middle, but not at the
3233                        * beginning of an identifier.  Also, space-separated
3234                        * :: is not recognised.
3235                        */
3236                       lp += 2;
3237                       toklen += 2;
3238                       c = lp[-1];
3239                       goto still_in_token;
3240                     }
3241                   else
3242                     {
3243                       bool funorvar = FALSE;
3244
3245                       if (yacc_rules
3246                           || consider_token (newlb.buffer + tokoff, toklen, c,
3247                                              &c_ext, cblev, parlev, &funorvar))
3248                         {
3249                           if (fvdef == foperator)
3250                             {
3251                               char *oldlp = lp;
3252                               lp = skip_spaces (lp-1);
3253                               if (*lp != '\0')
3254                                 lp += 1;
3255                               while (*lp != '\0'
3256                                      && !iswhite (*lp) && *lp != '(')
3257                                 lp += 1;
3258                               c = *lp++;
3259                               toklen += lp - oldlp;
3260                             }
3261                           token.named = FALSE;
3262                           if ((c_ext & C_EXT)   /* not pure C */
3263                               && nestlev > 0 && definedef == dnone)
3264                             /* in struct body */
3265                             {
3266                               write_classname (&token_name, qualifier);
3267                               linebuffer_setlen (&token_name,
3268                                                  token_name.len+qlen+toklen);
3269                               strcat (token_name.buffer, qualifier);
3270                               strncat (token_name.buffer,
3271                                        newlb.buffer + tokoff, toklen);
3272                               token.named = TRUE;
3273                             }
3274                           else if (objdef == ocatseen)
3275                             /* Objective C category */
3276                             {
3277                               int len = strlen (objtag) + 2 + toklen;
3278                               linebuffer_setlen (&token_name, len);
3279                               strcpy (token_name.buffer, objtag);
3280                               strcat (token_name.buffer, "(");
3281                               strncat (token_name.buffer,
3282                                        newlb.buffer + tokoff, toklen);
3283                               strcat (token_name.buffer, ")");
3284                               token.named = TRUE;
3285                             }
3286                           else if (objdef == omethodtag
3287                                    || objdef == omethodparm)
3288                             /* Objective C method */
3289                             {
3290                               token.named = TRUE;
3291                             }
3292                           else if (fvdef == fdefunname)
3293                             /* GNU DEFUN and similar macros */
3294                             {
3295                               bool defun = (newlb.buffer[tokoff] == 'F');
3296                               int off = tokoff;
3297                               int len = toklen;
3298
3299                               /* Rewrite the tag so that emacs lisp DEFUNs
3300                                  can be found by their elisp name */
3301                               if (defun)
3302                                 {
3303                                   off += 1;
3304                                   len -= 1;
3305                                 }
3306                               len = toklen;
3307                               linebuffer_setlen (&token_name, len);
3308                               strncpy (token_name.buffer,
3309                                        newlb.buffer + off, len);
3310                               token_name.buffer[len] = '\0';
3311                               if (defun)
3312                                 while (--len >= 0)
3313                                   if (token_name.buffer[len] == '_')
3314                                     token_name.buffer[len] = '-';
3315                               token.named = defun;
3316                             }
3317                           else
3318                             {
3319                               linebuffer_setlen (&token_name, toklen);
3320                               strncpy (token_name.buffer,
3321                                        newlb.buffer + tokoff, toklen);
3322                               token_name.buffer[toklen] = '\0';
3323                               /* Name macros and members. */
3324                               token.named = (structdef == stagseen
3325                                              || typdef == ttypeseen
3326                                              || typdef == tend
3327                                              || (funorvar
3328                                                  && definedef == dignorerest)
3329                                              || (funorvar
3330                                                  && definedef == dnone
3331                                                  && structdef == snone
3332                                                  && cblev > 0));
3333                             }
3334                           token.lineno = lineno;
3335                           token.offset = tokoff;
3336                           token.length = toklen;
3337                           token.line = newlb.buffer;
3338                           token.linepos = newlinepos;
3339                           token.valid = TRUE;
3340
3341                           if (definedef == dnone
3342                               && (fvdef == fvnameseen
3343                                   || fvdef == foperator
3344                                   || structdef == stagseen
3345                                   || typdef == tend
3346                                   || typdef == ttypeseen
3347                                   || objdef != onone))
3348                             {
3349                               if (current_lb_is_new)
3350                                 switch_line_buffers ();
3351                             }
3352                           else if (definedef != dnone
3353                                    || fvdef == fdefunname
3354                                    || instruct)
3355                             make_C_tag (funorvar);
3356                         }
3357                       midtoken = FALSE;
3358                     }
3359                 } /* if (endtoken (c)) */
3360               else if (intoken (c))
3361                 still_in_token:
3362                 {
3363                   toklen++;
3364                   continue;
3365                 }
3366             } /* if (midtoken) */
3367           else if (begtoken (c))
3368             {
3369               switch (definedef)
3370                 {
3371                 case dnone:
3372                   switch (fvdef)
3373                     {
3374                     case fstartlist:
3375                       fvdef = finlist;
3376                       continue;
3377                     case flistseen:
3378                       make_C_tag (TRUE); /* a function */
3379                       fvdef = fignore;
3380                       break;
3381                     case fvnameseen:
3382                       fvdef = fvnone;
3383                       break;
3384                     }
3385                   if (structdef == stagseen && !cjava)
3386                     {
3387                       popclass_above (cblev);
3388                       structdef = snone;
3389                     }
3390                   break;
3391                 case dsharpseen:
3392                   savetoken = token;
3393                   break;
3394                 }
3395               if (!yacc_rules || lp == newlb.buffer + 1)
3396                 {
3397                   tokoff = lp - 1 - newlb.buffer;
3398                   toklen = 1;
3399                   midtoken = TRUE;
3400                 }
3401               continue;
3402             } /* if (begtoken) */
3403         } /* if must look at token */
3404
3405
3406       /* Detect end of line, colon, comma, semicolon and various braces
3407          after having handled a token.*/
3408       switch (c)
3409         {
3410         case ':':
3411           if (yacc_rules && token.offset == 0 && token.valid)
3412             {
3413               make_C_tag (FALSE); /* a yacc function */
3414               break;
3415             }
3416           if (definedef != dnone)
3417             break;
3418           switch (objdef)
3419             {
3420             case  otagseen:
3421               objdef = oignore;
3422               make_C_tag (TRUE); /* an Objective C class */
3423               break;
3424             case omethodtag:
3425             case omethodparm:
3426               objdef = omethodcolon;
3427               linebuffer_setlen (&token_name, token_name.len + 1);
3428               strcat (token_name.buffer, ":");
3429               break;
3430             }
3431           if (structdef == stagseen)
3432             structdef = scolonseen;
3433           break;
3434         case ';':
3435           if (definedef != dnone)
3436             break;
3437           switch (typdef)
3438             {
3439             case tend:
3440             case ttypeseen:
3441               make_C_tag (FALSE); /* a typedef */
3442               typdef = tnone;
3443               fvdef = fvnone;
3444               break;
3445             case tnone:
3446             case tinbody:
3447             case tignore:
3448               switch (fvdef)
3449                 {
3450                 case fignore:
3451                   if (typdef == tignore)
3452                     fvdef = fvnone;
3453                   break;
3454                 case fvnameseen:
3455                   if ((globals && cblev == 0 && (!fvextern || declarations))
3456                       || (members && instruct))
3457                     make_C_tag (FALSE); /* a variable */
3458                   fvextern = FALSE;
3459                   fvdef = fvnone;
3460                   token.valid = FALSE;
3461                   break;
3462                 case flistseen:
3463                   if ((declarations && typdef == tnone && !instruct)
3464                       || (members && typdef != tignore && instruct))
3465                     make_C_tag (TRUE);  /* a function declaration */
3466                   /* FALLTHRU */
3467                 default:
3468                   fvextern = FALSE;
3469                   fvdef = fvnone;
3470                   if (declarations
3471                       && structdef == stagseen && (c_ext & C_PLPL))
3472                     make_C_tag (FALSE); /* forward declaration */
3473                   else
3474                     /* The following instruction invalidates the token.
3475                        Probably the token should be invalidated in all other
3476                        cases where some state machine is reset prematurely. */
3477                     token.valid = FALSE;
3478                 } /* switch (fvdef) */
3479               /* FALLTHRU */
3480             default:
3481               if (!instruct)
3482                 typdef = tnone;
3483             }
3484           if (structdef == stagseen)
3485             structdef = snone;
3486           break;
3487         case ',':
3488           if (definedef != dnone)
3489             break;
3490           switch (objdef)
3491             {
3492             case omethodtag:
3493             case omethodparm:
3494               make_C_tag (TRUE); /* an Objective C method */
3495               objdef = oinbody;
3496               break;
3497             }
3498           switch (fvdef)
3499             {
3500             case fdefunkey:
3501             case foperator:
3502             case fstartlist:
3503             case finlist:
3504             case fignore:
3505             case vignore:
3506               break;
3507             case fdefunname:
3508               fvdef = fignore;
3509               break;
3510             case fvnameseen:    /* a variable */
3511               if ((globals && cblev == 0 && (!fvextern || declarations))
3512                   || (members && instruct))
3513                 make_C_tag (FALSE);
3514               break;
3515             case flistseen:     /* a function */
3516               if ((declarations && typdef == tnone && !instruct)
3517                   || (members && typdef != tignore && instruct))
3518                 {
3519                   make_C_tag (TRUE); /* a function declaration */
3520                   fvdef = fvnameseen;
3521                 }
3522               else if (!declarations)
3523                 fvdef = fvnone;
3524               token.valid = FALSE;
3525               break;
3526             default:
3527               fvdef = fvnone;
3528             }
3529           if (structdef == stagseen)
3530             structdef = snone;
3531           break;
3532         case '[':
3533           if (definedef != dnone)
3534             break;
3535           if (structdef == stagseen)
3536             structdef = snone;
3537           switch (typdef)
3538             {
3539             case ttypeseen:
3540             case tend:
3541               typdef = tignore;
3542               make_C_tag (FALSE);       /* a typedef */
3543               break;
3544             case tnone:
3545             case tinbody:
3546               switch (fvdef)
3547                 {
3548                 case foperator:
3549                 case finlist:
3550                 case fignore:
3551                 case vignore:
3552                   break;
3553                 case fvnameseen:
3554                   if ((members && cblev == 1)
3555                       || (globals && cblev == 0
3556                           && (!fvextern || declarations)))
3557                     make_C_tag (FALSE); /* a variable */
3558                   /* FALLTHRU */
3559                 default:
3560                   fvdef = fvnone;
3561                 }
3562               break;
3563             }
3564           break;
3565         case '(':
3566           if (definedef != dnone)
3567             break;
3568           if (objdef == otagseen && parlev == 0)
3569             objdef = oparenseen;
3570           switch (fvdef)
3571             {
3572             case fvnameseen:
3573               if (typdef == ttypeseen
3574                   && *lp != '*'
3575                   && !instruct)
3576                 {
3577                   /* This handles constructs like:
3578                      typedef void OperatorFun (int fun); */
3579                   make_C_tag (FALSE);
3580                   typdef = tignore;
3581                   fvdef = fignore;
3582                   break;
3583                 }
3584               /* FALLTHRU */
3585             case foperator:
3586               fvdef = fstartlist;
3587               break;
3588             case flistseen:
3589               fvdef = finlist;
3590               break;
3591             }
3592           parlev++;
3593           break;
3594         case ')':
3595           if (definedef != dnone)
3596             break;
3597           if (objdef == ocatseen && parlev == 1)
3598             {
3599               make_C_tag (TRUE); /* an Objective C category */
3600               objdef = oignore;
3601             }
3602           if (--parlev == 0)
3603             {
3604               switch (fvdef)
3605                 {
3606                 case fstartlist:
3607                 case finlist:
3608                   fvdef = flistseen;
3609                   break;
3610                 }
3611               if (!instruct
3612                   && (typdef == tend
3613                       || typdef == ttypeseen))
3614                 {
3615                   typdef = tignore;
3616                   make_C_tag (FALSE); /* a typedef */
3617                 }
3618             }
3619           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3620             parlev = 0;
3621           break;
3622         case '{':
3623           if (definedef != dnone)
3624             break;
3625           if (typdef == ttypeseen)
3626             {
3627               /* Whenever typdef is set to tinbody (currently only
3628                  here), typdefcblev should be set to cblev. */
3629               typdef = tinbody;
3630               typdefcblev = cblev;
3631             }
3632           switch (fvdef)
3633             {
3634             case flistseen:
3635               make_C_tag (TRUE);    /* a function */
3636               /* FALLTHRU */
3637             case fignore:
3638               fvdef = fvnone;
3639               break;
3640             case fvnone:
3641               switch (objdef)
3642                 {
3643                 case otagseen:
3644                   make_C_tag (TRUE); /* an Objective C class */
3645                   objdef = oignore;
3646                   break;
3647                 case omethodtag:
3648                 case omethodparm:
3649                   make_C_tag (TRUE); /* an Objective C method */
3650                   objdef = oinbody;
3651                   break;
3652                 default:
3653                   /* Neutralize `extern "C" {' grot. */
3654                   if (cblev == 0 && structdef == snone && nestlev == 0
3655                       && typdef == tnone)
3656                     cblev = -1;
3657                 }
3658               break;
3659             }
3660           switch (structdef)
3661             {
3662             case skeyseen:         /* unnamed struct */
3663               pushclass_above (cblev, NULL, 0);
3664               structdef = snone;
3665               break;
3666             case stagseen:         /* named struct or enum */
3667             case scolonseen:       /* a class */
3668               pushclass_above (cblev, token.line+token.offset, token.length);
3669               structdef = snone;
3670               make_C_tag (FALSE);  /* a struct or enum */
3671               break;
3672             }
3673           cblev++;
3674           break;
3675         case '*':
3676           if (definedef != dnone)
3677             break;
3678           if (fvdef == fstartlist)
3679             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3680           break;
3681         case '}':
3682           if (definedef != dnone)
3683             break;
3684           if (!noindentypedefs && lp == newlb.buffer + 1)
3685             {
3686               cblev = 0;        /* reset curly brace level if first column */
3687               parlev = 0;       /* also reset paren level, just in case... */
3688             }
3689           else if (cblev > 0)
3690             cblev--;
3691           popclass_above (cblev);
3692           structdef = snone;
3693           /* Only if typdef == tinbody is typdefcblev significant. */
3694           if (typdef == tinbody && cblev <= typdefcblev)
3695             {
3696               assert (cblev == typdefcblev);
3697               typdef = tend;
3698             }
3699           break;
3700         case '=':
3701           if (definedef != dnone)
3702             break;
3703           switch (fvdef)
3704             {
3705             case foperator:
3706             case finlist:
3707             case fignore:
3708             case vignore:
3709               break;
3710             case fvnameseen:
3711               if ((members && cblev == 1)
3712                   || (globals && cblev == 0 && (!fvextern || declarations)))
3713                 make_C_tag (FALSE); /* a variable */
3714               /* FALLTHRU */
3715             default:
3716               fvdef = vignore;
3717             }
3718           break;
3719         case '<':
3720           if (cplpl && structdef == stagseen)
3721             {
3722               structdef = sintemplate;
3723               break;
3724             }
3725           goto resetfvdef;
3726         case '>':
3727           if (structdef == sintemplate)
3728             {
3729               structdef = stagseen;
3730               break;
3731             }
3732           goto resetfvdef;
3733         case '+':
3734         case '-':
3735           if (objdef == oinbody && cblev == 0)
3736             {
3737               objdef = omethodsign;
3738               break;
3739             }
3740           /* FALLTHRU */
3741         resetfvdef:
3742         case '#': case '~': case '&': case '%': case '/': case '|':
3743         case '^': case '!': case '.': case '?': case ']':
3744           if (definedef != dnone)
3745             break;
3746           /* These surely cannot follow a function tag in C. */
3747           switch (fvdef)
3748             {
3749             case foperator:
3750             case finlist:
3751             case fignore:
3752             case vignore:
3753               break;
3754             default:
3755               fvdef = fvnone;
3756             }
3757           break;
3758         case '\0':
3759           if (objdef == otagseen)
3760             {
3761               make_C_tag (TRUE); /* an Objective C class */
3762               objdef = oignore;
3763             }
3764           /* If a macro spans multiple lines don't reset its state. */
3765           if (quotednl)
3766             CNL_SAVE_DEFINEDEF ();
3767           else
3768             CNL ();
3769           break;
3770         } /* switch (c) */
3771
3772     } /* while not eof */
3773
3774   free (token_name.buffer);
3775   free (lbs[0].lb.buffer);
3776   free (lbs[1].lb.buffer);
3777 }
3778
3779 /*
3780  * Process either a C++ file or a C file depending on the setting
3781  * of a global flag.
3782  */
3783 static void
3784 default_C_entries (inf)
3785      FILE *inf;
3786 {
3787   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3788 }
3789
/* Tag INF as plain C: C_entries is called with no dialect flags set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_dialect_flags = 0;

  C_entries (no_dialect_flags, inf);
}
3797
3798 /* Always do C++. */
3799 static void
3800 Cplusplus_entries (inf)
3801      FILE *inf;
3802 {
3803   C_entries (C_PLPL, inf);
3804 }
3805
3806 /* Always do Java. */
3807 static void
3808 Cjava_entries (inf)
3809      FILE *inf;
3810 {
3811   C_entries (C_JAVA, inf);
3812 }
3813
3814 /* Always do C*. */
3815 static void
3816 Cstar_entries (inf)
3817      FILE *inf;
3818 {
3819   C_entries (C_STAR, inf);
3820 }
3821
3822 /* Always do Yacc. */
3823 static void
3824 Yacc_entries (inf)
3825      FILE *inf;
3826 {
3827   C_entries (YACC, inf);
3828 }
3829
3830 \f
/* Useful macros for the line-oriented parsers below. */

/* Loop header: as long as FILE_POINTER is not at end of file, read the
   next line into LINE_BUFFER and point CHAR_POINTER at the start of
   its text.  The statement that follows the macro is the loop body. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* loop test */                 \
         &&                             /* work done before the body */ \
         (readline (&line_buffer, file_pointer),                        \
          char_pointer = line_buffer.buffer,                            \
          TRUE))

/* Non-zero if CP points at KEYWORD and the character right after it
   satisfies notinname; on success CP is advanced past the keyword and
   any following spaces.  CP must be an lvalue and is evaluated more
   than once. */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof (keyword) - 1) /* cp is at keyword */  \
   && notinname ((cp)[sizeof (keyword) - 1])    /* keyword ends here */ \
   && ((cp) = skip_spaces ((cp) + sizeof (keyword) - 1))) /* advance */
3844
3845 /*
3846  * Read a file, but do no processing.  This is used to do regexp
3847  * matching on files that have no language defined.
3848  */
3849 static void
3850 just_read_file (inf)
3851      FILE *inf;
3852 {
3853   register char *dummy;
3854
3855   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3856     continue;
3857 }
3858
3859 \f
3860 /* Fortran parsing */
3861
3862 static void F_takeprec __P((void));
3863 static void F_getit __P((FILE *));
3864
3865 static void
3866 F_takeprec ()
3867 {
3868   dbp = skip_spaces (dbp);
3869   if (*dbp != '*')
3870     return;
3871   dbp++;
3872   dbp = skip_spaces (dbp);
3873   if (strneq (dbp, "(*)", 3))
3874     {
3875       dbp += 3;
3876       return;
3877     }
3878   if (!ISDIGIT (*dbp))
3879     {
3880       --dbp;                    /* force failure */
3881       return;
3882     }
3883   do
3884     dbp++;
3885   while (ISDIGIT (*dbp));
3886 }
3887
3888 static void
3889 F_getit (inf)
3890      FILE *inf;
3891 {
3892   register char *cp;
3893
3894   dbp = skip_spaces (dbp);
3895   if (*dbp == '\0')
3896     {
3897       readline (&lb, inf);
3898       dbp = lb.buffer;
3899       if (dbp[5] != '&')
3900         return;
3901       dbp += 6;
3902       dbp = skip_spaces (dbp);
3903     }
3904   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3905     return;
3906   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3907     continue;
3908   pfnote (savenstr (dbp, cp-dbp), TRUE,
3909           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3910 }
3911
3912
3913 static void
3914 Fortran_functions (inf)
3915      FILE *inf;
3916 {
3917   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3918     {
3919       if (*dbp == '%')
3920         dbp++;                  /* Ratfor escape to fortran */
3921       dbp = skip_spaces (dbp);
3922       if (*dbp == '\0')
3923         continue;
3924       switch (lowcase (*dbp))
3925         {
3926         case 'i':
3927           if (nocase_tail ("integer"))
3928             F_takeprec ();
3929           break;
3930         case 'r':
3931           if (nocase_tail ("real"))
3932             F_takeprec ();
3933           break;
3934         case 'l':
3935           if (nocase_tail ("logical"))
3936             F_takeprec ();
3937           break;
3938         case 'c':
3939           if (nocase_tail ("complex") || nocase_tail ("character"))
3940             F_takeprec ();
3941           break;
3942         case 'd':
3943           if (nocase_tail ("double"))
3944             {
3945               dbp = skip_spaces (dbp);
3946               if (*dbp == '\0')
3947                 continue;
3948               if (nocase_tail ("precision"))
3949                 break;
3950               continue;
3951             }
3952           break;
3953         }
3954       dbp = skip_spaces (dbp);
3955       if (*dbp == '\0')
3956         continue;
3957       switch (lowcase (*dbp))
3958         {
3959         case 'f':
3960           if (nocase_tail ("function"))
3961             F_getit (inf);
3962           continue;
3963         case 's':
3964           if (nocase_tail ("subroutine"))
3965             F_getit (inf);
3966           continue;
3967         case 'e':
3968           if (nocase_tail ("entry"))
3969             F_getit (inf);
3970           continue;
3971         case 'b':
3972           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3973             {
3974               dbp = skip_spaces (dbp);
3975               if (*dbp == '\0') /* assume un-named */
3976                 pfnote (savestr ("blockdata"), TRUE,
3977                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3978               else
3979                 F_getit (inf);  /* look for name */
3980             }
3981           continue;
3982         }
3983     }
3984 }
3985
3986 \f
3987 /*
3988  * Ada parsing
3989  * Original code by
3990  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3991  */
3992
3993 static void Ada_getit __P((FILE *, char *));
3994
3995 /* Once we are positioned after an "interesting" keyword, let's get
3996    the real tag value necessary. */
3997 static void
3998 Ada_getit (inf, name_qualifier)
3999      FILE *inf;
4000      char *name_qualifier;
4001 {
4002   register char *cp;
4003   char *name;
4004   char c;
4005
4006   while (!feof (inf))
4007     {
4008       dbp = skip_spaces (dbp);
4009       if (*dbp == '\0'
4010           || (dbp[0] == '-' && dbp[1] == '-'))
4011         {
4012           readline (&lb, inf);
4013           dbp = lb.buffer;
4014         }
4015       switch (lowcase(*dbp))
4016         {
4017         case 'b':
4018           if (nocase_tail ("body"))
4019             {
4020               /* Skipping body of   procedure body   or   package body or ....
4021                  resetting qualifier to body instead of spec. */
4022               name_qualifier = "/b";
4023               continue;
4024             }
4025           break;
4026         case 't':
4027           /* Skipping type of   task type   or   protected type ... */
4028           if (nocase_tail ("type"))
4029             continue;
4030           break;
4031         }
4032       if (*dbp == '"')
4033         {
4034           dbp += 1;
4035           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4036             continue;
4037         }
4038       else
4039         {
4040           dbp = skip_spaces (dbp);
4041           for (cp = dbp;
4042                (*cp != '\0'
4043                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4044                cp++)
4045             continue;
4046           if (cp == dbp)
4047             return;
4048         }
4049       c = *cp;
4050       *cp = '\0';
4051       name = concat (dbp, name_qualifier, "");
4052       *cp = c;
4053       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4054       if (c == '"')
4055         dbp = cp + 1;
4056       return;
4057     }
4058 }
4059
4060 static void
4061 Ada_funcs (inf)
4062      FILE *inf;
4063 {
4064   bool inquote = FALSE;
4065
4066   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4067     {
4068       while (*dbp != '\0')
4069         {
4070           /* Skip a string i.e. "abcd". */
4071           if (inquote || (*dbp == '"'))
4072             {
4073               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4074               if (dbp != NULL)
4075                 {
4076                   inquote = FALSE;
4077                   dbp += 1;
4078                   continue;     /* advance char */
4079                 }
4080               else
4081                 {
4082                   inquote = TRUE;
4083                   break;        /* advance line */
4084                 }
4085             }
4086
4087           /* Skip comments. */
4088           if (dbp[0] == '-' && dbp[1] == '-')
4089             break;              /* advance line */
4090
4091           /* Skip character enclosed in single quote i.e. 'a'
4092              and skip single quote starting an attribute i.e. 'Image. */
4093           if (*dbp == '\'')
4094             {
4095               dbp++ ;
4096               if (*dbp != '\0')
4097                 dbp++;
4098               continue;
4099             }
4100
4101           /* Search for beginning of a token.  */
4102           if (!begtoken (*dbp))
4103             {
4104               dbp++;
4105               continue;         /* advance char */
4106             }
4107
4108           /* We are at the beginning of a token. */
4109           switch (lowcase(*dbp))
4110             {
4111             case 'f':
4112               if (!packages_only && nocase_tail ("function"))
4113                 Ada_getit (inf, "/f");
4114               else
4115                 break;          /* from switch */
4116               continue;         /* advance char */
4117             case 'p':
4118               if (!packages_only && nocase_tail ("procedure"))
4119                 Ada_getit (inf, "/p");
4120               else if (nocase_tail ("package"))
4121                 Ada_getit (inf, "/s");
4122               else if (nocase_tail ("protected")) /* protected type */
4123                 Ada_getit (inf, "/t");
4124               else
4125                 break;          /* from switch */
4126               continue;         /* advance char */
4127             case 't':
4128               if (!packages_only && nocase_tail ("task"))
4129                 Ada_getit (inf, "/k");
4130               else if (typedefs && !packages_only && nocase_tail ("type"))
4131                 {
4132                   Ada_getit (inf, "/t");
4133                   while (*dbp != '\0')
4134                     dbp += 1;
4135                 }
4136               else
4137                 break;          /* from switch */
4138               continue;         /* advance char */
4139             }
4140
4141           /* Look for the end of the token. */
4142           while (!endtoken (*dbp))
4143             dbp++;
4144
4145         } /* advance char */
4146     } /* advance line */
4147 }
4148
4149 \f
4150 /*
4151  * Unix and microcontroller assembly tag handling
4152  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4153  * Idea by Bob Weiner, Motorola Inc. (1994)
4154  */
4155 static void
4156 Asm_labels (inf)
4157      FILE *inf;
4158 {
4159   register char *cp;
4160
4161   LOOP_ON_INPUT_LINES (inf, lb, cp)
4162     {
4163       /* If first char is alphabetic or one of [_.$], test for colon
4164          following identifier. */
4165       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4166         {
4167           /* Read past label. */
4168           cp++;
4169           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4170             cp++;
4171           if (*cp == ':' || iswhite (*cp))
4172             {
4173               /* Found end of label, so copy it and add it to the table. */
4174               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4175                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4176             }
4177         }
4178     }
4179 }
4180
4181 \f
4182 /*
4183  * Perl support
4184  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4185  * Perl variable names: /^(my|local).../
4186  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4187  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4188  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4189  */
4190 static void
4191 Perl_functions (inf)
4192      FILE *inf;
4193 {
4194   char *package = savestr ("main"); /* current package name */
4195   register char *cp;
4196
4197   LOOP_ON_INPUT_LINES (inf, lb, cp)
4198     {
4199       skip_spaces(cp);
4200
4201       if (LOOKING_AT (cp, "package"))
4202         {
4203           free (package);
4204           package = get_tag (cp);
4205           if (package == NULL)  /* can't parse package name */
4206             package = savestr ("");
4207           else
4208             package = savestr(package); /* make a copy */
4209         }
4210       else if (LOOKING_AT (cp, "sub"))
4211         {
4212           char *name, *fullname, *pos;
4213           char *sp = cp;
4214
4215           while (!notinname (*cp))
4216             cp++;
4217           if (cp == sp)
4218             continue;
4219           name = savenstr (sp, cp-sp);
4220           if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4221             fullname = name;
4222           else
4223             fullname = concat (package, "::", name);
4224           pfnote (fullname, TRUE,
4225                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4226           if (name != fullname)
4227             free (name);
4228         }
4229        else if (globals         /* only if tagging global vars is enabled */
4230                 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4231         {
4232           /* After "my" or "local", but before any following paren or space. */
4233           char *varname = NULL;
4234
4235           if (*cp == '$' || *cp == '@' || *cp == '%')
4236             {
4237               char* varstart = ++cp;
4238               while (ISALNUM (*cp) || *cp == '_')
4239                 cp++;
4240               varname = savenstr (varstart, cp-varstart);
4241             }
4242           else
4243             {
4244               /* Should be examining a variable list at this point;
4245                  could insist on seeing an open parenthesis. */
4246               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4247                 cp++;
4248             }
4249
4250           /* Perhaps I should back cp up one character, so the TAGS table
4251              doesn't mention (and so depend upon) the following char. */
4252           pfnote (varname, FALSE,
4253                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4254         }
4255     }
4256 }
4257
4258
4259 /*
4260  * Python support
4261  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4262  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4263  * More ideas by seb bacon <seb@jamkit.com> (2002)
4264  */
4265 static void
4266 Python_functions (inf)
4267      FILE *inf;
4268 {
4269   register char *cp;
4270
4271   LOOP_ON_INPUT_LINES (inf, lb, cp)
4272     {
4273       cp = skip_spaces (cp);
4274       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4275         {
4276           char *name = cp;
4277           while (!notinname (*cp) && *cp != ':')
4278             cp++;
4279           pfnote (savenstr (name, cp-name), TRUE,
4280                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4281         }
4282     }
4283 }
4284
4285 \f
4286 /*
4287  * PHP support
4288  * Look for:
4289  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4290  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4291  *  - /^[ \t]*define\(\"[^\"]+/
4292  * Only with --members:
4293  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4294  * Idea by Diez B. Roggisch (2001)
4295  */
4296 static void
4297 PHP_functions (inf)
4298      FILE *inf;
4299 {
4300   register char *cp, *name;
4301   bool search_identifier = FALSE;
4302
4303   LOOP_ON_INPUT_LINES (inf, lb, cp)
4304     {
4305       cp = skip_spaces (cp);
4306       name = cp;
4307       if (search_identifier
4308           && *cp != '\0')
4309         {
4310           while (!notinname (*cp))
4311             cp++;
4312           pfnote (savenstr (name, cp-name), TRUE,
4313                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4314           search_identifier = FALSE;
4315         }
4316       else if (LOOKING_AT (cp, "function"))
4317         {
4318           if(*cp == '&')
4319             cp = skip_spaces (cp+1);
4320           if(*cp != '\0')
4321             {
4322               name = cp;
4323               while (!notinname (*cp))
4324                 cp++;
4325               pfnote (savenstr (name, cp-name), TRUE,
4326                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4327             }
4328           else
4329             search_identifier = TRUE;
4330         }
4331       else if (LOOKING_AT (cp, "class"))
4332         {
4333           if (*cp != '\0')
4334             {
4335               name = cp;
4336               while (*cp != '\0' && !iswhite (*cp))
4337                 cp++;
4338               pfnote (savenstr (name, cp-name), FALSE,
4339                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4340             }
4341           else
4342             search_identifier = TRUE;
4343         }
4344       else if (strneq (cp, "define", 6)
4345                && (cp = skip_spaces (cp+6))
4346                && *cp++ == '('
4347                && (*cp == '"' || *cp == '\''))
4348         {
4349           char quote = *cp++;
4350           name = cp;
4351           while (*cp != quote && *cp != '\0')
4352             cp++;
4353           pfnote (savenstr (name, cp-name), FALSE,
4354                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4355         }
4356       else if (members
4357                && LOOKING_AT (cp, "var")
4358                && *cp == '$')
4359         {
4360           name = cp;
4361           while (!notinname(*cp))
4362             cp++;
4363           pfnote (savenstr (name, cp-name), FALSE,
4364                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4365         }
4366     }
4367 }
4368
4369 \f
4370 /*
4371  * Cobol tag functions
4372  * We could look for anything that could be a paragraph name.
4373  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4374  * Idea by Corny de Souza (1993)
4375  */
4376 static void
4377 Cobol_paragraphs (inf)
4378      FILE *inf;
4379 {
4380   register char *bp, *ep;
4381
4382   LOOP_ON_INPUT_LINES (inf, lb, bp)
4383     {
4384       if (lb.len < 9)
4385         continue;
4386       bp += 8;
4387
4388       /* If eoln, compiler option or comment ignore whole line. */
4389       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4390         continue;
4391
4392       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4393         continue;
4394       if (*ep++ == '.')
4395         pfnote (savenstr (bp, ep-bp), TRUE,
4396                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4397     }
4398 }
4399
4400 \f
4401 /*
4402  * Makefile support
4403  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4404  */
4405 static void
4406 Makefile_targets (inf)
4407      FILE *inf;
4408 {
4409   register char *bp;
4410
4411   LOOP_ON_INPUT_LINES (inf, lb, bp)
4412     {
4413       if (*bp == '\t' || *bp == '#')
4414         continue;
4415       while (*bp != '\0' && *bp != '=' && *bp != ':')
4416         bp++;
4417       if (*bp == ':' || (globals && *bp == '='))
4418         pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4419                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4420     }
4421 }
4422
4423 \f
4424 /*
4425  * Pascal parsing
4426  * Original code by Mosur K. Mohan (1989)
4427  *
4428  *  Locates tags for procedures & functions.  Doesn't do any type- or
4429  *  var-definitions.  It does look for the keyword "extern" or
4430  *  "forward" immediately following the procedure statement; if found,
4431  *  the tag is skipped.
4432  */
4433 static void
4434 Pascal_functions (inf)
4435      FILE *inf;
4436 {
4437   linebuffer tline;             /* mostly copied from C_entries */
4438   long save_lcno;
4439   int save_lineno, save_len;
4440   char c, *cp, *namebuf;
4441
4442   bool                          /* each of these flags is TRUE iff: */
4443     incomment,                  /* point is inside a comment */
4444     inquote,                    /* point is inside '..' string */
4445     get_tagname,                /* point is after PROCEDURE/FUNCTION
4446                                    keyword, so next item = potential tag */
4447     found_tag,                  /* point is after a potential tag */
4448     inparms,                    /* point is within parameter-list */
4449     verify_tag;                 /* point has passed the parm-list, so the
4450                                    next token will determine whether this
4451                                    is a FORWARD/EXTERN to be ignored, or
4452                                    whether it is a real tag */
4453
4454   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4455   namebuf = NULL;               /* keep compiler quiet */
4456   dbp = lb.buffer;
4457   *dbp = '\0';
4458   initbuffer (&tline);
4459
4460   incomment = inquote = FALSE;
4461   found_tag = FALSE;            /* have a proc name; check if extern */
4462   get_tagname = FALSE;          /* have found "procedure" keyword    */
4463   inparms = FALSE;              /* found '(' after "proc"            */
4464   verify_tag = FALSE;           /* check if "extern" is ahead        */
4465
4466
4467   while (!feof (inf))           /* long main loop to get next char */
4468     {
4469       c = *dbp++;
4470       if (c == '\0')            /* if end of line */
4471         {
4472           readline (&lb, inf);
4473           dbp = lb.buffer;
4474           if (*dbp == '\0')
4475             continue;
4476           if (!((found_tag && verify_tag)
4477                 || get_tagname))
4478             c = *dbp++;         /* only if don't need *dbp pointing
4479                                    to the beginning of the name of
4480                                    the procedure or function */
4481         }
4482       if (incomment)
4483         {
4484           if (c == '}')         /* within { } comments */
4485             incomment = FALSE;
4486           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4487             {
4488               dbp++;
4489               incomment = FALSE;
4490             }
4491           continue;
4492         }
4493       else if (inquote)
4494         {
4495           if (c == '\'')
4496             inquote = FALSE;
4497           continue;
4498         }
4499       else
4500         switch (c)
4501           {
4502           case '\'':
4503             inquote = TRUE;     /* found first quote */
4504             continue;
4505           case '{':             /* found open { comment */
4506             incomment = TRUE;
4507             continue;
4508           case '(':
4509             if (*dbp == '*')    /* found open (* comment */
4510               {
4511                 incomment = TRUE;
4512                 dbp++;
4513               }
4514             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4515               inparms = TRUE;
4516             continue;
4517           case ')':             /* end of parms list */
4518             if (inparms)
4519               inparms = FALSE;
4520             continue;
4521           case ';':
4522             if (found_tag && !inparms) /* end of proc or fn stmt */
4523               {
4524                 verify_tag = TRUE;
4525                 break;
4526               }
4527             continue;
4528           }
4529       if (found_tag && verify_tag && (*dbp != ' '))
4530         {
4531           /* check if this is an "extern" declaration */
4532           if (*dbp == '\0')
4533             continue;
4534           if (lowcase (*dbp == 'e'))
4535             {
4536               if (nocase_tail ("extern")) /* superfluous, really! */
4537                 {
4538                   found_tag = FALSE;
4539                   verify_tag = FALSE;
4540                 }
4541             }
4542           else if (lowcase (*dbp) == 'f')
4543             {
4544               if (nocase_tail ("forward")) /*  check for forward reference */
4545                 {
4546                   found_tag = FALSE;
4547                   verify_tag = FALSE;
4548                 }
4549             }
4550           if (found_tag && verify_tag) /* not external proc, so make tag */
4551             {
4552               found_tag = FALSE;
4553               verify_tag = FALSE;
4554               pfnote (namebuf, TRUE,
4555                       tline.buffer, save_len, save_lineno, save_lcno);
4556               continue;
4557             }
4558         }
4559       if (get_tagname)          /* grab name of proc or fn */
4560         {
4561           if (*dbp == '\0')
4562             continue;
4563
4564           /* save all values for later tagging */
4565           linebuffer_setlen (&tline, lb.len);
4566           strcpy (tline.buffer, lb.buffer);
4567           save_lineno = lineno;
4568           save_lcno = linecharno;
4569
4570           /* grab block name */
4571           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4572             continue;
4573           namebuf = savenstr (dbp, cp-dbp);
4574           dbp = cp;             /* set dbp to e-o-token */
4575           save_len = dbp - lb.buffer + 1;
4576           get_tagname = FALSE;
4577           found_tag = TRUE;
4578           continue;
4579
4580           /* and proceed to check for "extern" */
4581         }
4582       else if (!incomment && !inquote && !found_tag)
4583         {
4584           /* check for proc/fn keywords */
4585           switch (lowcase (c))
4586             {
4587             case 'p':
4588               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4589                 get_tagname = TRUE;
4590               continue;
4591             case 'f':
4592               if (nocase_tail ("unction"))
4593                 get_tagname = TRUE;
4594               continue;
4595             }
4596         }
4597     }                           /* while not eof */
4598
4599   free (tline.buffer);
4600 }
4601
4602 \f
4603 /*
4604  * Lisp tag functions
4605  *  look for (def or (DEF, quote or QUOTE
4606  */
4607
4608 static void L_getit __P((void));
4609
4610 static void
4611 L_getit ()
4612 {
4613   if (*dbp == '\'')             /* Skip prefix quote */
4614     dbp++;
4615   else if (*dbp == '(')
4616   {
4617     dbp++;
4618     /* Try to skip "(quote " */
4619     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4620       /* Ok, then skip "(" before name in (defstruct (foo)) */
4621       dbp = skip_spaces (dbp);
4622   }
4623   get_tag (dbp);
4624 }
4625
4626 static void
4627 Lisp_functions (inf)
4628      FILE *inf;
4629 {
4630   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4631     {
4632       if (dbp[0] != '(')
4633         continue;
4634
4635       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4636         {
4637           dbp = skip_non_spaces (dbp);
4638           dbp = skip_spaces (dbp);
4639           L_getit ();
4640         }
4641       else
4642         {
4643           /* Check for (foo::defmumble name-defined ... */
4644           do
4645             dbp++;
4646           while (!notinname (*dbp) && *dbp != ':');
4647           if (*dbp == ':')
4648             {
4649               do
4650                 dbp++;
4651               while (*dbp == ':');
4652
4653               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4654                 {
4655                   dbp = skip_non_spaces (dbp);
4656                   dbp = skip_spaces (dbp);
4657                   L_getit ();
4658                 }
4659             }
4660         }
4661     }
4662 }
4663
4664 \f
4665 /*
4666  * Postscript tag functions
4667  * Just look for lines where the first character is '/'
4668  * Also look at "defineps" for PSWrap
4669  * Ideas by:
4670  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4671  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4672  */
4673 static void
4674 Postscript_functions (inf)
4675      FILE *inf;
4676 {
4677   register char *bp, *ep;
4678
4679   LOOP_ON_INPUT_LINES (inf, lb, bp)
4680     {
4681       if (bp[0] == '/')
4682         {
4683           for (ep = bp+1;
4684                *ep != '\0' && *ep != ' ' && *ep != '{';
4685                ep++)
4686             continue;
4687           pfnote (savenstr (bp, ep-bp), TRUE,
4688                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4689         }
4690       else if (LOOKING_AT (bp, "defineps"))
4691         get_tag (bp);
4692     }
4693 }
4694
4695 \f
4696 /*
4697  * Scheme tag functions
4698  * look for (def... xyzzy
4699  *          (def... (xyzzy
4700  *          (def ... ((...(xyzzy ....
4701  *          (set! xyzzy
4702  * Original code by Ken Haase (1985?)
4703  */
4704
4705 static void
4706 Scheme_functions (inf)
4707      FILE *inf;
4708 {
4709   register char *bp;
4710
4711   LOOP_ON_INPUT_LINES (inf, lb, bp)
4712     {
4713       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4714         {
4715           bp = skip_non_spaces (bp+4);
4716           /* Skip over open parens and white space */
4717           while (notinname (*bp))
4718             bp++;
4719           get_tag (bp);
4720         }
4721       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4722         get_tag (bp);
4723     }
4724 }
4725
4726 \f
4727 /* Find tags in TeX and LaTeX input files.  */
4728
4729 /* TEX_toktab is a table of TeX control sequences that define tags.
4730  * Each entry records one such control sequence.
4731  *
4732  * Original code from who knows whom.
4733  * Ideas by:
4734  *   Stefan Monnier (2002)
4735  */
4736
4737 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4738
4739 /* Default set of control sequences to put into TEX_toktab.
4740    The value of environment var TEXTAGS is prepended to this.  */
4741 static char *TEX_defenv = "\
4742 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4743 :part:appendix:entry:index:def\
4744 :newcommand:renewcommand:newenvironment:renewenvironment";
4745
4746 static void TEX_mode __P((FILE *));
4747 static void TEX_decode_env __P((char *, char *));
4748
4749 static char TEX_esc = '\\';
4750 static char TEX_opgrp = '{';
4751 static char TEX_clgrp = '}';
4752
4753 /*
4754  * TeX/LaTeX scanning loop.
4755  */
4756 static void
4757 TeX_commands (inf)
4758      FILE *inf;
4759 {
4760   char *cp;
4761   linebuffer *key;
4762
4763   /* Select either \ or ! as escape character.  */
4764   TEX_mode (inf);
4765
4766   /* Initialize token table once from environment. */
4767   if (TEX_toktab == NULL)
4768     TEX_decode_env ("TEXTAGS", TEX_defenv);
4769
4770   LOOP_ON_INPUT_LINES (inf, lb, cp)
4771     {
4772       /* Look at each TEX keyword in line. */
4773       for (;;)
4774         {
4775           /* Look for a TEX escape. */
4776           while (*cp++ != TEX_esc)
4777             if (cp[-1] == '\0' || cp[-1] == '%')
4778               goto tex_next_line;
4779
4780           for (key = TEX_toktab; key->buffer != NULL; key++)
4781             if (strneq (cp, key->buffer, key->len))
4782               {
4783                 register char *p;
4784                 char *name;
4785                 int linelen;
4786                 bool opgrp = FALSE;
4787
4788                 cp = skip_spaces (cp + key->len);
4789                 if (*cp == TEX_opgrp)
4790                   {
4791                     opgrp = TRUE;
4792                     cp++;
4793                   }
4794                 for (p = cp;
4795                      (!iswhite (*p) && *p != '#' &&
4796                       *p != TEX_opgrp && *p != TEX_clgrp);
4797                      p++)
4798                   continue;
4799                 name = savenstr (cp, p-cp);
4800                 linelen = lb.len;
4801                 if (!opgrp || *p == TEX_clgrp)
4802                   {
4803                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4804                       *p++;
4805                     linelen = p - lb.buffer + 1;
4806                   }
4807                 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4808                 goto tex_next_line; /* We only tag a line once */
4809               }
4810         }
4811     tex_next_line:
4812       ;
4813     }
4814 }
4815
4816 #define TEX_LESC '\\'
4817 #define TEX_SESC '!'
4818
4819 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4820    chars accordingly. */
4821 static void
4822 TEX_mode (inf)
4823      FILE *inf;
4824 {
4825   int c;
4826
4827   while ((c = getc (inf)) != EOF)
4828     {
4829       /* Skip to next line if we hit the TeX comment char. */
4830       if (c == '%')
4831         while (c != '\n')
4832           c = getc (inf);
4833       else if (c == TEX_LESC || c == TEX_SESC )
4834         break;
4835     }
4836
4837   if (c == TEX_LESC)
4838     {
4839       TEX_esc = TEX_LESC;
4840       TEX_opgrp = '{';
4841       TEX_clgrp = '}';
4842     }
4843   else
4844     {
4845       TEX_esc = TEX_SESC;
4846       TEX_opgrp = '<';
4847       TEX_clgrp = '>';
4848     }
4849   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4850      No attempt is made to correct the situation. */
4851   rewind (inf);
4852 }
4853
4854 /* Read environment and prepend it to the default string.
4855    Build token table. */
4856 static void
4857 TEX_decode_env (evarname, defenv)
4858      char *evarname;
4859      char *defenv;
4860 {
4861   register char *env, *p;
4862   int i, len;
4863
4864   /* Append default string to environment. */
4865   env = getenv (evarname);
4866   if (!env)
4867     env = defenv;
4868   else
4869     {
4870       char *oldenv = env;
4871       env = concat (oldenv, defenv, "");
4872     }
4873
4874   /* Allocate a token table */
4875   for (len = 1, p = env; p;)
4876     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4877       len++;
4878   TEX_toktab = xnew (len, linebuffer);
4879
4880   /* Unpack environment string into token table. Be careful about */
4881   /* zero-length strings (leading ':', "::" and trailing ':') */
4882   for (i = 0; *env != '\0';)
4883     {
4884       p = etags_strchr (env, ':');
4885       if (!p)                   /* End of environment string. */
4886         p = env + strlen (env);
4887       if (p - env > 0)
4888         {                       /* Only non-zero strings. */
4889           TEX_toktab[i].buffer = savenstr (env, p - env);
4890           TEX_toktab[i].len = p - env;
4891           i++;
4892         }
4893       if (*p)
4894         env = p + 1;
4895       else
4896         {
4897           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4898           TEX_toktab[i].len = 0;
4899           break;
4900         }
4901     }
4902 }
4903
4904 \f
4905 /* Texinfo support.  Dave Love, Mar. 2000.  */
4906 static void
4907 Texinfo_nodes (inf)
4908      FILE * inf;
4909 {
4910   char *cp, *start;
4911   LOOP_ON_INPUT_LINES (inf, lb, cp)
4912     if (LOOKING_AT (cp, "@node"))
4913       {
4914         start = cp;
4915         while (*cp != '\0' && *cp != ',')
4916           cp++;
4917         pfnote (savenstr (start, cp - start), TRUE,
4918                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4919       }
4920 }
4921
4922 \f
4923 /*
4924  * Prolog support
4925  *
4926  * Assumes that the predicate or rule starts at column 0.
4927  * Only the first clause of a predicate or rule is added.
4928  * Original code by Sunichirou Sugou (1989)
4929  * Rewritten by Anders Lindgren (1996)
4930  */
4931 static int prolog_pr __P((char *, char *));
4932 static void prolog_skip_comment __P((linebuffer *, FILE *));
4933 static int prolog_atom __P((char *, int));
4934
4935 static void
4936 Prolog_functions (inf)
4937      FILE *inf;
4938 {
4939   char *cp, *last;
4940   int len;
4941   int allocated;
4942
4943   allocated = 0;
4944   len = 0;
4945   last = NULL;
4946
4947   LOOP_ON_INPUT_LINES (inf, lb, cp)
4948     {
4949       if (cp[0] == '\0')        /* Empty line */
4950         continue;
4951       else if (iswhite (cp[0])) /* Not a predicate */
4952         continue;
4953       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4954         prolog_skip_comment (&lb, inf);
4955       else if ((len = prolog_pr (cp, last)) > 0)
4956         {
4957           /* Predicate or rule.  Store the function name so that we
4958              only generate a tag for the first clause.  */
4959           if (last == NULL)
4960             last = xnew(len + 1, char);
4961           else if (len + 1 > allocated)
4962             xrnew (last, len + 1, char);
4963           allocated = len + 1;
4964           strncpy (last, cp, len);
4965           last[len] = '\0';
4966         }
4967     }
4968 }
4969
4970
4971 static void
4972 prolog_skip_comment (plb, inf)
4973      linebuffer *plb;
4974      FILE *inf;
4975 {
4976   char *cp;
4977
4978   do
4979     {
4980       for (cp = plb->buffer; *cp != '\0'; cp++)
4981         if (cp[0] == '*' && cp[1] == '/')
4982           return;
4983       readline (plb, inf);
4984     }
4985   while (!feof(inf));
4986 }
4987
4988 /*
4989  * A predicate or rule definition is added if it matches:
4990  *     <beginning of line><Prolog Atom><whitespace>(
4991  * or  <beginning of line><Prolog Atom><whitespace>:-
4992  *
4993  * It is added to the tags database if it doesn't match the
4994  * name of the previous clause header.
4995  *
4996  * Return the size of the name of the predicate or rule, or 0 if no
4997  * header was found.
4998  */
4999 static int
5000 prolog_pr (s, last)
5001      char *s;
5002      char *last;                /* Name of last clause. */
5003 {
5004   int pos;
5005   int len;
5006
5007   pos = prolog_atom (s, 0);
5008   if (pos < 1)
5009     return 0;
5010
5011   len = pos;
5012   pos = skip_spaces (s + pos) - s;
5013
5014   if ((s[pos] == '.'
5015        || (s[pos] == '(' && (pos += 1))
5016        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5017       && (last == NULL          /* save only the first clause */
5018           || len != strlen (last)
5019           || !strneq (s, last, len)))
5020         {
5021           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5022           return len;
5023         }
5024   else
5025     return 0;
5026 }
5027
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * Two forms are recognized:
 * - An unquoted atom: a lower case letter or an underscore, followed
 *   by any number of alphanumeric characters and underscores.
 * - A quoted atom: text between single quotes.  Two single quotes in
 *   a row stand for one quote, and a backslash quotes the character
 *   after it.  The count includes both delimiting quotes.  A quoted
 *   atom that is not closed on the same line is an error.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start;
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5086
5087 \f
5088 /*
5089  * Support for Erlang
5090  *
5091  * Generates tags for functions, defines, and records.
5092  * Assumes that Erlang functions start at column 0.
5093  * Original code by Anders Lindgren (1996)
5094  */
5095 static int erlang_func __P((char *, char *));
5096 static void erlang_attribute __P((char *));
5097 static int erlang_atom __P((char *));
5098
5099 static void
5100 Erlang_functions (inf)
5101      FILE *inf;
5102 {
5103   char *cp, *last;
5104   int len;
5105   int allocated;
5106
5107   allocated = 0;
5108   len = 0;
5109   last = NULL;
5110
5111   LOOP_ON_INPUT_LINES (inf, lb, cp)
5112     {
5113       if (cp[0] == '\0')        /* Empty line */
5114         continue;
5115       else if (iswhite (cp[0])) /* Not function nor attribute */
5116         continue;
5117       else if (cp[0] == '%')    /* comment */
5118         continue;
5119       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5120         continue;
5121       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5122         {
5123           erlang_attribute (cp);
5124           last = NULL;
5125         }
5126       else if ((len = erlang_func (cp, last)) > 0)
5127         {
5128           /*
5129            * Function.  Store the function name so that we only
5130            * generates a tag for the first clause.
5131            */
5132           if (last == NULL)
5133             last = xnew (len + 1, char);
5134           else if (len + 1 > allocated)
5135             xrnew (last, len + 1, char);
5136           allocated = len + 1;
5137           strncpy (last, cp, len);
5138           last[len] = '\0';
5139         }
5140     }
5141 }
5142
5143
5144 /*
5145  * A function definition is added if it matches:
5146  *     <beginning of line><Erlang Atom><whitespace>(
5147  *
5148  * It is added to the tags database if it doesn't match the
5149  * name of the previous clause header.
5150  *
5151  * Return the size of the name of the function, or 0 if no function
5152  * was found.
5153  */
5154 static int
5155 erlang_func (s, last)
5156      char *s;
5157      char *last;                /* Name of last clause. */
5158 {
5159   int pos;
5160   int len;
5161
5162   pos = erlang_atom (s);
5163   if (pos < 1)
5164     return 0;
5165
5166   len = pos;
5167   pos = skip_spaces (s + pos) - s;
5168
5169   /* Save only the first clause. */
5170   if (s[pos++] == '('
5171       && (last == NULL
5172           || len != (int)strlen (last)
5173           || !strneq (s, last, len)))
5174         {
5175           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5176           return len;
5177         }
5178
5179   return 0;
5180 }
5181
5182
5183 /*
5184  * Handle attributes.  Currently, tags are generated for defines
5185  * and records.
5186  *
5187  * They are on the form:
5188  * -define(foo, bar).
5189  * -define(Foo(M, N), M+N).
5190  * -record(graph, {vtab = notable, cyclic = true}).
5191  */
5192 static void
5193 erlang_attribute (s)
5194      char *s;
5195 {
5196   char *cp = s;
5197
5198   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5199       && *cp++ == '(')
5200     {
5201       int len = erlang_atom (skip_spaces (cp));
5202       if (len > 0)
5203         pfnote (savenstr (cp, len), TRUE,
5204                 s, cp + len - s, lineno, linecharno);
5205     }
5206   return;
5207 }
5208
5209
/*
 * Consume the Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, or when a quoted atom is not closed before
 * the end of the line.
 *
 * An unquoted atom is a letter or underscore followed by any number
 * of alphanumeric characters and underscores.  A quoted atom is the
 * text between two single quotes, where a backslash quotes the
 * character after it; both quotes are included in the count.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  if (s[0] != '\'')
    return pos;

  /* The atom is quoted: look for the closing quote. */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;                /* step onto the quoted character */
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;
}
5238
5239 \f
5240 #ifdef ETAGS_REGEXPS
5241
5242 static char *scan_separators __P((char *));
5243 static void add_regex __P((char *, language *));
5244 static char *substitute __P((char *, char *, struct re_registers *));
5245
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text after it is copied
 * down to the start of NAME, in place, up to the first separator that
 * is not quoted with a backslash, and the copy is null terminated.
 *
 * While copying, \a \b \d \e \f \n \r \t \v are replaced by the
 * control characters they stand for, and a quoted separator by the
 * separator itself.  Any other quoted character is kept together
 * with its backslash.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  /* Letters that name a control character after a backslash, and the
     characters they stand for, in the same order:
     BEL, BS, DEL, ESC (escape), FF, NL, CR, TAB, VT. */
  static char esc_letters[] = "abdefnrtv";
  static char esc_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  char sep = name[0];
  char *copyto = name;
  int k;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character; a backslash that ends the
             string leaves the regexp unterminated. */
          if (*++name == '\0')
            break;

          for (k = 0; esc_letters[k] != '\0' && esc_letters[k] != *name; k++)
            continue;

          if (esc_letters[k] != '\0')
            *copyto++ = esc_values[k];
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5305
5306 /* Look at the argument of --regex or --no-regex and do the right
5307    thing.  Same for each line of a regexp file. */
5308 static void
5309 analyse_regex (regex_arg)
5310      char *regex_arg;
5311 {
5312   if (regex_arg == NULL)
5313     {
5314       free_patterns ();         /* --no-regex: remove existing regexps */
5315       return;
5316     }
5317
5318   /* A real --regexp option or a line in a regexp file. */
5319   switch (regex_arg[0])
5320     {
5321       /* Comments in regexp file or null arg to --regex. */
5322     case '\0':
5323     case ' ':
5324     case '\t':
5325       break;
5326
5327       /* Read a regex file.  This is recursive and may result in a
5328          loop, which will stop when the file descriptors are exhausted. */
5329     case '@':
5330       {
5331         FILE *regexfp;
5332         linebuffer regexbuf;
5333         char *regexfile = regex_arg + 1;
5334
5335         /* regexfile is a file containing regexps, one per line. */
5336         regexfp = fopen (regexfile, "r");
5337         if (regexfp == NULL)
5338           {
5339             pfatal (regexfile);
5340             return;
5341           }
5342         initbuffer (&regexbuf);
5343         while (readline_internal (&regexbuf, regexfp) > 0)
5344           analyse_regex (regexbuf.buffer);
5345         free (regexbuf.buffer);
5346         fclose (regexfp);
5347       }
5348       break;
5349
5350       /* Regexp to be used for a specific language only. */
5351     case '{':
5352       {
5353         language *lang;
5354         char *lang_name = regex_arg + 1;
5355         char *cp;
5356
5357         for (cp = lang_name; *cp != '}'; cp++)
5358           if (*cp == '\0')
5359             {
5360               error ("unterminated language name in regex: %s", regex_arg);
5361               return;
5362             }
5363         *cp++ = '\0';
5364         lang = get_language_from_langname (lang_name);
5365         if (lang == NULL)
5366           return;
5367         add_regex (cp, lang);
5368       }
5369       break;
5370
5371       /* Regexp to be used for any language. */
5372     default:
5373       add_regex (regex_arg, NULL);
5374       break;
5375     }
5376 }
5377
5378 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5379    expression, into a real regular expression by compiling it. */
5380 static void
5381 add_regex (regexp_pattern, lang)
5382      char *regexp_pattern;
5383      language *lang;
5384 {
5385   static struct re_pattern_buffer zeropattern;
5386   char sep, *pat, *name, *modifiers;
5387   const char *err;
5388   struct re_pattern_buffer *patbuf;
5389   pattern *pp;
5390   bool ignore_case, multi_line, single_line;
5391
5392
5393   if (strlen(regexp_pattern) < 3)
5394     {
5395       error ("null regexp", (char *)NULL);
5396       return;
5397     }
5398   sep = regexp_pattern[0];
5399   name = scan_separators (regexp_pattern);
5400   if (name == NULL)
5401     {
5402       error ("%s: unterminated regexp", regexp_pattern);
5403       return;
5404     }
5405   if (name[1] == sep)
5406     {
5407       error ("null name for regexp \"%s\"", regexp_pattern);
5408       return;
5409     }
5410   modifiers = scan_separators (name);
5411   if (modifiers == NULL)        /* no terminating separator --> no name */
5412     {
5413       modifiers = name;
5414       name = "";
5415     }
5416   else
5417     modifiers += 1;             /* skip separator */
5418
5419   /* Parse regex modifiers. */
5420   ignore_case = FALSE;          /* case is significant */
5421   multi_line = FALSE;           /* matches are done one line at a time */
5422   single_line = FALSE;          /* dot does not match newline */
5423   for (; modifiers[0] != '\0'; modifiers++)
5424     switch (modifiers[0])
5425       {
5426       case 'i':
5427         ignore_case = TRUE;
5428         break;
5429       case 's':
5430         single_line = TRUE;
5431         /* FALLTHRU */
5432       case 'm':
5433         multi_line = TRUE;
5434         need_filebuf = TRUE;
5435         break;
5436       default:
5437         {
5438           char wrongmod [2];
5439           wrongmod[0] = modifiers[0];
5440           wrongmod[1] = '\0';
5441           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5442         }
5443         break;
5444       }
5445
5446   patbuf = xnew (1, struct re_pattern_buffer);
5447   *patbuf = zeropattern;
5448   if (ignore_case)
5449     {
5450       static char lc_trans[CHARS];
5451       int i;
5452       for (i = 0; i < CHARS; i++)
5453         lc_trans[i] = lowcase (i);
5454       patbuf->translate = lc_trans;     /* translation table to fold case  */
5455     }
5456
5457   if (multi_line)
5458     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5459   else
5460     pat = regexp_pattern;
5461
5462   if (single_line)
5463     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5464   else
5465     re_set_syntax (RE_SYNTAX_EMACS);
5466
5467   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5468   if (multi_line)
5469     free (pat);
5470   if (err != NULL)
5471     {
5472       error ("%s while compiling pattern", err);
5473       return;
5474     }
5475
5476   pp = p_head;
5477   p_head = xnew (1, pattern);
5478   p_head->regex = savestr (regexp_pattern);
5479   p_head->p_next = pp;
5480   p_head->lang = lang;
5481   p_head->pat = patbuf;
5482   p_head->name_pattern = savestr (name);
5483   p_head->error_signaled = FALSE;
5484   p_head->ignore_case = ignore_case;
5485   p_head->multi_line = multi_line;
5486 }
5487
5488 /*
5489  * Do the substitutions indicated by the regular expression and
5490  * arguments.
5491  */
5492 static char *
5493 substitute (in, out, regs)
5494      char *in, *out;
5495      struct re_registers *regs;
5496 {
5497   char *result, *t;
5498   int size, dig, diglen;
5499
5500   result = NULL;
5501   size = strlen (out);
5502
5503   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5504   if (out[size - 1] == '\\')
5505     fatal ("pattern error in \"%s\"", out);
5506   for (t = etags_strchr (out, '\\');
5507        t != NULL;
5508        t = etags_strchr (t + 2, '\\'))
5509     if (ISDIGIT (t[1]))
5510       {
5511         dig = t[1] - '0';
5512         diglen = regs->end[dig] - regs->start[dig];
5513         size += diglen - 2;
5514       }
5515     else
5516       size -= 1;
5517
5518   /* Allocate space and do the substitutions. */
5519   result = xnew (size + 1, char);
5520
5521   for (t = result; *out != '\0'; out++)
5522     if (*out == '\\' && ISDIGIT (*++out))
5523       {
5524         dig = *out - '0';
5525         diglen = regs->end[dig] - regs->start[dig];
5526         strncpy (t, in + regs->start[dig], diglen);
5527         t += diglen;
5528       }
5529     else
5530       *t++ = *out;
5531   *t = '\0';
5532
5533   assert (t <= result + size && t - result == (int)strlen (result));
5534
5535   return result;
5536 }
5537
5538 /* Deallocate all patterns. */
5539 static void
5540 free_patterns ()
5541 {
5542   pattern *pp;
5543   while (p_head != NULL)
5544     {
5545       pp = p_head->p_next;
5546       free (p_head->regex);
5547       free (p_head->name_pattern);
5548       free (p_head);
5549       p_head = pp;
5550     }
5551   return;
5552 }
5553
5554 /*
5555  * Reads the whole file as a single string from `filebuf' and looks for
5556  * multi-line regular expressions, creating tags on matches.
5557  * readline already dealt with normal regexps.
5558  *
5559  * Idea by Ben Wing <ben@666.com> (2002).
5560  */
5561 static void
5562 regex_tag_multiline ()
5563 {
5564   char *buffer = filebuf.buffer;
5565   pattern *pp;
5566
5567   for (pp = p_head; pp != NULL; pp = pp->p_next)
5568     {
5569       int match = 0;
5570
5571       if (!pp->multi_line)
5572         continue;               /* skip normal regexps */
5573
5574       /* Generic initialisations before parsing file from memory. */
5575       lineno = 1;               /* reset global line number */
5576       charno = 0;               /* reset global char number */
5577       linecharno = 0;           /* reset global char number of line start */
5578
5579       /* Only use generic regexps or those for the current language. */
5580       if (pp->lang != NULL && pp->lang != curfdp->lang)
5581         continue;
5582
5583       while (match >= 0 && match < filebuf.len)
5584         {
5585           match = re_search (pp->pat, buffer, filebuf.len, charno,
5586                              filebuf.len - match, &pp->regs);
5587           switch (match)
5588             {
5589             case -2:
5590               /* Some error. */
5591               if (!pp->error_signaled)
5592                 {
5593                   error ("regexp stack overflow while matching \"%s\"",
5594                          pp->regex);
5595                   pp->error_signaled = TRUE;
5596                 }
5597               break;
5598             case -1:
5599               /* No match. */
5600               break;
5601             default:
5602               if (match == pp->regs.end[0])
5603                 {
5604                   if (!pp->error_signaled)
5605                     {
5606                       error ("regexp matches the empty string: \"%s\"",
5607                              pp->regex);
5608                       pp->error_signaled = TRUE;
5609                     }
5610                   match = -3;   /* exit from while loop */
5611                   break;
5612                 }
5613
5614               /* Match occurred.  Construct a tag. */
5615               while (charno < pp->regs.end[0])
5616                 if (buffer[charno++] == '\n')
5617                   lineno++, linecharno = charno;
5618               if (pp->name_pattern[0] != '\0')
5619                 {
5620                   /* Make a named tag. */
5621                   char *name = substitute (buffer,
5622                                            pp->name_pattern, &pp->regs);
5623                   if (name != NULL)
5624                     pfnote (name, TRUE, buffer + linecharno,
5625                             charno - linecharno + 1, lineno, linecharno);
5626                 }
5627               else
5628                 {
5629                   /* Make an unnamed tag. */
5630                   pfnote ((char *)NULL, TRUE, buffer + linecharno,
5631                           charno - linecharno + 1, lineno, linecharno);
5632                 }
5633               break;
5634             }
5635         }
5636     }
5637 }
5638
5639 #endif /* ETAGS_REGEXPS */
5640
5641 \f
5642 static bool
5643 nocase_tail (cp)
5644      char *cp;
5645 {
5646   register int len = 0;
5647
5648   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5649     cp++, len++;
5650   if (*cp == '\0' && !intoken (dbp[len]))
5651     {
5652       dbp += len;
5653       return TRUE;
5654     }
5655   return FALSE;
5656 }
5657
5658 static char *
5659 get_tag (bp)
5660      register char *bp;
5661 {
5662   register char *cp, *name;
5663
5664   if (*bp == '\0')
5665     return NULL;
5666   /* Go till you get to white space or a syntactic break */
5667   for (cp = bp + 1; !notinname (*cp); cp++)
5668     continue;
5669   name = savenstr (bp, cp-bp);
5670   pfnote (name, TRUE,
5671           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5672   return name;
5673 }
5674
5675 /* Initialize a linebuffer for use */
5676 static void
5677 initbuffer (lbp)
5678      linebuffer *lbp;
5679 {
5680   lbp->size = (DEBUG) ? 3 : 200;
5681   lbp->buffer = xnew (lbp->size, char);
5682   lbp->buffer[0] = '\0';
5683   lbp->len = 0;
5684 }
5685
5686 /*
5687  * Read a line of text from `stream' into `lbp', excluding the
5688  * newline or CR-NL, if any.  Return the number of characters read from
5689  * `stream', which is the length of the line including the newline.
5690  *
5691  * On DOS or Windows we do not count the CR character, if any before the
5692  * NL, in the returned length; this mirrors the behavior of Emacs on those
5693  * platforms (for text files, it translates CR-NL to NL as it reads in the
5694  * file).
5695  *
5696  * If multi-line regular expressions are requested, each line read is
5697  * appended to `filebuf'.
5698  */
5699 static long
5700 readline_internal (lbp, stream)
5701      linebuffer *lbp;
5702      register FILE *stream;
5703 {
5704   char *buffer = lbp->buffer;
5705   register char *p = lbp->buffer;
5706   register char *pend;
5707   int chars_deleted;
5708
5709   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5710
5711   for (;;)
5712     {
5713       register int c = getc (stream);
5714       if (p == pend)
5715         {
5716           /* We're at the end of linebuffer: expand it. */
5717           lbp->size *= 2;
5718           xrnew (buffer, lbp->size, char);
5719           p += buffer - lbp->buffer;
5720           pend = buffer + lbp->size;
5721           lbp->buffer = buffer;
5722         }
5723       if (c == EOF)
5724         {
5725           *p = '\0';
5726           chars_deleted = 0;
5727           break;
5728         }
5729       if (c == '\n')
5730         {
5731           if (p > buffer && p[-1] == '\r')
5732             {
5733               p -= 1;
5734 #ifdef DOS_NT
5735              /* Assume CRLF->LF translation will be performed by Emacs
5736                 when loading this file, so CRs won't appear in the buffer.
5737                 It would be cleaner to compensate within Emacs;
5738                 however, Emacs does not know how many CRs were deleted
5739                 before any given point in the file.  */
5740               chars_deleted = 1;
5741 #else
5742               chars_deleted = 2;
5743 #endif
5744             }
5745           else
5746             {
5747               chars_deleted = 1;
5748             }
5749           *p = '\0';
5750           break;
5751         }
5752       *p++ = c;
5753     }
5754   lbp->len = p - buffer;
5755
5756   if (need_filebuf              /* we need filebuf for multi-line regexps */
5757       && chars_deleted > 0)     /* not at EOF */
5758     {
5759       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
5760         {
5761           /* Expand filebuf. */
5762           filebuf.size *= 2;
5763           xrnew (filebuf.buffer, filebuf.size, char);
5764         }
5765       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
5766       filebuf.len += lbp->len;
5767       filebuf.buffer[filebuf.len++] = '\n';
5768       filebuf.buffer[filebuf.len] = '\0';
5769     }
5770
5771   return lbp->len + chars_deleted;
5772 }
5773
/*
 * Like readline_internal, above, but in addition try to match the
 * input line against relevant regular expressions and manage #line
 * directives.
 *
 * The global counters `linecharno', `lineno' and `charno' are updated
 * for the line just read.  Unless `no_line_directive' is set, a line of
 * the form `#line NUMBER "FILE"' switches `curfdp' to the file
 * description for FILE (creating one if needed), sets the line number,
 * and is then replaced by the following input line by calling
 * ourselves again.  When FILE was already tagged in its own right, the
 * input is discarded up to the next #line directive or end of file.
 */
static void
readline (lbp, stream)
     linebuffer *lbp;
     FILE *stream;
{
  long result;

  linecharno = charno;          /* update global char number of line start */
  result = readline_internal (lbp, stream); /* read line */
  lineno += 1;                  /* increment global line number */
  charno += result;             /* increment global char number */

  /* Honour #line directives. */
  if (!no_line_directive)
    {
      /* Static, so the "discarding" state survives from one call to
         the next. */
      static bool discard_until_line_directive;

      /* Check whether this is a #line directive. */
      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
        {
          int start, lno;

          if (DEBUG) start = 0; /* shut up the compiler */
          /* Only LNO counts as a converted item; START receives the
             offset just past the opening double quote. */
          if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
            {
              char *endp = lbp->buffer + start;

              assert (start > 0);
              /* Look for the closing double quote, skipping any quote
                 that is preceded by a backslash. */
              while ((endp = etags_strchr (endp, '"')) != NULL
                     && endp[-1] == '\\')
                endp++;
              if (endp != NULL)
                /* Ok, this is a real #line directive.  Let's deal with it. */
                {
                  char *taggedabsname;  /* absolute name of original file */
                  char *taggedfname;    /* name of original file as given */
                  char *name;           /* temp var */

                  discard_until_line_directive = FALSE; /* found it */
                  name = lbp->buffer + start;
                  *endp = '\0';         /* cut the line at the closing quote */
                  canonicalize_filename (name); /* for DOS */
                  taggedabsname = absolute_filename (name, curfdp->infabsdir);
                  if (filename_is_absolute (name)
                      || filename_is_absolute (curfdp->infname))
                    taggedfname = savestr (taggedabsname);
                  else
                    taggedfname = relative_filename (taggedabsname,tagfiledir);

                  if (streq (curfdp->taggedfname, taggedfname))
                    /* The #line directive is only a line number change.  We
                       deal with this afterwards. */
                    free (taggedfname);
                  else
                    /* The tags following this #line directive should be
                       attributed to taggedfname.  In order to do this, set
                       curfdp accordingly. */
                    {
                      fdesc *fdp; /* file description pointer */

                      /* Go look for a file description already set up for the
                         file indicated in the #line directive.  If there is
                         one, use it from now until the next #line
                         directive. */
                      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                        if (streq (fdp->infname, curfdp->infname)
                            && streq (fdp->taggedfname, taggedfname))
                          /* If we remove the second test above (after the &&)
                             then all entries pertaining to the same file are
                             coalesced in the tags file.  If we use it, then
                             entries pertaining to the same file but generated
                             from different files (via #line directives) will
                             go into separate sections in the tags file.  These
                             alternatives look equivalent.  The first one
                             destroys some apparently useless information. */
                          {
                            curfdp = fdp;
                            free (taggedfname);
                            break;
                          }
                      /* Else, if we already tagged the real file, skip all
                         input lines until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                          if (streq (fdp->infabsname, taggedabsname))
                            {
                              discard_until_line_directive = TRUE;
                              free (taggedfname);
                              break;
                            }
                      /* Else create a new file description and use that from
                         now on, until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        {
                          fdp = fdhead;
                          fdhead = xnew (1, fdesc);
                          *fdhead = *curfdp; /* copy curr. file description */
                          fdhead->next = fdp;
                          fdhead->infname = savestr (curfdp->infname);
                          fdhead->infabsname = savestr (curfdp->infabsname);
                          fdhead->infabsdir = savestr (curfdp->infabsdir);
                          /* The new description takes over TAGGEDFNAME,
                             which is therefore not freed on this path. */
                          fdhead->taggedfname = taggedfname;
                          fdhead->usecharno = FALSE;
                          curfdp = fdhead;
                        }
                    }
                  free (taggedabsname);
                  /* One less than LNO, because reading the next line
                     below increments the line number again. */
                  lineno = lno - 1;
                  /* The directive itself is never handed to the caller:
                     read the line that follows it instead. */
                  readline (lbp, stream);
                  return;
                } /* if a real #line directive */
            } /* if #line is followed by a number */
        } /* if line begins with "#line " */

      /* If we are here, no #line directive was found. */
      if (discard_until_line_directive)
        {
          if (result > 0)
            {
              /* Do a tail recursion on ourselves, thus discarding the contents
                 of the line buffer. */
              readline (lbp, stream);
              return;
            }
          /* End of file. */
          discard_until_line_directive = FALSE;
          return;
        }
    } /* if #line directives should be considered */

#ifdef ETAGS_REGEXPS
  {
    int match;
    pattern *pp;

    /* Match against relevant patterns. */
    if (lbp->len > 0)
      for (pp = p_head; pp != NULL; pp = pp->p_next)
        {
          /* Only use generic regexps or those for the current language.
             Also do not use multiline regexps, which is the job of
             regex_tag_multiline. */
          if ((pp->lang != NULL && pp->lang != fdhead->lang)
              || pp->multi_line)
            continue;

          /* The match is attempted at the start of the line only. */
          match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!pp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         pp->regex);
                  pp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            case 0:
              /* Empty string matched. */
              if (!pp->error_signaled)
                {
                  error ("regexp matches the empty string: \"%s\"",
                         pp->regex);
                  pp->error_signaled = TRUE;
                }
              break;
            default:
              /* Match occurred.  Construct a tag. */
              if (pp->name_pattern[0] != '\0')
                {
                  /* Make a named tag. */
                  char *name = substitute (lbp->buffer,
                                           pp->name_pattern, &pp->regs);
                  if (name != NULL)
                    pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
                }
              else
                {
                  /* Make an unnamed tag. */
                  pfnote ((char *)NULL, TRUE,
                          lbp->buffer, match, lineno, linecharno);
                }
              break;
            }
        }
  }
#endif /* ETAGS_REGEXPS */
}
5971
5972 \f
/*
 * Duplicate the string CP: return a pointer to strlen(cp)+1 bytes
 * obtained through savenstr, holding a copy of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5983
5984 /*
5985  * Return a pointer to a space of size LEN+1 allocated with xnew where
5986  * the string CP has been copied for at most the first LEN characters.
5987  */
5988 static char *
5989 savenstr (cp, len)
5990      char *cp;
5991      int len;
5992 {
5993   register char *dp;
5994
5995   dp = xnew (len + 1, char);
5996   strncpy (dp, cp, len);
5997   dp[len] = '\0';
5998   return dp;
5999 }
6000
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The terminating
 * null character is examined too, so looking for '\0' finds the end of
 * the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
6022
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The terminating
 * null character is examined too, so looking for '\0' finds the end of
 * the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  while (*sp != c)
    if (*sp++ == '\0')
      return NULL;              /* hit the end without finding C */
  return (char *)sp;
}
6041
6042 /*
6043  * Return TRUE if the two strings are equal, ignoring case for alphabetic
6044  * characters.
6045  *
6046  * Analogous to BSD's strcasecmp, included for portability.
6047  */
6048 static bool
6049 strcaseeq (s1, s2)
6050      register const char *s1;
6051      register const char *s2;
6052 {
6053   while (*s1 != '\0'
6054          && (ISALPHA (*s1) && ISALPHA (*s2)
6055              ? lowcase (*s1) == lowcase (*s2)
6056              : *s1 == *s2))
6057     s1++, s2++;
6058
6059   return (*s1 == *s2);
6060 }
6061
/* Return a pointer to the first character of CP for which `iswhite'
   is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6071
/* Return a pointer to the first character of CP that is either the
   terminating null or one for which `iswhite' is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
6081
/* Print an error message and exit.  The message is printed by `error',
   with S1 as the printf control string and S2 as its argument; the
   program then exits with status BAD. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (BAD);
}
6090
/* Print the system error message for the current `errno' by means of
   perror, with S1 as its prefix, then exit with status BAD. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (BAD);
}
6098
6099 static void
6100 suggest_asking_for_help ()
6101 {
6102   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6103            progname,
6104 #ifdef LONG_OPTIONS
6105            "--help"
6106 #else
6107            "-h"
6108 #endif
6109            );
6110   exit (BAD);
6111 }
6112
6113 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6114 static void
6115 error (s1, s2)
6116      const char *s1, *s2;
6117 {
6118   fprintf (stderr, "%s: ", progname);
6119   fprintf (stderr, s1, s2);
6120   fprintf (stderr, "\n");
6121 }
6122
6123 /* Return a newly-allocated string whose contents
6124    concatenate those of s1, s2, s3.  */
6125 static char *
6126 concat (s1, s2, s3)
6127      char *s1, *s2, *s3;
6128 {
6129   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6130   char *result = xnew (len1 + len2 + len3 + 1, char);
6131
6132   strcpy (result, s1);
6133   strcpy (result + len1, s2);
6134   strcpy (result + len1 + len2, s3);
6135   result[len1 + len2 + len3] = '\0';
6136
6137   return result;
6138 }
6139
6140 \f
6141 /* Does the same work as the system V getcwd, but does not need to
6142    guess the buffer size in advance. */
6143 static char *
6144 etags_getcwd ()
6145 {
6146 #ifdef HAVE_GETCWD
6147   int bufsize = 200;
6148   char *path = xnew (bufsize, char);
6149
6150   while (getcwd (path, bufsize) == NULL)
6151     {
6152       if (errno != ERANGE)
6153         pfatal ("getcwd");
6154       bufsize *= 2;
6155       free (path);
6156       path = xnew (bufsize, char);
6157     }
6158
6159   canonicalize_filename (path);
6160   return path;
6161
6162 #else /* not HAVE_GETCWD */
6163 #if MSDOS
6164
6165   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6166
6167   getwd (path);
6168
6169   for (p = path; *p != '\0'; p++)
6170     if (*p == '\\')
6171       *p = '/';
6172     else
6173       *p = lowcase (*p);
6174
6175   return strdup (path);
6176 #else /* not MSDOS */
6177   linebuffer path;
6178   FILE *pipe;
6179
6180   initbuffer (&path);
6181   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6182   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6183     pfatal ("pwd");
6184   pclose (pipe);
6185
6186   return path.buffer;
6187 #endif /* not MSDOS */
6188 #endif /* not HAVE_GETCWD */
6189 }
6190
6191 /* Return a newly allocated string containing the file name of FILE
6192    relative to the absolute directory DIR (which should end with a slash). */
6193 static char *
6194 relative_filename (file, dir)
6195      char *file, *dir;
6196 {
6197   char *fp, *dp, *afn, *res;
6198   int i;
6199
6200   /* Find the common root of file and dir (with a trailing slash). */
6201   afn = absolute_filename (file, cwd);
6202   fp = afn;
6203   dp = dir;
6204   while (*fp++ == *dp++)
6205     continue;
6206   fp--, dp--;                   /* back to the first differing char */
6207 #ifdef DOS_NT
6208   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6209     return afn;
6210 #endif
6211   do                            /* look at the equal chars until '/' */
6212     fp--, dp--;
6213   while (*fp != '/');
6214
6215   /* Build a sequence of "../" strings for the resulting relative file name. */
6216   i = 0;
6217   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6218     i += 1;
6219   res = xnew (3*i + strlen (fp + 1) + 1, char);
6220   res[0] = '\0';
6221   while (i-- > 0)
6222     strcat (res, "../");
6223
6224   /* Add the file name relative to the common root of file and dir. */
6225   strcat (res, fp + 1);
6226   free (afn);
6227
6228   return res;
6229 }
6230
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute it is appended to DIR.  The "/dirname/.." and "/."
   components are then removed from the result; if nothing is left, the
   result is "/". */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy must not be
                 used here.  strlen (slashp + 2) is the length of the
                 tail at slashp + 3 plus one for its terminator. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping copy again; strlen (slashp + 1) counts the
                 tail at slashp + 2 and its terminator. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    return savestr ("/");
  else
    return res;
}
6291
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  If FILE contains no slash,
   the result is a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call,
     so that only the directory part gets resolved, then restore it. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6313
/* Tell whether FN is an absolute file name, that is whether it begins
   with a slash or, under DOS_NT, with a drive letter followed by a
   colon and a slash.  FN must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6326
/* Put the file name FN in canonical form, working in place.  Under
   DOS_NT a lower-case drive letter is changed to upper case and all
   backslashes are turned into slashes.  Elsewhere nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6345
6346 /* Set the minimum size of a string contained in a linebuffer. */
6347 static void
6348 linebuffer_setlen (lbp, toksize)
6349      linebuffer *lbp;
6350      int toksize;
6351 {
6352   while (lbp->size <= toksize)
6353     {
6354       lbp->size *= 2;
6355       xrnew (lbp->buffer, lbp->size, char);
6356     }
6357   lbp->len = toksize;
6358 }
6359
6360 /* Like malloc but get fatal error if memory is exhausted.  */
6361 static PTR
6362 xmalloc (size)
6363      unsigned int size;
6364 {
6365   PTR result = (PTR) malloc (size);
6366   if (result == NULL)
6367     fatal ("virtual memory exhausted", (char *)NULL);
6368   return result;
6369 }
6370
6371 static PTR
6372 xrealloc (ptr, size)
6373      char *ptr;
6374      unsigned int size;
6375 {
6376   PTR result = (PTR) realloc (ptr, size);
6377   if (result == NULL)
6378     fatal ("virtual memory exhausted", (char *)NULL);
6379   return result;
6380 }
6381
6382 /*
6383  * Local Variables:
6384  * c-indentation-style: gnu
6385  * indent-tabs-mode: t
6386  * tab-width: 8
6387  * fill-column: 79
6388  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern")
6389  * End:
6390  */