code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5  This file is not considered part of GNU Emacs.
   6
   7  This program is free software; you can redistribute it and/or modify
   8  it under the terms of the GNU General Public License as published by
   9  the Free Software Foundation; either version 2 of the License, or
  10  (at your option) any later version.
  11
  12  This program is distributed in the hope that it will be useful,
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  GNU General Public License for more details.
  16
  17  You should have received a copy of the GNU General Public License
  18  along with this program; if not, write to the Free Software Foundation,
  19  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 1989 Sam Kendall added C++.
  28  * 1992 Joseph B. Wells improved C and C++ parsing.
  29  * 1993 Francesco Potortì reorganised C and C++.
  30  * 1994 Line-by-line regexp tags by Tom Tromey.
  31  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  32  * 2002 #line directives by Francesco Potortì.
  33  *
  34  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  35  */
  36
  37 /*
  38  * If you want to add support for a new language, start by looking at the LUA
  39  * language, which is the simplest.  Alternatively, consider shipping a
  40  * configuration file containing regexp definitions for etags.
  41  */
  42
  43 char pot_etags_version[] = "@(#) pot revision number is 17.5";
  44
  45 #define TRUE    1
  46 #define FALSE   0
  47
  48 #ifdef DEBUG
  49 #  undef DEBUG
  50 #  define DEBUG TRUE
  51 #else
  52 #  define DEBUG  FALSE
  53 #  define NDEBUG                /* disable assert */
  54 #endif
  55
  56 #ifdef HAVE_CONFIG_H
  57 # include <config.h>
  58   /* On some systems, Emacs defines static as nothing for the sake
  59      of unexec.  We don't want that here since we don't use unexec. */
  60 # undef static
  61 # define ETAGS_REGEXPS          /* use the regexp features */
  62 # define LONG_OPTIONS           /* accept long options */
  63 # ifndef PTR                    /* for Xemacs */
  64 #   define PTR void *
  65 # endif
  66 # ifndef __P                    /* for Xemacs */
  67 #   define __P(args) args
  68 # endif
  69 #else  /* no config.h */
  70 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  71 #   define __P(args) args       /* use prototypes */
  72 #   define PTR void *           /* for generic pointers */
  73 # else /* not standard C */
  74 #   define __P(args) ()         /* no prototypes */
  75 #   define const                /* remove const for old compilers' sake */
  76 #   define PTR long *           /* don't use void* */
  77 # endif
  78 #endif /* !HAVE_CONFIG_H */
  79
  80 #ifndef _GNU_SOURCE
  81 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  82 #endif
  83
  84 #ifdef LONG_OPTIONS
  85 #  undef LONG_OPTIONS
  86 #  define LONG_OPTIONS TRUE
  87 #else
  88 #  define LONG_OPTIONS  FALSE
  89 #endif
  90
  91 /* WIN32_NATIVE is for Xemacs.
  92    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  93 #ifdef WIN32_NATIVE
  94 # undef MSDOS
  95 # undef  WINDOWSNT
  96 # define WINDOWSNT
  97 #endif /* WIN32_NATIVE */
  98
  99 #ifdef MSDOS
 100 # undef MSDOS
 101 # define MSDOS TRUE
 102 # include <fcntl.h>
 103 # include <sys/param.h>
 104 # include <io.h>
 105 # ifndef HAVE_CONFIG_H
 106 #   define DOS_NT
 107 #   include <sys/config.h>
 108 # endif
 109 #else
 110 # define MSDOS FALSE
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <stdlib.h>
 115 # include <fcntl.h>
 116 # include <string.h>
 117 # include <direct.h>
 118 # include <io.h>
 119 # define MAXPATHLEN _MAX_PATH
 120 # undef HAVE_NTGUI
 121 # undef  DOS_NT
 122 # define DOS_NT
 123 # ifndef HAVE_GETCWD
 124 #   define HAVE_GETCWD
 125 # endif /* undef HAVE_GETCWD */
 126 #else /* not WINDOWSNT */
 127 # ifdef STDC_HEADERS
 128 #  include <stdlib.h>
 129 #  include <string.h>
 130 # else /* no standard C headers */
 131     extern char *getenv ();
 132 #  ifdef VMS
 133 #   define EXIT_SUCCESS 1
 134 #   define EXIT_FAILURE 0
 135 #  else /* no VMS */
 136 #   define EXIT_SUCCESS 0
 137 #   define EXIT_FAILURE 1
 138 #  endif
 139 # endif
 140 #endif /* !WINDOWSNT */
 141
 142 #ifdef HAVE_UNISTD_H
 143 # include <unistd.h>
 144 #else
 145 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 146     extern char *getcwd (char *buf, size_t size);
 147 # endif
 148 #endif /* HAVE_UNISTD_H */
 149
 150 #include <stdio.h>
 151 #include <ctype.h>
 152 #include <errno.h>
 153 #ifndef errno
 154   extern int errno;
 155 #endif
 156 #include <sys/types.h>
 157 #include <sys/stat.h>
 158
 159 #include <assert.h>
 160 #ifdef NDEBUG
 161 # undef  assert                 /* some systems have a buggy assert.h */
 162 # define assert(x) ((void) 0)
 163 #endif
 164
 165 #if !defined (S_ISREG) && defined (S_IFREG)
 166 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 167 #endif
 168
 169 #if LONG_OPTIONS
 170 # include <getopt.h>
 171 #else
 172 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 173   extern char *optarg;
 174   extern int optind, opterr;
 175 #endif /* LONG_OPTIONS */
 176
 177 #ifdef ETAGS_REGEXPS
 178 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 179 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 180                              !!! NOTICE !!!
 181  the regex.h distributed with Cygwin is not compatible with etags, alas!
 182 If you want regular expression support, you should delete this notice and
 183               arrange to use the GNU regex.h and regex.c.
 184 #   endif
 185 # endif
 186 # include <regex.h>
 187 #endif /* ETAGS_REGEXPS */
 188
 189 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 190  Leave it undefined to make the program "etags", which makes emacs-style
 191  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 192 #ifdef CTAGS
 193 # undef  CTAGS
 194 # define CTAGS TRUE
 195 #else
 196 # define CTAGS FALSE
 197 #endif
 198 /* String equality predicates; both arguments must be non-NULL.  */
 199 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 200 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 201 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 202 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 203
 204 #define CHARS 256               /* 2^8, one table slot per 8-bit char value */
 205 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* index in 0..CHARS-1 */
 206 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 207 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 208 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 209 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 210 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 211
 212 #define ISALNUM(c)      isalnum (CHAR(c))
 213 #define ISALPHA(c)      isalpha (CHAR(c))
 214 #define ISDIGIT(c)      isdigit (CHAR(c))
 215 #define ISLOWER(c)      islower (CHAR(c))
 216
 217 #define lowcase(c)      tolower (CHAR(c))
 218 #define upcase(c)       toupper (CHAR(c))
 219
 220
 221 /*
 222  *      xnew, xrnew -- allocate, reallocate storage for N objects of Type
 223  *
 224  * SYNOPSIS:    Type *xnew (int n, Type);
 225  *              void xrnew (OldPointer, int n, Type);
 226  *      xrnew stores the reallocated block back into OldPointer.  */
 227 #if DEBUG                       /* traced allocators get __FILE__ and __LINE__ */
 228 # include "chkmalloc.h"
 229 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 230                                                   (n) * sizeof (Type)))
 231 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 232                                         (char *) (op), (n) * sizeof (Type)))
 233 #else                           /* plain xmalloc/xrealloc */
 234 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 235 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 236                                         (char *) (op), (n) * sizeof (Type)))
 237 #endif
 238
 239 #define bool int
 240
 241 typedef void Lang_function __P((FILE *));
 242
 243 typedef struct                  /* one entry of the compressors table */
 244 {
 245   char *suffix;                 /* file name suffix for this compressor */
 246   char *command;                /* takes one arg and decompresses to stdout */
 247 } compressor;
 248
 249 typedef struct                  /* one entry of the lang_names table */
 250 {
 251   char *name;                   /* language name */
 252   char *help;                   /* detailed help for the language */
 253   Lang_function *function;      /* parse function */
 254   char **suffixes;              /* name suffixes of this language's files */
 255   char **filenames;             /* names of this language's files */
 256   char **interpreters;          /* interpreters for this language */
 257   bool metasource;              /* source used to generate other sources */
 258 } language;
 259
 260 typedef struct fdesc            /* description of one input file */
 261 {
 262   struct fdesc *next;           /* for the linked list */
 263   char *infname;                /* uncompressed input file name */
 264   char *infabsname;             /* absolute uncompressed input file name */
 265   char *infabsdir;              /* absolute dir of input file */
 266   char *taggedfname;            /* file name to write in tagfile */
 267   language *lang;               /* language of file */
 268   char *prop;                   /* file properties to write in tagfile */
 269   bool usecharno;               /* etags tags shall contain char number */
 270   bool written;                 /* entry written in the tags file */
 271 } fdesc;
 272
 273 typedef struct node_st          /* one tag, a node of the binary tree of tags */
 274 {                               /* sorting structure */
 275   struct node_st *left, *right; /* left and right sons */
 276   fdesc *fdp;                   /* description of file to whom tag belongs */
 277   char *name;                   /* tag name */
 278   char *regex;                  /* search regexp */
 279   bool valid;                   /* write this tag on the tag file */
 280   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 281   bool been_warned;             /* warning already given for duplicated tag */
 282   int lno;                      /* line number tag is on */
 283   long cno;                     /* character number line starts on */
 284 } node;
 285
 286 /*
 287  * A `linebuffer' is a structure which holds a line of text.
 288  * `readline_internal' reads a line from a stream into a linebuffer
 289  * and works regardless of the length of the line.
 290  * SIZE is the size of BUFFER, LEN is the length of the string in
 291  * BUFFER after readline reads it.
 292  */
 293 typedef struct
 294 {
 295   long size;                    /* size of BUFFER */
 296   int len;                      /* length of the string held in BUFFER */
 297   char *buffer;                 /* storage for the text */
 298 } linebuffer;
 299
 300 /* Used to support mixing of --lang and file names. */
 301 typedef struct                  /* one item of the command line */
 302 {
 303   enum {
 304     at_language,                /* a language specification */
 305     at_regexp,                  /* a regular expression */
 306     at_filename,                /* a file name */
 307     at_stdin,                   /* read from stdin here */
 308     at_end                      /* stop parsing the list */
 309   } arg_type;                   /* argument type */
 310   language *lang;               /* language associated with the argument */
 311   char *what;                   /* the argument itself */
 312 } argument;
 313
 314 #ifdef ETAGS_REGEXPS
 315 /* Structure defining a regular expression. */
 316 typedef struct regexp
 317 {
 318   struct regexp *p_next;        /* pointer to next in list */
 319   language *lang;               /* if set, use only for this language */
 320   char *pattern;                /* the regexp pattern */
 321   char *name;                   /* tag name */
 322   struct re_pattern_buffer *pat; /* the compiled pattern */
 323   struct re_registers regs;     /* re registers */
 324   bool error_signaled;          /* already signaled for this regexp */
 325   bool force_explicit_name;     /* do not allow implicit tag name */
 326   bool ignore_case;             /* ignore case when matching */
 327   bool multi_line;              /* do a multi-line match on the whole file */
 328 } regexp;
 329 #endif /* ETAGS_REGEXPS */
 330
 331
 332 /* Many compilers barf on this:
 333         Lang_function Ada_funcs;
 334    so let's write it this way */
 335 static void Ada_funcs __P((FILE *));
 336 static void Asm_labels __P((FILE *));
 337 static void C_entries __P((int c_ext, FILE *));
 338 static void default_C_entries __P((FILE *));
 339 static void plain_C_entries __P((FILE *));
 340 static void Cjava_entries __P((FILE *));
 341 static void Cobol_paragraphs __P((FILE *));
 342 static void Cplusplus_entries __P((FILE *));
 343 static void Cstar_entries __P((FILE *));
 344 static void Erlang_functions __P((FILE *));
 345 static void Fortran_functions __P((FILE *));
 346 static void HTML_labels __P((FILE *));
 347 static void Lisp_functions __P((FILE *));
 348 static void Lua_functions __P((FILE *));
 349 static void Makefile_targets __P((FILE *));
 350 static void Pascal_functions __P((FILE *));
 351 static void Perl_functions __P((FILE *));
 352 static void PHP_functions __P((FILE *));
 353 static void PS_functions __P((FILE *));
 354 static void Prolog_functions __P((FILE *));
 355 static void Python_functions __P((FILE *));
 356 static void Scheme_functions __P((FILE *));
 357 static void TeX_commands __P((FILE *));
 358 static void Texinfo_nodes __P((FILE *));
 359 static void Yacc_entries __P((FILE *));
 360 static void just_read_file __P((FILE *));
 361
 362 static void print_language_names __P((void));
 363 static void print_version __P((void));
 364 static void print_help __P((argument *));
 365 int main __P((int, char **));
 366
 367 static compressor *get_compressor_from_suffix __P((char *, char **));
 368 static language *get_language_from_langname __P((const char *));
 369 static language *get_language_from_interpreter __P((char *));
 370 static language *get_language_from_filename __P((char *, bool));
 371 static void readline __P((linebuffer *, FILE *));
 372 static long readline_internal __P((linebuffer *, FILE *));
 373 static bool nocase_tail __P((char *));
 374 static void get_tag __P((char *, char **));
 375
 376 #ifdef ETAGS_REGEXPS
 377 static void analyse_regex __P((char *));
 378 static void free_regexps __P((void));
 379 static void regex_tag_multiline __P((void));
 380 #endif /* ETAGS_REGEXPS */
 381 static void error __P((const char *, const char *));
 382 static void suggest_asking_for_help __P((void));
 383 void fatal __P((char *, char *));
 384 static void pfatal __P((char *));
 385 static void add_node __P((node *, node **));
 386
 387 static void init __P((void));
 388 static void process_file_name __P((char *, language *));
 389 static void process_file __P((FILE *, char *, language *));
 390 static void find_entries __P((FILE *));
 391 static void free_tree __P((node *));
 392 static void free_fdesc __P((fdesc *));
 393 static void pfnote __P((char *, bool, char *, int, int, long));
 394 static void make_tag __P((char *, int, bool, char *, int, int, long));
 395 static void invalidate_nodes __P((fdesc *, node **));
 396 static void put_entries __P((node *));
 397
 398 static char *concat __P((char *, char *, char *));
 399 static char *skip_spaces __P((char *));
 400 static char *skip_non_spaces __P((char *));
 401 static char *savenstr __P((char *, int));
 402 static char *savestr __P((char *));
 403 static char *etags_strchr __P((const char *, int));
 404 static char *etags_strrchr __P((const char *, int));
 405 static int etags_strcasecmp __P((const char *, const char *));
 406 static int etags_strncasecmp __P((const char *, const char *, int));
 407 static char *etags_getcwd __P((void));
 408 static char *relative_filename __P((char *, char *));
 409 static char *absolute_filename __P((char *, char *));
 410 static char *absolute_dirname __P((char *, char *));
 411 static bool filename_is_absolute __P((char *f));
 412 static void canonicalize_filename __P((char *));
 413 static void linebuffer_init __P((linebuffer *));
 414 static void linebuffer_setlen __P((linebuffer *, int));
 415 static PTR xmalloc __P((unsigned int));
 416 static PTR xrealloc __P((char *, unsigned int));
 417
 418 \f
 419 static char searchar = '/';     /* use /.../ searches */
 420
 421 static char *tagfile;           /* output file */
 422 static char *progname;          /* name this program was invoked with */
 423 static char *cwd;               /* current working directory */
 424 static char *tagfiledir;        /* directory of tagfile */
 425 static FILE *tagf;              /* ioptr for tags file */
 426
 427 static fdesc *fdhead;           /* head of file description list */
 428 static fdesc *curfdp;           /* current file description */
 429 static int lineno;              /* line number of current line */
 430 static long charno;             /* current character number */
 431 static long linecharno;         /* charno of start of current line */
 432 static char *dbp;               /* pointer to start of current tag */
 433
 434 static const int invalidcharno = -1;
 435
 436 static node *nodehead;          /* the head of the binary tree of tags */
 437 static node *last_node;         /* the last node created */
 438
 439 static linebuffer lb;           /* the current line */
 440 static linebuffer filebuf;      /* a buffer containing the whole file */
 441 static linebuffer token_name;   /* a buffer containing a tag name */
 442
 443 /* boolean "functions" (see init)       */
 444 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 445 static char
 446   /* white chars */
 447   *white = " \f\t\n\r\v",
 448   /* not in a name */
 449   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 450   /* token ending chars */
 451   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 452   /* token starting chars */
 453   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 454   /* valid in-token chars */
 455   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 456
 457 static bool append_to_tagfile;  /* -a: append to tags */
 458 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 459 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 460 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 461                                 /* 0 struct/enum/union decls, and C++ */
 462                                 /* member functions. */
 463 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 464                                 /* constants and variables. */
 465                                 /* -D: opposite of -d.  Default under ctags. */
 466 static bool globals;            /* create tags for global variables */
 467 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 468 static bool members;            /* create tags for C member variables */
 469 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 470 static bool update;             /* -u: update tags */
 471 static bool vgrind_style;       /* -v: create vgrind style index output */
 472 static bool no_warnings;        /* -w: suppress warnings */
 473 static bool cxref_style;        /* -x: create cxref style output */
 474 static bool cplusplus;          /* .[hc] means C++, not C */
 475 static bool ignoreindent;       /* -I: ignore indentation in C */
 476 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 477
 478 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 479 static bool parsing_stdin;      /* --parse-stdin used */
 480
 481 #ifdef ETAGS_REGEXPS
 482 static regexp *p_head;          /* list of all regexps */
 483 static bool need_filebuf;       /* some regexes are multi-line */
 484 #else
 485 # define need_filebuf FALSE
 486 #endif /* ETAGS_REGEXPS */
 487
 488 #if LONG_OPTIONS
 489 static struct option longopts[] =
 490 {
 491   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 492   { "c++",                no_argument,       NULL,               'C'   },
 493   { "declarations",       no_argument,       &declarations,      TRUE  },
 494   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 495   { "help",               no_argument,       NULL,               'h'   },
 496   { "help",               no_argument,       NULL,               'H'   },
 497   { "ignore-indentation", no_argument,       NULL,               'I'   },
 498   { "language",           required_argument, NULL,               'l'   },
 499   { "members",            no_argument,       &members,           TRUE  },
 500   { "no-members",         no_argument,       &members,           FALSE },
 501   { "output",             required_argument, NULL,               'o'   },
 502 #ifdef ETAGS_REGEXPS
 503   { "regex",              required_argument, NULL,               'r'   },
 504   { "no-regex",           no_argument,       NULL,               'R'   },
 505   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 506 #endif /* ETAGS_REGEXPS */
 507   { "parse-stdin",        required_argument, NULL,               STDIN },
 508   { "version",            no_argument,       NULL,               'V'   },
 509
 510 #if CTAGS /* Ctags options */
 511   { "backward-search",    no_argument,       NULL,               'B'   },
 512   { "cxref",              no_argument,       NULL,               'x'   },
 513   { "defines",            no_argument,       NULL,               'd'   },
 514   { "globals",            no_argument,       &globals,           TRUE  },
 515   { "typedefs",           no_argument,       NULL,               't'   },
 516   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 517   { "update",             no_argument,       NULL,               'u'   },
 518   { "vgrind",             no_argument,       NULL,               'v'   },
 519   { "no-warn",            no_argument,       NULL,               'w'   },
 520
 521 #else /* Etags options */
 522   { "append",             no_argument,       NULL,               'a'   },
 523   { "no-defines",         no_argument,       NULL,               'D'   },
 524   { "no-globals",         no_argument,       &globals,           FALSE },
 525   { "include",            required_argument, NULL,               'i'   },
 526 #endif
 527   { NULL }
 528 };
 529 #endif /* LONG_OPTIONS */
 530
 531 static compressor compressors[] = /* suffix, command; closed by { NULL } */
 532 {
 533   { "z", "gzip -d -c"},
 534   { "Z", "gzip -d -c"},
 535   { "gz", "gzip -d -c"},
 536   { "GZ", "gzip -d -c"},
 537   { "bz2", "bzip2 -d -c" },
 538   { NULL }
 539 };
 540
 541 /*
 542  * Language stuff.
 543  */
 544
 545 /* Ada code */
 546 static char *Ada_suffixes [] =
 547   { "ads", "adb", "ada", NULL };
 548 static char Ada_help [] =
 549 "In Ada code, functions, procedures, packages, tasks and types are\n\
 550 tags.  Use the `--packages-only' option to create tags for\n\
 551 packages only.\n\
 552 Ada tag names have suffixes indicating the type of entity:\n\
 553         Entity type:    Qualifier:\n\
 554         ------------    ----------\n\
 555         function        /f\n\
 556         procedure       /p\n\
 557         package spec    /s\n\
 558         package body    /b\n\
 559         type            /t\n\
 560         task            /k\n\
 561 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 562 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 563 will just search for any tag `bidule'.";
 564
 565 /* Assembly code */
 566 static char *Asm_suffixes [] =
 567   { "a",        /* Unix assembler */
 568     "asm", /* Microcontroller assembly */
 569     "def", /* BSO/Tasking definition includes  */
 570     "inc", /* Microcontroller include files */
 571     "ins", /* Microcontroller include files */
 572     "s", "sa", /* Unix assembler */
 573     "S",   /* cpp-processed Unix assembler */
 574     "src", /* BSO/Tasking C compiler output */
 575     NULL
 576   };
 577 static char Asm_help [] =
 578 "In assembler code, labels appearing at the beginning of a line,\n\
 579 followed by a colon, are tags.";
 580
 581
 582 /* Note that .c and .h can be considered C++, if the --c++ flag was
 583    given, or if the `class' or `template' keywords are met inside the file.
 584    That is why default_C_entries is called for these. */
 585 static char *default_C_suffixes [] =
 586   { "c", "h", NULL };
 587 static char default_C_help [] =
 588 "In C code, any C function or typedef is a tag, and so are\n\
 589 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 590 definitions and `enum' constants are tags unless you specify\n\
 591 `--no-defines'.  Global variables are tags unless you specify\n\
 592 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 593 can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using `--declarations', and struct members by using `--members'.";
 596
 597 static char *Cplusplus_suffixes [] =
 598   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 599     "M",                        /* Objective C++ */
 600     "pdb",                      /* Postscript with C syntax */
 601     NULL };
 602 static char Cplusplus_help [] =
 603 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 604 --help --lang=c --lang=c++ for full help.)\n\
 605 In addition to C tags, member functions are also recognized, and\n\
 606 optionally member variables if you use the `--members' option.\n\
 607 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 608 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 609 `operator+'.";
 610
 611 static char *Cjava_suffixes [] =
 612   { "java", NULL };
 613 static char Cjava_help [] =
 614 "In Java code, all the tags constructs of C and C++ code are\n\
 615 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 616
 617
 618 static char *Cobol_suffixes [] =
 619   { "COB", "cob", NULL };
 620 static char Cobol_help [] =
 621 "In Cobol code, tags are paragraph names; that is, any word\n\
 622 starting in column 8 and followed by a period.";
 623
 624 static char *Cstar_suffixes [] =
 625   { "cs", "hs", NULL };
 626
 627 static char *Erlang_suffixes [] =
 628   { "erl", "hrl", NULL };
 629 static char Erlang_help [] =
 630 "In Erlang code, the tags are the functions, records and macros\n\
 631 defined in the file.";
 632
 633 static char *Fortran_suffixes [] =
 634   { "F", "f", "f90", "for", NULL };
 635 static char Fortran_help [] =
 636 "In Fortran code, functions, subroutines and block data are tags.";
 637
 638 static char *HTML_suffixes [] =
 639   { "htm", "html", "shtml", NULL };
 640 static char HTML_help [] =
 641 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 642 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 643 occurrences of `id='.";
 644
 645 static char *Lisp_suffixes [] =
 646   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 647 static char Lisp_help [] =
 648 "In Lisp code, any function defined with `defun', any variable\n\
 649 defined with `defvar' or `defconst', and in general the first\n\
 650 argument of any expression that starts with `(def' in column zero\n\
 651 is a tag.";
 652
 653 static char *Lua_suffixes [] =
 654   { "lua", "LUA", NULL };
 655 static char Lua_help [] =
 656 "In Lua scripts, all functions are tags.";
 657
 658 static char *Makefile_filenames [] =
 659   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 660 static char Makefile_help [] =
 661 "In makefiles, targets are tags; additionally, variables are tags\n\
 662 unless you specify `--no-globals'.";
 663
 664 static char *Objc_suffixes [] =
 665   { "lm",                       /* Objective lex file */
 666     "m",                        /* Objective C file */
 667      NULL };
 668 static char Objc_help [] =
 669 "In Objective C code, tags include Objective C definitions for classes,\n\
 670 class categories, methods and protocols.  Tags for variables and\n\
 671 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 672
 673 static char *Pascal_suffixes [] =
 674   { "p", "pas", NULL };
 675 static char Pascal_help [] =
 676 "In Pascal code, the tags are the functions and procedures defined\n\
 677 in the file.";
 678
 679 static char *Perl_suffixes [] =
 680   { "pl", "pm", NULL };
 681 static char *Perl_interpreters [] =
 682   { "perl", "@PERL@", NULL };
 683 static char Perl_help [] =
 684 "In Perl code, the tags are the packages, subroutines and variables\n\
 685 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 686 `--globals' if you want to tag global variables.  Tags for\n\
 687 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 688 defined in the default package is `main::SUB'.";
 689
 690 static char *PHP_suffixes [] =
 691   { "php", "php3", "php4", NULL };
 692 static char PHP_help [] =
 693 "In PHP code, tags are functions, classes and defines.  When using\n\
 694 the `--members' option, vars are tags too.";
 695
 696 static char *plain_C_suffixes [] =
 697   { "pc",                       /* Pro*C file */
 698      NULL };
 699
 700 static char *PS_suffixes [] =
 701   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 702 static char PS_help [] =
 703 "In PostScript code, the tags are the functions.";
 704
 705 static char *Prolog_suffixes [] =
 706   { "prolog", NULL };
 707 static char Prolog_help [] =
 708 "In Prolog code, tags are predicates and rules at the beginning of\n\
 709 line.";
 710
 711 static char *Python_suffixes [] =
 712   { "py", NULL };
 713 static char Python_help [] =
 714 "In Python code, `def' or `class' at the beginning of a line\n\
 715 generate a tag.";
 716
 717 /* Can't do the `SCM' or `scm' prefix with a version number. */
 718 static char *Scheme_suffixes [] =
 719   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 720 static char Scheme_help [] =
 721 "In Scheme code, tags include anything defined with `def' or with a\n\
 722 construct whose name starts with `def'.  They also include\n\
 723 variables set with `set!' at top level in the file.";
 724
 725 static char *TeX_suffixes [] =
 726   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 727 static char TeX_help [] =
 728 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 729 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 730 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 731 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 732 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 733 \n\
 734 Other commands can be specified by setting the environment variable\n\
 735 `TEXTAGS' to a colon-separated list like, for example,\n\
 736      TEXTAGS=\"mycommand:myothercommand\".";
 737
 738
 739 static char *Texinfo_suffixes [] =
 740   { "texi", "texinfo", "txi", NULL };
 741 static char Texinfo_help [] =
 742 "for texinfo files, lines starting with @node are tagged.";
 743
 744 static char *Yacc_suffixes [] =
 745   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 746 static char Yacc_help [] =
 747 "In Bison or Yacc input files, each rule defines as a tag the\n\
 748 nonterminal it constructs.  The portions of the file that contain\n\
 749 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 750 for full help).";
 751
 752 static char auto_help [] =
 753 "`auto' is not a real language, it indicates to use\n\
 754 a default language for files base on file name suffix and file contents.";
 755
 756 static char none_help [] =
 757 "`none' is not a real language, it indicates to only do\n\
 758 regexp processing on files.";
 759
 760 static char no_lang_help [] =
 761 "No detailed help available for this language.";
 762
 763
 764 /*
 765  * Table of languages.
 766  *
 767  * It is ok for a given function to be listed under more than one
 768  * name.  I just didn't.
 769  */
 770
 771 static language lang_names [] =
 772 {
 773   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 774   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 775   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 776   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 777   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 778   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 779   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 780   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 781   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 782   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 783   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 784   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 785   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 786   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 787   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 788   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 789   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 790   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 791   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 792   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 793   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 794   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 795   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 796   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 797   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 798   { "auto",      auto_help },                      /* default guessing scheme */
 799   { "none",      none_help,      just_read_file }, /* regexp matching only */
 800   { NULL }                /* end of list */
 801 };
 802
 803 \f
 804 static void
 805 print_language_names ()
 806 {
 807   language *lang;
 808   char **name, **ext;
 809
 810   puts ("\nThese are the currently supported languages, along with the\n\
 811 default file names and dot suffixes:");
 812   for (lang = lang_names; lang->name != NULL; lang++)
 813     {
 814       printf ("  %-*s", 10, lang->name);
 815       if (lang->filenames != NULL)
 816         for (name = lang->filenames; *name != NULL; name++)
 817           printf (" %s", *name);
 818       if (lang->suffixes != NULL)
 819         for (ext = lang->suffixes; *ext != NULL; ext++)
 820           printf (" .%s", *ext);
 821       puts ("");
 822     }
 823   puts ("where `auto' means use default language for files based on file\n\
 824 name suffix, and `none' means only do regexp processing on files.\n\
 825 If no language is specified and no matching suffix is found,\n\
 826 the first line of the file is read for a sharp-bang (#!) sequence\n\
 827 followed by the name of an interpreter.  If no such sequence is found,\n\
 828 Fortran is tried first; if no tags are found, C is tried next.\n\
 829 When parsing any C file, a \"class\" or \"template\" keyword\n\
 830 switches to C++.");
 831   puts ("Compressed files are supported using gzip and bzip2.\n\
 832 \n\
 833 For detailed help on a given language use, for example,\n\
 834 etags --help --lang=ada.");
 835 }
 836
 837 #ifndef EMACS_NAME
 838 # define EMACS_NAME "standalone"
 839 #endif
 840 #ifndef VERSION
 841 # define VERSION "version"
 842 #endif
 843 static void
 844 print_version ()
 845 {
 846   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 847   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 848   puts ("This program is distributed under the same terms as Emacs");
 849
 850   exit (EXIT_SUCCESS);
 851 }
 852
 853 static void
 854 print_help (argbuffer)
 855      argument *argbuffer;
 856 {
 857   bool help_for_lang = FALSE;
 858
 859   for (; argbuffer->arg_type != at_end; argbuffer++)
 860     if (argbuffer->arg_type == at_language)
 861       {
 862         if (help_for_lang)
 863           puts ("");
 864         puts (argbuffer->lang->help);
 865         help_for_lang = TRUE;
 866       }
 867
 868   if (help_for_lang)
 869     exit (EXIT_SUCCESS);
 870
 871   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 872 \n\
 873 These are the options accepted by %s.\n", progname, progname);
 874   if (LONG_OPTIONS)
 875     puts ("You may use unambiguous abbreviations for the long option names.");
 876   else
 877     puts ("Long option names do not work with this executable, as it is not\n\
 878 linked with GNU getopt.");
 879   puts ("  A - as file name means read names from stdin (one per line).\n\
 880 Absolute names are stored in the output file as they are.\n\
 881 Relative ones are stored relative to the output file's directory.\n");
 882
 883   if (!CTAGS)
 884     puts ("-a, --append\n\
 885         Append tag entries to existing tags file.");
 886
 887   puts ("--packages-only\n\
 888         For Ada files, only generate tags for packages.");
 889
 890   if (CTAGS)
 891     puts ("-B, --backward-search\n\
 892         Write the search commands for the tag entries using '?', the\n\
 893         backward-search command instead of '/', the forward-search command.");
 894
 895   /* This option is mostly obsolete, because etags can now automatically
 896      detect C++.  Retained for backward compatibility and for debugging and
 897      experimentation.  In principle, we could want to tag as C++ even
 898      before any "class" or "template" keyword.
 899   puts ("-C, --c++\n\
 900         Treat files whose name suffix defaults to C language as C++ files.");
 901   */
 902
 903   puts ("--declarations\n\
 904         In C and derived languages, create tags for function declarations,");
 905   if (CTAGS)
 906     puts ("\tand create tags for extern variables if --globals is used.");
 907   else
 908     puts
 909       ("\tand create tags for extern variables unless --no-globals is used.");
 910
 911   if (CTAGS)
 912     puts ("-d, --defines\n\
 913         Create tag entries for C #define constants and enum constants, too.");
 914   else
 915     puts ("-D, --no-defines\n\
 916         Don't create tag entries for C #define constants and enum constants.\n\
 917         This makes the tags file smaller.");
 918
 919   if (!CTAGS)
 920     puts ("-i FILE, --include=FILE\n\
 921         Include a note in tag file indicating that, when searching for\n\
 922         a tag, one should also consult the tags file FILE after\n\
 923         checking the current file.");
 924
 925   puts ("-l LANG, --language=LANG\n\
 926         Force the following files to be considered as written in the\n\
 927         named language up to the next --language=LANG option.");
 928
 929   if (CTAGS)
 930     puts ("--globals\n\
 931         Create tag entries for global variables in some languages.");
 932   else
 933     puts ("--no-globals\n\
 934         Do not create tag entries for global variables in some\n\
 935         languages.  This makes the tags file smaller.");
 936   puts ("--members\n\
 937         Create tag entries for members of structures in some languages.");
 938
 939 #ifdef ETAGS_REGEXPS
 940   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 941         Make a tag for each line matching a regular expression pattern\n\
 942         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 943         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 944         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 945         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 946   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 947         For example Tcl named tags can be created with:\n\
 948           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 949         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 950         `m' means to allow multi-line matches, `s' implies `m' and\n\
 951         causes dot to match any character, including newline.");
 952   puts ("-R, --no-regex\n\
 953         Don't create tags from regexps for the following files.");
 954 #endif /* ETAGS_REGEXPS */
 955   puts ("-I, --ignore-indentation\n\
 956         In C and C++ do not assume that a closing brace in the first\n\
 957         column is the final brace of a function or structure definition.");
 958   puts ("-o FILE, --output=FILE\n\
 959         Write the tags to FILE.");
 960   puts ("--parse-stdin=NAME\n\
 961         Read from standard input and record tags as belonging to file NAME.");
 962
 963   if (CTAGS)
 964     {
 965       puts ("-t, --typedefs\n\
 966         Generate tag entries for C and Ada typedefs.");
 967       puts ("-T, --typedefs-and-c++\n\
 968         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 969         and C++ member functions.");
 970     }
 971
 972   if (CTAGS)
 973     puts ("-u, --update\n\
 974         Update the tag entries for the given files, leaving tag\n\
 975         entries for other files in place.  Currently, this is\n\
 976         implemented by deleting the existing entries for the given\n\
 977         files and then rewriting the new entries at the end of the\n\
 978         tags file.  It is often faster to simply rebuild the entire\n\
 979         tag file than to use this.");
 980
 981   if (CTAGS)
 982     {
 983       puts ("-v, --vgrind\n\
 984         Generates an index of items intended for human consumption,\n\
 985         similar to the output of vgrind.  The index is sorted, and\n\
 986         gives the page number of each item.");
 987       puts ("-w, --no-warn\n\
 988         Suppress warning messages about entries defined in multiple\n\
 989         files.");
 990       puts ("-x, --cxref\n\
 991         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 992         The output uses line numbers instead of page numbers, but\n\
 993         beyond that the differences are cosmetic; try both to see\n\
 994         which you like.");
 995     }
 996
 997   puts ("-V, --version\n\
 998         Print the version of the program.\n\
 999 -h, --help\n\
1000         Print this help message.\n\
1001         Followed by one or more `--language' options prints detailed\n\
1002         help about tag generation for the specified languages.");
1003
1004   print_language_names ();
1005
1006   puts ("");
1007   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1008
1009   exit (EXIT_SUCCESS);
1010 }
1011
1012 \f
1013 #ifdef VMS                      /* VMS specific functions */
1014
/* End-of-string terminator. */
#define EOS     '\0'

/* Capacity, in characters, of a file specification buffer.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A length-prefixed file specification: CURLEN is the number of
   characters currently held in BODY, which has room for
   MAX_FILE_SPEC_LEN characters plus a terminator.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1024
1025 /*
1026  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1027  returning in each successive call the next file name matching the input
1028  spec. The function expects that each in_spec passed
1029  to it will be processed to completion; in particular, up to and
1030  including the call following that in which the last matching name
1031  is returned, the function ignores the value of in_spec, and will
1032  only start processing a new spec with the following call.
1033  If an error occurs, on return out_spec contains the value
1034  of in_spec when the error occurred.
1035
1036  With each successive file name returned in out_spec, the
1037  function's return value is one. When there are no more matching
1038  names the function returns zero. If on the first call no file
1039  matches in_spec, or there is any other error, -1 is returned.
1040 */
1041
1042 #include        <rmsdef.h>
1043 #include        <descrip.h>
1044 #define         OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047      vspec *out;
1048      char *in;
1049 {
1050   static long context = 0;
1051   static struct dsc$descriptor_s o;
1052   static struct dsc$descriptor_s i;
1053   static bool pass1 = TRUE;
1054   long status;
1055   short retval;
1056
1057   if (pass1)
1058     {
1059       pass1 = FALSE;
1060       o.dsc$a_pointer = (char *) out;
1061       o.dsc$w_length = (short)OUTSIZE;
1062       i.dsc$a_pointer = in;
1063       i.dsc$w_length = (short)strlen(in);
1064       i.dsc$b_dtype = DSC$K_DTYPE_T;
1065       i.dsc$b_class = DSC$K_CLASS_S;
1066       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067       o.dsc$b_class = DSC$K_CLASS_VS;
1068     }
1069   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1070     {
1071       out->body[out->curlen] = EOS;
1072       return 1;
1073     }
1074   else if (status == RMS$_NMF)
1075     retval = 0;
1076   else
1077     {
1078       strcpy(out->body, in);
1079       retval = -1;
1080     }
1081   lib$find_file_end(&context);
1082   pass1 = TRUE;
1083   return retval;
1084 }
1085
1086 /*
1087   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088   name of each file specified by the provided arg expanding wildcards.
1089 */
1090 static char *
1091 gfnames (arg, p_error)
1092      char *arg;
1093      bool *p_error;
1094 {
1095   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1096
1097   switch (fn_exp (&filename, arg))
1098     {
1099     case 1:
1100       *p_error = FALSE;
1101       return filename.body;
1102     case 0:
1103       *p_error = FALSE;
1104       return NULL;
1105     default:
1106       *p_error = TRUE;
1107       return filename.body;
1108     }
1109 }
1110
1111 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
1112 system (cmd)
1113      char *cmd;
1114 {
1115   error ("%s", "system() function not implemented under VMS");
1116 }
1117 #endif
1118
1119 #define VERSION_DELIM   ';'
1120 char *massage_name (s)
1121      char *s;
1122 {
1123   char *start = s;
1124
1125   for ( ; *s; s++)
1126     if (*s == VERSION_DELIM)
1127       {
1128         *s = EOS;
1129         break;
1130       }
1131     else
1132       *s = lowcase (*s);
1133   return start;
1134 }
1135 #endif /* VMS */
1136
1137 \f
1138 int
1139 main (argc, argv)
1140      int argc;
1141      char *argv[];
1142 {
1143   int i;
1144   unsigned int nincluded_files;
1145   char **included_files;
1146   argument *argbuffer;
1147   int current_arg, file_count;
1148   linebuffer filename_lb;
1149   bool help_asked = FALSE;
1150 #ifdef VMS
1151   bool got_err;
1152 #endif
1153  char *optstring;
1154  int opt;
1155
1156
1157 #ifdef DOS_NT
1158   _fmode = O_BINARY;   /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1160
1161   progname = argv[0];
1162   nincluded_files = 0;
1163   included_files = xnew (argc, char *);
1164   current_arg = 0;
1165   file_count = 0;
1166
1167   /* Allocate enough no matter what happens.  Overkill, but each one
1168      is small. */
1169   argbuffer = xnew (argc, argument);
1170
1171   /*
1172    * If etags, always find typedefs and structure tags.  Why not?
1173    * Also default to find macro constants, enum constants and
1174    * global variables.
1175    */
1176   if (!CTAGS)
1177     {
1178       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179       globals = TRUE;
1180     }
1181
1182   optstring = "-";
1183 #ifdef ETAGS_REGEXPS
1184   optstring = "-r:Rc:";
1185 #endif /* ETAGS_REGEXPS */
1186   if (LONG_OPTIONS)
1187     optstring += 1;
1188   optstring = concat (optstring,
1189                       "Cf:Il:o:SVhH",
1190                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1191
1192   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1193     switch (opt)
1194       {
1195       case 0:
1196         /* If getopt returns 0, then it has already processed a
1197            long-named option.  We should do nothing.  */
1198         break;
1199
1200       case 1:
1201         /* This means that a file name has been seen.  Record it. */
1202         argbuffer[current_arg].arg_type = at_filename;
1203         argbuffer[current_arg].what     = optarg;
1204         ++current_arg;
1205         ++file_count;
1206         break;
1207
1208       case STDIN:
1209         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1210         argbuffer[current_arg].arg_type = at_stdin;
1211         argbuffer[current_arg].what     = optarg;
1212         ++current_arg;
1213         ++file_count;
1214         if (parsing_stdin)
1215           fatal ("cannot parse standard input more than once", (char *)NULL);
1216         parsing_stdin = TRUE;
1217         break;
1218
1219         /* Common options. */
1220       case 'C': cplusplus = TRUE;               break;
1221       case 'f':         /* for compatibility with old makefiles */
1222       case 'o':
1223         if (tagfile)
1224           {
1225             error ("-o option may only be given once.", (char *)NULL);
1226             suggest_asking_for_help ();
1227             /* NOTREACHED */
1228           }
1229         tagfile = optarg;
1230         break;
1231       case 'I':
1232       case 'S':         /* for backward compatibility */
1233         ignoreindent = TRUE;
1234         break;
1235       case 'l':
1236         {
1237           language *lang = get_language_from_langname (optarg);
1238           if (lang != NULL)
1239             {
1240               argbuffer[current_arg].lang = lang;
1241               argbuffer[current_arg].arg_type = at_language;
1242               ++current_arg;
1243             }
1244         }
1245         break;
1246       case 'c':
1247         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1248         optarg = concat (optarg, "i", ""); /* memory leak here */
1249         /* FALLTHRU */
1250       case 'r':
1251         argbuffer[current_arg].arg_type = at_regexp;
1252         argbuffer[current_arg].what = optarg;
1253         ++current_arg;
1254         break;
1255       case 'R':
1256         argbuffer[current_arg].arg_type = at_regexp;
1257         argbuffer[current_arg].what = NULL;
1258         ++current_arg;
1259         break;
1260       case 'V':
1261         print_version ();
1262         break;
1263       case 'h':
1264       case 'H':
1265         help_asked = TRUE;
1266         break;
1267
1268         /* Etags options */
1269       case 'a': append_to_tagfile = TRUE;                       break;
1270       case 'D': constantypedefs = FALSE;                        break;
1271       case 'i': included_files[nincluded_files++] = optarg;     break;
1272
1273         /* Ctags options. */
1274       case 'B': searchar = '?';                                 break;
1275       case 'd': constantypedefs = TRUE;                         break;
1276       case 't': typedefs = TRUE;                                break;
1277       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1278       case 'u': update = TRUE;                                  break;
1279       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1280       case 'x': cxref_style = TRUE;                             break;
1281       case 'w': no_warnings = TRUE;                             break;
1282       default:
1283         suggest_asking_for_help ();
1284         /* NOTREACHED */
1285       }
1286
1287   for (; optind < argc; optind++)
1288     {
1289       argbuffer[current_arg].arg_type = at_filename;
1290       argbuffer[current_arg].what = argv[optind];
1291       ++current_arg;
1292       ++file_count;
1293     }
1294
1295   argbuffer[current_arg].arg_type = at_end;
1296
1297   if (help_asked)
1298     print_help (argbuffer);
1299     /* NOTREACHED */
1300
1301   if (nincluded_files == 0 && file_count == 0)
1302     {
1303       error ("no input files specified.", (char *)NULL);
1304       suggest_asking_for_help ();
1305       /* NOTREACHED */
1306     }
1307
1308   if (tagfile == NULL)
1309     tagfile = CTAGS ? "tags" : "TAGS";
1310   cwd = etags_getcwd ();        /* the current working directory */
1311   if (cwd[strlen (cwd) - 1] != '/')
1312     {
1313       char *oldcwd = cwd;
1314       cwd = concat (oldcwd, "/", "");
1315       free (oldcwd);
1316     }
1317   /* Relative file names are made relative to the current directory. */
1318   if (streq (tagfile, "-")
1319       || strneq (tagfile, "/dev/", 5))
1320     tagfiledir = cwd;
1321   else
1322     tagfiledir = absolute_dirname (tagfile, cwd);
1323
1324   init ();                      /* set up boolean "functions" */
1325
1326   linebuffer_init (&lb);
1327   linebuffer_init (&filename_lb);
1328   linebuffer_init (&filebuf);
1329   linebuffer_init (&token_name);
1330
1331   if (!CTAGS)
1332     {
1333       if (streq (tagfile, "-"))
1334         {
1335           tagf = stdout;
1336 #ifdef DOS_NT
1337           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1338              doesn't take effect until after `stdout' is already open). */
1339           if (!isatty (fileno (stdout)))
1340             setmode (fileno (stdout), O_BINARY);
1341 #endif /* DOS_NT */
1342         }
1343       else
1344         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1345       if (tagf == NULL)
1346         pfatal (tagfile);
1347     }
1348
1349   /*
1350    * Loop through files finding functions.
1351    */
1352   for (i = 0; i < current_arg; i++)
1353     {
1354       static language *lang;    /* non-NULL if language is forced */
1355       char *this_file;
1356
1357       switch (argbuffer[i].arg_type)
1358         {
1359         case at_language:
1360           lang = argbuffer[i].lang;
1361           break;
1362 #ifdef ETAGS_REGEXPS
1363         case at_regexp:
1364           analyse_regex (argbuffer[i].what);
1365           break;
1366 #endif
1367         case at_filename:
1368 #ifdef VMS
1369           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1370             {
1371               if (got_err)
1372                 {
1373                   error ("can't find file %s\n", this_file);
1374                   argc--, argv++;
1375                 }
1376               else
1377                 {
1378                   this_file = massage_name (this_file);
1379                 }
1380 #else
1381               this_file = argbuffer[i].what;
1382 #endif
1383               /* Input file named "-" means read file names from stdin
1384                  (one per line) and use them. */
1385               if (streq (this_file, "-"))
1386                 {
1387                   if (parsing_stdin)
1388                     fatal ("cannot parse standard input AND read file names from it",
1389                            (char *)NULL);
1390                   while (readline_internal (&filename_lb, stdin) > 0)
1391                     process_file_name (filename_lb.buffer, lang);
1392                 }
1393               else
1394                 process_file_name (this_file, lang);
1395 #ifdef VMS
1396             }
1397 #endif
1398           break;
1399         case at_stdin:
1400           this_file = argbuffer[i].what;
1401           process_file (stdin, this_file, lang);
1402           break;
1403         case at_end:
1404           break;
1405         }
1406     }
1407
1408 #ifdef ETAGS_REGEXPS
1409   free_regexps ();
1410 #endif /* ETAGS_REGEXPS */
1411   free (lb.buffer);
1412   free (filebuf.buffer);
1413   free (token_name.buffer);
1414
1415   if (!CTAGS || cxref_style)
1416     {
1417       put_entries (nodehead);   /* write the remainig tags (ETAGS) */
1418       free_tree (nodehead);
1419       nodehead = NULL;
1420       if (!CTAGS)
1421         {
1422           fdesc *fdp;
1423
1424           /* Output file entries that have no tags. */
1425           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1426             if (!fdp->written)
1427               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1428
1429           while (nincluded_files-- > 0)
1430             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1431         }
1432
1433       if (fclose (tagf) == EOF)
1434         pfatal (tagfile);
1435       exit (EXIT_SUCCESS);
1436     }
1437
1438   if (update)
1439     {
1440       char cmd[BUFSIZ];
1441       for (i = 0; i < current_arg; ++i)
1442         {
1443           switch (argbuffer[i].arg_type)
1444             {
1445             case at_filename:
1446             case at_stdin:
1447               break;
1448             default:
1449               continue;         /* the for loop */
1450             }
1451           sprintf (cmd,
1452                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1453                    tagfile, argbuffer[i].what, tagfile);
1454           if (system (cmd) != EXIT_SUCCESS)
1455             fatal ("failed to execute shell command", (char *)NULL);
1456         }
1457       append_to_tagfile = TRUE;
1458     }
1459
1460   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1461   if (tagf == NULL)
1462     pfatal (tagfile);
1463   put_entries (nodehead);       /* write all the tags (CTAGS) */
1464   free_tree (nodehead);
1465   nodehead = NULL;
1466   if (fclose (tagf) == EOF)
1467     pfatal (tagfile);
1468
1469   if (update)
1470     {
1471       char cmd[2*BUFSIZ+10];
1472       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1473       exit (system (cmd));
1474     }
1475   return EXIT_SUCCESS;
1476 }
1477
1478
1479 /*
1480  * Return a compressor given the file name.  If EXTPTR is non-zero,
1481  * return a pointer into FILE where the compressor-specific
1482  * extension begins.  If no compressor is found, NULL is returned
1483  * and EXTPTR is not significant.
1484  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1485  */
1486 static compressor *
1487 get_compressor_from_suffix (file, extptr)
1488      char *file;
1489      char **extptr;
1490 {
1491   compressor *compr;
1492   char *slash, *suffix;
1493
1494   /* This relies on FN to be after canonicalize_filename,
1495      so we don't need to consider backslashes on DOS_NT.  */
1496   slash = etags_strrchr (file, '/');
1497   suffix = etags_strrchr (file, '.');
1498   if (suffix == NULL || suffix < slash)
1499     return NULL;
1500   if (extptr != NULL)
1501     *extptr = suffix;
1502   suffix += 1;
1503   /* Let those poor souls who live with DOS 8+3 file name limits get
1504      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1505      Only the first do loop is run if not MSDOS */
1506   do
1507     {
1508       for (compr = compressors; compr->suffix != NULL; compr++)
1509         if (streq (compr->suffix, suffix))
1510           return compr;
1511       if (!MSDOS)
1512         break;                  /* do it only once: not really a loop */
1513       if (extptr != NULL)
1514         *extptr = ++suffix;
1515     } while (*suffix != '\0');
1516   return NULL;
1517 }
1518
1519
1520
1521 /*
1522  * Return a language given the name.
1523  */
1524 static language *
1525 get_language_from_langname (name)
1526      const char *name;
1527 {
1528   language *lang;
1529
1530   if (name == NULL)
1531     error ("empty language name", (char *)NULL);
1532   else
1533     {
1534       for (lang = lang_names; lang->name != NULL; lang++)
1535         if (streq (name, lang->name))
1536           return lang;
1537       error ("unknown language \"%s\"", name);
1538     }
1539
1540   return NULL;
1541 }
1542
1543
1544 /*
1545  * Return a language given the interpreter name.
1546  */
1547 static language *
1548 get_language_from_interpreter (interpreter)
1549      char *interpreter;
1550 {
1551   language *lang;
1552   char **iname;
1553
1554   if (interpreter == NULL)
1555     return NULL;
1556   for (lang = lang_names; lang->name != NULL; lang++)
1557     if (lang->interpreters != NULL)
1558       for (iname = lang->interpreters; *iname != NULL; iname++)
1559         if (streq (*iname, interpreter))
1560             return lang;
1561
1562   return NULL;
1563 }
1564
1565
1566
1567 /*
1568  * Return a language given the file name.
1569  */
1570 static language *
1571 get_language_from_filename (file, case_sensitive)
1572      char *file;
1573      bool case_sensitive;
1574 {
1575   language *lang;
1576   char **name, **ext, *suffix;
1577
1578   /* Try whole file name first. */
1579   for (lang = lang_names; lang->name != NULL; lang++)
1580     if (lang->filenames != NULL)
1581       for (name = lang->filenames; *name != NULL; name++)
1582         if ((case_sensitive)
1583             ? streq (*name, file)
1584             : strcaseeq (*name, file))
1585           return lang;
1586
1587   /* If not found, try suffix after last dot. */
1588   suffix = etags_strrchr (file, '.');
1589   if (suffix == NULL)
1590     return NULL;
1591   suffix += 1;
1592   for (lang = lang_names; lang->name != NULL; lang++)
1593     if (lang->suffixes != NULL)
1594       for (ext = lang->suffixes; *ext != NULL; ext++)
1595         if ((case_sensitive)
1596             ? streq (*ext, suffix)
1597             : strcaseeq (*ext, suffix))
1598           return lang;
1599   return NULL;
1600 }
1601
1602 \f
1603 /*
1604  * This routine is called on each file argument.
1605  */
1606 static void
1607 process_file_name (file, lang)
1608      char *file;
1609      language *lang;
1610 {
1611   struct stat stat_buf;
1612   FILE *inf;
1613   fdesc *fdp;
1614   compressor *compr;
1615   char *compressed_name, *uncompressed_name;
1616   char *ext, *real_name;
1617   int retval;
1618
1619   canonicalize_filename (file);
1620   if (streq (file, tagfile) && !streq (tagfile, "-"))
1621     {
1622       error ("skipping inclusion of %s in self.", file);
1623       return;
1624     }
1625   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1626     {
1627       compressed_name = NULL;
1628       real_name = uncompressed_name = savestr (file);
1629     }
1630   else
1631     {
1632       real_name = compressed_name = savestr (file);
1633       uncompressed_name = savenstr (file, ext - file);
1634     }
1635
1636   /* If the canonicalized uncompressed name
1637      has already been dealt with, skip it silently. */
1638   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1639     {
1640       assert (fdp->infname != NULL);
1641       if (streq (uncompressed_name, fdp->infname))
1642         goto cleanup;
1643     }
1644
1645   if (stat (real_name, &stat_buf) != 0)
1646     {
1647       /* Reset real_name and try with a different name. */
1648       real_name = NULL;
1649       if (compressed_name != NULL) /* try with the given suffix */
1650         {
1651           if (stat (uncompressed_name, &stat_buf) == 0)
1652             real_name = uncompressed_name;
1653         }
1654       else                      /* try all possible suffixes */
1655         {
1656           for (compr = compressors; compr->suffix != NULL; compr++)
1657             {
1658               compressed_name = concat (file, ".", compr->suffix);
1659               if (stat (compressed_name, &stat_buf) != 0)
1660                 {
1661                   if (MSDOS)
1662                     {
1663                       char *suf = compressed_name + strlen (file);
1664                       size_t suflen = strlen (compr->suffix) + 1;
1665                       for ( ; suf[1]; suf++, suflen--)
1666                         {
1667                           memmove (suf, suf + 1, suflen);
1668                           if (stat (compressed_name, &stat_buf) == 0)
1669                             {
1670                               real_name = compressed_name;
1671                               break;
1672                             }
1673                         }
1674                       if (real_name != NULL)
1675                         break;
1676                     } /* MSDOS */
1677                   free (compressed_name);
1678                   compressed_name = NULL;
1679                 }
1680               else
1681                 {
1682                   real_name = compressed_name;
1683                   break;
1684                 }
1685             }
1686         }
1687       if (real_name == NULL)
1688         {
1689           perror (file);
1690           goto cleanup;
1691         }
1692     } /* try with a different name */
1693
1694   if (!S_ISREG (stat_buf.st_mode))
1695     {
1696       error ("skipping %s: it is not a regular file.", real_name);
1697       goto cleanup;
1698     }
1699   if (real_name == compressed_name)
1700     {
1701       char *cmd = concat (compr->command, " ", real_name);
1702       inf = (FILE *) popen (cmd, "r");
1703       free (cmd);
1704     }
1705   else
1706     inf = fopen (real_name, "r");
1707   if (inf == NULL)
1708     {
1709       perror (real_name);
1710       goto cleanup;
1711     }
1712
1713   process_file (inf, uncompressed_name, lang);
1714
1715   if (real_name == compressed_name)
1716     retval = pclose (inf);
1717   else
1718     retval = fclose (inf);
1719   if (retval < 0)
1720     pfatal (file);
1721
1722  cleanup:
1723   if (compressed_name) free (compressed_name);
1724   if (uncompressed_name) free (uncompressed_name);
1725   last_node = NULL;
1726   curfdp = NULL;
1727   return;
1728 }
1729
1730 static void
1731 process_file (fh, fn, lang)
1732      FILE *fh;
1733      char *fn;
1734      language *lang;
1735 {
1736   static const fdesc emptyfdesc;
1737   fdesc *fdp;
1738
1739   /* Create a new input file description entry. */
1740   fdp = xnew (1, fdesc);
1741   *fdp = emptyfdesc;
1742   fdp->next = fdhead;
1743   fdp->infname = savestr (fn);
1744   fdp->lang = lang;
1745   fdp->infabsname = absolute_filename (fn, cwd);
1746   fdp->infabsdir = absolute_dirname (fn, cwd);
1747   if (filename_is_absolute (fn))
1748     {
1749       /* An absolute file name.  Canonicalize it. */
1750       fdp->taggedfname = absolute_filename (fn, NULL);
1751     }
1752   else
1753     {
1754       /* A file name relative to cwd.  Make it relative
1755          to the directory of the tags file. */
1756       fdp->taggedfname = relative_filename (fn, tagfiledir);
1757     }
1758   fdp->usecharno = TRUE;        /* use char position when making tags */
1759   fdp->prop = NULL;
1760   fdp->written = FALSE;         /* not written on tags file yet */
1761
1762   fdhead = fdp;
1763   curfdp = fdhead;              /* the current file description */
1764
1765   find_entries (fh);
1766
1767   /* If not Ctags, and if this is not metasource and if it contained no #line
1768      directives, we can write the tags and free all nodes pointing to
1769      curfdp. */
1770   if (!CTAGS
1771       && curfdp->usecharno      /* no #line directives in this file */
1772       && !curfdp->lang->metasource)
1773     {
1774       node *np, *prev;
1775
1776       /* Look for the head of the sublist relative to this file.  See add_node
1777          for the structure of the node tree. */
1778       prev = NULL;
1779       for (np = nodehead; np != NULL; prev = np, np = np->left)
1780         if (np->fdp == curfdp)
1781           break;
1782
1783       /* If we generated tags for this file, write and delete them. */
1784       if (np != NULL)
1785         {
1786           /* This is the head of the last sublist, if any.  The following
1787              instructions depend on this being true. */
1788           assert (np->left == NULL);
1789
1790           assert (fdhead == curfdp);
1791           assert (last_node->fdp == curfdp);
1792           put_entries (np);     /* write tags for file curfdp->taggedfname */
1793           free_tree (np);       /* remove the written nodes */
1794           if (prev == NULL)
1795             nodehead = NULL;    /* no nodes left */
1796           else
1797             prev->left = NULL;  /* delete the pointer to the sublist */
1798         }
1799     }
1800 }
1801
1802 /*
1803  * This routine sets up the boolean pseudo-functions which work
1804  * by setting boolean flags dependent upon the corresponding character.
1805  * Every char which is NOT in that string is not a white char.  Therefore,
1806  * all of the array "_wht" is set to FALSE, and then the elements
1807  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1808  * of a char is TRUE if it is the string "white", else FALSE.
1809  */
1810 static void
1811 init ()
1812 {
1813   register char *sp;
1814   register int i;
1815
1816   for (i = 0; i < CHARS; i++)
1817     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1818   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1819   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1820   notinname('\0') = notinname('\n');
1821   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1822   begtoken('\0') = begtoken('\n');
1823   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1824   intoken('\0') = intoken('\n');
1825   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1826   endtoken('\0') = endtoken('\n');
1827 }
1828
1829 /*
1830  * This routine opens the specified file and calls the function
1831  * which finds the function and type definitions.
1832  */
1833 static void
1834 find_entries (inf)
1835      FILE *inf;
1836 {
1837   char *cp;
1838   language *lang = curfdp->lang;
1839   Lang_function *parser = NULL;
1840
1841   /* If user specified a language, use it. */
1842   if (lang != NULL && lang->function != NULL)
1843     {
1844       parser = lang->function;
1845     }
1846
1847   /* Else try to guess the language given the file name. */
1848   if (parser == NULL)
1849     {
1850       lang = get_language_from_filename (curfdp->infname, TRUE);
1851       if (lang != NULL && lang->function != NULL)
1852         {
1853           curfdp->lang = lang;
1854           parser = lang->function;
1855         }
1856     }
1857
1858   /* Else look for sharp-bang as the first two characters. */
1859   if (parser == NULL
1860       && readline_internal (&lb, inf) > 0
1861       && lb.len >= 2
1862       && lb.buffer[0] == '#'
1863       && lb.buffer[1] == '!')
1864     {
1865       char *lp;
1866
1867       /* Set lp to point at the first char after the last slash in the
1868          line or, if no slashes, at the first nonblank.  Then set cp to
1869          the first successive blank and terminate the string. */
1870       lp = etags_strrchr (lb.buffer+2, '/');
1871       if (lp != NULL)
1872         lp += 1;
1873       else
1874         lp = skip_spaces (lb.buffer + 2);
1875       cp = skip_non_spaces (lp);
1876       *cp = '\0';
1877
1878       if (strlen (lp) > 0)
1879         {
1880           lang = get_language_from_interpreter (lp);
1881           if (lang != NULL && lang->function != NULL)
1882             {
1883               curfdp->lang = lang;
1884               parser = lang->function;
1885             }
1886         }
1887     }
1888
1889   /* We rewind here, even if inf may be a pipe.  We fail if the
1890      length of the first line is longer than the pipe block size,
1891      which is unlikely. */
1892   rewind (inf);
1893
1894   /* Else try to guess the language given the case insensitive file name. */
1895   if (parser == NULL)
1896     {
1897       lang = get_language_from_filename (curfdp->infname, FALSE);
1898       if (lang != NULL && lang->function != NULL)
1899         {
1900           curfdp->lang = lang;
1901           parser = lang->function;
1902         }
1903     }
1904
1905   /* Else try Fortran or C. */
1906   if (parser == NULL)
1907     {
1908       node *old_last_node = last_node;
1909
1910       curfdp->lang = get_language_from_langname ("fortran");
1911       find_entries (inf);
1912
1913       if (old_last_node == last_node)
1914         /* No Fortran entries found.  Try C. */
1915         {
1916           /* We do not tag if rewind fails.
1917              Only the file name will be recorded in the tags file. */
1918           rewind (inf);
1919           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1920           find_entries (inf);
1921         }
1922       return;
1923     }
1924
1925   if (!no_line_directive
1926       && curfdp->lang != NULL && curfdp->lang->metasource)
1927     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1928        file, or anyway we parsed a file that is automatically generated from
1929        this one.  If this is the case, the bingo.c file contained #line
1930        directives that generated tags pointing to this file.  Let's delete
1931        them all before parsing this file, which is the real source. */
1932     {
1933       fdesc **fdpp = &fdhead;
1934       while (*fdpp != NULL)
1935         if (*fdpp != curfdp
1936             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1937           /* We found one of those!  We must delete both the file description
1938              and all tags referring to it. */
1939           {
1940             fdesc *badfdp = *fdpp;
1941
1942             /* Delete the tags referring to badfdp->taggedfname
1943                that were obtained from badfdp->infname. */
1944             invalidate_nodes (badfdp, &nodehead);
1945
1946             *fdpp = badfdp->next; /* remove the bad description from the list */
1947             free_fdesc (badfdp);
1948           }
1949         else
1950           fdpp = &(*fdpp)->next; /* advance the list pointer */
1951     }
1952
1953   assert (parser != NULL);
1954
1955   /* Generic initialisations before reading from file. */
1956   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1957
1958   /* Generic initialisations before parsing file with readline. */
1959   lineno = 0;                  /* reset global line number */
1960   charno = 0;                  /* reset global char number */
1961   linecharno = 0;              /* reset global char number of line start */
1962
1963   parser (inf);
1964
1965 #ifdef ETAGS_REGEXPS
1966   regex_tag_multiline ();
1967 #endif /* ETAGS_REGEXPS */
1968 }
1969
1970 \f
1971 /*
1972  * Check whether an implicitly named tag should be created,
1973  * then call `pfnote'.
1974  * NAME is a string that is internally copied by this function.
1975  *
1976  * TAGS format specification
1977  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1978  * The following is explained in some more detail in etc/ETAGS.EBNF.
1979  *
1980  * make_tag creates tags with "implicit tag names" (unnamed tags)
1981  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1982  *  1. NAME does not contain any of the characters in NONAM;
1983  *  2. LINESTART contains name as either a rightmost, or rightmost but
1984  *     one character, substring;
1985  *  3. the character, if any, immediately before NAME in LINESTART must
1986  *     be a character in NONAM;
1987  *  4. the character, if any, immediately after NAME in LINESTART must
1988  *     also be a character in NONAM.
1989  *
1990  * The implementation uses the notinname() macro, which recognises the
1991  * characters stored in the string `nonam'.
1992  * etags.el needs to use the same characters that are in NONAM.
1993  */
1994 static void
1995 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1996      char *name;                /* tag name, or NULL if unnamed */
1997      int namelen;               /* tag length */
1998      bool is_func;              /* tag is a function */
1999      char *linestart;           /* start of the line where tag is */
2000      int linelen;               /* length of the line where tag is */
2001      int lno;                   /* line number */
2002      long cno;                  /* character number */
2003 {
2004   bool named = (name != NULL && namelen > 0);
2005
2006   if (!CTAGS && named)          /* maybe set named to false */
2007     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2008        such that etags.el can guess a name from it. */
2009     {
2010       int i;
2011       register char *cp = name;
2012
2013       for (i = 0; i < namelen; i++)
2014         if (notinname (*cp++))
2015           break;
2016       if (i == namelen)                         /* rule #1 */
2017         {
2018           cp = linestart + linelen - namelen;
2019           if (notinname (linestart[linelen-1]))
2020             cp -= 1;                            /* rule #4 */
2021           if (cp >= linestart                   /* rule #2 */
2022               && (cp == linestart
2023                   || notinname (cp[-1]))        /* rule #3 */
2024               && strneq (name, cp, namelen))    /* rule #2 */
2025             named = FALSE;      /* use implicit tag name */
2026         }
2027     }
2028
2029   if (named)
2030     name = savenstr (name, namelen);
2031   else
2032     name = NULL;
2033   pfnote (name, is_func, linestart, linelen, lno, cno);
2034 }
2035
2036 /* Record a tag. */
2037 static void
2038 pfnote (name, is_func, linestart, linelen, lno, cno)
2039      char *name;                /* tag name, or NULL if unnamed */
2040      bool is_func;              /* tag is a function */
2041      char *linestart;           /* start of the line where tag is */
2042      int linelen;               /* length of the line where tag is */
2043      int lno;                   /* line number */
2044      long cno;                  /* character number */
2045 {
2046   register node *np;
2047
2048   assert (name == NULL || name[0] != '\0');
2049   if (CTAGS && name == NULL)
2050     return;
2051
2052   np = xnew (1, node);
2053
2054   /* If ctags mode, change name "main" to M<thisfilename>. */
2055   if (CTAGS && !cxref_style && streq (name, "main"))
2056     {
2057       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2058       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2059       fp = etags_strrchr (np->name, '.');
2060       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2061         fp[0] = '\0';
2062     }
2063   else
2064     np->name = name;
2065   np->valid = TRUE;
2066   np->been_warned = FALSE;
2067   np->fdp = curfdp;
2068   np->is_func = is_func;
2069   np->lno = lno;
2070   if (np->fdp->usecharno)
2071     /* Our char numbers are 0-base, because of C language tradition?
2072        ctags compatibility?  old versions compatibility?   I don't know.
2073        Anyway, since emacs's are 1-base we expect etags.el to take care
2074        of the difference.  If we wanted to have 1-based numbers, we would
2075        uncomment the +1 below. */
2076     np->cno = cno /* + 1 */ ;
2077   else
2078     np->cno = invalidcharno;
2079   np->left = np->right = NULL;
2080   if (CTAGS && !cxref_style)
2081     {
2082       if (strlen (linestart) < 50)
2083         np->regex = concat (linestart, "$", "");
2084       else
2085         np->regex = savenstr (linestart, 50);
2086     }
2087   else
2088     np->regex = savenstr (linestart, linelen);
2089
2090   add_node (np, &nodehead);
2091 }
2092
2093 /*
2094  * free_tree ()
2095  *      recurse on left children, iterate on right children.
2096  */
2097 static void
2098 free_tree (np)
2099      register node *np;
2100 {
2101   while (np)
2102     {
2103       register node *node_right = np->right;
2104       free_tree (np->left);
2105       if (np->name != NULL)
2106         free (np->name);
2107       free (np->regex);
2108       free (np);
2109       np = node_right;
2110     }
2111 }
2112
2113 /*
2114  * free_fdesc ()
2115  *      delete a file description
2116  */
2117 static void
2118 free_fdesc (fdp)
2119      register fdesc *fdp;
2120 {
2121   if (fdp->infname != NULL) free (fdp->infname);
2122   if (fdp->infabsname != NULL) free (fdp->infabsname);
2123   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2124   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2125   if (fdp->prop != NULL) free (fdp->prop);
2126   free (fdp);
2127 }
2128
2129 /*
2130  * add_node ()
2131  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2132  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2133  *      balancing.
2134  *
2135  *      add_node is the only function allowed to add nodes, so it can
2136  *      maintain state.
2137  */
2138 static void
2139 add_node (np, cur_node_p)
2140      node *np, **cur_node_p;
2141 {
2142   register int dif;
2143   register node *cur_node = *cur_node_p;
2144
2145   if (cur_node == NULL)
2146     {
2147       *cur_node_p = np;
2148       last_node = np;
2149       return;
2150     }
2151
2152   if (!CTAGS)
2153     /* Etags Mode */
2154     {
2155       /* For each file name, tags are in a linked sublist on the right
2156          pointer.  The first tags of different files are a linked list
2157          on the left pointer.  last_node points to the end of the last
2158          used sublist. */
2159       if (last_node != NULL && last_node->fdp == np->fdp)
2160         {
2161           /* Let's use the same sublist as the last added node. */
2162           assert (last_node->right == NULL);
2163           last_node->right = np;
2164           last_node = np;
2165         }
2166       else if (cur_node->fdp == np->fdp)
2167         {
2168           /* Scanning the list we found the head of a sublist which is
2169              good for us.  Let's scan this sublist. */
2170           add_node (np, &cur_node->right);
2171         }
2172       else
2173         /* The head of this sublist is not good for us.  Let's try the
2174            next one. */
2175         add_node (np, &cur_node->left);
2176     } /* if ETAGS mode */
2177
2178   else
2179     {
2180       /* Ctags Mode */
2181       dif = strcmp (np->name, cur_node->name);
2182
2183       /*
2184        * If this tag name matches an existing one, then
2185        * do not add the node, but maybe print a warning.
2186        */
2187       if (!dif)
2188         {
2189           if (np->fdp == cur_node->fdp)
2190             {
2191               if (!no_warnings)
2192                 {
2193                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2194                            np->fdp->infname, lineno, np->name);
2195                   fprintf (stderr, "Second entry ignored\n");
2196                 }
2197             }
2198           else if (!cur_node->been_warned && !no_warnings)
2199             {
2200               fprintf
2201                 (stderr,
2202                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2203                  np->fdp->infname, cur_node->fdp->infname, np->name);
2204               cur_node->been_warned = TRUE;
2205             }
2206           return;
2207         }
2208
2209       /* Actually add the node */
2210       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2211     } /* if CTAGS mode */
2212 }
2213
2214 /*
2215  * invalidate_nodes ()
2216  *      Scan the node tree and invalidate all nodes pointing to the
2217  *      given file description (CTAGS case) or free them (ETAGS case).
2218  */
2219 static void
2220 invalidate_nodes (badfdp, npp)
2221      fdesc *badfdp;
2222      node **npp;
2223 {
2224   node *np = *npp;
2225
2226   if (np == NULL)
2227     return;
2228
2229   if (CTAGS)
2230     {
2231       if (np->left != NULL)
2232         invalidate_nodes (badfdp, &np->left);
2233       if (np->fdp == badfdp)
2234         np->valid = FALSE;
2235       if (np->right != NULL)
2236         invalidate_nodes (badfdp, &np->right);
2237     }
2238   else
2239     {
2240       assert (np->fdp != NULL);
2241       if (np->fdp == badfdp)
2242         {
2243           *npp = np->left;      /* detach the sublist from the list */
2244           np->left = NULL;      /* isolate it */
2245           free_tree (np);       /* free it */
2246           invalidate_nodes (badfdp, npp);
2247         }
2248       else
2249         invalidate_nodes (badfdp, &np->left);
2250     }
2251 }
2252
2253 \f
2254 static int total_size_of_entries __P((node *));
2255 static int number_len __P((long));
2256
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2267
2268 /*
2269  * Return total number of characters that put_entries will output for
2270  * the nodes in the linked list at the right of the specified node.
2271  * This count is irrelevant with etags.el since emacs 19.34 at least,
2272  * but is still supplied for backward compatibility.
2273  */
2274 static int
2275 total_size_of_entries (np)
2276      register node *np;
2277 {
2278   register int total = 0;
2279
2280   for (; np != NULL; np = np->right)
2281     if (np->valid)
2282       {
2283         total += strlen (np->regex) + 1;                /* pat\177 */
2284         if (np->name != NULL)
2285           total += strlen (np->name) + 1;               /* name\001 */
2286         total += number_len ((long) np->lno) + 1;       /* lno, */
2287         if (np->cno != invalidcharno)                   /* cno */
2288           total += number_len (np->cno);
2289         total += 1;                                     /* newline */
2290       }
2291
2292   return total;
2293 }
2294
2295 static void
2296 put_entries (np)
2297      register node *np;
2298 {
2299   register char *sp;
2300   static fdesc *fdp = NULL;
2301
2302   if (np == NULL)
2303     return;
2304
2305   /* Output subentries that precede this one */
2306   if (CTAGS)
2307     put_entries (np->left);
2308
2309   /* Output this entry */
2310   if (np->valid)
2311     {
2312       if (!CTAGS)
2313         {
2314           /* Etags mode */
2315           if (fdp != np->fdp)
2316             {
2317               fdp = np->fdp;
2318               fprintf (tagf, "\f\n%s,%d\n",
2319                        fdp->taggedfname, total_size_of_entries (np));
2320               fdp->written = TRUE;
2321             }
2322           fputs (np->regex, tagf);
2323           fputc ('\177', tagf);
2324           if (np->name != NULL)
2325             {
2326               fputs (np->name, tagf);
2327               fputc ('\001', tagf);
2328             }
2329           fprintf (tagf, "%d,", np->lno);
2330           if (np->cno != invalidcharno)
2331             fprintf (tagf, "%ld", np->cno);
2332           fputs ("\n", tagf);
2333         }
2334       else
2335         {
2336           /* Ctags mode */
2337           if (np->name == NULL)
2338             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2339
2340           if (cxref_style)
2341             {
2342               if (vgrind_style)
2343                 fprintf (stdout, "%s %s %d\n",
2344                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2345               else
2346                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2347                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2348             }
2349           else
2350             {
2351               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2352
2353               if (np->is_func)
2354                 {               /* function or #define macro with args */
2355                   putc (searchar, tagf);
2356                   putc ('^', tagf);
2357
2358                   for (sp = np->regex; *sp; sp++)
2359                     {
2360                       if (*sp == '\\' || *sp == searchar)
2361                         putc ('\\', tagf);
2362                       putc (*sp, tagf);
2363                     }
2364                   putc (searchar, tagf);
2365                 }
2366               else
2367                 {               /* anything else; text pattern inadequate */
2368                   fprintf (tagf, "%d", np->lno);
2369                 }
2370               putc ('\n', tagf);
2371             }
2372         }
2373     } /* if this node contains a valid tag */
2374
2375   /* Output subentries that follow this one */
2376   put_entries (np->right);
2377   if (!CTAGS)
2378     put_entries (np->left);
2379 }
2380
2381 \f
/* Flags selecting the C dialect.  The values below C_AUTO all fit in
   C_EXT; C_STAR and C_JAVA both include the C_PLPL bit. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */

/* Flags outside of C_EXT. */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2390
/*
 * The C symbol tables.
 *
 * Kinds of keyword stored in the keyword table searched by in_word_set.
 * The words quoted below are the table entries carrying each kind.
 */
enum sym_type
{
  st_none,                      /* presumably: not a keyword -- TODO confirm */
  st_C_objprot,                 /* "@interface", "@protocol" */
  st_C_objimpl,                 /* "@implementation" */
  st_C_objend,                  /* "@end" */
  st_C_gnumacro,                /* "DEFUN", "SYSCALL", "ENTRY", "PSEUDO" */
  st_C_ignore,                  /* "if", "for", "while", "switch", "return",
                                   "import", "package", "friend" */
  st_C_attribute,               /* "__attribute__" */
  st_C_javastruct,              /* "extends", "implements" */
  st_C_operator,                /* "operator" */
  st_C_class,                   /* "class" */
  st_C_template,                /* "template" */
  st_C_struct,                  /* "struct", "union", "namespace", "domain",
                                   "interface" */
  st_C_extern,                  /* "extern" */
  st_C_enum,                    /* "enum" */
  st_C_define,                  /* "define" */
  st_C_typedef                  /* "typedef" */
};
2405
2406 static unsigned int hash __P((const char *, unsigned int));
2407 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2408 static enum sym_type C_symtype __P((char *, int, int));
2409
2410 /* Feed stuff between (but not including) %[ and %] lines to:
2411      gperf -m 5
2412 %[
2413 %compare-strncmp
2414 %enum
2415 %struct-type
2416 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2417 %%
2418 if,             0,                      st_C_ignore
2419 for,            0,                      st_C_ignore
2420 while,          0,                      st_C_ignore
2421 switch,         0,                      st_C_ignore
2422 return,         0,                      st_C_ignore
2423 __attribute__,  0,                      st_C_attribute
2424 @interface,     0,                      st_C_objprot
2425 @protocol,      0,                      st_C_objprot
2426 @implementation,0,                      st_C_objimpl
2427 @end,           0,                      st_C_objend
2428 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2429 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2430 friend,         C_PLPL,                 st_C_ignore
2431 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2432 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2433 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2434 class,          0,                      st_C_class
2435 namespace,      C_PLPL,                 st_C_struct
2436 domain,         C_STAR,                 st_C_struct
2437 union,          0,                      st_C_struct
2438 struct,         0,                      st_C_struct
2439 extern,         0,                      st_C_extern
2440 enum,           0,                      st_C_enum
2441 typedef,        0,                      st_C_typedef
2442 define,         0,                      st_C_define
2443 operator,       C_PLPL,                 st_C_operator
2444 template,       0,                      st_C_template
2445 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2446 DEFUN,          0,                      st_C_gnumacro
2447 SYSCALL,        0,                      st_C_gnumacro
2448 ENTRY,          0,                      st_C_gnumacro
2449 PSEUDO,         0,                      st_C_gnumacro
2450 # These are defined inside C functions, so currently they are not met.
2451 # EXFUN used in glibc, DEFVAR_* in emacs.
2452 #EXFUN,         0,                      st_C_gnumacro
2453 #DEFVAR_,       0,                      st_C_gnumacro
2454 %]
2455 and replace lines between %< and %> with its output, then:
2456  - remove the #if characterset check
2457  - make in_word_set static and not inline. */
2458 /*%<*/
2459 /* C code produced by gperf version 3.0.1 */
2460 /* Command-line: gperf -m 5  */
2461 /* Computed positions: -k'1-2' */
2462
2463 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 31, duplicates = 0 */

/* Hash function generated by gperf for the C keyword table.  The value
   is LEN plus the table values associated with the first two characters
   of STR, so STR must hold at least two characters. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  register unsigned int key = len;

  key += asso_values[(unsigned char)str[1]];
  key += asso_values[(unsigned char)str[0]];
  return key;
}
2509
2510 static struct C_stab_entry *
2511 in_word_set (str, len)
2512      register const char *str;
2513      register unsigned int len;
2514 {
2515   enum
2516     {
2517       TOTAL_KEYWORDS = 31,
2518       MIN_WORD_LENGTH = 2,
2519       MAX_WORD_LENGTH = 15,
2520       MIN_HASH_VALUE = 3,
2521       MAX_HASH_VALUE = 33
2522     };
2523
2524   static struct C_stab_entry wordlist[] =
2525     {
2526       {""}, {""}, {""},
2527       {"if",            0,                      st_C_ignore},
2528       {"enum",          0,                      st_C_enum},
2529       {"@end",          0,                      st_C_objend},
2530       {"extern",                0,                      st_C_extern},
2531       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2532       {"for",           0,                      st_C_ignore},
2533       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2534       {"@protocol",     0,                      st_C_objprot},
2535       {"@interface",    0,                      st_C_objprot},
2536       {"operator",      C_PLPL,                 st_C_operator},
2537       {"return",                0,                      st_C_ignore},
2538       {"friend",                C_PLPL,                 st_C_ignore},
2539       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2540       {"@implementation",0,                     st_C_objimpl},
2541       {"define",                0,                      st_C_define},
2542       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2543       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2544       {"namespace",     C_PLPL,                 st_C_struct},
2545       {"domain",                C_STAR,                 st_C_struct},
2546       {"template",      0,                      st_C_template},
2547       {"typedef",       0,                      st_C_typedef},
2548       {"struct",                0,                      st_C_struct},
2549       {"switch",                0,                      st_C_ignore},
2550       {"union",         0,                      st_C_struct},
2551       {"while",         0,                      st_C_ignore},
2552       {"class",         0,                      st_C_class},
2553       {"__attribute__", 0,                      st_C_attribute},
2554       {"SYSCALL",       0,                      st_C_gnumacro},
2555       {"PSEUDO",                0,                      st_C_gnumacro},
2556       {"ENTRY",         0,                      st_C_gnumacro},
2557       {"DEFUN",         0,                      st_C_gnumacro}
2558     };
2559
2560   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2561     {
2562       register int key = hash (str, len);
2563
2564       if (key <= MAX_HASH_VALUE && key >= 0)
2565         {
2566           register const char *s = wordlist[key].name;
2567
2568           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2569             return &wordlist[key];
2570         }
2571     }
2572   return 0;
2573 }
2574 /*%>*/
2575
2576 static enum sym_type
2577 C_symtype (str, len, c_ext)
2578      char *str;
2579      int len;
2580      int c_ext;
2581 {
2582   register struct C_stab_entry *se = in_word_set (str, len);
2583
2584   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2585     return st_none;
2586   return se->type;
2587 }
2588
2589 \f
/*
 * Support for ignoring __attribute__ ((list)): set while an
 * __attribute__ construct is being looked at.
 */
static bool inattribute;

/*
 * State variable of the simple finite automaton that recognises
 * C functions and variables.
 */
static enum
{
  fvnone,               /* nothing seen */
  fdefunkey,            /* Emacs DEFUN keyword seen */
  fdefunname,           /* Emacs DEFUN name seen */
  foperator,            /* func: operator keyword seen (cplpl) */
  fvnameseen,           /* function or variable name seen */
  fstartlist,           /* func: just after open parenthesis */
  finlist,              /* func: in parameter list */
  flistseen,            /* func: after parameter list */
  fignore,              /* func: before open brace */
  vignore               /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen. */
static bool fvextern;
2614
/*
 * State variable of the simple finite automaton that recognises
 * typedefs.
 */
static enum
{
  tnone,                /* nothing seen */
  tkeyseen,             /* typedef keyword seen */
  ttypeseen,            /* defined type seen */
  tinbody,              /* inside typedef body */
  tend,                 /* just before typedef tag */
  tignore               /* junk after typedef tag */
} typdef;
2628
/*
 * State variable of the simple finite automaton that recognises
 * struct-like constructs (enum, struct and union).
 */
static enum
{
  snone,                /* nothing seen yet,
                           or in struct body if bracelev > 0 */
  skeyseen,             /* struct-like keyword seen */
  stagseen,             /* struct-like tag seen */
  scolonseen            /* colon seen after struct-like tag */
} structdef;
2642
/*
 * Name of the Objective C class; meaningful whenever objdef is
 * different from onone.
 */
static char *objtag = "<uninited>";

/*
 * State variable of the little state machine that deals with
 * preprocessor lines.
 */
static enum
{
  dnone,                /* nothing seen */
  dsharpseen,           /* '#' seen as first char on line */
  ddefineseen,          /* '#' and 'define' seen */
  dignorerest           /* ignore rest of line */
} definedef;

/*
 * State variable of the state machine for Objective C protocols and
 * implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                /* nothing seen */
  oprotocol,            /* @interface or @protocol seen */
  oimplementation,      /* @implementation seen */
  otagseen,             /* class name seen */
  oparenseen,           /* parenthesis before category seen */
  ocatseen,             /* category name seen */
  oinbody,              /* in @implementation body */
  omethodsign,          /* in @implementation body, after +/- */
  omethodtag,           /* after method name */
  omethodcolon,         /* after method colon */
  omethodparm,          /* after method parameter */
  oignore               /* wait for @end */
} objdef;
2678
2679
/*
 * Information about the token read and about how it should be tagged.
 * The make_C_tag function builds a tag from it.
 */
static struct tok
{
  /* Generic part: these members can be used to pass strings around. */
  char *line;           /* string containing the token */
  int offset;           /* where the token starts in LINE */
  int length;           /* token length */

  /* Tag part: the members below specifically refer to creating tags.
     In this case the token held above is the pattern that will be used
     to create a tag. */
  bool valid;           /* when FALSE, do not create a tag; the token
                           should be invalidated whenever a state
                           machine is reset prematurely */
  bool named;           /* create a named tag */
  int lineno;           /* source line number of tag */
  long linepos;         /* source char number of tag */
} token;                /* latest token read */
2702
2703 /*
2704  * Variables and functions for dealing with nested structures.
2705  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2706  */
2707 static void pushclass_above __P((int, char *, int));
2708 static void popclass_above __P((int));
2709 static void write_classname __P((linebuffer *, char *qualifier));
2710
/* Stack for nested declaration tags, kept as two parallel arrays. */
static struct {
  char **cname;         /* nested class names */
  int *bracelev;        /* nested class brace level */
  int nl;               /* class nesting level (elements used) */
  int size;             /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function:
   true when the current brace level is exactly one deeper than the level
   recorded for the innermost stack entry. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2722
2723 static void
2724 pushclass_above (bracelev, str, len)
2725      int bracelev;
2726      char *str;
2727      int len;
2728 {
2729   int nl;
2730
2731   popclass_above (bracelev);
2732   nl = cstack.nl;
2733   if (nl >= cstack.size)
2734     {
2735       int size = cstack.size *= 2;
2736       xrnew (cstack.cname, size, char *);
2737       xrnew (cstack.bracelev, size, int);
2738     }
2739   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2740   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2741   cstack.bracelev[nl] = bracelev;
2742   cstack.nl = nl + 1;
2743 }
2744
2745 static void
2746 popclass_above (bracelev)
2747      int bracelev;
2748 {
2749   int nl;
2750
2751   for (nl = cstack.nl - 1;
2752        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2753        nl--)
2754     {
2755       if (cstack.cname[nl] != NULL)
2756         free (cstack.cname[nl]);
2757       cstack.nl = nl;
2758     }
2759 }
2760
2761 static void
2762 write_classname (cn, qualifier)
2763      linebuffer *cn;
2764      char *qualifier;
2765 {
2766   int i, len;
2767   int qlen = strlen (qualifier);
2768
2769   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2770     {
2771       len = 0;
2772       cn->len = 0;
2773       cn->buffer[0] = '\0';
2774     }
2775   else
2776     {
2777       len = strlen (cstack.cname[0]);
2778       linebuffer_setlen (cn, len);
2779       strcpy (cn->buffer, cstack.cname[0]);
2780     }
2781   for (i = 1; i < cstack.nl; i++)
2782     {
2783       char *s;
2784       int slen;
2785
2786       s = cstack.cname[i];
2787       if (s == NULL)
2788         continue;
2789       slen = strlen (s);
2790       len += slen + qlen;
2791       linebuffer_setlen (cn, len);
2792       strncat (cn->buffer, qualifier, qlen);
2793       strncat (cn->buffer, s, slen);
2794     }
2795 }
2796
2797 \f
2798 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2799 static void make_C_tag __P((bool));
2800
2801 /*
2802  * consider_token ()
2803  *      checks to see if the current token is at the start of a
2804  *      function or variable, or corresponds to a typedef, or
2805  *      is a struct/union/enum tag, or #define, or an enum constant.
2806  *
2807  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2808  *      with args.  C_EXTP points to which language we are looking at.
2809  *
2810  * Globals
2811  *      fvdef                   IN OUT
2812  *      structdef               IN OUT
2813  *      definedef               IN OUT
2814  *      typdef                  IN OUT
2815  *      objdef                  IN OUT
2816  */
2817
2818 static bool
2819 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2820      register char *str;        /* IN: token pointer */
2821      register int len;          /* IN: token length */
2822      register int c;            /* IN: first char after the token */
2823      int *c_extp;               /* IN, OUT: C extensions mask */
2824      int bracelev;              /* IN: brace level */
2825      int parlev;                /* IN: parenthesis level */
2826      bool *is_func_or_var;      /* OUT: function or variable found */
2827 {
2828   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2829      structtype is the type of the preceding struct-like keyword, and
2830      structbracelev is the brace level where it has been seen. */
2831   static enum sym_type structtype;
2832   static int structbracelev;
2833   static enum sym_type toktype;
2834
2835
2836   toktype = C_symtype (str, len, *c_extp);
2837
2838   /*
2839    * Skip __attribute__
2840    */
2841   if (toktype == st_C_attribute)
2842     {
2843       inattribute = TRUE;
2844       return FALSE;
2845      }
2846
2847    /*
2848     * Advance the definedef state machine.
2849     */
2850    switch (definedef)
2851      {
2852      case dnone:
2853        /* We're not on a preprocessor line. */
2854        if (toktype == st_C_gnumacro)
2855          {
2856            fvdef = fdefunkey;
2857            return FALSE;
2858          }
2859        break;
2860      case dsharpseen:
2861        if (toktype == st_C_define)
2862          {
2863            definedef = ddefineseen;
2864          }
2865        else
2866          {
2867            definedef = dignorerest;
2868          }
2869        return FALSE;
2870      case ddefineseen:
2871        /*
2872         * Make a tag for any macro, unless it is a constant
2873         * and constantypedefs is FALSE.
2874         */
2875        definedef = dignorerest;
2876        *is_func_or_var = (c == '(');
2877        if (!*is_func_or_var && !constantypedefs)
2878          return FALSE;
2879        else
2880          return TRUE;
2881      case dignorerest:
2882        return FALSE;
2883      default:
2884        error ("internal error: definedef value.", (char *)NULL);
2885      }
2886
2887    /*
2888     * Now typedefs
2889     */
2890    switch (typdef)
2891      {
2892      case tnone:
2893        if (toktype == st_C_typedef)
2894          {
2895            if (typedefs)
2896              typdef = tkeyseen;
2897            fvextern = FALSE;
2898            fvdef = fvnone;
2899            return FALSE;
2900          }
2901        break;
2902      case tkeyseen:
2903        switch (toktype)
2904          {
2905          default:
2906            break;
2907          case st_none:
2908          case st_C_class:
2909          case st_C_struct:
2910          case st_C_enum:
2911            typdef = ttypeseen;
2912          }
2913        break;
2914      case ttypeseen:
2915        if (structdef == snone && fvdef == fvnone)
2916          {
2917            fvdef = fvnameseen;
2918            return TRUE;
2919          }
2920        break;
2921      case tend:
2922        switch (toktype)
2923          {
2924          default:
2925            break;
2926          case st_C_class:
2927          case st_C_struct:
2928          case st_C_enum:
2929            return FALSE;
2930          }
2931        return TRUE;
2932      default:
2933        break;
2934      }
2935
2936    /*
2937     * This structdef business is NOT invoked when we are ctags and the
2938     * file is plain C.  This is because a struct tag may have the same
2939     * name as another tag, and this loses with ctags.
2940     */
2941    switch (toktype)
2942      {
2943      case st_C_javastruct:
2944        if (structdef == stagseen)
2945          structdef = scolonseen;
2946        return FALSE;
2947      case st_C_template:
2948      case st_C_class:
2949        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2950            && bracelev == 0
2951            && definedef == dnone && structdef == snone
2952            && typdef == tnone && fvdef == fvnone)
2953          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2954        if (toktype == st_C_template)
2955          break;
2956        /* FALLTHRU */
2957      case st_C_struct:
2958      case st_C_enum:
2959        if (parlev == 0
2960            && fvdef != vignore
2961            && (typdef == tkeyseen
2962                || (typedefs_or_cplusplus && structdef == snone)))
2963          {
2964            structdef = skeyseen;
2965            structtype = toktype;
2966            structbracelev = bracelev;
2967            if (fvdef == fvnameseen)
2968              fvdef = fvnone;
2969          }
2970        return FALSE;
2971      default:
2972        break;
2973      }
2974
2975    if (structdef == skeyseen)
2976      {
2977        structdef = stagseen;
2978        return TRUE;
2979      }
2980
2981    if (typdef != tnone)
2982      definedef = dnone;
2983
2984    /* Detect Objective C constructs. */
2985    switch (objdef)
2986      {
2987      case onone:
2988        switch (toktype)
2989          {
2990          case st_C_objprot:
2991            objdef = oprotocol;
2992            return FALSE;
2993          case st_C_objimpl:
2994            objdef = oimplementation;
2995            return FALSE;
2996          default:
2997            break;
2998          }
2999        break;
3000      case oimplementation:
3001        /* Save the class tag for functions or variables defined inside. */
3002        objtag = savenstr (str, len);
3003        objdef = oinbody;
3004        return FALSE;
3005      case oprotocol:
3006        /* Save the class tag for categories. */
3007        objtag = savenstr (str, len);
3008        objdef = otagseen;
3009        *is_func_or_var = TRUE;
3010        return TRUE;
3011      case oparenseen:
3012        objdef = ocatseen;
3013        *is_func_or_var = TRUE;
3014        return TRUE;
3015      case oinbody:
3016        break;
3017      case omethodsign:
3018        if (parlev == 0)
3019          {
3020            fvdef = fvnone;
3021            objdef = omethodtag;
3022            linebuffer_setlen (&token_name, len);
3023            strncpy (token_name.buffer, str, len);
3024            token_name.buffer[len] = '\0';
3025            return TRUE;
3026          }
3027        return FALSE;
3028      case omethodcolon:
3029        if (parlev == 0)
3030          objdef = omethodparm;
3031        return FALSE;
3032      case omethodparm:
3033        if (parlev == 0)
3034          {
3035            fvdef = fvnone;
3036            objdef = omethodtag;
3037            linebuffer_setlen (&token_name, token_name.len + len);
3038            strncat (token_name.buffer, str, len);
3039            return TRUE;
3040          }
3041        return FALSE;
3042      case oignore:
3043        if (toktype == st_C_objend)
3044          {
3045            /* Memory leakage here: the string pointed by objtag is
3046               never released, because many tests would be needed to
3047               avoid breaking on incorrect input code.  The amount of
3048               memory leaked here is the sum of the lengths of the
3049               class tags.
3050            free (objtag); */
3051            objdef = onone;
3052          }
3053        return FALSE;
3054      default:
3055        break;
3056      }
3057
3058    /* A function, variable or enum constant? */
3059    switch (toktype)
3060      {
3061      case st_C_extern:
3062        fvextern = TRUE;
3063        switch  (fvdef)
3064          {
3065          case finlist:
3066          case flistseen:
3067          case fignore:
3068          case vignore:
3069            break;
3070          default:
3071            fvdef = fvnone;
3072          }
3073        return FALSE;
3074      case st_C_ignore:
3075        fvextern = FALSE;
3076        fvdef = vignore;
3077        return FALSE;
3078      case st_C_operator:
3079        fvdef = foperator;
3080        *is_func_or_var = TRUE;
3081        return TRUE;
3082      case st_none:
3083        if (constantypedefs
3084            && structdef == snone
3085            && structtype == st_C_enum && bracelev > structbracelev)
3086          return TRUE;           /* enum constant */
3087        switch (fvdef)
3088          {
3089          case fdefunkey:
3090            if (bracelev > 0)
3091              break;
3092            fvdef = fdefunname;  /* GNU macro */
3093            *is_func_or_var = TRUE;
3094            return TRUE;
3095          case fvnone:
3096            switch (typdef)
3097              {
3098              case ttypeseen:
3099                return FALSE;
3100              case tnone:
3101                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3102                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3103                  {
3104                    fvdef = vignore;
3105                    return FALSE;
3106                  }
3107                break;
3108              default:
3109                break;
3110              }
3111           /* FALLTHRU */
3112           case fvnameseen:
3113           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3114             {
3115               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3116                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3117               fvdef = foperator;
3118               *is_func_or_var = TRUE;
3119               return TRUE;
3120             }
3121           if (bracelev > 0 && !instruct)
3122             break;
3123           fvdef = fvnameseen;   /* function or variable */
3124           *is_func_or_var = TRUE;
3125           return TRUE;
3126         default:
3127            break;
3128         }
3129       break;
3130     default:
3131       break;
3132     }
3133
3134   return FALSE;
3135 }
3136
3137 \f
3138 /*
3139  * C_entries often keeps pointers to tokens or lines which are older than
3140  * the line currently read.  By keeping two line buffers, and switching
3141  * them at end of line, it is possible to use those pointers.
3142  */
3143 static struct
3144 {
3145   long linepos;
3146   linebuffer lb;
3147 } lbs[2];
3148
3149 #define current_lb_is_new (newndx == curndx)
3150 #define switch_line_buffers() (curndx = 1 - curndx)
3151
3152 #define curlb (lbs[curndx].lb)
3153 #define newlb (lbs[newndx].lb)
3154 #define curlinepos (lbs[curndx].linepos)
3155 #define newlinepos (lbs[newndx].linepos)
3156
3157 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3158 #define cplpl (c_ext & C_PLPL)
3159 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3160
3161 #define CNL_SAVE_DEFINEDEF()                                            \
3162 do {                                                                    \
3163   curlinepos = charno;                                                  \
3164   readline (&curlb, inf);                                               \
3165   lp = curlb.buffer;                                                    \
3166   quotednl = FALSE;                                                     \
3167   newndx = curndx;                                                      \
3168 } while (0)
3169
3170 #define CNL()                                                           \
3171 do {                                                                    \
3172   CNL_SAVE_DEFINEDEF();                                                 \
3173   if (savetoken.valid)                                                  \
3174     {                                                                   \
3175       token = savetoken;                                                \
3176       savetoken.valid = FALSE;                                          \
3177     }                                                                   \
3178   definedef = dnone;                                                    \
3179 } while (0)
3180
3181
3182 static void
3183 make_C_tag (isfun)
3184      bool isfun;
3185 {
3186   /* This function should never be called when token.valid is FALSE, but
3187      we must protect against invalid input or internal errors. */
3188   if (!DEBUG && !token.valid)
3189     return;
3190
3191   if (token.valid)
3192     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3193               token.offset+token.length+1, token.lineno, token.linepos);
3194   else                          /* this case is optimised away if !DEBUG */
3195     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3196               token_name.len + 17, isfun, token.line,
3197               token.offset+token.length+1, token.lineno, token.linepos);
3198
3199   token.valid = FALSE;
3200 }
3201
3202
3203 /*
3204  * C_entries ()
3205  *      This routine finds functions, variables, typedefs,
3206  *      #define's, enum constants and struct/union/enum definitions in
3207  *      C syntax and adds them to the list.
3208  */
3209 static void
3210 C_entries (c_ext, inf)
3211      int c_ext;                 /* extension of C */
3212      FILE *inf;                 /* input file */
3213 {
3214   register char c;              /* latest char read; '\0' for end of line */
3215   register char *lp;            /* pointer one beyond the character `c' */
3216   int curndx, newndx;           /* indices for current and new lb */
3217   register int tokoff;          /* offset in line of start of current token */
3218   register int toklen;          /* length of current token */
3219   char *qualifier;              /* string used to qualify names */
3220   int qlen;                     /* length of qualifier */
3221   int bracelev;                 /* current brace level */
3222   int bracketlev;               /* current bracket level */
3223   int parlev;                   /* current parenthesis level */
3224   int attrparlev;               /* __attribute__ parenthesis level */
3225   int templatelev;              /* current template level */
3226   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3227   bool incomm, inquote, inchar, quotednl, midtoken;
3228   bool yacc_rules;              /* in the rules part of a yacc file */
3229   struct tok savetoken;         /* token saved during preprocessor handling */
3230
3231
3232   linebuffer_init (&lbs[0].lb);
3233   linebuffer_init (&lbs[1].lb);
3234   if (cstack.size == 0)
3235     {
3236       cstack.size = (DEBUG) ? 1 : 4;
3237       cstack.nl = 0;
3238       cstack.cname = xnew (cstack.size, char *);
3239       cstack.bracelev = xnew (cstack.size, int);
3240     }
3241
3242   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3243   curndx = newndx = 0;
3244   lp = curlb.buffer;
3245   *lp = 0;
3246
3247   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3248   structdef = snone; definedef = dnone; objdef = onone;
3249   yacc_rules = FALSE;
3250   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3251   token.valid = savetoken.valid = FALSE;
3252   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3253   if (cjava)
3254     { qualifier = "."; qlen = 1; }
3255   else
3256     { qualifier = "::"; qlen = 2; }
3257
3258
3259   while (!feof (inf))
3260     {
3261       c = *lp++;
3262       if (c == '\\')
3263         {
3264           /* If we are at the end of the line, the next character is a
3265              '\0'; do not skip it, because it is what tells us
3266              to read the next line.  */
3267           if (*lp == '\0')
3268             {
3269               quotednl = TRUE;
3270               continue;
3271             }
3272           lp++;
3273           c = ' ';
3274         }
3275       else if (incomm)
3276         {
3277           switch (c)
3278             {
3279             case '*':
3280               if (*lp == '/')
3281                 {
3282                   c = *lp++;
3283                   incomm = FALSE;
3284                 }
3285               break;
3286             case '\0':
3287               /* Newlines inside comments do not end macro definitions in
3288                  traditional cpp. */
3289               CNL_SAVE_DEFINEDEF ();
3290               break;
3291             }
3292           continue;
3293         }
3294       else if (inquote)
3295         {
3296           switch (c)
3297             {
3298             case '"':
3299               inquote = FALSE;
3300               break;
3301             case '\0':
3302               /* Newlines inside strings do not end macro definitions
3303                  in traditional cpp, even though compilers don't
3304                  usually accept them. */
3305               CNL_SAVE_DEFINEDEF ();
3306               break;
3307             }
3308           continue;
3309         }
3310       else if (inchar)
3311         {
3312           switch (c)
3313             {
3314             case '\0':
3315               /* Hmmm, something went wrong. */
3316               CNL ();
3317               /* FALLTHRU */
3318             case '\'':
3319               inchar = FALSE;
3320               break;
3321             }
3322           continue;
3323         }
3324       else if (bracketlev > 0)
3325         {
3326           switch (c)
3327             {
3328             case ']':
3329               if (--bracketlev > 0)
3330                 continue;
3331               break;
3332             case '\0':
3333               CNL_SAVE_DEFINEDEF ();
3334               break;
3335             }
3336           continue;
3337         }
3338       else switch (c)
3339         {
3340         case '"':
3341           inquote = TRUE;
3342           if (inattribute)
3343             break;
3344           switch (fvdef)
3345             {
3346             case fdefunkey:
3347             case fstartlist:
3348             case finlist:
3349             case fignore:
3350             case vignore:
3351               break;
3352             default:
3353               fvextern = FALSE;
3354               fvdef = fvnone;
3355             }
3356           continue;
3357         case '\'':
3358           inchar = TRUE;
3359           if (inattribute)
3360             break;
3361           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3362             {
3363               fvextern = FALSE;
3364               fvdef = fvnone;
3365             }
3366           continue;
3367         case '/':
3368           if (*lp == '*')
3369             {
3370               lp++;
3371               incomm = TRUE;
3372               continue;
3373             }
3374           else if (/* cplpl && */ *lp == '/')
3375             {
3376               c = '\0';
3377               break;
3378             }
3379           else
3380             break;
3381         case '%':
3382           if ((c_ext & YACC) && *lp == '%')
3383             {
3384               /* Entering or exiting rules section in yacc file. */
3385               lp++;
3386               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3387               typdef = tnone; structdef = snone;
3388               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3389               bracelev = 0;
3390               yacc_rules = !yacc_rules;
3391               continue;
3392             }
3393           else
3394             break;
3395         case '#':
3396           if (definedef == dnone)
3397             {
3398               char *cp;
3399               bool cpptoken = TRUE;
3400
3401               /* Look back on this line.  If all blanks, or nonblanks
3402                  followed by an end of comment, this is a preprocessor
3403                  token. */
3404               for (cp = newlb.buffer; cp < lp-1; cp++)
3405                 if (!iswhite (*cp))
3406                   {
3407                     if (*cp == '*' && *(cp+1) == '/')
3408                       {
3409                         cp++;
3410                         cpptoken = TRUE;
3411                       }
3412                     else
3413                       cpptoken = FALSE;
3414                   }
3415               if (cpptoken)
3416                 definedef = dsharpseen;
3417             } /* if (definedef == dnone) */
3418           continue;
3419         case '[':
3420           bracketlev++;
3421             continue;
3422         } /* switch (c) */
3423
3424
3425       /* Consider token only if some involved conditions are satisfied. */
3426       if (typdef != tignore
3427           && definedef != dignorerest
3428           && fvdef != finlist
3429           && templatelev == 0
3430           && (definedef != dnone
3431               || structdef != scolonseen)
3432           && !inattribute)
3433         {
3434           if (midtoken)
3435             {
3436               if (endtoken (c))
3437                 {
3438                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3439                     /* This handles :: in the middle,
3440                        but not at the beginning of an identifier.
3441                        Also, space-separated :: is not recognised. */
3442                     {
3443                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3444                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3445                       lp += 2;
3446                       toklen += 2;
3447                       c = lp[-1];
3448                       goto still_in_token;
3449                     }
3450                   else
3451                     {
3452                       bool funorvar = FALSE;
3453
3454                       if (yacc_rules
3455                           || consider_token (newlb.buffer + tokoff, toklen, c,
3456                                              &c_ext, bracelev, parlev,
3457                                              &funorvar))
3458                         {
3459                           if (fvdef == foperator)
3460                             {
3461                               char *oldlp = lp;
3462                               lp = skip_spaces (lp-1);
3463                               if (*lp != '\0')
3464                                 lp += 1;
3465                               while (*lp != '\0'
3466                                      && !iswhite (*lp) && *lp != '(')
3467                                 lp += 1;
3468                               c = *lp++;
3469                               toklen += lp - oldlp;
3470                             }
3471                           token.named = FALSE;
3472                           if (!plainc
3473                               && nestlev > 0 && definedef == dnone)
3474                             /* in struct body */
3475                             {
3476                               write_classname (&token_name, qualifier);
3477                               linebuffer_setlen (&token_name,
3478                                                  token_name.len+qlen+toklen);
3479                               strcat (token_name.buffer, qualifier);
3480                               strncat (token_name.buffer,
3481                                        newlb.buffer + tokoff, toklen);
3482                               token.named = TRUE;
3483                             }
3484                           else if (objdef == ocatseen)
3485                             /* Objective C category */
3486                             {
3487                               int len = strlen (objtag) + 2 + toklen;
3488                               linebuffer_setlen (&token_name, len);
3489                               strcpy (token_name.buffer, objtag);
3490                               strcat (token_name.buffer, "(");
3491                               strncat (token_name.buffer,
3492                                        newlb.buffer + tokoff, toklen);
3493                               strcat (token_name.buffer, ")");
3494                               token.named = TRUE;
3495                             }
3496                           else if (objdef == omethodtag
3497                                    || objdef == omethodparm)
3498                             /* Objective C method */
3499                             {
3500                               token.named = TRUE;
3501                             }
3502                           else if (fvdef == fdefunname)
3503                             /* GNU DEFUN and similar macros */
3504                             {
3505                               bool defun = (newlb.buffer[tokoff] == 'F');
3506                               int off = tokoff;
3507                               int len = toklen;
3508
3509                               /* Rewrite the tag so that emacs lisp DEFUNs
3510                                  can be found by their elisp name */
3511                               if (defun)
3512                                 {
3513                                   off += 1;
3514                                   len -= 1;
3515                                 }
3516                               len = toklen;
3517                               linebuffer_setlen (&token_name, len);
3518                               strncpy (token_name.buffer,
3519                                        newlb.buffer + off, len);
3520                               token_name.buffer[len] = '\0';
3521                               if (defun)
3522                                 while (--len >= 0)
3523                                   if (token_name.buffer[len] == '_')
3524                                     token_name.buffer[len] = '-';
3525                               token.named = defun;
3526                             }
3527                           else
3528                             {
3529                               linebuffer_setlen (&token_name, toklen);
3530                               strncpy (token_name.buffer,
3531                                        newlb.buffer + tokoff, toklen);
3532                               token_name.buffer[toklen] = '\0';
3533                               /* Name macros and members. */
3534                               token.named = (structdef == stagseen
3535                                              || typdef == ttypeseen
3536                                              || typdef == tend
3537                                              || (funorvar
3538                                                  && definedef == dignorerest)
3539                                              || (funorvar
3540                                                  && definedef == dnone
3541                                                  && structdef == snone
3542                                                  && bracelev > 0));
3543                             }
3544                           token.lineno = lineno;
3545                           token.offset = tokoff;
3546                           token.length = toklen;
3547                           token.line = newlb.buffer;
3548                           token.linepos = newlinepos;
3549                           token.valid = TRUE;
3550
3551                           if (definedef == dnone
3552                               && (fvdef == fvnameseen
3553                                   || fvdef == foperator
3554                                   || structdef == stagseen
3555                                   || typdef == tend
3556                                   || typdef == ttypeseen
3557                                   || objdef != onone))
3558                             {
3559                               if (current_lb_is_new)
3560                                 switch_line_buffers ();
3561                             }
3562                           else if (definedef != dnone
3563                                    || fvdef == fdefunname
3564                                    || instruct)
3565                             make_C_tag (funorvar);
3566                         }
3567                       else /* not yacc and consider_token failed */
3568                         {
3569                           if (inattribute && fvdef == fignore)
3570                             {
3571                               /* We have just met __attribute__ after a
3572                                  function parameter list: do not tag the
3573                                  function again. */
3574                               fvdef = fvnone;
3575                             }
3576                         }
3577                       midtoken = FALSE;
3578                     }
3579                 } /* if (endtoken (c)) */
3580               else if (intoken (c))
3581                 still_in_token:
3582                 {
3583                   toklen++;
3584                   continue;
3585                 }
3586             } /* if (midtoken) */
3587           else if (begtoken (c))
3588             {
3589               switch (definedef)
3590                 {
3591                 case dnone:
3592                   switch (fvdef)
3593                     {
3594                     case fstartlist:
3595                       /* This prevents tagging fb in
3596                          void (__attribute__((noreturn)) *fb) (void);
3597                          Fixing this is not easy and not very important. */
3598                       fvdef = finlist;
3599                       continue;
3600                     case flistseen:
3601                       if (plainc || declarations)
3602                         {
3603                           make_C_tag (TRUE); /* a function */
3604                           fvdef = fignore;
3605                         }
3606                       break;
3607                     default:
3608                       break;
3609                     }
3610                   if (structdef == stagseen && !cjava)
3611                     {
3612                       popclass_above (bracelev);
3613                       structdef = snone;
3614                     }
3615                   break;
3616                 case dsharpseen:
3617                   savetoken = token;
3618                   break;
3619                 default:
3620                   break;
3621                 }
3622               if (!yacc_rules || lp == newlb.buffer + 1)
3623                 {
3624                   tokoff = lp - 1 - newlb.buffer;
3625                   toklen = 1;
3626                   midtoken = TRUE;
3627                 }
3628               continue;
3629             } /* if (begtoken) */
3630         } /* if must look at token */
3631
3632
3633       /* Detect end of line, colon, comma, semicolon and various braces
3634          after having handled a token.*/
3635       switch (c)
3636         {
3637         case ':':
3638           if (inattribute)
3639             break;
3640           if (yacc_rules && token.offset == 0 && token.valid)
3641             {
3642               make_C_tag (FALSE); /* a yacc function */
3643               break;
3644             }
3645           if (definedef != dnone)
3646             break;
3647           switch (objdef)
3648             {
3649             case  otagseen:
3650               objdef = oignore;
3651               make_C_tag (TRUE); /* an Objective C class */
3652               break;
3653             case omethodtag:
3654             case omethodparm:
3655               objdef = omethodcolon;
3656               linebuffer_setlen (&token_name, token_name.len + 1);
3657               strcat (token_name.buffer, ":");
3658               break;
3659             default:
3660               break;
3661             }
3662           if (structdef == stagseen)
3663             {
3664               structdef = scolonseen;
3665               break;
3666             }
3667           /* Should be useless, but may be work as a safety net. */
3668           if (cplpl && fvdef == flistseen)
3669             {
3670               make_C_tag (TRUE); /* a function */
3671               fvdef = fignore;
3672               break;
3673             }
3674           break;
3675         case ';':
3676           if (definedef != dnone || inattribute)
3677             break;
3678           switch (typdef)
3679             {
3680             case tend:
3681             case ttypeseen:
3682               make_C_tag (FALSE); /* a typedef */
3683               typdef = tnone;
3684               fvdef = fvnone;
3685               break;
3686             case tnone:
3687             case tinbody:
3688             case tignore:
3689               switch (fvdef)
3690                 {
3691                 case fignore:
3692                   if (typdef == tignore || cplpl)
3693                     fvdef = fvnone;
3694                   break;
3695                 case fvnameseen:
3696                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3697                       || (members && instruct))
3698                     make_C_tag (FALSE); /* a variable */
3699                   fvextern = FALSE;
3700                   fvdef = fvnone;
3701                   token.valid = FALSE;
3702                   break;
3703                 case flistseen:
3704                   if ((declarations
3705                        && (cplpl || !instruct)
3706                        && (typdef == tnone || (typdef != tignore && instruct)))
3707                       || (members
3708                           && plainc && instruct))
3709                     make_C_tag (TRUE);  /* a function */
3710                   /* FALLTHRU */
3711                 default:
3712                   fvextern = FALSE;
3713                   fvdef = fvnone;
3714                   if (declarations
3715                        && cplpl && structdef == stagseen)
3716                     make_C_tag (FALSE); /* forward declaration */
3717                   else
3718                     token.valid = FALSE;
3719                 } /* switch (fvdef) */
3720               /* FALLTHRU */
3721             default:
3722               if (!instruct)
3723                 typdef = tnone;
3724             }
3725           if (structdef == stagseen)
3726             structdef = snone;
3727           break;
3728         case ',':
3729           if (definedef != dnone || inattribute)
3730             break;
3731           switch (objdef)
3732             {
3733             case omethodtag:
3734             case omethodparm:
3735               make_C_tag (TRUE); /* an Objective C method */
3736               objdef = oinbody;
3737               break;
3738             default:
3739               break;
3740             }
3741           switch (fvdef)
3742             {
3743             case fdefunkey:
3744             case foperator:
3745             case fstartlist:
3746             case finlist:
3747             case fignore:
3748             case vignore:
3749               break;
3750             case fdefunname:
3751               fvdef = fignore;
3752               break;
3753             case fvnameseen:
3754               if (parlev == 0
3755                   && ((globals
3756                        && bracelev == 0
3757                        && templatelev == 0
3758                        && (!fvextern || declarations))
3759                       || (members && instruct)))
3760                   make_C_tag (FALSE); /* a variable */
3761               break;
3762             case flistseen:
3763               if ((declarations && typdef == tnone && !instruct)
3764                   || (members && typdef != tignore && instruct))
3765                 {
3766                   make_C_tag (TRUE); /* a function */
3767                   fvdef = fvnameseen;
3768                 }
3769               else if (!declarations)
3770                 fvdef = fvnone;
3771               token.valid = FALSE;
3772               break;
3773             default:
3774               fvdef = fvnone;
3775             }
3776           if (structdef == stagseen)
3777             structdef = snone;
3778           break;
3779         case ']':
3780           if (definedef != dnone || inattribute)
3781             break;
3782           if (structdef == stagseen)
3783             structdef = snone;
3784           switch (typdef)
3785             {
3786             case ttypeseen:
3787             case tend:
3788               typdef = tignore;
3789               make_C_tag (FALSE);       /* a typedef */
3790               break;
3791             case tnone:
3792             case tinbody:
3793               switch (fvdef)
3794                 {
3795                 case foperator:
3796                 case finlist:
3797                 case fignore:
3798                 case vignore:
3799                   break;
3800                 case fvnameseen:
3801                   if ((members && bracelev == 1)
3802                       || (globals && bracelev == 0
3803                           && (!fvextern || declarations)))
3804                     make_C_tag (FALSE); /* a variable */
3805                   /* FALLTHRU */
3806                 default:
3807                   fvdef = fvnone;
3808                 }
3809               break;
3810             default:
3811               break;
3812             }
3813           break;
3814         case '(':
3815           if (inattribute)
3816             {
3817               attrparlev++;
3818               break;
3819             }
3820           if (definedef != dnone)
3821             break;
3822           if (objdef == otagseen && parlev == 0)
3823             objdef = oparenseen;
3824           switch (fvdef)
3825             {
3826             case fvnameseen:
3827               if (typdef == ttypeseen
3828                   && *lp != '*'
3829                   && !instruct)
3830                 {
3831                   /* This handles constructs like:
3832                      typedef void OperatorFun (int fun); */
3833                   make_C_tag (FALSE);
3834                   typdef = tignore;
3835                   fvdef = fignore;
3836                   break;
3837                 }
3838               /* FALLTHRU */
3839             case foperator:
3840               fvdef = fstartlist;
3841               break;
3842             case flistseen:
3843               fvdef = finlist;
3844               break;
3845             default:
3846               break;
3847             }
3848           parlev++;
3849           break;
3850         case ')':
3851           if (inattribute)
3852             {
3853               if (--attrparlev == 0)
3854                 inattribute = FALSE;
3855               break;
3856             }
3857           if (definedef != dnone)
3858             break;
3859           if (objdef == ocatseen && parlev == 1)
3860             {
3861               make_C_tag (TRUE); /* an Objective C category */
3862               objdef = oignore;
3863             }
3864           if (--parlev == 0)
3865             {
3866               switch (fvdef)
3867                 {
3868                 case fstartlist:
3869                 case finlist:
3870                   fvdef = flistseen;
3871                   break;
3872                 default:
3873                   break;
3874                 }
3875               if (!instruct
3876                   && (typdef == tend
3877                       || typdef == ttypeseen))
3878                 {
3879                   typdef = tignore;
3880                   make_C_tag (FALSE); /* a typedef */
3881                 }
3882             }
3883           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3884             parlev = 0;
3885           break;
3886         case '{':
3887           if (definedef != dnone)
3888             break;
3889           if (typdef == ttypeseen)
3890             {
3891               /* Whenever typdef is set to tinbody (currently only
3892                  here), typdefbracelev should be set to bracelev. */
3893               typdef = tinbody;
3894               typdefbracelev = bracelev;
3895             }
3896           switch (fvdef)
3897             {
3898             case flistseen:
3899               make_C_tag (TRUE);    /* a function */
3900               /* FALLTHRU */
3901             case fignore:
3902               fvdef = fvnone;
3903               break;
3904             case fvnone:
3905               switch (objdef)
3906                 {
3907                 case otagseen:
3908                   make_C_tag (TRUE); /* an Objective C class */
3909                   objdef = oignore;
3910                   break;
3911                 case omethodtag:
3912                 case omethodparm:
3913                   make_C_tag (TRUE); /* an Objective C method */
3914                   objdef = oinbody;
3915                   break;
3916                 default:
3917                   /* Neutralize `extern "C" {' grot. */
3918                   if (bracelev == 0 && structdef == snone && nestlev == 0
3919                       && typdef == tnone)
3920                     bracelev = -1;
3921                 }
3922               break;
3923             default:
3924               break;
3925             }
3926           switch (structdef)
3927             {
3928             case skeyseen:         /* unnamed struct */
3929               pushclass_above (bracelev, NULL, 0);
3930               structdef = snone;
3931               break;
3932             case stagseen:         /* named struct or enum */
3933             case scolonseen:       /* a class */
3934               pushclass_above (bracelev,token.line+token.offset, token.length);
3935               structdef = snone;
3936               make_C_tag (FALSE);  /* a struct or enum */
3937               break;
3938             default:
3939               break;
3940             }
3941           bracelev++;
3942           break;
3943         case '*':
3944           if (definedef != dnone)
3945             break;
3946           if (fvdef == fstartlist)
3947             {
3948               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3949               token.valid = FALSE;
3950             }
3951           break;
3952         case '}':
3953           if (definedef != dnone)
3954             break;
3955           if (!ignoreindent && lp == newlb.buffer + 1)
3956             {
3957               if (bracelev != 0)
3958                 token.valid = FALSE;
3959               bracelev = 0;     /* reset brace level if first column */
3960               parlev = 0;       /* also reset paren level, just in case... */
3961             }
3962           else if (bracelev > 0)
3963             bracelev--;
3964           else
3965             token.valid = FALSE; /* something gone amiss, token unreliable */
3966           popclass_above (bracelev);
3967           structdef = snone;
3968           /* Only if typdef == tinbody is typdefbracelev significant. */
3969           if (typdef == tinbody && bracelev <= typdefbracelev)
3970             {
3971               assert (bracelev == typdefbracelev);
3972               typdef = tend;
3973             }
3974           break;
3975         case '=':
3976           if (definedef != dnone)
3977             break;
3978           switch (fvdef)
3979             {
3980             case foperator:
3981             case finlist:
3982             case fignore:
3983             case vignore:
3984               break;
3985             case fvnameseen:
3986               if ((members && bracelev == 1)
3987                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3988                 make_C_tag (FALSE); /* a variable */
3989               /* FALLTHRU */
3990             default:
3991               fvdef = vignore;
3992             }
3993           break;
3994         case '<':
3995           if (cplpl
3996               && (structdef == stagseen || fvdef == fvnameseen))
3997             {
3998               templatelev++;
3999               break;
4000             }
4001           goto resetfvdef;
4002         case '>':
4003           if (templatelev > 0)
4004             {
4005               templatelev--;
4006               break;
4007             }
4008           goto resetfvdef;
4009         case '+':
4010         case '-':
4011           if (objdef == oinbody && bracelev == 0)
4012             {
4013               objdef = omethodsign;
4014               break;
4015             }
4016           /* FALLTHRU */
4017         resetfvdef:
4018         case '#': case '~': case '&': case '%': case '/':
4019         case '|': case '^': case '!': case '.': case '?':
4020           if (definedef != dnone)
4021             break;
4022           /* These surely cannot follow a function tag in C. */
4023           switch (fvdef)
4024             {
4025             case foperator:
4026             case finlist:
4027             case fignore:
4028             case vignore:
4029               break;
4030             default:
4031               fvdef = fvnone;
4032             }
4033           break;
4034         case '\0':
4035           if (objdef == otagseen)
4036             {
4037               make_C_tag (TRUE); /* an Objective C class */
4038               objdef = oignore;
4039             }
4040           /* If a macro spans multiple lines don't reset its state. */
4041           if (quotednl)
4042             CNL_SAVE_DEFINEDEF ();
4043           else
4044             CNL ();
4045           break;
4046         } /* switch (c) */
4047
4048     } /* while not eof */
4049
4050   free (lbs[0].lb.buffer);
4051   free (lbs[1].lb.buffer);
4052 }
4053
4054 /*
4055  * Process either a C++ file or a C file depending on the setting
4056  * of a global flag.
4057  */
4058 static void
4059 default_C_entries (inf)
4060      FILE *inf;
4061 {
4062   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4063 }
4064
4065 /* Always do plain C: run C_entries on INF with 0 as its first argument,
        i.e. with none of the flags the sibling entry points pass
        (C_PLPL, C_AUTO, C_JAVA, C_STAR, YACC). */
4066 static void
4067 plain_C_entries (inf)
4068      FILE *inf;
4069 {
4070   C_entries (0, inf);
4071 }
4072
4073 /* Always do C++: run C_entries on INF with the C_PLPL flag, regardless
        of the global `cplusplus' setting. */
4074 static void
4075 Cplusplus_entries (inf)
4076      FILE *inf;
4077 {
4078   C_entries (C_PLPL, inf);
4079 }
4080
4081 /* Always do Java: run C_entries on INF with the C_JAVA flag. */
4082 static void
4083 Cjava_entries (inf)
4084      FILE *inf;
4085 {
4086   C_entries (C_JAVA, inf);
4087 }
4088
4089 /* Always do C*: run C_entries on INF with the C_STAR flag. */
4090 static void
4091 Cstar_entries (inf)
4092      FILE *inf;
4093 {
4094   C_entries (C_STAR, inf);
4095 }
4096
4097 /* Always do Yacc: run C_entries on INF with the YACC flag. */
4098 static void
4099 Yacc_entries (inf)
4100      FILE *inf;
4101 {
4102   C_entries (YACC, inf);
4103 }
4104
4105 \f
/* Useful macros. */

/*
 * Loop header that walks FILE_POINTER one line at a time.  Before each
 * iteration, and only while feof (FILE_POINTER) is false, the next line
 * is read into LINE_BUFFER with readline and CHAR_POINTER is set to
 * LINE_BUFFER's `buffer' member.  The statement that follows the macro
 * is the loop body.
 *
 * Every parameter is parenthesized in the expansion, so any lvalue
 * expression (for instance `*lbp') may be passed, not only a plain
 * identifier.  FILE_POINTER and LINE_BUFFER are evaluated more than
 * once per iteration.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* nothing to initialize */             \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* done before every iteration */       \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
/*
 * Test whether CP points at KEYWORD.  The test succeeds when the text
 * at CP starts with KEYWORD (per strneq) and the character right after
 * it satisfies notinname, i.e. the keyword ends there.  On success CP
 * is replaced by the result of skip_spaces applied just past the
 * keyword; on failure CP is left unchanged.
 *
 * KEYWORD must be a constant string, because its length is taken with
 * sizeof.  CP must be an lvalue and is evaluated more than once.
 */
#define LOOKING_AT(cp, keyword)                                         \
  (strneq ((cp), keyword, sizeof (keyword) - 1)                         \
   && notinname ((cp)[sizeof (keyword) - 1])                            \
   && ((cp) = skip_spaces ((cp) + (sizeof (keyword) - 1))))
4119
4120 /*
4121  * Read a file, but do no processing.  This is used to do regexp
4122  * matching on files that have no language defined.
4123  */
4124 static void
4125 just_read_file (inf)
4126      FILE *inf;
4127 {
4128   register char *dummy;
4129
4130   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4131     continue;
4132 }
4133
4134 \f
4135 /* Fortran parsing */
4136
4137 static void F_takeprec __P((void));
4138 static void F_getit __P((FILE *));
4139
4140 static void
4141 F_takeprec ()
4142 {
4143   dbp = skip_spaces (dbp);
4144   if (*dbp != '*')
4145     return;
4146   dbp++;
4147   dbp = skip_spaces (dbp);
4148   if (strneq (dbp, "(*)", 3))
4149     {
4150       dbp += 3;
4151       return;
4152     }
4153   if (!ISDIGIT (*dbp))
4154     {
4155       --dbp;                    /* force failure */
4156       return;
4157     }
4158   do
4159     dbp++;
4160   while (ISDIGIT (*dbp));
4161 }
4162
4163 static void
4164 F_getit (inf)
4165      FILE *inf;
4166 {
4167   register char *cp;
4168
4169   dbp = skip_spaces (dbp);
4170   if (*dbp == '\0')
4171     {
4172       readline (&lb, inf);
4173       dbp = lb.buffer;
4174       if (dbp[5] != '&')
4175         return;
4176       dbp += 6;
4177       dbp = skip_spaces (dbp);
4178     }
4179   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4180     return;
4181   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4182     continue;
4183   make_tag (dbp, cp-dbp, TRUE,
4184             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4185 }
4186
4187
4188 static void
4189 Fortran_functions (inf)
4190      FILE *inf;
4191 {
4192   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4193     {
4194       if (*dbp == '%')
4195         dbp++;                  /* Ratfor escape to fortran */
4196       dbp = skip_spaces (dbp);
4197       if (*dbp == '\0')
4198         continue;
4199       switch (lowcase (*dbp))
4200         {
4201         case 'i':
4202           if (nocase_tail ("integer"))
4203             F_takeprec ();
4204           break;
4205         case 'r':
4206           if (nocase_tail ("real"))
4207             F_takeprec ();
4208           break;
4209         case 'l':
4210           if (nocase_tail ("logical"))
4211             F_takeprec ();
4212           break;
4213         case 'c':
4214           if (nocase_tail ("complex") || nocase_tail ("character"))
4215             F_takeprec ();
4216           break;
4217         case 'd':
4218           if (nocase_tail ("double"))
4219             {
4220               dbp = skip_spaces (dbp);
4221               if (*dbp == '\0')
4222                 continue;
4223               if (nocase_tail ("precision"))
4224                 break;
4225               continue;
4226             }
4227           break;
4228         }
4229       dbp = skip_spaces (dbp);
4230       if (*dbp == '\0')
4231         continue;
4232       switch (lowcase (*dbp))
4233         {
4234         case 'f':
4235           if (nocase_tail ("function"))
4236             F_getit (inf);
4237           continue;
4238         case 's':
4239           if (nocase_tail ("subroutine"))
4240             F_getit (inf);
4241           continue;
4242         case 'e':
4243           if (nocase_tail ("entry"))
4244             F_getit (inf);
4245           continue;
4246         case 'b':
4247           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4248             {
4249               dbp = skip_spaces (dbp);
4250               if (*dbp == '\0') /* assume un-named */
4251                 make_tag ("blockdata", 9, TRUE,
4252                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4253               else
4254                 F_getit (inf);  /* look for name */
4255             }
4256           continue;
4257         }
4258     }
4259 }
4260
4261 \f
4262 /*
4263  * Ada parsing
4264  * Original code by
4265  * Philippe Waroquiers (1998)
4266  */
4267
static void Ada_getit __P((FILE *, char *));

/* Once we are positioned after an "interesting" keyword, let's get
   the real tag value necessary.

   INF is the stream further lines are read from when the current line
   is exhausted or a "--" comment is reached.  NAME_QUALIFIER is the
   suffix appended to the identifier to build the tag name; it is
   replaced by "/b" when the keyword "body" is met first.

   The scan works on the global line pointer dbp.  Exactly one tag is
   made, unless no identifier character is found at the expected
   position, in which case the function returns without tagging. */
static void
Ada_getit (inf, name_qualifier)
     FILE *inf;
     char *name_qualifier;
{
  register char *cp;
  char *name;
  char c;

  while (!feof (inf))
    {
      dbp = skip_spaces (dbp);
      /* Nothing left on this line (end of line or start of a comment):
         fetch the next line and restart from its first character. */
      if (*dbp == '\0'
          || (dbp[0] == '-' && dbp[1] == '-'))
        {
          readline (&lb, inf);
          dbp = lb.buffer;
        }
      /* Skip the secondary keywords that may precede the name; each
         `continue' goes round the loop again to look at what follows. */
      switch (lowcase(*dbp))
        {
        case 'b':
          if (nocase_tail ("body"))
            {
              /* Skipping body of   procedure body   or   package body or ....
                 resetting qualifier to body instead of spec. */
              name_qualifier = "/b";
              continue;
            }
          break;
        case 't':
          /* Skipping type of   task type   or   protected type ... */
          if (nocase_tail ("type"))
            continue;
          break;
        }
      if (*dbp == '"')
        {
          /* Quoted name: it runs up to the closing quote, or to the
             end of the line if there is none. */
          dbp += 1;
          for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
            continue;
        }
      else
        {
          /* Plain name: letters, digits, '_' and '.'. */
          dbp = skip_spaces (dbp);
          for (cp = dbp;
               (*cp != '\0'
                && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
               cp++)
            continue;
          if (cp == dbp)
            return;             /* no name here, give up */
        }
      /* Temporarily terminate the name in place so that it can be
         concatenated with the qualifier, then restore the line. */
      c = *cp;
      *cp = '\0';
      name = concat (dbp, name_qualifier, "");
      *cp = c;
      make_tag (name, strlen (name), TRUE,
                lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
      free (name);
      /* Leave dbp just past the closing quote of a quoted name. */
      if (c == '"')
        dbp = cp + 1;
      return;
    }
}
4336
/* Scan the Ada source read from INF and make tags through Ada_getit.
   The keywords looked for are "function" (qualifier "/f"), "procedure"
   ("/p"), "package" ("/s"), "protected" ("/t"), "task" ("/k") and
   "type" ("/t"); which of them are honoured depends on the
   packages_only and typedefs flags.  String literals, "--" comments
   and character literals or attribute quotes are skipped. */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;         /* inside a string not yet closed; this
                                   state is kept from one line to the next */
  bool skip_till_semicolumn = FALSE; /* ignore everything up to the next ';' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When already inside a string the search starts at dbp
                 itself, else just after the opening quote. */
              dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              /* Also step over the character after the quote, unless
                 the line ends here. */
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token.  In each case below a
             recognised keyword ends with `continue', while `break'
             leaves the switch so that the unrecognised token is
             skipped by the loop that follows it. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* FALLTHROUGH: there is no break here, so any other token
                 starting with 'u' goes through the 't' tests below. */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore the rest of the line after a type name. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4444
4445 \f
4446 /*
4447  * Unix and microcontroller assembly tag handling
 * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4449  * Idea by Bob Weiner, Motorola Inc. (1994)
4450  */
4451 static void
4452 Asm_labels (inf)
4453      FILE *inf;
4454 {
4455   register char *cp;
4456
4457   LOOP_ON_INPUT_LINES (inf, lb, cp)
4458     {
4459       /* If first char is alphabetic or one of [_.$], test for colon
4460          following identifier. */
4461       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4462         {
4463           /* Read past label. */
4464           cp++;
4465           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4466             cp++;
4467           if (*cp == ':' || iswhite (*cp))
4468             /* Found end of label, so copy it and add it to the table. */
4469             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4470                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4471         }
4472     }
4473 }
4474
4475 \f
4476 /*
4477  * Perl support
4478  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4479  * Perl variable names: /^(my|local).../
4480  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4481  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4482  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4483  */
4484 static void
4485 Perl_functions (inf)
4486      FILE *inf;
4487 {
4488   char *package = savestr ("main"); /* current package name */
4489   register char *cp;
4490
4491   LOOP_ON_INPUT_LINES (inf, lb, cp)
4492     {
4493       skip_spaces(cp);
4494
4495       if (LOOKING_AT (cp, "package"))
4496         {
4497           free (package);
4498           get_tag (cp, &package);
4499         }
4500       else if (LOOKING_AT (cp, "sub"))
4501         {
4502           char *pos;
4503           char *sp = cp;
4504
4505           while (!notinname (*cp))
4506             cp++;
4507           if (cp == sp)
4508             continue;           /* nothing found */
4509           if ((pos = etags_strchr (sp, ':')) != NULL
4510               && pos < cp && pos[1] == ':')
4511             /* The name is already qualified. */
4512             make_tag (sp, cp - sp, TRUE,
4513                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4514           else
4515             /* Qualify it. */
4516             {
4517               char savechar, *name;
4518
4519               savechar = *cp;
4520               *cp = '\0';
4521               name = concat (package, "::", sp);
4522               *cp = savechar;
4523               make_tag (name, strlen(name), TRUE,
4524                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4525               free (name);
4526             }
4527         }
4528        else if (globals)        /* only if we are tagging global vars */
4529         {
4530           /* Skip a qualifier, if any. */
4531           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4532           /* After "my" or "local", but before any following paren or space. */
4533           char *varstart = cp;
4534
4535           if (qual              /* should this be removed?  If yes, how? */
4536               && (*cp == '$' || *cp == '@' || *cp == '%'))
4537             {
4538               varstart += 1;
4539               do
4540                 cp++;
4541               while (ISALNUM (*cp) || *cp == '_');
4542             }
4543           else if (qual)
4544             {
4545               /* Should be examining a variable list at this point;
4546                  could insist on seeing an open parenthesis. */
4547               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4548                 cp++;
4549             }
4550           else
4551             continue;
4552
4553           make_tag (varstart, cp - varstart, FALSE,
4554                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4555         }
4556     }
4557 }
4558
4559
4560 /*
4561  * Python support
4562  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4563  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4564  * More ideas by seb bacon <seb@jamkit.com> (2002)
4565  */
4566 static void
4567 Python_functions (inf)
4568      FILE *inf;
4569 {
4570   register char *cp;
4571
4572   LOOP_ON_INPUT_LINES (inf, lb, cp)
4573     {
4574       cp = skip_spaces (cp);
4575       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4576         {
4577           char *name = cp;
4578           while (!notinname (*cp) && *cp != ':')
4579             cp++;
4580           make_tag (name, cp - name, TRUE,
4581                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4582         }
4583     }
4584 }
4585
4586 \f
4587 /*
4588  * PHP support
4589  * Look for:
4590  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4591  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4592  *  - /^[ \t]*define\(\"[^\"]+/
4593  * Only with --members:
4594  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4595  * Idea by Diez B. Roggisch (2001)
4596  */
4597 static void
4598 PHP_functions (inf)
4599      FILE *inf;
4600 {
4601   register char *cp, *name;
4602   bool search_identifier = FALSE;
4603
4604   LOOP_ON_INPUT_LINES (inf, lb, cp)
4605     {
4606       cp = skip_spaces (cp);
4607       name = cp;
4608       if (search_identifier
4609           && *cp != '\0')
4610         {
4611           while (!notinname (*cp))
4612             cp++;
4613           make_tag (name, cp - name, TRUE,
4614                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4615           search_identifier = FALSE;
4616         }
4617       else if (LOOKING_AT (cp, "function"))
4618         {
4619           if(*cp == '&')
4620             cp = skip_spaces (cp+1);
4621           if(*cp != '\0')
4622             {
4623               name = cp;
4624               while (!notinname (*cp))
4625                 cp++;
4626               make_tag (name, cp - name, TRUE,
4627                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4628             }
4629           else
4630             search_identifier = TRUE;
4631         }
4632       else if (LOOKING_AT (cp, "class"))
4633         {
4634           if (*cp != '\0')
4635             {
4636               name = cp;
4637               while (*cp != '\0' && !iswhite (*cp))
4638                 cp++;
4639               make_tag (name, cp - name, FALSE,
4640                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4641             }
4642           else
4643             search_identifier = TRUE;
4644         }
4645       else if (strneq (cp, "define", 6)
4646                && (cp = skip_spaces (cp+6))
4647                && *cp++ == '('
4648                && (*cp == '"' || *cp == '\''))
4649         {
4650           char quote = *cp++;
4651           name = cp;
4652           while (*cp != quote && *cp != '\0')
4653             cp++;
4654           make_tag (name, cp - name, FALSE,
4655                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656         }
4657       else if (members
4658                && LOOKING_AT (cp, "var")
4659                && *cp == '$')
4660         {
4661           name = cp;
4662           while (!notinname(*cp))
4663             cp++;
4664           make_tag (name, cp - name, FALSE,
4665                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4666         }
4667     }
4668 }
4669
4670 \f
4671 /*
4672  * Cobol tag functions
4673  * We could look for anything that could be a paragraph name.
4674  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4675  * Idea by Corny de Souza (1993)
4676  */
4677 static void
4678 Cobol_paragraphs (inf)
4679      FILE *inf;
4680 {
4681   register char *bp, *ep;
4682
4683   LOOP_ON_INPUT_LINES (inf, lb, bp)
4684     {
4685       if (lb.len < 9)
4686         continue;
4687       bp += 8;
4688
4689       /* If eoln, compiler option or comment ignore whole line. */
4690       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4691         continue;
4692
4693       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4694         continue;
4695       if (*ep++ == '.')
4696         make_tag (bp, ep - bp, TRUE,
4697                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4698     }
4699 }
4700
4701 \f
4702 /*
4703  * Makefile support
4704  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4705  */
4706 static void
4707 Makefile_targets (inf)
4708      FILE *inf;
4709 {
4710   register char *bp;
4711
4712   LOOP_ON_INPUT_LINES (inf, lb, bp)
4713     {
4714       if (*bp == '\t' || *bp == '#')
4715         continue;
4716       while (*bp != '\0' && *bp != '=' && *bp != ':')
4717         bp++;
4718       if (*bp == ':' || (globals && *bp == '='))
4719         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4720                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4721     }
4722 }
4723
4724 \f
4725 /*
4726  * Pascal parsing
4727  * Original code by Mosur K. Mohan (1989)
4728  *
4729  *  Locates tags for procedures & functions.  Doesn't do any type- or
4730  *  var-definitions.  It does look for the keyword "extern" or
4731  *  "forward" immediately following the procedure statement; if found,
4732  *  the tag is skipped.
4733  */
4734 static void
4735 Pascal_functions (inf)
4736      FILE *inf;
4737 {
4738   linebuffer tline;             /* mostly copied from C_entries */
4739   long save_lcno;
4740   int save_lineno, namelen, taglen;
4741   char c, *name;
4742
4743   bool                          /* each of these flags is TRUE iff: */
4744     incomment,                  /* point is inside a comment */
4745     inquote,                    /* point is inside '..' string */
4746     get_tagname,                /* point is after PROCEDURE/FUNCTION
4747                                    keyword, so next item = potential tag */
4748     found_tag,                  /* point is after a potential tag */
4749     inparms,                    /* point is within parameter-list */
4750     verify_tag;                 /* point has passed the parm-list, so the
4751                                    next token will determine whether this
4752                                    is a FORWARD/EXTERN to be ignored, or
4753                                    whether it is a real tag */
4754
4755   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4756   name = NULL;                  /* keep compiler quiet */
4757   dbp = lb.buffer;
4758   *dbp = '\0';
4759   linebuffer_init (&tline);
4760
4761   incomment = inquote = FALSE;
4762   found_tag = FALSE;            /* have a proc name; check if extern */
4763   get_tagname = FALSE;          /* found "procedure" keyword         */
4764   inparms = FALSE;              /* found '(' after "proc"            */
4765   verify_tag = FALSE;           /* check if "extern" is ahead        */
4766
4767
4768   while (!feof (inf))           /* long main loop to get next char */
4769     {
4770       c = *dbp++;
4771       if (c == '\0')            /* if end of line */
4772         {
4773           readline (&lb, inf);
4774           dbp = lb.buffer;
4775           if (*dbp == '\0')
4776             continue;
4777           if (!((found_tag && verify_tag)
4778                 || get_tagname))
4779             c = *dbp++;         /* only if don't need *dbp pointing
4780                                    to the beginning of the name of
4781                                    the procedure or function */
4782         }
4783       if (incomment)
4784         {
4785           if (c == '}')         /* within { } comments */
4786             incomment = FALSE;
4787           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4788             {
4789               dbp++;
4790               incomment = FALSE;
4791             }
4792           continue;
4793         }
4794       else if (inquote)
4795         {
4796           if (c == '\'')
4797             inquote = FALSE;
4798           continue;
4799         }
4800       else
4801         switch (c)
4802           {
4803           case '\'':
4804             inquote = TRUE;     /* found first quote */
4805             continue;
4806           case '{':             /* found open { comment */
4807             incomment = TRUE;
4808             continue;
4809           case '(':
4810             if (*dbp == '*')    /* found open (* comment */
4811               {
4812                 incomment = TRUE;
4813                 dbp++;
4814               }
4815             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4816               inparms = TRUE;
4817             continue;
4818           case ')':             /* end of parms list */
4819             if (inparms)
4820               inparms = FALSE;
4821             continue;
4822           case ';':
4823             if (found_tag && !inparms) /* end of proc or fn stmt */
4824               {
4825                 verify_tag = TRUE;
4826                 break;
4827               }
4828             continue;
4829           }
4830       if (found_tag && verify_tag && (*dbp != ' '))
4831         {
4832           /* Check if this is an "extern" declaration. */
4833           if (*dbp == '\0')
4834             continue;
4835           if (lowcase (*dbp == 'e'))
4836             {
4837               if (nocase_tail ("extern")) /* superfluous, really! */
4838                 {
4839                   found_tag = FALSE;
4840                   verify_tag = FALSE;
4841                 }
4842             }
4843           else if (lowcase (*dbp) == 'f')
4844             {
4845               if (nocase_tail ("forward")) /* check for forward reference */
4846                 {
4847                   found_tag = FALSE;
4848                   verify_tag = FALSE;
4849                 }
4850             }
4851           if (found_tag && verify_tag) /* not external proc, so make tag */
4852             {
4853               found_tag = FALSE;
4854               verify_tag = FALSE;
4855               make_tag (name, namelen, TRUE,
4856                         tline.buffer, taglen, save_lineno, save_lcno);
4857               continue;
4858             }
4859         }
4860       if (get_tagname)          /* grab name of proc or fn */
4861         {
4862           char *cp;
4863
4864           if (*dbp == '\0')
4865             continue;
4866
4867           /* Find block name. */
4868           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4869             continue;
4870
4871           /* Save all values for later tagging. */
4872           linebuffer_setlen (&tline, lb.len);
4873           strcpy (tline.buffer, lb.buffer);
4874           save_lineno = lineno;
4875           save_lcno = linecharno;
4876           name = tline.buffer + (dbp - lb.buffer);
4877           namelen = cp - dbp;
4878           taglen = cp - lb.buffer + 1;
4879
4880           dbp = cp;             /* set dbp to e-o-token */
4881           get_tagname = FALSE;
4882           found_tag = TRUE;
4883           continue;
4884
4885           /* And proceed to check for "extern". */
4886         }
4887       else if (!incomment && !inquote && !found_tag)
4888         {
4889           /* Check for proc/fn keywords. */
4890           switch (lowcase (c))
4891             {
4892             case 'p':
4893               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4894                 get_tagname = TRUE;
4895               continue;
4896             case 'f':
4897               if (nocase_tail ("unction"))
4898                 get_tagname = TRUE;
4899               continue;
4900             }
4901         }
4902     } /* while not eof */
4903
4904   free (tline.buffer);
4905 }
4906
4907 \f
4908 /*
4909  * Lisp tag functions
4910  *  look for (def or (DEF, quote or QUOTE
4911  */
4912
4913 static void L_getit __P((void));
4914
4915 static void
4916 L_getit ()
4917 {
4918   if (*dbp == '\'')             /* Skip prefix quote */
4919     dbp++;
4920   else if (*dbp == '(')
4921   {
4922     dbp++;
4923     /* Try to skip "(quote " */
4924     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4925       /* Ok, then skip "(" before name in (defstruct (foo)) */
4926       dbp = skip_spaces (dbp);
4927   }
4928   get_tag (dbp, NULL);
4929 }
4930
4931 static void
4932 Lisp_functions (inf)
4933      FILE *inf;
4934 {
4935   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4936     {
4937       if (dbp[0] != '(')
4938         continue;
4939
4940       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4941         {
4942           dbp = skip_non_spaces (dbp);
4943           dbp = skip_spaces (dbp);
4944           L_getit ();
4945         }
4946       else
4947         {
4948           /* Check for (foo::defmumble name-defined ... */
4949           do
4950             dbp++;
4951           while (!notinname (*dbp) && *dbp != ':');
4952           if (*dbp == ':')
4953             {
4954               do
4955                 dbp++;
4956               while (*dbp == ':');
4957
4958               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4959                 {
4960                   dbp = skip_non_spaces (dbp);
4961                   dbp = skip_spaces (dbp);
4962                   L_getit ();
4963                 }
4964             }
4965         }
4966     }
4967 }
4968
4969 \f
4970 /*
4971  * Lua script language parsing
4972  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4973  *
4974  *  "function" and "local function" are tags if they start at column 1.
4975  */
4976 static void
4977 Lua_functions (inf)
4978      FILE *inf;
4979 {
4980   register char *bp;
4981
4982   LOOP_ON_INPUT_LINES (inf, lb, bp)
4983     {
4984       if (bp[0] != 'f' && bp[0] != 'l')
4985         continue;
4986
4987       LOOKING_AT (bp, "local"); /* skip possible "local" */
4988
4989       if (LOOKING_AT (bp, "function"))
4990         get_tag (bp, NULL);
4991     }
4992 }
4993
4994 \f
4995 /*
4996  * Postscript tag functions
4997  * Just look for lines where the first character is '/'
4998  * Also look at "defineps" for PSWrap
4999  * Ideas by:
5000  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5001  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5002  */
5003 static void
5004 PS_functions (inf)
5005      FILE *inf;
5006 {
5007   register char *bp, *ep;
5008
5009   LOOP_ON_INPUT_LINES (inf, lb, bp)
5010     {
5011       if (bp[0] == '/')
5012         {
5013           for (ep = bp+1;
5014                *ep != '\0' && *ep != ' ' && *ep != '{';
5015                ep++)
5016             continue;
5017           make_tag (bp, ep - bp, TRUE,
5018                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5019         }
5020       else if (LOOKING_AT (bp, "defineps"))
5021         get_tag (bp, NULL);
5022     }
5023 }
5024
5025 \f
5026 /*
5027  * Scheme tag functions
5028  * look for (def... xyzzy
5029  *          (def... (xyzzy
5030  *          (def ... ((...(xyzzy ....
5031  *          (set! xyzzy
5032  * Original code by Ken Haase (1985?)
5033  */
5034
5035 static void
5036 Scheme_functions (inf)
5037      FILE *inf;
5038 {
5039   register char *bp;
5040
5041   LOOP_ON_INPUT_LINES (inf, lb, bp)
5042     {
5043       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5044         {
5045           bp = skip_non_spaces (bp+4);
5046           /* Skip over open parens and white space */
5047           while (notinname (*bp))
5048             bp++;
5049           get_tag (bp, NULL);
5050         }
5051       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5052         get_tag (bp, NULL);
5053     }
5054 }
5055
5056 \f
5057 /* Find tags in TeX and LaTeX input files.  */
5058
5059 /* TEX_toktab is a table of TeX control sequences that define tags.
5060  * Each entry records one such control sequence.
5061  *
5062  * Original code from who knows whom.
5063  * Ideas by:
5064  *   Stefan Monnier (2002)
5065  */
5066
/* Table with tag tokens.  It is built by TEX_decode_env the first
   time TeX_commands runs, and closed by an entry whose buffer is NULL.  */
static linebuffer *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   The names are separated by colons.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape character and group delimiters used by TeX_commands.
   TEX_mode overwrites all three each time it is called.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5082
5083 /*
5084  * TeX/LaTeX scanning loop.
5085  */
5086 static void
5087 TeX_commands (inf)
5088      FILE *inf;
5089 {
5090   char *cp;
5091   linebuffer *key;
5092
5093   /* Select either \ or ! as escape character.  */
5094   TEX_mode (inf);
5095
5096   /* Initialize token table once from environment. */
5097   if (TEX_toktab == NULL)
5098     TEX_decode_env ("TEXTAGS", TEX_defenv);
5099
5100   LOOP_ON_INPUT_LINES (inf, lb, cp)
5101     {
5102       /* Look at each TEX keyword in line. */
5103       for (;;)
5104         {
5105           /* Look for a TEX escape. */
5106           while (*cp++ != TEX_esc)
5107             if (cp[-1] == '\0' || cp[-1] == '%')
5108               goto tex_next_line;
5109
5110           for (key = TEX_toktab; key->buffer != NULL; key++)
5111             if (strneq (cp, key->buffer, key->len))
5112               {
5113                 register char *p;
5114                 int namelen, linelen;
5115                 bool opgrp = FALSE;
5116
5117                 cp = skip_spaces (cp + key->len);
5118                 if (*cp == TEX_opgrp)
5119                   {
5120                     opgrp = TRUE;
5121                     cp++;
5122                   }
5123                 for (p = cp;
5124                      (!iswhite (*p) && *p != '#' &&
5125                       *p != TEX_opgrp && *p != TEX_clgrp);
5126                      p++)
5127                   continue;
5128                 namelen = p - cp;
5129                 linelen = lb.len;
5130                 if (!opgrp || *p == TEX_clgrp)
5131                   {
5132                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5133                       *p++;
5134                     linelen = p - lb.buffer + 1;
5135                   }
5136                 make_tag (cp, namelen, TRUE,
5137                           lb.buffer, linelen, lineno, linecharno);
5138                 goto tex_next_line; /* We only tag a line once */
5139               }
5140         }
5141     tex_next_line:
5142       ;
5143     }
5144 }
5145
5146 #define TEX_LESC '\\'
5147 #define TEX_SESC '!'
5148
5149 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5150    chars accordingly. */
5151 static void
5152 TEX_mode (inf)
5153      FILE *inf;
5154 {
5155   int c;
5156
5157   while ((c = getc (inf)) != EOF)
5158     {
5159       /* Skip to next line if we hit the TeX comment char. */
5160       if (c == '%')
5161         while (c != '\n')
5162           c = getc (inf);
5163       else if (c == TEX_LESC || c == TEX_SESC )
5164         break;
5165     }
5166
5167   if (c == TEX_LESC)
5168     {
5169       TEX_esc = TEX_LESC;
5170       TEX_opgrp = '{';
5171       TEX_clgrp = '}';
5172     }
5173   else
5174     {
5175       TEX_esc = TEX_SESC;
5176       TEX_opgrp = '<';
5177       TEX_clgrp = '>';
5178     }
5179   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5180      No attempt is made to correct the situation. */
5181   rewind (inf);
5182 }
5183
5184 /* Read environment and prepend it to the default string.
5185    Build token table. */
5186 static void
5187 TEX_decode_env (evarname, defenv)
5188      char *evarname;
5189      char *defenv;
5190 {
5191   register char *env, *p;
5192   int i, len;
5193
5194   /* Append default string to environment. */
5195   env = getenv (evarname);
5196   if (!env)
5197     env = defenv;
5198   else
5199     {
5200       char *oldenv = env;
5201       env = concat (oldenv, defenv, "");
5202     }
5203
5204   /* Allocate a token table */
5205   for (len = 1, p = env; p;)
5206     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5207       len++;
5208   TEX_toktab = xnew (len, linebuffer);
5209
5210   /* Unpack environment string into token table. Be careful about */
5211   /* zero-length strings (leading ':', "::" and trailing ':') */
5212   for (i = 0; *env != '\0';)
5213     {
5214       p = etags_strchr (env, ':');
5215       if (!p)                   /* End of environment string. */
5216         p = env + strlen (env);
5217       if (p - env > 0)
5218         {                       /* Only non-zero strings. */
5219           TEX_toktab[i].buffer = savenstr (env, p - env);
5220           TEX_toktab[i].len = p - env;
5221           i++;
5222         }
5223       if (*p)
5224         env = p + 1;
5225       else
5226         {
5227           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5228           TEX_toktab[i].len = 0;
5229           break;
5230         }
5231     }
5232 }
5233
5234 \f
5235 /* Texinfo support.  Dave Love, Mar. 2000.  */
5236 static void
5237 Texinfo_nodes (inf)
5238      FILE * inf;
5239 {
5240   char *cp, *start;
5241   LOOP_ON_INPUT_LINES (inf, lb, cp)
5242     if (LOOKING_AT (cp, "@node"))
5243       {
5244         start = cp;
5245         while (*cp != '\0' && *cp != ',')
5246           cp++;
5247         make_tag (start, cp - start, TRUE,
5248                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5249       }
5250 }
5251
5252 \f
/* Similar to LOOKING_AT but does not use notinname, does not skip:
   the comparison is made by strncaseeq and, when it succeeds, CP is
   left just after the keyword, whatever character follows it.  */
#define LOOKING_AT_NOCASE(cp, kw)       /* kw is a constant string */   \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   && ((cp) += sizeof(kw)-1))           /* skip the keyword */
5257
5258 /*
5259  * HTML support.
5260  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5261  * Contents of <a name=xxx> are tags with name xxx.
5262  *
5263  * Francesco Potortì, 2002.
5264  */
5265 static void
5266 HTML_labels (inf)
5267      FILE * inf;
5268 {
5269   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5270   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5271   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5272   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5273   char *end;
5274
5275
5276   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5277
5278   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5279     for (;;)                    /* loop on the same line */
5280       {
5281         if (skiptag)            /* skip HTML tag */
5282           {
5283             while (*dbp != '\0' && *dbp != '>')
5284               dbp++;
5285             if (*dbp == '>')
5286               {
5287                 dbp += 1;
5288                 skiptag = FALSE;
5289                 continue;       /* look on the same line */
5290               }
5291             break;              /* go to next line */
5292           }
5293
5294         else if (intag) /* look for "name=" or "id=" */
5295           {
5296             while (*dbp != '\0' && *dbp != '>'
5297                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5298               dbp++;
5299             if (*dbp == '\0')
5300               break;            /* go to next line */
5301             if (*dbp == '>')
5302               {
5303                 dbp += 1;
5304                 intag = FALSE;
5305                 continue;       /* look on the same line */
5306               }
5307             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5308                 || LOOKING_AT_NOCASE (dbp, "id="))
5309               {
5310                 bool quoted = (dbp[0] == '"');
5311
5312                 if (quoted)
5313                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5314                     continue;
5315                 else
5316                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5317                     continue;
5318                 linebuffer_setlen (&token_name, end - dbp);
5319                 strncpy (token_name.buffer, dbp, end - dbp);
5320                 token_name.buffer[end - dbp] = '\0';
5321
5322                 dbp = end;
5323                 intag = FALSE;  /* we found what we looked for */
5324                 skiptag = TRUE; /* skip to the end of the tag */
5325                 getnext = TRUE; /* then grab the text */
5326                 continue;       /* look on the same line */
5327               }
5328             dbp += 1;
5329           }
5330
5331         else if (getnext)       /* grab next tokens and tag them */
5332           {
5333             dbp = skip_spaces (dbp);
5334             if (*dbp == '\0')
5335               break;            /* go to next line */
5336             if (*dbp == '<')
5337               {
5338                 intag = TRUE;
5339                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5340                 continue;       /* look on the same line */
5341               }
5342
5343             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5344               continue;
5345             make_tag (token_name.buffer, token_name.len, TRUE,
5346                       dbp, end - dbp, lineno, linecharno);
5347             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5348             getnext = FALSE;
5349             break;              /* go to next line */
5350           }
5351
5352         else                    /* look for an interesting HTML tag */
5353           {
5354             while (*dbp != '\0' && *dbp != '<')
5355               dbp++;
5356             if (*dbp == '\0')
5357               break;            /* go to next line */
5358             intag = TRUE;
5359             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5360               {
5361                 inanchor = TRUE;
5362                 continue;       /* look on the same line */
5363               }
5364             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5365                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5366                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5367                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5368               {
5369                 intag = FALSE;
5370                 getnext = TRUE;
5371                 continue;       /* look on the same line */
5372               }
5373             dbp += 1;
5374           }
5375       }
5376 }
5377
5378 \f
5379 /*
5380  * Prolog support
5381  *
5382  * Assumes that the predicate or rule starts at column 0.
5383  * Only the first clause of a predicate or rule is added.
5384  * Original code by Sunichirou Sugou (1989)
5385  * Rewritten by Anders Lindgren (1996)
5386  */
5387 static int prolog_pr __P((char *, char *));
5388 static void prolog_skip_comment __P((linebuffer *, FILE *));
5389 static int prolog_atom __P((char *, int));
5390
5391 static void
5392 Prolog_functions (inf)
5393      FILE *inf;
5394 {
5395   char *cp, *last;
5396   int len;
5397   int allocated;
5398
5399   allocated = 0;
5400   len = 0;
5401   last = NULL;
5402
5403   LOOP_ON_INPUT_LINES (inf, lb, cp)
5404     {
5405       if (cp[0] == '\0')        /* Empty line */
5406         continue;
5407       else if (iswhite (cp[0])) /* Not a predicate */
5408         continue;
5409       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5410         prolog_skip_comment (&lb, inf);
5411       else if ((len = prolog_pr (cp, last)) > 0)
5412         {
5413           /* Predicate or rule.  Store the function name so that we
5414              only generate a tag for the first clause.  */
5415           if (last == NULL)
5416             last = xnew(len + 1, char);
5417           else if (len + 1 > allocated)
5418             xrnew (last, len + 1, char);
5419           allocated = len + 1;
5420           strncpy (last, cp, len);
5421           last[len] = '\0';
5422         }
5423     }
5424 }
5425
5426
5427 static void
5428 prolog_skip_comment (plb, inf)
5429      linebuffer *plb;
5430      FILE *inf;
5431 {
5432   char *cp;
5433
5434   do
5435     {
5436       for (cp = plb->buffer; *cp != '\0'; cp++)
5437         if (cp[0] == '*' && cp[1] == '/')
5438           return;
5439       readline (plb, inf);
5440     }
5441   while (!feof(inf));
5442 }
5443
5444 /*
5445  * A predicate or rule definition is added if it matches:
5446  *     <beginning of line><Prolog Atom><whitespace>(
5447  * or  <beginning of line><Prolog Atom><whitespace>:-
5448  *
5449  * It is added to the tags database if it doesn't match the
5450  * name of the previous clause header.
5451  *
5452  * Return the size of the name of the predicate or rule, or 0 if no
5453  * header was found.
5454  */
5455 static int
5456 prolog_pr (s, last)
5457      char *s;
5458      char *last;                /* Name of last clause. */
5459 {
5460   int pos;
5461   int len;
5462
5463   pos = prolog_atom (s, 0);
5464   if (pos < 1)
5465     return 0;
5466
5467   len = pos;
5468   pos = skip_spaces (s + pos) - s;
5469
5470   if ((s[pos] == '.'
5471        || (s[pos] == '(' && (pos += 1))
5472        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5473       && (last == NULL          /* save only the first clause */
5474           || len != strlen (last)
5475           || !strneq (s, last, len)))
5476         {
5477           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5478           return len;
5479         }
5480   else
5481     return 0;
5482 }
5483
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom there.
 *
 * An atom, in this context, is one of:
 * - A lower case letter or an underscore, followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes.  Inside it two consecutive single
 *   quotes stand for one, and a backslash quotes the character that
 *   follows.  A quoted atom not closed before the end of S is an
 *   error: multiline quoted atoms are ignored.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: walk to the closing quote. */
  for (p++;;)
    switch (*p)
      {
      case '\0':                /* not closed on this line */
        return -1;

      case '\\':                /* quotes the next character */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* a doubled quote */
        break;

      default:
        p++;
        break;
      }
}
5542
5543 \f
5544 /*
5545  * Support for Erlang
5546  *
5547  * Generates tags for functions, defines, and records.
5548  * Assumes that Erlang functions start at column 0.
5549  * Original code by Anders Lindgren (1996)
5550  */
5551 static int erlang_func __P((char *, char *));
5552 static void erlang_attribute __P((char *));
5553 static int erlang_atom __P((char *));
5554
5555 static void
5556 Erlang_functions (inf)
5557      FILE *inf;
5558 {
5559   char *cp, *last;
5560   int len;
5561   int allocated;
5562
5563   allocated = 0;
5564   len = 0;
5565   last = NULL;
5566
5567   LOOP_ON_INPUT_LINES (inf, lb, cp)
5568     {
5569       if (cp[0] == '\0')        /* Empty line */
5570         continue;
5571       else if (iswhite (cp[0])) /* Not function nor attribute */
5572         continue;
5573       else if (cp[0] == '%')    /* comment */
5574         continue;
5575       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5576         continue;
5577       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5578         {
5579           erlang_attribute (cp);
5580           last = NULL;
5581         }
5582       else if ((len = erlang_func (cp, last)) > 0)
5583         {
5584           /*
5585            * Function.  Store the function name so that we only
5586            * generates a tag for the first clause.
5587            */
5588           if (last == NULL)
5589             last = xnew (len + 1, char);
5590           else if (len + 1 > allocated)
5591             xrnew (last, len + 1, char);
5592           allocated = len + 1;
5593           strncpy (last, cp, len);
5594           last[len] = '\0';
5595         }
5596     }
5597 }
5598
5599
5600 /*
5601  * A function definition is added if it matches:
5602  *     <beginning of line><Erlang Atom><whitespace>(
5603  *
5604  * It is added to the tags database if it doesn't match the
5605  * name of the previous clause header.
5606  *
5607  * Return the size of the name of the function, or 0 if no function
5608  * was found.
5609  */
5610 static int
5611 erlang_func (s, last)
5612      char *s;
5613      char *last;                /* Name of last clause. */
5614 {
5615   int pos;
5616   int len;
5617
5618   pos = erlang_atom (s);
5619   if (pos < 1)
5620     return 0;
5621
5622   len = pos;
5623   pos = skip_spaces (s + pos) - s;
5624
5625   /* Save only the first clause. */
5626   if (s[pos++] == '('
5627       && (last == NULL
5628           || len != (int)strlen (last)
5629           || !strneq (s, last, len)))
5630         {
5631           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5632           return len;
5633         }
5634
5635   return 0;
5636 }
5637
5638
5639 /*
5640  * Handle attributes.  Currently, tags are generated for defines
5641  * and records.
5642  *
5643  * They are on the form:
5644  * -define(foo, bar).
5645  * -define(Foo(M, N), M+N).
5646  * -record(graph, {vtab = notable, cyclic = true}).
5647  */
5648 static void
5649 erlang_attribute (s)
5650      char *s;
5651 {
5652   char *cp = s;
5653
5654   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5655       && *cp++ == '(')
5656     {
5657       int len = erlang_atom (skip_spaces (cp));
5658       if (len > 0)
5659         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5660     }
5661   return;
5662 }
5663
5664
/*
 * Consume the Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not begin with
 * a letter, an underscore or a single quote, or if a quoted atom is
 * not closed before the end of S (multiline quoted atoms are
 * ignored).  Inside quotes a backslash quotes the next character.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted: walk to the closing quote. */
      p++;
      while (*p != '\'')
        {
          if (*p == '\\')
            p++;                /* look at the quoted character instead */
          if (*p == '\0')
            return 0;           /* not closed on this line */
          p++;
        }
      p++;                      /* include the closing quote */
    }

  return p - s;
}
5693
5694 \f
5695 #ifdef ETAGS_REGEXPS
5696
5697 static char *scan_separators __P((char *));
5698 static void add_regex __P((char *, language *));
5699 static char *substitute __P((char *, char *, struct re_registers *));
5700
/*
 * Take a string like "/blah/" and turn it into "blah", working in
 * place.  The first character of NAME is the separator; text is
 * copied down to the start of NAME until the first unquoted
 * occurrence of the separator.
 *
 * A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
 * become BEL, BS, DEL, ESC, FF, NL, CR, TAB and VT; a quoted separator
 * becomes the separator itself; for any other character both the
 * backslash and the character are kept.  A backslash at the very end
 * of the string is dropped.
 *
 * The copied string is always null terminated.  Return a pointer to
 * the terminating separator, or NULL for unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  /* Letters with a special meaning after a backslash, and the
     characters they stand for, in the same order. */
  static const char esc_letter[] = "abdefnrtv";
  static const char esc_value[] =
    { '\007', '\b', '\177', '\033', '\f', '\n', '\r', '\t', '\v' };
  const char delim = name[0];
  char *src = name + 1;         /* next character to read */
  char *dst = name;             /* next position to write */

  while (*src != '\0')
    {
      if (*src == '\\')
        {
          int k;
          char c = *++src;      /* the quoted character */

          if (c == '\0')
            break;              /* lone backslash at end of string */
          for (k = 0; esc_letter[k] != '\0' && esc_letter[k] != c; k++)
            continue;
          if (esc_letter[k] != '\0')
            *dst++ = esc_value[k];
          else if (c == delim)
            *dst++ = delim;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = c;
            }
        }
      else if (*src == delim)
        break;                  /* unquoted separator: end of the field */
      else
        *dst++ = *src;
      src++;
    }

  if (*src != delim)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5760
5761 /* Look at the argument of --regex or --no-regex and do the right
5762    thing.  Same for each line of a regexp file. */
5763 static void
5764 analyse_regex (regex_arg)
5765      char *regex_arg;
5766 {
5767   if (regex_arg == NULL)
5768     {
5769       free_regexps ();          /* --no-regex: remove existing regexps */
5770       return;
5771     }
5772
5773   /* A real --regexp option or a line in a regexp file. */
5774   switch (regex_arg[0])
5775     {
5776       /* Comments in regexp file or null arg to --regex. */
5777     case '\0':
5778     case ' ':
5779     case '\t':
5780       break;
5781
5782       /* Read a regex file.  This is recursive and may result in a
5783          loop, which will stop when the file descriptors are exhausted. */
5784     case '@':
5785       {
5786         FILE *regexfp;
5787         linebuffer regexbuf;
5788         char *regexfile = regex_arg + 1;
5789
5790         /* regexfile is a file containing regexps, one per line. */
5791         regexfp = fopen (regexfile, "r");
5792         if (regexfp == NULL)
5793           {
5794             pfatal (regexfile);
5795             return;
5796           }
5797         linebuffer_init (&regexbuf);
5798         while (readline_internal (&regexbuf, regexfp) > 0)
5799           analyse_regex (regexbuf.buffer);
5800         free (regexbuf.buffer);
5801         fclose (regexfp);
5802       }
5803       break;
5804
5805       /* Regexp to be used for a specific language only. */
5806     case '{':
5807       {
5808         language *lang;
5809         char *lang_name = regex_arg + 1;
5810         char *cp;
5811
5812         for (cp = lang_name; *cp != '}'; cp++)
5813           if (*cp == '\0')
5814             {
5815               error ("unterminated language name in regex: %s", regex_arg);
5816               return;
5817             }
5818         *cp++ = '\0';
5819         lang = get_language_from_langname (lang_name);
5820         if (lang == NULL)
5821           return;
5822         add_regex (cp, lang);
5823       }
5824       break;
5825
5826       /* Regexp to be used for any language. */
5827     default:
5828       add_regex (regex_arg, NULL);
5829       break;
5830     }
5831 }
5832
5833 /* Separate the regexp pattern, compile it,
5834    and care for optional name and modifiers. */
5835 static void
5836 add_regex (regexp_pattern, lang)
5837      char *regexp_pattern;
5838      language *lang;
5839 {
5840   static struct re_pattern_buffer zeropattern;
5841   char sep, *pat, *name, *modifiers;
5842   const char *err;
5843   struct re_pattern_buffer *patbuf;
5844   regexp *rp;
5845   bool
5846     force_explicit_name = TRUE, /* do not use implicit tag names */
5847     ignore_case = FALSE,        /* case is significant */
5848     multi_line = FALSE,         /* matches are done one line at a time */
5849     single_line = FALSE;        /* dot does not match newline */
5850
5851
5852   if (strlen(regexp_pattern) < 3)
5853     {
5854       error ("null regexp", (char *)NULL);
5855       return;
5856     }
5857   sep = regexp_pattern[0];
5858   name = scan_separators (regexp_pattern);
5859   if (name == NULL)
5860     {
5861       error ("%s: unterminated regexp", regexp_pattern);
5862       return;
5863     }
5864   if (name[1] == sep)
5865     {
5866       error ("null name for regexp \"%s\"", regexp_pattern);
5867       return;
5868     }
5869   modifiers = scan_separators (name);
5870   if (modifiers == NULL)        /* no terminating separator --> no name */
5871     {
5872       modifiers = name;
5873       name = "";
5874     }
5875   else
5876     modifiers += 1;             /* skip separator */
5877
5878   /* Parse regex modifiers. */
5879   for (; modifiers[0] != '\0'; modifiers++)
5880     switch (modifiers[0])
5881       {
5882       case 'N':
5883         if (modifiers == name)
5884           error ("forcing explicit tag name but no name, ignoring", NULL);
5885         force_explicit_name = TRUE;
5886         break;
5887       case 'i':
5888         ignore_case = TRUE;
5889         break;
5890       case 's':
5891         single_line = TRUE;
5892         /* FALLTHRU */
5893       case 'm':
5894         multi_line = TRUE;
5895         need_filebuf = TRUE;
5896         break;
5897       default:
5898         {
5899           char wrongmod [2];
5900           wrongmod[0] = modifiers[0];
5901           wrongmod[1] = '\0';
5902           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5903         }
5904         break;
5905       }
5906
5907   patbuf = xnew (1, struct re_pattern_buffer);
5908   *patbuf = zeropattern;
5909   if (ignore_case)
5910     {
5911       static char lc_trans[CHARS];
5912       int i;
5913       for (i = 0; i < CHARS; i++)
5914         lc_trans[i] = lowcase (i);
5915       patbuf->translate = lc_trans;     /* translation table to fold case  */
5916     }
5917
5918   if (multi_line)
5919     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5920   else
5921     pat = regexp_pattern;
5922
5923   if (single_line)
5924     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5925   else
5926     re_set_syntax (RE_SYNTAX_EMACS);
5927
5928   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5929   if (multi_line)
5930     free (pat);
5931   if (err != NULL)
5932     {
5933       error ("%s while compiling pattern", err);
5934       return;
5935     }
5936
5937   rp = p_head;
5938   p_head = xnew (1, regexp);
5939   p_head->pattern = savestr (regexp_pattern);
5940   p_head->p_next = rp;
5941   p_head->lang = lang;
5942   p_head->pat = patbuf;
5943   p_head->name = savestr (name);
5944   p_head->error_signaled = FALSE;
5945   p_head->force_explicit_name = force_explicit_name;
5946   p_head->ignore_case = ignore_case;
5947   p_head->multi_line = multi_line;
5948 }
5949
5950 /*
5951  * Do the substitutions indicated by the regular expression and
5952  * arguments.
5953  */
5954 static char *
5955 substitute (in, out, regs)
5956      char *in, *out;
5957      struct re_registers *regs;
5958 {
5959   char *result, *t;
5960   int size, dig, diglen;
5961
5962   result = NULL;
5963   size = strlen (out);
5964
5965   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5966   if (out[size - 1] == '\\')
5967     fatal ("pattern error in \"%s\"", out);
5968   for (t = etags_strchr (out, '\\');
5969        t != NULL;
5970        t = etags_strchr (t + 2, '\\'))
5971     if (ISDIGIT (t[1]))
5972       {
5973         dig = t[1] - '0';
5974         diglen = regs->end[dig] - regs->start[dig];
5975         size += diglen - 2;
5976       }
5977     else
5978       size -= 1;
5979
5980   /* Allocate space and do the substitutions. */
5981   assert (size >= 0);
5982   result = xnew (size + 1, char);
5983
5984   for (t = result; *out != '\0'; out++)
5985     if (*out == '\\' && ISDIGIT (*++out))
5986       {
5987         dig = *out - '0';
5988         diglen = regs->end[dig] - regs->start[dig];
5989         strncpy (t, in + regs->start[dig], diglen);
5990         t += diglen;
5991       }
5992     else
5993       *t++ = *out;
5994   *t = '\0';
5995
5996   assert (t <= result + size);
5997   assert (t - result == (int)strlen (result));
5998
5999   return result;
6000 }
6001
6002 /* Deallocate all regexps. */
6003 static void
6004 free_regexps ()
6005 {
6006   regexp *rp;
6007   while (p_head != NULL)
6008     {
6009       rp = p_head->p_next;
6010       free (p_head->pattern);
6011       free (p_head->name);
6012       free (p_head);
6013       p_head = rp;
6014     }
6015   return;
6016 }
6017
6018 /*
6019  * Reads the whole file as a single string from `filebuf' and looks for
6020  * multi-line regular expressions, creating tags on matches.
6021  * readline already dealt with normal regexps.
6022  *
6023  * Idea by Ben Wing <ben@666.com> (2002).
6024  */
6025 static void
6026 regex_tag_multiline ()
6027 {
6028   char *buffer = filebuf.buffer;
6029   regexp *rp;
6030   char *name;
6031
6032   for (rp = p_head; rp != NULL; rp = rp->p_next)
6033     {
6034       int match = 0;
6035
6036       if (!rp->multi_line)
6037         continue;               /* skip normal regexps */
6038
6039       /* Generic initialisations before parsing file from memory. */
6040       lineno = 1;               /* reset global line number */
6041       charno = 0;               /* reset global char number */
6042       linecharno = 0;           /* reset global char number of line start */
6043
6044       /* Only use generic regexps or those for the current language. */
6045       if (rp->lang != NULL && rp->lang != curfdp->lang)
6046         continue;
6047
6048       while (match >= 0 && match < filebuf.len)
6049         {
6050           match = re_search (rp->pat, buffer, filebuf.len, charno,
6051                              filebuf.len - match, &rp->regs);
6052           switch (match)
6053             {
6054             case -2:
6055               /* Some error. */
6056               if (!rp->error_signaled)
6057                 {
6058                   error ("regexp stack overflow while matching \"%s\"",
6059                          rp->pattern);
6060                   rp->error_signaled = TRUE;
6061                 }
6062               break;
6063             case -1:
6064               /* No match. */
6065               break;
6066             default:
6067               if (match == rp->regs.end[0])
6068                 {
6069                   if (!rp->error_signaled)
6070                     {
6071                       error ("regexp matches the empty string: \"%s\"",
6072                              rp->pattern);
6073                       rp->error_signaled = TRUE;
6074                     }
6075                   match = -3;   /* exit from while loop */
6076                   break;
6077                 }
6078
6079               /* Match occurred.  Construct a tag. */
6080               while (charno < rp->regs.end[0])
6081                 if (buffer[charno++] == '\n')
6082                   lineno++, linecharno = charno;
6083               name = rp->name;
6084               if (name[0] == '\0')
6085                 name = NULL;
6086               else /* make a named tag */
6087                 name = substitute (buffer, rp->name, &rp->regs);
6088               if (rp->force_explicit_name)
6089                 /* Force explicit tag name, if a name is there. */
6090                 pfnote (name, TRUE, buffer + linecharno,
6091                         charno - linecharno + 1, lineno, linecharno);
6092               else
6093                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6094                           charno - linecharno + 1, lineno, linecharno);
6095               break;
6096             }
6097         }
6098     }
6099 }
6100
6101 #endif /* ETAGS_REGEXPS */
6102
6103 \f
6104 static bool
6105 nocase_tail (cp)
6106      char *cp;
6107 {
6108   register int len = 0;
6109
6110   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6111     cp++, len++;
6112   if (*cp == '\0' && !intoken (dbp[len]))
6113     {
6114       dbp += len;
6115       return TRUE;
6116     }
6117   return FALSE;
6118 }
6119
6120 static void
6121 get_tag (bp, namepp)
6122      register char *bp;
6123      char **namepp;
6124 {
6125   register char *cp = bp;
6126
6127   if (*bp != '\0')
6128     {
6129       /* Go till you get to white space or a syntactic break */
6130       for (cp = bp + 1; !notinname (*cp); cp++)
6131         continue;
6132       make_tag (bp, cp - bp, TRUE,
6133                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6134     }
6135
6136   if (namepp != NULL)
6137     *namepp = savenstr (bp, cp - bp);
6138 }
6139
6140 /*
6141  * Read a line of text from `stream' into `lbp', excluding the
6142  * newline or CR-NL, if any.  Return the number of characters read from
6143  * `stream', which is the length of the line including the newline.
6144  *
6145  * On DOS or Windows we do not count the CR character, if any before the
6146  * NL, in the returned length; this mirrors the behavior of Emacs on those
6147  * platforms (for text files, it translates CR-NL to NL as it reads in the
6148  * file).
6149  *
6150  * If multi-line regular expressions are requested, each line read is
6151  * appended to `filebuf'.
6152  */
6153 static long
6154 readline_internal (lbp, stream)
6155      linebuffer *lbp;
6156      register FILE *stream;
6157 {
6158   char *buffer = lbp->buffer;
6159   register char *p = lbp->buffer;
6160   register char *pend;
6161   int chars_deleted;
6162
6163   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6164
6165   for (;;)
6166     {
6167       register int c = getc (stream);
6168       if (p == pend)
6169         {
6170           /* We're at the end of linebuffer: expand it. */
6171           lbp->size *= 2;
6172           xrnew (buffer, lbp->size, char);
6173           p += buffer - lbp->buffer;
6174           pend = buffer + lbp->size;
6175           lbp->buffer = buffer;
6176         }
6177       if (c == EOF)
6178         {
6179           *p = '\0';
6180           chars_deleted = 0;
6181           break;
6182         }
6183       if (c == '\n')
6184         {
6185           if (p > buffer && p[-1] == '\r')
6186             {
6187               p -= 1;
6188 #ifdef DOS_NT
6189              /* Assume CRLF->LF translation will be performed by Emacs
6190                 when loading this file, so CRs won't appear in the buffer.
6191                 It would be cleaner to compensate within Emacs;
6192                 however, Emacs does not know how many CRs were deleted
6193                 before any given point in the file.  */
6194               chars_deleted = 1;
6195 #else
6196               chars_deleted = 2;
6197 #endif
6198             }
6199           else
6200             {
6201               chars_deleted = 1;
6202             }
6203           *p = '\0';
6204           break;
6205         }
6206       *p++ = c;
6207     }
6208   lbp->len = p - buffer;
6209
6210   if (need_filebuf              /* we need filebuf for multi-line regexps */
6211       && chars_deleted > 0)     /* not at EOF */
6212     {
6213       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6214         {
6215           /* Expand filebuf. */
6216           filebuf.size *= 2;
6217           xrnew (filebuf.buffer, filebuf.size, char);
6218         }
6219       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6220       filebuf.len += lbp->len;
6221       filebuf.buffer[filebuf.len++] = '\n';
6222       filebuf.buffer[filebuf.len] = '\0';
6223     }
6224
6225   return lbp->len + chars_deleted;
6226 }
6227
6228 /*
6229  * Like readline_internal, above, but in addition try to match the
6230  * input line against relevant regular expressions and manage #line
6231  * directives.
6232  */
6233 static void
6234 readline (lbp, stream)
6235      linebuffer *lbp;
6236      FILE *stream;
6237 {
6238   long result;
6239
6240   linecharno = charno;          /* update global char number of line start */
6241   result = readline_internal (lbp, stream); /* read line */
6242   lineno += 1;                  /* increment global line number */
6243   charno += result;             /* increment global char number */
6244
6245   /* Honour #line directives. */
6246   if (!no_line_directive)
6247     {
6248       static bool discard_until_line_directive;
6249
6250       /* Check whether this is a #line directive. */
6251       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6252         {
6253           int start, lno;
6254
6255           if (DEBUG) start = 0; /* shut up the compiler */
6256           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6257             {
6258               char *endp = lbp->buffer + start;
6259
6260               assert (start > 0);
6261               while ((endp = etags_strchr (endp, '"')) != NULL
6262                      && endp[-1] == '\\')
6263                 endp++;
6264               if (endp != NULL)
6265                 /* Ok, this is a real #line directive.  Let's deal with it. */
6266                 {
6267                   char *taggedabsname;  /* absolute name of original file */
6268                   char *taggedfname;    /* name of original file as given */
6269                   char *name;           /* temp var */
6270
6271                   discard_until_line_directive = FALSE; /* found it */
6272                   name = lbp->buffer + start;
6273                   *endp = '\0';
6274                   canonicalize_filename (name); /* for DOS */
6275                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6276                   if (filename_is_absolute (name)
6277                       || filename_is_absolute (curfdp->infname))
6278                     taggedfname = savestr (taggedabsname);
6279                   else
6280                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6281
6282                   if (streq (curfdp->taggedfname, taggedfname))
6283                     /* The #line directive is only a line number change.  We
6284                        deal with this afterwards. */
6285                     free (taggedfname);
6286                   else
6287                     /* The tags following this #line directive should be
6288                        attributed to taggedfname.  In order to do this, set
6289                        curfdp accordingly. */
6290                     {
6291                       fdesc *fdp; /* file description pointer */
6292
6293                       /* Go look for a file description already set up for the
6294                          file indicated in the #line directive.  If there is
6295                          one, use it from now until the next #line
6296                          directive. */
6297                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6298                         if (streq (fdp->infname, curfdp->infname)
6299                             && streq (fdp->taggedfname, taggedfname))
6300                           /* If we remove the second test above (after the &&)
6301                              then all entries pertaining to the same file are
6302                              coalesced in the tags file.  If we use it, then
6303                              entries pertaining to the same file but generated
6304                              from different files (via #line directives) will
6305                              go into separate sections in the tags file.  These
6306                              alternatives look equivalent.  The first one
6307                              destroys some apparently useless information. */
6308                           {
6309                             curfdp = fdp;
6310                             free (taggedfname);
6311                             break;
6312                           }
6313                       /* Else, if we already tagged the real file, skip all
6314                          input lines until the next #line directive. */
6315                       if (fdp == NULL) /* not found */
6316                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6317                           if (streq (fdp->infabsname, taggedabsname))
6318                             {
6319                               discard_until_line_directive = TRUE;
6320                               free (taggedfname);
6321                               break;
6322                             }
6323                       /* Else create a new file description and use that from
6324                          now on, until the next #line directive. */
6325                       if (fdp == NULL) /* not found */
6326                         {
6327                           fdp = fdhead;
6328                           fdhead = xnew (1, fdesc);
6329                           *fdhead = *curfdp; /* copy curr. file description */
6330                           fdhead->next = fdp;
6331                           fdhead->infname = savestr (curfdp->infname);
6332                           fdhead->infabsname = savestr (curfdp->infabsname);
6333                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6334                           fdhead->taggedfname = taggedfname;
6335                           fdhead->usecharno = FALSE;
6336                           fdhead->prop = NULL;
6337                           fdhead->written = FALSE;
6338                           curfdp = fdhead;
6339                         }
6340                     }
6341                   free (taggedabsname);
6342                   lineno = lno - 1;
6343                   readline (lbp, stream);
6344                   return;
6345                 } /* if a real #line directive */
6346             } /* if #line is followed by a a number */
6347         } /* if line begins with "#line " */
6348
6349       /* If we are here, no #line directive was found. */
6350       if (discard_until_line_directive)
6351         {
6352           if (result > 0)
6353             {
6354               /* Do a tail recursion on ourselves, thus discarding the contents
6355                  of the line buffer. */
6356               readline (lbp, stream);
6357               return;
6358             }
6359           /* End of file. */
6360           discard_until_line_directive = FALSE;
6361           return;
6362         }
6363     } /* if #line directives should be considered */
6364
6365 #ifdef ETAGS_REGEXPS
6366   {
6367     int match;
6368     regexp *rp;
6369     char *name;
6370
6371     /* Match against relevant regexps. */
6372     if (lbp->len > 0)
6373       for (rp = p_head; rp != NULL; rp = rp->p_next)
6374         {
6375           /* Only use generic regexps or those for the current language.
6376              Also do not use multiline regexps, which is the job of
6377              regex_tag_multiline. */
6378           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6379               || rp->multi_line)
6380             continue;
6381
6382           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6383           switch (match)
6384             {
6385             case -2:
6386               /* Some error. */
6387               if (!rp->error_signaled)
6388                 {
6389                   error ("regexp stack overflow while matching \"%s\"",
6390                          rp->pattern);
6391                   rp->error_signaled = TRUE;
6392                 }
6393               break;
6394             case -1:
6395               /* No match. */
6396               break;
6397             case 0:
6398               /* Empty string matched. */
6399               if (!rp->error_signaled)
6400                 {
6401                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6402                   rp->error_signaled = TRUE;
6403                 }
6404               break;
6405             default:
6406               /* Match occurred.  Construct a tag. */
6407               name = rp->name;
6408               if (name[0] == '\0')
6409                 name = NULL;
6410               else /* make a named tag */
6411                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6412               if (rp->force_explicit_name)
6413                 /* Force explicit tag name, if a name is there. */
6414                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6415               else
6416                 make_tag (name, strlen (name), TRUE,
6417                           lbp->buffer, match, lineno, linecharno);
6418               break;
6419             }
6420         }
6421   }
6422 #endif /* ETAGS_REGEXPS */
6423 }
6424
6425 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The copy is produced by savenstr, so it lives in freshly allocated
 * storage of strlen(cp)+1 bytes.  Returns a pointer to the copy.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6436
6437 /*
6438  * Return a pointer to a space of size LEN+1 allocated with xnew where
6439  * the string CP has been copied for at most the first LEN characters.
6440  */
6441 static char *
6442 savenstr (cp, len)
6443      char *cp;
6444      int len;
6445 {
6446   register char *dp;
6447
6448   dp = xnew (len + 1, char);
6449   strncpy (dp, cp, len);
6450   dp[len] = '\0';
6451   return dp;
6452 }
6453
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.
 *
 * The terminating NUL is examined too, so searching for '\0' yields a
 * pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the rightmost hit so far */
      if (*sp == '\0')
        break;                  /* terminator has been examined */
    }
  return (char *) last;
}
6475
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.
 *
 * The terminating NUL is examined too, so searching for '\0' yields a
 * pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Test for a match before testing for the terminator, so that a
     search for '\0' succeeds.  */
  while (*sp != c)
    if (*sp++ == '\0')
      return NULL;
  return (char *) sp;
}
6494
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters.
 *
 * Two characters are folded with lowcase only when both of them are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * Returns zero when the strings are equal, else the difference between
 * the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  int diff;

  for (;; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop on the first mismatch, or when S1 is exhausted (in which
         case DIFF also tells whether S2 ended at the same place).  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6515
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.
 *
 * Two characters are folded with lowcase only when both of them are
 * alphabetic according to ISALPHA; otherwise they are compared as is.
 * Returns zero when the first N characters are equal (or when both
 * strings end, equal, before N characters), else the difference
 * between the first pair of characters that differ.  A non-positive N
 * compares nothing and yields zero.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  int diff;

  /* The count is tested before any character is looked at.  This way
     a string S1 that ends after exactly N matching characters is
     reported as equal, whatever follows in S2.  */
  for (; n > 0; s1++, s2++, n--)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop on the first mismatch, or when S1 is exhausted (in which
         case DIFF also tells whether S2 ended at the same place).  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  return 0;                     /* N characters compared equal */
}
6541
/* Advance CP past every leading character for which iswhite is true
   and return the resulting pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6551
/* Advance CP up to the first character for which iswhite is true, or
   up to the terminating NUL, whichever comes first, and return the
   resulting pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6561
/* Print an error message and exit.
   S1 and S2 are handed unchanged to error (), which uses S1 as a
   printf control string and S2 as its argument.  The program then
   terminates with status EXIT_FAILURE, so this function does not
   return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6570
/* Report the error currently recorded in errno through perror (),
   with S1 as the message prefix, and exit.  The program terminates
   with status EXIT_FAILURE, so this function does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6578
6579 static void
6580 suggest_asking_for_help ()
6581 {
6582   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6583            progname, LONG_OPTIONS ? "--help" : "-h");
6584   exit (EXIT_FAILURE);
6585 }
6586
6587 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6588 static void
6589 error (s1, s2)
6590      const char *s1, *s2;
6591 {
6592   fprintf (stderr, "%s: ", progname);
6593   fprintf (stderr, s1, s2);
6594   fprintf (stderr, "\n");
6595 }
6596
6597 /* Return a newly-allocated string whose contents
6598    concatenate those of s1, s2, s3.  */
6599 static char *
6600 concat (s1, s2, s3)
6601      char *s1, *s2, *s3;
6602 {
6603   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6604   char *result = xnew (len1 + len2 + len3 + 1, char);
6605
6606   strcpy (result, s1);
6607   strcpy (result + len1, s2);
6608   strcpy (result + len1 + len2, s3);
6609   result[len1 + len2 + len3] = '\0';
6610
6611   return result;
6612 }
6613
6614 \f
6615 /* Does the same work as the system V getcwd, but does not need to
6616    guess the buffer size in advance. */
6617 static char *
6618 etags_getcwd ()
6619 {
6620 #ifdef HAVE_GETCWD
6621   int bufsize = 200;
6622   char *path = xnew (bufsize, char);
6623
6624   while (getcwd (path, bufsize) == NULL)
6625     {
6626       if (errno != ERANGE)
6627         pfatal ("getcwd");
6628       bufsize *= 2;
6629       free (path);
6630       path = xnew (bufsize, char);
6631     }
6632
6633   canonicalize_filename (path);
6634   return path;
6635
6636 #else /* not HAVE_GETCWD */
6637 #if MSDOS
6638
6639   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6640
6641   getwd (path);
6642
6643   for (p = path; *p != '\0'; p++)
6644     if (*p == '\\')
6645       *p = '/';
6646     else
6647       *p = lowcase (*p);
6648
6649   return strdup (path);
6650 #else /* not MSDOS */
6651   linebuffer path;
6652   FILE *pipe;
6653
6654   linebuffer_init (&path);
6655   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6656   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6657     pfatal ("pwd");
6658   pclose (pipe);
6659
6660   return path.buffer;
6661 #endif /* not MSDOS */
6662 #endif /* not HAVE_GETCWD */
6663 }
6664
6665 /* Return a newly allocated string containing the file name of FILE
6666    relative to the absolute directory DIR (which should end with a slash). */
6667 static char *
6668 relative_filename (file, dir)
6669      char *file, *dir;
6670 {
6671   char *fp, *dp, *afn, *res;
6672   int i;
6673
6674   /* Find the common root of file and dir (with a trailing slash). */
6675   afn = absolute_filename (file, cwd);
6676   fp = afn;
6677   dp = dir;
6678   while (*fp++ == *dp++)
6679     continue;
6680   fp--, dp--;                   /* back to the first differing char */
6681 #ifdef DOS_NT
6682   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6683     return afn;
6684 #endif
6685   do                            /* look at the equal chars until '/' */
6686     fp--, dp--;
6687   while (*fp != '/');
6688
6689   /* Build a sequence of "../" strings for the resulting relative file name. */
6690   i = 0;
6691   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6692     i += 1;
6693   res = xnew (3*i + strlen (fp + 1) + 1, char);
6694   res[0] = '\0';
6695   while (i-- > 0)
6696     strcat (res, "../");
6697
6698   /* Add the file name relative to the common root of file and dir. */
6699   strcat (res, fp + 1);
6700   free (afn);
6701
6702   return res;
6703 }
6704
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  Every "/dirname/.." and "/." substring is then removed from
   the result, in place.  A ".." that has no directory component before
   it is simply dropped.  If nothing is left, "/" is returned.

   FILE must have been canonicalized, as required by
   filename_is_absolute.  Under DOS_NT, a relative FILE with a drive
   letter (like `d:NAME') is a fatal error.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  The first test
     keeps us from looking beyond the end of an empty FILE.  */
  else if (file[0] != '\0' && file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the component that precedes
                 the "/..", never stepping before the start of RES. */
              for (cp = slashp; cp > res; )
                if (filename_is_absolute (--cp))
                  break;
              if (!filename_is_absolute (cp))
                cp = slashp;    /* no component before the "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy is not
                 allowed here.  The count includes the terminator. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions again: close the gap left by
                 the "/." with memmove. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name denotes the root.  Release
         the now empty buffer before returning a fresh "/". */
      free (res);
      return savestr ("/");
    }

  return res;
}
6765
6766 /* Return a newly allocated string containing the absolute
6767    file name of dir where FILE resides given DIR (which should
6768    end with a slash). */
6769 static char *
6770 absolute_dirname (file, dir)
6771      char *file, *dir;
6772 {
6773   char *slashp, *res;
6774   char save;
6775
6776   canonicalize_filename (file);
6777   slashp = etags_strrchr (file, '/');
6778   if (slashp == NULL)
6779     return savestr (dir);
6780   save = slashp[1];
6781   slashp[1] = '\0';
6782   res = absolute_filename (file, dir);
6783   slashp[1] = save;
6784
6785   return res;
6786 }
6787
/* Tell whether the file name FN is absolute.  FN must have been
   canonicalized with canonicalize_filename.

   A name is absolute when it begins with a slash.  Under DOS_NT, a
   name beginning with a drive letter, a colon and a slash (like
   `d:/NAME') is absolute as well.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6800
/* Bring the file name FN to canonical form.  Works in place.

   Under DOS_NT, a lower case drive letter is converted to upper case
   and every backslash is translated into a slash.  On other systems
   nothing is done.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6819
6820 \f
6821 /* Initialize a linebuffer for use */
6822 static void
6823 linebuffer_init (lbp)
6824      linebuffer *lbp;
6825 {
6826   lbp->size = (DEBUG) ? 3 : 200;
6827   lbp->buffer = xnew (lbp->size, char);
6828   lbp->buffer[0] = '\0';
6829   lbp->len = 0;
6830 }
6831
6832 /* Set the minimum size of a string contained in a linebuffer. */
6833 static void
6834 linebuffer_setlen (lbp, toksize)
6835      linebuffer *lbp;
6836      int toksize;
6837 {
6838   while (lbp->size <= toksize)
6839     {
6840       lbp->size *= 2;
6841       xrnew (lbp->buffer, lbp->size, char);
6842     }
6843   lbp->len = toksize;
6844 }
6845
6846 /* Like malloc but get fatal error if memory is exhausted. */
6847 static PTR
6848 xmalloc (size)
6849      unsigned int size;
6850 {
6851   PTR result = (PTR) malloc (size);
6852   if (result == NULL)
6853     fatal ("virtual memory exhausted", (char *)NULL);
6854   return result;
6855 }
6856
6857 static PTR
6858 xrealloc (ptr, size)
6859      char *ptr;
6860      unsigned int size;
6861 {
6862   PTR result = (PTR) realloc (ptr, size);
6863   if (result == NULL)
6864     fatal ("virtual memory exhausted", (char *)NULL);
6865   return result;
6866 }
6867
6868 /*
6869  * Local Variables:
6870  * c-indentation-style: gnu
6871  * indent-tabs-mode: t
6872  * tab-width: 8
6873  * fill-column: 79
6874  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6875  * End:
6876  */
6877
6878 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6879    (do not change this comment) */
6880
6881 /* etags.c ends here */