code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
     /* Revision string of this etags source.  NOTE(review): the "@(#)"
        prefix is presumably the marker that what(1) searches for --
        confirm before relying on it. */
  44 char pot_etags_version[] = "@(#) pot revision number is 17.14";
  45
     /* Truth values used with the `bool' type that is defined further down. */
  46 #define TRUE    1
  47 #define FALSE   0
  48
     /* Normalise DEBUG so that it is always defined and expands to TRUE or
        FALSE, which lets the rest of the file test it with a plain `#if'.
        When DEBUG was not requested, NDEBUG is defined as well so that
        assert is disabled. */
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # define ETAGS_REGEXPS          /* use the regexp features */
  63 # define LONG_OPTIONS           /* accept long options */
  64 # ifndef PTR                    /* for Xemacs */
  65 #   define PTR void *
  66 # endif
  67 # ifndef __P                    /* for Xemacs */
  68 #   define __P(args) args
  69 # endif
  70 #else  /* no config.h */
  71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  72 #   define __P(args) args       /* use prototypes */
  73 #   define PTR void *           /* for generic pointers */
  74 # else /* not standard C */
  75 #   define __P(args) ()         /* no prototypes */
  76 #   define const                /* remove const for old compilers' sake */
  77 #   define PTR long *           /* don't use void* */
  78 # endif
  79 #endif /* !HAVE_CONFIG_H */
  80
  81 #ifndef _GNU_SOURCE
  82 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  83 #endif
  84
  85 #ifdef LONG_OPTIONS
  86 #  undef LONG_OPTIONS
  87 #  define LONG_OPTIONS TRUE
  88 #else
  89 #  define LONG_OPTIONS  FALSE
  90 #endif
  91
  92 /* WIN32_NATIVE is for Xemacs.
  93    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  94 #ifdef WIN32_NATIVE
  95 # undef MSDOS
  96 # undef  WINDOWSNT
  97 # define WINDOWSNT
  98 #endif /* WIN32_NATIVE */
  99
 100 #ifdef MSDOS
 101 # undef MSDOS
 102 # define MSDOS TRUE
 103 # include <fcntl.h>
 104 # include <sys/param.h>
 105 # include <io.h>
 106 # ifndef HAVE_CONFIG_H
 107 #   define DOS_NT
 108 #   include <sys/config.h>
 109 # endif
 110 #else
 111 # define MSDOS FALSE
 112 #endif /* MSDOS */
 113
 114 #ifdef WINDOWSNT
 115 # include <stdlib.h>
 116 # include <fcntl.h>
 117 # include <string.h>
 118 # include <direct.h>
 119 # include <io.h>
 120 # define MAXPATHLEN _MAX_PATH
 121 # undef HAVE_NTGUI
 122 # undef  DOS_NT
 123 # define DOS_NT
 124 # ifndef HAVE_GETCWD
 125 #   define HAVE_GETCWD
 126 # endif /* undef HAVE_GETCWD */
 127 #else /* not WINDOWSNT */
 128 # ifdef STDC_HEADERS
 129 #  include <stdlib.h>
 130 #  include <string.h>
 131 # else /* no standard C headers */
 132     extern char *getenv ();
 133 #  ifdef VMS
 134 #   define EXIT_SUCCESS 1
 135 #   define EXIT_FAILURE 0
 136 #  else /* no VMS */
 137 #   define EXIT_SUCCESS 0
 138 #   define EXIT_FAILURE 1
 139 #  endif
 140 # endif
 141 #endif /* !WINDOWSNT */
 142
 143 #ifdef HAVE_UNISTD_H
 144 # include <unistd.h>
 145 #else
 146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 147     extern char *getcwd (char *buf, size_t size);
 148 # endif
 149 #endif /* HAVE_UNISTD_H */
 150
 151 #include <stdio.h>
 152 #include <ctype.h>
 153 #include <errno.h>
 154 #ifndef errno
 155   extern int errno;
 156 #endif
 157 #include <sys/types.h>
 158 #include <sys/stat.h>
 159
 160 #include <assert.h>
 161 #ifdef NDEBUG
 162 # undef  assert                 /* some systems have a buggy assert.h */
 163 # define assert(x) ((void) 0)
 164 #endif
 165
 166 #if !defined (S_ISREG) && defined (S_IFREG)
 167 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 168 #endif
 169
 170 #if LONG_OPTIONS
 171 # include <getopt.h>
 172 #else
 173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 174   extern char *optarg;
 175   extern int optind, opterr;
 176 #endif /* LONG_OPTIONS */
 177
 178 #ifdef ETAGS_REGEXPS
 179 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 180 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 181                              !!! NOTICE !!!
 182  the regex.h distributed with Cygwin is not compatible with etags, alas!
 183 If you want regular expression support, you should delete this notice and
 184               arrange to use the GNU regex.h and regex.c.
 185 #   endif
 186 # endif
 187 # include <regex.h>
 188 #endif /* ETAGS_REGEXPS */
 189
 190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 191  Leave it undefined to make the program "etags", which makes emacs-style
 192  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 193 #ifdef CTAGS
 194 # undef  CTAGS
 195 # define CTAGS TRUE
 196 #else
 197 # define CTAGS FALSE
 198 #endif
 199
     /*
      * String equality helpers.  Each macro yields non-zero when the two
      * strings compare equal: over the whole string (streq, strcaseeq) or
      * over at most N characters (strneq, strncaseeq).  The *case* variants
      * compare through etags_strcasecmp / etags_strncasecmp.
      *
      * Every comparison function dereferences both of its arguments, so
      * the debugging assertion requires S and T to be non-NULL together;
      * a single NULL argument must trip the assertion too.
      * S and T are evaluated twice when assertions are enabled.
      */
 200 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 201 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 202 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 203 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 204
     /* Character classification.  CHAR maps any character value, including
        a negative one coming from a signed char, to an index in the range
        0 .. CHARS-1.  The class macros use that index to read the _wht,
        _nin, _btk, _itk and _etk tables. */
 205 #define CHARS 256               /* 2^8: number of values CHAR(x) can take */
 206 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 207 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 208 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 209 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 210 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 211 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 212
     /* <ctype.h> wrappers: the argument goes through CHAR first, so the
        library function never receives a negative value. */
 213 #define ISALNUM(c)      isalnum (CHAR(c))
 214 #define ISALPHA(c)      isalpha (CHAR(c))
 215 #define ISDIGIT(c)      isdigit (CHAR(c))
 216 #define ISLOWER(c)      islower (CHAR(c))
 217
     /* Case conversion, with the same CHAR protection on the argument. */
 218 #define lowcase(c)      tolower (CHAR(c))
 219 #define upcase(c)       toupper (CHAR(c))
 220
 221
 222 /*
 223  *      xnew, xrnew -- allocate, reallocate storage
 224  *
 225  * SYNOPSIS:    Type *xnew (int n, Type);
 226  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew yields storage for N objects of type Type.  xrnew resizes the
      * storage OP points to so that it holds N objects and stores the new
      * address back into OP, so OP must be an lvalue.
      * In DEBUG builds the requests go through trace_malloc and
      * trace_realloc together with the source position of the caller;
      * otherwise they go through xmalloc and xrealloc.
      * The product N * sizeof (Type) is not checked for overflow here.
 227  */
 228 #if DEBUG
 229 # include "chkmalloc.h"
 230 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 231                                                   (n) * sizeof (Type)))
 232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 233                                         (char *) (op), (n) * sizeof (Type)))
 234 #else
 235 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #endif
 239
     /* No native boolean type is assumed: `bool' is plain int, used with
        the TRUE and FALSE constants. */
 240 #define bool int
 241
     /* Type of a language parse function: it takes a FILE * and returns
        nothing. */
 242 typedef void Lang_function __P((FILE *));
 243
     /* Describes one compression format; element type of the
        `compressors' table. */
 244 typedef struct
 245 {
 246   char *suffix;                 /* file name suffix for this compressor */
 247   char *command;                /* takes one arg and decompresses to stdout */
 248 } compressor;
 249
     /* Describes one supported language; element type of the `lang_names'
        table.  That table uses positional initializers, so the order of
        the members matters. */
 250 typedef struct
 251 {
 252   char *name;                   /* language name */
 253   char *help;                   /* detailed help for the language */
 254   Lang_function *function;      /* parse function */
 255   char **suffixes;              /* name suffixes of this language's files */
 256   char **filenames;             /* names of this language's files */
 257   char **interpreters;          /* interpreters for this language */
 258   bool metasource;              /* source used to generate other sources */
 259 } language;
 260
     /* Description of one input file.  Elements are chained through `next'
        into the list whose head is `fdhead'. */
 261 typedef struct fdesc
 262 {
 263   struct fdesc *next;           /* for the linked list */
 264   char *infname;                /* uncompressed input file name */
 265   char *infabsname;             /* absolute uncompressed input file name */
 266   char *infabsdir;              /* absolute dir of input file */
 267   char *taggedfname;            /* file name to write in tagfile */
 268   language *lang;               /* language of file */
 269   char *prop;                   /* file properties to write in tagfile */
 270   bool usecharno;               /* etags tags shall contain char number */
 271   bool written;                 /* entry written in the tags file */
 272 } fdesc;
 273
     /* One tag, stored as a node of the binary tree of tags whose head is
        `nodehead'.  `fdp' links the tag to the description of the file it
        was found in. */
 274 typedef struct node_st
 275 {                               /* sorting structure */
 276   struct node_st *left, *right; /* left and right sons */
 277   fdesc *fdp;                   /* description of file to whom tag belongs */
 278   char *name;                   /* tag name */
 279   char *regex;                  /* search regexp */
 280   bool valid;                   /* write this tag on the tag file */
 281   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 282   bool been_warned;             /* warning already given for duplicated tag */
 283   int lno;                      /* line number tag is on */
 284   long cno;                     /* character number line starts on */
 285 } node;
 286
 287 /*
 288  * A `linebuffer' is a structure which holds a line of text.
 289  * `readline_internal' reads a line from a stream into a linebuffer
 290  * and works regardless of the length of the line.
 291  * SIZE is the size of BUFFER, LEN is the length of the string in
 292  * BUFFER after readline reads it.
      * Note that SIZE is a long while LEN is an int.
 293  */
 294 typedef struct
 295 {
 296   long size;                    /* size of BUFFER */
 297   int len;                      /* length of the string held in BUFFER */
 298   char *buffer;                 /* the text itself */
 299 } linebuffer;
 300
 301 /* Used to support mixing of --lang and file names.
        Each element records one argument: ARG_TYPE tells how WHAT is to
        be interpreted, and an element of type at_end terminates the list. */
 302 typedef struct
 303 {
 304   enum {
 305     at_language,                /* a language specification */
 306     at_regexp,                  /* a regular expression */
 307     at_filename,                /* a file name */
 308     at_stdin,                   /* read from stdin here */
 309     at_end                      /* stop parsing the list */
 310   } arg_type;                   /* argument type */
 311   language *lang;               /* language associated with the argument */
 312   char *what;                   /* the argument itself */
 313 } argument;
 314
 315 #ifdef ETAGS_REGEXPS
 316 /* Structure defining a regular expression.
        Only available when ETAGS_REGEXPS is defined.  Elements are
        chained through `p_next'; `p_head' is the head of the list. */
 317 typedef struct regexp
 318 {
 319   struct regexp *p_next;        /* pointer to next in list */
 320   language *lang;               /* if set, use only for this language */
 321   char *pattern;                /* the regexp pattern */
 322   char *name;                   /* tag name */
 323   struct re_pattern_buffer *pat; /* the compiled pattern */
 324   struct re_registers regs;     /* re registers */
 325   bool error_signaled;          /* already signaled for this regexp */
 326   bool force_explicit_name;     /* do not allow implicit tag name */
 327   bool ignore_case;             /* ignore case when matching */
 328   bool multi_line;              /* do a multi-line match on the whole file */
 329 } regexp;
 330 #endif /* ETAGS_REGEXPS */
 331
 332
 333 /* Many compilers barf on this:
 334         Lang_function Ada_funcs;
 335    so let's write it this way
        These are the language parse functions.  All of them have the
        Lang_function signature except C_entries, which takes an extra
        c_ext argument before the stream.  */
 336 static void Ada_funcs __P((FILE *));
 337 static void Asm_labels __P((FILE *));
 338 static void C_entries __P((int c_ext, FILE *));
 339 static void default_C_entries __P((FILE *));
 340 static void plain_C_entries __P((FILE *));
 341 static void Cjava_entries __P((FILE *));
 342 static void Cobol_paragraphs __P((FILE *));
 343 static void Cplusplus_entries __P((FILE *));
 344 static void Cstar_entries __P((FILE *));
 345 static void Erlang_functions __P((FILE *));
 346 static void Forth_words __P((FILE *));
 347 static void Fortran_functions __P((FILE *));
 348 static void HTML_labels __P((FILE *));
 349 static void Lisp_functions __P((FILE *));
 350 static void Lua_functions __P((FILE *));
 351 static void Makefile_targets __P((FILE *));
 352 static void Pascal_functions __P((FILE *));
 353 static void Perl_functions __P((FILE *));
 354 static void PHP_functions __P((FILE *));
 355 static void PS_functions __P((FILE *));
 356 static void Prolog_functions __P((FILE *));
 357 static void Python_functions __P((FILE *));
 358 static void Scheme_functions __P((FILE *));
 359 static void TeX_commands __P((FILE *));
 360 static void Texinfo_nodes __P((FILE *));
 361 static void Yacc_entries __P((FILE *));
 362 static void just_read_file __P((FILE *));
 363
     /* Forward declarations of the remaining functions of this file.
        __P drops the parameter lists on compilers without prototypes. */

     /* Help and version output; program entry point. */
 364 static void print_language_names __P((void));
 365 static void print_version __P((void));
 366 static void print_help __P((argument *));
 367 int main __P((int, char **));
 368
     /* Compressor and language lookup; line input into a linebuffer. */
 369 static compressor *get_compressor_from_suffix __P((char *, char **));
 370 static language *get_language_from_langname __P((const char *));
 371 static language *get_language_from_interpreter __P((char *));
 372 static language *get_language_from_filename __P((char *, bool));
 373 static void readline __P((linebuffer *, FILE *));
 374 static long readline_internal __P((linebuffer *, FILE *));
 375 static bool nocase_tail __P((char *));
 376 static void get_tag __P((char *, char **));
 377
     /* Regexp support exists only when ETAGS_REGEXPS is defined. */
 378 #ifdef ETAGS_REGEXPS
 379 static void analyse_regex __P((char *));
 380 static void free_regexps __P((void));
 381 static void regex_tag_multiline __P((void));
 382 #endif /* ETAGS_REGEXPS */
     /* NOTE(review): `fatal' is the only function of this group that is
        not declared static -- confirm whether the external linkage is
        needed (for instance by the regex code). */
 383 static void error __P((const char *, const char *));
 384 static void suggest_asking_for_help __P((void));
 385 void fatal __P((char *, char *));
 386 static void pfatal __P((char *));
 387 static void add_node __P((node *, node **));
 388
     /* Per-file processing and handling of the tag tree. */
 389 static void init __P((void));
 390 static void process_file_name __P((char *, language *));
 391 static void process_file __P((FILE *, char *, language *));
 392 static void find_entries __P((FILE *));
 393 static void free_tree __P((node *));
 394 static void free_fdesc __P((fdesc *));
 395 static void pfnote __P((char *, bool, char *, int, int, long));
 396 static void make_tag __P((char *, int, bool, char *, int, int, long));
 397 static void invalidate_nodes __P((fdesc *, node **));
 398 static void put_entries __P((node *));
 399
     /* String, file name, linebuffer and memory helpers.  xmalloc and
        xrealloc are what the xnew and xrnew macros expand to in builds
        without DEBUG. */
 400 static char *concat __P((char *, char *, char *));
 401 static char *skip_spaces __P((char *));
 402 static char *skip_non_spaces __P((char *));
 403 static char *savenstr __P((char *, int));
 404 static char *savestr __P((char *));
 405 static char *etags_strchr __P((const char *, int));
 406 static char *etags_strrchr __P((const char *, int));
 407 static int etags_strcasecmp __P((const char *, const char *));
 408 static int etags_strncasecmp __P((const char *, const char *, int));
 409 static char *etags_getcwd __P((void));
 410 static char *relative_filename __P((char *, char *));
 411 static char *absolute_filename __P((char *, char *));
 412 static char *absolute_dirname __P((char *, char *));
 413 static bool filename_is_absolute __P((char *f));
 414 static void canonicalize_filename __P((char *));
 415 static void linebuffer_init __P((linebuffer *));
 416 static void linebuffer_setlen __P((linebuffer *, int));
 417 static PTR xmalloc __P((unsigned int));
 418 static PTR xrealloc __P((char *, unsigned int));
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
     /* Output file and program-wide names. */
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428
     /* State of the file that is currently being processed. */
 429 static fdesc *fdhead;           /* head of file description list */
 430 static fdesc *curfdp;           /* current file description */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
     /* NOTE(review): presumably the character number recorded when no
        valid one is known -- confirm against the users of this value. */
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
     /* Working buffers, all of type linebuffer. */
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
     /* Character class tables, one slot per CHAR value.  They are read
        through the iswhite, notinname, intoken, begtoken and endtoken
        macros.  The strings that follow list the characters of each
        class; presumably `init' fills the tables from them (see init). */
 445 /* boolean "functions" (see init)       */
 446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 447 static char
 448   /* white chars */
 449   *white = " \f\t\n\r\v",
 450   /* not in a name */
 451   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 452   /* token ending chars */
 453   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 454   /* token starting chars */
 455   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 456   /* valid in-token chars */
 457   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 458
     /* Settings selected by command-line options.  Several of them are
        written directly by getopt_long through the flag pointers stored
        in the `longopts' table, which works because `bool' is int. */
 459 static bool append_to_tagfile;  /* -a: append to tags */
 460 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 461 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 463                                 /* 0 struct/enum/union decls, and C++ */
 464                                 /* member functions. */
 465 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 466                                 /* constants and variables. */
 467                                 /* -D: opposite of -d.  Default under ctags. */
 468 static bool globals;            /* create tags for global variables */
 469 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 470 static bool members;            /* create tags for C member variables */
 471 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 472 static bool update;             /* -u: update tags */
 473 static bool vgrind_style;       /* -v: create vgrind style index output */
 474 static bool no_warnings;        /* -w: suppress warnings */
 475 static bool cxref_style;        /* -x: create cxref style output */
 476 static bool cplusplus;          /* .[hc] means C++, not C */
 477 static bool ignoreindent;       /* -I: ignore indentation in C */
 478 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 479
     /* STDIN lies outside the range of a char, so it cannot collide with
        a short option letter. */
 480 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 481 static bool parsing_stdin;      /* --parse-stdin used */
 482
     /* Without ETAGS_REGEXPS there is no regexp list, and need_filebuf is
        the constant FALSE. */
 483 #ifdef ETAGS_REGEXPS
 484 static regexp *p_head;          /* list of all regexps */
 485 static bool need_filebuf;       /* some regexes are multi-line */
 486 #else
 487 # define need_filebuf FALSE
 488 #endif /* ETAGS_REGEXPS */
 489
     /* Long option table handed to getopt_long; only built when
        LONG_OPTIONS is TRUE.  Columns: option name, argument requirement,
        flag pointer, value.  With a NULL flag pointer getopt_long returns
        the value (a short option letter, or STDIN); otherwise it stores
        the value into the pointed-to variable.  The table ends with an
        entry whose name is NULL.
        NOTE(review): "help" is listed twice; the second entry ('H') looks
        unreachable by name, since a lookup would match the first one --
        confirm. */
 490 #if LONG_OPTIONS
 491 static struct option longopts[] =
 492 {
 493   { "append",             no_argument,       NULL,               'a'   },
 494   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 495   { "c++",                no_argument,       NULL,               'C'   },
 496   { "declarations",       no_argument,       &declarations,      TRUE  },
 497   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 498   { "help",               no_argument,       NULL,               'h'   },
 499   { "help",               no_argument,       NULL,               'H'   },
 500   { "ignore-indentation", no_argument,       NULL,               'I'   },
 501   { "language",           required_argument, NULL,               'l'   },
 502   { "members",            no_argument,       &members,           TRUE  },
 503   { "no-members",         no_argument,       &members,           FALSE },
 504   { "output",             required_argument, NULL,               'o'   },
 505 #ifdef ETAGS_REGEXPS
 506   { "regex",              required_argument, NULL,               'r'   },
 507   { "no-regex",           no_argument,       NULL,               'R'   },
 508   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 509 #endif /* ETAGS_REGEXPS */
 510   { "parse-stdin",        required_argument, NULL,               STDIN },
 511   { "version",            no_argument,       NULL,               'V'   },
 512
 513 #if CTAGS /* Ctags options */
 514   { "backward-search",    no_argument,       NULL,               'B'   },
 515   { "cxref",              no_argument,       NULL,               'x'   },
 516   { "defines",            no_argument,       NULL,               'd'   },
 517   { "globals",            no_argument,       &globals,           TRUE  },
 518   { "typedefs",           no_argument,       NULL,               't'   },
 519   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 520   { "update",             no_argument,       NULL,               'u'   },
 521   { "vgrind",             no_argument,       NULL,               'v'   },
 522   { "no-warn",            no_argument,       NULL,               'w'   },
 523
 524 #else /* Etags options */
 525   { "no-defines",         no_argument,       NULL,               'D'   },
 526   { "no-globals",         no_argument,       &globals,           FALSE },
 527   { "include",            required_argument, NULL,               'i'   },
 528 #endif
 529   { NULL }
 530 };
 531 #endif /* LONG_OPTIONS */
 532
     /* Known compressed-file suffixes, each paired with the command that
        decompresses its one argument to stdout.  The entry whose suffix
        is NULL ends the table. */
 533 static compressor compressors[] =
 534 {
 535   { "z", "gzip -d -c"},
 536   { "Z", "gzip -d -c"},
 537   { "gz", "gzip -d -c"},
 538   { "GZ", "gzip -d -c"},
 539   { "bz2", "bzip2 -d -c" },
 540   { NULL }
 541 };
 542
 543 /*
 544  * Language stuff.
      *
      * For each language the tables below give the NULL-terminated list
      * of file name suffixes (for some languages a list of file names or
      * of interpreter names) and the detailed help text.  The `lang_names'
      * table refers to them.
 545  */
 546
 547 /* Ada code */
 548 static char *Ada_suffixes [] =
 549   { "ads", "adb", "ada", NULL };
 550 static char Ada_help [] =
 551 "In Ada code, functions, procedures, packages, tasks and types are\n\
 552 tags.  Use the `--packages-only' option to create tags for\n\
 553 packages only.\n\
 554 Ada tag names have suffixes indicating the type of entity:\n\
 555         Entity type:    Qualifier:\n\
 556         ------------    ----------\n\
 557         function        /f\n\
 558         procedure       /p\n\
 559         package spec    /s\n\
 560         package body    /b\n\
 561         type            /t\n\
 562         task            /k\n\
 563 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 564 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 565 will just search for any tag `bidule'.";
 566
 567 /* Assembly code */
 568 static char *Asm_suffixes [] =
 569   { "a",        /* Unix assembler */
 570     "asm", /* Microcontroller assembly */
 571     "def", /* BSO/Tasking definition includes  */
 572     "inc", /* Microcontroller include files */
 573     "ins", /* Microcontroller include files */
 574     "s", "sa", /* Unix assembler */
 575     "S",   /* cpp-processed Unix assembler */
 576     "src", /* BSO/Tasking C compiler output */
 577     NULL
 578   };
 579 static char Asm_help [] =
 580 "In assembler code, labels appearing at the beginning of a line,\n\
 581 followed by a colon, are tags.";
 582
 583
 584 /* Note that .c and .h can be considered C++, if the --c++ flag was
 585    given, or if the `class' or `template' keywords are met inside the file.
 586    That is why default_C_entries is called for these. */
 587 static char *default_C_suffixes [] =
 588   { "c", "h", NULL };
 589 static char default_C_help [] =
 590 "In C code, any C function or typedef is a tag, and so are\n\
 591 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 592 definitions and `enum' constants are tags unless you specify\n\
 593 `--no-defines'.  Global variables are tags unless you specify\n\
 594 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 595 can make the tags table file much smaller.\n\
 596 You can tag function declarations and external variables by\n\
 597 using `--declarations', and struct members by using `--members'.";
 598
     /* C++ code */
 599 static char *Cplusplus_suffixes [] =
 600   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 601     "M",                        /* Objective C++ */
 602     "pdb",                      /* Postscript with C syntax */
 603     NULL };
 604 static char Cplusplus_help [] =
 605 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 606 --help --lang=c --lang=c++ for full help.)\n\
 607 In addition to C tags, member functions are also recognized, and\n\
 608 optionally member variables if you use the `--members' option.\n\
 609 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 610 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 611 `operator+'.";
 612
     /* Java code */
 613 static char *Cjava_suffixes [] =
 614   { "java", NULL };
 615 static char Cjava_help [] =
 616 "In Java code, all the tags constructs of C and C++ code are\n\
 617 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 618
 619
     /* Cobol code */
 620 static char *Cobol_suffixes [] =
 621   { "COB", "cob", NULL };
 622 static char Cobol_help [] =
 623 "In Cobol code, tags are paragraph names; that is, any word\n\
 624 starting in column 8 and followed by a period.";
 625
     /* C* code: only a suffix list, there is no help text of its own */
 626 static char *Cstar_suffixes [] =
 627   { "cs", "hs", NULL };
 628
     /* Erlang code */
 629 static char *Erlang_suffixes [] =
 630   { "erl", "hrl", NULL };
 631 static char Erlang_help [] =
 632 "In Erlang code, the tags are the functions, records and macros\n\
 633 defined in the file.";
 634
     /* Forth code: NULL-terminated list of file name suffixes.  The table
        is only used inside this file, so it has internal linkage like
        every other suffix table here. */
 635 static char *Forth_suffixes [] =
 636   { "fth", "tok", NULL };
 637 static char Forth_help [] =
 638 "In Forth code, tags are words defined by `:',\n\
 639 constant, code, create, defer, value, variable, buffer:, field.";
 640
     /* Fortran code */
 641 static char *Fortran_suffixes [] =
 642   { "F", "f", "f90", "for", NULL };
 643 static char Fortran_help [] =
 644 "In Fortran code, functions, subroutines and block data are tags.";
 645
     /* HTML files */
 646 static char *HTML_suffixes [] =
 647   { "htm", "html", "shtml", NULL };
 648 static char HTML_help [] =
 649 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 650 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 651 occurrences of `id='.";
 652
     /* Lisp code */
 653 static char *Lisp_suffixes [] =
 654   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 655 static char Lisp_help [] =
 656 "In Lisp code, any function defined with `defun', any variable\n\
 657 defined with `defvar' or `defconst', and in general the first\n\
 658 argument of any expression that starts with `(def' in column zero\n\
 659 is a tag.";
 660
     /* Lua scripts */
 661 static char *Lua_suffixes [] =
 662   { "lua", "LUA", NULL };
 663 static char Lua_help [] =
 664 "In Lua scripts, all functions are tags.";
 665
     /* Makefiles: this is a list of whole file names, not of suffixes */
 666 static char *Makefile_filenames [] =
 667   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 668 static char Makefile_help [] =
 669 "In makefiles, targets are tags; additionally, variables are tags\n\
 670 unless you specify `--no-globals'.";
 671
     /* Objective C code */
 672 static char *Objc_suffixes [] =
 673   { "lm",                       /* Objective lex file */
 674     "m",                        /* Objective C file */
 675      NULL };
 676 static char Objc_help [] =
 677 "In Objective C code, tags include Objective C definitions for classes,\n\
 678 class categories, methods and protocols.  Tags for variables and\n\
 679 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 680
     /* Pascal code */
 681 static char *Pascal_suffixes [] =
 682   { "p", "pas", NULL };
 683 static char Pascal_help [] =
 684 "In Pascal code, the tags are the functions and procedures defined\n\
 685 in the file.";
 686
     /* Perl code: a suffix list plus a list of interpreter names */
 687 static char *Perl_suffixes [] =
 688   { "pl", "pm", NULL };
 689 static char *Perl_interpreters [] =
 690   { "perl", "@PERL@", NULL };
 691 static char Perl_help [] =
 692 "In Perl code, the tags are the packages, subroutines and variables\n\
 693 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 694 `--globals' if you want to tag global variables.  Tags for\n\
 695 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 696 defined in the default package is `main::SUB'.";
 697
     /* PHP code */
 698 static char *PHP_suffixes [] =
 699   { "php", "php3", "php4", NULL };
 700 static char PHP_help [] =
 701 "In PHP code, tags are functions, classes and defines.  When using\n\
 702 the `--members' option, vars are tags too.";
 703
 704 static char *plain_C_suffixes [] =
 705   { "pc",                       /* Pro*C file */
 706      NULL };
 707
 708 static char *PS_suffixes [] =
 709   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 710 static char PS_help [] =
 711 "In PostScript code, the tags are the functions.";
 712
 713 static char *Prolog_suffixes [] =
 714   { "prolog", NULL };
 715 static char Prolog_help [] =
 716 "In Prolog code, tags are predicates and rules at the beginning of\n\
 717 line.";
 718
 719 static char *Python_suffixes [] =
 720   { "py", NULL };
 721 static char Python_help [] =
 722 "In Python code, `def' or `class' at the beginning of a line\n\
 723 generate a tag.";
 724
 725 /* Can't do the `SCM' or `scm' prefix with a version number. */
 726 static char *Scheme_suffixes [] =
 727   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 728 static char Scheme_help [] =
 729 "In Scheme code, tags include anything defined with `def' or with a\n\
 730 construct whose name starts with `def'.  They also include\n\
 731 variables set with `set!' at top level in the file.";
 732
 733 static char *TeX_suffixes [] =
 734   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 735 static char TeX_help [] =
 736 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 737 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 738 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 739 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 740 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 741 \n\
 742 Other commands can be specified by setting the environment variable\n\
 743 `TEXTAGS' to a colon-separated list like, for example,\n\
 744      TEXTAGS=\"mycommand:myothercommand\".";
 745
 746
 /* File-name suffixes listed for Texinfo; NULL ends the list.  */
 static char *Texinfo_suffixes [] = {
   "texi",
   "texinfo",
   "txi",
   NULL
 };
 /* Help text attached to the `texinfo' entry of the language table.  */
 static char Texinfo_help [] =
   "for texinfo files, lines starting with @node are tagged.";
 751
 /* File-name suffixes listed for Yacc/Bison; "ym" is an Objective yacc
    file.  NULL ends the list.  */
 static char *Yacc_suffixes [] = {
   "y",
   "y++",
   "ym",
   "yxx",
   "yy",
   NULL
 };
 /* Help text attached to the `yacc' entry of the language table.  */
 static char Yacc_help [] =
   "In Bison or Yacc input files, each rule defines as a tag the\n"
   "nonterminal it constructs.  The portions of the file that contain\n"
   "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
   "for full help).";
 759
 760 static char auto_help [] =
 761 "`auto' is not a real language, it indicates to use\n\
 762 a default language for files base on file name suffix and file contents.";
 763
 /* Help text attached to the `none' pseudo-language entry.  */
 static char none_help [] =
   "`none' is not a real language, it indicates to only do\n"
   "regexp processing on files.";
 767
 /* Placeholder help text for language-table entries that have no
    description of their own (the `c*' and `proc' rows use it).  */
 static char no_lang_help [] = "No detailed help available for this language.";
 770
 771
 772 /*
 773  * Table of languages.
 774  *
 775  * It is ok for a given function to be listed under more than one
 776  * name.  I just didn't.
      *
      * Judging from the rows below, each entry gives, in order: the
      * language name, its help text, the routine listed for it, its list
      * of file-name suffixes, its list of whole file names (see the
      * `makefile' row) and its list of interpreter names (see the `perl'
      * row).  Only the `yacc' row supplies a seventh value; its meaning is
      * not visible here -- TODO confirm against the declaration of
      * `language'.  Trailing members that a row omits are left
      * zero-initialized, which is why the lookup code checks the
      * filenames, suffixes and interpreters lists for NULL before walking
      * them.
      *
      * The entry whose name is NULL ends the table; every loop over
      * lang_names stops there.
 777  */
 778
 779 static language lang_names [] =
 780 {
 781   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 782   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 783   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 784   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 785   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 786   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 787   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 788   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 789   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 790   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 791   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 792   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 793   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 794   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 795   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 796   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 797   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 798   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 799   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 800   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 801   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 802   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 803   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 804   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 805   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 806   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 807   { "auto",      auto_help },                      /* default guessing scheme */
 808   { "none",      none_help,      just_read_file }, /* regexp matching only */
 809   { NULL }                /* end of list */
 810 };
 811
 812 \f
 813 static void
 814 print_language_names ()
 815 {
 816   language *lang;
 817   char **name, **ext;
 818
 819   puts ("\nThese are the currently supported languages, along with the\n\
 820 default file names and dot suffixes:");
 821   for (lang = lang_names; lang->name != NULL; lang++)
 822     {
 823       printf ("  %-*s", 10, lang->name);
 824       if (lang->filenames != NULL)
 825         for (name = lang->filenames; *name != NULL; name++)
 826           printf (" %s", *name);
 827       if (lang->suffixes != NULL)
 828         for (ext = lang->suffixes; *ext != NULL; ext++)
 829           printf (" .%s", *ext);
 830       puts ("");
 831     }
 832   puts ("where `auto' means use default language for files based on file\n\
 833 name suffix, and `none' means only do regexp processing on files.\n\
 834 If no language is specified and no matching suffix is found,\n\
 835 the first line of the file is read for a sharp-bang (#!) sequence\n\
 836 followed by the name of an interpreter.  If no such sequence is found,\n\
 837 Fortran is tried first; if no tags are found, C is tried next.\n\
 838 When parsing any C file, a \"class\" or \"template\" keyword\n\
 839 switches to C++.");
 840   puts ("Compressed files are supported using gzip and bzip2.\n\
 841 \n\
 842 For detailed help on a given language use, for example,\n\
 843 etags --help --lang=ada.");
 844 }
 845
 846 #ifndef EMACS_NAME
 847 # define EMACS_NAME "standalone"
 848 #endif
 849 #ifndef VERSION
 850 # define VERSION "version"
 851 #endif
 852 static void
 853 print_version ()
 854 {
 855   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 856   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 857   puts ("This program is distributed under the same terms as Emacs");
 858
 859   exit (EXIT_SUCCESS);
 860 }
 861
 862 static void
 863 print_help (argbuffer)
 864      argument *argbuffer;
 865 {
       /* Print help on standard output and exit with EXIT_SUCCESS; this
          function never returns.  ARGBUFFER is scanned up to its at_end
          entry.  If it holds any at_language entries, only the help texts
          of those languages are printed.  Otherwise the usage line, the
          option summary, the language list and the bug-report address are
          printed.  */
 866   bool help_for_lang = FALSE;
 867
 868   for (; argbuffer->arg_type != at_end; argbuffer++)
 869     if (argbuffer->arg_type == at_language)
 870       {
             /* A blank line separates the texts of consecutive languages.  */
 871         if (help_for_lang)
 872           puts ("");
 873         puts (argbuffer->lang->help);
 874         help_for_lang = TRUE;
 875       }
 876
       /* Language-specific help was requested and printed: we are done.  */
 877   if (help_for_lang)
 878     exit (EXIT_SUCCESS);
 879
 880   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 881 \n\
 882 These are the options accepted by %s.\n", progname, progname);
 883   if (LONG_OPTIONS)
 884     puts ("You may use unambiguous abbreviations for the long option names.");
 885   else
 886     puts ("Long option names do not work with this executable, as it is not\n\
 887 linked with GNU getopt.");
 888   puts ("  A - as file name means read names from stdin (one per line).\n\
 889 Absolute names are stored in the output file as they are.\n\
 890 Relative ones are stored relative to the output file's directory.\n");
 891
       /* From here on, CTAGS selects between the ctags and the etags
          wording of each option, or whether the option is listed at all.  */
 892   puts ("-a, --append\n\
 893         Append tag entries to existing tags file.");
 894
 895   puts ("--packages-only\n\
 896         For Ada files, only generate tags for packages.");
 897
 898   if (CTAGS)
 899     puts ("-B, --backward-search\n\
 900         Write the search commands for the tag entries using '?', the\n\
 901         backward-search command instead of '/', the forward-search command.");
 902
 903   /* This option is mostly obsolete, because etags can now automatically
 904      detect C++.  Retained for backward compatibility and for debugging and
 905      experimentation.  In principle, we could want to tag as C++ even
 906      before any "class" or "template" keyword.
 907   puts ("-C, --c++\n\
 908         Treat files whose name suffix defaults to C language as C++ files.");
 909   */
 910
 911   puts ("--declarations\n\
 912         In C and derived languages, create tags for function declarations,");
 913   if (CTAGS)
 914     puts ("\tand create tags for extern variables if --globals is used.");
 915   else
 916     puts
 917       ("\tand create tags for extern variables unless --no-globals is used.");
 918
 919   if (CTAGS)
 920     puts ("-d, --defines\n\
 921         Create tag entries for C #define constants and enum constants, too.");
 922   else
 923     puts ("-D, --no-defines\n\
 924         Don't create tag entries for C #define constants and enum constants.\n\
 925         This makes the tags file smaller.");
 926
 927   if (!CTAGS)
 928     puts ("-i FILE, --include=FILE\n\
 929         Include a note in tag file indicating that, when searching for\n\
 930         a tag, one should also consult the tags file FILE after\n\
 931         checking the current file.");
 932
 933   puts ("-l LANG, --language=LANG\n\
 934         Force the following files to be considered as written in the\n\
 935         named language up to the next --language=LANG option.");
 936
 937   if (CTAGS)
 938     puts ("--globals\n\
 939         Create tag entries for global variables in some languages.");
 940   else
 941     puts ("--no-globals\n\
 942         Do not create tag entries for global variables in some\n\
 943         languages.  This makes the tags file smaller.");
 944   puts ("--members\n\
 945         Create tag entries for members of structures in some languages.");
 946
       /* The regexp options are described only in builds that define
          ETAGS_REGEXPS.  */
 947 #ifdef ETAGS_REGEXPS
 948   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 949         Make a tag for each line matching a regular expression pattern\n\
 950         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 951         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 952         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 953         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 954   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 955         For example Tcl named tags can be created with:\n\
 956           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 957         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 958         `m' means to allow multi-line matches, `s' implies `m' and\n\
 959         causes dot to match any character, including newline.");
 960   puts ("-R, --no-regex\n\
 961         Don't create tags from regexps for the following files.");
 962 #endif /* ETAGS_REGEXPS */
 963   puts ("-I, --ignore-indentation\n\
 964         In C and C++ do not assume that a closing brace in the first\n\
 965         column is the final brace of a function or structure definition.");
 966   puts ("-o FILE, --output=FILE\n\
 967         Write the tags to FILE.");
 968   puts ("--parse-stdin=NAME\n\
 969         Read from standard input and record tags as belonging to file NAME.");
 970
       /* The remaining option descriptions are printed for ctags only.  */
 971   if (CTAGS)
 972     {
 973       puts ("-t, --typedefs\n\
 974         Generate tag entries for C and Ada typedefs.");
 975       puts ("-T, --typedefs-and-c++\n\
 976         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 977         and C++ member functions.");
 978     }
 979
 980   if (CTAGS)
 981     puts ("-u, --update\n\
 982         Update the tag entries for the given files, leaving tag\n\
 983         entries for other files in place.  Currently, this is\n\
 984         implemented by deleting the existing entries for the given\n\
 985         files and then rewriting the new entries at the end of the\n\
 986         tags file.  It is often faster to simply rebuild the entire\n\
 987         tag file than to use this.");
 988
 989   if (CTAGS)
 990     {
 991       puts ("-v, --vgrind\n\
 992         Generates an index of items intended for human consumption,\n\
 993         similar to the output of vgrind.  The index is sorted, and\n\
 994         gives the page number of each item.");
 995       puts ("-w, --no-warn\n\
 996         Suppress warning messages about entries defined in multiple\n\
 997         files.");
 998       puts ("-x, --cxref\n\
 999         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1000         The output uses line numbers instead of page numbers, but\n\
1001         beyond that the differences are cosmetic; try both to see\n\
1002         which you like.");
1003     }
1004
1005   puts ("-V, --version\n\
1006         Print the version of the program.\n\
1007 -h, --help\n\
1008         Print this help message.\n\
1009         Followed by one or more `--language' options prints detailed\n\
1010         help about tag generation for the specified languages.");
1011
1012   print_language_names ();
1013
1014   puts ("");
1015   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1016
1017   exit (EXIT_SUCCESS);
1018 }
1019
1020 \f
1021 #ifdef VMS                      /* VMS specific functions */
1022
/* String terminator.  */
#define EOS '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* Buffer for one file specification: a length word followed by the
   characters, with one spare byte so the text can be terminated with
   EOS.  fn_exp hands the address of the whole structure to
   lib$find_file as the output buffer, so the order of the two members
   must not change.  */
typedef struct
{
  short curlen;                         /* current length of the text in BODY */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the text itself */
} vspec;
1032
1033 /*
1034  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1035  returning in each successive call the next file name matching the input
1036  spec. The function expects that each in_spec passed
1037  to it will be processed to completion; in particular, up to and
1038  including the call following that in which the last matching name
1039  is returned, the function ignores the value of in_spec, and will
1040  only start processing a new spec with the following call.
1041  If an error occurs, on return out_spec contains the value
1042  of in_spec when the error occurred.
1043
1044  With each successive file name returned in out_spec, the
1045  function's return value is one. When there are no more matching
1046  names the function returns zero. If on the first call no file
1047  matches in_spec, or there is any other error, -1 is returned.
1048 */
1049
1050 #include        <rmsdef.h>
1051 #include        <descrip.h>
1052 #define         OUTSIZE MAX_FILE_SPEC_LEN
1053 static short
1054 fn_exp (out, in)
1055      vspec *out;
1056      char *in;
1057 {
1058   static long context = 0;
1059   static struct dsc$descriptor_s o;
1060   static struct dsc$descriptor_s i;
1061   static bool pass1 = TRUE;
1062   long status;
1063   short retval;
1064
1065   if (pass1)
1066     {
1067       pass1 = FALSE;
1068       o.dsc$a_pointer = (char *) out;
1069       o.dsc$w_length = (short)OUTSIZE;
1070       i.dsc$a_pointer = in;
1071       i.dsc$w_length = (short)strlen(in);
1072       i.dsc$b_dtype = DSC$K_DTYPE_T;
1073       i.dsc$b_class = DSC$K_CLASS_S;
1074       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1075       o.dsc$b_class = DSC$K_CLASS_VS;
1076     }
1077   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1078     {
1079       out->body[out->curlen] = EOS;
1080       return 1;
1081     }
1082   else if (status == RMS$_NMF)
1083     retval = 0;
1084   else
1085     {
1086       strcpy(out->body, in);
1087       retval = -1;
1088     }
1089   lib$find_file_end(&context);
1090   pass1 = TRUE;
1091   return retval;
1092 }
1093
1094 /*
1095   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1096   name of each file specified by the provided arg expanding wildcards.
1097 */
1098 static char *
1099 gfnames (arg, p_error)
1100      char *arg;
1101      bool *p_error;
1102 {
1103   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1104
1105   switch (fn_exp (&filename, arg))
1106     {
1107     case 1:
1108       *p_error = FALSE;
1109       return filename.body;
1110     case 0:
1111       *p_error = FALSE;
1112       return NULL;
1113     default:
1114       *p_error = TRUE;
1115       return filename.body;
1116     }
1117 }
1118
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for the C library `system' where it is missing: report that
 * CMD cannot be run and return -1.
 *
 * The return type is spelled out and a value is always returned because
 * main compares the result with EXIT_SUCCESS and also passes it to
 * exit(); using the value of a function that runs off its closing brace
 * is undefined.  -1 differs from EXIT_SUCCESS, so callers see a failure.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1126
1127 #define VERSION_DELIM   ';'
1128 char *massage_name (s)
1129      char *s;
1130 {
1131   char *start = s;
1132
1133   for ( ; *s; s++)
1134     if (*s == VERSION_DELIM)
1135       {
1136         *s = EOS;
1137         break;
1138       }
1139     else
1140       *s = lowcase (*s);
1141   return start;
1142 }
1143 #endif /* VMS */
1144
1145 \f
1146 int
1147 main (argc, argv)
1148      int argc;
1149      char *argv[];
1150 {
       /* Program entry point.  The command line is first turned into
          ARGBUFFER, an ordered list of language switches, regexps and
          input names ending with an at_end entry.  The entries are then
          processed in order and the tags file is written.  Help and
          version requests, and every fatal error, leave through exit().  */
1151   int i;
1152   unsigned int nincluded_files;
1153   char **included_files;
1154   argument *argbuffer;
1155   int current_arg, file_count;
1156   linebuffer filename_lb;
1157   bool help_asked = FALSE;
1158 #ifdef VMS
1159   bool got_err;
1160 #endif
1161  char *optstring;
1162  int opt;
1163
1164
1165 #ifdef DOS_NT
1166   _fmode = O_BINARY;   /* all of files are treated as binary files */
1167 #endif /* DOS_NT */
1168
1169   progname = argv[0];
1170   nincluded_files = 0;
1171   included_files = xnew (argc, char *);
1172   current_arg = 0;
1173   file_count = 0;
1174
1175   /* Allocate enough no matter what happens.  Overkill, but each one
1176      is small. */
       /* NOTE(review): several short options bundled into one argv element
          (for instance -RR) each add an entry below without using up
          another argv element -- confirm that argc entries, including the
          final at_end one, always suffice.  */
1177   argbuffer = xnew (argc, argument);
1178
1179   /*
1180    * If etags, always find typedefs and structure tags.  Why not?
1181    * Also default to find macro constants, enum constants and
1182    * global variables.
1183    */
1184   if (!CTAGS)
1185     {
1186       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1187       globals = TRUE;
1188     }
1189
1190   /* When the optstring begins with a '-' getopt_long does not rearrange the
1191      non-options arguments to be at the end, but leaves them alone. */
1192   optstring = "-";
1193 #ifdef ETAGS_REGEXPS
1194   optstring = "-r:Rc:";
1195 #endif /* ETAGS_REGEXPS */
1196   if (!LONG_OPTIONS)
1197     optstring += 1;             /* remove the initial '-' */
       /* Append the options common to both programs, then the ones that
          only ctags or only etags accepts.  */
1198   optstring = concat (optstring,
1199                       "aCf:Il:o:SVhH",
1200                       (CTAGS) ? "BxdtTuvw" : "Di:");
1201
1202   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1203     switch (opt)
1204       {
1205       case 0:
1206         /* If getopt returns 0, then it has already processed a
1207            long-named option.  We should do nothing.  */
1208         break;
1209
1210       case 1:
1211         /* This means that a file name has been seen.  Record it. */
1212         argbuffer[current_arg].arg_type = at_filename;
1213         argbuffer[current_arg].what     = optarg;
1214         ++current_arg;
1215         ++file_count;
1216         break;
1217
1218       case STDIN:
1219         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1220         argbuffer[current_arg].arg_type = at_stdin;
1221         argbuffer[current_arg].what     = optarg;
1222         ++current_arg;
1223         ++file_count;
1224         if (parsing_stdin)
1225           fatal ("cannot parse standard input more than once", (char *)NULL);
1226         parsing_stdin = TRUE;
1227         break;
1228
1229         /* Common options. */
1230       case 'a': append_to_tagfile = TRUE;       break;
1231       case 'C': cplusplus = TRUE;               break;
1232       case 'f':         /* for compatibility with old makefiles */
1233       case 'o':
1234         if (tagfile)
1235           {
1236             error ("-o option may only be given once.", (char *)NULL);
1237             suggest_asking_for_help ();
1238             /* NOTREACHED */
1239           }
1240         tagfile = optarg;
1241         break;
1242       case 'I':
1243       case 'S':         /* for backward compatibility */
1244         ignoreindent = TRUE;
1245         break;
1246       case 'l':
1247         {
               /* A name that is not in the language table is not recorded;
                  get_language_from_langname has already reported it.  */
1248           language *lang = get_language_from_langname (optarg);
1249           if (lang != NULL)
1250             {
1251               argbuffer[current_arg].lang = lang;
1252               argbuffer[current_arg].arg_type = at_language;
1253               ++current_arg;
1254             }
1255         }
1256         break;
1257       case 'c':
1258         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1259         optarg = concat (optarg, "i", ""); /* memory leak here */
1260         /* FALLTHRU */
1261       case 'r':
1262         argbuffer[current_arg].arg_type = at_regexp;
1263         argbuffer[current_arg].what = optarg;
1264         ++current_arg;
1265         break;
1266       case 'R':
             /* Recorded as a regexp entry with no pattern.  */
1267         argbuffer[current_arg].arg_type = at_regexp;
1268         argbuffer[current_arg].what = NULL;
1269         ++current_arg;
1270         break;
1271       case 'V':
1272         print_version ();
1273         break;
1274       case 'h':
1275       case 'H':
             /* Help is printed after the whole command line has been read,
                so that print_help sees every language option.  */
1276         help_asked = TRUE;
1277         break;
1278
1279         /* Etags options */
1280       case 'D': constantypedefs = FALSE;                        break;
1281       case 'i': included_files[nincluded_files++] = optarg;     break;
1282
1283         /* Ctags options. */
1284       case 'B': searchar = '?';                                 break;
1285       case 'd': constantypedefs = TRUE;                         break;
1286       case 't': typedefs = TRUE;                                break;
1287       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1288       case 'u': update = TRUE;                                  break;
1289       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1290       case 'x': cxref_style = TRUE;                             break;
1291       case 'w': no_warnings = TRUE;                             break;
1292       default:
1293         suggest_asking_for_help ();
1294         /* NOTREACHED */
1295       }
1296
1297   /* No more options.  Store the rest of arguments. */
1298   for (; optind < argc; optind++)
1299     {
1300       argbuffer[current_arg].arg_type = at_filename;
1301       argbuffer[current_arg].what = argv[optind];
1302       ++current_arg;
1303       ++file_count;
1304     }
1305
       /* Terminate the list; print_help stops at this entry.  */
1306   argbuffer[current_arg].arg_type = at_end;
1307
1308   if (help_asked)
1309     print_help (argbuffer);
1310     /* NOTREACHED */
1311
1312   if (nincluded_files == 0 && file_count == 0)
1313     {
1314       error ("no input files specified.", (char *)NULL);
1315       suggest_asking_for_help ();
1316       /* NOTREACHED */
1317     }
1318
       /* Default output name: "tags" for ctags, "TAGS" for etags.  */
1319   if (tagfile == NULL)
1320     tagfile = CTAGS ? "tags" : "TAGS";
1321   cwd = etags_getcwd ();        /* the current working directory */
       /* Make sure cwd ends with a slash.  */
1322   if (cwd[strlen (cwd) - 1] != '/')
1323     {
1324       char *oldcwd = cwd;
1325       cwd = concat (oldcwd, "/", "");
1326       free (oldcwd);
1327     }
1328   /* Relative file names are made relative to the current directory. */
1329   if (streq (tagfile, "-")
1330       || strneq (tagfile, "/dev/", 5))
1331     tagfiledir = cwd;
1332   else
1333     tagfiledir = absolute_dirname (tagfile, cwd);
1334
1335   init ();                      /* set up boolean "functions" */
1336
1337   linebuffer_init (&lb);
1338   linebuffer_init (&filename_lb);
1339   linebuffer_init (&filebuf);
1340   linebuffer_init (&token_name);
1341
       /* etags opens its output before reading any input ("-" selects
          standard output); ctags opens the tags file only after all input
          has been processed, further down.  */
1342   if (!CTAGS)
1343     {
1344       if (streq (tagfile, "-"))
1345         {
1346           tagf = stdout;
1347 #ifdef DOS_NT
1348           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1349              doesn't take effect until after `stdout' is already open). */
1350           if (!isatty (fileno (stdout)))
1351             setmode (fileno (stdout), O_BINARY);
1352 #endif /* DOS_NT */
1353         }
1354       else
1355         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1356       if (tagf == NULL)
1357         pfatal (tagfile);
1358     }
1359
1360   /*
1361    * Loop through files finding functions.
1362    */
1363   for (i = 0; i < current_arg; i++)
1364     {
1365       static language *lang;    /* non-NULL if language is forced */
1366       char *this_file;
1367
1368       switch (argbuffer[i].arg_type)
1369         {
1370         case at_language:
1371           lang = argbuffer[i].lang;
1372           break;
1373 #ifdef ETAGS_REGEXPS
1374         case at_regexp:
1375           analyse_regex (argbuffer[i].what);
1376           break;
1377 #endif
1378         case at_filename:
               /* On VMS the argument is expanded by gfnames and the code
                  below runs once per expanded name; the closing brace of
                  this while loop sits under the matching #ifdef VMS
                  further down.  Elsewhere the argument is used as is.  */
1379 #ifdef VMS
1380           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1381             {
1382               if (got_err)
1383                 {
1384                   error ("can't find file %s\n", this_file);
1385                   argc--, argv++;
1386                 }
1387               else
1388                 {
1389                   this_file = massage_name (this_file);
1390                 }
1391 #else
1392               this_file = argbuffer[i].what;
1393 #endif
1394               /* Input file named "-" means read file names from stdin
1395                  (one per line) and use them. */
1396               if (streq (this_file, "-"))
1397                 {
1398                   if (parsing_stdin)
1399                     fatal ("cannot parse standard input AND read file names from it",
1400                            (char *)NULL);
1401                   while (readline_internal (&filename_lb, stdin) > 0)
1402                     process_file_name (filename_lb.buffer, lang);
1403                 }
1404               else
1405                 process_file_name (this_file, lang);
1406 #ifdef VMS
1407             }
1408 #endif
1409           break;
1410         case at_stdin:
1411           this_file = argbuffer[i].what;
1412           process_file (stdin, this_file, lang);
1413           break;
1414         }
1415     }
1416
1417 #ifdef ETAGS_REGEXPS
1418   free_regexps ();
1419 #endif /* ETAGS_REGEXPS */
1420   free (lb.buffer);
1421   free (filebuf.buffer);
1422   free (token_name.buffer);
1423
       /* etags output and ctags cross-reference output are finished here;
          this branch always ends in exit().
          NOTE(review): when CTAGS and cxref_style are both set, nothing
          above in this function has assigned tagf, yet fclose (tagf) is
          still reached below -- confirm tagf is a valid stream on that
          path.  */
1424   if (!CTAGS || cxref_style)
1425     {
1426       put_entries (nodehead);   /* write the remaining tags (ETAGS) */
1427       free_tree (nodehead);
1428       nodehead = NULL;
1429       if (!CTAGS)
1430         {
1431           fdesc *fdp;
1432
1433           /* Output file entries that have no tags. */
1434           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1435             if (!fdp->written)
1436               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1437
                 /* One include entry per -i option, in command-line order.  */
1438           while (nincluded_files-- > 0)
1439             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1440         }
1441
1442       if (fclose (tagf) == EOF)
1443         pfatal (tagfile);
1444       exit (EXIT_SUCCESS);
1445     }
1446
       /* Only ctags without cross-reference output gets here.  With
          --update, the old entries of every input file are first removed
          from the existing tags file by a shell pipeline, and the new
          entries are then appended.  */
1447   if (update)
1448     {
1449       char cmd[BUFSIZ];
1450       for (i = 0; i < current_arg; ++i)
1451         {
1452           switch (argbuffer[i].arg_type)
1453             {
1454             case at_filename:
1455             case at_stdin:
1456               break;
1457             default:
1458               continue;         /* the for loop */
1459             }
               /* NOTE(review): cmd holds BUFSIZ bytes, but the lengths of
                  tagfile and of the file name are not checked before
                  sprintf; both are also pasted unquoted into a shell
                  command line.  */
1460           sprintf (cmd,
1461                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1462                    tagfile, argbuffer[i].what, tagfile);
1463           if (system (cmd) != EXIT_SUCCESS)
1464             fatal ("failed to execute shell command", (char *)NULL);
1465         }
1466       append_to_tagfile = TRUE;
1467     }
1468
1469   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1470   if (tagf == NULL)
1471     pfatal (tagfile);
1472   put_entries (nodehead);       /* write all the tags (CTAGS) */
1473   free_tree (nodehead);
1474   nodehead = NULL;
1475   if (fclose (tagf) == EOF)
1476     pfatal (tagfile);
1477
       /* When entries were appended, the tags file is re-sorted in place
          by the external sort program, whose status becomes our exit
          status.  */
1478   if (CTAGS)
1479     if (append_to_tagfile || update)
1480       {
1481         char cmd[2*BUFSIZ+10];
1482         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1483         exit (system (cmd));
1484       }
1485   return EXIT_SUCCESS;
1486 }
1487
1488
1489 /*
1490  * Return a compressor given the file name.  If EXTPTR is non-zero,
1491  * return a pointer into FILE where the compressor-specific
1492  * extension begins.  If no compressor is found, NULL is returned
1493  * and EXTPTR is not significant.
1494  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1495  */
1496 static compressor *
1497 get_compressor_from_suffix (file, extptr)
1498      char *file;
1499      char **extptr;
1500 {
1501   compressor *compr;
1502   char *slash, *suffix;
1503
1504   /* This relies on FN to be after canonicalize_filename,
1505      so we don't need to consider backslashes on DOS_NT.  */
1506   slash = etags_strrchr (file, '/');
1507   suffix = etags_strrchr (file, '.');
1508   if (suffix == NULL || suffix < slash)
1509     return NULL;
1510   if (extptr != NULL)
1511     *extptr = suffix;
1512   suffix += 1;
1513   /* Let those poor souls who live with DOS 8+3 file name limits get
1514      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1515      Only the first do loop is run if not MSDOS */
1516   do
1517     {
1518       for (compr = compressors; compr->suffix != NULL; compr++)
1519         if (streq (compr->suffix, suffix))
1520           return compr;
1521       if (!MSDOS)
1522         break;                  /* do it only once: not really a loop */
1523       if (extptr != NULL)
1524         *extptr = ++suffix;
1525     } while (*suffix != '\0');
1526   return NULL;
1527 }
1528
1529
1530
1531 /*
1532  * Return a language given the name.
1533  */
1534 static language *
1535 get_language_from_langname (name)
1536      const char *name;
1537 {
1538   language *lang;
1539
1540   if (name == NULL)
1541     error ("empty language name", (char *)NULL);
1542   else
1543     {
1544       for (lang = lang_names; lang->name != NULL; lang++)
1545         if (streq (name, lang->name))
1546           return lang;
1547       error ("unknown language \"%s\"", name);
1548     }
1549
1550   return NULL;
1551 }
1552
1553
1554 /*
1555  * Return a language given the interpreter name.
1556  */
1557 static language *
1558 get_language_from_interpreter (interpreter)
1559      char *interpreter;
1560 {
1561   language *lang;
1562   char **iname;
1563
1564   if (interpreter == NULL)
1565     return NULL;
1566   for (lang = lang_names; lang->name != NULL; lang++)
1567     if (lang->interpreters != NULL)
1568       for (iname = lang->interpreters; *iname != NULL; iname++)
1569         if (streq (*iname, interpreter))
1570             return lang;
1571
1572   return NULL;
1573 }
1574
1575
1576
1577 /*
1578  * Return a language given the file name.
1579  */
1580 static language *
1581 get_language_from_filename (file, case_sensitive)
1582      char *file;
1583      bool case_sensitive;
1584 {
1585   language *lang;
1586   char **name, **ext, *suffix;
1587
1588   /* Try whole file name first. */
1589   for (lang = lang_names; lang->name != NULL; lang++)
1590     if (lang->filenames != NULL)
1591       for (name = lang->filenames; *name != NULL; name++)
1592         if ((case_sensitive)
1593             ? streq (*name, file)
1594             : strcaseeq (*name, file))
1595           return lang;
1596
1597   /* If not found, try suffix after last dot. */
1598   suffix = etags_strrchr (file, '.');
1599   if (suffix == NULL)
1600     return NULL;
1601   suffix += 1;
1602   for (lang = lang_names; lang->name != NULL; lang++)
1603     if (lang->suffixes != NULL)
1604       for (ext = lang->suffixes; *ext != NULL; ext++)
1605         if ((case_sensitive)
1606             ? streq (*ext, suffix)
1607             : strcaseeq (*ext, suffix))
1608           return lang;
1609   return NULL;
1610 }
1611
1612 \f
1613 /*
1614  * This routine is called on each file argument.
1615  */
1616 static void
1617 process_file_name (file, lang)
1618      char *file;
1619      language *lang;
1620 {
1621   struct stat stat_buf;
1622   FILE *inf;
1623   fdesc *fdp;
1624   compressor *compr;
1625   char *compressed_name, *uncompressed_name;
1626   char *ext, *real_name;
1627   int retval;
1628
1629   canonicalize_filename (file);
1630   if (streq (file, tagfile) && !streq (tagfile, "-"))
1631     {
1632       error ("skipping inclusion of %s in self.", file);
1633       return;
1634     }
1635   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1636     {
1637       compressed_name = NULL;
1638       real_name = uncompressed_name = savestr (file);
1639     }
1640   else
1641     {
1642       real_name = compressed_name = savestr (file);
1643       uncompressed_name = savenstr (file, ext - file);
1644     }
1645
1646   /* If the canonicalized uncompressed name
1647      has already been dealt with, skip it silently. */
1648   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1649     {
1650       assert (fdp->infname != NULL);
1651       if (streq (uncompressed_name, fdp->infname))
1652         goto cleanup;
1653     }
1654
1655   if (stat (real_name, &stat_buf) != 0)
1656     {
1657       /* Reset real_name and try with a different name. */
1658       real_name = NULL;
1659       if (compressed_name != NULL) /* try with the given suffix */
1660         {
1661           if (stat (uncompressed_name, &stat_buf) == 0)
1662             real_name = uncompressed_name;
1663         }
1664       else                      /* try all possible suffixes */
1665         {
1666           for (compr = compressors; compr->suffix != NULL; compr++)
1667             {
1668               compressed_name = concat (file, ".", compr->suffix);
1669               if (stat (compressed_name, &stat_buf) != 0)
1670                 {
1671                   if (MSDOS)
1672                     {
1673                       char *suf = compressed_name + strlen (file);
1674                       size_t suflen = strlen (compr->suffix) + 1;
1675                       for ( ; suf[1]; suf++, suflen--)
1676                         {
1677                           memmove (suf, suf + 1, suflen);
1678                           if (stat (compressed_name, &stat_buf) == 0)
1679                             {
1680                               real_name = compressed_name;
1681                               break;
1682                             }
1683                         }
1684                       if (real_name != NULL)
1685                         break;
1686                     } /* MSDOS */
1687                   free (compressed_name);
1688                   compressed_name = NULL;
1689                 }
1690               else
1691                 {
1692                   real_name = compressed_name;
1693                   break;
1694                 }
1695             }
1696         }
1697       if (real_name == NULL)
1698         {
1699           perror (file);
1700           goto cleanup;
1701         }
1702     } /* try with a different name */
1703
1704   if (!S_ISREG (stat_buf.st_mode))
1705     {
1706       error ("skipping %s: it is not a regular file.", real_name);
1707       goto cleanup;
1708     }
1709   if (real_name == compressed_name)
1710     {
1711       char *cmd = concat (compr->command, " ", real_name);
1712       inf = (FILE *) popen (cmd, "r");
1713       free (cmd);
1714     }
1715   else
1716     inf = fopen (real_name, "r");
1717   if (inf == NULL)
1718     {
1719       perror (real_name);
1720       goto cleanup;
1721     }
1722
1723   process_file (inf, uncompressed_name, lang);
1724
1725   if (real_name == compressed_name)
1726     retval = pclose (inf);
1727   else
1728     retval = fclose (inf);
1729   if (retval < 0)
1730     pfatal (file);
1731
1732  cleanup:
1733   if (compressed_name) free (compressed_name);
1734   if (uncompressed_name) free (uncompressed_name);
1735   last_node = NULL;
1736   curfdp = NULL;
1737   return;
1738 }
1739
1740 static void
1741 process_file (fh, fn, lang)
1742      FILE *fh;
1743      char *fn;
1744      language *lang;
1745 {
1746   static const fdesc emptyfdesc;
1747   fdesc *fdp;
1748
1749   /* Create a new input file description entry. */
1750   fdp = xnew (1, fdesc);
1751   *fdp = emptyfdesc;
1752   fdp->next = fdhead;
1753   fdp->infname = savestr (fn);
1754   fdp->lang = lang;
1755   fdp->infabsname = absolute_filename (fn, cwd);
1756   fdp->infabsdir = absolute_dirname (fn, cwd);
1757   if (filename_is_absolute (fn))
1758     {
1759       /* An absolute file name.  Canonicalize it. */
1760       fdp->taggedfname = absolute_filename (fn, NULL);
1761     }
1762   else
1763     {
1764       /* A file name relative to cwd.  Make it relative
1765          to the directory of the tags file. */
1766       fdp->taggedfname = relative_filename (fn, tagfiledir);
1767     }
1768   fdp->usecharno = TRUE;        /* use char position when making tags */
1769   fdp->prop = NULL;
1770   fdp->written = FALSE;         /* not written on tags file yet */
1771
1772   fdhead = fdp;
1773   curfdp = fdhead;              /* the current file description */
1774
1775   find_entries (fh);
1776
1777   /* If not Ctags, and if this is not metasource and if it contained no #line
1778      directives, we can write the tags and free all nodes pointing to
1779      curfdp. */
1780   if (!CTAGS
1781       && curfdp->usecharno      /* no #line directives in this file */
1782       && !curfdp->lang->metasource)
1783     {
1784       node *np, *prev;
1785
1786       /* Look for the head of the sublist relative to this file.  See add_node
1787          for the structure of the node tree. */
1788       prev = NULL;
1789       for (np = nodehead; np != NULL; prev = np, np = np->left)
1790         if (np->fdp == curfdp)
1791           break;
1792
1793       /* If we generated tags for this file, write and delete them. */
1794       if (np != NULL)
1795         {
1796           /* This is the head of the last sublist, if any.  The following
1797              instructions depend on this being true. */
1798           assert (np->left == NULL);
1799
1800           assert (fdhead == curfdp);
1801           assert (last_node->fdp == curfdp);
1802           put_entries (np);     /* write tags for file curfdp->taggedfname */
1803           free_tree (np);       /* remove the written nodes */
1804           if (prev == NULL)
1805             nodehead = NULL;    /* no nodes left */
1806           else
1807             prev->left = NULL;  /* delete the pointer to the sublist */
1808         }
1809     }
1810 }
1811
1812 /*
1813  * This routine sets up the boolean pseudo-functions which work
1814  * by setting boolean flags dependent upon the corresponding character.
1815  * Every char which is NOT in that string is not a white char.  Therefore,
1816  * all of the array "_wht" is set to FALSE, and then the elements
1817  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1818  * of a char is TRUE if it is the string "white", else FALSE.
1819  */
1820 static void
1821 init ()
1822 {
1823   register char *sp;
1824   register int i;
1825
1826   for (i = 0; i < CHARS; i++)
1827     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1828   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1829   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1830   notinname('\0') = notinname('\n');
1831   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1832   begtoken('\0') = begtoken('\n');
1833   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1834   intoken('\0') = intoken('\n');
1835   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1836   endtoken('\0') = endtoken('\n');
1837 }
1838
1839 /*
1840  * This routine opens the specified file and calls the function
1841  * which finds the function and type definitions.
1842  */
1843 static void
1844 find_entries (inf)
1845      FILE *inf;
1846 {
1847   char *cp;
1848   language *lang = curfdp->lang;
1849   Lang_function *parser = NULL;
1850
1851   /* If user specified a language, use it. */
1852   if (lang != NULL && lang->function != NULL)
1853     {
1854       parser = lang->function;
1855     }
1856
1857   /* Else try to guess the language given the file name. */
1858   if (parser == NULL)
1859     {
1860       lang = get_language_from_filename (curfdp->infname, TRUE);
1861       if (lang != NULL && lang->function != NULL)
1862         {
1863           curfdp->lang = lang;
1864           parser = lang->function;
1865         }
1866     }
1867
1868   /* Else look for sharp-bang as the first two characters. */
1869   if (parser == NULL
1870       && readline_internal (&lb, inf) > 0
1871       && lb.len >= 2
1872       && lb.buffer[0] == '#'
1873       && lb.buffer[1] == '!')
1874     {
1875       char *lp;
1876
1877       /* Set lp to point at the first char after the last slash in the
1878          line or, if no slashes, at the first nonblank.  Then set cp to
1879          the first successive blank and terminate the string. */
1880       lp = etags_strrchr (lb.buffer+2, '/');
1881       if (lp != NULL)
1882         lp += 1;
1883       else
1884         lp = skip_spaces (lb.buffer + 2);
1885       cp = skip_non_spaces (lp);
1886       *cp = '\0';
1887
1888       if (strlen (lp) > 0)
1889         {
1890           lang = get_language_from_interpreter (lp);
1891           if (lang != NULL && lang->function != NULL)
1892             {
1893               curfdp->lang = lang;
1894               parser = lang->function;
1895             }
1896         }
1897     }
1898
1899   /* We rewind here, even if inf may be a pipe.  We fail if the
1900      length of the first line is longer than the pipe block size,
1901      which is unlikely. */
1902   rewind (inf);
1903
1904   /* Else try to guess the language given the case insensitive file name. */
1905   if (parser == NULL)
1906     {
1907       lang = get_language_from_filename (curfdp->infname, FALSE);
1908       if (lang != NULL && lang->function != NULL)
1909         {
1910           curfdp->lang = lang;
1911           parser = lang->function;
1912         }
1913     }
1914
1915   /* Else try Fortran or C. */
1916   if (parser == NULL)
1917     {
1918       node *old_last_node = last_node;
1919
1920       curfdp->lang = get_language_from_langname ("fortran");
1921       find_entries (inf);
1922
1923       if (old_last_node == last_node)
1924         /* No Fortran entries found.  Try C. */
1925         {
1926           /* We do not tag if rewind fails.
1927              Only the file name will be recorded in the tags file. */
1928           rewind (inf);
1929           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1930           find_entries (inf);
1931         }
1932       return;
1933     }
1934
1935   if (!no_line_directive
1936       && curfdp->lang != NULL && curfdp->lang->metasource)
1937     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1938        file, or anyway we parsed a file that is automatically generated from
1939        this one.  If this is the case, the bingo.c file contained #line
1940        directives that generated tags pointing to this file.  Let's delete
1941        them all before parsing this file, which is the real source. */
1942     {
1943       fdesc **fdpp = &fdhead;
1944       while (*fdpp != NULL)
1945         if (*fdpp != curfdp
1946             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1947           /* We found one of those!  We must delete both the file description
1948              and all tags referring to it. */
1949           {
1950             fdesc *badfdp = *fdpp;
1951
1952             /* Delete the tags referring to badfdp->taggedfname
1953                that were obtained from badfdp->infname. */
1954             invalidate_nodes (badfdp, &nodehead);
1955
1956             *fdpp = badfdp->next; /* remove the bad description from the list */
1957             free_fdesc (badfdp);
1958           }
1959         else
1960           fdpp = &(*fdpp)->next; /* advance the list pointer */
1961     }
1962
1963   assert (parser != NULL);
1964
1965   /* Generic initialisations before reading from file. */
1966   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1967
1968   /* Generic initialisations before parsing file with readline. */
1969   lineno = 0;                  /* reset global line number */
1970   charno = 0;                  /* reset global char number */
1971   linecharno = 0;              /* reset global char number of line start */
1972
1973   parser (inf);
1974
1975 #ifdef ETAGS_REGEXPS
1976   regex_tag_multiline ();
1977 #endif /* ETAGS_REGEXPS */
1978 }
1979
1980 \f
1981 /*
1982  * Check whether an implicitly named tag should be created,
1983  * then call `pfnote'.
1984  * NAME is a string that is internally copied by this function.
1985  *
1986  * TAGS format specification
1987  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1988  * The following is explained in some more detail in etc/ETAGS.EBNF.
1989  *
1990  * make_tag creates tags with "implicit tag names" (unnamed tags)
1991  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1992  *  1. NAME does not contain any of the characters in NONAM;
1993  *  2. LINESTART contains name as either a rightmost, or rightmost but
1994  *     one character, substring;
1995  *  3. the character, if any, immediately before NAME in LINESTART must
1996  *     be a character in NONAM;
1997  *  4. the character, if any, immediately after NAME in LINESTART must
1998  *     also be a character in NONAM.
1999  *
2000  * The implementation uses the notinname() macro, which recognises the
2001  * characters stored in the string `nonam'.
2002  * etags.el needs to use the same characters that are in NONAM.
2003  */
2004 static void
2005 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2006      char *name;                /* tag name, or NULL if unnamed */
2007      int namelen;               /* tag length */
2008      bool is_func;              /* tag is a function */
2009      char *linestart;           /* start of the line where tag is */
2010      int linelen;               /* length of the line where tag is */
2011      int lno;                   /* line number */
2012      long cno;                  /* character number */
2013 {
2014   bool named = (name != NULL && namelen > 0);
2015
2016   if (!CTAGS && named)          /* maybe set named to false */
2017     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2018        such that etags.el can guess a name from it. */
2019     {
2020       int i;
2021       register char *cp = name;
2022
2023       for (i = 0; i < namelen; i++)
2024         if (notinname (*cp++))
2025           break;
2026       if (i == namelen)                         /* rule #1 */
2027         {
2028           cp = linestart + linelen - namelen;
2029           if (notinname (linestart[linelen-1]))
2030             cp -= 1;                            /* rule #4 */
2031           if (cp >= linestart                   /* rule #2 */
2032               && (cp == linestart
2033                   || notinname (cp[-1]))        /* rule #3 */
2034               && strneq (name, cp, namelen))    /* rule #2 */
2035             named = FALSE;      /* use implicit tag name */
2036         }
2037     }
2038
2039   if (named)
2040     name = savenstr (name, namelen);
2041   else
2042     name = NULL;
2043   pfnote (name, is_func, linestart, linelen, lno, cno);
2044 }
2045
2046 /* Record a tag. */
2047 static void
2048 pfnote (name, is_func, linestart, linelen, lno, cno)
2049      char *name;                /* tag name, or NULL if unnamed */
2050      bool is_func;              /* tag is a function */
2051      char *linestart;           /* start of the line where tag is */
2052      int linelen;               /* length of the line where tag is */
2053      int lno;                   /* line number */
2054      long cno;                  /* character number */
2055 {
2056   register node *np;
2057
2058   assert (name == NULL || name[0] != '\0');
2059   if (CTAGS && name == NULL)
2060     return;
2061
2062   np = xnew (1, node);
2063
2064   /* If ctags mode, change name "main" to M<thisfilename>. */
2065   if (CTAGS && !cxref_style && streq (name, "main"))
2066     {
2067       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2068       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2069       fp = etags_strrchr (np->name, '.');
2070       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2071         fp[0] = '\0';
2072     }
2073   else
2074     np->name = name;
2075   np->valid = TRUE;
2076   np->been_warned = FALSE;
2077   np->fdp = curfdp;
2078   np->is_func = is_func;
2079   np->lno = lno;
2080   if (np->fdp->usecharno)
2081     /* Our char numbers are 0-base, because of C language tradition?
2082        ctags compatibility?  old versions compatibility?   I don't know.
2083        Anyway, since emacs's are 1-base we expect etags.el to take care
2084        of the difference.  If we wanted to have 1-based numbers, we would
2085        uncomment the +1 below. */
2086     np->cno = cno /* + 1 */ ;
2087   else
2088     np->cno = invalidcharno;
2089   np->left = np->right = NULL;
2090   if (CTAGS && !cxref_style)
2091     {
2092       if (strlen (linestart) < 50)
2093         np->regex = concat (linestart, "$", "");
2094       else
2095         np->regex = savenstr (linestart, 50);
2096     }
2097   else
2098     np->regex = savenstr (linestart, linelen);
2099
2100   add_node (np, &nodehead);
2101 }
2102
2103 /*
2104  * free_tree ()
2105  *      recurse on left children, iterate on right children.
2106  */
2107 static void
2108 free_tree (np)
2109      register node *np;
2110 {
2111   while (np)
2112     {
2113       register node *node_right = np->right;
2114       free_tree (np->left);
2115       if (np->name != NULL)
2116         free (np->name);
2117       free (np->regex);
2118       free (np);
2119       np = node_right;
2120     }
2121 }
2122
2123 /*
2124  * free_fdesc ()
2125  *      delete a file description
2126  */
2127 static void
2128 free_fdesc (fdp)
2129      register fdesc *fdp;
2130 {
2131   if (fdp->infname != NULL) free (fdp->infname);
2132   if (fdp->infabsname != NULL) free (fdp->infabsname);
2133   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2134   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2135   if (fdp->prop != NULL) free (fdp->prop);
2136   free (fdp);
2137 }
2138
2139 /*
2140  * add_node ()
2141  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2142  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2143  *      balancing.
2144  *
2145  *      add_node is the only function allowed to add nodes, so it can
2146  *      maintain state.
2147  */
2148 static void
2149 add_node (np, cur_node_p)
2150      node *np, **cur_node_p;
2151 {
2152   register int dif;
2153   register node *cur_node = *cur_node_p;
2154
2155   if (cur_node == NULL)
2156     {
2157       *cur_node_p = np;
2158       last_node = np;
2159       return;
2160     }
2161
2162   if (!CTAGS)
2163     /* Etags Mode */
2164     {
2165       /* For each file name, tags are in a linked sublist on the right
2166          pointer.  The first tags of different files are a linked list
2167          on the left pointer.  last_node points to the end of the last
2168          used sublist. */
2169       if (last_node != NULL && last_node->fdp == np->fdp)
2170         {
2171           /* Let's use the same sublist as the last added node. */
2172           assert (last_node->right == NULL);
2173           last_node->right = np;
2174           last_node = np;
2175         }
2176       else if (cur_node->fdp == np->fdp)
2177         {
2178           /* Scanning the list we found the head of a sublist which is
2179              good for us.  Let's scan this sublist. */
2180           add_node (np, &cur_node->right);
2181         }
2182       else
2183         /* The head of this sublist is not good for us.  Let's try the
2184            next one. */
2185         add_node (np, &cur_node->left);
2186     } /* if ETAGS mode */
2187
2188   else
2189     {
2190       /* Ctags Mode */
2191       dif = strcmp (np->name, cur_node->name);
2192
2193       /*
2194        * If this tag name matches an existing one, then
2195        * do not add the node, but maybe print a warning.
2196        */
2197       if (!dif)
2198         {
2199           if (np->fdp == cur_node->fdp)
2200             {
2201               if (!no_warnings)
2202                 {
2203                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2204                            np->fdp->infname, lineno, np->name);
2205                   fprintf (stderr, "Second entry ignored\n");
2206                 }
2207             }
2208           else if (!cur_node->been_warned && !no_warnings)
2209             {
2210               fprintf
2211                 (stderr,
2212                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2213                  np->fdp->infname, cur_node->fdp->infname, np->name);
2214               cur_node->been_warned = TRUE;
2215             }
2216           return;
2217         }
2218
2219       /* Actually add the node */
2220       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2221     } /* if CTAGS mode */
2222 }
2223
2224 /*
2225  * invalidate_nodes ()
2226  *      Scan the node tree and invalidate all nodes pointing to the
2227  *      given file description (CTAGS case) or free them (ETAGS case).
2228  */
2229 static void
2230 invalidate_nodes (badfdp, npp)
2231      fdesc *badfdp;
2232      node **npp;
2233 {
2234   node *np = *npp;
2235
2236   if (np == NULL)
2237     return;
2238
2239   if (CTAGS)
2240     {
2241       if (np->left != NULL)
2242         invalidate_nodes (badfdp, &np->left);
2243       if (np->fdp == badfdp)
2244         np->valid = FALSE;
2245       if (np->right != NULL)
2246         invalidate_nodes (badfdp, &np->right);
2247     }
2248   else
2249     {
2250       assert (np->fdp != NULL);
2251       if (np->fdp == badfdp)
2252         {
2253           *npp = np->left;      /* detach the sublist from the list */
2254           np->left = NULL;      /* isolate it */
2255           free_tree (np);       /* free it */
2256           invalidate_nodes (badfdp, npp);
2257         }
2258       else
2259         invalidate_nodes (badfdp, &np->left);
2260     }
2261 }
2262
2263 \f
2264 static int total_size_of_entries __P((node *));
2265 static int number_len __P((long));
2266
/* Length of a non-negative number's decimal representation, i.e. the
   number of digits needed to print NUM in base 10 (1 for zero). */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, plus one for each division by ten that
     still leaves something to print. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2277
2278 /*
2279  * Return total number of characters that put_entries will output for
2280  * the nodes in the linked list at the right of the specified node.
2281  * This count is irrelevant with etags.el since emacs 19.34 at least,
2282  * but is still supplied for backward compatibility.
2283  */
2284 static int
2285 total_size_of_entries (np)
2286      register node *np;
2287 {
2288   register int total = 0;
2289
2290   for (; np != NULL; np = np->right)
2291     if (np->valid)
2292       {
2293         total += strlen (np->regex) + 1;                /* pat\177 */
2294         if (np->name != NULL)
2295           total += strlen (np->name) + 1;               /* name\001 */
2296         total += number_len ((long) np->lno) + 1;       /* lno, */
2297         if (np->cno != invalidcharno)                   /* cno */
2298           total += number_len (np->cno);
2299         total += 1;                                     /* newline */
2300       }
2301
2302   return total;
2303 }
2304
2305 static void
2306 put_entries (np)
2307      register node *np;
2308 {
2309   register char *sp;
2310   static fdesc *fdp = NULL;
2311
2312   if (np == NULL)
2313     return;
2314
2315   /* Output subentries that precede this one */
2316   if (CTAGS)
2317     put_entries (np->left);
2318
2319   /* Output this entry */
2320   if (np->valid)
2321     {
2322       if (!CTAGS)
2323         {
2324           /* Etags mode */
2325           if (fdp != np->fdp)
2326             {
2327               fdp = np->fdp;
2328               fprintf (tagf, "\f\n%s,%d\n",
2329                        fdp->taggedfname, total_size_of_entries (np));
2330               fdp->written = TRUE;
2331             }
2332           fputs (np->regex, tagf);
2333           fputc ('\177', tagf);
2334           if (np->name != NULL)
2335             {
2336               fputs (np->name, tagf);
2337               fputc ('\001', tagf);
2338             }
2339           fprintf (tagf, "%d,", np->lno);
2340           if (np->cno != invalidcharno)
2341             fprintf (tagf, "%ld", np->cno);
2342           fputs ("\n", tagf);
2343         }
2344       else
2345         {
2346           /* Ctags mode */
2347           if (np->name == NULL)
2348             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2349
2350           if (cxref_style)
2351             {
2352               if (vgrind_style)
2353                 fprintf (stdout, "%s %s %d\n",
2354                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2355               else
2356                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2357                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2358             }
2359           else
2360             {
2361               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2362
2363               if (np->is_func)
2364                 {               /* function or #define macro with args */
2365                   putc (searchar, tagf);
2366                   putc ('^', tagf);
2367
2368                   for (sp = np->regex; *sp; sp++)
2369                     {
2370                       if (*sp == '\\' || *sp == searchar)
2371                         putc ('\\', tagf);
2372                       putc (*sp, tagf);
2373                     }
2374                   putc (searchar, tagf);
2375                 }
2376               else
2377                 {               /* anything else; text pattern inadequate */
2378                   fprintf (tagf, "%d", np->lno);
2379                 }
2380               putc ('\n', tagf);
2381             }
2382         }
2383     } /* if this node contains a valid tag */
2384
2385   /* Output subentries that follow this one */
2386   put_entries (np->right);
2387   if (!CTAGS)
2388     put_entries (np->left);
2389 }
2390
2391 \f
/* C extensions.

   Flag values describing which C dialect is being parsed.  C_STAR and
   C_JAVA both include the C_PLPL bit.

   NOTE(review): the keyword table further down writes
   `(C_JAVA & !C_PLPL)'.  `!' is logical negation, so `!C_PLPL' is 0 and
   the whole expression is 0, not C_JAVA with the C++ bit cleared;
   `~C_PLPL' was presumably intended -- confirm, and regenerate the
   gperf output if so.  */
#define C_EXT   0x0fff          /* C extensions */
#define C_PLAIN 0x0000          /* C */
#define C_PLPL  0x0001          /* C++ */
#define C_STAR  0x0003          /* C* */
#define C_JAVA  0x0005          /* JAVA */
#define C_AUTO  0x1000          /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2400
/*
 * The C symbol tables.
 *
 * sym_type is the classification stored for each keyword of the table
 * below (the `type' member of struct C_stab_entry).  The enumerators
 * keep their implicit values, starting from 0 for st_none.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__ */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef                  /* typedef */
};
2415
2416 static unsigned int hash __P((const char *, unsigned int));
2417 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2418 static enum sym_type C_symtype __P((char *, int, int));
2419
2420 /* Feed stuff between (but not including) %[ and %] lines to:
2421      gperf -m 5
2422 %[
2423 %compare-strncmp
2424 %enum
2425 %struct-type
2426 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2427 %%
2428 if,             0,                      st_C_ignore
2429 for,            0,                      st_C_ignore
2430 while,          0,                      st_C_ignore
2431 switch,         0,                      st_C_ignore
2432 return,         0,                      st_C_ignore
2433 __attribute__,  0,                      st_C_attribute
2434 @interface,     0,                      st_C_objprot
2435 @protocol,      0,                      st_C_objprot
2436 @implementation,0,                      st_C_objimpl
2437 @end,           0,                      st_C_objend
2438 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2439 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2440 friend,         C_PLPL,                 st_C_ignore
2441 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2442 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2443 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2444 class,          0,                      st_C_class
2445 namespace,      C_PLPL,                 st_C_struct
2446 domain,         C_STAR,                 st_C_struct
2447 union,          0,                      st_C_struct
2448 struct,         0,                      st_C_struct
2449 extern,         0,                      st_C_extern
2450 enum,           0,                      st_C_enum
2451 typedef,        0,                      st_C_typedef
2452 define,         0,                      st_C_define
2453 operator,       C_PLPL,                 st_C_operator
2454 template,       0,                      st_C_template
2455 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2456 DEFUN,          0,                      st_C_gnumacro
2457 SYSCALL,        0,                      st_C_gnumacro
2458 ENTRY,          0,                      st_C_gnumacro
2459 PSEUDO,         0,                      st_C_gnumacro
2460 # These are defined inside C functions, so currently they are not met.
2461 # EXFUN used in glibc, DEFVAR_* in emacs.
2462 #EXFUN,         0,                      st_C_gnumacro
2463 #DEFVAR_,       0,                      st_C_gnumacro
2464 %]
2465 and replace lines between %< and %> with its output, then:
2466  - remove the #if characterset check
2467  - make in_word_set static and not inline. */
2468 /*%<*/
2469 /* C code produced by gperf version 3.0.1 */
2470 /* Command-line: gperf -m 5  */
2471 /* Computed positions: -k'1-2' */
2472
2473 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2474 /* maximum key range = 31, duplicates = 0 */
2475
/* Hash function generated by gperf for the keyword table: the result is
   LEN plus the association values of the first two characters of STR.
   STR[1] is read unconditionally, so STR must hold at least two
   readable characters (a terminating NUL counts).  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Indexed by character code; 34 marks characters that never occur in
     the first two positions of a keyword.  */
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*   0 -   9 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*  10 -  19 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*  20 -  29 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*  30 -  39 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*  40 -  49 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /*  50 -  59 */
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,   /*  60 -  69 */
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,   /*  70 -  79 */
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,   /*  80 -  89 */
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,   /*  90 -  99 */
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,   /* 100 - 109 */
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,   /* 110 - 119 */
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,   /* 120 - 129 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 130 - 139 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 140 - 149 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 150 - 159 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 160 - 169 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 170 - 179 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 180 - 189 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 190 - 199 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 200 - 209 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 210 - 219 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 220 - 229 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 230 - 239 */
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,   /* 240 - 249 */
      34, 34, 34, 34, 34, 34                    /* 250 - 255 */
    };
  register unsigned int hval = len;

  hval += asso_values[(unsigned char)str[1]];
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2519
2520 static struct C_stab_entry *
2521 in_word_set (str, len)
2522      register const char *str;
2523      register unsigned int len;
2524 {
2525   enum
2526     {
2527       TOTAL_KEYWORDS = 31,
2528       MIN_WORD_LENGTH = 2,
2529       MAX_WORD_LENGTH = 15,
2530       MIN_HASH_VALUE = 3,
2531       MAX_HASH_VALUE = 33
2532     };
2533
2534   static struct C_stab_entry wordlist[] =
2535     {
2536       {""}, {""}, {""},
2537       {"if",            0,                      st_C_ignore},
2538       {"enum",          0,                      st_C_enum},
2539       {"@end",          0,                      st_C_objend},
2540       {"extern",                0,                      st_C_extern},
2541       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2542       {"for",           0,                      st_C_ignore},
2543       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2544       {"@protocol",     0,                      st_C_objprot},
2545       {"@interface",    0,                      st_C_objprot},
2546       {"operator",      C_PLPL,                 st_C_operator},
2547       {"return",                0,                      st_C_ignore},
2548       {"friend",                C_PLPL,                 st_C_ignore},
2549       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2550       {"@implementation",0,                     st_C_objimpl},
2551       {"define",                0,                      st_C_define},
2552       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2553       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2554       {"namespace",     C_PLPL,                 st_C_struct},
2555       {"domain",                C_STAR,                 st_C_struct},
2556       {"template",      0,                      st_C_template},
2557       {"typedef",       0,                      st_C_typedef},
2558       {"struct",                0,                      st_C_struct},
2559       {"switch",                0,                      st_C_ignore},
2560       {"union",         0,                      st_C_struct},
2561       {"while",         0,                      st_C_ignore},
2562       {"class",         0,                      st_C_class},
2563       {"__attribute__", 0,                      st_C_attribute},
2564       {"SYSCALL",       0,                      st_C_gnumacro},
2565       {"PSEUDO",                0,                      st_C_gnumacro},
2566       {"ENTRY",         0,                      st_C_gnumacro},
2567       {"DEFUN",         0,                      st_C_gnumacro}
2568     };
2569
2570   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2571     {
2572       register int key = hash (str, len);
2573
2574       if (key <= MAX_HASH_VALUE && key >= 0)
2575         {
2576           register const char *s = wordlist[key].name;
2577
2578           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2579             return &wordlist[key];
2580         }
2581     }
2582   return 0;
2583 }
2584 /*%>*/
2585
2586 static enum sym_type
2587 C_symtype (str, len, c_ext)
2588      char *str;
2589      int len;
2590      int c_ext;
2591 {
2592   register struct C_stab_entry *se = in_word_set (str, len);
2593
2594   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2595     return st_none;
2596   return se->type;
2597 }
2598
2599 \f
2600 /*
2601  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets the __attribute__
      * keyword; C_entries does not consider any token while it is set.
2602  */
2603 static bool inattribute;        /* looking at an __attribute__ construct */
2604
2605 /*
2606  * C functions and variables are recognized using a simple
2607  * finite automaton.  fvdef is its state variable.
      *
      * Among other transitions, consider_token moves it to fdefunkey on
      * a GNU macro keyword (DEFUN, ENTRY, PSEUDO, SYSCALL) seen outside
      * a preprocessor line, to foperator on `operator', to vignore on
      * an ignored keyword (if, for, while, ...) or on asm, and to
      * fvnameseen when a function or variable name candidate is found.
2608  */
2609 static enum
2610 {
2611   fvnone,                       /* nothing seen */
2612   fdefunkey,                    /* Emacs DEFUN keyword seen */
2613   fdefunname,                   /* Emacs DEFUN name seen */
2614   foperator,                    /* func: operator keyword seen (cplpl) */
2615   fvnameseen,                   /* function or variable name seen */
2616   fstartlist,                   /* func: just after open parenthesis */
2617   finlist,                      /* func: in parameter list */
2618   flistseen,                    /* func: after parameter list */
2619   fignore,                      /* func: before open brace */
2620   vignore                       /* var-like: ignore until ';' */
2621 } fvdef;
2622
2623 static bool fvextern;           /* func or var: extern keyword seen; */
2624
2625 /*
2626  * typedefs are recognized using a simple finite automaton.
2627  * typdef is its state variable.
      *
      * consider_token leaves tnone for tkeyseen on the `typedef' keyword,
      * but only when the `typedefs' option is set, and goes on to
      * ttypeseen when the next token is a plain identifier or one of
      * class/struct/enum.
2628  */
2629 static enum
2630 {
2631   tnone,                        /* nothing seen */
2632   tkeyseen,                     /* typedef keyword seen */
2633   ttypeseen,                    /* defined type seen */
2634   tinbody,                      /* inside typedef body */
2635   tend,                         /* just before typedef tag */
2636   tignore                       /* junk after typedef tag */
2637 } typdef;
2638
2639 /*
2640  * struct-like structures (enum, struct and union) are recognized
2641  * using another simple finite automaton.  `structdef' is its state
2642  * variable.
      *
      * In consider_token a struct-like keyword met outside parentheses
      * may start skeyseen; the token that follows is tagged and moves
      * the state to stagseen; a Java `extends' or `implements' met in
      * stagseen moves it to scolonseen.
2643  */
2644 static enum
2645 {
2646   snone,                        /* nothing seen yet,
2647                                    or in struct body if bracelev > 0 */
2648   skeyseen,                     /* struct-like keyword seen */
2649   stagseen,                     /* struct-like tag seen */
2650   scolonseen                    /* colon seen after struct-like tag */
2651 } structdef;
2652
2653 /*
2654  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores here a fresh copy of the class name made
      * with savenstr; the previous string is deliberately never freed
      * (see the memory leakage note in consider_token).
2655  */
2656 static char *objtag = "<uninited>";
2657
2658 /*
2659  * Yet another little state machine to deal with preprocessor lines.
      *
      * C_entries enters dsharpseen on a `#' that qualifies as the start
      * of a preprocessor line.  consider_token then goes to ddefineseen
      * if the next token is `define' and to dignorerest otherwise; the
      * token following `define' is the macro name.  The CNL macro
      * resets the state to dnone when a new line is read, while
      * CNL_SAVE_DEFINEDEF reads a new line without touching it.
2660  */
2661 static enum
2662 {
2663   dnone,                        /* nothing seen */
2664   dsharpseen,                   /* '#' seen as first char on line */
2665   ddefineseen,                  /* '#' and 'define' seen */
2666   dignorerest                   /* ignore rest of line */
2667 } definedef;
2668
2669 /*
2670  * State machine for Objective C protocols and implementations.
2671  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
      *
      * While in one of the method states, consider_token accumulates the
      * method name (keyword parts included) in token_name.  In oignore
      * only the @end keyword is acted upon: it brings the state back to
      * onone.
2672  */
2673 static enum
2674 {
2675   onone,                        /* nothing seen */
2676   oprotocol,                    /* @interface or @protocol seen */
2677   oimplementation,              /* @implementation seen */
2678   otagseen,                     /* class name seen */
2679   oparenseen,                   /* parenthesis before category seen */
2680   ocatseen,                     /* category name seen */
2681   oinbody,                      /* in @implementation body */
2682   omethodsign,                  /* in @implementation body, after +/- */
2683   omethodtag,                   /* after method name */
2684   omethodcolon,                 /* after method colon */
2685   omethodparm,                  /* after method parameter */
2686   oignore                       /* wait for @end */
2687 } objdef;
2688
2689
2690 /*
2691  * Use this structure to keep info about the token read, and how it
2692  * should be tagged.  Used by the make_C_tag function to build a tag.
      *
      * make_C_tag passes LINE as the pattern text and OFFSET+LENGTH+1 as
      * its length to make_tag, then clears VALID so that the same token
      * is not tagged twice.
2693  */
2694 static struct tok
2695 {
2696   char *line;                   /* string containing the token */
2697   int offset;                   /* where the token starts in LINE */
2698   int length;                   /* token length */
2699   /*
2700     The previous members can be used to pass strings around for generic
2701     purposes.  The following ones specifically refer to creating tags.  In this
2702     case the token contained here is the pattern that will be used to create a
2703     tag.
2704   */
2705   bool valid;                   /* when FALSE, do not create a tag; the token
2706                                    should be invalidated whenever a state
2707                                    machine is reset prematurely */
2708   bool named;                   /* create a named tag */
2709   int lineno;                   /* source line number of tag */
2710   long linepos;                 /* source char number of tag */
2711 } token;                        /* latest token read */
2712
2713 /*
2714  * Variables and functions for dealing with nested structures.
2715  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cstack is a stack of the enclosing declarations: entry I holds the
      * declaration name (a string owned by the stack, or NULL when
      * pushclass_above was given no name) and the brace level at which it
      * was pushed.  Brace levels strictly increase towards the top.
      * C_entries allocates both arrays the first time it runs and
      * pushclass_above doubles them when they are full.
2716  */
2717 static void pushclass_above __P((int, char *, int));
2718 static void popclass_above __P((int));
2719 static void write_classname __P((linebuffer *, char *qualifier));
2720
2721 static struct {
2722   char **cname;                 /* nested class names */
2723   int *bracelev;                /* nested class brace level */
2724   int nl;                       /* class nesting level (elements used) */
2725   int size;                     /* length of the array */
2726 } cstack;                       /* stack for nested declaration tags */
2727 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2728 #define nestlev         (cstack.nl)
2729 /* After struct keyword or in struct body, not inside a nested function.
        Note that this macro reads a variable named `bracelev' which must
        be in scope wherever the macro is used. */
2730 #define instruct        (structdef == snone && nestlev > 0                      \
2731                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2732
2733 static void
2734 pushclass_above (bracelev, str, len)
2735      int bracelev;
2736      char *str;
2737      int len;
2738 {
2739   int nl;
2740
2741   popclass_above (bracelev);
2742   nl = cstack.nl;
2743   if (nl >= cstack.size)
2744     {
2745       int size = cstack.size *= 2;
2746       xrnew (cstack.cname, size, char *);
2747       xrnew (cstack.bracelev, size, int);
2748     }
2749   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2750   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2751   cstack.bracelev[nl] = bracelev;
2752   cstack.nl = nl + 1;
2753 }
2754
2755 static void
2756 popclass_above (bracelev)
2757      int bracelev;
2758 {
2759   int nl;
2760
2761   for (nl = cstack.nl - 1;
2762        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2763        nl--)
2764     {
2765       if (cstack.cname[nl] != NULL)
2766         free (cstack.cname[nl]);
2767       cstack.nl = nl;
2768     }
2769 }
2770
2771 static void
2772 write_classname (cn, qualifier)
2773      linebuffer *cn;
2774      char *qualifier;
2775 {
2776   int i, len;
2777   int qlen = strlen (qualifier);
2778
2779   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2780     {
2781       len = 0;
2782       cn->len = 0;
2783       cn->buffer[0] = '\0';
2784     }
2785   else
2786     {
2787       len = strlen (cstack.cname[0]);
2788       linebuffer_setlen (cn, len);
2789       strcpy (cn->buffer, cstack.cname[0]);
2790     }
2791   for (i = 1; i < cstack.nl; i++)
2792     {
2793       char *s;
2794       int slen;
2795
2796       s = cstack.cname[i];
2797       if (s == NULL)
2798         continue;
2799       slen = strlen (s);
2800       len += slen + qlen;
2801       linebuffer_setlen (cn, len);
2802       strncat (cn->buffer, qualifier, qlen);
2803       strncat (cn->buffer, s, slen);
2804     }
2805 }
2806
2807 \f
2808 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2809 static void make_C_tag __P((bool));
2810
2811 /*
2812  * consider_token ()
2813  *      checks to see if the current token is at the start of a
2814  *      function or variable, or corresponds to a typedef, or
2815  *      is a struct/union/enum tag, or #define, or an enum constant.
2816  *
2817  *      *IS_FUNC_OR_VAR gets TRUE when the token is a function or
      *      variable name, an operator, a GNU macro name or an Objective C
      *      class or category name; for a #define it tells whether the
      *      macro has args.  It is not assigned on the other paths.
2818  *      C_EXTP points to which language we are looking at; when C_AUTO
      *      is set in it, seeing a C++ construct replaces C_AUTO with C_PLPL.
      *
      *      Returns TRUE when the token is a candidate for a tag, FALSE
      *      when it only advanced some state machine or must be ignored.
2819  *
2820  * Globals
2821  *      fvdef                   IN OUT
2822  *      structdef               IN OUT
2823  *      definedef               IN OUT
2824  *      typdef                  IN OUT
2825  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2826  */
2827
2828 static bool
2829 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2830      register char *str;        /* IN: token pointer */
2831      register int len;          /* IN: token length */
2832      register int c;            /* IN: first char after the token */
2833      int *c_extp;               /* IN, OUT: C extensions mask */
2834      int bracelev;              /* IN: brace level */
2835      int parlev;                /* IN: parenthesis level */
2836      bool *is_func_or_var;      /* OUT: function or variable found */
2837 {
2838   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2839      structtype is the type of the preceding struct-like keyword, and
2840      structbracelev is the brace level where it has been seen. */
2841   static enum sym_type structtype;
2842   static int structbracelev;
2843   static enum sym_type toktype;
2844
2845
2846   toktype = C_symtype (str, len, *c_extp);
2847
2848   /*
2849    * Skip __attribute__
2850    */
2851   if (toktype == st_C_attribute)
2852     {
2853       inattribute = TRUE;
2854       return FALSE;
2855      }
2856
2857    /*
2858     * Advance the definedef state machine.
2859     */
2860    switch (definedef)
2861      {
2862      case dnone:
2863        /* We're not on a preprocessor line. */
2864        if (toktype == st_C_gnumacro)
2865          {
2866            fvdef = fdefunkey;
2867            return FALSE;
2868          }
2869        break;
2870      case dsharpseen:
2871        if (toktype == st_C_define)
2872          {
2873            definedef = ddefineseen;
2874          }
2875        else
2876          {
2877            definedef = dignorerest;
2878          }
2879        return FALSE;
2880      case ddefineseen:
2881        /*
2882         * Make a tag for any macro, unless it is a constant
2883         * and constantypedefs is FALSE.
2884         */
2885        definedef = dignorerest;
            /* An open parenthesis right after the name means a macro with args. */
2886        *is_func_or_var = (c == '(');
2887        if (!*is_func_or_var && !constantypedefs)
2888          return FALSE;
2889        else
2890          return TRUE;
2891      case dignorerest:
2892        return FALSE;
2893      default:
2894        error ("internal error: definedef value.", (char *)NULL);
2895      }
2896
2897    /*
2898     * Now typedefs
2899     */
2900    switch (typdef)
2901      {
2902      case tnone:
2903        if (toktype == st_C_typedef)
2904          {
                /* The keyword is swallowed in any case, but the typedef
                   machine only starts when typedefs are to be tagged. */
2905            if (typedefs)
2906              typdef = tkeyseen;
2907            fvextern = FALSE;
2908            fvdef = fvnone;
2909            return FALSE;
2910          }
2911        break;
2912      case tkeyseen:
2913        switch (toktype)
2914          {
2915          case st_none:
2916          case st_C_class:
2917          case st_C_struct:
2918          case st_C_enum:
2919            typdef = ttypeseen;
2920          }
2921        break;
2922      case ttypeseen:
2923        if (structdef == snone && fvdef == fvnone)
2924          {
2925            fvdef = fvnameseen;
2926            return TRUE;
2927          }
2928        break;
2929      case tend:
2930        switch (toktype)
2931          {
2932          case st_C_class:
2933          case st_C_struct:
2934          case st_C_enum:
2935            return FALSE;
2936          }
2937        return TRUE;
2938      }
2939
2940    /*
2941     * This structdef business is NOT invoked when we are ctags and the
2942     * file is plain C.  This is because a struct tag may have the same
2943     * name as another tag, and this loses with ctags.
2944     */
2945    switch (toktype)
2946      {
2947      case st_C_javastruct:
2948        if (structdef == stagseen)
2949          structdef = scolonseen;
2950        return FALSE;
2951      case st_C_template:
2952      case st_C_class:
2953        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2954            && bracelev == 0
2955            && definedef == dnone && structdef == snone
2956            && typdef == tnone && fvdef == fvnone)
2957          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2958        if (toktype == st_C_template)
2959          break;
2960        /* FALLTHRU */
2961      case st_C_struct:
2962      case st_C_enum:
2963        if (parlev == 0
2964            && fvdef != vignore
2965            && (typdef == tkeyseen
2966                || (typedefs_or_cplusplus && structdef == snone)))
2967          {
2968            structdef = skeyseen;
2969            structtype = toktype;
2970            structbracelev = bracelev;
2971            if (fvdef == fvnameseen)
2972              fvdef = fvnone;
2973          }
2974        return FALSE;
2975      }
2976
        /* The token right after a struct-like keyword is the struct tag. */
2977    if (structdef == skeyseen)
2978      {
2979        structdef = stagseen;
2980        return TRUE;
2981      }
2982
2983    if (typdef != tnone)
2984      definedef = dnone;
2985
2986    /* Detect Objective C constructs. */
2987    switch (objdef)
2988      {
2989      case onone:
2990        switch (toktype)
2991          {
2992          case st_C_objprot:
2993            objdef = oprotocol;
2994            return FALSE;
2995          case st_C_objimpl:
2996            objdef = oimplementation;
2997            return FALSE;
2998          }
2999        break;
3000      case oimplementation:
3001        /* Save the class tag for functions or variables defined inside. */
3002        objtag = savenstr (str, len);
3003        objdef = oinbody;
3004        return FALSE;
3005      case oprotocol:
3006        /* Save the class tag for categories. */
3007        objtag = savenstr (str, len);
3008        objdef = otagseen;
3009        *is_func_or_var = TRUE;
3010        return TRUE;
3011      case oparenseen:
3012        objdef = ocatseen;
3013        *is_func_or_var = TRUE;
3014        return TRUE;
3015      case oinbody:
3016        break;
3017      case omethodsign:
3018        if (parlev == 0)
3019          {
3020            fvdef = fvnone;
3021            objdef = omethodtag;
                /* First part of the method name: start token_name afresh. */
3022            linebuffer_setlen (&token_name, len);
3023            strncpy (token_name.buffer, str, len);
3024            token_name.buffer[len] = '\0';
3025            return TRUE;
3026          }
3027        return FALSE;
3028      case omethodcolon:
3029        if (parlev == 0)
3030          objdef = omethodparm;
3031        return FALSE;
3032      case omethodparm:
3033        if (parlev == 0)
3034          {
3035            fvdef = fvnone;
3036            objdef = omethodtag;
                /* A further part of the method name: append it to token_name. */
3037            linebuffer_setlen (&token_name, token_name.len + len);
3038            strncat (token_name.buffer, str, len);
3039            return TRUE;
3040          }
3041        return FALSE;
3042      case oignore:
3043        if (toktype == st_C_objend)
3044          {
3045            /* Memory leakage here: the string pointed by objtag is
3046               never released, because many tests would be needed to
3047               avoid breaking on incorrect input code.  The amount of
3048               memory leaked here is the sum of the lengths of the
3049               class tags.
3050            free (objtag); */
3051            objdef = onone;
3052          }
3053        return FALSE;
3054      }
3055
3056    /* A function, variable or enum constant? */
3057    switch (toktype)
3058      {
3059      case st_C_extern:
3060        fvextern = TRUE;
3061        switch  (fvdef)
3062          {
3063          case finlist:
3064          case flistseen:
3065          case fignore:
3066          case vignore:
3067            break;
3068          default:
3069            fvdef = fvnone;
3070          }
3071        return FALSE;
3072      case st_C_ignore:
3073        fvextern = FALSE;
3074        fvdef = vignore;
3075        return FALSE;
3076      case st_C_operator:
3077        fvdef = foperator;
3078        *is_func_or_var = TRUE;
3079        return TRUE;
3080      case st_none:
3081        if (constantypedefs
3082            && structdef == snone
3083            && structtype == st_C_enum && bracelev > structbracelev)
3084          return TRUE;           /* enum constant */
3085        switch (fvdef)
3086          {
3087          case fdefunkey:
3088            if (bracelev > 0)
3089              break;
3090            fvdef = fdefunname;  /* GNU macro */
3091            *is_func_or_var = TRUE;
3092            return TRUE;
3093          case fvnone:
3094            switch (typdef)
3095              {
3096              case ttypeseen:
3097                return FALSE;
3098              case tnone:
3099                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3100                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3101                  {
3102                    fvdef = vignore;
3103                    return FALSE;
3104                  }
3105                break;
3106              }
3107           /* FALLTHRU */
3108           case fvnameseen:
               /* A name ending in ::operator is a C++ operator definition. */
3109           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3110             {
3111               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3112                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3113               fvdef = foperator;
3114               *is_func_or_var = TRUE;
3115               return TRUE;
3116             }
               /* Inside braces only struct members are of interest. */
3117           if (bracelev > 0 && !instruct)
3118             break;
3119           fvdef = fvnameseen;   /* function or variable */
3120           *is_func_or_var = TRUE;
3121           return TRUE;
3122         }
3123       break;
3124     }
3125
3126   return FALSE;
3127 }
3128
3129 \f
3130 /*
3131  * C_entries often keeps pointers to tokens or lines which are older than
3132  * the line currently read.  By keeping two line buffers, and switching
3133  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are meant to be expanded inside C_entries only:
      * they refer to its local variables curndx, newndx, c_ext, lp,
      * quotednl, inf and savetoken, and to a variable named charno.
3134  */
3135 static struct
3136 {
3137   long linepos;
3138   linebuffer lb;
3139 } lbs[2];
3140
     /* TRUE when the buffer in use is the one read last. */
3141 #define current_lb_is_new (newndx == curndx)
     /* Make the other buffer the one in use. */
3142 #define switch_line_buffers() (curndx = 1 - curndx)
3143
     /* The buffer in use, the buffer read last, and their positions. */
3144 #define curlb (lbs[curndx].lb)
3145 #define newlb (lbs[newndx].lb)
3146 #define curlinepos (lbs[curndx].linepos)
3147 #define newlinepos (lbs[newndx].linepos)
3148
     /* Tests on the language being parsed, as selected by c_ext. */
3149 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3150 #define cplpl (c_ext & C_PLPL)
3151 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3152
     /* Read the next line into the buffer in use, remembering in
        curlinepos the value charno had before the read, restart
        scanning from the beginning of the buffer and mark that buffer
        as the one read last.  The definedef state is left untouched. */
3153 #define CNL_SAVE_DEFINEDEF()                                            \
3154 do {                                                                    \
3155   curlinepos = charno;                                                  \
3156   readline (&curlb, inf);                                               \
3157   lp = curlb.buffer;                                                    \
3158   quotednl = FALSE;                                                     \
3159   newndx = curndx;                                                      \
3160 } while (0)
3161
     /* Like CNL_SAVE_DEFINEDEF, but a new line also ends a preprocessor
        directive: the token saved in savetoken, if valid, becomes the
        latest token again, and definedef goes back to dnone. */
3162 #define CNL()                                                           \
3163 do {                                                                    \
3164   CNL_SAVE_DEFINEDEF();                                                 \
3165   if (savetoken.valid)                                                  \
3166     {                                                                   \
3167       token = savetoken;                                                \
3168       savetoken.valid = FALSE;                                          \
3169     }                                                                   \
3170   definedef = dnone;                                                    \
3171 } while (0)
3172
3173
3174 static void
3175 make_C_tag (isfun)
3176      bool isfun;
3177 {
3178   /* This function should never be called when token.valid is FALSE, but
3179      we must protect against invalid input or internal errors. */
3180   if (!DEBUG && !token.valid)
3181     return;
3182
3183   if (token.valid)
3184     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3185               token.offset+token.length+1, token.lineno, token.linepos);
3186   else                          /* this case is optimised away if !DEBUG */
3187     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3188               token_name.len + 17, isfun, token.line,
3189               token.offset+token.length+1, token.lineno, token.linepos);
3190
3191   token.valid = FALSE;
3192 }
3193
3194
3195 /*
3196  * C_entries ()
3197  *      This routine finds functions, variables, typedefs,
3198  *      #define's, enum constants and struct/union/enum definitions in
3199  *      C syntax and adds them to the list.
3200  */
3201 static void
3202 C_entries (c_ext, inf)
3203      int c_ext;                 /* extension of C */
3204      FILE *inf;                 /* input file */
3205 {
3206   register char c;              /* latest char read; '\0' for end of line */
3207   register char *lp;            /* pointer one beyond the character `c' */
3208   int curndx, newndx;           /* indices for current and new lb */
3209   register int tokoff;          /* offset in line of start of current token */
3210   register int toklen;          /* length of current token */
3211   char *qualifier;              /* string used to qualify names */
3212   int qlen;                     /* length of qualifier */
3213   int bracelev;                 /* current brace level */
3214   int bracketlev;               /* current bracket level */
3215   int parlev;                   /* current parenthesis level */
3216   int attrparlev;               /* __attribute__ parenthesis level */
3217   int templatelev;              /* current template level */
3218   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3219   bool incomm, inquote, inchar, quotednl, midtoken;
3220   bool yacc_rules;              /* in the rules part of a yacc file */
3221   struct tok savetoken;         /* token saved during preprocessor handling */
3222
3223
3224   linebuffer_init (&lbs[0].lb);
3225   linebuffer_init (&lbs[1].lb);
3226   if (cstack.size == 0)
3227     {
3228       cstack.size = (DEBUG) ? 1 : 4;
3229       cstack.nl = 0;
3230       cstack.cname = xnew (cstack.size, char *);
3231       cstack.bracelev = xnew (cstack.size, int);
3232     }
3233
3234   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3235   curndx = newndx = 0;
3236   lp = curlb.buffer;
3237   *lp = 0;
3238
3239   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3240   structdef = snone; definedef = dnone; objdef = onone;
3241   yacc_rules = FALSE;
3242   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3243   token.valid = savetoken.valid = FALSE;
3244   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3245   if (cjava)
3246     { qualifier = "."; qlen = 1; }
3247   else
3248     { qualifier = "::"; qlen = 2; }
3249
3250
3251   while (!feof (inf))
3252     {
3253       c = *lp++;
3254       if (c == '\\')
3255         {
3256           /* If we are at the end of the line, the next character is a
3257              '\0'; do not skip it, because it is what tells us
3258              to read the next line.  */
3259           if (*lp == '\0')
3260             {
3261               quotednl = TRUE;
3262               continue;
3263             }
3264           lp++;
3265           c = ' ';
3266         }
3267       else if (incomm)
3268         {
3269           switch (c)
3270             {
3271             case '*':
3272               if (*lp == '/')
3273                 {
3274                   c = *lp++;
3275                   incomm = FALSE;
3276                 }
3277               break;
3278             case '\0':
3279               /* Newlines inside comments do not end macro definitions in
3280                  traditional cpp. */
3281               CNL_SAVE_DEFINEDEF ();
3282               break;
3283             }
3284           continue;
3285         }
3286       else if (inquote)
3287         {
3288           switch (c)
3289             {
3290             case '"':
3291               inquote = FALSE;
3292               break;
3293             case '\0':
3294               /* Newlines inside strings do not end macro definitions
3295                  in traditional cpp, even though compilers don't
3296                  usually accept them. */
3297               CNL_SAVE_DEFINEDEF ();
3298               break;
3299             }
3300           continue;
3301         }
3302       else if (inchar)
3303         {
3304           switch (c)
3305             {
3306             case '\0':
3307               /* Hmmm, something went wrong. */
3308               CNL ();
3309               /* FALLTHRU */
3310             case '\'':
3311               inchar = FALSE;
3312               break;
3313             }
3314           continue;
3315         }
3316       else if (bracketlev > 0)
3317         {
3318           switch (c)
3319             {
3320             case ']':
3321               if (--bracketlev > 0)
3322                 continue;
3323               break;
3324             case '\0':
3325               CNL_SAVE_DEFINEDEF ();
3326               break;
3327             }
3328           continue;
3329         }
3330       else switch (c)
3331         {
3332         case '"':
3333           inquote = TRUE;
3334           if (inattribute)
3335             break;
3336           switch (fvdef)
3337             {
3338             case fdefunkey:
3339             case fstartlist:
3340             case finlist:
3341             case fignore:
3342             case vignore:
3343               break;
3344             default:
3345               fvextern = FALSE;
3346               fvdef = fvnone;
3347             }
3348           continue;
3349         case '\'':
3350           inchar = TRUE;
3351           if (inattribute)
3352             break;
3353           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3354             {
3355               fvextern = FALSE;
3356               fvdef = fvnone;
3357             }
3358           continue;
3359         case '/':
3360           if (*lp == '*')
3361             {
3362               lp++;
3363               incomm = TRUE;
3364               continue;
3365             }
3366           else if (/* cplpl && */ *lp == '/')
3367             {
3368               c = '\0';
3369               break;
3370             }
3371           else
3372             break;
3373         case '%':
3374           if ((c_ext & YACC) && *lp == '%')
3375             {
3376               /* Entering or exiting rules section in yacc file. */
3377               lp++;
3378               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3379               typdef = tnone; structdef = snone;
3380               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3381               bracelev = 0;
3382               yacc_rules = !yacc_rules;
3383               continue;
3384             }
3385           else
3386             break;
3387         case '#':
3388           if (definedef == dnone)
3389             {
3390               char *cp;
3391               bool cpptoken = TRUE;
3392
3393               /* Look back on this line.  If all blanks, or nonblanks
3394                  followed by an end of comment, this is a preprocessor
3395                  token. */
3396               for (cp = newlb.buffer; cp < lp-1; cp++)
3397                 if (!iswhite (*cp))
3398                   {
3399                     if (*cp == '*' && *(cp+1) == '/')
3400                       {
3401                         cp++;
3402                         cpptoken = TRUE;
3403                       }
3404                     else
3405                       cpptoken = FALSE;
3406                   }
3407               if (cpptoken)
3408                 definedef = dsharpseen;
3409             } /* if (definedef == dnone) */
3410           continue;
3411         case '[':
3412           bracketlev++;
3413             continue;
3414         } /* switch (c) */
3415
3416
3417       /* Consider token only if some involved conditions are satisfied. */
3418       if (typdef != tignore
3419           && definedef != dignorerest
3420           && fvdef != finlist
3421           && templatelev == 0
3422           && (definedef != dnone
3423               || structdef != scolonseen)
3424           && !inattribute)
3425         {
3426           if (midtoken)
3427             {
3428               if (endtoken (c))
3429                 {
3430                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3431                     /* This handles :: in the middle,
3432                        but not at the beginning of an identifier.
3433                        Also, space-separated :: is not recognised. */
3434                     {
3435                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3436                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3437                       lp += 2;
3438                       toklen += 2;
3439                       c = lp[-1];
3440                       goto still_in_token;
3441                     }
3442                   else
3443                     {
3444                       bool funorvar = FALSE;
3445
3446                       if (yacc_rules
3447                           || consider_token (newlb.buffer + tokoff, toklen, c,
3448                                              &c_ext, bracelev, parlev,
3449                                              &funorvar))
3450                         {
3451                           if (fvdef == foperator)
3452                             {
3453                               char *oldlp = lp;
3454                               lp = skip_spaces (lp-1);
3455                               if (*lp != '\0')
3456                                 lp += 1;
3457                               while (*lp != '\0'
3458                                      && !iswhite (*lp) && *lp != '(')
3459                                 lp += 1;
3460                               c = *lp++;
3461                               toklen += lp - oldlp;
3462                             }
3463                           token.named = FALSE;
3464                           if (!plainc
3465                               && nestlev > 0 && definedef == dnone)
3466                             /* in struct body */
3467                             {
3468                               write_classname (&token_name, qualifier);
3469                               linebuffer_setlen (&token_name,
3470                                                  token_name.len+qlen+toklen);
3471                               strcat (token_name.buffer, qualifier);
3472                               strncat (token_name.buffer,
3473                                        newlb.buffer + tokoff, toklen);
3474                               token.named = TRUE;
3475                             }
3476                           else if (objdef == ocatseen)
3477                             /* Objective C category */
3478                             {
3479                               int len = strlen (objtag) + 2 + toklen;
3480                               linebuffer_setlen (&token_name, len);
3481                               strcpy (token_name.buffer, objtag);
3482                               strcat (token_name.buffer, "(");
3483                               strncat (token_name.buffer,
3484                                        newlb.buffer + tokoff, toklen);
3485                               strcat (token_name.buffer, ")");
3486                               token.named = TRUE;
3487                             }
3488                           else if (objdef == omethodtag
3489                                    || objdef == omethodparm)
3490                             /* Objective C method */
3491                             {
3492                               token.named = TRUE;
3493                             }
3494                           else if (fvdef == fdefunname)
3495                             /* GNU DEFUN and similar macros */
3496                             {
3497                               bool defun = (newlb.buffer[tokoff] == 'F');
3498                               int off = tokoff;
3499                               int len = toklen;
3500
3501                               /* Rewrite the tag so that emacs lisp DEFUNs
3502                                  can be found by their elisp name */
3503                               if (defun)
3504                                 {
3505                                   off += 1;
3506                                   len -= 1;
3507                                 }
3508                               len = toklen;
3509                               linebuffer_setlen (&token_name, len);
3510                               strncpy (token_name.buffer,
3511                                        newlb.buffer + off, len);
3512                               token_name.buffer[len] = '\0';
3513                               if (defun)
3514                                 while (--len >= 0)
3515                                   if (token_name.buffer[len] == '_')
3516                                     token_name.buffer[len] = '-';
3517                               token.named = defun;
3518                             }
3519                           else
3520                             {
3521                               linebuffer_setlen (&token_name, toklen);
3522                               strncpy (token_name.buffer,
3523                                        newlb.buffer + tokoff, toklen);
3524                               token_name.buffer[toklen] = '\0';
3525                               /* Name macros and members. */
3526                               token.named = (structdef == stagseen
3527                                              || typdef == ttypeseen
3528                                              || typdef == tend
3529                                              || (funorvar
3530                                                  && definedef == dignorerest)
3531                                              || (funorvar
3532                                                  && definedef == dnone
3533                                                  && structdef == snone
3534                                                  && bracelev > 0));
3535                             }
3536                           token.lineno = lineno;
3537                           token.offset = tokoff;
3538                           token.length = toklen;
3539                           token.line = newlb.buffer;
3540                           token.linepos = newlinepos;
3541                           token.valid = TRUE;
3542
3543                           if (definedef == dnone
3544                               && (fvdef == fvnameseen
3545                                   || fvdef == foperator
3546                                   || structdef == stagseen
3547                                   || typdef == tend
3548                                   || typdef == ttypeseen
3549                                   || objdef != onone))
3550                             {
3551                               if (current_lb_is_new)
3552                                 switch_line_buffers ();
3553                             }
3554                           else if (definedef != dnone
3555                                    || fvdef == fdefunname
3556                                    || instruct)
3557                             make_C_tag (funorvar);
3558                         }
3559                       else /* not yacc and consider_token failed */
3560                         {
3561                           if (inattribute && fvdef == fignore)
3562                             {
3563                               /* We have just met __attribute__ after a
3564                                  function parameter list: do not tag the
3565                                  function again. */
3566                               fvdef = fvnone;
3567                             }
3568                         }
3569                       midtoken = FALSE;
3570                     }
3571                 } /* if (endtoken (c)) */
3572               else if (intoken (c))
3573                 still_in_token:
3574                 {
3575                   toklen++;
3576                   continue;
3577                 }
3578             } /* if (midtoken) */
3579           else if (begtoken (c))
3580             {
3581               switch (definedef)
3582                 {
3583                 case dnone:
3584                   switch (fvdef)
3585                     {
3586                     case fstartlist:
3587                       /* This prevents tagging fb in
3588                          void (__attribute__((noreturn)) *fb) (void);
3589                          Fixing this is not easy and not very important. */
3590                       fvdef = finlist;
3591                       continue;
3592                     case flistseen:
3593                       if (plainc || declarations)
3594                         {
3595                           make_C_tag (TRUE); /* a function */
3596                           fvdef = fignore;
3597                         }
3598                       break;
3599                     }
3600                   if (structdef == stagseen && !cjava)
3601                     {
3602                       popclass_above (bracelev);
3603                       structdef = snone;
3604                     }
3605                   break;
3606                 case dsharpseen:
3607                   savetoken = token;
3608                   break;
3609                 }
3610               if (!yacc_rules || lp == newlb.buffer + 1)
3611                 {
3612                   tokoff = lp - 1 - newlb.buffer;
3613                   toklen = 1;
3614                   midtoken = TRUE;
3615                 }
3616               continue;
3617             } /* if (begtoken) */
3618         } /* if must look at token */
3619
3620
3621       /* Detect end of line, colon, comma, semicolon and various braces
3622          after having handled a token.*/
3623       switch (c)
3624         {
3625         case ':':
3626           if (inattribute)
3627             break;
3628           if (yacc_rules && token.offset == 0 && token.valid)
3629             {
3630               make_C_tag (FALSE); /* a yacc function */
3631               break;
3632             }
3633           if (definedef != dnone)
3634             break;
3635           switch (objdef)
3636             {
3637             case  otagseen:
3638               objdef = oignore;
3639               make_C_tag (TRUE); /* an Objective C class */
3640               break;
3641             case omethodtag:
3642             case omethodparm:
3643               objdef = omethodcolon;
3644               linebuffer_setlen (&token_name, token_name.len + 1);
3645               strcat (token_name.buffer, ":");
3646               break;
3647             }
3648           if (structdef == stagseen)
3649             {
3650               structdef = scolonseen;
3651               break;
3652             }
3653           /* Should be useless, but may be work as a safety net. */
3654           if (cplpl && fvdef == flistseen)
3655             {
3656               make_C_tag (TRUE); /* a function */
3657               fvdef = fignore;
3658               break;
3659             }
3660           break;
3661         case ';':
3662           if (definedef != dnone || inattribute)
3663             break;
3664           switch (typdef)
3665             {
3666             case tend:
3667             case ttypeseen:
3668               make_C_tag (FALSE); /* a typedef */
3669               typdef = tnone;
3670               fvdef = fvnone;
3671               break;
3672             case tnone:
3673             case tinbody:
3674             case tignore:
3675               switch (fvdef)
3676                 {
3677                 case fignore:
3678                   if (typdef == tignore || cplpl)
3679                     fvdef = fvnone;
3680                   break;
3681                 case fvnameseen:
3682                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3683                       || (members && instruct))
3684                     make_C_tag (FALSE); /* a variable */
3685                   fvextern = FALSE;
3686                   fvdef = fvnone;
3687                   token.valid = FALSE;
3688                   break;
3689                 case flistseen:
3690                   if ((declarations
3691                        && (cplpl || !instruct)
3692                        && (typdef == tnone || (typdef != tignore && instruct)))
3693                       || (members
3694                           && plainc && instruct))
3695                     make_C_tag (TRUE);  /* a function */
3696                   /* FALLTHRU */
3697                 default:
3698                   fvextern = FALSE;
3699                   fvdef = fvnone;
3700                   if (declarations
3701                        && cplpl && structdef == stagseen)
3702                     make_C_tag (FALSE); /* forward declaration */
3703                   else
3704                     token.valid = FALSE;
3705                 } /* switch (fvdef) */
3706               /* FALLTHRU */
3707             default:
3708               if (!instruct)
3709                 typdef = tnone;
3710             }
3711           if (structdef == stagseen)
3712             structdef = snone;
3713           break;
3714         case ',':
3715           if (definedef != dnone || inattribute)
3716             break;
3717           switch (objdef)
3718             {
3719             case omethodtag:
3720             case omethodparm:
3721               make_C_tag (TRUE); /* an Objective C method */
3722               objdef = oinbody;
3723               break;
3724             }
3725           switch (fvdef)
3726             {
3727             case fdefunkey:
3728             case foperator:
3729             case fstartlist:
3730             case finlist:
3731             case fignore:
3732             case vignore:
3733               break;
3734             case fdefunname:
3735               fvdef = fignore;
3736               break;
3737             case fvnameseen:
3738               if (parlev == 0
3739                   && ((globals
3740                        && bracelev == 0
3741                        && templatelev == 0
3742                        && (!fvextern || declarations))
3743                       || (members && instruct)))
3744                   make_C_tag (FALSE); /* a variable */
3745               break;
3746             case flistseen:
3747               if ((declarations && typdef == tnone && !instruct)
3748                   || (members && typdef != tignore && instruct))
3749                 {
3750                   make_C_tag (TRUE); /* a function */
3751                   fvdef = fvnameseen;
3752                 }
3753               else if (!declarations)
3754                 fvdef = fvnone;
3755               token.valid = FALSE;
3756               break;
3757             default:
3758               fvdef = fvnone;
3759             }
3760           if (structdef == stagseen)
3761             structdef = snone;
3762           break;
3763         case ']':
3764           if (definedef != dnone || inattribute)
3765             break;
3766           if (structdef == stagseen)
3767             structdef = snone;
3768           switch (typdef)
3769             {
3770             case ttypeseen:
3771             case tend:
3772               typdef = tignore;
3773               make_C_tag (FALSE);       /* a typedef */
3774               break;
3775             case tnone:
3776             case tinbody:
3777               switch (fvdef)
3778                 {
3779                 case foperator:
3780                 case finlist:
3781                 case fignore:
3782                 case vignore:
3783                   break;
3784                 case fvnameseen:
3785                   if ((members && bracelev == 1)
3786                       || (globals && bracelev == 0
3787                           && (!fvextern || declarations)))
3788                     make_C_tag (FALSE); /* a variable */
3789                   /* FALLTHRU */
3790                 default:
3791                   fvdef = fvnone;
3792                 }
3793               break;
3794             }
3795           break;
3796         case '(':
3797           if (inattribute)
3798             {
3799               attrparlev++;
3800               break;
3801             }
3802           if (definedef != dnone)
3803             break;
3804           if (objdef == otagseen && parlev == 0)
3805             objdef = oparenseen;
3806           switch (fvdef)
3807             {
3808             case fvnameseen:
3809               if (typdef == ttypeseen
3810                   && *lp != '*'
3811                   && !instruct)
3812                 {
3813                   /* This handles constructs like:
3814                      typedef void OperatorFun (int fun); */
3815                   make_C_tag (FALSE);
3816                   typdef = tignore;
3817                   fvdef = fignore;
3818                   break;
3819                 }
3820               /* FALLTHRU */
3821             case foperator:
3822               fvdef = fstartlist;
3823               break;
3824             case flistseen:
3825               fvdef = finlist;
3826               break;
3827             }
3828           parlev++;
3829           break;
3830         case ')':
3831           if (inattribute)
3832             {
3833               if (--attrparlev == 0)
3834                 inattribute = FALSE;
3835               break;
3836             }
3837           if (definedef != dnone)
3838             break;
3839           if (objdef == ocatseen && parlev == 1)
3840             {
3841               make_C_tag (TRUE); /* an Objective C category */
3842               objdef = oignore;
3843             }
3844           if (--parlev == 0)
3845             {
3846               switch (fvdef)
3847                 {
3848                 case fstartlist:
3849                 case finlist:
3850                   fvdef = flistseen;
3851                   break;
3852                 }
3853               if (!instruct
3854                   && (typdef == tend
3855                       || typdef == ttypeseen))
3856                 {
3857                   typdef = tignore;
3858                   make_C_tag (FALSE); /* a typedef */
3859                 }
3860             }
3861           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3862             parlev = 0;
3863           break;
3864         case '{':
3865           if (definedef != dnone)
3866             break;
3867           if (typdef == ttypeseen)
3868             {
3869               /* Whenever typdef is set to tinbody (currently only
3870                  here), typdefbracelev should be set to bracelev. */
3871               typdef = tinbody;
3872               typdefbracelev = bracelev;
3873             }
3874           switch (fvdef)
3875             {
3876             case flistseen:
3877               make_C_tag (TRUE);    /* a function */
3878               /* FALLTHRU */
3879             case fignore:
3880               fvdef = fvnone;
3881               break;
3882             case fvnone:
3883               switch (objdef)
3884                 {
3885                 case otagseen:
3886                   make_C_tag (TRUE); /* an Objective C class */
3887                   objdef = oignore;
3888                   break;
3889                 case omethodtag:
3890                 case omethodparm:
3891                   make_C_tag (TRUE); /* an Objective C method */
3892                   objdef = oinbody;
3893                   break;
3894                 default:
3895                   /* Neutralize `extern "C" {' grot. */
3896                   if (bracelev == 0 && structdef == snone && nestlev == 0
3897                       && typdef == tnone)
3898                     bracelev = -1;
3899                 }
3900               break;
3901             }
3902           switch (structdef)
3903             {
3904             case skeyseen:         /* unnamed struct */
3905               pushclass_above (bracelev, NULL, 0);
3906               structdef = snone;
3907               break;
3908             case stagseen:         /* named struct or enum */
3909             case scolonseen:       /* a class */
3910               pushclass_above (bracelev,token.line+token.offset, token.length);
3911               structdef = snone;
3912               make_C_tag (FALSE);  /* a struct or enum */
3913               break;
3914             }
3915           bracelev++;
3916           break;
3917         case '*':
3918           if (definedef != dnone)
3919             break;
3920           if (fvdef == fstartlist)
3921             {
3922               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3923               token.valid = FALSE;
3924             }
3925           break;
3926         case '}':
3927           if (definedef != dnone)
3928             break;
3929           if (!ignoreindent && lp == newlb.buffer + 1)
3930             {
3931               if (bracelev != 0)
3932                 token.valid = FALSE;
3933               bracelev = 0;     /* reset brace level if first column */
3934               parlev = 0;       /* also reset paren level, just in case... */
3935             }
3936           else if (bracelev > 0)
3937             bracelev--;
3938           else
3939             token.valid = FALSE; /* something gone amiss, token unreliable */
3940           popclass_above (bracelev);
3941           structdef = snone;
3942           /* Only if typdef == tinbody is typdefbracelev significant. */
3943           if (typdef == tinbody && bracelev <= typdefbracelev)
3944             {
3945               assert (bracelev == typdefbracelev);
3946               typdef = tend;
3947             }
3948           break;
3949         case '=':
3950           if (definedef != dnone)
3951             break;
3952           switch (fvdef)
3953             {
3954             case foperator:
3955             case finlist:
3956             case fignore:
3957             case vignore:
3958               break;
3959             case fvnameseen:
3960               if ((members && bracelev == 1)
3961                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3962                 make_C_tag (FALSE); /* a variable */
3963               /* FALLTHRU */
3964             default:
3965               fvdef = vignore;
3966             }
3967           break;
3968         case '<':
3969           if (cplpl
3970               && (structdef == stagseen || fvdef == fvnameseen))
3971             {
3972               templatelev++;
3973               break;
3974             }
3975           goto resetfvdef;
3976         case '>':
3977           if (templatelev > 0)
3978             {
3979               templatelev--;
3980               break;
3981             }
3982           goto resetfvdef;
3983         case '+':
3984         case '-':
3985           if (objdef == oinbody && bracelev == 0)
3986             {
3987               objdef = omethodsign;
3988               break;
3989             }
3990           /* FALLTHRU */
3991         resetfvdef:
3992         case '#': case '~': case '&': case '%': case '/':
3993         case '|': case '^': case '!': case '.': case '?':
3994           if (definedef != dnone)
3995             break;
3996           /* These surely cannot follow a function tag in C. */
3997           switch (fvdef)
3998             {
3999             case foperator:
4000             case finlist:
4001             case fignore:
4002             case vignore:
4003               break;
4004             default:
4005               fvdef = fvnone;
4006             }
4007           break;
4008         case '\0':
4009           if (objdef == otagseen)
4010             {
4011               make_C_tag (TRUE); /* an Objective C class */
4012               objdef = oignore;
4013             }
4014           /* If a macro spans multiple lines don't reset its state. */
4015           if (quotednl)
4016             CNL_SAVE_DEFINEDEF ();
4017           else
4018             CNL ();
4019           break;
4020         } /* switch (c) */
4021
4022     } /* while not eof */
4023
4024   free (lbs[0].lb.buffer);
4025   free (lbs[1].lb.buffer);
4026 }
4027
4028 /*
4029  * Process either a C++ file or a C file depending on the setting
4030  * of a global flag.
4031  */
4032 static void
4033 default_C_entries (inf)
4034      FILE *inf;
4035 {
4036   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4037 }
4038
/* Parse INF as plain C: no language extension flag is handed to
   C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4046
4047 /* Always do C++. */
4048 static void
4049 Cplusplus_entries (inf)
4050      FILE *inf;
4051 {
4052   C_entries (C_PLPL, inf);
4053 }
4054
4055 /* Always do Java. */
4056 static void
4057 Cjava_entries (inf)
4058      FILE *inf;
4059 {
4060   C_entries (C_JAVA, inf);
4061 }
4062
4063 /* Always do C*. */
4064 static void
4065 Cstar_entries (inf)
4066      FILE *inf;
4067 {
4068   C_entries (C_STAR, inf);
4069 }
4070
4071 /* Always do Yacc. */
4072 static void
4073 Yacc_entries (inf)
4074      FILE *inf;
4075 {
4076   C_entries (YACC, inf);
4077 }
4078
4079 \f
4080 /* Useful macros. */
/* Loop header iterating over the lines of FILE_POINTER.  Before each
   iteration, as long as feof (FILE_POINTER) is false, the next line
   is read into LINE_BUFFER with readline and CHAR_POINTER is set to
   LINE_BUFFER's buffer.  The statement following the macro is the
   loop body.  Every argument is parenthesized in the expansion so
   that expression arguments expand as intended. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
4089
/* Test whether CP points at the keyword KW followed by a character
   for which notinname is true.  On success, CP is assigned the
   position after KW and any following spaces (via skip_spaces); on
   failure CP is not assigned.  CP must be an lvalue and is evaluated
   several times; KW must be a string literal. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4095
/* Similar to LOOKING_AT, but the comparison uses strncaseeq, the
   character after KW is not checked with notinname, and spaces after
   KW are not skipped: on success CP is only advanced past KW. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* skip over kw */
4101
4102 /*
4103  * Read a file, but do no processing.  This is used to do regexp
4104  * matching on files that have no language defined.
4105  */
4106 static void
4107 just_read_file (inf)
4108      FILE *inf;
4109 {
4110   register char *dummy;
4111
4112   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4113     continue;
4114 }
4115
4116 \f
4117 /* Fortran parsing */
4118
4119 static void F_takeprec __P((void));
4120 static void F_getit __P((FILE *));
4121
4122 static void
4123 F_takeprec ()
4124 {
4125   dbp = skip_spaces (dbp);
4126   if (*dbp != '*')
4127     return;
4128   dbp++;
4129   dbp = skip_spaces (dbp);
4130   if (strneq (dbp, "(*)", 3))
4131     {
4132       dbp += 3;
4133       return;
4134     }
4135   if (!ISDIGIT (*dbp))
4136     {
4137       --dbp;                    /* force failure */
4138       return;
4139     }
4140   do
4141     dbp++;
4142   while (ISDIGIT (*dbp));
4143 }
4144
4145 static void
4146 F_getit (inf)
4147      FILE *inf;
4148 {
4149   register char *cp;
4150
4151   dbp = skip_spaces (dbp);
4152   if (*dbp == '\0')
4153     {
4154       readline (&lb, inf);
4155       dbp = lb.buffer;
4156       if (dbp[5] != '&')
4157         return;
4158       dbp += 6;
4159       dbp = skip_spaces (dbp);
4160     }
4161   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4162     return;
4163   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4164     continue;
4165   make_tag (dbp, cp-dbp, TRUE,
4166             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4167 }
4168
4169
4170 static void
4171 Fortran_functions (inf)
4172      FILE *inf;
4173 {
4174   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4175     {
4176       if (*dbp == '%')
4177         dbp++;                  /* Ratfor escape to fortran */
4178       dbp = skip_spaces (dbp);
4179       if (*dbp == '\0')
4180         continue;
4181       switch (lowcase (*dbp))
4182         {
4183         case 'i':
4184           if (nocase_tail ("integer"))
4185             F_takeprec ();
4186           break;
4187         case 'r':
4188           if (nocase_tail ("real"))
4189             F_takeprec ();
4190           break;
4191         case 'l':
4192           if (nocase_tail ("logical"))
4193             F_takeprec ();
4194           break;
4195         case 'c':
4196           if (nocase_tail ("complex") || nocase_tail ("character"))
4197             F_takeprec ();
4198           break;
4199         case 'd':
4200           if (nocase_tail ("double"))
4201             {
4202               dbp = skip_spaces (dbp);
4203               if (*dbp == '\0')
4204                 continue;
4205               if (nocase_tail ("precision"))
4206                 break;
4207               continue;
4208             }
4209           break;
4210         }
4211       dbp = skip_spaces (dbp);
4212       if (*dbp == '\0')
4213         continue;
4214       switch (lowcase (*dbp))
4215         {
4216         case 'f':
4217           if (nocase_tail ("function"))
4218             F_getit (inf);
4219           continue;
4220         case 's':
4221           if (nocase_tail ("subroutine"))
4222             F_getit (inf);
4223           continue;
4224         case 'e':
4225           if (nocase_tail ("entry"))
4226             F_getit (inf);
4227           continue;
4228         case 'b':
4229           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4230             {
4231               dbp = skip_spaces (dbp);
4232               if (*dbp == '\0') /* assume un-named */
4233                 make_tag ("blockdata", 9, TRUE,
4234                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4235               else
4236                 F_getit (inf);  /* look for name */
4237             }
4238           continue;
4239         }
4240     }
4241 }
4242
4243 \f
4244 /*
4245  * Ada parsing
4246  * Original code by
4247  * Philippe Waroquiers (1998)
4248  */
4249
4250 static void Ada_getit __P((FILE *, char *));
4251
4252 /* Once we are positioned after an "interesting" keyword, let's get
4253    the real tag value necessary. */
4254 static void
4255 Ada_getit (inf, name_qualifier)
4256      FILE *inf;
4257      char *name_qualifier;
4258 {
4259   register char *cp;
4260   char *name;
4261   char c;
4262
4263   while (!feof (inf))
4264     {
4265       dbp = skip_spaces (dbp);
4266       if (*dbp == '\0'
4267           || (dbp[0] == '-' && dbp[1] == '-'))
4268         {
4269           readline (&lb, inf);
4270           dbp = lb.buffer;
4271         }
4272       switch (lowcase(*dbp))
4273         {
4274         case 'b':
4275           if (nocase_tail ("body"))
4276             {
4277               /* Skipping body of   procedure body   or   package body or ....
4278                  resetting qualifier to body instead of spec. */
4279               name_qualifier = "/b";
4280               continue;
4281             }
4282           break;
4283         case 't':
4284           /* Skipping type of   task type   or   protected type ... */
4285           if (nocase_tail ("type"))
4286             continue;
4287           break;
4288         }
4289       if (*dbp == '"')
4290         {
4291           dbp += 1;
4292           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4293             continue;
4294         }
4295       else
4296         {
4297           dbp = skip_spaces (dbp);
4298           for (cp = dbp;
4299                (*cp != '\0'
4300                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4301                cp++)
4302             continue;
4303           if (cp == dbp)
4304             return;
4305         }
4306       c = *cp;
4307       *cp = '\0';
4308       name = concat (dbp, name_qualifier, "");
4309       *cp = c;
4310       make_tag (name, strlen (name), TRUE,
4311                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312       free (name);
4313       if (c == '"')
4314         dbp = cp + 1;
4315       return;
4316     }
4317 }
4318
4319 static void
4320 Ada_funcs (inf)
4321      FILE *inf;
4322 {
4323   bool inquote = FALSE;
4324   bool skip_till_semicolumn = FALSE;
4325
4326   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4327     {
4328       while (*dbp != '\0')
4329         {
4330           /* Skip a string i.e. "abcd". */
4331           if (inquote || (*dbp == '"'))
4332             {
4333               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4334               if (dbp != NULL)
4335                 {
4336                   inquote = FALSE;
4337                   dbp += 1;
4338                   continue;     /* advance char */
4339                 }
4340               else
4341                 {
4342                   inquote = TRUE;
4343                   break;        /* advance line */
4344                 }
4345             }
4346
4347           /* Skip comments. */
4348           if (dbp[0] == '-' && dbp[1] == '-')
4349             break;              /* advance line */
4350
4351           /* Skip character enclosed in single quote i.e. 'a'
4352              and skip single quote starting an attribute i.e. 'Image. */
4353           if (*dbp == '\'')
4354             {
4355               dbp++ ;
4356               if (*dbp != '\0')
4357                 dbp++;
4358               continue;
4359             }
4360
4361           if (skip_till_semicolumn)
4362             {
4363               if (*dbp == ';')
4364                 skip_till_semicolumn = FALSE;
4365               dbp++;
4366               continue;         /* advance char */
4367             }
4368
4369           /* Search for beginning of a token.  */
4370           if (!begtoken (*dbp))
4371             {
4372               dbp++;
4373               continue;         /* advance char */
4374             }
4375
4376           /* We are at the beginning of a token. */
4377           switch (lowcase(*dbp))
4378             {
4379             case 'f':
4380               if (!packages_only && nocase_tail ("function"))
4381                 Ada_getit (inf, "/f");
4382               else
4383                 break;          /* from switch */
4384               continue;         /* advance char */
4385             case 'p':
4386               if (!packages_only && nocase_tail ("procedure"))
4387                 Ada_getit (inf, "/p");
4388               else if (nocase_tail ("package"))
4389                 Ada_getit (inf, "/s");
4390               else if (nocase_tail ("protected")) /* protected type */
4391                 Ada_getit (inf, "/t");
4392               else
4393                 break;          /* from switch */
4394               continue;         /* advance char */
4395
4396             case 'u':
4397               if (typedefs && !packages_only && nocase_tail ("use"))
4398                 {
4399                   /* when tagging types, avoid tagging  use type Pack.Typename;
4400                      for this, we will skip everything till a ; */
4401                   skip_till_semicolumn = TRUE;
4402                   continue;     /* advance char */
4403                 }
4404
4405             case 't':
4406               if (!packages_only && nocase_tail ("task"))
4407                 Ada_getit (inf, "/k");
4408               else if (typedefs && !packages_only && nocase_tail ("type"))
4409                 {
4410                   Ada_getit (inf, "/t");
4411                   while (*dbp != '\0')
4412                     dbp += 1;
4413                 }
4414               else
4415                 break;          /* from switch */
4416               continue;         /* advance char */
4417             }
4418
4419           /* Look for the end of the token. */
4420           while (!endtoken (*dbp))
4421             dbp++;
4422
4423         } /* advance char */
4424     } /* advance line */
4425 }
4426
4427 \f
4428 /*
4429  * Unix and microcontroller assembly tag handling
4430  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4431  * Idea by Bob Weiner, Motorola Inc. (1994)
4432  */
4433 static void
4434 Asm_labels (inf)
4435      FILE *inf;
4436 {
4437   register char *cp;
4438
4439   LOOP_ON_INPUT_LINES (inf, lb, cp)
4440     {
4441       /* If first char is alphabetic or one of [_.$], test for colon
4442          following identifier. */
4443       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4444         {
4445           /* Read past label. */
4446           cp++;
4447           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4448             cp++;
4449           if (*cp == ':' || iswhite (*cp))
4450             /* Found end of label, so copy it and add it to the table. */
4451             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4452                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4453         }
4454     }
4455 }
4456
4457 \f
4458 /*
4459  * Perl support
4460  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4461  * Perl variable names: /^(my|local).../
4462  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4463  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4464  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4465  */
4466 static void
4467 Perl_functions (inf)
4468      FILE *inf;
4469 {
4470   char *package = savestr ("main"); /* current package name */
4471   register char *cp;
4472
4473   LOOP_ON_INPUT_LINES (inf, lb, cp)
4474     {
4475       skip_spaces(cp);
4476
4477       if (LOOKING_AT (cp, "package"))
4478         {
4479           free (package);
4480           get_tag (cp, &package);
4481         }
4482       else if (LOOKING_AT (cp, "sub"))
4483         {
4484           char *pos;
4485           char *sp = cp;
4486
4487           while (!notinname (*cp))
4488             cp++;
4489           if (cp == sp)
4490             continue;           /* nothing found */
4491           if ((pos = etags_strchr (sp, ':')) != NULL
4492               && pos < cp && pos[1] == ':')
4493             /* The name is already qualified. */
4494             make_tag (sp, cp - sp, TRUE,
4495                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4496           else
4497             /* Qualify it. */
4498             {
4499               char savechar, *name;
4500
4501               savechar = *cp;
4502               *cp = '\0';
4503               name = concat (package, "::", sp);
4504               *cp = savechar;
4505               make_tag (name, strlen(name), TRUE,
4506                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4507               free (name);
4508             }
4509         }
4510        else if (globals)        /* only if we are tagging global vars */
4511         {
4512           /* Skip a qualifier, if any. */
4513           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4514           /* After "my" or "local", but before any following paren or space. */
4515           char *varstart = cp;
4516
4517           if (qual              /* should this be removed?  If yes, how? */
4518               && (*cp == '$' || *cp == '@' || *cp == '%'))
4519             {
4520               varstart += 1;
4521               do
4522                 cp++;
4523               while (ISALNUM (*cp) || *cp == '_');
4524             }
4525           else if (qual)
4526             {
4527               /* Should be examining a variable list at this point;
4528                  could insist on seeing an open parenthesis. */
4529               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4530                 cp++;
4531             }
4532           else
4533             continue;
4534
4535           make_tag (varstart, cp - varstart, FALSE,
4536                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537         }
4538     }
4539 }
4540
4541
4542 /*
4543  * Python support
4544  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4545  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4546  * More ideas by seb bacon <seb@jamkit.com> (2002)
4547  */
4548 static void
4549 Python_functions (inf)
4550      FILE *inf;
4551 {
4552   register char *cp;
4553
4554   LOOP_ON_INPUT_LINES (inf, lb, cp)
4555     {
4556       cp = skip_spaces (cp);
4557       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4558         {
4559           char *name = cp;
4560           while (!notinname (*cp) && *cp != ':')
4561             cp++;
4562           make_tag (name, cp - name, TRUE,
4563                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4564         }
4565     }
4566 }
4567
4568 \f
4569 /*
4570  * PHP support
4571  * Look for:
4572  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4573  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4574  *  - /^[ \t]*define\(\"[^\"]+/
4575  * Only with --members:
4576  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4577  * Idea by Diez B. Roggisch (2001)
4578  */
4579 static void
4580 PHP_functions (inf)
4581      FILE *inf;
4582 {
4583   register char *cp, *name;
4584   bool search_identifier = FALSE;
4585
4586   LOOP_ON_INPUT_LINES (inf, lb, cp)
4587     {
4588       cp = skip_spaces (cp);
4589       name = cp;
4590       if (search_identifier
4591           && *cp != '\0')
4592         {
4593           while (!notinname (*cp))
4594             cp++;
4595           make_tag (name, cp - name, TRUE,
4596                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4597           search_identifier = FALSE;
4598         }
4599       else if (LOOKING_AT (cp, "function"))
4600         {
4601           if(*cp == '&')
4602             cp = skip_spaces (cp+1);
4603           if(*cp != '\0')
4604             {
4605               name = cp;
4606               while (!notinname (*cp))
4607                 cp++;
4608               make_tag (name, cp - name, TRUE,
4609                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4610             }
4611           else
4612             search_identifier = TRUE;
4613         }
4614       else if (LOOKING_AT (cp, "class"))
4615         {
4616           if (*cp != '\0')
4617             {
4618               name = cp;
4619               while (*cp != '\0' && !iswhite (*cp))
4620                 cp++;
4621               make_tag (name, cp - name, FALSE,
4622                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4623             }
4624           else
4625             search_identifier = TRUE;
4626         }
4627       else if (strneq (cp, "define", 6)
4628                && (cp = skip_spaces (cp+6))
4629                && *cp++ == '('
4630                && (*cp == '"' || *cp == '\''))
4631         {
4632           char quote = *cp++;
4633           name = cp;
4634           while (*cp != quote && *cp != '\0')
4635             cp++;
4636           make_tag (name, cp - name, FALSE,
4637                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4638         }
4639       else if (members
4640                && LOOKING_AT (cp, "var")
4641                && *cp == '$')
4642         {
4643           name = cp;
4644           while (!notinname(*cp))
4645             cp++;
4646           make_tag (name, cp - name, FALSE,
4647                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4648         }
4649     }
4650 }
4651
4652 \f
4653 /*
4654  * Cobol tag functions
4655  * We could look for anything that could be a paragraph name.
4656  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4657  * Idea by Corny de Souza (1993)
4658  */
4659 static void
4660 Cobol_paragraphs (inf)
4661      FILE *inf;
4662 {
4663   register char *bp, *ep;
4664
4665   LOOP_ON_INPUT_LINES (inf, lb, bp)
4666     {
4667       if (lb.len < 9)
4668         continue;
4669       bp += 8;
4670
4671       /* If eoln, compiler option or comment ignore whole line. */
4672       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4673         continue;
4674
4675       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4676         continue;
4677       if (*ep++ == '.')
4678         make_tag (bp, ep - bp, TRUE,
4679                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4680     }
4681 }
4682
4683 \f
4684 /*
4685  * Makefile support
4686  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4687  */
4688 static void
4689 Makefile_targets (inf)
4690      FILE *inf;
4691 {
4692   register char *bp;
4693
4694   LOOP_ON_INPUT_LINES (inf, lb, bp)
4695     {
4696       if (*bp == '\t' || *bp == '#')
4697         continue;
4698       while (*bp != '\0' && *bp != '=' && *bp != ':')
4699         bp++;
4700       if (*bp == ':' || (globals && *bp == '='))
4701         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4702                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4703     }
4704 }
4705
4706 \f
4707 /*
4708  * Pascal parsing
4709  * Original code by Mosur K. Mohan (1989)
4710  *
4711  *  Locates tags for procedures & functions.  Doesn't do any type- or
4712  *  var-definitions.  It does look for the keyword "extern" or
4713  *  "forward" immediately following the procedure statement; if found,
4714  *  the tag is skipped.
4715  */
4716 static void
4717 Pascal_functions (inf)
4718      FILE *inf;
4719 {
4720   linebuffer tline;             /* mostly copied from C_entries */
4721   long save_lcno;
4722   int save_lineno, namelen, taglen;
4723   char c, *name;
4724
4725   bool                          /* each of these flags is TRUE iff: */
4726     incomment,                  /* point is inside a comment */
4727     inquote,                    /* point is inside '..' string */
4728     get_tagname,                /* point is after PROCEDURE/FUNCTION
4729                                    keyword, so next item = potential tag */
4730     found_tag,                  /* point is after a potential tag */
4731     inparms,                    /* point is within parameter-list */
4732     verify_tag;                 /* point has passed the parm-list, so the
4733                                    next token will determine whether this
4734                                    is a FORWARD/EXTERN to be ignored, or
4735                                    whether it is a real tag */
4736
4737   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4738   name = NULL;                  /* keep compiler quiet */
4739   dbp = lb.buffer;
4740   *dbp = '\0';
4741   linebuffer_init (&tline);
4742
4743   incomment = inquote = FALSE;
4744   found_tag = FALSE;            /* have a proc name; check if extern */
4745   get_tagname = FALSE;          /* found "procedure" keyword         */
4746   inparms = FALSE;              /* found '(' after "proc"            */
4747   verify_tag = FALSE;           /* check if "extern" is ahead        */
4748
4749
4750   while (!feof (inf))           /* long main loop to get next char */
4751     {
4752       c = *dbp++;
4753       if (c == '\0')            /* if end of line */
4754         {
4755           readline (&lb, inf);
4756           dbp = lb.buffer;
4757           if (*dbp == '\0')
4758             continue;
4759           if (!((found_tag && verify_tag)
4760                 || get_tagname))
4761             c = *dbp++;         /* only if don't need *dbp pointing
4762                                    to the beginning of the name of
4763                                    the procedure or function */
4764         }
4765       if (incomment)
4766         {
4767           if (c == '}')         /* within { } comments */
4768             incomment = FALSE;
4769           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4770             {
4771               dbp++;
4772               incomment = FALSE;
4773             }
4774           continue;
4775         }
4776       else if (inquote)
4777         {
4778           if (c == '\'')
4779             inquote = FALSE;
4780           continue;
4781         }
4782       else
4783         switch (c)
4784           {
4785           case '\'':
4786             inquote = TRUE;     /* found first quote */
4787             continue;
4788           case '{':             /* found open { comment */
4789             incomment = TRUE;
4790             continue;
4791           case '(':
4792             if (*dbp == '*')    /* found open (* comment */
4793               {
4794                 incomment = TRUE;
4795                 dbp++;
4796               }
4797             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4798               inparms = TRUE;
4799             continue;
4800           case ')':             /* end of parms list */
4801             if (inparms)
4802               inparms = FALSE;
4803             continue;
4804           case ';':
4805             if (found_tag && !inparms) /* end of proc or fn stmt */
4806               {
4807                 verify_tag = TRUE;
4808                 break;
4809               }
4810             continue;
4811           }
4812       if (found_tag && verify_tag && (*dbp != ' '))
4813         {
4814           /* Check if this is an "extern" declaration. */
4815           if (*dbp == '\0')
4816             continue;
4817           if (lowcase (*dbp == 'e'))
4818             {
4819               if (nocase_tail ("extern")) /* superfluous, really! */
4820                 {
4821                   found_tag = FALSE;
4822                   verify_tag = FALSE;
4823                 }
4824             }
4825           else if (lowcase (*dbp) == 'f')
4826             {
4827               if (nocase_tail ("forward")) /* check for forward reference */
4828                 {
4829                   found_tag = FALSE;
4830                   verify_tag = FALSE;
4831                 }
4832             }
4833           if (found_tag && verify_tag) /* not external proc, so make tag */
4834             {
4835               found_tag = FALSE;
4836               verify_tag = FALSE;
4837               make_tag (name, namelen, TRUE,
4838                         tline.buffer, taglen, save_lineno, save_lcno);
4839               continue;
4840             }
4841         }
4842       if (get_tagname)          /* grab name of proc or fn */
4843         {
4844           char *cp;
4845
4846           if (*dbp == '\0')
4847             continue;
4848
4849           /* Find block name. */
4850           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4851             continue;
4852
4853           /* Save all values for later tagging. */
4854           linebuffer_setlen (&tline, lb.len);
4855           strcpy (tline.buffer, lb.buffer);
4856           save_lineno = lineno;
4857           save_lcno = linecharno;
4858           name = tline.buffer + (dbp - lb.buffer);
4859           namelen = cp - dbp;
4860           taglen = cp - lb.buffer + 1;
4861
4862           dbp = cp;             /* set dbp to e-o-token */
4863           get_tagname = FALSE;
4864           found_tag = TRUE;
4865           continue;
4866
4867           /* And proceed to check for "extern". */
4868         }
4869       else if (!incomment && !inquote && !found_tag)
4870         {
4871           /* Check for proc/fn keywords. */
4872           switch (lowcase (c))
4873             {
4874             case 'p':
4875               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4876                 get_tagname = TRUE;
4877               continue;
4878             case 'f':
4879               if (nocase_tail ("unction"))
4880                 get_tagname = TRUE;
4881               continue;
4882             }
4883         }
4884     } /* while not eof */
4885
4886   free (tline.buffer);
4887 }
4888
4889 \f
4890 /*
4891  * Lisp tag functions
4892  *  look for (def or (DEF, quote or QUOTE
4893  */
4894
4895 static void L_getit __P((void));
4896
4897 static void
4898 L_getit ()
4899 {
4900   if (*dbp == '\'')             /* Skip prefix quote */
4901     dbp++;
4902   else if (*dbp == '(')
4903   {
4904     dbp++;
4905     /* Try to skip "(quote " */
4906     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4907       /* Ok, then skip "(" before name in (defstruct (foo)) */
4908       dbp = skip_spaces (dbp);
4909   }
4910   get_tag (dbp, NULL);
4911 }
4912
4913 static void
4914 Lisp_functions (inf)
4915      FILE *inf;
4916 {
4917   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4918     {
4919       if (dbp[0] != '(')
4920         continue;
4921
4922       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4923         {
4924           dbp = skip_non_spaces (dbp);
4925           dbp = skip_spaces (dbp);
4926           L_getit ();
4927         }
4928       else
4929         {
4930           /* Check for (foo::defmumble name-defined ... */
4931           do
4932             dbp++;
4933           while (!notinname (*dbp) && *dbp != ':');
4934           if (*dbp == ':')
4935             {
4936               do
4937                 dbp++;
4938               while (*dbp == ':');
4939
4940               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4941                 {
4942                   dbp = skip_non_spaces (dbp);
4943                   dbp = skip_spaces (dbp);
4944                   L_getit ();
4945                 }
4946             }
4947         }
4948     }
4949 }
4950
4951 \f
4952 /*
4953  * Lua script language parsing
4954  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4955  *
4956  *  "function" and "local function" are tags if they start at column 1.
4957  */
4958 static void
4959 Lua_functions (inf)
4960      FILE *inf;
4961 {
4962   register char *bp;
4963
4964   LOOP_ON_INPUT_LINES (inf, lb, bp)
4965     {
4966       if (bp[0] != 'f' && bp[0] != 'l')
4967         continue;
4968
4969       LOOKING_AT (bp, "local"); /* skip possible "local" */
4970
4971       if (LOOKING_AT (bp, "function"))
4972         get_tag (bp, NULL);
4973     }
4974 }
4975
4976 \f
4977 /*
4978  * Postscript tags
4979  * Just look for lines where the first character is '/'
4980  * Also look at "defineps" for PSWrap
4981  * Ideas by:
4982  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4983  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4984  */
4985 static void
4986 PS_functions (inf)
4987      FILE *inf;
4988 {
4989   register char *bp, *ep;
4990
4991   LOOP_ON_INPUT_LINES (inf, lb, bp)
4992     {
4993       if (bp[0] == '/')
4994         {
4995           for (ep = bp+1;
4996                *ep != '\0' && *ep != ' ' && *ep != '{';
4997                ep++)
4998             continue;
4999           make_tag (bp, ep - bp, TRUE,
5000                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5001         }
5002       else if (LOOKING_AT (bp, "defineps"))
5003         get_tag (bp, NULL);
5004     }
5005 }
5006
5007 \f
5008 /*
5009  * Forth tags
5010  * Ignore anything after \ followed by space or in ( )
5011  * Look for words defined by :
5012  * Look for constant, code, create, defer, value, and variable
5013  * OBP extensions:  Look for buffer:, field,
5014  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5015  */
5016 static void
5017 Forth_words (inf)
5018      FILE *inf;
5019 {
5020   register char *bp;
5021
5022   LOOP_ON_INPUT_LINES (inf, lb, bp)
5023     while ((bp = skip_spaces (bp))[0] != '\0')
5024       if (bp[0] == '\\' && iswhite(bp[1]))
5025         break;                  /* read next line */
5026       else if (bp[0] == '(' && iswhite(bp[1]))
5027         do                      /* skip to ) or eol */
5028           bp++;
5029         while (*bp != ')' && *bp != '\0');
5030       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5031                || LOOKING_AT_NOCASE (bp, "constant")
5032                || LOOKING_AT_NOCASE (bp, "code")
5033                || LOOKING_AT_NOCASE (bp, "create")
5034                || LOOKING_AT_NOCASE (bp, "defer")
5035                || LOOKING_AT_NOCASE (bp, "value")
5036                || LOOKING_AT_NOCASE (bp, "variable")
5037                || LOOKING_AT_NOCASE (bp, "buffer:")
5038                || LOOKING_AT_NOCASE (bp, "field"))
5039         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5040       else
5041         bp = skip_non_spaces (bp);
5042 }
5043
5044 \f
5045 /*
5046  * Scheme tag functions
5047  * look for (def... xyzzy
5048  *          (def... (xyzzy
5049  *          (def ... ((...(xyzzy ....
5050  *          (set! xyzzy
5051  * Original code by Ken Haase (1985?)
5052  */
5053 static void
5054 Scheme_functions (inf)
5055      FILE *inf;
5056 {
5057   register char *bp;
5058
5059   LOOP_ON_INPUT_LINES (inf, lb, bp)
5060     {
5061       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5062         {
5063           bp = skip_non_spaces (bp+4);
5064           /* Skip over open parens and white space */
5065           while (notinname (*bp))
5066             bp++;
5067           get_tag (bp, NULL);
5068         }
5069       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5070         get_tag (bp, NULL);
5071     }
5072 }
5073
5074 \f
5075 /* Find tags in TeX and LaTeX input files.  */
5076
5077 /* TEX_toktab is a table of TeX control sequences that define tags.
5078  * Each entry records one such control sequence.
5079  *
5080  * Original code from who knows whom.
5081  * Ideas by:
5082  *   Stefan Monnier (2002)
5083  */
5084
5085 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5086
5087 /* Default set of control sequences to put into TEX_toktab.
5088    The value of environment var TEXTAGS is prepended to this.  */
5089 static char *TEX_defenv = "\
5090 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5091 :part:appendix:entry:index:def\
5092 :newcommand:renewcommand:newenvironment:renewenvironment";
5093
5094 static void TEX_mode __P((FILE *));
5095 static void TEX_decode_env __P((char *, char *));
5096
5097 static char TEX_esc = '\\';
5098 static char TEX_opgrp = '{';
5099 static char TEX_clgrp = '}';
5100
5101 /*
5102  * TeX/LaTeX scanning loop.
5103  */
5104 static void
5105 TeX_commands (inf)
5106      FILE *inf;
5107 {
5108   char *cp;
5109   linebuffer *key;
5110
5111   /* Select either \ or ! as escape character.  */
5112   TEX_mode (inf);
5113
5114   /* Initialize token table once from environment. */
5115   if (TEX_toktab == NULL)
5116     TEX_decode_env ("TEXTAGS", TEX_defenv);
5117
5118   LOOP_ON_INPUT_LINES (inf, lb, cp)
5119     {
5120       /* Look at each TEX keyword in line. */
5121       for (;;)
5122         {
5123           /* Look for a TEX escape. */
5124           while (*cp++ != TEX_esc)
5125             if (cp[-1] == '\0' || cp[-1] == '%')
5126               goto tex_next_line;
5127
5128           for (key = TEX_toktab; key->buffer != NULL; key++)
5129             if (strneq (cp, key->buffer, key->len))
5130               {
5131                 register char *p;
5132                 int namelen, linelen;
5133                 bool opgrp = FALSE;
5134
5135                 cp = skip_spaces (cp + key->len);
5136                 if (*cp == TEX_opgrp)
5137                   {
5138                     opgrp = TRUE;
5139                     cp++;
5140                   }
5141                 for (p = cp;
5142                      (!iswhite (*p) && *p != '#' &&
5143                       *p != TEX_opgrp && *p != TEX_clgrp);
5144                      p++)
5145                   continue;
5146                 namelen = p - cp;
5147                 linelen = lb.len;
5148                 if (!opgrp || *p == TEX_clgrp)
5149                   {
5150                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5151                       *p++;
5152                     linelen = p - lb.buffer + 1;
5153                   }
5154                 make_tag (cp, namelen, TRUE,
5155                           lb.buffer, linelen, lineno, linecharno);
5156                 goto tex_next_line; /* We only tag a line once */
5157               }
5158         }
5159     tex_next_line:
5160       ;
5161     }
5162 }
5163
5164 #define TEX_LESC '\\'
5165 #define TEX_SESC '!'
5166
5167 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5168    chars accordingly. */
5169 static void
5170 TEX_mode (inf)
5171      FILE *inf;
5172 {
5173   int c;
5174
5175   while ((c = getc (inf)) != EOF)
5176     {
5177       /* Skip to next line if we hit the TeX comment char. */
5178       if (c == '%')
5179         while (c != '\n')
5180           c = getc (inf);
5181       else if (c == TEX_LESC || c == TEX_SESC )
5182         break;
5183     }
5184
5185   if (c == TEX_LESC)
5186     {
5187       TEX_esc = TEX_LESC;
5188       TEX_opgrp = '{';
5189       TEX_clgrp = '}';
5190     }
5191   else
5192     {
5193       TEX_esc = TEX_SESC;
5194       TEX_opgrp = '<';
5195       TEX_clgrp = '>';
5196     }
5197   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5198      No attempt is made to correct the situation. */
5199   rewind (inf);
5200 }
5201
5202 /* Read environment and prepend it to the default string.
5203    Build token table. */
5204 static void
5205 TEX_decode_env (evarname, defenv)
5206      char *evarname;
5207      char *defenv;
5208 {
5209   register char *env, *p;
5210   int i, len;
5211
5212   /* Append default string to environment. */
5213   env = getenv (evarname);
5214   if (!env)
5215     env = defenv;
5216   else
5217     {
5218       char *oldenv = env;
5219       env = concat (oldenv, defenv, "");
5220     }
5221
5222   /* Allocate a token table */
5223   for (len = 1, p = env; p;)
5224     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5225       len++;
5226   TEX_toktab = xnew (len, linebuffer);
5227
5228   /* Unpack environment string into token table. Be careful about */
5229   /* zero-length strings (leading ':', "::" and trailing ':') */
5230   for (i = 0; *env != '\0';)
5231     {
5232       p = etags_strchr (env, ':');
5233       if (!p)                   /* End of environment string. */
5234         p = env + strlen (env);
5235       if (p - env > 0)
5236         {                       /* Only non-zero strings. */
5237           TEX_toktab[i].buffer = savenstr (env, p - env);
5238           TEX_toktab[i].len = p - env;
5239           i++;
5240         }
5241       if (*p)
5242         env = p + 1;
5243       else
5244         {
5245           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5246           TEX_toktab[i].len = 0;
5247           break;
5248         }
5249     }
5250 }
5251
5252 \f
5253 /* Texinfo support.  Dave Love, Mar. 2000.  */
5254 static void
5255 Texinfo_nodes (inf)
5256      FILE * inf;
5257 {
5258   char *cp, *start;
5259   LOOP_ON_INPUT_LINES (inf, lb, cp)
5260     if (LOOKING_AT (cp, "@node"))
5261       {
5262         start = cp;
5263         while (*cp != '\0' && *cp != ',')
5264           cp++;
5265         make_tag (start, cp - start, TRUE,
5266                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5267       }
5268 }
5269
5270 \f
5271 /*
5272  * HTML support.
5273  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5274  * Contents of <a name=xxx> are tags with name xxx.
5275  *
5276  * Francesco Potortì, 2002.
5277  */
5278 static void
5279 HTML_labels (inf)
5280      FILE * inf;
5281 {
5282   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5283   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5284   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5285   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5286   char *end;
5287
5288
5289   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5290
5291   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5292     for (;;)                    /* loop on the same line */
5293       {
5294         if (skiptag)            /* skip HTML tag */
5295           {
5296             while (*dbp != '\0' && *dbp != '>')
5297               dbp++;
5298             if (*dbp == '>')
5299               {
5300                 dbp += 1;
5301                 skiptag = FALSE;
5302                 continue;       /* look on the same line */
5303               }
5304             break;              /* go to next line */
5305           }
5306
5307         else if (intag) /* look for "name=" or "id=" */
5308           {
5309             while (*dbp != '\0' && *dbp != '>'
5310                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5311               dbp++;
5312             if (*dbp == '\0')
5313               break;            /* go to next line */
5314             if (*dbp == '>')
5315               {
5316                 dbp += 1;
5317                 intag = FALSE;
5318                 continue;       /* look on the same line */
5319               }
5320             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5321                 || LOOKING_AT_NOCASE (dbp, "id="))
5322               {
5323                 bool quoted = (dbp[0] == '"');
5324
5325                 if (quoted)
5326                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5327                     continue;
5328                 else
5329                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5330                     continue;
5331                 linebuffer_setlen (&token_name, end - dbp);
5332                 strncpy (token_name.buffer, dbp, end - dbp);
5333                 token_name.buffer[end - dbp] = '\0';
5334
5335                 dbp = end;
5336                 intag = FALSE;  /* we found what we looked for */
5337                 skiptag = TRUE; /* skip to the end of the tag */
5338                 getnext = TRUE; /* then grab the text */
5339                 continue;       /* look on the same line */
5340               }
5341             dbp += 1;
5342           }
5343
5344         else if (getnext)       /* grab next tokens and tag them */
5345           {
5346             dbp = skip_spaces (dbp);
5347             if (*dbp == '\0')
5348               break;            /* go to next line */
5349             if (*dbp == '<')
5350               {
5351                 intag = TRUE;
5352                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5353                 continue;       /* look on the same line */
5354               }
5355
5356             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5357               continue;
5358             make_tag (token_name.buffer, token_name.len, TRUE,
5359                       dbp, end - dbp, lineno, linecharno);
5360             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5361             getnext = FALSE;
5362             break;              /* go to next line */
5363           }
5364
5365         else                    /* look for an interesting HTML tag */
5366           {
5367             while (*dbp != '\0' && *dbp != '<')
5368               dbp++;
5369             if (*dbp == '\0')
5370               break;            /* go to next line */
5371             intag = TRUE;
5372             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5373               {
5374                 inanchor = TRUE;
5375                 continue;       /* look on the same line */
5376               }
5377             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5378                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5379                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5380                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5381               {
5382                 intag = FALSE;
5383                 getnext = TRUE;
5384                 continue;       /* look on the same line */
5385               }
5386             dbp += 1;
5387           }
5388       }
5389 }
5390
5391 \f
5392 /*
5393  * Prolog support
5394  *
5395  * Assumes that the predicate or rule starts at column 0.
5396  * Only the first clause of a predicate or rule is added.
5397  * Original code by Sunichirou Sugou (1989)
5398  * Rewritten by Anders Lindgren (1996)
5399  */
/* Tag the clause head at the start of a line unless its name repeats
   the name of the previous clause; return the name length or 0.  */
static int prolog_pr __P((char *, char *));
/* Read lines until one containing the comment terminator is in the
   line buffer, or end of file is reached.  */
static void prolog_skip_comment __P((linebuffer *, FILE *));
/* Return the length of the Prolog atom found at the given offset of
   the string, or -1 if there is none.  */
static int prolog_atom __P((char *, int));
5403
5404 static void
5405 Prolog_functions (inf)
5406      FILE *inf;
5407 {
5408   char *cp, *last;
5409   int len;
5410   int allocated;
5411
5412   allocated = 0;
5413   len = 0;
5414   last = NULL;
5415
5416   LOOP_ON_INPUT_LINES (inf, lb, cp)
5417     {
5418       if (cp[0] == '\0')        /* Empty line */
5419         continue;
5420       else if (iswhite (cp[0])) /* Not a predicate */
5421         continue;
5422       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5423         prolog_skip_comment (&lb, inf);
5424       else if ((len = prolog_pr (cp, last)) > 0)
5425         {
5426           /* Predicate or rule.  Store the function name so that we
5427              only generate a tag for the first clause.  */
5428           if (last == NULL)
5429             last = xnew(len + 1, char);
5430           else if (len + 1 > allocated)
5431             xrnew (last, len + 1, char);
5432           allocated = len + 1;
5433           strncpy (last, cp, len);
5434           last[len] = '\0';
5435         }
5436     }
5437 }
5438
5439
5440 static void
5441 prolog_skip_comment (plb, inf)
5442      linebuffer *plb;
5443      FILE *inf;
5444 {
5445   char *cp;
5446
5447   do
5448     {
5449       for (cp = plb->buffer; *cp != '\0'; cp++)
5450         if (cp[0] == '*' && cp[1] == '/')
5451           return;
5452       readline (plb, inf);
5453     }
5454   while (!feof(inf));
5455 }
5456
5457 /*
5458  * A predicate or rule definition is added if it matches:
5459  *     <beginning of line><Prolog Atom><whitespace>(
5460  * or  <beginning of line><Prolog Atom><whitespace>:-
5461  *
5462  * It is added to the tags database if it doesn't match the
5463  * name of the previous clause header.
5464  *
5465  * Return the size of the name of the predicate or rule, or 0 if no
5466  * header was found.
5467  */
5468 static int
5469 prolog_pr (s, last)
5470      char *s;
5471      char *last;                /* Name of last clause. */
5472 {
5473   int pos;
5474   int len;
5475
5476   pos = prolog_atom (s, 0);
5477   if (pos < 1)
5478     return 0;
5479
5480   len = pos;
5481   pos = skip_spaces (s + pos) - s;
5482
5483   if ((s[pos] == '.'
5484        || (s[pos] == '(' && (pos += 1))
5485        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5486       && (last == NULL          /* save only the first clause */
5487           || len != (int)strlen (last)
5488           || !strneq (s, last, len)))
5489         {
5490           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5491           return len;
5492         }
5493   else
5494     return 0;
5495 }
5496
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int)(p - start);
    }

  if (*p != '\'')
    return -1;                  /* neither form of atom starts here */

  /* The atom is quoted: walk to the matching quote. */
  for (p++;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* skip the backslash and what it quotes */
        break;
      case '\'':
        if (p[1] != '\'')
          return (int)(p + 1 - start); /* closing quote */
        p += 2;                 /* a doubled quote stands for itself */
        break;
      default:
        p++;
        break;
      }
}
5555
5556 \f
5557 /*
5558  * Support for Erlang
5559  *
5560  * Generates tags for functions, defines, and records.
5561  * Assumes that Erlang functions start at column 0.
5562  * Original code by Anders Lindgren (1996)
5563  */
/* Tag the function head at the start of a line unless its name repeats
   the name of the previous clause; return the name length or 0.  */
static int erlang_func __P((char *, char *));
/* Tag the name defined by a -define or -record attribute line.  */
static void erlang_attribute __P((char *));
/* Return the length of the Erlang atom (or variable) at the start of
   the string, or 0 if there is none.  */
static int erlang_atom __P((char *));
5567
5568 static void
5569 Erlang_functions (inf)
5570      FILE *inf;
5571 {
5572   char *cp, *last;
5573   int len;
5574   int allocated;
5575
5576   allocated = 0;
5577   len = 0;
5578   last = NULL;
5579
5580   LOOP_ON_INPUT_LINES (inf, lb, cp)
5581     {
5582       if (cp[0] == '\0')        /* Empty line */
5583         continue;
5584       else if (iswhite (cp[0])) /* Not function nor attribute */
5585         continue;
5586       else if (cp[0] == '%')    /* comment */
5587         continue;
5588       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5589         continue;
5590       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5591         {
5592           erlang_attribute (cp);
5593           last = NULL;
5594         }
5595       else if ((len = erlang_func (cp, last)) > 0)
5596         {
5597           /*
5598            * Function.  Store the function name so that we only
5599            * generates a tag for the first clause.
5600            */
5601           if (last == NULL)
5602             last = xnew (len + 1, char);
5603           else if (len + 1 > allocated)
5604             xrnew (last, len + 1, char);
5605           allocated = len + 1;
5606           strncpy (last, cp, len);
5607           last[len] = '\0';
5608         }
5609     }
5610 }
5611
5612
5613 /*
5614  * A function definition is added if it matches:
5615  *     <beginning of line><Erlang Atom><whitespace>(
5616  *
5617  * It is added to the tags database if it doesn't match the
5618  * name of the previous clause header.
5619  *
5620  * Return the size of the name of the function, or 0 if no function
5621  * was found.
5622  */
5623 static int
5624 erlang_func (s, last)
5625      char *s;
5626      char *last;                /* Name of last clause. */
5627 {
5628   int pos;
5629   int len;
5630
5631   pos = erlang_atom (s);
5632   if (pos < 1)
5633     return 0;
5634
5635   len = pos;
5636   pos = skip_spaces (s + pos) - s;
5637
5638   /* Save only the first clause. */
5639   if (s[pos++] == '('
5640       && (last == NULL
5641           || len != (int)strlen (last)
5642           || !strneq (s, last, len)))
5643         {
5644           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5645           return len;
5646         }
5647
5648   return 0;
5649 }
5650
5651
5652 /*
5653  * Handle attributes.  Currently, tags are generated for defines
5654  * and records.
5655  *
5656  * They are on the form:
5657  * -define(foo, bar).
5658  * -define(Foo(M, N), M+N).
5659  * -record(graph, {vtab = notable, cyclic = true}).
5660  */
5661 static void
5662 erlang_attribute (s)
5663      char *s;
5664 {
5665   char *cp = s;
5666
5667   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5668       && *cp++ == '(')
5669     {
5670       int len = erlang_atom (skip_spaces (cp));
5671       if (len > 0)
5672         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5673     }
5674   return;
5675 }
5676
5677
/*
 * Consume the Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 *
 * An unquoted atom starts with a letter or an underscore and continues
 * with alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes, inside which a backslash quotes the next
 * character; the count includes both quotes.
 */
static int
erlang_atom (s)
     char *s;
{
  int n;

  if (s[0] == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\')
            {
              n++;              /* step onto the quoted character */
              if (s[n] == '\0')
                return 0;
            }
          n++;
        }
      return n + 1;             /* count the closing quote too */
    }

  if (!ISALPHA (s[0]) && s[0] != '_')
    return 0;                   /* no atom here */

  /* The atom is unquoted. */
  for (n = 1; ISALNUM (s[n]) || s[n] == '_'; n++)
    continue;
  return n;
}
5706
5707 \f
5708 #ifdef ETAGS_REGEXPS
5709
/* Unquote, in place, the text between the separator that starts the
   string and the next unquoted separator; return a pointer to the
   latter, or NULL if there is none.  */
static char *scan_separators __P((char *));
/* Split a /pattern/name/modifiers argument, compile the pattern and
   push it on the list of regexps.  */
static void add_regex __P((char *, language *));
/* Build a newly allocated tag name from a name template by replacing
   \N with the text matched by group N.  */
static char *substitute __P((char *, char *, struct re_registers *));
5713
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text that follows is copied
 * down over the start of NAME, up to the next unquoted occurrence of
 * the separator.  A backslash quotes the following character: \a, \b,
 * \d, \e, \f, \n, \r, \t and \v become the corresponding control
 * characters, a quoted separator becomes a plain separator, and any
 * other quoted character is kept together with its backslash.
 *
 * Works in place and null terminates the result.  Returns a pointer
 * to the terminating separator, or NULL for unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *src;                    /* next character to examine */
  char *dst = name;             /* where the next result character goes */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src != '\\')
        {
          if (*src == sep)
            break;              /* unquoted separator: done */
          *dst++ = *src;
          continue;
        }

      /* A backslash: decode the character it quotes. */
      src++;
      if (*src == '\0')
        break;                  /* dangling backslash at end of string */
      switch (*src)
        {
        case 'a': *dst++ = '\007'; break; /* BEL (bell)            */
        case 'b': *dst++ = '\b'; break;   /* BS (back space)       */
        case 'd': *dst++ = 0177; break;   /* DEL (delete)          */
        case 'e': *dst++ = 033; break;    /* ESC (escape)          */
        case 'f': *dst++ = '\f'; break;   /* FF (form feed)        */
        case 'n': *dst++ = '\n'; break;   /* NL (new line)         */
        case 'r': *dst++ = '\r'; break;   /* CR (carriage return)  */
        case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab)  */
        case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)     */
        default:
          if (*src != sep)
            *dst++ = '\\';      /* something else: preserve the quote */
          *dst++ = *src;
          break;
        }
    }

  /* Anything but a separator here means an unterminated regexp. */
  result = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5773
5774 /* Look at the argument of --regex or --no-regex and do the right
5775    thing.  Same for each line of a regexp file. */
5776 static void
5777 analyse_regex (regex_arg)
5778      char *regex_arg;
5779 {
5780   if (regex_arg == NULL)
5781     {
5782       free_regexps ();          /* --no-regex: remove existing regexps */
5783       return;
5784     }
5785
5786   /* A real --regexp option or a line in a regexp file. */
5787   switch (regex_arg[0])
5788     {
5789       /* Comments in regexp file or null arg to --regex. */
5790     case '\0':
5791     case ' ':
5792     case '\t':
5793       break;
5794
5795       /* Read a regex file.  This is recursive and may result in a
5796          loop, which will stop when the file descriptors are exhausted. */
5797     case '@':
5798       {
5799         FILE *regexfp;
5800         linebuffer regexbuf;
5801         char *regexfile = regex_arg + 1;
5802
5803         /* regexfile is a file containing regexps, one per line. */
5804         regexfp = fopen (regexfile, "r");
5805         if (regexfp == NULL)
5806           {
5807             pfatal (regexfile);
5808             return;
5809           }
5810         linebuffer_init (&regexbuf);
5811         while (readline_internal (&regexbuf, regexfp) > 0)
5812           analyse_regex (regexbuf.buffer);
5813         free (regexbuf.buffer);
5814         fclose (regexfp);
5815       }
5816       break;
5817
5818       /* Regexp to be used for a specific language only. */
5819     case '{':
5820       {
5821         language *lang;
5822         char *lang_name = regex_arg + 1;
5823         char *cp;
5824
5825         for (cp = lang_name; *cp != '}'; cp++)
5826           if (*cp == '\0')
5827             {
5828               error ("unterminated language name in regex: %s", regex_arg);
5829               return;
5830             }
5831         *cp++ = '\0';
5832         lang = get_language_from_langname (lang_name);
5833         if (lang == NULL)
5834           return;
5835         add_regex (cp, lang);
5836       }
5837       break;
5838
5839       /* Regexp to be used for any language. */
5840     default:
5841       add_regex (regex_arg, NULL);
5842       break;
5843     }
5844 }
5845
5846 /* Separate the regexp pattern, compile it,
5847    and care for optional name and modifiers. */
5848 static void
5849 add_regex (regexp_pattern, lang)
5850      char *regexp_pattern;
5851      language *lang;
5852 {
5853   static struct re_pattern_buffer zeropattern;
5854   char sep, *pat, *name, *modifiers;
5855   const char *err;
5856   struct re_pattern_buffer *patbuf;
5857   regexp *rp;
5858   bool
5859     force_explicit_name = TRUE, /* do not use implicit tag names */
5860     ignore_case = FALSE,        /* case is significant */
5861     multi_line = FALSE,         /* matches are done one line at a time */
5862     single_line = FALSE;        /* dot does not match newline */
5863
5864
5865   if (strlen(regexp_pattern) < 3)
5866     {
5867       error ("null regexp", (char *)NULL);
5868       return;
5869     }
5870   sep = regexp_pattern[0];
5871   name = scan_separators (regexp_pattern);
5872   if (name == NULL)
5873     {
5874       error ("%s: unterminated regexp", regexp_pattern);
5875       return;
5876     }
5877   if (name[1] == sep)
5878     {
5879       error ("null name for regexp \"%s\"", regexp_pattern);
5880       return;
5881     }
5882   modifiers = scan_separators (name);
5883   if (modifiers == NULL)        /* no terminating separator --> no name */
5884     {
5885       modifiers = name;
5886       name = "";
5887     }
5888   else
5889     modifiers += 1;             /* skip separator */
5890
5891   /* Parse regex modifiers. */
5892   for (; modifiers[0] != '\0'; modifiers++)
5893     switch (modifiers[0])
5894       {
5895       case 'N':
5896         if (modifiers == name)
5897           error ("forcing explicit tag name but no name, ignoring", NULL);
5898         force_explicit_name = TRUE;
5899         break;
5900       case 'i':
5901         ignore_case = TRUE;
5902         break;
5903       case 's':
5904         single_line = TRUE;
5905         /* FALLTHRU */
5906       case 'm':
5907         multi_line = TRUE;
5908         need_filebuf = TRUE;
5909         break;
5910       default:
5911         {
5912           char wrongmod [2];
5913           wrongmod[0] = modifiers[0];
5914           wrongmod[1] = '\0';
5915           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5916         }
5917         break;
5918       }
5919
5920   patbuf = xnew (1, struct re_pattern_buffer);
5921   *patbuf = zeropattern;
5922   if (ignore_case)
5923     {
5924       static char lc_trans[CHARS];
5925       int i;
5926       for (i = 0; i < CHARS; i++)
5927         lc_trans[i] = lowcase (i);
5928       patbuf->translate = lc_trans;     /* translation table to fold case  */
5929     }
5930
5931   if (multi_line)
5932     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5933   else
5934     pat = regexp_pattern;
5935
5936   if (single_line)
5937     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5938   else
5939     re_set_syntax (RE_SYNTAX_EMACS);
5940
5941   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5942   if (multi_line)
5943     free (pat);
5944   if (err != NULL)
5945     {
5946       error ("%s while compiling pattern", err);
5947       return;
5948     }
5949
5950   rp = p_head;
5951   p_head = xnew (1, regexp);
5952   p_head->pattern = savestr (regexp_pattern);
5953   p_head->p_next = rp;
5954   p_head->lang = lang;
5955   p_head->pat = patbuf;
5956   p_head->name = savestr (name);
5957   p_head->error_signaled = FALSE;
5958   p_head->force_explicit_name = force_explicit_name;
5959   p_head->ignore_case = ignore_case;
5960   p_head->multi_line = multi_line;
5961 }
5962
5963 /*
5964  * Do the substitutions indicated by the regular expression and
5965  * arguments.
5966  */
5967 static char *
5968 substitute (in, out, regs)
5969      char *in, *out;
5970      struct re_registers *regs;
5971 {
5972   char *result, *t;
5973   int size, dig, diglen;
5974
5975   result = NULL;
5976   size = strlen (out);
5977
5978   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5979   if (out[size - 1] == '\\')
5980     fatal ("pattern error in \"%s\"", out);
5981   for (t = etags_strchr (out, '\\');
5982        t != NULL;
5983        t = etags_strchr (t + 2, '\\'))
5984     if (ISDIGIT (t[1]))
5985       {
5986         dig = t[1] - '0';
5987         diglen = regs->end[dig] - regs->start[dig];
5988         size += diglen - 2;
5989       }
5990     else
5991       size -= 1;
5992
5993   /* Allocate space and do the substitutions. */
5994   assert (size >= 0);
5995   result = xnew (size + 1, char);
5996
5997   for (t = result; *out != '\0'; out++)
5998     if (*out == '\\' && ISDIGIT (*++out))
5999       {
6000         dig = *out - '0';
6001         diglen = regs->end[dig] - regs->start[dig];
6002         strncpy (t, in + regs->start[dig], diglen);
6003         t += diglen;
6004       }
6005     else
6006       *t++ = *out;
6007   *t = '\0';
6008
6009   assert (t <= result + size);
6010   assert (t - result == (int)strlen (result));
6011
6012   return result;
6013 }
6014
6015 /* Deallocate all regexps. */
6016 static void
6017 free_regexps ()
6018 {
6019   regexp *rp;
6020   while (p_head != NULL)
6021     {
6022       rp = p_head->p_next;
6023       free (p_head->pattern);
6024       free (p_head->name);
6025       free (p_head);
6026       p_head = rp;
6027     }
6028   return;
6029 }
6030
6031 /*
6032  * Reads the whole file as a single string from `filebuf' and looks for
6033  * multi-line regular expressions, creating tags on matches.
6034  * readline already dealt with normal regexps.
6035  *
6036  * Idea by Ben Wing <ben@666.com> (2002).
6037  */
6038 static void
6039 regex_tag_multiline ()
6040 {
6041   char *buffer = filebuf.buffer;
6042   regexp *rp;
6043   char *name;
6044
6045   for (rp = p_head; rp != NULL; rp = rp->p_next)
6046     {
6047       int match = 0;
6048
6049       if (!rp->multi_line)
6050         continue;               /* skip normal regexps */
6051
6052       /* Generic initialisations before parsing file from memory. */
6053       lineno = 1;               /* reset global line number */
6054       charno = 0;               /* reset global char number */
6055       linecharno = 0;           /* reset global char number of line start */
6056
6057       /* Only use generic regexps or those for the current language. */
6058       if (rp->lang != NULL && rp->lang != curfdp->lang)
6059         continue;
6060
6061       while (match >= 0 && match < filebuf.len)
6062         {
6063           match = re_search (rp->pat, buffer, filebuf.len, charno,
6064                              filebuf.len - match, &rp->regs);
6065           switch (match)
6066             {
6067             case -2:
6068               /* Some error. */
6069               if (!rp->error_signaled)
6070                 {
6071                   error ("regexp stack overflow while matching \"%s\"",
6072                          rp->pattern);
6073                   rp->error_signaled = TRUE;
6074                 }
6075               break;
6076             case -1:
6077               /* No match. */
6078               break;
6079             default:
6080               if (match == rp->regs.end[0])
6081                 {
6082                   if (!rp->error_signaled)
6083                     {
6084                       error ("regexp matches the empty string: \"%s\"",
6085                              rp->pattern);
6086                       rp->error_signaled = TRUE;
6087                     }
6088                   match = -3;   /* exit from while loop */
6089                   break;
6090                 }
6091
6092               /* Match occurred.  Construct a tag. */
6093               while (charno < rp->regs.end[0])
6094                 if (buffer[charno++] == '\n')
6095                   lineno++, linecharno = charno;
6096               name = rp->name;
6097               if (name[0] == '\0')
6098                 name = NULL;
6099               else /* make a named tag */
6100                 name = substitute (buffer, rp->name, &rp->regs);
6101               if (rp->force_explicit_name)
6102                 /* Force explicit tag name, if a name is there. */
6103                 pfnote (name, TRUE, buffer + linecharno,
6104                         charno - linecharno + 1, lineno, linecharno);
6105               else
6106                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6107                           charno - linecharno + 1, lineno, linecharno);
6108               break;
6109             }
6110         }
6111     }
6112 }
6113
6114 #endif /* ETAGS_REGEXPS */
6115
6116 \f
6117 static bool
6118 nocase_tail (cp)
6119      char *cp;
6120 {
6121   register int len = 0;
6122
6123   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6124     cp++, len++;
6125   if (*cp == '\0' && !intoken (dbp[len]))
6126     {
6127       dbp += len;
6128       return TRUE;
6129     }
6130   return FALSE;
6131 }
6132
6133 static void
6134 get_tag (bp, namepp)
6135      register char *bp;
6136      char **namepp;
6137 {
6138   register char *cp = bp;
6139
6140   if (*bp != '\0')
6141     {
6142       /* Go till you get to white space or a syntactic break */
6143       for (cp = bp + 1; !notinname (*cp); cp++)
6144         continue;
6145       make_tag (bp, cp - bp, TRUE,
6146                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6147     }
6148
6149   if (namepp != NULL)
6150     *namepp = savenstr (bp, cp - bp);
6151 }
6152
6153 /*
6154  * Read a line of text from `stream' into `lbp', excluding the
6155  * newline or CR-NL, if any.  Return the number of characters read from
6156  * `stream', which is the length of the line including the newline.
6157  *
6158  * On DOS or Windows we do not count the CR character, if any before the
6159  * NL, in the returned length; this mirrors the behavior of Emacs on those
6160  * platforms (for text files, it translates CR-NL to NL as it reads in the
6161  * file).
6162  *
6163  * If multi-line regular expressions are requested, each line read is
6164  * appended to `filebuf'.
6165  */
6166 static long
6167 readline_internal (lbp, stream)
6168      linebuffer *lbp;
6169      register FILE *stream;
6170 {
6171   char *buffer = lbp->buffer;
6172   register char *p = lbp->buffer;
6173   register char *pend;
6174   int chars_deleted;
6175
6176   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6177
6178   for (;;)
6179     {
6180       register int c = getc (stream);
6181       if (p == pend)
6182         {
6183           /* We're at the end of linebuffer: expand it. */
6184           lbp->size *= 2;
6185           xrnew (buffer, lbp->size, char);
6186           p += buffer - lbp->buffer;
6187           pend = buffer + lbp->size;
6188           lbp->buffer = buffer;
6189         }
6190       if (c == EOF)
6191         {
6192           *p = '\0';
6193           chars_deleted = 0;
6194           break;
6195         }
6196       if (c == '\n')
6197         {
6198           if (p > buffer && p[-1] == '\r')
6199             {
6200               p -= 1;
6201 #ifdef DOS_NT
6202              /* Assume CRLF->LF translation will be performed by Emacs
6203                 when loading this file, so CRs won't appear in the buffer.
6204                 It would be cleaner to compensate within Emacs;
6205                 however, Emacs does not know how many CRs were deleted
6206                 before any given point in the file.  */
6207               chars_deleted = 1;
6208 #else
6209               chars_deleted = 2;
6210 #endif
6211             }
6212           else
6213             {
6214               chars_deleted = 1;
6215             }
6216           *p = '\0';
6217           break;
6218         }
6219       *p++ = c;
6220     }
6221   lbp->len = p - buffer;
6222
6223   if (need_filebuf              /* we need filebuf for multi-line regexps */
6224       && chars_deleted > 0)     /* not at EOF */
6225     {
6226       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6227         {
6228           /* Expand filebuf. */
6229           filebuf.size *= 2;
6230           xrnew (filebuf.buffer, filebuf.size, char);
6231         }
6232       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6233       filebuf.len += lbp->len;
6234       filebuf.buffer[filebuf.len++] = '\n';
6235       filebuf.buffer[filebuf.len] = '\0';
6236     }
6237
6238   return lbp->len + chars_deleted;
6239 }
6240
6241 /*
6242  * Like readline_internal, above, but in addition try to match the
6243  * input line against relevant regular expressions and manage #line
6244  * directives.
6245  */
6246 static void
6247 readline (lbp, stream)
6248      linebuffer *lbp;
6249      FILE *stream;
6250 {
6251   long result;
6252
6253   linecharno = charno;          /* update global char number of line start */
6254   result = readline_internal (lbp, stream); /* read line */
6255   lineno += 1;                  /* increment global line number */
6256   charno += result;             /* increment global char number */
6257
6258   /* Honour #line directives. */
6259   if (!no_line_directive)
6260     {
6261       static bool discard_until_line_directive;
6262
6263       /* Check whether this is a #line directive. */
6264       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6265         {
6266           int start, lno;
6267
6268           if (DEBUG) start = 0; /* shut up the compiler */
6269           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6270             {
6271               char *endp = lbp->buffer + start;
6272
6273               assert (start > 0);
6274               while ((endp = etags_strchr (endp, '"')) != NULL
6275                      && endp[-1] == '\\')
6276                 endp++;
6277               if (endp != NULL)
6278                 /* Ok, this is a real #line directive.  Let's deal with it. */
6279                 {
6280                   char *taggedabsname;  /* absolute name of original file */
6281                   char *taggedfname;    /* name of original file as given */
6282                   char *name;           /* temp var */
6283
6284                   discard_until_line_directive = FALSE; /* found it */
6285                   name = lbp->buffer + start;
6286                   *endp = '\0';
6287                   canonicalize_filename (name); /* for DOS */
6288                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6289                   if (filename_is_absolute (name)
6290                       || filename_is_absolute (curfdp->infname))
6291                     taggedfname = savestr (taggedabsname);
6292                   else
6293                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6294
6295                   if (streq (curfdp->taggedfname, taggedfname))
6296                     /* The #line directive is only a line number change.  We
6297                        deal with this afterwards. */
6298                     free (taggedfname);
6299                   else
6300                     /* The tags following this #line directive should be
6301                        attributed to taggedfname.  In order to do this, set
6302                        curfdp accordingly. */
6303                     {
6304                       fdesc *fdp; /* file description pointer */
6305
6306                       /* Go look for a file description already set up for the
6307                          file indicated in the #line directive.  If there is
6308                          one, use it from now until the next #line
6309                          directive. */
6310                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6311                         if (streq (fdp->infname, curfdp->infname)
6312                             && streq (fdp->taggedfname, taggedfname))
6313                           /* If we remove the second test above (after the &&)
6314                              then all entries pertaining to the same file are
6315                              coalesced in the tags file.  If we use it, then
6316                              entries pertaining to the same file but generated
6317                              from different files (via #line directives) will
6318                              go into separate sections in the tags file.  These
6319                              alternatives look equivalent.  The first one
6320                              destroys some apparently useless information. */
6321                           {
6322                             curfdp = fdp;
6323                             free (taggedfname);
6324                             break;
6325                           }
6326                       /* Else, if we already tagged the real file, skip all
6327                          input lines until the next #line directive. */
6328                       if (fdp == NULL) /* not found */
6329                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6330                           if (streq (fdp->infabsname, taggedabsname))
6331                             {
6332                               discard_until_line_directive = TRUE;
6333                               free (taggedfname);
6334                               break;
6335                             }
6336                       /* Else create a new file description and use that from
6337                          now on, until the next #line directive. */
6338                       if (fdp == NULL) /* not found */
6339                         {
6340                           fdp = fdhead;
6341                           fdhead = xnew (1, fdesc);
6342                           *fdhead = *curfdp; /* copy curr. file description */
6343                           fdhead->next = fdp;
6344                           fdhead->infname = savestr (curfdp->infname);
6345                           fdhead->infabsname = savestr (curfdp->infabsname);
6346                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6347                           fdhead->taggedfname = taggedfname;
6348                           fdhead->usecharno = FALSE;
6349                           fdhead->prop = NULL;
6350                           fdhead->written = FALSE;
6351                           curfdp = fdhead;
6352                         }
6353                     }
6354                   free (taggedabsname);
6355                   lineno = lno - 1;
6356                   readline (lbp, stream);
6357                   return;
6358                 } /* if a real #line directive */
6359             } /* if #line is followed by a a number */
6360         } /* if line begins with "#line " */
6361
6362       /* If we are here, no #line directive was found. */
6363       if (discard_until_line_directive)
6364         {
6365           if (result > 0)
6366             {
6367               /* Do a tail recursion on ourselves, thus discarding the contents
6368                  of the line buffer. */
6369               readline (lbp, stream);
6370               return;
6371             }
6372           /* End of file. */
6373           discard_until_line_directive = FALSE;
6374           return;
6375         }
6376     } /* if #line directives should be considered */
6377
6378 #ifdef ETAGS_REGEXPS
6379   {
6380     int match;
6381     regexp *rp;
6382     char *name;
6383
6384     /* Match against relevant regexps. */
6385     if (lbp->len > 0)
6386       for (rp = p_head; rp != NULL; rp = rp->p_next)
6387         {
6388           /* Only use generic regexps or those for the current language.
6389              Also do not use multiline regexps, which is the job of
6390              regex_tag_multiline. */
6391           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6392               || rp->multi_line)
6393             continue;
6394
6395           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6396           switch (match)
6397             {
6398             case -2:
6399               /* Some error. */
6400               if (!rp->error_signaled)
6401                 {
6402                   error ("regexp stack overflow while matching \"%s\"",
6403                          rp->pattern);
6404                   rp->error_signaled = TRUE;
6405                 }
6406               break;
6407             case -1:
6408               /* No match. */
6409               break;
6410             case 0:
6411               /* Empty string matched. */
6412               if (!rp->error_signaled)
6413                 {
6414                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6415                   rp->error_signaled = TRUE;
6416                 }
6417               break;
6418             default:
6419               /* Match occurred.  Construct a tag. */
6420               name = rp->name;
6421               if (name[0] == '\0')
6422                 name = NULL;
6423               else /* make a named tag */
6424                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6425               if (rp->force_explicit_name)
6426                 /* Force explicit tag name, if a name is there. */
6427                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6428               else
6429                 make_tag (name, strlen (name), TRUE,
6430                           lbp->buffer, match, lineno, linecharno);
6431               break;
6432             }
6433         }
6434   }
6435 #endif /* ETAGS_REGEXPS */
6436 }
6437
6438 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy, strlen(cp)+1
 * bytes long, is obtained through savenstr and returned to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  const size_t whole = strlen (cp);

  return savenstr (cp, whole);
}
6449
6450 /*
6451  * Return a pointer to a space of size LEN+1 allocated with xnew where
6452  * the string CP has been copied for at most the first LEN characters.
6453  */
6454 static char *
6455 savenstr (cp, len)
6456      char *cp;
6457      int len;
6458 {
6459   register char *dp;
6460
6461   dp = xnew (len + 1, char);
6462   strncpy (dp, cp, len);
6463   dp[len] = '\0';
6464   return dp;
6465 }
6466
/*
 * Find the last occurrence of the character C in the string SP.
 * The terminating NUL takes part in the search.  Return a pointer to
 * the occurrence, or NULL if there is none.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent hit */
      if (*sp == '\0')
        break;                  /* terminator examined, we are done */
    }

  return (char *) last;
}
6488
/*
 * Find the first occurrence of the character C in the string SP.
 * The terminating NUL takes part in the search.  Return a pointer to
 * the occurrence, or NULL if there is none.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
6507
/*
 * Case-insensitive comparison of the strings S1 and S2.  Two characters
 * are compared through lowcase only when both are alphabetic; otherwise
 * they are compared as they are.  The value returned is the difference
 * between the first pair of characters that do not match (zero when
 * the strings are equal).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* Report how the first non-matching pair compares. */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6528
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * S1 and S2 are the strings to compare and N is the maximum number of
 * characters examined.  Return 0 when the first N characters match (or
 * N is not positive); otherwise return the difference between the
 * first pair of characters that do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still allowed to be examined.  It is
     decremented only after a pair has matched, so that on exit N > 0
     means the loop stopped on a mismatch or at the end of S1, and
     N <= 0 means that the whole budget was spent on matching pairs. */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  /* Never look at the character after the Nth one: doing so made
     ("ab", "abX", 2) compare as different. */
  if (n <= 0)
    return 0;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6554
/* Return a pointer to the first character of CP for which iswhite is
   false.  The loop relies on the end of string not being white. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6564
/* Return a pointer to the first character of CP that is either white
   (according to iswhite) or the terminating NUL. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6574
/* Report an error through `error' (S1 is the printf control string and
   S2 its argument), then terminate the program with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* message goes out first ... */
  exit (EXIT_FAILURE);          /* ... then we never return */
}
6583
/* Print S1 followed by the system error message (via perror), then
   terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* describes the current errno */
  exit (EXIT_FAILURE);
}
6591
6592 static void
6593 suggest_asking_for_help ()
6594 {
6595   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6596            progname, LONG_OPTIONS ? "--help" : "-h");
6597   exit (EXIT_FAILURE);
6598 }
6599
6600 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6601 static void
6602 error (s1, s2)
6603      const char *s1, *s2;
6604 {
6605   fprintf (stderr, "%s: ", progname);
6606   fprintf (stderr, s1, s2);
6607   fprintf (stderr, "\n");
6608 }
6609
6610 /* Return a newly-allocated string whose contents
6611    concatenate those of s1, s2, s3.  */
6612 static char *
6613 concat (s1, s2, s3)
6614      char *s1, *s2, *s3;
6615 {
6616   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6617   char *result = xnew (len1 + len2 + len3 + 1, char);
6618
6619   strcpy (result, s1);
6620   strcpy (result + len1, s2);
6621   strcpy (result + len1 + len2, s3);
6622   result[len1 + len2 + len3] = '\0';
6623
6624   return result;
6625 }
6626
6627 \f
6628 /* Does the same work as the system V getcwd, but does not need to
6629    guess the buffer size in advance. */
6630 static char *
6631 etags_getcwd ()
6632 {
6633 #ifdef HAVE_GETCWD
6634   int bufsize = 200;
6635   char *path = xnew (bufsize, char);
6636
6637   while (getcwd (path, bufsize) == NULL)
6638     {
6639       if (errno != ERANGE)
6640         pfatal ("getcwd");
6641       bufsize *= 2;
6642       free (path);
6643       path = xnew (bufsize, char);
6644     }
6645
6646   canonicalize_filename (path);
6647   return path;
6648
6649 #else /* not HAVE_GETCWD */
6650 #if MSDOS
6651
6652   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6653
6654   getwd (path);
6655
6656   for (p = path; *p != '\0'; p++)
6657     if (*p == '\\')
6658       *p = '/';
6659     else
6660       *p = lowcase (*p);
6661
6662   return strdup (path);
6663 #else /* not MSDOS */
6664   linebuffer path;
6665   FILE *pipe;
6666
6667   linebuffer_init (&path);
6668   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6669   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6670     pfatal ("pwd");
6671   pclose (pipe);
6672
6673   return path.buffer;
6674 #endif /* not MSDOS */
6675 #endif /* not HAVE_GETCWD */
6676 }
6677
6678 /* Return a newly allocated string containing the file name of FILE
6679    relative to the absolute directory DIR (which should end with a slash). */
6680 static char *
6681 relative_filename (file, dir)
6682      char *file, *dir;
6683 {
6684   char *fp, *dp, *afn, *res;
6685   int i;
6686
6687   /* Find the common root of file and dir (with a trailing slash). */
6688   afn = absolute_filename (file, cwd);
6689   fp = afn;
6690   dp = dir;
6691   while (*fp++ == *dp++)
6692     continue;
6693   fp--, dp--;                   /* back to the first differing char */
6694 #ifdef DOS_NT
6695   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6696     return afn;
6697 #endif
6698   do                            /* look at the equal chars until '/' */
6699     fp--, dp--;
6700   while (*fp != '/');
6701
6702   /* Build a sequence of "../" strings for the resulting relative file name. */
6703   i = 0;
6704   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6705     i += 1;
6706   res = xnew (3*i + strlen (fp + 1) + 1, char);
6707   res[0] = '\0';
6708   while (i-- > 0)
6709     strcat (res, "../");
6710
6711   /* Add the file name relative to the common root of file and dir. */
6712   strcat (res, fp + 1);
6713   free (afn);
6714
6715   return res;
6716 }
6717
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." components are then removed in
   place.  If nothing is left, "/" is returned.  Under DOS_NT, a
   relative FILE with a drive letter is a fatal error.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the previous
                 component, i.e. the one to drop together with "..". */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow; memmove does.  The +1 moves the NUL too. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions again: use memmove. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the result is the root directory.
         Release the now useless empty string. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6778
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place; when it contains no slash
   the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Temporarily cut FILE right after its last slash, so that only
         the directory part is made absolute. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      res = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    res = savestr (dir);

  return res;
}
6800
/* Tell whether FN is an absolute file name: one that begins with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6813
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is made upper case and backslashes are translated into
   slashes; elsewhere nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  p = fn;
  while (*p != '\0')
    {
      if (*p == '\\')
        *p = '/';
      p++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6832
6833 \f
6834 /* Initialize a linebuffer for use */
6835 static void
6836 linebuffer_init (lbp)
6837      linebuffer *lbp;
6838 {
6839   lbp->size = (DEBUG) ? 3 : 200;
6840   lbp->buffer = xnew (lbp->size, char);
6841   lbp->buffer[0] = '\0';
6842   lbp->len = 0;
6843 }
6844
6845 /* Set the minimum size of a string contained in a linebuffer. */
6846 static void
6847 linebuffer_setlen (lbp, toksize)
6848      linebuffer *lbp;
6849      int toksize;
6850 {
6851   while (lbp->size <= toksize)
6852     {
6853       lbp->size *= 2;
6854       xrnew (lbp->buffer, lbp->size, char);
6855     }
6856   lbp->len = toksize;
6857 }
6858
6859 /* Like malloc but get fatal error if memory is exhausted. */
6860 static PTR
6861 xmalloc (size)
6862      unsigned int size;
6863 {
6864   PTR result = (PTR) malloc (size);
6865   if (result == NULL)
6866     fatal ("virtual memory exhausted", (char *)NULL);
6867   return result;
6868 }
6869
6870 static PTR
6871 xrealloc (ptr, size)
6872      char *ptr;
6873      unsigned int size;
6874 {
6875   PTR result = (PTR) realloc (ptr, size);
6876   if (result == NULL)
6877     fatal ("virtual memory exhausted", (char *)NULL);
6878   return result;
6879 }
6880
6881 /*
6882  * Local Variables:
6883  * c-indentation-style: gnu
6884  * indent-tabs-mode: t
6885  * tab-width: 8
6886  * fill-column: 79
6887  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6888  * End:
6889  */
6890
6891 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6892    (do not change this comment) */
6893
6894 /* etags.c ends here */