code.delx.au - gnu-emacs/blob - lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.18";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # define ETAGS_REGEXPS          /* use the regexp features */
  63 # define LONG_OPTIONS           /* accept long options */
  64 # ifndef PTR                    /* for Xemacs */
  65 #   define PTR void *
  66 # endif
  67 # ifndef __P                    /* for Xemacs */
  68 #   define __P(args) args
  69 # endif
  70 #else  /* no config.h */
  71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  72 #   define __P(args) args       /* use prototypes */
  73 #   define PTR void *           /* for generic pointers */
  74 # else /* not standard C */
  75 #   define __P(args) ()         /* no prototypes */
  76 #   define const                /* remove const for old compilers' sake */
  77 #   define PTR long *           /* don't use void* */
  78 # endif
  79 #endif /* !HAVE_CONFIG_H */
  80
  81 #ifndef _GNU_SOURCE
  82 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  83 #endif
  84
  85 #ifdef LONG_OPTIONS
  86 #  undef LONG_OPTIONS
  87 #  define LONG_OPTIONS TRUE
  88 #else
  89 #  define LONG_OPTIONS  FALSE
  90 #endif
  91
  92 /* WIN32_NATIVE is for Xemacs.
  93    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  94 #ifdef WIN32_NATIVE
  95 # undef MSDOS
  96 # undef  WINDOWSNT
  97 # define WINDOWSNT
  98 #endif /* WIN32_NATIVE */
  99
 100 #ifdef MSDOS
 101 # undef MSDOS
 102 # define MSDOS TRUE
 103 # include <fcntl.h>
 104 # include <sys/param.h>
 105 # include <io.h>
 106 # ifndef HAVE_CONFIG_H
 107 #   define DOS_NT
 108 #   include <sys/config.h>
 109 # endif
 110 #else
 111 # define MSDOS FALSE
 112 #endif /* MSDOS */
 113
 114 #ifdef WINDOWSNT
 115 # include <stdlib.h>
 116 # include <fcntl.h>
 117 # include <string.h>
 118 # include <direct.h>
 119 # include <io.h>
 120 # define MAXPATHLEN _MAX_PATH
 121 # undef HAVE_NTGUI
 122 # undef  DOS_NT
 123 # define DOS_NT
 124 # ifndef HAVE_GETCWD
 125 #   define HAVE_GETCWD
 126 # endif /* undef HAVE_GETCWD */
 127 #else /* not WINDOWSNT */
 128 # ifdef STDC_HEADERS
 129 #  include <stdlib.h>
 130 #  include <string.h>
 131 # else /* no standard C headers */
 132     extern char *getenv ();
 133 #  ifdef VMS
 134 #   define EXIT_SUCCESS 1
 135 #   define EXIT_FAILURE 0
 136 #  else /* no VMS */
 137 #   define EXIT_SUCCESS 0
 138 #   define EXIT_FAILURE 1
 139 #  endif
 140 # endif
 141 #endif /* !WINDOWSNT */
 142
 143 #ifdef HAVE_UNISTD_H
 144 # include <unistd.h>
 145 #else
 146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 147     extern char *getcwd (char *buf, size_t size);
 148 # endif
 149 #endif /* HAVE_UNISTD_H */
 150
 151 #include <stdio.h>
 152 #include <ctype.h>
 153 #include <errno.h>
 154 #ifndef errno
 155   extern int errno;
 156 #endif
 157 #include <sys/types.h>
 158 #include <sys/stat.h>
 159
 160 #include <assert.h>
 161 #ifdef NDEBUG
 162 # undef  assert                 /* some systems have a buggy assert.h */
 163 # define assert(x) ((void) 0)
 164 #endif
 165
 166 #if !defined (S_ISREG) && defined (S_IFREG)
 167 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 168 #endif
 169
 170 #if LONG_OPTIONS
 171 # include <getopt.h>
 172 #else
 173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 174   extern char *optarg;
 175   extern int optind, opterr;
 176 #endif /* LONG_OPTIONS */
 177
 178 #ifdef ETAGS_REGEXPS
 179 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 180 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 181                              !!! NOTICE !!!
 182  the regex.h distributed with Cygwin is not compatible with etags, alas!
 183 If you want regular expression support, you should delete this notice and
 184               arrange to use the GNU regex.h and regex.c.
 185 #   endif
 186 # endif
 187 # include <regex.h>
 188 #endif /* ETAGS_REGEXPS */
 189
 190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 191  Leave it undefined to make the program "etags", which makes emacs-style
 192  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 193 #ifdef CTAGS
 194 # undef  CTAGS
 195 # define CTAGS TRUE
 196 #else
 197 # define CTAGS FALSE
 198 #endif
 199
 200 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 201 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 202 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 203 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 204
 205 #define CHARS 256               /* 2^sizeof(char) */
 206 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 207 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 208 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 209 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 210 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 211 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 212
 213 #define ISALNUM(c)      isalnum (CHAR(c))
 214 #define ISALPHA(c)      isalpha (CHAR(c))
 215 #define ISDIGIT(c)      isdigit (CHAR(c))
 216 #define ISLOWER(c)      islower (CHAR(c))
 217
 218 #define lowcase(c)      tolower (CHAR(c))
 219 #define upcase(c)       toupper (CHAR(c))
 220
 221
 222 /*
 223  *      xnew, xrnew -- allocate, reallocate storage
 224  *
 225  * SYNOPSIS:    Type *xnew (int n, Type);
 226  *              void xrnew (OldPointer, int n, Type);
 227  */
 228 #if DEBUG
 229 # include "chkmalloc.h"
 230 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 231                                                   (n) * sizeof (Type)))
 232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 233                                         (char *) (op), (n) * sizeof (Type)))
 234 #else
 235 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #endif
 239
 240 #define bool int
 241
 242 typedef void Lang_function __P((FILE *));
 243
 244 typedef struct
 245 {
 246   char *suffix;                 /* file name suffix for this compressor */
 247   char *command;                /* takes one arg and decompresses to stdout */
 248 } compressor;
 249
 250 typedef struct
 251 {
 252   char *name;                   /* language name */
 253   char *help;                   /* detailed help for the language */
 254   Lang_function *function;      /* parse function */
 255   char **suffixes;              /* name suffixes of this language's files */
 256   char **filenames;             /* names of this language's files */
 257   char **interpreters;          /* interpreters for this language */
 258   bool metasource;              /* source used to generate other sources */
 259 } language;
 260
 261 typedef struct fdesc
 262 {
 263   struct fdesc *next;           /* for the linked list */
 264   char *infname;                /* uncompressed input file name */
 265   char *infabsname;             /* absolute uncompressed input file name */
 266   char *infabsdir;              /* absolute dir of input file */
 267   char *taggedfname;            /* file name to write in tagfile */
 268   language *lang;               /* language of file */
 269   char *prop;                   /* file properties to write in tagfile */
 270   bool usecharno;               /* etags tags shall contain char number */
 271   bool written;                 /* entry written in the tags file */
 272 } fdesc;
 273
 274 typedef struct node_st
 275 {                               /* sorting structure */
 276   struct node_st *left, *right; /* left and right sons */
 277   fdesc *fdp;                   /* description of file to whom tag belongs */
 278   char *name;                   /* tag name */
 279   char *regex;                  /* search regexp */
 280   bool valid;                   /* write this tag on the tag file */
 281   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 282   bool been_warned;             /* warning already given for duplicated tag */
 283   int lno;                      /* line number tag is on */
 284   long cno;                     /* character number line starts on */
 285 } node;
 286
 287 /*
 288  * A `linebuffer' is a structure which holds a line of text.
 289  * `readline_internal' reads a line from a stream into a linebuffer
 290  * and works regardless of the length of the line.
 291  * SIZE is the size of BUFFER, LEN is the length of the string in
 292  * BUFFER after readline reads it.
 293  */
 294 typedef struct
 295 {
 296   long size;
 297   int len;
 298   char *buffer;
 299 } linebuffer;
 300
 301 /* Used to support mixing of --lang and file names. */
 302 typedef struct
 303 {
 304   enum {
 305     at_language,                /* a language specification */
 306     at_regexp,                  /* a regular expression */
 307     at_filename,                /* a file name */
 308     at_stdin,                   /* read from stdin here */
 309     at_end                      /* stop parsing the list */
 310   } arg_type;                   /* argument type */
 311   language *lang;               /* language associated with the argument */
 312   char *what;                   /* the argument itself */
 313 } argument;
 314
 315 #ifdef ETAGS_REGEXPS
 316 /* Structure defining a regular expression. */
 317 typedef struct regexp
 318 {
 319   struct regexp *p_next;        /* pointer to next in list */
 320   language *lang;               /* if set, use only for this language */
 321   char *pattern;                /* the regexp pattern */
 322   char *name;                   /* tag name */
 323   struct re_pattern_buffer *pat; /* the compiled pattern */
 324   struct re_registers regs;     /* re registers */
 325   bool error_signaled;          /* already signaled for this regexp */
 326   bool force_explicit_name;     /* do not allow implict tag name */
 327   bool ignore_case;             /* ignore case when matching */
 328   bool multi_line;              /* do a multi-line match on the whole file */
 329 } regexp;
 330 #endif /* ETAGS_REGEXPS */
 331
 332
 333 /* Many compilers barf on this:
 334         Lang_function Ada_funcs;
 335    so let's write it this way */
 336 static void Ada_funcs __P((FILE *));
 337 static void Asm_labels __P((FILE *));
 338 static void C_entries __P((int c_ext, FILE *));
 339 static void default_C_entries __P((FILE *));
 340 static void plain_C_entries __P((FILE *));
 341 static void Cjava_entries __P((FILE *));
 342 static void Cobol_paragraphs __P((FILE *));
 343 static void Cplusplus_entries __P((FILE *));
 344 static void Cstar_entries __P((FILE *));
 345 static void Erlang_functions __P((FILE *));
 346 static void Forth_words __P((FILE *));
 347 static void Fortran_functions __P((FILE *));
 348 static void HTML_labels __P((FILE *));
 349 static void Lisp_functions __P((FILE *));
 350 static void Lua_functions __P((FILE *));
 351 static void Makefile_targets __P((FILE *));
 352 static void Pascal_functions __P((FILE *));
 353 static void Perl_functions __P((FILE *));
 354 static void PHP_functions __P((FILE *));
 355 static void PS_functions __P((FILE *));
 356 static void Prolog_functions __P((FILE *));
 357 static void Python_functions __P((FILE *));
 358 static void Scheme_functions __P((FILE *));
 359 static void TeX_commands __P((FILE *));
 360 static void Texinfo_nodes __P((FILE *));
 361 static void Yacc_entries __P((FILE *));
 362 static void just_read_file __P((FILE *));
 363
 364 static void print_language_names __P((void));
 365 static void print_version __P((void));
 366 static void print_help __P((argument *));
 367 int main __P((int, char **));
 368
 369 static compressor *get_compressor_from_suffix __P((char *, char **));
 370 static language *get_language_from_langname __P((const char *));
 371 static language *get_language_from_interpreter __P((char *));
 372 static language *get_language_from_filename __P((char *, bool));
 373 static void readline __P((linebuffer *, FILE *));
 374 static long readline_internal __P((linebuffer *, FILE *));
 375 static bool nocase_tail __P((char *));
 376 static void get_tag __P((char *, char **));
 377
 378 #ifdef ETAGS_REGEXPS
 379 static void analyse_regex __P((char *));
 380 static void free_regexps __P((void));
 381 static void regex_tag_multiline __P((void));
 382 #endif /* ETAGS_REGEXPS */
 383 static void error __P((const char *, const char *));
 384 static void suggest_asking_for_help __P((void));
 385 void fatal __P((char *, char *));
 386 static void pfatal __P((char *));
 387 static void add_node __P((node *, node **));
 388
 389 static void init __P((void));
 390 static void process_file_name __P((char *, language *));
 391 static void process_file __P((FILE *, char *, language *));
 392 static void find_entries __P((FILE *));
 393 static void free_tree __P((node *));
 394 static void free_fdesc __P((fdesc *));
 395 static void pfnote __P((char *, bool, char *, int, int, long));
 396 static void make_tag __P((char *, int, bool, char *, int, int, long));
 397 static void invalidate_nodes __P((fdesc *, node **));
 398 static void put_entries __P((node *));
 399
 400 static char *concat __P((char *, char *, char *));
 401 static char *skip_spaces __P((char *));
 402 static char *skip_non_spaces __P((char *));
 403 static char *savenstr __P((char *, int));
 404 static char *savestr __P((char *));
 405 static char *etags_strchr __P((const char *, int));
 406 static char *etags_strrchr __P((const char *, int));
 407 static int etags_strcasecmp __P((const char *, const char *));
 408 static int etags_strncasecmp __P((const char *, const char *, int));
 409 static char *etags_getcwd __P((void));
 410 static char *relative_filename __P((char *, char *));
 411 static char *absolute_filename __P((char *, char *));
 412 static char *absolute_dirname __P((char *, char *));
 413 static bool filename_is_absolute __P((char *f));
 414 static void canonicalize_filename __P((char *));
 415 static void linebuffer_init __P((linebuffer *));
 416 static void linebuffer_setlen __P((linebuffer *, int));
 417 static PTR xmalloc __P((unsigned int));
 418 static PTR xrealloc __P((char *, unsigned int));
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428
 429 static fdesc *fdhead;           /* head of file description list */
 430 static fdesc *curfdp;           /* current file description */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
 445 /* boolean "functions" (see init)       */
 446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 447 static char
 448   /* white chars */
 449   *white = " \f\t\n\r\v",
 450   /* not in a name */
 451   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 452   /* token ending chars */
 453   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 454   /* token starting chars */
 455   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 456   /* valid in-token chars */
 457   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 458
 459 static bool append_to_tagfile;  /* -a: append to tags */
 460 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 461 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 463                                 /* 0 struct/enum/union decls, and C++ */
 464                                 /* member functions. */
 465 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 466                                 /* constants and variables. */
 467                                 /* -D: opposite of -d.  Default under ctags. */
 468 static bool globals;            /* create tags for global variables */
 469 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 470 static bool members;            /* create tags for C member variables */
 471 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 472 static bool update;             /* -u: update tags */
 473 static bool vgrind_style;       /* -v: create vgrind style index output */
 474 static bool no_warnings;        /* -w: suppress warnings */
 475 static bool cxref_style;        /* -x: create cxref style output */
 476 static bool cplusplus;          /* .[hc] means C++, not C */
 477 static bool ignoreindent;       /* -I: ignore indentation in C */
 478 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 479
 480 /* STDIN is defined in LynxOS system headers */
 481 #ifdef STDIN
 482 # undef STDIN
 483 #endif
 484
 485 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 486 static bool parsing_stdin;      /* --parse-stdin used */
 487
 488 #ifdef ETAGS_REGEXPS
 489 static regexp *p_head;          /* list of all regexps */
 490 static bool need_filebuf;       /* some regexes are multi-line */
 491 #else
 492 # define need_filebuf FALSE
 493 #endif /* ETAGS_REGEXPS */
 494
 495 #if LONG_OPTIONS
 496 static struct option longopts[] =
 497 {
 498   { "append",             no_argument,       NULL,               'a'   },
 499   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 500   { "c++",                no_argument,       NULL,               'C'   },
 501   { "declarations",       no_argument,       &declarations,      TRUE  },
 502   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 503   { "help",               no_argument,       NULL,               'h'   },
 504   { "help",               no_argument,       NULL,               'H'   },
 505   { "ignore-indentation", no_argument,       NULL,               'I'   },
 506   { "language",           required_argument, NULL,               'l'   },
 507   { "members",            no_argument,       &members,           TRUE  },
 508   { "no-members",         no_argument,       &members,           FALSE },
 509   { "output",             required_argument, NULL,               'o'   },
 510 #ifdef ETAGS_REGEXPS
 511   { "regex",              required_argument, NULL,               'r'   },
 512   { "no-regex",           no_argument,       NULL,               'R'   },
 513   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 514 #endif /* ETAGS_REGEXPS */
 515   { "parse-stdin",        required_argument, NULL,               STDIN },
 516   { "version",            no_argument,       NULL,               'V'   },
 517
 518 #if CTAGS /* Ctags options */
 519   { "backward-search",    no_argument,       NULL,               'B'   },
 520   { "cxref",              no_argument,       NULL,               'x'   },
 521   { "defines",            no_argument,       NULL,               'd'   },
 522   { "globals",            no_argument,       &globals,           TRUE  },
 523   { "typedefs",           no_argument,       NULL,               't'   },
 524   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 525   { "update",             no_argument,       NULL,               'u'   },
 526   { "vgrind",             no_argument,       NULL,               'v'   },
 527   { "no-warn",            no_argument,       NULL,               'w'   },
 528
 529 #else /* Etags options */
 530   { "no-defines",         no_argument,       NULL,               'D'   },
 531   { "no-globals",         no_argument,       &globals,           FALSE },
 532   { "include",            required_argument, NULL,               'i'   },
 533 #endif
 534   { NULL }
 535 };
 536 #endif /* LONG_OPTIONS */
 537
 538 static compressor compressors[] =
 539 {
 540   { "z", "gzip -d -c"},
 541   { "Z", "gzip -d -c"},
 542   { "gz", "gzip -d -c"},
 543   { "GZ", "gzip -d -c"},
 544   { "bz2", "bzip2 -d -c" },
 545   { NULL }
 546 };
 547
 548 /*
 549  * Language stuff.
 550  */
 551
 552 /* Ada code */
 553 static char *Ada_suffixes [] =
 554   { "ads", "adb", "ada", NULL };
 555 static char Ada_help [] =
 556 "In Ada code, functions, procedures, packages, tasks and types are\n\
 557 tags.  Use the `--packages-only' option to create tags for\n\
 558 packages only.\n\
 559 Ada tag names have suffixes indicating the type of entity:\n\
 560         Entity type:    Qualifier:\n\
 561         ------------    ----------\n\
 562         function        /f\n\
 563         procedure       /p\n\
 564         package spec    /s\n\
 565         package body    /b\n\
 566         type            /t\n\
 567         task            /k\n\
 568 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 569 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 570 will just search for any tag `bidule'.";
 571
 572 /* Assembly code */
 573 static char *Asm_suffixes [] =
 574   { "a",        /* Unix assembler */
 575     "asm", /* Microcontroller assembly */
 576     "def", /* BSO/Tasking definition includes  */
 577     "inc", /* Microcontroller include files */
 578     "ins", /* Microcontroller include files */
 579     "s", "sa", /* Unix assembler */
 580     "S",   /* cpp-processed Unix assembler */
 581     "src", /* BSO/Tasking C compiler output */
 582     NULL
 583   };
 584 static char Asm_help [] =
 585 "In assembler code, labels appearing at the beginning of a line,\n\
 586 followed by a colon, are tags.";
 587
 588
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
 592 static char *default_C_suffixes [] =
 593   { "c", "h", NULL };
 594 static char default_C_help [] =
 595 "In C code, any C function or typedef is a tag, and so are\n\
 596 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 597 definitions and `enum' constants are tags unless you specify\n\
 598 `--no-defines'.  Global variables are tags unless you specify\n\
 599 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 600 can make the tags table file much smaller.\n\
 601 You can tag function declarations and external variables by\n\
 602 using `--declarations', and struct members by using `--members'.";
 603
 604 static char *Cplusplus_suffixes [] =
 605   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 606     "M",                        /* Objective C++ */
 607     "pdb",                      /* Postscript with C syntax */
 608     NULL };
 609 static char Cplusplus_help [] =
 610 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 611 --help --lang=c --lang=c++ for full help.)\n\
 612 In addition to C tags, member functions are also recognized, and\n\
 613 optionally member variables if you use the `--members' option.\n\
 614 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 615 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 616 `operator+'.";
 617
 618 static char *Cjava_suffixes [] =
 619   { "java", NULL };
 620 static char Cjava_help [] =
 621 "In Java code, all the tags constructs of C and C++ code are\n\
 622 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 623
 624
 625 static char *Cobol_suffixes [] =
 626   { "COB", "cob", NULL };
 627 static char Cobol_help [] =
 628 "In Cobol code, tags are paragraph names; that is, any word\n\
 629 starting in column 8 and followed by a period.";
 630
 631 static char *Cstar_suffixes [] =
 632   { "cs", "hs", NULL };
 633
 634 static char *Erlang_suffixes [] =
 635   { "erl", "hrl", NULL };
 636 static char Erlang_help [] =
 637 "In Erlang code, the tags are the functions, records and macros\n\
 638 defined in the file.";
 639
 640 char *Forth_suffixes [] =
 641   { "fth", "tok", NULL };
 642 static char Forth_help [] =
 643 "In Forth code, tags are words defined by `:',\n\
 644 constant, code, create, defer, value, variable, buffer:, field.";
 645
 646 static char *Fortran_suffixes [] =
 647   { "F", "f", "f90", "for", NULL };
 648 static char Fortran_help [] =
 649 "In Fortran code, functions, subroutines and block data are tags.";
 650
 651 static char *HTML_suffixes [] =
 652   { "htm", "html", "shtml", NULL };
 653 static char HTML_help [] =
 654 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 655 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 656 occurrences of `id='.";
 657
 658 static char *Lisp_suffixes [] =
 659   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 660 static char Lisp_help [] =
 661 "In Lisp code, any function defined with `defun', any variable\n\
 662 defined with `defvar' or `defconst', and in general the first\n\
 663 argument of any expression that starts with `(def' in column zero\n\
 664 is a tag.";
 665
 666 static char *Lua_suffixes [] =
 667   { "lua", "LUA", NULL };
 668 static char Lua_help [] =
 669 "In Lua scripts, all functions are tags.";
 670
 671 static char *Makefile_filenames [] =
 672   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 673 static char Makefile_help [] =
 674 "In makefiles, targets are tags; additionally, variables are tags\n\
 675 unless you specify `--no-globals'.";
 676
 677 static char *Objc_suffixes [] =
 678   { "lm",                       /* Objective lex file */
 679     "m",                        /* Objective C file */
 680      NULL };
 681 static char Objc_help [] =
 682 "In Objective C code, tags include Objective C definitions for classes,\n\
 683 class categories, methods and protocols.  Tags for variables and\n\
 684 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 685
 686 static char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691
 692 static char *Perl_suffixes [] =
 693   { "pl", "pm", NULL };
 694 static char *Perl_interpreters [] =
 695   { "perl", "@PERL@", NULL };
 696 static char Perl_help [] =
 697 "In Perl code, the tags are the packages, subroutines and variables\n\
 698 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 699 `--globals' if you want to tag global variables.  Tags for\n\
 700 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 701 defined in the default package is `main::SUB'.";
 702
 703 static char *PHP_suffixes [] =
 704   { "php", "php3", "php4", NULL };
 705 static char PHP_help [] =
 706 "In PHP code, tags are functions, classes and defines.  When using\n\
 707 the `--members' option, vars are tags too.";
 708
 709 static char *plain_C_suffixes [] =
 710   { "pc",                       /* Pro*C file */
 711      NULL };
 712
 713 static char *PS_suffixes [] =
 714   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 715 static char PS_help [] =
 716 "In PostScript code, the tags are the functions.";
 717
 718 static char *Prolog_suffixes [] =
 719   { "prolog", NULL };
 720 static char Prolog_help [] =
 721 "In Prolog code, tags are predicates and rules at the beginning of\n\
 722 line.";
 723
 724 static char *Python_suffixes [] =
 725   { "py", NULL };
 726 static char Python_help [] =
 727 "In Python code, `def' or `class' at the beginning of a line\n\
 728 generate a tag.";
 729
 730 /* Can't do the `SCM' or `scm' prefix with a version number. */
 731 static char *Scheme_suffixes [] =
 732   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 733 static char Scheme_help [] =
 734 "In Scheme code, tags include anything defined with `def' or with a\n\
 735 construct whose name starts with `def'.  They also include\n\
 736 variables set with `set!' at top level in the file.";
 737
 738 static char *TeX_suffixes [] =
 739   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 740 static char TeX_help [] =
 741 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 742 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 743 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 744 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 745 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 746 \n\
 747 Other commands can be specified by setting the environment variable\n\
 748 `TEXTAGS' to a colon-separated list like, for example,\n\
 749      TEXTAGS=\"mycommand:myothercommand\".";
 750
 751
 752 static char *Texinfo_suffixes [] =
 753   { "texi", "texinfo", "txi", NULL };
 754 static char Texinfo_help [] =
 755 "for texinfo files, lines starting with @node are tagged.";
 756
 757 static char *Yacc_suffixes [] =
 758   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 759 static char Yacc_help [] =
 760 "In Bison or Yacc input files, each rule defines as a tag the\n\
 761 nonterminal it constructs.  The portions of the file that contain\n\
 762 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 763 for full help).";
 764
 765 static char auto_help [] =
 766 "`auto' is not a real language, it indicates to use\n\
 767 a default language for files base on file name suffix and file contents.";
 768
 769 static char none_help [] =
 770 "`none' is not a real language, it indicates to only do\n\
 771 regexp processing on files.";
 772
 773 static char no_lang_help [] =
 774 "No detailed help available for this language.";
 775
 776
 777 /*
 778  * Table of languages.
 779  *
 780  * It is ok for a given function to be listed under more than one
 781  * name.  I just didn't.
 782  */
 783
 784 static language lang_names [] =
 785 {
 786   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 787   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 788   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 789   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 790   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 791   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 792   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 793   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 794   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 795   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 796   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 797   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 798   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 799   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 800   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 801   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 802   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 803   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 804   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 805   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 806   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 807   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 808   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 809   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 810   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 811   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 812   { "auto",      auto_help },                      /* default guessing scheme */
 813   { "none",      none_help,      just_read_file }, /* regexp matching only */
 814   { NULL }                /* end of list */
 815 };
 816
 817 \f
 818 static void
 819 print_language_names ()
 820 {
 821   language *lang;
 822   char **name, **ext;
 823
 824   puts ("\nThese are the currently supported languages, along with the\n\
 825 default file names and dot suffixes:");
 826   for (lang = lang_names; lang->name != NULL; lang++)
 827     {
 828       printf ("  %-*s", 10, lang->name);
 829       if (lang->filenames != NULL)
 830         for (name = lang->filenames; *name != NULL; name++)
 831           printf (" %s", *name);
 832       if (lang->suffixes != NULL)
 833         for (ext = lang->suffixes; *ext != NULL; ext++)
 834           printf (" .%s", *ext);
 835       puts ("");
 836     }
 837   puts ("where `auto' means use default language for files based on file\n\
 838 name suffix, and `none' means only do regexp processing on files.\n\
 839 If no language is specified and no matching suffix is found,\n\
 840 the first line of the file is read for a sharp-bang (#!) sequence\n\
 841 followed by the name of an interpreter.  If no such sequence is found,\n\
 842 Fortran is tried first; if no tags are found, C is tried next.\n\
 843 When parsing any C file, a \"class\" or \"template\" keyword\n\
 844 switches to C++.");
 845   puts ("Compressed files are supported using gzip and bzip2.\n\
 846 \n\
 847 For detailed help on a given language use, for example,\n\
 848 etags --help --lang=ada.");
 849 }
 850
 851 #ifndef EMACS_NAME
 852 # define EMACS_NAME "standalone"
 853 #endif
 854 #ifndef VERSION
 855 # define VERSION "version"
 856 #endif
 857 static void
 858 print_version ()
 859 {
 860   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 861   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 862   puts ("This program is distributed under the same terms as Emacs");
 863
 864   exit (EXIT_SUCCESS);
 865 }
 866
 867 static void
 868 print_help (argbuffer)
 869      argument *argbuffer;
 870 {
 871   bool help_for_lang = FALSE;
 872
 873   for (; argbuffer->arg_type != at_end; argbuffer++)
 874     if (argbuffer->arg_type == at_language)
 875       {
 876         if (help_for_lang)
 877           puts ("");
 878         puts (argbuffer->lang->help);
 879         help_for_lang = TRUE;
 880       }
 881
 882   if (help_for_lang)
 883     exit (EXIT_SUCCESS);
 884
 885   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 886 \n\
 887 These are the options accepted by %s.\n", progname, progname);
 888   if (LONG_OPTIONS)
 889     puts ("You may use unambiguous abbreviations for the long option names.");
 890   else
 891     puts ("Long option names do not work with this executable, as it is not\n\
 892 linked with GNU getopt.");
 893   puts ("  A - as file name means read names from stdin (one per line).\n\
 894 Absolute names are stored in the output file as they are.\n\
 895 Relative ones are stored relative to the output file's directory.\n");
 896
 897   puts ("-a, --append\n\
 898         Append tag entries to existing tags file.");
 899
 900   puts ("--packages-only\n\
 901         For Ada files, only generate tags for packages.");
 902
 903   if (CTAGS)
 904     puts ("-B, --backward-search\n\
 905         Write the search commands for the tag entries using '?', the\n\
 906         backward-search command instead of '/', the forward-search command.");
 907
 908   /* This option is mostly obsolete, because etags can now automatically
 909      detect C++.  Retained for backward compatibility and for debugging and
 910      experimentation.  In principle, we could want to tag as C++ even
 911      before any "class" or "template" keyword.
 912   puts ("-C, --c++\n\
 913         Treat files whose name suffix defaults to C language as C++ files.");
 914   */
 915
 916   puts ("--declarations\n\
 917         In C and derived languages, create tags for function declarations,");
 918   if (CTAGS)
 919     puts ("\tand create tags for extern variables if --globals is used.");
 920   else
 921     puts
 922       ("\tand create tags for extern variables unless --no-globals is used.");
 923
 924   if (CTAGS)
 925     puts ("-d, --defines\n\
 926         Create tag entries for C #define constants and enum constants, too.");
 927   else
 928     puts ("-D, --no-defines\n\
 929         Don't create tag entries for C #define constants and enum constants.\n\
 930         This makes the tags file smaller.");
 931
 932   if (!CTAGS)
 933     puts ("-i FILE, --include=FILE\n\
 934         Include a note in tag file indicating that, when searching for\n\
 935         a tag, one should also consult the tags file FILE after\n\
 936         checking the current file.");
 937
 938   puts ("-l LANG, --language=LANG\n\
 939         Force the following files to be considered as written in the\n\
 940         named language up to the next --language=LANG option.");
 941
 942   if (CTAGS)
 943     puts ("--globals\n\
 944         Create tag entries for global variables in some languages.");
 945   else
 946     puts ("--no-globals\n\
 947         Do not create tag entries for global variables in some\n\
 948         languages.  This makes the tags file smaller.");
 949   puts ("--members\n\
 950         Create tag entries for members of structures in some languages.");
 951
 952 #ifdef ETAGS_REGEXPS
 953   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 954         Make a tag for each line matching a regular expression pattern\n\
 955         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 956         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 957         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 958         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 959   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 960         For example Tcl named tags can be created with:\n\
 961           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 962         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 963         `m' means to allow multi-line matches, `s' implies `m' and\n\
 964         causes dot to match any character, including newline.");
 965   puts ("-R, --no-regex\n\
 966         Don't create tags from regexps for the following files.");
 967 #endif /* ETAGS_REGEXPS */
 968   puts ("-I, --ignore-indentation\n\
 969         In C and C++ do not assume that a closing brace in the first\n\
 970         column is the final brace of a function or structure definition.");
 971   puts ("-o FILE, --output=FILE\n\
 972         Write the tags to FILE.");
 973   puts ("--parse-stdin=NAME\n\
 974         Read from standard input and record tags as belonging to file NAME.");
 975
 976   if (CTAGS)
 977     {
 978       puts ("-t, --typedefs\n\
 979         Generate tag entries for C and Ada typedefs.");
 980       puts ("-T, --typedefs-and-c++\n\
 981         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 982         and C++ member functions.");
 983     }
 984
 985   if (CTAGS)
 986     puts ("-u, --update\n\
 987         Update the tag entries for the given files, leaving tag\n\
 988         entries for other files in place.  Currently, this is\n\
 989         implemented by deleting the existing entries for the given\n\
 990         files and then rewriting the new entries at the end of the\n\
 991         tags file.  It is often faster to simply rebuild the entire\n\
 992         tag file than to use this.");
 993
 994   if (CTAGS)
 995     {
 996       puts ("-v, --vgrind\n\
 997         Print on the standard output an index of items intended for\n\
 998         human consumption, similar to the output of vgrind.  The index\n\
 999         is sorted, and gives the page number of each item.");
1000       puts ("-w, --no-warn\n\
1001         Suppress warning messages about entries defined in multiple\n\
1002         files.");
1003       puts ("-x, --cxref\n\
1004         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1005         The output uses line numbers instead of page numbers, but\n\
1006         beyond that the differences are cosmetic; try both to see\n\
1007         which you like.");
1008     }
1009
1010   puts ("-V, --version\n\
1011         Print the version of the program.\n\
1012 -h, --help\n\
1013         Print this help message.\n\
1014         Followed by one or more `--language' options prints detailed\n\
1015         help about tag generation for the specified languages.");
1016
1017   print_language_names ();
1018
1019   puts ("");
1020   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1021
1022   exit (EXIT_SUCCESS);
1023 }
1024
1025 \f
1026 #ifdef VMS                      /* VMS specific functions */
1027
/* End-of-string marker.  */
#define EOS     '\0'

/* Longest file specification that fits in a vspec, not counting the
   terminator.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A length-prefixed string buffer: a current length followed by the
   characters themselves, with room for one terminating EOS.  */
typedef struct
{
  short curlen;                         /* number of characters in use */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the characters */
} vspec;
1037
1038 /*
1039  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1040  returning in each successive call the next file name matching the input
1041  spec. The function expects that each in_spec passed
1042  to it will be processed to completion; in particular, up to and
1043  including the call following that in which the last matching name
1044  is returned, the function ignores the value of in_spec, and will
1045  only start processing a new spec with the following call.
1046  If an error occurs, on return out_spec contains the value
1047  of in_spec when the error occurred.
1048
1049  With each successive file name returned in out_spec, the
1050  function's return value is one. When there are no more matching
1051  names the function returns zero. If on the first call no file
1052  matches in_spec, or there is any other error, -1 is returned.
1053 */
1054
1055 #include        <rmsdef.h>
1056 #include        <descrip.h>
1057 #define         OUTSIZE MAX_FILE_SPEC_LEN
1058 static short
1059 fn_exp (out, in)
1060      vspec *out;
1061      char *in;
1062 {
1063   static long context = 0;
1064   static struct dsc$descriptor_s o;
1065   static struct dsc$descriptor_s i;
1066   static bool pass1 = TRUE;
1067   long status;
1068   short retval;
1069
1070   if (pass1)
1071     {
1072       pass1 = FALSE;
1073       o.dsc$a_pointer = (char *) out;
1074       o.dsc$w_length = (short)OUTSIZE;
1075       i.dsc$a_pointer = in;
1076       i.dsc$w_length = (short)strlen(in);
1077       i.dsc$b_dtype = DSC$K_DTYPE_T;
1078       i.dsc$b_class = DSC$K_CLASS_S;
1079       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1080       o.dsc$b_class = DSC$K_CLASS_VS;
1081     }
1082   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1083     {
1084       out->body[out->curlen] = EOS;
1085       return 1;
1086     }
1087   else if (status == RMS$_NMF)
1088     retval = 0;
1089   else
1090     {
1091       strcpy(out->body, in);
1092       retval = -1;
1093     }
1094   lib$find_file_end(&context);
1095   pass1 = TRUE;
1096   return retval;
1097 }
1098
1099 /*
1100   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1101   name of each file specified by the provided arg expanding wildcards.
1102 */
1103 static char *
1104 gfnames (arg, p_error)
1105      char *arg;
1106      bool *p_error;
1107 {
1108   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1109
1110   switch (fn_exp (&filename, arg))
1111     {
1112     case 1:
1113       *p_error = FALSE;
1114       return filename.body;
1115     case 0:
1116       *p_error = FALSE;
1117       return NULL;
1118     default:
1119       *p_error = TRUE;
1120       return filename.body;
1121     }
1122 }
1123
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Replacement for the missing library function: report that shell
 * commands cannot be run and return -1, so that callers testing the
 * result see a failure instead of an indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1131
1132 #define VERSION_DELIM   ';'
1133 char *massage_name (s)
1134      char *s;
1135 {
1136   char *start = s;
1137
1138   for ( ; *s; s++)
1139     if (*s == VERSION_DELIM)
1140       {
1141         *s = EOS;
1142         break;
1143       }
1144     else
1145       *s = lowcase (*s);
1146   return start;
1147 }
1148 #endif /* VMS */
1149
1150 \f
1151 int
1152 main (argc, argv)
1153      int argc;
1154      char *argv[];
1155 {
1156   int i;
1157   unsigned int nincluded_files;
1158   char **included_files;
1159   argument *argbuffer;
1160   int current_arg, file_count;
1161   linebuffer filename_lb;
1162   bool help_asked = FALSE;
1163 #ifdef VMS
1164   bool got_err;
1165 #endif
1166  char *optstring;
1167  int opt;
1168
1169
1170 #ifdef DOS_NT
1171   _fmode = O_BINARY;   /* all of files are treated as binary files */
1172 #endif /* DOS_NT */
1173
1174   progname = argv[0];
1175   nincluded_files = 0;
1176   included_files = xnew (argc, char *);
1177   current_arg = 0;
1178   file_count = 0;
1179
1180   /* Allocate enough no matter what happens.  Overkill, but each one
1181      is small. */
1182   argbuffer = xnew (argc, argument);
1183
1184   /*
1185    * If etags, always find typedefs and structure tags.  Why not?
1186    * Also default to find macro constants, enum constants and
1187    * global variables.
1188    */
1189   if (!CTAGS)
1190     {
1191       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1192       globals = TRUE;
1193     }
1194
1195   /* When the optstring begins with a '-' getopt_long does not rearrange the
1196      non-options arguments to be at the end, but leaves them alone. */
1197   optstring = "-";
1198 #ifdef ETAGS_REGEXPS
1199   optstring = "-r:Rc:";
1200 #endif /* ETAGS_REGEXPS */
1201   if (!LONG_OPTIONS)
1202     optstring += 1;             /* remove the initial '-' */
1203   optstring = concat (optstring,
1204                       "aCf:Il:o:SVhH",
1205                       (CTAGS) ? "BxdtTuvw" : "Di:");
1206
1207   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1208     switch (opt)
1209       {
1210       case 0:
1211         /* If getopt returns 0, then it has already processed a
1212            long-named option.  We should do nothing.  */
1213         break;
1214
1215       case 1:
1216         /* This means that a file name has been seen.  Record it. */
1217         argbuffer[current_arg].arg_type = at_filename;
1218         argbuffer[current_arg].what     = optarg;
1219         ++current_arg;
1220         ++file_count;
1221         break;
1222
1223       case STDIN:
1224         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1225         argbuffer[current_arg].arg_type = at_stdin;
1226         argbuffer[current_arg].what     = optarg;
1227         ++current_arg;
1228         ++file_count;
1229         if (parsing_stdin)
1230           fatal ("cannot parse standard input more than once", (char *)NULL);
1231         parsing_stdin = TRUE;
1232         break;
1233
1234         /* Common options. */
1235       case 'a': append_to_tagfile = TRUE;       break;
1236       case 'C': cplusplus = TRUE;               break;
1237       case 'f':         /* for compatibility with old makefiles */
1238       case 'o':
1239         if (tagfile)
1240           {
1241             error ("-o option may only be given once.", (char *)NULL);
1242             suggest_asking_for_help ();
1243             /* NOTREACHED */
1244           }
1245         tagfile = optarg;
1246         break;
1247       case 'I':
1248       case 'S':         /* for backward compatibility */
1249         ignoreindent = TRUE;
1250         break;
1251       case 'l':
1252         {
1253           language *lang = get_language_from_langname (optarg);
1254           if (lang != NULL)
1255             {
1256               argbuffer[current_arg].lang = lang;
1257               argbuffer[current_arg].arg_type = at_language;
1258               ++current_arg;
1259             }
1260         }
1261         break;
1262       case 'c':
1263         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1264         optarg = concat (optarg, "i", ""); /* memory leak here */
1265         /* FALLTHRU */
1266       case 'r':
1267         argbuffer[current_arg].arg_type = at_regexp;
1268         argbuffer[current_arg].what = optarg;
1269         ++current_arg;
1270         break;
1271       case 'R':
1272         argbuffer[current_arg].arg_type = at_regexp;
1273         argbuffer[current_arg].what = NULL;
1274         ++current_arg;
1275         break;
1276       case 'V':
1277         print_version ();
1278         break;
1279       case 'h':
1280       case 'H':
1281         help_asked = TRUE;
1282         break;
1283
1284         /* Etags options */
1285       case 'D': constantypedefs = FALSE;                        break;
1286       case 'i': included_files[nincluded_files++] = optarg;     break;
1287
1288         /* Ctags options. */
1289       case 'B': searchar = '?';                                 break;
1290       case 'd': constantypedefs = TRUE;                         break;
1291       case 't': typedefs = TRUE;                                break;
1292       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1293       case 'u': update = TRUE;                                  break;
1294       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1295       case 'x': cxref_style = TRUE;                             break;
1296       case 'w': no_warnings = TRUE;                             break;
1297       default:
1298         suggest_asking_for_help ();
1299         /* NOTREACHED */
1300       }
1301
1302   /* No more options.  Store the rest of arguments. */
1303   for (; optind < argc; optind++)
1304     {
1305       argbuffer[current_arg].arg_type = at_filename;
1306       argbuffer[current_arg].what = argv[optind];
1307       ++current_arg;
1308       ++file_count;
1309     }
1310
1311   argbuffer[current_arg].arg_type = at_end;
1312
1313   if (help_asked)
1314     print_help (argbuffer);
1315     /* NOTREACHED */
1316
1317   if (nincluded_files == 0 && file_count == 0)
1318     {
1319       error ("no input files specified.", (char *)NULL);
1320       suggest_asking_for_help ();
1321       /* NOTREACHED */
1322     }
1323
1324   if (tagfile == NULL)
1325     tagfile = CTAGS ? "tags" : "TAGS";
1326   cwd = etags_getcwd ();        /* the current working directory */
1327   if (cwd[strlen (cwd) - 1] != '/')
1328     {
1329       char *oldcwd = cwd;
1330       cwd = concat (oldcwd, "/", "");
1331       free (oldcwd);
1332     }
1333   /* Relative file names are made relative to the current directory. */
1334   if (streq (tagfile, "-")
1335       || strneq (tagfile, "/dev/", 5))
1336     tagfiledir = cwd;
1337   else
1338     tagfiledir = absolute_dirname (tagfile, cwd);
1339
1340   init ();                      /* set up boolean "functions" */
1341
1342   linebuffer_init (&lb);
1343   linebuffer_init (&filename_lb);
1344   linebuffer_init (&filebuf);
1345   linebuffer_init (&token_name);
1346
1347   if (!CTAGS)
1348     {
1349       if (streq (tagfile, "-"))
1350         {
1351           tagf = stdout;
1352 #ifdef DOS_NT
1353           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1354              doesn't take effect until after `stdout' is already open). */
1355           if (!isatty (fileno (stdout)))
1356             setmode (fileno (stdout), O_BINARY);
1357 #endif /* DOS_NT */
1358         }
1359       else
1360         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1361       if (tagf == NULL)
1362         pfatal (tagfile);
1363     }
1364
1365   /*
1366    * Loop through files finding functions.
1367    */
1368   for (i = 0; i < current_arg; i++)
1369     {
1370       static language *lang;    /* non-NULL if language is forced */
1371       char *this_file;
1372
1373       switch (argbuffer[i].arg_type)
1374         {
1375         case at_language:
1376           lang = argbuffer[i].lang;
1377           break;
1378 #ifdef ETAGS_REGEXPS
1379         case at_regexp:
1380           analyse_regex (argbuffer[i].what);
1381           break;
1382 #endif
1383         case at_filename:
1384 #ifdef VMS
1385           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1386             {
1387               if (got_err)
1388                 {
1389                   error ("can't find file %s\n", this_file);
1390                   argc--, argv++;
1391                 }
1392               else
1393                 {
1394                   this_file = massage_name (this_file);
1395                 }
1396 #else
1397               this_file = argbuffer[i].what;
1398 #endif
1399               /* Input file named "-" means read file names from stdin
1400                  (one per line) and use them. */
1401               if (streq (this_file, "-"))
1402                 {
1403                   if (parsing_stdin)
1404                     fatal ("cannot parse standard input AND read file names from it",
1405                            (char *)NULL);
1406                   while (readline_internal (&filename_lb, stdin) > 0)
1407                     process_file_name (filename_lb.buffer, lang);
1408                 }
1409               else
1410                 process_file_name (this_file, lang);
1411 #ifdef VMS
1412             }
1413 #endif
1414           break;
1415         case at_stdin:
1416           this_file = argbuffer[i].what;
1417           process_file (stdin, this_file, lang);
1418           break;
1419         }
1420     }
1421
1422 #ifdef ETAGS_REGEXPS
1423   free_regexps ();
1424 #endif /* ETAGS_REGEXPS */
1425   free (lb.buffer);
1426   free (filebuf.buffer);
1427   free (token_name.buffer);
1428
1429   if (!CTAGS || cxref_style)
1430     {
1431       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1432       put_entries (nodehead);
1433       free_tree (nodehead);
1434       nodehead = NULL;
1435       if (!CTAGS)
1436         {
1437           fdesc *fdp;
1438
1439           /* Output file entries that have no tags. */
1440           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1441             if (!fdp->written)
1442               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1443
1444           while (nincluded_files-- > 0)
1445             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1446
1447           if (fclose (tagf) == EOF)
1448             pfatal (tagfile);
1449         }
1450
1451       exit (EXIT_SUCCESS);
1452     }
1453
1454   if (update)
1455     {
1456       char cmd[BUFSIZ];
1457       for (i = 0; i < current_arg; ++i)
1458         {
1459           switch (argbuffer[i].arg_type)
1460             {
1461             case at_filename:
1462             case at_stdin:
1463               break;
1464             default:
1465               continue;         /* the for loop */
1466             }
1467           sprintf (cmd,
1468                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1469                    tagfile, argbuffer[i].what, tagfile);
1470           if (system (cmd) != EXIT_SUCCESS)
1471             fatal ("failed to execute shell command", (char *)NULL);
1472         }
1473       append_to_tagfile = TRUE;
1474     }
1475
1476   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1477   if (tagf == NULL)
1478     pfatal (tagfile);
1479   put_entries (nodehead);       /* write all the tags (CTAGS) */
1480   free_tree (nodehead);
1481   nodehead = NULL;
1482   if (fclose (tagf) == EOF)
1483     pfatal (tagfile);
1484
1485   if (CTAGS)
1486     if (append_to_tagfile || update)
1487       {
1488         char cmd[2*BUFSIZ+10];
1489         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1490         exit (system (cmd));
1491       }
1492   return EXIT_SUCCESS;
1493 }
1494
1495
1496 /*
1497  * Return a compressor given the file name.  If EXTPTR is non-zero,
1498  * return a pointer into FILE where the compressor-specific
1499  * extension begins.  If no compressor is found, NULL is returned
1500  * and EXTPTR is not significant.
1501  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1502  */
1503 static compressor *
1504 get_compressor_from_suffix (file, extptr)
1505      char *file;
1506      char **extptr;
1507 {
1508   compressor *compr;
1509   char *slash, *suffix;
1510
1511   /* This relies on FN to be after canonicalize_filename,
1512      so we don't need to consider backslashes on DOS_NT.  */
1513   slash = etags_strrchr (file, '/');
1514   suffix = etags_strrchr (file, '.');
1515   if (suffix == NULL || suffix < slash)
1516     return NULL;
1517   if (extptr != NULL)
1518     *extptr = suffix;
1519   suffix += 1;
1520   /* Let those poor souls who live with DOS 8+3 file name limits get
1521      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1522      Only the first do loop is run if not MSDOS */
1523   do
1524     {
1525       for (compr = compressors; compr->suffix != NULL; compr++)
1526         if (streq (compr->suffix, suffix))
1527           return compr;
1528       if (!MSDOS)
1529         break;                  /* do it only once: not really a loop */
1530       if (extptr != NULL)
1531         *extptr = ++suffix;
1532     } while (*suffix != '\0');
1533   return NULL;
1534 }
1535
1536
1537
1538 /*
1539  * Return a language given the name.
1540  */
1541 static language *
1542 get_language_from_langname (name)
1543      const char *name;
1544 {
1545   language *lang;
1546
1547   if (name == NULL)
1548     error ("empty language name", (char *)NULL);
1549   else
1550     {
1551       for (lang = lang_names; lang->name != NULL; lang++)
1552         if (streq (name, lang->name))
1553           return lang;
1554       error ("unknown language \"%s\"", name);
1555     }
1556
1557   return NULL;
1558 }
1559
1560
1561 /*
1562  * Return a language given the interpreter name.
1563  */
1564 static language *
1565 get_language_from_interpreter (interpreter)
1566      char *interpreter;
1567 {
1568   language *lang;
1569   char **iname;
1570
1571   if (interpreter == NULL)
1572     return NULL;
1573   for (lang = lang_names; lang->name != NULL; lang++)
1574     if (lang->interpreters != NULL)
1575       for (iname = lang->interpreters; *iname != NULL; iname++)
1576         if (streq (*iname, interpreter))
1577             return lang;
1578
1579   return NULL;
1580 }
1581
1582
1583
1584 /*
1585  * Return a language given the file name.
1586  */
1587 static language *
1588 get_language_from_filename (file, case_sensitive)
1589      char *file;
1590      bool case_sensitive;
1591 {
1592   language *lang;
1593   char **name, **ext, *suffix;
1594
1595   /* Try whole file name first. */
1596   for (lang = lang_names; lang->name != NULL; lang++)
1597     if (lang->filenames != NULL)
1598       for (name = lang->filenames; *name != NULL; name++)
1599         if ((case_sensitive)
1600             ? streq (*name, file)
1601             : strcaseeq (*name, file))
1602           return lang;
1603
1604   /* If not found, try suffix after last dot. */
1605   suffix = etags_strrchr (file, '.');
1606   if (suffix == NULL)
1607     return NULL;
1608   suffix += 1;
1609   for (lang = lang_names; lang->name != NULL; lang++)
1610     if (lang->suffixes != NULL)
1611       for (ext = lang->suffixes; *ext != NULL; ext++)
1612         if ((case_sensitive)
1613             ? streq (*ext, suffix)
1614             : strcaseeq (*ext, suffix))
1615           return lang;
1616   return NULL;
1617 }
1618
1619 \f
1620 /*
1621  * This routine is called on each file argument.
1622  */
1623 static void
1624 process_file_name (file, lang)
1625      char *file;
1626      language *lang;
1627 {
1628   struct stat stat_buf;
1629   FILE *inf;
1630   fdesc *fdp;
1631   compressor *compr;
1632   char *compressed_name, *uncompressed_name;
1633   char *ext, *real_name;
1634   int retval;
1635
1636   canonicalize_filename (file);
1637   if (streq (file, tagfile) && !streq (tagfile, "-"))
1638     {
1639       error ("skipping inclusion of %s in self.", file);
1640       return;
1641     }
1642   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1643     {
1644       compressed_name = NULL;
1645       real_name = uncompressed_name = savestr (file);
1646     }
1647   else
1648     {
1649       real_name = compressed_name = savestr (file);
1650       uncompressed_name = savenstr (file, ext - file);
1651     }
1652
1653   /* If the canonicalized uncompressed name
1654      has already been dealt with, skip it silently. */
1655   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1656     {
1657       assert (fdp->infname != NULL);
1658       if (streq (uncompressed_name, fdp->infname))
1659         goto cleanup;
1660     }
1661
1662   if (stat (real_name, &stat_buf) != 0)
1663     {
1664       /* Reset real_name and try with a different name. */
1665       real_name = NULL;
1666       if (compressed_name != NULL) /* try with the given suffix */
1667         {
1668           if (stat (uncompressed_name, &stat_buf) == 0)
1669             real_name = uncompressed_name;
1670         }
1671       else                      /* try all possible suffixes */
1672         {
1673           for (compr = compressors; compr->suffix != NULL; compr++)
1674             {
1675               compressed_name = concat (file, ".", compr->suffix);
1676               if (stat (compressed_name, &stat_buf) != 0)
1677                 {
1678                   if (MSDOS)
1679                     {
1680                       char *suf = compressed_name + strlen (file);
1681                       size_t suflen = strlen (compr->suffix) + 1;
1682                       for ( ; suf[1]; suf++, suflen--)
1683                         {
1684                           memmove (suf, suf + 1, suflen);
1685                           if (stat (compressed_name, &stat_buf) == 0)
1686                             {
1687                               real_name = compressed_name;
1688                               break;
1689                             }
1690                         }
1691                       if (real_name != NULL)
1692                         break;
1693                     } /* MSDOS */
1694                   free (compressed_name);
1695                   compressed_name = NULL;
1696                 }
1697               else
1698                 {
1699                   real_name = compressed_name;
1700                   break;
1701                 }
1702             }
1703         }
1704       if (real_name == NULL)
1705         {
1706           perror (file);
1707           goto cleanup;
1708         }
1709     } /* try with a different name */
1710
1711   if (!S_ISREG (stat_buf.st_mode))
1712     {
1713       error ("skipping %s: it is not a regular file.", real_name);
1714       goto cleanup;
1715     }
1716   if (real_name == compressed_name)
1717     {
1718       char *cmd = concat (compr->command, " ", real_name);
1719       inf = (FILE *) popen (cmd, "r");
1720       free (cmd);
1721     }
1722   else
1723     inf = fopen (real_name, "r");
1724   if (inf == NULL)
1725     {
1726       perror (real_name);
1727       goto cleanup;
1728     }
1729
1730   process_file (inf, uncompressed_name, lang);
1731
1732   if (real_name == compressed_name)
1733     retval = pclose (inf);
1734   else
1735     retval = fclose (inf);
1736   if (retval < 0)
1737     pfatal (file);
1738
1739  cleanup:
1740   if (compressed_name) free (compressed_name);
1741   if (uncompressed_name) free (uncompressed_name);
1742   last_node = NULL;
1743   curfdp = NULL;
1744   return;
1745 }
1746
1747 static void
1748 process_file (fh, fn, lang)
1749      FILE *fh;
1750      char *fn;
1751      language *lang;
1752 {
1753   static const fdesc emptyfdesc;
1754   fdesc *fdp;
1755
1756   /* Create a new input file description entry. */
1757   fdp = xnew (1, fdesc);
1758   *fdp = emptyfdesc;
1759   fdp->next = fdhead;
1760   fdp->infname = savestr (fn);
1761   fdp->lang = lang;
1762   fdp->infabsname = absolute_filename (fn, cwd);
1763   fdp->infabsdir = absolute_dirname (fn, cwd);
1764   if (filename_is_absolute (fn))
1765     {
1766       /* An absolute file name.  Canonicalize it. */
1767       fdp->taggedfname = absolute_filename (fn, NULL);
1768     }
1769   else
1770     {
1771       /* A file name relative to cwd.  Make it relative
1772          to the directory of the tags file. */
1773       fdp->taggedfname = relative_filename (fn, tagfiledir);
1774     }
1775   fdp->usecharno = TRUE;        /* use char position when making tags */
1776   fdp->prop = NULL;
1777   fdp->written = FALSE;         /* not written on tags file yet */
1778
1779   fdhead = fdp;
1780   curfdp = fdhead;              /* the current file description */
1781
1782   find_entries (fh);
1783
1784   /* If not Ctags, and if this is not metasource and if it contained no #line
1785      directives, we can write the tags and free all nodes pointing to
1786      curfdp. */
1787   if (!CTAGS
1788       && curfdp->usecharno      /* no #line directives in this file */
1789       && !curfdp->lang->metasource)
1790     {
1791       node *np, *prev;
1792
1793       /* Look for the head of the sublist relative to this file.  See add_node
1794          for the structure of the node tree. */
1795       prev = NULL;
1796       for (np = nodehead; np != NULL; prev = np, np = np->left)
1797         if (np->fdp == curfdp)
1798           break;
1799
1800       /* If we generated tags for this file, write and delete them. */
1801       if (np != NULL)
1802         {
1803           /* This is the head of the last sublist, if any.  The following
1804              instructions depend on this being true. */
1805           assert (np->left == NULL);
1806
1807           assert (fdhead == curfdp);
1808           assert (last_node->fdp == curfdp);
1809           put_entries (np);     /* write tags for file curfdp->taggedfname */
1810           free_tree (np);       /* remove the written nodes */
1811           if (prev == NULL)
1812             nodehead = NULL;    /* no nodes left */
1813           else
1814             prev->left = NULL;  /* delete the pointer to the sublist */
1815         }
1816     }
1817 }
1818
1819 /*
1820  * This routine sets up the boolean pseudo-functions which work
1821  * by setting boolean flags dependent upon the corresponding character.
1822  * Every char which is NOT in that string is not a white char.  Therefore,
1823  * all of the array "_wht" is set to FALSE, and then the elements
1824  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1825  * of a char is TRUE if it is the string "white", else FALSE.
1826  */
1827 static void
1828 init ()
1829 {
1830   register char *sp;
1831   register int i;
1832
1833   for (i = 0; i < CHARS; i++)
1834     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1835   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1836   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1837   notinname('\0') = notinname('\n');
1838   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1839   begtoken('\0') = begtoken('\n');
1840   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1841   intoken('\0') = intoken('\n');
1842   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1843   endtoken('\0') = endtoken('\n');
1844 }
1845
1846 /*
1847  * This routine opens the specified file and calls the function
1848  * which finds the function and type definitions.
1849  */
1850 static void
1851 find_entries (inf)
1852      FILE *inf;
1853 {
1854   char *cp;
1855   language *lang = curfdp->lang;
1856   Lang_function *parser = NULL;
1857
1858   /* If user specified a language, use it. */
1859   if (lang != NULL && lang->function != NULL)
1860     {
1861       parser = lang->function;
1862     }
1863
1864   /* Else try to guess the language given the file name. */
1865   if (parser == NULL)
1866     {
1867       lang = get_language_from_filename (curfdp->infname, TRUE);
1868       if (lang != NULL && lang->function != NULL)
1869         {
1870           curfdp->lang = lang;
1871           parser = lang->function;
1872         }
1873     }
1874
1875   /* Else look for sharp-bang as the first two characters. */
1876   if (parser == NULL
1877       && readline_internal (&lb, inf) > 0
1878       && lb.len >= 2
1879       && lb.buffer[0] == '#'
1880       && lb.buffer[1] == '!')
1881     {
1882       char *lp;
1883
1884       /* Set lp to point at the first char after the last slash in the
1885          line or, if no slashes, at the first nonblank.  Then set cp to
1886          the first successive blank and terminate the string. */
1887       lp = etags_strrchr (lb.buffer+2, '/');
1888       if (lp != NULL)
1889         lp += 1;
1890       else
1891         lp = skip_spaces (lb.buffer + 2);
1892       cp = skip_non_spaces (lp);
1893       *cp = '\0';
1894
1895       if (strlen (lp) > 0)
1896         {
1897           lang = get_language_from_interpreter (lp);
1898           if (lang != NULL && lang->function != NULL)
1899             {
1900               curfdp->lang = lang;
1901               parser = lang->function;
1902             }
1903         }
1904     }
1905
1906   /* We rewind here, even if inf may be a pipe.  We fail if the
1907      length of the first line is longer than the pipe block size,
1908      which is unlikely. */
1909   rewind (inf);
1910
1911   /* Else try to guess the language given the case insensitive file name. */
1912   if (parser == NULL)
1913     {
1914       lang = get_language_from_filename (curfdp->infname, FALSE);
1915       if (lang != NULL && lang->function != NULL)
1916         {
1917           curfdp->lang = lang;
1918           parser = lang->function;
1919         }
1920     }
1921
1922   /* Else try Fortran or C. */
1923   if (parser == NULL)
1924     {
1925       node *old_last_node = last_node;
1926
1927       curfdp->lang = get_language_from_langname ("fortran");
1928       find_entries (inf);
1929
1930       if (old_last_node == last_node)
1931         /* No Fortran entries found.  Try C. */
1932         {
1933           /* We do not tag if rewind fails.
1934              Only the file name will be recorded in the tags file. */
1935           rewind (inf);
1936           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1937           find_entries (inf);
1938         }
1939       return;
1940     }
1941
1942   if (!no_line_directive
1943       && curfdp->lang != NULL && curfdp->lang->metasource)
1944     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1945        file, or anyway we parsed a file that is automatically generated from
1946        this one.  If this is the case, the bingo.c file contained #line
1947        directives that generated tags pointing to this file.  Let's delete
1948        them all before parsing this file, which is the real source. */
1949     {
1950       fdesc **fdpp = &fdhead;
1951       while (*fdpp != NULL)
1952         if (*fdpp != curfdp
1953             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1954           /* We found one of those!  We must delete both the file description
1955              and all tags referring to it. */
1956           {
1957             fdesc *badfdp = *fdpp;
1958
1959             /* Delete the tags referring to badfdp->taggedfname
1960                that were obtained from badfdp->infname. */
1961             invalidate_nodes (badfdp, &nodehead);
1962
1963             *fdpp = badfdp->next; /* remove the bad description from the list */
1964             free_fdesc (badfdp);
1965           }
1966         else
1967           fdpp = &(*fdpp)->next; /* advance the list pointer */
1968     }
1969
1970   assert (parser != NULL);
1971
1972   /* Generic initialisations before reading from file. */
1973   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1974
1975   /* Generic initialisations before parsing file with readline. */
1976   lineno = 0;                  /* reset global line number */
1977   charno = 0;                  /* reset global char number */
1978   linecharno = 0;              /* reset global char number of line start */
1979
1980   parser (inf);
1981
1982 #ifdef ETAGS_REGEXPS
1983   regex_tag_multiline ();
1984 #endif /* ETAGS_REGEXPS */
1985 }
1986
1987 \f
1988 /*
1989  * Check whether an implicitly named tag should be created,
1990  * then call `pfnote'.
1991  * NAME is a string that is internally copied by this function.
1992  *
1993  * TAGS format specification
1994  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1995  * The following is explained in some more detail in etc/ETAGS.EBNF.
1996  *
1997  * make_tag creates tags with "implicit tag names" (unnamed tags)
1998  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1999  *  1. NAME does not contain any of the characters in NONAM;
2000  *  2. LINESTART contains name as either a rightmost, or rightmost but
2001  *     one character, substring;
2002  *  3. the character, if any, immediately before NAME in LINESTART must
2003  *     be a character in NONAM;
2004  *  4. the character, if any, immediately after NAME in LINESTART must
2005  *     also be a character in NONAM.
2006  *
2007  * The implementation uses the notinname() macro, which recognises the
2008  * characters stored in the string `nonam'.
2009  * etags.el needs to use the same characters that are in NONAM.
2010  */
2011 static void
2012 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2013      char *name;                /* tag name, or NULL if unnamed */
2014      int namelen;               /* tag length */
2015      bool is_func;              /* tag is a function */
2016      char *linestart;           /* start of the line where tag is */
2017      int linelen;               /* length of the line where tag is */
2018      int lno;                   /* line number */
2019      long cno;                  /* character number */
2020 {
2021   bool named = (name != NULL && namelen > 0);
2022
2023   if (!CTAGS && named)          /* maybe set named to false */
2024     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2025        such that etags.el can guess a name from it. */
2026     {
2027       int i;
2028       register char *cp = name;
2029
2030       for (i = 0; i < namelen; i++)
2031         if (notinname (*cp++))
2032           break;
2033       if (i == namelen)                         /* rule #1 */
2034         {
2035           cp = linestart + linelen - namelen;
2036           if (notinname (linestart[linelen-1]))
2037             cp -= 1;                            /* rule #4 */
2038           if (cp >= linestart                   /* rule #2 */
2039               && (cp == linestart
2040                   || notinname (cp[-1]))        /* rule #3 */
2041               && strneq (name, cp, namelen))    /* rule #2 */
2042             named = FALSE;      /* use implicit tag name */
2043         }
2044     }
2045
2046   if (named)
2047     name = savenstr (name, namelen);
2048   else
2049     name = NULL;
2050   pfnote (name, is_func, linestart, linelen, lno, cno);
2051 }
2052
2053 /* Record a tag. */
2054 static void
2055 pfnote (name, is_func, linestart, linelen, lno, cno)
2056      char *name;                /* tag name, or NULL if unnamed */
2057      bool is_func;              /* tag is a function */
2058      char *linestart;           /* start of the line where tag is */
2059      int linelen;               /* length of the line where tag is */
2060      int lno;                   /* line number */
2061      long cno;                  /* character number */
2062 {
2063   register node *np;
2064
2065   assert (name == NULL || name[0] != '\0');
2066   if (CTAGS && name == NULL)
2067     return;
2068
2069   np = xnew (1, node);
2070
2071   /* If ctags mode, change name "main" to M<thisfilename>. */
2072   if (CTAGS && !cxref_style && streq (name, "main"))
2073     {
2074       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2075       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2076       fp = etags_strrchr (np->name, '.');
2077       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2078         fp[0] = '\0';
2079     }
2080   else
2081     np->name = name;
2082   np->valid = TRUE;
2083   np->been_warned = FALSE;
2084   np->fdp = curfdp;
2085   np->is_func = is_func;
2086   np->lno = lno;
2087   if (np->fdp->usecharno)
2088     /* Our char numbers are 0-base, because of C language tradition?
2089        ctags compatibility?  old versions compatibility?   I don't know.
2090        Anyway, since emacs's are 1-base we expect etags.el to take care
2091        of the difference.  If we wanted to have 1-based numbers, we would
2092        uncomment the +1 below. */
2093     np->cno = cno /* + 1 */ ;
2094   else
2095     np->cno = invalidcharno;
2096   np->left = np->right = NULL;
2097   if (CTAGS && !cxref_style)
2098     {
2099       if (strlen (linestart) < 50)
2100         np->regex = concat (linestart, "$", "");
2101       else
2102         np->regex = savenstr (linestart, 50);
2103     }
2104   else
2105     np->regex = savenstr (linestart, linelen);
2106
2107   add_node (np, &nodehead);
2108 }
2109
2110 /*
2111  * free_tree ()
2112  *      recurse on left children, iterate on right children.
2113  */
2114 static void
2115 free_tree (np)
2116      register node *np;
2117 {
2118   while (np)
2119     {
2120       register node *node_right = np->right;
2121       free_tree (np->left);
2122       if (np->name != NULL)
2123         free (np->name);
2124       free (np->regex);
2125       free (np);
2126       np = node_right;
2127     }
2128 }
2129
2130 /*
2131  * free_fdesc ()
2132  *      delete a file description
2133  */
2134 static void
2135 free_fdesc (fdp)
2136      register fdesc *fdp;
2137 {
2138   if (fdp->infname != NULL) free (fdp->infname);
2139   if (fdp->infabsname != NULL) free (fdp->infabsname);
2140   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2141   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2142   if (fdp->prop != NULL) free (fdp->prop);
2143   free (fdp);
2144 }
2145
2146 /*
2147  * add_node ()
2148  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2149  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2150  *      balancing.
2151  *
2152  *      add_node is the only function allowed to add nodes, so it can
2153  *      maintain state.
2154  */
2155 static void
2156 add_node (np, cur_node_p)
2157      node *np, **cur_node_p;
2158 {
2159   register int dif;
2160   register node *cur_node = *cur_node_p;
2161
2162   if (cur_node == NULL)
2163     {
2164       *cur_node_p = np;
2165       last_node = np;
2166       return;
2167     }
2168
2169   if (!CTAGS)
2170     /* Etags Mode */
2171     {
2172       /* For each file name, tags are in a linked sublist on the right
2173          pointer.  The first tags of different files are a linked list
2174          on the left pointer.  last_node points to the end of the last
2175          used sublist. */
2176       if (last_node != NULL && last_node->fdp == np->fdp)
2177         {
2178           /* Let's use the same sublist as the last added node. */
2179           assert (last_node->right == NULL);
2180           last_node->right = np;
2181           last_node = np;
2182         }
2183       else if (cur_node->fdp == np->fdp)
2184         {
2185           /* Scanning the list we found the head of a sublist which is
2186              good for us.  Let's scan this sublist. */
2187           add_node (np, &cur_node->right);
2188         }
2189       else
2190         /* The head of this sublist is not good for us.  Let's try the
2191            next one. */
2192         add_node (np, &cur_node->left);
2193     } /* if ETAGS mode */
2194
2195   else
2196     {
2197       /* Ctags Mode */
2198       dif = strcmp (np->name, cur_node->name);
2199
2200       /*
2201        * If this tag name matches an existing one, then
2202        * do not add the node, but maybe print a warning.
2203        */
2204       if (!dif)
2205         {
2206           if (np->fdp == cur_node->fdp)
2207             {
2208               if (!no_warnings)
2209                 {
2210                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2211                            np->fdp->infname, lineno, np->name);
2212                   fprintf (stderr, "Second entry ignored\n");
2213                 }
2214             }
2215           else if (!cur_node->been_warned && !no_warnings)
2216             {
2217               fprintf
2218                 (stderr,
2219                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2220                  np->fdp->infname, cur_node->fdp->infname, np->name);
2221               cur_node->been_warned = TRUE;
2222             }
2223           return;
2224         }
2225
2226       /* Actually add the node */
2227       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2228     } /* if CTAGS mode */
2229 }
2230
2231 /*
2232  * invalidate_nodes ()
2233  *      Scan the node tree and invalidate all nodes pointing to the
2234  *      given file description (CTAGS case) or free them (ETAGS case).
2235  */
2236 static void
2237 invalidate_nodes (badfdp, npp)
2238      fdesc *badfdp;
2239      node **npp;
2240 {
2241   node *np = *npp;
2242
2243   if (np == NULL)
2244     return;
2245
2246   if (CTAGS)
2247     {
2248       if (np->left != NULL)
2249         invalidate_nodes (badfdp, &np->left);
2250       if (np->fdp == badfdp)
2251         np->valid = FALSE;
2252       if (np->right != NULL)
2253         invalidate_nodes (badfdp, &np->right);
2254     }
2255   else
2256     {
2257       assert (np->fdp != NULL);
2258       if (np->fdp == badfdp)
2259         {
2260           *npp = np->left;      /* detach the sublist from the list */
2261           np->left = NULL;      /* isolate it */
2262           free_tree (np);       /* free it */
2263           invalidate_nodes (badfdp, npp);
2264         }
2265       else
2266         invalidate_nodes (badfdp, &np->left);
2267     }
2268 }
2269
2270 \f
2271 static int total_size_of_entries __P((node *));
2272 static int number_len __P((long));
2273
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero needs one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One more digit for each time NUM can still be divided by ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2284
2285 /*
2286  * Return total number of characters that put_entries will output for
2287  * the nodes in the linked list at the right of the specified node.
2288  * This count is irrelevant with etags.el since emacs 19.34 at least,
2289  * but is still supplied for backward compatibility.
2290  */
2291 static int
2292 total_size_of_entries (np)
2293      register node *np;
2294 {
2295   register int total = 0;
2296
2297   for (; np != NULL; np = np->right)
2298     if (np->valid)
2299       {
2300         total += strlen (np->regex) + 1;                /* pat\177 */
2301         if (np->name != NULL)
2302           total += strlen (np->name) + 1;               /* name\001 */
2303         total += number_len ((long) np->lno) + 1;       /* lno, */
2304         if (np->cno != invalidcharno)                   /* cno */
2305           total += number_len (np->cno);
2306         total += 1;                                     /* newline */
2307       }
2308
2309   return total;
2310 }
2311
2312 static void
2313 put_entries (np)
2314      register node *np;
2315 {
2316   register char *sp;
2317   static fdesc *fdp = NULL;
2318
2319   if (np == NULL)
2320     return;
2321
2322   /* Output subentries that precede this one */
2323   if (CTAGS)
2324     put_entries (np->left);
2325
2326   /* Output this entry */
2327   if (np->valid)
2328     {
2329       if (!CTAGS)
2330         {
2331           /* Etags mode */
2332           if (fdp != np->fdp)
2333             {
2334               fdp = np->fdp;
2335               fprintf (tagf, "\f\n%s,%d\n",
2336                        fdp->taggedfname, total_size_of_entries (np));
2337               fdp->written = TRUE;
2338             }
2339           fputs (np->regex, tagf);
2340           fputc ('\177', tagf);
2341           if (np->name != NULL)
2342             {
2343               fputs (np->name, tagf);
2344               fputc ('\001', tagf);
2345             }
2346           fprintf (tagf, "%d,", np->lno);
2347           if (np->cno != invalidcharno)
2348             fprintf (tagf, "%ld", np->cno);
2349           fputs ("\n", tagf);
2350         }
2351       else
2352         {
2353           /* Ctags mode */
2354           if (np->name == NULL)
2355             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2356
2357           if (cxref_style)
2358             {
2359               if (vgrind_style)
2360                 fprintf (stdout, "%s %s %d\n",
2361                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2362               else
2363                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2364                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2365             }
2366           else
2367             {
2368               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2369
2370               if (np->is_func)
2371                 {               /* function or #define macro with args */
2372                   putc (searchar, tagf);
2373                   putc ('^', tagf);
2374
2375                   for (sp = np->regex; *sp; sp++)
2376                     {
2377                       if (*sp == '\\' || *sp == searchar)
2378                         putc ('\\', tagf);
2379                       putc (*sp, tagf);
2380                     }
2381                   putc (searchar, tagf);
2382                 }
2383               else
2384                 {               /* anything else; text pattern inadequate */
2385                   fprintf (tagf, "%d", np->lno);
2386                 }
2387               putc ('\n', tagf);
2388             }
2389         }
2390     } /* if this node contains a valid tag */
2391
2392   /* Output subentries that follow this one */
2393   put_entries (np->right);
2394   if (!CTAGS)
2395     put_entries (np->left);
2396 }
2397
2398 \f
/* C extensions: flag values that tell the C-like dialects apart. */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_EXT   0x00fff         /* C extensions */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2407
/*
 * The C symbol tables.
 *
 * Kinds of symbol that the keyword table can assign to a word.
 * st_none is the first enumerator and so has value zero.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2422
2423 static unsigned int hash __P((const char *, unsigned int));
2424 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2425 static enum sym_type C_symtype __P((char *, int, int));
2426
2427 /* Feed stuff between (but not including) %[ and %] lines to:
2428      gperf -m 5
2429 %[
2430 %compare-strncmp
2431 %enum
2432 %struct-type
2433 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2434 %%
2435 if,             0,                      st_C_ignore
2436 for,            0,                      st_C_ignore
2437 while,          0,                      st_C_ignore
2438 switch,         0,                      st_C_ignore
2439 return,         0,                      st_C_ignore
2440 __attribute__,  0,                      st_C_attribute
2441 @interface,     0,                      st_C_objprot
2442 @protocol,      0,                      st_C_objprot
2443 @implementation,0,                      st_C_objimpl
2444 @end,           0,                      st_C_objend
2445 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2446 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2447 friend,         C_PLPL,                 st_C_ignore
2448 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2449 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2450 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2451 class,          0,                      st_C_class
2452 namespace,      C_PLPL,                 st_C_struct
2453 domain,         C_STAR,                 st_C_struct
2454 union,          0,                      st_C_struct
2455 struct,         0,                      st_C_struct
2456 extern,         0,                      st_C_extern
2457 enum,           0,                      st_C_enum
2458 typedef,        0,                      st_C_typedef
2459 define,         0,                      st_C_define
2460 undef,          0,                      st_C_define
2461 operator,       C_PLPL,                 st_C_operator
2462 template,       0,                      st_C_template
2463 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2464 DEFUN,          0,                      st_C_gnumacro
2465 SYSCALL,        0,                      st_C_gnumacro
2466 ENTRY,          0,                      st_C_gnumacro
2467 PSEUDO,         0,                      st_C_gnumacro
2468 # These are defined inside C functions, so currently they are not met.
2469 # EXFUN used in glibc, DEFVAR_* in emacs.
2470 #EXFUN,         0,                      st_C_gnumacro
2471 #DEFVAR_,       0,                      st_C_gnumacro
2472 %]
2473 and replace lines between %< and %> with its output, then:
2474  - remove the #if characterset check
2475  - make in_word_set static and not inline. */
2476 /*%<*/
2477 /* C code produced by gperf version 3.0.1 */
2478 /* Command-line: gperf -m 5  */
2479 /* Computed positions: -k'2-3' */
2480
2481 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2482 /* maximum key range = 33, duplicates = 0 */
2483
/* Hash function of the keyword table: the length of the word plus
   the association values of its second and, for words whose length
   is not 2, third character.  STR must therefore hold at least two
   characters, or three when LEN is not 2.
   NOTE: this function comes from gperf output; regenerating the
   table replaces it. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only two-character words leave the third character out. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2538
2539 static struct C_stab_entry *
2540 in_word_set (str, len)
2541      register const char *str;
2542      register unsigned int len;
2543 {
2544   enum
2545     {
2546       TOTAL_KEYWORDS = 32,
2547       MIN_WORD_LENGTH = 2,
2548       MAX_WORD_LENGTH = 15,
2549       MIN_HASH_VALUE = 2,
2550       MAX_HASH_VALUE = 34
2551     };
2552
2553   static struct C_stab_entry wordlist[] =
2554     {
2555       {""}, {""},
2556       {"if",            0,                      st_C_ignore},
2557       {""},
2558       {"@end",          0,                      st_C_objend},
2559       {"union",         0,                      st_C_struct},
2560       {"define",                0,                      st_C_define},
2561       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2562       {"template",      0,                      st_C_template},
2563       {"operator",      C_PLPL,                 st_C_operator},
2564       {"@interface",    0,                      st_C_objprot},
2565       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2566       {"friend",                C_PLPL,                 st_C_ignore},
2567       {"typedef",       0,                      st_C_typedef},
2568       {"return",                0,                      st_C_ignore},
2569       {"@implementation",0,                     st_C_objimpl},
2570       {"@protocol",     0,                      st_C_objprot},
2571       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2572       {"extern",                0,                      st_C_extern},
2573       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2574       {"struct",                0,                      st_C_struct},
2575       {"domain",                C_STAR,                 st_C_struct},
2576       {"switch",                0,                      st_C_ignore},
2577       {"enum",          0,                      st_C_enum},
2578       {"for",           0,                      st_C_ignore},
2579       {"namespace",     C_PLPL,                 st_C_struct},
2580       {"class",         0,                      st_C_class},
2581       {"while",         0,                      st_C_ignore},
2582       {"undef",         0,                      st_C_define},
2583       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2584       {"__attribute__", 0,                      st_C_attribute},
2585       {"SYSCALL",       0,                      st_C_gnumacro},
2586       {"ENTRY",         0,                      st_C_gnumacro},
2587       {"PSEUDO",                0,                      st_C_gnumacro},
2588       {"DEFUN",         0,                      st_C_gnumacro}
2589     };
2590
2591   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2592     {
2593       register int key = hash (str, len);
2594
2595       if (key <= MAX_HASH_VALUE && key >= 0)
2596         {
2597           register const char *s = wordlist[key].name;
2598
2599           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2600             return &wordlist[key];
2601         }
2602     }
2603   return 0;
2604 }
2605 /*%>*/
2606
2607 static enum sym_type
2608 C_symtype (str, len, c_ext)
2609      char *str;
2610      int len;
2611      int c_ext;
2612 {
2613   register struct C_stab_entry *se = in_word_set (str, len);
2614
2615   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2616     return st_none;
2617   return se->type;
2618 }
2619
2620 \f
2621 /*
2622  * Ignoring __attribute__ ((list))
2623  */
2624 static bool inattribute;        /* looking at an __attribute__ construct */
2625
/*
 * fvdef is the state variable of the simple finite automaton that
 * recognises C functions and variables.
 */
static enum
{
  fvnone,                       /* idle, nothing seen */
  fdefunkey,                    /* GNU macro keyword (DEFUN and the like) seen */
  fdefunname,                   /* name following that keyword seen */
  foperator,                    /* function: `operator' keyword seen (C++) */
  fvnameseen,                   /* name of a function or variable seen */
  fstartlist,                   /* function: just past the open parenthesis */
  finlist,                      /* function: inside the parameter list */
  flistseen,                    /* function: parameter list completed */
  fignore,                      /* function: waiting for the open brace */
  vignore                       /* variable-like: skip everything up to ';' */
} fvdef;
2643
2644 static bool fvextern;           /* func or var: extern keyword seen; */
2645
/*
 * typdef is the state variable of the simple finite automaton that
 * recognises typedefs.
 */
static enum
{
  tnone,                        /* idle, nothing seen */
  tkeyseen,                     /* `typedef' keyword seen */
  ttypeseen,                    /* the type being aliased has been seen */
  tinbody,                      /* within the body of the typedef */
  tend,                         /* right before the typedef tag */
  tignore                       /* past the tag: the rest is junk */
} typdef;
2659
/*
 * structdef is the state variable of the simple finite automaton that
 * recognises struct-like constructs (enum, struct and union).
 */
static enum
{
  snone,                        /* nothing seen yet, or inside a struct
                                   body when bracelev > 0 */
  skeyseen,                     /* a struct-like keyword was seen */
  stagseen,                     /* the tag after the keyword was seen */
  scolonseen                    /* a colon followed the struct-like tag */
} structdef;
2673
/*
 * Name of the Objective C class being processed.  It is meaningful only
 * while objdef is not onone; until then it holds a placeholder.
 */
static char *objtag = "<uninited>";
2678
/*
 * definedef is the state variable of one more small automaton, this
 * one following preprocessor lines.
 */
static enum
{
  dnone,                        /* not on a preprocessor line */
  dsharpseen,                   /* '#' was the first character on the line */
  ddefineseen,                  /* '#' followed by `define' seen */
  dignorerest                   /* skip whatever remains on the line */
} definedef;
2689
/*
 * objdef is the state variable of the automaton for Objective C
 * protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* idle, nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* name of the class seen */
  oparenseen,                   /* parenthesis opening a category seen */
  ocatseen,                     /* name of the category seen */
  oinbody,                      /* inside the @implementation body */
  omethodsign,                  /* inside the body, just after + or - */
  omethodtag,                   /* past the method name */
  omethodcolon,                 /* past a colon in the method */
  omethodparm,                  /* past a method parameter */
  oignore                       /* skip everything until @end */
} objdef;
2709
2710
2711 /*
2712  * Use this structure to keep info about the token read, and how it
2713  * should be tagged.  Used by the make_C_tag function to build a tag.
2714  */
2715 static struct tok
2716 {
2717   char *line;                   /* string containing the token */
2718   int offset;                   /* where the token starts in LINE */
2719   int length;                   /* token length */
2720   /*
2721     The previous members can be used to pass strings around for generic
2722     purposes.  The following ones specifically refer to creating tags.  In this
2723     case the token contained here is the pattern that will be used to create a
2724     tag.
2725   */
2726   bool valid;                   /* do not create a tag; the token should be
2727                                    invalidated whenever a state machine is
2728                                    reset prematurely */
2729   bool named;                   /* create a named tag */
2730   int lineno;                   /* source line number of tag */
2731   long linepos;                 /* source char number of tag */
2732 } token;                        /* latest token read */
2733
2734 /*
2735  * Variables and functions for dealing with nested structures.
2736  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2737  */
2738 static void pushclass_above __P((int, char *, int));
2739 static void popclass_above __P((int));
2740 static void write_classname __P((linebuffer *, char *qualifier));
2741
/* Stack of the declarations (classes and the like) we are nested in.
   cname and bracelev are parallel arrays of SIZE elements, of which the
   first NL are in use.  */
static struct
{
  char **cname;                 /* names of the nested classes */
  int *bracelev;                /* brace level of each nested class */
  int nl;                       /* nesting level, i.e. elements in use */
  int size;                     /* allocated length of both arrays */
} cstack;

/* Depth of the current struct nesting (namespace, class, struct, union,
   enum).  */
#define nestlev         (cstack.nl)

/* True after a struct keyword or inside a struct body, but not inside a
   function nested in it.  Needs a variable `bracelev' in scope.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2753
2754 static void
2755 pushclass_above (bracelev, str, len)
2756      int bracelev;
2757      char *str;
2758      int len;
2759 {
2760   int nl;
2761
2762   popclass_above (bracelev);
2763   nl = cstack.nl;
2764   if (nl >= cstack.size)
2765     {
2766       int size = cstack.size *= 2;
2767       xrnew (cstack.cname, size, char *);
2768       xrnew (cstack.bracelev, size, int);
2769     }
2770   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2771   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2772   cstack.bracelev[nl] = bracelev;
2773   cstack.nl = nl + 1;
2774 }
2775
2776 static void
2777 popclass_above (bracelev)
2778      int bracelev;
2779 {
2780   int nl;
2781
2782   for (nl = cstack.nl - 1;
2783        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2784        nl--)
2785     {
2786       if (cstack.cname[nl] != NULL)
2787         free (cstack.cname[nl]);
2788       cstack.nl = nl;
2789     }
2790 }
2791
2792 static void
2793 write_classname (cn, qualifier)
2794      linebuffer *cn;
2795      char *qualifier;
2796 {
2797   int i, len;
2798   int qlen = strlen (qualifier);
2799
2800   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2801     {
2802       len = 0;
2803       cn->len = 0;
2804       cn->buffer[0] = '\0';
2805     }
2806   else
2807     {
2808       len = strlen (cstack.cname[0]);
2809       linebuffer_setlen (cn, len);
2810       strcpy (cn->buffer, cstack.cname[0]);
2811     }
2812   for (i = 1; i < cstack.nl; i++)
2813     {
2814       char *s;
2815       int slen;
2816
2817       s = cstack.cname[i];
2818       if (s == NULL)
2819         continue;
2820       slen = strlen (s);
2821       len += slen + qlen;
2822       linebuffer_setlen (cn, len);
2823       strncat (cn->buffer, qualifier, qlen);
2824       strncat (cn->buffer, s, slen);
2825     }
2826 }
2827
2828 \f
2829 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2830 static void make_C_tag __P((bool));
2831
2832 /*
2833  * consider_token ()
2834  *      checks to see if the current token is at the start of a
2835  *      function or variable, or corresponds to a typedef, or
2836  *      is a struct/union/enum tag, or #define, or an enum constant.
2837  *
2838  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2839  *      with args.  C_EXTP points to which language we are looking at.
2840  *
2841  * Globals
2842  *      fvdef                   IN OUT
2843  *      structdef               IN OUT
2844  *      definedef               IN OUT
2845  *      typdef                  IN OUT
2846  *      objdef                  IN OUT
2847  */
2848
2849 static bool
2850 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2851      register char *str;        /* IN: token pointer */
2852      register int len;          /* IN: token length */
2853      register int c;            /* IN: first char after the token */
2854      int *c_extp;               /* IN, OUT: C extensions mask */
2855      int bracelev;              /* IN: brace level */
2856      int parlev;                /* IN: parenthesis level */
2857      bool *is_func_or_var;      /* OUT: function or variable found */
2858 {
2859   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2860      structtype is the type of the preceding struct-like keyword, and
2861      structbracelev is the brace level where it has been seen. */
2862   static enum sym_type structtype;
2863   static int structbracelev;
2864   static enum sym_type toktype;
2865
2866
2867   toktype = C_symtype (str, len, *c_extp);
2868
2869   /*
2870    * Skip __attribute__
2871    */
2872   if (toktype == st_C_attribute)
2873     {
2874       inattribute = TRUE;
2875       return FALSE;
2876      }
2877
2878    /*
2879     * Advance the definedef state machine.
2880     */
2881    switch (definedef)
2882      {
2883      case dnone:
2884        /* We're not on a preprocessor line. */
2885        if (toktype == st_C_gnumacro)
2886          {
2887            fvdef = fdefunkey;
2888            return FALSE;
2889          }
2890        break;
2891      case dsharpseen:
2892        if (toktype == st_C_define)
2893          {
2894            definedef = ddefineseen;
2895          }
2896        else
2897          {
2898            definedef = dignorerest;
2899          }
2900        return FALSE;
2901      case ddefineseen:
2902        /*
2903         * Make a tag for any macro, unless it is a constant
2904         * and constantypedefs is FALSE.
2905         */
2906        definedef = dignorerest;
2907        *is_func_or_var = (c == '(');
2908        if (!*is_func_or_var && !constantypedefs)
2909          return FALSE;
2910        else
2911          return TRUE;
2912      case dignorerest:
2913        return FALSE;
2914      default:
2915        error ("internal error: definedef value.", (char *)NULL);
2916      }
2917
2918    /*
2919     * Now typedefs
2920     */
2921    switch (typdef)
2922      {
2923      case tnone:
2924        if (toktype == st_C_typedef)
2925          {
2926            if (typedefs)
2927              typdef = tkeyseen;
2928            fvextern = FALSE;
2929            fvdef = fvnone;
2930            return FALSE;
2931          }
2932        break;
2933      case tkeyseen:
2934        switch (toktype)
2935          {
2936          case st_none:
2937          case st_C_class:
2938          case st_C_struct:
2939          case st_C_enum:
2940            typdef = ttypeseen;
2941          }
2942        break;
2943      case ttypeseen:
2944        if (structdef == snone && fvdef == fvnone)
2945          {
2946            fvdef = fvnameseen;
2947            return TRUE;
2948          }
2949        break;
2950      case tend:
2951        switch (toktype)
2952          {
2953          case st_C_class:
2954          case st_C_struct:
2955          case st_C_enum:
2956            return FALSE;
2957          }
2958        return TRUE;
2959      }
2960
2961    /*
2962     * This structdef business is NOT invoked when we are ctags and the
2963     * file is plain C.  This is because a struct tag may have the same
2964     * name as another tag, and this loses with ctags.
2965     */
2966    switch (toktype)
2967      {
2968      case st_C_javastruct:
2969        if (structdef == stagseen)
2970          structdef = scolonseen;
2971        return FALSE;
2972      case st_C_template:
2973      case st_C_class:
2974        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2975            && bracelev == 0
2976            && definedef == dnone && structdef == snone
2977            && typdef == tnone && fvdef == fvnone)
2978          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2979        if (toktype == st_C_template)
2980          break;
2981        /* FALLTHRU */
2982      case st_C_struct:
2983      case st_C_enum:
2984        if (parlev == 0
2985            && fvdef != vignore
2986            && (typdef == tkeyseen
2987                || (typedefs_or_cplusplus && structdef == snone)))
2988          {
2989            structdef = skeyseen;
2990            structtype = toktype;
2991            structbracelev = bracelev;
2992            if (fvdef == fvnameseen)
2993              fvdef = fvnone;
2994          }
2995        return FALSE;
2996      }
2997
2998    if (structdef == skeyseen)
2999      {
3000        structdef = stagseen;
3001        return TRUE;
3002      }
3003
3004    if (typdef != tnone)
3005      definedef = dnone;
3006
3007    /* Detect Objective C constructs. */
3008    switch (objdef)
3009      {
3010      case onone:
3011        switch (toktype)
3012          {
3013          case st_C_objprot:
3014            objdef = oprotocol;
3015            return FALSE;
3016          case st_C_objimpl:
3017            objdef = oimplementation;
3018            return FALSE;
3019          }
3020        break;
3021      case oimplementation:
3022        /* Save the class tag for functions or variables defined inside. */
3023        objtag = savenstr (str, len);
3024        objdef = oinbody;
3025        return FALSE;
3026      case oprotocol:
3027        /* Save the class tag for categories. */
3028        objtag = savenstr (str, len);
3029        objdef = otagseen;
3030        *is_func_or_var = TRUE;
3031        return TRUE;
3032      case oparenseen:
3033        objdef = ocatseen;
3034        *is_func_or_var = TRUE;
3035        return TRUE;
3036      case oinbody:
3037        break;
3038      case omethodsign:
3039        if (parlev == 0)
3040          {
3041            fvdef = fvnone;
3042            objdef = omethodtag;
3043            linebuffer_setlen (&token_name, len);
3044            strncpy (token_name.buffer, str, len);
3045            token_name.buffer[len] = '\0';
3046            return TRUE;
3047          }
3048        return FALSE;
3049      case omethodcolon:
3050        if (parlev == 0)
3051          objdef = omethodparm;
3052        return FALSE;
3053      case omethodparm:
3054        if (parlev == 0)
3055          {
3056            fvdef = fvnone;
3057            objdef = omethodtag;
3058            linebuffer_setlen (&token_name, token_name.len + len);
3059            strncat (token_name.buffer, str, len);
3060            return TRUE;
3061          }
3062        return FALSE;
3063      case oignore:
3064        if (toktype == st_C_objend)
3065          {
3066            /* Memory leakage here: the string pointed by objtag is
3067               never released, because many tests would be needed to
3068               avoid breaking on incorrect input code.  The amount of
3069               memory leaked here is the sum of the lengths of the
3070               class tags.
3071            free (objtag); */
3072            objdef = onone;
3073          }
3074        return FALSE;
3075      }
3076
3077    /* A function, variable or enum constant? */
3078    switch (toktype)
3079      {
3080      case st_C_extern:
3081        fvextern = TRUE;
3082        switch  (fvdef)
3083          {
3084          case finlist:
3085          case flistseen:
3086          case fignore:
3087          case vignore:
3088            break;
3089          default:
3090            fvdef = fvnone;
3091          }
3092        return FALSE;
3093      case st_C_ignore:
3094        fvextern = FALSE;
3095        fvdef = vignore;
3096        return FALSE;
3097      case st_C_operator:
3098        fvdef = foperator;
3099        *is_func_or_var = TRUE;
3100        return TRUE;
3101      case st_none:
3102        if (constantypedefs
3103            && structdef == snone
3104            && structtype == st_C_enum && bracelev > structbracelev)
3105          return TRUE;           /* enum constant */
3106        switch (fvdef)
3107          {
3108          case fdefunkey:
3109            if (bracelev > 0)
3110              break;
3111            fvdef = fdefunname;  /* GNU macro */
3112            *is_func_or_var = TRUE;
3113            return TRUE;
3114          case fvnone:
3115            switch (typdef)
3116              {
3117              case ttypeseen:
3118                return FALSE;
3119              case tnone:
3120                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3121                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3122                  {
3123                    fvdef = vignore;
3124                    return FALSE;
3125                  }
3126                break;
3127              }
3128           /* FALLTHRU */
3129           case fvnameseen:
3130           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3131             {
3132               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3133                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3134               fvdef = foperator;
3135               *is_func_or_var = TRUE;
3136               return TRUE;
3137             }
3138           if (bracelev > 0 && !instruct)
3139             break;
3140           fvdef = fvnameseen;   /* function or variable */
3141           *is_func_or_var = TRUE;
3142           return TRUE;
3143         }
3144       break;
3145     }
3146
3147   return FALSE;
3148 }
3149
3150 \f
3151 /*
3152  * C_entries often keeps pointers to tokens or lines which are older than
3153  * the line currently read.  By keeping two line buffers, and switching
3154  * them at end of line, it is possible to use those pointers.
3155  */
3156 static struct
3157 {
3158   long linepos;
3159   linebuffer lb;
3160 } lbs[2];
3161
3162 #define current_lb_is_new (newndx == curndx)
3163 #define switch_line_buffers() (curndx = 1 - curndx)
3164
3165 #define curlb (lbs[curndx].lb)
3166 #define newlb (lbs[newndx].lb)
3167 #define curlinepos (lbs[curndx].linepos)
3168 #define newlinepos (lbs[newndx].linepos)
3169
3170 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3171 #define cplpl (c_ext & C_PLPL)
3172 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3173
/*
 * Go to the next input line while keeping definedef as it is: note the
 * position of the line, read it into the current buffer, restart the
 * scan at its beginning and make the current buffer the new one.
 * Expects charno, inf, lp, quotednl, curndx and newndx to be in scope.
 */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/*
 * Go to the next input line and leave any preprocessor line: like
 * CNL_SAVE_DEFINEDEF, then bring back the token saved in savetoken, if
 * there is a valid one, and reset definedef.
 */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3193
3194
3195 static void
3196 make_C_tag (isfun)
3197      bool isfun;
3198 {
3199   /* This function should never be called when token.valid is FALSE, but
3200      we must protect against invalid input or internal errors. */
3201   if (!DEBUG && !token.valid)
3202     return;
3203
3204   if (token.valid)
3205     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3206               token.offset+token.length+1, token.lineno, token.linepos);
3207   else                          /* this case is optimised away if !DEBUG */
3208     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3209               token_name.len + 17, isfun, token.line,
3210               token.offset+token.length+1, token.lineno, token.linepos);
3211
3212   token.valid = FALSE;
3213 }
3214
3215
3216 /*
3217  * C_entries ()
3218  *      This routine finds functions, variables, typedefs,
3219  *      #define's, enum constants and struct/union/enum definitions in
3220  *      C syntax and adds them to the list.
3221  */
3222 static void
3223 C_entries (c_ext, inf)
3224      int c_ext;                 /* extension of C */
3225      FILE *inf;                 /* input file */
3226 {
3227   register char c;              /* latest char read; '\0' for end of line */
3228   register char *lp;            /* pointer one beyond the character `c' */
3229   int curndx, newndx;           /* indices for current and new lb */
3230   register int tokoff;          /* offset in line of start of current token */
3231   register int toklen;          /* length of current token */
3232   char *qualifier;              /* string used to qualify names */
3233   int qlen;                     /* length of qualifier */
3234   int bracelev;                 /* current brace level */
3235   int bracketlev;               /* current bracket level */
3236   int parlev;                   /* current parenthesis level */
3237   int attrparlev;               /* __attribute__ parenthesis level */
3238   int templatelev;              /* current template level */
3239   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3240   bool incomm, inquote, inchar, quotednl, midtoken;
3241   bool yacc_rules;              /* in the rules part of a yacc file */
3242   struct tok savetoken;         /* token saved during preprocessor handling */
3243
3244
3245   linebuffer_init (&lbs[0].lb);
3246   linebuffer_init (&lbs[1].lb);
3247   if (cstack.size == 0)
3248     {
3249       cstack.size = (DEBUG) ? 1 : 4;
3250       cstack.nl = 0;
3251       cstack.cname = xnew (cstack.size, char *);
3252       cstack.bracelev = xnew (cstack.size, int);
3253     }
3254
3255   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3256   curndx = newndx = 0;
3257   lp = curlb.buffer;
3258   *lp = 0;
3259
3260   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3261   structdef = snone; definedef = dnone; objdef = onone;
3262   yacc_rules = FALSE;
3263   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3264   token.valid = savetoken.valid = FALSE;
3265   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3266   if (cjava)
3267     { qualifier = "."; qlen = 1; }
3268   else
3269     { qualifier = "::"; qlen = 2; }
3270
3271
3272   while (!feof (inf))
3273     {
3274       c = *lp++;
3275       if (c == '\\')
3276         {
3277           /* If we are at the end of the line, the next character is a
3278              '\0'; do not skip it, because it is what tells us
3279              to read the next line.  */
3280           if (*lp == '\0')
3281             {
3282               quotednl = TRUE;
3283               continue;
3284             }
3285           lp++;
3286           c = ' ';
3287         }
3288       else if (incomm)
3289         {
3290           switch (c)
3291             {
3292             case '*':
3293               if (*lp == '/')
3294                 {
3295                   c = *lp++;
3296                   incomm = FALSE;
3297                 }
3298               break;
3299             case '\0':
3300               /* Newlines inside comments do not end macro definitions in
3301                  traditional cpp. */
3302               CNL_SAVE_DEFINEDEF ();
3303               break;
3304             }
3305           continue;
3306         }
3307       else if (inquote)
3308         {
3309           switch (c)
3310             {
3311             case '"':
3312               inquote = FALSE;
3313               break;
3314             case '\0':
3315               /* Newlines inside strings do not end macro definitions
3316                  in traditional cpp, even though compilers don't
3317                  usually accept them. */
3318               CNL_SAVE_DEFINEDEF ();
3319               break;
3320             }
3321           continue;
3322         }
3323       else if (inchar)
3324         {
3325           switch (c)
3326             {
3327             case '\0':
3328               /* Hmmm, something went wrong. */
3329               CNL ();
3330               /* FALLTHRU */
3331             case '\'':
3332               inchar = FALSE;
3333               break;
3334             }
3335           continue;
3336         }
3337       else if (bracketlev > 0)
3338         {
3339           switch (c)
3340             {
3341             case ']':
3342               if (--bracketlev > 0)
3343                 continue;
3344               break;
3345             case '\0':
3346               CNL_SAVE_DEFINEDEF ();
3347               break;
3348             }
3349           continue;
3350         }
3351       else switch (c)
3352         {
3353         case '"':
3354           inquote = TRUE;
3355           if (inattribute)
3356             break;
3357           switch (fvdef)
3358             {
3359             case fdefunkey:
3360             case fstartlist:
3361             case finlist:
3362             case fignore:
3363             case vignore:
3364               break;
3365             default:
3366               fvextern = FALSE;
3367               fvdef = fvnone;
3368             }
3369           continue;
3370         case '\'':
3371           inchar = TRUE;
3372           if (inattribute)
3373             break;
3374           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3375             {
3376               fvextern = FALSE;
3377               fvdef = fvnone;
3378             }
3379           continue;
3380         case '/':
3381           if (*lp == '*')
3382             {
3383               lp++;
3384               incomm = TRUE;
3385               continue;
3386             }
3387           else if (/* cplpl && */ *lp == '/')
3388             {
3389               c = '\0';
3390               break;
3391             }
3392           else
3393             break;
3394         case '%':
3395           if ((c_ext & YACC) && *lp == '%')
3396             {
3397               /* Entering or exiting rules section in yacc file. */
3398               lp++;
3399               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3400               typdef = tnone; structdef = snone;
3401               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3402               bracelev = 0;
3403               yacc_rules = !yacc_rules;
3404               continue;
3405             }
3406           else
3407             break;
3408         case '#':
3409           if (definedef == dnone)
3410             {
3411               char *cp;
3412               bool cpptoken = TRUE;
3413
3414               /* Look back on this line.  If all blanks, or nonblanks
3415                  followed by an end of comment, this is a preprocessor
3416                  token. */
3417               for (cp = newlb.buffer; cp < lp-1; cp++)
3418                 if (!iswhite (*cp))
3419                   {
3420                     if (*cp == '*' && *(cp+1) == '/')
3421                       {
3422                         cp++;
3423                         cpptoken = TRUE;
3424                       }
3425                     else
3426                       cpptoken = FALSE;
3427                   }
3428               if (cpptoken)
3429                 definedef = dsharpseen;
3430             } /* if (definedef == dnone) */
3431           continue;
3432         case '[':
3433           bracketlev++;
3434             continue;
3435         } /* switch (c) */
3436
3437
3438       /* Consider token only if some involved conditions are satisfied. */
3439       if (typdef != tignore
3440           && definedef != dignorerest
3441           && fvdef != finlist
3442           && templatelev == 0
3443           && (definedef != dnone
3444               || structdef != scolonseen)
3445           && !inattribute)
3446         {
3447           if (midtoken)
3448             {
3449               if (endtoken (c))
3450                 {
3451                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3452                     /* This handles :: in the middle,
3453                        but not at the beginning of an identifier.
3454                        Also, space-separated :: is not recognised. */
3455                     {
3456                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3457                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3458                       lp += 2;
3459                       toklen += 2;
3460                       c = lp[-1];
3461                       goto still_in_token;
3462                     }
3463                   else
3464                     {
3465                       bool funorvar = FALSE;
3466
3467                       if (yacc_rules
3468                           || consider_token (newlb.buffer + tokoff, toklen, c,
3469                                              &c_ext, bracelev, parlev,
3470                                              &funorvar))
3471                         {
3472                           if (fvdef == foperator)
3473                             {
3474                               char *oldlp = lp;
3475                               lp = skip_spaces (lp-1);
3476                               if (*lp != '\0')
3477                                 lp += 1;
3478                               while (*lp != '\0'
3479                                      && !iswhite (*lp) && *lp != '(')
3480                                 lp += 1;
3481                               c = *lp++;
3482                               toklen += lp - oldlp;
3483                             }
3484                           token.named = FALSE;
3485                           if (!plainc
3486                               && nestlev > 0 && definedef == dnone)
3487                             /* in struct body */
3488                             {
3489                               write_classname (&token_name, qualifier);
3490                               linebuffer_setlen (&token_name,
3491                                                  token_name.len+qlen+toklen);
3492                               strcat (token_name.buffer, qualifier);
3493                               strncat (token_name.buffer,
3494                                        newlb.buffer + tokoff, toklen);
3495                               token.named = TRUE;
3496                             }
3497                           else if (objdef == ocatseen)
3498                             /* Objective C category */
3499                             {
3500                               int len = strlen (objtag) + 2 + toklen;
3501                               linebuffer_setlen (&token_name, len);
3502                               strcpy (token_name.buffer, objtag);
3503                               strcat (token_name.buffer, "(");
3504                               strncat (token_name.buffer,
3505                                        newlb.buffer + tokoff, toklen);
3506                               strcat (token_name.buffer, ")");
3507                               token.named = TRUE;
3508                             }
3509                           else if (objdef == omethodtag
3510                                    || objdef == omethodparm)
3511                             /* Objective C method */
3512                             {
3513                               token.named = TRUE;
3514                             }
3515                           else if (fvdef == fdefunname)
3516                             /* GNU DEFUN and similar macros */
3517                             {
3518                               bool defun = (newlb.buffer[tokoff] == 'F');
3519                               int off = tokoff;
3520                               int len = toklen;
3521
3522                               /* Rewrite the tag so that emacs lisp DEFUNs
3523                                  can be found by their elisp name */
3524                               if (defun)
3525                                 {
3526                                   off += 1;
3527                                   len -= 1;
3528                                 }
3529                               len = toklen;
3530                               linebuffer_setlen (&token_name, len);
3531                               strncpy (token_name.buffer,
3532                                        newlb.buffer + off, len);
3533                               token_name.buffer[len] = '\0';
3534                               if (defun)
3535                                 while (--len >= 0)
3536                                   if (token_name.buffer[len] == '_')
3537                                     token_name.buffer[len] = '-';
3538                               token.named = defun;
3539                             }
3540                           else
3541                             {
3542                               linebuffer_setlen (&token_name, toklen);
3543                               strncpy (token_name.buffer,
3544                                        newlb.buffer + tokoff, toklen);
3545                               token_name.buffer[toklen] = '\0';
3546                               /* Name macros and members. */
3547                               token.named = (structdef == stagseen
3548                                              || typdef == ttypeseen
3549                                              || typdef == tend
3550                                              || (funorvar
3551                                                  && definedef == dignorerest)
3552                                              || (funorvar
3553                                                  && definedef == dnone
3554                                                  && structdef == snone
3555                                                  && bracelev > 0));
3556                             }
3557                           token.lineno = lineno;
3558                           token.offset = tokoff;
3559                           token.length = toklen;
3560                           token.line = newlb.buffer;
3561                           token.linepos = newlinepos;
3562                           token.valid = TRUE;
3563
3564                           if (definedef == dnone
3565                               && (fvdef == fvnameseen
3566                                   || fvdef == foperator
3567                                   || structdef == stagseen
3568                                   || typdef == tend
3569                                   || typdef == ttypeseen
3570                                   || objdef != onone))
3571                             {
3572                               if (current_lb_is_new)
3573                                 switch_line_buffers ();
3574                             }
3575                           else if (definedef != dnone
3576                                    || fvdef == fdefunname
3577                                    || instruct)
3578                             make_C_tag (funorvar);
3579                         }
3580                       else /* not yacc and consider_token failed */
3581                         {
3582                           if (inattribute && fvdef == fignore)
3583                             {
3584                               /* We have just met __attribute__ after a
3585                                  function parameter list: do not tag the
3586                                  function again. */
3587                               fvdef = fvnone;
3588                             }
3589                         }
3590                       midtoken = FALSE;
3591                     }
3592                 } /* if (endtoken (c)) */
3593               else if (intoken (c))
3594                 still_in_token:
3595                 {
3596                   toklen++;
3597                   continue;
3598                 }
3599             } /* if (midtoken) */
3600           else if (begtoken (c))
3601             {
3602               switch (definedef)
3603                 {
3604                 case dnone:
3605                   switch (fvdef)
3606                     {
3607                     case fstartlist:
3608                       /* This prevents tagging fb in
3609                          void (__attribute__((noreturn)) *fb) (void);
3610                          Fixing this is not easy and not very important. */
3611                       fvdef = finlist;
3612                       continue;
3613                     case flistseen:
3614                       if (plainc || declarations)
3615                         {
3616                           make_C_tag (TRUE); /* a function */
3617                           fvdef = fignore;
3618                         }
3619                       break;
3620                     }
3621                   if (structdef == stagseen && !cjava)
3622                     {
3623                       popclass_above (bracelev);
3624                       structdef = snone;
3625                     }
3626                   break;
3627                 case dsharpseen:
3628                   savetoken = token;
3629                   break;
3630                 }
3631               if (!yacc_rules || lp == newlb.buffer + 1)
3632                 {
3633                   tokoff = lp - 1 - newlb.buffer;
3634                   toklen = 1;
3635                   midtoken = TRUE;
3636                 }
3637               continue;
3638             } /* if (begtoken) */
3639         } /* if must look at token */
3640
3641
3642       /* Detect end of line, colon, comma, semicolon and various braces
3643          after having handled a token.*/
3644       switch (c)
3645         {
3646         case ':':
3647           if (inattribute)
3648             break;
3649           if (yacc_rules && token.offset == 0 && token.valid)
3650             {
3651               make_C_tag (FALSE); /* a yacc function */
3652               break;
3653             }
3654           if (definedef != dnone)
3655             break;
3656           switch (objdef)
3657             {
3658             case  otagseen:
3659               objdef = oignore;
3660               make_C_tag (TRUE); /* an Objective C class */
3661               break;
3662             case omethodtag:
3663             case omethodparm:
3664               objdef = omethodcolon;
3665               linebuffer_setlen (&token_name, token_name.len + 1);
3666               strcat (token_name.buffer, ":");
3667               break;
3668             }
3669           if (structdef == stagseen)
3670             {
3671               structdef = scolonseen;
3672               break;
3673             }
3674           /* Should be useless, but may work as a safety net. */
3675           if (cplpl && fvdef == flistseen)
3676             {
3677               make_C_tag (TRUE); /* a function */
3678               fvdef = fignore;
3679               break;
3680             }
3681           break;
3682         case ';':
3683           if (definedef != dnone || inattribute)
3684             break;
3685           switch (typdef)
3686             {
3687             case tend:
3688             case ttypeseen:
3689               make_C_tag (FALSE); /* a typedef */
3690               typdef = tnone;
3691               fvdef = fvnone;
3692               break;
3693             case tnone:
3694             case tinbody:
3695             case tignore:
3696               switch (fvdef)
3697                 {
3698                 case fignore:
3699                   if (typdef == tignore || cplpl)
3700                     fvdef = fvnone;
3701                   break;
3702                 case fvnameseen:
3703                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3704                       || (members && instruct))
3705                     make_C_tag (FALSE); /* a variable */
3706                   fvextern = FALSE;
3707                   fvdef = fvnone;
3708                   token.valid = FALSE;
3709                   break;
3710                 case flistseen:
3711                   if ((declarations
3712                        && (cplpl || !instruct)
3713                        && (typdef == tnone || (typdef != tignore && instruct)))
3714                       || (members
3715                           && plainc && instruct))
3716                     make_C_tag (TRUE);  /* a function */
3717                   /* FALLTHRU */
3718                 default:
3719                   fvextern = FALSE;
3720                   fvdef = fvnone;
3721                   if (declarations
3722                        && cplpl && structdef == stagseen)
3723                     make_C_tag (FALSE); /* forward declaration */
3724                   else
3725                     token.valid = FALSE;
3726                 } /* switch (fvdef) */
3727               /* FALLTHRU */
3728             default:
3729               if (!instruct)
3730                 typdef = tnone;
3731             }
3732           if (structdef == stagseen)
3733             structdef = snone;
3734           break;
3735         case ',':
3736           if (definedef != dnone || inattribute)
3737             break;
3738           switch (objdef)
3739             {
3740             case omethodtag:
3741             case omethodparm:
3742               make_C_tag (TRUE); /* an Objective C method */
3743               objdef = oinbody;
3744               break;
3745             }
3746           switch (fvdef)
3747             {
3748             case fdefunkey:
3749             case foperator:
3750             case fstartlist:
3751             case finlist:
3752             case fignore:
3753             case vignore:
3754               break;
3755             case fdefunname:
3756               fvdef = fignore;
3757               break;
3758             case fvnameseen:
3759               if (parlev == 0
3760                   && ((globals
3761                        && bracelev == 0
3762                        && templatelev == 0
3763                        && (!fvextern || declarations))
3764                       || (members && instruct)))
3765                   make_C_tag (FALSE); /* a variable */
3766               break;
3767             case flistseen:
3768               if ((declarations && typdef == tnone && !instruct)
3769                   || (members && typdef != tignore && instruct))
3770                 {
3771                   make_C_tag (TRUE); /* a function */
3772                   fvdef = fvnameseen;
3773                 }
3774               else if (!declarations)
3775                 fvdef = fvnone;
3776               token.valid = FALSE;
3777               break;
3778             default:
3779               fvdef = fvnone;
3780             }
3781           if (structdef == stagseen)
3782             structdef = snone;
3783           break;
3784         case ']':
3785           if (definedef != dnone || inattribute)
3786             break;
3787           if (structdef == stagseen)
3788             structdef = snone;
3789           switch (typdef)
3790             {
3791             case ttypeseen:
3792             case tend:
3793               typdef = tignore;
3794               make_C_tag (FALSE);       /* a typedef */
3795               break;
3796             case tnone:
3797             case tinbody:
3798               switch (fvdef)
3799                 {
3800                 case foperator:
3801                 case finlist:
3802                 case fignore:
3803                 case vignore:
3804                   break;
3805                 case fvnameseen:
3806                   if ((members && bracelev == 1)
3807                       || (globals && bracelev == 0
3808                           && (!fvextern || declarations)))
3809                     make_C_tag (FALSE); /* a variable */
3810                   /* FALLTHRU */
3811                 default:
3812                   fvdef = fvnone;
3813                 }
3814               break;
3815             }
3816           break;
3817         case '(':
3818           if (inattribute)
3819             {
3820               attrparlev++;
3821               break;
3822             }
3823           if (definedef != dnone)
3824             break;
3825           if (objdef == otagseen && parlev == 0)
3826             objdef = oparenseen;
3827           switch (fvdef)
3828             {
3829             case fvnameseen:
3830               if (typdef == ttypeseen
3831                   && *lp != '*'
3832                   && !instruct)
3833                 {
3834                   /* This handles constructs like:
3835                      typedef void OperatorFun (int fun); */
3836                   make_C_tag (FALSE);
3837                   typdef = tignore;
3838                   fvdef = fignore;
3839                   break;
3840                 }
3841               /* FALLTHRU */
3842             case foperator:
3843               fvdef = fstartlist;
3844               break;
3845             case flistseen:
3846               fvdef = finlist;
3847               break;
3848             }
3849           parlev++;
3850           break;
3851         case ')':
3852           if (inattribute)
3853             {
3854               if (--attrparlev == 0)
3855                 inattribute = FALSE;
3856               break;
3857             }
3858           if (definedef != dnone)
3859             break;
3860           if (objdef == ocatseen && parlev == 1)
3861             {
3862               make_C_tag (TRUE); /* an Objective C category */
3863               objdef = oignore;
3864             }
3865           if (--parlev == 0)
3866             {
3867               switch (fvdef)
3868                 {
3869                 case fstartlist:
3870                 case finlist:
3871                   fvdef = flistseen;
3872                   break;
3873                 }
3874               if (!instruct
3875                   && (typdef == tend
3876                       || typdef == ttypeseen))
3877                 {
3878                   typdef = tignore;
3879                   make_C_tag (FALSE); /* a typedef */
3880                 }
3881             }
3882           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3883             parlev = 0;
3884           break;
3885         case '{':
3886           if (definedef != dnone)
3887             break;
3888           if (typdef == ttypeseen)
3889             {
3890               /* Whenever typdef is set to tinbody (currently only
3891                  here), typdefbracelev should be set to bracelev. */
3892               typdef = tinbody;
3893               typdefbracelev = bracelev;
3894             }
3895           switch (fvdef)
3896             {
3897             case flistseen:
3898               make_C_tag (TRUE);    /* a function */
3899               /* FALLTHRU */
3900             case fignore:
3901               fvdef = fvnone;
3902               break;
3903             case fvnone:
3904               switch (objdef)
3905                 {
3906                 case otagseen:
3907                   make_C_tag (TRUE); /* an Objective C class */
3908                   objdef = oignore;
3909                   break;
3910                 case omethodtag:
3911                 case omethodparm:
3912                   make_C_tag (TRUE); /* an Objective C method */
3913                   objdef = oinbody;
3914                   break;
3915                 default:
3916                   /* Neutralize `extern "C" {' grot. */
3917                   if (bracelev == 0 && structdef == snone && nestlev == 0
3918                       && typdef == tnone)
3919                     bracelev = -1;
3920                 }
3921               break;
3922             }
3923           switch (structdef)
3924             {
3925             case skeyseen:         /* unnamed struct */
3926               pushclass_above (bracelev, NULL, 0);
3927               structdef = snone;
3928               break;
3929             case stagseen:         /* named struct or enum */
3930             case scolonseen:       /* a class */
3931               pushclass_above (bracelev,token.line+token.offset, token.length);
3932               structdef = snone;
3933               make_C_tag (FALSE);  /* a struct or enum */
3934               break;
3935             }
3936           bracelev++;
3937           break;
3938         case '*':
3939           if (definedef != dnone)
3940             break;
3941           if (fvdef == fstartlist)
3942             {
3943               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3944               token.valid = FALSE;
3945             }
3946           break;
3947         case '}':
3948           if (definedef != dnone)
3949             break;
3950           if (!ignoreindent && lp == newlb.buffer + 1)
3951             {
3952               if (bracelev != 0)
3953                 token.valid = FALSE;
3954               bracelev = 0;     /* reset brace level if first column */
3955               parlev = 0;       /* also reset paren level, just in case... */
3956             }
3957           else if (bracelev > 0)
3958             bracelev--;
3959           else
3960             token.valid = FALSE; /* something gone amiss, token unreliable */
3961           popclass_above (bracelev);
3962           structdef = snone;
3963           /* Only if typdef == tinbody is typdefbracelev significant. */
3964           if (typdef == tinbody && bracelev <= typdefbracelev)
3965             {
3966               assert (bracelev == typdefbracelev);
3967               typdef = tend;
3968             }
3969           break;
3970         case '=':
3971           if (definedef != dnone)
3972             break;
3973           switch (fvdef)
3974             {
3975             case foperator:
3976             case finlist:
3977             case fignore:
3978             case vignore:
3979               break;
3980             case fvnameseen:
3981               if ((members && bracelev == 1)
3982                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3983                 make_C_tag (FALSE); /* a variable */
3984               /* FALLTHRU */
3985             default:
3986               fvdef = vignore;
3987             }
3988           break;
3989         case '<':
3990           if (cplpl
3991               && (structdef == stagseen || fvdef == fvnameseen))
3992             {
3993               templatelev++;
3994               break;
3995             }
3996           goto resetfvdef;
3997         case '>':
3998           if (templatelev > 0)
3999             {
4000               templatelev--;
4001               break;
4002             }
4003           goto resetfvdef;
4004         case '+':
4005         case '-':
4006           if (objdef == oinbody && bracelev == 0)
4007             {
4008               objdef = omethodsign;
4009               break;
4010             }
4011           /* FALLTHRU */
4012         resetfvdef:
4013         case '#': case '~': case '&': case '%': case '/':
4014         case '|': case '^': case '!': case '.': case '?':
4015           if (definedef != dnone)
4016             break;
4017           /* These surely cannot follow a function tag in C. */
4018           switch (fvdef)
4019             {
4020             case foperator:
4021             case finlist:
4022             case fignore:
4023             case vignore:
4024               break;
4025             default:
4026               fvdef = fvnone;
4027             }
4028           break;
4029         case '\0':
4030           if (objdef == otagseen)
4031             {
4032               make_C_tag (TRUE); /* an Objective C class */
4033               objdef = oignore;
4034             }
4035           /* If a macro spans multiple lines don't reset its state. */
4036           if (quotednl)
4037             CNL_SAVE_DEFINEDEF ();
4038           else
4039             CNL ();
4040           break;
4041         } /* switch (c) */
4042
4043     } /* while not eof */
4044
4045   free (lbs[0].lb.buffer);
4046   free (lbs[1].lb.buffer);
4047 }
4048
4049 /*
4050  * Process either a C++ file or a C file depending on the setting
4051  * of a global flag.
4052  */
4053 static void
4054 default_C_entries (inf)
4055      FILE *inf;
4056 {
4057   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4058 }
4059
4060 /* Always do plain C: INF is handed to C_entries with 0 as its first
        (language flags) argument, so no extension bit such as YACC or
        C_AUTO is requested. */
4061 static void
4062 plain_C_entries (inf)
4063      FILE *inf;
4064 {
4065   C_entries (0, inf);
4066 }
4067
4068 /* Always do C++: INF is handed to C_entries with the C_PLPL flag,
        unconditionally (default_C_entries consults the `cplusplus'
        global instead). */
4069 static void
4070 Cplusplus_entries (inf)
4071      FILE *inf;
4072 {
4073   C_entries (C_PLPL, inf);
4074 }
4075
4076 /* Always do Java: INF is handed to C_entries with the C_JAVA flag. */
4077 static void
4078 Cjava_entries (inf)
4079      FILE *inf;
4080 {
4081   C_entries (C_JAVA, inf);
4082 }
4083
4084 /* Always do C*: INF is handed to C_entries with the C_STAR flag. */
4085 static void
4086 Cstar_entries (inf)
4087      FILE *inf;
4088 {
4089   C_entries (C_STAR, inf);
4090 }
4091
4092 /* Always do Yacc: INF is handed to C_entries with the YACC flag.
        With that flag set, C_entries treats a `%%' pair as entering or
        leaving the rules section of the grammar. */
4093 static void
4094 Yacc_entries (inf)
4095      FILE *inf;
4096 {
4097   C_entries (YACC, inf);
4098 }
4099
4100 \f
/* Useful macros. */
/*
 * LOOP_ON_INPUT_LINES (file_pointer, line_buffer, char_pointer) STATEMENT
 * is a loop header: STATEMENT runs once for every readline call made on
 * FILE_POINTER.  The end-of-file test (feof) is made before each read;
 * then LINE_BUFFER is refilled and CHAR_POINTER is set to the start of
 * its text.  A `continue' inside STATEMENT moves on to the next line.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* read a line */   \
             char_pointer = line_buffer.buffer, /* point at its start */ \
             TRUE))                     /* then always run the body */
4110
/*
 * LOOKING_AT (cp, kw) is true when the text at CP begins with the
 * keyword KW and the character just after it satisfies notinname.
 * On a match CP is reassigned to the result of skip_spaces applied
 * just past the keyword; when there is no match CP is not modified.
 * KW has to be a string literal: the `"" kw' concatenation inside
 * assert is a syntax error for anything else.
 */
#define LOOKING_AT(cp, kw)                                              \
  (assert ("" kw),              /* rejects a non-literal kw */          \
   (strneq ((cp), kw, sizeof (kw) - 1)          /* cp points at kw */   \
    && notinname ((cp)[sizeof (kw) - 1])        /* end of kw */         \
    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)))) /* skip spaces */
4116
/*
 * LOOKING_AT_NOCASE (cp, kw) is like LOOKING_AT, except that the
 * comparison is done with strncaseeq, the character following the
 * keyword is not checked with notinname, and no whitespace is skipped:
 * on a match CP is advanced by exactly the length of KW.  KW has to
 * be a string literal (enforced by the `"" kw' inside assert).
 */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  (assert ("" kw),              /* rejects a non-literal kw */          \
   (strncaseeq ((cp), kw, sizeof (kw) - 1)      /* cp points at kw */   \
    && ((cp) += sizeof (kw) - 1)))              /* step over kw only */
4122
4123 /*
4124  * Read a file, but do no processing.  This is used to do regexp
4125  * matching on files that have no language defined.
4126  */
4127 static void
4128 just_read_file (inf)
4129      FILE *inf;
4130 {
4131   register char *dummy;
4132
4133   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4134     continue;
4135 }
4136
4137 \f
4138 /* Fortran parsing */
4139
     /* Forward declarations of the helpers called by Fortran_functions.
        Both read and update the global line cursor dbp. */
4140 static void F_takeprec __P((void));
4141 static void F_getit __P((FILE *));
4142
4143 static void
4144 F_takeprec ()
4145 {
4146   dbp = skip_spaces (dbp);
4147   if (*dbp != '*')
4148     return;
4149   dbp++;
4150   dbp = skip_spaces (dbp);
4151   if (strneq (dbp, "(*)", 3))
4152     {
4153       dbp += 3;
4154       return;
4155     }
4156   if (!ISDIGIT (*dbp))
4157     {
4158       --dbp;                    /* force failure */
4159       return;
4160     }
4161   do
4162     dbp++;
4163   while (ISDIGIT (*dbp));
4164 }
4165
4166 static void
4167 F_getit (inf)
4168      FILE *inf;
4169 {
4170   register char *cp;
4171
4172   dbp = skip_spaces (dbp);
4173   if (*dbp == '\0')
4174     {
4175       readline (&lb, inf);
4176       dbp = lb.buffer;
4177       if (dbp[5] != '&')
4178         return;
4179       dbp += 6;
4180       dbp = skip_spaces (dbp);
4181     }
4182   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4183     return;
4184   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4185     continue;
4186   make_tag (dbp, cp-dbp, TRUE,
4187             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4188 }
4189
4190
4191 static void
4192 Fortran_functions (inf)
4193      FILE *inf;
4194 {
4195   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4196     {
4197       if (*dbp == '%')
4198         dbp++;                  /* Ratfor escape to fortran */
4199       dbp = skip_spaces (dbp);
4200       if (*dbp == '\0')
4201         continue;
4202       switch (lowcase (*dbp))
4203         {
4204         case 'i':
4205           if (nocase_tail ("integer"))
4206             F_takeprec ();
4207           break;
4208         case 'r':
4209           if (nocase_tail ("real"))
4210             F_takeprec ();
4211           break;
4212         case 'l':
4213           if (nocase_tail ("logical"))
4214             F_takeprec ();
4215           break;
4216         case 'c':
4217           if (nocase_tail ("complex") || nocase_tail ("character"))
4218             F_takeprec ();
4219           break;
4220         case 'd':
4221           if (nocase_tail ("double"))
4222             {
4223               dbp = skip_spaces (dbp);
4224               if (*dbp == '\0')
4225                 continue;
4226               if (nocase_tail ("precision"))
4227                 break;
4228               continue;
4229             }
4230           break;
4231         }
4232       dbp = skip_spaces (dbp);
4233       if (*dbp == '\0')
4234         continue;
4235       switch (lowcase (*dbp))
4236         {
4237         case 'f':
4238           if (nocase_tail ("function"))
4239             F_getit (inf);
4240           continue;
4241         case 's':
4242           if (nocase_tail ("subroutine"))
4243             F_getit (inf);
4244           continue;
4245         case 'e':
4246           if (nocase_tail ("entry"))
4247             F_getit (inf);
4248           continue;
4249         case 'b':
4250           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4251             {
4252               dbp = skip_spaces (dbp);
4253               if (*dbp == '\0') /* assume un-named */
4254                 make_tag ("blockdata", 9, TRUE,
4255                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4256               else
4257                 F_getit (inf);  /* look for name */
4258             }
4259           continue;
4260         }
4261     }
4262 }
4263
4264 \f
4265 /*
4266  * Ada parsing
4267  * Original code by
4268  * Philippe Waroquiers (1998)
4269  */
4270
     /* Forward declaration.  The arguments are the input file and the
        qualifier string that Ada_getit appends to the tag name. */
4271 static void Ada_getit __P((FILE *, char *));
4272
4273 /* Once we are positioned after an "interesting" keyword, let's get
4274    the real tag value necessary. */
4275 static void
4276 Ada_getit (inf, name_qualifier)
4277      FILE *inf;
4278      char *name_qualifier;
4279 {
4280   register char *cp;
4281   char *name;
4282   char c;
4283
4284   while (!feof (inf))
4285     {
4286       dbp = skip_spaces (dbp);
4287       if (*dbp == '\0'
4288           || (dbp[0] == '-' && dbp[1] == '-'))
4289         {
4290           readline (&lb, inf);
4291           dbp = lb.buffer;
4292         }
4293       switch (lowcase(*dbp))
4294         {
4295         case 'b':
4296           if (nocase_tail ("body"))
4297             {
4298               /* Skipping body of   procedure body   or   package body or ....
4299                  resetting qualifier to body instead of spec. */
4300               name_qualifier = "/b";
4301               continue;
4302             }
4303           break;
4304         case 't':
4305           /* Skipping type of   task type   or   protected type ... */
4306           if (nocase_tail ("type"))
4307             continue;
4308           break;
4309         }
4310       if (*dbp == '"')
4311         {
4312           dbp += 1;
4313           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4314             continue;
4315         }
4316       else
4317         {
4318           dbp = skip_spaces (dbp);
4319           for (cp = dbp;
4320                (*cp != '\0'
4321                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4322                cp++)
4323             continue;
4324           if (cp == dbp)
4325             return;
4326         }
4327       c = *cp;
4328       *cp = '\0';
4329       name = concat (dbp, name_qualifier, "");
4330       *cp = c;
4331       make_tag (name, strlen (name), TRUE,
4332                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4333       free (name);
4334       if (c == '"')
4335         dbp = cp + 1;
4336       return;
4337     }
4338 }
4339
4340 static void
4341 Ada_funcs (inf)
4342      FILE *inf;
4343 {
4344   bool inquote = FALSE;
4345   bool skip_till_semicolumn = FALSE;
4346
4347   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4348     {
4349       while (*dbp != '\0')
4350         {
4351           /* Skip a string i.e. "abcd". */
4352           if (inquote || (*dbp == '"'))
4353             {
4354               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4355               if (dbp != NULL)
4356                 {
4357                   inquote = FALSE;
4358                   dbp += 1;
4359                   continue;     /* advance char */
4360                 }
4361               else
4362                 {
4363                   inquote = TRUE;
4364                   break;        /* advance line */
4365                 }
4366             }
4367
4368           /* Skip comments. */
4369           if (dbp[0] == '-' && dbp[1] == '-')
4370             break;              /* advance line */
4371
4372           /* Skip character enclosed in single quote i.e. 'a'
4373              and skip single quote starting an attribute i.e. 'Image. */
4374           if (*dbp == '\'')
4375             {
4376               dbp++ ;
4377               if (*dbp != '\0')
4378                 dbp++;
4379               continue;
4380             }
4381
4382           if (skip_till_semicolumn)
4383             {
4384               if (*dbp == ';')
4385                 skip_till_semicolumn = FALSE;
4386               dbp++;
4387               continue;         /* advance char */
4388             }
4389
4390           /* Search for beginning of a token.  */
4391           if (!begtoken (*dbp))
4392             {
4393               dbp++;
4394               continue;         /* advance char */
4395             }
4396
4397           /* We are at the beginning of a token. */
4398           switch (lowcase(*dbp))
4399             {
4400             case 'f':
4401               if (!packages_only && nocase_tail ("function"))
4402                 Ada_getit (inf, "/f");
4403               else
4404                 break;          /* from switch */
4405               continue;         /* advance char */
4406             case 'p':
4407               if (!packages_only && nocase_tail ("procedure"))
4408                 Ada_getit (inf, "/p");
4409               else if (nocase_tail ("package"))
4410                 Ada_getit (inf, "/s");
4411               else if (nocase_tail ("protected")) /* protected type */
4412                 Ada_getit (inf, "/t");
4413               else
4414                 break;          /* from switch */
4415               continue;         /* advance char */
4416
4417             case 'u':
4418               if (typedefs && !packages_only && nocase_tail ("use"))
4419                 {
4420                   /* when tagging types, avoid tagging  use type Pack.Typename;
4421                      for this, we will skip everything till a ; */
4422                   skip_till_semicolumn = TRUE;
4423                   continue;     /* advance char */
4424                 }
4425
4426             case 't':
4427               if (!packages_only && nocase_tail ("task"))
4428                 Ada_getit (inf, "/k");
4429               else if (typedefs && !packages_only && nocase_tail ("type"))
4430                 {
4431                   Ada_getit (inf, "/t");
4432                   while (*dbp != '\0')
4433                     dbp += 1;
4434                 }
4435               else
4436                 break;          /* from switch */
4437               continue;         /* advance char */
4438             }
4439
4440           /* Look for the end of the token. */
4441           while (!endtoken (*dbp))
4442             dbp++;
4443
4444         } /* advance char */
4445     } /* advance line */
4446 }
4447
4448 \f
4449 /*
4450  * Unix and microcontroller assembly tag handling
4451  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4452  * Idea by Bob Weiner, Motorola Inc. (1994)
4453  */
4454 static void
4455 Asm_labels (inf)
4456      FILE *inf;
4457 {
4458   register char *cp;
4459
4460   LOOP_ON_INPUT_LINES (inf, lb, cp)
4461     {
4462       /* If first char is alphabetic or one of [_.$], test for colon
4463          following identifier. */
4464       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4465         {
4466           /* Read past label. */
4467           cp++;
4468           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4469             cp++;
4470           if (*cp == ':' || iswhite (*cp))
4471             /* Found end of label, so copy it and add it to the table. */
4472             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4473                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4474         }
4475     }
4476 }
4477
4478 \f
4479 /*
4480  * Perl support
4481  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4482  * Perl variable names: /^(my|local).../
4483  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4484  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4485  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4486  */
4487 static void
4488 Perl_functions (inf)
4489      FILE *inf;
4490 {
4491   char *package = savestr ("main"); /* current package name */
4492   register char *cp;
4493
4494   LOOP_ON_INPUT_LINES (inf, lb, cp)
4495     {
4496       skip_spaces(cp);
4497
4498       if (LOOKING_AT (cp, "package"))
4499         {
4500           free (package);
4501           get_tag (cp, &package);
4502         }
4503       else if (LOOKING_AT (cp, "sub"))
4504         {
4505           char *pos;
4506           char *sp = cp;
4507
4508           while (!notinname (*cp))
4509             cp++;
4510           if (cp == sp)
4511             continue;           /* nothing found */
4512           if ((pos = etags_strchr (sp, ':')) != NULL
4513               && pos < cp && pos[1] == ':')
4514             /* The name is already qualified. */
4515             make_tag (sp, cp - sp, TRUE,
4516                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4517           else
4518             /* Qualify it. */
4519             {
4520               char savechar, *name;
4521
4522               savechar = *cp;
4523               *cp = '\0';
4524               name = concat (package, "::", sp);
4525               *cp = savechar;
4526               make_tag (name, strlen(name), TRUE,
4527                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4528               free (name);
4529             }
4530         }
4531        else if (globals)        /* only if we are tagging global vars */
4532         {
4533           /* Skip a qualifier, if any. */
4534           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4535           /* After "my" or "local", but before any following paren or space. */
4536           char *varstart = cp;
4537
4538           if (qual              /* should this be removed?  If yes, how? */
4539               && (*cp == '$' || *cp == '@' || *cp == '%'))
4540             {
4541               varstart += 1;
4542               do
4543                 cp++;
4544               while (ISALNUM (*cp) || *cp == '_');
4545             }
4546           else if (qual)
4547             {
4548               /* Should be examining a variable list at this point;
4549                  could insist on seeing an open parenthesis. */
4550               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4551                 cp++;
4552             }
4553           else
4554             continue;
4555
4556           make_tag (varstart, cp - varstart, FALSE,
4557                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4558         }
4559     }
4560   free (package);
4561 }
4562
4563
4564 /*
4565  * Python support
4566  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4567  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4568  * More ideas by seb bacon <seb@jamkit.com> (2002)
4569  */
4570 static void
4571 Python_functions (inf)
4572      FILE *inf;
4573 {
4574   register char *cp;
4575
4576   LOOP_ON_INPUT_LINES (inf, lb, cp)
4577     {
4578       cp = skip_spaces (cp);
4579       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4580         {
4581           char *name = cp;
4582           while (!notinname (*cp) && *cp != ':')
4583             cp++;
4584           make_tag (name, cp - name, TRUE,
4585                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586         }
4587     }
4588 }
4589
4590 \f
4591 /*
4592  * PHP support
4593  * Look for:
4594  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4595  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4596  *  - /^[ \t]*define\(\"[^\"]+/
4597  * Only with --members:
4598  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4599  * Idea by Diez B. Roggisch (2001)
4600  */
4601 static void
4602 PHP_functions (inf)
4603      FILE *inf;
4604 {
4605   register char *cp, *name;
4606   bool search_identifier = FALSE;
4607
4608   LOOP_ON_INPUT_LINES (inf, lb, cp)
4609     {
4610       cp = skip_spaces (cp);
4611       name = cp;
4612       if (search_identifier
4613           && *cp != '\0')
4614         {
4615           while (!notinname (*cp))
4616             cp++;
4617           make_tag (name, cp - name, TRUE,
4618                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4619           search_identifier = FALSE;
4620         }
4621       else if (LOOKING_AT (cp, "function"))
4622         {
4623           if(*cp == '&')
4624             cp = skip_spaces (cp+1);
4625           if(*cp != '\0')
4626             {
4627               name = cp;
4628               while (!notinname (*cp))
4629                 cp++;
4630               make_tag (name, cp - name, TRUE,
4631                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4632             }
4633           else
4634             search_identifier = TRUE;
4635         }
4636       else if (LOOKING_AT (cp, "class"))
4637         {
4638           if (*cp != '\0')
4639             {
4640               name = cp;
4641               while (*cp != '\0' && !iswhite (*cp))
4642                 cp++;
4643               make_tag (name, cp - name, FALSE,
4644                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4645             }
4646           else
4647             search_identifier = TRUE;
4648         }
4649       else if (strneq (cp, "define", 6)
4650                && (cp = skip_spaces (cp+6))
4651                && *cp++ == '('
4652                && (*cp == '"' || *cp == '\''))
4653         {
4654           char quote = *cp++;
4655           name = cp;
4656           while (*cp != quote && *cp != '\0')
4657             cp++;
4658           make_tag (name, cp - name, FALSE,
4659                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4660         }
4661       else if (members
4662                && LOOKING_AT (cp, "var")
4663                && *cp == '$')
4664         {
4665           name = cp;
4666           while (!notinname(*cp))
4667             cp++;
4668           make_tag (name, cp - name, FALSE,
4669                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4670         }
4671     }
4672 }
4673
4674 \f
4675 /*
4676  * Cobol tag functions
4677  * We could look for anything that could be a paragraph name.
4678  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4679  * Idea by Corny de Souza (1993)
4680  */
4681 static void
4682 Cobol_paragraphs (inf)
4683      FILE *inf;
4684 {
4685   register char *bp, *ep;
4686
4687   LOOP_ON_INPUT_LINES (inf, lb, bp)
4688     {
4689       if (lb.len < 9)
4690         continue;
4691       bp += 8;
4692
4693       /* If eoln, compiler option or comment ignore whole line. */
4694       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4695         continue;
4696
4697       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4698         continue;
4699       if (*ep++ == '.')
4700         make_tag (bp, ep - bp, TRUE,
4701                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4702     }
4703 }
4704
4705 \f
4706 /*
4707  * Makefile support
4708  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4709  */
4710 static void
4711 Makefile_targets (inf)
4712      FILE *inf;
4713 {
4714   register char *bp;
4715
4716   LOOP_ON_INPUT_LINES (inf, lb, bp)
4717     {
4718       if (*bp == '\t' || *bp == '#')
4719         continue;
4720       while (*bp != '\0' && *bp != '=' && *bp != ':')
4721         bp++;
4722       if (*bp == ':' || (globals && *bp == '='))
4723         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4724                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4725     }
4726 }
4727
4728 \f
4729 /*
4730  * Pascal parsing
4731  * Original code by Mosur K. Mohan (1989)
4732  *
4733  *  Locates tags for procedures & functions.  Doesn't do any type- or
4734  *  var-definitions.  It does look for the keyword "extern" or
4735  *  "forward" immediately following the procedure statement; if found,
4736  *  the tag is skipped.
4737  */
4738 static void
4739 Pascal_functions (inf)
4740      FILE *inf;
4741 {
4742   linebuffer tline;             /* mostly copied from C_entries */
4743   long save_lcno;
4744   int save_lineno, namelen, taglen;
4745   char c, *name;
4746
4747   bool                          /* each of these flags is TRUE iff: */
4748     incomment,                  /* point is inside a comment */
4749     inquote,                    /* point is inside '..' string */
4750     get_tagname,                /* point is after PROCEDURE/FUNCTION
4751                                    keyword, so next item = potential tag */
4752     found_tag,                  /* point is after a potential tag */
4753     inparms,                    /* point is within parameter-list */
4754     verify_tag;                 /* point has passed the parm-list, so the
4755                                    next token will determine whether this
4756                                    is a FORWARD/EXTERN to be ignored, or
4757                                    whether it is a real tag */
4758
4759   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4760   name = NULL;                  /* keep compiler quiet */
4761   dbp = lb.buffer;
4762   *dbp = '\0';
4763   linebuffer_init (&tline);
4764
4765   incomment = inquote = FALSE;
4766   found_tag = FALSE;            /* have a proc name; check if extern */
4767   get_tagname = FALSE;          /* found "procedure" keyword         */
4768   inparms = FALSE;              /* found '(' after "proc"            */
4769   verify_tag = FALSE;           /* check if "extern" is ahead        */
4770
4771
4772   while (!feof (inf))           /* long main loop to get next char */
4773     {
4774       c = *dbp++;
4775       if (c == '\0')            /* if end of line */
4776         {
4777           readline (&lb, inf);
4778           dbp = lb.buffer;
4779           if (*dbp == '\0')
4780             continue;
4781           if (!((found_tag && verify_tag)
4782                 || get_tagname))
4783             c = *dbp++;         /* only if don't need *dbp pointing
4784                                    to the beginning of the name of
4785                                    the procedure or function */
4786         }
4787       if (incomment)
4788         {
4789           if (c == '}')         /* within { } comments */
4790             incomment = FALSE;
4791           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4792             {
4793               dbp++;
4794               incomment = FALSE;
4795             }
4796           continue;
4797         }
4798       else if (inquote)
4799         {
4800           if (c == '\'')
4801             inquote = FALSE;
4802           continue;
4803         }
4804       else
4805         switch (c)
4806           {
4807           case '\'':
4808             inquote = TRUE;     /* found first quote */
4809             continue;
4810           case '{':             /* found open { comment */
4811             incomment = TRUE;
4812             continue;
4813           case '(':
4814             if (*dbp == '*')    /* found open (* comment */
4815               {
4816                 incomment = TRUE;
4817                 dbp++;
4818               }
4819             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4820               inparms = TRUE;
4821             continue;
4822           case ')':             /* end of parms list */
4823             if (inparms)
4824               inparms = FALSE;
4825             continue;
4826           case ';':
4827             if (found_tag && !inparms) /* end of proc or fn stmt */
4828               {
4829                 verify_tag = TRUE;
4830                 break;
4831               }
4832             continue;
4833           }
4834       if (found_tag && verify_tag && (*dbp != ' '))
4835         {
4836           /* Check if this is an "extern" declaration. */
4837           if (*dbp == '\0')
4838             continue;
4839           if (lowcase (*dbp == 'e'))
4840             {
4841               if (nocase_tail ("extern")) /* superfluous, really! */
4842                 {
4843                   found_tag = FALSE;
4844                   verify_tag = FALSE;
4845                 }
4846             }
4847           else if (lowcase (*dbp) == 'f')
4848             {
4849               if (nocase_tail ("forward")) /* check for forward reference */
4850                 {
4851                   found_tag = FALSE;
4852                   verify_tag = FALSE;
4853                 }
4854             }
4855           if (found_tag && verify_tag) /* not external proc, so make tag */
4856             {
4857               found_tag = FALSE;
4858               verify_tag = FALSE;
4859               make_tag (name, namelen, TRUE,
4860                         tline.buffer, taglen, save_lineno, save_lcno);
4861               continue;
4862             }
4863         }
4864       if (get_tagname)          /* grab name of proc or fn */
4865         {
4866           char *cp;
4867
4868           if (*dbp == '\0')
4869             continue;
4870
4871           /* Find block name. */
4872           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4873             continue;
4874
4875           /* Save all values for later tagging. */
4876           linebuffer_setlen (&tline, lb.len);
4877           strcpy (tline.buffer, lb.buffer);
4878           save_lineno = lineno;
4879           save_lcno = linecharno;
4880           name = tline.buffer + (dbp - lb.buffer);
4881           namelen = cp - dbp;
4882           taglen = cp - lb.buffer + 1;
4883
4884           dbp = cp;             /* set dbp to e-o-token */
4885           get_tagname = FALSE;
4886           found_tag = TRUE;
4887           continue;
4888
4889           /* And proceed to check for "extern". */
4890         }
4891       else if (!incomment && !inquote && !found_tag)
4892         {
4893           /* Check for proc/fn keywords. */
4894           switch (lowcase (c))
4895             {
4896             case 'p':
4897               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4898                 get_tagname = TRUE;
4899               continue;
4900             case 'f':
4901               if (nocase_tail ("unction"))
4902                 get_tagname = TRUE;
4903               continue;
4904             }
4905         }
4906     } /* while not eof */
4907
4908   free (tline.buffer);
4909 }
4910
4911 \f
4912 /*
4913  * Lisp tag functions
4914  *  look for (def or (DEF, quote or QUOTE
4915  */
4916
4917 static void L_getit __P((void));
4918
4919 static void
4920 L_getit ()
4921 {
4922   if (*dbp == '\'')             /* Skip prefix quote */
4923     dbp++;
4924   else if (*dbp == '(')
4925   {
4926     dbp++;
4927     /* Try to skip "(quote " */
4928     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4929       /* Ok, then skip "(" before name in (defstruct (foo)) */
4930       dbp = skip_spaces (dbp);
4931   }
4932   get_tag (dbp, NULL);
4933 }
4934
4935 static void
4936 Lisp_functions (inf)
4937      FILE *inf;
4938 {
4939   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4940     {
4941       if (dbp[0] != '(')
4942         continue;
4943
4944       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4945         {
4946           dbp = skip_non_spaces (dbp);
4947           dbp = skip_spaces (dbp);
4948           L_getit ();
4949         }
4950       else
4951         {
4952           /* Check for (foo::defmumble name-defined ... */
4953           do
4954             dbp++;
4955           while (!notinname (*dbp) && *dbp != ':');
4956           if (*dbp == ':')
4957             {
4958               do
4959                 dbp++;
4960               while (*dbp == ':');
4961
4962               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4963                 {
4964                   dbp = skip_non_spaces (dbp);
4965                   dbp = skip_spaces (dbp);
4966                   L_getit ();
4967                 }
4968             }
4969         }
4970     }
4971 }
4972
4973 \f
4974 /*
4975  * Lua script language parsing
4976  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4977  *
4978  *  "function" and "local function" are tags if they start at column 1.
4979  */
4980 static void
4981 Lua_functions (inf)
4982      FILE *inf;
4983 {
4984   register char *bp;
4985
4986   LOOP_ON_INPUT_LINES (inf, lb, bp)
4987     {
4988       if (bp[0] != 'f' && bp[0] != 'l')
4989         continue;
4990
4991       LOOKING_AT (bp, "local"); /* skip possible "local" */
4992
4993       if (LOOKING_AT (bp, "function"))
4994         get_tag (bp, NULL);
4995     }
4996 }
4997
4998 \f
4999 /*
5000  * Postscript tags
5001  * Just look for lines where the first character is '/'
5002  * Also look at "defineps" for PSWrap
5003  * Ideas by:
5004  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5005  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5006  */
5007 static void
5008 PS_functions (inf)
5009      FILE *inf;
5010 {
5011   register char *bp, *ep;
5012
5013   LOOP_ON_INPUT_LINES (inf, lb, bp)
5014     {
5015       if (bp[0] == '/')
5016         {
5017           for (ep = bp+1;
5018                *ep != '\0' && *ep != ' ' && *ep != '{';
5019                ep++)
5020             continue;
5021           make_tag (bp, ep - bp, TRUE,
5022                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5023         }
5024       else if (LOOKING_AT (bp, "defineps"))
5025         get_tag (bp, NULL);
5026     }
5027 }
5028
5029 \f
5030 /*
5031  * Forth tags
5032  * Ignore anything after \ followed by space or in ( )
5033  * Look for words defined by :
5034  * Look for constant, code, create, defer, value, and variable
5035  * OBP extensions:  Look for buffer:, field,
5036  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5037  */
5038 static void
5039 Forth_words (inf)
5040      FILE *inf;
5041 {
5042   register char *bp;
5043
5044   LOOP_ON_INPUT_LINES (inf, lb, bp)
5045     while ((bp = skip_spaces (bp))[0] != '\0')
5046       if (bp[0] == '\\' && iswhite(bp[1]))
5047         break;                  /* read next line */
5048       else if (bp[0] == '(' && iswhite(bp[1]))
5049         do                      /* skip to ) or eol */
5050           bp++;
5051         while (*bp != ')' && *bp != '\0');
5052       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5053                || LOOKING_AT_NOCASE (bp, "constant")
5054                || LOOKING_AT_NOCASE (bp, "code")
5055                || LOOKING_AT_NOCASE (bp, "create")
5056                || LOOKING_AT_NOCASE (bp, "defer")
5057                || LOOKING_AT_NOCASE (bp, "value")
5058                || LOOKING_AT_NOCASE (bp, "variable")
5059                || LOOKING_AT_NOCASE (bp, "buffer:")
5060                || LOOKING_AT_NOCASE (bp, "field"))
5061         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5062       else
5063         bp = skip_non_spaces (bp);
5064 }
5065
5066 \f
5067 /*
5068  * Scheme tag functions
5069  * look for (def... xyzzy
5070  *          (def... (xyzzy
5071  *          (def ... ((...(xyzzy ....
5072  *          (set! xyzzy
5073  * Original code by Ken Haase (1985?)
5074  */
5075 static void
5076 Scheme_functions (inf)
5077      FILE *inf;
5078 {
5079   register char *bp;
5080
5081   LOOP_ON_INPUT_LINES (inf, lb, bp)
5082     {
5083       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5084         {
5085           bp = skip_non_spaces (bp+4);
5086           /* Skip over open parens and white space */
5087           while (notinname (*bp))
5088             bp++;
5089           get_tag (bp, NULL);
5090         }
5091       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5092         get_tag (bp, NULL);
5093     }
5094 }
5095
5096 \f
5097 /* Find tags in TeX and LaTeX input files.  */
5098
5099 /* TEX_toktab is a table of TeX control sequences that define tags.
5100  * Each entry records one such control sequence.
5101  *
5102  * Original code from who knows whom.
5103  * Ideas by:
5104  *   Stefan Monnier (2002)
5105  */
5106
5107 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens, built by TEX_decode_env; the entry after the last token has a NULL buffer */
5108
5109 /* Default set of control sequences to put into TEX_toktab.
5110    The value of environment var TEXTAGS, when set, is prepended to this.  */
5111 static char *TEX_defenv = "\
5112 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5113 :part:appendix:entry:index:def\
5114 :newcommand:renewcommand:newenvironment:renewenvironment";
5115
5116 static void TEX_mode __P((FILE *));
5117 static void TEX_decode_env __P((char *, char *));
5118
5119 static char TEX_esc = '\\'; /* escape char; TEX_mode sets it for each file */
5120 static char TEX_opgrp = '{'; /* group opening char, set together with TEX_esc */
5121 static char TEX_clgrp = '}'; /* group closing char, set together with TEX_esc */
5122
5123 /*
5124  * TeX/LaTeX scanning loop.
5125  */
5126 static void
5127 TeX_commands (inf)
5128      FILE *inf;
5129 {
5130   char *cp;
5131   linebuffer *key;
5132
5133   /* Select either \ or ! as escape character.  */
5134   TEX_mode (inf);
5135
5136   /* Initialize token table once from environment. */
5137   if (TEX_toktab == NULL)
5138     TEX_decode_env ("TEXTAGS", TEX_defenv);
5139
5140   LOOP_ON_INPUT_LINES (inf, lb, cp)
5141     {
5142       /* Look at each TEX keyword in line. */
5143       for (;;)
5144         {
5145           /* Look for a TEX escape. */
5146           while (*cp++ != TEX_esc)
5147             if (cp[-1] == '\0' || cp[-1] == '%')
5148               goto tex_next_line;
5149
5150           for (key = TEX_toktab; key->buffer != NULL; key++)
5151             if (strneq (cp, key->buffer, key->len))
5152               {
5153                 register char *p;
5154                 int namelen, linelen;
5155                 bool opgrp = FALSE;
5156
5157                 cp = skip_spaces (cp + key->len);
5158                 if (*cp == TEX_opgrp)
5159                   {
5160                     opgrp = TRUE;
5161                     cp++;
5162                   }
5163                 for (p = cp;
5164                      (!iswhite (*p) && *p != '#' &&
5165                       *p != TEX_opgrp && *p != TEX_clgrp);
5166                      p++)
5167                   continue;
5168                 namelen = p - cp;
5169                 linelen = lb.len;
5170                 if (!opgrp || *p == TEX_clgrp)
5171                   {
5172                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5173                       *p++;
5174                     linelen = p - lb.buffer + 1;
5175                   }
5176                 make_tag (cp, namelen, TRUE,
5177                           lb.buffer, linelen, lineno, linecharno);
5178                 goto tex_next_line; /* We only tag a line once */
5179               }
5180         }
5181     tex_next_line:
5182       ;
5183     }
5184 }
5185
5186 #define TEX_LESC '\\'
5187 #define TEX_SESC '!'
5188
5189 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5190    chars accordingly. */
5191 static void
5192 TEX_mode (inf)
5193      FILE *inf;
5194 {
5195   int c;
5196
5197   while ((c = getc (inf)) != EOF)
5198     {
5199       /* Skip to next line if we hit the TeX comment char. */
5200       if (c == '%')
5201         while (c != '\n')
5202           c = getc (inf);
5203       else if (c == TEX_LESC || c == TEX_SESC )
5204         break;
5205     }
5206
5207   if (c == TEX_LESC)
5208     {
5209       TEX_esc = TEX_LESC;
5210       TEX_opgrp = '{';
5211       TEX_clgrp = '}';
5212     }
5213   else
5214     {
5215       TEX_esc = TEX_SESC;
5216       TEX_opgrp = '<';
5217       TEX_clgrp = '>';
5218     }
5219   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5220      No attempt is made to correct the situation. */
5221   rewind (inf);
5222 }
5223
5224 /* Read environment and prepend it to the default string.
5225    Build token table. */
5226 static void
5227 TEX_decode_env (evarname, defenv)
5228      char *evarname;
5229      char *defenv;
5230 {
5231   register char *env, *p;
5232   int i, len;
5233
5234   /* Append default string to environment. */
5235   env = getenv (evarname);
5236   if (!env)
5237     env = defenv;
5238   else
5239     {
5240       char *oldenv = env;
5241       env = concat (oldenv, defenv, "");
5242     }
5243
5244   /* Allocate a token table */
5245   for (len = 1, p = env; p;)
5246     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5247       len++;
5248   TEX_toktab = xnew (len, linebuffer);
5249
5250   /* Unpack environment string into token table. Be careful about */
5251   /* zero-length strings (leading ':', "::" and trailing ':') */
5252   for (i = 0; *env != '\0';)
5253     {
5254       p = etags_strchr (env, ':');
5255       if (!p)                   /* End of environment string. */
5256         p = env + strlen (env);
5257       if (p - env > 0)
5258         {                       /* Only non-zero strings. */
5259           TEX_toktab[i].buffer = savenstr (env, p - env);
5260           TEX_toktab[i].len = p - env;
5261           i++;
5262         }
5263       if (*p)
5264         env = p + 1;
5265       else
5266         {
5267           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5268           TEX_toktab[i].len = 0;
5269           break;
5270         }
5271     }
5272 }
5273
5274 \f
5275 /* Texinfo support.  Dave Love, Mar. 2000.  */
5276 static void
5277 Texinfo_nodes (inf)
5278      FILE * inf;
5279 {
5280   char *cp, *start;
5281   LOOP_ON_INPUT_LINES (inf, lb, cp)
5282     if (LOOKING_AT (cp, "@node"))
5283       {
5284         start = cp;
5285         while (*cp != '\0' && *cp != ',')
5286           cp++;
5287         make_tag (start, cp - start, TRUE,
5288                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5289       }
5290 }
5291
5292 \f
5293 /*
5294  * HTML support.
5295  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5296  * Contents of <a name=xxx> are tags with name xxx.
5297  *
5298  * Francesco Potortì, 2002.
5299  */
5300 static void
5301 HTML_labels (inf)
5302      FILE * inf;
5303 {
5304   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5305   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5306   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5307   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5308   char *end;
5309
5310
5311   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5312
5313   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5314     for (;;)                    /* loop on the same line */
5315       {
5316         if (skiptag)            /* skip HTML tag */
5317           {
5318             while (*dbp != '\0' && *dbp != '>')
5319               dbp++;
5320             if (*dbp == '>')
5321               {
5322                 dbp += 1;
5323                 skiptag = FALSE;
5324                 continue;       /* look on the same line */
5325               }
5326             break;              /* go to next line */
5327           }
5328
5329         else if (intag) /* look for "name=" or "id=" */
5330           {
5331             while (*dbp != '\0' && *dbp != '>'
5332                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5333               dbp++;
5334             if (*dbp == '\0')
5335               break;            /* go to next line */
5336             if (*dbp == '>')
5337               {
5338                 dbp += 1;
5339                 intag = FALSE;
5340                 continue;       /* look on the same line */
5341               }
5342             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5343                 || LOOKING_AT_NOCASE (dbp, "id="))
5344               {
5345                 bool quoted = (dbp[0] == '"');
5346
5347                 if (quoted)
5348                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5349                     continue;
5350                 else
5351                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5352                     continue;
5353                 linebuffer_setlen (&token_name, end - dbp);
5354                 strncpy (token_name.buffer, dbp, end - dbp);
5355                 token_name.buffer[end - dbp] = '\0';
5356
5357                 dbp = end;
5358                 intag = FALSE;  /* we found what we looked for */
5359                 skiptag = TRUE; /* skip to the end of the tag */
5360                 getnext = TRUE; /* then grab the text */
5361                 continue;       /* look on the same line */
5362               }
5363             dbp += 1;
5364           }
5365
5366         else if (getnext)       /* grab next tokens and tag them */
5367           {
5368             dbp = skip_spaces (dbp);
5369             if (*dbp == '\0')
5370               break;            /* go to next line */
5371             if (*dbp == '<')
5372               {
5373                 intag = TRUE;
5374                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5375                 continue;       /* look on the same line */
5376               }
5377
5378             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5379               continue;
5380             make_tag (token_name.buffer, token_name.len, TRUE,
5381                       dbp, end - dbp, lineno, linecharno);
5382             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5383             getnext = FALSE;
5384             break;              /* go to next line */
5385           }
5386
5387         else                    /* look for an interesting HTML tag */
5388           {
5389             while (*dbp != '\0' && *dbp != '<')
5390               dbp++;
5391             if (*dbp == '\0')
5392               break;            /* go to next line */
5393             intag = TRUE;
5394             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5395               {
5396                 inanchor = TRUE;
5397                 continue;       /* look on the same line */
5398               }
5399             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5400                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5401                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5402                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5403               {
5404                 intag = FALSE;
5405                 getnext = TRUE;
5406                 continue;       /* look on the same line */
5407               }
5408             dbp += 1;
5409           }
5410       }
5411 }
5412
5413 \f
5414 /*
5415  * Prolog support
5416  *
5417  * Assumes that the predicate or rule starts at column 0.
5418  * Only the first clause of a predicate or rule is added.
5419  * Original code by Sunichirou Sugou (1989)
5420  * Rewritten by Anders Lindgren (1996)
5421  */
5422 static int prolog_pr __P((char *, char *));
5423 static void prolog_skip_comment __P((linebuffer *, FILE *));
5424 static int prolog_atom __P((char *, int));
5425
5426 static void
5427 Prolog_functions (inf)
5428      FILE *inf;
5429 {
5430   char *cp, *last;
5431   int len;
5432   int allocated;
5433
5434   allocated = 0;
5435   len = 0;
5436   last = NULL;
5437
5438   LOOP_ON_INPUT_LINES (inf, lb, cp)
5439     {
5440       if (cp[0] == '\0')        /* Empty line */
5441         continue;
5442       else if (iswhite (cp[0])) /* Not a predicate */
5443         continue;
5444       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5445         prolog_skip_comment (&lb, inf);
5446       else if ((len = prolog_pr (cp, last)) > 0)
5447         {
5448           /* Predicate or rule.  Store the function name so that we
5449              only generate a tag for the first clause.  */
5450           if (last == NULL)
5451             last = xnew(len + 1, char);
5452           else if (len + 1 > allocated)
5453             xrnew (last, len + 1, char);
5454           allocated = len + 1;
5455           strncpy (last, cp, len);
5456           last[len] = '\0';
5457         }
5458     }
5459   if (last != NULL)
5460     free (last);
5461 }
5462
5463
5464 static void
5465 prolog_skip_comment (plb, inf)
5466      linebuffer *plb;
5467      FILE *inf;
5468 {
5469   char *cp;
5470
5471   do
5472     {
5473       for (cp = plb->buffer; *cp != '\0'; cp++)
5474         if (cp[0] == '*' && cp[1] == '/')
5475           return;
5476       readline (plb, inf);
5477     }
5478   while (!feof(inf));
5479 }
5480
5481 /*
5482  * A predicate or rule definition is added if it matches:
5483  *     <beginning of line><Prolog Atom><whitespace>(
5484  * or  <beginning of line><Prolog Atom><whitespace>:-
5485  *
5486  * It is added to the tags database if it doesn't match the
5487  * name of the previous clause header.
5488  *
5489  * Return the size of the name of the predicate or rule, or 0 if no
5490  * header was found.
5491  */
5492 static int
5493 prolog_pr (s, last)
5494      char *s;
5495      char *last;                /* Name of last clause. */
5496 {
5497   int pos;
5498   int len;
5499
5500   pos = prolog_atom (s, 0);
5501   if (pos < 1)
5502     return 0;
5503
5504   len = pos;
5505   pos = skip_spaces (s + pos) - s;
5506
5507   if ((s[pos] == '.'
5508        || (s[pos] == '(' && (pos += 1))
5509        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5510       && (last == NULL          /* save only the first clause */
5511           || len != (int)strlen (last)
5512           || !strneq (s, last, len)))
5513         {
5514           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5515           return len;
5516         }
5517   else
5518     return 0;
5519 }
5520
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom.
 *
 * Two forms are recognised:
 * - An unquoted name: a lower case letter or an underscore followed
 *   by any number of alphanumeric characters or underscores.
 * - A quoted string delimited by single quotes.  Inside it, two
 *   single quotes in a row stand for one quote and a backslash makes
 *   the following character ordinary.  A quoted atom that is not
 *   closed on this line yields -1.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || (*p == '_'))
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM(*p) || (*p == '_'); p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: scan for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* skip backslash and escaped char */
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start);     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5579
5580 \f
5581 /*
5582  * Support for Erlang
5583  *
5584  * Generates tags for functions, defines, and records.
5585  * Assumes that Erlang functions start at column 0.
5586  * Original code by Anders Lindgren (1996)
5587  */
5588 static int erlang_func __P((char *, char *));
5589 static void erlang_attribute __P((char *));
5590 static int erlang_atom __P((char *));
5591
5592 static void
5593 Erlang_functions (inf)
5594      FILE *inf;
5595 {
5596   char *cp, *last;
5597   int len;
5598   int allocated;
5599
5600   allocated = 0;
5601   len = 0;
5602   last = NULL;
5603
5604   LOOP_ON_INPUT_LINES (inf, lb, cp)
5605     {
5606       if (cp[0] == '\0')        /* Empty line */
5607         continue;
5608       else if (iswhite (cp[0])) /* Not function nor attribute */
5609         continue;
5610       else if (cp[0] == '%')    /* comment */
5611         continue;
5612       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5613         continue;
5614       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5615         {
5616           erlang_attribute (cp);
5617           if (last != NULL)
5618             {
5619               free (last);
5620               last = NULL;
5621             }
5622         }
5623       else if ((len = erlang_func (cp, last)) > 0)
5624         {
5625           /*
5626            * Function.  Store the function name so that we only
5627            * generates a tag for the first clause.
5628            */
5629           if (last == NULL)
5630             last = xnew (len + 1, char);
5631           else if (len + 1 > allocated)
5632             xrnew (last, len + 1, char);
5633           allocated = len + 1;
5634           strncpy (last, cp, len);
5635           last[len] = '\0';
5636         }
5637     }
5638   if (last != NULL)
5639     free (last);
5640 }
5641
5642
5643 /*
5644  * A function definition is added if it matches:
5645  *     <beginning of line><Erlang Atom><whitespace>(
5646  *
5647  * It is added to the tags database if it doesn't match the
5648  * name of the previous clause header.
5649  *
5650  * Return the size of the name of the function, or 0 if no function
5651  * was found.
5652  */
5653 static int
5654 erlang_func (s, last)
5655      char *s;
5656      char *last;                /* Name of last clause. */
5657 {
5658   int pos;
5659   int len;
5660
5661   pos = erlang_atom (s);
5662   if (pos < 1)
5663     return 0;
5664
5665   len = pos;
5666   pos = skip_spaces (s + pos) - s;
5667
5668   /* Save only the first clause. */
5669   if (s[pos++] == '('
5670       && (last == NULL
5671           || len != (int)strlen (last)
5672           || !strneq (s, last, len)))
5673         {
5674           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5675           return len;
5676         }
5677
5678   return 0;
5679 }
5680
5681
5682 /*
5683  * Handle attributes.  Currently, tags are generated for defines
5684  * and records.
5685  *
5686  * They are on the form:
5687  * -define(foo, bar).
5688  * -define(Foo(M, N), M+N).
5689  * -record(graph, {vtab = notable, cyclic = true}).
5690  */
5691 static void
5692 erlang_attribute (s)
5693      char *s;
5694 {
5695   char *cp = s;
5696
5697   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5698       && *cp++ == '(')
5699     {
5700       int len = erlang_atom (skip_spaces (cp));
5701       if (len > 0)
5702         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5703     }
5704   return;
5705 }
5706
5707
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom.
 *
 * Two forms are recognised:
 * - An unquoted name: a letter or an underscore followed by any
 *   number of alphanumeric characters or underscores.
 * - A quoted name delimited by single quotes, in which a backslash
 *   makes the following character ordinary.  A quoted atom that is not
 *   closed on this line yields 0.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;                /* step onto the escaped character */
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote too */
}
5736
5737 \f
5738 #ifdef ETAGS_REGEXPS
5739
5740 static char *scan_separators __P((char *));
5741 static void add_regex __P((char *, language *));
5742 static char *substitute __P((char *, char *, struct re_registers *));
5743
5744 /*
5745  * Take a string like "/blah/" and turn it into "blah", verifying
5746  * that the first and last characters are the same, and handling
5747  * quoted separator characters.  Actually, stops on the occurrence of
5748  * an unquoted separator.  Also process \t, \n, etc. and turn into
5749  * appropriate characters. Works in place.  Null terminates name string.
5750  * Returns pointer to terminating separator, or NULL for
5751  * unterminated regexps.
5752  */
5753 static char *
5754 scan_separators (name)
5755      char *name;
5756 {
5757   char sep = name[0];
5758   char *copyto = name;
5759   bool quoted = FALSE;
5760
5761   for (++name; *name != '\0'; ++name)
5762     {
5763       if (quoted)
5764         {
5765           switch (*name)
5766             {
5767             case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
5768             case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
5769             case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
5770             case 'e': *copyto++ = 033; break;    /* ESC (delete)         */
5771             case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
5772             case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
5773             case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
5774             case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
5775             case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
5776             default:
5777               if (*name == sep)
5778                 *copyto++ = sep;
5779               else
5780                 {
5781                   /* Something else is quoted, so preserve the quote. */
5782                   *copyto++ = '\\';
5783                   *copyto++ = *name;
5784                 }
5785               break;
5786             }
5787           quoted = FALSE;
5788         }
5789       else if (*name == '\\')
5790         quoted = TRUE;
5791       else if (*name == sep)
5792         break;
5793       else
5794         *copyto++ = *name;
5795     }
5796   if (*name != sep)
5797     name = NULL;                /* signal unterminated regexp */
5798
5799   /* Terminate copied string. */
5800   *copyto = '\0';
5801   return name;
5802 }
5803
5804 /* Look at the argument of --regex or --no-regex and do the right
5805    thing.  Same for each line of a regexp file. */
5806 static void
5807 analyse_regex (regex_arg)
5808      char *regex_arg;
5809 {
5810   if (regex_arg == NULL)
5811     {
5812       free_regexps ();          /* --no-regex: remove existing regexps */
5813       return;
5814     }
5815
5816   /* A real --regexp option or a line in a regexp file. */
5817   switch (regex_arg[0])
5818     {
5819       /* Comments in regexp file or null arg to --regex. */
5820     case '\0':
5821     case ' ':
5822     case '\t':
5823       break;
5824
5825       /* Read a regex file.  This is recursive and may result in a
5826          loop, which will stop when the file descriptors are exhausted. */
5827     case '@':
5828       {
5829         FILE *regexfp;
5830         linebuffer regexbuf;
5831         char *regexfile = regex_arg + 1;
5832
5833         /* regexfile is a file containing regexps, one per line. */
5834         regexfp = fopen (regexfile, "r");
5835         if (regexfp == NULL)
5836           {
5837             pfatal (regexfile);
5838             return;
5839           }
5840         linebuffer_init (&regexbuf);
5841         while (readline_internal (&regexbuf, regexfp) > 0)
5842           analyse_regex (regexbuf.buffer);
5843         free (regexbuf.buffer);
5844         fclose (regexfp);
5845       }
5846       break;
5847
5848       /* Regexp to be used for a specific language only. */
5849     case '{':
5850       {
5851         language *lang;
5852         char *lang_name = regex_arg + 1;
5853         char *cp;
5854
5855         for (cp = lang_name; *cp != '}'; cp++)
5856           if (*cp == '\0')
5857             {
5858               error ("unterminated language name in regex: %s", regex_arg);
5859               return;
5860             }
5861         *cp++ = '\0';
5862         lang = get_language_from_langname (lang_name);
5863         if (lang == NULL)
5864           return;
5865         add_regex (cp, lang);
5866       }
5867       break;
5868
5869       /* Regexp to be used for any language. */
5870     default:
5871       add_regex (regex_arg, NULL);
5872       break;
5873     }
5874 }
5875
5876 /* Separate the regexp pattern, compile it,
5877    and care for optional name and modifiers. */
5878 static void
5879 add_regex (regexp_pattern, lang)
5880      char *regexp_pattern;
5881      language *lang;
5882 {
5883   static struct re_pattern_buffer zeropattern;
5884   char sep, *pat, *name, *modifiers;
5885   const char *err;
5886   struct re_pattern_buffer *patbuf;
5887   regexp *rp;
5888   bool
5889     force_explicit_name = TRUE, /* do not use implicit tag names */
5890     ignore_case = FALSE,        /* case is significant */
5891     multi_line = FALSE,         /* matches are done one line at a time */
5892     single_line = FALSE;        /* dot does not match newline */
5893
5894
5895   if (strlen(regexp_pattern) < 3)
5896     {
5897       error ("null regexp", (char *)NULL);
5898       return;
5899     }
5900   sep = regexp_pattern[0];
5901   name = scan_separators (regexp_pattern);
5902   if (name == NULL)
5903     {
5904       error ("%s: unterminated regexp", regexp_pattern);
5905       return;
5906     }
5907   if (name[1] == sep)
5908     {
5909       error ("null name for regexp \"%s\"", regexp_pattern);
5910       return;
5911     }
5912   modifiers = scan_separators (name);
5913   if (modifiers == NULL)        /* no terminating separator --> no name */
5914     {
5915       modifiers = name;
5916       name = "";
5917     }
5918   else
5919     modifiers += 1;             /* skip separator */
5920
5921   /* Parse regex modifiers. */
5922   for (; modifiers[0] != '\0'; modifiers++)
5923     switch (modifiers[0])
5924       {
5925       case 'N':
5926         if (modifiers == name)
5927           error ("forcing explicit tag name but no name, ignoring", NULL);
5928         force_explicit_name = TRUE;
5929         break;
5930       case 'i':
5931         ignore_case = TRUE;
5932         break;
5933       case 's':
5934         single_line = TRUE;
5935         /* FALLTHRU */
5936       case 'm':
5937         multi_line = TRUE;
5938         need_filebuf = TRUE;
5939         break;
5940       default:
5941         {
5942           char wrongmod [2];
5943           wrongmod[0] = modifiers[0];
5944           wrongmod[1] = '\0';
5945           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5946         }
5947         break;
5948       }
5949
5950   patbuf = xnew (1, struct re_pattern_buffer);
5951   *patbuf = zeropattern;
5952   if (ignore_case)
5953     {
5954       static char lc_trans[CHARS];
5955       int i;
5956       for (i = 0; i < CHARS; i++)
5957         lc_trans[i] = lowcase (i);
5958       patbuf->translate = lc_trans;     /* translation table to fold case  */
5959     }
5960
5961   if (multi_line)
5962     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5963   else
5964     pat = regexp_pattern;
5965
5966   if (single_line)
5967     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5968   else
5969     re_set_syntax (RE_SYNTAX_EMACS);
5970
5971   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5972   if (multi_line)
5973     free (pat);
5974   if (err != NULL)
5975     {
5976       error ("%s while compiling pattern", err);
5977       return;
5978     }
5979
5980   rp = p_head;
5981   p_head = xnew (1, regexp);
5982   p_head->pattern = savestr (regexp_pattern);
5983   p_head->p_next = rp;
5984   p_head->lang = lang;
5985   p_head->pat = patbuf;
5986   p_head->name = savestr (name);
5987   p_head->error_signaled = FALSE;
5988   p_head->force_explicit_name = force_explicit_name;
5989   p_head->ignore_case = ignore_case;
5990   p_head->multi_line = multi_line;
5991 }
5992
5993 /*
5994  * Do the substitutions indicated by the regular expression and
5995  * arguments.
5996  */
5997 static char *
5998 substitute (in, out, regs)
5999      char *in, *out;
6000      struct re_registers *regs;
6001 {
6002   char *result, *t;
6003   int size, dig, diglen;
6004
6005   result = NULL;
6006   size = strlen (out);
6007
6008   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6009   if (out[size - 1] == '\\')
6010     fatal ("pattern error in \"%s\"", out);
6011   for (t = etags_strchr (out, '\\');
6012        t != NULL;
6013        t = etags_strchr (t + 2, '\\'))
6014     if (ISDIGIT (t[1]))
6015       {
6016         dig = t[1] - '0';
6017         diglen = regs->end[dig] - regs->start[dig];
6018         size += diglen - 2;
6019       }
6020     else
6021       size -= 1;
6022
6023   /* Allocate space and do the substitutions. */
6024   assert (size >= 0);
6025   result = xnew (size + 1, char);
6026
6027   for (t = result; *out != '\0'; out++)
6028     if (*out == '\\' && ISDIGIT (*++out))
6029       {
6030         dig = *out - '0';
6031         diglen = regs->end[dig] - regs->start[dig];
6032         strncpy (t, in + regs->start[dig], diglen);
6033         t += diglen;
6034       }
6035     else
6036       *t++ = *out;
6037   *t = '\0';
6038
6039   assert (t <= result + size);
6040   assert (t - result == (int)strlen (result));
6041
6042   return result;
6043 }
6044
6045 /* Deallocate all regexps. */
6046 static void
6047 free_regexps ()
6048 {
6049   regexp *rp;
6050   while (p_head != NULL)
6051     {
6052       rp = p_head->p_next;
6053       free (p_head->pattern);
6054       free (p_head->name);
6055       free (p_head);
6056       p_head = rp;
6057     }
6058   return;
6059 }
6060
6061 /*
6062  * Reads the whole file as a single string from `filebuf' and looks for
6063  * multi-line regular expressions, creating tags on matches.
6064  * readline already dealt with normal regexps.
6065  *
6066  * Idea by Ben Wing <ben@666.com> (2002).
6067  */
6068 static void
6069 regex_tag_multiline ()
6070 {
6071   char *buffer = filebuf.buffer;
6072   regexp *rp;
6073   char *name;
6074
6075   for (rp = p_head; rp != NULL; rp = rp->p_next)
6076     {
6077       int match = 0;
6078
6079       if (!rp->multi_line)
6080         continue;               /* skip normal regexps */
6081
6082       /* Generic initialisations before parsing file from memory. */
6083       lineno = 1;               /* reset global line number */
6084       charno = 0;               /* reset global char number */
6085       linecharno = 0;           /* reset global char number of line start */
6086
6087       /* Only use generic regexps or those for the current language. */
6088       if (rp->lang != NULL && rp->lang != curfdp->lang)
6089         continue;
6090
6091       while (match >= 0 && match < filebuf.len)
6092         {
6093           match = re_search (rp->pat, buffer, filebuf.len, charno,
6094                              filebuf.len - match, &rp->regs);
6095           switch (match)
6096             {
6097             case -2:
6098               /* Some error. */
6099               if (!rp->error_signaled)
6100                 {
6101                   error ("regexp stack overflow while matching \"%s\"",
6102                          rp->pattern);
6103                   rp->error_signaled = TRUE;
6104                 }
6105               break;
6106             case -1:
6107               /* No match. */
6108               break;
6109             default:
6110               if (match == rp->regs.end[0])
6111                 {
6112                   if (!rp->error_signaled)
6113                     {
6114                       error ("regexp matches the empty string: \"%s\"",
6115                              rp->pattern);
6116                       rp->error_signaled = TRUE;
6117                     }
6118                   match = -3;   /* exit from while loop */
6119                   break;
6120                 }
6121
6122               /* Match occurred.  Construct a tag. */
6123               while (charno < rp->regs.end[0])
6124                 if (buffer[charno++] == '\n')
6125                   lineno++, linecharno = charno;
6126               name = rp->name;
6127               if (name[0] == '\0')
6128                 name = NULL;
6129               else /* make a named tag */
6130                 name = substitute (buffer, rp->name, &rp->regs);
6131               if (rp->force_explicit_name)
6132                 /* Force explicit tag name, if a name is there. */
6133                 pfnote (name, TRUE, buffer + linecharno,
6134                         charno - linecharno + 1, lineno, linecharno);
6135               else
6136                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6137                           charno - linecharno + 1, lineno, linecharno);
6138               break;
6139             }
6140         }
6141     }
6142 }
6143
6144 #endif /* ETAGS_REGEXPS */
6145
6146 \f
6147 static bool
6148 nocase_tail (cp)
6149      char *cp;
6150 {
6151   register int len = 0;
6152
6153   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6154     cp++, len++;
6155   if (*cp == '\0' && !intoken (dbp[len]))
6156     {
6157       dbp += len;
6158       return TRUE;
6159     }
6160   return FALSE;
6161 }
6162
6163 static void
6164 get_tag (bp, namepp)
6165      register char *bp;
6166      char **namepp;
6167 {
6168   register char *cp = bp;
6169
6170   if (*bp != '\0')
6171     {
6172       /* Go till you get to white space or a syntactic break */
6173       for (cp = bp + 1; !notinname (*cp); cp++)
6174         continue;
6175       make_tag (bp, cp - bp, TRUE,
6176                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6177     }
6178
6179   if (namepp != NULL)
6180     *namepp = savenstr (bp, cp - bp);
6181 }
6182
6183 /*
6184  * Read a line of text from `stream' into `lbp', excluding the
6185  * newline or CR-NL, if any.  Return the number of characters read from
6186  * `stream', which is the length of the line including the newline.
6187  *
6188  * On DOS or Windows we do not count the CR character, if any before the
6189  * NL, in the returned length; this mirrors the behavior of Emacs on those
6190  * platforms (for text files, it translates CR-NL to NL as it reads in the
6191  * file).
6192  *
6193  * If multi-line regular expressions are requested, each line read is
6194  * appended to `filebuf'.
6195  */
6196 static long
6197 readline_internal (lbp, stream)
6198      linebuffer *lbp;
6199      register FILE *stream;
6200 {
6201   char *buffer = lbp->buffer;
6202   register char *p = lbp->buffer;
6203   register char *pend;
6204   int chars_deleted;
6205
6206   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6207
6208   for (;;)
6209     {
6210       register int c = getc (stream);
6211       if (p == pend)
6212         {
6213           /* We're at the end of linebuffer: expand it. */
6214           lbp->size *= 2;
6215           xrnew (buffer, lbp->size, char);
6216           p += buffer - lbp->buffer;
6217           pend = buffer + lbp->size;
6218           lbp->buffer = buffer;
6219         }
6220       if (c == EOF)
6221         {
6222           *p = '\0';
6223           chars_deleted = 0;
6224           break;
6225         }
6226       if (c == '\n')
6227         {
6228           if (p > buffer && p[-1] == '\r')
6229             {
6230               p -= 1;
6231 #ifdef DOS_NT
6232              /* Assume CRLF->LF translation will be performed by Emacs
6233                 when loading this file, so CRs won't appear in the buffer.
6234                 It would be cleaner to compensate within Emacs;
6235                 however, Emacs does not know how many CRs were deleted
6236                 before any given point in the file.  */
6237               chars_deleted = 1;
6238 #else
6239               chars_deleted = 2;
6240 #endif
6241             }
6242           else
6243             {
6244               chars_deleted = 1;
6245             }
6246           *p = '\0';
6247           break;
6248         }
6249       *p++ = c;
6250     }
6251   lbp->len = p - buffer;
6252
6253   if (need_filebuf              /* we need filebuf for multi-line regexps */
6254       && chars_deleted > 0)     /* not at EOF */
6255     {
6256       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6257         {
6258           /* Expand filebuf. */
6259           filebuf.size *= 2;
6260           xrnew (filebuf.buffer, filebuf.size, char);
6261         }
6262       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6263       filebuf.len += lbp->len;
6264       filebuf.buffer[filebuf.len++] = '\n';
6265       filebuf.buffer[filebuf.len] = '\0';
6266     }
6267
6268   return lbp->len + chars_deleted;
6269 }
6270
6271 /*
6272  * Like readline_internal, above, but in addition try to match the
6273  * input line against relevant regular expressions and manage #line
6274  * directives.
6275  */
6276 static void
6277 readline (lbp, stream)
6278      linebuffer *lbp;
6279      FILE *stream;
6280 {
6281   long result;
6282
6283   linecharno = charno;          /* update global char number of line start */
6284   result = readline_internal (lbp, stream); /* read line */
6285   lineno += 1;                  /* increment global line number */
6286   charno += result;             /* increment global char number */
6287
6288   /* Honour #line directives. */
6289   if (!no_line_directive)
6290     {
6291       static bool discard_until_line_directive;
6292
6293       /* Check whether this is a #line directive. */
6294       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6295         {
6296           int start, lno;
6297
6298           if (DEBUG) start = 0; /* shut up the compiler */
6299           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6300             {
6301               char *endp = lbp->buffer + start;
6302
6303               assert (start > 0);
6304               while ((endp = etags_strchr (endp, '"')) != NULL
6305                      && endp[-1] == '\\')
6306                 endp++;
6307               if (endp != NULL)
6308                 /* Ok, this is a real #line directive.  Let's deal with it. */
6309                 {
6310                   char *taggedabsname;  /* absolute name of original file */
6311                   char *taggedfname;    /* name of original file as given */
6312                   char *name;           /* temp var */
6313
6314                   discard_until_line_directive = FALSE; /* found it */
6315                   name = lbp->buffer + start;
6316                   *endp = '\0';
6317                   canonicalize_filename (name); /* for DOS */
6318                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6319                   if (filename_is_absolute (name)
6320                       || filename_is_absolute (curfdp->infname))
6321                     taggedfname = savestr (taggedabsname);
6322                   else
6323                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6324
6325                   if (streq (curfdp->taggedfname, taggedfname))
6326                     /* The #line directive is only a line number change.  We
6327                        deal with this afterwards. */
6328                     free (taggedfname);
6329                   else
6330                     /* The tags following this #line directive should be
6331                        attributed to taggedfname.  In order to do this, set
6332                        curfdp accordingly. */
6333                     {
6334                       fdesc *fdp; /* file description pointer */
6335
6336                       /* Go look for a file description already set up for the
6337                          file indicated in the #line directive.  If there is
6338                          one, use it from now until the next #line
6339                          directive. */
6340                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6341                         if (streq (fdp->infname, curfdp->infname)
6342                             && streq (fdp->taggedfname, taggedfname))
6343                           /* If we remove the second test above (after the &&)
6344                              then all entries pertaining to the same file are
6345                              coalesced in the tags file.  If we use it, then
6346                              entries pertaining to the same file but generated
6347                              from different files (via #line directives) will
6348                              go into separate sections in the tags file.  These
6349                              alternatives look equivalent.  The first one
6350                              destroys some apparently useless information. */
6351                           {
6352                             curfdp = fdp;
6353                             free (taggedfname);
6354                             break;
6355                           }
6356                       /* Else, if we already tagged the real file, skip all
6357                          input lines until the next #line directive. */
6358                       if (fdp == NULL) /* not found */
6359                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6360                           if (streq (fdp->infabsname, taggedabsname))
6361                             {
6362                               discard_until_line_directive = TRUE;
6363                               free (taggedfname);
6364                               break;
6365                             }
6366                       /* Else create a new file description and use that from
6367                          now on, until the next #line directive. */
6368                       if (fdp == NULL) /* not found */
6369                         {
6370                           fdp = fdhead;
6371                           fdhead = xnew (1, fdesc);
6372                           *fdhead = *curfdp; /* copy curr. file description */
6373                           fdhead->next = fdp;
6374                           fdhead->infname = savestr (curfdp->infname);
6375                           fdhead->infabsname = savestr (curfdp->infabsname);
6376                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6377                           fdhead->taggedfname = taggedfname;
6378                           fdhead->usecharno = FALSE;
6379                           fdhead->prop = NULL;
6380                           fdhead->written = FALSE;
6381                           curfdp = fdhead;
6382                         }
6383                     }
6384                   free (taggedabsname);
6385                   lineno = lno - 1;
6386                   readline (lbp, stream);
6387                   return;
6388                 } /* if a real #line directive */
6389             } /* if #line is followed by a a number */
6390         } /* if line begins with "#line " */
6391
6392       /* If we are here, no #line directive was found. */
6393       if (discard_until_line_directive)
6394         {
6395           if (result > 0)
6396             {
6397               /* Do a tail recursion on ourselves, thus discarding the contents
6398                  of the line buffer. */
6399               readline (lbp, stream);
6400               return;
6401             }
6402           /* End of file. */
6403           discard_until_line_directive = FALSE;
6404           return;
6405         }
6406     } /* if #line directives should be considered */
6407
6408 #ifdef ETAGS_REGEXPS
6409   {
6410     int match;
6411     regexp *rp;
6412     char *name;
6413
6414     /* Match against relevant regexps. */
6415     if (lbp->len > 0)
6416       for (rp = p_head; rp != NULL; rp = rp->p_next)
6417         {
6418           /* Only use generic regexps or those for the current language.
6419              Also do not use multiline regexps, which is the job of
6420              regex_tag_multiline. */
6421           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6422               || rp->multi_line)
6423             continue;
6424
6425           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6426           switch (match)
6427             {
6428             case -2:
6429               /* Some error. */
6430               if (!rp->error_signaled)
6431                 {
6432                   error ("regexp stack overflow while matching \"%s\"",
6433                          rp->pattern);
6434                   rp->error_signaled = TRUE;
6435                 }
6436               break;
6437             case -1:
6438               /* No match. */
6439               break;
6440             case 0:
6441               /* Empty string matched. */
6442               if (!rp->error_signaled)
6443                 {
6444                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6445                   rp->error_signaled = TRUE;
6446                 }
6447               break;
6448             default:
6449               /* Match occurred.  Construct a tag. */
6450               name = rp->name;
6451               if (name[0] == '\0')
6452                 name = NULL;
6453               else /* make a named tag */
6454                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6455               if (rp->force_explicit_name)
6456                 /* Force explicit tag name, if a name is there. */
6457                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6458               else
6459                 make_tag (name, strlen (name), TRUE,
6460                           lbp->buffer, match, lineno, linecharno);
6461               break;
6462             }
6463         }
6464   }
6465 #endif /* ETAGS_REGEXPS */
6466 }
6467
6468 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by
 * savenstr using the full length of CP; the caller gets back the
 * pointer to the new string.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6479
6480 /*
6481  * Return a pointer to a space of size LEN+1 allocated with xnew where
6482  * the string CP has been copied for at most the first LEN characters.
6483  */
6484 static char *
6485 savenstr (cp, len)
6486      char *cp;
6487      int len;
6488 {
6489   register char *dp;
6490
6491   dp = xnew (len + 1, char);
6492   strncpy (dp, cp, len);
6493   dp[len] = '\0';
6494   return dp;
6495 }
6496
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search, so looking for '\0'
 * yields the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent hit */
      if (*sp == '\0')
        break;                  /* terminator examined, we are done */
    }

  return (char *)last;
}
6518
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search, so looking for '\0'
 * yields the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
6537
/*
 * Case-insensitive string comparison.  Two characters are compared
 * through lowcase only when both are alphabetic; otherwise they are
 * compared as they are.  The result is the difference between the
 * first pair of characters that do not match (zero if there is none).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings in step until S1 ends or a mismatch shows up. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6558
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * At most N characters are examined.  Returns 0 when the first N
 * characters match (or when N is not positive), otherwise the
 * difference between the first pair of characters that do not match.
 * Two characters are compared through lowcase only when both are
 * alphabetic.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still allowed to be compared.  It is only
     decremented together with the pointers, so that N == 0 after the
     loop reliably means "the whole prefix matched", even when S1 ends
     exactly there. */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* never look at the N+1'th characters */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6584
/* Advance CP past every character for which iswhite is true and return
   the resulting pointer (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6594
/* Advance CP up to the first character for which iswhite is true, or to
   the end of the string, whichever comes first; return that pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6604
/* Print error message and exit.
   S1 and S2 are handed unchanged to error; the process then terminates
   with status EXIT_FAILURE, so this function does not return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6613
/* Report the current errno condition with perror, using S1 as the
   message prefix, then terminate with status EXIT_FAILURE.  Does not
   return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6621
6622 static void
6623 suggest_asking_for_help ()
6624 {
6625   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6626            progname, LONG_OPTIONS ? "--help" : "-h");
6627   exit (EXIT_FAILURE);
6628 }
6629
6630 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6631 static void
6632 error (s1, s2)
6633      const char *s1, *s2;
6634 {
6635   fprintf (stderr, "%s: ", progname);
6636   fprintf (stderr, s1, s2);
6637   fprintf (stderr, "\n");
6638 }
6639
6640 /* Return a newly-allocated string whose contents
6641    concatenate those of s1, s2, s3.  */
6642 static char *
6643 concat (s1, s2, s3)
6644      char *s1, *s2, *s3;
6645 {
6646   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6647   char *result = xnew (len1 + len2 + len3 + 1, char);
6648
6649   strcpy (result, s1);
6650   strcpy (result + len1, s2);
6651   strcpy (result + len1 + len2, s3);
6652   result[len1 + len2 + len3] = '\0';
6653
6654   return result;
6655 }
6656
6657 \f
6658 /* Does the same work as the system V getcwd, but does not need to
6659    guess the buffer size in advance. */
6660 static char *
6661 etags_getcwd ()
6662 {
6663 #ifdef HAVE_GETCWD
6664   int bufsize = 200;
6665   char *path = xnew (bufsize, char);
6666
6667   while (getcwd (path, bufsize) == NULL)
6668     {
6669       if (errno != ERANGE)
6670         pfatal ("getcwd");
6671       bufsize *= 2;
6672       free (path);
6673       path = xnew (bufsize, char);
6674     }
6675
6676   canonicalize_filename (path);
6677   return path;
6678
6679 #else /* not HAVE_GETCWD */
6680 #if MSDOS
6681
6682   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6683
6684   getwd (path);
6685
6686   for (p = path; *p != '\0'; p++)
6687     if (*p == '\\')
6688       *p = '/';
6689     else
6690       *p = lowcase (*p);
6691
6692   return strdup (path);
6693 #else /* not MSDOS */
6694   linebuffer path;
6695   FILE *pipe;
6696
6697   linebuffer_init (&path);
6698   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6699   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6700     pfatal ("pwd");
6701   pclose (pipe);
6702
6703   return path.buffer;
6704 #endif /* not MSDOS */
6705 #endif /* not HAVE_GETCWD */
6706 }
6707
6708 /* Return a newly allocated string containing the file name of FILE
6709    relative to the absolute directory DIR (which should end with a slash). */
6710 static char *
6711 relative_filename (file, dir)
6712      char *file, *dir;
6713 {
6714   char *fp, *dp, *afn, *res;
6715   int i;
6716
6717   /* Find the common root of file and dir (with a trailing slash). */
6718   afn = absolute_filename (file, cwd);
6719   fp = afn;
6720   dp = dir;
6721   while (*fp++ == *dp++)
6722     continue;
6723   fp--, dp--;                   /* back to the first differing char */
6724 #ifdef DOS_NT
6725   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6726     return afn;
6727 #endif
6728   do                            /* look at the equal chars until '/' */
6729     fp--, dp--;
6730   while (*fp != '/');
6731
6732   /* Build a sequence of "../" strings for the resulting relative file name. */
6733   i = 0;
6734   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6735     i += 1;
6736   res = xnew (3*i + strlen (fp + 1) + 1, char);
6737   res[0] = '\0';
6738   while (i-- > 0)
6739     strcat (res, "../");
6740
6741   /* Add the file name relative to the common root of file and dir. */
6742   strcat (res, fp + 1);
6743   free (afn);
6744
6745   return res;
6746 }
6747
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute it is appended to DIR.  Then the
   "/dirname/.." and "/." components are removed in place.  Should the
   result end up empty, "/" is returned instead. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy must not be
                 used here.  The +1 moves the terminating NUL too. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping move again: drop the "/." component. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6811
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place; when it contains no
   slash at all the result is simply a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6833
/* Tell whether FN is an absolute file name, that is whether it begins
   with a slash or, under DOS_NT only, with a drive letter followed by
   ":/".  The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  /* No leading slash: only the `d:/' form can still qualify. */
  if (fn[0] != '/')
    return ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/';
#endif
  return fn[0] == '/';
}
6846
/* Put the file name FN in canonical form, working in place.  Under
   DOS_NT a lowercase drive letter is made uppercase and backslashes
   are translated into slashes; elsewhere FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* Nothing to do here; the assignment only keeps the compiler quiet
     about the unused argument. */
  fn = NULL;
#endif
}
6865
6866 \f
6867 /* Initialize a linebuffer for use */
6868 static void
6869 linebuffer_init (lbp)
6870      linebuffer *lbp;
6871 {
6872   lbp->size = (DEBUG) ? 3 : 200;
6873   lbp->buffer = xnew (lbp->size, char);
6874   lbp->buffer[0] = '\0';
6875   lbp->len = 0;
6876 }
6877
6878 /* Set the minimum size of a string contained in a linebuffer. */
6879 static void
6880 linebuffer_setlen (lbp, toksize)
6881      linebuffer *lbp;
6882      int toksize;
6883 {
6884   while (lbp->size <= toksize)
6885     {
6886       lbp->size *= 2;
6887       xrnew (lbp->buffer, lbp->size, char);
6888     }
6889   lbp->len = toksize;
6890 }
6891
6892 /* Like malloc but get fatal error if memory is exhausted. */
6893 static PTR
6894 xmalloc (size)
6895      unsigned int size;
6896 {
6897   PTR result = (PTR) malloc (size);
6898   if (result == NULL)
6899     fatal ("virtual memory exhausted", (char *)NULL);
6900   return result;
6901 }
6902
6903 static PTR
6904 xrealloc (ptr, size)
6905      char *ptr;
6906      unsigned int size;
6907 {
6908   PTR result = (PTR) realloc (ptr, size);
6909   if (result == NULL)
6910     fatal ("virtual memory exhausted", (char *)NULL);
6911   return result;
6912 }
6913
6914 /*
6915  * Local Variables:
6916  * indent-tabs-mode: t
6917  * tab-width: 8
6918  * fill-column: 79
6919  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6920  * End:
6921  */
6922
6923 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6924    (do not change this comment) */
6925
6926 /* etags.c ends here */