X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/b2db879bf58809ab78845c7f8932d58041a1ff67..71cbb89543ea21a9edea72cb24d2df6e714b60db:/lib-src/etags.c diff --git a/lib-src/etags.c b/lib-src/etags.c index 9a17863c45..e090207782 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -1,6 +1,7 @@ /* Tags file maker to go with GNU Emacs - Copyright (C) 1984, 87, 88, 89, 93, 94, 95 + Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99 Free Software Foundation, Inc. and Ken Arnold + This file is not considered part of GNU Emacs. This program is free software; you can redistribute it and/or modify @@ -14,8 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +along with this program; if not, write to the Free Software Foundation, +Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * Authors: @@ -25,45 +26,90 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ * Gnu Emacs TAGS format and modifications by RMS? * Sam Kendall added C++. * Francesco Potorti` reorganised C and C++ based on work by Joe Wells. -#ifdef ETAGS_REGEXPS * Regexp tags by Tom Tromey. -#endif * - * Francesco Potorti` (pot@cnuce.cnr.it) is the current maintainer. + * Francesco Potorti` (pot@gnu.org) is the current maintainer. */ -char pot_etags_version[] = "@(#) pot revision number is 11.30"; +char pot_etags_version[] = "@(#) pot revision number is 13.44"; #define TRUE 1 #define FALSE 0 + #ifndef DEBUG # define DEBUG FALSE #endif -#ifdef MSDOS -#include -#include -#endif /* MSDOS */ - -#ifdef WINDOWSNT -#include -#include -#include -#define MAXPATHLEN _MAX_PATH +#if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C)) +# define P_(proto) proto +#else +# define P_(proto) () #endif #ifdef HAVE_CONFIG_H -#include -/* On some systems, Emacs defines static as nothing for the sake - of unexec. We don't want that here since we don't use unexec. */ -#undef static +# include + /* On some systems, Emacs defines static as nothing for the sake + of unexec. We don't want that here since we don't use unexec. */ +# undef static +# define ETAGS_REGEXPS /* use the regexp features */ +# define LONG_OPTIONS /* accept long options */ +#endif /* HAVE_CONFIG_H */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 /* enables some compiler checks on GNU */ #endif +#ifdef MSDOS +# undef MSDOS +# define MSDOS TRUE +# include +# include +# include +# ifndef HAVE_CONFIG_H +# define DOS_NT +# include +# endif +#else +# define MSDOS FALSE +#endif /* MSDOS */ + +#ifdef WINDOWSNT +# include +# include +# include +# include +# include +# define MAXPATHLEN _MAX_PATH +# ifdef HAVE_CONFIG_H +# undef HAVE_NTGUI +# else +# define DOS_NT +# endif /* not HAVE_CONFIG_H */ +# ifndef HAVE_GETCWD +# define HAVE_GETCWD +# endif /* undef HAVE_GETCWD */ +#else /* !WINDOWSNT */ +# ifdef STDC_HEADERS +# include +# include +# else + extern char *getenv (); +# endif +#endif /* !WINDOWSNT */ + +#ifdef HAVE_UNISTD_H +# include +#else +# if defined (HAVE_GETCWD) && !WINDOWSNT + extern char *getcwd (char *buf, size_t size); +# endif +#endif /* HAVE_UNISTD_H */ + #include #include #include #ifndef errno -extern int errno; + extern int errno; #endif #include #include @@ -72,14 +118,20 @@ extern int errno; # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif -#include +#ifdef LONG_OPTIONS +# include +#else +# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr) + extern char *optarg; + extern int optind, opterr; +#endif /* LONG_OPTIONS */ #ifdef ETAGS_REGEXPS -#include +# include #endif /* ETAGS_REGEXPS */ /* Define CTAGS to make the program "ctags" compatible with the usual one. - Let it undefined to make the program "etags", which makes emacs-style + Leave it undefined to make the program "etags", which makes emacs-style tag tables and tags typedefs, #defines and struct/union/enum by default. */ #ifdef CTAGS # undef CTAGS @@ -90,392 +142,510 @@ extern int errno; /* Exit codes for success and failure. */ #ifdef VMS -#define GOOD 1 -#define BAD 0 +# define GOOD 1 +# define BAD 0 #else -#define GOOD 0 -#define BAD 1 +# define GOOD 0 +# define BAD 1 #endif /* C extensions. */ #define C_PLPL 0x00001 /* C++ */ #define C_STAR 0x00003 /* C* */ +#define C_JAVA 0x00005 /* JAVA */ #define YACC 0x10000 /* yacc file */ -#define streq(s,t) (strcmp (s, t) == 0) -#define strneq(s,t,n) (strncmp (s, t, n) == 0) +#define streq(s,t) ((DEBUG && (s) == NULL && (t) == NULL \ + && (abort (), 1)) || !strcmp (s, t)) +#define strneq(s,t,n) ((DEBUG && (s) == NULL && (t) == NULL \ + && (abort (), 1)) || !strncmp (s, t, n)) -#define iswhite(arg) (_wht[arg]) /* T if char is white */ -#define begtoken(arg) (_btk[arg]) /* T if char can start token */ -#define intoken(arg) (_itk[arg]) /* T if char can be in token */ -#define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ +#define lowcase(c) tolower ((char)c) -#ifdef DOS_NT -# define absolutefn(fn) (fn[0] == '/' || (isalpha (fn[0]) && fn[1] == ':')) -#else -# define absolutefn(fn) (fn[0] == '/') -#endif +#define CHARS 256 /* 2^sizeof(char) */ +#define CHAR(x) ((unsigned int)x & (CHARS - 1)) +#define iswhite(c) (_wht[CHAR(c)]) /* c is white */ +#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */ +#define begtoken(c) (_btk[CHAR(c)]) /* c can start token */ +#define intoken(c) (_itk[CHAR(c)]) /* c can be in token */ +#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */ /* - * xnew -- allocate storage + * xnew, xrnew -- allocate, reallocate storage * * SYNOPSIS: Type *xnew (int n, Type); + * Type *xrnew (OldPointer, int n, Type); */ -#define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type))) +#ifdef chkmalloc +# include "chkmalloc.h" +# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \ + (n) * sizeof (Type))) +# define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \ + (op), (n) * sizeof (Type))) +#else +# define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type))) +# define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type))) +#endif + +typedef int bool; + +typedef void Lang_function P_((FILE *)); -typedef int logical; +typedef struct +{ + char *suffix; + char *command; /* Takes one arg and decompresses to stdout */ +} compressor; + +typedef struct +{ + char *name; + Lang_function *function; + char **suffixes; + char **interpreters; +} language; -typedef struct nd_st -{ /* sorting structure */ +typedef struct node_st +{ /* sorting structure */ char *name; /* function or type name */ char *file; /* file name */ - logical is_func; /* use pattern or line no */ - logical named; /* list name separately */ - logical been_warned; /* set if noticed dup */ + bool is_func; /* use pattern or line no */ + bool been_warned; /* set if noticed dup */ int lno; /* line number tag is on */ long cno; /* character number line starts on */ char *pat; /* search pattern */ - struct nd_st *left, *right; /* left and right sons */ -} NODE; - -extern char *getenv (); - -char *concat (); -char *savenstr (), *savestr (); -char *etags_strchr (), *etags_strrchr (); -char *etags_getcwd (); -char *relative_filename (), *absolute_filename (), *absolute_dirname (); -long *xmalloc (), *xrealloc (); - -typedef void Lang_function (); -#if FALSE /* many compilers barf on this */ -Lang_function Asm_labels; -Lang_function default_C_entries; -Lang_function C_entries; -Lang_function Cplusplus_entries; -Lang_function Cstar_entries; -Lang_function Fortran_functions; -Lang_function Yacc_entries; -Lang_function Lisp_functions; -Lang_function Pascal_functions; -Lang_function Prolog_functions; -Lang_function Scheme_functions; -Lang_function TeX_functions; -Lang_function just_read_file; -#else /* so let's write it this way */ -void Asm_labels (); -void default_C_entries (); -void C_entries (); -void Cplusplus_entries (); -void Cstar_entries (); -void Fortran_functions (); -void Yacc_entries (); -void Lisp_functions (); -void Pascal_functions (); -void Prolog_functions (); -void Scheme_functions (); -void TeX_functions (); -void just_read_file (); -#endif + struct node_st *left, *right; /* left and right sons */ +} node; + +/* + * A `linebuffer' is a structure which holds a line of text. + * `readline_internal' reads a line from a stream into a linebuffer + * and works regardless of the length of the line. + * SIZE is the size of BUFFER, LEN is the length of the string in + * BUFFER after readline reads it. + */ +typedef struct +{ + long size; + int len; + char *buffer; +} linebuffer; + +/* Many compilers barf on this: + Lang_function Ada_funcs; + so let's write it this way */ +static void Ada_funcs P_((FILE *)); +static void Asm_labels P_((FILE *)); +static void C_entries P_((int c_ext, FILE *)); +static void default_C_entries P_((FILE *)); +static void plain_C_entries P_((FILE *)); +static void Cjava_entries P_((FILE *)); +static void Cobol_paragraphs P_((FILE *)); +static void Cplusplus_entries P_((FILE *)); +static void Cstar_entries P_((FILE *)); +static void Erlang_functions P_((FILE *)); +static void Fortran_functions P_((FILE *)); +static void Yacc_entries P_((FILE *)); +static void Lisp_functions P_((FILE *)); +static void Pascal_functions P_((FILE *)); +static void Perl_functions P_((FILE *)); +static void Postscript_functions P_((FILE *)); +static void Prolog_functions P_((FILE *)); +static void Python_functions P_((FILE *)); +static void Scheme_functions P_((FILE *)); +static void TeX_functions P_((FILE *)); +static void just_read_file P_((FILE *)); + +static void print_language_names P_((void)); +static void print_version P_((void)); +static void print_help P_((void)); +int main P_((int, char **)); +static int number_len P_((long)); + +static compressor *get_compressor_from_suffix P_((char *, char **)); +static language *get_language_from_name P_((char *)); +static language *get_language_from_interpreter P_((char *)); +static language *get_language_from_suffix P_((char *)); +static int total_size_of_entries P_((node *)); +static long readline P_((linebuffer *, FILE *)); +static long readline_internal P_((linebuffer *, FILE *)); +static void get_tag P_((char *)); -logical get_language (); -int total_size_of_entries (); -long readline (); -long readline_internal (); #ifdef ETAGS_REGEXPS -void add_regex (); -#endif -void add_node (); -void error (); -void fatal (), pfatal (); -void find_entries (); -void free_tree (); -void getit (); -void init (); -void initbuffer (); -void pfnote (); -void process_file (); -void put_entries (); -void takeprec (); +static void analyse_regex P_((char *, bool)); +static void add_regex P_((char *, bool, language *)); +static void free_patterns P_((void)); +#endif /* ETAGS_REGEXPS */ +static void error P_((const char *, const char *)); +static void suggest_asking_for_help P_((void)); +static void fatal P_((char *, char *)); +static void pfatal P_((char *)); +static void add_node P_((node *, node **)); + +static void init P_((void)); +static void initbuffer P_((linebuffer *)); +static void find_entries P_((char *, FILE *)); +static void free_tree P_((node *)); +static void pfnote P_((char *, bool, char *, int, int, long)); +static void new_pfnote P_((char *, int, bool, char *, int, int, long)); +static void process_file P_((char *)); +static void put_entries P_((node *)); +static void takeprec P_((void)); + +static char *concat P_((char *, char *, char *)); +static char *skip_spaces P_((char *)); +static char *skip_non_spaces P_((char *)); +static char *savenstr P_((char *, int)); +static char *savestr P_((char *)); +static char *etags_strchr P_((const char *, int)); +static char *etags_strrchr P_((const char *, int)); +static char *etags_getcwd P_((void)); +static char *relative_filename P_((char *, char *)); +static char *absolute_filename P_((char *, char *)); +static char *absolute_dirname P_((char *, char *)); +static bool filename_is_absolute P_((char *f)); +static void canonicalize_filename P_((char *)); +static void grow_linebuffer P_((linebuffer *, int)); +static long *xmalloc P_((unsigned int)); +static long *xrealloc P_((char *, unsigned int)); char searchar = '/'; /* use /.../ searches */ -int lineno; /* line number of current line */ -long charno; /* current character number */ - -long linecharno; /* charno of start of line; not used by C, - but by every other language. */ - -char *curfile; /* current input file name */ char *tagfile; /* output file */ char *progname; /* name this program was invoked with */ char *cwd; /* current working directory */ char *tagfiledir; /* directory of tagfile */ - FILE *tagf; /* ioptr for tags file */ -NODE *head; /* the head of the binary tree of tags */ -/* - * A `struct linebuffer' is a structure which holds a line of text. - * `readline' reads a line from a stream into a linebuffer and works - * regardless of the length of the line. - */ -struct linebuffer -{ - long size; - char *buffer; -}; +char *curfile; /* current input file name */ +language *curlang; /* current language */ + +int lineno; /* line number of current line */ +long charno; /* current character number */ +long linecharno; /* charno of start of current line */ +char *dbp; /* pointer to start of current tag */ -struct linebuffer lb; /* the current line */ -struct linebuffer token_name; /* used by C_entries as temporary area */ +node *head; /* the head of the binary tree of tags */ + +linebuffer lb; /* the current line */ +linebuffer token_name; /* used by C_entries as a temporary area */ struct { long linepos; - struct linebuffer lb; /* used by C_entries instead of lb */ + linebuffer lb; /* used by C_entries instead of lb */ } lbs[2]; /* boolean "functions" (see init) */ -logical _wht[0177], _etk[0177], _itk[0177], _btk[0177]; +bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; char - *white = " \f\t\n\013", /* white chars */ - *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token ending chars */ - /* token starting chars */ - *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~", - /* valid in-token chars */ - *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789"; - -logical append_to_tagfile; /* -a: append to tags */ -/* The following three default to TRUE for etags, but to FALSE for ctags. */ -logical typedefs; /* -t: create tags for typedefs */ -logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */ + /* white chars */ + *white = " \f\t\n\r\v", + /* not in a name */ + *nonam = " \f\t\n\r(=,[;", + /* token ending chars */ + *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?", + /* token starting chars */ + *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@", + /* valid in-token chars */ + *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789"; + +bool append_to_tagfile; /* -a: append to tags */ +/* The following four default to TRUE for etags, but to FALSE for ctags. */ +bool typedefs; /* -t: create tags for C and Ada typedefs */ +bool typedefs_and_cplusplus; /* -T: create tags for C typedefs, level */ /* 0 struct/enum/union decls, and C++ */ /* member functions. */ -logical constantypedefs; /* -d: create tags for C #define and enum */ - /* constants. Enum consts not implemented. */ +bool constantypedefs; /* -d: create tags for C #define, enum */ + /* constants and variables. */ /* -D: opposite of -d. Default under ctags. */ -logical update; /* -u: update tags */ -logical vgrind_style; /* -v: create vgrind style index output */ -logical no_warnings; /* -w: suppress warnings */ -logical cxref_style; /* -x: create cxref style output */ -logical cplusplus; /* .[hc] means C++, not C */ -logical noindentypedefs; /* -I: ignore indentation in C */ -#define permit_duplicates TRUE /* allow duplicate tags */ - +bool declarations; /* --declarations: tag them and extern in C&Co*/ +bool globals; /* create tags for global variables */ +bool members; /* create tags for C member variables */ +bool update; /* -u: update tags */ +bool vgrind_style; /* -v: create vgrind style index output */ +bool no_warnings; /* -w: suppress warnings */ +bool cxref_style; /* -x: create cxref style output */ +bool cplusplus; /* .[hc] means C++, not C */ +bool noindentypedefs; /* -I: ignore indentation in C */ +bool packages_only; /* --packages-only: in Ada, only tag packages*/ + +#ifdef LONG_OPTIONS struct option longopts[] = { - { "append", no_argument, NULL, 'a' }, - { "backward-search", no_argument, NULL, 'B' }, - { "c++", no_argument, NULL, 'C' }, - { "cxref", no_argument, NULL, 'x' }, - { "defines", no_argument, NULL, 'd' }, - { "help", no_argument, NULL, 'h' }, - { "help", no_argument, NULL, 'H' }, - { "ignore-indentation", no_argument, NULL, 'I' }, - { "include", required_argument, NULL, 'i' }, - { "language", required_argument, NULL, 'l' }, - { "no-defines", no_argument, NULL, 'D' }, - { "no-regex", no_argument, NULL, 'R' }, - { "no-warn", no_argument, NULL, 'w' }, - { "output", required_argument, NULL, 'o' }, - { "regex", required_argument, NULL, 'r' }, - { "typedefs", no_argument, NULL, 't' }, - { "typedefs-and-c++", no_argument, NULL, 'T' }, - { "update", no_argument, NULL, 'u' }, - { "version", no_argument, NULL, 'V' }, - { "vgrind", no_argument, NULL, 'v' }, - { 0 } + { "packages-only", no_argument, &packages_only, TRUE }, + { "append", no_argument, NULL, 'a' }, + { "backward-search", no_argument, NULL, 'B' }, + { "c++", no_argument, NULL, 'C' }, + { "cxref", no_argument, NULL, 'x' }, + { "defines", no_argument, NULL, 'd' }, + { "declarations", no_argument, &declarations, TRUE }, + { "no-defines", no_argument, NULL, 'D' }, + { "globals", no_argument, &globals, TRUE }, + { "no-globals", no_argument, &globals, FALSE }, + { "help", no_argument, NULL, 'h' }, + { "help", no_argument, NULL, 'H' }, + { "ignore-indentation", no_argument, NULL, 'I' }, + { "include", required_argument, NULL, 'i' }, + { "language", required_argument, NULL, 'l' }, + { "members", no_argument, &members, TRUE }, + { "no-members", no_argument, &members, FALSE }, + { "no-warn", no_argument, NULL, 'w' }, + { "output", required_argument, NULL, 'o' }, +#ifdef ETAGS_REGEXPS + { "regex", required_argument, NULL, 'r' }, + { "no-regex", no_argument, NULL, 'R' }, + { "ignore-case-regex", required_argument, NULL, 'c' }, +#endif /* ETAGS_REGEXPS */ + { "typedefs", no_argument, NULL, 't' }, + { "typedefs-and-c++", no_argument, NULL, 'T' }, + { "update", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, 'V' }, + { "vgrind", no_argument, NULL, 'v' }, + { NULL } }; +#endif /* LONG_OPTIONS */ #ifdef ETAGS_REGEXPS -/* Structure defining a regular expression. Elements are +/* Structure defining a regular expression. Elements are the compiled pattern, and the name string. */ -struct pattern +typedef struct pattern { + struct pattern *p_next; + language *language; + char *regex; struct re_pattern_buffer *pattern; struct re_registers regs; char *name_pattern; - logical error_signaled; -}; + bool error_signaled; +} pattern; -/* Number of regexps found. */ -int num_patterns = 0; +/* List of all regexps. */ +pattern *p_head = NULL; -/* Array of all regexps. */ -struct pattern *patterns = NULL; +/* How many characters in the character set. (From regex.c.) */ +#define CHAR_SET_SIZE 256 +/* Translation table for case-insensitive matching. */ +char lc_trans[CHAR_SET_SIZE]; #endif /* ETAGS_REGEXPS */ -/* Language stuff. */ -struct lang_entry +compressor compressors[] = { - char *extension; - Lang_function *function; + { "z", "gzip -d -c"}, + { "Z", "gzip -d -c"}, + { "gz", "gzip -d -c"}, + { "GZ", "gzip -d -c"}, + { "bz2", "bzip2 -d -c" }, + { NULL } }; -/* Table of language names and corresponding functions. */ -/* It is ok for a given function to be listed under more than one - name. I just didn't. */ -/* "auto" language reverts to default behavior. */ -struct lang_entry lang_names[] = -{ - { "asm", Asm_labels }, - { "c", default_C_entries }, - { "c++", Cplusplus_entries }, - { "c*", Cstar_entries }, - { "fortran", Fortran_functions }, - { "lisp", Lisp_functions }, - { "none", just_read_file }, - { "pascal", Pascal_functions }, - { "scheme" , Scheme_functions }, - { "tex", TeX_functions }, - { "auto", NULL }, - { NULL, NULL } -}; - -/* Table of file extensions and corresponding language functions. */ -struct lang_entry lang_extensions[] = -{ - /* Assume that ".s" or ".a" is assembly code. -wolfgang. - Or even ".sa". */ - { "a", Asm_labels }, /* Unix assembler */ - { "asm", Asm_labels }, /* Microcontroller assembly */ - { "def", Asm_labels }, /* BSO/Tasking definition includes */ - { "inc", Asm_labels }, /* Microcontroller include files */ - { "ins", Asm_labels }, /* Microcontroller include files */ - { "s", Asm_labels }, - { "sa", Asm_labels }, /* Unix assembler */ - { "src", Asm_labels }, /* BSO/Tasking C compiler output */ - - /* .aux, .bbl, .clo, .cls, .dtx or .tex implies LaTeX source code. */ - { "aux", TeX_functions }, - { "bbl", TeX_functions }, - { "clo", TeX_functions }, - { "cls", TeX_functions }, - { "dtx", TeX_functions }, - { "sty", TeX_functions }, - { "tex", TeX_functions }, - - /* .l or .el or .lisp (or .cl or .clisp or ...) implies lisp source code */ - { "cl", Lisp_functions }, - { "clisp", Lisp_functions }, - { "el", Lisp_functions }, - { "l", Lisp_functions }, - { "lisp", Lisp_functions }, - { "lsp", Lisp_functions }, - - /* .scm or .sm or .scheme implies scheme source code */ - { "SCM", Scheme_functions }, - { "SM", Scheme_functions }, - { "oak", Scheme_functions }, - { "sch", Scheme_functions }, - { "scheme", Scheme_functions }, - { "scm", Scheme_functions }, - { "sm", Scheme_functions }, - { "t", Scheme_functions }, - /* FIXME Can't do the `SCM' or `scm' prefix with a version number */ - - /* Note that ".c" and ".h" can be considered C++, if the --c++ - flag was given. That is why default_C_entries is called here. */ - { "c", default_C_entries }, - { "h", default_C_entries }, - - /* .C or .H or .c++ or .cc or .cpp or .cxx or .h++ or .hh or .hxx: - a C++ file */ - { "C", Cplusplus_entries }, - { "H", Cplusplus_entries }, - { "c++", Cplusplus_entries }, - { "cc", Cplusplus_entries }, - { "cpp", Cplusplus_entries }, - { "cxx", Cplusplus_entries }, - { "h++", Cplusplus_entries }, - { "hh", Cplusplus_entries }, - { "hxx", Cplusplus_entries }, - - /* .y: a yacc file */ - { "y", Yacc_entries }, - - /* .cs or .hs: a C* file */ - { "cs", Cstar_entries }, - { "hs", Cstar_entries }, - - /* .F, .f and .for are FORTRAN. */ - { "F", Fortran_functions }, - { "f", Fortran_functions }, - { "for", Fortran_functions }, - - /* .pl implies prolog source code */ - { "pl", Prolog_functions }, - - /* .p or .pas: a Pascal file */ - { "p", Pascal_functions }, - { "pas", Pascal_functions }, - - { NULL, NULL } -}; +/* + * Language stuff. + */ /* Non-NULL if language fixed. */ -Lang_function *lang_func = NULL; +language *forced_lang = NULL; + +/* Ada code */ +char *Ada_suffixes [] = + { "ads", "adb", "ada", NULL }; + +/* Assembly code */ +char *Asm_suffixes [] = { "a", /* Unix assembler */ + "asm", /* Microcontroller assembly */ + "def", /* BSO/Tasking definition includes */ + "inc", /* Microcontroller include files */ + "ins", /* Microcontroller include files */ + "s", "sa", /* Unix assembler */ + "S", /* cpp-processed Unix assembler */ + "src", /* BSO/Tasking C compiler output */ + NULL + }; + +/* Note that .c and .h can be considered C++, if the --c++ flag was + given. That is why default_C_entries is called here. */ +char *default_C_suffixes [] = + { "c", "h", NULL }; + +char *Cplusplus_suffixes [] = + { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx", + "M", /* Objective C++ */ + "pdb", /* Postscript with C syntax */ + NULL }; + +char *Cjava_suffixes [] = + { "java", NULL }; + +char *Cobol_suffixes [] = + { "COB", "cob", NULL }; + +char *Cstar_suffixes [] = + { "cs", "hs", NULL }; + +char *Erlang_suffixes [] = + { "erl", "hrl", NULL }; + +char *Fortran_suffixes [] = + { "F", "f", "f90", "for", NULL }; + +char *Lisp_suffixes [] = + { "cl", "clisp", "el", "l", "lisp", "lsp", "ml", NULL }; + +char *Pascal_suffixes [] = + { "p", "pas", NULL }; +char *Perl_suffixes [] = + { "pl", "pm", NULL }; +char *Perl_interpreters [] = + { "perl", "@PERL@", NULL }; + +char *plain_C_suffixes [] = + { "pc", /* Pro*C file */ + "m", /* Objective C file */ + "lm", /* Objective lex file */ + NULL }; + +char *Postscript_suffixes [] = + { "ps", "psw", NULL }; /* .psw is for PSWrap */ + +char *Prolog_suffixes [] = + { "prolog", NULL }; + +char *Python_suffixes [] = + { "py", NULL }; + +/* Can't do the `SCM' or `scm' prefix with a version number. */ +char *Scheme_suffixes [] = + { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL }; + +char *TeX_suffixes [] = + { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL }; + +char *Yacc_suffixes [] = + { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */ + +/* + * Table of languages. + * + * It is ok for a given function to be listed under more than one + * name. I just didn't. + */ + +language lang_names [] = +{ + { "ada", Ada_funcs, Ada_suffixes, NULL }, + { "asm", Asm_labels, Asm_suffixes, NULL }, + { "c", default_C_entries, default_C_suffixes, NULL }, + { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL }, + { "c*", Cstar_entries, Cstar_suffixes, NULL }, + { "cobol", Cobol_paragraphs, Cobol_suffixes, NULL }, + { "erlang", Erlang_functions, Erlang_suffixes, NULL }, + { "fortran", Fortran_functions, Fortran_suffixes, NULL }, + { "java", Cjava_entries, Cjava_suffixes, NULL }, + { "lisp", Lisp_functions, Lisp_suffixes, NULL }, + { "pascal", Pascal_functions, Pascal_suffixes, NULL }, + { "perl", Perl_functions, Perl_suffixes, Perl_interpreters }, + { "postscript", Postscript_functions, Postscript_suffixes, NULL }, + { "proc", plain_C_entries, plain_C_suffixes, NULL }, + { "prolog", Prolog_functions, Prolog_suffixes, NULL }, + { "python", Python_functions, Python_suffixes, NULL }, + { "scheme", Scheme_functions, Scheme_suffixes, NULL }, + { "tex", TeX_functions, TeX_suffixes, NULL }, + { "yacc", Yacc_entries, Yacc_suffixes, NULL }, + { "auto", NULL }, /* default guessing scheme */ + { "none", just_read_file }, /* regexp matching only */ + { NULL, NULL } /* end of list */ +}; -void +static void print_language_names () { - struct lang_entry *name, *ext; + language *lang; + char **ext; puts ("\nThese are the currently supported languages, along with the\n\ -default extensions for files:"); - for (name = lang_names; name->extension; ++name) +default file name suffixes:"); + for (lang = lang_names; lang->name != NULL; lang++) { - printf ("\t%s\t", name->extension); - for (ext = lang_extensions; ext->extension; ++ext) - if (name->function == ext->function) - printf (" .%s", ext->extension); + printf ("\t%s\t", lang->name); + if (lang->suffixes != NULL) + for (ext = lang->suffixes; *ext != NULL; ext++) + printf (" .%s", *ext); puts (""); } - puts ("Where `auto' means use default language for files based on filename\n\ -extension, and `none' means only do regexp processing on files.\n\ -If no language is specified and no extension is found for some file,\n\ -Fortran is tried first; if no tags are found, C is tried next."); + puts ("Where `auto' means use default language for files based on file\n\ +name suffix, and `none' means only do regexp processing on files.\n\ +If no language is specified and no matching suffix is found,\n\ +the first line of the file is read for a sharp-bang (#!) sequence\n\ +followed by the name of an interpreter. If no such sequence is found,\n\ +Fortran is tried first; if no tags are found, C is tried next.\n\ +Compressed files are supported using gzip and bzip2."); } -void +#ifndef EMACS_NAME +# define EMACS_NAME "GNU Emacs" +#endif +#ifndef VERSION +# define VERSION "21" +#endif +static void print_version () { -#ifdef VERSION - printf ("%s for Emacs version %s.\n", (CTAGS) ? "CTAGS" : "ETAGS", VERSION); -#else - printf ("%s for Emacs version 19.\n", (CTAGS) ? "CTAGS" : "ETAGS"); -#endif + printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION); + puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold"); + puts ("This program is distributed under the same terms as Emacs"); exit (GOOD); } -void +static void print_help () { - printf ("These are the options accepted by %s. You may use unambiguous\n\ -abbreviations for the long option names. A - as file name means read\n\ -names from stdin.\n\n", progname); + printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\ +\n\ +These are the options accepted by %s.\n", progname, progname); +#ifdef LONG_OPTIONS + puts ("You may use unambiguous abbreviations for the long option names."); +#else + puts ("Long option names do not work with this executable, as it is not\n\ +linked with GNU getopt."); +#endif /* LONG_OPTIONS */ + puts ("A - as file name means read names from stdin (one per line)."); + if (!CTAGS) + printf (" Absolute names are stored in the output file as they are.\n\ +Relative ones are stored relative to the output file's directory."); + puts ("\n"); puts ("-a, --append\n\ Append tag entries to existing tags file."); + puts ("--packages-only\n\ + For Ada files, only generate tags for packages ."); + if (CTAGS) puts ("-B, --backward-search\n\ Write the search commands for the tag entries using '?', the\n\ backward-search command instead of '/', the forward-search command."); puts ("-C, --c++\n\ - Treat files whose extension defaults to C language as C++ files."); + Treat files whose name suffix defaults to C language as C++ files."); + + puts ("--declarations\n\ + In C and derived languages, create tags for function declarations,"); + if (CTAGS) + puts ("\tand create tags for extern variables if --globals is used."); + else + puts + ("\tand create tags for extern variables unless --no-globals is used."); if (CTAGS) puts ("-d, --defines\n\ - Create tag entries for constant C #defines, too."); + Create tag entries for C #define constants and enum constants, too."); else puts ("-D, --no-defines\n\ - Don't create tag entries for constant C #defines. This makes\n\ - the tags file smaller."); + Don't create tag entries for C #define constants and enum constants.\n\ + This makes the tags file smaller."); if (!CTAGS) { @@ -488,13 +658,27 @@ names from stdin.\n\n", progname); named language up to the next --language=LANG option."); } + if (CTAGS) + puts ("--globals\n\ + Create tag entries for global variables in some languages."); + else + puts ("--no-globals\n\ + Do not create tag entries for global variables in some\n\ + languages. This makes the tags file smaller."); + puts ("--members\n\ + Create tag entries for member variables in C and derived languages."); + #ifdef ETAGS_REGEXPS - puts ("-r /REGEXP/, --regex=/REGEXP/\n\ - Make a tag for each line matching pattern REGEXP in the\n\ - following files. REGEXP is anchored (as if preceded by ^).\n\ - The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\ - named tags can be created with:\n\ + puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\ + Make a tag for each line matching pattern REGEXP in the following\n\ + files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\ + regexfile is a file containing one REGEXP per line.\n\ + REGEXP is anchored (as if preceded by ^).\n\ + The form /REGEXP/NAME/ creates a named tag.\n\ + For example Tcl named tags can be created with:\n\ --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/."); + puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\ + Like -r, --regex but ignore case when matching expressions."); puts ("-R, --no-regex\n\ Don't create tags from regexps for the following files."); #endif /* ETAGS_REGEXPS */ @@ -509,7 +693,7 @@ names from stdin.\n\n", progname); if (CTAGS) { puts ("-t, --typedefs\n\ - Generate tag entries for C typedefs."); + Generate tag entries for C and Ada typedefs."); puts ("-T, --typedefs-and-c++\n\ Generate tag entries for C typedefs, C struct/enum/union tags,\n\ and C++ member functions."); @@ -541,6 +725,9 @@ names from stdin.\n\n", progname); print_language_names (); + puts (""); + puts ("Report bugs to bug-gnu-emacs@gnu.org"); + exit (GOOD); } @@ -549,16 +736,17 @@ enum argument_type { at_language, at_regexp, - at_filename + at_filename, + at_icregexp }; -/* This structure helps us allow mixing of --lang and filenames. */ +/* This structure helps us allow mixing of --lang and file names. */ typedef struct { enum argument_type arg_type; char *what; - Lang_function *function; -} ARGUMENT; + language *lang; /* language of the regexp */ +} argument; #ifdef VMS /* VMS specific functions */ @@ -574,25 +762,25 @@ typedef struct { /* v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names - returning in each successive call the next filename matching the input + returning in each successive call the next file name matching the input spec. The function expects that each in_spec passed to it will be processed to completion; in particular, up to and including the call following that in which the last matching name is returned, the function ignores the value of in_spec, and will - only start processing a new spec with the following call. + only start processing a new spec with the following call. If an error occurs, on return out_spec contains the value of in_spec when the error occurred. - With each successive filename returned in out_spec, the + With each successive file name returned in out_spec, the function's return value is one. When there are no more matching names the function returns zero. If on the first call no file - matches in_spec, or there is any other error, -1 is returned. + matches in_spec, or there is any other error, -1 is returned. */ #include #include #define OUTSIZE MAX_FILE_SPEC_LEN -short +static short fn_exp (out, in) vspec *out; char *in; @@ -600,7 +788,7 @@ fn_exp (out, in) static long context = 0; static struct dsc$descriptor_s o; static struct dsc$descriptor_s i; - static logical pass1 = TRUE; + static bool pass1 = TRUE; long status; short retval; @@ -631,16 +819,16 @@ fn_exp (out, in) lib$find_file_end(&context); pass1 = TRUE; return retval; -} +} /* - v1.01 nmm 19-Aug-85 gfnames - return in successive calls the + v1.01 nmm 19-Aug-85 gfnames - return in successive calls the name of each file specified by the provided arg expanding wildcards. */ -char * +static char * gfnames (arg, p_error) char *arg; - logical *p_error; + bool *p_error; { static vspec filename = {MAX_FILE_SPEC_LEN, "\0"}; @@ -662,7 +850,7 @@ gfnames (arg, p_error) system (cmd) char *cmd; { - fprintf (stderr, "system() function not implemented under VMS\n"); + error ("%s", "system() function not implemented under VMS"); } #endif @@ -670,7 +858,7 @@ system (cmd) char *massage_name (s) char *s; { - char *start = s; + char *start = s; for ( ; *s; s++) if (*s == VERSION_DELIM) @@ -679,55 +867,78 @@ char *massage_name (s) break; } else - *s = tolower(*s); + *s = lowcase (*s); return start; } #endif /* VMS */ -void +int main (argc, argv) int argc; char *argv[]; { int i; - unsigned int nincluded_files = 0; - char **included_files = xnew (argc, char *); + unsigned int nincluded_files; + char **included_files; char *this_file; - ARGUMENT *argbuffer; - int current_arg = 0, file_count = 0; - struct linebuffer filename_lb; + argument *argbuffer; + int current_arg, file_count; + linebuffer filename_lb; #ifdef VMS - logical got_err; + bool got_err; #endif - + #ifdef DOS_NT _fmode = O_BINARY; /* all of files are treated as binary files */ #endif /* DOS_NT */ progname = argv[0]; + nincluded_files = 0; + included_files = xnew (argc, char *); + current_arg = 0; + file_count = 0; /* Allocate enough no matter what happens. Overkill, but each one is small. */ - argbuffer = xnew (argc, ARGUMENT); + argbuffer = xnew (argc, argument); #ifdef ETAGS_REGEXPS /* Set syntax for regular expression routines. */ - re_set_syntax (RE_SYNTAX_EMACS); + re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS); + /* Translation table for case-insensitive search. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + lc_trans[i] = lowcase (i); #endif /* ETAGS_REGEXPS */ /* * If etags, always find typedefs and structure tags. Why not? - * Also default is to find macro constants. + * Also default is to find macro constants, enum constants and + * global variables. */ if (!CTAGS) - typedefs = typedefs_and_cplusplus = constantypedefs = TRUE; + { + typedefs = typedefs_and_cplusplus = constantypedefs = TRUE; + globals = TRUE; + members = FALSE; + } while (1) { - int opt = getopt_long (argc, argv, - "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0); + int opt; + char *optstring; + +#ifdef ETAGS_REGEXPS + optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH"; +#else + optstring = "-aCdDf:Il:o:StTi:BuvxwVhH"; +#endif /* ETAGS_REGEXPS */ + +#ifndef LONG_OPTIONS + optstring = optstring + 1; +#endif /* LONG_OPTIONS */ + opt = getopt_long (argc, argv, optstring, longopts, 0); if (opt == EOF) break; @@ -739,7 +950,7 @@ main (argc, argv) break; case 1: - /* This means that a filename has been seen. Record it. */ + /* This means that a file name has been seen. Record it. */ argbuffer[current_arg].arg_type = at_filename; argbuffer[current_arg].what = optarg; ++current_arg; @@ -747,25 +958,16 @@ main (argc, argv) break; /* Common options. */ - case 'a': - append_to_tagfile = TRUE; - break; - case 'C': - cplusplus = TRUE; - break; - case 'd': - constantypedefs = TRUE; - break; - case 'D': - constantypedefs = FALSE; - break; + case 'a': append_to_tagfile = TRUE; break; + case 'C': cplusplus = TRUE; break; + case 'd': constantypedefs = TRUE; break; + case 'D': constantypedefs = FALSE; break; case 'f': /* for compatibility with old makefiles */ case 'o': if (tagfile) { - fprintf (stderr, "%s: -%c option may only be given once.\n", - progname, opt); - goto usage; + error ("-o option may only be given once.", (char *)NULL); + suggest_asking_for_help (); } tagfile = optarg; break; @@ -774,14 +976,15 @@ main (argc, argv) noindentypedefs = TRUE; break; case 'l': - if (!get_language (optarg, &argbuffer[current_arg].function)) - { - fprintf (stderr, "%s: language \"%s\" not recognized.\n", - progname, optarg); - goto usage; - } - argbuffer[current_arg].arg_type = at_language; - ++current_arg; + { + language *lang = get_language_from_name (optarg); + if (lang != NULL) + { + argbuffer[current_arg].lang = lang; + argbuffer[current_arg].arg_type = at_language; + ++current_arg; + } + } break; #ifdef ETAGS_REGEXPS case 'r': @@ -794,6 +997,11 @@ main (argc, argv) argbuffer[current_arg].what = NULL; ++current_arg; break; + case 'c': + argbuffer[current_arg].arg_type = at_icregexp; + argbuffer[current_arg].what = optarg; + ++current_arg; + break; #endif /* ETAGS_REGEXPS */ case 'V': print_version (); @@ -815,24 +1023,14 @@ main (argc, argv) break; #else /* CTAGS */ /* Ctags options. */ - case 'B': - searchar = '?'; - break; - case 'u': - update = TRUE; - break; - case 'v': - vgrind_style = TRUE; - /*FALLTHRU*/ - case 'x': - cxref_style = TRUE; - break; - case 'w': - no_warnings = TRUE; - break; + case 'B': searchar = '?'; break; + case 'u': update = TRUE; break; + case 'v': vgrind_style = TRUE; /*FALLTHRU*/ + case 'x': cxref_style = TRUE; break; + case 'w': no_warnings = TRUE; break; #endif /* CTAGS */ default: - goto usage; + suggest_asking_for_help (); } } @@ -846,28 +1044,23 @@ main (argc, argv) if (nincluded_files == 0 && file_count == 0) { - fprintf (stderr, "%s: No input files specified.\n", progname); - - usage: - fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n", - progname); - exit (BAD); + error ("no input files specified.", (char *)NULL); + suggest_asking_for_help (); } if (tagfile == NULL) - { - tagfile = CTAGS ? "tags" : "TAGS"; - } + tagfile = CTAGS ? "tags" : "TAGS"; cwd = etags_getcwd (); /* the current working directory */ - strcat (cwd, "/"); - if (streq (tagfile, "-")) + if (cwd[strlen (cwd) - 1] != '/') { - tagfiledir = cwd; + char *oldcwd = cwd; + cwd = concat (oldcwd, "/", ""); + free (oldcwd); } + if (streq (tagfile, "-")) + tagfiledir = cwd; else - { - tagfiledir = absolute_dirname (tagfile, cwd); - } + tagfiledir = absolute_dirname (tagfile, cwd); init (); /* set up boolean "functions" */ @@ -880,7 +1073,15 @@ main (argc, argv) if (!CTAGS) { if (streq (tagfile, "-")) - tagf = stdout; + { + tagf = stdout; +#ifdef DOS_NT + /* Switch redirected `stdout' to binary mode (setting `_fmode' + doesn't take effect until after `stdout' is already open). */ + if (!isatty (fileno (stdout))) + setmode (fileno (stdout), O_BINARY); +#endif /* DOS_NT */ + } else tagf = fopen (tagfile, append_to_tagfile ? "a" : "w"); if (tagf == NULL) @@ -895,11 +1096,14 @@ main (argc, argv) switch (argbuffer[i].arg_type) { case at_language: - lang_func = argbuffer[i].function; + forced_lang = argbuffer[i].lang; break; #ifdef ETAGS_REGEXPS case at_regexp: - add_regex (argbuffer[i].what); + analyse_regex (argbuffer[i].what, FALSE); + break; + case at_icregexp: + analyse_regex (argbuffer[i].what, TRUE); break; #endif case at_filename: @@ -908,7 +1112,7 @@ main (argc, argv) { if (got_err) { - error ("Can't find file %s\n", this_file); + error ("can't find file %s\n", this_file); argc--, argv++; } else @@ -919,7 +1123,7 @@ main (argc, argv) this_file = argbuffer[i].what; #endif /* Input file named "-" means read file names from stdin - and use them. */ + (one per line) and use them. */ if (streq (this_file, "-")) while (readline_internal (&filename_lb, stdin) > 0) process_file (filename_lb.buffer); @@ -932,6 +1136,10 @@ main (argc, argv) } } +#ifdef ETAGS_REGEXPS + free_patterns (); +#endif /* ETAGS_REGEXPS */ + if (!CTAGS) { while (nincluded_files-- > 0) @@ -945,10 +1153,9 @@ main (argc, argv) because we want them ordered. Let's do it now. */ if (cxref_style) { - tagf = fopen (tagfile, append_to_tagfile ? "a" : "w"); - if (tagf == NULL) - pfatal (tagfile); put_entries (head); + free_tree (head); + head = NULL; exit (GOOD); } @@ -963,7 +1170,7 @@ main (argc, argv) "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", tagfile, argbuffer[i].what, tagfile); if (system (cmd) != GOOD) - fatal ("failed to execute shell command"); + fatal ("failed to execute shell command", (char *)NULL); } append_to_tagfile = TRUE; } @@ -972,6 +1179,8 @@ main (argc, argv) if (tagf == NULL) pfatal (tagfile); put_entries (head); + free_tree (head); + head = NULL; fclose (tagf); if (update) @@ -980,76 +1189,263 @@ main (argc, argv) sprintf (cmd, "sort %s -o %s", tagfile, tagfile); exit (system (cmd)); } - exit (GOOD); + return GOOD; +} + + + +/* + * Return a compressor given the file name. If EXTPTR is non-zero, + * return a pointer into FILE where the compressor-specific + * extension begins. If no compressor is found, NULL is returned + * and EXTPTR is not significant. + * Idea by Vladimir Alexiev + */ +static compressor * +get_compressor_from_suffix (file, extptr) + char *file; + char **extptr; +{ + compressor *compr; + char *slash, *suffix; + + /* This relies on FN to be after canonicalize_filename, + so we don't need to consider backslashes on DOS_NT. */ + slash = etags_strrchr (file, '/'); + suffix = etags_strrchr (file, '.'); + if (suffix == NULL || suffix < slash) + return NULL; + if (extptr != NULL) + *extptr = suffix; + suffix += 1; + /* Let those poor souls who live with DOS 8+3 file name limits get + some solace by treating foo.cgz as if it were foo.c.gz, etc. + Only the first do loop is run if not MSDOS */ + do + { + for (compr = compressors; compr->suffix != NULL; compr++) + if (streq (compr->suffix, suffix)) + return compr; + if (!MSDOS) + break; /* do it only once: not really a loop */ + if (extptr != NULL) + *extptr = ++suffix; + } while (*suffix != '\0'); + return NULL; } + /* - * Set the language, given the name. + * Return a language given the name. */ -logical -get_language (language, func) - char *language; - Lang_function **func; +static language * +get_language_from_name (name) + char *name; { - struct lang_entry *lang; + language *lang; - for (lang = lang_names; lang->extension; ++lang) + if (name == NULL) + error ("empty language name", (char *)NULL); + else { - if (streq (language, lang->extension)) - { - *func = lang->function; - return TRUE; - } + for (lang = lang_names; lang->name != NULL; lang++) + if (streq (name, lang->name)) + return lang; + error ("unknown language \"%s\"", name); } - return FALSE; + return NULL; +} + + +/* + * Return a language given the interpreter name. + */ +static language * +get_language_from_interpreter (interpreter) + char *interpreter; +{ + language *lang; + char **iname; + + if (interpreter == NULL) + return NULL; + for (lang = lang_names; lang->name != NULL; lang++) + if (lang->interpreters != NULL) + for (iname = lang->interpreters; *iname != NULL; iname++) + if (streq (*iname, interpreter)) + return lang; + + return NULL; +} + + + +/* + * Return a language given the file name. + */ +static language * +get_language_from_suffix (file) + char *file; +{ + language *lang; + char **ext, *suffix; + + suffix = etags_strrchr (file, '.'); + if (suffix == NULL) + return NULL; + suffix += 1; + for (lang = lang_names; lang->name != NULL; lang++) + if (lang->suffixes != NULL) + for (ext = lang->suffixes; *ext != NULL; ext++) + if (streq (*ext, suffix)) + return lang; + return NULL; } + /* * This routine is called on each file argument. */ -void +static void process_file (file) char *file; { struct stat stat_buf; FILE *inf; + compressor *compr; + char *compressed_name, *uncompressed_name; + char *ext, *real_name; + - if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode)) + canonicalize_filename (file); + if (streq (file, tagfile) && !streq (tagfile, "-")) { - fprintf (stderr, "Skipping %s: it is not a regular file.\n", file); + error ("skipping inclusion of %s in self.", file); return; } - if (streq (file, tagfile) && !streq (tagfile, "-")) + if ((compr = get_compressor_from_suffix (file, &ext)) == NULL) { - fprintf (stderr, "Skipping inclusion of %s in self.\n", file); - return; + compressed_name = NULL; + real_name = uncompressed_name = savestr (file); + } + else + { + real_name = compressed_name = savestr (file); + uncompressed_name = savenstr (file, ext - file); + } + + /* If the canonicalised uncompressed name has already be dealt with, + skip it silently, else add it to the list. */ + { + typedef struct processed_file + { + char *filename; + struct processed_file *next; + } processed_file; + static processed_file *pf_head = NULL; + register processed_file *fnp; + + for (fnp = pf_head; fnp != NULL; fnp = fnp->next) + if (streq (uncompressed_name, fnp->filename)) + goto exit; + fnp = pf_head; + pf_head = xnew (1, struct processed_file); + pf_head->filename = savestr (uncompressed_name); + pf_head->next = fnp; + } + + if (stat (real_name, &stat_buf) != 0) + { + /* Reset real_name and try with a different name. */ + real_name = NULL; + if (compressed_name != NULL) /* try with the given suffix */ + { + if (stat (uncompressed_name, &stat_buf) == 0) + real_name = uncompressed_name; + } + else /* try all possible suffixes */ + { + for (compr = compressors; compr->suffix != NULL; compr++) + { + compressed_name = concat (file, ".", compr->suffix); + if (stat (compressed_name, &stat_buf) != 0) + { + if (MSDOS) + { + char *suf = compressed_name + strlen (file); + size_t suflen = strlen (compr->suffix) + 1; + for ( ; suf[1]; suf++, suflen--) + { + memmove (suf, suf + 1, suflen); + if (stat (compressed_name, &stat_buf) == 0) + { + real_name = compressed_name; + break; + } + } + if (real_name != NULL) + break; + } /* MSDOS */ + free (compressed_name); + compressed_name = NULL; + } + else + { + real_name = compressed_name; + break; + } + } + } + if (real_name == NULL) + { + perror (file); + goto exit; + } + } /* try with a different name */ + + if (!S_ISREG (stat_buf.st_mode)) + { + error ("skipping %s: it is not a regular file.", real_name); + goto exit; + } + if (real_name == compressed_name) + { + char *cmd = concat (compr->command, " ", real_name); + inf = popen (cmd, "r"); + free (cmd); } - inf = fopen (file, "r"); + else + inf = fopen (real_name, "r"); if (inf == NULL) { - perror (file); - return; + perror (real_name); + goto exit; } - find_entries (file, inf); + find_entries (uncompressed_name, inf); + + if (real_name == compressed_name) + pclose (inf); + else + fclose (inf); if (!CTAGS) { char *filename; - if (absolutefn (file)) + if (filename_is_absolute (uncompressed_name)) { - /* file is an absolute filename. Canonicalise it. */ - filename = absolute_filename (file, cwd); + /* file is an absolute file name. Canonicalise it. */ + filename = absolute_filename (uncompressed_name, cwd); } else { - /* file is a filename relative to cwd. Make it relative + /* file is a file name relative to cwd. Make it relative to the directory of the tags file. */ - filename = relative_filename (file, tagfiledir); + filename = relative_filename (uncompressed_name, tagfiledir); } fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head)); free (filename); @@ -1057,152 +1453,258 @@ process_file (file) free_tree (head); head = NULL; } + + exit: + if (compressed_name) free(compressed_name); + if (uncompressed_name) free(uncompressed_name); + return; } /* * This routine sets up the boolean pseudo-functions which work - * by setting boolean flags dependent upon the corresponding character + * by setting boolean flags dependent upon the corresponding character. * Every char which is NOT in that string is not a white char. Therefore, * all of the array "_wht" is set to FALSE, and then the elements * subscripted by the chars in "white" are set to TRUE. Thus "_wht" * of a char is TRUE if it is the string "white", else FALSE. */ -void +static void init () { register char *sp; register int i; - for (i = 0; i < 0177; i++) - _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; - for (sp = white; *sp; sp++) - _wht[*sp] = TRUE; - for (sp = endtk; *sp; sp++) - _etk[*sp] = TRUE; - for (sp = intk; *sp; sp++) - _itk[*sp] = TRUE; - for (sp = begtk; *sp; sp++) - _btk[*sp] = TRUE; - _wht[0] = _wht['\n']; - _etk[0] = _etk['\n']; - _btk[0] = _btk['\n']; - _itk[0] = _itk['\n']; + for (i = 0; i < CHARS; i++) + iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE; + for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE; + for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE; + notinname('\0') = notinname('\n'); + for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE; + begtoken('\0') = begtoken('\n'); + for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE; + intoken('\0') = intoken('\n'); + for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE; + endtoken('\0') = endtoken('\n'); } /* * This routine opens the specified file and calls the function * which finds the function and type definitions. */ -void +node *last_node = NULL; + +static void find_entries (file, inf) char *file; FILE *inf; { char *cp; - struct lang_entry *lang; - NODE *old_last_node; - extern NODE *last_node; - + language *lang; + node *old_last_node; + + /* Memory leakage here: the string pointed by curfile is + never released, because curfile is copied into np->file + for each node, to be used in CTAGS mode. The amount of + memory leaked here is the sum of the lengths of the + file names. */ curfile = savestr (file); - cp = etags_strrchr (file, '.'); /* If user specified a language, use it. */ - if (lang_func != NULL) + lang = forced_lang; + if (lang != NULL && lang->function != NULL) { - lang_func (inf); - fclose (inf); + curlang = lang; + lang->function (inf); return; } - if (cp) + /* Try to guess the language given the file name. */ + lang = get_language_from_suffix (file); + if (lang != NULL && lang->function != NULL) { - ++cp; - for (lang = lang_extensions; lang->extension; ++lang) + curlang = lang; + lang->function (inf); + return; + } + + /* Look for sharp-bang as the first two characters. */ + if (readline_internal (&lb, inf) > 0 + && lb.len >= 2 + && lb.buffer[0] == '#' + && lb.buffer[1] == '!') + { + char *lp; + + /* Set lp to point at the first char after the last slash in the + line or, if no slashes, at the first nonblank. Then set cp to + the first successive blank and terminate the string. */ + lp = etags_strrchr (lb.buffer+2, '/'); + if (lp != NULL) + lp += 1; + else + lp = skip_spaces (lb.buffer + 2); + cp = skip_non_spaces (lp); + *cp = '\0'; + + if (strlen (lp) > 0) { - if (streq (cp, lang->extension)) + lang = get_language_from_interpreter (lp); + if (lang != NULL && lang->function != NULL) { + curlang = lang; lang->function (inf); - fclose (inf); return; } } } + /* We rewind here, even if inf may be a pipe. We fail if the + length of the first line is longer than the pipe block size, + which is unlikely. */ + rewind (inf); /* Try Fortran. */ old_last_node = last_node; + curlang = get_language_from_name ("fortran"); Fortran_functions (inf); /* No Fortran entries found. Try C. */ if (old_last_node == last_node) { + /* We do not tag if rewind fails. + Only the file name will be recorded in the tags file. */ rewind (inf); + curlang = get_language_from_name (cplusplus ? "c++" : "c"); default_C_entries (inf); } - fclose (inf); + return; } /* Record a tag. */ -void -pfnote (name, is_func, named, linestart, linelen, lno, cno) - char *name; /* tag name */ - logical is_func; /* tag is a function */ - logical named; /* tag different from text of definition */ +static void +pfnote (name, is_func, linestart, linelen, lno, cno) + char *name; /* tag name, or NULL if unnamed */ + bool is_func; /* tag is a function */ char *linestart; /* start of the line where tag is */ int linelen; /* length of the line where tag is */ int lno; /* line number */ long cno; /* character number */ { - register NODE *np = xnew (1, NODE); - register char *fp; + register node *np; + + if (CTAGS && name == NULL) + return; + + np = xnew (1, node); /* If ctags mode, change name "main" to M. */ if (CTAGS && !cxref_style && streq (name, "main")) { - fp = etags_strrchr (curfile, '/'); - np->name = concat ("M", fp == 0 ? curfile : fp + 1, ""); + register char *fp = etags_strrchr (curfile, '/'); + np->name = concat ("M", fp == NULL ? curfile : fp + 1, ""); fp = etags_strrchr (np->name, '.'); - if (fp && fp[1] != '\0' && fp[2] == '\0') - fp[0] = 0; - np->named = TRUE; + if (fp != NULL && fp[1] != '\0' && fp[2] == '\0') + fp[0] = '\0'; } else - { - np->name = name; - np->named = named; - } + np->name = name; np->been_warned = FALSE; np->file = curfile; np->is_func = is_func; np->lno = lno; /* Our char numbers are 0-base, because of C language tradition? ctags compatibility? old versions compatibility? I don't know. - Anyway, since emacs's are 1-base we espect etags.el to take care + Anyway, since emacs's are 1-base we expect etags.el to take care of the difference. If we wanted to have 1-based numbers, we would uncomment the +1 below. */ np->cno = cno /* + 1 */ ; np->left = np->right = NULL; - np->pat = savenstr (linestart, ((CTAGS && !cxref_style) ? 50 : linelen)); + if (CTAGS && !cxref_style) + { + if (strlen (linestart) < 50) + np->pat = concat (linestart, "$", ""); + else + np->pat = savenstr (linestart, 50); + } + else + np->pat = savenstr (linestart, linelen); add_node (np, &head); } +/* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997] + * From: Sam Kendall + * Subject: Proposal for firming up the TAGS format specification + * To: F.Potorti@cnuce.cnr.it + * + * pfnote should emit the optimized form [unnamed tag] only if: + * 1. name does not contain any of the characters " \t\r\n(),;"; + * 2. linestart contains name as either a rightmost, or rightmost but + * one character, substring; + * 3. the character, if any, immediately before name in linestart must + * be one of the characters " \t(),;"; + * 4. the character, if any, immediately after name in linestart must + * also be one of the characters " \t(),;". + * + * The real implementation uses the notinname() macro, which recognises + * characters slightly different form " \t\r\n(),;". See the variable + * `nonam'. + */ +#define traditional_tag_style TRUE +static void +new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno) + char *name; /* tag name, or NULL if unnamed */ + int namelen; /* tag length */ + bool is_func; /* tag is a function */ + char *linestart; /* start of the line where tag is */ + int linelen; /* length of the line where tag is */ + int lno; /* line number */ + long cno; /* character number */ +{ + register char *cp; + bool named; + + named = TRUE; + if (!CTAGS) + { + for (cp = name; !notinname (*cp); cp++) + continue; + if (*cp == '\0') /* rule #1 */ + { + cp = linestart + linelen - namelen; + if (notinname (linestart[linelen-1])) + cp -= 1; /* rule #4 */ + if (cp >= linestart /* rule #2 */ + && (cp == linestart + || notinname (cp[-1])) /* rule #3 */ + && strneq (name, cp, namelen)) /* rule #2 */ + named = FALSE; /* use unnamed tag */ + } + } + + if (named) + name = savenstr (name, namelen); + else + name = NULL; + pfnote (name, is_func, linestart, linelen, lno, cno); +} + /* * free_tree () * recurse on left children, iterate on right children. */ -void -free_tree (node) - register NODE *node; +static void +free_tree (np) + register node *np; { - while (node) + while (np) { - register NODE *node_right = node->right; - free_tree (node->left); - if (node->named) - free (node->name); - free (node->pat); - free ((char *) node); - node = node_right; + register node *node_right = np->right; + free_tree (np->left); + if (np->name != NULL) + free (np->name); + free (np->pat); + free (np); + np = node_right; } } @@ -1215,18 +1717,17 @@ free_tree (node) * add_node is the only function allowed to add nodes, so it can * maintain state. */ -NODE *last_node = NULL; -void -add_node (node, cur_node_p) - NODE *node, **cur_node_p; +static void +add_node (np, cur_node_p) + node *np, **cur_node_p; { register int dif; - register NODE *cur_node = *cur_node_p; + register node *cur_node = *cur_node_p; if (cur_node == NULL) { - *cur_node_p = node; - last_node = node; + *cur_node_p = np; + last_node = np; return; } @@ -1234,14 +1735,14 @@ add_node (node, cur_node_p) { /* Etags Mode */ if (last_node == NULL) - fatal ("internal error in add_node", 0); - last_node->right = node; - last_node = node; + fatal ("internal error in add_node", (char *)NULL); + last_node->right = np; + last_node = np; } else { /* Ctags Mode */ - dif = strcmp (node->name, cur_node->name); + dif = strcmp (np->name, cur_node->name); /* * If this tag name matches an existing one, then @@ -1249,113 +1750,105 @@ add_node (node, cur_node_p) */ if (!dif) { - if (node->file == cur_node->file) + if (streq (np->file, cur_node->file)) { if (!no_warnings) { fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n", - node->file, lineno, node->name); + np->file, lineno, np->name); fprintf (stderr, "Second entry ignored\n"); } - return; } - if (!cur_node->been_warned && !no_warnings) + else if (!cur_node->been_warned && !no_warnings) { - fprintf (stderr, - "Duplicate entry in files %s and %s: %s (Warning only)\n", - node->file, cur_node->file, node->name); + fprintf + (stderr, + "Duplicate entry in files %s and %s: %s (Warning only)\n", + np->file, cur_node->file, np->name); + cur_node->been_warned = TRUE; } - cur_node->been_warned = TRUE; return; } - /* Maybe refuse to add duplicate nodes. */ - if (!permit_duplicates) - { - if (streq (node->name, cur_node->name) - && streq (node->file, cur_node->file)) - return; - } - /* Actually add the node */ - add_node (node, dif < 0 ? &cur_node->left : &cur_node->right); + add_node (np, dif < 0 ? &cur_node->left : &cur_node->right); } } -void -put_entries (node) - register NODE *node; +static void +put_entries (np) + register node *np; { register char *sp; - if (node == NULL) + if (np == NULL) return; /* Output subentries that precede this one */ - put_entries (node->left); + put_entries (np->left); /* Output this entry */ if (!CTAGS) { - if (node->named) + if (np->name != NULL) + fprintf (tagf, "%s\177%s\001%d,%ld\n", + np->pat, np->name, np->lno, np->cno); + else + fprintf (tagf, "%s\177%d,%ld\n", + np->pat, np->lno, np->cno); + } + else + { + if (np->name == NULL) + error ("internal error: NULL name in ctags mode.", (char *)NULL); + + if (cxref_style) { - fprintf (tagf, "%s\177%s\001%d,%d\n", - node->pat, node->name, - node->lno, node->cno); + if (vgrind_style) + fprintf (stdout, "%s %s %d\n", + np->name, np->file, (np->lno + 63) / 64); + else + fprintf (stdout, "%-16s %3d %-16s %s\n", + np->name, np->lno, np->file, np->pat); } else { - fprintf (tagf, "%s\177%d,%d\n", - node->pat, - node->lno, node->cno); - } - } - else if (!cxref_style) - { - fprintf (tagf, "%s\t%s\t", - node->name, node->file); + fprintf (tagf, "%s\t%s\t", np->name, np->file); - if (node->is_func) - { /* a function */ - putc (searchar, tagf); - putc ('^', tagf); + if (np->is_func) + { /* a function */ + putc (searchar, tagf); + putc ('^', tagf); - for (sp = node->pat; *sp; sp++) - { - if (*sp == '\\' || *sp == searchar) - putc ('\\', tagf); - putc (*sp, tagf); + for (sp = np->pat; *sp; sp++) + { + if (*sp == '\\' || *sp == searchar) + putc ('\\', tagf); + putc (*sp, tagf); + } + putc (searchar, tagf); } - putc (searchar, tagf); - } - else - { /* a typedef; text pattern inadequate */ - fprintf (tagf, "%d", node->lno); + else + { /* a typedef; text pattern inadequate */ + fprintf (tagf, "%d", np->lno); + } + putc ('\n', tagf); } - putc ('\n', tagf); } - else if (vgrind_style) - fprintf (stdout, "%s %s %d\n", - node->name, node->file, (node->lno + 63) / 64); - else - fprintf (stdout, "%-16s %3d %-16s %s\n", - node->name, node->lno, node->file, node->pat); /* Output subentries that follow this one */ - put_entries (node->right); + put_entries (np->right); } /* Length of a number's decimal representation. */ -int +static int number_len (num) long num; { - int len = 0; - if (!num) - return 1; - for (; num; num /= 10) - ++len; + int len = 1; + while ((num /= 10) > 0) + len += 1; return len; } @@ -1366,26 +1859,25 @@ number_len (num) * is irrelevant with the new tags.el, but is still supplied for * backward compatibility. */ -int -total_size_of_entries (node) - register NODE *node; +static int +total_size_of_entries (np) + register node *np; { register int total; - if (node == NULL) + if (np == NULL) return 0; - total = 0; - for (; node; node = node->right) + for (total = 0; np != NULL; np = np->right) { /* Count left subentries. */ - total += total_size_of_entries (node->left); + total += total_size_of_entries (np->left); /* Count this entry */ - total += strlen (node->pat) + 1; - total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1; - if (node->named) - total += 1 + strlen (node->name); /* \001name */ + total += strlen (np->pat) + 1; + total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1; + if (np->name != NULL) + total += 1 + strlen (np->name); /* \001name */ } return total; @@ -1396,21 +1888,50 @@ total_size_of_entries (node) */ enum sym_type { - st_none, st_C_struct, st_C_enum, st_C_define, st_C_typedef, st_C_typespec + st_none, + st_C_objprot, st_C_objimpl, st_C_objend, + st_C_gnumacro, + st_C_ignore, + st_C_javastruct, + st_C_operator, + st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec }; +static unsigned int hash P_((const char *, unsigned int)); +static struct C_stab_entry * in_word_set P_((const char *, unsigned int)); +static enum sym_type C_symtype P_((char *, int, int)); + /* Feed stuff between (but not including) %[ and %] lines to: - gperf -c -k1,3 -o -p -r -t + gperf -c -k 1,3 -o -p -r -t %[ struct C_stab_entry { char *name; int c_ext; enum sym_type type; } %% +if, 0, st_C_ignore +for, 0, st_C_ignore +while, 0, st_C_ignore +switch, 0, st_C_ignore +return, 0, st_C_ignore +@interface, 0, st_C_objprot +@protocol, 0, st_C_objprot +@implementation,0, st_C_objimpl +@end, 0, st_C_objend +import, C_JAVA, st_C_ignore +package, C_JAVA, st_C_ignore +friend, C_PLPL, st_C_ignore +extends, C_JAVA, st_C_javastruct +implements, C_JAVA, st_C_javastruct +interface, C_JAVA, st_C_struct class, C_PLPL, st_C_struct +namespace, C_PLPL, st_C_struct domain, C_STAR, st_C_struct union, 0, st_C_struct struct, 0, st_C_struct +extern, 0, st_C_extern enum, 0, st_C_enum typedef, 0, st_C_typedef define, 0, st_C_define +operator, C_PLPL, st_C_operator +bool, C_PLPL, st_C_typespec long, 0, st_C_typespec short, 0, st_C_typespec int, 0, st_C_typespec @@ -1421,108 +1942,181 @@ signed, 0, st_C_typespec unsigned, 0, st_C_typespec auto, 0, st_C_typespec void, 0, st_C_typespec -extern, 0, st_C_typespec static, 0, st_C_typespec const, 0, st_C_typespec volatile, 0, st_C_typespec +explicit, C_PLPL, st_C_typespec +mutable, C_PLPL, st_C_typespec +typename, C_PLPL, st_C_typespec +# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach). +DEFUN, 0, st_C_gnumacro +SYSCALL, 0, st_C_gnumacro +ENTRY, 0, st_C_gnumacro +PSEUDO, 0, st_C_gnumacro +# These are defined inside C functions, so currently they are not met. +# EXFUN used in glibc, DEFVAR_* in emacs. +#EXFUN, 0, st_C_gnumacro +#DEFVAR_, 0, st_C_gnumacro %] and replace lines between %< and %> with its output. */ /*%<*/ -/* C code produced by gperf version 1.8.1 (K&R C version) */ -/* Command-line: gperf -c -k1,3 -o -p -r -t */ - - +/* C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -c -k 1,3 -o -p -r -t */ struct C_stab_entry { char *name; int c_ext; enum sym_type type; }; -#define MIN_WORD_LENGTH 3 -#define MAX_WORD_LENGTH 8 -#define MIN_HASH_VALUE 10 -#define MAX_HASH_VALUE 62 -/* - 21 keywords - 53 is the maximum key range -*/ +#define TOTAL_KEYWORDS 46 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 15 +#define MIN_HASH_VALUE 13 +#define MAX_HASH_VALUE 123 +/* maximum key range = 111, duplicates = 0 */ -static int +#ifdef __GNUC__ +__inline +#endif +static unsigned int hash (str, len) - register char *str; - register int len; -{ - static unsigned char hash_table[] = - { - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 2, 62, 7, - 6, 9, 15, 30, 62, 24, 62, 62, 1, 24, - 7, 27, 13, 62, 19, 26, 18, 27, 1, 62, - 62, 62, 62, 62, 62, 62, 62, 62, - }; - return len + hash_table[str[2]] + hash_table[str[0]]; -} - -struct C_stab_entry * -in_word_set (str, len) - register char *str; - register int len; -{ - - static struct C_stab_entry wordlist[] = - { - {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, - {"",}, - {"volatile", 0, st_C_typespec}, - {"",}, - {"long", 0, st_C_typespec}, - {"char", 0, st_C_typespec}, - {"class", C_PLPL, st_C_struct}, - {"",}, {"",}, {"",}, {"",}, - {"const", 0, st_C_typespec}, - {"",}, {"",}, {"",}, {"",}, - {"auto", 0, st_C_typespec}, - {"",}, {"",}, - {"define", 0, st_C_define}, - {"",}, - {"void", 0, st_C_typespec}, - {"",}, {"",}, {"",}, - {"extern", 0, st_C_typespec}, - {"static", 0, st_C_typespec}, - {"",}, - {"domain", C_STAR, st_C_struct}, - {"",}, - {"typedef", 0, st_C_typedef}, - {"double", 0, st_C_typespec}, - {"enum", 0, st_C_enum}, - {"",}, {"",}, {"",}, {"",}, - {"int", 0, st_C_typespec}, - {"",}, - {"float", 0, st_C_typespec}, - {"",}, {"",}, {"",}, - {"struct", 0, st_C_struct}, - {"",}, {"",}, {"",}, {"",}, - {"union", 0, st_C_struct}, - {"",}, - {"short", 0, st_C_typespec}, - {"",}, {"",}, - {"unsigned", 0, st_C_typespec}, - {"signed", 0, st_C_typespec}, + register const char *str; + register unsigned int len; +{ + static unsigned char asso_values[] = + { + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 3, 124, 124, 124, 43, 6, + 11, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 11, 124, 124, 58, 7, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 57, 7, 42, + 4, 14, 52, 0, 124, 53, 124, 124, 29, 11, + 6, 35, 32, 124, 29, 34, 59, 58, 51, 24, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124 + }; + register int hval = len; + + switch (hval) + { + default: + case 3: + hval += asso_values[(unsigned char)str[2]]; + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval; +} + +#ifdef __GNUC__ +__inline +#endif +static struct C_stab_entry * +in_word_set (str, len) + register const char *str; + register unsigned int len; +{ + static struct C_stab_entry wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"@end", 0, st_C_objend}, + {""}, {""}, {""}, {""}, + {"ENTRY", 0, st_C_gnumacro}, + {"@interface", 0, st_C_objprot}, + {""}, + {"domain", C_STAR, st_C_struct}, + {""}, + {"PSEUDO", 0, st_C_gnumacro}, + {""}, {""}, + {"namespace", C_PLPL, st_C_struct}, + {""}, {""}, + {"@implementation",0, st_C_objimpl}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"long", 0, st_C_typespec}, + {"signed", 0, st_C_typespec}, + {"@protocol", 0, st_C_objprot}, + {""}, {""}, {""}, {""}, + {"bool", C_PLPL, st_C_typespec}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"const", 0, st_C_typespec}, + {"explicit", C_PLPL, st_C_typespec}, + {"if", 0, st_C_ignore}, + {""}, + {"operator", C_PLPL, st_C_operator}, + {""}, + {"DEFUN", 0, st_C_gnumacro}, + {""}, {""}, + {"define", 0, st_C_define}, + {""}, {""}, {""}, {""}, {""}, + {"double", 0, st_C_typespec}, + {"struct", 0, st_C_struct}, + {""}, {""}, {""}, {""}, + {"short", 0, st_C_typespec}, + {""}, + {"enum", 0, st_C_enum}, + {"mutable", C_PLPL, st_C_typespec}, + {""}, + {"extern", 0, st_C_extern}, + {"extends", C_JAVA, st_C_javastruct}, + {"package", C_JAVA, st_C_ignore}, + {"while", 0, st_C_ignore}, + {""}, + {"for", 0, st_C_ignore}, + {""}, {""}, {""}, + {"volatile", 0, st_C_typespec}, + {""}, {""}, + {"import", C_JAVA, st_C_ignore}, + {"float", 0, st_C_typespec}, + {"switch", 0, st_C_ignore}, + {"return", 0, st_C_ignore}, + {"implements", C_JAVA, st_C_javastruct}, + {""}, + {"static", 0, st_C_typespec}, + {"typedef", 0, st_C_typedef}, + {"typename", C_PLPL, st_C_typespec}, + {"unsigned", 0, st_C_typespec}, + {""}, {""}, + {"char", 0, st_C_typespec}, + {"class", C_PLPL, st_C_struct}, + {""}, {""}, {""}, + {"void", 0, st_C_typespec}, + {""}, {""}, + {"friend", C_PLPL, st_C_ignore}, + {""}, {""}, {""}, + {"int", 0, st_C_typespec}, + {"union", 0, st_C_struct}, + {""}, {""}, {""}, + {"auto", 0, st_C_typespec}, + {"interface", C_JAVA, st_C_struct}, + {""}, + {"SYSCALL", 0, st_C_gnumacro} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = hash (str, len); - if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { - register char *s = wordlist[key].name; + register const char *s = wordlist[key].name; - if (*s == *str && strneq (str + 1, s + 1, len - 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } @@ -1530,13 +2124,13 @@ in_word_set (str, len) } /*%>*/ -enum sym_type -C_symtype(str, len, c_ext) +static enum sym_type +C_symtype (str, len, c_ext) char *str; int len; int c_ext; { - register struct C_stab_entry *se = in_word_set(str, len); + register struct C_stab_entry *se = in_word_set (str, len); if (se == NULL || (se->c_ext && !(c_ext & se->c_ext))) return st_none; @@ -1544,112 +2138,160 @@ C_symtype(str, len, c_ext) } /* - * C functions are recognized using a simple finite automaton. - * funcdef is its state variable. + * C functions and variables are recognized using a simple + * finite automaton. fvdef is its state variable. */ -typedef enum +enum { - fnone, /* nothing seen */ - ftagseen, /* function-like tag seen */ - fstartlist, /* just after open parenthesis */ - finlist, /* in parameter list */ - flistseen, /* after parameter list */ - fignore /* before open brace */ -} FUNCST; -FUNCST funcdef; - + fvnone, /* nothing seen */ + foperator, /* func: operator keyword seen (cplpl) */ + fvnameseen, /* function or variable name seen */ + fstartlist, /* func: just after open parenthesis */ + finlist, /* func: in parameter list */ + flistseen, /* func: after parameter list */ + fignore, /* func: before open brace */ + vignore /* var-like: ignore until ';' */ +} fvdef; + +bool fvextern; /* func or var: extern keyword seen; */ /* * typedefs are recognized using a simple finite automaton. - * typeddef is its state variable. + * typdef is its state variable. */ -typedef enum +enum { tnone, /* nothing seen */ - ttypedseen, /* typedef keyword seen */ + tkeyseen, /* typedef keyword seen */ + ttypeseen, /* defined type seen */ tinbody, /* inside typedef body */ tend, /* just before typedef tag */ tignore /* junk after typedef tag */ -} TYPEDST; -TYPEDST typdef; +} typdef; - /* + /* * struct-like structures (enum, struct and union) are recognized * using another simple finite automaton. `structdef' is its state * variable. */ -typedef enum +enum { snone, /* nothing seen yet */ skeyseen, /* struct-like keyword seen */ stagseen, /* struct-like tag seen */ scolonseen, /* colon seen after struct-like tag */ sinbody /* in struct body: recognize member func defs*/ -} STRUCTST; -STRUCTST structdef; +} structdef; /* * When structdef is stagseen, scolonseen, or sinbody, structtag is the - * struct tag, and structtype is the type of the preceding struct-like + * struct tag, and structtype is the type of the preceding struct-like * keyword. */ char *structtag = ""; enum sym_type structtype; +/* + * When objdef is different from onone, objtag is the name of the class. + */ +char *objtag = ""; + /* * Yet another little state machine to deal with preprocessor lines. */ -typedef enum +enum { dnone, /* nothing seen */ dsharpseen, /* '#' seen as first char on line */ ddefineseen, /* '#' and 'define' seen */ dignorerest /* ignore rest of line */ -} DEFINEST; -DEFINEST definedef; +} definedef; + +/* + * State machine for Objective C protocols and implementations. + * Tom R.Hageman + */ +enum +{ + onone, /* nothing seen */ + oprotocol, /* @interface or @protocol seen */ + oimplementation, /* @implementations seen */ + otagseen, /* class name seen */ + oparenseen, /* parenthesis before category seen */ + ocatseen, /* category name seen */ + oinbody, /* in @implementation body */ + omethodsign, /* in @implementation body, after +/- */ + omethodtag, /* after method name */ + omethodcolon, /* after method colon */ + omethodparm, /* after method parameter */ + oignore /* wait for @end */ +} objdef; + + +/* + * Use this structure to keep info about the token read, and how it + * should be tagged. Used by the make_C_tag function to build a tag. + */ +typedef struct +{ + bool valid; + char *str; + bool named; + int linelen; + int lineno; + long linepos; + char *buffer; +} token; + +token tok; /* latest token read */ /* * Set this to TRUE, and the next token considered is called a function. * Used only for GNU emacs's function-defining macros. */ -logical next_token_is_func; +bool next_token_is_func; /* * TRUE in the rules part of a yacc file, FALSE outside (parse as C). */ -logical yacc_rules; +bool yacc_rules; + +/* + * methodlen is the length of the method name stored in token_name. + */ +int methodlen; + +static bool consider_token P_((char *, int, int, int, int, int, bool *)); +static void make_C_tag P_((bool)); /* * consider_token () * checks to see if the current token is at the start of a - * function, or corresponds to a typedef, or is a struct/union/enum - * tag. + * function or variable, or corresponds to a typedef, or + * is a struct/union/enum tag, or #define, or an enum constant. * - * *IS_FUNC gets TRUE iff the token is a function or macro with args. - * C_EXT is which language we are looking at. - * - * In the future we will need some way to adjust where the end of - * the token is; for instance, implementing the C++ keyword - * `operator' properly will adjust the end of the token to be after - * whatever follows `operator'. + * *IS_FUNC gets TRUE iff the token is a function or #define macro + * with args. C_EXT is which language we are looking at. * * Globals - * funcdef IN OUT + * fvdef IN OUT * structdef IN OUT * definedef IN OUT * typdef IN OUT + * objdef IN OUT * next_token_is_func IN OUT */ -logical -consider_token (str, len, c, c_ext, cblev, is_func) +static bool +consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var) register char *str; /* IN: token pointer */ register int len; /* IN: token length */ - register char c; /* IN: first char after the token */ + register int c; /* IN: first char after the token */ int c_ext; /* IN: C extensions mask */ int cblev; /* IN: curly brace level */ - logical *is_func; /* OUT: function found */ + int parlev; /* IN: parenthesis level */ + bool *is_func_or_var; /* OUT: function or variable found */ { enum sym_type toktype = C_symtype (str, len, c_ext); @@ -1677,15 +2319,15 @@ consider_token (str, len, c, c_ext, cblev, is_func) * and constantypedefs is FALSE. */ definedef = dignorerest; - *is_func = (c == '('); - if (!*is_func && !constantypedefs) + *is_func_or_var = (c == '('); + if (!*is_func_or_var && !constantypedefs) return FALSE; else return TRUE; case dignorerest: return FALSE; default: - error ("internal error: definedef value.", 0); + error ("internal error: definedef value.", (char *)NULL); } /* @@ -1697,20 +2339,20 @@ consider_token (str, len, c, c_ext, cblev, is_func) if (toktype == st_C_typedef) { if (typedefs) - typdef = ttypedseen; - funcdef = fnone; + typdef = tkeyseen; + fvextern = FALSE; + fvdef = fvnone; return FALSE; } break; - case ttypedseen: + case tkeyseen: switch (toktype) { case st_none: case st_C_typespec: - typdef = tend; - break; case st_C_struct: case st_C_enum: + typdef = ttypeseen; break; } /* Do not return here, so the structdef stuff has a chance. */ @@ -1735,17 +2377,16 @@ consider_token (str, len, c, c_ext, cblev, is_func) * This structdef business is NOT invoked when we are ctags and the * file is plain C. This is because a struct tag may have the same * name as another tag, and this loses with ctags. - * - * This if statement deals with the typdef state machine as - * follows: if typdef==ttypedseen and token is struct/union/class/enum, - * return FALSE. All the other code here is for the structdef - * state machine. */ switch (toktype) { + case st_C_javastruct: + if (structdef == stagseen) + structdef = scolonseen; + return FALSE; case st_C_struct: case st_C_enum: - if (typdef == ttypedseen + if (typdef == tkeyseen || (typedefs_and_cplusplus && cblev == 0 && structdef == snone)) { structdef = skeyseen; @@ -1753,10 +2394,11 @@ consider_token (str, len, c, c_ext, cblev, is_func) } return FALSE; } + if (structdef == skeyseen) { - /* Save the tag for struct/union/class, for functions that may be - defined inside. */ + /* Save the tag for struct/union/class, for functions and variables + that may be defined inside. */ if (structtype == st_C_struct) structtag = savenstr (str, len); else @@ -1765,52 +2407,143 @@ consider_token (str, len, c, c_ext, cblev, is_func) return TRUE; } - /* Avoid entering funcdef stuff if typdef is going on. */ if (typdef != tnone) + definedef = dnone; + + /* Detect GNU macros. + + Writers of emacs code are recommended to put the + first two args of a DEFUN on the same line. + + The DEFUN macro, used in emacs C source code, has a first arg + that is a string (the lisp function name), and a second arg that + is a C function name. Since etags skips strings, the second arg + is tagged. This is unfortunate, as it would be better to tag the + first arg. The simplest way to deal with this problem would be + to name the tag with a name built from the function name, by + removing the initial 'F' character and substituting '-' for '_'. + Anyway, this assumes that the conventions of naming lisp + functions will never change. Currently, this method is not + implemented. */ + if (definedef == dnone && toktype == st_C_gnumacro) { - definedef = dnone; + next_token_is_func = TRUE; return FALSE; } - - /* Detect GNU macros. */ - if (definedef == dnone) - if (strneq (str, "DEFUN", 5) /* Used in emacs */ -#if FALSE - These are defined inside C functions, so currently they - are not met anyway. - || strneq (str, "EXFUN", 5) /* Used in glibc */ - || strneq (str, "DEFVAR_", 7) /* Used in emacs */ -#endif - || strneq (str, "SYSCALL", 7) /* Used in glibc (mach) */ - || strneq (str, "ENTRY", 5) /* Used in glibc */ - || strneq (str, "PSEUDO", 6)) /* Used in glibc */ - - { - next_token_is_func = TRUE; - return FALSE; - } if (next_token_is_func) { next_token_is_func = FALSE; - funcdef = fignore; - *is_func = TRUE; + fvdef = fignore; + *is_func_or_var = TRUE; + return TRUE; + } + + /* Detect Objective C constructs. */ + switch (objdef) + { + case onone: + switch (toktype) + { + case st_C_objprot: + objdef = oprotocol; + return FALSE; + case st_C_objimpl: + objdef = oimplementation; + return FALSE; + } + break; + case oimplementation: + /* Save the class tag for functions or variables defined inside. */ + objtag = savenstr (str, len); + objdef = oinbody; + return FALSE; + case oprotocol: + /* Save the class tag for categories. */ + objtag = savenstr (str, len); + objdef = otagseen; + *is_func_or_var = TRUE; return TRUE; + case oparenseen: + objdef = ocatseen; + *is_func_or_var = TRUE; + return TRUE; + case oinbody: + break; + case omethodsign: + if (parlev == 0) + { + objdef = omethodtag; + methodlen = len; + grow_linebuffer (&token_name, methodlen + 1); + strncpy (token_name.buffer, str, len); + token_name.buffer[methodlen] = '\0'; + token_name.len = methodlen; + return TRUE; + } + return FALSE; + case omethodcolon: + if (parlev == 0) + objdef = omethodparm; + return FALSE; + case omethodparm: + if (parlev == 0) + { + objdef = omethodtag; + methodlen += len; + grow_linebuffer (&token_name, methodlen + 1); + strncat (token_name.buffer, str, len); + token_name.len = methodlen; + return TRUE; + } + return FALSE; + case oignore: + if (toktype == st_C_objend) + { + /* Memory leakage here: the string pointed by objtag is + never released, because many tests would be needed to + avoid breaking on incorrect input code. The amount of + memory leaked here is the sum of the lengths of the + class tags. + free (objtag); */ + objdef = onone; + } + return FALSE; } - /* A function? */ + /* A function, variable or enum constant? */ switch (toktype) { + case st_C_extern: + fvextern = TRUE; + /* FALLTHRU */ case st_C_typespec: - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; /* should be useless */ + if (fvdef != finlist && fvdef != fignore && fvdef != vignore) + fvdef = fvnone; /* should be useless */ return FALSE; - default: - if (funcdef == fnone) + case st_C_ignore: + fvextern = FALSE; + fvdef = vignore; + return FALSE; + case st_C_operator: + fvdef = foperator; + *is_func_or_var = TRUE; + return TRUE; + case st_none: + if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10)) + { + fvdef = foperator; + *is_func_or_var = TRUE; + return TRUE; + } + if (constantypedefs && structdef == sinbody && structtype == st_C_enum) + return TRUE; + if (fvdef == fvnone) { - funcdef = ftagseen; - *is_func = TRUE; + fvdef = fvnameseen; /* function or variable */ + *is_func_or_var = TRUE; return TRUE; } + break; } return FALSE; @@ -1818,21 +2551,10 @@ consider_token (str, len, c, c_ext, cblev, is_func) /* * C_entries () - * This routine finds functions, typedefs, #define's and - * struct/union/enum definitions in C syntax and adds them - * to the list. + * This routine finds functions, variables, typedefs, + * #define's, enum constants and struct/union/enum definitions in + * C syntax and adds them to the list. */ -typedef struct -{ - logical valid; - char *str; - logical named; - int linelen; - int lineno; - long linepos; - char *buffer; -} TOKEN; - #define current_lb_is_new (newndx == curndx) #define switch_line_buffers() (curndx = 1 - curndx) @@ -1843,19 +2565,20 @@ typedef struct #define othlinepos (lbs[1-curndx].linepos) #define newlinepos (lbs[newndx].linepos) -#define CNL_SAVE_DEFINEDEF \ +#define CNL_SAVE_DEFINEDEF() \ do { \ curlinepos = charno; \ lineno++; \ + linecharno = charno; \ charno += readline (&curlb, inf); \ lp = curlb.buffer; \ quotednl = FALSE; \ newndx = curndx; \ } while (0) -#define CNL \ +#define CNL() \ do { \ - CNL_SAVE_DEFINEDEF; \ + CNL_SAVE_DEFINEDEF(); \ if (savetok.valid) \ { \ tok = savetok; \ @@ -1864,16 +2587,37 @@ do { \ definedef = dnone; \ } while (0) -#define make_tag(isfun) do \ -{ \ - if (tok.valid) \ - pfnote (savestr (token_name.buffer), isfun, tok.named, \ - tok.buffer, tok.linelen, tok.lineno, tok.linepos); \ - else if (DEBUG) abort (); \ - tok.valid = FALSE; \ -} while (0) -void +static void +make_C_tag (isfun) + bool isfun; +{ + /* This function should never be called when tok.valid is FALSE, but + we must protect against invalid input or internal errors. */ + if (tok.valid) + { + if (traditional_tag_style) + { + /* This was the original code. Now we call new_pfnote instead, + which uses the new method for naming tags (see new_pfnote). */ + char *name = NULL; + + if (CTAGS || tok.named) + name = savestr (token_name.buffer); + pfnote (name, isfun, + tok.buffer, tok.linelen, tok.lineno, tok.linepos); + } + else + new_pfnote (token_name.buffer, token_name.len, isfun, + tok.buffer, tok.linelen, tok.lineno, tok.linepos); + tok.valid = FALSE; + } + else if (DEBUG) + abort (); +} + + +static void C_entries (c_ext, inf) int c_ext; /* extension of C */ FILE *inf; /* input file */ @@ -1881,29 +2625,38 @@ C_entries (c_ext, inf) register char c; /* latest char read; '\0' for end of line */ register char *lp; /* pointer one beyond the character `c' */ int curndx, newndx; /* indices for current and new lb */ - TOKEN tok; /* latest token read */ register int tokoff; /* offset in line of start of current token */ register int toklen; /* length of current token */ + char *qualifier; /* string used to qualify names */ + int qlen; /* length of qualifier */ int cblev; /* current curly brace level */ int parlev; /* current parenthesis level */ - logical incomm, inquote, inchar, quotednl, midtoken; - logical cplpl; - TOKEN savetok; /* token saved during preprocessor handling */ + bool incomm, inquote, inchar, quotednl, midtoken; + bool purec, cplpl, cjava; + token savetok; /* token saved during preprocessor handling */ + tokoff = toklen = 0; /* keep compiler quiet */ curndx = newndx = 0; lineno = 0; charno = 0; lp = curlb.buffer; *lp = 0; - definedef = dnone; funcdef = fnone; typdef = tnone; structdef = snone; + fvdef = fvnone; fvextern = FALSE; typdef = tnone; + structdef = snone; definedef = dnone; objdef = onone; next_token_is_func = yacc_rules = FALSE; midtoken = inquote = inchar = incomm = quotednl = FALSE; tok.valid = savetok.valid = FALSE; cblev = 0; parlev = 0; - cplpl = c_ext & C_PLPL; + purec = !(c_ext & ~YACC); /* no extensions (apart from possibly yacc) */ + cplpl = (c_ext & C_PLPL) == C_PLPL; + cjava = (c_ext & C_JAVA) == C_JAVA; + if (cjava) + { qualifier = "."; qlen = 1; } + else + { qualifier = "::"; qlen = 2; } while (!feof (inf)) { @@ -1935,7 +2688,7 @@ C_entries (c_ext, inf) case '\0': /* Newlines inside comments do not end macro definitions in traditional cpp. */ - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); break; } continue; @@ -1951,7 +2704,7 @@ C_entries (c_ext, inf) /* Newlines inside strings do not end macro definitions in traditional cpp, even though compilers don't usually accept them. */ - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); break; } continue; @@ -1962,7 +2715,7 @@ C_entries (c_ext, inf) { case '\0': /* Hmmm, something went wrong. */ - CNL; + CNL (); /* FALLTHRU */ case '\'': inchar = FALSE; @@ -1970,18 +2723,24 @@ C_entries (c_ext, inf) } continue; } - else + else switch (c) { case '"': inquote = TRUE; - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + if (fvdef != finlist && fvdef != fignore && fvdef !=vignore) + { + fvextern = FALSE; + fvdef = fvnone; + } continue; case '\'': inchar = TRUE; - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + if (fvdef != finlist && fvdef != fignore && fvdef !=vignore) + { + fvextern = FALSE; + fvdef = fvnone; + } continue; case '/': if (*lp == '*') @@ -1990,9 +2749,9 @@ C_entries (c_ext, inf) incomm = TRUE; continue; } - else if (cplpl && *lp == '/') + else if (/* cplpl && */ *lp == '/') { - c = 0; + c = '\0'; break; } else @@ -2002,7 +2761,7 @@ C_entries (c_ext, inf) { /* entering or exiting rules section in yacc file */ lp++; - definedef = dnone; funcdef = fnone; + definedef = dnone; fvdef = fvnone; fvextern = FALSE; typdef = tnone; structdef = snone; next_token_is_func = FALSE; midtoken = inquote = inchar = incomm = quotednl = FALSE; @@ -2016,7 +2775,7 @@ C_entries (c_ext, inf) if (definedef == dnone) { char *cp; - logical cpptoken = TRUE; + bool cpptoken = TRUE; /* Look back on this line. If all blanks, or nonblanks followed by an end of comment, this is a preprocessor @@ -2043,70 +2802,99 @@ C_entries (c_ext, inf) /* Consider token only if some complicated conditions are satisfied. */ if ((definedef != dnone || (cblev == 0 && structdef != scolonseen) - || (cblev == 1 && cplpl && structdef == sinbody)) + || (cblev == 1 && cplpl && structdef == sinbody) + || (structdef == sinbody && purec)) && typdef != tignore && definedef != dignorerest - && funcdef != finlist) + && fvdef != finlist) { if (midtoken) { if (endtoken (c)) { - if (cplpl && c == ':' && *lp == ':' && begtoken(*(lp + 1))) + bool funorvar = FALSE; + + if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1])) { /* * This handles :: in the middle, but not at the - * beginning of an identifier. + * beginning of an identifier. Also, space-separated + * :: is not recognised. */ lp += 2; - toklen += 3; + toklen += 2; + c = lp[-1]; + goto intok; } else { - logical is_func = FALSE; - if (yacc_rules - || consider_token (newlb.buffer + tokoff, toklen, - c, c_ext, cblev, &is_func)) + || consider_token (newlb.buffer + tokoff, toklen, c, + c_ext, cblev, parlev, &funorvar)) { - if (structdef == sinbody + if (fvdef == foperator) + { + char *oldlp = lp; + lp = skip_spaces (lp-1); + if (*lp != '\0') + lp += 1; + while (*lp != '\0' + && !iswhite (*lp) && *lp != '(') + lp += 1; + c = *lp++; + toklen += lp - oldlp; + } + tok.named = FALSE; + if (!purec + && funorvar && definedef == dnone - && is_func) - /* function defined in C++ class body */ + && structdef == sinbody) + /* function or var defined in C++ class body */ { - int strsize = strlen(structtag) + 2 + toklen + 1; - while (token_name.size < strsize) - { - token_name.size *= 2; - token_name.buffer - = (char *) xrealloc (token_name.buffer, - token_name.size); - } + int len = strlen (structtag) + qlen + toklen; + grow_linebuffer (&token_name, len + 1); strcpy (token_name.buffer, structtag); - strcat (token_name.buffer, "::"); + strcat (token_name.buffer, qualifier); + strncat (token_name.buffer, + newlb.buffer + tokoff, toklen); + token_name.len = len; + tok.named = TRUE; + } + else if (objdef == ocatseen) + /* Objective C category */ + { + int len = strlen (objtag) + 2 + toklen; + grow_linebuffer (&token_name, len + 1); + strcpy (token_name.buffer, objtag); + strcat (token_name.buffer, "("); strncat (token_name.buffer, - newlb.buffer+tokoff, toklen); + newlb.buffer + tokoff, toklen); + strcat (token_name.buffer, ")"); + token_name.len = len; + tok.named = TRUE; + } + else if (objdef == omethodtag + || objdef == omethodparm) + /* Objective C method */ + { tok.named = TRUE; } else { - while (token_name.size < toklen + 1) - { - token_name.size *= 2; - token_name.buffer - = (char *) xrealloc (token_name.buffer, - token_name.size); - } + grow_linebuffer (&token_name, toklen + 1); strncpy (token_name.buffer, - newlb.buffer+tokoff, toklen); + newlb.buffer + tokoff, toklen); token_name.buffer[toklen] = '\0'; - if (structdef == stagseen - || typdef == tend - || (is_func - && definedef == dignorerest)) /* macro */ - tok.named = TRUE; - else - tok.named = FALSE; + token_name.len = toklen; + /* Name macros and members. */ + tok.named = (structdef == stagseen + || typdef == ttypeseen + || typdef == tend + || (funorvar + && definedef == dignorerest) + || (funorvar + && definedef == dnone + && structdef == sinbody)); } tok.lineno = lineno; tok.linelen = tokoff + toklen + 1; @@ -2115,20 +2903,23 @@ C_entries (c_ext, inf) tok.valid = TRUE; if (definedef == dnone - && (funcdef == ftagseen + && (fvdef == fvnameseen + || fvdef == foperator || structdef == stagseen - || typdef == tend)) + || typdef == tend + || objdef != onone)) { if (current_lb_is_new) switch_line_buffers (); } else - make_tag (is_func); + make_C_tag (funorvar); } midtoken = FALSE; } } /* if (endtoken (c)) */ else if (intoken (c)) + intok: { toklen++; continue; @@ -2139,20 +2930,20 @@ C_entries (c_ext, inf) switch (definedef) { case dnone: - switch (funcdef) + switch (fvdef) { case fstartlist: - funcdef = finlist; + fvdef = finlist; continue; case flistseen: - make_tag (TRUE); - funcdef = fignore; + make_C_tag (TRUE); /* a function */ + fvdef = fignore; break; - case ftagseen: - funcdef = fnone; + case fvnameseen: + fvdef = fvnone; break; } - if (structdef == stagseen) + if (structdef == stagseen && !cjava) structdef = snone; break; case dsharpseen: @@ -2176,20 +2967,36 @@ C_entries (c_ext, inf) case ':': if (definedef != dnone) break; + switch (objdef) + { + case otagseen: + objdef = oignore; + make_C_tag (TRUE); /* an Objective C class */ + break; + case omethodtag: + case omethodparm: + objdef = omethodcolon; + methodlen += 1; + grow_linebuffer (&token_name, methodlen + 1); + strcat (token_name.buffer, ":"); + token_name.len = methodlen; + break; + } if (structdef == stagseen) structdef = scolonseen; else - switch (funcdef) + switch (fvdef) { - case ftagseen: + case fvnameseen: if (yacc_rules) { - make_tag (FALSE); - funcdef = fignore; + make_C_tag (FALSE); /* a yacc function */ + fvdef = fignore; } break; case fstartlist: - funcdef = fnone; + fvextern = FALSE; + fvdef = fvnone; break; } break; @@ -2200,21 +3007,64 @@ C_entries (c_ext, inf) switch (typdef) { case tend: - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ /* FALLTHRU */ default: typdef = tnone; } - if (funcdef != fignore) - funcdef = fnone; + switch (fvdef) + { + case fignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + fvextern = FALSE; + fvdef = fvnone; + tok.valid = FALSE; + break; + case flistseen: + if (declarations && (cblev == 0 || cblev == 1)) + make_C_tag (TRUE); /* a function declaration */ + /* FALLTHRU */ + default: + fvextern = FALSE; + fvdef = fvnone; + /* The following instruction invalidates the token. + Probably the token should be invalidated in all + other cases where some state machine is reset. */ + tok.valid = FALSE; + } if (structdef == stagseen) structdef = snone; break; case ',': if (definedef != dnone) break; - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + switch (objdef) + { + case omethodtag: + case omethodparm: + make_C_tag (TRUE); /* an Objective C method */ + objdef = oinbody; + break; + } + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + break; + default: + fvdef = fvnone; + } if (structdef == stagseen) structdef = snone; break; @@ -2224,40 +3074,51 @@ C_entries (c_ext, inf) if (cblev == 0 && typdef == tend) { typdef = tignore; - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ + break; + } + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + /* FALLTHRU */ + default: + fvdef = fvnone; } - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; if (structdef == stagseen) structdef = snone; break; case '(': if (definedef != dnone) break; - switch (funcdef) + if (objdef == otagseen && parlev == 0) + objdef = oparenseen; + switch (fvdef) { - case fnone: - switch (typdef) + case fvnameseen: + if (typdef == ttypeseen + && tok.valid + && *lp != '*' + && structdef != sinbody) { - case ttypedseen: - case tend: - /* Make sure that the next char is not a '*'. - This handles constructs like: + /* This handles constructs like: typedef void OperatorFun (int fun); */ - if (*lp != '*') - { - typdef = tignore; - make_tag (FALSE); - } - break; - } /* switch (typdef) */ - break; - case ftagseen: - funcdef = fstartlist; + make_C_tag (FALSE); + typdef = tignore; + } + /* FALLTHRU */ + case foperator: + fvdef = fstartlist; break; case flistseen: - funcdef = finlist; + fvdef = finlist; break; } parlev++; @@ -2265,19 +3126,24 @@ C_entries (c_ext, inf) case ')': if (definedef != dnone) break; + if (objdef == ocatseen && parlev == 1) + { + make_C_tag (TRUE); /* an Objective C category */ + objdef = oignore; + } if (--parlev == 0) { - switch (funcdef) + switch (fvdef) { case fstartlist: case finlist: - funcdef = flistseen; + fvdef = flistseen; break; } - if (cblev == 0 && typdef == tend) + if (cblev == 0 && (typdef == tend)) { typdef = tignore; - make_tag (FALSE); + make_C_tag (FALSE); /* a typedef */ } } else if (parlev < 0) /* can happen due to ill-conceived #if's. */ @@ -2286,40 +3152,53 @@ C_entries (c_ext, inf) case '{': if (definedef != dnone) break; - if (typdef == ttypedseen) + if (typdef == ttypeseen) typdef = tinbody; switch (structdef) { case skeyseen: /* unnamed struct */ - structtag = "_anonymous_"; structdef = sinbody; + structtag = "_anonymous_"; break; case stagseen: case scolonseen: /* named struct */ structdef = sinbody; - make_tag (FALSE); + make_C_tag (FALSE); /* a struct */ break; } - switch (funcdef) + switch (fvdef) { case flistseen: - make_tag (TRUE); + make_C_tag (TRUE); /* a function */ /* FALLTHRU */ case fignore: - funcdef = fnone; + fvdef = fvnone; break; - case fnone: - /* Neutralize `extern "C" {' grot and look inside structs. */ - if (cblev == 0 && structdef == snone && typdef == tnone) - cblev = -1; + case fvnone: + switch (objdef) + { + case otagseen: + make_C_tag (TRUE); /* an Objective C class */ + objdef = oignore; + break; + case omethodtag: + case omethodparm: + make_C_tag (TRUE); /* an Objective C method */ + objdef = oinbody; + break; + default: + /* Neutralize `extern "C" {' grot. */ + if (cblev == 0 && structdef == snone && typdef == tnone) + cblev = -1; + } } cblev++; break; case '*': if (definedef != dnone) break; - if (funcdef == fstartlist) - funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ + if (fvdef == fstartlist) + fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ break; case '}': if (definedef != dnone) @@ -2335,30 +3214,72 @@ C_entries (c_ext, inf) { if (typdef == tinbody) typdef = tend; -#if FALSE /* too risky */ + /* Memory leakage here: the string pointed by structtag is + never released, because I fear to miss something and + break things while freeing the area. The amount of + memory leaked here is the sum of the lengths of the + struct tags. if (structdef == sinbody) - free (structtag); -#endif + free (structtag); */ structdef = snone; structtag = ""; } break; case '=': - case '#': case '+': case '-': case '~': case '&': case '%': case '/': - case '|': case '^': case '!': case '<': case '>': case '.': case '?': if (definedef != dnone) break; - /* These surely cannot follow a function tag. */ - if (funcdef != finlist && funcdef != fignore) - funcdef = fnone; + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + case fvnameseen: + if ((members && cblev == 1) + || (globals && cblev == 0 && (!fvextern || declarations))) + make_C_tag (FALSE); /* a variable */ + /* FALLTHRU */ + default: + fvdef = vignore; + } + break; + case '+': + case '-': + if (objdef == oinbody && cblev == 0) + { + objdef = omethodsign; + break; + } + /* FALLTHRU */ + case '#': case '~': case '&': case '%': case '/': case '|': + case '^': case '!': case '<': case '>': case '.': case '?': case ']': + if (definedef != dnone) + break; + /* These surely cannot follow a function tag in C. */ + switch (fvdef) + { + case foperator: + case finlist: + case fignore: + case vignore: + break; + default: + fvdef = fvnone; + } break; case '\0': + if (objdef == otagseen) + { + make_C_tag (TRUE); /* an Objective C class */ + objdef = oignore; + } /* If a macro spans multiple lines don't reset its state. */ if (quotednl) - CNL_SAVE_DEFINEDEF; + CNL_SAVE_DEFINEDEF (); else - CNL; + CNL (); break; } /* switch (c) */ @@ -2369,23 +3290,39 @@ C_entries (c_ext, inf) * Process either a C++ file or a C file depending on the setting * of a global flag. */ -void +static void default_C_entries (inf) FILE *inf; { C_entries (cplusplus ? C_PLPL : 0, inf); } +/* Always do plain ANSI C. */ +static void +plain_C_entries (inf) + FILE *inf; +{ + C_entries (0, inf); +} + /* Always do C++. */ -void +static void Cplusplus_entries (inf) FILE *inf; { C_entries (C_PLPL, inf); } +/* Always do Java. */ +static void +Cjava_entries (inf) + FILE *inf; +{ + C_entries (C_JAVA, inf); +} + /* Always do C*. */ -void +static void Cstar_entries (inf) FILE *inf; { @@ -2393,26 +3330,54 @@ Cstar_entries (inf) } /* Always do Yacc. */ -void +static void Yacc_entries (inf) FILE *inf; { C_entries (YACC, inf); } +/* A useful macro. */ +#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \ + for (lineno = charno = 0; /* loop initialization */ \ + !feof (file_pointer) /* loop test */ \ + && (lineno++, /* instructions at start of loop */ \ + linecharno = charno, \ + charno += readline (&line_buffer, file_pointer), \ + char_pointer = lb.buffer, \ + TRUE); \ + ) + + +/* + * Read a file, but do no processing. This is used to do regexp + * matching on files that have no language defined. + */ +static void +just_read_file (inf) + FILE *inf; +{ + register char *dummy; + + LOOP_ON_INPUT_LINES (inf, lb, dummy) + continue; +} + /* Fortran parsing */ -char *dbp; +static bool tail P_((char *)); +static void takeprec P_((void)); +static void getit P_((FILE *)); -logical +static bool tail (cp) char *cp; { register int len = 0; - while (*cp && (*cp | ' ') == (dbp[len] | ' ')) + while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len])) cp++, len++; - if (*cp == 0) + if (*cp == '\0' && !intoken (dbp[len])) { dbp += len; return TRUE; @@ -2420,18 +3385,19 @@ tail (cp) return FALSE; } -void +static void takeprec () { - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp != '*') return; dbp++; - while (isspace (*dbp)) - dbp++; - if (tail ("(*)")) - return; + dbp = skip_spaces (dbp); + if (strneq (dbp, "(*)", 3)) + { + dbp += 3; + return; + } if (!isdigit (*dbp)) { --dbp; /* force failure */ @@ -2442,14 +3408,13 @@ takeprec () while (isdigit (*dbp)); } -void +static void getit (inf) FILE *inf; { register char *cp; - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); if (*dbp == '\0') { lineno++; @@ -2459,42 +3424,29 @@ getit (inf) if (dbp[5] != '&') return; dbp += 6; - while (isspace (*dbp)) - dbp++; + dbp = skip_spaces (dbp); } - if (!isalpha (*dbp) - && *dbp != '_' - && *dbp != '$') + if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$') return; - for (cp = dbp + 1; - (*cp - && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$'))); - cp++) + for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++) continue; - pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer, - cp - lb.buffer + 1, lineno, linecharno); + pfnote (savenstr (dbp, cp-dbp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } -void + +static void Fortran_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, dbp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; if (*dbp == '%') dbp++; /* Ratfor escape to fortran */ - while (isspace (*dbp)) - dbp++; - if (*dbp == 0) + dbp = skip_spaces (dbp); + if (*dbp == '\0') continue; - switch (*dbp | ' ') + switch (lowcase (*dbp)) { case 'i': if (tail ("integer")) @@ -2515,9 +3467,8 @@ Fortran_functions (inf) case 'd': if (tail ("double")) { - while (isspace (*dbp)) - dbp++; - if (*dbp == 0) + dbp = skip_spaces (dbp); + if (*dbp == '\0') continue; if (tail ("precision")) break; @@ -2525,11 +3476,10 @@ Fortran_functions (inf) } break; } - while (isspace (*dbp)) - dbp++; - if (*dbp == 0) + dbp = skip_spaces (dbp); + if (*dbp == '\0') continue; - switch (*dbp | ' ') + switch (lowcase (*dbp)) { case 'f': if (tail ("function")) @@ -2543,40 +3493,202 @@ Fortran_functions (inf) if (tail ("entry")) getit (inf); continue; - case 'p': - if (tail ("program")) + case 'b': + if (tail ("blockdata") || tail ("block data")) { - getit (inf); - continue; + dbp = skip_spaces (dbp); + if (*dbp == '\0') /* assume un-named */ + pfnote (savestr ("blockdata"), TRUE, + lb.buffer, dbp - lb.buffer, lineno, linecharno); + else + getit (inf); /* look for name */ } - if (tail ("procedure")) - getit (inf); continue; } } } +/* + * Philippe Waroquiers , 1998-04-24 + * Ada parsing + */ + +static void adagetit P_((FILE *, char *)); + +/* Once we are positioned after an "interesting" keyword, let's get + the real tag value necessary. */ +static void +adagetit (inf, name_qualifier) + FILE *inf; + char *name_qualifier; +{ + register char *cp; + char *name; + char c; + + while (!feof (inf)) + { + dbp = skip_spaces (dbp); + if (*dbp == '\0' + || (dbp[0] == '-' && dbp[1] == '-')) + { + lineno++; + linecharno = charno; + charno += readline (&lb, inf); + dbp = lb.buffer; + } + switch (*dbp) + { + case 'b': + case 'B': + if (tail ("body")) + { + /* Skipping body of procedure body or package body or .... + resetting qualifier to body instead of spec. */ + name_qualifier = "/b"; + continue; + } + break; + case 't': + case 'T': + /* Skipping type of task type or protected type ... */ + if (tail ("type")) + continue; + break; + } + if (*dbp == '"') + { + dbp += 1; + for (cp = dbp; *cp != '\0' && *cp != '"'; cp++) + continue; + } + else + { + dbp = skip_spaces (dbp); + for (cp = dbp; + (*cp != '\0' + && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.')); + cp++) + continue; + if (cp == dbp) + return; + } + c = *cp; + *cp = '\0'; + name = concat (dbp, name_qualifier, ""); + *cp = c; + pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + if (c == '"') + dbp = cp + 1; + return; + } +} + +static void +Ada_funcs (inf) + FILE *inf; +{ + bool inquote = FALSE; + + LOOP_ON_INPUT_LINES (inf, lb, dbp) + { + while (*dbp != '\0') + { + /* Skip a string i.e. "abcd". */ + if (inquote || (*dbp == '"')) + { + dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"'); + if (dbp != NULL) + { + inquote = FALSE; + dbp += 1; + continue; /* advance char */ + } + else + { + inquote = TRUE; + break; /* advance line */ + } + } + + /* Skip comments. */ + if (dbp[0] == '-' && dbp[1] == '-') + break; /* advance line */ + + /* Skip character enclosed in single quote i.e. 'a' + and skip single quote starting an attribute i.e. 'Image. */ + if (*dbp == '\'') + { + dbp++ ; + if (*dbp != '\0') + dbp++; + continue; + } + + /* Search for beginning of a token. */ + if (!begtoken (*dbp)) + { + dbp++; + continue; /* advance char */ + } + + /* We are at the beginning of a token. */ + switch (*dbp) + { + case 'f': + case 'F': + if (!packages_only && tail ("function")) + adagetit (inf, "/f"); + else + break; /* from switch */ + continue; /* advance char */ + case 'p': + case 'P': + if (!packages_only && tail ("procedure")) + adagetit (inf, "/p"); + else if (tail ("package")) + adagetit (inf, "/s"); + else if (tail ("protected")) /* protected type */ + adagetit (inf, "/t"); + else + break; /* from switch */ + continue; /* advance char */ + case 't': + case 'T': + if (!packages_only && tail ("task")) + adagetit (inf, "/k"); + else if (typedefs && !packages_only && tail ("type")) + { + adagetit (inf, "/t"); + while (*dbp != '\0') + dbp += 1; + } + else + break; /* from switch */ + continue; /* advance char */ + } + + /* Look for the end of the token. */ + while (!endtoken (*dbp)) + dbp++; + + } /* advance char */ + } /* advance line */ +} + /* * Bob Weiner, Motorola Inc., 4/3/94 * Unix and microcontroller assembly tag handling * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]' */ -void +static void Asm_labels (inf) FILE *inf; { register char *cp; - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, cp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - cp = lb.buffer; - /* If first char is alphabetic or one of [_.$], test for colon following identifier. */ if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$') @@ -2585,25 +3697,154 @@ Asm_labels (inf) cp++; while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$') cp++; - if (*cp == ':' || isspace (*cp)) + if (*cp == ':' || iswhite (*cp)) { /* Found end of label, so copy it and add it to the table. */ - pfnote (savenstr (lb.buffer, cp-lb.buffer), TRUE, FALSE, + pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } } } } -/* Added by Mosur Mohan, 4/22/88 */ -/* Pascal parsing */ +/* + * Perl support by Bart Robinson + * enhanced by Michael Ernst + * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/ + * Perl variable names: /^(my|local).../ + */ +static void +Perl_functions (inf) + FILE *inf; +{ + register char *cp; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (*cp++ == 's' + && *cp++ == 'u' + && *cp++ == 'b' && iswhite (*cp++)) + { + cp = skip_spaces (cp); + if (*cp != '\0') + { + char *sp = cp; + while (*cp != '\0' + && !iswhite (*cp) && *cp != '{' && *cp != '(') + cp++; + pfnote (savenstr (sp, cp-sp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } + else if (globals /* only if tagging global vars is enabled */ + && ((cp = lb.buffer, + *cp++ == 'm' + && *cp++ == 'y') + || (cp = lb.buffer, + *cp++ == 'l' + && *cp++ == 'o' + && *cp++ == 'c' + && *cp++ == 'a' + && *cp++ == 'l')) + && (*cp == '(' || iswhite (*cp))) + { + /* After "my" or "local", but before any following paren or space. */ + char *varname = NULL; + + cp = skip_spaces (cp); + if (*cp == '$' || *cp == '@' || *cp == '%') + { + char* varstart = ++cp; + while (isalnum (*cp) || *cp == '_') + cp++; + varname = savenstr (varstart, cp-varstart); + } + else + { + /* Should be examining a variable list at this point; + could insist on seeing an open parenthesis. */ + while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')') + cp++; + } + + /* Perhaps I should back cp up one character, so the TAGS table + doesn't mention (and so depend upon) the following char. */ + pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname, + FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } +} + +/* + * Python support by Eric S. Raymond + * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/ + */ +static void +Python_functions (inf) + FILE *inf; +{ + register char *cp; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (*cp++ == 'd' + && *cp++ == 'e' + && *cp++ == 'f' && iswhite (*cp++)) + { + cp = skip_spaces (cp); + while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':') + cp++; + pfnote (NULL, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + + cp = lb.buffer; + if (*cp++ == 'c' + && *cp++ == 'l' + && *cp++ == 'a' + && *cp++ == 's' + && *cp++ == 's' && iswhite (*cp++)) + { + cp = skip_spaces (cp); + while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':') + cp++; + pfnote (NULL, TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); + } + } +} + +/* Idea by Corny de Souza + * Cobol tag functions + * We could look for anything that could be a paragraph name. + * i.e. anything that starts in column 8 is one word and ends in a full stop. + */ +static void +Cobol_paragraphs (inf) + FILE *inf; +{ + register char *bp, *ep; + + LOOP_ON_INPUT_LINES (inf, lb, bp) + { + if (lb.len < 9) + continue; + bp += 8; + + /* If eoln, compiler option or comment ignore whole line. */ + if (bp[-1] != ' ' || !isalnum (bp[0])) + continue; -#define GET_NEW_LINE \ -{ \ - linecharno = charno; lineno++; \ - charno += 1 + readline (&lb, inf); \ - dbp = lb.buffer; \ + for (ep = bp; isalnum (*ep) || *ep == '-'; ep++) + continue; + if (*ep++ == '.') + pfnote (savenstr (bp, ep-bp), TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + } } + +/* Added by Mosur Mohan, 4/22/88 */ +/* Pascal parsing */ /* * Locates tags for procedures & functions. Doesn't do any type- or @@ -2611,32 +3852,33 @@ Asm_labels (inf) * "forward" immediately following the procedure statement; if found, * the tag is skipped. */ -void +static void Pascal_functions (inf) FILE *inf; { - struct linebuffer tline; /* mostly copied from C_entries */ + linebuffer tline; /* mostly copied from C_entries */ long save_lcno; - int save_lineno; - char c, *cp; - char *nambuf; + int save_lineno, save_len; + char c, *cp, *namebuf; - logical /* each of these flags is TRUE iff: */ + bool /* each of these flags is TRUE iff: */ incomment, /* point is inside a comment */ inquote, /* point is inside '..' string */ - get_tagname, /* point is after PROCEDURE/FUNCTION */ - /* keyword, so next item = potential tag */ + get_tagname, /* point is after PROCEDURE/FUNCTION + keyword, so next item = potential tag */ found_tag, /* point is after a potential tag */ inparms, /* point is within parameter-list */ - verify_tag; /* point has passed the parm-list, so the */ - /* next token will determine whether */ - /* this is a FORWARD/EXTERN to be */ - /* ignored, or whether it is a real tag */ + verify_tag; /* point has passed the parm-list, so the + next token will determine whether this + is a FORWARD/EXTERN to be ignored, or + whether it is a real tag */ + save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */ + namebuf = NULL; /* keep compiler quiet */ lineno = 0; charno = 0; dbp = lb.buffer; - *dbp = 0; + *dbp = '\0'; initbuffer (&tline); incomment = inquote = FALSE; @@ -2645,26 +3887,29 @@ Pascal_functions (inf) inparms = FALSE; /* found '(' after "proc" */ verify_tag = FALSE; /* check if "extern" is ahead */ - /* long main loop to get next char */ - while (!feof (inf)) + + while (!feof (inf)) /* long main loop to get next char */ { c = *dbp++; if (c == '\0') /* if end of line */ { - GET_NEW_LINE; + lineno++; + linecharno = charno; + charno += readline (&lb, inf); + dbp = lb.buffer; if (*dbp == '\0') continue; - if (!((found_tag && verify_tag) || - get_tagname)) - c = *dbp++; /* only if don't need *dbp pointing */ - /* to the beginning of the name of */ - /* the procedure or function */ + if (!((found_tag && verify_tag) + || get_tagname)) + c = *dbp++; /* only if don't need *dbp pointing + to the beginning of the name of + the procedure or function */ } if (incomment) { - if (c == '}') /* within { - } comments */ + if (c == '}') /* within { } comments */ incomment = FALSE; - else if (c == '*' && dbp[1] == ')') /* within (* - *) comments */ + else if (c == '*' && *dbp == ')') /* within (* *) comments */ { dbp++; incomment = FALSE; @@ -2683,11 +3928,11 @@ Pascal_functions (inf) case '\'': inquote = TRUE; /* found first quote */ continue; - case '{': /* found open-{-comment */ + case '{': /* found open { comment */ incomment = TRUE; continue; case '(': - if (*dbp == '*') /* found open-(*-comment */ + if (*dbp == '*') /* found open (* comment */ { incomment = TRUE; dbp++; @@ -2700,19 +3945,19 @@ Pascal_functions (inf) inparms = FALSE; continue; case ';': - if ((found_tag) && (!inparms)) /* end of proc or fn stmt */ + if (found_tag && !inparms) /* end of proc or fn stmt */ { verify_tag = TRUE; break; } continue; } - if ((found_tag) && (verify_tag) && (*dbp != ' ')) + if (found_tag && verify_tag && (*dbp != ' ')) { /* check if this is an "extern" declaration */ - if (*dbp == 0) + if (*dbp == '\0') continue; - if ((*dbp == 'e') || (*dbp == 'E')) + if (lowcase (*dbp == 'e')) { if (tail ("extern")) /* superfluous, really! */ { @@ -2720,7 +3965,7 @@ Pascal_functions (inf) verify_tag = FALSE; } } - else if ((*dbp == 'f') || (*dbp == 'F')) + else if (lowcase (*dbp) == 'f') { if (tail ("forward")) /* check for forward reference */ { @@ -2728,31 +3973,32 @@ Pascal_functions (inf) verify_tag = FALSE; } } - if ((found_tag) && (verify_tag)) /* not external proc, so make tag */ + if (found_tag && verify_tag) /* not external proc, so make tag */ { found_tag = FALSE; verify_tag = FALSE; - pfnote (nambuf, TRUE, FALSE, tline.buffer, - cp - tline.buffer + 1, save_lineno, save_lcno); + pfnote (namebuf, TRUE, + tline.buffer, save_len, save_lineno, save_lcno); continue; } } if (get_tagname) /* grab name of proc or fn */ { - if (*dbp == 0) + if (*dbp == '\0') continue; /* save all values for later tagging */ - tline.size = lb.size; + grow_linebuffer (&tline, lb.len + 1); strcpy (tline.buffer, lb.buffer); save_lineno = lineno; save_lcno = linecharno; /* grab block name */ - for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++) + for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++) continue; - nambuf = savenstr (dbp, cp-dbp); - dbp = cp; /* restore dbp to e-o-token */ + namebuf = savenstr (dbp, cp-dbp); + dbp = cp; /* set dbp to e-o-token */ + save_len = dbp - lb.buffer + 1; get_tagname = FALSE; found_tag = TRUE; continue; @@ -2762,7 +4008,7 @@ Pascal_functions (inf) else if (!incomment && !inquote && !found_tag) { /* check for proc/fn keywords */ - switch (c | ' ') + switch (lowcase (c)) { case 'p': if (tail ("rocedure")) /* c = 'p', dbp has advanced */ @@ -2775,13 +4021,20 @@ Pascal_functions (inf) } } } /* while not eof */ + + free (tline.buffer); } /* * lisp tag functions * look for (def or (DEF, quote or QUOTE */ -int + +static int L_isdef P_((char *)); +static int L_isquote P_((char *)); +static void L_getit P_((void)); + +static int L_isdef (strp) register char *strp; { @@ -2790,63 +4043,57 @@ L_isdef (strp) && (strp[3] == 'f' || strp[3] == 'F')); } -int +static int L_isquote (strp) register char *strp; { - return ((*(++strp) == 'q' || *strp == 'Q') - && (*(++strp) == 'u' || *strp == 'U') - && (*(++strp) == 'o' || *strp == 'O') - && (*(++strp) == 't' || *strp == 'T') - && (*(++strp) == 'e' || *strp == 'E') - && isspace(*(++strp))); + return ((*++strp == 'q' || *strp == 'Q') + && (*++strp == 'u' || *strp == 'U') + && (*++strp == 'o' || *strp == 'O') + && (*++strp == 't' || *strp == 'T') + && (*++strp == 'e' || *strp == 'E') + && iswhite (*++strp)); } -void +static void L_getit () { register char *cp; if (*dbp == '\'') /* Skip prefix quote */ dbp++; - else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */ + else if (*dbp == '(') { - dbp += 7; - while (isspace(*dbp)) - dbp++; + if (L_isquote (dbp)) + dbp += 7; /* Skip "(quote " */ + else + dbp += 1; /* Skip "(" before name in (defstruct (foo)) */ + dbp = skip_spaces (dbp); } + for (cp = dbp /*+1*/; - *cp && *cp != '(' && *cp != ' ' && *cp != ')'; + *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')'; cp++) continue; if (cp == dbp) return; - - pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer, - cp - lb.buffer + 1, lineno, linecharno); + + pfnote (savenstr (dbp, cp-dbp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); } -void +static void Lisp_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; - - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, dbp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; if (dbp[0] == '(') { if (L_isdef (dbp)) { - while (!isspace (*dbp)) - dbp++; - while (isspace (*dbp)) - dbp++; + dbp = skip_non_spaces (dbp); + dbp = skip_spaces (dbp); L_getit (); } else @@ -2854,7 +4101,7 @@ Lisp_functions (inf) /* Check for (foo::defmumble name-defined ... */ do dbp++; - while (*dbp && !isspace (*dbp) + while (*dbp != '\0' && !iswhite (*dbp) && *dbp != ':' && *dbp != '(' && *dbp != ')'); if (*dbp == ':') { @@ -2864,10 +4111,8 @@ Lisp_functions (inf) if (L_isdef (dbp - 1)) { - while (!isspace (*dbp)) - dbp++; - while (isspace (*dbp)) - dbp++; + dbp = skip_non_spaces (dbp); + dbp = skip_spaces (dbp); L_getit (); } } @@ -2875,6 +4120,40 @@ Lisp_functions (inf) } } } + +/* + * Postscript tag functions + * Just look for lines where the first character is '/' + * Richard Mlynarik + * Also look at "defineps" for PSWrap + * suggested by Masatake YAMATO + */ +static void +Postscript_functions (inf) + FILE *inf; +{ + register char *bp, *ep; + + LOOP_ON_INPUT_LINES (inf, lb, bp) + { + if (bp[0] == '/') + { + for (ep = bp+1; + *ep != '\0' && *ep != ' ' && *ep != '{'; + ep++) + continue; + pfnote (savenstr (bp, ep-bp), TRUE, + lb.buffer, ep - lb.buffer + 1, lineno, linecharno); + } + else if (strneq (bp, "defineps", 8)) + { + bp = skip_non_spaces (bp); + bp = skip_spaces (bp); + get_tag (bp); + } + } +} + /* * Scheme tag functions @@ -2884,65 +4163,38 @@ Lisp_functions (inf) * look for (set! xyzzy */ -void get_scheme (); - -void +static void Scheme_functions (inf) FILE *inf; { - lineno = 0; - charno = 0; + register char *bp; - while (!feof (inf)) + LOOP_ON_INPUT_LINES (inf, lb, bp) { - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; - if (dbp[0] == '(' && - (dbp[1] == 'D' || dbp[1] == 'd') && - (dbp[2] == 'E' || dbp[2] == 'e') && - (dbp[3] == 'F' || dbp[3] == 'f')) + if (bp[0] == '(' + && (bp[1] == 'D' || bp[1] == 'd') + && (bp[2] == 'E' || bp[2] == 'e') + && (bp[3] == 'F' || bp[3] == 'f')) { - while (!isspace (*dbp)) - dbp++; + bp = skip_non_spaces (bp); /* Skip over open parens and white space */ - while (*dbp && (isspace (*dbp) || *dbp == '(')) - dbp++; - get_scheme (); + while (iswhite (*bp) || *bp == '(') + bp++; + get_tag (bp); } - if (dbp[0] == '(' && - (dbp[1] == 'S' || dbp[1] == 's') && - (dbp[2] == 'E' || dbp[2] == 'e') && - (dbp[3] == 'T' || dbp[3] == 't') && - (dbp[4] == '!' || dbp[4] == '!') && - (isspace (dbp[5]))) + if (bp[0] == '(' + && (bp[1] == 'S' || bp[1] == 's') + && (bp[2] == 'E' || bp[2] == 'e') + && (bp[3] == 'T' || bp[3] == 't') + && (bp[4] == '!' || bp[4] == '!') + && (iswhite (bp[5]))) { - while (!isspace (*dbp)) - dbp++; - /* Skip over white space */ - while (isspace (*dbp)) - dbp++; - get_scheme (); + bp = skip_non_spaces (bp); + bp = skip_spaces (bp); + get_tag (bp); } } } - -void -get_scheme () -{ - register char *cp; - - if (*dbp == 0) - return; - /* Go till you get to white space or a syntactic break */ - for (cp = dbp + 1; - *cp && *cp != '(' && *cp != ')' && !isspace (*cp); - cp++) - continue; - pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, - lb.buffer, cp - lb.buffer + 1, lineno, linecharno); -} /* Find tags in TeX and LaTeX input files. */ @@ -2961,12 +4213,12 @@ struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */ The value of environment var TEXTAGS is prepended to this. */ char *TEX_defenv = "\ -:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem:typeout"; +:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\ +:part:appendix:entry:index"; -void TEX_mode (); -struct TEX_tabent *TEX_decode_env (); -void TEX_getit (); -int TEX_Token (); +static void TEX_mode P_((FILE *)); +static struct TEX_tabent *TEX_decode_env P_((char *, char *)); +static int TEX_Token P_((char *)); char TEX_esc = '\\'; char TEX_opgrp = '{'; @@ -2975,14 +4227,12 @@ char TEX_clgrp = '}'; /* * TeX/LaTeX scanning loop. */ -void +static void TeX_functions (inf) FILE *inf; { - char *lasthit; - - lineno = 0; - charno = 0; + char *cp, *lasthit; + register int i; /* Select either \ or ! as escape character. */ TEX_mode (inf); @@ -2991,26 +4241,28 @@ TeX_functions (inf) if (!TEX_toktab) TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv); - while (!feof (inf)) - { /* Scan each line in file */ - lineno++; - linecharno = charno; - charno += readline (&lb, inf); - dbp = lb.buffer; - lasthit = dbp; - while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */ + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + lasthit = cp; + /* Look at each esc in line. */ + while ((cp = etags_strchr (cp, TEX_esc)) != NULL) { - register int i; - - if (!*(++dbp)) + if (*++cp == '\0') break; - linecharno += dbp - lasthit; - lasthit = dbp; + linecharno += cp - lasthit; + lasthit = cp; i = TEX_Token (lasthit); - if (0 <= i) + if (i >= 0) { - TEX_getit (lasthit, TEX_toktab[i].len); - break; /* We only save a line once */ + /* We seem to include the TeX command in the tag name. + register char *p; + for (p = lasthit + TEX_toktab[i].len; + *p != '\0' && *p != TEX_clgrp; + p++) + continue; */ + pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE, + lb.buffer, lb.len, lineno, linecharno); + break; /* We only tag a line once */ } } } @@ -3022,7 +4274,7 @@ TeX_functions (inf) /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping chars accordingly. */ -void +static void TEX_mode (inf) FILE *inf; { @@ -3050,12 +4302,14 @@ TEX_mode (inf) TEX_opgrp = '<'; TEX_clgrp = '>'; } + /* If the input file is compressed, inf is a pipe, and rewind may fail. + No attempt is made to correct the situation. */ rewind (inf); } /* Read environment and prepend it to the default string. Build token table. */ -struct TEX_tabent * +static struct TEX_tabent * TEX_decode_env (evarname, defenv) char *evarname; char *defenv; @@ -3070,11 +4324,14 @@ TEX_decode_env (evarname, defenv) if (!env) env = defenv; else - env = concat (env, defenv, ""); + { + char *oldenv = env; + env = concat (oldenv, defenv, ""); + } /* Allocate a token table */ for (size = 1, p = env; p;) - if ((p = etags_strchr (p, ':')) && *(++p)) + if ((p = etags_strchr (p, ':')) && *++p != '\0') size++; /* Add 1 to leave room for null terminator. */ tab = xnew (size + 1, struct TEX_tabent); @@ -3104,32 +4361,12 @@ TEX_decode_env (evarname, defenv) return tab; } -/* Record a tag defined by a TeX command of length LEN and starting at NAME. - The name being defined actually starts at (NAME + LEN + 1). - But we seem to include the TeX command in the tag name. */ -void -TEX_getit (name, len) - char *name; - int len; -{ - char *p = name + len; - - if (*name == 0) - return; - - /* Let tag name extend to next group close (or end of line) */ - while (*p && *p != TEX_clgrp) - p++; - pfnote (savenstr (name, p-name), TRUE, FALSE, lb.buffer, - strlen (lb.buffer), lineno, linecharno); -} - /* If the text at CP matches one of the tag-defining TeX command names, return the pointer to the first occurrence of that command in TEX_toktab. Otherwise return -1. - Keep the capital `T' in `Token' for dumb truncating compilers + Keep the capital `T' in `token' for dumb truncating compilers (this distinguishes it from `TEX_toktab' */ -int +static int TEX_Token (cp) char *cp; { @@ -3141,119 +4378,368 @@ TEX_Token (cp) return -1; } -/* Support for Prolog. */ +/* + * Prolog support (rewritten) by Anders Lindgren, Mar. 96 + * + * Assumes that the predicate starts at column 0. + * Only the first clause of a predicate is added. + */ +static int prolog_pred P_((char *, char *)); +static void prolog_skip_comment P_((linebuffer *, FILE *)); +static int prolog_atom P_((char *, int)); -/* Whole head (not only functor, but also arguments) - is gotten in compound term. */ -void -prolog_getit (s) - char *s; +static void +Prolog_functions (inf) + FILE *inf; { - char *save_s; - int insquote, npar; + char *cp, *last; + int len; + int allocated; - save_s = s; - insquote = FALSE; - npar = 0; - while (1) + allocated = 0; + len = 0; + last = NULL; + + LOOP_ON_INPUT_LINES (inf, lb, cp) { - if (s[0] == '\0') /* syntax error. */ - return; - else if (insquote && s[0] == '\'' && s[1] == '\'') - s += 2; - else if (s[0] == '\'') - { - insquote = !insquote; - s++; - } - else if (!insquote && s[0] == '(') - { - npar++; - s++; - } - else if (!insquote && s[0] == ')') + if (cp[0] == '\0') /* Empty line */ + continue; + else if (iswhite (cp[0])) /* Not a predicate */ + continue; + else if (cp[0] == '/' && cp[1] == '*') /* comment. */ + prolog_skip_comment (&lb, inf); + else if ((len = prolog_pred (cp, last)) > 0) { - npar--; - s++; - if (npar == 0) - break; - else if (npar < 0) /* syntax error. */ - return; + /* Predicate. Store the function name so that we only + generate a tag for the first clause. */ + if (last == NULL) + last = xnew(len + 1, char); + else if (len + 1 > allocated) + last = xrnew (last, len + 1, char); + allocated = len + 1; + strncpy (last, cp, len); + last[len] = '\0'; } - else if (!insquote && s[0] == '.' - && (isspace (s[1]) || s[1] == '\0')) - { /* fullstop. */ - if (npar != 0) /* syntax error. */ - return; - s++; - break; - } - else - s++; } - pfnote (savenstr (save_s, s-save_s), TRUE, FALSE, - save_s, s-save_s, lineno, linecharno); } -/* It is assumed that prolog predicate starts from column 0. */ -void -Prolog_functions (inf) + +static void +prolog_skip_comment (plb, inf) + linebuffer *plb; FILE *inf; { - void skip_comment (), prolog_getit (); + char *cp; - lineno = linecharno = charno = 0; - while (!feof (inf)) + do { + for (cp = plb->buffer; *cp != '\0'; cp++) + if (cp[0] == '*' && cp[1] == '/') + return; lineno++; - linecharno += charno; - charno = readline (&lb, inf) + 1; /* 1 for newline. */ - dbp = lb.buffer; - if (isspace (dbp[0])) /* not predicate header. */ + linecharno += readline (plb, inf); + } + while (!feof(inf)); +} + +/* + * A predicate definition is added if it matches: + * ( + * + * It is added to the tags database if it doesn't match the + * name of the previous clause header. + * + * Return the size of the name of the predicate, or 0 if no header + * was found. + */ +static int +prolog_pred (s, last) + char *s; + char *last; /* Name of last clause. */ +{ + int pos; + int len; + + pos = prolog_atom (s, 0); + if (pos < 1) + return 0; + + len = pos; + pos = skip_spaces (s + pos) - s; + + if ((s[pos] == '(') || (s[pos] == '.')) + { + if (s[pos] == '(') + pos++; + + /* Save only the first clause. */ + if (last == NULL + || len != (int)strlen (last) + || !strneq (s, last, len)) + { + pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); + return len; + } + } + return 0; +} + +/* + * Consume a Prolog atom. + * Return the number of bytes consumed, or -1 if there was an error. + * + * A prolog atom, in this context, could be one of: + * - An alphanumeric sequence, starting with a lower case letter. + * - A quoted arbitrary string. Single quotes can escape themselves. + * Backslash quotes everything. + */ +static int +prolog_atom (s, pos) + char *s; + int pos; +{ + int origpos; + + origpos = pos; + + if (islower(s[pos]) || (s[pos] == '_')) + { + /* The atom is unquoted. */ + pos++; + while (isalnum(s[pos]) || (s[pos] == '_')) + { + pos++; + } + return pos - origpos; + } + else if (s[pos] == '\'') + { + pos++; + + while (1) + { + if (s[pos] == '\'') + { + pos++; + if (s[pos] != '\'') + break; + pos++; /* A double quote */ + } + else if (s[pos] == '\0') + /* Multiline quoted atoms are ignored. */ + return -1; + else if (s[pos] == '\\') + { + if (s[pos+1] == '\0') + return -1; + pos += 2; + } + else + pos++; + } + return pos - origpos; + } + else + return -1; +} + +/* + * Support for Erlang -- Anders Lindgren, Feb 1996. + * + * Generates tags for functions, defines, and records. + * + * Assumes that Erlang functions start at column 0. + */ +static int erlang_func P_((char *, char *)); +static void erlang_attribute P_((char *)); +static int erlang_atom P_((char *, int)); + +static void +Erlang_functions (inf) + FILE *inf; +{ + char *cp, *last; + int len; + int allocated; + + allocated = 0; + len = 0; + last = NULL; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (cp[0] == '\0') /* Empty line */ + continue; + else if (iswhite (cp[0])) /* Not function nor attribute */ continue; - else if (dbp[0] == '%') /* comment. */ + else if (cp[0] == '%') /* comment */ continue; - else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */ - skip_comment (&lb, inf, &lineno, &linecharno); - else /* found. */ - prolog_getit (dbp); + else if (cp[0] == '"') /* Sometimes, strings start in column one */ + continue; + else if (cp[0] == '-') /* attribute, e.g. "-define" */ + { + erlang_attribute (cp); + last = NULL; + } + else if ((len = erlang_func (cp, last)) > 0) + { + /* + * Function. Store the function name so that we only + * generates a tag for the first clause. + */ + if (last == NULL) + last = xnew (len + 1, char); + else if (len + 1 > allocated) + last = xrnew (last, len + 1, char); + allocated = len + 1; + strncpy (last, cp, len); + last[len] = '\0'; + } } } -void -skip_comment (plb, inf, plineno, plinecharno) - struct linebuffer *plb; - FILE *inf; - int *plineno; /* result */ - long *plinecharno; /* result */ + +/* + * A function definition is added if it matches: + * ( + * + * It is added to the tags database if it doesn't match the + * name of the previous clause header. + * + * Return the size of the name of the function, or 0 if no function + * was found. + */ +static int +erlang_func (s, last) + char *s; + char *last; /* Name of last clause. */ { - char *cp; + int pos; + int len; - do + pos = erlang_atom (s, 0); + if (pos < 1) + return 0; + + len = pos; + pos = skip_spaces (s + pos) - s; + + /* Save only the first clause. */ + if (s[pos++] == '(' + && (last == NULL + || len != (int)strlen (last) + || !strneq (s, last, len))) + { + pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno); + return len; + } + + return 0; +} + + +/* + * Handle attributes. Currently, tags are generated for defines + * and records. + * + * They are on the form: + * -define(foo, bar). + * -define(Foo(M, N), M+N). + * -record(graph, {vtab = notable, cyclic = true}). + */ +static void +erlang_attribute (s) + char *s; +{ + int pos; + int len; + + if (strneq (s, "-define", 7) || strneq (s, "-record", 7)) { - for (cp = plb->buffer; *cp != '\0'; cp++) - if (cp[0] == '*' && cp[1] == '/') - return; - (*plineno)++; - *plinecharno += readline (plb, inf) + 1; /* 1 for newline. */ + pos = skip_spaces (s + 7) - s; + if (s[pos++] == '(') + { + pos = skip_spaces (s + pos) - s; + len = erlang_atom (s, pos); + if (len != 0) + pfnote (savenstr (& s[pos], len), TRUE, + s, pos + len, lineno, linecharno); + } } - while (!feof(inf)); + return; +} + + +/* + * Consume an Erlang atom (or variable). + * Return the number of bytes consumed, or -1 if there was an error. + */ +static int +erlang_atom (s, pos) + char *s; + int pos; +{ + int origpos; + + origpos = pos; + + if (isalpha (s[pos]) || s[pos] == '_') + { + /* The atom is unquoted. */ + pos++; + while (isalnum (s[pos]) || s[pos] == '_') + pos++; + return pos - origpos; + } + else if (s[pos] == '\'') + { + pos++; + + while (1) + { + if (s[pos] == '\'') + { + pos++; + break; + } + else if (s[pos] == '\0') + /* Multiline quoted atoms are ignored. */ + return -1; + else if (s[pos] == '\\') + { + if (s[pos+1] == '\0') + return -1; + pos += 2; + } + else + pos++; + } + return pos - origpos; + } + else + return -1; } #ifdef ETAGS_REGEXPS + +static char *scan_separators P_((char *)); +static void analyse_regex P_((char *, bool)); +static void add_regex P_((char *, bool, language *)); +static char *substitute P_((char *, char *, struct re_registers *)); + /* Take a string like "/blah/" and turn it into "blah", making sure that the first and last characters are the same, and handling - quoted separator characters. Actually, stops on the occurence of + quoted separator characters. Actually, stops on the occurrence of an unquoted separator. Also turns "\t" into a Tab character. Returns pointer to terminating separator. Works in place. Null terminates name string. */ -char * +static char * scan_separators (name) char *name; { char sep = name[0]; char *copyto = name; - logical quoted = FALSE; + bool quoted = FALSE; for (++name; *name != '\0'; ++name) { @@ -3284,29 +4770,90 @@ scan_separators (name) return name; } +/* Look at the argument of --regex or --no-regex and do the right + thing. Same for each line of a regexp file. */ +static void +analyse_regex (regex_arg, ignore_case) + char *regex_arg; + bool ignore_case; +{ + if (regex_arg == NULL) + free_patterns (); /* --no-regex: remove existing regexps */ + + /* A real --regexp option or a line in a regexp file. */ + switch (regex_arg[0]) + { + /* Comments in regexp file or null arg to --regex. */ + case '\0': + case ' ': + case '\t': + break; + + /* Read a regex file. This is recursive and may result in a + loop, which will stop when the file descriptors are exhausted. */ + case '@': + { + FILE *regexfp; + linebuffer regexbuf; + char *regexfile = regex_arg + 1; + + /* regexfile is a file containing regexps, one per line. */ + regexfp = fopen (regexfile, "r"); + if (regexfp == NULL) + { + pfatal (regexfile); + return; + } + initbuffer (®exbuf); + while (readline_internal (®exbuf, regexfp) > 0) + analyse_regex (regexbuf.buffer, ignore_case); + free (regexbuf.buffer); + fclose (regexfp); + } + break; + + /* Regexp to be used for a specific language only. */ + case '{': + { + language *lang; + char *lang_name = regex_arg + 1; + char *cp; + + for (cp = lang_name; *cp != '}'; cp++) + if (*cp == '\0') + { + error ("unterminated language name in regex: %s", regex_arg); + return; + } + *cp = '\0'; + lang = get_language_from_name (lang_name); + if (lang == NULL) + return; + add_regex (cp + 1, ignore_case, lang); + } + break; + + /* Regexp to be used for any language. */ + default: + add_regex (regex_arg, ignore_case, NULL); + break; + } +} + /* Turn a name, which is an ed-style (but Emacs syntax) regular expression, into a real regular expression by compiling it. */ -void -add_regex (regexp_pattern) +static void +add_regex (regexp_pattern, ignore_case, lang) char *regexp_pattern; + bool ignore_case; + language *lang; { char *name; const char *err; struct re_pattern_buffer *patbuf; + pattern *pp; - if (regexp_pattern == NULL) - { - /* Remove existing regexps. */ - num_patterns = 0; - patterns = NULL; - return; - } - if (regexp_pattern[0] == '\0') - { - error ("missing regexp", 0); - return; - } if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) { error ("%s: unterminated regexp", regexp_pattern); @@ -3315,13 +4862,14 @@ add_regex (regexp_pattern) name = scan_separators (regexp_pattern); if (regexp_pattern[0] == '\0') { - error ("null regexp", 0); + error ("null regexp", (char *)NULL); return; } (void) scan_separators (name); patbuf = xnew (1, struct re_pattern_buffer); - patbuf->translate = NULL; + /* Translation table to fold case if appropriate. */ + patbuf->translate = (ignore_case) ? lc_trans : NULL; patbuf->fastmap = NULL; patbuf->buffer = NULL; patbuf->allocated = 0; @@ -3333,130 +4881,148 @@ add_regex (regexp_pattern) return; } - num_patterns += 1; - if (num_patterns == 1) - patterns = xnew (1, struct pattern); - else - patterns = ((struct pattern *) - xrealloc (patterns, - (num_patterns * sizeof (struct pattern)))); - patterns[num_patterns - 1].pattern = patbuf; - patterns[num_patterns - 1].name_pattern = savestr (name); - patterns[num_patterns - 1].error_signaled = FALSE; + pp = p_head; + p_head = xnew (1, pattern); + p_head->regex = savestr (regexp_pattern); + p_head->p_next = pp; + p_head->language = lang; + p_head->pattern = patbuf; + p_head->name_pattern = savestr (name); + p_head->error_signaled = FALSE; } /* - * Do the subtitutions indicated by the regular expression and + * Do the substitutions indicated by the regular expression and * arguments. */ -char * +static char * substitute (in, out, regs) char *in, *out; struct re_registers *regs; { - char *result = NULL, *t; - int size = 0; - - /* Pass 1: figure out how much size to allocate. */ - for (t = out; *t; ++t) - { - if (*t == '\\') - { - ++t; - if (!*t) - { - fprintf (stderr, "%s: pattern subtitution ends prematurely\n", - progname); - return NULL; - } - if (isdigit (*t)) - { - int dig = *t - '0'; - size += regs->end[dig] - regs->start[dig]; - } - } - } + char *result, *t; + int size, dig, diglen; + + result = NULL; + size = strlen (out); + + /* Pass 1: figure out how much to allocate by finding all \N strings. */ + if (out[size - 1] == '\\') + fatal ("pattern error in \"%s\"", out); + for (t = etags_strchr (out, '\\'); + t != NULL; + t = etags_strchr (t + 2, '\\')) + if (isdigit (t[1])) + { + dig = t[1] - '0'; + diglen = regs->end[dig] - regs->start[dig]; + size += diglen - 2; + } + else + size -= 1; /* Allocate space and do the substitutions. */ result = xnew (size + 1, char); - size = 0; - for (; *out; ++out) - { - if (*out == '\\') - { - ++out; - if (isdigit (*out)) - { - /* Using "dig2" satisfies my debugger. Bleah. */ - int dig2 = *out - '0'; - strncpy (result + size, in + regs->start[dig2], - regs->end[dig2] - regs->start[dig2]); - size += regs->end[dig2] - regs->start[dig2]; - } - else - { - switch (*out) - { - case '\t': - result[size++] = '\t'; - break; - case '\\': - *out = '\\'; - break; - default: - result[size++] = *out; - break; - } - } - } - else - result[size++] = *out; - } - result[size] = '\0'; + + for (t = result; *out != '\0'; out++) + if (*out == '\\' && isdigit (*++out)) + { + /* Using "dig2" satisfies my debugger. Bleah. */ + dig = *out - '0'; + diglen = regs->end[dig] - regs->start[dig]; + strncpy (t, in + regs->start[dig], diglen); + t += diglen; + } + else + *t++ = *out; + *t = '\0'; + + if (DEBUG && (t > result + size || t - result != (int)strlen (result))) + abort (); return result; } + +/* Deallocate all patterns. */ +static void +free_patterns () +{ + pattern *pp; + while (p_head != NULL) + { + pp = p_head->p_next; + free (p_head->regex); + free (p_head->name_pattern); + free (p_head); + p_head = pp; + } + return; +} +static void +get_tag (bp) + register char *bp; +{ + register char *cp; + + if (*bp == '\0') + return; + /* Go till you get to white space or a syntactic break */ + for (cp = bp + 1; + *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp); + cp++) + continue; + pfnote (savenstr (bp, cp-bp), TRUE, + lb.buffer, cp - lb.buffer + 1, lineno, linecharno); +} + #endif /* ETAGS_REGEXPS */ /* Initialize a linebuffer for use */ -void -initbuffer (linebuffer) - struct linebuffer *linebuffer; +static void +initbuffer (lbp) + linebuffer *lbp; { - linebuffer->size = 200; - linebuffer->buffer = xnew (200, char); + lbp->size = 200; + lbp->buffer = xnew (200, char); } /* - * Read a line of text from `stream' into `linebuffer'. - * Return the number of characters read from `stream', - * which is the length of the line including the newline, if any. + * Read a line of text from `stream' into `lbp', excluding the + * newline or CR-NL, if any. Return the number of characters read from + * `stream', which is the length of the line including the newline. + * + * On DOS or Windows we do not count the CR character, if any, before the + * NL, in the returned length; this mirrors the behavior of emacs on those + * platforms (for text files, it translates CR-NL to NL as it reads in the + * file). */ -long -readline_internal (linebuffer, stream) - struct linebuffer *linebuffer; +static long +readline_internal (lbp, stream) + linebuffer *lbp; register FILE *stream; { - char *buffer = linebuffer->buffer; - register char *p = linebuffer->buffer; + char *buffer = lbp->buffer; + register char *p = lbp->buffer; register char *pend; int chars_deleted; - pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */ + pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */ while (1) { register int c = getc (stream); if (p == pend) { - linebuffer->size *= 2; - buffer = (char *) xrealloc (buffer, linebuffer->size); - p += buffer - linebuffer->buffer; - pend = buffer + linebuffer->size; - linebuffer->buffer = buffer; + /* We're at the end of linebuffer: expand it. */ + lbp->size *= 2; + buffer = xrnew (buffer, lbp->size, char); + p += buffer - lbp->buffer; + pend = buffer + lbp->size; + lbp->buffer = buffer; } if (c == EOF) { + *p = '\0'; chars_deleted = 0; break; } @@ -3464,102 +5030,98 @@ readline_internal (linebuffer, stream) { if (p > buffer && p[-1] == '\r') { - *--p = '\0'; + p -= 1; +#ifdef DOS_NT + /* Assume CRLF->LF translation will be performed by Emacs + when loading this file, so CRs won't appear in the buffer. + It would be cleaner to compensate within Emacs; + however, Emacs does not know how many CRs were deleted + before any given point in the file. */ + chars_deleted = 1; +#else chars_deleted = 2; +#endif } else { - *p = '\0'; chars_deleted = 1; } + *p = '\0'; break; } *p++ = c; } + lbp->len = p - buffer; - return p - buffer + chars_deleted; + return lbp->len + chars_deleted; } /* - * Like readline_internal, above, but try to match the input - * line against any existing regular expressions. + * Like readline_internal, above, but in addition try to match the + * input line against relevant regular expressions. */ -long -readline (linebuffer, stream) - struct linebuffer *linebuffer; +static long +readline (lbp, stream) + linebuffer *lbp; FILE *stream; { /* Read new line. */ - int i; - long result = readline_internal (linebuffer, stream); - + long result = readline_internal (lbp, stream); #ifdef ETAGS_REGEXPS - /* Match against all listed patterns. */ - for (i = 0; i < num_patterns; ++i) - { - int match = re_match (patterns[i].pattern, linebuffer->buffer, - (int)result, 0, &patterns[i].regs); - switch (match) - { - case -2: - /* Some error. */ - if (!patterns[i].error_signaled) - { - error ("error while matching pattern %d", i); - patterns[i].error_signaled = TRUE; - } - break; - case -1: - /* No match. */ - break; - default: - /* Match occurred. Construct a tag. */ - if (patterns[i].name_pattern[0] != '\0') - { - /* Make a named tag. */ - char *name = substitute (linebuffer->buffer, - patterns[i].name_pattern, - &patterns[i].regs); - if (name != NULL) - pfnote (name, TRUE, TRUE, linebuffer->buffer, - match, lineno, linecharno); - } - else - { - /* Make an unnamed tag. */ - pfnote (NULL, TRUE, FALSE, linebuffer->buffer, - match, lineno, linecharno); - } - break; - } - } + int match; + pattern *pp; + + /* Match against relevant patterns. */ + if (lbp->len > 0) + for (pp = p_head; pp != NULL; pp = pp->p_next) + { + /* Only use generic regexps or those for the current language. */ + if (pp->language != NULL && pp->language != curlang) + continue; + + match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs); + switch (match) + { + case -2: + /* Some error. */ + if (!pp->error_signaled) + { + error ("error while matching \"%s\"", pp->regex); + pp->error_signaled = TRUE; + } + break; + case -1: + /* No match. */ + break; + default: + /* Match occurred. Construct a tag. */ + if (pp->name_pattern[0] != '\0') + { + /* Make a named tag. */ + char *name = substitute (lbp->buffer, + pp->name_pattern, &pp->regs); + if (name != NULL) + pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno); + } + else + { + /* Make an unnamed tag. */ + pfnote ((char *)NULL, TRUE, + lbp->buffer, match, lineno, linecharno); + } + break; + } + } #endif /* ETAGS_REGEXPS */ return result; } - -/* - * Read a file, but do no processing. This is used to do regexp - * matching on files that have no language defined. - */ -void -just_read_file (inf) - FILE *inf; -{ - while (!feof (inf)) - { - ++lineno; - linecharno = charno; - charno += readline (&lb, inf) + 1; - } -} - /* * Return a pointer to a space of size strlen(cp)+1 allocated * with xnew where the string CP has been copied. */ -char * +static char * savestr (cp) char *cp; { @@ -3570,7 +5132,7 @@ savestr (cp) * Return a pointer to a space of size LEN+1 allocated with xnew where * the string CP has been copied for at most the first LEN characters. */ -char * +static char * savenstr (cp, len) char *cp; int len; @@ -3587,13 +5149,14 @@ savenstr (cp, len) * Return the ptr in sp at which the character c last * appears; NULL if not found * - * Identical to System V strrchr, included for portability. + * Identical to POSIX strrchr, included for portability. */ -char * +static char * etags_strrchr (sp, c) - register char *sp, c; + register const char *sp; + register int c; { - register char *r; + register const char *r; r = NULL; do @@ -3601,7 +5164,7 @@ etags_strrchr (sp, c) if (*sp == c) r = sp; } while (*sp++); - return r; + return (char *)r; } @@ -3609,22 +5172,43 @@ etags_strrchr (sp, c) * Return the ptr in sp at which the character c first * appears; NULL if not found * - * Identical to System V strchr, included for portability. + * Identical to POSIX strchr, included for portability. */ -char * +static char * etags_strchr (sp, c) - register char *sp, c; + register const char *sp; + register int c; { do { if (*sp == c) - return sp; + return (char *)sp; } while (*sp++); return NULL; } +/* Skip spaces, return new pointer. */ +static char * +skip_spaces (cp) + char *cp; +{ + while (iswhite (*cp)) + cp++; + return cp; +} + +/* Skip non spaces, return new pointer. */ +static char * +skip_non_spaces (cp) + char *cp; +{ + while (*cp != '\0' && !iswhite (*cp)) + cp++; + return cp; +} + /* Print error message and exit. */ -void +static void fatal (s1, s2) char *s1, *s2; { @@ -3632,7 +5216,7 @@ fatal (s1, s2) exit (BAD); } -void +static void pfatal (s1) char *s1; { @@ -3640,10 +5224,24 @@ pfatal (s1) exit (BAD); } +static void +suggest_asking_for_help () +{ + fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", + progname, +#ifdef LONG_OPTIONS + "--help" +#else + "-h" +#endif + ); + exit (BAD); +} + /* Print error message. `s1' is printf control string, `s2' is arg for it. */ -void +static void error (s1, s2) - char *s1, *s2; + const char *s1, *s2; { fprintf (stderr, "%s: ", progname); fprintf (stderr, s1, s2); @@ -3652,7 +5250,7 @@ error (s1, s2) /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */ -char * +static char * concat (s1, s2, s3) char *s1, *s2, *s3; { @@ -3668,24 +5266,11 @@ concat (s1, s2, s3) } /* Does the same work as the system V getcwd, but does not need to - guess buffer size in advance. */ -char * + guess the buffer size in advance. */ +static char * etags_getcwd () { -#ifdef DOS_NT - char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */ - - getwd (path); - p = path; - while (*p) - if (*p == '\\') - *p++ = '/'; - else - *p++ = tolower (*p); - - return strdup (path); -#else /* not DOS_NT */ -#if HAVE_GETCWD +#ifdef HAVE_GETCWD int bufsize = 200; char *path = xnew (bufsize, char); @@ -3694,12 +5279,28 @@ etags_getcwd () if (errno != ERANGE) pfatal ("getcwd"); bufsize *= 2; + free (path); path = xnew (bufsize, char); } + canonicalize_filename (path); return path; -#else /* not DOS_NT and not HAVE_GETCWD */ - struct linebuffer path; + +#else /* not HAVE_GETCWD */ +#ifdef MSDOS + char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */ + + getwd (path); + + for (p = path; *p != '\0'; p++) + if (*p == '\\') + *p = '/'; + else + *p = lowcase (*p); + + return strdup (path); +#else /* not MSDOS */ + linebuffer path; FILE *pipe; initbuffer (&path); @@ -3709,58 +5310,68 @@ etags_getcwd () pclose (pipe); return path.buffer; +#endif /* not MSDOS */ #endif /* not HAVE_GETCWD */ -#endif /* not DOS_NT */ } -/* Return a newly allocated string containing the filename - of FILE relative to the absolute directory DIR (which - should end with a slash). */ -char * +/* Return a newly allocated string containing the file name of FILE + relative to the absolute directory DIR (which should end with a slash). */ +static char * relative_filename (file, dir) char *file, *dir; { - char *fp, *dp, *res; + char *fp, *dp, *afn, *res; + int i; - /* Find the common root of file and dir. */ - fp = absolute_filename (file, cwd); + /* Find the common root of file and dir (with a trailing slash). */ + afn = absolute_filename (file, cwd); + fp = afn; dp = dir; while (*fp++ == *dp++) continue; - do - { - fp--; - dp--; - } + fp--, dp--; /* back to the first differing char */ +#ifdef DOS_NT + if (fp == afn && afn[0] != '/') /* cannot build a relative name */ + return afn; +#endif + do /* look at the equal chars until '/' */ + fp--, dp--; while (*fp != '/'); - /* Build a sequence of "../" strings for the resulting relative filename. */ - for (dp = etags_strchr (dp + 1, '/'), res = ""; - dp != NULL; - dp = etags_strchr (dp + 1, '/')) - { - res = concat (res, "../", ""); - } + /* Build a sequence of "../" strings for the resulting relative file name. */ + i = 0; + while ((dp = etags_strchr (dp + 1, '/')) != NULL) + i += 1; + res = xnew (3*i + strlen (fp + 1) + 1, char); + res[0] = '\0'; + while (i-- > 0) + strcat (res, "../"); - /* Add the filename relative to the common root of file and dir. */ - res = concat (res, fp + 1, ""); + /* Add the file name relative to the common root of file and dir. */ + strcat (res, fp + 1); + free (afn); - return res; /* temporary stub */ + return res; } -/* Return a newly allocated string containing the - absolute filename of FILE given CWD (which should - end with a slash). */ -char * -absolute_filename (file, cwd) - char *file, *cwd; +/* Return a newly allocated string containing the absolute file name + of FILE given DIR (which should end with a slash). */ +static char * +absolute_filename (file, dir) + char *file, *dir; { char *slashp, *cp, *res; - if (absolutefn (file)) - res = concat (file, "", ""); + if (filename_is_absolute (file)) + res = savestr (file); +#ifdef DOS_NT + /* We don't support non-absolute file names with a drive + letter, like `d:NAME' (it's too much hassle). */ + else if (file[1] == ':') + fatal ("%s: relative file names with drive letters not supported", file); +#endif else - res = concat (cwd, file, ""); + res = concat (dir, file, ""); /* Delete the "/dirname/.." and "/." substrings. */ slashp = etags_strchr (res, '/'); @@ -3774,18 +5385,17 @@ absolute_filename (file, cwd) cp = slashp; do cp--; - while (cp >= res && *cp != '/'); - if (*cp == '/') - { - strcpy (cp, slashp + 3); - } - else /* else (cp == res) */ - { - if (slashp[3] != '\0') - strcpy (cp, slashp + 4); - else - return "."; - } + while (cp >= res && !filename_is_absolute (cp)); + if (cp < res) + cp = slashp; /* the absolute name begins with "/.." */ +#ifdef DOS_NT + /* Under MSDOS and NT we get `d:/NAME' as absolute + file name, so the luser could say `d:/../NAME'. + We silently treat this as `d:/NAME'. */ + else if (cp[0] != '/') + cp = slashp; +#endif + strcpy (cp, slashp + 3); slashp = cp; continue; } @@ -3799,48 +5409,95 @@ absolute_filename (file, cwd) slashp = etags_strchr (slashp + 1, '/'); } - return res; + if (res[0] == '\0') + return savestr ("/"); + else + return res; } /* Return a newly allocated string containing the absolute - filename of dir where FILE resides given CWD (which should + file name of dir where FILE resides given DIR (which should end with a slash). */ -char * -absolute_dirname (file, cwd) - char *file, *cwd; +static char * +absolute_dirname (file, dir) + char *file, *dir; { char *slashp, *res; char save; + canonicalize_filename (file); slashp = etags_strrchr (file, '/'); if (slashp == NULL) - return cwd; + return savestr (dir); save = slashp[1]; slashp[1] = '\0'; - res = absolute_filename (file, cwd); + res = absolute_filename (file, dir); slashp[1] = save; return res; } +/* Whether the argument string is an absolute file name. The argument + string must have been canonicalized with canonicalize_filename. */ +static bool +filename_is_absolute (fn) + char *fn; +{ + return (fn[0] == '/' +#ifdef DOS_NT + || (isalpha(fn[0]) && fn[1] == ':' && fn[2] == '/') +#endif + ); +} + +/* Translate backslashes into slashes. Works in place. */ +static void +canonicalize_filename (fn) + register char *fn; +{ +#ifdef DOS_NT + /* Canonicalize drive letter case. */ + if (islower (fn[0])) + fn[0] = toupper (fn[0]); + /* Convert backslashes to slashes. */ + for (; *fn != '\0'; fn++) + if (*fn == '\\') + *fn = '/'; +#else + /* No action. */ + fn = NULL; /* shut up the compiler */ +#endif +} + +/* Increase the size of a linebuffer. */ +static void +grow_linebuffer (lbp, toksize) + linebuffer *lbp; + int toksize; +{ + while (lbp->size < toksize) + lbp->size *= 2; + lbp->buffer = xrnew (lbp->buffer, lbp->size, char); +} + /* Like malloc but get fatal error if memory is exhausted. */ -long * +static long * xmalloc (size) unsigned int size; { long *result = (long *) malloc (size); if (result == NULL) - fatal ("virtual memory exhausted", 0); + fatal ("virtual memory exhausted", (char *)NULL); return result; } -long * +static long * xrealloc (ptr, size) char *ptr; unsigned int size; { - long *result = (long *) realloc (ptr, size); + long *result = (long *) realloc (ptr, size); if (result == NULL) - fatal ("virtual memory exhausted"); + fatal ("virtual memory exhausted", (char *)NULL); return result; }