Created
September 30, 2011 14:46
-
-
Save terrycojones/1253956 to your computer and use it in GitHub Desktop.
mytags
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#PATH=/bin:/usr/ucb:/usr/bin | |
# | |
# usage: mytags [source-files] | |
# Enhanced version of ctags. | |
# Merge standard "ctags" and create extra tags from #define statements | |
# and declarations. | |
# | |
# Declaration cases not handled: | |
# ============================== | |
# | |
# | |
# - Repeated identifier names. | |
# ========================== | |
# Only the first instance will be tagged. | |
# Be careful about ^]'ing to tags that are in functions... you may | |
# not get what you want. Worse, you might get put into another file | |
# without getting what you want. You can always get back with ^^ | |
# | |
# | |
# - Lines of declarations that are continued with a comma e.g. | |
# ========================================================== | |
# int fred, harry, joe, | |
# mike, dick; | |
# Will not try to produce tags for mike or dick. | |
# | |
# | |
# - Declaration lines that do not start with a type name e.g. | |
# ========================================================= | |
# /* silly comment in the way */ int fred; | |
# Will not tag fred. | |
# | |
# | |
# - Declarations in comment blocks will be tagged e.g. | |
# ================================================== | |
# /* start of comment | |
# int fred; | |
# int harry; | |
# end of comment */ | |
# Will produce tags for fred and harry (if they don't already exist). | |
# | |
# | |
# Run ctags, create extra tags, sort. | |
# | |
# Note that vi searches in NOMAGIC mode, meaning | |
# only ^ and $ have any effect. Thus we have | |
# to escape these, and /\, but nothing else. | |
# (Note also that due to a bug in vi you get left in | |
# nomagic mode if the pattern isn't found) | |
# | |
# Steve Hayman (MFCF) | |
# Terry Jones (F.U.N. Corporation) 18/10/87 | |
# | |
# Require at least one source file. The usage message is a diagnostic, so it
# goes to stderr, and "$0" is quoted so a script path containing whitespace
# still yields the right program name.
if [ $# -eq 0 ]
then
    echo "usage: $(basename "$0") files" >&2
    exit 1
fi
# | |
# Make the standard tags file with ctags. | |
# | |
# "$@" hands every file name to ctags as one intact argument; an unquoted $*
# would re-split names on whitespace and expand glob characters in them.
ctags -w -t "$@"
# | |
# Do the additional tags | |
# | |
gawk '
#
# NOTE: this whole awk program sits inside shell single quotes, so no
# comment or string in it may contain an apostrophe.
#
# Build the two lookup tables used by the declaration rule:
#   keywd  - words that may start a declaration (types, storage classes)
#   follow - characters that end the declarator part of a line
#
BEGIN {
    kw_count = split("char int long double float short register static void unsigned", kw_list, " ")
    for ( kw_idx = 1; kw_idx <= kw_count; kw_idx++ )
        keywd[kw_list[kw_idx]] = 1

    term_count = split("[ = ;", term_list, " ")
    for ( term_idx = 1; term_idx <= term_count; term_idx++ )
        follow[term_list[term_idx]] = 1
}
# | |
# The #define grabber. | |
# | |
# Matches "#define NAME" and "# define NAME", with spaces or tabs after the
# hash, and queues NAME for output by the END rule. The whitespace required
# after "define" keeps look-alikes such as "#defined" out.
/^#[ \t]*define[ \t]/ {
    # In the "# define" form the hash is a field of its own, so the macro
    # name is the third field rather than the second.
    token = ($1 == "#") ? $3 : $2
    #
    # Careful with macro functions: keep only the name before the "(".
    #
    paren = index(token, "(")
    if ( paren )
        token = substr(token, 1, paren - 1)
    #
    # A #define with no name would produce an empty tag, so ignore it.
    #
    if ( token == "" )
        next
    #
    # Set up these tags for later output (see END clause).
    #
    total_tokens++
    patterns[total_tokens] = $0
    files[total_tokens] = FILENAME
    tags[total_tokens] = token
    next
}
# | |
# The declaration grabber. | |
# | |
# | |
# Make sure we have some fields and that the first is a type name. | |
# Could check that NF>1 but for declarations like int*fred; | |
# | |
# Fires on lines whose first field is a keyword from the keywd table and
# queues one tag per declared identifier for output by the END rule.
NF > 0 && keywd[$1] == 1 {
    #
    # If the last field is a keyword then we must have something like
    #
    #    unsigned int
    #    silly()
    #
    # so there is nothing to tag on this line.
    #
    if ( keywd[$NF] )
        next
    #
    # Find the first field that is not in the keywd array; the declared
    # names start there. The test above guarantees that $NF is not a
    # keyword, so the loop always stops with i <= NF.
    #
    for ( i = 2; i <= NF; i++ ) {
        if ( !keywd[$i] )
            break
    }
    #
    # Get the tail of the line, starting from that field. The leading
    # keyword fields are peeled off one at a time; searching the line for
    # the text of $i could match inside a keyword instead (the "u" of
    # "unsigned u = 3;" is found at column 1).
    #
    line = $0
    for ( k = 1; k < i; k++ )
        sub(/^[ \t]*[^ \t]+/, "", line)
    sub(/^[ \t]+/, "", line)
    #
    # Cut the line at the earliest of ; = and [ so that an initialiser or
    # trailing comment is never mistaken for identifiers, e.g.
    #
    #    char *fred="this is fred"    /* comment about fred the char* */
    #
    # is reduced to "*fred". Every follow character is tried, so the result
    # does not depend on the order in which awk walks the array.
    #
    # (Do not take "," out at this stage, since we split on "," next.)
    #
    for ( f in follow ) {
        j = index(line, f)
        if ( j )
            line = substr(line, 1, j - 1)
    }
    #
    # Split what remains on commas and process each piece.
    #
    total_ids = split(line, identifiers, ",")
    depth = 0
    for ( i = 1; i <= total_ids; i++ ) {
        token = identifiers[i]
        #
        # Anything containing a parenthesis, or lying between an unclosed
        # "(" and its ")", belongs to a function declarator such as
        #
        #    int silly(a, b)
        #
        # and is not an identifier we want. The gsub calls only count the
        # parentheses; they replace each one with itself.
        #
        opens = gsub(/\(/, "(", token)
        closes = gsub(/\)/, ")", token)
        inside = (depth > 0)
        depth += opens - closes
        if ( depth < 0 )
            depth = 0
        if ( inside || opens || closes )
            continue
        #
        # Strip leading white space and * characters, and trailing white
        # space, so that "int x = 1;" yields the tag "x" and not "x ".
        #
        sub(/^[ \t*]+/, "", token)
        sub(/[ \t]+$/, "", token)
        if ( token == "" )
            continue
        #
        # Only the first instance of a name is tagged. The names already
        # seen live in an array of their own: split() empties its target
        # array on every call, so a record kept in the identifiers array
        # would be forgotten as soon as the next declaration line is read.
        #
        if ( token in seen )
            continue
        seen[token] = 1
        #
        # And finally set up the arrays for later use.
        #
        total_tokens++
        patterns[total_tokens] = $0
        tags[total_tokens] = token
        files[total_tokens] = FILENAME
    }
}
# | |
# Finally, process all of the tags array. | |
# | |
# The search pattern is the entire line. Print a line that looks like | |
# | |
# token <tab> filename <tab> /<appropriately-escaped-pattern>/ | |
# | |
# Emit one tags-file line per queued tag. The search pattern is the whole
# source line anchored with ^ and $, with each of the characters ^ $ / and
# backslash preceded by a backslash.
END {
    for ( slot in patterns ) {
        text = patterns[slot]
        width = length(text)
        escaped = ""
        for ( pos = 1; pos <= width; pos++ ) {
            ch = substr(text, pos, 1)
            if ( index("^$/\\", ch) > 0 )
                escaped = escaped "\\"
            escaped = escaped ch
        }
        printf "%s\t%s\t/^%s$/\n", tags[slot], files[slot], escaped
    }
}
#
# Send all of this into sort, merging the tags we created with ctags.
# "$@" passes each file name to awk as one intact argument, and sorting
# in the C locale keeps the tags file in plain byte order, which is the
# order editors assume when they binary-search it.
#
' "$@" | LC_ALL=C sort -u -o tags - tags
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment