Created
December 13, 2008 22:21
-
-
Save anonymous/35562 to your computer and use it in GitHub Desktop.
Effective Lines of Code Counter. Type: perl eLOC.pl --help
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# eLOC - Effective Lines of Code Counter | |
# JFS (2005) | |
# | |
use strict; | |
use warnings; | |
use sigtrap; | |
use diagnostics; | |
use warnings::register; | |
no warnings __PACKAGE__; | |
sub DEBUG { 0 } | |
use English qw( -no_match_vars ) ; # Avoids regex performance penalty | |
use Getopt::Long qw(:config gnu_getopt); | |
use File::DosGlob 'glob'; | |
use Pod::Usage; | |
our $VERSION = '0.01'; | |
# globals
use constant NOTFILENAME => undef;

# Per-file counters, one slot per line classification; every slot starts at 0.
my %counter = map { $_ => 0 } qw(
    PHYS ELOC PURE_COMMENT BLANK LLOC INLINE_COMMENT LOC
);

# Column captions printed by the STDOUT_TOP format, keyed by option name.
my %header = (
    eloc      => 'eloc',
    lloc      => 'lloc',
    loc       => 'loc',
    comment   => 'comment',
    blank     => 'blank',
    newline   => 'newline',
    logicline => 'lgcline',
);

my %total = %counter;    # independent copy; accumulates over all files
my $c     = \%counter;   # counters read by the STDOUT format
my $h     = \%header;    # captions read by the STDOUT_TOP format

my $inside_multiline_comment = 0;    # true while inside a /* ... */ comment
my $filename        = NOTFILENAME;   # name printed in the last column
my $filecount       = 0;             # number of files scanned so far
my $filename_header = 'file name';   # caption of the file name column
# process input args
# Every flag starts out as '' so that "not given on the command line" can be
# told apart from a value stored by GetOptions().
my ($version, $help, $man, $is_deterministic, $has_header) = ('') x 5;

print STDERR ("Input args:'", join("|", @ARGV), "'\n") if DEBUG;

# Storage for the parsed options.  The flags tested directly below are
# linked to their scalars; the column selectors are read back from this hash
# by make_format().
my %option = (
    version       => \$version,
    help          => \$help,
    man           => \$man,
    deterministic => \$is_deterministic,
    header        => \$has_header,
);

my @option_specs = (
    'version',
    'help',
    'man',
    'eloc|e',         # print the eLOC counts
    'lloc|s',         # print the lLOC counts (code statements)
    'loc|l',          # print the LOC counts (eLOC + lines of a single brace or parenthesis)
    'comment|c',      # print the comment counts (count lines which contain a comment)
    'blank|b',        # print the blank counts
    'newline|n',      # print the newline count
    'logicline|g',    # print the logical line count (= LOC + Comment Lines + Blank Lines)
    'deterministic',  # print the LOC determination for every line in the source file
    'header',         # print header line
);

GetOptions(\%option, @option_specs)
    or invalid_options("$0: invalid options\nTry `$0 --help' for more information.");

if ($version) {
    version();
}
if ($help) {
    pod2usage(-exitstatus => 0, -verbose => 1);
}
if ($man) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# --deterministic turns the header on unless --header was given explicitly
if ($is_deterministic and $has_header eq '') {
    $has_header = 1;
}
#format for print_loc_metric()
# Formats are declared at compile time, so the generated declarations are
# compiled with a string eval.  The record format is always installed; the
# page header format only when a header was requested.
my ($format, $format_top) = make_format();

my @format_definitions = (
    [ 'format',     $format,     1           ],
    [ 'format_top', $format_top, $has_header ],
);
for my $definition (@format_definitions) {
    my ($label, $declaration, $is_wanted) = @{$definition};
    print STDERR ("$label:\n", $declaration) if DEBUG > 10;
    next unless $is_wanted;
    eval $declaration;
    die $@ if $@;    # $EVAL_ERROR
}
# process files
# Scan every file named on the command line (standard input when none is
# given, or when the name is '-'), print one metrics line per file and a
# final "total" line when more than one file was scanned.
print STDERR ("Input args after Getopts():\n",
    join("|", @ARGV), "\n") if DEBUG > 10;
expand_wildcards();
@ARGV = '-' unless @ARGV;    # no FILE given: read standard input

foreach my $fn (@ARGV) {
    $filename = $fn;

    # Three-argument open keeps the file name from being interpreted as part
    # of the open mode (and preserves leading/trailing whitespace in names).
    # '-' no longer gets its implicit two-argument meaning, so it is mapped
    # to standard input explicitly.  The bareword handle IN is kept because
    # generate_loc_metric() reads from it.
    my $is_open = $filename eq '-'
        ? open(IN, '<&', \*STDIN)
        : open(IN, '<', $filename);
    unless ($is_open) {
        warn "$0: Unable to read from '$filename': $!\n";
        next;
    }

    print STDERR "Scanning $filename...\n" if DEBUG;
    clear_counters();
    generate_loc_metric();
    $filecount++;
    print_loc_metric();

    close(IN)
        or warn "$0: Could not close $filename: $!\n";
}

# print total
if ($filecount > 1) {
    $filename = "total";
    $c = \%total;    # the STDOUT format reads its numbers through $c
    print_loc_metric();
}
exit 0;
#------------------------------------------------- | |
# wsglob($pattern)
# Expand a wildcard pattern with glob().  Returns the matching names, or the
# original argument list unchanged when nothing matches, so that a
# non-matching pattern is passed on instead of silently disappearing.
# The pattern is taken from the first argument; $_ is used only when the sub
# is called without arguments (the sub used to glob $_ unconditionally and
# ignore its argument).
sub wsglob {
    my $pattern = @_ ? $_[0] : $_;
    my @matches = glob($pattern);
    return @matches ? @matches : @_;    # defence from empty list from glob()
}
# expand_wildcards()
# Replace every element of @ARGV that contains a wildcard ('*' or '?') or a
# single quote by the list returned from wsglob(); other elements are kept
# as they are.  Prints @ARGV to STDERR before and after when DEBUG is on.
sub expand_wildcards {
    print STDERR ("Input args before expand_wildcards():\n",
        join("|", @ARGV), "\n") if DEBUG;

    my @expanded;
    # iterate through $_ on purpose: the current argument stays visible in $_
    # while wsglob() runs
    for (@ARGV) {
        if (/['*?']/) {
            push @expanded, wsglob($_);
        }
        else {
            push @expanded, $_;
        }
    }
    @ARGV = @expanded;

    print STDERR ("Input args after expand_wildcards():\n",
        join("|", @ARGV), "\n") if DEBUG;
}
# clear_counters()
# Reset every per-file counter in %counter to zero; %total is not touched.
sub clear_counters {
    $_ = 0 for values %counter;    # values() aliases the hash slots
}
# make_format()
# Build the source text of two format declarations and return it as
# ($format, $format_top):
#   - "format STDOUT"     prints one line of counters followed by the file
#                         name, plus a continuation line for long names;
#   - "format STDOUT_TOP" prints the matching column captions.
# The columns are the ones selected on the command line (%option), in a
# fixed order; when no column option was given all seven are printed.
# The caller is expected to compile the returned text with a string eval.
sub make_format {
    # Output columns in display order: [ option/header key, counter key ].
    my @columns = (
        [ 'loc',       'LOC'       ],
        [ 'eloc',      'ELOC'      ],
        [ 'lloc',      'LLOC'      ],
        [ 'comment',   'comment'   ],
        [ 'blank',     'BLANK'     ],
        [ 'newline',   'PHYS'      ],
        [ 'logicline', 'logicline' ],
    );

    my $console_screen_width = (get_terminal_size())[0];
    print STDERR '$console_screen_width='
        . (defined $console_screen_width ? $console_screen_width : 'undef')
        . "\n" if DEBUG > 10;
    # Fall back to 100 columns whenever the width is unknown; this covers the
    # -1 "not available" marker as well as an undefined or zero width.
    $console_screen_width = 100
        unless defined $console_screen_width && $console_screen_width > 0;

    # columns requested explicitly, or all of them by default
    my @selected = grep { $option{ $_->[0] } } @columns;
    @selected = @columns unless @selected;

    if (DEBUG > 10) {
        my %o = (%option, map { $_->[0] => 1 } @selected);
        while ((my ($name, $value) = each %o)) {
            print STDERR "name=$name, value=$value\n";
        }
    }

    # '<' x $n, with a negative count clamped to zero
    my $left_justified = sub {
        my ($count) = @_;
        return '<' x ($count > 0 ? $count : 0);
    };

    # picture line: one right-justified numeric field per column, then a
    # '^' text field for the file name filling the rest of the screen
    my $field_format = '@>>>>>> ';
    my $field_count  = scalar @selected;
    my $name_width   = $console_screen_width
                     - length($field_format) * $field_count - 2;
    my $picture_line = ($field_format x $field_count)
                     . '^' . $left_justified->($name_width) . "\n";

    # argument lines: the expressions that fill the picture line
    my $record_args = join('', map { '$$c{"' . $_->[1] . '"}, ' } @selected)
                    . '$filename' . "\n";
    my $header_args = join('', map { '$$h{"' . $_->[0] . '"}, ' } @selected)
                    . '$filename_header' . "\n";

    my $f = 'format STDOUT =' . "\n"
          . '# LOC, eLOC, lLOC, comment, blank, newline, logicline and filename' . "\n"
          . $picture_line
          . $record_args
          # 2nd argument line for long file names; '~~' repeats the line
          # until $filename has been consumed
          . '^' . $left_justified->($console_screen_width - 2) . '~~' . "\n"
          . ' $filename' . "\n"
          . '.' . "\n";

    my $f_top = 'format STDOUT_TOP =' . "\n"
              . $picture_line
              . $header_args
              . '.' . "\n";

    return ($f, $f_top);
}
# generate_loc_metric()
# Read the file opened on the handle IN line by line and classify every
# line through increment(): BLANK, PURE_COMMENT, INLINE_COMMENT, LOC, ELOC
# and PHYS (LLOC is counted inside is_inline_comment()).  With
# --deterministic the source line and its classification tags are echoed.
#
# Fixes compared with the previous implementation:
#   - a '\' at the very end of the file no longer appends an undefined
#     value to the line and no longer counts that physical line twice;
#   - a block comment is closed at the first '*/' on the line even when
#     several words follow it (previously only zero or one trailing word was
#     recognised, so the rest of the file could be counted as comment).
sub generate_loc_metric {
    my $is_concatinated = 0;
    LINE: while (<IN>) {
        chomp;
        # a continuation line was already echoed when it was appended
        print if $is_deterministic && !$is_concatinated;

        # handle multiline code statements
        if ($is_concatinated = s/\\$//) {
            warnings::warnif("$0: '\\'-ending line concantinated");
            my $continuation = <IN>;
            if (defined $continuation) {
                increment('PHYS');
                print "\n" if $is_deterministic;
                $_ .= $continuation;
                chomp($continuation);
                print $continuation if $is_deterministic;
                # re-run the loop body on the joined line without reading
                redo LINE unless eof(IN);
            }
        }

        # blank lines, including inside comments, don't move to next line here
        increment('BLANK') if /^\s*$/;

        # check whether multiline comments finished
        if ($inside_multiline_comment && m~\*/\s*(.*?)\s*$~) {
            my $trailing = $1;

            # The text after the first '*/' may itself open another block
            # comment.  Track '/*' and '*/' from left to right; a '//' seen
            # outside a comment hides the rest of the line.  Quoted strings
            # are not recognised here.
            my $is_reopened = 0;
            while ($trailing =~ m~(//|/\*|\*/)~g) {
                my $token = $1;
                if    ($token eq '/*') { $is_reopened = 1 }
                elsif ($token eq '*/') { $is_reopened = 0 }
                elsif (!$is_reopened)  { last }
            }
            $inside_multiline_comment = $is_reopened;

            # code after '*/' is not analysed; the line counts as comment
            warnings::warnif("$0: expression '$trailing' after '*/' discarded")
                if length $trailing;
            increment('PURE_COMMENT');
            next LINE;
        }

        # inside multiline comments
        if ($inside_multiline_comment) {
            increment('PURE_COMMENT');
            next LINE;
        }

        # C++ style comment at the beginning of line (except whitespaces)
        if (m~^\s*//~) {
            increment('PURE_COMMENT');
            next LINE;
        }

        # C style comment at the beginning of line (except whitespaces)
        if (m~^\s*/\*~) {
            $inside_multiline_comment = 1 unless m~\*/~;
            increment('PURE_COMMENT');
            next LINE;
        }

        # inline comment, don't move to next line here;
        # lLOC is implicitly incremented inside is_inline_comment($)
        increment('INLINE_COMMENT') if is_inline_comment($_);

        increment('LOC') unless /^\s*$/;

        # standalone braces or parenthesis
        next LINE if /^\s*(?:\{|\}|\(|\))+\s*$/;

        # eLOC is not comments, blanks or standalone braces or parenthesis
        # therefore just increment eLOC counter here
        increment('ELOC') unless /^\s*$/;
    }
    continue {
        increment('PHYS');
        print " [$.]\n" if $is_deterministic;    # $INPUT_LINE_NUMBER
    }
}
# print_loc_metric()
# Derive the 'comment' and 'logicline' values in the counter set that $c
# points to, then print one record with the STDOUT format.  Dies when
# $filename is undefined.  When exactly one file name is in @ARGV the file
# name column and its caption are printed empty.
#
# The formats consume their text through '^' fields, which chop the printed
# part off the variable.  Both $filename and $filename_header are therefore
# saved and restored around write(); previously only $filename was restored,
# so a page header printed after the first one lost its file name caption.
sub print_loc_metric {
    $$c{'comment'} = $$c{'PURE_COMMENT'} + $$c{'INLINE_COMMENT'};
    # LOC + Comment Lines + Blank Lines
    $$c{'logicline'} = $$c{'LOC'} + $$c{'comment'} + $$c{'BLANK'};

    unless (defined $filename) {
        die "print_loc_metric(): filename is not defined";
    }

    my ($saved_filename, $saved_header) = ($filename, $filename_header);

    # a single input: print neither the name nor its caption
    ($filename, $filename_header) = ("", "") unless $#ARGV;

    print STDERR ("ARGV in print_loc_metric:", join('|', @ARGV), "\n")
        if DEBUG;
    write STDOUT;    # replace with printf

    ($filename, $filename_header) = ($saved_filename, $saved_header);
}
# increment($loc_type)
# Add one to the counter named $loc_type in both the per-file counters and
# the running totals.  With --deterministic the counter name is echoed as
# "\t#NAME#".  Dies when $loc_type is undefined.
sub increment {
    my ($loc_type) = @_;
    die 'increment(\$): input argument is undefined'
        unless defined $loc_type;

    for my $tally (\%counter, \%total) {
        $tally->{$loc_type}++;
    }

    if ($is_deterministic) {
        print "\t#${loc_type}#";
    }
}
# is_inline_comment($line)
# Scan one source line from left to right and return 1 when it contains a
# C ('/* */') or C++ ('//') comment sign that is not quoted, 0 otherwise.
#
# The caller is expected to pass a line that is not empty, does not begin
# with a comment sign, is not inside a multiline comment and has its ending
# '\' removed.
#
# Side effects:
#   - increment('LLOC') is called for every semi-colon that is neither
#     quoted nor commented; the two semi-colons of a for(;;) header count as
#     one.  The for(...) header is consumed as a whole, up to the last ')'
#     on the line, so comments inside it are not seen.
#   - $inside_multiline_comment is set to 1 when the line ends inside an
#     unterminated '/*' comment, to 0 otherwise.
#
# Rules:
#   - quoted comment signs are not counted;
#   - quote characters of the other style inside a quoted string are ignored;
#   - quote characters inside comments are ignored;
#   - comment signs of the other style inside a comment are ignored;
#   - inside a quoted string a backslash escapes the next character, so
#     "a \" b" is one string (previously the escaped quote ended it).
#
# Dies when $line is undefined, on a '*/' without a matching '/*', and on a
# character none of the rules can consume.
#
# The scan runs on a private copy of the line; $_ is not modified.
sub is_inline_comment {
    my $line = shift;
    defined $line
        or die 'is_inline_comment($): $line is not defined';
    print "\n$line" if DEBUG > 10;

    # scanner state
    my %in = (
        'c'   => 0,    # inside C style comment
        'cpp' => 0,    # inside C++ style comment
        'qm'  => 0,    # inside single quoted string
        'qqm' => 0,    # inside double quoted string
    );
    my $has_comment = 0;

    # Every rule matches at the current position (\G); /c keeps the position
    # when a rule does not match, so the next rule tries the same place.
    SCAN: while (1) {
        my $is_quoted    = $in{'qm'} || $in{'qqm'};
        my $is_commented = $in{'c'}  || $in{'cpp'};

        # escaped character inside a quoted string
        if ($is_quoted && $line =~ /\G\\./gc) {
            next SCAN;
        }

        # double quote: toggle
        if ($line =~ /\G\"/gc) {
            unless ($in{'qm'} || $is_commented) {
                $in{'qqm'} = $in{'qqm'} ? 0 : 1;
            }
            next SCAN;
        }

        # single quote: toggle
        if ($line =~ /\G\'/gc) {
            unless ($in{'qqm'} || $is_commented) {
                $in{'qm'} = $in{'qm'} ? 0 : 1;
            }
            next SCAN;
        }

        # C++ comment sign
        if ($line =~ m~\G//~gc) {
            unless ($is_quoted || $in{'c'}) {
                $has_comment = 1;
                $in{'cpp'}   = 1;
            }
            next SCAN;
        }

        # beginning C comment sign
        if ($line =~ m~\G/\*~gc) {
            unless ($is_quoted || $in{'cpp'}) {
                $has_comment = 1;
                $in{'c'}     = 1;
            }
            next SCAN;
        }

        # ending C comment sign
        if ($line =~ m~\G\*/~gc) {
            unless ($is_quoted || $in{'cpp'}) {
                $in{'c'}
                    or die 'is_inline_comment($): unexpected c style ending comment sign'.
                           "\n'$line'";
                $in{'c'} = 0;
            }
            next SCAN;
        }

        # for loop header: its two semi-colons count as one statement
        if ($line =~ /\Gfor\s*\(.*\;.*\;.*\)/gc) {
            increment('LLOC') unless $is_quoted || $is_commented;
            next SCAN;
        }

        # semi-colon outside a for() header
        if ($line =~ /\G\;/gc) {
            increment('LLOC') unless $is_quoted || $is_commented;
            next SCAN;
        }

        # any other character: skip it
        next SCAN if $line =~ /\G./gc;

        # end of the line
        last SCAN if $line =~ /\G$/gc;

        #default
        die 'is_inline_comment($): unexpected character in the line:' .
            "\n'$line'";
    }

    # apply state
    $inside_multiline_comment = $in{'c'};
    return $has_comment;
}
# version()
# Print the version banner to standard output and exit with status 0.
sub version {
    # TODO: version implementation
    my @banner = (
        "NAME v$VERSION",
        'Written by AUTHOR',
        'COPYRIGHT AND LICENSE',
    );
    print map { "$_\n" } @banner;
    exit 0;
}
# invalid_options(@message)
# Print @message followed by a newline to STDERR and exit with status 2.
sub invalid_options {
    my @message = @_;
    print STDERR @message, "\n";
    exit 2;
}
# get_terminal_size()
# Return the terminal size as ($width, $height) in characters, or (-1, -1)
# when it cannot be determined.  Three methods are tried in order:
#   1. Win32::Console (MaxWindow),
#   2. Term::ReadKey (GetTerminalSize),
#   3. the TIOCGWINSZ ioctl on /dev/tty (needs sys/ioctl.ph).
# A method is accepted only when it yields two positive numbers; a failure
# is reported through warnings::warnif() and the next method is tried.
#
# The methods used to be string evals that assigned to undeclared variables
# ($wpixels, $winsize, ...); a string eval inherits "use strict" from the
# enclosing file, so methods 2 and 3 could never compile.  They are block
# evals with declared lexicals now, and the modules are loaded at run time
# with require.
sub get_terminal_size {
    my ($wchar, $hchar) = (-1, -1);

    my $is_usable = sub {
        my ($width, $height) = @_;
        return defined $width && defined $height
            && $width > 0 && $height > 0;
    };

    # 1. Windows console
    my @size = eval {
        require Win32::Console;
        my $console = Win32::Console->new();
        $console->MaxWindow();
    };
    return @size[0, 1] if !$@ && $is_usable->(@size);
    warnings::warnif($@) if $@;    # $EVAL_ERROR

    # 2. Term::ReadKey; only the first two of its values (characters) are used
    @size = eval {
        require Term::ReadKey;
        Term::ReadKey::GetTerminalSize();
    };
    return @size[0, 1] if !$@ && $is_usable->(@size);
    warnings::warnif($@) if $@;    # $EVAL_ERROR

    # 3. ioctl on the controlling terminal
    @size = eval {
        require 'sys/ioctl.ph';
        die "no TIOCGWINSZ " unless defined &TIOCGWINSZ;
        # lexical handle: closed automatically, also when ioctl() fails
        open(my $tty, '+<', '/dev/tty')
            or die "No tty: $!";
        my $winsize = '';
        unless (ioctl($tty, &TIOCGWINSZ(), $winsize)) {
            die sprintf "$0: ioctl TIOCGWINSZ (%08x: $!)\n",
                &TIOCGWINSZ();
        }
        close($tty);
        # unpacked in the order the original code used: height first, then
        # width; the two pixel values that follow are not needed
        my ($rows, $columns) = unpack('S4', $winsize);
        ($columns, $rows);
    };
    return @size[0, 1] if !$@ && $is_usable->(@size);
    warnings::warnif($@) if $@;    # $EVAL_ERROR

    return ($wchar, $hchar);
}
1; | |
__END__ | |
=head1 NAME | |
eLOC - Effective Lines of Code Counter | |
=head1 SYNOPSIS | |
B<eloc> B<[>OPTIONB<]...> B<[>FILEB<]...> | |
Print LOC, eLOC, lLOC, comment, blank, newline and logicline counts | |
for each FILE, and a total line if more than one FILE is specified. | |
See L</"LOC Specification"> for more info, use `eloc --man'. | |
-e, --eloc print the {E}LOC counts | |
-s, --lloc print the lLOC counts (code {S}tatements) | |
-l, --loc print the {L}OC counts (eLOC + lines of a single brace or parenthesis) | |
-c, --comment print the {C}omments counts (count lines which contains a comment) | |
-b, --blank print the {B}lank counts | |
-n, --newline print the {N}ewline count | |
-g, --logicline print the lo{G}ical line count (= LOC + Comment Lines + Blank Lines) | |
--deterministic print the LOC determination for every line in the source file | |
--header print header line | |
--help display this help and exit | |
--man display full help and exit | |
--version output version information and exit | |
With no FILE, or when FILE is -, read standard input. | |
Metrics counted by the program are based on narration from | |
http://msquaredtechnologies.com/m2rsm/docs/rsm_metrics_narration.htm | |
=for TODO: Comment Percent = (Comment Line Count / Logical Line Count) x 100
=for TODO: White Space Percentage = (Number of spaces / Number of spaces and characters) * 100 | |
=head1 DESCRIPTION | |
eLOC is a simple LOC counter. See L</"LOC Specification">. | |
=head2 LOC Specification | |
=over 1 | |
=item LOC | |
Lines Of Code = eLOC + lines of a single brace or parenthesis | |
=item eLOC | |
An effective line of code or eLOC is the measurement of all lines that are | |
not comments, blanks or standalone braces or parenthesis. | |
This metric more closely represents the quantity of work performed. | |
RSM introduces eLOC as a metrics standard. | |
See http://msquaredtechnologies.com/m2rsm/docs/rsm_metrics_narration.htm | |
=item lLOC | |
Logical lines of code represent a metric for those lines of code which form
code statements. These statements are terminated with a semi-colon.
The control line for the "for" loop contain two semi-colons but accounts | |
for only one semi colon. | |
See http://msquaredtechnologies.com/m2rsm/docs/rsm_metrics_narration.htm | |
=item comment | |
comment = pure comment + inline comment | |
=over | |
=item pure comment | |
Comment lines represent a metric for pure comment lines without any code in them.
See L</"inline comment">. | |
=item inline comment | |
Inline comment line is a line which contains both LOC line and pure comment. | |
Inline comment line and pure comment line (see L</"pure comment">) | |
are mutually exclusive, that is a given physical line cannot be an inline comment | |
line and a pure comment line simultaneously. | |
=over | |
=item Example: | |
static const int defaultWidth = 400; // value provided in declaration | |
=back | |
=back | |
=item blank | |
Blank line is a line which contains at most whitespaces. | |
Blank lines are counted inside comments too. | |
=item logicline | |
The logical line count = LOC + Comment Lines + Blank Lines | |
=back | |
=head1 KNOWN BUGS AND LIMITATIONS | |
=over | |
=item | |
It supports only C/C++ source files. | |
=item | |
Comments inside for(;;) statements are not counted | |
=over | |
=item Example: | |
for(int i = 0; i < N /*comment*/; i++ ); #LLOC# #LLOC# #LOC# #ELOC# #PHYS# [1] | |
=back | |
=item | |
'\'-ending lines are concatenated (though the newline count is valid)
=item | |
Input from stdin is not supported when
the script is invoked solely by name, without an explicit perl executable.
=item | |
Wildcards in path with spaces are not supported (like GNU utilities). | |
=back | |
=over | |
=begin fixed | |
=item Limitation: single source file | |
Only one source file at time supported | |
=item Limitation: LLOC is unsupported | |
The logical lines of code metric is unsupported. | |
=item missed inline comment for C style comment | |
#include <math.h> /* comment */ #ELOC# #PHYS# [2] | |
But must be | |
#include <math.h> /* comment */ #INLINE_COMMENT# #ELOC# #PHYS# [2] | |
=item wrong LOC type for the code after '*/' | |
/* another #PURE_COMMENT# #PHYS# [36] | |
trick #PURE_COMMENT# #PHYS# [37] | |
*/ i++; #PURE_COMMENT# #PHYS# [38] | |
In the last line must be | |
#INLINE_COMMENT# #PHYS# [38] | |
=end fixed | |
=back | |
=head1 SEE ALSO | |
Metrics counted by the program are based on narration from L<http://msquaredtechnologies.com/m2rsm/docs/rsm_metrics_narration.htm> | |
=cut | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment