Created
August 8, 2010 20:42
a script that collects statistics on perl code to aid in prioritizing while refactoring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>perl code_statistics.pl | |
### Collecting block statistics | |
### Average Block Length | |
18.5932553337922 | |
### Average Block Size | |
670.863041982106 | |
### Average Block Depth | |
2.39917412250516 | |
### Top Ten Longest Blocks | |
File Line Lines Size Depth Dev. | |
================================================================================ | |
lib/Greenphyl/Load/Family.pm 57 294 16570 1 15.81 | |
lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 15.44 | |
cgi-bin/update_pipeline.cgi 121 260 7608 1 13.98 | |
lib/Greenphyl/Load/Family.pm 79 260 15030 2 13.98 | |
cgi-bin/job_process.cgi 46 250 10726 1 13.45 | |
lib/Greenphyl/Load/Family.pm 91 247 14549 3 13.28 | |
lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 12.69 | |
lib/Greenphyl/Load/Family.pm 120 217 13306 4 11.67 | |
lib/Greenphyl/Load/Family.pm 134 202 12659 5 10.86 | |
lib/Greenphyl/Load/Family.pm 140 195 12400 6 10.49 | |
lib/Greenphyl/Load/Go.pm 27 195 8422 1 10.49 | |
### Top Ten Biggest Blocks | |
File Line Lines Size Depth Dev. | |
================================================================================ | |
lib/Greenphyl/Load/Family.pm 57 294 16570 1 24.70 | |
lib/Greenphyl/Load/Family.pm 79 260 15030 2 22.40 | |
lib/Greenphyl/Load/Family.pm 91 247 14549 3 21.69 | |
lib/Greenphyl/Load/Family.pm 120 217 13306 4 19.83 | |
lib/Greenphyl/Load/Family.pm 134 202 12659 5 18.87 | |
lib/Greenphyl/Load/Family.pm 140 195 12400 6 18.48 | |
lib/Greenphyl/Load/Family.pm 145 189 12138 7 18.09 | |
lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 17.20 | |
cgi-bin/job_process.cgi 46 250 10726 1 15.99 | |
lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 15.43 | |
cgi-bin/job_process.cgi 98 191 8454 2 12.60 | |
### Top Ten Deepest Located Blocks | |
File Line Lines Size Depth Dev. | |
================================================================================ | |
lib/Greenphyl/Load/Family.pm 189 8 639 12 5.00 | |
lib/Greenphyl/Load/Family.pm 242 8 617 12 5.00 | |
lib/Greenphyl/Load/Family.pm 182 5 203 12 5.00 | |
lib/Greenphyl/Load/Family.pm 167 31 2073 11 4.58 | |
lib/Greenphyl/Load/Family.pm 238 13 954 11 4.58 | |
lib/Greenphyl/Load/Go.pm 104 5 330 11 4.58 | |
lib/Greenphyl/Load/Uniprot.pm 523 5 301 11 4.58 | |
lib/Greenphyl/Load/Family.pm 234 3 166 11 4.58 | |
lib/Greenphyl/Load/Family.pm 272 55 3317 10 4.17 | |
lib/Greenphyl/Load/Family.pm 164 35 2243 10 4.17 | |
lib/Greenphyl/Load/Family.pm 228 24 1685 10 4.17 | |
D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use feature 'say'; | |
use File::Find::Rule; | |
use File::Grep qw( fgrep fmap fdo ); | |
use PPI; | |
use Smart::Comments; | |
use List::Util qw( reduce ); | |
use HTML::Template; | |
use Term::ProgressBar::Simple; | |
my @ignore_list = qw( | |
site/lib/Perl/APIReference site/lib/Bio/MolEvol/CodonModel.pm | |
Syntax/Highlight/Engine/Kate site/lib/ExtUtils/XSpp/Grammar.pm | |
); | |
$|++; | |
run(); | |
exit; | |
sub run | |
{ | |
my @files = grep is_not_ignored(), find_code_files(); | |
### Collecting block statistics | |
my $progress = Term::ProgressBar::Simple->new( progress_config( count => scalar(@files) ) ); | |
my @blocks = map find_subs( $_, sub { $progress->increment; } ), @files; | |
### Average Block Length | |
my $avg_length = get_and_say_average( 'line_count', \@blocks ); | |
### Average Block Size | |
my $avg_size = get_and_say_average( 'size', \@blocks ); | |
### Average Block Depth | |
my $avg_depth = get_and_say_average( 'indent', \@blocks ); | |
### Top Ten Longest Blocks | |
print_top_ten_table( 'line_count', $avg_length, \@blocks ); | |
### Top Ten Biggest Blocks | |
print_top_ten_table( 'size', $avg_size, \@blocks ); | |
### Top Ten Deepest Located Blocks | |
print_top_ten_table( 'indent', $avg_depth, \@blocks ); | |
return; | |
} | |
sub find_code_files | |
{ | |
return File::Find::Rule->file->name( '*.pm', '*.pl', '*.phtml', '*.cgi' )->in('.'); | |
} | |
sub progress_config { | |
my %config = @_; | |
die "no count given" if !defined $config{count}; | |
$config{ETA} ||= 'linear'; | |
#$config{minor_char} ||= ''; | |
$config{max_update_rate} ||= 0.1; | |
return \%config; | |
} | |
sub is_not_ignored { | |
my $ignore = join '|', @ignore_list; | |
return 1 if !$ignore; | |
return 0 if $_ =~ m/$ignore/; | |
return 1; | |
} | |
sub get_and_say_average { | |
my ( $column, $blocks ) = @_; | |
my $avg = 1 / @{$blocks} * reduce { $a + $b->{$column} } 0, @{$blocks}; | |
say $avg; | |
return $avg; | |
} | |
sub print_top_ten_table { | |
my ( $column, $avg, $blocks ) = @_; | |
@{$blocks} = sort { $b->{$column} <=> $a->{$column} } @{$blocks}; | |
print_statistics( $avg, $column, @{$blocks} ); | |
return; | |
} | |
sub find_subs | |
{ | |
my ( $file, $post_process_hook ) = @_; | |
my $code = PPI::Document->new($file) || return; | |
my $blocks = $code->find('PPI::Structure::Block'); | |
$blocks ||= []; | |
my @block_data = map collect_block_data( $_, $file ), @{$blocks}; | |
$post_process_hook->(); | |
return @block_data; | |
} | |
sub collect_block_data { | |
my ( $block, $file ) = @_; | |
my @lines = split( '\n', $block->content ); | |
my $location = $block->location; | |
my $size = length $block->content; | |
# blocks with only one line are probably just hash accessors and thus irrelevant | |
# however we want to keep huge examples of those around, just in case they're interesting | |
return if @lines == 1 and $size < 150; | |
my %block_data; | |
$block_data{size} = $size; | |
$block_data{line_count} = @lines; | |
$block_data{line} = $location->[0]; | |
$block_data{file} = $file; | |
$block_data{indent} = first_child_indent( $block, $block_data{line} ); | |
return \%block_data; | |
} | |
sub first_child_indent { | |
my ( $block, $first_line ) = @_; | |
my $first_child = $block->find_first( sub { is_indent_relevant_element( @_, $first_line ) } ); | |
# blocks without a child give us the indentation of the first brace, which is not reliable | |
# so we null the indent to keep them out of the top ten | |
return 0 if !$first_child; | |
my $child_loc = $first_child->location; | |
my $child_indent = int( $child_loc->[1] / 4 ); | |
return $child_indent; | |
} | |
=head2 is_indent_relevant_element | |
This is an element evaluation routine. It is used by the PPI element finder. | |
Its return codes mean: | |
- 1 : return this | |
- 0 : skip this, look at children | |
- undef : skip this, ignore children | |
It instructs the finder to return the first element inside a block that is | |
relevant in determining the indentation depth of a block. | |
This means it triggers either either on actual code on the first line; or | |
anything non-whitespace and non-comment on the following ones. | |
Brace stacking in the first line is ignored, as well as "sub {"-stacking. | |
It is slightly complicated by the fact that the location detection for | |
PPI::Statement objects is broken, which means their children have to be | |
examined. | |
=cut | |
sub is_indent_relevant_element { | |
my ( $parent, $element, $first_line ) = @_; | |
my $class = $element->class; | |
return 0 if $class eq 'PPI::Statement'; # location detection on these is broken, | |
return 0 if $class eq 'PPI::Statement::Compound'; # so we need to look at their children | |
# this is stuff we ignore on the first line | |
if( $element->location->[0] == $first_line ) { | |
return 0 if $class eq 'PPI::Structure::Block'; # brace stacking and "sub {"-stacking constructs need to be inspected | |
return undef if $class eq 'PPI::Token::Structure'; # the braces themselves are skipped | |
return undef if $class eq 'PPI::Token::Word' and $element->content eq 'sub'; # sub as well | |
} | |
return 1 if $element->significant; # this is an actual significant element, either on first line or later, we take it | |
return undef; # other sorts of children of the block get ignored, as well as their children | |
} | |
sub print_statistics { | |
my ( $avg, $column ) = ( shift, shift ); | |
my $template = "A47 A7 A6 A7 A6 A7"; | |
say pack( $template, ' File', ' Line', 'Lines', ' Size', 'Depth', ' Dev.' ); | |
say pack( $template, ('==================================================') x 6 ); | |
print_block_line( $_, $template, $avg, $column ) for @_[ 0 .. 10 ]; | |
return; | |
} | |
sub print_block_line { | |
my ( $block, $template, $avg, $column ) = @_; | |
my $file = $block->{file}; | |
my $line = sprintf( "%6d ", $block->{line} ); | |
my $line_count = sprintf( "%5d ", $block->{line_count} ); | |
my $size = sprintf( "%6d ", $block->{size} ); | |
my $indent = sprintf( "%5d ", $block->{indent} ); | |
my $deviation = sprintf( "%7.2f", $block->{$column} / $avg ); | |
if ( length $file > 47 ) { | |
$file = substr( $file, -43, 43 ); | |
$file = "...$file"; | |
} | |
say pack( $template, $file, $line, $line_count, $size, $indent, $deviation ); | |
return; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code::Statistics | |
Code::Statistics::Target::Block | |
Code::Statistics::Metric::Indentation | |
Code::Statistics::Metric::Length | |
Code::Statistics::Metric::Size | |
Code::Statistics::View::HTML | |
Code::Statistics::View::Shell | |
cstat_collect.pl -> cstat.db | |
cstat.db -> cstat_html.pl | |
cstat.db -> cstat_shell.pl ? c_stat.pl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment