wchristian · August 8, 2010 20:42
diff --git a/_output b/_output
 D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>perl code_statistics.pl

 ### Collecting block statistics


 ### Average Block Length
 18.5932553337922

 ### Average Block Size
 670.863041982106

 ### Average Block Depth
 2.39917412250516

 ### Top Ten Longest Blocks
  File                                           Line Lines   Size Depth    Dev.
 ================================================================================
 lib/Greenphyl/Load/Family.pm                       57   294  16570     1   15.81
 lib/Greenphyl/Load/Uniprot.pm                     103   287  11540     1   15.44
 cgi-bin/update_pipeline.cgi                       121   260   7608     1   13.98
 lib/Greenphyl/Load/Family.pm                       79   260  15030     2   13.98
 cgi-bin/job_process.cgi                            46   250  10726     1   13.45
 lib/Greenphyl/Load/Family.pm                       91   247  14549     3   13.28
 lib/Greenphyl/Load/Interproscan.pm                 33   236  10354     1   12.69
 lib/Greenphyl/Load/Family.pm                      120   217  13306     4   11.67
 lib/Greenphyl/Load/Family.pm                      134   202  12659     5   10.86
 lib/Greenphyl/Load/Family.pm                      140   195  12400     6   10.49
 lib/Greenphyl/Load/Go.pm                           27   195   8422     1   10.49

 ### Top Ten Biggest Blocks
  File                                           Line Lines   Size Depth    Dev.
 ================================================================================
 lib/Greenphyl/Load/Family.pm                       57   294  16570     1   24.70
 lib/Greenphyl/Load/Family.pm                       79   260  15030     2   22.40
 lib/Greenphyl/Load/Family.pm                       91   247  14549     3   21.69
 lib/Greenphyl/Load/Family.pm                      120   217  13306     4   19.83
 lib/Greenphyl/Load/Family.pm                      134   202  12659     5   18.87
 lib/Greenphyl/Load/Family.pm                      140   195  12400     6   18.48
 lib/Greenphyl/Load/Family.pm                      145   189  12138     7   18.09
 lib/Greenphyl/Load/Uniprot.pm                     103   287  11540     1   17.20
 cgi-bin/job_process.cgi                            46   250  10726     1   15.99
 lib/Greenphyl/Load/Interproscan.pm                 33   236  10354     1   15.43
 cgi-bin/job_process.cgi                            98   191   8454     2   12.60

 ### Top Ten Deepest Located Blocks
  File                                           Line Lines   Size Depth    Dev.
 ================================================================================
 lib/Greenphyl/Load/Family.pm                      189     8    639    12    5.00
 lib/Greenphyl/Load/Family.pm                      242     8    617    12    5.00
 lib/Greenphyl/Load/Family.pm                      182     5    203    12    5.00
 lib/Greenphyl/Load/Family.pm                      167    31   2073    11    4.58
 lib/Greenphyl/Load/Family.pm                      238    13    954    11    4.58
 lib/Greenphyl/Load/Go.pm                          104     5    330    11    4.58
 lib/Greenphyl/Load/Uniprot.pm                     523     5    301    11    4.58
 lib/Greenphyl/Load/Family.pm                      234     3    166    11    4.58
 lib/Greenphyl/Load/Family.pm                      272    55   3317    10    4.17
 lib/Greenphyl/Load/Family.pm                      164    35   2243    10    4.17
 lib/Greenphyl/Load/Family.pm                      228    24   1685    10    4.17

 D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>
diff --git a/code_statistics.pl b/code_statistics.pl
 #!/usr/bin/perl

 use strict;
 use warnings;

 use feature 'say';

 use File::Find::Rule;
 use File::Grep qw( fgrep fmap fdo );
 use PPI;
 use Smart::Comments;
 use List::Util qw( reduce );
 use HTML::Template;
 use Term::ProgressBar::Simple;

 my @ignore_list = qw(


    site/lib/Perl/APIReference   site/lib/Bio/MolEvol/CodonModel.pm
    Syntax/Highlight/Engine/Kate   site/lib/ExtUtils/XSpp/Grammar.pm
 );

 $|++;

 run();

 exit;

 sub run
 {
    my @files = grep is_not_ignored(), find_code_files();

    ### Collecting block statistics
    my $progress = Term::ProgressBar::Simple->new( progress_config( count => scalar(@files) ) );
    my @blocks = map find_subs( $_, sub { $progress->increment; } ), @files;

    ### Average Block Length
    my $avg_length = get_and_say_average( 'line_count', \@blocks );

    ### Average Block Size
    my $avg_size = get_and_say_average( 'size', \@blocks );

    ### Average Block Depth
    my $avg_depth = get_and_say_average( 'indent', \@blocks );

    ### Top Ten Longest Blocks
    print_top_ten_table( 'line_count', $avg_length, \@blocks );

    ### Top Ten Biggest Blocks
    print_top_ten_table( 'size', $avg_size, \@blocks );

    ### Top Ten Deepest Located Blocks
    print_top_ten_table( 'indent', $avg_depth, \@blocks );

    return;
 }

 sub find_code_files
 {
    return File::Find::Rule->file->name( '*.pm', '*.pl', '*.phtml', '*.cgi' )->in('.');
 }

 sub progress_config {
    my %config = @_;
    die "no count given" if !defined $config{count};
    $config{ETA} ||= 'linear';
    #$config{minor_char} ||= '';
    $config{max_update_rate} ||= 0.1;
    return \%config;
 }

 sub is_not_ignored {
    my $ignore = join '|', @ignore_list;
    return 1 if !$ignore;

    return 0 if $_ =~ m/$ignore/;
    return 1;
 }

 sub get_and_say_average {
    my ( $column, $blocks ) = @_;
    my $avg = 1 / @{$blocks} * reduce { $a + $b->{$column} } 0, @{$blocks};
    say $avg;
    return $avg;
 }

 sub print_top_ten_table {
    my ( $column, $avg, $blocks ) = @_;

    @{$blocks} = sort { $b->{$column} <=> $a->{$column} } @{$blocks};
    print_statistics( $avg, $column, @{$blocks} );

    return;
 }

 sub find_subs
 {
    my ( $file, $post_process_hook ) = @_;

    my $code   = PPI::Document->new($file) || return;
    my $blocks = $code->find('PPI::Structure::Block');
    $blocks ||= [];

    my @block_data = map collect_block_data( $_, $file ), @{$blocks};
    $post_process_hook->();

    return @block_data;
 }

 sub collect_block_data {
    my ( $block, $file ) = @_;

    my @lines = split( '\n', $block->content );
    my $location = $block->location;
    my $size = length $block->content;

    # blocks with only one line are probably just hash accessors and thus irrelevant
    # however we want to keep huge examples of those around, just in case they're interesting
    return if @lines == 1 and $size < 150;

    my %block_data;
    $block_data{size}       = $size;
    $block_data{line_count} = @lines;
    $block_data{line}       = $location->[0];
    $block_data{file}       = $file;
    $block_data{indent}     = first_child_indent( $block, $block_data{line} );

    return \%block_data;
 }

 sub first_child_indent {
    my ( $block, $first_line ) = @_;

    my $first_child = $block->find_first( sub { is_indent_relevant_element( @_, $first_line ) } );

    # blocks without a child give us the indentation of the first brace, which is not reliable
    # so we null the indent to keep them out of the top ten
    return 0 if !$first_child;

    my $child_loc = $first_child->location;
    my $child_indent = int( $child_loc->[1] / 4 );

    return $child_indent;
 }

 =head2 is_indent_relevant_element
    This is an element evaluation routine. It is used by the PPI element finder.
    Its return codes mean:
    - 1 : return this
    - 0 : skip this, look at children
    - undef : skip this, ignore children

    It instructs the finder to return the first element inside a block that is
    relevant in determining the indentation depth of a block.

    This means it triggers either either on actual code on the first line; or
    anything non-whitespace and non-comment on the following ones.

    Brace stacking in the first line is ignored, as well as "sub {"-stacking.

    It is slightly complicated by the fact that the location detection for
    PPI::Statement objects is broken, which means their children have to be
    examined.

 =cut

 sub is_indent_relevant_element {
    my ( $parent, $element, $first_line ) = @_;
    my $class = $element->class;
    return 0 if $class eq 'PPI::Statement';             # location detection on these is broken,
    return 0 if $class eq 'PPI::Statement::Compound';   # so we need to look at their children

    # this is stuff we ignore on the first line
    if( $element->location->[0] == $first_line ) {
        return 0 if $class eq 'PPI::Structure::Block'; # brace stacking and "sub {"-stacking constructs need to be inspected
        return undef if $class eq 'PPI::Token::Structure'; # the braces themselves are skipped
        return undef if $class eq 'PPI::Token::Word' and $element->content eq 'sub'; # sub as well
    }

    return 1 if $element->significant; # this is an actual significant element, either on first line or later, we take it

    return undef; # other sorts of children of the block get ignored, as well as their children
 }

 sub print_statistics {
    my ( $avg, $column ) = ( shift, shift );

    my $template = "A47 A7 A6 A7 A6 A7";

    say pack( $template, '  File', '  Line', 'Lines', '  Size', 'Depth', '   Dev.' );
    say pack( $template, ('==================================================') x 6 );

    print_block_line( $_, $template,  $avg, $column ) for @_[ 0 .. 10 ];

    return;
 }

 sub print_block_line {
    my ( $block, $template, $avg, $column ) = @_;

    my $file       = $block->{file};
    my $line       = sprintf( "%6d ", $block->{line} );
    my $line_count = sprintf( "%5d ", $block->{line_count} );
    my $size       = sprintf( "%6d ", $block->{size} );
    my $indent     = sprintf( "%5d ", $block->{indent} );
    my $deviation  = sprintf( "%7.2f", $block->{$column} / $avg );

    if ( length $file > 47 ) {
        $file = substr( $file, -43, 43 );
        $file = "...$file";
    }

    say pack( $template, $file, $line, $line_count, $size, $indent, $deviation );

    return;
 }
diff --git a/structure.txt b/structure.txt
 Code::Statistics

 Code::Statistics::Target::Block

 Code::Statistics::Metric::Indentation
 Code::Statistics::Metric::Length
 Code::Statistics::Metric::Size

 Code::Statistics::View::HTML
 Code::Statistics::View::Shell

 cstat_collect.pl -> cstat.db
 cstat.db -> cstat_html.pl
 cstat.db -> cstat_shell.pl ? c_stat.pl
	D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>perl code_statistics.pl

	### Collecting block statistics


	### Average Block Length
	18.5932553337922

	### Average Block Size
	670.863041982106

	### Average Block Depth
	2.39917412250516

	### Top Ten Longest Blocks
	File Line Lines Size Depth Dev.
	================================================================================
	lib/Greenphyl/Load/Family.pm 57 294 16570 1 15.81
	lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 15.44
	cgi-bin/update_pipeline.cgi 121 260 7608 1 13.98
	lib/Greenphyl/Load/Family.pm 79 260 15030 2 13.98
	cgi-bin/job_process.cgi 46 250 10726 1 13.45
	lib/Greenphyl/Load/Family.pm 91 247 14549 3 13.28
	lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 12.69
	lib/Greenphyl/Load/Family.pm 120 217 13306 4 11.67
	lib/Greenphyl/Load/Family.pm 134 202 12659 5 10.86
	lib/Greenphyl/Load/Family.pm 140 195 12400 6 10.49
	lib/Greenphyl/Load/Go.pm 27 195 8422 1 10.49

	### Top Ten Biggest Blocks
	File Line Lines Size Depth Dev.
	================================================================================
	lib/Greenphyl/Load/Family.pm 57 294 16570 1 24.70
	lib/Greenphyl/Load/Family.pm 79 260 15030 2 22.40
	lib/Greenphyl/Load/Family.pm 91 247 14549 3 21.69
	lib/Greenphyl/Load/Family.pm 120 217 13306 4 19.83
	lib/Greenphyl/Load/Family.pm 134 202 12659 5 18.87
	lib/Greenphyl/Load/Family.pm 140 195 12400 6 18.48
	lib/Greenphyl/Load/Family.pm 145 189 12138 7 18.09
	lib/Greenphyl/Load/Uniprot.pm 103 287 11540 1 17.20
	cgi-bin/job_process.cgi 46 250 10726 1 15.99
	lib/Greenphyl/Load/Interproscan.pm 33 236 10354 1 15.43
	cgi-bin/job_process.cgi 98 191 8454 2 12.60

	### Top Ten Deepest Located Blocks
	File Line Lines Size Depth Dev.
	================================================================================
	lib/Greenphyl/Load/Family.pm 189 8 639 12 5.00
	lib/Greenphyl/Load/Family.pm 242 8 617 12 5.00
	lib/Greenphyl/Load/Family.pm 182 5 203 12 5.00
	lib/Greenphyl/Load/Family.pm 167 31 2073 11 4.58
	lib/Greenphyl/Load/Family.pm 238 13 954 11 4.58
	lib/Greenphyl/Load/Go.pm 104 5 330 11 4.58
	lib/Greenphyl/Load/Uniprot.pm 523 5 301 11 4.58
	lib/Greenphyl/Load/Family.pm 234 3 166 11 4.58
	lib/Greenphyl/Load/Family.pm 272 55 3317 10 4.17
	lib/Greenphyl/Load/Family.pm 164 35 2243 10 4.17
	lib/Greenphyl/Load/Family.pm 228 24 1685 10 4.17

	D:\Web-Dev\Greenphyl_v2\perlcodegreenphyl\apps\GreenPhyl\test>
	#!/usr/bin/perl

	use strict;
	use warnings;

	use feature 'say';

	use File::Find::Rule;
	use File::Grep qw( fgrep fmap fdo );
	use PPI;
	use Smart::Comments;
	use List::Util qw( reduce );
	use HTML::Template;
	use Term::ProgressBar::Simple;

	my @ignore_list = qw(


	site/lib/Perl/APIReference site/lib/Bio/MolEvol/CodonModel.pm
	Syntax/Highlight/Engine/Kate site/lib/ExtUtils/XSpp/Grammar.pm
	);

	$\|++;

	run();

	exit;

	sub run
	{
	my @files = grep is_not_ignored(), find_code_files();

	### Collecting block statistics
	my $progress = Term::ProgressBar::Simple->new( progress_config( count => scalar(@files) ) );
	my @blocks = map find_subs( $_, sub { $progress->increment; } ), @files;

	### Average Block Length
	my $avg_length = get_and_say_average( 'line_count', \@blocks );

	### Average Block Size
	my $avg_size = get_and_say_average( 'size', \@blocks );

	### Average Block Depth
	my $avg_depth = get_and_say_average( 'indent', \@blocks );

	### Top Ten Longest Blocks
	print_top_ten_table( 'line_count', $avg_length, \@blocks );

	### Top Ten Biggest Blocks
	print_top_ten_table( 'size', $avg_size, \@blocks );

	### Top Ten Deepest Located Blocks
	print_top_ten_table( 'indent', $avg_depth, \@blocks );

	return;
	}

	sub find_code_files
	{
	return File::Find::Rule->file->name( '.pm', '.pl', '.phtml', '.cgi' )->in('.');
	}

	sub progress_config {
	my %config = @_;
	die "no count given" if !defined $config{count};
	$config{ETA} \|\|= 'linear';
	#$config{minor_char} \|\|= '';
	$config{max_update_rate} \|\|= 0.1;
	return \%config;
	}

	sub is_not_ignored {
	my $ignore = join '\|', @ignore_list;
	return 1 if !$ignore;

	return 0 if $_ =~ m/$ignore/;
	return 1;
	}

	sub get_and_say_average {
	my ( $column, $blocks ) = @_;
	my $avg = 1 / @{$blocks} * reduce { $a + $b->{$column} } 0, @{$blocks};
	say $avg;
	return $avg;
	}

	sub print_top_ten_table {
	my ( $column, $avg, $blocks ) = @_;

	@{$blocks} = sort { $b->{$column} <=> $a->{$column} } @{$blocks};
	print_statistics( $avg, $column, @{$blocks} );

	return;
	}

	sub find_subs
	{
	my ( $file, $post_process_hook ) = @_;

	my $code = PPI::Document->new($file) \|\| return;
	my $blocks = $code->find('PPI::Structure::Block');
	$blocks \|\|= [];

	my @block_data = map collect_block_data( $_, $file ), @{$blocks};
	$post_process_hook->();

	return @block_data;
	}

	sub collect_block_data {
	my ( $block, $file ) = @_;

	my @lines = split( '\n', $block->content );
	my $location = $block->location;
	my $size = length $block->content;

	# blocks with only one line are probably just hash accessors and thus irrelevant
	# however we want to keep huge examples of those around, just in case they're interesting
	return if @lines == 1 and $size < 150;

	my %block_data;
	$block_data{size} = $size;
	$block_data{line_count} = @lines;
	$block_data{line} = $location->[0];
	$block_data{file} = $file;
	$block_data{indent} = first_child_indent( $block, $block_data{line} );

	return \%block_data;
	}

	sub first_child_indent {
	my ( $block, $first_line ) = @_;

	my $first_child = $block->find_first( sub { is_indent_relevant_element( @_, $first_line ) } );

	# blocks without a child give us the indentation of the first brace, which is not reliable
	# so we null the indent to keep them out of the top ten
	return 0 if !$first_child;

	my $child_loc = $first_child->location;
	my $child_indent = int( $child_loc->[1] / 4 );

	return $child_indent;
	}

	=head2 is_indent_relevant_element
	This is an element evaluation routine. It is used by the PPI element finder.
	Its return codes mean:
	- 1 : return this
	- 0 : skip this, look at children
	- undef : skip this, ignore children

	It instructs the finder to return the first element inside a block that is
	relevant in determining the indentation depth of a block.

	This means it triggers either either on actual code on the first line; or
	anything non-whitespace and non-comment on the following ones.

	Brace stacking in the first line is ignored, as well as "sub {"-stacking.

	It is slightly complicated by the fact that the location detection for
	PPI::Statement objects is broken, which means their children have to be
	examined.

	=cut

	sub is_indent_relevant_element {
	my ( $parent, $element, $first_line ) = @_;
	my $class = $element->class;
	return 0 if $class eq 'PPI::Statement'; # location detection on these is broken,
	return 0 if $class eq 'PPI::Statement::Compound'; # so we need to look at their children

	# this is stuff we ignore on the first line
	if( $element->location->[0] == $first_line ) {
	return 0 if $class eq 'PPI::Structure::Block'; # brace stacking and "sub {"-stacking constructs need to be inspected
	return undef if $class eq 'PPI::Token::Structure'; # the braces themselves are skipped
	return undef if $class eq 'PPI::Token::Word' and $element->content eq 'sub'; # sub as well
	}

	return 1 if $element->significant; # this is an actual significant element, either on first line or later, we take it

	return undef; # other sorts of children of the block get ignored, as well as their children
	}

	sub print_statistics {
	my ( $avg, $column ) = ( shift, shift );

	my $template = "A47 A7 A6 A7 A6 A7";

	say pack( $template, ' File', ' Line', 'Lines', ' Size', 'Depth', ' Dev.' );
	say pack( $template, ('==================================================') x 6 );

	print_block_line( $_, $template, $avg, $column ) for @_[ 0 .. 10 ];

	return;
	}

	sub print_block_line {
	my ( $block, $template, $avg, $column ) = @_;

	my $file = $block->{file};
	my $line = sprintf( "%6d ", $block->{line} );
	my $line_count = sprintf( "%5d ", $block->{line_count} );
	my $size = sprintf( "%6d ", $block->{size} );
	my $indent = sprintf( "%5d ", $block->{indent} );
	my $deviation = sprintf( "%7.2f", $block->{$column} / $avg );

	if ( length $file > 47 ) {
	$file = substr( $file, -43, 43 );
	$file = "...$file";
	}

	say pack( $template, $file, $line, $line_count, $size, $indent, $deviation );

	return;
	}
	Code::Statistics

	Code::Statistics::Target::Block

	Code::Statistics::Metric::Indentation
	Code::Statistics::Metric::Length
	Code::Statistics::Metric::Size

	Code::Statistics::View::HTML
	Code::Statistics::View::Shell

	cstat_collect.pl -> cstat.db
	cstat.db -> cstat_html.pl
	cstat.db -> cstat_shell.pl ? c_stat.pl