Skip to content

Instantly share code, notes, and snippets.

@am0c
Created February 11, 2011 18:46
Show Gist options
  • Save am0c/822807 to your computer and use it in GitHub Desktop.
Save am0c/822807 to your computer and use it in GitHub Desktop.
quick-dirty code duplication analyzer
#!/usr/bin/env perl
#
# Code Duplication Analyzer
# --------------------------
#
# Qui-Quic-Quick-and-Dirty-ly coded and
# I noticed that this approach does not
# return the trustful results.
#
#
use 5.010;
use Storable;
use warnings;
use strict;
local $| = 1;
my $FILE = shift;
my $cont = do { open my $fh, "<", $FILE or die; local $/; <$fh> };
my @SYNTAX_SEQ = ();
my %SYNTAX_TABLE = ();
my %SYNTAX_MAP = ();
my @STACK = ();
@SYNTAX_SEQ = $cont =~ m{([a-z_][a-z0-9_]*)}gi;
$SYNTAX_TABLE{$_}++ for @SYNTAX_SEQ;
my $i = 0; for (keys %SYNTAX_TABLE) {
$SYNTAX_TABLE{$_} = $i;
$SYNTAX_MAP{$i} = $_;
$i++;
}
@SYNTAX_SEQ = map { $SYNTAX_TABLE{$_} } @SYNTAX_SEQ;
#-----
use List::Util qw(first);
use Tree::Fast;
my $TREE = Tree::Fast->new('root');
my $SEQ = "@SYNTAX_SEQ";
sub push_tree {
my ($key_list, $valhash) = @_;
my @key_list = @$key_list;
my $cursor = $TREE;
while (my $key = shift @key_list) {
my @children = $cursor->children;
my ($child) = first { my $valhash = $_->value; $valhash->{size} == $key } @children;
unless (defined $child) {
$child = Tree::Fast->new({ seq => '', size => 0 });
$cursor->add_child({}, $child);
}
$cursor = $child;
}
$cursor->set_value($valhash);
}
sub recurse_maximum_at {
my ($cur, $size) = @_;
push @STACK, $SYNTAX_SEQ[$cur+$size];
my $substr = "@STACK";
my $count = () = ($SEQ =~ m/($substr)/g);
push_tree( \@STACK, { seq => $substr, size => $size } );
return recurse_maximum_at( $cur, $size+1 ) if $count > 1 && $cur+$size < $#SYNTAX_SEQ;
return $size;
}
sub analyze {
say "analyze a file:";
my $buffer = 0;
for my $i (0 .. $#SYNTAX_SEQ) {
@STACK = ();
print "\rloading... ", $i, " / ", $#SYNTAX_SEQ if $buffer++ % 30 == 0;
recurse_maximum_at( $i, 0 );
}
}
sub store_tree {
say "store analyzed information: ";
store $TREE, "$FILE.anal";
}
sub retrieve_tree {
say "information retrieved from a file: ";
$TREE = retrieve "$FILE.anal";
}
sub dump_tree {
for my $node ($TREE->traverse) {
my $valhash = $node->value;
say "* [", $valhash->{seq}, "]: ", $valhash->{size};
}
}
sub dump_string_tree {
for my $node ($TREE->traverse($TREE->LEVEL_ORDER)) {
my $valhash = $node->value;
my @list = map $SYNTAX_MAP{$_}, split " ", $valhash->{seq};
say $valhash->{size}, ": ", "@list" if $valhash->{size} >= 1;
}
}
if (-e "$FILE.anal") {
retrieve_tree;
}
else {
analyze;
store_tree;
}
#dump_tree;
dump_string_tree;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment