Skip to content

Instantly share code, notes, and snippets.

@dav-m85
Created August 13, 2013 22:10
Show Gist options
  • Save dav-m85/6226202 to your computer and use it in GitHub Desktop.
Save dav-m85/6226202 to your computer and use it in GitHub Desktop.
Extract Surface, Exchange and Agitation for SEO keywords
#!/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Computes three metrics from two keyword CSV reports and prints each one as
# a "name value" line on STDOUT:
#   agitation - keywords present in both corpora whose value differs,
#   exchange  - keywords of corpusA that are missing from corpusB,
#   surface   - mean of 1/value over the keywords of corpusB.
# The first column of a row is used as the keyword and the fifth column as
# its value (presumably the keyword's ranking position -- confirm against
# the report format).

if (@ARGV != 2) {
    # A usage error goes to STDERR with a non-zero status so that it can
    # never be mistaken for metric output by whatever consumes STDOUT.
    print STDERR "\nUsage: ./seokw.pl corpusA corpusB\n";
    exit 1;
}

my %Ca = load_corpus($ARGV[0]);
my %Cb = load_corpus($ARGV[1]);

# Compute Exchange
# Compute Agitation
my $Agitation = 0;
my $Exchange  = 0;
for my $kw (keys %Ca) {
    # Use exists() rather than comparing against undef: a numeric comparison
    # with undef is really a comparison with 0.
    if (!exists $Cb{$kw}) {
        $Exchange++;
    }
    elsif ($Cb{$kw} != $Ca{$kw}) {
        $Agitation++;
    }
}

# Compute Resulting Surface
my $Surface = 0;
$Surface += 1 / $_ for values %Cb;    # every stored value is > 0
my $CardCb = keys %Cb;
# An empty corpusB yields a surface of 0 instead of a division-by-zero death.
$Surface = $CardCb ? $Surface / $CardCb : 0;

print "seo.keywords.all.agitation $Agitation\n";
print "seo.keywords.all.exchange $Exchange\n";
print "seo.keywords.all.surface $Surface\n";

# Reads the CSV file at $path and returns a (keyword => value) list built
# from the first and fifth cell of every row. Rows whose fifth cell is
# missing or not greater than zero are echoed to STDOUT with an "OUPS: "
# prefix and skipped. Dies if the file cannot be opened.
sub load_corpus {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open '$path': $!\n";

    my %value_of;
    while (my $line = <$fh>) {
        my @cells = csvsplit($line); # or csvsplit($line, $my_custom_seperator)

        my $value = 0;
        if (defined $cells[4]) {
            # Plain Perl numeric coercion is intended here (a header word
            # counts as 0, "12abc" as 12), so only the warning is silenced.
            no warnings 'numeric';
            $value = $cells[4] + 0;
        }

        if ($value <= 0) {
            print "OUPS: $line";
        }
        else {
            $value_of{ $cells[0] } = $value;
        }
    }
    close $fh;

    return %value_of;
}
# http://stackoverflow.com/questions/3065095/how-do-i-efficiently-parse-a-csv-file-in-perl
#
# Splits one CSV line into its cells.
#   $line - the raw line; one trailing "\n" or "\r\n" is stripped first.
#   $sep  - optional separator, taken literally (default ','); a false
#           value (undef, '' or '0') also selects the default.
# Returns the list of cell values, or an empty list when $line is undefined
# or empty. A cell may be wrapped in double quotes, in which case it may
# contain the separator, and a doubled quote ("") inside it stands for one
# literal quote.
sub csvsplit {
    my ($line, $sep) = @_;
    $sep = ',' unless $sep;

    # Test definedness and length, not truth: a line holding just "0" is data.
    return () unless defined $line and length $line;

    $line =~ s/\r?\n$//;

    # Escape the separator so that characters such as '|' or '.' are matched
    # literally instead of being interpreted as regex syntax.
    my $sep_re  = quotemeta $sep;
    my $cell_re = qr/(?:^|$sep_re)(?:"((?:[^"]|"")*)"|([^$sep_re]*))/;

    my @cells;
    while ($line =~ /$cell_re/g) {
        # Copy the captures first: the substitution below resets $1 and $2.
        my ($quoted, $bare) = ($1, $2);
        if (defined $quoted) {
            $quoted =~ s/""/"/g;
            push @cells, $quoted;
        }
        else {
            push @cells, (defined $bare ? $bare : '');
        }
    }
    return @cells;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment