Skip to content

Instantly share code, notes, and snippets.

@jlisic
Created October 22, 2014 02:52
Show Gist options
  • Save jlisic/5c1342e525eb3977d564 to your computer and use it in GitHub Desktop.
Save jlisic/5c1342e525eb3977d564 to your computer and use it in GitHub Desktop.
use strict;
use warnings;

use Lingua::EN::Fathom;

# This is a simple Perl script to go along with lingua.sas.
# It analyses one text file with Lingua::EN::Fathom and prints every
# statistic as a "name value" line on STDOUT so the caller can parse it.
#
# This work is derived from the example at CPAN:
# http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm
#
# Usage: perl lingua.pl <text-file>

# Fail loudly when no input file is given instead of printing statistics
# for nothing.
my $file_name = $ARGV[0];
die "usage: $0 <text-file>\n" unless defined $file_name;

# create object (direct method call; indirect-object "new Class" syntax is
# ambiguous to the parser and discouraged)
my $text = Lingua::EN::Fathom->new;

# read in and analyse the text file
$text->analyse_file($file_name);

# NOTE: the earlier version followed this with
#   $text->analyse_block($text_string, $accumulate);
# where $text_string was never declared or assigned, so there was no text
# to add to the file's statistics. The call has been removed.

# get the info from text
my $num_chars             = $text->num_chars;
my $num_words             = $text->num_words;
my $percent_complex_words = $text->percent_complex_words;
my $num_sentences         = $text->num_sentences;
my $num_text_lines        = $text->num_text_lines;
my $num_blank_lines       = $text->num_blank_lines;
my $num_paragraphs        = $text->num_paragraphs;
my $syllables_per_word    = $text->syllables_per_word;
my $words_per_sentence    = $text->words_per_sentence;
my $fog                   = $text->fog;
my $flesch                = $text->flesch;
my $kincaid               = $text->kincaid;

# print it all out in a way that can be parsed easily: one
# "<name> <value>" pair per line. The output bytes are kept exactly as
# before (including the absent newline after the last value) so existing
# consumers see no difference.
print("num_chars $num_chars\n");
print("num_words $num_words\n");
print("percent_complex_words $percent_complex_words \n");
print("num_sentences $num_sentences\n");
print("num_text_lines $num_text_lines\n");
print("num_blank_lines $num_blank_lines\n");
print("num_paragraphs $num_paragraphs\n");
print("syllables_per_word $syllables_per_word\n");
print("words_per_sentence $words_per_sentence\n");
print("fog $fog\n");
print("flesch $flesch\n");
print("kincaid $kincaid");
/**************************************************************************/
/* Name: lingua.sas */
/* Author: Jonathan Lisic <[email protected]> */
/**************************************************************************/
/* Description: */
/* */
/* lingua is a macro based on the example for the Lingua Module in CPAN */
/* http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm */
/**************************************************************************/
/* Notes: */
/* */
/* This is not the fastest code around but it is simple and it works! */
/* It is assumed that you have Strawberry Perl installed with the */
/* Lingua::EN::Fathom Perl module */
/**************************************************************************/
/* macro */
/* workDir - Temporary directory where lingua.pl also is */
/* varName - The variable to analyse (right now only one is supported) */
/* input - data set that contains varName */
/* output - output data set */
/*
Example:
%let workDir = C:\Users\JonathanLisic\src\lingua;
%let varName = myVeryLongString;
%let input = work.input;
%let output = work.output;
%lingua( &workDir, &varName, &input, &output );
*/
%macro lingua (workDir, varName, input, output);
    /* Runs lingua.pl once per row of &input. on the value of &varName.   */
    /* and stacks the returned statistics into &output. (one row of       */
    /* statistics per input row).                                         */
    /*   workDir - directory holding lingua.pl; sample.txt is written here */
    /*   varName - variable in &input. whose text is analysed             */
    /*   input   - data set that contains &varName.                       */
    /*   output  - data set to create (replaced if it already exists)     */

    /* keep the loop counter and row count out of the caller's scope */
    %local i rows;

    /* create initial (empty) data set */
    data &output.;
        set _NULL_;
    run;

    /* get number of rows in data set */
    proc sql noprint;
        select count(*) into :rows from &input.;
    quit;

    %do i = 1 %to &rows.;

        /* write out the variable for row &i. only */
        /* lrecl is raised so a long string is not wrapped onto several lines */
        filename lingOut "&workDir.\sample.txt";
        data _NULL_;
            set &input.;
            file lingOut lrecl=32767;
            if (_N_ = &i.) then do;
                put &varName.;
                stop; /* nothing after this row is needed */
            end;
        run;

        /* now we create the named pipe */
        /* both paths are quoted so a workDir containing blanks still works */
        filename lingua pipe "c:\Strawberry\perl\bin\perl ""&workDir.\lingua.pl"" ""&workDir.\sample.txt""";

        /* create data set: each line of perl output is "<name> <value>" */
        data linguaTmp (drop = line);
            infile lingua truncover;
            input line $char80.;
            variable = scan(line, 1, ' ');
            /* read the whole second token; the earlier best8. read started */
            /* on the separating blank and cut values to 7 characters       */
            value = input(scan(line, 2, ' '), best32.);
        run;

        /* sorting to remove the occasional repeated line */
        proc sort data=linguaTmp nodup;
            by variable value;
        run;

        /* transpose the output: one column per statistic */
        proc transpose data=linguaTmp out=linguaTmp (drop=_name_);
            id variable;
            var value;
        run;

        /* append to output */
        data &output.;
            set &output. linguaTmp (keep= flesch fog kincaid num_blank_lines num_chars num_paragraphs num_sentences num_text_lines num_words percent_complex_words syllables_per_word words_per_sentence);
        run;

        /* release the filerefs used by this iteration */
        filename lingua clear;
        filename lingOut clear;

    %end;
%mend;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment