jlisic · October 22, 2014 02:52
diff --git a/lingua.pl b/lingua.pl
 use strict;
 use warnings;

 use Lingua::EN::Fathom;

 # lingua.pl - companion script for lingua.sas
 #
 # Usage: perl lingua.pl <text-file>
 #
 # Analyses the text file named by the first command-line argument with
 # Lingua::EN::Fathom and prints one "name value" pair per line on STDOUT,
 # so that the caller can parse the statistics easily.
 #
 # This work is derived from the example at CPAN:
 # http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm

 # Fail loudly instead of reporting statistics for a file that was never
 # named or cannot be read.
 my $file_name = $ARGV[0];
 die "Usage: perl lingua.pl <text-file>\n"
     unless defined $file_name && length $file_name;
 die "lingua.pl: cannot read '$file_name'\n"
     unless -r $file_name;

 # create object (direct method call instead of indirect-object syntax)
 my $text = Lingua::EN::Fathom->new;

 # read in text file
 $text->analyse_file($file_name);

 # NOTE: the CPAN example this script was derived from also called
 # analyse_block($text_string, $accumulate).  Here $text_string was never
 # assigned, so that call had no text to add; it has been removed.

 # Statistics to report, in output order.  Each name is both the label that
 # is printed and the Lingua::EN::Fathom accessor that is called.
 my @metrics = qw(
     num_chars
     num_words
     percent_complex_words
     num_sentences
     num_text_lines
     num_blank_lines
     num_paragraphs
     syllables_per_word
     words_per_sentence
     fog
     flesch
     kincaid
 );

 # Line endings that differ from the default "\n".  They are kept exactly as
 # this script has always emitted them so existing parsers see the same bytes:
 # percent_complex_words has a trailing space, and the last line (kincaid) has
 # no newline.
 my %terminator_for = (
     percent_complex_words => " \n",
     kincaid               => '',
 );

 # print it all out in a way that can be parsed easily
 for my $metric (@metrics) {
     my $value = $text->$metric;

     # An undefined statistic is printed as an empty value (as before), but
     # without triggering an "uninitialized" warning.
     $value = '' unless defined $value;

     my $terminator = exists $terminator_for{$metric}
                    ? $terminator_for{$metric}
                    : "\n";

     print "$metric $value$terminator";
 }
diff --git a/lingua.sas b/lingua.sas
 /**************************************************************************/
 /* Name: lingua.sas                                                       */
 /* Author: Jonathan Lisic <[email protected]>                              */
 /**************************************************************************/                                                                     
 /* Description:                                                           */
 /*                                                                        */
 /* lingua is a macro based on the example for the Lingua Module in CPAN   */
 /* http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm   */
 /**************************************************************************/
 /* Notes:                                                                 */
 /*                                                                        */
 /* This is not the fastest code around but it is simple and it works!     */
 /* It is assumed that you have Strawberry Perl installed with the         */
 /* Lingua::EN::Fathom Perl module                                         */
 /**************************************************************************/

 /* macro */
 /* workDir - Temporary directory that also contains lingua.pl */
 /* varName - The variable to apply this to (right now only one is supported) */
 /* input   - data set that contains varName */
 /* output  - output data set */

 /* 

 Example:

 %let workDir = C:\Users\JonathanLisic\src\lingua;
 %let varName = 	myVeryLongString;
 %let input  = work.input;
 %let output = work.output;

 %lingua( &workDir, &varName, &input, &output );

 */


 %macro lingua (workDir, varName, input, output);

 /* For every row of &input, writes the value of &varName to              */
 /* &workDir\sample.txt, runs lingua.pl on that file through a pipe, and   */
 /* appends the statistics it prints as one row of &output.                */
 /* The work data set linguaTmp is used as scratch space.                  */

 /* keep the loop index and row count out of the calling scope */
 %local i rows;

 /* default, so that the loop is simply skipped if the count below fails */
 %let rows = 0;

 /* create initial data set */
 data &output.;
  set _NULL_;
 run;

 /* get number of rows in data set */
 proc sql noprint;
  select count(*) into :rows from &input.;
 quit;

 /* remove the leading blanks that the INTO clause leaves in the value */
 %let rows = &rows.;

 %do i = 1 %to &rows.;

 /* write out the variable */
 /* lrecl is set explicitly so that a very long string is not wrapped onto */
 /* several lines of the text file                                         */
 filename lingOut "&workDir.\sample.txt" lrecl=32767;

 data _NULL_;
  set &input.;
  file lingOut;
  if (_N_ = &i.) then do;
     put &varName.;
     stop; /* the wanted row has been written, no need to read the rest */
  end;
 run;

 /* now we create the pipe */
 /* both paths are wrapped in (doubled) double quotes so that a workDir   */
 /* containing blanks still reaches perl as a single argument             */
 filename lingua pipe "c:\Strawberry\perl\bin\perl ""&workDir.\lingua.pl"" ""&workDir.\sample.txt""";

 /* create data set: one row per line of the form: name value */
 data linguaTmp (drop = line);
  infile lingua truncover;
  input line $char80.;
  variable = scan(line, 1, ' ');
  /* read the whole second token; an 8 character informat would cut off   */
  /* long decimals and large counts                                       */
  value = input(scan(line, 2, ' '), best32.);
 run;

 /* sorting to remove the occasional repeated line */
 proc sort data=linguaTmp nodup;
  by variable value;
 run;

 /* transpose the output */
 proc transpose data=linguaTmp out=linguaTmp (drop=_name_);
  id variable;
  var value;
 run;

 /* append to output */
 data &output.;
  set &output. linguaTmp (keep= flesch fog kincaid num_blank_lines num_chars num_paragraphs num_sentences num_text_lines num_words percent_complex_words syllables_per_word words_per_sentence);
 run;

 %end;

 %mend lingua;
	use strict;
	use warnings;

	use Lingua::EN::Fathom;

	# lingua.pl - companion script for lingua.sas
	#
	# Usage: perl lingua.pl <text-file>
	#
	# Analyses the text file named by the first command-line argument with
	# Lingua::EN::Fathom and prints one "name value" pair per line on STDOUT,
	# so that the caller can parse the statistics easily.
	#
	# This work is derived from the example at CPAN:
	# http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm

	# Fail loudly instead of reporting statistics for a file that was never
	# named or cannot be read.
	my $file_name = $ARGV[0];
	die "Usage: perl lingua.pl <text-file>\n"
	    unless defined $file_name && length $file_name;
	die "lingua.pl: cannot read '$file_name'\n"
	    unless -r $file_name;

	# create object (direct method call instead of indirect-object syntax)
	my $text = Lingua::EN::Fathom->new;

	# read in text file
	$text->analyse_file($file_name);

	# NOTE: the CPAN example this script was derived from also called
	# analyse_block($text_string, $accumulate).  Here $text_string was never
	# assigned, so that call had no text to add; it has been removed.

	# Statistics to report, in output order.  Each name is both the label that
	# is printed and the Lingua::EN::Fathom accessor that is called.
	my @metrics = qw(
	    num_chars
	    num_words
	    percent_complex_words
	    num_sentences
	    num_text_lines
	    num_blank_lines
	    num_paragraphs
	    syllables_per_word
	    words_per_sentence
	    fog
	    flesch
	    kincaid
	);

	# Line endings that differ from the default "\n".  They are kept exactly as
	# this script has always emitted them so existing parsers see the same bytes:
	# percent_complex_words has a trailing space, and the last line (kincaid) has
	# no newline.
	my %terminator_for = (
	    percent_complex_words => " \n",
	    kincaid               => '',
	);

	# print it all out in a way that can be parsed easily
	for my $metric (@metrics) {
	    my $value = $text->$metric;

	    # An undefined statistic is printed as an empty value (as before), but
	    # without triggering an "uninitialized" warning.
	    $value = '' unless defined $value;

	    my $terminator = exists $terminator_for{$metric}
	                   ? $terminator_for{$metric}
	                   : "\n";

	    print "$metric $value$terminator";
	}
	/**************************************************************************/
	/* Name: lingua.sas */
	/* Author: Jonathan Lisic <[email protected]> */
	/**************************************************************************/
	/* Description: */
	/* */
	/* lingua is a macro based on the example for the Lingua Module in CPAN */
	/* http://search.cpan.org/dist/Lingua-EN-Fathom/lib/Lingua/EN/Fathom.pm */
	/**************************************************************************/
	/* Notes: */
	/* */
	/* This is not the fastest code around but it is simple and it works! */
	/* It is assumed that you have Strawberry Perl installed with the */
	/* Lingua::EN::Fathom Perl module */
	/**************************************************************************/

	/* macro */
	/* workDir - Temporary directory that also contains lingua.pl */
	/* varName - The variable to apply this to (right now only one is supported) */
	/* input - data set that contains varName */
	/* output - output data set */

	/*

	Example:

	%let workDir = C:\Users\JonathanLisic\src\lingua;
	%let varName = myVeryLongString;
	%let input = work.input;
	%let output = work.output;

	%lingua( &workDir, &varName, &input, &output );

	*/


	%macro lingua (workDir, varName, input, output);

	/* For every row of &input, writes the value of &varName to */
	/* &workDir\sample.txt, runs lingua.pl on that file through a pipe, and */
	/* appends the statistics it prints as one row of &output. */
	/* The work data set linguaTmp is used as scratch space. */

	/* keep the loop index and row count out of the calling scope */
	%local i rows;

	/* default, so that the loop is simply skipped if the count below fails */
	%let rows = 0;

	/* create initial data set */
	data &output.;
	 set _NULL_;
	run;

	/* get number of rows in data set */
	/* (the argument of count and the trailing comment on this statement */
	/* had lost their asterisks, which left invalid SQL behind) */
	proc sql noprint;
	 select count(*) into :rows from &input.;
	quit;

	/* remove the leading blanks that the INTO clause leaves in the value */
	%let rows = &rows.;

	%do i = 1 %to &rows.;

	/* write out the variable */
	/* lrecl is set explicitly so that a very long string is not wrapped onto */
	/* several lines of the text file */
	filename lingOut "&workDir.\sample.txt" lrecl=32767;

	data _NULL_;
	 set &input.;
	 file lingOut;
	 if (_N_ = &i.) then do;
	    put &varName.;
	    stop; /* the wanted row has been written, no need to read the rest */
	 end;
	run;

	/* now we create the pipe */
	/* both paths are wrapped in (doubled) double quotes so that a workDir */
	/* containing blanks still reaches perl as a single argument */
	filename lingua pipe "c:\Strawberry\perl\bin\perl ""&workDir.\lingua.pl"" ""&workDir.\sample.txt""";

	/* create data set: one row per line of the form: name value */
	data linguaTmp (drop = line);
	 infile lingua truncover;
	 input line $char80.;
	 variable = scan(line, 1, ' ');
	 /* read the whole second token; an 8 character informat would cut off */
	 /* long decimals and large counts */
	 value = input(scan(line, 2, ' '), best32.);
	run;

	/* sorting to remove the occasional repeated line */
	proc sort data=linguaTmp nodup;
	 by variable value;
	run;

	/* transpose the output */
	proc transpose data=linguaTmp out=linguaTmp (drop=_name_);
	 id variable;
	 var value;
	run;

	/* append to output */
	data &output.;
	 set &output. linguaTmp (keep= flesch fog kincaid num_blank_lines num_chars num_paragraphs num_sentences num_text_lines num_words percent_complex_words syllables_per_word words_per_sentence);
	run;

	%end;

	%mend lingua;