Created
September 16, 2008 19:16
-
-
Save ELLIOTTCABLE/11109 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
############################################################################ | |
# textilewc: Count words in a Textile file, if somewhat crudely. | |
# By Tammy Cravit, [email protected] | |
# | |
# $Revision$ $Date$ | |
# | |
# If a single file is provided on the command line, it displays the number | |
# of words in the file. If multiple files are given on the command line, it | |
# generates a listing similar to the output of wc(1), except that directories | |
# and non-Textile files are silently skipped with no warning. | |
# | |
# And yes, this is more complicated than it probably needs to be | |
############################################################################ | |
# IMPORTANT NOTE: | |
# | |
# In my directories, I use the prefix character _ to denote a template file, | |
# such as "_Character Template.textile". The definition of textile_file? | |
# will SILENTLY ignore Textile files whose names start with an underscore. | |
# | |
# You can override this behavior on the command line with the -a option. The | |
# default behavior can be changed by modifying the initialize method to set | |
# @hide_leading_underscores to false. If you call the TextileWordCounter | |
# class from your own code, you can change this setting with the method | |
# hide_leading_underscores!(boolValue) | |
############################################################################ | |
require 'rubygems' | |
gem 'RedCloth', ">= 0" | |
require 'redcloth' | |
############################################################################ | |
# The main Textile word counter class | |
############################################################################ | |
# Counts the words in Textile documents. A document is rendered to HTML with
# RedCloth, the markup is stripped, and the remaining whitespace-separated
# tokens are counted.
class TextileWordCounter
  # Matches file names ending in ".textile", ignoring case.
  TEXTILE_EXTENSION = /\.textile\z/i

  # New counters hide files whose names start with an underscore.
  def initialize
    @hide_leading_underscores = true
  end

  # Return true to exclude files with leading _ characters in their names
  # from the count. A leading _ denotes a template file, which should be
  # left out when textilewc is run with a wildcard.
  def hide_leading_underscores?
    @hide_leading_underscores
  end

  # Turn the exclusion of underscore-prefixed files on (true) or off (false).
  def hide_leading_underscores!(val)
    @hide_leading_underscores = val
  end

  # Helper method to determine if a file is a Textile document.
  #
  # The underscore test looks at the base name only, so a template inside a
  # directory ("drafts/_Template.textile") is hidden exactly like one in the
  # current directory.
  #
  # @param file [String] a file name or path
  # @return [Boolean] true when the file should be counted
  def textile_file?(file)
    return false unless file =~ TEXTILE_EXTENSION

    !(hide_leading_underscores? && File.basename(file).start_with?('_'))
  end

  # Count the words in a Textile string. Used by count_words_file and (by
  # extension) count_words_files.
  #
  # @param filecontent [String] Textile source text
  # @return [Integer] number of whitespace-separated words left after the
  #   HTML tags and numeric character entities are removed
  def count_words(filecontent)
    html = RedCloth.new(filecontent).to_html
    text = html.gsub(/<\/?[^>]*>/, "") # Strip HTML tags
    text = text.gsub(/&#\d+;/, "")     # Strip numeric HTML entities
    text.split.length
  end

  # Count the words in a single file and print a "count name" line for it.
  # This can be invoked directly, or from within count_words_files.
  #
  # @param file [String] path of the file to count
  # @return [Integer] the word count, or 0 when the path is not a regular
  #   file (missing, or a directory)
  def count_words_file(file)
    return 0 unless File.file?(file)

    # Read the text as-is. Joining IO.readlines with "\n" would double every
    # line break, because readlines keeps each line's own terminator.
    words = count_words(File.read(file))
    printf "%7d %s\n", words, file
    words
  end

  # Count words in multiple files. Directories and files rejected by
  # textile_file? are skipped silently. A "total" line is printed when more
  # than one name was given.
  #
  # @param filelist [Array<String>] paths to consider
  # @return [Integer] the combined word count of the files that were counted
  def count_words_files(filelist)
    total = 0
    filelist.each do |file|
      next if File.directory?(file) || !textile_file?(file)

      total += count_words_file(file)
    end
    printf "%7d total\n", total if filelist.count > 1
    total
  end

  # Command-line entry point.
  #
  # With no file names it prints the usage banner. A single argument is
  # counted without the textile_file? filter and reported as "not found" when
  # it does not exist. With several arguments, a leading "-a" turns off the
  # hiding of underscore-prefixed files before the rest are counted.
  #
  # @param args [Array<String>] command-line arguments, normally ARGV
  def CommandLineDriver(args)
    if args.empty? || args == ["-a"]
      # A lone "-a" names no files, so it is handled like an empty command line.
      print_usage
    elsif args.count == 1
      if File.exist?(args[0])
        count_words_file(args[0])
      else
        printf "%s: not found\n", args[0]
      end
    elsif args[0] == "-a"
      hide_leading_underscores!(false)
      count_words_files(args.slice(1, args.count - 1))
    else
      count_words_files(args)
    end
  end

  private

  # Print the usage banner shown when no files are named.
  def print_usage
    puts <<END
***************************************************************
#{$0}: Count words in one or more Textile files.
By Tammy Cravit, [email protected]
$Revision$
***************************************************************
Usage: #{$0} [-a] <file> [file] ...
The default behavior of the script is to ignore files whose
names begin with an underscore. To include these files in the
count, include the -a option.
END
  end
end
############################################################################ | |
# Driver routine - display a message if no files are specified; | |
# otherwise, count the words in the subset of specified files which | |
# are Textile files. | |
############################################################################ | |
module Kernel
  # Runs the passed block of code, if the calling file is the one being
  # executed (that is, its expanded path equals the expanded path of $0).
  #
  # The caller's path is taken from caller_locations rather than by splitting
  # a backtrace string on ":", which truncated any path that itself contains
  # a colon (for example a Windows drive letter such as "C:/scripts/x.rb").
  def on_execute
    location = caller_locations(1, 1).first
    return unless location

    yield if File.expand_path(location.path) == File.expand_path($0)
  end
end
# Start the command-line driver with the script's arguments; on_execute
# decides whether the block actually runs.
on_execute { TextileWordCounter.new.CommandLineDriver(ARGV) }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment