Created
September 16, 2008 23:18
-
-
Save tammymakesthings/11162 to your computer and use it in GitHub Desktop.
markupwc 1.0 release
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env ruby
###########################################################################
# mkdwc: Behaves like wc(1) but operates on the raw text of one or more
# Markdown or Textile files.
# Tammy Cravit, [email protected]
###########################################################################
# This code can also be included into another script if you want to extend
# the MarkupWC class.
###########################################################################
# This is the third take on this script. It abstracts out the markup-
# specific stuff as much as I know how to do. That way, the script can
# fairly easily be expanded to support just about any markup that can be
# easily converted to HTML, just by plugging in whatever code is needed
# to render the markup, and writing a new formatter.
###########################################################################
# Load every gem and library the script depends on, in the listed order.
%w[rubygems peg_markdown RedCloth stringray optparse on_execute].each do |dep|
  require(dep)
end
# Global command-line switches. The hash is mutated in place by the option
# parser, so it must not be frozen.
#
#   :chars / :words / :lines - when true, print only that one count.
#   :ignore_underscores      - read by MarkupWC#process_files and set to true
#                              by the -a/--all-files switch. NOTE: despite its
#                              name, process_files drops files beginning with
#                              "_" only while this value is falsy, so the
#                              default has to be false to keep that filtering
#                              switched on.
#
# The key was previously misspelled (:ingore_underscores), which left the
# entry that the rest of the script actually consults undeclared.
OPTIONS = {
  :chars => false,
  :words => false,
  :lines => false,
  :ignore_underscores => false,
}
###########################################################################
# Mix StringRay into String so every string gains StringRay's methods
# (the word counter below relies on String#to_stray).
###########################################################################
String.send(:include, StringRay)
###########################################################################
# These classes abstract out the markup-specific formatting stuff
# so we can use one utility script for multiple markup languages.
#
# To create a new markup formatter engine, do the following:
#
# 1. Define a class that inherits from MarkupEngineFormatter.
# 2. Add a markup_engine method that returns the class of the
#    markup processor. The markup processor must take the marked-up
#    text as an argument to initialize, and must define a to_html
#    method which returns valid HTML.
# 3. Add a file_masks method that returns an array of the file extensions
#    which are valid for the new markup language.
# 4. Add a file_type method that returns the name of the markup language.
#    This is used by optparse in displaying the usage message.
# 5. Add a test to the on_execute block at the end of the script, which
#    dispatches to your new class based on the script name.
# 6. Create a symlink from markupwc to the new script name you defined in
#    step 5.
###########################################################################
# Base class for markup formatters. It holds the raw text of one file and
# turns it into plain text by rendering it to HTML and removing the tags.
#
# Subclasses must provide:
#   markup_engine - a class whose instances are built from the raw text and
#                   respond to to_html
#   file_masks    - an array of file extensions (without the leading dot)
class MarkupEngineFormatter
  def initialize
    @file_content = ""
  end

  # Store the raw marked-up text that strip_markup will work on.
  def set_content(content)
    @file_content = content
  end

  # Remove anything that looks like an HTML tag from +s+ and replace each
  # numeric character reference (e.g. "&#8217;") with a single "?" so it
  # contributes exactly one character.
  def strip(s)
    s.gsub(/<[^>]*>/, "").gsub(/&#[0-9]+;/, "?")
  end

  # Render the stored content to HTML with the subclass's engine and return
  # the text with tags stripped.
  def strip_markup
    engine = markup_engine.new(@file_content)
    strip(engine.to_html)
  end

  # Return the entries of +alist+ whose name ends in one of file_masks.
  #
  # The extensions are escaped (Regexp.union) so they are matched literally,
  # and the pattern is anchored with \z rather than $: in Ruby, $ matches at
  # the end of any line, so a name containing a newline could slip through.
  def select_files(alist)
    pattern = /\.(?:#{Regexp.union(file_masks).source})\z/
    alist.select { |item| item =~ pattern }
  end
end
# Markdown flavour of MarkupEngineFormatter.
class MarkdownFormatter < MarkupEngineFormatter
  # Class used to render Markdown source to HTML.
  def markup_engine
    PEGMarkdown
  end

  # File extensions (without the dot) that are treated as Markdown.
  def file_masks
    ["markdown", "mkd", "mdown", "markdn", "md"]
  end

  # Human-readable name of the markup language.
  def file_type
    "Markdown"
  end
end
# Textile flavour of MarkupEngineFormatter.
class TextileFormatter < MarkupEngineFormatter
  # Class used to render Textile source to HTML.
  def markup_engine
    RedCloth
  end

  # File extensions (without the dot) that are treated as Textile.
  def file_masks
    ["textile"]
  end

  # Human-readable name of the markup language.
  def file_type
    "Textile"
  end
end
###########################################################################
# MarkupWC is the main class for the application. It counts characters,
# words and lines in marked-up files, using a formatter object to pick the
# relevant files and to reduce their content to plain text.
###########################################################################
class MarkupWC
  # formatter - object responding to file_type, set_content, strip_markup
  #             and select_files
  # filetype  - kept for interface compatibility; the name shown in the
  #             usage text is taken from formatter.file_type instead
  def initialize(formatter, filetype)
    @formatter = formatter
    @filetype = @formatter.file_type
  end

  # Count the characters, words and lines in a single file.
  #
  # Returns [characters, words, lines]. Characters and words are measured on
  # the markup-stripped text; lines are counted on the raw file. A path that
  # is not an existing regular file yields [0, 0, 0].
  def count_file(filepath)
    return [0, 0, 0] unless File.file?(filepath)

    file_lines = File.readlines(filepath)
    # readlines keeps each line's terminator, so the lines are joined with
    # no separator; joining with "\n" would double every newline.
    @formatter.set_content(file_lines.join)
    stripped = @formatter.strip_markup
    words = stripped.to_stray.select { |w| w.is_a? StringRay::Word }.size
    [stripped.length, words, file_lines.size]
  end

  # Invoke count_file on a group of files, and display individual results
  # plus a "total" row when more than one file was given.
  def count_many_files(filelist)
    tchars = twords = tlines = 0
    filelist.each do |f|
      chars, words, lines = count_file(f)
      print_result f, chars, words, lines
      tchars += chars
      twords += words
      tlines += lines
    end
    print_result "total", tchars, twords, tlines if filelist.size > 1
  end

  # Parse the command line options out of ARGV, leaving only the file
  # arguments behind. Prints the usage text and exits when no files are
  # given or when more than one of -c, -w and -l is supplied.
  def parse_command_line
    script_name = File.basename($0)
    parser = OptionParser.new do |opts|
      opts.set_summary_indent(' ')
      opts.banner = "Usage: #{script_name} [ -c | -w | -l ] <file> [file] ..."
      opts.define_head "Count words, lines, and characters in one or more #{@filetype} files."
      opts.separator ""
      opts.on("-c", "--chars", "Count only characters") { |flag| OPTIONS[:chars] = flag }
      opts.on("-w", "--words", "Count only words") { |flag| OPTIONS[:words] = flag }
      opts.on("-l", "--lines", "Count only lines") { |flag| OPTIONS[:lines] = flag }
      # NOTE: despite the key's name, a truthy :ignore_underscores means
      # "include files beginning with _" (see process_files).
      opts.on("-a", "--all-files",
              "Include files beginning with _") { |flag| OPTIONS[:ignore_underscores] = flag }
      opts.separator ""
      opts.on_tail("-h", "--help", "Show this help message") { puts opts; exit }
    end
    parser.parse!(ARGV)

    if ARGV.empty?
      puts parser
      exit
    end

    numopts = [:chars, :words, :lines].count { |opt| OPTIONS[opt] }
    if numopts > 1
      puts "The -c, -w and -l options are mutually exclusive"
      puts ""
      puts parser
      exit
    end
    parser
  end

  # Process a group of files specified on the command line. This is the
  # entrypoint for the script when it's invoked interactively.
  def process_files
    parse_command_line
    filelist = @formatter.select_files(ARGV)
    # Unless -a was given, drop files whose *name* starts with "_". The
    # basename is tested so that "dir/_draft.md" is filtered as well.
    unless OPTIONS[:ignore_underscores]
      filelist = filelist.reject { |item| File.basename(item).start_with?("_") }
    end
    if filelist.empty?
      puts "#{File.basename($0)}: filelist is empty"
      exit
    end
    count_many_files filelist
  end

  private

  # Display the results for a single file: one count when -c, -w or -l is
  # active, otherwise lines, words and characters in wc(1) order.
  def print_result(file, chars, words, lines)
    if OPTIONS[:chars]
      printf "%6d %s\n", chars, file
    elsif OPTIONS[:words]
      printf "%6d %s\n", words, file
    elsif OPTIONS[:lines]
      printf "%6d %s\n", lines, file
    else
      printf "%6d %6d %6d %s\n", lines, words, chars, file
    end
  end
end
###########################################################################
# Command-line entry point: choose the formatter from the name the script
# was invoked under, then count the files named in ARGV.
###########################################################################
on_execute do
  script_name = $0.split('/').last
  formatter, filetype =
    case script_name
    when "mkdwc", "mmwc"
      [MarkdownFormatter.new, "Markdown"]
    when "ttwc"
      [TextileFormatter.new, "Textile"]
    else
      # Unknown invocation name: report it and stop.
      puts "Don't know what to do for script: #{$0}"
      exit
    end
  MarkupWC.new(formatter, filetype).process_files
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment