ttscoff · February 20, 2025 09:39 · clintonpaquin · Jul 1, 2014 · ekalinin · Sep 2, 2014
diff --git a/github_toc.rb b/github_toc.rb
 #!/usr/bin/env ruby
 # encoding: utf-8

 =begin
 github_toc v0.2.0
 Brett Terpstra 2014
 <http://brettterpstra.com/2014/07/01/github-toc-service/>

 Creates a linked table of contents from headers in a GitHub readme
 Place a [toc] marker in the file to have it automatically replaced with the TOC
 If there's no marker, it adds the generated TOC to the beginning of the input text

 - You can use [toc 2] to limit the depth of the TOC from 1-4 levels
 - You can use [toc mmd] to use MultiMarkdown format header ids
 - You can use them together ([toc 3 mmd])
  - depth number must come before "mmd"
 - You can also specify 1-4 after the command on the command line (`cat README.md|github_toc 3`)
 - You can specify mmd or no_mmd on the command line. no_mmd will change the
  formatting of mmd headers in an existing toc

 Input:
 - Specify a filename in the arguments to read from file on disk
 - You can specify an output filename after the input file, otherwise output is to STDOUT
 - Specifying the same file for input and output will modify in place

 Usage:
  Piped text - `cat filename.md | github_toc` => returns full text of input with TOC
  Read a file, output to STDOUT - `github_toc filename.md` => returns full text of file with TOC
  Modify a file in place, with a depth limit - `github_toc 2 filename.md filename.md`

 Notes:
 - Header 1s (# Header) are ignored if there's only one of them, then only 2-6 are collected
 - If a file already has a TOC generated by this script, running it again will replace it.
 - Headers above the position of a [toc] tag or existing TOC in the document are ignored

 Changelog:

 v0.2.0 November 21, 2014
 - Use "mmd" in [toc 2 mmd]
 - use mmd or no_mmd on the command line
 - remembers depth and header style for repeating
 - cli args override recorded args (no_mmd will erase mmd in tags)
 - if there's more than one h1, it recognizes them as sections
 - fixes for github header id generation
 - defaults to 2 levels if no depth is specified
 - fixed specifying output file when it doesn't exist yet
 =end

 # Options gathered from the command line. `depth` and the two paths stay false
 # until an argument supplies them; `mmd_headers` stays nil unless "mmd" or
 # "no_mmd" is given, so later code can tell "not specified" apart from "off".
 depth = false
 infile = false
 outfile = false
 mmd_headers = nil

 # Classify each argument in order:
 #   all digits       -> TOC depth
 #   mmd / no_mmd     -> turn MultiMarkdown header ids on / off
 #   existing path    -> input file (only the first one)
 #   anything else    -> output file, whose parent directory must exist
 # File.exist? is used because File.exists? was removed in Ruby 3.2.
 ARGV.each do |arg|
   if arg =~ /^\d+$/
     depth = arg.to_i
   elsif arg =~ /^(no_)?mmd$/i
     # The optional "no_" group is unmatched (nil) for plain "mmd"
     mmd_headers = $1.nil?
   elsif !infile && File.exist?(File.expand_path(arg))
     infile = File.expand_path(arg)
   elsif File.exist?(File.expand_path(File.dirname(arg)))
     outfile = File.expand_path(arg)
   else
     $stderr.puts "Output file directory doesn't exist"
     Process.exit 1
   end
 end

 # Load the Markdown source: from the input file when one was named on the
 # command line, otherwise from whatever is piped in on STDIN.
 input = infile ? IO.read(infile) : STDIN.read

 # Without any text there is nothing to index. Usage goes to STDERR because
 # STDOUT carries the generated document, and the script stops with a failing
 # status instead of carrying on and emitting an empty TOC.
 if input.nil? || input.empty?
   $stderr.puts "You must pipe text to STDIN or specify filename"
   $stderr.puts "#{File.basename(__FILE__)} [1-4] [input_file [output_file]]"
   Process.exit 1
 end

 # Only headers below a [toc] tag or a previously generated TOC block are
 # indexed, so look for either marker and keep just the text that follows it.
 # A marker may also carry a depth digit and an "mmd" flag; those are applied
 # only when the command line has not already set the corresponding option.
 toc_tag = /^\[\s*toc\s*(?:(\d)\s*)?(mmd)?\s*\]\s*$/i
 toc_block = /^## Contents.*?<!-- end toc (?:(\d) )?(mmd )?-->/m

 content = input.dup
 if (marker = toc_tag.match(input))
   # A [toc] tag without a digit means the default depth of 2
   depth ||= marker[1].nil? ? 2 : marker[1].to_i
 elsif (marker = toc_block.match(input))
   depth ||= marker[1].to_i unless marker[1].nil?
 end

 if marker
   mmd_headers = true if mmd_headers.nil? && marker[2]
   # post_match is the text after the first marker. Using it instead of
   # split(...)[1..-1].join keeps the regex captures out of the content, copes
   # with a marker that is the entire input, and applies the same pattern for
   # detection and splitting so a tag such as "[ toc ]" no longer leaves the
   # content empty.
   content = marker.post_match
 end

 # Clamp the requested depth to 1..4, falling back to 2 when none was given
 depth = 2 unless depth && depth > 0
 depth = 4 if depth > 4

 # A lone H1 is left out of the TOC; when there are several, they are indexed
 # as sections together with levels 2-6.
 h1_count = content.scan(/^\#[^#]/).length
 header_pattern = h1_count > 1 ? /^(\#{1,6})[^#](.*)$/ : /^(\#{2,6})[^#](.*)$/
 # Each entry is [hashes, title]; the length of `hashes` is the header level
 headers = content.scan(header_pattern)

 # The shallowest level present becomes the left margin of the list (6 when no
 # headers were found). Anything more than `depth` levels below it is dropped.
 top_level = headers.map { |hashes, _| hashes.length }.min || 6
 max_level = top_level + (depth - 1)
 headers.delete_if { |hashes, _| hashes.length > max_level }

 # One list item per header, indented four spaces per level below the top
 entries = headers.map do |hashes, title|
   indent = "    " * (hashes.length - top_level)
   if mmd_headers
     # MultiMarkdown-style id: keep only letters, digits and hyphens
     link = title.strip.gsub(/[^a-z\-0-9]+/i, '').downcase
   else
     # GitHub-style id: lowercase, remove punctuation while keeping word
     # characters, underscores and hyphens, then turn every single space into
     # a hyphen ("Step 1 - Install" => "step-1---install"). Stripping hyphens
     # or collapsing runs of spaces yields anchors GitHub does not generate.
     link = title.strip.downcase.gsub(/[^\p{Word}\- ]/, '').tr(' ', '-')
   end
   "#{indent}- [#{title}](##{link})\n"
 end

 # Assemble the block. The closing comment carries a non-default depth and the
 # mmd flag alongside the "end toc" marker text.
 mmd = mmd_headers ? "mmd " : ""
 dep = depth == 2 ? "" : "#{depth} "
 toc = "## Contents\n\n#{entries.join}\n<!-- end toc #{dep}#{mmd}-->"

 # Put the TOC into the document: in place of a [toc] tag if there is one,
 # otherwise in place of a previously generated TOC block, otherwise at the
 # very top. The replacement is given as a block so the TOC text is inserted
 # literally; passed as a plain string, sub would expand sequences such as
 # "\1" or "\\" that happen to occur in a header title.
 tag_pattern = /^\[toc\s*(\d+)?\s*(mmd)?\s*\]\s*$/mi
 block_pattern = /^## Contents.*?<!-- end toc (\d )?(mmd )?-->/m

 output =
   if input =~ tag_pattern
     input.sub(tag_pattern) { toc + "\n" }
   elsif input =~ block_pattern
     input.sub(block_pattern) { toc }
   else
     toc + "\n\n" + input
   end

 # Emit the finished document: overwrite the output file when one was named on
 # the command line, otherwise print to STDOUT.
 emit = lambda { |stream| stream.puts output }
 if outfile
   File.open(outfile, 'w', &emit)
 else
   emit.call($stdout)
 end

 # Workflow: [/Users/ttscoff/Library/Services/GitHub TOC.workflow, 00CDD1A5-5674-4E08-94AC-61E05F0020B5]
	#!/usr/bin/env ruby
	# encoding: utf-8

	=begin
	github_toc v0.2.0
	Brett Terpstra 2014
	<http://brettterpstra.com/2014/07/01/github-toc-service/>

	Creates a linked table of contents from headers in a GitHub readme
	Place a [toc] marker in the file to have it automatically replaced with the TOC
	If there's no marker, it adds the generated TOC to the beginning of the input text

	- You can use [toc 2] to limit the depth of the TOC from 1-4 levels
	- You can use [toc mmd] to use MultiMarkdown format header ids
	- You can use them together ([toc 3 mmd])
	- depth number must come before "mmd"
	- You can also specify 1-4 after the command on the command line (`cat README.md|github_toc 3`)
	- You can specify mmd or no_mmd on the command line. no_mmd will change the
	formatting of mmd headers in an existing toc

	Input:
	- Specify a filename in the arguments to read from file on disk
	- You can specify an output filename after the input file, otherwise output is to STDOUT
	- Specifying the same file for input and output will modify in place

	Usage:
	Piped text - `cat filename.md | github_toc` => returns full text of input with TOC
	Read a file, output to STDOUT - `github_toc filename.md` => returns full text of file with TOC
	Modify a file in place, with a depth limit - `github_toc 2 filename.md filename.md`

	Notes:
	- Header 1s (# Header) are ignored if there's only one of them, then only 2-6 are collected
	- If a file already has a TOC generated by this script, running it again will replace it.
	- Headers above the position of a [toc] tag or existing TOC in the document are ignored

	Changelog:

	v0.2.0 November 21, 2014
	- Use "mmd" in [toc 2 mmd]
	- use mmd or no_mmd on the command line
	- remembers depth and header style for repeating
	- cli args override recorded args (no_mmd will erase mmd in tags)
	- if there's more than one h1, it recognizes them as sections
	- fixes for github header id generation
	- defaults to 2 levels if no depth is specified
	- fixed specifying output file when it doesn't exist yet
	=end

	# Options gathered from the command line. `depth` and the two paths stay false
	# until an argument supplies them; `mmd_headers` stays nil unless "mmd" or
	# "no_mmd" is given, so later code can tell "not specified" apart from "off".
	depth = false
	infile = false
	outfile = false
	mmd_headers = nil

	# Classify each argument in order:
	#   all digits       -> TOC depth
	#   mmd / no_mmd     -> turn MultiMarkdown header ids on / off
	#   existing path    -> input file (only the first one)
	#   anything else    -> output file, whose parent directory must exist
	# File.exist? is used because File.exists? was removed in Ruby 3.2.
	ARGV.each do |arg|
	  if arg =~ /^\d+$/
	    depth = arg.to_i
	  elsif arg =~ /^(no_)?mmd$/i
	    # The optional "no_" group is unmatched (nil) for plain "mmd"
	    mmd_headers = $1.nil?
	  elsif !infile && File.exist?(File.expand_path(arg))
	    infile = File.expand_path(arg)
	  elsif File.exist?(File.expand_path(File.dirname(arg)))
	    outfile = File.expand_path(arg)
	  else
	    $stderr.puts "Output file directory doesn't exist"
	    Process.exit 1
	  end
	end

	# Load the Markdown source: from the input file when one was named on the
	# command line, otherwise from whatever is piped in on STDIN.
	input = infile ? IO.read(infile) : STDIN.read

	# Without any text there is nothing to index. Usage goes to STDERR because
	# STDOUT carries the generated document, and the script stops with a failing
	# status instead of carrying on and emitting an empty TOC.
	if input.nil? || input.empty?
	  $stderr.puts "You must pipe text to STDIN or specify filename"
	  $stderr.puts "#{File.basename(__FILE__)} [1-4] [input_file [output_file]]"
	  Process.exit 1
	end

	# Only headers below a [toc] tag or a previously generated TOC block are
	# indexed, so look for either marker and keep just the text that follows it.
	# A marker may also carry a depth digit and an "mmd" flag; those are applied
	# only when the command line has not already set the corresponding option.
	toc_tag = /^\[\s*toc\s*(?:(\d)\s*)?(mmd)?\s*\]\s*$/i
	toc_block = /^## Contents.*?<!-- end toc (?:(\d) )?(mmd )?-->/m

	content = input.dup
	if (marker = toc_tag.match(input))
	  # A [toc] tag without a digit means the default depth of 2
	  depth ||= marker[1].nil? ? 2 : marker[1].to_i
	elsif (marker = toc_block.match(input))
	  depth ||= marker[1].to_i unless marker[1].nil?
	end

	if marker
	  mmd_headers = true if mmd_headers.nil? && marker[2]
	  # post_match is the text after the first marker. Using it instead of
	  # split(...)[1..-1].join keeps the regex captures out of the content, copes
	  # with a marker that is the entire input, and applies the same pattern for
	  # detection and splitting so a tag such as "[ toc ]" no longer leaves the
	  # content empty.
	  content = marker.post_match
	end

	# Clamp the requested depth to 1..4, falling back to 2 when none was given
	depth = 2 unless depth && depth > 0
	depth = 4 if depth > 4

	# A lone H1 is left out of the TOC; when there are several, they are indexed
	# as sections together with levels 2-6.
	h1_count = content.scan(/^\#[^#]/).length
	header_pattern = h1_count > 1 ? /^(\#{1,6})[^#](.*)$/ : /^(\#{2,6})[^#](.*)$/
	# Each entry is [hashes, title]; the length of `hashes` is the header level
	headers = content.scan(header_pattern)

	# The shallowest level present becomes the left margin of the list (6 when no
	# headers were found). Anything more than `depth` levels below it is dropped.
	top_level = headers.map { |hashes, _| hashes.length }.min || 6
	max_level = top_level + (depth - 1)
	headers.delete_if { |hashes, _| hashes.length > max_level }

	# One list item per header, indented four spaces per level below the top
	entries = headers.map do |hashes, title|
	  indent = "    " * (hashes.length - top_level)
	  if mmd_headers
	    # MultiMarkdown-style id: keep only letters, digits and hyphens
	    link = title.strip.gsub(/[^a-z\-0-9]+/i, '').downcase
	  else
	    # GitHub-style id: lowercase, remove punctuation while keeping word
	    # characters, underscores and hyphens, then turn every single space into
	    # a hyphen ("Step 1 - Install" => "step-1---install"). Stripping hyphens
	    # or collapsing runs of spaces yields anchors GitHub does not generate.
	    link = title.strip.downcase.gsub(/[^\p{Word}\- ]/, '').tr(' ', '-')
	  end
	  "#{indent}- [#{title}](##{link})\n"
	end

	# Assemble the block. The closing comment carries a non-default depth and the
	# mmd flag alongside the "end toc" marker text.
	mmd = mmd_headers ? "mmd " : ""
	dep = depth == 2 ? "" : "#{depth} "
	toc = "## Contents\n\n#{entries.join}\n<!-- end toc #{dep}#{mmd}-->"

	# Put the TOC into the document: in place of a [toc] tag if there is one,
	# otherwise in place of a previously generated TOC block, otherwise at the
	# very top. The replacement is given as a block so the TOC text is inserted
	# literally; passed as a plain string, sub would expand sequences such as
	# "\1" or "\\" that happen to occur in a header title.
	tag_pattern = /^\[toc\s*(\d+)?\s*(mmd)?\s*\]\s*$/mi
	block_pattern = /^## Contents.*?<!-- end toc (\d )?(mmd )?-->/m

	output =
	  if input =~ tag_pattern
	    input.sub(tag_pattern) { toc + "\n" }
	  elsif input =~ block_pattern
	    input.sub(block_pattern) { toc }
	  else
	    toc + "\n\n" + input
	  end

	# Emit the finished document: overwrite the output file when one was named on
	# the command line, otherwise print to STDOUT.
	if outfile
	  File.open(outfile, 'w') do |f|
	    f.puts output
	  end
	else
	  $stdout.puts output
	end

	# Workflow: [/Users/ttscoff/Library/Services/GitHub TOC.workflow, 00CDD1A5-5674-4E08-94AC-61E05F0020B5]