#!/usr/bin/env ruby

require 'htmli'

# Auxiliary helpers
|
|
|
# Core extension: depth-first traversal over arbitrarily nested arrays.
class Array
  # Recursive each. The block is called with the receiver itself first; then
  # the elements are walked in order. A nested Array is traversed the same
  # way (so it is passed to the block too, ahead of its own elements), any
  # other element is handed to the block directly.
  #
  # Returns the receiver, like Array#each.
  def receach(&b)
    b.call(self)
    each do |item|
      case item
      when Array then item.receach(&b)
      else b.call(item)
      end
    end
  end
end
|
|
|
# Helper proc for nested index lookup. The indices may be passed flat,
# grouped in arrays, or mixed -- they are flattened before use:
#   ipath[a, 0, 0, 1, 2] == ipath[a, [0, 0, 1, 2]] == a[0][0][1][2]
# An empty index list yields the object itself.
ipath = proc do |root, *path|
  path.flatten.inject(root) { |node, key| node[key] }
end
|
|
|
|
|
### |
|
|
|
# Read the document from ARGF ($<), build a tree with HTMLi and pick the
# first node visited that is a <div> whose class attribute is exactly
# "markdown-body".
# NOTE(review): a node is assumed to look like [[kind, name, attrs], ...]
# with String/Symbol members -- confirm against HTMLi's tree format.
rh0 = nil
HTMLi.instance_eval { mktree sanitize(tokenize($<)) }.receach do |e|
  next unless Array === e && Array === e[0] && e[0].size == 3

  head = e[0]
  next unless head.all? { |x| [String, Symbol].include?(x.class) }
  next unless head[0..1].map(&:to_s) == %w[tag div]
  next unless head[2] =~ /class="([^"]+)"/ && Regexp.last_match(1) == "markdown-body"

  rh0 = e
  break # first match wins; stop walking the tree
end
# Fail with a clear message here; otherwise the nil would only surface as a
# NoMethodError when the headers are collected from rh0.
abort 'no <div class="markdown-body"> found in the input' unless rh0
|
|
|
# Collect every header node found under rh0, in traversal order. A header is
# a node whose first element is a two-member [kind, name] pair with kind
# "tag" and a name of the form h<digits> (case-insensitive).
rh1 = []
rh0.receach do |e|
  next unless Array === e && Array === e[0] && e[0].size == 2

  tag = e[0]
  next unless (tag.map(&:class) - [String, Symbol]).empty?

  rh1 << e if tag[0].to_s == "tag" && tag[1] =~ /\Ah\d+\Z/i
end
|
|
|
# From each header node extract [level, anchor, content]:
#   level   - Integer taken from the tag name at e[0][1] ("h2" -> 2)
#   anchor  - value of the href attribute found in e[1][0][0][2], or nil
#   content - e[1] without its first element
# NOTE(review): presumably e[1][0] is the anchor element placed in front of
# the header text -- confirm against the parsed input.
rh2 = rh1.map do |e|
  tag, attrs, body = [[0, 1], [1, 0, 0, 2], 1].map { |x| ipath[e, x] }
  # String#[] returns the capture (or nil) for this very string. The former
  # `=~` / `$1` pair could report the previous header's href when the
  # attribute slot was nil, since `nil =~ re` leaves the last match untouched.
  href = attrs.to_s[/href="([^"]+)"/, 1]
  [Integer(tag[1..-1]), href, body[1..-1]]
end
|
|
|
# Sanitize the extracted headers. Each result entry is
# [indent level, anchor, markdown text].
rh2_levels = rh2.map(&:first) # loop-invariant, computed once
rh3 = rh2.map.with_index do |(l, a, c), i|
  # Sanitized header level: rank of this level among the distinct levels
  # seen so far (headers 0..i), so indentation never skips a step.
  l = rh2_levels[0..i].sort.uniq.index(l)
  # Convert the header content back to markdown through pandoc.
  # IO.popen with an argv array replaces the pipe form of Kernel#open
  # (deprecated since Ruby 3.3) and does not go through a shell; a missing
  # pandoc binary now raises Errno::ENOENT instead of yielding empty text.
  c = IO.popen(%w[pandoc -f html -t markdown], "r+") do |f|
    HTMLi.format([[:root, nil], c], out: f)
    f.close_write # signal EOF so pandoc starts producing output
    f.read.strip
  end
  [l, a, c]
end
|
|
|
|
|
# Print the result as a nested markdown list, four spaces of indentation
# per level, each entry linking to its anchor.
rh3.each do |level, anchor, title|
  indent = ' ' * (4 * level)
  puts "#{indent}- [#{title}](#{anchor})"
end