Skip to content

Instantly share code, notes, and snippets.

@mimoo
Created March 8, 2021 04:48
Show Gist options
  • Save mimoo/bae1065d92efe9e18b75bb4a86834f6a to your computer and use it in GitHub Desktop.
Save mimoo/bae1065d92efe9e18b75bb4a86834f6a to your computer and use it in GitHub Desktop.
Flamegraph of your asciidoc books

I wrote a very primitive script to produce a flamegraph out of a number of asciidoc files. See this blog post for some idea of the result.

flamegraph

WARNING: It's heavily tailored to my own setup, so I'm not sure it'll work directly with yours; you might have some customization to do. (If there's enough interest I could do a more general script / Dockerfile.)

How to use?

get the flamegraph tool:

git clone --depth 1 git@github.com:brendangregg/FlameGraph.git

get the python code included in this file

then run the following command:

python get_sections.py flamegraph | ./FlameGraph/flamegraph.pl > flamegraph.svg

then open the flamegraph.svg in your browser.

note that you can also check what your sections are with this command:

python get_sections.py

What is the expected setup

This is the expected file structure:

your_folder/
├── manuscript/ # your book
│   ├── 1_some_chapter.adoc # chapters must be named <num>_<name>.adoc
│   └── 2_some_other_chapter.adoc
├── FlameGraph/ # the clone of the FlameGraph repository
└── get_sections.py # the file contained in this gist

If you have a different file structure, you might want to replace this snippet of code with a manual list of your files:

    filenames = glob.glob("./manuscript/*_*.adoc")
    filenames = sorted(filenames, key=lambda line: int(
        os.path.basename(line).split("_", 1)[0]))

as well as get the chapter number in a different way:

            # get chapter number
            chapt_num = int(name.split("_", 1)[0])

It'll expect .adoc files that start with the name of the chapter as depth-1 heading:

= Chapter Title

== first section

=== first subsection

if you do not have such a convention, you might want to change this part:

                match depth:
                    # main title: initialization
                    case 1:

with case x if len(prefix) == 0:

import glob
import os
import re
import sys
from typing import Iterator
####
class Chapter:
def __init__(self, num: int, data: Iterator[str]):
self.num = str(num)
self.data = data
def parse(self):
words = {}
nums = []
prefix = []
# read section of the chapter
first_line = True
is_comment = False
is_formula = False
# read line by line
while True:
# read next line
line = next(self.data, None)
# stop when we reached the end
if line is None:
break
# skip first line
if first_line:
first_line = False
continue
# skip comments
if is_comment:
if len(line) >= 4 and line[:4] == "////":
is_comment = False
else:
continue
else:
if len(line) >= 4 and line[:4] == "////":
is_comment = True
continue
# skip formulas
if is_formula:
if len(line) >= 4 and line[:4] == "****":
is_formula = False
else:
continue
else:
if len(line) >= 4 and line[:4] == "****":
is_formula = True
continue
# retrieve information from titles only (e.g. ==== TITLE)
depth = count_equal(line)
is_title = depth > 0
if is_title:
# get section name
raw_name = line.rsplit("=", 1)[1].rstrip("\n")
name = simplify_name(raw_name)
# ignore empty titles
if name == "":
continue
# handle
match depth:
# main title: initialization
case 1:
assert(len(prefix) == 0)
assert(len(words) == 0)
nums = [self.num]
prefix = [name]
# nesting
case x if x > len(prefix):
nums.append(1)
prefix.append(name)
# unesting
case x if x < len(prefix):
nums.pop()
nums[-1] += 1
prefix.pop()
prefix[-1] = name
# same prefix
case x:
nums[-1] += 1
prefix[-1] = name
path = path_from(nums, prefix)
assert(path not in words)
words[path] = 0
#
continue
# if we're still not initialized, skip
if len(prefix) == 0:
continue
# a normal line: count the words
path = path_from(nums, prefix)
words[path] += count_words(line)
#
return words
def path_from(nums, prefix):
    """Build the ";"-separated path of a section.

    Every element of ``prefix`` becomes one path component of the form
    "<dotted number> <title>", where the dotted number joins the first
    ``level + 1`` entries of ``nums``.
    """
    labels = [str(value) for value in nums]
    # pad a one-character chapter number so that chapters sort correctly
    if len(labels[0]) == 1:
        labels[0] = "0" + labels[0]
    return ";".join(
        ".".join(labels[: level + 1]) + " " + title
        for level, title in enumerate(prefix)
    )
def simplify_name(name: str):
    """Return ``name`` without its leading and trailing space characters."""
    return name.strip(" ")
####
def count_equal(line):
    """Count the "=" characters that open ``line``, looking at 5 chars at most."""
    head = line[:5]
    return len(head) - len(head.lstrip("="))
def count_words(line):
    """Return the number of whitespace-separated words in ``line``.

    Splitting on any run of whitespace (rather than on single spaces) keeps
    blank lines at 0 words and prevents repeated spaces from inflating the
    count.
    """
    return len(line.split())
def parse_chapter(chapt_num, chapter):
    """Print the numbered outline (the headings) of one chapter.

    Args:
        chapt_num: initial value of the depth-0 counter; it is incremented
            by the first depth-0 heading that is printed.
        chapter: iterable over the lines of the chapter.

    The first line is always ignored, and so is everything between a pair of
    ``////`` lines or a pair of ``****`` lines. Nothing is returned: headings
    are printed through ``print_section_number``.
    """
    section_depth = {0: chapt_num}
    is_comment = False
    is_formula = False

    lines = iter(chapter)
    # NOTE(review): the first line is skipped unconditionally; presumably the
    # chapters start with a non-title line (e.g. an anchor) - confirm.
    next(lines, None)

    for line in lines:
        # skip comments (a delimiter line opens or closes the block)
        if line.startswith("////"):
            is_comment = not is_comment
            continue
        if is_comment:
            continue
        # skip formulas
        if line.startswith("****"):
            is_formula = not is_formula
            continue
        if is_formula:
            continue

        # retrieve information from titles only (e.g. ==== TITLE)
        depth = count_equal(line)
        if depth == 0:
            continue

        # Strip only the leading marker: a title may itself contain "="
        # characters (e.g. "== Why a = b").
        name = line.lstrip("=").rstrip("\n")
        # ignore empty titles (this also covers "====" delimiters)
        if name == "":
            continue

        section_depth = print_section_number(section_depth, depth - 1, name)
def print_section_number(section_depth, depth, name):
    """Advance the counter of ``depth``, print the numbered title, return the counters.

    Args:
        section_depth: dict mapping a depth (0 = chapter) to its current
            counter; it is updated in place.
        depth: depth of the heading being printed (0-based).
        name: title text, printed right after the dotted number.

    Returns:
        The updated ``section_depth`` dict.
    """
    # bump (or create) the counter of this depth, then reset deeper ones
    section_depth[depth] = section_depth.get(depth, 0) + 1
    section_depth = clear_depth(section_depth, depth)

    # A heading may skip levels (e.g. "==" directly followed by "===="):
    # give the missing intermediate levels a 0 counter instead of failing
    # with a KeyError while formatting.
    for level in range(depth):
        section_depth.setdefault(level, 0)

    # format
    number = "".join(f"{section_depth[level]}." for level in range(depth + 1))
    print("-" * depth + number + name)
    return section_depth
def clear_depth(section_depth, depth):
    """Reset to 0 the counters of the consecutive depths below ``depth``.

    Starts at ``depth + 1`` and stops at the first depth that has no entry
    in ``section_depth``. The dict is modified in place and returned.
    """
    level = depth + 1
    while level in section_depth:
        section_depth[level] = 0
        level += 1
    return section_depth
def main():
    """Process every chapter found in ``./manuscript``.

    The optional first command-line argument selects the mode:

    * ``flamegraph``: print one "<section path> <word count>" line per
      section, as computed by ``Chapter.parse``;
    * ``verbose``: print each file name before its outline;
    * anything else / nothing: print the outline of each chapter through
      ``parse_chapter``.

    Chapter files must be named ``<num>_<name>.adoc``; they are processed in
    increasing ``<num>`` order.
    """
    # parse arguments
    mode = sys.argv[1] if len(sys.argv) > 1 else ""
    flamegraph = mode == "flamegraph"  # generate a flamegraph
    verbose = mode == "verbose"

    # collect the chapter files together with their number
    chapters = []
    for filename in glob.glob("./manuscript/*_*.adoc"):
        name = os.path.basename(filename)
        try:
            chapt_num = int(name.split("_", 1)[0])
        except ValueError:
            # The glob also matches names such as "draft_notes.adoc".
            # Warn on stderr so the flamegraph data on stdout stays clean.
            print(f"skipping {name}: no chapter number prefix", file=sys.stderr)
            continue
        chapters.append((chapt_num, name, filename))

    # read chapter files sorted by their number
    chapters.sort(key=lambda chapter: chapter[0])

    # parse each file one by one
    for chapt_num, name, filename in chapters:
        # explicit encoding: do not depend on the platform default
        with open(filename, "r", encoding="utf-8") as f:
            if verbose:
                print(name)
            if flamegraph:
                for path, count in Chapter(chapt_num, iter(f)).parse().items():
                    print(path, count)
            else:
                # the depth-0 title of the chapter bumps the counter back up
                parse_chapter(chapt_num - 1, f)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment