Created
March 18, 2022 13:53
-
-
Save joswr1ght/5a180a5d9365ccf4e6f678b2cbbec5c4 to your computer and use it in GitHub Desktop.
Test one or more Asciidoc .adoc files for missing document attribute definitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Using one or more adoc files, build a list of all document attributes,
# then search for typo'd or undefined references to the attributes.
#
# 2022-03-18
# Copyright (c) 2022 Joshua Wright <[email protected]>
# LICENSE: GPL 3.0
import sys | |
import os | |
import re | |
import glob | |
from collections import namedtuple | |
# ANSI escape sequences used to highlight the "file:line" part of each report.
# The leading ESC (\033) is required; without it the terminal prints the
# bracket sequence literally instead of switching colour.
COLORFILE = "\033[36m"   # cyan foreground
COLORRESET = "\033[0m"   # reset all attributes

# Prefix put in front of every message this script prints.
PREFIX = "BADATTR: "

# Where a source line was read from: the file name and its 1-based line number.
AdocRecord = namedtuple("AdocRecord", "filename loc")
def readFile(adocdata: dict, adoc: str, adocfiles: list) -> dict:
    """
    Read the file specified in adoc, recording each of its lines in adocdata.

    Every line of the file becomes a key in adocdata, mapped to an AdocRecord
    holding the file name and the 1-based line number where it was read.
    Because the line text itself is the key, a line that occurs more than once
    (in this file or in another one) keeps only the most recently read
    location.

    If the source uses an `include::target[...]` directive, the target is
    resolved relative to the directory of the including file and read as
    well, as long as it isn't already in the adocfiles list. This function is
    recursive, calling itself when a new file is identified with `include`.

    Args:
        adocdata: dictionary of `line: AdocRecord`, updated in place.
        adoc: path of the adoc file to read.
        adocfiles: list of files already known; newly discovered include
            targets are appended to it so they are only followed once.

    Returns:
        The same adocdata dictionary that was passed in.

    Raises:
        OSError: if adoc itself cannot be opened.
    """
    # An include directive must start the line; the target is everything up
    # to the first "[" (a greedy ".+" would run on to the last bracket).
    includere = r"^include::([^\[]+)\["
    basedir = os.path.dirname(adoc)

    with open(adoc, encoding="utf-8") as adocfp:
        for loc, line in enumerate(adocfp, start=1):
            adocdata[line] = AdocRecord(adoc, loc)  # AdocRecord is a namedtuple

            match = re.search(includere, line)
            if not match:
                continue

            # Relative targets are relative to the including file, not to
            # the current working directory. An absolute target is kept
            # as-is by os.path.join.
            includefile = os.path.normpath(
                os.path.join(basedir, match.group(1)))

            # Compare normalised paths so "./a.adoc" and "a.adoc" are
            # recognised as the same, already-known file.
            if any(os.path.normpath(known) == includefile
                   for known in adocfiles):
                continue

            if not os.path.isfile(includefile):
                # URLs, targets built from attribute references, or simply
                # missing files cannot be read; report and carry on rather
                # than abort the whole check.
                print(f"Skipping unreadable include {includefile} "
                      f"({adoc}:{loc})", file=sys.stderr)
                continue

            adocfiles.append(includefile)
            readFile(adocdata, includefile, adocfiles)

    return adocdata
if __name__ == "__main__":
    # Usage: script.py FILE.adoc [FILE.adoc ...]
    #
    # Prints one line per reference to an undefined document attribute and
    # exits with the number of problems found (capped at 255).
    if len(sys.argv) == 1:
        # No arguments; exit
        sys.exit(0)

    adocfiles = []
    for adoc in sys.argv[1:]:
        if os.path.splitext(adoc)[1] != ".adoc":
            print(f"{PREFIX}Skipping non-adoc file {adoc}")
            continue
        # We need to look at all files in this project directory to
        # identify the root-level adoc file. Get the file dirname and glob
        # the other adoc files in this directory.
        adocfiles.append(adoc)
        # os.path.join keeps the pattern relative ("*.adoc") when the
        # argument has no directory part; plain concatenation with "/"
        # would glob the filesystem root instead.
        adocfiles += glob.glob(
            os.path.join(os.path.dirname(adoc), "*.adoc"))

    # Process this list of adoc files to build a set of references; make the
    # list unique. Sorting keeps the read order, and therefore the report,
    # the same from run to run.
    adocfiles = sorted(set(adocfiles))

    # adocdata is a dictionary of `line:AdocRecord` where `line` is the adoc
    # file source line as the dictionary key
    adocdata = {}

    # Process each of the files identified on the command line or through
    # globbing other .adoc files in the same directories, building a
    # dictionary of lines of source with a namedtuple attribute for file name
    # and line number/loc. Iterate over a snapshot: readFile appends the
    # include targets it follows to adocfiles, and it has already read them.
    for adoc in list(adocfiles):
        adocdata = readFile(adocdata, adoc, adocfiles)

    # Iterate through adocdata, building the set of document attributes
    # `:foo:`. Names start with a word character and may contain hyphens.
    attrre = re.compile(r"^:(\w[\w-]*):(?:\s|$)")
    attr = set()
    for line in adocdata:
        match = attrre.search(line)
        if match:
            attr.add(match.group(1))

    # With the set of document attributes, look for references that do not
    # have a corresponding attribute. Every reference on a line is checked,
    # not only the first one.
    refre = re.compile(r"\{(\w[\w-]*)\}")
    problems = 0
    for line, lineattr in adocdata.items():
        for ref in refre.findall(line):
            if ref not in attr:
                problems += 1
                print(
                    f"{PREFIX}Bad attribute in "
                    f"{COLORFILE}{os.path.basename(lineattr.filename)}:{lineattr.loc}"
                    f"{COLORRESET}: {{{ref}}}")

    # Exit statuses are truncated to 8 bits by the OS, so an unclamped count
    # of 256 would be reported as success.
    sys.exit(min(problems, 255))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment