""" | |
This is an example gist combining a few different source files from our build | |
script. | |
licensed to be under MIT | |
copywrite: Matthaus Woolard 2016 [email protected] | |
""" | |
import errno
import hashlib
import json
import os
import re
import warnings

from docutils import nodes
from docutils.nodes import Text, title
from docutils.parsers.rst import Directive, directives
from sphinx.builders.html import StandaloneHTMLBuilder
from sphinx.util import ensuredir, copyfile
from sphinx.util.nodes import nested_parse_with_titles
from sphinx.util.console import brown
from algoliasearch import algoliasearch
from sectiontile.sectiontile import SectionTileNode
# we need to do some image magic :)
# Matches e.g. "logo.png" and its retina sibling "logo@2x.png".
IMAGE_RX = re.compile(r'^(?P<mainname>[^@.]+)(?P<res>@2x)?\.(?P<postfix>\S+)$')

# Set this to the public base URL of the documentation site.
DOC_BASE_URL = ''

# Items queued here are pushed to Algolia in one batch at the end of the build.
algolia_to_index = []


class BreadcrumbContainerNode(SectionTileNode):
    @staticmethod
    def visit_node(self, node):
        # HTML translator hook: open the wrapping <div>.
        self.body.append(self.starttag(node, 'div'))

    @staticmethod
    def depart_node(self, node):
        self.body.append('</div>\n')


class BreadCrumbs(Directive):
    final_argument_whitespace = True
    has_content = True
    required_arguments = 0
    optional_arguments = 0
    option_spec = {
        'icon': directives.unchanged,
        'keywords': directives.unchanged,
    }

    def run(self):
        keywords = self.options.get('keywords', None)
        if keywords:
            keywords = keywords.split(' ')
        else:
            keywords = []
        node = BreadcrumbContainerNode(icon=self.options.get('icon', None),
                                       index_keywords=keywords)
        node.document = self.state.document
        nested_parse_with_titles(self.state, self.content, node)
        return [node]
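
# For illustration, the directive above would be used from reStructuredText
# roughly like this (the icon path, keywords, and content are hypothetical;
# the directive is registered as 'bc' in setup() below):
#
#   .. bc::
#      :icon: /images/guides-icon.png
#      :keywords: guides tutorials
#
#      Guides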


def dict_children(children, docname=None, no_index=False, page_title=None,
                  category=None):
    """
    Convert the AST to a dict that is ready to upload and be used by React.

    Warning: this is a recursive function.
    """
    child_dicts = []
    text = ''
    keywords = []
    for child in children:
        child_dict = {
            'module': child.__class__.__module__,
            'type': child.__class__.__name__,
        }
        child_dict['children'], _text, kw = dict_children(
            child.children,
            docname=docname,
            no_index=no_index,
            page_title=page_title,
            category=category)
        text += ' ' + _text
        keywords += kw
        try:
            attributes = child.attributes
            child_dict['attributes'] = attributes
        except AttributeError:
            pass
        else:
            # A parent node aggregates all the keywords of its children;
            # these are used for searching.
            keywords += attributes.get('index_keywords', [])
            if attributes.get('indexed', False) and not no_index:
                index(child_dict, _text, docname,
                      kw + attributes.get('index_keywords', []),
                      page_title, category)
        if isinstance(child, Text):
            # We collect all text and merge it into one long string for search.
            child_dict['attributes'] = {'text': child.astext()}
            text += ' ' + child.astext()
        child_dicts.append(child_dict)
    return child_dicts, text, keywords
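
# Sketch of the structure dict_children() produces for a single paragraph
# node (shape follows the code above; the text value is hypothetical):
#
#   {'module': 'docutils.nodes', 'type': 'paragraph',
#    'attributes': {...},
#    'children': [
#        {'module': 'docutils.nodes', 'type': 'Text',
#         'attributes': {'text': 'Hello world'}, 'children': []}]}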


def index(child_dict, text, docname, keywords, page_title, category):
    """
    Index a page item and push it to Algolia search.
    """
    new_dict = child_dict.copy()
    del new_dict['children']
    version = os.environ.get('DOC_VERSION', None)
    if version is None:
        return
    ref = find_reference(child_dict['attributes'])
    add_to_algolia_search({
        'title': child_dict['attributes']['index_title'],
        'docname': docname,
        'content': new_dict,
        'text': text,
        'priority': child_dict['attributes'].get('index_priority', 0),
        'ref': ref,
        'keywords': keywords,
        'version': version,
        'page_title': page_title,
        'category': category,
        'url': DOC_BASE_URL + '/{docname}#{ref}'.format(
            docname=docname,
            ref=ref,
        ),
    })


def index_page(doc_tree, docname, text, keywords, category):
    """
    Index a whole page and push it to Algolia.
    """
    if docname == 'index':
        return
    t = find_title(doc_tree)
    version = os.environ.get('DOC_VERSION', None)
    if version is None:
        return
    add_to_algolia_search({
        'title': t,
        'docname': docname,
        'content': {'type': 'page', 'module': 'page'},
        'text': text,
        'priority': 0,
        'keywords': keywords,
        'version': version,
        'page_title': t,
        'category': category,
        'url': DOC_BASE_URL + '/{docname}'.format(docname=docname),
    })


def find_reference(attributes):
    """
    Helper to extract the anchor id used when building links for indexing.
    """
    return attributes.get('ids', [''])[0]


def find_category(doctrees):
    """
    Find the breadcrumbs in this page's AST.
    """
    for doctree in doctrees:
        for node in doctree.traverse(BreadcrumbContainerNode):
            for text_node in node.traverse(Text):
                return text_node.astext()


def find_title(doctrees):
    """
    Get the page title.
    """
    for doctree in doctrees:
        for node in doctree.traverse(title):
            _, text, _ = dict_children([node], no_index=True)
            return text


def add_to_algolia_search(data):
    """
    Queue an item to be pushed to Algolia.
    """
    algolia_to_index.append(data)


def push_to_algolia_search():
    if len(os.environ.get('DOC_VERSION', '')) < 12:
        raise ValueError('Invalid DOC_VERSION; no indexing done')
    client = algoliasearch.Client(
        os.environ.get('ALGOLIA_KEY', ''),
        os.environ.get('ALGOLIA_SECRET', '')
    )
    index = client.init_index(os.environ.get('ALGOLIA_INDEX'))
    # Drop all existing records for this version before re-adding them.
    index.delete_by_query('', {'facetFilters': 'version:{}'.format(
        os.environ.get('DOC_VERSION', None))})
    index.add_objects(algolia_to_index)
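
# Environment variables the push above depends on (as read by the code):
#   DOC_VERSION    - version facet for the records; must be at least 12
#                    characters (e.g. a git commit sha) or the push aborts.
#   ALGOLIA_KEY    - passed as the first argument to algoliasearch.Client,
#                    i.e. the Algolia application ID.
#   ALGOLIA_SECRET - passed as the second argument, i.e. the API key.
#   ALGOLIA_INDEX  - name of the index whose records are replaced.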


class JsonHTMLBuilder(StandaloneHTMLBuilder):
    """
    Build the HTML output while also intercepting the AST, dumping it to
    JSON, and keeping the HTML as a fallback for non-JavaScript pages.
    """
    name = 'json-html-build'
    format = 'html'
    out_suffix = '.html'

    def __init__(self, *args, **kwargs):
        """
        We need to extract some images and build a map of them so we can
        replace the image names; the md5 of each image is appended to its
        file name.
        """
        super().__init__(*args, **kwargs)
        self._raw_images = {}
        self.images_1x = {}
        self.images_2x = {}
        self.image_map = {}

    def write_doc(self, docname, doctree: nodes.document):
        """
        Write the AST out as HTML and JSON; called once per page.
        """
        page_title = find_title(doctree)
        category = find_category(doctree)
        page_dict, page_text, keywords = dict_children(doctree.children,
                                                       docname,
                                                       page_title=page_title,
                                                       category=category)
        index_page(doctree.children, docname, page_text, keywords, category)
        # We also build an AST dump of the TOC.
        toc, _, _ = dict_children([self.env.get_toc_for(docname, self)],
                                  docname, no_index=True)
        toc_filename = os.path.join(self.outdir, docname + '.toc.json')
        filename = os.path.join(self.outdir, docname + '.json')
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:  # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        with open(filename, 'w') as fp:
            json.dump(
                {
                    'data': page_dict,
                    'toc': toc
                }, fp)
        with open(toc_filename, 'w') as fp:
            json.dump(toc, fp)
        super().write_doc(docname, doctree)
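
    # For a page with the hypothetical docname 'guides/intro', write_doc()
    # above emits guides/intro.json (page AST plus TOC) and
    # guides/intro.toc.json (TOC only), alongside the regular
    # guides/intro.html produced by the parent StandaloneHTMLBuilder.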

    def copy_image_files(self):
        """
        Only rewrite images when their content changes, so users' browser
        caches keep working.
        """
        # copy image files
        if self.images_1x:
            # every image must have a 1x version
            ensuredir(os.path.join(self.outdir, self.imagedir))
            for _src in self.app.status_iterator(self.images_1x,
                                                 'copying images... ',
                                                 brown, len(self.images_1x)):
                src, _, dest = self.images_1x[_src]
                try:
                    copyfile(os.path.join(self.srcdir, src),
                             os.path.join(self.outdir, self.imagedir, dest))
                except Exception as err:
                    self.warn('cannot copy image file %r: %s' %
                              (os.path.join(self.srcdir, src), err))
                if _src in self.images_2x:
                    src, _, dest = self.images_2x[_src]
                    try:
                        copyfile(os.path.join(self.srcdir, src),
                                 os.path.join(self.outdir, self.imagedir,
                                              dest))
                    except Exception as err:
                        self.warn('cannot copy image file %r: %s' %
                                  (os.path.join(self.srcdir, src), err))
        filename = os.path.join(self.outdir, 'image_mappings.json')
        with open(filename, 'w') as f:
            json.dump(self.image_map, f)
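
    # image_mappings.json maps original image URIs to their hashed file
    # names so the front end can resolve them. A hypothetical entry:
    #
    #   {"images/logo.png": "logo-<md5>.png"}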

    def post_process_images(self, doctree):
        """Pick the best candidate for all image URIs."""
        for node in doctree.traverse(nodes.image):
            if '?' in node['candidates']:
                # don't rewrite nonlocal image URIs
                continue
            if '*' not in node['candidates']:
                for imgtype in self.supported_image_types:
                    candidate = node['candidates'].get(imgtype, None)
                    if candidate:
                        break
                else:
                    self.warn(
                        'no matching candidate for image URI %r' % node['uri'],
                        '%s:%s' % (node.source, getattr(node, 'line', '')))
                    continue
                node['uri'] = candidate
            else:
                candidate = node['uri']
            if candidate not in self.env.images:
                # non-existing URI; leave it alone
                continue
            self._raw_images[candidate] = self.env.images[candidate][1]
            file, file_hashed, file_2x, file_2x_hashed = self.get_image_name(
                candidate, self.env.images[candidate][1])
            self.images[candidate] = file_hashed
            path = os.path.join(os.path.dirname(candidate), file)
            self.images_1x[candidate] = (path, file, file_hashed)
            if file_2x:
                path = os.path.join(os.path.dirname(candidate), file_2x)
                self.images_2x[candidate] = (path, file_2x, file_2x_hashed)
            uri = node['uri']
            if uri in self.images_1x:
                self.image_map[uri] = self.images_1x[uri][2]
                node['uri'] = self.images_1x[uri][2]
            if uri in self.images_2x:
                node['uri_2x'] = self.images_2x[uri][2]
            node.replace_self(node)
        for node in doctree.traverse(SectionTileNode):
            if node['icon'] is None:
                continue
            _candidate = node['icon']
            if _candidate.startswith('/'):
                candidate = _candidate[1:]
            else:
                candidate = _candidate
            if candidate in self.env.images:
                file_name = self.env.images[candidate][1]
            else:
                file_name = candidate.split('/')[-1]
            self._raw_images[candidate] = file_name
            file, file_hashed, file_2x, file_2x_hashed = self.get_image_name(
                candidate, file_name)
            self.images[candidate] = file_hashed
            if file is not None:
                path = os.path.join(os.path.dirname(candidate), file)
                self.images_1x[candidate] = (path, file, file_hashed)
            if file_2x:
                path = os.path.join(os.path.dirname(candidate), file_2x)
                self.images_2x[candidate] = (path, file_2x, file_2x_hashed)
            if candidate in self.images_1x:
                self.image_map[_candidate] = self.images_1x[candidate][2]
                node['icon'] = self.images_1x[candidate][2]
            if candidate in self.images_2x:
                node['icon_2x'] = self.images_2x[candidate][2]
            node.replace_self(node)

    def get_image_name(self, image_path, file_name):
        fp = re.search(IMAGE_RX, file_name)
        if fp is None:
            raise ValueError('Bad image name: {}'.format(image_path))
        mainname = fp.group('mainname')
        postfix = fp.group('postfix')
        res = fp.group('res')
        dir = os.path.dirname(image_path)
        if res == '@2x':
            hash_2x = self.get_file(dir, mainname, res, postfix)
            hash = self.get_file(dir, mainname, '', postfix)
        else:
            hash_2x = self.get_file(dir, mainname, '@2x', postfix)
            hash = self.get_file(dir, mainname, '', postfix)
        file_2x_hashed = None
        file_2x = None
        file_hashed = None
        file = None
        if hash_2x is not None:
            file_2x_hashed = '{}-{}-@2x.{}'.format(mainname, hash_2x, postfix)
            file_2x = '{}@2x.{}'.format(mainname, postfix)
        if hash is not None:
            file_hashed = '{}-{}.{}'.format(mainname, hash, postfix)
            file = '{}.{}'.format(mainname, postfix)
        return file, file_hashed, file_2x, file_2x_hashed
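
    # Example (hypothetical files): given images/logo.png with a retina
    # sibling images/logo@2x.png, get_image_name('images/logo.png',
    # 'logo.png') returns roughly
    # ('logo.png', 'logo-<md5>.png', 'logo@2x.png', 'logo-<md5>-@2x.png'),
    # where each <md5> is the hash of the corresponding file's contents.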

    def get_file(self, dir, mainname, res, postfix):
        file_name = mainname + res + '.' + postfix
        try:
            p = os.path.join(self.srcdir, dir, file_name)
            with open(p, 'rb') as f:
                return hashlib.md5(f.read()).hexdigest()
        except FileNotFoundError:
            warnings.warn("Can't find {}".format(file_name))
            return None

    def finish(self, *args, **kwargs):
        super().finish(*args, **kwargs)
        if os.environ.get('DOC_VERSION', None) is not None:
            if os.environ.get('DOC_DEPLOY', None) == 'TRUE':
                push_to_algolia_search()


def setup(app):
    app.add_builder(JsonHTMLBuilder)
    app.add_directive('bc', BreadCrumbs)
    app.add_node(BreadcrumbContainerNode,
                 html=(BreadcrumbContainerNode.visit_node,
                       BreadcrumbContainerNode.depart_node))
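
# Minimal wiring sketch for conf.py and the build command (the module path,
# docs directories, and version string are hypothetical values):
#
#   # conf.py
#   extensions = ['builders.json_html']
#
#   $ DOC_VERSION=0123456789abcdef DOC_DEPLOY=TRUE \
#     ALGOLIA_KEY=... ALGOLIA_SECRET=... ALGOLIA_INDEX=docs \
#     sphinx-build -b json-html-build source build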