Skip to content

Instantly share code, notes, and snippets.

@amuramatsu
Last active April 19, 2017 13:33
Show Gist options
  • Save amuramatsu/150a6aa29e54c83d8773e2bd980d81b2 to your computer and use it in GitHub Desktop.
Save amuramatsu/150a6aa29e54c83d8773e2bd980d81b2 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) 2016, 2017 MURAMATSU Atsushi <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import sys
import re
# Switch to the user's default locale when possible.  This is best-effort:
# a missing ``locale`` module or an unsupported locale setting is ignored on
# purpose, but unrelated exceptions (KeyboardInterrupt, SystemExit, ...) are
# no longer swallowed.
try:
    import locale
except ImportError:
    pass
else:
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        pass
import chardet
from docutils.core import (Publisher, publish_cmdline,
default_description, default_usage)
from docutils.readers import standalone
try:
import html5css3
except ImportError:
html5css3 = None
# Template for the "--help" description.  The ``{}`` placeholder is filled in
# with the selected writer name before the text is handed to the publisher.
description = (
    'Generates {} documents from standalone reStructuredText '
    'sources for Japanese. '
) + default_description
# Character class treated as "Japanese" text: U+3000-U+9FFF plus two
# supplementary-plane ranges.
# NOTE(review): the last range (U+1F900-U+30000) looks unusually wide --
# confirm that it is intentional.
_JPN_CHARS = u'[\u3000-\u9FFF\U0001B000-\U0001B0FF\U0001F900-\U00030000]'

# Matches a line break that sits between two Japanese characters.
#   group 1: the Japanese character that ends the line
#   group 2: the line break itself (LF or CRLF)
#   group 3: leading blanks/tabs of the following line
# The character that starts the next line is only asserted with a lookahead
# and is NOT consumed, so it can in turn be matched as group 1 of the next
# line break.  Consuming it caused every second break to go unmatched when a
# line consisted of a single Japanese character.  A replacement template of
# the form r"\1\\\2\3" produces the same text as before for every break that
# was already matched.
_JPN_NEWLINE_REMOVER_RE = re.compile(
    u"({0})(\r?\n)([ \t]*)(?={0})".format(_JPN_CHARS)
)
class JReader(standalone.Reader):
    """Standalone reader that pre-processes Japanese text before parsing.

    Two things are added on top of the base reader's ``read``:

    * when the input is a file and no encoding was specified, the encoding
      is guessed with ``chardet`` and the file is reopened accordingly;
    * a backslash is inserted in front of every line break that separates
      two Japanese characters, using ``_JPN_NEWLINE_REMOVER_RE``.
      (reStructuredText drops backslash-escaped whitespace, so the wrapped
      lines are joined without a space in the output.)
    """

    def read(self, source, parser, settings):
        """Read `source`, escape Japanese line breaks and parse the result.

        Parameters:

        - `source`: the input object; its ``read()`` supplies the text.
        - `parser`: parser to use when the reader does not have one yet.
        - `settings`: runtime settings, stored on the reader.

        Returns ``self.document`` after ``self.parse()`` has run.
        """
        from docutils import io
        self.source = source
        if not self.parser:
            self.parser = parser
        self.settings = settings
        if (isinstance(source, io.FileInput)
                and source.encoding is None
                and source.source_path is not None):
            # No encoding given: guess it from the raw bytes of the file.
            with open(source.source_path, 'rb') as binary:
                raw = binary.read()
            # chardet may report ``None``; ``open`` then falls back to the
            # platform default encoding.
            source.encoding = chardet.detect(raw)['encoding']
            # Reopen the file as text with the detected encoding.  Plain
            # 'r' is used because the 'U' flag was removed in Python 3.11
            # (universal newlines are the default in text mode anyway).
            stale = source.source
            source.source = open(
                source.source_path, 'r',
                encoding=source.encoding, errors=source.error_handler)
            # Release the handle that is being replaced instead of leaking
            # it; standard input is never closed.
            if stale is not None and stale is not sys.stdin:
                stale.close()
        text = self.source.read()
        self.input = _JPN_NEWLINE_REMOVER_RE.sub(r"\1\\\2\3", text)
        self.parse()
        return self.document
class JPublisher(Publisher):
    """Publisher whose reader is always a `JReader`."""

    def set_reader(self, reader_name, parser, parser_name):
        """Install a `JReader` as ``self.reader``.

        `reader_name` is accepted for interface compatibility but is not
        used; ``self.parser`` is taken from the newly created reader.
        """
        jreader = JReader(parser, parser_name)
        self.reader = jreader
        self.parser = jreader.parser
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=True, argv=None,
                    usage=default_usage, description=default_description):
    """
    Command-line front end built on `JPublisher`.

    Creates a `JPublisher`, configures its components and publishes with
    input and output file paths taken from the command line.  The value
    returned by ``publish()`` (the encoded string output) is passed back to
    the caller.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of
      ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the
      command line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = JPublisher(reader, parser, writer, settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=False):
    """
    Programmatic front end built on `JPublisher`, using string I/O.

    Returns the encoded string or Unicode string output.

    To get encoded string output, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    output, for example::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    The same applies to Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    from docutils import io
    publisher = JPublisher(
        reader, parser, writer,
        settings=settings,
        source_class=io.StringInput,
        destination_class=io.StringOutput)
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, destination_path)
    result = publisher.publish(enable_exit_status=enable_exit_status)
    return result
def mode_error():
    """Report a missing or unknown output mode and terminate the program.

    The list of accepted modes is written to standard error, so it cannot
    end up in a document that is being written to standard output.  The
    ``html5`` mode is listed only when the optional ``html5css3`` module
    could be imported.

    Raises:
        SystemExit: always, with exit status 1.
    """
    modes = ["html"]
    if html5css3 is not None:
        modes.append("html5")
    modes += ["latex", "xetex", "s5", "man", "odt", "xml", "pseudoxml"]
    print("ERROR: Please set the first argument to one of\n"
          " [ " + " | ".join(modes) + " ]",
          file=sys.stderr)
    sys.exit(1)
if __name__ == "__main__":
    # The first argument selects the output format; the remaining ones are
    # passed on to docutils.
    if len(sys.argv) < 2:
        mode_error()

    xetex_fonts = "\n".join([
        r'\setmainfont{Times New Roman}',
        r'\setsansfont{Arial}',
        r'\setmonofont{Courier New}'])

    # Each row: (accepted spellings of the first argument, writer name,
    # extra options inserted ahead of the user's own arguments).
    mode_table = (
        (("rst2html", "html"), 'html', []),
        (("rst2html5", "html5"), 'html5',
         ['--math-output', 'mathml',
          '--pygments']),
        (("rst2latex", "latex"), 'latex',
         ['--documentclass', 'jsarticle',
          '--documentoptions', 'uplatex,a4paper']),
        (("rst2xetex", "xetex"), 'xetex',
         ['--documentclass', 'bxjsarticle',
          '--documentoptions', 'xelatex,ja=standard,a4paper',
          '--latex-preamble', xetex_fonts]),
        (("rst2s5", "s5"), 's5', []),
        (("rst2man", "man"), 'manpage', []),
        (("rst2odt", "odt"), 'odf_odt', []),
        (("rst2pseudoxml", "pseudoxml"), 'pseudoxml', []),
        (("rst2xml", "xml"), 'xml', []),
    )

    requested = sys.argv[1]
    writer_name = None
    writer = None
    add_opts = []
    for spellings, candidate, extra_opts in mode_table:
        if requested not in spellings:
            continue
        if candidate == 'html5':
            # html5 needs the optional html5css3 package; without it the
            # mode is treated as unknown.
            if html5css3 is None:
                continue
            writer = html5css3.Writer()
        writer_name = candidate
        add_opts = extra_opts
        break
    else:
        mode_error()

    # Drop the mode argument and put the mode-specific options first.
    sys.argv = [sys.argv[0]] + add_opts + sys.argv[2:]
    description = description.format(writer_name)
    publish_cmdline(writer_name=writer_name, writer=writer,
                    description=description)
@amuramatsu
Copy link
Author

日本語に特化した rst2html or rst2latex

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment