Last active
April 13, 2022 12:23
-
-
Save sixtyfive/35c192ce780ad6375ab24545bd6e06ba to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'docx' | |
require 'nokogiri' | |
# Converts each paragraph of a .docx file to an HTML fragment and prints one
# fragment per paragraph to standard output.
#
# Every paragraph node is serialized, re-parsed as a standalone XML document
# and run through the `docxparas2html.xsl` stylesheet.
#
# @param finpath [String] path of the .docx file to read
# @return [Array] the paragraph nodes that were processed
def docxparas2html(finpath)
  doc = Docx::Document.open(finpath)

  # Prefer a stylesheet in the working directory (the historical lookup), but
  # fall back to the copy next to this script so the tool also works when it
  # is started from another directory.
  xsl_name = 'docxparas2html.xsl'
  xsl_path = File.exist?(xsl_name) ? xsl_name : File.expand_path(xsl_name, __dir__)
  xsl = Nokogiri::XSLT(File.read(xsl_path))

  doc.paragraphs.map(&:node).each do |node|
    # Declare the w: prefix on the paragraph itself so the serialized snippet
    # can be parsed on its own. This URI must stay identical to the xmlns:w
    # declared in the stylesheet, otherwise its w:* selectors match nothing.
    node['xmlns:w'] = 'http://schemas.microsoft.com/office/word/2018/wordml'
    paragraph_doc = Nokogiri::XML(node.to_xml)

    # Drop the first serialized line (presumably the XML declaration) and
    # glue the remaining lines together without separators.
    html_fragment = xsl.transform(paragraph_doc).to_xml.split(/\n/)[1..].join
    puts html_fragment
  end
end
# Convert every command-line argument whose name ends in ".docx"
# (case-insensitive); all other arguments are silently ignored.
ARGV.each { |arg| docxparas2html(arg) if arg.downcase.end_with?('.docx') }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!--
  Turns the runs (w:r) of a WordprocessingML paragraph into HTML: a run with
  formatting becomes <span style="...">text</span>, a run without formatting
  becomes plain text.

  The xmlns:w URI below must be identical to the one the calling Ruby script
  puts on each paragraph node; it is not the URI found in a real document.xml.
  exclude-result-prefixes keeps that declaration off the generated <span>s.
-->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:w="http://schemas.microsoft.com/office/word/2018/wordml"
    exclude-result-prefixes="w">
  <xsl:template match="/">
    <xsl:for-each select="//w:r">
      <xsl:variable name="rPr" select="w:rPr"/>
      <!-- Toggle properties are on unless w:val explicitly switches them off. -->
      <xsl:variable name="bold" select="boolean($rPr/w:b[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')])"/>
      <xsl:variable name="italic" select="boolean($rPr/w:i[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')])"/>
      <xsl:variable name="struck" select="boolean($rPr/w:strike[not(@w:val = 'false' or @w:val = '0' or @w:val = 'off')])"/>
      <!-- w:u carries an underline style (single, double, ...); only "none" means no underline. -->
      <xsl:variable name="underlined" select="boolean($rPr/w:u[not(@w:val = 'none')])"/>
      <xsl:variable name="styles">
        <xsl:if test="$rPr/w:rFonts/@w:ascii">
          <xsl:text>font-family:'</xsl:text><xsl:value-of select="$rPr/w:rFonts/@w:ascii"/><xsl:text>';</xsl:text>
        </xsl:if>
        <!-- w:sz is given in half-points, hence the division and the pt unit. -->
        <xsl:if test="$rPr/w:sz/@w:val">
          <xsl:text>font-size:</xsl:text><xsl:value-of select="$rPr/w:sz/@w:val div 2"/><xsl:text>pt;</xsl:text>
        </xsl:if>
        <xsl:if test="$bold"><xsl:text>font-weight:bold;</xsl:text></xsl:if>
        <xsl:if test="$italic"><xsl:text>font-style:italic;</xsl:text></xsl:if>
        <!-- One combined declaration: two separate text-decoration entries would override each other. -->
        <xsl:if test="$underlined or $struck">
          <xsl:text>text-decoration:</xsl:text>
          <xsl:if test="$underlined"><xsl:text>underline</xsl:text></xsl:if>
          <xsl:if test="$underlined and $struck"><xsl:text> </xsl:text></xsl:if>
          <xsl:if test="$struck"><xsl:text>line-through</xsl:text></xsl:if>
          <xsl:text>;</xsl:text>
        </xsl:if>
        <!-- Colours are stored as bare RRGGBB hex digits (or "auto"), so CSS needs the leading '#'. -->
        <xsl:if test="$rPr/w:color[@w:val != 'auto']">
          <xsl:text>color:#</xsl:text><xsl:value-of select="$rPr/w:color/@w:val"/><xsl:text>;</xsl:text>
        </xsl:if>
        <xsl:if test="$rPr/w:shd[@w:fill != 'auto']">
          <xsl:text>background-color:#</xsl:text><xsl:value-of select="$rPr/w:shd/@w:fill"/><xsl:text>;</xsl:text>
        </xsl:if>
      </xsl:variable>
      <xsl:choose>
        <xsl:when test="normalize-space($styles) = ''"><xsl:apply-templates select="w:t"/></xsl:when>
        <xsl:otherwise><span style="{$styles}"><xsl:apply-templates select="w:t"/></span></xsl:otherwise>
      </xsl:choose>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> | |
<w:document xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" mc:Ignorable="w14 wp14"> | |
<w:body> | |
<w:p> | |
<w:pPr> | |
<w:pStyle w:val="Normal"/> | |
<w:bidi w:val="0"/> | |
<w:jc w:val="left"/> | |
<w:rPr/> | |
</w:pPr> | |
<w:r> | |
<w:rPr> | |
<w:rFonts w:ascii="Lucida Handwriting" w:hAnsi="Lucida Handwriting"/> | |
<w:sz w:val="22"/> | |
<w:szCs w:val="22"/> | |
</w:rPr> | |
<w:t>Test text</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> with </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:i/> | |
<w:iCs/> | |
</w:rPr> | |
<w:t>various</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:b/> | |
<w:bCs/> | |
</w:rPr> | |
<w:t>styles</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve">, </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:b/> | |
<w:bCs/> | |
<w:i/> | |
<w:iCs/> | |
<w:strike w:val="false"/> | |
<w:dstrike w:val="false"/> | |
<w:u w:val="none"/> | |
</w:rPr> | |
<w:t>among</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> them </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:color w:val="C9211E"/> | |
</w:rPr> | |
<w:t>color</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:shd w:fill="FFFF00" w:val="clear"/> | |
</w:rPr> | |
<w:t>and</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:u w:val="single"/> | |
</w:rPr> | |
<w:t>underlined</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t xml:space="preserve"> text as well as </w:t> | |
</w:r> | |
<w:r> | |
<w:rPr> | |
<w:strike/> | |
</w:rPr> | |
<w:t>strikethrough</w:t> | |
</w:r> | |
<w:r> | |
<w:rPr/> | |
<w:t>.</w:t> | |
</w:r> | |
</w:p> | |
<w:sectPr> | |
<w:type w:val="nextPage"/> | |
<w:pgSz w:w="12240" w:h="15840"/> | |
<w:pgMar w:left="1134" w:right="1134" w:gutter="0" w:header="0" w:top="1134" w:footer="0" w:bottom="1134"/> | |
<w:pgNumType w:fmt="decimal"/> | |
<w:formProt w:val="false"/> | |
<w:textDirection w:val="lrTb"/> | |
</w:sectPr> | |
</w:body> | |
</w:document> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'https://rubygems.org'

# Reads the .docx container and exposes its paragraphs.
gem 'docx'
# Required directly by the script for XML parsing and XSLT, so it is declared
# explicitly rather than relied upon as a transitive dependency.
gem 'nokogiri'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment