Skip to content

Instantly share code, notes, and snippets.

@dantonnoriega
Last active April 8, 2020 00:14
Show Gist options
  • Save dantonnoriega/a905343b4ec15e5e212c to your computer and use it in GitHub Desktop.
Save dantonnoriega/a905343b4ec15e5e212c to your computer and use it in GitHub Desktop.
Bash script that uses Applescript to convert all html files within a directory to docx files. It also converts headings and normal fonts to "Calibri"; resizes images; and also centers, adds spacing, and padding to tables.
#!/usr/local/bin/bash
# Original Applescript by Andrew Heiss (https://gist.github.com/andrewheiss/5bb905b1cfb244ebab40)
# SCRIPT MUST BE IN SAME DIRECTORY AS HTML FILES
# use: ~$ zsh ./html2docx.sh
# OR ~$ bash ./html2docx.sh
echo
html2docx () {
echo "Converting $2.html to $2.docx..."
osascript <<EOD
set base_folder to "$1/"
set file_in to base_folder & "$2.html"
set file_out to base_folder & "$2.docx"
tell application "Microsoft Word"
activate
open file_in
# tables: change font, font size; add spacing and padding; format
set tbls to tables of active document
repeat with tbl in tbls
set name of font object of text object of tbl to "Consolas"
set font size of font object of text object of tbl to 10
set font size of font object of text object of row 1 of tbl to 12
set alignment of row 1 of tbl to align row center
select tbl
set myRange1 to text object of selection
set myRange1 to move start of range myRange1 by a character item count - 1
set myRange2 to collapse range myRange1 direction collapse start
set myRange3 to collapse range myRange1 direction collapse end
select myRange2
type text selection text "\n"
select myRange3
type text selection text "\n"
set allow page breaks of tbl to False
select tbl
select (row 1 of selection)
set alignment of paragraph format of selection to align paragraph center
set enable borders of border options of tbl to true
# set inside table borders
set BOR1 to get border tbl which border border horizontal
set BOR2 to get border tbl which border border vertical
set line style of BOR1 to line style single
set line style of BOR2 to line style single
set line width of BOR1 to line width100 point
set line width of BOR2 to line width450 point
set color index of BOR1 to white
set color index of BOR2 to white
# set left and right table borders
set BOR1 to get border tbl which border border left
set BOR2 to get border tbl which border border right
set line style of BOR1 to line style single
set line style of BOR2 to line style single
set line width of BOR1 to line width225 point
set line width of BOR2 to line width225 point
set color index of BOR1 to white
set color index of BOR2 to white
# adjust outside borders
set outside line style of border options of tbl to line style thin thick med gap
set outside color index of border options of tbl to white
set BOR3 to get border row 1 of tbl which border border bottom
set line style of BOR3 to line style single
set color index of BOR3 to gray25
end repeat
# adjust paragraph styles
set allPar to paragraphs of active document
repeat with i in allPar
if (style of i is Word style "Normal (Web)" of active document) then
select text object of i
set style of selection to "Normal"
set space after of i to 6
end if
end repeat
# adjust fonts
set name of font object of Word style style heading1 of active document to "Calibri"
set name of font object of Word style style heading2 of active document to "Calibri"
set name of font object of Word style style heading3 of active document to "Calibri"
set bold of font object of Word style style heading1 of active document to true
set bold of font object of Word style style heading2 of active document to true
set bold of font object of Word style style heading3 of active document to true
set name of font object of Word style style normal of active document to "Calibri"
set name of font object of Word style style strong of active document to "Calibri"
set name of font object of Word style style emphasis of active document to "Calibri"
# scale and center images then break links
## first, break links
set all_images to inline pictures of active document
repeat with img in all_images
try
break link link format of img
end try
end repeat
## second, adjust size and center
set all_inline_images to inline pictures of active document
repeat with img in all_inline_images
try
set lock aspect ratio of img to true
set height of img to inches to points inches 4
set alignment of horizontal line format of img to horizontal line align center
end try
end repeat
# remove "Previous" and "Next" if last paragraph (happens in some html conversions)
set LastParagraph to text object of last paragraph of active document
select LastParagraph
set selFind to find object of selection
set forward of selFind to true
set wrap of selFind to find stop
## remove "previous"
set content of selFind to "Previous"
execute find selFind
if found of selFind is true then
delete selection
end if
## remove "next"
set content of selFind to "Next"
execute find selFind
if found of selFind is true then
delete selection
end if
# shrink font of entire document, twice
shrink font font object of text object of active document
shrink font font object of text object of active document
set view type of view of active window to print view
save as active document file name file_out file format format document
close active document
end tell
EOD
echo "...done"
}
#get path
PWD=$(pwd)
# convert all .html files do docx
## works with bash and zsh
find . -maxdepth 1 -type f -name '*.html' | while read F; do FF=${F#*/}; html2docx $PWD ${FF%.html}; done;
## works with zsh but not bash
# find . -maxdepth 1 -type f -name '*.html' | while read F; do html2docx $PWD ${${F#*/}%.html}; done;
#quit word
osascript -e 'quit app "Microsoft Word"'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment