Created
March 1, 2018 01:30
-
-
Save mikeplus64/d480454e4864abc6059f9056d714ddeb to your computer and use it in GitHub Desktop.
Export images from a table from a HTML file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Unholy script to help rename images exported from LibreOffice Calc.
 *
 * instructions:
 *   1. open the sheet in LibreOffice Calc
 *   2. ensure all images fit entirely *within* the boundaries of their row
 *   3. export as html
 *   4. inject this script at the end of the html <body>
 *   5. open the html file in your browser of choice
 *   6. when prompted, save the generated script to the same directory as the
 *      html file
 *   7. run the generated script
 *   8. bask in the glory of the renamed images
 *
 * It kinda-sorta works with Excel. The problem is that Excel's html export is
 * more complicated. It wraps images in a table of their own so that they can be
 * positioned according to the .xlsx file. LibreOffice happily discards
 * positioning completely when exporting html.
 *
 * Inject this script at the bottom of an html file's <body> as
 * <script src="./export.js"></script>, or copy-paste the whole script into a
 * <script> tag, or just run it from the browser's console.
 *
 * It'll generate a script that copies all the images found in tables to
 * 'FILENAME/ROW_ID-IMG_NO.EXT'. You have to save and run this generated script
 * in the same directory as the .html file. If you're on Windows and use Chrome,
 * it might save the .bat file as a .bin file -- manually override that.
 *
 * where
 *   FILENAME: base name of the .html file you inject this script into
 *   ROW_ID:   assumed to be the first cell in each row;
 *   IMG_NO:   0 to the Nth image found in each row;
 *   EXT:      probably .jpg
 *
 */
// Output directory name: the page's own file name without its .html/.htm
// extension. location.pathname is used (not location.href) so that a query
// string or #fragment on the URL never ends up in the directory name, and the
// extension is only stripped when it is the actual suffix. Falls back to
// 'images' when the URL has no file name, so targets never start at '/'.
const pageSegments = location.pathname.split('/');
const dir = decodeURIComponent(pageSegments[pageSegments.length - 1])
  .replace(/\.html?$/i, '') || 'images';
// Counter used to name rows whose first cell is empty.
let badid = 0;
// Lines of the generated shell/batch script, in execution order.
const commands = [];
// navigator.platform reports e.g. 'Win32' on Windows browsers.
const isWindows = /^win/i.test(navigator.platform);
const scriptFilename = `${dir}-${isWindows ? 'rename-images.bat' : 'rename-images.sh'}`;
const copy = isWindows ? 'copy' : 'cp';
if (isWindows) {
  // In a batch file '%' starts a variable reference; '%%' is a literal '%'.
  const quotedDir = `"${dir.replace(/%/g, '%%')}"`;
  // Guarded so that re-running the script does not fail on an existing directory.
  commands.push(`if not exist ${quotedDir} mkdir ${quotedDir}`);
} else {
  commands.push('#!/bin/sh');
  // Single quotes keep '$', '`' and '\' literal in sh; an embedded single
  // quote is written as '\''.
  const quotedDir = `'${dir.replace(/'/g, "'\\''")}'`;
  // -p: do not fail when the directory already exists.
  commands.push(`mkdir -p ${quotedDir}`);
}
// Actually scrape the images from each row. Manually edit the .html to remove
// tables you don't want (e.g. from sheets you don't care about), or use
// #table_id tr as the selector here

// Quote one argument for the generated script.
//  - batch: double quotes; '%' is doubled because it starts a variable
//    reference, and '"' is dropped because it would end the argument (it is
//    not a legal Windows file name character anyway).
//  - sh: single quotes, so '$', '`' and '\' stay literal; an embedded single
//    quote is written as '\''.
const quoteArg = isWindows
  ? (arg) => `"${arg.replace(/"/g, '').replace(/%/g, '%%')}"`
  : (arg) => `'${arg.replace(/'/g, "'\\''")}'`;
// Use the native separator so cmd's `copy` never sees '/' in the target path.
const pathSeparator = isWindows ? '\\' : '/';

document.querySelectorAll('table > tbody > tr').forEach((row, rowNo) => {
  // rowNo counts every matched row across all tables, so this skips only the
  // very first matched row (the header row of the first table).
  if (rowNo === 0) { return; }
  // A <tr> without cells has no first cell; treat it like an empty id instead
  // of throwing and aborting the whole export.
  const firstCell = row.cells[0];
  let id = firstCell === undefined
    ? ''
    : firstCell.textContent
      .replace(/\s+/g, ' ') // line breaks inside a cell must not split a command
      .trim()
      .replace(/[\/\\]/g, '_'); // the id must not add an extra path component
  if (id === '') {
    id = `unknown-${badid}`;
    badid += 1;
  }
  row.querySelectorAll('img').forEach((img, imgNo) => {
    const src = img.src;
    // Images without a src, or embedded as data: URIs, have no file to copy.
    if (src === '' || /^data:/i.test(src)) { return; }
    // Drop ?query and #fragment first, then decode only the last path segment
    // so an encoded '/' or '?' inside the file name is preserved.
    const segments = src.split(/[?#]/)[0].split('/');
    const filename = decodeURIComponent(segments[segments.length - 1]);
    if (filename === '') { return; }
    // Extension = everything from the LAST dot (asdf -> '', asdf.jpg -> '.jpg',
    // photo.2018.jpeg -> '.jpeg'). A leading or trailing dot is not an extension.
    const dot = filename.lastIndexOf('.');
    const suffix = dot > 0 && dot < filename.length - 1 ? filename.slice(dot) : '';
    const target = `${dir}${pathSeparator}${id}-${imgNo}${suffix}`;
    commands.push(`${copy} ${quoteArg(filename)} ${quoteArg(target)}`);
  });
});
/**
 * Offer `data` to the user as a file download.
 *
 * Wraps the data in a Blob, points a temporary hidden <a download> element at
 * an object URL for it and clicks that element programmatically.
 *
 * @param {string} mimetype - MIME type given to the Blob, e.g. 'text/plain'.
 * @param {*} data - Blob part to save (a string for the generated script).
 * @param {string} fileName - File name suggested to the browser.
 */
const saveFile = function (mimetype, data, fileName) {
  // Revoking the object URL synchronously after click() can cancel the
  // download in browsers that start it asynchronously, so release it later.
  const REVOKE_DELAY_MS = 40000;
  const blob = new Blob([data], {type: mimetype});
  const url = window.URL.createObjectURL(blob);
  // The anchor is created per call and removed afterwards so no stray element
  // is left in the page.
  const a = document.createElement('a');
  a.style.display = 'none';
  a.href = url;
  a.download = fileName;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    a.remove();
    setTimeout(() => { window.URL.revokeObjectURL(url); }, REVOKE_DELAY_MS);
  }
};
// Join the generated commands with the platform's line ending (CRLF for .bat,
// LF for .sh), terminate the last line too, and offer the result for download.
const scriptEol = isWindows ? '\r\n' : '\n';
saveFile('text/plain', commands.join(scriptEol) + scriptEol, scriptFilename);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment