Skip to content

Instantly share code, notes, and snippets.

@goooseman
Created January 29, 2020 17:18
Show Gist options
  • Save goooseman/269ae9a28f8406fea4db88ec837a89b3 to your computer and use it in GitHub Desktop.
Save goooseman/269ae9a28f8406fea4db88ec837a89b3 to your computer and use it in GitHub Desktop.
Node.js script to mass remove columns from a bunch of Excel files
const XLSX = require("xlsx");
const glob = require("glob");
const path = require("path");
let i = 0;
glob
.sync(path.join(__dirname, "./original/*.xlsx"))
.forEach(function(file) {
// tslint:disable-next-line: non-literal-require
i++;
console.log(`Working with file ${file}`);
var workbook = XLSX.readFile(file);
var worksheet = workbook.Sheets[workbook.SheetNames[0]];
delete_cols(worksheet, 8, 2);
XLSX.writeFile(workbook, path.join(__dirname, `./modified/10012-${i}-12-2019.xlsx`));
});
// Credits to SheetJSDev. Taken from https://github.com/SheetJS/sheetjs/issues/1304#issuecomment-434206858
/*
deletes `ncols` cols STARTING WITH `start_col`
usage: delete_cols(ws, 4, 3); // deletes columns E-G and shifts everything after G to the left by 3 columns
*/
function delete_cols(ws, start_col, ncols) {
if(!ws) throw new Error("operation expects a worksheet");
var dense = Array.isArray(ws);
if(!ncols) ncols = 1;
if(!start_col) start_col = 0;
/* extract original range */
var range = XLSX.utils.decode_range(ws["!ref"]);
var R = 0, C = 0;
var formula_cb = function($0, $1, $2, $3, $4, $5) {
var _R = XLSX.utils.decode_row($5), _C = XLSX.utils.decode_col($3);
if(_C >= start_col) {
_C -= ncols;
if(_C < start_col) return "#REF!";
}
return $1+($2=="$" ? $2+$3 : XLSX.utils.encode_col(_C))+($4=="$" ? $4+$5 : XLSX.utils.encode_row(_R));
};
var addr, naddr;
for(C = start_col + ncols; C <= range.e.c; ++C) {
for(R = range.s.r; R <= range.e.r; ++R) {
addr = XLSX.utils.encode_cell({r:R, c:C});
naddr = XLSX.utils.encode_cell({r:R, c:C - ncols});
if(!ws[addr]) { delete ws[naddr]; continue; }
if(ws[addr].f) ws[addr].f = ws[addr].f.replace(crefregex, formula_cb);
ws[naddr] = ws[addr];
}
}
for(C = range.e.c; C > range.e.c - ncols; --C) {
for(R = range.s.r; R <= range.e.r; ++R) {
addr = XLSX.utils.encode_cell({r:R, c:C});
delete ws[addr];
}
}
for(C = 0; C < start_col; ++C) {
for(R = range.s.r; R <= range.e.r; ++R) {
addr = XLSX.utils.encode_cell({r:R, c:C});
if(ws[addr] && ws[addr].f) ws[addr].f = ws[addr].f.replace(crefregex, formula_cb);
}
}
/* write new range */
range.e.c -= ncols;
if(range.e.c < range.s.c) range.e.c = range.s.c;
ws["!ref"] = XLSX.utils.encode_range(clamp_range(range));
/* merge cells */
if(ws["!merges"]) ws["!merges"].forEach(function(merge, idx) {
var mergerange;
switch(typeof merge) {
case 'string': mergerange = XLSX.utils.decode_range(merge); break;
case 'object': mergerange = merge; break;
default: throw new Error("Unexpected merge ref " + merge);
}
if(mergerange.s.c >= start_col) {
mergerange.s.c = Math.max(mergerange.s.c - ncols, start_col);
if(mergerange.e.c < start_col + ncols) { delete ws["!merges"][idx]; return; }
mergerange.e.c -= ncols;
if(mergerange.e.c < mergerange.s.c) { delete ws["!merges"][idx]; return; }
} else if(mergerange.e.c >= start_col) mergerange.e.c = Math.max(mergerange.e.c - ncols, start_col);
clamp_range(mergerange);
ws["!merges"][idx] = mergerange;
});
if(ws["!merges"]) ws["!merges"] = ws["!merges"].filter(function(x) { return !!x; });
/* cols */
if(ws["!cols"]) ws["!cols"].splice(start_col, ncols);
}
function clamp_range(range) {
if(range.e.r >= (1<<20)) range.e.r = (1<<20)-1;
if(range.e.c >= (1<<14)) range.e.c = (1<<14)-1;
return range;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment