Skip to content

Instantly share code, notes, and snippets.

@blizzardengle
Last active October 31, 2025 06:32
Show Gist options
  • Save blizzardengle/6c7ae6e4b10a7ce6f16731bfb4bcba06 to your computer and use it in GitHub Desktop.
Save blizzardengle/6c7ae6e4b10a7ce6f16731bfb4bcba06 to your computer and use it in GitHub Desktop.
Capture Table
/**
* Extract data from an HTML table with flexible column selection and processing.
*
* @param {string} id - The table's HTML id attribute
* @param {string|number[]} columns - Column selection: "*" for all, "1-3" for range, "1,3,5" for specific, or array of column numbers (1-based)
* @param {Object} options - Configuration options
* @param {boolean} options.combine - Combine multiple cells into single string per row (default: false)
* @param {boolean} options.skipHeaders - Skip rows containing <th> elements (default: false)
* @param {number} options.skipRows - Number of rows to skip from the top (default: 0)
* @param {string} options.delimiter - Character(s) to separate cell data (default: '\t')
* @param {Object} options.callbacks - Column processing callbacks: { '*': globalFn, 1: col1Fn, 3: col3Fn }
* @param {string} options.emptyCell - Placeholder for empty cells (default: '')
* @param {boolean} options.downloadCSV - Auto-download result as CSV file (default: false)
* @param {string} options.filename - Filename for CSV download (default: 'table-export.csv')
* @returns {string} The extracted and processed table data (or empty string if downloaded)
*/
const captureTable = (() => {
/**
 * Parse a column selection into an array of 1-based column numbers.
 *
 * @param {string|number[]} columns - "*" or "all" for every column, a
 *   comma-separated list of numbers and ranges such as "1-3,5,7-9", or an
 *   array of column numbers.
 * @param {HTMLTableElement} table - Table whose first row defines the column
 *   count; only consulted for "*" / "all".
 * @returns {number[]} Column numbers. An array argument is returned unchanged
 *   (caller order preserved); a parsed string is de-duplicated and sorted
 *   ascending. Any other input falls back to [1].
 */
const parseColumnSelection = (columns, table) => {
    // If already an array, assume it's 1-based column numbers
    if (Array.isArray(columns)) {
        return columns;
    }

    if (typeof columns === 'string') {
        const trimmed = columns.trim();

        // All columns
        if (trimmed === '*' || trimmed.toLowerCase() === 'all') {
            const firstRow = table.querySelector('tr');
            if (!firstRow) {
                return [];
            }
            // Count logical columns, not cells: a cell with colspan="3"
            // occupies three columns. Missing/invalid colspan counts as 1.
            let columnCount = 0;
            firstRow.querySelectorAll('td, th').forEach(cell => {
                const span = parseInt(cell.getAttribute('colspan') || '1', 10);
                columnCount += span >= 1 ? span : 1;
            });
            return Array.from({ length: columnCount }, (_, i) => i + 1);
        }

        // Parse ranges and comma-separated values: "1-3,5,7-9"
        const columnSet = new Set();
        trimmed.split(',').forEach(rawPart => {
            const part = rawPart.trim();
            if (part.includes('-')) {
                const [first, second] = part
                    .split('-')
                    .map(n => parseInt(n.trim(), 10));
                if (Number.isNaN(first) || Number.isNaN(second)) {
                    return; // Malformed range such as "1-" or "-3"
                }
                // Accept reversed ranges ("3-1") and never go below column 1
                const start = Math.max(1, Math.min(first, second));
                const end = Math.max(first, second);
                for (let i = start; i <= end; i++) {
                    columnSet.add(i);
                }
            } else {
                const colNum = parseInt(part, 10);
                // Skips NaN (empty or non-numeric tokens) and column 0,
                // which would otherwise yield a phantom empty column.
                if (colNum >= 1) {
                    columnSet.add(colNum);
                }
            }
        });
        return Array.from(columnSet).sort((a, b) => a - b);
    }

    // Default to first column if nothing else works
    return [1];
};
/**
 * Build one output line from a row's cells.
 *
 * @param {ArrayLike<{innerText: string}>} cells - The row's cells in column order
 * @param {number[]} columnNumbers - 1-based columns to extract, in output order
 * @param {Object} config - Reads `emptyCell`, `callbacks`, `combine` and `delimiter`
 * @param {boolean} [isHeader=false] - When true, callbacks are not applied
 * @returns {string} The selected cell values joined by `config.delimiter`
 *   (or by nothing when `config.combine` is set), terminated with '\n'
 */
const extractRowData = (cells, columnNumbers, config, isHeader = false) => {
    // Tolerate `callbacks: null` / `undefined` passed in by the caller
    const callbacks = config.callbacks || {};

    const rowData = columnNumbers.map(colNum => {
        const cell = cells[colNum - 1]; // Convert to 0-based for array access
        const text = cell ? cell.innerText.trim() : '';
        // The placeholder covers both missing cells and cells with no text
        let cellData = text === '' ? config.emptyCell : text;

        // Apply callbacks if not a header row
        if (!isHeader) {
            // First apply global callback (*)
            if (typeof callbacks['*'] === 'function') {
                cellData = callbacks['*'](cellData, colNum);
            }
            // Then apply column-specific callback
            if (typeof callbacks[colNum] === 'function') {
                cellData = callbacks[colNum](cellData, colNum);
            }
        }
        return cellData;
    });

    // Combine or delimit
    const separator = config.combine ? '' : config.delimiter;
    return `${rowData.join(separator)}\n`;
};
/**
 * Expand a table with colspan/rowspan into a logical grid.
 *
 * Builds a detached <table> with one <tr> per source row, in which a spanning
 * cell's text is repeated at every grid position it covers. Positions inside
 * a row that no source cell covers are emitted as empty <td> cells so later
 * cells keep their column index; rows are not padded on the right.
 *
 * @param {HTMLTableElement} originalTable - Table to read (not modified)
 * @returns {HTMLTableElement} New in-memory table without span attributes
 */
const expandTableSpans = (originalTable) => {
    const rows = Array.from(originalTable.querySelectorAll('tr'));

    // Missing, zero, negative or non-numeric spans are treated as 1 so the
    // cell is never dropped. NOTE: rowspan="0" ("span to the end of the
    // section" in HTML) is not modelled and is also treated as 1.
    const readSpan = (cell, attribute) => {
        const value = parseInt(cell.getAttribute(attribute) || '1', 10);
        return value >= 1 ? value : 1;
    };

    // grid[row][col] holds the cell descriptor occupying that position
    const grid = rows.map(() => []);

    rows.forEach((row, rowIndex) => {
        let colIndex = 0;
        Array.from(row.querySelectorAll('td, th')).forEach(cell => {
            // Find next available column position (skip slots claimed by a
            // rowspan coming from a row above)
            while (grid[rowIndex][colIndex]) {
                colIndex++;
            }
            const colspan = readSpan(cell, 'colspan');
            // Clamp so a rowspan can never reach past the last source row
            const rowspan = Math.min(readSpan(cell, 'rowspan'), rows.length - rowIndex);
            const cellContent = cell.innerText.trim();
            const isHeader = cell.tagName === 'TH';

            // Fill the grid for this cell and its spans
            for (let r = 0; r < rowspan; r++) {
                for (let c = 0; c < colspan; c++) {
                    grid[rowIndex + r][colIndex + c] = {
                        content: cellContent,
                        isHeader: isHeader
                    };
                }
            }
            colIndex += colspan;
        });
    });

    // Now build the new table from the grid
    const newTable = document.createElement('table');
    grid.forEach(rowData => {
        const newRow = document.createElement('tr');
        // Index loop on purpose: forEach would skip holes in the sparse row
        // and shift every following cell to the left.
        for (let c = 0; c < rowData.length; c++) {
            const cellData = rowData[c];
            const newCell = document.createElement(cellData && cellData.isHeader ? 'th' : 'td');
            newCell.innerText = cellData ? cellData.content : '';
            newRow.appendChild(newCell);
        }
        newTable.appendChild(newRow);
    });
    return newTable;
};
/**
 * Report whether any <td>/<th> in the table carries a colspan or rowspan
 * attribute (the attribute's value is not inspected).
 *
 * @param {HTMLTableElement} table - Table to inspect
 * @returns {boolean} true as soon as one spanning cell is found, else false
 */
const hasSpans = (table) => {
    const candidates = table.querySelectorAll('td, th');
    for (let index = 0; index < candidates.length; index++) {
        const candidate = candidates[index];
        if (candidate.hasAttribute('colspan')) {
            return true;
        }
        if (candidate.hasAttribute('rowspan')) {
            return true;
        }
    }
    return false;
};
/**
 * Offer a string to the user as a file download.
 *
 * Wraps the data in a text/csv Blob, points a hidden temporary <a download>
 * element at an object URL for it, clicks the element, then removes the
 * element and revokes the URL.
 *
 * @param {string} data - File contents
 * @param {string} filename - Name suggested for the downloaded file
 */
const downloadAsCSV = (data, filename) => {
    const csvBlob = new Blob([data], { type: 'text/csv;charset=utf-8;' });
    const anchor = document.createElement('a');
    const objectUrl = URL.createObjectURL(csvBlob);

    const attributes = [
        ['href', objectUrl],
        ['download', filename],
    ];
    attributes.forEach(([name, value]) => {
        anchor.setAttribute(name, value);
    });
    anchor.style.visibility = 'hidden';

    // The anchor only needs to be in the document for the duration of the click
    document.body.appendChild(anchor);
    anchor.click();
    document.body.removeChild(anchor);

    URL.revokeObjectURL(objectUrl);
};
// Return the main function with access to private helper functions
return (id, columns, options = {}) => {
// Default options
const config = {
combine: false,
skipHeaders: false,
skipRows: 0,
delimiter: '\t',
callbacks: {},
emptyCell: '',
downloadCSV: false,
filename: 'table-export.csv',
...options
};
const tbl = document.getElementById(id);
if (!tbl) {
console.error(`Table with id "${id}" not found`);
return '';
}
// Parse column selection into array of 1-based column numbers
const columnNumbers = parseColumnSelection(columns, tbl);
// Always expand table if it has colspan/rowspan
const processedTable = hasSpans(tbl) ? expandTableSpans(tbl) : tbl;
const rows = Array.from(processedTable.querySelectorAll('tr'));
let output = '';
rows.forEach((row, rowIndex) => {
// Skip rows if configured
if (rowIndex < config.skipRows) {
return;
}
// Check if this is a header row
const isHeaderRow = row.querySelector('th') !== null;
// Skip header rows if configured
if (config.skipHeaders && isHeaderRow) {
return;
}
// Get cells (th or td depending on row type)
const cells = Array.from(row.querySelectorAll(isHeaderRow ? 'th' : 'td'));
if (cells.length === 0) {
return; // Skip rows with no cells
}
output += extractRowData(cells, columnNumbers, config, isHeaderRow);
});
console.log(output);
// Download as CSV if requested
if (config.downloadCSV) {
downloadAsCSV(output, config.filename);
return '';
}
return output;
};
})();
@128f
Copy link

128f commented Feb 18, 2023

Worked as advertised: I added the id "test" to the table in question and ran

captureTable("test", [2])

and got the column I wanted

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment