Last active
October 9, 2024 06:06
-
-
Save johannesjo/6b11ef072a0cb467cc93a885b5a1c19f to your computer and use it in GitHub Desktop.
Snippet to convert an HTML table to JSON (to be used with Google Chrome or similar)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convert an HTML table into an array of plain objects, one per data row.
 *
 * The first row supplies the object keys: each header cell's innerHTML is
 * lower-cased and stripped of space characters. Every following row becomes
 * one object whose values are the innerHTML of its cells.
 *
 * @param {HTMLTableElement} table - Table whose first row is the header row.
 * @returns {Array<Object<string, string>>} One object per row after the
 *   header; an empty array when the table has no rows or only a header row.
 */
function tableToJson(table) {
  const data = [];

  // A table without rows has no header row to read keys from.
  if (table.rows.length === 0) {
    return data;
  }

  // first row needs to be headers
  const headers = Array.from(table.rows[0].cells, (cell) =>
    cell.innerHTML.toLowerCase().replace(/ /gi, '')
  );

  // go through the remaining rows, pairing each cell with the header in the same column
  for (let rowIndex = 1; rowIndex < table.rows.length; rowIndex++) {
    const tableRow = table.rows[rowIndex];
    const rowData = {};
    for (let colIndex = 0; colIndex < tableRow.cells.length; colIndex++) {
      rowData[headers[colIndex]] = tableRow.cells[colIndex].innerHTML;
    }
    data.push(rowData);
  }

  return data;
}
// Run in the browser DevTools console: $0 refers to the element currently
// selected in the Elements panel, which should be the <table> to convert.
JSON.stringify(tableToJson($0));
GavinRay97
commented
Aug 9, 2021
•
@GavinRay97 Excellent! thx
nice one
Very nice example, but it only works on tables that correctly use th cells for their header row.
If we expect a table to have a "header row" but it does not use th cells for it, we need to update the code.
Using @GavinRay97's original example this enhances it to work with tables where we aren't sure if they will have th cells.
Usage: parseHTMLTableElem(document.getElementById('first-table'), false OR true);
Return: {'headers': [], 'rows': []}
/**
 * Parse an HTML table element into its header names and row objects.
 *
 * Header names are the textContent of every `th` in the table. When the table
 * has no `th` cells and a header row is expected, the first `tbody > tr` row
 * is used as the header row instead and is removed from the data rows.
 *
 * @param {Element} tableEl - The table element to parse.
 * @param {boolean} expectingHeaderRow - When true, a table without `th`
 *   cells has its first body row treated as the header row. When false the
 *   same fallback is still attempted once if no headers were found.
 * @returns {{headers: string[], rows: Array<Object<string, string>>}} The
 *   header names and one object per data row, keyed by header name. A row
 *   with fewer cells than headers gets '' for the missing columns.
 */
function parseHTMLTableElem( tableEl, expectingHeaderRow ) {
    let columns = Array.from( tableEl.querySelectorAll( 'th' ) ).map( it => it.textContent );
    const rows = Array.from( tableEl.querySelectorAll( 'tbody > tr' ) );

    // must check for table that has no th cells, but only if we are told to "expectingHeaderRow";
    // the rows.length guard keeps an empty table from throwing on rows[ 0 ]
    if ( columns.length === 0 && expectingHeaderRow && rows.length > 0 ) {
        // get columns for a non-th'd table
        columns = Array.from( rows[ 0 ].children ).map( it => it.textContent );
        // must remove first row as it is the header
        rows.shift();
    }

    const returnJson = {
        'headers': columns,
        'rows': rows
            .map( row => Array.from( row.querySelectorAll( 'td' ) ) )
            // once headers are known, a row without td cells (e.g. a th header row
            // living inside tbody) carries no data and is skipped
            .filter( cells => columns.length === 0 || cells.length > 0 )
            .map( cells => columns.reduce( ( obj, col, idx ) => {
                // short rows yield '' instead of throwing on a missing cell
                obj[ col ] = cells[ idx ] ? cells[ idx ].textContent : '';
                return obj;
            }, {} ) )
    };

    // if we were not told to expect a plain header row and found no th cells,
    // retry once treating the first regular table row as the header
    const firstRow = returnJson.rows[ 0 ];
    if ( !expectingHeaderRow && returnJson.headers.length === 0 && firstRow && Object.keys( firstRow ).length === 0 ) {
        return parseHTMLTableElem( tableEl, true );
    }

    return returnJson;
}
And another variant that supports custom headers:
/**
 * Convert an HTML table into an array of plain objects, optionally using
 * caller-supplied keys instead of the table's own header row.
 *
 * When `headers` is empty, keys are derived from the first row: each cell's
 * innerHTML is lower-cased and stripped of space characters. The first row is
 * always treated as the header row and is never emitted as data, even when
 * custom headers are supplied.
 *
 * @param {HTMLTableElement} table - Table whose first row is the header row.
 * @param {string[]} [headers=[]] - Keys to use per column; not modified.
 * @returns {Array<Object<string, string>>} One object per row after the
 *   first, mapping each key to the innerHTML of the cell in that column.
 */
function tableToJson(table, headers = []) {
  const data = [];

  // Work on a local binding so a caller-supplied (empty) array is never written to.
  let keys = headers;
  if (!keys.length) {
    // No custom headers and no rows: nothing to derive keys from.
    if (table.rows.length === 0) {
      return data;
    }
    // first row needs to be headers
    keys = Array.from(table.rows[0].cells, (cell) =>
      cell.innerHTML.toLowerCase().replace(/ /gi, '')
    );
  }

  // go through cells of every row after the header row
  for (let rowIndex = 1; rowIndex < table.rows.length; rowIndex++) {
    const tableRow = table.rows[rowIndex];
    const rowData = {};
    for (let colIndex = 0; colIndex < tableRow.cells.length; colIndex++) {
      rowData[keys[colIndex]] = tableRow.cells[colIndex].innerHTML;
    }
    data.push(rowData);
  }

  return data;
}
// Example: serialize the first <table> element in the document, supplying
// custom keys for its columns instead of deriving them from the header row.
JSON.stringify(tableToJson(document.getElementsByTagName("table")[0], ["nr", "title", "description"]));
joining the party,
/**
 * Convert an HTML table into an array of objects keyed by its header cells.
 *
 * Keys are the trimmed textContent of the `th` cells in the first `tr`. Each
 * later `tr` becomes one object mapping those keys to the trimmed textContent
 * of its `td` cells, matched by position.
 *
 * @param {Element} table - Element whose `tr` descendants are read.
 * @returns {Array<Object<string, string>>} One object per row after the
 *   first; an empty array when the table contains no rows.
 */
function tableToObject(table) {
  const trs = table.querySelectorAll('tr');

  // Without any row there is no header row to read keys from.
  if (trs.length === 0) {
    return [];
  }

  const headers = Array.from(trs[0].querySelectorAll('th')).map((th) => th.textContent.trim());
  const ret = [];

  // Start at 1: the first row only provides the keys.
  for (let i = 1; i < trs.length; i++) {
    const obj = {};
    const tds = trs[i].querySelectorAll('td');
    for (let j = 0; j < tds.length; j++) {
      obj[headers[j]] = tds[j].textContent.trim();
    }
    ret.push(obj);
  }

  return ret;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment