Skip to content

Instantly share code, notes, and snippets.

@johannesjo
Last active October 9, 2024 06:06
Show Gist options
  • Save johannesjo/6b11ef072a0cb467cc93a885b5a1c19f to your computer and use it in GitHub Desktop.
Save johannesjo/6b11ef072a0cb467cc93a885b5a1c19f to your computer and use it in GitHub Desktop.
Snippet to convert an HTML table to JSON (to be used with Google Chrome or similar)
/**
 * Convert an HTML table into an array of plain row objects.
 *
 * The first row supplies the property names: each header cell's innerHTML is
 * lower-cased and stripped of space characters. Every following row becomes
 * one object mapping those names to the innerHTML of the matching cell.
 *
 * @param {HTMLTableElement} table - Table exposing `rows` and per-row `cells`.
 * @returns {Object[]} One object per data row; empty if the table has no rows.
 */
function tableToJson(table) {
  const data = [];
  const headerRow = table.rows[0];
  // An empty table has no header row to read; return no data instead of throwing.
  if (!headerRow) {
    return data;
  }
  // first row needs to be headers
  const headers = [];
  for (let col = 0; col < headerRow.cells.length; col++) {
    headers[col] = headerRow.cells[col].innerHTML.toLowerCase().replace(/ /g, '');
  }
  // go through the remaining rows, keyed by the header at the same column index
  for (let r = 1; r < table.rows.length; r++) {
    const tableRow = table.rows[r];
    const rowData = {};
    for (let c = 0; c < tableRow.cells.length; c++) {
      rowData[headers[c]] = tableRow.cells[c].innerHTML;
    }
    data.push(rowData);
  }
  return data;
}
// Run in the browser DevTools console: $0 is the console shortcut for the element
// currently selected in the Elements panel, so select the <table> first.
JSON.stringify(tableToJson($0));
@achaayb
Copy link

achaayb commented Jan 8, 2021

tnx

@image72
Copy link

image72 commented Aug 5, 2021

/**
 * Extract a table's header and body cell texts, plus a CSV rendering of them.
 *
 * Header texts come from the first row of the table's <thead>; body texts come
 * from every row of the first <tbody>. A missing <thead> or <tbody> yields an
 * empty array for that part instead of throwing.
 *
 * @param {HTMLTableElement} table - Table exposing `tHead` and `tBodies`.
 * @returns {{csv: string, table: string[][], thead: string[], tbody: string[][]}}
 *   `thead` is the header texts, `tbody` the body rows, `table` is
 *   `[thead, ...tbody]`, and `csv` has one newline-terminated line per record.
 */
function getJSON(table) {
  // thead
  const headRow = table.tHead ? table.tHead.rows[0] : undefined;
  const thead = headRow ? Array.from(headRow.children).map((el) => el.textContent) : [];

  // tbody
  const firstBody = table.tBodies[0];
  const tbody = firstBody
    ? Array.from(firstBody.rows).map((row) =>
        Array.from(row.cells).map((cell) => cell.textContent)
      )
    : [];

  // Quote fields that would otherwise break the record structure (comma, quote,
  // CR or LF), doubling any embedded quotes.
  const escapeCsvField = (field) =>
    /[",\r\n]/.test(field) ? '"' + field.replace(/"/g, '""') + '"' : field;
  const toCsvLine = (fields) => fields.map(escapeCsvField).join(',') + '\n';

  // Skip the header line when there are no headers, and do not emit a blank
  // line between the header and the first record.
  const headStr = thead.length > 0 ? toCsvLine(thead) : '';
  const bodyStr = tbody.map(toCsvLine).join('');

  return {
    csv: headStr + bodyStr,
    table: [thead].concat(tbody),
    thead,
    tbody
  };
}

@GavinRay97
Copy link

GavinRay97 commented Aug 9, 2021

/**
 * Parse an HTML table element into an array of row objects.
 *
 * Property names are the text of every `th` found in the table; each
 * `tbody > tr` row becomes one object whose values are the text of the `td`
 * at the same index as the header.
 *
 * @param {Element} tableEl - The table element to read.
 * @returns {Object[]} One object per body row. A header with no matching `td`
 *   in a row maps to `null`.
 */
function parseHTMLTableElem(tableEl) {
    const columns = Array.from(tableEl.querySelectorAll('th')).map(it => it.textContent)
    const rows = tableEl.querySelectorAll('tbody > tr')
    return Array.from(rows).map(row => {
        const cells = Array.from(row.querySelectorAll('td'))
        return columns.reduce((obj, col, idx) => {
            // A row can hold fewer cells than there are headers; record the
            // gap as null rather than throwing on an undefined cell.
            const cell = cells[idx]
            obj[col] = cell ? cell.textContent : null
            return obj
        }, {})
    })
}

@thr3a
Copy link

thr3a commented Dec 24, 2021

@GavinRay97 Excellent! thx

@kaledsoft
Copy link

nice one

@toddsmyth91
Copy link

toddsmyth91 commented Feb 23, 2022

Very nice example, but it only works on tables that correctly use th cells for their header row.
If a table is expected to have a "header row" but does not use th cells for it, the code needs to be updated.
This version enhances @GavinRay97's original example so that it also works with tables where we aren't sure whether they have th cells.
Usage: parseHTMLTableElem(document.getElementById('first-table'), false OR true);
Return: {'headers': [], 'rows': []}

/**
 * Parse an HTML table element into its headers and an array of row objects.
 *
 * Headers are the text of every `th` in the table. When the table has no `th`
 * cells, the first `tbody > tr` row is used as the header row and removed from
 * the data rows.
 *
 * @param {Element} tableEl - The table element to read.
 * @param {boolean} [expectingHeaderRow] - Treat the first body row as the
 *   header when no `th` cells exist. When this is falsy and no `th` cells are
 *   found, the function retries with it set to true, so a table that has rows
 *   gives the same result either way.
 * @returns {{headers: string[], rows: Object[]}} Header texts and one object
 *   per data row. A header with no matching `td` in a row maps to `null`.
 */
function parseHTMLTableElem( tableEl, expectingHeaderRow ) {
	let columns = Array.from( tableEl.querySelectorAll( 'th' ) ).map( it => it.textContent );
	const rows = Array.from( tableEl.querySelectorAll( 'tbody > tr' ) );
	// must check for table that has no th cells, but only if we are told to "expectingHeaderRow";
	// an empty table has no first row to promote, so leave the headers empty in that case
	if ( columns.length === 0 && expectingHeaderRow && rows.length > 0 ) {
		// get columns for a non-th'd table
		columns = Array.from( rows[ 0 ].children ).map( it => it.textContent );
		// must remove first row as it is the header
		rows.shift();
	}
	// no th cells and not told to expect a header row: every row object would be empty,
	// so retry treating the first regular row as the header
	if ( !expectingHeaderRow && columns.length === 0 && rows.length > 0 ) {
		return parseHTMLTableElem( tableEl, true );
	}
	return {
		'headers': columns,
		'rows': rows.map( row => {
			const cells = Array.from( row.querySelectorAll( 'td' ) );
			return columns.reduce( ( obj, col, idx ) => {
				// a row can hold fewer cells than there are headers; use null instead of throwing
				const cell = cells[ idx ];
				obj[ col ] = cell ? cell.textContent : null;
				return obj;
			}, {} );
		} )
	};
}

@johannesjo
Copy link
Author

johannesjo commented Apr 23, 2024

And another variant that supports custom headers:

/**
 * Convert an HTML table into an array of plain row objects, optionally using
 * caller-supplied property names.
 *
 * When `headers` is empty, the names are derived from the first row: each
 * cell's innerHTML is lower-cased and stripped of space characters. The first
 * row is always skipped as data, whether or not custom headers are given.
 *
 * @param {HTMLTableElement} table - Table exposing `rows` and per-row `cells`.
 * @param {string[]} [headers=[]] - Property names by column index. The array
 *   is only read, never modified.
 * @returns {Object[]} One object per data row mapping header name to the
 *   cell's innerHTML; empty if the table has no rows.
 */
function tableToJson(table, headers = []) {
  const data = [];
  const firstRow = table.rows[0];

  // An empty table has neither headers nor data; return early instead of throwing.
  if (!firstRow) {
    return data;
  }

  // first row needs to be headers; build them in a local array so the caller's
  // argument is not mutated
  const keys = headers.length
    ? headers
    : Array.from(firstRow.cells, (cell) => cell.innerHTML.toLowerCase().replace(/ /g, ''));

  // go through cells
  for (let r = 1; r < table.rows.length; r++) {
    const tableRow = table.rows[r];
    const rowData = {};

    for (let c = 0; c < tableRow.cells.length; c++) {
      rowData[keys[c]] = tableRow.cells[c].innerHTML;
    }

    data.push(rowData);
  }

  return data;
}

// Example: serialize the first <table> in the document, naming the columns explicitly.
JSON.stringify(tableToJson(document.getElementsByTagName("table")[0], ["nr", "title", "description"]));

@divinity76
Copy link

joining the party,

/**
 * Convert an HTML table into an array of row objects keyed by header text.
 *
 * Header names are the trimmed text of the `th` cells in the first `tr`; each
 * later `tr` becomes one object mapping those names to the trimmed text of its
 * `td` cells, matched by index.
 *
 * @param {Element} table - The table element to read.
 * @returns {Object[]} One object per row after the first; empty if the table
 *   contains no `tr` at all.
 */
function tableToObject(table) {
    const trs = table.querySelectorAll('tr');
    const ret = [];
    // No rows means no header row to read; return no data instead of throwing.
    if (trs.length === 0) {
        return ret;
    }
    const headers = Array.from(trs[0].querySelectorAll('th')).map(th => th.textContent.trim());
    for (let i = 1; i < trs.length; i++) {
        const obj = {};
        const tds = trs[i].querySelectorAll('td');
        for (let j = 0; j < tds.length; j++) {
            obj[headers[j]] = tds[j].textContent.trim();
        }
        ret.push(obj);
    }
    return ret;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment