Forked from sunnywalker/html-table-to-markdown-extra.html
Created
September 20, 2018 03:29
-
-
Save benjaminv/09a286d5d0edb6852c56bc7aa508ed23 to your computer and use it in GitHub Desktop.
HTML Table to Markdown Extra converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Without a viewport declaration mobile browsers lay the page out at desktop
       width, so the min-width media query below would always apply. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>HTML Table to Markdown Extra Table</title>
  <style>
    /* Size every element by its border box so padding never widens the 50% columns. */
    * { -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box; }

    body {
      font-family: -apple-system, "Segoe UI", Arial, Helvetica, sans-serif;
      line-height: 1.5;
      text-rendering: optimizeLegibility;
      -webkit-font-smoothing: antialiased;
      -moz-osx-font-smoothing: grayscale;
    }

    /* Input and output boxes fill their column. */
    textarea { width: 100%; height: 15em; }
    button { font-size: inherit; }

    /* Headings and paragraphs inside the two-column area sit flush against each other. */
    .cols-50 h2 { margin: 0; }
    .cols-50 p { margin: 0; }

    code {
      color: #303;
      font-weight: bold;
      font-family: Consolas, Menlo, "Courier New", Courier, monospace;
    }

    /* On wide viewports place Input and Output side by side, with a gutter
       formed by padding on the inner edge of each column. */
    @media (min-width: 40em) {
      .cols-50 { display: flex; flex-flow: row wrap; }
      .cols-50 > * { flex: 1 0 auto; width: 50%; }
      .cols-50 > *:nth-child(odd) { padding-right: 0.5em; }
      .cols-50 > *:nth-child(even) { padding-left: 0.5em; }
    }
  </style>
</head>
<body> | |
<h1>HTML Table to Markdown Extra Table</h1> | |
<p><em>Paste HTML table code into the Input, click the Convert button, and a | |
<a href="https://michelf.ca/projects/php-markdown/extra/#table">Markdown Extra table</a> | |
will be placed in the Output. This is meant as a first-pass for table conversion and will | |
not work for all types of tables.</em></p> | |
<!-- Two-column work area: the script looks up #in and #out by id and handles
     the form's submit event, so those hooks must stay as they are. -->
<div class="cols-50">
  <div>
    <!-- The heading text doubles as the textarea's label; a placeholder alone
         does not give the control an accessible name. -->
    <h2><label for="in">Input</label></h2>
    <form method="post">
      <!-- Textarea content is parsed as text, so the sample table below is shown
           literally. Its lines are kept flush-left because leading whitespace
           would become part of the textarea value. -->
      <p><textarea name="in" id="in" autofocus placeholder="paste HTML table code here"><table><thead>
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
</thead><tbody>
<tr>
<td><a href="http://example.com/">Cell 1, 1</a></td>
<td>Cell <em>1, 2</em></td>
</tr>
<tr>
<td>Cell <code>2, 1</code></td>
<td>Cell <strong>2, 2</strong></td>
</tr>
</tbody></table></textarea>
      <button type="submit" id="submit">Convert</button></p>
    </form>
  </div>
  <div>
    <h2><label for="out">Output</label></h2>
    <p><textarea id="out" placeholder="markdown extra converted table will appear here"></textarea></p>
  </div>
</div>
<h2>Notes</h2> | |
<ul> | |
<li>Malformed HTML such as missing closing tags will likely produce undesirable results.</li> | |
<li>No CSS styling is preserved.</li> | |
<li>Cell widths in the Markdown are not equalized.</li> | |
<li>The first row in the table is assumed to be the header row, regardless | |
of <code>thead</code> and <code>th</code> tags.</li> | |
<li>Cells containing <code>ul/ol</code>, <code>p</code> are compressed to single | |
line cells. It's likely that such a complex table would be better served with | |
different formatting (headings, paragraphs) anyway for | |
accessibility/readability reasons.</li> | |
<li>Tables with <code>colspan</code> and <code>rowspan</code> are likely to | |
produce undesirable results and are beyond the scope of Markdown Extra anyway.</li> | |
<li><code>img</code> tags are converted to Markdown only if they have both a
<code>src</code> and an <code>alt</code> attribute. All other attributes are
ignored, including <code>title</code>.</li>
<li><code>a</code>, <code>br</code>, <code>strong</code>, <code>em</code>, | |
and <code>code</code> tags are converted to Markdown. <strong>All other | |
tags are discarded.</strong></li> | |
</ul> | |
<!-- Loaded over https so the page also works when it is itself served over https
     (an http script would be blocked as mixed content). The version is pinned to
     1.11.1, the release the frozen "jquery-latest.js" alias resolves to. -->
<script src="https://code.jquery.com/jquery-1.11.1.js"></script>
<script> | |
// http://stackoverflow.com/a/5450113 | |
/**
 * Build a string made of `count` copies of `pattern` by repeated doubling
 * (see http://stackoverflow.com/a/5450113).
 *
 * @param {string} pattern - text to repeat
 * @param {number} count - number of copies; anything below 1 yields ''
 * @returns {string} the repeated text
 */
function repeat(pattern, count) {
  var out = '';
  if (count < 1) {
    return out;
  }
  // Walk the bits of count from least significant upwards: whenever the current
  // bit is set, append the current power-of-two sized chunk.
  for (; count > 1; count >>= 1) {
    if (count & 1) {
      out += pattern;
    }
    pattern += pattern;
  }
  // The highest set bit is handled here.
  return out + pattern;
}
!(function($) {
  // cache the inputs
  var o = $('#out'),
      i = $('#in');

  // Convert a string of HTML table markup into a Markdown Extra table.
  // The conversion is a fixed sequence of regular-expression rewrites; the
  // order matters (for example BR tags are parked as tabs before every
  // remaining tag is stripped, then restored afterwards).
  //   html    - the pasted markup
  //   returns - the Markdown text; a separator row is inserted after the
  //             first table row, which is assumed to be the header row
  function tableToMarkdown(html) {
    var t = html;

    t = t.replace(/\t/g, ' ');                 // convert tabs to a single space
    t = t.replace(/\s*[\r\n]\s*/g, '');        // join everything onto one line
    t = t.replace(/<\!--[\s\S]*?-->/g, '');    // remove html comments

    // convert anchor tags to [text](href)
    // NOTE(review): the leading " *" also swallows spaces in front of the link,
    // so "see <a ...>" becomes "see[...]"; left unchanged here.
    t = t.replace(/ *<a[^>]* href="(.*?)"[^>]*> *(.*?)<\/a>/ig, '[$2]($1)');

    // inline formatting; \b keeps <em> from matching e.g. <embed>, and the
    // i flag accepts upper-case tags like the other rules do
    t = t.replace(/<\/?strong\b[^>]*>/ig, '**');
    t = t.replace(/<\/?em\b[^>]*>/ig, '_');
    t = t.replace(/<\/?code\b[^>]*>/ig, '`');

    // images become ![alt](src); both attribute orders are handled
    t = t.replace(/<img[^>]* src="(.*?)"[^>]* alt="(.*?)"[^>]*>/ig, '![$2]($1)');
    t = t.replace(/<img[^>]* alt="(.*?)"[^>]* src="(.*?)"[^>]*>/ig, '![$1]($2)');

    t = t.replace(/ *<tr\b[^>]*>/ig, '\n|');        // each row starts a new "|" line
    t = t.replace(/\s*<t[dh]\b[^>]*>/ig, ' ');      // <td>/<th> (not <thead>) open a cell
    t = t.replace(/\s*<\/t[dh]>/ig, ' |');          // </td>/</th> close a cell with " |"
    t = t.replace(/&nbsp;|\u00a0/ig, '');           // drop non-breaking spaces (entity or literal)
    t = t.replace(/&amp;/ig, '&');                  // de-entize ampersands
    t = t.replace(/<br[^>]*>/ig, '\t');             // temporarily convert BR tags to tabs
    t = t.replace(/<\/?[^>]+>/ig, '');              // drop all other tags
    t = t.replace(/\t *\|/g, ' |');                 // drop cell-ending BRs
    t = t.replace(/\s*\t\s*/g, '<br />');           // convert tabs back to BR tags
    t = t.replace(/\| {2,}/g, '| ');                // tighten spacing after the pipe symbols
    t = t.replace(/ {2,}\|/g, ' |');                // tighten spacing before the pipe symbols
    t = t.replace(/^ +\|/gm, '|');                  // trim line-leading whitespace
    t = t.replace(/ {4,}/g, '   ');                 // convert 4+ spaces to three spaces
    t = t.replace(/^\s+|\s+$/g, '');                // trim whitespace

    // Locate the first table row; text that precedes the table (or input with
    // no rows at all) must not be mistaken for the header row.
    var lines = t.split("\n"),
        headerAt = -1;
    for (var k = 0; k < lines.length; k++) {
      if (lines[k].charAt(0) === '|') {
        headerAt = k;
        break;
      }
    }
    if (headerAt === -1) {
      return t;
    }

    // Splitting "| a | b |" on "|" gives an empty first and last element;
    // the cells are everything in between.
    var segments = lines[headerAt].split('|');
    if (segments.length < 3) {
      return t;
    }

    // generate the header row separators, one run of dashes per header cell
    // (never fewer than three so that empty header cells still get a valid run)
    var headers = '|';
    for (var j = 1; j < segments.length - 1; j++) {
      headers += repeat('-', Math.max(3, segments[j].length)) + '|';
    }
    lines.splice(headerAt + 1, 0, headers);
    return lines.join("\n");
  }

  // bind a submit handler to the form
  $('form').on('submit', function(evt) {
    // stop the normal form submit process because everything happens client-side
    evt.preventDefault();

    // get the input text and only convert if there is text to work with
    var t = i.val();
    if (t.length) {
      t = tableToMarkdown(t);
    }

    // put the new version into the output box
    o.val(t);
    // clear the old version for quick pasting of new code
    i.val('');
    // select all the text in the output box and set the browser focus to the output box
    o[0].select();
    o.focus();
  });
})(jQuery);
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment