Last active
March 16, 2022 09:45
-
-
Save jeremyredhead/2522f82c950eee3433815bf50ee3ca8d to your computer and use it in GitHub Desktop.
semi-throwaway code to parse a wordpress readme.txt-style formatted document
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name WP Changelog Parser
// @version 0.1
// @match *://*/*/changelog.txt
// @match file:///*/*/changelog.txt
// @grant none
// ==/UserScript==
// Toolbar: two buttons inserted at the top of the page that switch between
// the raw text (the page's <pre>) and the parsed rendering (#html).
var doc = document
var btnText = Object.assign(doc.createElement('button'), { innerText: 'Plaintext' })
var btnHTML = Object.assign(doc.createElement('button'), { innerText: 'Parsed HTML' })

// prepend() inserts before the current first child, so prepending the
// plaintext button first leaves the on-page order as [Parsed HTML][Plaintext].
doc.body.prepend(btnText)
doc.body.prepend(btnHTML)

// "Parsed HTML": (re)build the rendering, then make sure it is visible.
btnHTML.onclick = () => {
  convert()
  const rendered = document.querySelector('#html')
  rendered.style.display = 'block'
}

// "Plaintext": hide the rendering if one has been built, then show the <pre>.
btnText.onclick = () => {
  const rendered = document.querySelector('#html')
  if (rendered !== null) rendered.style.display = 'none'
  document.querySelector('pre').style.display = 'block'
}
/**
 * Parse the plain-text changelog held in the page's first <pre> and render it
 * as HTML inside a <div id="html"> appended to <body>. The container is reused
 * and emptied on later calls. The <pre> is hidden once parsing starts.
 *
 * Recognised line markers (a matching trailing marker is stripped):
 *   `== x ==` -> <h1>, `= x =` -> <h2>, `** x **` -> <h3>, `*** x ***` -> <h4>,
 *   `* x` -> <li> (consecutive items share one <ul>), anything else -> <p>.
 *
 * Blank lines end the current list and emit nothing. A line starting with an
 * unrecognised run of `*` / `=` is rendered verbatim as a paragraph instead of
 * being passed to createElement() as a tag name.
 *
 * Inline markup of each line is converted by htmlify() and assigned via
 * innerHTML, so any HTML present in the source text is rendered as-is.
 *
 * If the page has no <pre>, the (empty) container is still created and the
 * function returns without doing anything else.
 */
function convert() {
  const doc = document
  const MARKER_TAGS = { '==': 'h1', '=': 'h2', '**': 'h3', '***': 'h4' }

  // Reuse the output container from a previous run, or create it.
  let html = doc.querySelector('#html')
  if (!html) html = doc.body.appendChild(doc.createElement('div'))
  html.id = 'html'
  html.innerHTML = ''

  // TODO: check whether the <pre> wrapper for text files is mandated by the HTML spec
  const pre = doc.querySelector('pre')
  if (!pre) return
  pre.style.display = 'none'

  // The <ul> that consecutive `*` lines are appended to; null when the
  // previous line was not a list item.
  let openList = null

  // Split on CRLF, lone CR and LF so no line keeps a trailing carriage return.
  for (const line of pre.textContent.split(/\r\n?|\n/)) {
    if (line.trim() === '') {
      openList = null
      continue
    }

    // Group 1: optional leading run of `*` / `=`. Group 2: the text, minus an
    // optional trailing repeat of that same run. The `s` flag lets `.` match
    // any character, so the pattern matches every string and never yields null.
    const [, marker, text] = line.match(/^([*=]+)?(.*?)\1?$/s)

    if (marker === '*') {
      if (!openList) openList = html.appendChild(doc.createElement('ul'))
      openList.appendChild(doc.createElement('li')).innerHTML = htmlify(text)
      continue
    }

    openList = null
    const tag = marker === undefined ? 'p' : MARKER_TAGS[marker]
    const block = html.appendChild(doc.createElement(tag || 'p'))
    // Unknown marker: keep the whole line, since nothing was really parsed.
    block.innerHTML = htmlify(tag ? text : line)
  }
}
/**
 * Convert the inline markup of one line of text into HTML.
 *
 * Handles, in this order, every occurrence of:
 *   - `code`            -> <code>code</code>
 *   - [label](target)   -> <a href="target">label</a>
 *   - bare http(s) URLs -> <a href="url">url</a>
 *
 * NOTE(review): the input is not HTML-escaped, and convert() assigns the
 * result to innerHTML, so markup in the source text passes straight through.
 *
 * @param {string} text - raw text of a single line
 * @returns {string} the text with inline markup replaced by HTML tags
 */
function htmlify(text) {
  return text
    .replace(/`(.+?)`/g, '<code>$1</code>')
    .replace(/\[([^[\]]+?)\]\((.+?)\)/g, '<a href="$2">$1</a>')
    // Bare URL: must start the text or follow whitespace (which keeps it from
    // matching inside the href="..." produced above), and stops before one
    // optional trailing punctuation character followed by whitespace or the end.
    .replace(/(?<=^|\s)(https?:\/\/[^\s]+?)(?=[,.;:!?)]?(?:\s|$))/g, '<a href="$1">$1</a>')
  // TODO: "tokenized" parsing to prevent corrupting e.g. `[why://would](*anyone* do http://this?)`
  /* .replace(/\b([`*]+)(.+?)\1\b/g, function(match, type, text) {
  if (type == '`') type = 'code'
  if (type == '*') type = 'em' // is that right? idk
  }) */
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
note:
currently (as of revision 1), this is a bit buggy as it doesn't handle list items with linebreaks
(not to mention all the empty `<p/>`s generated throughout). also, it doesn't handle inline formatting. or html.
(but i plan to solve the latter by asking jetpack to go back to using inline markdown, not inline html ;)