Last active
December 29, 2015 17:28
-
-
Save wakaba/7703957 to your computer and use it in GitHub Desktop.
microdata dumper bookmarklet
http://www.whatwg.org/specs/web-apps/current-work/#microdata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript:
/* You are granted a license to use, reproduce and create derivative works of this document. */
/*
 * Entry point of the bookmarklet: collect every top-level microdata item
 * (an element with |itemscope| but without |itemprop|) in the current
 * document, order the items by document position, log them to the console
 * and append a visual dump of them to the page.
 */
var root = document;
var items = root.querySelectorAll ('[itemscope]:not([itemprop])');
var r = [];
for (var i = 0; i < items.length; i++) {
  /* Only elements in the HTML namespace are treated as items. */
  if (items[i].namespaceURI === 'http://www.w3.org/1999/xhtml') {
    r.push (getItem (items[i]));
  }
}
r = sortNodes (r);
console.log (r);
showItemList (r);
/**
 * Sort a list of records into document order of their |node| members.
 *
 * The array is sorted in place (Array.prototype.sort) and the same array
 * is returned.
 *
 * @param {Array} ns  Records, each of which has a |node| property that
 *                    supports |compareDocumentPosition|.
 * @returns {Array}   |ns| itself, now sorted.
 */
function sortNodes (ns) {
  function byDocumentOrder (a, b) {
    /* Bitmask describing where a.node is located relative to b.node. */
    var relation = b.node.compareDocumentPosition (a.node);
    if (relation & Node.DOCUMENT_POSITION_PRECEDING) return -1;
    if (relation & Node.DOCUMENT_POSITION_FOLLOWING) return 1;
    return 0;
  }
  return ns.sort (byDocumentOrder);
}
/**
 * Build the microdata item whose root is the given element.
 *
 * The element's children, plus any elements named by its |itemref|
 * attribute, are crawled breadth-first.  Crawling does not descend into
 * an HTML element that has |itemscope| itself (it is a nested item).
 * Every crawled HTML element with a non-empty |itemprop| contributes one
 * value, obtained from getItemValue, to each property name it lists.
 *
 * @param {Element} root  The item's root element.  Its |itemtype| and
 *                        |itemid| attributes are read unconditionally.
 * @returns {{node: Element, props: Object, types: Object, id: (string|undefined)}}
 *   |props| maps a property name to an array of values ordered by
 *   sortNodes; |types| maps each item type token to true; |id| is set
 *   only when |itemid| is present and resolves to a non-empty URL.
 *   |props| and |types| have no prototype, so names such as "toString"
 *   or "__proto__" behave as ordinary keys.
 */
function getItem (root) {
  var HTML_NS = 'http://www.w3.org/1999/xhtml';
  var SPACES = /[\x09\x0A\x0C\x0D\x20]+/;
  var j;

  /* Keyed by author-controlled property names, hence prototype-less. */
  var results = Object.create (null);
  /* Elements already visited; the root is included so that it cannot be
     reached again through itemref. */
  var memory = [root];
  var pending = [];

  for (j = 0; j < root.children.length; j++) {
    pending.push (root.children[j]);
  }
  if (root.namespaceURI === HTML_NS && root.hasAttribute ('itemref')) {
    /* getElementById is a method of the document, not of an element. */
    var doc = root.ownerDocument;
    var refs = root.getAttribute ('itemref').split (SPACES);
    for (j = 0; j < refs.length; j++) {
      /* Leading or trailing whitespace yields empty tokens. */
      if (!refs[j].length) continue;
      var el = doc.getElementById (refs[j]);
      if (el) pending.push (el);
    }
  }

  while (pending.length > 0) {
    var current = pending.shift ();
    if (memory.indexOf (current) !== -1) {
      /* microdata error: element reached twice; process it only once */
      continue;
    }
    memory.push (current);

    var isHTML = current.namespaceURI === HTML_NS;
    if (!(isHTML && current.hasAttribute ('itemscope'))) {
      for (j = 0; j < current.children.length; j++) {
        pending.push (current.children[j]);
      }
    }
    if (isHTML) {
      var propNames = (current.getAttribute ('itemprop') || '').split (SPACES);
      for (j = 0; j < propNames.length; j++) {
        var propName = propNames[j];
        if (!propName.length) continue;
        if (!results[propName]) results[propName] = [];
        results[propName].push (getItemValue (current));
      }
    }
  }

  var item = {node: root, props: Object.create (null), types: Object.create (null)};
  for (var n in results) {
    item.props[n] = sortNodes (results[n]);
  }

  var t = (root.getAttribute ('itemtype') || '').split (SPACES);
  for (var i = 0; i < t.length; i++) {
    /* Skip the empty token produced by a missing or padded attribute. */
    if (t[i].length) item.types[t[i]] = true;
  }

  if (root.hasAttribute ('itemid')) {
    /* Let an <a> element resolve the identifier into an absolute URL. */
    /* NOTE(review): browsers may ignore xml:base on HTML elements, in
       which case the document's own base URL is used - confirm. */
    var a = document.createElement ('a');
    a.setAttributeNS ('http://www.w3.org/XML/1998/namespace', 'xml:base', root.baseURI);
    a.setAttribute ('href', root.getAttribute ('itemid'));
    if (a.href !== '') {
      item.id = a.href;
    }
  }
  return item;
} /* getItem */
/**
 * Compute the microdata property value contributed by an element.
 *
 * For an HTML element with |itemscope| the value is the nested item
 * returned by getItem.  Otherwise the value is a record {text, node}
 * whose |text| depends on the element's local name; URL-valued elements
 * additionally carry |isURL: true|.  Elements outside the HTML namespace,
 * and HTML elements not listed below, yield their |textContent|.
 *
 * @param {Element} el  The element that carries |itemprop|.
 * @returns {Object}    Either an item (see getItem) or
 *                      {text: string, node: Element, isURL?: true}.
 */
function getItemValue (el) {
  if (el.namespaceURI === 'http://www.w3.org/1999/xhtml') {
    if (el.hasAttribute ('itemscope')) {
      return getItem (el);
    }
    switch (el.localName) {
    case 'meta':
      return {text: el.content, node: el};
    case 'audio':
    case 'embed':
    case 'iframe':
    case 'img':
    case 'source':
    case 'track':
    case 'video':
      return {text: el.src, node: el, isURL: true}; /* XXX strictly, base might be wrong */
    case 'a':
    case 'area':
    case 'link':
      return {text: el.href, node: el, isURL: true}; /* XXX strictly, base might be wrong */
    case 'object':
      return {text: el.data, node: el, isURL: true}; /* XXX strictly, base might be wrong */
    case 'data':
    case 'meter':
      return {text: el.getAttribute ('value') || '', node: el};
    case 'time':
      /* A present-but-empty datetime attribute is still the value; only
         fall back to the text content when the attribute is absent. */
      return {
        text: el.hasAttribute ('datetime') ? el.getAttribute ('datetime') : el.textContent,
        node: el
      };
    }
  }
  return {text: el.textContent, node: el};
} /* getItemValue */
/**
 * Render the given items as a styled list, append it to the end of the
 * document body and scroll it into view.
 *
 * @param {Array} items  Items as returned by getItem; each one is
 *                       rendered with dumpItem inside its own <li>.
 */
function showItemList (items) {
  var panel = document.createElement ('aside');
  panel.innerHTML = '<style scoped>.microdata-item-list { background: white; color: black; display: block; position: static; float: none; border: 2px blue solid; padding: 0.5em; font-size: 1rem; font-style: normal; font-weight: normal; text-align: left; } .microdata-item-list li { display: block; margin: 0 0 1em 0 } .microdata-item-list li > code, .microdata-item-list td > code { font-weight: bolder } .microdata-item-list table { width: 100% } .microdata-item-list table:empty::before { content: "(no properties)" } .microdata-item-list th, .microdata-item-list td { vertical-align: top; border: 1px gray solid; padding: 0.3em; word-break: break-word } .microdata-item-list th { width: 30% } .microdata-item-list code { white-space: pre-wrap; word-break: break-all }</style><ul class=microdata-item-list></ul>';
  /* The markup above ends with the <ul>, so it is the last child. */
  var list = panel.lastChild;
  var index = 0;
  while (index < items.length) {
    var rendered = dumpItem (items[index]);
    var entry = document.createElement ('li');
    list.appendChild (entry);
    entry.appendChild (rendered);
    index++;
  }
  document.body.appendChild (panel);
  panel.scrollIntoView ();
} /* showItemList */
/**
 * Render one microdata item as DOM nodes.
 *
 * The output is a document fragment containing, in order: one linked
 * <code> per item type, an optional "ID: <link>" part when the item has
 * an |id|, and a <table> with one row per property value.  The first row
 * of each property carries a <th> with the property name that spans all
 * of that property's rows.  Values that are nested items are rendered
 * recursively.
 *
 * @param {Object} item  An item as returned by getItem.
 * @returns {DocumentFragment} The rendered representation.
 */
function dumpItem (item) {
  /* Fill a <code> element with a link whose text and target are |url|. */
  function fillLink (codeEl, url) {
    codeEl.innerHTML = '<a></a>';
    var anchor = codeEl.firstChild;
    anchor.textContent = url;
    anchor.href = url;
    return codeEl;
  }

  var fragment = document.createDocumentFragment ();

  for (var type in item.types) {
    if (item.types[type]) {
      fragment.appendChild (fillLink (document.createElement ('code'), type));
      fragment.appendChild (document.createTextNode (' '));
    }
  }

  if (item.id) {
    fragment.appendChild (document.createTextNode (' '));
    var label = fragment.appendChild (document.createElement ('strong'));
    label.appendChild (document.createTextNode ('ID'));
    fragment.appendChild (document.createTextNode (': '));
    fillLink (fragment.appendChild (document.createElement ('code')), item.id);
  }

  var table = document.createElement ('table');
  for (var propName in item.props) {
    var values = item.props[propName];
    /* The first row of a property holds the name cell plus a value cell. */
    var row = document.createElement ('tr');
    row.innerHTML = '<th><code></code><td>';
    row.firstChild.firstChild.textContent = propName;
    row.firstChild.rowSpan = values.length || 1;
    for (var k = 0; k < values.length; k++) {
      var value = values[k];
      var cell = row.lastChild;
      if (typeof (value.text) === 'undefined') {
        /* No |text| member: the value is a nested item. */
        cell.appendChild (dumpItem (value));
      } else if (value.isURL) {
        cell.innerHTML = '<code><a></a></code>';
        var link = cell.firstChild.firstChild;
        link.textContent = value.text;
        link.href = value.text;
      } else {
        cell.textContent = value.text;
      }
      table.appendChild (row);
      /* Subsequent values of the same property get a value-only row. */
      row = document.createElement ('tr');
      row.innerHTML = '<td>';
    }
  }
  fragment.appendChild (table);
  return fragment;
} /* dumpItem */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment