Last active
December 23, 2015 12:08
-
-
Save phuedx/2c611a651eb2ed501b6b to your computer and use it in GitHub Desktop.
The sizes of Parsoid-generated and non-Parsoid-generated (almost all of the latter are Cite-generated) element IDs on https://en.wikipedia.org/api/rest_v1/page/html/Barack_Obama.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Total size, in bytes, of the downloaded page HTML. Measured with:
// $ curl -so /dev/null https://en.wikipedia.org/api/rest_v1/page/html/Barack_Obama -w '%{size_download}'
// Used as the denominator when reporting what fraction of the page is spent on IDs.
var TOTAL_BYTES = 1582727;
/**
 * Estimates how many bytes of markup a list of element IDs accounts for.
 *
 * Each ID is counted as its UTF-8 encoded length plus a fixed overhead for
 * the surrounding attribute syntax.
 *
 * @param {string[]} ids - Element ID values (without the attribute syntax).
 * @returns {number} Total byte count; 0 for an empty list.
 */
function total_bytes_for_ids(ids) {
  // 5 additional bytes for the "id=''" string
  var ID_ATTRIBUTE_OVERHEAD = 5;
  // String#length counts UTF-16 code units, not bytes, so non-ASCII IDs
  // would be under-counted; measure the UTF-8 encoding instead.
  var encoder = new TextEncoder();

  return ids.reduce(
    (acc, id) => acc + encoder.encode(id).length + ID_ATTRIBUTE_OVERHEAD,
    0
  );
}
// Collect the ID of every element in the current document that carries an id attribute.
var ids = Array.from(document.querySelectorAll('[id]')).map(el => el.id);
var ids_bytes = total_bytes_for_ids(ids);
console.log(ids_bytes);

var non_parsoid_ids = ids.filter(
  // Parsoid IDs are prefixed with "mw" but some reference IDs are prefixed with "mw-".
  // An ID is therefore treated as non-Parsoid when it lacks the "mw" prefix
  // entirely, or when it has the longer "mw-" prefix.
  id => !id.startsWith('mw') || id.startsWith('mw-')
);
var non_parsoid_ids_bytes = total_bytes_for_ids(non_parsoid_ids);
console.log(non_parsoid_ids_bytes);

// Report both totals as a fraction of the whole page size.
console.log(ids_bytes / TOTAL_BYTES);
console.log(non_parsoid_ids_bytes / TOTAL_BYTES);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88845 | |
34107 | |
0.05613412799554187 | |
0.021549515488141668 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment