Last active
March 16, 2024 07:19
-
-
Save webdev23/0fc20feb866aaad04a515fbfae419a7c to your computer and use it in GitHub Desktop.
💾RePack bookmarklet. Dump static page in one html file, highlight contents, embed styles, convert images as data-uri, remove scripts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Bookmarklet | |
// Single-line bookmarklet form of the RePack script (the same logic is shown
// pretty-printed further down under "The code").
// What the code below does, in order:
//   - turns document.designMode on and, on every mouseup, paints the current
//     selection with a chartreuse background and clears the selection;
//   - concatenates the cssText of every rule of every document stylesheet,
//     passing each @import / url(...) target through the helper `c`;
//   - empties every <link>, fetches every <source> and <img> `src` and
//     replaces it with the FileReader data URL, blanks iframe src/sandbox;
//   - appends the collected CSS as one <style>, adds <base>, two <meta
//     http-equiv> elements (Content-Security-Policy, X-Frame-Options) and a
//     provenance comment, empties every <script>, blanks <img> src values
//     that still start with http:// or https://;
//   - after 1e4 ms turns designMode off and clicks a download link whose href
//     is a Blob of document.documentElement.outerHTML.
// NOTE(review): the helper `c` has no return statement, so `a=c(r)` is
//   undefined and every rewritten target becomes the text "undefined"; the
//   resulting url("undefined") occurrences are stripped before insertion.
// NOTE(review): `o.onerror=console.log(n,t)` calls console.log immediately
//   and assigns its return value instead of installing an error handler.
// NOTE(review): in this listing a raw line break sits inside the CSP string
//   literal (after "style-src 'none'; ") - confirm whether that is in the
//   original file or an artifact of how the page was captured.
javascript:void (()=>{let e="";try{document.designMode="on",onmouseup=()=>{document.execCommand("backColor",!1,"chartreuse"),window.getSelection().removeAllRanges()};var n=/^\s*('|")/;[...document.styleSheets].forEach((t=>{[...t.rules].forEach((t=>{var o,c;e+=(o=t.cssText,c=function(e){var n;(n=e,fetch(n).then((e=>e.blob())).then((e=>new Promise(((n,t)=>{const o=new FileReader;o.onloadend=()=>n(o.result),o.onerror=console.log(n,t),o.readAsDataURL(e)}))))).then((e=>{console.log("RESULT:",e)}))},[/(@import\s+)(')(.+?)(')/gi,/(@import\s+)(")(.+?)(")/gi,/(url\s*\()(\s*')([^']+?)(')/gi,/(url\s*\()(\s*")([^"]+?)(")/gi,/(url\s*\()(\s*)([^\s'")].*?)(\s*\))/gi].reduce((function(e,t,o){return e.replace(t,(function(e,t,o,r,s){var a=c(r);return console.log(t,o,a,s),n.test(a)&&n.test(o)&&(o=s=""),t+o+a+s}))}),o))}))})),[...document.querySelectorAll("link")].forEach((e=>{e.outerHTML=""})),[...document.querySelectorAll("source")].forEach((e=>{var n;(n=e.src,fetch(n).then((e=>e.blob())).then((e=>new Promise(((n,t)=>{const o=new FileReader;o.onloadend=()=>n(o.result),o.onerror=t,o.readAsDataURL(e)}))))).then((n=>{console.log("RESULT SRC SOURCE:",n),e.src=n,e.srcset=""}))})),[...document.querySelectorAll("iframe")].forEach((e=>{e.sandbox="",e.src=""})),[...document.querySelectorAll("img")].forEach((e=>{var n;(n=e.src,fetch(n).then((e=>e.blob())).then((e=>new Promise(((n,t)=>{const o=new FileReader;o.onloadend=()=>n(o.result),o.onerror=t,o.readAsDataURL(e)}))))).then((n=>{console.log("RESULT:",n),e.src=n}))})),document.body.appendChild(Object.assign(document.createElement("style"),{textContent:e.split('url("undefined")').join("")})),document.head.appendChild(Object.assign(document.createElement("base"),{href:new URL(document.URL).origin})),document.head.appendChild(Object.assign(document.createElement("meta"),{httpEquiv:"Content-Security-Policy",content:"object-src 'none'; connect-src 'none' default-src 'none'; script-src 'none'; connect-src 'none'; img-src 'none'; style-src 'none'; 
base-uri 'none';form-action 'none'"})),document.head.appendChild(Object.assign(document.createElement("meta"),{httpEquiv:"X-Frame-Options",content:"deny"})),document.head.prepend(Object.assign(document.createComment("\n\n-- Page dumped using %F0%9F%92%BERePack bookmarklet https://gist.github.com/webdev23/0fc20feb866aaad04a515fbfae419a7c\n-- From url "+document.URL+"\n\n"),{})),[...document.scripts].forEach((e=>{e.outerHTML=""})),[...document.querySelectorAll("img")].forEach((e=>{0!=e.src.indexOf("http://")&&0!=e.src.indexOf("https://")||(e.src="")})),setTimeout((function(){let e=document.createElement("a");e.download="%F0%9F%92%BE_"+new URL(document.location.href).origin+"_"+document.title+".html",e.href=URL.createObjectURL(new Blob([document.documentElement.outerHTML])),document.designMode="off",e.click()}),1e4)}catch(e){alert("Error, offline.\nThis page has very strict Cross Origin Sharing Policies (CORS)\nand or Content Security Policies (CSP) against Cross Site Scripting (X-XSS-Protection mode block).\nThe download of assets can't succeed from a browser context.\n:(\n"+e.message)}})(); | |
### The target:
Save a web page as a single file that works offline: a fully static HTML page with embedded assets.
Highlight important text.
### The problem:
When saving a page using "Save page as", the browser creates an HTML file and one or more folders with tons of assets, images, libraries and styles.
This quickly becomes a mess. Almost no page on the internet is monolithic; most are cluttered with trackers, ad scripts, dynamic components, and more.
Most pages are dynamic: the content is populated after the load, using JavaScript. This often makes a page downloaded with curl or wget unreadable, even broken. Assets are then missing; when we open this local file in a browser, many calls are sent to the original server, but they often fail due to modern CSP, CORS protections and logins.
Relative links get broken. The tab will likely hang indefinitely, even if the internet connection is working.
### The limits:
Those sharing protections are well made, and they make in-browser JavaScript unusable on such content.
Thus this bookmarklet won't run on most social networks, YouTube, Stack Overflow, Reddit, Imgur...
It will work on most internet pages with public content: GitHub, MDN, Gumtree, Wikipedia, etc.
### The process:
To obtain a good rendering of a page, we let our browser build the target page as we see it. See the network tab in the inspector tool: sometimes hundreds of calls are made (!). Everything is loaded.
Click the bookmarklet.
From there, all original scripts are discarded from the DOM.
The page becomes editable for a minimum of 10 seconds. Meanwhile you can change text or highlight important parts with the mouse.
You can watch the console for detailed messages.
All CSS <link> stylesheets are dumped into one single <style>, keeping the cascading behavior. Import rules for fonts and images in CSS url() are fetched again and transformed into inline base64 data URIs.
Images <img> and assets in <source> are also parsed and converted into base64 data URIs.
Iframes are silenced and sandboxed.
Relative links are made usable by adding a <base> element.
New CSP and X-Frame-Options rules are added to the document, with a view to disallowing all internet calls, if any.
After about 10 seconds, the page arrives as a new download.
Open it and look at the inspector tools. If everything is all right, the page renders well, the network tab in the inspector tool is empty, and the console is empty. The page is then fully static with embedded assets.
### The code:
/**
 * RePack: dump the page currently displayed into one self-contained HTML file
 * and offer it as a download.
 *
 * Sequence:
 *   1. The document is made editable (designMode) and every mouse selection
 *      is highlighted in chartreuse.
 *   2. <script> elements are removed and iframes are emptied.
 *   3. Every readable stylesheet, including sheets pulled in by @import, is
 *      flattened into one CSS string whose url(...) targets are replaced by
 *      data: URIs.
 *   4. <img> and <source src> assets are replaced by data: URIs.
 *   5. Once every download has settled AND at least EDIT_WINDOW_MS have
 *      elapsed, the <link> elements are swapped for a single <style>, a
 *      <base>, a restrictive Content-Security-Policy and a provenance comment
 *      are added, and the serialised document is downloaded.
 *
 * An asset that cannot be fetched (CORS, HTTP error, network) is logged and
 * skipped; it never aborts the run. Unexpected errors are reported through
 * alert().
 *
 * Only block comments are used so the source can be joined onto one line.
 */
(() => {
  /* Floppy-disk emoji as an escape sequence: no URL encoding needed. */
  const FLOPPY_DISK = "\u{1F4BE}";

  /* Minimum time the page stays editable before the download starts. */
  const EDIT_WINDOW_MS = 1e4;

  /* url(...) in CSS text: double-quoted, single-quoted or bare target. */
  const CSS_URL = /url\(\s*(?:"([^"]*)"|'([^']*)'|([^'")\s][^)]*?))\s*\)/gi;

  /*
   * Policy written into the saved page: no network access at all, while the
   * embedded data: assets and the inline <style> stay usable.
   */
  const OFFLINE_CSP = [
    "default-src 'none'",
    "object-src 'none'",
    "script-src 'none'",
    "connect-src 'none'",
    "img-src data:",
    "media-src data:",
    "font-src data:",
    "style-src 'unsafe-inline'",
    "base-uri 'none'",
    "form-action 'none'",
  ].join("; ");

  const FAILURE_NOTICE =
    "Error, offline.\nThis page has very strict Cross Origin Sharing Policies (CORS)\nand or Content Security Policies (CSP) against Cross Site Scripting (X-XSS-Protection mode block).\nThe download of assets can't succeed from a browser context.\n:(\n";

  /** Tell the user that the dump failed. */
  const reportFailure = (err) => {
    alert(FAILURE_NOTICE + ((err && err.message) || String(err)));
  };

  /** Paint the current selection, then clear it. */
  const highlightSelection = () => {
    document.execCommand("backColor", false, "chartreuse");
    window.getSelection().removeAllRanges();
  };

  /** Read a Blob into a data: URL. */
  const readAsDataUrl = (blob) =>
    new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error);
      reader.readAsDataURL(blob);
    });

  /* One download per distinct URL, shared by CSS and element references. */
  const dataUrlCache = new Map();

  /**
   * Download `url` and resolve to its data: URL, or to null when the asset
   * cannot be fetched. Never rejects.
   */
  const toDataUrl = (url) => {
    if (!dataUrlCache.has(url)) {
      const job = fetch(url)
        .then((response) => {
          /* An error page must not be embedded as if it were the asset. */
          if (!response.ok) {
            throw new Error("HTTP " + response.status);
          }
          return response.blob();
        })
        .then(readAsDataUrl)
        .catch((err) => {
          console.log("RePack: could not embed", url, err);
          return null;
        });
      dataUrlCache.set(url, job);
    }
    return dataUrlCache.get(url);
  };

  /**
   * Resolve a raw CSS url() target against `baseUrl`.
   * Returns null for targets that must be left alone: empty values,
   * fragment-only references, data: URIs and unparsable URLs.
   */
  const toAbsolute = (raw, baseUrl) => {
    const target = raw.trim();
    if (target === "" || target.startsWith("#") || /^data:/i.test(target)) {
      return null;
    }
    try {
      return new URL(target, baseUrl).href;
    } catch (err) {
      console.log("RePack: unparsable CSS url", target, err);
      return null;
    }
  };

  /**
   * Replace every url(...) target in `cssText` with a data: URI.
   * Targets are resolved against `baseUrl` (the owning stylesheet), because
   * that is how the browser resolves them. A target that cannot be fetched is
   * kept as an absolute URL.
   */
  const embedCssUrls = (cssText, baseUrl) => {
    const wanted = new Set();
    CSS_URL.lastIndex = 0;
    let found = CSS_URL.exec(cssText);
    while (found !== null) {
      const absolute = toAbsolute(found[1] || found[2] || found[3] || "", baseUrl);
      if (absolute !== null) {
        wanted.add(absolute);
      }
      found = CSS_URL.exec(cssText);
    }
    if (wanted.size === 0) {
      return Promise.resolve(cssText);
    }
    const downloads = [...wanted].map((absolute) =>
      toDataUrl(absolute).then((dataUrl) => [absolute, dataUrl])
    );
    return Promise.all(downloads).then((pairs) => {
      const embedded = new Map(pairs);
      return cssText.replace(CSS_URL, (whole, doubleQuoted, singleQuoted, bare) => {
        const absolute = toAbsolute(doubleQuoted || singleQuoted || bare || "", baseUrl);
        if (absolute === null) {
          return whole;
        }
        return 'url("' + (embedded.get(absolute) || absolute) + '")';
      });
    });
  };

  /** Keep a stylesheet's media condition once its rules are merged. */
  const wrapInMedia = (css, mediaList) => {
    const query = mediaList ? mediaList.mediaText : "";
    return query && css ? "@media " + query + " {\n" + css + "\n}" : css;
  };

  /**
   * Flatten one stylesheet into CSS text with embedded assets.
   * Imported sheets are inlined in place of their @import rule, since an
   * @import is only valid at the very top of a stylesheet. Resolves to "" for
   * disabled sheets and for sheets whose rules cannot be read.
   */
  const collectCss = (sheet, pageBase) => {
    if (sheet.disabled) {
      return Promise.resolve("");
    }
    let rules;
    try {
      rules = [...sheet.cssRules];
    } catch (err) {
      /* Cross-origin sheets served without CORS headers throw here. */
      console.log("RePack: stylesheet not readable, skipped:", sheet.href, err);
      return Promise.resolve("");
    }
    const baseUrl = sheet.href || pageBase;
    const parts = rules.map((rule) => {
      if (rule instanceof CSSImportRule) {
        return rule.styleSheet
          ? collectCss(rule.styleSheet, pageBase).then((css) => wrapInMedia(css, rule.media))
          : Promise.resolve("");
      }
      return embedCssUrls(rule.cssText, baseUrl);
    });
    return Promise.all(parts).then((chunks) => chunks.filter(Boolean).join("\n"));
  };

  /**
   * Point `element.src` at a data: URL of `url`.
   * The attribute is removed when the asset cannot be fetched, so that no
   * network reference is left behind.
   */
  const embedSrc = (element, url) => {
    if (!url || /^data:/i.test(url)) {
      return Promise.resolve();
    }
    return toDataUrl(url).then((dataUrl) => {
      if (dataUrl === null) {
        element.removeAttribute("src");
      } else {
        element.src = dataUrl;
      }
    });
  };

  /** Serialise the document, doctype included, and start the download. */
  const download = () => {
    const doctype = document.doctype
      ? new XMLSerializer().serializeToString(document.doctype) + "\n"
      : "";
    const page = new Blob([doctype, document.documentElement.outerHTML], {
      type: "text/html;charset=utf-8",
    });
    const anchor = document.createElement("a");
    anchor.download = FLOPPY_DISK + "_" + location.origin + "_" + document.title + ".html";
    anchor.href = URL.createObjectURL(page);
    anchor.click();
  };

  /** Apply the final DOM changes and download the result. */
  const finalize = (css, pageBase) => {
    window.removeEventListener("mouseup", highlightSelection);
    document.designMode = "off";

    document.querySelectorAll("link").forEach((link) => link.remove());

    /* srcset would take precedence over the embedded src. */
    document.querySelectorAll("img[srcset], source[srcset]").forEach((element) => {
      element.removeAttribute("srcset");
      element.removeAttribute("sizes");
    });

    /* Images added to the page after the run started are still remote. */
    document.querySelectorAll("img").forEach((img) => {
      if (/^https?:/i.test(img.src)) {
        img.removeAttribute("src");
      }
    });

    document.body.appendChild(
      Object.assign(document.createElement("style"), { textContent: css })
    );

    /* Use the page's own base URL so relative links keep their meaning. */
    const base =
      document.querySelector("base[href]") ||
      document.head.appendChild(document.createElement("base"));
    base.href = pageBase;

    /* The Blob is written as UTF-8; declare it in the file itself. */
    document.querySelectorAll("meta[charset]").forEach((meta) => meta.remove());
    const charset = document.createElement("meta");
    charset.setAttribute("charset", "utf-8");
    document.head.prepend(charset);

    document.head.appendChild(
      Object.assign(document.createElement("meta"), {
        httpEquiv: "X-Frame-Options",
        content: "deny",
      })
    );
    document.head.prepend(
      document.createComment(
        "\n\n-- Page dumped using " +
          FLOPPY_DISK +
          "RePack bookmarklet https://gist.github.com/webdev23/0fc20feb866aaad04a515fbfae419a7c\n-- From url " +
          document.URL +
          "\n\n"
      )
    );

    /* Added last: the policy also applies to the live page once inserted. */
    document.head.appendChild(
      Object.assign(document.createElement("meta"), {
        httpEquiv: "Content-Security-Policy",
        content: OFFLINE_CSP,
      })
    );

    download();
  };

  try {
    const pageBase = document.baseURI;

    document.designMode = "on";
    window.addEventListener("mouseup", highlightSelection);

    document.querySelectorAll("script").forEach((script) => script.remove());
    document.querySelectorAll("iframe").forEach((frame) => {
      frame.sandbox = "";
      frame.src = "";
    });

    /* All reads of the live DOM/CSSOM happen synchronously, right here. */
    const cssJob = Promise.all(
      [...document.styleSheets].map((sheet) =>
        collectCss(sheet, pageBase).then((css) => wrapInMedia(css, sheet.media))
      )
    ).then((sheets) => sheets.filter(Boolean).join("\n"));

    const imageJobs = [...document.querySelectorAll("img")].map((img) =>
      embedSrc(img, img.currentSrc || img.src)
    );
    const sourceJobs = [...document.querySelectorAll("source[src]")].map((source) =>
      embedSrc(source, source.src)
    );
    const editWindow = new Promise((resolve) => {
      setTimeout(resolve, EDIT_WINDOW_MS);
    });

    Promise.all([cssJob, editWindow, ...imageJobs, ...sourceJobs])
      .then(([css]) => {
        finalize(css, pageBase);
      })
      .catch(reportFailure);
  } catch (err) {
    reportFailure(err);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment