-
-
Save graymouser/a33fbb75f94f08af7e36 to your computer and use it in GitHub Desktop.
/*
After purchasing a humble book bundle, go to your download page for that bundle.
Open a console window for the page and paste in the below javascript
*/
// For every link whose visible text is exactly "MOBI", add a hidden iframe to
// the page and point it at the link's `data-web` URL so the browser starts the
// download without navigating away. Requires jQuery (`$`) on the page.
$('a').each(function (i) {
  if ($.trim($(this).text()) === 'MOBI') {
    // The link index keeps every iframe id unique.
    $('body').append('<iframe id="dl_iframe_' + i + '" style="display:none;">');
    document.getElementById('dl_iframe_' + i).src = $(this).data('web');
  }
});
FYI, for regular wget (e.g. Unix, Linux, Mac), it's just -O, not -Outfile (WHY does powershell have to be different?). So you need to modify the above to change -Outfile
to -O
Although Mac doesn't have wget installed by default, in that case using curl, modify the IF statement to be:
if (a.href.startsWith("https://dl.humble.com") && a.href.includes("pdf")) cmds += "curl \"" + a.href + "\" -o " + removeExtra(a.href) + ".pdf \n";
This code works if you set Firefox to save PDFs instead of previewing them (Firefox > Preferences > Applications > Adobe PDF document: Save File):
/**
 * Record describing one book of a bundle; meant to be called with `new`.
 *
 * @param {string} title - Book title.
 * @param {string} author - Author line.
 * @param {string[]} formats - Labels of the available download formats.
 */
function Book(title, author, formats) {
  // Same three own properties, created in the same order.
  Object.assign(this, { title, author, formats });
};
// Walks every `.subproduct-selector` row: clicks it, records a Book (title,
// author, format labels) and logs each download button found in the document.
// When all rows have been visited the collected books are printed as a table
// and passed to the console's `copy()` helper.
//
// Top-level names stay `var` so the snippet can be pasted into the same
// console session more than once.

// Change this to non-zero to download
var seconds_between_switch_book = 0; // 10;
var seconds_between_download = 0; // 3;
var books = [];
var rows = document.querySelectorAll('.subproduct-selector');

rows.forEach((item, item_index) => {
  setTimeout(() => {
    item.click();

    const title = item.querySelector('h2').title;
    const author = item.querySelector('p').innerText;
    const buttons = document.querySelectorAll('div.js-download-button');
    const formats = Array.from(buttons, (button) => button.querySelector('h4').innerText);
    books.push(new Book(title, author, formats));

    buttons.forEach((download_item, download_index) => {
      setTimeout(() => {
        // The format label is the second child node of the button's <h4>.
        const format = download_item.querySelector('h4').childNodes[1].data;
        console.log(item_index, download_index, title, format);
        // uncomment this to download
        //download_item.click();
      }, seconds_between_download * 1000 * download_index);
    });
  }, seconds_between_switch_book * 1000 * item_index);
});

// Scheduled one "book slot" after the last row's timer.
setTimeout(() => {
  console.table(books);
  copy(books);
}, (rows.length + 1) * 1000 * seconds_between_switch_book);
So I'm currently downloading just about everything to put in a Calibre library. Since some of the bundles have some repeat content (looking at you, Make), I updated the @KurtBurgess script to test the working directory for a copy of the current file and skip it if present:
cmds = "";
/**
 * Appends one PowerShell line to the global `cmds` script: skip the file with
 * a warning if it already exists in the working directory, otherwise fetch it
 * with `wget ... -Outfile`.
 *
 * @param {{href: string}} a - Anchor (or anchor-like object) to download.
 * @param {string} ext - File extension without the leading dot.
 */
function buildCommand(a, ext) {
  const target = `${removeExtra(a.href)}.${ext}`;
  const skipBranch = `{Write-Warning "${target} exists, skipping "}`;
  const fetchBranch = `{ wget "${a.href}" -Outfile ${target}}`;
  cmds += `If(Test-Path -Path "${target}") ${skipBranch} Else ${fetchBranch}\n`;
}
/**
 * Derives a bare file name from a Humble download URL: drops the
 * `https://dl.humble.com/` prefix and everything from the first `.` onwards
 * (extension and query string).
 *
 * @param {string} a2 - Download URL.
 * @returns {string} File name without extension.
 */
function removeExtra(a2) {
  const path = a2.replace('https://dl.humble.com/', '');
  const dot = path.indexOf('.');
  if (dot === -1) {
    // No extension: previously substring(0, -1) produced an empty name.
    // Keep the path and drop only a query string, if there is one.
    return path.split('?')[0];
  }
  return path.slice(0, dot);
}
// Queue a download command for every dl.humble.com link whose URL mentions
// one of the wanted formats, then print the assembled PowerShell script.
// The loop variable is declared so it no longer leaks as an implicit global.
for (const a of document.getElementsByTagName("a")) {
  if (!a.href.startsWith("https://dl.humble.com")) continue;
  // Same order as before: pdf, then epub, then cbz.
  for (const ext of ['pdf', 'epub', 'cbz']) {
    if (a.href.includes(ext)) buildCommand(a, ext);
  }
}
console.log(cmds);
Next steps, adding a bash variant, and seeing if I can remove the repeated if statements for a some
// Collects the href of every link whose text ends in MOBI, EPUB, PDF (with an
// optional "(H.)" style suffix), CBZ or Download, and shows the list, one URL
// per line, in a <pre> at the top of the page so it can be saved for `wget -i`.
var pattern = /(MOBI|EPUB|PDF( ?\(H.\))?|CBZ|Download)$/i;
var nodes = document.getElementsByTagName('a');
var downloadCmd = '';
// for...of visits only the anchor elements; for...in also walked `length`,
// `item` and `namedItem` and leaked a global index variable.
for (const a of nodes) {
  const href = a.getAttribute('href');
  if (a.text && pattern.test(a.text.trim()) && href !== null) {
    // One bare URL per line; the unbalanced trailing quote is no longer appended.
    downloadCmd += href + "\n";
  }
}
var output = document.createElement("pre");
output.textContent = downloadCmd;
// Fall back to <body> when the page has no #papers-content container.
(document.getElementById("papers-content") || document.body).prepend(output);
Copy/Paste the links in one txt and run wget:
wget --no-check-certificate --content-disposition -r -H -np -nH -N --cut-dirs=1 -e robots=off -l1 -i ./linksfilename.txt -B 'https://dl.humble.com/'
A modified version of @kellerkindt
// Print one wget command per download link, skipping the md5 toggle links.
var nodes_a = document.querySelectorAll('.downloads a:not(.dlmd5)');
// `const` keeps the loop variable from becoming an implicit global.
for (const node of nodes_a) {
  console.log('wget --content-disposition', node.href);
}
If you're using the above, you may need to place the generated link in double quotes so your shell interprets the ampersand literally. I tried to tweak this but I hit an issue with whitespace which would be easy for someone who actually knows Javascript to fix. Sadly this person is not me.
// Print one wget command per download link (md5 toggle links excluded), with
// the URL wrapped in double quotes so the shell does not interpret `&`.
var nodes_a = document.querySelectorAll('.downloads a:not(.dlmd5)');
// `const` keeps the loop variable from becoming an implicit global.
for (const node of nodes_a) {
  // Drop any literal spaces from the URL.
  const url = node.href.replace(/ /g, '');
  console.log(`wget --content-disposition "${url}"`);
}
Maybe this works. Apologies for the hackiness. I'm sure a better alteration is possible but, like I say, I don't know JavaScript.
I like my files to be organized, so here's my take on it.
// Build one curl command per download link, saving each file into a directory
// named after its book (`data-human-name` of the enclosing `.row`), then print
// all commands joined with '; '.
const commands = [];
for (const row of document.querySelectorAll('.row')) {
  const bookTitle = row.dataset.humanName;
  for (const anchor of row.querySelectorAll('.downloads .flexbtn a')) {
    const downloadLink = anchor.href;
    // File name = the URL path after ".com/", up to the query string.
    const fileName = downloadLink.match(/\.com\/([^?]+)/)[1];
    commands.push(`curl --create-dirs -o "${bookTitle}/${fileName}" "${downloadLink}"`);
  }
}
console.log(commands.join('; '));
Instead of wget this uses curl, because wget's -O does not create directories automatically (and while -P does, -O and -P cannot be used together).
The resulting directory tree is like this:
.
├── Advanced Penetration Testing
│ ├── advancedpenetrationtesting.epub
│ └── advancedpenetrationtesting.pdf
├── Applied Cryptography: Protocols, Algorithms and Source Code in C, 20th Anniversary Edition
│ ├── applied_cryptography_protocols_algorithms_and_source_code_in_c.epub
│ └── applied_cryptography_protocols_algorithms_and_source_code_in_c.pdf
└── Cryptography Engineering: Design Principles and Practical Applications
├── cryptography_engineering_design_principles_and_practical_applications.epub
├── cryptography_engineering_design_principles_and_practical_applications.pdf
└── cryptography_engineering_design_principles_and_practical_applications.prc
I took @jmerle's code and changed the last line:
console.log(commands.join('; '));
to:
console.log(commands.join(' && '));
That way, it didn't try to download everything at once.
If you want to verify your downloads, here's the code to make the md5 hashes visible:
// Click every md5 toggle link so the hashes become visible.
var md5_links = document.querySelectorAll(".dlmd5");
// forEach visits only the elements. The earlier for...in loop also enumerated
// NodeList members such as `length` and `item`, so it threw a TypeError after
// the last link and leaked a global `i`.
md5_links.forEach(function (link) {
  link.click();
});
OR...
If you are like me and have way too many book bundles, you might be interested in something like the following code.
/**
 * Extracts the bundle name from the page title, i.e. the text between
 * "Humble Book Bundle: " and the last " (" in `document.title`.
 *
 * @returns {string} The bundle name, or the trimmed page title when the title
 *   does not follow that pattern (previously this case threw a TypeError).
 */
function getTitle() {
  // No `g` flag: a single exec needs no lastIndex state.
  const match = /^Humble Book Bundle: (.*) \(/.exec(document.title);
  return match ? match[1] : document.title.trim();
}
/**
 * Clicks every `.dlmd5` element whose trimmed text is still the literal
 * "md5"; elements showing any other text are left alone.
 */
function showHashes() {
  for (const link of document.querySelectorAll('.dlmd5')) {
    const label = link.innerText.trim();
    if (label !== 'md5') continue;
    link.click();
  }
}
/**
 * Collects one record per download entry on the page.
 *
 * Calls showHashes() first so that each `a.dlmd5` holds its hash when read,
 * then walks every `.row` and each `.downloads .download` inside it.
 *
 * @returns {Array<{bundleTitle: string, bookTitle: string, filename: string,
 *   downloadLink: string, md5: string}>} Records in page order.
 */
function gatherInfo() {
  const bundleTitle = getTitle();
  showHashes();

  const data = [];
  for (const row of document.querySelectorAll('.row')) {
    const bookTitle = row.dataset.humanName;
    for (const dl of row.querySelectorAll('.downloads .download')) {
      const downloadLink = dl.querySelector('.flexbtn a').href;
      // File name = the URL path after ".com/", up to the query string.
      const filename = downloadLink.match(/\.com\/([^?]+)/)[1];
      const md5 = dl.querySelector('a.dlmd5').innerText.trim();
      data.push({ bundleTitle, bookTitle, filename, downloadLink, md5 });
    }
  }
  return data;
}
/**
 * Prints two things to the console for the current bundle page:
 *   1. a chain of curl commands joined with ' && ' that saves every file to
 *      "<bundle>/<book>/<file>", and
 *   2. one "<md5> <bundle>/<book>/<file>" line per file.
 *
 * The per-file fields are destructured into block-scoped constants. They used
 * to be assigned without a declaration, which created five global variables
 * and throws a ReferenceError in strict mode.
 */
function downloadBookBundle() {
  const commands = [];
  const md5Sums = [];
  // for...of iterates the records themselves instead of string keys.
  for (const { bundleTitle, bookTitle, filename, downloadLink, md5 } of gatherInfo()) {
    const target = `${bundleTitle}/${bookTitle}/${filename}`;
    commands.push(`curl --create-dirs -o "${target}" "${downloadLink}"`);
    md5Sums.push(`${md5} ${target}`);
  }
  console.log(commands.join(' && '));
  console.log(md5Sums.join('\n'));
}
downloadBookBundle();
It is based upon @jmerle's approach and is also forked here: https://gist.github.com/fsteffek/bf4ac1e3d2601629a6c9cca94b5649f6.
What does it do?
- It prints the command line command for curl to download your Humble Book Bundle. I modified it, so each bundle is saved into a separate folder:
.
├── Bundle Name
│ └── Book Name
│ └── Files
└── More Bundles
- It prints the content of an md5 file, which md5sum can read/check. Paste it into a file like hb_all_books.md5:
...
5b3e6de1fc4c45be45b1299ea50a6a7d Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.epub
a14391f6971da830d064c2c0fd132019 Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.mobi
...
... and check it with md5sum -c hb_all_books.md5:
Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.epub: OK
Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.mobi: OK
...
Feel free to tell me how to make this script more readable, convenient and generally just better.
My JavaScript fork of this script is still working today: https://gist.github.com/zuazo/a91ecbb97b90ef3ef9ce8caf361199a2
Building on what @azdle wrote I have modified the script to only select PDF files and changed the syntax for Windows PowerShell's wget command:
It's ugly but it works,
Thanks @azdle, couldn't have done it without your code to start